@@ -126,6 +126,9 @@ <h1 class="text-nowrap mt-5" style="font-size: xx-large;">
126126
127127 < div class ="container d-flex flex-column align-items-center gap-3 mt-5 ">
128128 < h3 > Win-rate Leaderboard</ h3 >
129+ < p > 📝 Note: the default prompt does not emphasize efficiency requirements, as our work shows that such emphasis
130+ might degrade both efficiency and correctness for some weaker models. However, "(🔥)" marks models that use
131+ performance-encouraging prompts, since these models may be able to understand such requirements accurately.</ p >
129132 < table id ="leaderboard "
130133 class ="table table-responsive table-striped table-bordered flex-shrink-1 border border-5 ">
131134 </ table >
@@ -233,15 +236,10 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
233236 modelId = modelId [ 1 ] ;
234237 url = hfLinkPrefix + modelOrg + "/" + modelId ;
235238 linkMapping . set ( modelId , url ) ;
236- } else if ( modelId . startsWith ( "gpt-4 -" ) ) {
239+ } else if ( modelId . startsWith ( "o1-" ) || modelId . startsWith ( "gpt -") ) {
237240 linkMapping . set (
238241 modelId ,
239- "https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4" ,
240- ) ;
241- } else if ( modelId . startsWith ( "gpt-3.5-" ) ) {
242- linkMapping . set (
243- modelId ,
244- "https://platform.openai.com/docs/models/gpt-3-5-turbo" ,
242+ "https://platform.openai.com/docs/models" ,
245243 ) ;
246244 } else if ( modelId . startsWith ( "claude-3-" ) ) {
247245 linkMapping . set (
@@ -258,8 +256,6 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
258256 modelId ,
259257 "https://deepmind.google/technologies/gemini/flash/" ,
260258 ) ;
261- } else if ( modelId . startsWith ( "gpt-4o-" ) ) {
262- linkMapping . set ( modelId , "https://openai.com/index/hello-gpt-4o/" ) ;
263259 } else if ( modelId . startsWith ( "deepseek-chat" ) ) {
264260 linkMapping . set ( modelId , "https://chat.deepseek.com/" )
265261 } else if ( modelId == "heatmap_data" ) {
0 commit comments