@@ -126,6 +126,7 @@ <h1 class="text-nowrap mt-5" style="font-size: xx-large;">
126126
127127 < div class ="container d-flex flex-column align-items-center gap-3 mt-5 ">
128128 < h3 > Win-rate Leaderboard</ h3 >
129+ < p > 📊 Ranking metrics: WR (Win-Rate; %) based on task- and model-wise competition (i.e., pairwise DPS).</ p >
129130 < p > 📝 Notes: the default prompt does not emphasize efficiency requirements as our work shows such emphasis
130131 might degrade both efficiency and correctness for some weak models. Yet, "(⏩)" marks models using
131132 performance-encouraging prompts as they might be able to accurately understand such needs.</ p >
@@ -275,15 +276,15 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
275276 }
276277 const globalData = data ;
277278 const HeatmapTable = heatmapTable ;
278- const winrate_tag = "🏆 Win Rate (%) " ;
279+ const winrate_tag = "🏆 Model WR " ;
279280
280281 // each row represents a model
281282 const theaders = [
282283 "#" , // rank
283284 "Model" , // model name
284- "DPS" ,
285- // "DPS Norm ",
286- "pass@1" ,
285+ // "DPS",
286+ // "pass@1 ",
287+ "Task WR" , // task winrate
287288 winrate_tag , // computed over the same set of passing solutions
288289 ] ;
289290
@@ -310,7 +311,7 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
310311 data = data . map (
311312 ( [ modelId , value ] ) => new Map ( [ [ "modelId" , modelId ] , ...value ] ) ,
312313 )
313- data . sort ( ( a , b ) => b . get ( "win_rate " ) - a . get ( "win_rate " ) ) ;
314+ data . sort ( ( a , b ) => b . get ( "model_win_rate " ) - a . get ( "model_win_rate " ) ) ;
314315
315316 var tbody = document . createElement ( "tbody" ) ;
316317 // add rank
@@ -329,10 +330,10 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
329330 if ( modelId . includes ( "--" ) ) {
330331 modelName = modelId . split ( "--" ) [ 1 ] ;
331332 }
332- var cur_win_rate = row . get ( 'win_rate ' ) . toFixed ( 3 ) ;
333- if ( last_best != cur_win_rate ) {
333+ var cur_model_wr = row . get ( 'model_win_rate ' ) . toFixed ( 3 ) ;
334+ if ( last_best != cur_model_wr ) {
334335 rank += n_last_best ;
335- last_best = cur_win_rate ;
336+ last_best = cur_model_wr ;
336337 rankCell . textContent = rank ;
337338 n_last_best = 1 ;
338339 } else {
@@ -354,19 +355,23 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
354355 modelLink . classList . add ( "text-nowrap" ) ;
355356 modelCell . appendChild ( modelLink ) ;
356357 dataRow . appendChild ( modelCell ) ;
357- dpsRow = document . createElement ( "td" ) ;
358- dpsRow . textContent = row . get ( "dps" ) . toFixed ( 1 ) ;
359- dataRow . appendChild ( dpsRow ) ;
360- // dpsNormRow = document.createElement("td");
361- // dpsNormRow.textContent = row.get("dps_norm").toFixed(1);
362- // dataRow.appendChild(dpsNormRow);
363- passRow = document . createElement ( "td" ) ;
364- passRow . textContent = row . get ( "pass@1" ) . toFixed ( 1 ) ;
365- dataRow . appendChild ( passRow ) ;
366- winRateRow = document . createElement ( "td" ) ;
367- winRateRow . textContent = ( row . get ( 'win_rate' ) * 100 ) . toFixed ( 1 ) ;
368- winRateRow . style . backgroundColor = "#EEFFEE" ;
369- dataRow . appendChild ( winRateRow ) ;
358+
359+ // dpsRow = document.createElement("td");
360+ // dpsRow.textContent = row.get("dps").toFixed(1);
361+ // dataRow.appendChild(dpsRow);
362+ // passRow = document.createElement("td");
363+ // passRow.textContent = row.get("pass@1").toFixed(1);
364+ // dataRow.appendChild(passRow);
365+
366+ taskWinRateRow = document . createElement ( "td" ) ;
367+ taskWinRateRow . textContent = ( row . get ( 'task_win_rate' ) * 100 ) . toFixed ( 1 ) ;
368+ dataRow . appendChild ( taskWinRateRow ) ;
369+
370+
371+ modelWinRateRow = document . createElement ( "td" ) ;
372+ modelWinRateRow . textContent = ( row . get ( 'model_win_rate' ) * 100 ) . toFixed ( 1 ) ;
373+ modelWinRateRow . style . backgroundColor = "#EEFFEE" ;
374+ dataRow . appendChild ( modelWinRateRow ) ;
370375 tbody . appendChild ( dataRow ) ;
371376 } ) ;
372377 table . appendChild ( tbody ) ;
@@ -393,13 +398,13 @@ <h2 id="sponsor" class="text-nowrap mt-5">🤗 Acknowledgment</h2>
393398 const modelData = Array . from ( globalData ) . map ( ( [ modelId , value ] ) => ( {
394399 id : modelId ,
395400 name : modelId . includes ( '--' ) ? modelId . split ( '--' ) [ 1 ] : modelId ,
396- winrate : parseFloat ( value . get ( 'win_rate ' ) ) ,
401+ winrate : parseFloat ( value . get ( 'model_win_rate ' ) ) ,
397402 } ) ) ;
398403
399404 // sort by general winrate
400405 modelData . sort ( ( a , b ) => b . winrate - a . winrate ) ;
401406
402- const defaultDisplayNum = 6 ;
407+ const defaultDisplayNum = 7 ;
403408
404409 let selectedXModels = modelData . slice ( 0 , defaultDisplayNum ) . map ( m => m . id ) ;
405410 let selectedYModels = modelData . slice ( 0 , defaultDisplayNum ) . map ( m => m . id ) ;
0 commit comments