-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Test PR 2 #2
base: main
Are you sure you want to change the base?
Test PR 2 #2
Conversation
/add-to-leaderboard --python 3.11 --appworld 0.1.3 --experiment-prefix temp_test |
Latest Leaderboard Entry{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-27",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 4.39
}
}
} |
/remove-from-entry 5509efd4-3e4bf4e2 |
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
1 similar comment
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
/remove-from-leaderboard --entry-id 5509efd4-3e4bf4e2 |
4 similar comments
/remove-from-leaderboard --entry-id 5509efd4-3e4bf4e2 |
/remove-from-leaderboard --entry-id 5509efd4-3e4bf4e2 |
/remove-from-leaderboard --entry-id 5509efd4-3e4bf4e2 |
/remove-from-leaderboard --entry-id 5509efd4-3e4bf4e2 |
Removed the following entry from the leaderboard: {
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-27",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 4.39
}
}
} |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
1 similar comment
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added leaderboard entry{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-29",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0,
"scenario_goal_completion": 0,
"interactions": 4.39
}
}
} |
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
2 similar comments
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
/remove-from-leaderboard 5509efd4-3e4bf4e2 |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added to leaderboard:[ |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added to leaderboard:[
{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-29",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2.0,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 4.39
}
}
}
] |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added to leaderboard:[
{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-29",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2.0,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 4.39
}
}
}
] |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added to leaderboard:[
{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-29",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2.0,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 4.39
}
}
}
] |
/add-to-leaderboard --python 3.11 --appworld 0.1.3 temp_test |
Added to leaderboard:[
{
"id": "5509efd4-3e4bf4e2",
"method": {
"name": "temp",
"tooltip": "temp"
},
"llm": {
"name": "temp",
"tooltip": "temp"
},
"url": "temp",
"date": "2024-11-29",
"test_normal": {
"all": {
"task_goal_completion": 9.5,
"scenario_goal_completion": 7.1,
"interactions": 3.32
},
"level 1": {
"task_goal_completion": 28.1,
"scenario_goal_completion": 21.1,
"interactions": 3.32
},
"level 2": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 3.32
}
},
"test_challenge": {
"all": {
"task_goal_completion": 5.5,
"scenario_goal_completion": 4.3,
"interactions": 4.39
},
"level 1": {
"task_goal_completion": 26.4,
"scenario_goal_completion": 20.8,
"interactions": 4.39
},
"level 2": {
"task_goal_completion": 2.7,
"scenario_goal_completion": 2.0,
"interactions": 4.39
},
"level 3": {
"task_goal_completion": 0.0,
"scenario_goal_completion": 0.0,
"interactions": 4.39
}
}
}
] |
No description provided.