-
Notifications
You must be signed in to change notification settings - Fork 998
Commit
…taset; Handle vLLM Benign Error (#540)

In this PR:

1. **Support Multi-Model Multi-Category Generation**:
   - The `openfunctions_evaluation.py` can now take a list of model names and a list of test categories as command-line input.
   - Partially addresses #501.
2. **Handling vLLM's Error**:
   - A benign error would occur during the cleanup phase after completing a generation task, causing the pipeline to fail despite generating model results. This issue stems from vLLM and is outside our control. [See this issue](vllm-project/vllm#6145) from the vLLM repo.
   - This is annoying because when users attempt category-specific generation for locally-hosted models (as supported in #512), only the first category result for the first model is generated since the error occurs immediately after.
   - To improve the user experience, we now combine all selected test categories into one task and submit that single task to vLLM, splitting the results afterwards.
   - Note: If multiple locally-hosted models are queued for inference, only the tasks of the first model will complete. Subsequent tasks will still fail due to the cleanup phase error from the first model. Therefore, we recommend running the inference command for one model at a time until vLLM rolls out the fix.
3. **Adding Index to Dataset**:
   - Each test file and possible_answer file now includes an index to help match entries.

This PR **will not** affect the leaderboard score.
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,50 @@ | ||
{"validateUserInput":{"inputField":["userInputField"],"isComplete":[true]}} | ||
{"getActiveDataEntries":{"listElement":["listElement"],"attribute":["data-active", ""],"value":[true,""]}} | ||
{"extractLastTransactionId":{"filepath":["/var/log/db.log"],"status":[["completed","failed"]],"encoding":["utf-8"],"processFunction":["processFunction"]}} | ||
{"submitAtCoordinate":{"action":["submit"],"formId":["loginForm"],"coordinates":[[0.6,0.3]]}} | ||
{"emailFormatValidator":{"email":["example@domain.com"],"domain":["domain.com"]}} | ||
{"manageReactState":{"store":[{"initialState":["initialStateObject"],"reducers":["reducersMap"],"middlewares":[["loggerMiddleware"]],"enhancers":[["applyMiddleware('myMiddleWare')"]]}],"context":["React.createContext()"],"hooks":[{"useStateSelector":"useStateSelectorHook","useDispatchAction":"useDispatchActionHook"}]}} | ||
{"mapTransitions":{"category":["transition"],"limit":[4.0]}} | ||
{"getNextKeyValues":{"ctx":["dataAnalysisContext"],"currentKey":["userId"]}} | ||
{"doesEmailInputExist":{"formElem":["emailForm"],"inputName":["emailAddress"]}} | ||
{"validateApiResponse":{"jsonPayload":["responseData"],"keyToCheck":["expectedKey"],"processingCallback":["processKeyFunction"]}} | ||
{"fetchSalesDepartmentRecords":{"databaseName":["employeeRecords"],"queryFunction":["getSales"]}} | ||
{"prioritizeAndSort":{"items":["myItemList"],"priorityStatus":["urgent"],"ascending":[true]}} | ||
{"performDataFetch":{"apiEndpoint":["https://api.example.com/data"],"requestConfig":[{"method":["GET"]}],"expectedResponse":[{"key":["value"]}],"handleErrors":[true]}} | ||
{"DynamicChartGenerator":{"userData":[["userDataArray"]],"scalingFactor":[3.0],"dashboard":["dashboardElement"],"options":["", {}]}} | ||
{"chartDataAccessorFactory":{"chart":[{"nm":["BarChart"],"mn":["chartModule"]}],"library":["visualizationLibrary"],"configObject":["config"]}} | ||
{"ChartSeriesGenerator":{"labels":["axisLabelsArray"],"data":["dataPointsArray"],"color":["defaultColor"],"chartLayout":["chartLayoutObject"]}} | ||
{"rotateVertices":{"vertices":[[10.0,15.0],[20.0,25.0]],"pivot":[[12.0,17.0]],"angle":[30.0]}} | ||
{"generateNotificationHandler":{"app":["app"],"priorityLevel":[3],"messagingService":["messagingSvc"],"notificationType":[2]}} | ||
{"calculateFinalVelocity":{"time":[5.0],"gravity":[9.81],"initialVelocity":[0.0]}} | ||
{"configureShaderMaterial":{"property":["materialProps"],"textures":["textureList"],"object3D":["meshObject"]}} | ||
{"buttonAddClickHandler":{"element":["myButton"],"callback":["handleButtonClick"],"options":[{"stopPropagation":[true]}]}} | ||
{"findProductById":{"products":[["Product A","Product B","Product C"]],"id":[123]}} | ||
{"resetStateProperty":{"stateProperty":["userSession"]}} | ||
{"createAuthToken":{"username":["johndoe"],"validity":[3600],"options":[{"issuer":["myapp.net"],"role":["admin"],"algorithm":["HS256"]}]}} | ||
{"getUniqueSorted":{"array":[[3,1,2,1,4,3]]}} | ||
{"trackSubmitWithValidation":{"obj":["formHandler"],"validationFlags":[["isRequired","isValidEmail"]]}} | ||
{"contentUpdater":{"elementID":["contentBox"],"newContent":["Hello World"],"action":["update"]}} | ||
{"validateReactProp":{"obj":["serviceProvider"],"componentName":["UserProfile"]}} | ||
{"filterBooksByAuthor":{"library":[["bookA","bookB","bookC"]],"author":["J.K. Rowling"]}} | ||
{"EventScheduler":{"events":[{"setupStage": ["setupStageFunction"],"cleanupStage": ["cleanupStageFunction"]}],"concurrencyLimit":[3.0]}} | ||
{"setText":{"newText":["Hello, World!"],"start":[5.0],"length":[7.0]}} | ||
{"transformAllDecoratorsOfDeclaration":{"node":["myNode"],"container":["myContainer"]}} | ||
{"pollQueue":{"queue":["fileWatchQueue"],"pollingInterval":[500.0],"pollIndex":[0.0],"chunkSize":[10.0]}} | ||
{"emitNewLineBeforeLeadingComments":{"lineMap":["tsLineMap"],"writer":["tsWriter"],"node":[42]}} | ||
{"forEachType":{"type":["unionTypeObj"],"f":["processType"]}} | ||
{"areDeclarationFlagsIdentical":{"left":["parameterObjects"],"right":["variableDeclarationObject"]}} | ||
{"updateBreak":{"node":["breakNode"],"label":["loopEnd"]}} | ||
{"addInitializedPropertyStatements":{"statements":["shapeStatements"],"property":[["width","height"],["height","width"]],"receiver":["shape"]}} | ||
{"getDirectoryToWatchFromFailedLookupLocationDirectory":{"dir":["/projects/myApp/node_modules/react"],"dirPath":["/projects/myApp/node_modules/react"]}} | ||
{"maybeAddJsSyntheticRestParameter":{"declaration":["funcDeclaration"],"parameters":["funcParameters"]}} | ||
{"assignOwnDefaults":{"objectValue":[12.0],"sourceValue":[10.0],"key":["maxItems"],"object":[{}]}} | ||
{"queue":{"worker":["myWorkerFunction"],"concurrency":[5.0],"payload":["", 0.0]}} | ||
{"B":{"t":[5.0]}} | ||
{"invokeCallback":{"callback":["processResult"],"error":["null"],"value":["Operation successful"]}} | ||
{"skipThrough":{"node":["currentNode"],"st":["nodeState"],"c":["processNode"]}} | ||
{"Sde":{"t":["https://github.com/yarnpkg/berry"],"e":[{"startingCwd":["/home/user/projects"]}]}} | ||
{"vOe":{"r":["packageInfo"],"e":["version"],"t":["1.2.3"]}} | ||
{"sTe":{"r":["2023-04-01"],"e":["2023-04-15"],"t":["days"]}} | ||
{"updateDOMListeners":{"oldVnode":["oldVirtualNode"],"vnode":["newVirtualNode"]}} | ||
{"convertEnumeratedValue":{"key":["contenteditable"],"value":["plaintext-only"]}} | ||
{"id": "javascript_0", "ground_truth": {"validateUserInput": {"inputField": ["userInputField"], "isComplete": [true]}}} | ||
{"id": "javascript_1", "ground_truth": {"getActiveDataEntries": {"listElement": ["listElement"], "attribute": ["data-active", ""], "value": [true, ""]}}} | ||
{"id": "javascript_2", "ground_truth": {"extractLastTransactionId": {"filepath": ["/var/log/db.log"], "status": [["completed", "failed"]], "encoding": ["utf-8"], "processFunction": ["processFunction"]}}} | ||
{"id": "javascript_3", "ground_truth": {"submitAtCoordinate": {"action": ["submit"], "formId": ["loginForm"], "coordinates": [[0.6, 0.3]]}}} | ||
{"id": "javascript_4", "ground_truth": {"emailFormatValidator": {"email": ["example@domain.com"], "domain": ["domain.com"]}}} | ||
{"id": "javascript_5", "ground_truth": {"manageReactState": {"store": [{"initialState": ["initialStateObject"], "reducers": ["reducersMap"], "middlewares": [["loggerMiddleware"]], "enhancers": [["applyMiddleware('myMiddleWare')"]]}], "context": ["React.createContext()"], "hooks": [{"useStateSelector": "useStateSelectorHook", "useDispatchAction": "useDispatchActionHook"}]}}} | ||
{"id": "javascript_6", "ground_truth": {"mapTransitions": {"category": ["transition"], "limit": [4.0]}}} | ||
{"id": "javascript_7", "ground_truth": {"getNextKeyValues": {"ctx": ["dataAnalysisContext"], "currentKey": ["userId"]}}} | ||
{"id": "javascript_8", "ground_truth": {"doesEmailInputExist": {"formElem": ["emailForm"], "inputName": ["emailAddress"]}}} | ||
{"id": "javascript_9", "ground_truth": {"validateApiResponse": {"jsonPayload": ["responseData"], "keyToCheck": ["expectedKey"], "processingCallback": ["processKeyFunction"]}}} | ||
{"id": "javascript_10", "ground_truth": {"fetchSalesDepartmentRecords": {"databaseName": ["employeeRecords"], "queryFunction": ["getSales"]}}} | ||
{"id": "javascript_11", "ground_truth": {"prioritizeAndSort": {"items": ["myItemList"], "priorityStatus": ["urgent"], "ascending": [true]}}} | ||
{"id": "javascript_12", "ground_truth": {"performDataFetch": {"apiEndpoint": ["https://api.example.com/data"], "requestConfig": [{"method": ["GET"]}], "expectedResponse": [{"key": ["value"]}], "handleErrors": [true]}}} | ||
{"id": "javascript_13", "ground_truth": {"DynamicChartGenerator": {"userData": [["userDataArray"]], "scalingFactor": [3.0], "dashboard": ["dashboardElement"], "options": ["", {}]}}} | ||
{"id": "javascript_14", "ground_truth": {"chartDataAccessorFactory": {"chart": [{"nm": ["BarChart"], "mn": ["chartModule"]}], "library": ["visualizationLibrary"], "configObject": ["config"]}}} | ||
{"id": "javascript_15", "ground_truth": {"ChartSeriesGenerator": {"labels": ["axisLabelsArray"], "data": ["dataPointsArray"], "color": ["defaultColor"], "chartLayout": ["chartLayoutObject"]}}} | ||
{"id": "javascript_16", "ground_truth": {"rotateVertices": {"vertices": [[10.0, 15.0], [20.0, 25.0]], "pivot": [[12.0, 17.0]], "angle": [30.0]}}} | ||
{"id": "javascript_17", "ground_truth": {"generateNotificationHandler": {"app": ["app"], "priorityLevel": [3], "messagingService": ["messagingSvc"], "notificationType": [2]}}} | ||
{"id": "javascript_18", "ground_truth": {"calculateFinalVelocity": {"time": [5.0], "gravity": [9.81], "initialVelocity": [0.0]}}} | ||
{"id": "javascript_19", "ground_truth": {"configureShaderMaterial": {"property": ["materialProps"], "textures": ["textureList"], "object3D": ["meshObject"]}}} | ||
{"id": "javascript_20", "ground_truth": {"buttonAddClickHandler": {"element": ["myButton"], "callback": ["handleButtonClick"], "options": [{"stopPropagation": [true]}]}}} | ||
{"id": "javascript_21", "ground_truth": {"findProductById": {"products": [["Product A", "Product B", "Product C"]], "id": [123]}}} | ||
{"id": "javascript_22", "ground_truth": {"resetStateProperty": {"stateProperty": ["userSession"]}}} | ||
{"id": "javascript_23", "ground_truth": {"createAuthToken": {"username": ["johndoe"], "validity": [3600], "options": [{"issuer": ["myapp.net"], "role": ["admin"], "algorithm": ["HS256"]}]}}} | ||
{"id": "javascript_24", "ground_truth": {"getUniqueSorted": {"array": [[3, 1, 2, 1, 4, 3]]}}} | ||
{"id": "javascript_25", "ground_truth": {"trackSubmitWithValidation": {"obj": ["formHandler"], "validationFlags": [["isRequired", "isValidEmail"]]}}} | ||
{"id": "javascript_26", "ground_truth": {"contentUpdater": {"elementID": ["contentBox"], "newContent": ["Hello World"], "action": ["update"]}}} | ||
{"id": "javascript_27", "ground_truth": {"validateReactProp": {"obj": ["serviceProvider"], "componentName": ["UserProfile"]}}} | ||
{"id": "javascript_28", "ground_truth": {"filterBooksByAuthor": {"library": [["bookA", "bookB", "bookC"]], "author": ["J.K. Rowling"]}}} | ||
{"id": "javascript_29", "ground_truth": {"EventScheduler": {"events": [{"setupStage": ["setupStageFunction"], "cleanupStage": ["cleanupStageFunction"]}], "concurrencyLimit": [3.0]}}} | ||
{"id": "javascript_30", "ground_truth": {"setText": {"newText": ["Hello, World!"], "start": [5.0], "length": [7.0]}}} | ||
{"id": "javascript_31", "ground_truth": {"transformAllDecoratorsOfDeclaration": {"node": ["myNode"], "container": ["myContainer"]}}} | ||
{"id": "javascript_32", "ground_truth": {"pollQueue": {"queue": ["fileWatchQueue"], "pollingInterval": [500.0], "pollIndex": [0.0], "chunkSize": [10.0]}}} | ||
{"id": "javascript_33", "ground_truth": {"emitNewLineBeforeLeadingComments": {"lineMap": ["tsLineMap"], "writer": ["tsWriter"], "node": [42]}}} | ||
{"id": "javascript_34", "ground_truth": {"forEachType": {"type": ["unionTypeObj"], "f": ["processType"]}}} | ||
{"id": "javascript_35", "ground_truth": {"areDeclarationFlagsIdentical": {"left": ["parameterObjects"], "right": ["variableDeclarationObject"]}}} | ||
{"id": "javascript_36", "ground_truth": {"updateBreak": {"node": ["breakNode"], "label": ["loopEnd"]}}} | ||
{"id": "javascript_37", "ground_truth": {"addInitializedPropertyStatements": {"statements": ["shapeStatements"], "property": [["width", "height"], ["height", "width"]], "receiver": ["shape"]}}} | ||
{"id": "javascript_38", "ground_truth": {"getDirectoryToWatchFromFailedLookupLocationDirectory": {"dir": ["/projects/myApp/node_modules/react"], "dirPath": ["/projects/myApp/node_modules/react"]}}} | ||
{"id": "javascript_39", "ground_truth": {"maybeAddJsSyntheticRestParameter": {"declaration": ["funcDeclaration"], "parameters": ["funcParameters"]}}} | ||
{"id": "javascript_40", "ground_truth": {"assignOwnDefaults": {"objectValue": [12.0], "sourceValue": [10.0], "key": ["maxItems"], "object": [{}]}}} | ||
{"id": "javascript_41", "ground_truth": {"queue": {"worker": ["myWorkerFunction"], "concurrency": [5.0], "payload": ["", 0.0]}}} | ||
{"id": "javascript_42", "ground_truth": {"B": {"t": [5.0]}}} | ||
{"id": "javascript_43", "ground_truth": {"invokeCallback": {"callback": ["processResult"], "error": ["null"], "value": ["Operation successful"]}}} | ||
{"id": "javascript_44", "ground_truth": {"skipThrough": {"node": ["currentNode"], "st": ["nodeState"], "c": ["processNode"]}}} | ||
{"id": "javascript_45", "ground_truth": {"Sde": {"t": ["https://github.com/yarnpkg/berry"], "e": [{"startingCwd": ["/home/user/projects"]}]}}} | ||
{"id": "javascript_46", "ground_truth": {"vOe": {"r": ["packageInfo"], "e": ["version"], "t": ["1.2.3"]}}} | ||
{"id": "javascript_47", "ground_truth": {"sTe": {"r": ["2023-04-01"], "e": ["2023-04-15"], "t": ["days"]}}} | ||
{"id": "javascript_48", "ground_truth": {"updateDOMListeners": {"oldVnode": ["oldVirtualNode"], "vnode": ["newVirtualNode"]}}} | ||
{"id": "javascript_49", "ground_truth": {"convertEnumeratedValue": {"key": ["contenteditable"], "value": ["plaintext-only"]}}} |