Significant-Gravitas · waynehamadi · Aug 9, 2023 · Aug 8, 2023 · Aug 8, 2023 · Aug 8, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -187,6 +187,14 @@ jobs:
             poetry run playwright install
             uvicorn beebot.initiator.api:create_app --reload &
             prefix="poetry run "
+          elif [ "$AGENT_NAME" == "PolyGPT" ]; then
+            cp .env.template .env
+            curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
+            export NVM_DIR=$HOME/.nvm
+            source $NVM_DIR/nvm.sh
+            nvm install && nvm use
+            yarn install
+            export NODE_TLS_REJECT_UNAUTHORIZED=0
           else
             echo "Unknown agent name: $AGENT_NAME"
             exit 1

diff --git a/.gitmodules b/.gitmodules
@@ -30,7 +30,7 @@
 	path = agbenchmark/challenges
 	url = https://github.com/SilenNaihin/agbenchmark_challenges.git
 	branch = main
-[submodule "agent/PolyGPT"]
-	path = agent/PolyGPT
-	url = https://github.com/polywrap/PolyGPT.git
-	branch = nerfzael-agent-protocol
+[submodule "agent/PolyGPT"]
+	path = agent/PolyGPT
+	url = https://github.com/polywrap/PolyGPT.git
+	branch = nerfzael-use-local-wrap-library
diff --git a/agent/PolyGPT b/agent/PolyGPT
diff --git a/reports/PolyGPT/folder1_08-08-20-39/report.json b/reports/PolyGPT/folder1_08-08-20-39/report.json
diff --git a/reports/PolyGPT/folder2_08-08-21-39/report.json b/reports/PolyGPT/folder2_08-08-21-39/report.json
diff --git a/reports/PolyGPT/folder3_08-09-09-23/radar_chart.png b/reports/PolyGPT/folder3_08-09-09-23/radar_chart.png
diff --git a/reports/PolyGPT/folder3_08-09-09-23/report.json b/reports/PolyGPT/folder3_08-09-09-23/report.json
diff --git a/reports/PolyGPT/folder4_08-09-14-34/radar_chart.png b/reports/PolyGPT/folder4_08-09-14-34/radar_chart.png
diff --git a/reports/PolyGPT/folder4_08-09-14-34/report.json b/reports/PolyGPT/folder4_08-09-14-34/report.json
diff --git a/reports/PolyGPT/folder5_08-09-15-11/report.json b/reports/PolyGPT/folder5_08-09-15-11/report.json
diff --git a/reports/PolyGPT/folder6_08-09-16-57/radar_chart.png b/reports/PolyGPT/folder6_08-09-16-57/radar_chart.png
diff --git a/reports/PolyGPT/folder6_08-09-16-57/report.json b/reports/PolyGPT/folder6_08-09-16-57/report.json
diff --git a/reports/PolyGPT/folder7_08-09-17-36/radar_chart.png b/reports/PolyGPT/folder7_08-09-17-36/radar_chart.png
diff --git a/reports/PolyGPT/folder7_08-09-17-36/report.json b/reports/PolyGPT/folder7_08-09-17-36/report.json
diff --git a/reports/PolyGPT/regression_tests.json b/reports/PolyGPT/regression_tests.json
@@ -0,0 +1,22 @@
+{
+    "TestAgentProtocol_CreateAgentTask": {
+        "difficulty": "interface",
+        "data_path": "agbenchmark/challenges/interface/agent_protocol_suite/1_create_agent_task/data.json"
+    },
+    "TestAgentProtocol_ExecuteAgentTaskStep": {
+        "difficulty": "interface",
+        "data_path": "agbenchmark/challenges/interface/agent_protocol_suite/5_execute_agent_task_step/data.json"
+    },
+    "TestAgentProtocol_GetAgentTask": {
+        "difficulty": "interface",
+        "data_path": "agbenchmark/challenges/interface/agent_protocol_suite/3_get_agent_task/data.json"
+    },
+    "TestAgentProtocol_ListAgentTaskSteps": {
+        "difficulty": "interface",
+        "data_path": "agbenchmark/challenges/interface/agent_protocol_suite/4_list_agent_tasks_steps/data.json"
+    },
+    "TestAgentProtocol_ListAgentTasksIds": {
+        "difficulty": "interface",
+        "data_path": "agbenchmark/challenges/interface/agent_protocol_suite/2_list_agent_tasks_ids/data.json"
+    }
+}
diff --git a/reports/PolyGPT/success_rate.json b/reports/PolyGPT/success_rate.json
@@ -0,0 +1,242 @@
+{
+    "TestAdaptLink": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestAdaptSimpleTypoWithGuidance": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestAdaptTeslaRevenue": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestAgentProtocol_CreateAgentTask": [
+        true,
+        true,
+        true,
+        true
+    ],
+    "TestAgentProtocol_ExecuteAgentTaskStep": [
+        true,
+        true,
+        true,
+        true
+    ],
+    "TestAgentProtocol_GetAgentTask": [
+        true,
+        true,
+        true,
+        true
+    ],
+    "TestAgentProtocol_ListAgentTaskSteps": [
+        true,
+        true,
+        true,
+        true
+    ],
+    "TestAgentProtocol_ListAgentTasksIds": [
+        true,
+        true,
+        true,
+        true
+    ],
+    "TestBasicContentGen": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestBasicMemory": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestBasicRetrieval": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestDebugMultipleTypo": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestDebugSimpleTypoWithGuidance": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestDebugSimpleTypoWithoutGuidance": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestFunctionCodeGeneration": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestGoalDivergence": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestGoalLoss_Advanced": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestGoalLoss_Hard": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestGoalLoss_Medium": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestGoalLoss_Simple": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestInstructionFollowing": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestPasswordGenerator_Easy": [
+        false,
+        false,
+        false,
+        true
+    ],
+    "TestPlanCreation": [
+        false,
+        false,
+        false,
+        true
+    ],
+    "TestProductAdvisor_GamingMonitor": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestReadFile": [
+        false,
+        false,
+        false,
+        true
+    ],
+    "TestRememberMultipleIds": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRememberMultiplePhrasesWithNoise": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRememberMultipleWithNoise": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRetrieval3": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestReturnCode_Modify": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestReturnCode_Simple": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestReturnCode_Tests": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestReturnCode_Write": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRevenueRetrieval_1.0": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRevenueRetrieval_1.1": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestRevenueRetrieval_1.2": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestSearch": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestThreeSum": [
+        false,
+        false,
+        false,
+        false
+    ],
+    "TestWriteFile": [
+        false,
+        false,
+        false,
+        true
+    ],
+    "TestWritingCLI_FileOrganizer": [
+        false,
+        false,
+        false,
+        false
+    ]
+}
diff --git a/reports/combined_charts/run31/bar_chart.png b/reports/combined_charts/run31/bar_chart.png
diff --git a/reports/combined_charts/run31/radar_chart.png b/reports/combined_charts/run31/radar_chart.png
diff --git a/reports/combined_charts/run31/run_info.json b/reports/combined_charts/run31/run_info.json
@@ -1 +1 @@
-{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "smol-developer": "2023-07-31-16:11", "gpt-engineer": "2023-07-31-19:38"}
+{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:41", "Auto-GPT": "2023-07-31-19:39", "smol-developer": "2023-07-31-16:11", "gpt-engineer": "2023-07-31-19:38"}
diff --git a/reports/combined_charts/run32/run_info.json b/reports/combined_charts/run32/run_info.json
@@ -1 +1 @@
-{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:39", "smol-developer": "2023-07-31-19:38", "gpt-engineer": "2023-07-31-19:38"}
+{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "PolyGPT": "2023-08-09-09:23", "smol-developer": "2023-07-31-19:38", "gpt-engineer": "2023-07-31-19:38"}
diff --git a/reports/combined_charts/run33/run_info.json b/reports/combined_charts/run33/run_info.json
@@ -1 +1 @@
-{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:39", "smol-developer": "2023-07-31-16:11", "gpt-engineer": "2023-07-31-19:38"}
+{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "PolyGPT": "2023-08-09-14:34", "smol-developer": "2023-07-31-19:05", "gpt-engineer": "2023-07-31-19:38"}
diff --git a/reports/combined_charts/run34/run_info.json b/reports/combined_charts/run34/run_info.json
@@ -0,0 +1 @@
+{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "PolyGPT": "2023-08-09-16:57", "smol-developer": "2023-07-31-19:05", "gpt-engineer": "2023-07-31-19:38"}
+1 −1		.env.template
+0 −0		agbenchmark/__init__.py
+19 −0		agbenchmark/benchmarks.py
+4 −0		agbenchmark/config.json
+11 −6		src/agent.ts
+4 −2		src/cli.ts
+6 −1		src/openai.ts
+3 −0		src/utils/isFileSystemUri.ts
+8 −1		src/wrap/client.ts
+14 −6		src/wrap/library.ts
+1 −1		wrap-library/index.json
+3 −0		wrap-library/user.graphql
+14 −0		wrap-library/user.json
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "smol-developer": "2023-07-31-16:11", "gpt-engineer": "2023-07-31-19:38"}
		{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:41", "Auto-GPT": "2023-07-31-19:39", "smol-developer": "2023-07-31-16:11", "gpt-engineer": "2023-07-31-19:38"}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"mini-agi": "2023-07-31-19:38", "BabyAGI": "2023-07-31-19:07", "Auto-GPT": "2023-07-31-19:06", "PolyGPT": "2023-08-09-16:57", "smol-developer": "2023-07-31-19:05", "gpt-engineer": "2023-07-31-19:38"}