From 5234ea560bea9529b5b984b3c3474aca62064f1e Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Sat, 15 Jul 2023 13:31:01 -0700 Subject: [PATCH] Add three sum challenge Signed-off-by: Merwane Hamadi --- .github/workflows/ci.yml | 9 ++++-- agbenchmark/challenges/code/d1/data.json | 2 +- agbenchmark/challenges/code/d2/data.json | 2 +- agbenchmark/challenges/code/d4/data.json | 2 +- .../code/d5/artifacts_out/__init__.py | 0 .../challenges/code/d5/artifacts_out/code.py | 23 ++++++++++++++ .../challenges/code/d5/custom_python/test.py | 31 +++++++++++++++++++ agbenchmark/challenges/code/d5/data.json | 18 +++++++++++ agent/gpt-engineer | 2 +- 9 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 agbenchmark/challenges/code/d5/artifacts_out/__init__.py create mode 100644 agbenchmark/challenges/code/d5/artifacts_out/code.py create mode 100644 agbenchmark/challenges/code/d5/custom_python/test.py create mode 100644 agbenchmark/challenges/code/d5/data.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7864db6adf..dbb0a6acebe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,7 @@ jobs: if: success() || failure() tests: - name: ${{ matrix.agent-name }} + name: "${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }})" runs-on: ubuntu-latest timeout-minutes: 10 env: @@ -77,6 +77,8 @@ jobs: - "smol-developer" - "Auto-GPT" - "mini-agi" + cache-enabled: [ true, false ] + steps: - name: Checkout repository uses: actions/checkout@v3 @@ -156,7 +158,7 @@ jobs: PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards. HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }} REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt - HELICONE_CACHE_ENABLED: true + HELICONE_CACHE_ENABLED: ${{ matrix.cache-enabled }} HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }} @@ -164,5 +166,6 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: ${{ matrix.agent-name }} + name: + ${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }}) path: agent/${{ matrix.agent-name }}/agbenchmark diff --git a/agbenchmark/challenges/code/d1/data.json b/agbenchmark/challenges/code/d1/data.json index 061c924f52d..bc1a15b425b 100644 --- a/agbenchmark/challenges/code/d1/data.json +++ b/agbenchmark/challenges/code/d1/data.json @@ -1,6 +1,6 @@ { "name": "TestDebugSimpleTypoWithGuidance", - "category": ["code"], + "category": ["code", "iterate"], "task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n", "dependencies": ["TestReadFile", "TestWriteFile"], "ground": { diff --git a/agbenchmark/challenges/code/d2/data.json b/agbenchmark/challenges/code/d2/data.json index 6523ef1d843..fca86f29bb8 100644 --- a/agbenchmark/challenges/code/d2/data.json +++ b/agbenchmark/challenges/code/d2/data.json @@ -1,6 +1,6 @@ { "name": "TestDebugSimpleTypoWithoutGuidance", - "category": ["code"], + "category": ["code", "iterate"], "task": "Make test.py run without errors.", "dependencies": ["TestDebugSimpleTypoWithGuidance"], "ground": { diff --git a/agbenchmark/challenges/code/d4/data.json b/agbenchmark/challenges/code/d4/data.json index 51f6f27021d..b2320a4e58c 100644 --- a/agbenchmark/challenges/code/d4/data.json +++ b/agbenchmark/challenges/code/d4/data.json @@ -1,6 +1,6 @@ { "name": "TestBasicCodeGeneration", - "category": ["code", "iterate"], + "category": ["code"], "task": "Create a two_sum function in a file called code.py. Given an array of integers, return indices of the two numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 9, Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].", "dependencies": ["TestWriteFile"], "ground": { diff --git a/agbenchmark/challenges/code/d5/artifacts_out/__init__.py b/agbenchmark/challenges/code/d5/artifacts_out/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/agbenchmark/challenges/code/d5/artifacts_out/code.py b/agbenchmark/challenges/code/d5/artifacts_out/code.py new file mode 100644 index 00000000000..6056691dafa --- /dev/null +++ b/agbenchmark/challenges/code/d5/artifacts_out/code.py @@ -0,0 +1,23 @@ +# mypy: ignore-errors +from typing import List, Optional + + +def three_sum(nums: List[int], target: int) -> Optional[List[int]]: + nums_indices = [(num, index) for index, num in enumerate(nums)] + nums_indices.sort() + for i in range(len(nums_indices) - 2): + if i > 0 and nums_indices[i] == nums_indices[i - 1]: + continue + l, r = i + 1, len(nums_indices) - 1 + while l < r: + three_sum = nums_indices[i][0] + nums_indices[l][0] + nums_indices[r][0] + if three_sum < target: + l += 1 + elif three_sum > target: + r -= 1 + else: + indices = sorted( + [nums_indices[i][1], nums_indices[l][1], nums_indices[r][1]] + ) + return indices + return None diff --git a/agbenchmark/challenges/code/d5/custom_python/test.py b/agbenchmark/challenges/code/d5/custom_python/test.py new file mode 100644 index 00000000000..761b9f5c656 --- /dev/null +++ b/agbenchmark/challenges/code/d5/custom_python/test.py @@ -0,0 +1,31 @@ +# mypy: ignore-errors +from code import three_sum +from typing import List + + +def test_three_sum(nums: List[int], target: int, expected_result: List[int]) -> None: + result = three_sum(nums, target) + print(result) + assert ( + result == expected_result + ), f"AssertionError: Expected the output to be {expected_result}" + + +if __name__ == "__main__": + # test the trivial case with the first three numbers + nums = [2, 7, 11, 15] + target = 20 + expected_result = [0, 1, 2] + test_three_sum(nums, target, expected_result) + + # test for ability to use zero and the same number twice + nums = [2, 7, 0, 15, 12, 0] + target = 2 + expected_result = [0, 2, 5] + test_three_sum(nums, target, expected_result) + + # test for first and last index usage and negative numbers + nums = [-6, 7, 11, 4] + target = 9 + expected_result = [0, 2, 3] + test_three_sum(nums, target, expected_result) diff --git a/agbenchmark/challenges/code/d5/data.json b/agbenchmark/challenges/code/d5/data.json new file mode 100644 index 00000000000..4b44c694339 --- /dev/null +++ b/agbenchmark/challenges/code/d5/data.json @@ -0,0 +1,18 @@ +{ + "name": "TestThreeSum", + "category": ["code", "iterate"], + "task": "Create a three_sum function in a file called code.py. Given an array of integers, return indices of the three numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 20, Because nums[0] + nums[1] + nums[2] = 2 + 7 + 11 = 20, return [0, 1, 2].", + "dependencies": ["TestWriteFile", "TestBasicCodeGeneration"], + "ground": { + "answer": "The three_sum function coded properly.", + "should_contain": ["[0, 1, 2]", "[0, 2, 5]", "[0, 2, 3]"], + "should_not_contain": [], + "files": ["test.py"], + "type": "execute_python_code" + }, + "info": { + "difficulty": "intermediate", + "description": "Tests ability for the agent to create the three_sum function.", + "side_effects": [] + } +} diff --git a/agent/gpt-engineer b/agent/gpt-engineer index bca191cd76c..f0c76918dff 160000 --- a/agent/gpt-engineer +++ b/agent/gpt-engineer @@ -1 +1 @@ -Subproject commit bca191cd76cdea0335da91d004c64d9bb8520fea +Subproject commit f0c76918dff7a6cf5e0611a09b060fc5d4913b82