Skip to content
This repository has been archived by the owner on Jun 9, 2024. It is now read-only.

Add three sum challenge #108

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
if: success() || failure()

tests:
name: ${{ matrix.agent-name }}
name: "${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }})"
runs-on: ubuntu-latest
timeout-minutes: 10
env:
Expand All @@ -77,6 +77,8 @@ jobs:
- "smol-developer"
- "Auto-GPT"
- "mini-agi"
cache-enabled: [ true, false ]

steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down Expand Up @@ -156,13 +158,14 @@ jobs:
PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards.
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
HELICONE_CACHE_ENABLED: true
HELICONE_CACHE_ENABLED: ${{ matrix.cache-enabled }}
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}


# Uploads each matrix job's agbenchmark directory as a build artifact, even when earlier steps fail.
- name: Upload reports
if: always()
uses: actions/upload-artifact@v3
with:
# NOTE(review): two `name:` entries appear here; presumably the first is the removed side of the diff — confirm only one remains in ci.yml.
name: ${{ matrix.agent-name }}
# NOTE(review): the value below is an unquoted scalar containing ": " (the job-level name quotes the same text),
# and the resulting artifact name contains ':' — quote it and confirm upload-artifact@v3 accepts ':' in artifact names.
name:
${{ matrix.agent-name }} (Cache: ${{ matrix.cache-enabled }})
path: agent/${{ matrix.agent-name }}/agbenchmark
2 changes: 1 addition & 1 deletion agbenchmark/challenges/code/d1/data.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "TestDebugSimpleTypoWithGuidance",
"category": ["code"],
"category": ["code", "iterate"],
"task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
"dependencies": ["TestReadFile", "TestWriteFile"],
"ground": {
Expand Down
2 changes: 1 addition & 1 deletion agbenchmark/challenges/code/d2/data.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "TestDebugSimpleTypoWithoutGuidance",
"category": ["code"],
"category": ["code", "iterate"],
"task": "Make test.py run without errors.",
"dependencies": ["TestDebugSimpleTypoWithGuidance"],
"ground": {
Expand Down
2 changes: 1 addition & 1 deletion agbenchmark/challenges/code/d4/data.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "TestBasicCodeGeneration",
"category": ["code", "iterate"],
"category": ["code"],
"task": "Create a two_sum function in a file called code.py. Given an array of integers, return indices of the two numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 9, Because nums[0] + nums[1] = 2 + 7 = 9, return [0, 1].",
"dependencies": ["TestWriteFile"],
"ground": {
Expand Down
Empty file.
23 changes: 23 additions & 0 deletions agbenchmark/challenges/code/d5/artifacts_out/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# mypy: ignore-errors
from typing import List, Optional


def three_sum(nums: List[int], target: int) -> Optional[List[int]]:
    """Find three distinct positions in ``nums`` whose values add up to ``target``.

    The values are sorted together with their original positions, then each
    element in turn is fixed as the smallest member of the triple while two
    pointers close in on the remaining pair from both ends.

    Args:
        nums: The integers to search. May contain duplicates and negatives.
        target: The sum the three chosen values must reach.

    Returns:
        The original indices of the first matching triple found, in ascending
        order, or ``None`` when no three elements sum to ``target`` (including
        when ``nums`` has fewer than three elements).
    """
    # Keep each value's original position so it survives the sort.
    nums_indices = sorted((num, index) for index, num in enumerate(nums))

    for i in range(len(nums_indices) - 2):
        # Compare values only: the (value, index) tuples are always distinct,
        # so comparing whole tuples would never skip anything. A repeated
        # anchor value cannot succeed where the previous identical one failed,
        # because its search window is a subset of the previous window.
        if i > 0 and nums_indices[i][0] == nums_indices[i - 1][0]:
            continue

        left, right = i + 1, len(nums_indices) - 1
        while left < right:
            total = (
                nums_indices[i][0] + nums_indices[left][0] + nums_indices[right][0]
            )
            if total < target:
                left += 1  # need a larger sum
            elif total > target:
                right -= 1  # need a smaller sum
            else:
                return sorted(
                    [
                        nums_indices[i][1],
                        nums_indices[left][1],
                        nums_indices[right][1],
                    ]
                )
    return None
31 changes: 31 additions & 0 deletions agbenchmark/challenges/code/d5/custom_python/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# mypy: ignore-errors
from code import three_sum
from typing import List


def test_three_sum(nums: List[int], target: int, expected_result: List[int]) -> None:
    """Run ``three_sum`` on one case, print the result, and assert it matches.

    Args:
        nums: Input integers passed to ``three_sum``.
        target: Target sum passed to ``three_sum``.
        expected_result: The index list ``three_sum`` is expected to return.

    Raises:
        AssertionError: If the returned value differs from ``expected_result``.
    """
    result = three_sum(nums, target)
    # Printed before asserting so the actual result is visible in the script's
    # output even when the assertion below fails.
    print(result)
    assert (
        result == expected_result
    ), f"AssertionError: Expected the output to be {expected_result}"


if __name__ == "__main__":
    # Each entry is (nums, target, expected_result). Cases run in order and
    # stop at the first failing assertion.
    cases = [
        # test the trivial case with the first three numbers
        ([2, 7, 11, 15], 20, [0, 1, 2]),
        # test for ability to use zero and the same number twice
        ([2, 7, 0, 15, 12, 0], 2, [0, 2, 5]),
        # test for first and last index usage and negative numbers
        ([-6, 7, 11, 4], 9, [0, 2, 3]),
    ]
    for nums, target, expected_result in cases:
        test_three_sum(nums, target, expected_result)
18 changes: 18 additions & 0 deletions agbenchmark/challenges/code/d5/data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "TestThreeSum",
"category": ["code", "iterate"],
"task": "Create a three_sum function in a file called code.py. Given an array of integers, return indices of the three numbers such that they add up to a specific target. You may assume that each input would have exactly one solution, and you may not use the same element twice. Example: Given nums = [2, 7, 11, 15], target = 20, Because nums[0] + nums[1] + nums[2] = 2 + 7 + 11 = 20, return [0, 1, 2].",
"dependencies": ["TestWriteFile", "TestBasicCodeGeneration"],
"ground": {
"answer": "The three_sum function coded properly.",
"should_contain": ["[0, 1, 2]", "[0, 2, 5]", "[0, 2, 3]"],
"should_not_contain": [],
"files": ["test.py"],
"type": "execute_python_code"
},
"info": {
"difficulty": "intermediate",
"description": "Tests ability for the agent to create the three_sum function.",
"side_effects": []
}
}
2 changes: 1 addition & 1 deletion agent/gpt-engineer