From 1c079cc90be547790575f4d8e92e39cddaa1fe3c Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Sun, 9 Jul 2023 15:42:52 -0700 Subject: [PATCH 1/2] WIP --- .github/workflows/autogpt.yml | 64 -------------------- .github/workflows/ci.yml | 76 +++++++++++++++++++++++- .github/workflows/gpt-engineer.yml | 88 ---------------------------- .github/workflows/mini-agi.yml | 66 --------------------- .github/workflows/smol-developer.yml | 64 -------------------- .github/workflows/superagi.yml | 62 -------------------- 6 files changed, 74 insertions(+), 346 deletions(-) delete mode 100644 .github/workflows/autogpt.yml delete mode 100644 .github/workflows/gpt-engineer.yml delete mode 100644 .github/workflows/mini-agi.yml delete mode 100644 .github/workflows/smol-developer.yml delete mode 100644 .github/workflows/superagi.yml diff --git a/.github/workflows/autogpt.yml b/.github/workflows/autogpt.yml deleted file mode 100644 index 2d7e2dfbd8d..00000000000 --- a/.github/workflows/autogpt.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Auto-GPT Regression Test - -on: - workflow_dispatch: - branches: [master] - push: - branches: [stable, master, ci-test*] - -jobs: - regression-tests: - permissions: - pull-requests: write - contents: write - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - submodules: true - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - - - - name: Set up Poetry cache - uses: actions/cache@v2 - with: - path: | - ~/.cache/pypoetry - .venv - key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} - - - name: Set up venv and install Python dependencies - run: | - poetry install --only main - poetry build - - - name: Run regression tests - run: | - python -m venv venv - source venv/bin/activate - cd agent/Auto-GPT - pip install -r requirements.txt - pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl - agbenchmark start --maintain - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6a0f4503ae3..d989389dbca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,10 @@ -name: Python CI +name: CI on: + workflow_dispatch: + branches: [ master ] + schedule: + - cron: '0 8 * * *' push: branches: [ master, ci-test* ] pull_request: @@ -20,6 +24,7 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.ref }} repository: ${{ github.event.pull_request.head.repo.full_name }} + submodules: true - name: Set up Python ${{ env.min-python-version }} uses: actions/setup-python@v2 @@ -68,10 +73,18 @@ jobs: if: success() || failure() tests: - + name: ${{ matrix.agent-name }} runs-on: ubuntu-latest env: min-python-version: "3.10" + strategy: + fail-fast: false + matrix: + agent-name: + - "gpt-engineer" + - "Auto-GPT" + - "mini-agi" + - "smol-developer" steps: - name: Checkout repository @@ -80,6 +93,7 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.ref }} repository: ${{ github.event.pull_request.head.repo.full_name }} + submodules: true - name: Set up Python ${{ env.min-python-version }} uses: actions/setup-python@v2 @@ -107,3 +121,61 @@ jobs: poetry install poetry run agbenchmark start --mock poetry run agbenchmark start --mock --maintain + poetry build + + - name: Run regression tests + run: | + cd agent/$AGENT_NAME + if [ "$AGENT_NAME" == "gpt-engineer" ]; then + make install + source venv/bin/activate + elif [ "$AGENT_NAME" == "Auto-GPT" ]; then + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + elif [ "$AGENT_NAME" == "mini-agi" ]; then + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + cp .env_example .env + elif [ "$AGENT_NAME" == "smol-developer" ]; then + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + elif [ "$AGENT_NAME" == "SuperAGI" ]; then + cp config_template.yaml config.yaml + sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"${{ secrets.OPENAI_API_KEY }}"'"/' config.yaml + docker-compose up -d --build + else + echo "Unknown agent name: $AGENT_NAME" + exit 1 + fi + + pip install ../../dist/*.whl + + if [ "${GITHUB_EVENT_NAME}" == "schedule" ] || [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]; then + agbenchmark start --maintain + else + exit 0 + agbenchmark start --maintain --mock + agbenchmark start --improve --mock + agbenchmark start --mock + agbenchmark start --mock --category=retrieval + agbenchmark start --mock --category=regression + agbenchmark start --mock --category=interface + agbenchmark start --mock --category=code + agbenchmark start --mock --category=memory + agbenchmark start --mock --category=memory --category=code + fi + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + AGENT_NAME: ${{ matrix.agent-name }} + PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards. + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: gpt-engineer-projects + path: agent/gpt-engineer/projects diff --git a/.github/workflows/gpt-engineer.yml b/.github/workflows/gpt-engineer.yml deleted file mode 100644 index 833026e8e1e..00000000000 --- a/.github/workflows/gpt-engineer.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: gpt-engineer Regression Test - -on: - workflow_dispatch: - branches: [master] - push: - branches: [stable, master, ci-test*] - pull_request: - branches: [stable, master, ci-test*] - -jobs: - regression-tests: - permissions: - pull-requests: write - contents: write - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - submodules: true - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - - - - name: Set up Poetry cache - uses: actions/cache@v2 - with: - path: | - ~/.cache/pypoetry - .venv - key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} - - - name: Set up venv and install Python dependencies - run: | - poetry install --only main - poetry build - - - name: Run regression tests (push) - if: ${{ github.event_name != 'pull_request' }} - run: | - cd agent/gpt-engineer - make install - source venv/bin/activate - pip install ../../dist/*.whl - - if [ "${GITHUB_EVENT_NAME}" != "pull_request" ]; then - agbenchmark start --maintain - else - agbenchmark start --maintain --mock - agbenchmark start --improve --mock - agbenchmark start --mock - agbenchmark start --mock --category=retrieval - agbenchmark start --mock --category=regression - agbenchmark start --mock --category=interface - agbenchmark start --mock --category=code - agbenchmark start --mock --category=memory - agbenchmark start --mock --category=memory --category=code - fi - - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Upload logs as artifact - if: always() - uses: actions/upload-artifact@v3 - with: - name: gpt-engineer-projects - path: agent/gpt-engineer/projects diff --git a/.github/workflows/mini-agi.yml b/.github/workflows/mini-agi.yml deleted file mode 100644 index 53c479df41a..00000000000 --- a/.github/workflows/mini-agi.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: mini-agi Regression Test - -on: - workflow_dispatch: - branches: [master] - push: - branches: [stable, master, ci-test*] - -jobs: - regression-tests: - permissions: - pull-requests: write - contents: write - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - submodules: true - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - - - - name: Set up Poetry cache - uses: actions/cache@v2 - with: - path: | - ~/.cache/pypoetry - .venv - key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} - - - name: Set up venv and install Python dependencies - run: | - poetry install --only main - poetry build - - - name: Run regression tests - run: | - cd agent/mini-agi - python -m venv venv - source venv/bin/activate - pip install -r requirements.txt - cp .env_example .env - pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl - agbenchmark start --maintain - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - PROMPT_USER: false diff --git a/.github/workflows/smol-developer.yml b/.github/workflows/smol-developer.yml deleted file mode 100644 index 6926df54bc0..00000000000 --- a/.github/workflows/smol-developer.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: smol developer Regression Test - -on: - workflow_dispatch: - branches: [master] - push: - branches: [stable, master, ci-test*] - -jobs: - regression-tests: - permissions: - pull-requests: write - contents: write - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - submodules: true - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - - - - name: Set up Poetry cache - uses: actions/cache@v2 - with: - path: | - ~/.cache/pypoetry - .venv - key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} - - - name: Set up venv and install Python dependencies - run: | - poetry install --only main - poetry build - - - name: Run regression tests - run: | - cd agent/smol-developer - python -m venv venv - source venv/bin/activate - pip install -r requirements.txt - pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl - agbenchmark start --maintain - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/.github/workflows/superagi.yml b/.github/workflows/superagi.yml deleted file mode 100644 index 5ab52d33f3e..00000000000 --- a/.github/workflows/superagi.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: SuperAgi Regression Test - -on: - workflow_dispatch: - branches: [master] - push: - branches: [stable, master, ci-test*] - -jobs: - regression-tests: - permissions: - pull-requests: write - contents: write - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - submodules: true - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - id: get_date - name: Get date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - - - - name: Set up Poetry cache - uses: actions/cache@v2 - with: - path: | - ~/.cache/pypoetry - .venv - key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }} - - - name: Set up venv and install Python dependencies - run: | - poetry install --only main - poetry build - - - name: Run regression tests - run: | - cd agent/SuperAGI - cp config_template.yaml config.yaml - sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"${{ secrets.OPENAI_API_KEY }}"'"/' config.yaml - docker-compose up -d --build - pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl - agbenchmark start --maintain From f5894b478b7d1a5ab14ec59d28ba230d6b72f869 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Sun, 9 Jul 2023 19:30:58 -0700 Subject: [PATCH 2/2] Add Helicone Signed-off-by: Merwane Hamadi --- .github/workflows/ci.yml | 1 + agbenchmark/challenge.py | 3 ++- agbenchmark/challenges/test_all.py | 3 ++- agent/Auto-GPT | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d989389dbca..cac1dedb1de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,6 +172,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AGENT_NAME: ${{ matrix.agent-name }} PROMPT_USER: false # For mini-agi. TODO: Remove this once mini-agi follows the standards. + HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }} - name: Upload logs as artifact if: always() diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py index cf7ce104c57..aeebd7ad897 100644 --- a/agbenchmark/challenge.py +++ b/agbenchmark/challenge.py @@ -7,6 +7,7 @@ from dotenv import load_dotenv from agbenchmark.challenges.define_task_types import ChallengeData, Ground +from agbenchmark.start_benchmark import CURRENT_DIRECTORY load_dotenv() @@ -23,7 +24,7 @@ class Challenge(ABC): @property def data(self) -> ChallengeData: - file_path = f"{self.CHALLENGE_LOCATION}/data.json" + file_path = f"{CURRENT_DIRECTORY}/../{self.CHALLENGE_LOCATION}/data.json" if file_path not in Challenge._data_cache: Challenge._data_cache[file_path] = ChallengeData.deserialize(file_path) return Challenge._data_cache[file_path] diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py index 4f9e5b7f828..e7fe99e738e 100644 --- a/agbenchmark/challenges/test_all.py +++ b/agbenchmark/challenges/test_all.py @@ -10,13 +10,14 @@ from dotenv import load_dotenv from agbenchmark.challenge import Challenge +from agbenchmark.start_benchmark import CURRENT_DIRECTORY load_dotenv() IMPROVE = os.getenv("IMPROVE", "False") -json_files = glob.glob("agbenchmark/challenges/**/data.json", recursive=True) +json_files = glob.glob(f"{CURRENT_DIRECTORY}/challenges/**/data.json", recursive=True) def get_test_path(json_file: str) -> str: diff --git a/agent/Auto-GPT b/agent/Auto-GPT index cec424ad250..f360d503b11 160000 --- a/agent/Auto-GPT +++ b/agent/Auto-GPT @@ -1 +1 @@ -Subproject commit cec424ad2504020a830c3af9f74536a420545931 +Subproject commit f360d503b113119f6b3ce0acff1dbb4dfae2223a