Skip to content

Commit

Permalink
Update on "Fix CI after torchmetrics update"
Browse files Browse the repository at this point in the history
The torchmetrics `Accuracy` metric now takes an argument: https://torchmetrics.readthedocs.io/en/stable/classification/accuracy.html

Corresponding upstream change in torchmetrics (Lightning-AI):
Lightning-AI/torchmetrics@20eab43

For some reason this fails with a segfault on my A100 (inside a Triton kernel):
```
#0  0x00007fffc0f62e10 in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#1  0x00007fffc0f9303c in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#2  0x00007fffc0f2ea13 in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#3  0x00007fffc0f94603 in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#4  0x00007fffc119e4a0 in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#5  0x00007fffc0f3728f in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#6  0x00007fffc0f3999f in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#7  0x00007fffc0fdb1c2 in ?? () from /lib/x86_64-linux-gnu/libcuda.so
#8  0x00007fff502234c0 in _launch ()
   from /data/home/XXXXX/.triton/cache/704a3e6949e60326bc68d18a620bee50/layer_norm_fw.so
#9  0x00007fff3c0eea25 in launch ()
   from /data/home/XXXXX/.triton/cache/2cebb5590a024a2e06fe9de08c6b7079/k_dropout_bw.so
#10 0x0000555555698422 in cfunction_call (func=0x7fff3c6e5760, args=<optimized out>, kwargs=<optimized out>)
    at /usr/local/src/conda/python-3.10.6/Objects/methodobject.c:552
```

[ghstack-poisoned]
  • Loading branch information
danthe3rd committed Dec 8, 2022
1 parent d119652 commit 29ebaa4
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ commands:
# Cache the venv directory that contains dependencies
- restore_cache:
keys:
- cache-key-gpu-arch<<parameters.arch>>-{{ checksum "requirements-test.txt" }}-{{ checksum ".circleci/config.yml" }}
- cache-key-gpu-arch<<parameters.arch>>-{{ checksum "requirements-test.txt" }}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml" }}

- <<: *setup_conda
- <<: *install_dep
Expand All @@ -317,7 +317,7 @@ commands:
- ~/miniconda
- ~/venv

key: cache-key-gpu-arch<<parameters.arch>>-{{ checksum "requirements-test.txt"}}-{{ checksum ".circleci/config.yml"}}
key: cache-key-gpu-arch<<parameters.arch>>-{{ checksum "requirements-test.txt"}}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml"}}

- <<: *install_repo
- <<: *run_coverage
Expand All @@ -344,7 +344,7 @@ jobs:
# Cache the venv directory that contains dependencies
- restore_cache:
keys:
- cache-key-cpu-py38-{{ checksum "requirements-test.txt" }}-{{ checksum ".circleci/config.yml" }}
- cache-key-cpu-py38-{{ checksum "requirements-test.txt" }}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml" }}

- <<: *setup_conda

Expand All @@ -359,7 +359,7 @@ jobs:
- ~/miniconda
- ~/venv

key: cache-key-cpu-py38-{{ checksum "requirements-test.txt" }}-{{ checksum ".circleci/config.yml" }}
key: cache-key-cpu-py38-{{ checksum "requirements-test.txt" }}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml" }}

- <<: *install_repo

Expand Down Expand Up @@ -460,7 +460,7 @@ jobs:
# Cache the venv directory that contains dependencies
- restore_cache:
keys:
- cache-key-gpu-exp-114-{{ checksum "experimental/requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
- cache-key-gpu-exp-114-{{ checksum "experimental/requirements.txt" }}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml" }}

- <<: *setup_conda
- <<: *install_dep_exp
Expand All @@ -475,7 +475,7 @@ jobs:
- ~/miniconda
- ~/venv

key: cache-key-gpu-exp-114-{{ checksum "experimental/requirements.txt" }}-{{ checksum ".circleci/config.yml" }}
key: cache-key-gpu-exp-114-{{ checksum "experimental/requirements.txt" }}-{{ checksum "requirements-benchmark.txt" }}-{{ checksum ".circleci/config.yml" }}

- <<: *install_experimental_repo
- <<: *run_experimental_unittests
Expand Down

0 comments on commit 29ebaa4

Please sign in to comment.