Skip to content

Commit

Permalink
upgraded github actions (#32)
Browse files Browse the repository at this point in the history
* upgraded github actions

* fix ruff errors

* fix ruff errors
  • Loading branch information
seanmacavaney authored Dec 5, 2024
1 parent e1b46f4 commit 18a8f85
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml → .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
name: Upload Python Package
name: deploy

on:
release:
types: [created]

jobs:
deploy-bdist:
pypi:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: style

on:
push: {branches: [main]} # pushes to main
pull_request: {} # all PRs

jobs:
ruff:
strategy:
matrix:
python-version: ['3.10']
os: ['ubuntu-latest']

runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Cache Dependencies
uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}

- name: Install Dependencies
run: |
pip install --upgrade -r requirements-dev.txt -r requirements.txt
- name: Ruff
run: 'ruff check --output-format=github src/pyterrier_pisa'
11 changes: 7 additions & 4 deletions .github/workflows/ci.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
name: Test Python package
name: test

on: [push, pull_request]
on:
push: {branches: [main]} # pushes to main
pull_request: {} # all PRs
schedule: [cron: '0 12 * * 3'] # every Wednesday at 12:00 UTC (GitHub Actions cron schedules run in UTC)

jobs:
build:
pytest:
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -45,7 +48,7 @@ jobs:
python setup.py bdist_wheel
python patcher.py dist/
pip install dist/*.whl
pip install -r requirements-test.txt
pip install -r requirements-dev.txt
- uses: actions/upload-artifact@v4
with:
Expand Down
6 changes: 6 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
nltk
pytest
pytest-subtests
pytest-cov
pytest-json-report
ruff
1 change: 0 additions & 1 deletion requirements-test.txt

This file was deleted.

21 changes: 13 additions & 8 deletions src/pyterrier_pisa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import List
import numpy as np
import json
import sys
from pathlib import Path
import tempfile
import os
Expand Down Expand Up @@ -121,9 +120,12 @@ def __init__(self,
overwrite: If True, the index will be overwritten if it already exists. Defaults to False.
"""
super().__init__(path)
if stemmer is not None: stemmer = PisaStemmer(stemmer)
if index_encoding is not None: index_encoding = PisaIndexEncoding(index_encoding)
if stops is not None and not isinstance(stops, list): stops = PisaStopwords(stops)
if stemmer is not None:
stemmer = PisaStemmer(stemmer)
if index_encoding is not None:
index_encoding = PisaIndexEncoding(index_encoding)
if stops is not None and not isinstance(stops, list):
stops = PisaStopwords(stops)
if (_old_metadata := (self.path/'pt_pisa_config.json').exists()) or (self.path/'pt_meta.json').exists():
if _old_metadata:
with (self.path/'pt_pisa_config.json').open('rt') as fin:
Expand All @@ -135,9 +137,12 @@ def __init__(self,
stemmer = PisaStemmer(config['stemmer'])
if stemmer.value != config['stemmer']:
warn(f'requested stemmer={stemmer.value}, but index was constructed with {config["stemmer"]}')
if stemmer is None: stemmer = PISA_INDEX_DEFAULTS['stemmer']
if index_encoding is None: index_encoding = PISA_INDEX_DEFAULTS['index_encoding']
if stops is None: stops = PISA_INDEX_DEFAULTS['stops']
if stemmer is None:
stemmer = PISA_INDEX_DEFAULTS['stemmer']
if index_encoding is None:
index_encoding = PISA_INDEX_DEFAULTS['index_encoding']
if stops is None:
stops = PISA_INDEX_DEFAULTS['stops']
self.text_field = text_field
self.stemmer = stemmer
self.index_encoding = index_encoding
Expand Down Expand Up @@ -329,7 +334,7 @@ def get_corpus_iter(self, field='toks', verbose=True):
assert self.built()
assert (self.path/'fwd').exists(), "get_corpus_iter requires a fwd index"
m = np.memmap(self.path/'fwd', mode='r', dtype=np.uint32)
lexicon = [l.strip() for l in (self.path/'fwd.terms').open('rt')]
lexicon = [term.strip() for term in (self.path/'fwd.terms').open('rt')]
idx = 2
it = iter((self.path/'fwd.documents').open('rt'))
if verbose:
Expand Down
18 changes: 9 additions & 9 deletions src/pyterrier_pisa/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,26 +87,26 @@ def _index(self, it):
inv_score = defaultdict(list)
lens = []
for doc in batch:
l = 0
doclen = 0
f_docs.write(doc['docno']+'\n')
for term, score in doc[self.text_field].items():
score = int(score * self.scale)
if score <= 0:
continue
l += score
doclen += score
if term not in lexicon:
lexicon[term] = len(lexicon)
f_lex.write(term+'\n')
inv_did[lexicon[term]].append(docid)
inv_score[lexicon[term]].append(int(score))
lens.append(l)
lens.append(doclen)
docid += 1
with (path/f'inv.batch.{bidx}.docs').open('wb') as f_did, (path/f'inv.batch.{bidx}.freqs').open('wb') as f_score, (path/f'inv.batch.{bidx}.sizes').open('wb') as f_len:
f_did.write(np.array([1, len(batch)], dtype=np.uint32).tobytes())
for i in range(len(lexicon)):
l = len(inv_did[i])
f_did.write(np.array([l] + inv_did[i], dtype=np.uint32).tobytes())
f_score.write(np.array([l] + inv_score[i], dtype=np.uint32).tobytes())
doclen = len(inv_did[i])
f_did.write(np.array([doclen] + inv_did[i], dtype=np.uint32).tobytes())
f_score.write(np.array([doclen] + inv_score[i], dtype=np.uint32).tobytes())
f_len.write(np.array([len(lens)] + lens, dtype=np.uint32).tobytes())
_pisathon.merge_inv(str(path/'inv'), bidx+1, len(lexicon))
for i in range(bidx+1):
Expand All @@ -128,9 +128,9 @@ def _index(self, it):
for term in _logger.pbar(sorted(lexicon), desc='re-mapping term ids'):
f_lex.write(f'{term}\n')
i = lexicon[term]
start, l = offsets_lens[i]
f_docs.write(in_docs[start:start+l])
f_freqs.write(in_freqs[start:start+l])
start, doclen = offsets_lens[i]
f_docs.write(in_docs[start:start+doclen])
f_freqs.write(in_freqs[start:start+doclen])
del in_docs # close mmap
del in_freqs # close mmap
(path/'inv.docs.tmp').unlink()
Expand Down

0 comments on commit 18a8f85

Please sign in to comment.