Always display test coverage; add tests (#240)
* Set up a coverage config file

`pytest` now displays coverage stats and `pytest --cov-report html` produces a useful coverage report

* Always display the 10 slowest tests

* Expanded the data.py tests

* Wrote tests for exporting ONNX with Torch head

However, these must be skipped currently, as they all fail!

* Reformat test_onnx.py

For some reason, locally I get different warnings for ./make quality than the CI does.

* Rerun ./make style
tomaarsen authored Dec 28, 2022
1 parent efef17e commit 35c0511
Showing 4 changed files with 208 additions and 28 deletions.
36 changes: 36 additions & 0 deletions .coveragerc
@@ -0,0 +1,36 @@
# Configuration file to control (pytest) coverage
[run]
# Run branch coverage, too
branch = True

[paths]
source =
    src/setfit

[report]
# Regexes for lines to exclude from consideration
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover

    # Don't complain about missing debug-only code:
    def __repr__
    if self\.debug

    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError

    # Don't complain if non-runnable code isn't run:
    if 0:
    if __name__ == .__main__.:

    # Don't complain about abstract methods, they aren't run:
    @(abc\.)?abstractmethod

    # Ignore TYPE_CHECKING code
    if TYPE_CHECKING:

[html]
directory = coverage_report_html
title = SetFit coverage report
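
For reference, the exclude_lines regexes above match code like the following minimal sketch (the Classifier class is a hypothetical example, not part of this commit); matching lines are left out of the coverage report:

from typing import TYPE_CHECKING


class Classifier:  # hypothetical example class
    def __repr__(self):  # matched by "def __repr__", so not counted against coverage
        return "Classifier()"

    def predict(self, texts):
        raise NotImplementedError  # matched by "raise NotImplementedError"


if TYPE_CHECKING:  # matched by "if TYPE_CHECKING:"
    from datasets import Dataset

if __name__ == "__main__":  # matched by "if __name__ == .__main__.:"
    pass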
6 changes: 5 additions & 1 deletion setup.cfg
@@ -16,4 +16,8 @@ per-file-ignores =
exclude =
    results
    scripts/adapet
    scripts/tfew

[tool:pytest]
testpaths = tests
addopts = --cov=setfit --durations=10
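
These addopts make every plain pytest run report coverage for the setfit package and list the 10 slowest tests. A rough programmatic equivalent, assuming pytest and pytest-cov are installed (this snippet is illustrative, not part of the commit):

import pytest

# Mirrors testpaths plus addopts from [tool:pytest]; append "--cov-report", "html"
# to also write the HTML report configured in .coveragerc.
exit_code = pytest.main(["tests", "--cov=setfit", "--durations=10"])
raise SystemExit(exit_code)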
125 changes: 99 additions & 26 deletions tests/exporters/test_onnx.py
@@ -2,10 +2,14 @@

import numpy as np
import onnxruntime
import pytest
from datasets import Dataset
from transformers import AutoTokenizer

from setfit import SetFitModel
from setfit.data import get_augmented_samples
from setfit.exporters.onnx import export_onnx
from setfit.trainer import SetFitTrainer


def test_export_onnx_sklearn_head():
@@ -15,34 +19,103 @@ def test_export_onnx_sklearn_head():

    # Export the sklearn based model
    output_path = "model.onnx"
    try:
        export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path)

        # Check that the model was saved.
        assert output_path in os.listdir(), "Model not saved to output_path"

        # Run inference using the original model.
        input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"]
        pytorch_preds = model(input_text)

        # Run inference using the exported onnx model.
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        inputs = tokenizer(
            input_text,
            padding=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            return_tensors="np",
        )
        # Map inputs to int64 from int32
        inputs = {key: value.astype("int64") for key, value in inputs.items()}

        session = onnxruntime.InferenceSession(output_path)

        onnx_preds = session.run(None, dict(inputs))[0]

        # Compare the results and ensure that we get the same predictions.
        assert np.array_equal(onnx_preds, pytorch_preds)

    finally:
        # Cleanup the model.
        os.remove(output_path)


@pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.")
@pytest.mark.parametrize("out_features", [1, 2, 3])
def test_export_onnx_torch_head(out_features):
    """Test that the exported `ONNX` model returns the same predictions as the original model."""
    dataset = Dataset.from_dict(get_augmented_samples("SentEval-CR"))
    model_path = "sentence-transformers/paraphrase-albert-small-v2"
    model = SetFitModel.from_pretrained(
        model_path, use_differentiable_head=True, head_params={"out_features": out_features}
    )
    trainer = SetFitTrainer(
        model=model,
        train_dataset=dataset,
        eval_dataset=dataset,
        num_iterations=15,
        column_mapping={"text": "text", "label": "label"},
    )
    # Train and evaluate
    trainer.freeze()  # Freeze the head
    trainer.train()  # Train only the body
    # Unfreeze the head and unfreeze the body -> end-to-end training
    trainer.unfreeze(keep_body_frozen=False)
    trainer.train(
        num_epochs=15,
        batch_size=16,
        body_learning_rate=1e-5,
        learning_rate=1e-2,
        l2_weight=0.0,
    )

    # Export the sklearn based model
    output_path = "model.onnx"
    try:
        export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path)

        # Check that the model was saved.
        assert output_path in os.listdir(), "Model not saved to output_path"

        # Run inference using the original model.
        input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"]
        pytorch_preds = model(input_text)

        # Run inference using the exported onnx model.
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        inputs = tokenizer(
            input_text,
            padding=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            return_tensors="np",
        )
        # Map inputs to int64 from int32
        inputs = {key: value.astype("int64") for key, value in inputs.items()}

        session = onnxruntime.InferenceSession(output_path)

        onnx_preds = session.run(None, dict(inputs))[0]

        # Compare the results and ensure that we get the same predictions.
        assert np.array_equal(onnx_preds, pytorch_preds)
    finally:
        # Cleanup the model.
        os.remove(output_path)
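
Both tests cast the tokenizer output from int32 to int64 before running the ONNX session, since the exported graph typically expects int64 token IDs. A standalone sketch of that cast on a hypothetical tokenizer output (illustrative only, not part of the commit):

import numpy as np

# Hypothetical tokenizer output in int32, as in the "Map inputs to int64 from int32" step above
inputs = {
    "input_ids": np.array([[101, 7592, 102]], dtype=np.int32),
    "attention_mask": np.array([[1, 1, 1]], dtype=np.int32),
    "token_type_ids": np.array([[0, 0, 0]], dtype=np.int32),
}

# The same cast used in both tests before calling session.run
inputs = {key: value.astype("int64") for key, value in inputs.items()}
assert all(value.dtype == np.int64 for value in inputs.values())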
69 changes: 68 additions & 1 deletion tests/test_data.py
@@ -1,15 +1,18 @@
import string

import numpy as np
import pandas as pd
import pytest
from datasets import Dataset, load_dataset

from setfit.data import (
    SAMPLE_SIZES,
    SEEDS,
    add_templated_examples,
    create_fewshot_splits,
    create_fewshot_splits_multilabel,
    create_samples,
    get_augmented_samples,
    sample_dataset,
)

@@ -102,9 +105,48 @@ def test_subset_is_smaller_than_sample_size(sample_size):

def test_expected_number_of_splits():
    dataset = Dataset.from_pandas(pd.DataFrame({"label": [0] * 50 + [1] * 50}))
    num_labels = 2
    splits_ds = create_fewshot_splits(dataset, SAMPLE_SIZES)
    assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)

    split: Dataset
    for idx, split in enumerate(splits_ds.values()):
        sample_size = SAMPLE_SIZES[idx // len(SEEDS)]
        # The number of rows is limited by 100 due to the size of the original dataset
        assert len(split) == min(sample_size * num_labels, len(dataset))


def test_create_fewshot_splits_with_augmentation():
dataset_name = "sst5"
dataset = load_dataset(f"SetFit/{dataset_name}", split="train")
num_labels = len(set(dataset["label"]))
splits_ds = create_fewshot_splits(dataset, SAMPLE_SIZES, add_data_augmentation=True, dataset_name=dataset_name)
assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)

split: Dataset
for idx, split in enumerate(splits_ds.values()):
sample_size = SAMPLE_SIZES[idx // len(SEEDS)]
# Each split should have sample_size * num_labels * 2 rows:
# for each label we sample `sample_size`, and then we generate
# another `sample_size` samples through augmentation.
assert len(split) == sample_size * num_labels * 2


def test_create_fewshot_splits_multilabel():
    num_samples = 50
    dataset = Dataset.from_dict(
        {
            "text": string.ascii_letters[:50],
            "label_one": np.random.randint(2, size=(num_samples,)),
            "label_two": np.random.randint(2, size=(num_samples,)),
            "label_three": np.random.randint(2, size=(num_samples,)),
        }
    )
    splits_ds = create_fewshot_splits_multilabel(dataset, SAMPLE_SIZES)
    assert len(splits_ds) == len(SAMPLE_SIZES) * len(SEEDS)
    # We can't safely test the number of rows of each of the splits
    # as duplicate samples are removed.


def test_sample_dataset_returns_expected_samples():
    num_samples = 2
@@ -130,3 +172,28 @@ def test_sample_dataset_with_unbalanced_ds(unbalanced_dataset):
    # has one label with more than `num_samples` entries and another label with just 1 row.
    # We sample `num_samples` from the former, and 1 from the latter.
    assert ds.num_rows == num_samples + 1


@pytest.mark.parametrize(
"dataset",
[
"emotion",
"ag_news",
"amazon_counterfactual_en",
"SentEval-CR",
"sst5",
"enron_spam",
"tweet_eval_stance_abortion",
"ade_corpus_v2_classification",
],
)
def test_get_augmented_samples(dataset: str):
    dataset_dict = get_augmented_samples(dataset)
    assert set(dataset_dict.keys()) == {"text", "label"}
    assert len(dataset_dict["text"])
    assert len(dataset_dict["label"])


def test_get_augmented_samples_negative():
    with pytest.raises(ValueError):
        get_augmented_samples(None)
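
As background for the sampling assertions above, a hedged sketch of sample_dataset on a small balanced dataset; the toy data and the per-label behaviour shown here are inferred from the tests, not guaranteed by this commit:

from datasets import Dataset

from setfit.data import sample_dataset

# Toy dataset with two balanced labels, 10 rows each
dataset = Dataset.from_dict(
    {"text": [f"example {i}" for i in range(20)], "label": [0] * 10 + [1] * 10}
)

# sample_dataset draws `num_samples` rows per label (cf. test_sample_dataset_returns_expected_samples)
few_shot = sample_dataset(dataset, label_column="label", num_samples=2)
assert few_shot.num_rows == 2 * 2  # num_samples * num_labels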
