Skip to content

Commit

Permalink
Merge pull request #148 from SciCatProject/validate-dataset
Browse files Browse the repository at this point in the history
Validate dataset
  • Loading branch information
jl-wynen authored Oct 2, 2023
2 parents 8536a83 + 329ea0d commit 88b3318
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 1 deletion.
26 changes: 25 additions & 1 deletion src/scitacean/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def upload_new_dataset_now(self, dataset: Dataset) -> Dataset:
dataset = dataset.replace(
source_folder=self._expect_file_transfer().source_folder_for(dataset)
)
dataset.validate()
self.scicat.validate_dataset_model(dataset.make_upload_model())
# TODO skip if there are no files
with self._connect_for_file_upload(dataset) as con:
# TODO check if any remote file is out of date.
Expand Down Expand Up @@ -829,6 +829,30 @@ def create_attachment_for_dataset(
model.DownloadAttachment, _strict_validation=False, **uploaded
)

def validate_dataset_model(
self, dset: Union[model.UploadDerivedDataset, model.UploadRawDataset]
) -> None:
"""Validate a dataset in SciCat.
Parameters
----------
dset:
Model of the dataset to validate.
Raises
------
ValueError
If the dataset does not pass validation.
"""
response = self._call_endpoint(
cmd="post",
url="datasets/isValid",
data=dset,
operation="validate_dataset_model",
)
if not response["valid"]:
raise ValueError(f"Dataset {dset} did not pass validation in SciCat.")

def _send_to_scicat(
self, *, cmd: str, url: str, data: Optional[model.BaseModel] = None
) -> requests.Response:
Expand Down
8 changes: 8 additions & 0 deletions src/scitacean/testing/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,14 @@ def create_attachment_for_dataset(
self.main.attachments.setdefault(dataset_id, []).append(ingested)
return ingested

@_conditionally_disabled
def validate_dataset_model(
self, dset: Union[model.UploadDerivedDataset, model.UploadRawDataset]
):
"""Validate model remotely in SciCat."""
# Models were locally validated on construction, assume they are valid.
pass


def _model_dict(mod: model.BaseModel) -> Dict[str, Any]:
return {
Expand Down
7 changes: 7 additions & 0 deletions tests/client/dataset_client_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ def test_create_dataset_model(scicat_client, derived_dataset):
assert expected == dict(downloaded)[key], f"key = {key}"


def test_validate_dataset_model(real_client, require_scicat_backend, derived_dataset):
real_client.scicat.validate_dataset_model(derived_dataset)
derived_dataset.contactEmail = "NotAnEmail"
with pytest.raises(ValueError):
real_client.scicat.validate_dataset_model(derived_dataset)


def test_get_dataset(client):
dset = INITIAL_DATASETS["raw"]
dblock = INITIAL_ORIG_DATABLOCKS["raw"][0]
Expand Down
14 changes: 14 additions & 0 deletions tests/upload_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,20 @@ def test_upload_cleans_up_files_if_dataset_ingestion_fails(dataset_with_files, f
assert not get_file_transfer(client).files


def test_upload_does_not_create_dataset_if_validation_fails(dataset_with_files, fs):
client = FakeClient(
disable={"validate_dataset_model": ValueError},
file_transfer=FakeFileTransfer(fs=fs),
)
with pytest.raises(ValueError):
client.upload_new_dataset_now(dataset_with_files)

assert not client.datasets
assert not client.orig_datablocks
assert not client.attachments
assert not get_file_transfer(client).files


def test_failed_datablock_upload_does_not_revert(dataset_with_files, fs):
client = FakeClient(
disable={"create_orig_datablock": ScicatCommError("Ingestion failed")},
Expand Down

0 comments on commit 88b3318

Please sign in to comment.