Skip to content

Commit f517108

Browse files
committed
partial work toward aggregating errors
1 parent 24ac6b6 commit f517108

File tree

7 files changed

+74
-29
lines changed

7 files changed

+74
-29
lines changed

eido/cli.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,26 @@ def _parse_filter_args_str(input):
3333
else lst
3434
)
3535

36+
def print_error_summary(errors_by_type):
    """
    Print a summary of errors, organized by error type

    One line is printed per error type, giving the number of affected
    samples; sample names are listed only when fewer than 50 samples share
    that error, to keep the output readable.

    :param dict errors_by_type: mapping of error message to a list of dicts,
        each of which has a "sample_name" key
    :return str: the final summary message, which is also printed
    """
    n_error_types = len(errors_by_type)
    print(f"Found {n_error_types} types of error:")
    # 'error_type' rather than 'type', so the builtin is not shadowed
    for error_type, occurrences in errors_by_type.items():
        n = len(occurrences)
        msg = f" - {error_type}: ({n} samples) "
        if n < 50:
            # sample names are not guaranteed to be strings; coerce them so
            # that join() cannot raise a TypeError
            msg += ", ".join(str(x["sample_name"]) for x in occurrences)
        print(msg)

    # singular only for exactly one error type (zero is plural)
    noun = "error type" if n_error_types == 1 else "error types"
    final_msg = f"Validation unsuccessful. {n_error_types} {noun} found."

    print(final_msg)
    return final_msg
54+
55+
3656

3757
def main():
3858
"""Primary workflow"""
@@ -128,7 +148,7 @@ def main():
128148
try:
129149
validator(*arguments)
130150
except EidoValidationError as e:
131-
print(e)
151+
print_error_summary(e.errors_by_type)
132152
return False
133153
_LOGGER.info("Validation successful")
134154
sys.exit(0)

eido/exceptions.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@ def __init__(self, key):
4040
class EidoValidationError(EidoException):
4141
"""Object was not validated successfully according to schema."""
4242

43-
def __init__(self, message, errors):
43+
def __init__(self, message, errors_by_type):
4444
super().__init__(message)
45-
self.errors = errors
45+
self.errors_by_type = errors_by_type

eido/output_formatters.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,14 @@ def _convert_sample_to_row(
102102
Representation of sample as a CSV row.
103103
"""
104104
sample_row = []
105+
105106
for attribute in sample_attributes:
106107
if MultilineOutputFormatter._sample_attribute_is_list(
107108
sample, attribute
108-
) and getattr(sample, attribute):
109-
value = getattr(sample, attribute)[sample_index]
109+
) and sample[attribute]:
110+
value = sample[attribute][sample_index]
110111
else:
111-
value = getattr(sample, attribute, "")
112+
value = sample[attribute]
112113

113114
sample_row.append(value or "")
114115

eido/validation.py

+15-21
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
_LOGGER = getLogger(__name__)
2424

2525

26-
def _validate_object(object, schema, exclude_case=False):
26+
def _validate_object(object, schema, exclude_case=False, sample_name_colname=False):
2727
"""
2828
Generic function to validate object against a schema
2929
@@ -34,37 +34,30 @@ def _validate_object(object, schema, exclude_case=False):
3434
"""
3535

3636
validator = Draft7Validator(schema)
37+
print(object,schema)
3738
if not validator.is_valid(object):
3839
errors = sorted(validator.iter_errors(object), key=lambda e: e.path)
3940
errors_by_type = {}
41+
4042
# Accumulate and restructure error objects by error type
4143
for error in errors:
4244
if not error.message in errors_by_type:
4345
errors_by_type[error.message] = []
46+
47+
try:
48+
instance_name = error.instance[sample_name_colname]
49+
except KeyError:
50+
instance_name = "unnamed"
4451
errors_by_type[error.message].append(
4552
{
4653
"type": error.message,
47-
"message": f"{error.message} on instance {error.instance['sample_name']}",
48-
"sample_name": error.instance['sample_name']
54+
"message": f"{error.message} on instance {instance_name}",
55+
"sample_name": instance_name
4956
})
50-
51-
# Print a summary of errors, organized by error type
52-
n_error_types = len(errors_by_type)
53-
print(f"Found {n_error_types} types of error:")
54-
for type in errors_by_type:
55-
n = len(errors_by_type[type])
56-
msg = f" - {type}: ({n} samples) "
57-
if n < 50:
58-
msg += ", ".join([x["sample_name"] for x in errors_by_type[type]])
59-
print(msg)
60-
61-
if len(errors) > 1:
62-
final_msg = f"Validation unsuccessful. {len(errors)} errors found."
63-
else:
64-
final_msg = f"Validation unsuccessful. {len(errors)} error found."
65-
66-
raise EidoValidationError(final_msg, errors)
67-
57+
58+
raise EidoValidationError("Validation failed", errors_by_type)
59+
else:
60+
_LOGGER.debug("Validation was successful...")
6861

6962
def validate_project(project, schema, exclude_case=False):
7063
"""
@@ -75,6 +68,7 @@ def validate_project(project, schema, exclude_case=False):
7568
:param bool exclude_case: whether to exclude validated objects
7669
from the error. Useful when used ith large projects
7770
"""
71+
sample_name_colname = project.sample_name_colname
7872
schema_dicts = read_schema(schema=schema)
7973
for schema_dict in schema_dicts:
8074
project_dict = project.to_dict()

requirements/requirements-all.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
jsonschema>=3.0.1
22
logmuse>=0.2.5
33
pandas
4-
peppy>=0.32.0
4+
peppy>=0.40.0
55
ubiquerg>=0.5.2

tests/conftest.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,12 @@ def output_pep_with_fasta_column(path_pep_with_fasta_column):
9090

9191
@pytest.fixture
9292
def taxprofiler_csv_multiline_output(path_to_taxprofiler_csv_multiline_output):
93-
return pd.read_csv(path_to_taxprofiler_csv_multiline_output).to_csv(
93+
with open(path_to_taxprofiler_csv_multiline_output, 'r') as file:
94+
data = file.read()
95+
return data
96+
# This is broken unless I add na_filter=False. But it's a bad idea anyway, since
97+
# we're just using this for string comparison anyway...
98+
return pd.read_csv(path_to_taxprofiler_csv_multiline_output, na_filter=False).to_csv(
9499
path_or_buf=None, index=None
95100
)
96101

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
description: test PEP schema
2+
type: object
3+
properties:
4+
dcc:
5+
type: object
6+
properties:
7+
compute_packages:
8+
type: object
9+
samples:
10+
type: array
11+
items:
12+
type: object
13+
properties:
14+
sample_name:
15+
type: string
16+
protocol:
17+
type: string
18+
genome:
19+
type: string
20+
invalid:
21+
type: string
22+
23+
required:
24+
- samples
25+
- invalid

0 commit comments

Comments
 (0)