Skip to content

Commit

Permalink
Use regex variant when checking schema validity
Browse files Browse the repository at this point in the history
Rather than relying directly on `validator_cls.check_schema`,
reimplement this functionality in-tree here to allow for
customizations to the validator class before it is run on the user's
schema.

A new test uses a regex which is invalid under the regress unicode
engine but valid in the python engine, to ensure that consistent
checking is applied.

Manual testing revealed that the `_fail()` message production for
SchemaError was showing error information twice. This is fixed here,
but no new test has been added to guarantee the new behavior.
  • Loading branch information
sirosen committed Jan 8, 2025
1 parent e9aa64e commit cc12d98
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/check_jsonschema/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def get_validator(
except SchemaParseError as e:
self._fail("Error: schemafile could not be parsed as JSON", e)
except jsonschema.SchemaError as e:
self._fail(f"Error: schemafile was not valid: {e}\n", e)
self._fail("Error: schemafile was not valid\n", e)
except UnsupportedUrlScheme as e:
self._fail(f"Error: {e}\n", e)
except Exception as e:
Expand Down
10 changes: 2 additions & 8 deletions src/check_jsonschema/regex_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ def pattern_keyword(
if not validator.is_type(instance, "string"):
return

try:
regress_pattern = self._compile_pattern(pattern)
except regress.RegressError:
yield jsonschema.ValidationError(f"pattern {pattern!r} failed to compile")
regress_pattern = self._compile_pattern(pattern)
if not regress_pattern.find(instance):
yield jsonschema.ValidationError(f"{instance!r} does not match {pattern!r}")

Expand Down Expand Up @@ -120,10 +117,7 @@ def pattern_keyword(
if not validator.is_type(instance, "string"):
return

try:
re_pattern = re.compile(pattern)
except re.error:
yield jsonschema.ValidationError(f"pattern {pattern!r} failed to compile")
re_pattern = re.compile(pattern)
if not re_pattern.search(instance):
yield jsonschema.ValidationError(f"{instance!r} does not match {pattern!r}")

Expand Down
33 changes: 32 additions & 1 deletion src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,12 @@ def _get_validator(
if self.validator_class is None:
# get the correct validator class and check the schema under its metaschema
validator_cls = jsonschema.validators.validator_for(schema)
validator_cls.check_schema(schema, format_checker=format_checker)
_check_schema(
validator_cls,
schema,
format_checker=format_checker,
regex_impl=regex_impl,
)
else:
# for a user-provided validator class, don't check_schema
# on the grounds that it might *not* be valid but the user wants to use
Expand All @@ -197,6 +202,32 @@ def _get_validator(
return t.cast(jsonschema.protocols.Validator, validator)


def _check_schema(
    validator_cls: type[jsonschema.protocols.Validator],
    schema: dict[str, t.Any],
    *,
    format_checker: jsonschema.FormatChecker | None,
    regex_impl: RegexImplementation,
) -> None:
    """Check ``schema`` against the metaschema of ``validator_cls``.

    This is a variant of ``Validator.check_schema`` in which the validator
    used on the metaschema is first extended with the given regex
    implementation, and the given format checker is applied.

    :param validator_cls: the validator class whose ``META_SCHEMA`` is used
    :param schema: the schema document to check
    :param format_checker: the format checker to use; when ``None``, the
        ``FORMAT_CHECKER`` of the (extended) metaschema validator class is
        used instead
    :param regex_impl: the regex implementation passed to
        ``_extend_with_pattern_implementation``
    :raises jsonschema.exceptions.SchemaError: built from the first error
        reported while validating ``schema``
    """
    metaschema = validator_cls.META_SCHEMA

    # choose the validator class for the metaschema itself (falling back to
    # ``validator_cls``), then swap in the requested pattern implementation
    metaschema_validator_cls = _extend_with_pattern_implementation(
        jsonschema.validators.validator_for(metaschema, default=validator_cls),
        regex_impl,
    )

    effective_checker = (
        metaschema_validator_cls.FORMAT_CHECKER
        if format_checker is None
        else format_checker
    )

    metaschema_validator = metaschema_validator_cls(
        metaschema, format_checker=effective_checker
    )

    # only the first reported error matters: it is converted and raised
    first_error = next(iter(metaschema_validator.iter_errors(schema)), None)
    if first_error is not None:
        raise jsonschema.exceptions.SchemaError.create_from(first_error)


class BuiltinSchemaLoader(SchemaLoader):
def __init__(self, schema_name: str, *, base_uri: str | None = None) -> None:
self.schema_name = schema_name
Expand Down
12 changes: 12 additions & 0 deletions tests/acceptance/test_invalid_schema_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,15 @@ def test_checker_invalid_schemafile_scheme(run_line, tmp_path):
res = run_line(["check-jsonschema", "--schemafile", f"ftp://{foo}", str(bar)])
assert res.exit_code == 1
assert "only supports http, https" in res.stderr


def test_checker_invalid_schemafile_due_to_bad_regex(run_line, tmp_path):
    """A schemafile with a bad ``pattern`` regex must be reported as invalid."""
    schema_path = tmp_path / "foo.json"
    instance_path = tmp_path / "bar.json"

    # too many backslash escapes -- not a valid Unicode-mode regex
    schema_path.write_text(r'{"properties": {"foo": {"pattern": "\\\\p{N}"}}}')
    instance_path.write_text("{}")

    command = [
        "check-jsonschema",
        "--schemafile",
        str(schema_path),
        str(instance_path),
    ]
    result = run_line(command)

    assert result.exit_code == 1
    assert "schemafile was not valid" in result.stderr

0 comments on commit cc12d98

Please sign in to comment.