-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FEATURE] Add parsing of Expectation diagnostics to contrib packaging JSON object #4114
Changes from 73 commits
843ed94
d8dca58
b2899d4
f549285
8dc1775
3d5d815
7c62428
e0ca6ad
232d07d
8ff8812
2f7e1a5
e443a16
150a858
4ef74e2
214b32a
95ad012
5bf7916
7439e3b
8231c67
f9c1edc
dc8e996
259bd6a
e6ef081
c3647bf
85c11be
57a748f
a824773
fcd2234
0cfc069
568a4a1
b66bfad
846eb75
cb5d6a1
9b7127b
0df786e
05d87cd
1478978
243ab37
016e1bf
1a132e9
d9e08b5
debd6c1
997b93a
11f9e0b
29daafe
03514f1
c1ee85d
713efff
3a5299e
f6bb8fb
6993a84
6b4adf0
f6ddb0a
a80f26a
9d5b9ba
5c92972
4428ab1
2784ff8
991872c
3bb9fe2
089d7bb
061f864
824723f
0a63e6b
c7f37ef
a1a0145
53285fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
init_cmd, | ||
publish_cmd, | ||
read_package_from_file, | ||
write_package_to_disk, | ||
sync_package, | ||
) | ||
from great_expectations_contrib.package import GreatExpectationsContribPackageManifest | ||
|
||
|
@@ -39,16 +39,20 @@ def init() -> None: | |
@click.pass_obj | ||
def publish(pkg: GreatExpectationsContribPackageManifest) -> None: | ||
publish_cmd() | ||
pkg.update_package_state() | ||
write_package_to_disk(pkg, PACKAGE_PATH) | ||
sync_package(pkg, PACKAGE_PATH) | ||
|
||
|
||
@cli.command(help="Check your package to make sure it's met all the requirements") | ||
@click.pass_obj | ||
def check(pkg: GreatExpectationsContribPackageManifest) -> None: | ||
check_cmd() | ||
pkg.update_package_state() | ||
write_package_to_disk(pkg, PACKAGE_PATH) | ||
sync_package(pkg, PACKAGE_PATH) | ||
|
||
|
||
@cli.command(help="Manually sync your package state") | ||
@click.pass_obj | ||
def sync(pkg: GreatExpectationsContribPackageManifest) -> None: | ||
sync_package(pkg, PACKAGE_PATH) | ||
Comment on lines
+52
to
+55
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the hook that is called after each CLI invocation to update the underlying JSON object. I've exposed the functionality in a command to aid with debugging. |
||
|
||
|
||
if __name__ == "__main__": | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,89 +5,92 @@ | |
import sys | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import Any, Dict, List, Optional, Type | ||
from typing import Any, List, Optional, Type | ||
|
||
import pkg_resources | ||
|
||
from great_expectations.core.expectation_diagnostics.expectation_diagnostics import ( | ||
ExpectationDiagnostics, | ||
) | ||
from great_expectations.expectations.expectation import Expectation | ||
from great_expectations.types import SerializableDictDot | ||
|
||
logger = logging.getLogger(__name__) | ||
logger.setLevel(logging.INFO) | ||
|
||
# Type alias that will need to be updated to reflect the complex nature of the 'run_diagnostics' return object | ||
Diagnostics = Dict[str, Any] | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ExpectationCompletenessCheck: | ||
@dataclass | ||
class ExpectationCompletenessCheck(SerializableDictDot): | ||
Comment on lines
+25
to
+26
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
message: str | ||
passed: bool | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ExpectationCompletenessChecklist: | ||
@dataclass | ||
class ExpectationCompletenessChecklist(SerializableDictDot): | ||
experimental: List[ExpectationCompletenessCheck] | ||
beta: List[ExpectationCompletenessCheck] | ||
production: List[ExpectationCompletenessCheck] | ||
|
||
|
||
@dataclass(frozen=True) | ||
class PackageCompletenessStatus: | ||
@dataclass | ||
class PackageCompletenessStatus(SerializableDictDot): | ||
concept_only: int | ||
experimental: int | ||
beta: int | ||
production: int | ||
total: int | ||
|
||
|
||
@dataclass(frozen=True) | ||
class RenderedExpectation: | ||
@dataclass | ||
class RenderedExpectation(SerializableDictDot): | ||
name: str | ||
tags: List[str] | ||
supported: List[str] | ||
status: ExpectationCompletenessChecklist | ||
|
||
|
||
@dataclass(frozen=True) | ||
class Dependency: | ||
@dataclass | ||
class Dependency(SerializableDictDot): | ||
text: str | ||
link: str | ||
version: Optional[str] | ||
version: Optional[str] = None | ||
|
||
|
||
@dataclass(frozen=True) | ||
class GitHubUser: | ||
@dataclass | ||
class GitHubUser(SerializableDictDot): | ||
username: str | ||
full_name: Optional[str] | ||
full_name: Optional[str] = None | ||
|
||
|
||
class SocialLinkType(Enum): | ||
class SocialLinkType(str, Enum): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By inheriting |
||
TWITTER = "TWITTER" | ||
INSTAGRAM = "INSTAGRAM" | ||
LINKEDIN = "LINKEDIN" | ||
MEDIUM = "MEDIUM" | ||
|
||
|
||
@dataclass(frozen=True) | ||
class SocialLink: | ||
@dataclass | ||
class SocialLink(SerializableDictDot): | ||
account_type: SocialLinkType | ||
identifier: str | ||
|
||
|
||
@dataclass(frozen=True) | ||
class DomainExpert: | ||
@dataclass | ||
class DomainExpert(SerializableDictDot): | ||
full_name: str | ||
social_links: List[SocialLink] | ||
picture: str | ||
|
||
|
||
class Maturity(Enum): | ||
class Maturity(str, Enum): | ||
CONCEPT_ONLY = "CONCEPT_ONLY" | ||
EXPERIMENTAL = "EXPERIMENTAL" | ||
BETA = "BETA" | ||
PRODUCTION = "PRODUCTION" | ||
|
||
|
||
@dataclass(frozen=True) | ||
class GreatExpectationsContribPackageManifest: | ||
@dataclass | ||
class GreatExpectationsContribPackageManifest(SerializableDictDot): | ||
# Core | ||
package_name: Optional[str] = None | ||
icon: Optional[str] = None | ||
|
@@ -110,22 +113,97 @@ def update_package_state(self) -> None: | |
""" | ||
Parses diagnostic reports from package Expectations and uses them to update JSON state | ||
""" | ||
diagnostics = self._retrieve_package_expectations_diagnostics() | ||
diagnostics = ( | ||
GreatExpectationsContribPackageManifest.retrieve_package_expectations_diagnostics() | ||
) | ||
self._update_attrs_with_diagnostics(diagnostics) | ||
|
||
def _update_attrs_with_diagnostics(self, diagnostics: List[Diagnostics]) -> None: | ||
# TODO: Write logic to assign values to attrs | ||
# This is a black box for now | ||
# for diagnostic in diagnostics: | ||
# pass | ||
raise NotImplementedError | ||
|
||
def _retrieve_package_expectations_diagnostics(self) -> List[Diagnostics]: | ||
def _update_attrs_with_diagnostics( | ||
self, diagnostics: List[ExpectationDiagnostics] | ||
) -> None: | ||
self._update_expectations(diagnostics) | ||
self._update_dependencies("requirements.txt") | ||
self._update_contributors(diagnostics) | ||
|
||
def _update_expectations(self, diagnostics: List[ExpectationDiagnostics]) -> None: | ||
expectations = [] | ||
status = {maturity.name: 0 for maturity in Maturity} | ||
|
||
for diagnostic in diagnostics: | ||
expectation = RenderedExpectation( | ||
name=diagnostic.description.snake_name, | ||
tags=diagnostic.library_metadata.tags, | ||
supported=[], | ||
status=diagnostic.maturity_checklist, # Should be converted to the proper type | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @abegong it seems as though |
||
) | ||
expectations.append(expectation) | ||
|
||
expectation_maturity = diagnostic.library_metadata.maturity | ||
status[expectation_maturity] += 1 | ||
|
||
self.expectations = expectations | ||
self.expectation_count = len(expectations) | ||
|
||
# Enum is all caps but status attributes are lowercase | ||
lowercase_status = {k.lower(): v for k, v in status.items()} | ||
lowercase_status["total"] = sum(status.values()) | ||
|
||
self.status = PackageCompletenessStatus(**lowercase_status) | ||
maturity = max(status, key=status.get) | ||
self.maturity = Maturity[maturity] | ||
|
||
def _update_dependencies(self, path: str) -> None: | ||
if not os.path.exists(path): | ||
logger.warning(f"Could not find requirements file {path}") | ||
self.dependencies = [] | ||
return | ||
|
||
with open(path) as f: | ||
requirements = [req for req in pkg_resources.parse_requirements(f)] | ||
|
||
def _convert_to_dependency( | ||
requirement: pkg_resources.Requirement, | ||
) -> Dependency: | ||
name = requirement.project_name | ||
pypi_url = f"https://pypi.org/project/{name}" | ||
if requirement.specs: | ||
# Stringify tuple of pins | ||
version = ", ".join( | ||
"".join(symbol for symbol in pin) | ||
for pin in sorted(requirement.specs) | ||
) | ||
else: | ||
version = None | ||
return Dependency(text=name, link=pypi_url, version=version) | ||
|
||
dependencies = list(map(_convert_to_dependency, requirements)) | ||
self.dependencies = dependencies | ||
|
||
def _update_contributors(self, diagnostics: List[ExpectationDiagnostics]) -> None: | ||
contributors = [] | ||
for diagnostic in diagnostics: | ||
for contributor in diagnostic.library_metadata.contributors: | ||
github_user = GitHubUser(contributor) | ||
if github_user not in contributors: | ||
contributors.append(github_user) | ||
|
||
self.contributors = contributors | ||
|
||
@staticmethod | ||
def retrieve_package_expectations_diagnostics() -> List[ExpectationDiagnostics]: | ||
try: | ||
package = self._identify_user_package() | ||
expectations_module = self._import_expectations_module(package) | ||
expectations = self._retrieve_expectations_from_module(expectations_module) | ||
diagnostics = self._gather_diagnostics(expectations) | ||
package = GreatExpectationsContribPackageManifest._identify_user_package() | ||
expectations_module = ( | ||
GreatExpectationsContribPackageManifest._import_expectations_module( | ||
package | ||
) | ||
) | ||
expectations = GreatExpectationsContribPackageManifest._retrieve_expectations_from_module( | ||
expectations_module | ||
) | ||
diagnostics = GreatExpectationsContribPackageManifest._gather_diagnostics( | ||
expectations | ||
) | ||
Comment on lines
+232
to
+243
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Made these all static since they do not use or modify state. |
||
return diagnostics | ||
except Exception as e: | ||
# Exceptions should not break the CLI - this behavior should be working in the background | ||
|
@@ -135,7 +213,8 @@ def _retrieve_package_expectations_diagnostics(self) -> List[Diagnostics]: | |
) | ||
return [] | ||
|
||
def _identify_user_package(self) -> str: | ||
@staticmethod | ||
def _identify_user_package() -> str: | ||
# Guaranteed to have a dir named '<MY_PACKAGE>_expectations' through Cookiecutter validation | ||
packages = [ | ||
d for d in os.listdir() if os.path.isdir(d) and d.endswith("_expectations") | ||
|
@@ -149,7 +228,8 @@ def _identify_user_package(self) -> str: | |
|
||
return packages[0] | ||
|
||
def _import_expectations_module(self, package: str) -> Any: | ||
@staticmethod | ||
def _import_expectations_module(package: str) -> Any: | ||
# Need to add user's project to the PYTHONPATH | ||
cwd = os.getcwd() | ||
sys.path.append(cwd) | ||
|
@@ -159,8 +239,9 @@ def _import_expectations_module(self, package: str) -> Any: | |
except ModuleNotFoundError: | ||
raise | ||
|
||
@staticmethod | ||
def _retrieve_expectations_from_module( | ||
self, expectations_module: Any | ||
expectations_module: Any, | ||
) -> List[Type[Expectation]]: | ||
expectations: List[Type[Expectation]] = [] | ||
names: List[str] = [] | ||
|
@@ -172,9 +253,10 @@ def _retrieve_expectations_from_module( | |
logger.info(f"Found {len(names)} expectation(s): {names}") | ||
return expectations | ||
|
||
@staticmethod | ||
def _gather_diagnostics( | ||
self, expectations: List[Type[Expectation]] | ||
) -> List[Diagnostics]: | ||
expectations: List[Type[Expectation]], | ||
) -> List[ExpectationDiagnostics]: | ||
diagnostics_list = [] | ||
for expectation in expectations: | ||
instance = expectation() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you want to call `sync_package` here or just `check_cmd`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The general idea is that the `sync` action is what updates the attributes (based on the state of the package). While you can invoke `sync` on its own, it's meant to be a hook that is automatically invoked at the end of a given user action.

This keeps the underlying JSON object up to date as the user iterates on their package. Perhaps it's not entirely necessary, since we have the CI/CD script to parse these files, but I think it's okay for now.