diff --git a/docs/contributing/contributing_setup.md b/docs/contributing/contributing_setup.md
index 582ba5c0964f..b65e2fd3a74c 100644
--- a/docs/contributing/contributing_setup.md
+++ b/docs/contributing/contributing_setup.md
@@ -47,6 +47,32 @@ In order to contribute to Great Expectations, you will need the following:
 
 ### Install Python dependencies
 
+#### (Easy version of steps 5-7 below for Mac/Linux users)
+
+Create a virtual environment in your locally cloned repo, use the same version of `pip` that we use in our CI/CD pipelines (for Python 3.6 - 3.9), and install the fewest dependencies needed for a dev environment (to minimize potential setup headaches).
+
+```
+python3 -m venv ge_dev
+
+source ge_dev/bin/activate
+
+pip install --upgrade pip==21.3.1
+
+pip install -r requirements-dev-lite.txt -c constraints-dev.txt -e .
+```
+
+Confirm that tests pass (against pandas and SQLAlchemy with SQLite only), without needing to run any Docker containers.
+
+```
+ulimit -n 4096
+
+pytest -v --no-spark --no-postgresql
+```
+
+> Add `ulimit -n 4096` to your `~/.zshrc` or `~/.bashrc` so that it is already set for future runs. **You WILL eventually see many tests fail with `OSError: [Errno 24] Too many open files`** if you do not set it!
+
+Later on, try setting up the full dev environment (described in step 6) when you are ready for more robust testing of your custom Expectations!
+
 #### 5. Create a new virtual environment
 
 * Make a new virtual environment (e.g. using virtualenv or conda), name it “great_expectations_dev” or similar.
@@ -132,6 +158,8 @@ Depending on which features of Great Expectations you want to work on, you may w
 
 * Caution: If another service is using port 3306, Docker may start the container but silently fail to set up the port.
 
+> If you have an Apple Silicon Mac (M1), this Docker image does not work.
+
 #### If you want to develop against local Spark:
 
 * In most cases, `pip install -r requirements-dev.txt` should set up pyspark for you.
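Every code change below follows one of two guard conventions for optional dependencies: a module imported in a `try`/`except ImportError` block and bound to `None` on failure (later checked via `storage is None` skip guards or an `AttributeError` catch), or a library probed at test time with `is_library_loadable`. A minimal sketch of the first convention, assuming the `google-cloud-storage` optional dependency used by the GCS data connectors; `my_gcs_helper` is a hypothetical name, not part of this PR:

```
# Sketch of the optional-import guard; `my_gcs_helper` is hypothetical.
try:
    from google.cloud import storage
except ImportError:
    # The dependency is optional: bind the name to None so callers and
    # pytest.mark.skipif guards can check `storage is None`.
    storage = None


def my_gcs_helper(bucket_name: str):
    # Fail fast with a clear message instead of a NameError later.
    if storage is None:
        raise ModuleNotFoundError(
            "google-cloud-storage is required for GCS support"
        )
    return storage.Client().bucket(bucket_name)
```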
diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py
index 9b06ca26e48c..6735784b4b2c 100644
--- a/great_expectations/self_check/util.py
+++ b/great_expectations/self_check/util.py
@@ -385,7 +385,7 @@ def get_dataset(
         return PandasDataset(df, profiler=profiler, caching=caching)
 
     elif dataset_type == "sqlite":
-        if not create_engine:
+        if not create_engine or not SQLITE_TYPES:
             return None
 
         engine = create_engine(get_sqlite_connection_url(sqlite_db_path=sqlite_db_path))
@@ -445,7 +445,7 @@ def get_dataset(
         )
 
     elif dataset_type == "postgresql":
-        if not create_engine:
+        if not create_engine or not POSTGRESQL_TYPES:
             return None
 
         # Create a new database
@@ -508,7 +508,7 @@ def get_dataset(
         )
 
     elif dataset_type == "mysql":
-        if not create_engine:
+        if not create_engine or not MYSQL_TYPES:
             return None
 
         db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
@@ -596,7 +596,7 @@ def get_dataset(
        )
 
     elif dataset_type == "mssql":
-        if not create_engine:
+        if not create_engine or not MSSQL_TYPES:
             return None
 
         db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
@@ -967,20 +967,34 @@ def build_sa_validator_with_data(
     sqlite_db_path=None,
     batch_definition: Optional[BatchDefinition] = None,
 ):
-    dialect_classes = {
-        "sqlite": sqlitetypes.dialect,
-        "postgresql": postgresqltypes.dialect,
-        "mysql": mysqltypes.dialect,
-        "mssql": mssqltypes.dialect,
-        "bigquery": sqla_bigquery.BigQueryDialect,
-    }
-    dialect_types = {
-        "sqlite": SQLITE_TYPES,
-        "postgresql": POSTGRESQL_TYPES,
-        "mysql": MYSQL_TYPES,
-        "mssql": MSSQL_TYPES,
-        "bigquery": BIGQUERY_TYPES,
-    }
+    dialect_classes = {}
+    dialect_types = {}
+    try:
+        dialect_classes["sqlite"] = sqlitetypes.dialect
+        dialect_types["sqlite"] = SQLITE_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["postgresql"] = postgresqltypes.dialect
+        dialect_types["postgresql"] = POSTGRESQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["mysql"] = mysqltypes.dialect
+        dialect_types["mysql"] = MYSQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["mssql"] = mssqltypes.dialect
+        dialect_types["mssql"] = MSSQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["bigquery"] = sqla_bigquery.BigQueryDialect
+        dialect_types["bigquery"] = BIGQUERY_TYPES
+    except AttributeError:
+        pass
+
     db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
     if sa_engine_name == "sqlite":
         engine = create_engine(get_sqlite_connection_url(sqlite_db_path))
diff --git a/requirements-dev-base.txt b/requirements-dev-base.txt
index ac348a4ace87..60e742c7179d 100644
--- a/requirements-dev-base.txt
+++ b/requirements-dev-base.txt
@@ -14,7 +14,7 @@ boto3>=1.9 # all_tests
 
 feather-format>=0.4.1 # all_tests
 flake8==3.8.3 # lint
-flask>=1.0.0 # for s3 test only
+flask>=1.0.0 # for s3 test only (with moto)
 freezegun>=0.3.15 # all_tests
 gcsfs>=0.5.1 # all_tests
 google-cloud-secret-manager>=1.0.0 # all_tests
diff --git a/requirements-dev-lite.txt b/requirements-dev-lite.txt
new file mode 100644
index 000000000000..5cd9f30838e7
--- /dev/null
+++ b/requirements-dev-lite.txt
@@ -0,0 +1,17 @@
+
+--requirement requirements.txt
+
+black==22.1.0 # lint
+boto3>=1.9 # all_tests
+flake8==3.8.3 # lint
+flask>=1.0.0 # for s3 test only (with moto)
+freezegun>=0.3.15 # all_tests
+isort==5.4.2 # lint
+moto>=1.3.7,<2.0.0 # all_tests
+pyfakefs>=4.5.1 # all_tests
+pytest-benchmark>=3.4.1 # performance tests
+pytest>=5.3.5,<6.0.0 # all_tests
+requirements-parser>=0.2.0 # all_tests
+s3fs>=0.5.1 # all_tests
+snapshottest==0.6.0 # GE Cloud atomic renderer tests +sqlalchemy>=1.3.18,<1.4.10 # sqlalchemy_tests diff --git a/tests/actions/test_core_actions.py b/tests/actions/test_core_actions.py index 2dbc86775ce3..ccece859302a 100644 --- a/tests/actions/test_core_actions.py +++ b/tests/actions/test_core_actions.py @@ -17,6 +17,7 @@ ExpectationSuiteIdentifier, ValidationResultIdentifier, ) +from great_expectations.util import is_library_loadable from great_expectations.validation_operators import ( CloudNotificationAction, EmailAction, @@ -228,6 +229,10 @@ def test_SlackNotificationAction( ) == {"slack_notification_result": "none required"} +@pytest.mark.skipif( + not is_library_loadable(library_name="pypd"), + reason="pypd is not installed", +) @mock.patch("pypd.EventV2") def test_PagerdutyAlertAction( data_context_parameterized_expectation_suite, diff --git a/tests/cli/test_sanitize_yaml_and_save_datasource.py b/tests/cli/test_sanitize_yaml_and_save_datasource.py index 1aded52ed688..02678c21c83d 100644 --- a/tests/cli/test_sanitize_yaml_and_save_datasource.py +++ b/tests/cli/test_sanitize_yaml_and_save_datasource.py @@ -1,6 +1,7 @@ import pytest from great_expectations.cli.datasource import sanitize_yaml_and_save_datasource +from great_expectations.util import is_library_loadable def test_sanitize_yaml_and_save_datasource_raises_error_on_empty_yaml( @@ -65,6 +66,10 @@ def test_sanitize_yaml_and_save_datasource_works_without_credentials( assert obs == {} +@pytest.mark.skipif( + not is_library_loadable(library_name="psycopg2"), + reason="psycopg2 is not installed", +) def test_sanitize_yaml_and_save_datasource_works_with_credentials( sa, empty_data_context, diff --git a/tests/cli/v012/test_init_missing_libraries.py b/tests/cli/v012/test_init_missing_libraries.py index 7a070b0e4111..1bc0e7bfb5a1 100644 --- a/tests/cli/v012/test_init_missing_libraries.py +++ b/tests/cli/v012/test_init_missing_libraries.py @@ -37,19 +37,23 @@ def _library_not_loaded_test( assert "Which database backend are you using" in stdout assert "Give your new Datasource a short name" in stdout assert ( - """Next, we will configure database credentials and store them in the `my_db` section - of this config file: great_expectations/uncommitted/config_variables.yml""" + "Next, we will configure database credentials and store them in the `my_db` section" in stdout ) assert ( - f"""Great Expectations relies on the library `{library_import_name}` to connect to your data, \ - but the package `{library_name}` containing this library is not installed. - Would you like Great Expectations to try to execute `pip install {library_name}` for you?""" + f"Great Expectations relies on the library `{library_import_name}` to connect to your data" + in stdout + ) + assert ( + f"but the package `{library_name}` containing this library is not installed" + in stdout + ) + assert ( + f"Would you like Great Expectations to try to execute `pip install {library_name}` for you?" in stdout ) assert ( - f"""\nOK, exiting now. - - Please execute `pip install {library_name}` before trying again.""" + f"Please execute `pip install {library_name}` before trying again." 
in stdout ) @@ -71,31 +75,25 @@ def _library_not_loaded_test( assert ( obs_tree == """\ - great_expectations/ - .gitignore - great_expectations.yml - checkpoints/ - expectations/ +great_expectations/ + .gitignore + great_expectations.yml + checkpoints/ + expectations/ + .ge_store_backend_id + plugins/ + custom_data_docs/ + renderers/ + styles/ + data_docs_custom_styles.css + views/ + profilers/ + uncommitted/ + config_variables.yml + data_docs/ + validations/ .ge_store_backend_id - notebooks/ - pandas/ - validation_playground.ipynb - spark/ - validation_playground.ipynb - sql/ - validation_playground.ipynb - plugins/ - custom_data_docs/ - renderers/ - styles/ - data_docs_custom_styles.css - views/ - uncommitted/ - config_variables.yml - data_docs/ - validations/ - .ge_store_backend_id - """ +""" ) assert_no_logging_messages_or_tracebacks(my_caplog, result) @@ -227,16 +225,18 @@ def test_cli_init_spark_without_library_installed_instructs_user( assert "What data would you like Great Expectations to connect to" in stdout assert "What are you processing your files with" in stdout assert ( - f"""Great Expectations relies on the library `pyspark` to connect to your data, \ - but the package `pyspark` containing this library is not installed. - Would you like Great Expectations to try to execute `pip install pyspark` for you?""" + f"Great Expectations relies on the library `pyspark` to connect to your data" in stdout ) assert ( - f"""\nOK, exiting now. - - Please execute `pip install pyspark` before trying again.""" + f"but the package `pyspark` containing this library is not installed." in stdout ) + assert ( + f"Would you like Great Expectations to try to execute `pip install pyspark` for you?" + in stdout + ) + assert f"Please execute `pip install pyspark` before trying again." 
in stdout # assert "Great Expectations relies on the library `pyspark`" in stdout # assert "Please `pip install pyspark` before trying again" in stdout @@ -258,31 +258,25 @@ def test_cli_init_spark_without_library_installed_instructs_user( assert ( obs_tree == """\ - great_expectations/ - .gitignore - great_expectations.yml - checkpoints/ - expectations/ +great_expectations/ + .gitignore + great_expectations.yml + checkpoints/ + expectations/ + .ge_store_backend_id + plugins/ + custom_data_docs/ + renderers/ + styles/ + data_docs_custom_styles.css + views/ + profilers/ + uncommitted/ + config_variables.yml + data_docs/ + validations/ .ge_store_backend_id - notebooks/ - pandas/ - validation_playground.ipynb - spark/ - validation_playground.ipynb - sql/ - validation_playground.ipynb - plugins/ - custom_data_docs/ - renderers/ - styles/ - data_docs_custom_styles.css - views/ - uncommitted/ - config_variables.yml - data_docs/ - validations/ - .ge_store_backend_id - """ +""" ) assert_no_logging_messages_or_tracebacks(caplog, result) diff --git a/tests/data_context/store/test_store_backends.py b/tests/data_context/store/test_store_backends.py index 7463828b250a..298ccd604552 100644 --- a/tests/data_context/store/test_store_backends.py +++ b/tests/data_context/store/test_store_backends.py @@ -34,7 +34,7 @@ from great_expectations.data_context.util import file_relative_path from great_expectations.exceptions import InvalidKeyError, StoreBackendError, StoreError from great_expectations.self_check.util import expectationSuiteSchema -from great_expectations.util import gen_directory_tree_str +from great_expectations.util import gen_directory_tree_str, is_library_loadable @pytest.fixture() @@ -1006,6 +1006,10 @@ def test_TupleS3StoreBackend_with_s3_put_options(): assert my_store.list_keys() == [(".ge_store_backend_id",), ("AAA",)] +@pytest.mark.skipif( + not is_library_loadable(library_name="google"), + reason="google is not installed", +) def test_TupleGCSStoreBackend_base_public_path(): """ What does this test and why? 
@@ -1053,6 +1057,10 @@ def test_TupleGCSStoreBackend_base_public_path(): ) +@pytest.mark.skipif( + not is_library_loadable(library_name="google"), + reason="google is not installed", +) def test_TupleGCSStoreBackend(): # pytest.importorskip("google-cloud-storage") """ diff --git a/tests/data_context/test_data_context.py b/tests/data_context/test_data_context.py index f6aa06991a37..50eaa483022f 100644 --- a/tests/data_context/test_data_context.py +++ b/tests/data_context/test_data_context.py @@ -41,6 +41,7 @@ from great_expectations.util import ( deep_filter_properties_iterable, gen_directory_tree_str, + is_library_loadable, ) from tests.test_utils import create_files_in_directory, safe_remove @@ -297,87 +298,89 @@ def test_list_datasources(data_context_parameterized_expectation_suite): }, ] - # Make sure passwords are masked in password or url fields - data_context_parameterized_expectation_suite.add_datasource( - "postgres_source_with_password", - initialize=False, - module_name="great_expectations.datasource", - class_name="SqlAlchemyDatasource", - credentials={ - "drivername": "postgresql", - "host": os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost"), - "port": "65432", - "username": "username_str", - "password": "password_str", - "database": "database_str", - }, - ) - - data_context_parameterized_expectation_suite.add_datasource( - "postgres_source_with_password_in_url", - initialize=False, - module_name="great_expectations.datasource", - class_name="SqlAlchemyDatasource", - credentials={ - "url": "postgresql+psycopg2://username:password@host:65432/database", - }, - ) + if is_library_loadable(library_name="psycopg2"): - datasources = data_context_parameterized_expectation_suite.list_datasources() - - assert datasources == [ - { - "name": "mydatasource", - "class_name": "PandasDatasource", - "module_name": "great_expectations.datasource", - "data_asset_type": {"class_name": "PandasDataset"}, - "batch_kwargs_generators": { - "mygenerator": { - "base_directory": "../data", - "class_name": "SubdirReaderBatchKwargsGenerator", - "reader_options": {"engine": "python", "sep": None}, - } - }, - }, - { - "name": "second_pandas_source", - "class_name": "PandasDatasource", - "module_name": "great_expectations.datasource", - "data_asset_type": { - "class_name": "PandasDataset", - "module_name": "great_expectations.dataset", - }, - }, - { - "name": "postgres_source_with_password", - "class_name": "SqlAlchemyDatasource", - "module_name": "great_expectations.datasource", - "data_asset_type": { - "class_name": "SqlAlchemyDataset", - "module_name": "great_expectations.dataset", - }, - "credentials": { + # Make sure passwords are masked in password or url fields + data_context_parameterized_expectation_suite.add_datasource( + "postgres_source_with_password", + initialize=False, + module_name="great_expectations.datasource", + class_name="SqlAlchemyDatasource", + credentials={ "drivername": "postgresql", "host": os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost"), "port": "65432", "username": "username_str", - "password": "***", + "password": "password_str", "database": "database_str", }, - }, - { - "name": "postgres_source_with_password_in_url", - "class_name": "SqlAlchemyDatasource", - "module_name": "great_expectations.datasource", - "data_asset_type": { - "class_name": "SqlAlchemyDataset", - "module_name": "great_expectations.dataset", + ) + + data_context_parameterized_expectation_suite.add_datasource( + "postgres_source_with_password_in_url", + initialize=False, + 
module_name="great_expectations.datasource", + class_name="SqlAlchemyDatasource", + credentials={ + "url": "postgresql+psycopg2://username:password@host:65432/database", }, - "credentials": { - "url": "postgresql+psycopg2://username:***@host:65432/database", + ) + + datasources = data_context_parameterized_expectation_suite.list_datasources() + + assert datasources == [ + { + "name": "mydatasource", + "class_name": "PandasDatasource", + "module_name": "great_expectations.datasource", + "data_asset_type": {"class_name": "PandasDataset"}, + "batch_kwargs_generators": { + "mygenerator": { + "base_directory": "../data", + "class_name": "SubdirReaderBatchKwargsGenerator", + "reader_options": {"engine": "python", "sep": None}, + } + }, }, - }, - ] + { + "name": "second_pandas_source", + "class_name": "PandasDatasource", + "module_name": "great_expectations.datasource", + "data_asset_type": { + "class_name": "PandasDataset", + "module_name": "great_expectations.dataset", + }, + }, + { + "name": "postgres_source_with_password", + "class_name": "SqlAlchemyDatasource", + "module_name": "great_expectations.datasource", + "data_asset_type": { + "class_name": "SqlAlchemyDataset", + "module_name": "great_expectations.dataset", + }, + "credentials": { + "drivername": "postgresql", + "host": os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost"), + "port": "65432", + "username": "username_str", + "password": "***", + "database": "database_str", + }, + }, + { + "name": "postgres_source_with_password_in_url", + "class_name": "SqlAlchemyDatasource", + "module_name": "great_expectations.datasource", + "data_asset_type": { + "class_name": "SqlAlchemyDataset", + "module_name": "great_expectations.dataset", + }, + "credentials": { + "url": "postgresql+psycopg2://username:***@host:65432/database", + }, + }, + ] @freeze_time("09/26/2019 13:42:41") diff --git a/tests/data_context/test_data_context_utils.py b/tests/data_context/test_data_context_utils.py index 2412cd34b0c2..67420ea30d9c 100644 --- a/tests/data_context/test_data_context_utils.py +++ b/tests/data_context/test_data_context_utils.py @@ -8,6 +8,7 @@ from great_expectations.data_context.util import ( PasswordMasker, parse_substitution_variable, + secretmanager, substitute_value_from_aws_secrets_manager, substitute_value_from_azure_keyvault, substitute_value_from_gcp_secret_manager, @@ -55,15 +56,18 @@ def test_password_masker_mask_db_url(monkeypatch, tmp_path): This test uses database url examples from https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls """ - # PostgreSQL + # PostgreSQL (if installed in test environment) # default db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost") - assert ( - PasswordMasker.mask_db_url( - f"postgresql://scott:tiger@{db_hostname}:65432/mydatabase" + try: + assert ( + PasswordMasker.mask_db_url( + f"postgresql://scott:tiger@{db_hostname}:65432/mydatabase" + ) + == f"postgresql://scott:***@{db_hostname}:65432/mydatabase" ) - == f"postgresql://scott:***@{db_hostname}:65432/mydatabase" - ) + except ModuleNotFoundError: + pass assert ( PasswordMasker.mask_db_url( f"postgresql://scott:tiger@{db_hostname}:65432/mydatabase", @@ -79,13 +83,16 @@ def test_password_masker_mask_db_url(monkeypatch, tmp_path): == f"postgresql://scott:***@{db_hostname}/mydatabase" ) - # psycopg2 - assert ( - PasswordMasker.mask_db_url( - f"postgresql+psycopg2://scott:tiger@{db_hostname}:65432/mydatabase" + # psycopg2 (if installed in test environment) + try: + assert ( + PasswordMasker.mask_db_url( + 
f"postgresql+psycopg2://scott:tiger@{db_hostname}:65432/mydatabase" + ) + == f"postgresql+psycopg2://scott:***@{db_hostname}:65432/mydatabase" ) - == f"postgresql+psycopg2://scott:***@{db_hostname}:65432/mydatabase" - ) + except ModuleNotFoundError: + pass assert ( PasswordMasker.mask_db_url( f"postgresql+psycopg2://scott:tiger@{db_hostname}:65432/mydatabase", @@ -146,13 +153,16 @@ def test_password_masker_mask_db_url(monkeypatch, tmp_path): == f"mysql+mysqldb://scott:***@{db_hostname}:65432/foo" ) - # PyMySQL - assert ( - PasswordMasker.mask_db_url( - f"mysql+pymysql://scott:tiger@{db_hostname}:65432/foo" + # PyMySQL (if installed in test environment) + try: + assert ( + PasswordMasker.mask_db_url( + f"mysql+pymysql://scott:tiger@{db_hostname}:65432/foo" + ) + == f"mysql+pymysql://scott:***@{db_hostname}:65432/foo" ) - == f"mysql+pymysql://scott:***@{db_hostname}:65432/foo" - ) + except ModuleNotFoundError: + pass assert ( PasswordMasker.mask_db_url( f"mysql+pymysql://scott:tiger@{db_hostname}:65432/foo", use_urlparse=True @@ -192,11 +202,14 @@ def test_password_masker_mask_db_url(monkeypatch, tmp_path): ) # Microsoft SQL Server - # pyodbc - assert ( - PasswordMasker.mask_db_url("mssql+pyodbc://scott:tiger@mydsn") - == "mssql+pyodbc://scott:***@mydsn" - ) + # pyodbc (if installed in test environment) + try: + assert ( + PasswordMasker.mask_db_url("mssql+pyodbc://scott:tiger@mydsn") + == "mssql+pyodbc://scott:***@mydsn" + ) + except ModuleNotFoundError: + pass assert ( PasswordMasker.mask_db_url( "mssql+pyodbc://scott:tiger@mydsn", use_urlparse=True @@ -417,6 +430,10 @@ class Response: return response +@pytest.mark.skipif( + secretmanager is None, + reason="Could not import 'secretmanager' from google.cloud in data_context.util", +) @pytest.mark.parametrize( "input_value,secret_response,raises,expected", [ diff --git a/tests/dataset/test_sqlalchemydataset.py b/tests/dataset/test_sqlalchemydataset.py index febdee150a65..d8396f1de6e6 100644 --- a/tests/dataset/test_sqlalchemydataset.py +++ b/tests/dataset/test_sqlalchemydataset.py @@ -425,18 +425,30 @@ def pyathena_dataset(sa): return dataset +@pytest.mark.skipif( + not is_library_loadable(library_name="pyathena"), + reason="pyathena is not installed", +) def test_expect_column_values_to_be_of_type_string_dialect_pyathena(pyathena_dataset): assert pyathena_dataset.expect_column_values_to_be_of_type( "col", type_="STRINGTYPE" ).success +@pytest.mark.skipif( + not is_library_loadable(library_name="pyathena"), + reason="pyathena is not installed", +) def test_expect_column_values_to_be_in_type_list_pyathena(pyathena_dataset): assert pyathena_dataset.expect_column_values_to_be_in_type_list( "col", type_list=["STRINGTYPE", "BOOLEAN"] ).success +@pytest.mark.skipif( + not is_library_loadable(library_name="pyathena"), + reason="pyathena is not installed", +) def test_expect_column_values_to_match_like_pattern_pyathena(pyathena_dataset): assert pyathena_dataset.expect_column_values_to_match_like_pattern( "col", like_pattern="test%" diff --git a/tests/datasource/data_connector/test_configured_asset_gcs_data_connector.py b/tests/datasource/data_connector/test_configured_asset_gcs_data_connector.py index f43747b36b01..9cd36726b92f 100644 --- a/tests/datasource/data_connector/test_configured_asset_gcs_data_connector.py +++ b/tests/datasource/data_connector/test_configured_asset_gcs_data_connector.py @@ -13,6 +13,9 @@ ) from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.datasource.data_connector import 
ConfiguredAssetGCSDataConnector +from great_expectations.datasource.data_connector.configured_asset_gcs_data_connector import ( + storage, +) from great_expectations.execution_engine import PandasExecutionEngine yaml = YAML() @@ -226,6 +229,10 @@ def expected_batch_definitions_sorted(): return expected +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.list_gcs_keys", return_value=["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"], @@ -255,6 +262,10 @@ def test_instantiation_without_args( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.list_gcs_keys", return_value=["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"], @@ -291,6 +302,10 @@ def test_instantiation_with_filename_arg( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.list_gcs_keys", return_value=["alpha-1.csv", "alpha-2.csv", "alpha-3.csv"], @@ -327,6 +342,10 @@ def test_instantiation_with_info_arg( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -370,6 +389,10 @@ def test_instantiation_with_test_yaml_config( assert report_object == expected_config_dict +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -425,6 +448,10 @@ def test_instantiation_with_test_yaml_config_emits_proper_payload( assert mock_emit.call_args_list == expected_call_args_list +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -479,6 +506,10 @@ def test_instantiation_from_a_config_with_nonmatching_regex_creates_unmatched_re } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -516,6 +547,10 @@ def test_get_batch_definition_list_from_batch_request_with_nonexistent_datasourc ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -575,6 +610,10 @@ def test_get_definition_list_from_batch_request_with_empty_args_raises_error( my_data_connector.get_batch_definition_list_from_batch_request() +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' 
from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -624,6 +663,10 @@ def test_get_definition_list_from_batch_request_with_unnamed_data_asset_name_rai ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -698,6 +741,10 @@ def test_return_all_batch_definitions_unsorted_without_named_data_asset_name( assert unsorted_batch_definition_list == expected_batch_definitions_unsorted +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -772,6 +819,10 @@ def test_return_all_batch_definitions_unsorted_with_named_data_asset_name( assert unsorted_batch_definition_list == expected_batch_definitions_unsorted +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -857,6 +908,10 @@ def test_return_all_batch_definitions_basic_sorted( assert sorted_batch_definition_list == expected_batch_definitions_sorted +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -964,6 +1019,10 @@ def test_return_all_batch_definitions_returns_specified_partition( assert my_batch_definition == expected_batch_definition +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.storage.Client" ) @@ -1049,6 +1108,10 @@ def test_return_all_batch_definitions_sorted_without_data_connector_query( assert sorted_batch_definition_list == expected_batch_definitions_sorted +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.storage.Client" ) @@ -1118,6 +1181,10 @@ def test_return_all_batch_definitions_raises_error_due_to_sorter_that_does_not_m ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.storage.Client" ) @@ -1183,6 +1250,10 @@ def test_return_all_batch_definitions_too_many_sorters( ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.storage.Client" ) @@ -1314,6 +1385,10 @@ def test_example_with_explicit_data_asset_names( ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in configured_asset_gcs_data_connector.py", +) @mock.patch( 
"great_expectations.datasource.data_connector.configured_asset_gcs_data_connector.storage.Client" ) diff --git a/tests/datasource/data_connector/test_data_connector_util.py b/tests/datasource/data_connector/test_data_connector_util.py index dfc9c698a2af..222355637057 100644 --- a/tests/datasource/data_connector/test_data_connector_util.py +++ b/tests/datasource/data_connector/test_data_connector_util.py @@ -15,6 +15,7 @@ list_gcs_keys, map_batch_definition_to_data_reference_string_using_regex, map_data_reference_string_to_batch_definition_list_using_regex, + storage, ) @@ -501,6 +502,10 @@ def test_build_sorters_from_config_bad_config(): build_sorters_from_config(sorters_config) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in datasource.data_connector.util", +) @mock.patch("great_expectations.datasource.data_connector.util.storage.Client") def test_list_gcs_keys_overwrites_delimiter(mock_gcs_conn): # Set defaults for ConfiguredAssetGCSDataConnector diff --git a/tests/datasource/data_connector/test_inferred_asset_gcs_data_connector.py b/tests/datasource/data_connector/test_inferred_asset_gcs_data_connector.py index 4a59df54dbbc..b2171cae4177 100644 --- a/tests/datasource/data_connector/test_inferred_asset_gcs_data_connector.py +++ b/tests/datasource/data_connector/test_inferred_asset_gcs_data_connector.py @@ -8,6 +8,9 @@ from great_expectations.core.batch import BatchDefinition, BatchRequest, IDDict from great_expectations.data_context.util import instantiate_class_from_config from great_expectations.datasource.data_connector import InferredAssetGCSDataConnector +from great_expectations.datasource.data_connector.inferred_asset_gcs_data_connector import ( + storage, +) from great_expectations.execution_engine import PandasExecutionEngine yaml = YAML() @@ -36,6 +39,10 @@ def expected_config_dict(): return config +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.inferred_asset_gcs_data_connector.list_gcs_keys", return_value=[ @@ -69,6 +76,10 @@ def test_instantiation_without_args( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.inferred_asset_gcs_data_connector.list_gcs_keys", return_value=[ @@ -108,6 +119,10 @@ def test_instantiation_with_filename_arg( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.inferred_asset_gcs_data_connector.list_gcs_keys", return_value=[ @@ -147,6 +162,10 @@ def test_instantiation_with_info_arg( assert my_data_connector.get_unmatched_data_references() == [] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -188,6 +207,10 @@ def test_get_batch_definition_list_from_batch_request_with_nonexistent_datasourc ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) 
@mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -234,6 +257,10 @@ def test_get_batch_definition_list_from_batch_request_with_unknown_data_connecto ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -291,6 +318,10 @@ def test_simple_regex_example_with_implicit_data_asset_names_self_check( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -377,6 +408,10 @@ def test_complex_regex_example_with_implicit_data_asset_names( ] +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -427,6 +462,10 @@ def test_self_check(mock_gcs_conn, mock_list_keys, mock_emit): } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -498,6 +537,10 @@ def test_test_yaml_config( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -561,6 +604,10 @@ def test_instantiation_with_test_yaml_config_emits_proper_payload( assert mock_emit.call_args_list == expected_call_args_list +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -636,6 +683,10 @@ def test_yaml_config_excluding_non_regex_matching_files( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -708,6 +759,10 @@ def test_nested_directory_data_asset_name_in_folder( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -772,6 +827,10 @@ def test_redundant_information_in_naming_convention_random_hash( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -835,6 +894,10 @@ def test_redundant_information_in_naming_convention_timestamp( } +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -899,6 +962,10 @@ def test_redundant_information_in_naming_convention_bucket( } +@pytest.mark.skipif( + 
storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -1023,6 +1090,10 @@ def test_redundant_information_in_naming_convention_bucket_sorted( assert expected == sorted_batch_definition_list +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -1081,6 +1152,10 @@ def test_redundant_information_in_naming_convention_bucket_sorter_does_not_match ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit" ) @@ -1142,6 +1217,10 @@ def test_redundant_information_in_naming_convention_bucket_too_many_sorters( ) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in inferred_asset_gcs_data_connector.py", +) @mock.patch( "great_expectations.datasource.data_connector.inferred_asset_gcs_data_connector.storage.Client" ) diff --git a/tests/datasource/test_new_datasource_with_sql_data_connector.py b/tests/datasource/test_new_datasource_with_sql_data_connector.py index a95490d72166..f100e8c51a6f 100644 --- a/tests/datasource/test_new_datasource_with_sql_data_connector.py +++ b/tests/datasource/test_new_datasource_with_sql_data_connector.py @@ -29,6 +29,14 @@ except ImportError: sqlalchemy = None +try: + import sqlalchemy_bigquery as sqla_bigquery +except ImportError: + try: + import pybigquery.sqlalchemy_bigquery as sqla_bigquery + except ImportError: + sqla_bigquery = None + yaml = YAML() @@ -502,6 +510,10 @@ def test_SimpleSqlalchemyDatasource(empty_data_context): # Here we should test getting another batch +@pytest.mark.skipif( + sqla_bigquery is None, + reason="sqlalchemy_bigquery/pybigquery is not installed", +) def test_basic_instantiation_with_bigquery_creds(sa, empty_data_context): context = empty_data_context my_data_source = instantiate_class_from_config( diff --git a/tests/datasource/test_pandas_datasource.py b/tests/datasource/test_pandas_datasource.py index 5ecca81dff5d..fa4bb069a66a 100644 --- a/tests/datasource/test_pandas_datasource.py +++ b/tests/datasource/test_pandas_datasource.py @@ -17,6 +17,7 @@ from great_expectations.datasource import PandasDatasource from great_expectations.datasource.types import PathBatchKwargs from great_expectations.exceptions import BatchKwargsError +from great_expectations.util import is_library_loadable from great_expectations.validator.validator import BridgeValidator, Validator yaml = YAML() @@ -267,6 +268,11 @@ def test_pandas_source_read_csv( assert "😁" in list(batch["Μ"]) +@pytest.mark.skipif( + not is_library_loadable(library_name="pyarrow") + and not is_library_loadable(library_name="fastparquet"), + reason="pyarrow and fastparquet are not installed", +) @mock_s3 def test_s3_pandas_source_read_parquet( data_context_parameterized_expectation_suite, tmp_path_factory diff --git a/tests/datasource/test_sparkdf_datasource.py b/tests/datasource/test_sparkdf_datasource.py index 00d85012a357..d3d6fc168fb9 100644 --- a/tests/datasource/test_sparkdf_datasource.py +++ b/tests/datasource/test_sparkdf_datasource.py @@ -251,6 +251,11 @@ def test_create_sparkdf_datasource( assert " 
header: false\n" in lines +@pytest.mark.skipif( + not is_library_loadable(library_name="pyarrow") + and not is_library_loadable(library_name="fastparquet"), + reason="pyarrow and fastparquet are not installed", +) def test_standalone_spark_parquet_datasource( test_parquet_folder_connection_path, spark_session ): diff --git a/tests/execution_engine/test_pandas_execution_engine.py b/tests/execution_engine/test_pandas_execution_engine.py index d7a581f7a5a8..4eb7a5ba4c98 100644 --- a/tests/execution_engine/test_pandas_execution_engine.py +++ b/tests/execution_engine/test_pandas_execution_engine.py @@ -33,7 +33,9 @@ ) from great_expectations.execution_engine.pandas_execution_engine import ( PandasExecutionEngine, + storage, ) +from great_expectations.util import is_library_loadable from great_expectations.validator.metric_configuration import MetricConfiguration from tests.expectations.test_util import get_table_columns_metric @@ -718,6 +720,11 @@ def test_get_batch_s3_compressed_files(test_s3_files_compressed, test_df_small): assert df.dataframe.shape == test_df_small.shape +@pytest.mark.skipif( + not is_library_loadable(library_name="pyarrow") + and not is_library_loadable(library_name="fastparquet"), + reason="pyarrow and fastparquet are not installed", +) def test_get_batch_s3_parquet(test_s3_files_parquet, test_df_small): bucket, keys = test_s3_files_parquet path = [key for key in keys if key.endswith(".parquet")][0] @@ -1031,6 +1038,10 @@ def test_get_batch_with_no_azure_configured(azure_batch_spec): execution_engine_no_azure.get_batch_data(batch_spec=azure_batch_spec) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in pandas_execution_engine.py", +) @mock.patch( "great_expectations.execution_engine.pandas_execution_engine.service_account", ) @@ -1051,6 +1062,10 @@ def test_constructor_with_gcs_options(mock_gcs_conn, mock_auth_method): assert "filename" in engine.config.get("gcs_options") +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in pandas_execution_engine.py", +) @mock.patch( "great_expectations.execution_engine.pandas_execution_engine.storage.Client", ) @@ -1085,6 +1100,10 @@ def test_get_batch_data_with_gcs_batch_spec_no_credentials(gcs_batch_spec, monke PandasExecutionEngine().get_batch_data(batch_spec=gcs_batch_spec) +@pytest.mark.skipif( + storage is None, + reason="Could not import 'storage' from google.cloud in pandas_execution_engine.py", +) def test_get_batch_with_gcs_misconfigured(gcs_batch_spec): # gcs_batchspec point to data that the ExecutionEngine does not have access to execution_engine_no_gcs = PandasExecutionEngine() diff --git a/tests/expectations/core/test_expect_column_values_to_be_in_type_list.py b/tests/expectations/core/test_expect_column_values_to_be_in_type_list.py index b050abca31c8..6cbd3c9659e7 100644 --- a/tests/expectations/core/test_expect_column_values_to_be_in_type_list.py +++ b/tests/expectations/core/test_expect_column_values_to_be_in_type_list.py @@ -8,8 +8,13 @@ build_pandas_validator_with_data, build_sa_validator_with_data, ) +from great_expectations.util import is_library_loadable +@pytest.mark.skipif( + not is_library_loadable(library_name="pyathena"), + reason="pyathena is not installed", +) def test_expect_column_values_to_be_in_type_list_dialect_pyathena(sa): from pyathena import sqlalchemy_athena diff --git a/tests/expectations/core/test_expect_column_values_to_be_of_type.py b/tests/expectations/core/test_expect_column_values_to_be_of_type.py 
index 62927f7b951a..907673e7c78c 100644 --- a/tests/expectations/core/test_expect_column_values_to_be_of_type.py +++ b/tests/expectations/core/test_expect_column_values_to_be_of_type.py @@ -1,11 +1,17 @@ import pandas as pd +import pytest from great_expectations.core.expectation_validation_result import ( ExpectationValidationResult, ) from great_expectations.self_check.util import build_sa_validator_with_data +from great_expectations.util import is_library_loadable +@pytest.mark.skipif( + not is_library_loadable(library_name="pyathena"), + reason="pyathena is not installed", +) def test_expect_column_values_to_be_of_type_string_dialect_pyathena(sa): from pyathena import sqlalchemy_athena diff --git a/tests/profile/test_user_configurable_profiler_v3_batch_request.py b/tests/profile/test_user_configurable_profiler_v3_batch_request.py index 0b5996884fea..ba41e87b635a 100644 --- a/tests/profile/test_user_configurable_profiler_v3_batch_request.py +++ b/tests/profile/test_user_configurable_profiler_v3_batch_request.py @@ -118,7 +118,7 @@ def get_sqlalchemy_runtime_validator_postgresql( engine = connection_manager.get_engine( f"postgresql://postgres@{db_hostname}/test_ci" ) - except sqlalchemy.exc.OperationalError: + except (sqlalchemy.exc.OperationalError, ModuleNotFoundError): return None sql_dtypes = {} diff --git a/tests/test_great_expectations.py b/tests/test_great_expectations.py index 0c352e13ad8b..7e24769f7da0 100644 --- a/tests/test_great_expectations.py +++ b/tests/test_great_expectations.py @@ -1064,6 +1064,10 @@ def test_read_table(self): assert df["Name"][0] == "Allen, Miss Elisabeth Walton" assert isinstance(df, PandasDataset) + @pytest.mark.skipif( + not is_library_loadable(library_name="pyarrow"), + reason="pyarrow is not installed", + ) def test_read_feather(self): pandas_version = re.match(r"(\d+)\.(\d+)\..+", pd.__version__) if pandas_version is None: @@ -1079,6 +1083,11 @@ def test_read_feather(self): assert df["Name"][0] == "Allen, Miss Elisabeth Walton" assert isinstance(df, PandasDataset) + @pytest.mark.skipif( + not is_library_loadable(library_name="pyarrow") + and not is_library_loadable(library_name="fastparquet"), + reason="pyarrow and fastparquet are not installed", + ) def test_read_parquet(self): """ This test is unusual, because on travis (but only on travis), we have observed problems importing pyarrow,
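Taken together, the new guards compose like the sketch below. `is_library_loadable` is the helper imported from `great_expectations.util` throughout this diff; the test function and its data are hypothetical, for illustration only:

```
# Sketch of the skipif pattern added across the test suite.
import pandas as pd
import pytest

from great_expectations.util import is_library_loadable


@pytest.mark.skipif(
    not is_library_loadable(library_name="pyarrow")
    and not is_library_loadable(library_name="fastparquet"),
    reason="pyarrow and fastparquet are not installed",
)
def test_parquet_roundtrip(tmp_path):
    # Hypothetical test: runs only when at least one parquet engine is
    # importable, so a requirements-dev-lite environment skips it
    # instead of erroring at collection time.
    df = pd.DataFrame({"a": [1, 2, 3]})
    path = tmp_path / "frame.parquet"
    df.to_parquet(path)
    assert pd.read_parquet(path)["a"].tolist() == [1, 2, 3]
```

With `requirements-dev-lite.txt` installed, guards of this shape are what let `pytest -v --no-spark --no-postgresql` pass without the optional backends present.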