[FEATURE] Add requirements-dev-lite.txt and update tests/docs #4273

Merged: 6 commits, Mar 1, 2022
28 changes: 28 additions & 0 deletions docs/contributing/contributing_setup.md
@@ -47,6 +47,32 @@ In order to contribute to Great Expectations, you will need the following:

### Install Python dependencies

#### (Easy version of steps 5-7 below for Mac/Linux users)

Create a virtual environment in your locally cloned repo, use the same version of `pip` that we use in our CI/CD pipelines (for Python 3.6 - 3.9), and install the fewest dependencies needed for a dev environment (to minimize potential setup headaches).

```
python3 -m venv ge_dev

source ge_dev/bin/activate

pip install --upgrade pip==21.3.1

pip install -r requirements-dev-lite.txt -c constraints-dev.txt -e .
```

Confirm that tests are passing (only against pandas, and sqlalchemy with sqlite), without needing to run any Docker containers.

```
ulimit -n 4096

pytest -v --no-spark --no-postgresql
```

> In your `~/.zshrc` or `~/.bashrc` file, you will want to add `ulimit -n 4096` so that it is already set for future runs. **You WILL eventually see many tests failing with `OSError: [Errno 24] Too many open files`** if you do not set it!

Later on, try setting up the full dev environment (as mentioned in step 6) when you are ready for more robust testing of your custom Expectations!
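The same file-descriptor check can be scripted from Python for the current session (Mac/Linux only; this is an illustrative sketch, not part of the repo):

```python
import resource

# Inspect the soft limit on open file descriptors, i.e. the value that
# `ulimit -n` controls. The test suite opens many files, and a soft limit
# below 4096 eventually fails with OSError: [Errno 24] Too many open files.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"open-file limits: soft={soft}, hard={hard}")

# Raise the soft limit for this process only; the hard limit caps how high
# an unprivileged process may go.
if soft < 4096 and (hard == resource.RLIM_INFINITY or hard >= 4096):
    new_soft = 4096 if hard == resource.RLIM_INFINITY else min(4096, hard)
    resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard))
```

Note this only affects the running process; the `~/.zshrc`/`~/.bashrc` change above is still the right fix for interactive shells.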

#### 5. Create a new virtual environment

* Make a new virtual environment (e.g. using virtualenv or conda), name it “great_expectations_dev” or similar.
@@ -132,6 +158,8 @@ Depending on which features of Great Expectations you want to work on, you may w

* Caution: If another service is using port 3306, Docker may start the container but silently fail to set up the port.

> If you have a Silicon Mac (M1), this Docker image does not work.
**Contributor:** Nitpick: maybe we should use a `:::warning` tag here?

**Contributor (Author):** Not familiar with that syntax in markdown. With whatever theme we are using with Docusaurus, inline comments like this have a bold yellow background that stands out pretty well.

Would it just become `::warning If you have a Silicon...`?
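For reference: assuming the docs build with Docusaurus v2, admonitions wrap the content in a triple-colon block rather than a single inline `::warning` prefix, so the note above would become:

```
:::warning

If you have a Silicon Mac (M1), this Docker image does not work.

:::
```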


#### If you want to develop against local Spark:

* In most cases, `pip install requirements-dev.txt` should set up pyspark for you.
50 changes: 32 additions & 18 deletions great_expectations/self_check/util.py
@@ -385,7 +385,7 @@ def get_dataset(
return PandasDataset(df, profiler=profiler, caching=caching)

elif dataset_type == "sqlite":
-        if not create_engine:
+        if not create_engine or not SQLITE_TYPES:
**Contributor:** It scares me that we need to make these kinds of changes.

This reinforces my view that we should not be relying on imports to determine which backends to test against. Instead, I would strongly advocate for an explicit config file for this.

**Contributor (Author):** When imports fail for those dialects, we set the `*_TYPES` dicts to `{}`, but later in this function we access them with square-bracket notation (not `.get()`). There were already bailout points at `if not create_engine`, and it made sense to bail out there as well if those dicts were empty.
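A minimal sketch of the failure mode being guarded against (the module alias and `get_sqlite_type` helper are illustrative, not from the repo):

```python
try:
    # Optional dependency: on failure the dict is left empty.
    import sqlalchemy.dialects.sqlite as sqlitetypes
    SQLITE_TYPES = {"VARCHAR": sqlitetypes.VARCHAR, "INTEGER": sqlitetypes.INTEGER}
except ImportError:
    sqlitetypes = None
    SQLITE_TYPES = {}  # square-bracket lookups on this would raise KeyError

def get_sqlite_type(type_name):
    # Mirrors the new bailout: an empty *_TYPES dict means the dialect never
    # imported, so return None instead of falling through to SQLITE_TYPES[...].
    if not SQLITE_TYPES:
        return None
    return SQLITE_TYPES[type_name]
```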

return None

engine = create_engine(get_sqlite_connection_url(sqlite_db_path=sqlite_db_path))
@@ -445,7 +445,7 @@ def get_dataset(
)

elif dataset_type == "postgresql":
-        if not create_engine:
+        if not create_engine or not POSTGRESQL_TYPES:
return None

# Create a new database
@@ -508,7 +508,7 @@ )
)

elif dataset_type == "mysql":
-        if not create_engine:
+        if not create_engine or not MYSQL_TYPES:
return None

db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
@@ -596,7 +596,7 @@ )
)

elif dataset_type == "mssql":
-        if not create_engine:
+        if not create_engine or not MSSQL_TYPES:
return None

db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
@@ -967,20 +967,34 @@ def build_sa_validator_with_data(
sqlite_db_path=None,
batch_definition: Optional[BatchDefinition] = None,
):
-    dialect_classes = {
-        "sqlite": sqlitetypes.dialect,
-        "postgresql": postgresqltypes.dialect,
-        "mysql": mysqltypes.dialect,
-        "mssql": mssqltypes.dialect,
-        "bigquery": sqla_bigquery.BigQueryDialect,
-    }
-    dialect_types = {
-        "sqlite": SQLITE_TYPES,
-        "postgresql": POSTGRESQL_TYPES,
-        "mysql": MYSQL_TYPES,
-        "mssql": MSSQL_TYPES,
-        "bigquery": BIGQUERY_TYPES,
-    }
+    dialect_classes = {}
+    dialect_types = {}
+    try:
+        dialect_classes["sqlite"] = sqlitetypes.dialect
+        dialect_types["sqlite"] = SQLITE_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["postgresql"] = postgresqltypes.dialect
+        dialect_types["postgresql"] = POSTGRESQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["mysql"] = mysqltypes.dialect
+        dialect_types["mysql"] = MYSQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["mssql"] = mssqltypes.dialect
+        dialect_types["mssql"] = MSSQL_TYPES
+    except AttributeError:
+        pass
+    try:
+        dialect_classes["bigquery"] = sqla_bigquery.BigQueryDialect
+        dialect_types["bigquery"] = BIGQUERY_TYPES
+    except AttributeError:
+        pass
Comment on lines +972 to +996
**Contributor:** Could we maybe for-loop this? And perhaps add a logging statement?

dialects: List[Tuple[str, Any, Dict]] = [
    ("sqlite", sqlitetypes.dialect, SQLITE_TYPES),
    ...
]

for name, class_, type_ in dialects:
    try:
        dialect_classes[name] = class_
        dialect_types[name] = type_
    except AttributeError:
        logging.debug(...)

What's the AttributeError here? I'm not immediately seeing how that can be triggered.

**Contributor (Author):** `NoneType` has no attribute whatever. A lot of `try/except ImportError` blocks just end up setting whatever failed to import to `None`.
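The failure mode the author describes can be reproduced in a few lines (the module name here is a deliberately missing stand-in for any optional backend):

```python
try:
    import some_missing_backend as sqla_bigquery  # stand-in: this import fails
except ImportError:
    sqla_bigquery = None  # the common fallback described above

dialect_classes = {}
try:
    # Attribute access on None raises AttributeError, not ImportError,
    # which is why the registration code catches AttributeError.
    dialect_classes["bigquery"] = sqla_bigquery.BigQueryDialect
except AttributeError:
    pass  # backend unavailable; leave it out of the registry

print(dialect_classes)  # -> {}
```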


db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
if sa_engine_name == "sqlite":
engine = create_engine(get_sqlite_connection_url(sqlite_db_path))
2 changes: 1 addition & 1 deletion requirements-dev-base.txt
@@ -14,7 +14,7 @@ boto3>=1.9 # all_tests
feather-format>=0.4.1 # all_tests

flake8==3.8.3 # lint
-flask>=1.0.0 # for s3 test only
+flask>=1.0.0 # for s3 test only (with moto)
freezegun>=0.3.15 # all_tests
gcsfs>=0.5.1 # all_tests
google-cloud-secret-manager>=1.0.0 # all_tests
17 changes: 17 additions & 0 deletions requirements-dev-lite.txt
@@ -0,0 +1,17 @@

--requirement requirements.txt

black==22.1.0 # lint
boto3>=1.9 # all_tests
flake8==3.8.3 # lint
flask>=1.0.0 # for s3 test only (with moto)
freezegun>=0.3.15 # all_tests
isort==5.4.2 # lint
moto>=1.3.7,<2.0.0 # all_tests
pyfakefs>=4.5.1 # all_tests
pytest-benchmark>=3.4.1 # performance tests
pytest>=5.3.5,<6.0.0 # all_tests
requirements-parser>=0.2.0 # all_tests
s3fs>=0.5.1 # all_tests
snapshottest==0.6.0 # GE Cloud atomic renderer tests
sqlalchemy>=1.3.18,<1.4.10 # sqlalchemy_tests
5 changes: 5 additions & 0 deletions tests/actions/test_core_actions.py
@@ -17,6 +17,7 @@
ExpectationSuiteIdentifier,
ValidationResultIdentifier,
)
from great_expectations.util import is_library_loadable
from great_expectations.validation_operators import (
CloudNotificationAction,
EmailAction,
@@ -228,6 +229,10 @@ def test_SlackNotificationAction(
) == {"slack_notification_result": "none required"}


@pytest.mark.skipif(
not is_library_loadable(library_name="pypd"),
reason="pypd is not installed",
)
@mock.patch("pypd.EventV2")
def test_PagerdutyAlertAction(
data_context_parameterized_expectation_suite,
5 changes: 5 additions & 0 deletions tests/cli/test_sanitize_yaml_and_save_datasource.py
@@ -1,6 +1,7 @@
import pytest

from great_expectations.cli.datasource import sanitize_yaml_and_save_datasource
from great_expectations.util import is_library_loadable


def test_sanitize_yaml_and_save_datasource_raises_error_on_empty_yaml(
@@ -65,6 +66,10 @@ def test_sanitize_yaml_and_save_datasource_works_without_credentials(
assert obs == {}


@pytest.mark.skipif(
not is_library_loadable(library_name="psycopg2"),
reason="psycopg2 is not installed",
)
def test_sanitize_yaml_and_save_datasource_works_with_credentials(
sa,
empty_data_context,
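The `is_library_loadable` helper used in these `skipif` markers can be sketched as follows (one possible implementation; the real one in `great_expectations.util` may differ in detail):

```python
import importlib.util

def is_library_loadable(library_name: str) -> bool:
    """Report whether `library_name` can be imported in this environment."""
    # find_spec checks importability without actually importing the package,
    # so probing for an optional dependency has no side effects.
    return importlib.util.find_spec(library_name) is not None

print(is_library_loadable("json"))            # stdlib module: True
print(is_library_loadable("no_such_pkg_xyz"))  # not installed: False
```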
114 changes: 54 additions & 60 deletions tests/cli/v012/test_init_missing_libraries.py
@@ -37,19 +37,23 @@ def _library_not_loaded_test(
assert "Which database backend are you using" in stdout
assert "Give your new Datasource a short name" in stdout
assert (
"""Next, we will configure database credentials and store them in the `my_db` section
of this config file: great_expectations/uncommitted/config_variables.yml"""
"Next, we will configure database credentials and store them in the `my_db` section"
in stdout
)
assert (
f"""Great Expectations relies on the library `{library_import_name}` to connect to your data, \
but the package `{library_name}` containing this library is not installed.
Would you like Great Expectations to try to execute `pip install {library_name}` for you?"""
f"Great Expectations relies on the library `{library_import_name}` to connect to your data"
in stdout
)
assert (
f"but the package `{library_name}` containing this library is not installed"
in stdout
)
assert (
f"Would you like Great Expectations to try to execute `pip install {library_name}` for you?"
in stdout
)
assert (
f"""\nOK, exiting now.
- Please execute `pip install {library_name}` before trying again."""
f"Please execute `pip install {library_name}` before trying again."
in stdout
)

@@ -71,31 +75,25 @@ def _library_not_loaded_test(
assert (
obs_tree
== """\
great_expectations/
.gitignore
great_expectations.yml
checkpoints/
expectations/
great_expectations/
.gitignore
great_expectations.yml
checkpoints/
expectations/
.ge_store_backend_id
plugins/
custom_data_docs/
renderers/
styles/
data_docs_custom_styles.css
views/
profilers/
uncommitted/
config_variables.yml
data_docs/
validations/
.ge_store_backend_id
notebooks/
pandas/
validation_playground.ipynb
spark/
validation_playground.ipynb
sql/
validation_playground.ipynb
plugins/
custom_data_docs/
renderers/
styles/
data_docs_custom_styles.css
views/
uncommitted/
config_variables.yml
data_docs/
validations/
.ge_store_backend_id
"""
"""
)

assert_no_logging_messages_or_tracebacks(my_caplog, result)
@@ -227,16 +225,18 @@ def test_cli_init_spark_without_library_installed_instructs_user(
assert "What data would you like Great Expectations to connect to" in stdout
assert "What are you processing your files with" in stdout
assert (
f"""Great Expectations relies on the library `pyspark` to connect to your data, \
but the package `pyspark` containing this library is not installed.
Would you like Great Expectations to try to execute `pip install pyspark` for you?"""
f"Great Expectations relies on the library `pyspark` to connect to your data"
in stdout
)
assert (
f"""\nOK, exiting now.
- Please execute `pip install pyspark` before trying again."""
f"but the package `pyspark` containing this library is not installed."
in stdout
)
assert (
f"Would you like Great Expectations to try to execute `pip install pyspark` for you?"
in stdout
)
assert f"Please execute `pip install pyspark` before trying again." in stdout
# assert "Great Expectations relies on the library `pyspark`" in stdout
# assert "Please `pip install pyspark` before trying again" in stdout

@@ -258,31 +258,25 @@ def test_cli_init_spark_without_library_installed_instructs_user(
assert (
obs_tree
== """\
great_expectations/
.gitignore
great_expectations.yml
checkpoints/
expectations/
great_expectations/
.gitignore
great_expectations.yml
checkpoints/
expectations/
.ge_store_backend_id
plugins/
custom_data_docs/
renderers/
styles/
data_docs_custom_styles.css
views/
profilers/
uncommitted/
config_variables.yml
data_docs/
validations/
.ge_store_backend_id
notebooks/
pandas/
validation_playground.ipynb
spark/
validation_playground.ipynb
sql/
validation_playground.ipynb
plugins/
custom_data_docs/
renderers/
styles/
data_docs_custom_styles.css
views/
uncommitted/
config_variables.yml
data_docs/
validations/
.ge_store_backend_id
"""
"""
)

assert_no_logging_messages_or_tracebacks(caplog, result)
10 changes: 9 additions & 1 deletion tests/data_context/store/test_store_backends.py
@@ -34,7 +34,7 @@
from great_expectations.data_context.util import file_relative_path
from great_expectations.exceptions import InvalidKeyError, StoreBackendError, StoreError
from great_expectations.self_check.util import expectationSuiteSchema
-from great_expectations.util import gen_directory_tree_str
+from great_expectations.util import gen_directory_tree_str, is_library_loadable


@pytest.fixture()
@@ -1006,6 +1006,10 @@ def test_TupleS3StoreBackend_with_s3_put_options():
assert my_store.list_keys() == [(".ge_store_backend_id",), ("AAA",)]


@pytest.mark.skipif(
not is_library_loadable(library_name="google"),
reason="google is not installed",
)
def test_TupleGCSStoreBackend_base_public_path():
"""
What does this test and why?
@@ -1053,6 +1057,10 @@ )
)


@pytest.mark.skipif(
not is_library_loadable(library_name="google"),
reason="google is not installed",
)
def test_TupleGCSStoreBackend():
# pytest.importorskip("google-cloud-storage")
"""