diff --git a/.circleci/config.yml b/.circleci/config.yml index 6c23d17e0..98cc294db 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,25 +15,25 @@ commands: type: steps default: [] steps: - - run: + - run: name: "Install open JDK" command: sudo add-apt-repository -y ppa:openjdk-r/ppa - - run: + - run: name: "Install qq" command: sudo apt-get -qq update - - run: + - run: name: "No install recommends for JDK" command: sudo apt-get install -y openjdk-8-jdk --no-install-recommends - - run: + - run: name: "Run Java Alternatives install for JDK" command: sudo update-java-alternatives -s java-1.8.0-openjdk-amd64 - run: name: "Run pip install setup tools and wheel" command: pip install -U pip setuptools wheel - - run: + - run: name: "Install Tox" - command: pip install -U tox==3.12.0 - - run: + command: pip install -U tox==4.11.4 + - run: name: "Install Code Cov" command: pip install -U codecov - steps: << parameters.after-deps >> @@ -49,27 +49,11 @@ run_complex: &run_complex only: /.*/ jobs: - Python38-Unit-Tests: - docker: - - image: cimg/python:3.8 - environment: - TOXENV: coverage,doctest,type,check - TOX_INSTALL_DIR: .env - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python - - steps: - - checkout - - setup_dependencies - - run: - name: "Run Tox" - command: tox - - Python37-Unit-Tests: + Python311-Unit-Tests: docker: - - image: cimg/python:3.7 + - image: cimg/python:3.11 environment: - TOXENV: coverage,doctest,type,check + TOXENV: coverage,doctest,type,check TOX_INSTALL_DIR: .env JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python @@ -81,23 +65,9 @@ jobs: name: "Run Tox" command: tox - Python36-Unit-Tests: - docker: - - image: cimg/python:3.6 - environment: - TOXENV: coverage,doctest,type,check - TOX_INSTALL_DIR: .env - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 - - steps: - - checkout - - setup_dependencies - - run: - name: "Run Tox" - command: tox - 
Python38-Integration-Tests: + Python311-Integration-Tests: docker: - - image: cimg/python:3.8 + - image: cimg/python:3.11 environment: TOXENV: complex,type,check TOX_INSTALL_DIR: .env @@ -108,12 +78,12 @@ jobs: - run: name: Setup python3 command: | - pyenv global 3.8.13 > /dev/null && activated=0 || activated=1 + pyenv global 3.11.3 > /dev/null && activated=0 || activated=1 if [[ $activated -ne 0 ]]; then for i in {1..6}; do - pyenv install 3.8.13 && break || sleep $((2 ** $i)) + pyenv install 3.11.3 && break || sleep $((2 ** $i)) done - pyenv global 3.8.13 + pyenv global 3.11.3 fi - setup_dependencies - run: @@ -123,52 +93,13 @@ jobs: export PYTHONUNBUFFERED=1 tox - Python37-Integration-Tests: - docker: - - image: cimg/python:3.7 - environment: - TOXENV: complex,type,check - TOX_INSTALL_DIR: .env - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 - - steps: - - checkout - - setup_dependencies - - run: - name: "Run Tox" - no_output_timeout: 60m - command: | - export PYTHONUNBUFFERED=1 - tox - - Python36-Integration-Tests: - docker: - - image: cimg/python:3.6 - environment: - TOXENV: coverage,complex,spark,doctest,type,check - TOX_INSTALL_DIR: .env - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 - - steps: - - checkout - - setup_dependencies - - run: - name: "Run Tox" - command: tox - workflows: version: 2 - Integration-Tests: + Integration-Tests: jobs: - - Python38-Integration-Tests: - <<: *run_complex - - Python37-Integration-Tests: - <<: *run_complex - - Python36-Integration-Tests: + - Python311-Integration-Tests: <<: *run_complex Unit-Tests: jobs: - - Python37-Unit-Tests - - Python36-Unit-Tests - - Python38-Unit-Tests + - Python311-Unit-Tests diff --git a/.readthedocs.yml b/.readthedocs.yml index 80cf0c6bf..b3fbec9ce 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,12 +9,9 @@ version: 2 sphinx: configuration: docs/conf.py -# Optionally build your docs in additional formats such as PDF and ePub -formats: all - # Optionally set the version of Python and 
requirements required to build your docs python: - version: 3.6 + version: 3.11 install: - requirements: docs/requirements-doc.txt - method: pip diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9ef01491..ebf0ca62a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,7 @@ cd snorkel tox --devenv .env ``` -Running `tox --devenv .env` will install create a virtual environment with Snorkel +Running `tox --devenv .env` will create a virtual environment with Snorkel and all of its dependencies installed in the directory `.env`. This can be used in a number of ways, e.g. with `source .env/bin/activate` or for [linting in VSCode](https://code.visualstudio.com/docs/python/environments#_where-the-extension-looks-for-environments). @@ -38,8 +38,7 @@ python3 -c "import snorkel.labeling; print(dir(snorkel.labeling))" There are a number of useful tox commands defined: ```bash -tox -e py36 # Run unit tests pytest in Python 3.6 -tox -e py37 # Run unit tests pytest in Python 3.7 +tox -e py311 # Run unit tests pytest in Python 3.11 tox -e coverage # Compute unit test coverage tox -e spark # Run Spark-based tests (marked with @pytest.mark.spark) tox -e complex # Run more complex, integration tests (marked with @pytest.mark.complex) diff --git a/README.md b/README.md index f7a03683b..7cd6a0969 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ These tutorials demonstrate a variety of tasks, domains, labeling techniques, an # Installation -Snorkel requires Python 3.6 or later. To install Snorkel, we recommend using `pip`: +Snorkel requires Python 3.11 or later. To install Snorkel, we recommend using `pip`: ```bash pip install snorkel @@ -60,12 +60,12 @@ For information on installing from source and contributing to Snorkel, see our

The following example commands give some more color on installing with `conda`. -These commands assume that your `conda` installation is Python 3.6, +These commands assume that your `conda` installation is Python 3.11, and that you want to use a virtual environment called `snorkel-env`. ```bash # [OPTIONAL] Activate a virtual environment called "snorkel" -conda create --yes -n snorkel-env python=3.6 +conda create --yes -n snorkel-env python=3.11 conda activate snorkel-env # We specify PyTorch here to ensure compatibility, but it may not be necessary. diff --git a/pyproject.toml b/pyproject.toml index 8bc1963fe..b9551368a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 88 -target-version = ['py35', 'py36', 'py37', 'py38'] +target-version = ['py311'] exclude = ''' /( \.eggs diff --git a/requirements.txt b/requirements.txt index 6e982875d..3a5179d50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,8 +9,7 @@ #### ESSENTIAL LIBRARIES # General scientific computing - -numpy>=1.25.1 +numpy>=1.24.0 scipy>=1.2.0 # Data storage and function application @@ -26,6 +25,7 @@ munkres>=1.0.6 networkx>=2.2 # Model introspection tools +protobuf>=3.19.6 tensorboard>=2.13.0 #### EXTRA/TEST LIBRARIES @@ -41,16 +41,15 @@ distributed>=2023.7.0 # Dill (serialization) dill>=0.3.0 - #### DEV TOOLS -black>=22.3 +black>=22.8 flake8>=3.7.0 +importlib_metadata<5 # necessary for flake8 isort>=4.3.0 -mypy==0.760 +mypy>=0.760 pydocstyle>=4.0.0 -pytest>=5.0.0,<6.0.0 +pytest>=6.0.0 pytest-cov>=2.7.0 pytest-doctestplus>=0.3.0 tox>=3.13.0 -protobuf>=3.19.5 diff --git a/setup.cfg b/setup.cfg index 3ea0bbfd5..60e034d95 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,8 @@ doctest_optionflags = [flake8] extend-ignore = E203, - E265, # Throws errors for '#%%' delimiter in VSCode jupyter notebook syntax + # Throws errors for '#%%' delimiter in VSCode jupyter notebook syntax + E265, E501, E731, E741, @@ -43,7 
+44,7 @@ known_third_party= setuptools, tqdm, default_section=THIRDPARTY -skip=.env,.venv +skip=.env,.venv,.tox [pydocstyle] convention = numpy diff --git a/setup.py b/setup.py index d59f4a5e8..8a73ab2fe 100644 --- a/setup.py +++ b/setup.py @@ -36,15 +36,16 @@ include_package_data=True, install_requires=[ "munkres>=1.0.6", - "numpy>=1.16.5", + "numpy>=1.24.0", "scipy>=1.2.0", "pandas>=1.0.0", "tqdm>=4.33.0", "scikit-learn>=0.20.2", "torch>=1.2.0", - "tensorboard>=2.9.1", + "tensorboard>=2.13.0", + "protobuf>=3.19.6", "networkx>=2.2", ], - python_requires=">=3.6", + python_requires=">=3.11", keywords="machine-learning ai weak-supervision", ) diff --git a/snorkel/analysis/scorer.py b/snorkel/analysis/scorer.py index 47b2a4da2..6822fcf2d 100644 --- a/snorkel/analysis/scorer.py +++ b/snorkel/analysis/scorer.py @@ -42,7 +42,6 @@ def __init__( custom_metric_funcs: Optional[Mapping[str, Callable[..., float]]] = None, abstain_label: Optional[int] = -1, ) -> None: - self.metrics: Dict[str, Callable[..., float]] self.metrics = {} if metrics: diff --git a/snorkel/classification/multitask_classifier.py b/snorkel/classification/multitask_classifier.py index 88795021a..32c612193 100644 --- a/snorkel/classification/multitask_classifier.py +++ b/snorkel/classification/multitask_classifier.py @@ -82,7 +82,7 @@ def __init__( self, tasks: List[Task], name: Optional[str] = None, **kwargs: Any ) -> None: super().__init__() - self.config = ClassifierConfig(**kwargs) + self.config = ClassifierConfig(**kwargs) # type: ignore self.name = name or type(self).__name__ # Initiate the model attributes diff --git a/snorkel/classification/training/loggers/checkpointer.py b/snorkel/classification/training/loggers/checkpointer.py index d26f1414f..ce6a8d121 100644 --- a/snorkel/classification/training/loggers/checkpointer.py +++ b/snorkel/classification/training/loggers/checkpointer.py @@ -61,7 +61,7 @@ class Checkpointer: def __init__( self, counter_unit: str, evaluation_freq: float, **kwargs: Any 
) -> None: - self.config = CheckpointerConfig(**kwargs) + self.config = CheckpointerConfig(**kwargs) # type: ignore self._validate_config() # Pull out checkpoint settings @@ -151,7 +151,6 @@ def checkpoint( ) def _is_new_best(self, metric_dict: Metrics) -> Set[str]: - best_metric = set() for metric in metric_dict: diff --git a/snorkel/classification/training/loggers/log_manager.py b/snorkel/classification/training/loggers/log_manager.py index a3757ed7e..6f73ba4a8 100644 --- a/snorkel/classification/training/loggers/log_manager.py +++ b/snorkel/classification/training/loggers/log_manager.py @@ -46,7 +46,7 @@ def __init__( checkpointer: Optional[Checkpointer] = None, **kwargs: Any, ) -> None: - self.config = LogManagerConfig(**kwargs) + self.config = LogManagerConfig(**kwargs) # type: ignore self.n_batches_per_epoch = n_batches_per_epoch self.log_writer = log_writer diff --git a/snorkel/classification/training/loggers/log_writer.py b/snorkel/classification/training/loggers/log_writer.py index 4ac9413a0..239ecaf19 100644 --- a/snorkel/classification/training/loggers/log_writer.py +++ b/snorkel/classification/training/loggers/log_writer.py @@ -44,7 +44,7 @@ class LogWriter: """ def __init__(self, **kwargs: Any) -> None: - self.config = LogWriterConfig(**kwargs) + self.config = LogWriterConfig(**kwargs) # type: ignore self.run_name = self.config.run_name if self.run_name is None: diff --git a/snorkel/classification/training/trainer.py b/snorkel/classification/training/trainer.py index 743c69644..762befe87 100644 --- a/snorkel/classification/training/trainer.py +++ b/snorkel/classification/training/trainer.py @@ -341,7 +341,7 @@ def _set_lr_scheduler(self) -> None: # Set lr scheduler lr_scheduler_name = self.config.lr_scheduler lr_scheduler_config = self.config.lr_scheduler_config - lr_scheduler: Optional[optim.lr_scheduler._LRScheduler] + lr_scheduler: Any if lr_scheduler_name == "constant": lr_scheduler = None @@ -448,7 +448,6 @@ def _logging( # Evaluate the model and 
log the metric if self.log_manager.trigger_evaluation(): - # Log metrics metric_dict.update( self._evaluate(model, dataloaders, self.config.valid_split) @@ -570,7 +569,9 @@ def load(self, trainer_path: str, model: Optional[MultitaskClassifier]) -> None: ) raise - self.config = TrainerConfig(**saved_state["trainer_config"]) + self.config = TrainerConfig( + *[saved_state["trainer_config"][field] for field in TrainerConfig._fields] + ) logging.info(f"[{self.name}] Trainer config loaded from {trainer_path}") if model is not None: diff --git a/snorkel/labeling/model/label_model.py b/snorkel/labeling/model/label_model.py index df636e21c..b1dc9ab34 100644 --- a/snorkel/labeling/model/label_model.py +++ b/snorkel/labeling/model/label_model.py @@ -135,7 +135,7 @@ class LabelModel(nn.Module, BaseLabeler): def __init__(self, cardinality: int = 2, **kwargs: Any) -> None: super().__init__() - self.config: LabelModelConfig = LabelModelConfig(**kwargs) + self.config: LabelModelConfig = LabelModelConfig(**kwargs) # type: ignore self.cardinality = cardinality # Confirm that cuda is available if config is using CUDA @@ -575,7 +575,7 @@ def _set_class_balance( f"class_balance has {len(self.p)} entries. Does not match LabelModel cardinality {self.cardinality}." 
) elif Y_dev is not None: - class_counts = Counter(Y_dev) # type: ignore + class_counts: Counter = Counter(Y_dev) sorted_counts = np.array([v for k, v in sorted(class_counts.items())]) # type: ignore self.p = sorted_counts / sum(sorted_counts) if len(self.p) != self.cardinality: @@ -667,7 +667,7 @@ def _set_lr_scheduler(self) -> None: # Set lr scheduler lr_scheduler_name = self.train_config.lr_scheduler lr_scheduler_config = self.train_config.lr_scheduler_config - lr_scheduler: Optional[optim.lr_scheduler._LRScheduler] + lr_scheduler: Any if lr_scheduler_name == "constant": lr_scheduler = None diff --git a/snorkel/slicing/sliceaware_classifier.py b/snorkel/slicing/sliceaware_classifier.py index a1037e2c7..75ef67c3e 100644 --- a/snorkel/slicing/sliceaware_classifier.py +++ b/snorkel/slicing/sliceaware_classifier.py @@ -53,7 +53,6 @@ def __init__( scorer: Scorer = Scorer(metrics=["accuracy", "f1"]), **multitask_kwargs: Any, ) -> None: - # Initialize module_pool with 1) base_architecture and 2) prediction_head # Assuming `head_dim` can be used to map base_architecture to prediction_head module_pool = nn.ModuleDict( diff --git a/snorkel/slicing/utils.py b/snorkel/slicing/utils.py index 5373e014e..19127c84c 100644 --- a/snorkel/slicing/utils.py +++ b/snorkel/slicing/utils.py @@ -118,7 +118,6 @@ def convert_to_slice_tasks(base_task: Task, slice_names: List[str]) -> List[Task # Create slice indicator tasks for slice_name in slice_names: - ind_task_name = f"{base_task.name}_slice:{slice_name}_ind" ind_head_module_name = f"{ind_task_name}_head" # Indicator head always predicts "in the slice or not", so is always binary @@ -151,7 +150,6 @@ def convert_to_slice_tasks(base_task: Task, slice_names: List[str]) -> List[Task # Create slice predictor tasks shared_pred_head_module = nn.Linear(neck_size, base_task_cardinality) for slice_name in slice_names: - pred_task_name = f"{base_task.name}_slice:{slice_name}_pred" pred_head_module_name = f"{pred_task_name}_head" diff --git 
a/test/classification/training/schedulers/test_schedulers.py b/test/classification/training/schedulers/test_schedulers.py index 35e71a6bb..943d26d01 100644 --- a/test/classification/training/schedulers/test_schedulers.py +++ b/test/classification/training/schedulers/test_schedulers.py @@ -32,7 +32,7 @@ class SequentialTest(unittest.TestCase): def test_sequential(self): scheduler = SequentialScheduler() data = [] - for (batch, dl) in scheduler.get_batches(dataloaders): + for batch, dl in scheduler.get_batches(dataloaders): X_dict, Y_dict = batch data.extend(X_dict["data"]) self.assertEqual(data, sorted(data)) @@ -43,7 +43,7 @@ def test_shuffled(self): torch.manual_seed(123) scheduler = ShuffledScheduler() data = [] - for (batch, dl) in scheduler.get_batches(dataloaders): + for batch, dl in scheduler.get_batches(dataloaders): X_dict, Y_dict = batch data.extend(X_dict["data"]) self.assertNotEqual(data, sorted(data)) diff --git a/test/classification/training/test_trainer.py b/test/classification/training/test_trainer.py index 38622ae9e..ab8363074 100644 --- a/test/classification/training/test_trainer.py +++ b/test/classification/training/test_trainer.py @@ -1,9 +1,9 @@ -import collections import copy import json import os import tempfile import unittest +from collections.abc import Mapping import torch import torch.nn as nn @@ -247,7 +247,7 @@ def dict_check(self, dict1, dict2): for k in dict1.keys(): dict1_ = dict1[k] dict2_ = dict2[k] - if isinstance(dict1_, collections.Mapping): + if isinstance(dict1_, Mapping): self.dict_check(dict1_, dict2_) elif isinstance(dict1_, torch.Tensor): self.assertTrue( diff --git a/test/labeling/apply/lf_applier_spark_test_script.py b/test/labeling/apply/lf_applier_spark_test_script.py index eb8264f4b..8c5b97a19 100644 --- a/test/labeling/apply/lf_applier_spark_test_script.py +++ b/test/labeling/apply/lf_applier_spark_test_script.py @@ -56,7 +56,6 @@ def g(x: DataPoint, db: List[int]) -> int: def build_lf_matrix() -> None: - 
logging.info("Getting Spark context") sc = SparkContext() sc.addPyFile("snorkel-package.zip") diff --git a/test/labeling/apply/test_lf_applier.py b/test/labeling/apply/test_lf_applier.py index 760eff636..534c5c90a 100644 --- a/test/labeling/apply/test_lf_applier.py +++ b/test/labeling/apply/test_lf_applier.py @@ -70,7 +70,7 @@ def f_bad(x: DataPoint) -> int: L_EXPECTED_BAD = np.array([[-1, -1], [0, -1], [-1, -1], [-1, -1], [-1, -1]]) L_PREPROCESS_EXPECTED = np.array([[-1, -1], [0, 0], [-1, 0], [-1, 0], [-1, -1]]) -TEXT_DATA = ["Jane.", "Jane plays soccer.", "Jane plays soccer."] +TEXT_DATA = ["Jane Doe.", "Jane plays soccer.", "Jane plays soccer."] L_TEXT_EXPECTED = np.array([[0, -1], [0, 0], [0, 0]]) diff --git a/test/labeling/lf/test_nlp.py b/test/labeling/lf/test_nlp.py index fffba92a8..1f2e956a7 100644 --- a/test/labeling/lf/test_nlp.py +++ b/test/labeling/lf/test_nlp.py @@ -84,7 +84,6 @@ def lf2(x: DataPoint) -> int: self.assertEqual(len(lf2._nlp_config.nlp._cache), 2) def test_nlp_labeling_function_raises(self) -> None: - with self.assertRaisesRegex(ValueError, "different parameters"): @nlp_labeling_function() diff --git a/test/labeling/model/test_label_model.py b/test/labeling/model/test_label_model.py index a257e44b1..ecd6247b5 100644 --- a/test/labeling/model/test_label_model.py +++ b/test/labeling/model/test_label_model.py @@ -236,7 +236,6 @@ def test_get_weight(self): self.assertAlmostEqual(accs[i], true_acc, delta=0.1) def test_build_mask(self): - L = np.array([[0, 1, 0], [0, 1, 0]]) label_model = self._set_up_model(L) diff --git a/tox.ini b/tox.ini index 9645f0473..9d8a83dda 100644 --- a/tox.ini +++ b/tox.ini @@ -1,8 +1,7 @@ [tox] skip_missing_interpreters = true envlist = - py36, - py37, + py311, type, check, doctest, @@ -42,6 +41,9 @@ commands = python -m pytest --doctest-plus snorkel [testenv:check] description = check the code and doc style basepython = python3 +allowlist_externals = + {toxinidir}/scripts/check_requirements.py +
{toxinidir}/scripts/sync_api_docs.py commands_pre = commands = isort -rc -c .