diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 558b886912fe..9a45d287d49d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -541,7 +541,40 @@ def get_portability_package_data(): 'virtualenv-clone>=0.5,<1.0', # https://github.com/PiotrDabkowski/Js2Py/issues/317 'js2py>=0.74,<1; python_version<"3.12"', - ] + dataframe_dependency + ] + dataframe_dependency, + # Keep the following dependencies in line with what we test against + # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini + # For more info, see + # https://docs.google.com/document/d/1c84Gc-cZRCfrU8f7kWGsNR2o8oSRjCM-dGHO9KvPWPw/edit?usp=sharing + 'torch': [ + 'torch>=1.13.0,<=2.0.0' + ], + 'tensorflow': [ + 'tensorflow>=2.12rc1,<2.13' + ], + 'transformers': [ + 'transformers>=4.28.0,<4.49.0', + 'tensorflow==2.12.0', + 'torch>=1.9.0,<2.1.0' + ], + 'tft': [ + 'tensorflow_transform>=1.14.0,<1.15.0' + ], + 'onnx': [ + 'onnxruntime==1.13.1', + 'torch==1.13.1', + 'tensorflow==2.11.0', + 'tf2onnx==1.13.0', + 'skl2onnx==1.13', + 'transformers==4.25.1' + ], + 'xgboost': [ + 'xgboost>=1.6.0,<2.1.3', + 'datatable==1.0.0' + ], + 'tensorflow-hub': [ + 'tensorflow-hub>=0.14.0,<0.16.0' + ] }, zip_safe=False, # PyPI package information. 
diff --git a/sdks/python/test-suites/tox/py39/build.gradle b/sdks/python/test-suites/tox/py39/build.gradle index acc273519b87..52283fdd7123 100644 --- a/sdks/python/test-suites/tox/py39/build.gradle +++ b/sdks/python/test-suites/tox/py39/build.gradle @@ -166,6 +166,10 @@ toxTask "testPy39transformers-448", "py39-transformers-448", "${posargs}" test.dependsOn "testPy39transformers-448" postCommitPyDep.dependsOn "testPy39transformers-448" +toxTask "testPy39transformers-latest", "py39-transformers-latest", "${posargs}" +test.dependsOn "testPy39transformers-latest" +postCommitPyDep.dependsOn "testPy39transformers-latest" + toxTask "testPy39embeddingsMLTransform", "py39-embeddings", "${posargs}" test.dependsOn "testPy39embeddingsMLTransform" postCommitPyDep.dependsOn "testPy39embeddingsMLTransform" diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 016b2c4bfd46..acf6b358b462 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -454,7 +454,7 @@ commands = # Allow exit code 5 (no tests run) so that we can run this command safely on arbitrary subdirectories. 
/bin/sh -c 'pytest -o junit_suite_name={envname} --junitxml=pytest_{envname}.xml -n 6 -m uses_xgboost {posargs}; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' -[testenv:py{39,310}-transformers-{428,447,448}] +[testenv:py{39,310}-transformers-{428,447,448,latest}] deps = # sentence-transformers 2.2.2 is the latest version that supports transformers 4.28.x 428: sentence-transformers==2.2.2 @@ -463,7 +463,9 @@ deps = 447: transformers>=4.47.0,<4.48.0 447: torch>=1.9.0,<1.14.0 448: transformers>=4.48.0,<4.49.0 - 448: torch>=2.0.0 + 448: torch>=2.0.0,<2.1.0 + latest: transformers>=4.48.0 + latest: torch>=2.0.0 tensorflow==2.12.0 protobuf==4.25.5 extras = test,gcp,ml_test diff --git a/website/www/site/content/en/documentation/ml/ml-dependency-extras.md b/website/www/site/content/en/documentation/ml/ml-dependency-extras.md new file mode 100644 index 000000000000..e29b45212153 --- /dev/null +++ b/website/www/site/content/en/documentation/ml/ml-dependency-extras.md @@ -0,0 +1,41 @@ +--- +title: "ML Dependency Extras" +--- + + +# ML Dependency Extras + +In order to make it easy to make sure you are using dependencies which have +been well tested with Beam ML, Beam provides a set of ML extras which can +be installed alongside Beam. For example, if you want to use a version +of PyTorch which has been tested with Beam, you can install it with: + +``` +pip install apache-beam[torch] +``` + +A full set of extras can be found in +[setup.py](https://github.com/apache/beam/blob/6e3cf2b113026e27db7833a1f0fd08977b7c71e1/sdks/python/setup.py#L397). + +**Note:** You can also pin to dependencies outside of the extra range with +a normal install - for example: + +``` +pip install apache-beam==2.XX.0 +pip install torch==<version> +``` + +This will usually work, but can break if the dependency releases a breaking +change between the version Beam tests with and the version you pin to. 
\ No newline at end of file diff --git a/website/www/site/content/en/documentation/ml/overview.md b/website/www/site/content/en/documentation/ml/overview.md index d04c234eb537..7e3305f96334 100644 --- a/website/www/site/content/en/documentation/ml/overview.md +++ b/website/www/site/content/en/documentation/ml/overview.md @@ -131,3 +131,4 @@ When you use Apache Beam as one of the building blocks in your project, these or * [ML model evaluation](/documentation/ml/model-evaluation/) * [RunInference public codelab](https://colab.sandbox.google.com/github/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_basic.ipynb) * [RunInference notebooks](https://github.com/apache/beam/tree/master/examples/notebooks/beam-ml) +* [Beam ML dependency management](/documentation/ml/ml-dependency-extras/) diff --git a/website/www/site/layouts/partials/section-menu/en/documentation.html b/website/www/site/layouts/partials/section-menu/en/documentation.html index 7ae5da62192d..f85d47b2494e 100755 --- a/website/www/site/layouts/partials/section-menu/en/documentation.html +++ b/website/www/site/layouts/partials/section-menu/en/documentation.html @@ -253,6 +253,7 @@
  • ML model evaluation
  • +
  • ML Dependency Extras
  • Use cases