Skip to content

Commit

Permalink
Remove mlflow/utils/insecure_hash.py (mlflow#13626)
Browse files Browse the repository at this point in the history
Signed-off-by: harupy <[email protected]>
  • Loading branch information
harupy authored Nov 1, 2024
1 parent b263072 commit 44f5c17
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 38 deletions.
4 changes: 2 additions & 2 deletions mlflow/data/digest_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
+ import hashlib
from typing import Any

from packaging.version import Version

from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
- from mlflow.utils import insecure_hash

MAX_ROWS = 10000

Expand Down Expand Up @@ -101,7 +101,7 @@ def get_normalized_md5_digest(elements: list[Any]) -> str:
INVALID_PARAMETER_VALUE,
)

- md5 = insecure_hash.md5()
+ md5 = hashlib.md5(usedforsecurity=False)
for element in elements:
md5.update(element)

Expand Down
4 changes: 2 additions & 2 deletions mlflow/data/evaluation_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import json
import logging
import math
Expand All @@ -10,7 +11,6 @@
from mlflow.entities import RunTag
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
- from mlflow.utils import insecure_hash
from mlflow.utils.string_utils import generate_feature_name_if_not_string

try:
Expand Down Expand Up @@ -406,7 +406,7 @@ def __init__(
)

# generate dataset hash
- md5_gen = insecure_hash.md5()
+ md5_gen = hashlib.md5(usedforsecurity=False)
_gen_md5_for_arraylike_obj(md5_gen, self._features_data)
if self._labels_data is not None:
_gen_md5_for_arraylike_obj(md5_gen, self._labels_data)
Expand Down
4 changes: 2 additions & 2 deletions mlflow/pyfunc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ def predict(self, context, model_input, params=None):

import collections
import functools
+ import hashlib
import importlib
import inspect
import json
Expand Down Expand Up @@ -508,7 +509,6 @@ def predict(self, context, model_input, params=None):
databricks_utils,
find_free_port,
get_major_minor_py_version,
- insecure_hash,
)
from mlflow.utils import env_manager as _EnvManager
from mlflow.utils._spark_utils import modified_environ
Expand Down Expand Up @@ -2499,7 +2499,7 @@ def batch_predict_fn(pdf, params=None):
model_path = os.path.join(
tempfile.gettempdir(),
"mlflow",
- insecure_hash.sha1(model_uri.encode()).hexdigest(),
+ hashlib.sha1(model_uri.encode(), usedforsecurity=False).hexdigest(),
# Use pid to avoid conflict when multiple spark UDF tasks
str(os.getpid()),
)
Expand Down
7 changes: 4 additions & 3 deletions mlflow/store/tracking/file_store.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import json
import logging
import os
Expand Down Expand Up @@ -49,7 +50,7 @@
)
from mlflow.store.tracking.abstract_store import AbstractStore
from mlflow.tracing.utils import generate_request_id
- from mlflow.utils import get_results_from_paginated_fn, insecure_hash
+ from mlflow.utils import get_results_from_paginated_fn
from mlflow.utils.file_utils import (
append_to,
exists,
Expand Down Expand Up @@ -1185,13 +1186,13 @@ def log_inputs(self, run_id: str, datasets: Optional[list[DatasetInput]] = None)

@staticmethod
def _get_dataset_id(dataset_name: str, dataset_digest: str) -> str:
- md5 = insecure_hash.md5(dataset_name.encode("utf-8"))
+ md5 = hashlib.md5(dataset_name.encode("utf-8"), usedforsecurity=False)
md5.update(dataset_digest.encode("utf-8"))
return md5.hexdigest()

@staticmethod
def _get_input_id(dataset_id: str, run_id: str) -> str:
- md5 = insecure_hash.md5(dataset_id.encode("utf-8"))
+ md5 = hashlib.md5(dataset_id.encode("utf-8"), usedforsecurity=False)
md5.update(run_id.encode("utf-8"))
return md5.hexdigest()

Expand Down
9 changes: 6 additions & 3 deletions mlflow/utils/conda.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import json
import logging
import os
Expand All @@ -6,7 +7,7 @@

from mlflow.environment_variables import MLFLOW_CONDA_CREATE_ENV_CMD, MLFLOW_CONDA_HOME
from mlflow.exceptions import ExecutionException
- from mlflow.utils import insecure_hash, process
+ from mlflow.utils import process
from mlflow.utils.environment import Environment
from mlflow.utils.os import is_windows

Expand Down Expand Up @@ -62,13 +63,15 @@ def _get_conda_env_name(conda_env_path, env_id=None, env_root_dir=None):
conda_env_contents += env_id

env_name = "mlflow-{}".format(
- insecure_hash.sha1(conda_env_contents.encode("utf-8")).hexdigest()
+ hashlib.sha1(conda_env_contents.encode("utf-8"), usedforsecurity=False).hexdigest()
)
if env_root_dir:
env_root_dir = os.path.normpath(env_root_dir)
# Generate env name with format "mlflow-{conda_env_contents_hash}-{env_root_dir_hash}"
# hashing `conda_env_contents` and `env_root_dir` separately helps debugging
- env_name += "-{}".format(insecure_hash.sha1(env_root_dir.encode("utf-8")).hexdigest())
+ env_name += "-{}".format(
+ hashlib.sha1(env_root_dir.encode("utf-8"), usedforsecurity=False).hexdigest()
+ )

return env_name

Expand Down
5 changes: 3 additions & 2 deletions mlflow/utils/environment.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import importlib.metadata
import logging
import os
Expand All @@ -20,7 +21,7 @@
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.tracking import get_tracking_uri
- from mlflow.utils import PYTHON_VERSION, insecure_hash
+ from mlflow.utils import PYTHON_VERSION
from mlflow.utils.databricks_utils import (
_get_databricks_serverless_env_vars,
get_databricks_env_vars,
Expand Down Expand Up @@ -746,7 +747,7 @@ def _get_mlflow_env_name(s):
(e.g. "mlflow-da39a3ee5e6b4b0d3255bfef95601890afd80709")
"""
- return "mlflow-" + insecure_hash.sha1(s.encode("utf-8")).hexdigest()
+ return "mlflow-" + hashlib.sha1(s.encode("utf-8"), usedforsecurity=False).hexdigest()


def _get_pip_install_mlflow():
Expand Down
15 changes: 0 additions & 15 deletions mlflow/utils/insecure_hash.py

This file was deleted.

6 changes: 3 additions & 3 deletions tests/evaluate/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import inspect
import io
import json
Expand Down Expand Up @@ -59,7 +60,6 @@
from mlflow.tracing.constant import TraceMetadataKey
from mlflow.tracing.fluent import TRACE_BUFFER
from mlflow.tracking.artifact_utils import get_artifact_uri
- from mlflow.utils import insecure_hash
from mlflow.utils.autologging_utils import (
MLFLOW_EVALUATE_RESTRICT_LANGCHAIN_AUTOLOG_TO_TRACES_CONFIG,
)
Expand Down Expand Up @@ -863,7 +863,7 @@ def test_dataset_metadata():

def test_gen_md5_for_arraylike_obj():
def get_md5(data):
- md5_gen = insecure_hash.md5()
+ md5_gen = hashlib.md5(usedforsecurity=False)
_gen_md5_for_arraylike_obj(md5_gen, data)
return md5_gen.hexdigest()

Expand All @@ -881,7 +881,7 @@ def get_md5(data):
def test_gen_md5_for_arraylike_obj_with_pandas_df_using_float_idx_does_not_raise_keyerror():
float_indices = np.random.uniform(low=0.5, high=13.3, size=(10,))
df = pd.DataFrame(np.random.randn(10, 4), index=float_indices, columns=["A", "B", "C", "D"])
- md5_gen = insecure_hash.md5()
+ md5_gen = hashlib.md5(usedforsecurity=False)
assert _gen_md5_for_arraylike_obj(md5_gen, df) is None


Expand Down
5 changes: 3 additions & 2 deletions tests/projects/test_projects_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import json
import logging
import os
Expand All @@ -8,7 +9,7 @@
from click.testing import CliRunner

from mlflow import MlflowClient, cli
- from mlflow.utils import insecure_hash, process
+ from mlflow.utils import process

from tests.integration.utils import invoke_cli_runner
from tests.projects.utils import (
Expand Down Expand Up @@ -90,7 +91,7 @@ def test_run_local_conda_env():
with open(os.path.join(TEST_PROJECT_DIR, "conda.yaml")) as handle:
conda_env_contents = handle.read()
expected_env_name = "mlflow-{}".format(
- insecure_hash.sha1(conda_env_contents.encode("utf-8")).hexdigest()
+ hashlib.sha1(conda_env_contents.encode("utf-8"), usedforsecurity=False).hexdigest()
)
try:
process._exec_cmd(cmd=["conda", "env", "remove", "--name", expected_env_name])
Expand Down
4 changes: 2 additions & 2 deletions tests/resources/data/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
+ import hashlib
import json
from typing import Any, Optional

Expand All @@ -8,7 +9,6 @@
from mlflow.data.dataset import Dataset
from mlflow.types import Schema
from mlflow.types.utils import _infer_schema
- from mlflow.utils import insecure_hash

from tests.resources.data.dataset_source import SampleDatasetSource

Expand All @@ -29,7 +29,7 @@ def _compute_digest(self) -> str:
Computes a digest for the dataset. Called if the user doesn't supply
a digest when constructing the dataset.
"""
- hash_md5 = insecure_hash.md5()
+ hash_md5 = hashlib.md5(usedforsecurity=False)
for hash_part in pd.util.hash_array(np.array(self._data_list)):
hash_md5.update(hash_part)
return base64.b64encode(hash_md5.digest()).decode("ascii")
Expand Down
4 changes: 2 additions & 2 deletions tests/store/tracking/test_file_store.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import hashlib
import json
import os
import posixpath
Expand Down Expand Up @@ -43,7 +44,6 @@
from mlflow.store.tracking.file_store import FileStore
from mlflow.tracing.constant import TraceMetadataKey, TraceTagKey
from mlflow.tracking._tracking_service.utils import _use_tracking_uri
- from mlflow.utils import insecure_hash
from mlflow.utils.file_utils import (
TempDir,
path_to_local_file_uri,
Expand Down Expand Up @@ -2659,7 +2659,7 @@ def assert_expected_input_storage_ids_present(run, dataset_storage_ids):
inputs_dir = os.path.join(run_dir, FileStore.INPUTS_FOLDER_NAME)
expected_input_storage_ids = []
for dataset_storage_id in dataset_storage_ids:
- md5 = insecure_hash.md5(dataset_storage_id.encode("utf-8"))
+ md5 = hashlib.md5(dataset_storage_id.encode("utf-8"), usedforsecurity=False)
md5.update(run.info.run_id.encode("utf-8"))
expected_input_storage_ids.append(md5.hexdigest())
assert set(os.listdir(inputs_dir)) == set(expected_input_storage_ids)
Expand Down

0 comments on commit 44f5c17

Please sign in to comment.