
Commit b7d330f

Add support for sharded weights.
1 parent e9a62ca commit b7d330f

3 files changed, +140 -6 lines changed


keras_hub/src/utils/keras_utils.py (+14)
@@ -1,3 +1,4 @@
+import inspect
 import sys
 
 import keras
@@ -147,3 +148,16 @@ def get_gpu_names():
         ]
     else:
         return [""]
+
+
+def sharded_weights_available():
+    """Whether sharded weights serialization is available.
+
+    Returns:
+        `True` if sharded weights are available, `False` otherwise.
+    """
+    save_weights_signature = inspect.signature(keras.saving.save_weights)
+    if "max_shard_size" in save_weights_signature.parameters:
+        return True
+    else:
+        return False
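
This helper gates every sharding code path on whether the installed Keras exposes `max_shard_size` in `keras.saving.save_weights`. A minimal check in a local environment (a sketch; it only assumes a keras_hub development install):

from keras_hub.src.utils.keras_utils import sharded_weights_available

# `max_shard_size` only exists in newer Keras releases; the helper
# introspects the `save_weights` signature rather than pinning a version.
if sharded_weights_available():
    print("This Keras build supports sharded weight serialization.")
else:
    print("Sharded serialization needs a newer Keras release.")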

keras_hub/src/utils/preset_utils.py (+80 -6)
@@ -1,7 +1,9 @@
 import collections
 import datetime
+import functools
 import inspect
 import json
+import math
 import os
 import re
 
@@ -10,6 +12,7 @@
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.utils.keras_utils import print_msg
+from keras_hub.src.utils.keras_utils import sharded_weights_available
 
 try:
     import kagglehub
@@ -48,6 +51,7 @@
 # Weight file names.
 MODEL_WEIGHTS_FILE = "model.weights.h5"
 TASK_WEIGHTS_FILE = "task.weights.h5"
+SHARDED_MODEL_WEIGHTS_CONFIG_FILE = "model.weights.json"
 
 # HuggingFace filenames.
 README_FILE = "README.md"
@@ -647,7 +651,7 @@ def load_backbone(self, cls, load_weights, **kwargs):
         backbone = self._load_serialized_object(self.config, **kwargs)
         if load_weights:
             jax_memory_cleanup(backbone)
-            backbone.load_weights(get_file(self.preset, MODEL_WEIGHTS_FILE))
+            self._load_backbone_weights(backbone)
         return backbone
 
     def load_tokenizer(self, cls, config_file=TOKENIZER_CONFIG_FILE, **kwargs):
@@ -697,8 +701,7 @@ def load_task(self, cls, load_weights, load_task_weights, **kwargs):
             task.load_task_weights(task_weights)
         else:
             jax_memory_cleanup(task.backbone)
-            backbone_weights = get_file(self.preset, MODEL_WEIGHTS_FILE)
-            task.backbone.load_weights(backbone_weights)
+            self._load_backbone_weights(task.backbone)
         return task
 
     def load_preprocessor(
@@ -726,18 +729,64 @@ def _load_serialized_object(self, config, **kwargs):
         config["config"] = {**config["config"], **kwargs}
         return keras.saving.deserialize_keras_object(config)
 
+    def _get_sharded_filenames(self, config_path):
+        with open(config_path, encoding="utf-8") as config_file:
+            config = json.load(config_file)
+        weight_map = config["weight_map"]
+        return sorted(set(weight_map.values()))
+
+    def _load_backbone_weights(self, backbone):
+        # Detect if the backbone is sharded or not.
+        has_single_file_weights = check_file_exists(
+            self.preset, MODEL_WEIGHTS_FILE
+        )
+        if has_single_file_weights:
+            filepath = get_file(self.preset, MODEL_WEIGHTS_FILE)
+        else:
+            if not sharded_weights_available():
+                raise RuntimeError(
+                    "Sharded weights loading is not supported in the current "
+                    f"Keras version {keras.__version__}. "
+                    "Please update to a newer version."
+                )
+            filepath = get_file(self.preset, SHARDED_MODEL_WEIGHTS_CONFIG_FILE)
+            sharded_filenames = self._get_sharded_filenames(filepath)
+            for sharded_filename in sharded_filenames:
+                # Download the sharded weights.
+                _ = get_file(self.preset, sharded_filename)
+        backbone.load_weights(filepath)
+
 
 class KerasPresetSaver:
     def __init__(self, preset_dir):
         os.makedirs(preset_dir, exist_ok=True)
         self.preset_dir = preset_dir
 
-    def save_backbone(self, backbone):
+    def save_backbone(self, backbone, max_shard_size=10):
         self._save_serialized_object(backbone, config_file=CONFIG_FILE)
-        backbone_weight_path = os.path.join(self.preset_dir, MODEL_WEIGHTS_FILE)
-        backbone.save_weights(backbone_weight_path)
         self._save_metadata(backbone)
 
+        # Save the weights.
+        backbone_size_in_bytes = self._get_variables_size_in_bytes(
+            backbone.variables
+        )
+        backbone_size_in_gb = backbone_size_in_bytes / (1024**3)
+        # If the size of the backbone is larger than `max_shard_size`, save
+        # sharded weights.
+        if sharded_weights_available() and backbone_size_in_gb > max_shard_size:
+            backbone_sharded_weights_config_path = os.path.join(
+                self.preset_dir, SHARDED_MODEL_WEIGHTS_CONFIG_FILE
+            )
+            backbone.save_weights(
+                backbone_sharded_weights_config_path,
+                max_shard_size=max_shard_size,
+            )
+        else:
+            backbone_weight_path = os.path.join(
+                self.preset_dir, MODEL_WEIGHTS_FILE
+            )
+            backbone.save_weights(backbone_weight_path)
+
     def save_tokenizer(self, tokenizer):
         config_file = TOKENIZER_CONFIG_FILE
         if hasattr(tokenizer, "config_file"):
@@ -823,3 +872,28 @@ def _save_metadata(self, layer):
         metadata_path = os.path.join(self.preset_dir, METADATA_FILE)
         with open(metadata_path, "w") as metadata_file:
             metadata_file.write(json.dumps(metadata, indent=4))
+
+    def _get_variables_size_in_bytes(self, variables):
+        @functools.lru_cache(512)
+        def _compute_memory_size(shape, dtype):
+            weight_counts = math.prod(shape)
+            dtype = keras.backend.standardize_dtype(dtype)
+            dtype_size = int(
+                (
+                    dtype.replace("bfloat", "")
+                    .replace("float", "")
+                    .replace("uint", "")
+                    .replace("int", "")
+                    .replace("bool", "1")
+                )
+            )
+            return weight_counts * dtype_size
+
+        unique_variables = {}
+        for v in variables:
+            if id(v) not in unique_variables:
+                unique_variables[id(v)] = (v.shape, v.dtype)
+        total_memory_size = 0
+        for shape, dtype in unique_variables.values():
+            total_memory_size += _compute_memory_size(shape, dtype)
+        return total_memory_size / 8
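
`_get_variables_size_in_bytes` strips the dtype name down to its bit width (for example "float32" yields 32, "bfloat16" yields 16, "bool" yields 1), multiplies by the element count of each unique variable, and divides the accumulated bit total by 8. A small worked sketch of the same arithmetic that drives the shard decision in `save_backbone` (the parameter count is illustrative, not from the commit):

param_count = 3_500_000_000  # hypothetical float32 backbone
bits_per_param = 32          # what the dtype-string stripping yields for "float32"
size_in_bytes = param_count * bits_per_param / 8
size_in_gb = size_in_bytes / (1024**3)
print(f"{size_in_gb:.2f} GB")  # ~13.04 GB
# 13.04 GB exceeds the default max_shard_size of 10 (GB), so `save_backbone`
# would write model.weights.json plus sharded *.weights.h5 files instead of
# a single model.weights.h5.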

keras_hub/src/utils/preset_utils_test.py (+46)
@@ -10,12 +10,58 @@
 )
 from keras_hub.src.models.bert.bert_backbone import BertBackbone
 from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer
+from keras_hub.src.models.gemma.gemma_backbone import GemmaBackbone
 from keras_hub.src.tests.test_case import TestCase
+from keras_hub.src.utils.keras_utils import sharded_weights_available
 from keras_hub.src.utils.preset_utils import CONFIG_FILE
+from keras_hub.src.utils.preset_utils import get_preset_saver
 from keras_hub.src.utils.preset_utils import upload_preset
 
 
 class PresetUtilsTest(TestCase):
+    @pytest.mark.large
+    def test_sharded_weights(self):
+        if not sharded_weights_available():
+            self.skipTest("Sharded weights are not available.")
+
+        # Gemma2 config.
+        init_kwargs = {
+            "vocabulary_size": 4096,  # 256128
+            "num_layers": 24,  # 46
+            "num_query_heads": 16,  # 32
+            "num_key_value_heads": 8,  # 16
+            "hidden_dim": 64,  # 4608
+            "intermediate_dim": 128,  # 73728
+            "head_dim": 8,  # 128
+            "sliding_window_size": 5,  # 4096
+            "attention_logit_soft_cap": 50,
+            "final_logit_soft_cap": 30,
+            "layer_norm_epsilon": 1e-6,
+            "query_head_dim_normalize": False,
+            "use_post_ffw_norm": True,
+            "use_post_attention_norm": True,
+            "use_sliding_window_attention": True,
+        }
+        backbone = GemmaBackbone(**init_kwargs)  # ~4.4MB
+
+        # Save the sharded weights.
+        preset_dir = self.get_temp_dir()
+        preset_saver = get_preset_saver(preset_dir)
+        preset_saver.save_backbone(backbone, max_shard_size=0.002)
+        self.assertTrue(
+            os.path.exists(os.path.join(preset_dir, "model.weights.json"))
+        )
+        self.assertTrue(
+            os.path.exists(os.path.join(preset_dir, "model_00000.weights.h5"))
+        )
+
+        # Load the sharded weights.
+        revived_backbone = GemmaBackbone.from_preset(preset_dir)
+        for v1, v2 in zip(
+            backbone.trainable_variables, revived_backbone.trainable_variables
+        ):
+            self.assertAllClose(v1, v2)
+
     @pytest.mark.large
     def test_preset_errors(self):
         with self.assertRaisesRegex(ValueError, "must be a string"):