diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 56ccd2a2b1..552eb8dd04 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -44,6 +44,7 @@ DummyInputGenerator, DummyPastKeyValuesGenerator, DummyTextInputGenerator, + DummyTimestepInputGenerator, DummyVisionInputGenerator, FalconDummyPastKeyValuesGenerator, MistralDummyPastKeyValuesGenerator, @@ -1527,7 +1528,7 @@ def patch_model_for_export( class PooledProjectionsDummyInputGenerator(DummyInputGenerator): - SUPPORTED_INPUT_NAMES = "pooled_projection" + SUPPORTED_INPUT_NAMES = "pooled_projections" def __init__( self, @@ -1550,10 +1551,20 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int return self.random_float_tensor(shape, framework=framework, dtype=float_dtype) +class DummyTransformerTimestpsInputGenerator(DummyTimestepInputGenerator): + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "timestep": + shape = [self.batch_size] + return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) + return super().generate(input_name, framework, int_dtype, float_dtype) + + @register_in_tasks_manager("sd3-transformer", *["semantic-segmentation"], library_name="diffusers") -class TransformerOpenVINOConfig(UNetOnnxConfig): - DUMMY_INPUT_GENERATOR_CLASSES = UNetOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES + ( - PooledProjectionsDummyInputGenerator, +class SD3TransformerOpenVINOConfig(UNetOnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = ( + (DummyTransformerTimestpsInputGenerator,) + + UNetOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES + + (PooledProjectionsDummyInputGenerator,) ) NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args( image_size="sample_size", diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index 5e2d998c18..ad9f18c1ab 100644 --- a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -35,6 +35,7 @@ OVPipelineForInpainting, OVPipelineForText2Image, ) +from optimum.intel.utils.import_utils import is_transformers_version from optimum.utils.testing_utils import require_diffusers @@ -72,7 +73,10 @@ def _generate_images(height=128, width=128, batch_size=1, channel=3, input_type= class OVPipelineForText2ImageTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency", "stable-diffusion-3"] + SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] + if is_transformers_version(">=", "4.40.0"): + SUPPORTED_ARCHITECTURES.append("stable-diffusion-3") + CALLBACK_SUPPORT_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] OVMODEL_CLASS = OVPipelineForText2Image AUTOMODEL_CLASS = AutoPipelineForText2Image @@ -138,7 +142,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): np.testing.assert_allclose(ov_output, diffusers_output, atol=6e-3, rtol=1e-2) - @parameterized.expand(["stable-diffusion", "stable-diffusion-xl", "latent-consistency"]) + @parameterized.expand(CALLBACK_SUPPORT_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): height, width, batch_size = 64, 128, 1 @@ -353,7 +357,9 @@ def test_textual_inversion(self): class OVPipelineForImage2ImageTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency", "stable-diffusion-3"] + SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] + if is_transformers_version(">=", "4.40.0"): + SUPPORTED_ARCHITECTURES.append("stable-diffusion-3") AUTOMODEL_CLASS = AutoPipelineForImage2Image OVMODEL_CLASS = OVPipelineForImage2Image @@ -576,7 +582,10 @@ def test_textual_inversion(self): class OVPipelineForInpaintingTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "stable-diffusion-3"] + SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl"] + + if is_transformers_version(">=", "4.40.0"): + SUPPORTED_ARCHITECTURES.append("stable-diffusion-3") AUTOMODEL_CLASS = AutoPipelineForInpainting OVMODEL_CLASS = OVPipelineForInpainting diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 404d9c08aa..bc968aa116 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -94,6 +94,7 @@ class OVCLIExportTestCase(unittest.TestCase): ("stable-diffusion", 72, 195), ("stable-diffusion-xl", 84, 331), ("latent-consistency", 50, 135), + ("stable-diffusion-3", 84, 331), ) TEST_4BIT_CONFIGURATONS = [ diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index bd17134d8b..114de633c2 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -171,6 +171,7 @@ "stable-diffusion-xl": (366, 34, 42, 66), "stable-diffusion-xl-refiner": (366, 34, 42, 66), "open-clip": (20, 28), + "stable-diffusion-3": (366, 34, 42, 66), }