From cd44722e428155256da426839aaad9ad2d0c1b85 Mon Sep 17 00:00:00 2001 From: Matt Watson Date: Thu, 10 Nov 2022 12:01:58 -0800 Subject: [PATCH] Conditionally import tf text In keeping with other layers, importing the library should not require tensorflow_text to be installed (this is useful, for example, when building keras.io). --- keras_nlp/tokenizers/byte_pair_tokenizer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/keras_nlp/tokenizers/byte_pair_tokenizer.py b/keras_nlp/tokenizers/byte_pair_tokenizer.py index 3e094b5d27..3788747a05 100644 --- a/keras_nlp/tokenizers/byte_pair_tokenizer.py +++ b/keras_nlp/tokenizers/byte_pair_tokenizer.py @@ -24,10 +24,15 @@ from typing import List import tensorflow as tf -import tensorflow_text as tf_text from tensorflow import keras from keras_nlp.tokenizers import tokenizer +from keras_nlp.utils.tf_utils import assert_tf_text_installed + +try: + import tensorflow_text as tf_text +except ImportError: + tf_text = None # As python and TF handles special spaces differently, we need to # manually handle special spaces during string split. @@ -220,6 +225,8 @@ def __init__( sequence_length=None, **kwargs, ) -> None: + assert_tf_text_installed(self.__class__.__name__) + # Check dtype and provide a default. if "dtype" not in kwargs or kwargs["dtype"] is None: kwargs["dtype"] = tf.int32 @@ -241,7 +248,8 @@ def __init__( else: raise ValueError( "Vocabulary must be an file path or dictionary mapping string " - f"token to int ids. Received: `type(vocabulary)={type(vocabulary)}`." + "token to int ids. Received: " + f"`type(vocabulary)={type(vocabulary)}`." ) if isinstance(merges, str): self.merges = [bp.rstrip() for bp in tf.io.gfile.GFile(merges)]