diff --git a/keras_cv/models/feature_extractor/clip/clip_model.py b/keras_cv/models/feature_extractor/clip/clip_model.py
index 860739388e..67868f2d71 100644
--- a/keras_cv/models/feature_extractor/clip/clip_model.py
+++ b/keras_cv/models/feature_extractor/clip/clip_model.py
@@ -26,6 +26,7 @@
     CLIPTextEncoder,
 )
 from keras_cv.models.task import Task
+from keras_cv.utils.conditional_imports import assert_keras_nlp_installed
 from keras_cv.utils.python_utils import classproperty
 
 try:
@@ -98,11 +99,7 @@ def __init__(
         **kwargs,
     ):
         super().__init__(**kwargs)
-        if keras_nlp is None:
-            raise ValueError(
-                "ClipTokenizer requires keras-nlp. Please install "
-                "using pip `pip install -U keras-nlp && pip install -U keras`"
-            )
+        assert_keras_nlp_installed("CLIP")
         self.embed_dim = embed_dim
         self.image_resolution = image_resolution
         self.vision_layers = vision_layers
diff --git a/keras_cv/models/feature_extractor/clip/clip_processor.py b/keras_cv/models/feature_extractor/clip/clip_processor.py
index 16d8d24222..1a07a3d710 100644
--- a/keras_cv/models/feature_extractor/clip/clip_processor.py
+++ b/keras_cv/models/feature_extractor/clip/clip_processor.py
@@ -16,10 +16,10 @@
 from keras_cv.backend import keras
 from keras_cv.backend import ops
 from keras_cv.models.feature_extractor.clip.clip_tokenizer import CLIPTokenizer
+from keras_cv.utils.conditional_imports import assert_keras_nlp_installed
 
 try:
     import keras_nlp
-    from keras_nlp.layers import StartEndPacker
 except ImportError:
     keras_nlp = None
 
@@ -50,11 +50,7 @@ class CLIPProcessor:
     """
 
     def __init__(self, input_resolution, vocabulary, merges, **kwargs):
-        if keras_nlp is None:
-            raise ValueError(
-                "ClipTokenizer requires keras-nlp. Please install "
-                "using pip `pip install -U keras-nlp && pip install -U keras`"
-            )
+        assert_keras_nlp_installed("CLIPProcessor")
         self.input_resolution = input_resolution
         self.vocabulary = vocabulary
         self.merges = merges
@@ -64,7 +60,7 @@ def __init__(self, input_resolution, vocabulary, merges, **kwargs):
             merges=self.merges,
             unsplittable_tokens=[""],
         )
-        self.packer = StartEndPacker(
+        self.packer = keras_nlp.layers.StartEndPacker(
             start_value=self.tokenizer.token_to_id("<|startoftext|>"),
             end_value=self.tokenizer.token_to_id("<|endoftext|>"),
             pad_value=None,
diff --git a/keras_cv/models/feature_extractor/clip/clip_tokenizer.py b/keras_cv/models/feature_extractor/clip/clip_tokenizer.py
index 66b4d7cef6..6758296818 100644
--- a/keras_cv/models/feature_extractor/clip/clip_tokenizer.py
+++ b/keras_cv/models/feature_extractor/clip/clip_tokenizer.py
@@ -13,13 +13,19 @@
 # limitations under the License.
 import regex as re
 import tensorflow as tf
-import tensorflow_text as tf_text
+
+from keras_cv.utils.conditional_imports import assert_keras_nlp_installed
+from keras_cv.utils.conditional_imports import assert_tf_text_installed
 
 try:
-    import keras_nlp
     from keras_nlp.tokenizers import BytePairTokenizer
 except ImportError:
-    keras_nlp = None
+    BytePairTokenizer = object
+
+try:
+    import tensorflow_text as tf_text
+except ImportError:
+    tf_text = None
 
 # As python and TF handles special spaces differently, we need to
 # manually handle special spaces during string split.
@@ -41,6 +47,9 @@ def split_strings_for_bpe(inputs, unsplittable_tokens=None):
     # support lookahead match, we are using an alternative insert a special
     # token "६" before leading space of non-space characters and after the
     # trailing space, e.g., " keras" will be "६ keras".
+
+    assert_tf_text_installed("split_strings_for_bpe")
+
     inputs = tf.strings.regex_replace(
         inputs, rf"( )([^\s{SPECIAL_WHITESPACES}])", r"६\1\2"
     )
@@ -106,12 +115,8 @@
 
 class CLIPTokenizer(BytePairTokenizer):
     def __init__(self, **kwargs):
+        assert_keras_nlp_installed("CLIPTokenizer")
         super().__init__(**kwargs)
-        if keras_nlp is None:
-            raise ValueError(
-                "ClipTokenizer requires keras-nlp. Please install "
-                "using pip `pip install -U keras-nlp && pip install -U keras`"
-            )
 
     def _bpe_merge_and_update_cache(self, tokens):
         """Process unseen tokens and add to cache."""
@@ -154,8 +159,9 @@ def process_unseen_tokens():
             self._bpe_merge_and_update_cache(unseen_tokens)
             return self.cache.lookup(flat_tokens)
 
-        # If `has_unseen_words == True`, it means not all tokens are in cache,
-        # we will process the unseen tokens. Otherwise return the cache lookup.
+        # If `has_unseen_words == True`, it means not all tokens are
+        # in cache, so we will process the unseen tokens. Otherwise
+        # return the cache lookup.
         tokenized_words = tf.cond(
             has_unseen_words,
             process_unseen_tokens,
diff --git a/keras_cv/utils/conditional_imports.py b/keras_cv/utils/conditional_imports.py
index fc9cc32810..d6eaf64299 100644
--- a/keras_cv/utils/conditional_imports.py
+++ b/keras_cv/utils/conditional_imports.py
@@ -33,6 +33,16 @@
 except ImportError:
     pycocotools = None
 
+try:
+    import keras_nlp
+except ImportError:
+    keras_nlp = None
+
+try:
+    import tensorflow_text
+except ImportError:
+    tensorflow_text = None
+
 
 def assert_cv2_installed(symbol_name):
     if cv2 is None:
@@ -70,3 +80,21 @@
             "Please install the package using "
             "`pip install pycocotools`."
         )
+
+
+def assert_keras_nlp_installed(symbol_name):
+    if keras_nlp is None:
+        raise ImportError(
+            f"{symbol_name} requires the `keras_nlp` package. "
+            "Please install the package using "
+            "`pip install keras_nlp`."
+        )
+
+
+def assert_tf_text_installed(symbol_name):
+    if tensorflow_text is None:
+        raise ImportError(
+            f"{symbol_name} requires the `tensorflow_text` package. "
+            "Please install the package using "
+            "`pip install tensorflow_text`."
+        )