-
Notifications
You must be signed in to change notification settings - Fork 162
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: Upgrade the models to use keras 3.0 #1138
Conversation
@@ -237,7 +237,8 @@ def _construct_model(self) -> None: | |||
model_loc = self._parameters["model_path"] | |||
|
|||
self._model: tf.keras.Model = tf.keras.models.load_model(model_loc) | |||
softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] | |||
self._model = tf.keras.Model(self._model.inputs, self._model.outputs) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Required for the function to have output_names
. Sequential models do not have that output.
@@ -253,20 +254,27 @@ def _construct_model(self) -> None: | |||
)(self._model.layers[softmax_layer_ind - 1].output) | |||
|
|||
# Output the model into a .pb file for TensorFlow | |||
argmax_layer = tf.keras.backend.argmax(new_softmax_layer) | |||
argmax_layer = tf.keras.ops.argmax(new_softmax_layer, axis=2) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 method
metrics = {softmax_output_layer_name: ["acc", f1_score_training]} | ||
metrics = { | ||
softmax_output_layer_name: [ | ||
"categorical_crossentropy", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 requires specification of loss while v2 did not
@@ -294,30 +302,33 @@ def _reconstruct_model(self) -> None: | |||
num_labels = self.num_labels | |||
default_ind = self.label_mapping[self._parameters["default_label"]] | |||
|
|||
# Remove the 2 output layers ('softmax', 'tf_op_layer_ArgMax') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
popping does nothing in v3
final_softmax_layer = tf.keras.layers.Dense( | ||
num_labels, activation="softmax", name="softmax_output" | ||
)(self._model.layers[-4].output) | ||
)(self._model.layers[-2].output) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
argmax ops does not show as a layer anymore
metrics = {softmax_output_layer_name: ["acc", f1_score_training]} | ||
metrics = { | ||
softmax_output_layer_name: [ | ||
"categorical_crossentropy", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 requires specification of loss while v2 did not
@@ -74,6 +74,133 @@ def create_glove_char(n_dims: int, source_file: str = None) -> None: | |||
file.write(word + " " + " ".join(str(num) for num in embd) + "\n") | |||
|
|||
|
|||
@tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel") | |||
class ThreshArgMaxLayer(tf.keras.layers.Layer): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refactored threshargmax out of the class function so it could be properly serialized in keras v3
|
||
|
||
@tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel") | ||
class EncodingLayer(tf.keras.layers.Layer): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as above with the thresh class
@@ -280,7 +407,7 @@ def save_to_disk(self, dirpath: str) -> None: | |||
labels_dirpath = os.path.join(dirpath, "label_mapping.json") | |||
with open(labels_dirpath, "w") as fp: | |||
json.dump(self.label_mapping, fp) | |||
self._model.save(os.path.join(dirpath)) | |||
self._model.save(os.path.join(dirpath, "model.keras")) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 requires the ext to be .keras
} | ||
with tf.keras.utils.custom_object_scope(custom_objects): | ||
tf_model = tf.keras.models.load_model(dirpath) | ||
tf_model = tf.keras.models.load_model(os.path.join(dirpath, "model.keras")) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
serialized in keras v3 hence the above custom scopes were not needed
@@ -333,35 +452,6 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel: | |||
] | |||
return loaded_model | |||
|
|||
@staticmethod | |||
def _char_encoding_layer( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved to a class
@@ -383,47 +473,7 @@ def _argmax_threshold_layer( | |||
""" | |||
# Initialize the thresholds vector variable and create the threshold | |||
# matrix. | |||
class ThreshArgMaxLayer(tf.keras.layers.Layer): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved to a class
@@ -449,17 +499,13 @@ def _construct_model(self) -> None: | |||
max_length = self._parameters["max_length"] | |||
max_char_encoding_id = self._parameters["max_char_encoding_id"] | |||
|
|||
# Encoding layer | |||
def encoding_function(input_str: tf.Tensor) -> tf.Tensor: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved to a class
@@ -485,7 +530,6 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: | |||
max_char_encoding_id + 2, | |||
self._parameters["dim_embed"], | |||
weights=[embedding_matrix], | |||
input_length=input_shape[0], |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
api change in keras v3
@@ -503,7 +547,7 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: | |||
if self._parameters["dropout"]: | |||
self._model.add(tf.keras.layers.Dropout(self._parameters["dropout"])) | |||
# Add batch normalization, set fused = True for compactness | |||
self._model.add(tf.keras.layers.BatchNormalization(fused=False, scale=True)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 api change
@@ -564,22 +614,18 @@ def _reconstruct_model(self) -> None: | |||
num_labels = self.num_labels | |||
default_ind = self.label_mapping[self._parameters["default_label"]] | |||
|
|||
# Remove the 3 output layers (dense_2', 'tf_op_layer_ArgMax', |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
popping does nothing in keras v3
final_softmax_layer = tf.keras.layers.Dense( | ||
num_labels, activation="softmax", name="dense_2" | ||
)(self._model.layers[-4].output) | ||
)(self._model.layers[-3].output) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
argmax does not show as a layer in v3 hence the reduction
losses = {softmax_output_layer_name: "categorical_crossentropy"} | ||
|
||
# use f1 score metric | ||
f1_score_training = labeler_utils.F1Score( | ||
num_classes=num_labels, average="micro" | ||
) | ||
metrics = {softmax_output_layer_name: ["acc", f1_score_training]} | ||
metrics = { | ||
softmax_output_layer_name: [ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 requires specification of loss while v2 did not
@@ -729,7 +781,9 @@ def _validate_training( | |||
for x_val, y_val in val_data: | |||
y_val_pred.append( | |||
self._model.predict( | |||
x_val, batch_size=batch_size_test, verbose=verbose_keras | |||
tf.convert_to_tensor(x_val), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
v3 requires the conversion to tensor
@@ -141,11 +141,11 @@ def load_from_library(cls, name: str, trainable: bool = False) -> BaseDataLabele | |||
:type trainable: bool | |||
:return: DataLabeler class | |||
""" | |||
for labeler_name, labeler_class_obj in cls.labeler_classes.items(): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixes a bug in identification
@@ -435,11 +435,6 @@ def get_config(self) -> dict: | |||
base_config = super().get_config() | |||
return {**base_config, **config} | |||
|
|||
def reset_state(self) -> None: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no longer needed as this is builtin
encode_output = cnn_model._char_encoding_layer( | ||
input_str_tensor, max_char_encoding_id, max_len | ||
).numpy()[0] | ||
encode_layer = EncodingLayer(max_char_encoding_id, max_len) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
now tests the encoding layer
tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin' | ||
tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64' | ||
tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64' | ||
tensorflow>=2.16.0; sys.platform != 'darwin' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update to v3 keras
@@ -358,7 +358,7 @@ def __init__( | |||
|
|||
def _zero_wt_init(name: str) -> tf.Variable: | |||
return self.add_weight( | |||
name, shape=self.init_shape, initializer="zeros", dtype=self.dtype | |||
name=name, shape=self.init_shape, initializer="zeros", dtype=self.dtype |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the initial error that started this all
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keras v3 api change
Added a commit to drop support for Python 3.8 since tensorflow >= |
Hello, any way we can get this merged? |
Hey @lettergram, yes.
Thanks! |
* add downloads tile (capitalone#1085) * Replace snappy with cramjam * Delete test_no_snappy --------- Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
…a with t…" (capitalone#1133) This reverts commit d3159bd.
Head branch was pushed to by a user without write access
1188e83
to
4adc8e0
Compare
@taylorfturner @lettergram rebase complete! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
* refactor: Upgrade the models to use keras 3.0 (#1138) * Replace snappy with cramjam (#1091) * add downloads tile (#1085) * Replace snappy with cramjam * Delete test_no_snappy --------- Co-authored-by: Taylor Turner <taylorfturner@gmail.com> * pre-commit fix (#1122) * Bug fix for float precision calculation using categorical data with trailing zeros. (#1125) * Revert "Bug fix for float precision calculation using categorical data with t…" (#1133) This reverts commit d3159bd. * refactor: move layers outside of class * refactor: update model to keras 3.0 * fix: manifest * fix: bugs in compile and train * fix: bug in load_from_library * fix: bugs in CharCNN * refactor: loading tf model labeler * fix: bug in data_labeler identification * fix: update model to use proper softmax layer names * fix: formatting * fix: remove unused line * refactor: drop support for 3.8 * fix: comments * fix: comment --------- Co-authored-by: Gábor Lipták <gliptak@gmail.com> Co-authored-by: Taylor Turner <taylorfturner@gmail.com> Co-authored-by: James Schadt <jamesrschadt@gmail.com> * Fix Tox (#1143) * tox new * update * update * update * update * update * update * update * update tox.ini * update * update * remove docs * empty retrigger * update (#1146) * bump version * update 3.11 * remove dist/ --------- Co-authored-by: JGSweets <JGSweets@users.noreply.github.com> Co-authored-by: Gábor Lipták <gliptak@gmail.com> Co-authored-by: James Schadt <jamesrschadt@gmail.com>
This pr:
Closes: #1126