Skip to content

Commit

Permalink
[wip] Add tox.ini and update paths in example notebooks (#813)
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-alaiacano authored Oct 20, 2022
1 parent 058e338 commit 969c9a0
Show file tree
Hide file tree
Showing 12 changed files with 1,385 additions and 931 deletions.
238 changes: 110 additions & 128 deletions examples/02-Merlin-Models-and-NVTabular-integration.ipynb

Large diffs are not rendered by default.

449 changes: 381 additions & 68 deletions examples/03-Exploring-different-models.ipynb

Large diffs are not rendered by default.

165 changes: 127 additions & 38 deletions examples/04-Exporting-ranking-models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "bc80cfdd",
"metadata": {
"pycharm": {
Expand Down Expand Up @@ -92,9 +92,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2022-06-03 16:47:15.434414: I tensorflow/core/platform/cpu_feature_guard.cc:152] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2022-06-03 16:47:16.472878: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16254 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:15:00.0, compute capability: 7.0\n"
"2022-10-19 17:20:17.650375: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2022-10-19 17:20:19.081535: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory\n",
"2022-10-19 17:20:19.081560: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
"Skipping registering GPU devices...\n",
"2022-10-19 17:20:19.121312: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
}
],
Expand Down Expand Up @@ -147,11 +150,24 @@
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.USER: 'user'>, <Tags.ID: 'id'>].\n",
" warnings.warn(\n",
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
" warnings.warn(\n",
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
" warnings.warn(\n"
]
}
],
"source": [
"from merlin.datasets.synthetic import generate_data\n",
"\n",
"DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"/workspace/data/\")\n",
"DATA_FOLDER = os.environ.get(\"DATA_FOLDER\", \"workspace/data/\")\n",
"NUM_ROWS = os.environ.get(\"NUM_ROWS\", 1000000)\n",
"SYNTHETIC_DATA = eval(os.environ.get(\"SYNTHETIC_DATA\", \"True\"))\n",
"BATCH_SIZE = int(os.environ.get(\"BATCH_SIZE\", 512))\n",
Expand Down Expand Up @@ -217,26 +233,31 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.8/dist-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n",
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/io/dataset.py:251: UserWarning: Initializing an NVTabular Dataset in CPU mode.This is an experimental feature with extremely limited support!\n",
" warnings.warn(\n",
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.USER: 'user'>, <Tags.ID: 'id'>].\n",
" warnings.warn(\n",
"/home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [<Tags.ITEM: 'item'>, <Tags.ID: 'id'>].\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.28 s, sys: 682 ms, total: 1.96 s\n",
"Wall time: 2.04 s\n"
"CPU times: user 7.6 s, sys: 1.49 s, total: 9.09 s\n",
"Wall time: 8.23 s\n"
]
}
],
"source": [
"%%time\n",
"user_id = [\"user_id\"] >> Categorify() >> TagAsUserID()\n",
"item_id = [\"item_id\"] >> Categorify() >> TagAsItemID()\n",
"category_temp_directory = os.path.join(DATA_FOLDER, \"categories\")\n",
"user_id = [\"user_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsUserID()\n",
"item_id = [\"item_id\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemID()\n",
"targets = [\"click\"] >> AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, \"target\"])\n",
"\n",
"item_features = [\"item_category\", \"item_shop\", \"item_brand\"] >> Categorify() >> TagAsItemFeatures()\n",
"item_features = [\"item_category\", \"item_shop\", \"item_brand\"] >> Categorify(out_path=category_temp_directory) >> TagAsItemFeatures()\n",
"\n",
"user_features = (\n",
" [\n",
Expand All @@ -252,7 +273,7 @@
" \"user_brands\",\n",
" \"user_categories\",\n",
" ]\n",
" >> Categorify()\n",
" >> Categorify(out_path=category_temp_directory)\n",
" >> TagAsUserFeatures()\n",
")\n",
"\n",
Expand Down Expand Up @@ -291,7 +312,7 @@
},
"outputs": [],
"source": [
"workflow.save(\"workflow\")"
"workflow.save(os.path.join(DATA_FOLDER, \"workflow\"))"
]
},
{
Expand All @@ -308,14 +329,31 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "5e03167a",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting seedir\n",
" Using cached seedir-0.3.1-py3-none-any.whl (114 kB)\n",
"Collecting emoji\n",
" Using cached emoji-2.1.0-py3-none-any.whl\n",
"Requirement already satisfied: natsort in /home/alaiacano/.pyenv/versions/3.8.10/envs/merlin38/lib/python3.8/site-packages (from seedir) (8.1.0)\n",
"Installing collected packages: emoji, seedir\n",
"Successfully installed emoji-2.1.0 seedir-0.3.1\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m22.3\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install seedir"
]
Expand All @@ -338,6 +376,18 @@
"output_type": "stream",
"text": [
"data/\n",
"├─categories/\n",
"│ └─categories/\n",
"│ ├─unique.item_brand.parquet\n",
"│ ├─unique.item_category.parquet\n",
"│ ├─unique.item_id.parquet\n",
"│ ├─unique.item_shop.parquet\n",
"│ ├─unique.user_age.parquet\n",
"│ ├─unique.user_brands.parquet\n",
"│ ├─unique.user_categories.parquet\n",
"│ ├─unique.user_consumption_2.parquet\n",
"│ ├─unique.user_gender.parquet\n",
"│ └─unique.user_geography.parquet\n",
"├─processed/\n",
"│ ├─train/\n",
"│ │ ├─_file_list.txt\n",
Expand All @@ -352,11 +402,23 @@
"│ ├─part_0.parquet\n",
"│ └─schema.pbtxt\n",
"├─train/\n",
"│ ├─_metadata\n",
"│ └─part.0.parquet\n",
"└─valid/\n",
" ├─_metadata\n",
" └─part.0.parquet\n"
"├─valid/\n",
"│ └─part.0.parquet\n",
"└─workflow/\n",
" ├─categories/\n",
" │ ├─unique.item_brand.parquet\n",
" │ ├─unique.item_category.parquet\n",
" │ ├─unique.item_id.parquet\n",
" │ ├─unique.item_shop.parquet\n",
" │ ├─unique.user_age.parquet\n",
" │ ├─unique.user_brands.parquet\n",
" │ ├─unique.user_categories.parquet\n",
" │ ├─unique.user_consumption_2.parquet\n",
" │ ├─unique.user_gender.parquet\n",
" │ └─unique.user_geography.parquet\n",
" ├─metadata.json\n",
" └─workflow.pkl\n"
]
}
],
Expand Down Expand Up @@ -472,15 +534,7 @@
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2022-06-03 16:47:31.255173: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n"
]
}
],
"outputs": [],
"source": [
"model = mm.DLRMModel(\n",
" schema,\n",
Expand Down Expand Up @@ -508,15 +562,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1368/1368 [==============================] - 36s 22ms/step - loss: 0.6932 - auc: 0.5001 - val_loss: 0.6932 - val_auc: 0.4996\n",
"CPU times: user 1min 13s, sys: 11.9 s, total: 1min 25s\n",
"Wall time: 39 s\n"
"1368/1368 [==============================] - 30s 18ms/step - loss: 0.6932 - auc: 0.4999 - regularization_loss: 0.0000e+00 - val_loss: 0.6932 - val_auc: 0.4998 - val_regularization_loss: 0.0000e+00\n",
"CPU times: user 1min 21s, sys: 12.1 s, total: 1min 33s\n",
"Wall time: 30.9 s\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f51480815e0>"
"<keras.callbacks.History at 0x7f5127386700>"
]
},
"execution_count": 12,
Expand Down Expand Up @@ -559,16 +613,46 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "f999a063",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Unsupported signature for serialization: ((PredictionOutput(predictions=TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/predictions'), targets=TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets'), positive_item_ids=None, label_relevant_counts=None, valid_negatives_mask=None, negative_item_ids=None, sample_weight=None), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f4ebc177f40>), {}).\n",
"INFO:tensorflow:Unsupported signature for serialization: ((PredictionOutput(predictions=TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/predictions'), targets=TensorSpec(shape=(None, 1), dtype=tf.float32, name='outputs/targets'), positive_item_ids=None, label_relevant_counts=None, valid_negatives_mask=None, negative_item_ids=None, sample_weight=None), <tensorflow.python.framework.func_graph.UnknownArgument object at 0x7f4ebc177f40>), {}).\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:Found untraced functions such as train_compute_metrics, model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, output_layer_layer_call_fn, output_layer_layer_call_and_return_conditional_losses while saving (showing 5 of 97). These functions will not be directly callable after loading.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: workspace/data/dlrm/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: workspace/data/dlrm/assets\n"
]
}
],
"source": [
"model.save(\"dlrm\")"
"model.save(os.path.join(DATA_FOLDER, \"dlrm\"))"
]
},
{
Expand Down Expand Up @@ -612,7 +696,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.8.10 64-bit ('merlin38')",
"language": "python",
"name": "python3"
},
Expand All @@ -626,12 +710,17 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.8.10"
},
"merlin": {
"containers": [
"nvcr.io/nvidia/merlin/merlin-tensorflow:latest"
]
},
"vscode": {
"interpreter": {
"hash": "a398807c5c2ed8e5ff9d9890488d007fa99cbabcec733962e21659a28c5da99b"
}
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 969c9a0

Please sign in to comment.