Skip to content

Commit

Permalink
Update example notebooks to create schema object from merlin core (#650)
Browse files Browse the repository at this point in the history
* create schema from core

* update ETL and read schema from core

* read schema from core

* update notebooks to read schema from core
  • Loading branch information
rnyak committed Mar 17, 2023
1 parent 8c13823 commit a293d8c
Show file tree
Hide file tree
Showing 11 changed files with 3,037 additions and 3,129 deletions.
574 changes: 302 additions & 272 deletions examples/end-to-end-session-based/01-ETL-with-NVTabular.ipynb

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@
"from transformers4rec import torch as tr\n",
"from transformers4rec.torch.ranking_metric import NDCGAt, AvgPrecisionAt, RecallAt\n",
"from transformers4rec.torch.utils.examples_utils import wipe_memory\n",
"from merlin.schema import Schema\n",
"from merlin.io import Dataset\n",
"\n",
"import argparse\n",
"\n",
Expand All @@ -131,14 +133,15 @@
"parser.add_argument('--per-device-eval-batch-size', type=int, default=512, help='Per device batch size for evaluation')\n",
"sh_args = parser.parse_args()\n",
"\n",
"# create the schema object by reading the schema.pbtxt file generated by NVTabular pipeline in the previous 01-ETL-with-NVTabular notebook\n",
"from merlin_standard_lib import Schema\n",
"SCHEMA_PATH = \"schema_demo.pb\"\n",
"schema = Schema().from_proto_text(SCHEMA_PATH)\n",
"# create the schema object by reading the processed train set generated in the previous 01-ETL-with-NVTabular notebook\n",
"\n",
"INPUT_DATA_DIR = os.environ.get(\"INPUT_DATA_DIR\", \"/workspace/data\")\n",
"train = Dataset(os.path.join(INPUT_DATA_DIR, \"processed_nvt/part_0.parquet\"))\n",
"schema = train.schema\n",
"\n",
"# select the subset of features we want to use for training the model by their tags or their names.\n",
"schema = schema.select_by_name(\n",
" ['item_id-list_seq', 'category-list_seq', 'product_recency_days_log_norm-list_seq', 'et_dayofweek_sin-list_seq']\n",
" ['item_id-list', 'category-list', 'product_recency_days_log_norm-list', 'et_dayofweek_sin-list']\n",
")\n",
"\n",
"max_sequence_length, d_model = 20, 320\n",
Expand Down Expand Up @@ -256,7 +259,7 @@
"metadata": {},
"outputs": [],
"source": [
"!python -m torch.distributed.launch --nproc_per_node 2 pyt_trainer.py --path \"./preproc_sessions_by_day\" --learning-rate 0.0005"
"!python -m torch.distributed.launch --nproc_per_node 2 pyt_trainer.py --path \"/workspace/data/preproc_sessions_by_day\" --learning-rate 0.0005"
]
},
{
Expand Down Expand Up @@ -284,8 +287,7 @@
"\n",
"- Merlin Transformers4rec: https://github.com/NVIDIA-Merlin/Transformers4Rec\n",
"\n",
"- Merlin NVTabular: https://github.com/NVIDIA-Merlin/NVTabular/tree/main/nvtabular",
"\n",
"- Merlin NVTabular: https://github.com/NVIDIA-Merlin/NVTabular/tree/main/nvtabular\n",
"- Merlin Dataloader: https://github.com/NVIDIA-Merlin/dataloader"
]
}
Expand Down
87 changes: 0 additions & 87 deletions examples/end-to-end-session-based/schema_demo.pb

This file was deleted.

Loading

0 comments on commit a293d8c

Please sign in to comment.