From 1633a4f805f6d0fabffb2b9e4968844bef4ba269 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Tue, 2 Jan 2024 16:06:52 -0500 Subject: [PATCH 1/6] gnn fraud detection notebook fix --- .../gnn-fraud-detection-training.ipynb | 333 +++++++----------- 1 file changed, 127 insertions(+), 206 deletions(-) diff --git a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb index d66234974d..f58d441d58 100644 --- a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb +++ b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb @@ -52,6 +52,7 @@ "%autoreload 2\n", "import pandas as pd\n", "import numpy as np\n", + "import matplotlib.pylab as plt\n", "import os\n", "import dgl\n", "import numpy as np\n", @@ -60,6 +61,7 @@ "import torch.nn as nn\n", "from model import HeteroRGCN\n", "from model import HinSAGE\n", + "from model import prepare_data\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import auc\n", "from sklearn.metrics import average_precision_score\n", @@ -70,7 +72,8 @@ "from tqdm import trange\n", "from xgboost import XGBClassifier\n", "from training import (get_metrics, evaluate, init_loaders, build_fsi_graph,\n", - " map_node_id, prepare_data, save_model, train)\n" + " save_model, train)\n", + "import cudf as cf" ] }, { @@ -85,37 +88,17 @@ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")" ] }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "device(type='cuda', index=0)" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#device " - ] - }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "##### Load traing and test dataset" + "##### Load training and test dataset" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -136,30 +119,29 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Increase number of samples.\n", - "def augement_data(train_data=train_data, n=20):\n", - " max_id = inductive_data.index.max()\n", + "def augment_data(train_data=train_data, n=20):\n", + " train_data.drop(columns=['index'], inplace=True, axis=1)\n", " non_fraud = train_data[train_data['fraud_label'] == 0]\n", - " \n", - " non_fraud = non_fraud.drop(['index'], axis=1)\n", - " df_fraud = pd.concat([non_fraud for i in range(n)])\n", - " df_fraud.index = np.arange(1076, 1076 + df_fraud.shape[0])\n", - " df_fraud['index'] = df_fraud.index\n", - " \n", - " return pd.concat((train_data, df_fraud))" + " df_fraud = pd.concat([non_fraud for _ in range(n)])\n", + " df_train = pd.concat([train_data, df_fraud])\n", + " df_train.reset_index(inplace=True)\n", + " df_train['index'] = df_train.index\n", + "\n", + " return df_train\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "train_data = augement_data(train_data, n=20)" + "train_data = augment_data(train_data, n=20)" ] }, { @@ -173,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -196,39 +178,14 @@ "print('The distribution of fraud for the inductive data is:\\n', 
inductive_data['fraud_label'].value_counts())" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# split train, test and create nodes index\n", - "def prepare_data(df_train, df_test):\n", - " \n", - " train_idx_ = df_train.shape[0]\n", - " df = pd.concat([df_train, df_test], axis=0)\n", - " df['tran_id'] = df['index']\n", - "\n", - " meta_cols = ['tran_id', 'client_node', 'merchant_node']\n", - " for col in meta_cols:\n", - " map_node_id(df, col)\n", - "\n", - " train_idx = df['tran_id'][:train_idx_]\n", - " test_idx = df['tran_id'][train_idx_:]\n", - "\n", - " df['index'] = df['tran_id']\n", - " df.index = df['index']\n", - "\n", - " return (df.iloc[train_idx, :], df.iloc[test_idx, :], train_idx, test_idx, df['fraud_label'].values, df)" - ] - }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "train_data, test_data, train_idx, inductive_idx, labels, df = prepare_data(train_data, inductive_data)" + "# train_data, test_data, train_index, test_index, labels, all_data\n", + "train_data, test_data, train_idx, inductive_idx, labels, df = prepare_data(cf.from_pandas(train_data), cf.from_pandas(inductive_data))" ] }, { @@ -236,7 +193,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Construct transasction graph network" + "### 3. Construct transaction graph network" ] }, { @@ -253,45 +210,18 @@ "metadata": {}, "outputs": [], "source": [ - "meta_cols = [\"client_node\", \"merchant_node\", \"fraud_label\", \"index\", \"tran_id\"]\n", + "\n", + "meta_cols = [\"client_node\", \"merchant_node\", \"index\"]\n", "\n", "# Build graph\n", "whole_graph, feature_tensors = build_fsi_graph(df, meta_cols)\n", "train_graph, _ = build_fsi_graph(train_data, meta_cols)\n", - "whole_graph = whole_graph.to(device)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Dataset to tensors\n", - "feature_tensors = feature_tensors.to(device)\n", - "train_idx = torch.from_numpy(train_idx.values).to(device)\n", - "inductive_idx = torch.from_numpy(inductive_idx.values).to(device)\n", - "labels = torch.LongTensor(labels).to(device)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Graph(num_nodes={'client': 623, 'merchant': 388, 'transaction': 12053},\n", - " num_edges={('client', 'buy', 'transaction'): 12053, ('merchant', 'sell', 'transaction'): 12053, ('transaction', 'bought', 'client'): 12053, ('transaction', 'issued', 'merchant'): 12053},\n", - " metagraph=[('client', 'transaction', 'buy'), ('transaction', 'client', 'bought'), ('transaction', 'merchant', 'issued'), ('merchant', 'transaction', 'sell')])\n" - ] - } - ], - "source": [ - "# Show structure of training graph.\n", - "print(train_graph)" + "\n", + "# Dataset\n", + "feature_tensors = feature_tensors.float()\n", + "train_idx = torch.from_dlpack(train_idx.values.toDlpack()).long()\n", + "inductive_idx = torch.from_dlpack(inductive_idx.values.toDlpack()).long()\n", + "labels = torch.from_dlpack(labels.toDlpack()).long()\n" ] }, { @@ -312,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -322,21 +252,24 @@ "in_size, hidden_size, out_size, n_layers,\\\n", " embedding_size = 111, 64, 2, 2, 1\n", "batch_size = 100\n", - "hyperparameters = {\"in_size\": in_size, \"hidden_size\": hidden_size,\n", - " 
\"out_size\": out_size, \"n_layers\": n_layers,\n", - " \"embedding_size\": embedding_size,\n", - " \"target_node\": target_node,\n", - " \"epoch\": epochs}\n", + "in_size, hidden_size, out_size, n_layers, embedding_size = 111, 64, 2, 2, 1\n", + "hyperparameters = {\n", + " \"in_size\": in_size,\n", + " \"hidden_size\": hidden_size,\n", + " \"out_size\": out_size,\n", + " \"n_layers\": n_layers,\n", + " \"embedding_size\": embedding_size,\n", + " \"target_node\": target_node,\n", + " \"epoch\": epochs\n", + "}\n", "\n", - "\n", - "scale_pos_weight = train_data['fraud_label'].sum() / train_data.shape[0]\n", - "scale_pos_weight = torch.tensor(\n", - " [scale_pos_weight, 1-scale_pos_weight]).to(device)" + "scale_pos_weight = (labels[train_idx].sum() / train_data.shape[0]).item()\n", + "scale_pos_weight = torch.FloatTensor([scale_pos_weight, 1 - scale_pos_weight]).to(device)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -354,314 +287,309 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/20 [00:00#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 
[remainder of the scikit-learn estimator display styling (CSS emitted with the XGBClassifier cell output) omitted: presentation-only, no substantive content; the plain-text repr follows]
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
        "              colsample_bylevel=None, colsample_bynode=None,\n",
        "              colsample_bytree=None, early_stopping_rounds=None,\n",
        "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
@@ -806,7 +734,7 @@
        "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
        "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
        "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
-       "              predictor=None, random_state=None, ...)
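Note on the hunk above: it only refreshes the stored XGBClassifier signature in the cell output; the predictor line drops out, apparently because recent XGBoost releases removed that argument. In the pipeline itself, the notebook fits this classifier on the node embeddings produced by the trained GNN rather than on the raw tabular features. A minimal sketch of that handoff, using synthetic arrays and a hypothetical 64-wide embedding rather than the notebook's exact code:

import numpy as np
from xgboost import XGBClassifier

# Stand-ins for the embeddings emitted by the trained GNN model;
# random values here purely to illustrate the handoff.
rng = np.random.default_rng(0)
train_embeddings = rng.normal(size=(1000, 64))   # hypothetical embedding width
train_labels = rng.integers(0, 2, size=1000)     # binary fraud labels
test_embeddings = rng.normal(size=(200, 64))

# Same estimator family as the repr above; other arguments left at defaults.
classifier = XGBClassifier(n_estimators=100)
classifier.fit(train_embeddings, train_labels)
fraud_scores = classifier.predict_proba(test_embeddings)[:, 1]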
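More broadly, the patch moves the index and label plumbing from pandas/NumPy to cuDF and hands the results to PyTorch zero-copy via DLPack (torch.from_dlpack(train_idx.values.toDlpack()) in the hunks above). A rough sketch of that conversion pattern, assuming a CUDA-capable environment with cuDF installed; this is simplified from the diff, not a drop-in excerpt:

import cudf
import torch

# A small GPU-resident column; Series.values exposes it as a CuPy array.
gdf = cudf.DataFrame({"fraud_label": [0, 1, 0, 1]})
labels = torch.from_dlpack(gdf["fraud_label"].values.toDlpack()).long()

# The tensor aliases the same device memory, so there is no host round trip.
assert labels.is_cuda

The same pattern backs train_idx and inductive_idx in the updated cell, which is why the earlier torch.from_numpy(...).to(device) calls could be dropped.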