diff --git a/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb b/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb index e900f265d1..3fee317af1 100644 --- a/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb +++ b/models/training-tuning-scripts/log-parsing-models/log-parsing-training.ipynb @@ -66,16 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#logs_df = cudf.read_csv(\"../../datasets/training-data/log-parsing-training-data.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -120,59 +111,56 @@ " \n", " \n", " \n", - " 2410\n", + " 257\n", " <NA>\n", " <NA>\n", - " 188.138.56.91 - - [17/May/2018:06:28:15 +0200]...\n", - " 188.138.56.91\n", + " 158.69.5.181 - - [04/Apr/2018:23:06:49 +0200] ...\n", + " 158.69.5.181\n", + " -\n", " -\n", " -\n", - " http://www.almhuette-raith.at/\n", - " Mozilla/5.0 (X11; U; Linux x86_64; de-DE; rv:1...\n", - " ThumbShotsBot\n", - " Linux\n", + " -\n", + " Other\n", + " Other\n", " <NA>\n", " 1.1\n", - " GET\n", - " /images/stories/slideshow/almhuette_raith_01.jpg\n", - " 88161\n", + " POST\n", + " /administrator/index.php\n", + " 4498\n", " 200\n", - " [17/May/2018:06:28:15 +0200]\n", + " [04/Apr/2018:23:06:49 +0200]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " error_level error_message \\\n", - "2410 \n", - "\n", - " raw remote_host \\\n", - "2410 188.138.56.91 - - [17/May/2018:06:28:15 +0200]... 188.138.56.91 \n", + " error_level error_message \\\n", + "257 \n", "\n", - " remote_logname remote_user request_header_referer \\\n", - "2410 - - http://www.almhuette-raith.at/ \n", + " raw remote_host \\\n", + "257 158.69.5.181 - - [04/Apr/2018:23:06:49 +0200] ... 158.69.5.181 \n", "\n", - " request_header_user_agent \\\n", - "2410 Mozilla/5.0 (X11; U; Linux x86_64; de-DE; rv:1... 
\n", + " remote_logname remote_user request_header_referer \\\n", + "257 - - - \n", "\n", - " request_header_user_agent__browser__family \\\n", - "2410 ThumbShotsBot \n", + " request_header_user_agent request_header_user_agent__browser__family \\\n", + "257 - Other \n", "\n", - " request_header_user_agent__os__family \\\n", - "2410 Linux \n", + " request_header_user_agent__os__family \\\n", + "257 Other \n", "\n", - " request_header_user_agent__os__version_string request_http_ver \\\n", - "2410 1.1 \n", + " request_header_user_agent__os__version_string request_http_ver \\\n", + "257 1.1 \n", "\n", - " request_method request_url \\\n", - "2410 GET /images/stories/slideshow/almhuette_raith_01.jpg \n", + " request_method request_url response_bytes_clf status \\\n", + "257 POST /administrator/index.php 4498 200 \n", "\n", - " response_bytes_clf status time_received \n", - "2410 88161 200 [17/May/2018:06:28:15 +0200] " + " time_received \n", + "257 [04/Apr/2018:23:06:49 +0200] " ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -203,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -215,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -227,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -259,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -276,6 +264,24 @@ " labels.append(labeler(indx, cols))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model Size\n", + "Choose the maximum number of tokens and the overlap(stride) for your model. The tokenizer will split up logger logs and they will go through the model separately. There is a speed-tradeoff with smaller models inferencing faster, but potentially containing errors. You may need to experiment with these parameters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "MAX_SEQ_LEN = 128\n", + "STRIDE = 12" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -286,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -307,13 +313,17 @@ " for i, tag in enumerate(tags):\n", " temp_tags.append(tag)\n", " temp_tags.extend('X'* subword_counts[i].item())\n", - " subword_labels.append(temp_tags)\n", + " if len(temp_tags) > MAX_SEQ_LEN:\n", + " split_temp_tags = [temp_tags[i:i+MAX_SEQ_LEN] for i in range(0, len(temp_tags)-STRIDE, MAX_SEQ_LEN-STRIDE)]\n", + " subword_labels.extend(split_temp_tags)\n", + " else:\n", + " subword_labels.append(temp_tags)\n", " return subword_labels" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -339,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -355,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -366,11 +376,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "padded_labels = [pad(x[:256], '[PAD]', 256) for x in subword_labels]\n", + "padded_labels = [pad(x[:MAX_SEQ_LEN], '[PAD]', MAX_SEQ_LEN) for x in subword_labels]\n", "int_labels = [[label2id.get(l) for l in lab] for lab in padded_labels]\n", "label_tensor = torch.tensor(int_labels).to('cuda')" ] @@ -385,14 +395,15 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "output = tokenizer(logs_df.raw_preprocess,\n", - " max_length=256,\n", - " truncation=True,\n", - " max_num_rows = len(logs_df.raw_preprocess),\n", + " max_length=MAX_SEQ_LEN,\n", + " stride = STRIDE,\n", + " truncation=False,\n", + " max_num_rows = len(logs_df.raw_preprocess)*3,\n", " add_special_tokens=False,\n", " return_tensors='pt'\n", " )\n", @@ -403,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -413,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -426,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -445,14 +456,14 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']\n", + "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']\n", 
"- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", @@ -480,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -504,39 +515,37 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Epoch: 0%| | 0/2 [00:00