diff --git a/AUTHORS.md b/AUTHORS.md
index 0c480792e2..966987f430 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -41,6 +41,9 @@ To contributors: please add your name to the list when you submit a patch to the
 * **[Aaron He](https://github.com/AaronHeee)**
    * Reco utils of NCF
    * Deep dive notebook demonstrating the use of NCF
+* **[Abir Chakraborty](https://github.com/aeroabir)**
+   * Self-Attentive Sequential Recommendation (SASRec)
+   * Sequential Recommendation Via Personalized Transformer (SSEPT)
 * **[Alexandros Ioannou](https://github.com/aioannou96)**
    * Standard VAE algorithm
    * Multinomial VAE algorithm
diff --git a/examples/00_quick_start/sasrec_amazon.ipynb b/examples/00_quick_start/sasrec_amazon.ipynb
index aa3edea2c0..7df6f965bc 100644
--- a/examples/00_quick_start/sasrec_amazon.ipynb
+++ b/examples/00_quick_start/sasrec_amazon.ipynb
@@ -30,38 +30,35 @@
 },
 {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 8,
  "metadata": {},
  "outputs": [
-  {
-   "name": "stderr",
-   "output_type": "stream",
-   "text": [
-    "/anaconda/envs/py38/lib/python3.8/site-packages/papermill/iorw.py:50: FutureWarning: pyarrow.HadoopFileSystem is deprecated as of 2.0.0, please use pyarrow.fs.HadoopFileSystem instead.\n",
-    "  from pyarrow import HadoopFileSystem\n"
-   ]
-  },
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
-   "System version: 3.8.12 (default, Oct 12 2021, 13:49:34) \n",
+   "System version: 3.7.11 (default, Jul 27 2021, 14:32:16) \n",
    "[GCC 7.5.0]\n",
-   "Tensorflow version: 2.7.0\n"
+   "Tensorflow version: 2.7.1\n"
  ]
 }
 ],
 "source": [
+  "import re\n",
   "import sys\n",
   "import os\n",
   "import scrapbook as sb\n",
+  "from tempfile import TemporaryDirectory\n",
+  "import numpy as np\n",
+  "import pandas as pd\n",
+  "\n",
   "from collections import defaultdict\n",
   "import tensorflow as tf\n",
   "tf.get_logger().setLevel('ERROR') # only show error messages\n",
   "\n",
   "from recommenders.utils.timer import Timer\n",
-  "from recommenders.datasets.amazon_reviews import download_and_extract\n",
-  "from recommenders.datasets.amazon_reviews import _reviews_preprocessing\n",
+  "from recommenders.datasets.amazon_reviews import get_review_data\n",
+  "from recommenders.datasets.split_utils import min_rating_filter_pandas\n",
   "\n",
   "# Transformer Based Models\n",
   "from recommenders.models.sasrec.model import SASREC\n",
@@ -83,7 +80,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 9,
  "metadata": {
   "tags": [
    "parameters"
   ]
  },
@@ -96,145 +93,90 @@
   "RANDOM_SEED = 100 # Set None for non-deterministic result\n",
   "\n",
   "# data_dir = os.path.join(\"tests\", \"recsys_data\", \"RecSys\", \"SASRec-tf2\", \"data\")\n",
-  "data_dir = os.path.join(\"tests\", \"resources\", \"deeprec\", \"sasrec\")\n",
+  "data_dir = os.path.join(\"..\", \"..\", \"tests\", \"resources\", \"deeprec\", \"sasrec\")\n",
   "\n",
-  "# Amazon Electronics Data (already converted into integer user-ids and item-ids)\n",
+  "# Amazon Electronics Data\n",
   "dataset = \"reviews_Electronics_5\"\n",
   "\n",
-  "lr = 0.001 # learning rate\n",
-  "maxlen = 50 # maximum sequence length for each user\n",
-  "num_blocks = 2 # number of transformer blocks\n",
-  "hidden_units = 100 # number of units in the attention calculation\n",
-  "num_heads = 1 # number of attention heads\n",
-  "dropout_rate = 0.1 # dropout rate\n",
-  "l2_emb = 0.0 # L2 regularization coefficient\n",
-  "num_neg_test = 100 # number of negative examples per positive example"
+  "lr = 0.001  # learning rate\n",
+  "maxlen = 50  # maximum sequence length for each user\n",
+  "num_blocks = 2  # number of transformer blocks\n",
+  "hidden_units = 100  # number of units in the attention calculation\n",
+  "num_heads = 1  # number of attention heads\n",
+  "dropout_rate = 0.1  # dropout rate\n",
+  "l2_emb = 0.0  # L2 regularization coefficient\n",
+  "num_neg_test = 100  # number of negative examples per positive example"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 10,
  "metadata": {},
  "outputs": [],
  "source": [
-  "def data_process_with_time(fname, pname, K=10, sep=\" \", item_set=None, add_time=False):\n",
-  "    User = defaultdict(list)\n",
-  "    Users = set()\n",
-  "    Items = set()\n",
-  "    user_dict, item_dict = {}, {}\n",
-  "\n",
-  "    item_counter = defaultdict(lambda: 0)\n",
-  "    user_counter = defaultdict(lambda: 0)\n",
-  "    with open(fname, \"r\") as fr:\n",
-  "        for line in fr:\n",
-  "            u, i, t = line.rstrip().split(sep)\n",
-  "            User[u].append((i, t))\n",
-  "            Items.add(i)\n",
-  "            Users.add(u)\n",
-  "            item_counter[i] += 1\n",
-  "            user_counter[u] += 1\n",
-  "\n",
-  "    # remove items with less than K interactions\n",
-  "    print(f\"Read {len(User)} users and {len(Items)} items\")\n",
-  "    remove_items = set()\n",
-  "    count_remove, count_missing = 0, 0\n",
-  "    for item in Items:\n",
-  "        if item_counter[item] < K:\n",
-  "            count_remove += 1\n",
-  "            remove_items.add(item)\n",
-  "        elif item_set and item not in item_set:\n",
-  "            count_missing += 1\n",
-  "            remove_items.add(item)\n",
-  "\n",
-  "    if count_remove > 0:\n",
-  "        print(f\"{count_remove} items have less than {K} interactions\")\n",
-  "\n",
-  "    if count_missing > 0:\n",
-  "        print(f\"{count_missing} items are not in the meta data\")\n",
-  "\n",
-  "    Items = Items - remove_items\n",
-  "\n",
-  "    # remove users with less than K interactions\n",
-  "    remove_users = set()\n",
-  "    count_remove = 0\n",
-  "    # Users = set(User.keys())\n",
-  "    for user in Users:\n",
-  "        if user_counter[user] < K:\n",
-  "            remove_users.add(user)\n",
-  "            count_remove += 1\n",
-  "    if count_remove > 0:\n",
-  "        print(f\"{count_remove} users have less than {K} interactions\")\n",
-  "    Users = Users - remove_users\n",
-  "\n",
-  "    print(f\"Total {len(Users)} users and {len(Items)} items\")\n",
-  "    item_count = 1\n",
-  "    for item in Items:\n",
-  "        item_dict[item] = item_count\n",
-  "        item_count += 1\n",
-  "\n",
-  "    count_del = 0\n",
-  "    user_count = 1\n",
-  "    with open(pname, \"w\") as fw:\n",
-  "        for user in Users:\n",
-  "            items = User[user]\n",
-  "            items = [tup for tup in items if tup[0] in Items]\n",
-  "            if len(items) < K:\n",
-  "                # del User[user]\n",
-  "                count_del += 1\n",
-  "            else:\n",
-  "                user_dict[user] = user_count\n",
-  "                # sort by time\n",
-  "                items = sorted(items, key=lambda x: x[1])\n",
-  "\n",
-  "                # replace by the item-code\n",
-  "                timestamps = [x[1] for x in items]\n",
-  "                items = [item_dict[x[0]] for x in items]\n",
-  "                for i, t in zip(items, timestamps):\n",
-  "                    out_txt = [str(user_count), str(i)]\n",
-  "                    if add_time:\n",
-  "                        out_txt.append(str(t))\n",
-  "                    fw.write(sep.join(out_txt) + \"\\n\")\n",
-  "                user_count += 1\n",
-  "\n",
-  "    print(f\"Total {user_count-1} users, {count_del} removed\")\n",
-  "    print(f\"Processed model input data in {pname}\")\n",
-  "    return user_dict, item_dict\n"
+  "model_name = 'sasrec'  # 'sasrec' or 'ssept'\n",
+  "reviews_name = dataset + '.json'\n",
+  "outfile = dataset + '.txt'\n",
+  "\n",
+  "reviews_file = os.path.join(data_dir, reviews_name)\n",
+  "if not os.path.exists(reviews_file):\n",
+  "    reviews_output = get_review_data(reviews_file)\n",
+  "else:\n",
+  "    reviews_output = os.path.join(data_dir, dataset + \".json_output\")"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 11,
  "metadata": {},
- "outputs": [
-  {
-   "name": "stderr",
-   "output_type": "stream",
-   "text": [
-    "100%|██████████| 484k/484k [02:31<00:00, 3.20kKB/s] \n"
-   ]
-  },
-  {
-   "name": "stdout",
-   "output_type": "stream",
-   "text": [
-    "Read 192403 users and 63001 items\n",
-    "27927 items have less than 10 interactions\n",
-    "147178 users have less than 10 interactions\n",
-    "Total 45225 users and 35074 items\n",
-    "Total 36262 users, 8963 removed\n",
-    "Processed model input data in recsys_data/RecSys/SASRec-tf2/data/reviews_Electronics_5.txt\n"
-   ]
-  }
- ],
+ "outputs": [],
  "source": [
-  "reviews_name = dataset + '.json'\n",
-  "outfile = dataset + '.txt'\n",
-  "\n",
-  "reviews_file = os.path.join(data_dir, reviews_name)\n",
-  "download_and_extract(reviews_name, reviews_file)\n",
-  "reviews_output = _reviews_preprocessing(reviews_file)\n",
-  "udict, idict = data_process_with_time(reviews_output,\n",
-  "                                      os.path.join(data_dir, outfile), K=10, sep=\"\\t\")\n"
+  "def filter_K_core(data, core_num=0, col_user=\"userID\", col_item=\"itemID\"):\n",
+  "    \"\"\"Filter the rating dataframe for a minimum number of interactions per user\n",
+  "    and per item, by repeatedly applying min_rating_filter_pandas until every\n",
+  "    user and item has at least core_num interactions.\n",
+  "    \"\"\"\n",
+  "    num_users, num_items = len(data[col_user].unique()), len(data[col_item].unique())\n",
+  "    print(f\"Original: {num_users} users and {num_items} items\")\n",
+  "    df = data.copy()\n",
+  "\n",
+  "    if core_num > 0:\n",
+  "        while True:\n",
+  "            df = min_rating_filter_pandas(df, min_rating=core_num, filter_by=\"item\")\n",
+  "            df = min_rating_filter_pandas(df, min_rating=core_num, filter_by=\"user\")\n",
+  "            count_u = df.groupby(col_user)[col_item].count()\n",
+  "            count_i = df.groupby(col_item)[col_user].count()\n",
+  "            if len(count_i[count_i < core_num]) == 0 and len(count_u[count_u < core_num]) == 0:\n",
+  "                break\n",
+  "        df = df.sort_values(by=[col_user])\n",
+  "    print(f\"Final: {len(df[col_user].unique())} users and {len(df[col_item].unique())} items\")\n",
+  "\n",
+  "    return df"
  ]
 },
+{
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "if not os.path.exists(os.path.join(data_dir, outfile)):\n",
+  "    df = pd.read_csv(reviews_output, sep=\"\\t\", names=[\"userID\", \"itemID\", \"time\"])\n",
+  "    df = filter_K_core(df, 10)\n",
+  "\n",
+  "    user_set, item_set = set(df['userID'].unique()), set(df['itemID'].unique())\n",
+  "    user_map = dict()\n",
+  "    item_map = dict()\n",
+  "    for u, user in enumerate(user_set):\n",
+  "        user_map[user] = u+1\n",
+  "    for i, item in enumerate(item_set):\n",
+  "        item_map[item] = i+1\n",
+  "\n",
+  "    df[\"userID\"] = df[\"userID\"].apply(lambda x: user_map[x])\n",
+  "    df[\"itemID\"] = df[\"itemID\"].apply(lambda x: item_map[x])\n",
+  "    df = df.sort_values(by=[\"userID\", \"time\"])\n",
+  "    df.drop(columns=[\"time\"], inplace=True)\n",
+  "    df.to_csv(os.path.join(data_dir, outfile), sep=\"\\t\", header=False, index=False)"
+ ]
+},
@@ -284,7 +226,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 13,
  "metadata": {},
  "outputs": [
   {
@@ -336,78 +278,39 @@
 },
 {
  "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 15,
  "metadata": {},
- "outputs": [
-  {
-   "name": "stderr",
-   "output_type": "stream",
-   "text": [
-    "2021-10-07 10:16:04.997837: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1\n",
-    "2021-10-07 10:16:05.007240: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: \n",
-    "pciBusID: 0001:00:00.0 name: Tesla K80 computeCapability: 3.7\n",
-    "coreClock: 0.8235GHz coreCount: 13 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 223.96GiB/s\n",
-    "2021-10-07 10:16:05.007272: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n",
-    "2021-10-07 10:16:05.009754: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10\n",
-    "2021-10-07 10:16:05.011395: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10\n",
-    "2021-10-07 10:16:05.011757: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10\n",
-    "2021-10-07 10:16:05.013467: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10\n",
-    "2021-10-07 10:16:05.014335: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10\n",
-    "2021-10-07 10:16:05.018536: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7\n",
-    "2021-10-07 10:16:05.019445: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0\n",
-    "2021-10-07 10:16:05.020033: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-    "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-    "2021-10-07 10:16:05.026869: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2596990000 Hz\n",
-    "2021-10-07 10:16:05.027667: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55d8124eefb0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
-    "2021-10-07 10:16:05.027684: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version\n",
-    "2021-10-07 10:16:05.137748: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55d81236d2e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
-    "2021-10-07 10:16:05.137778: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla K80, Compute Capability 3.7\n",
-    "2021-10-07 10:16:05.138278: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: \n",
-    "pciBusID: 0001:00:00.0 name: Tesla K80 computeCapability: 3.7\n",
-    "coreClock: 0.8235GHz coreCount: 13 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 223.96GiB/s\n",
-    "2021-10-07 10:16:05.138332: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n",
-    "2021-10-07 10:16:05.138373: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10\n",
-    "2021-10-07 10:16:05.138400: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10\n",
-    "2021-10-07 10:16:05.138425: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10\n",
-    "2021-10-07 10:16:05.138450: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10\n",
-    "2021-10-07 10:16:05.138475: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10\n",
-    "2021-10-07 10:16:05.138502: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7\n",
-    "2021-10-07 10:16:05.139142: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0\n",
-    "2021-10-07 10:16:05.139191: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n",
-    "2021-10-07 10:16:05.553534: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
-    "2021-10-07 10:16:05.553578: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263]      0 \n",
-    "2021-10-07 10:16:05.553586: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0:   N \n",
-    "2021-10-07 10:16:05.554520: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 205 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0001:00:00.0, compute capability: 3.7)\n"
-   ]
-  }
- ],
+ "outputs": [],
  "source": [
-  "model = SASREC(item_num=data.itemnum,\n",
-  "               seq_max_len=maxlen,\n",
-  "               num_blocks=num_blocks,\n",
-  "               embedding_dim=hidden_units,\n",
-  "               attention_dim=hidden_units,\n",
-  "               attention_num_heads=num_heads,\n",
-  "               dropout_rate=dropout_rate,\n",
-  "               conv_dims = [100, 100],\n",
-  "               l2_reg=l2_emb,\n",
-  "               num_neg_test=num_neg_test\n",
-  ")\n",
-  "\n",
-  "# model = SSEPT(item_num=data.itemnum,\n",
-  "#               user_num=data.usernum,\n",
-  "#               seq_max_len=maxlen,\n",
-  "#               num_blocks=num_blocks,\n",
-  "#               # embedding_dim=hidden_units,  # optional\n",
-  "#               user_embedding_dim=hidden_units,\n",
-  "#               item_embedding_dim=hidden_units,\n",
-  "#               attention_dim=hidden_units,\n",
-  "#               attention_num_heads=num_heads,\n",
-  "#               dropout_rate=dropout_rate,\n",
-  "#               conv_dims = [200, 200],\n",
-  "#               l2_reg=l2_emb,\n",
-  "#               num_neg_test=num_neg_test\n",
-  "#               )\n"
+  "if model_name == 'sasrec':\n",
+  "    model = SASREC(item_num=data.itemnum,\n",
+  "                   seq_max_len=maxlen,\n",
+  "                   num_blocks=num_blocks,\n",
+  "                   embedding_dim=hidden_units,\n",
+  "                   attention_dim=hidden_units,\n",
+  "                   attention_num_heads=num_heads,\n",
+  "                   dropout_rate=dropout_rate,\n",
+  "                   conv_dims=[100, 100],\n",
+  "                   l2_reg=l2_emb,\n",
+  "                   num_neg_test=num_neg_test\n",
+  "    )\n",
+  "elif model_name == \"ssept\":\n",
+  "    model = SSEPT(item_num=data.itemnum,\n",
+  "                  user_num=data.usernum,\n",
+  "                  seq_max_len=maxlen,\n",
+  "                  num_blocks=num_blocks,\n",
+  "                  # embedding_dim=hidden_units,  # optional\n",
+  "                  user_embedding_dim=hidden_units,\n",
+  "                  item_embedding_dim=hidden_units,\n",
+  "                  attention_dim=hidden_units,\n",
+  "                  attention_num_heads=num_heads,\n",
+  "                  dropout_rate=dropout_rate,\n",
+  "                  conv_dims=[200, 200],\n",
+  "                  l2_reg=l2_emb,\n",
+  "                  num_neg_test=num_neg_test\n",
+  "    )\n",
+  "else:\n",
+  "    raise ValueError(f\"Model {model_name} not found\")"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 16,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -423,7 +326,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 16,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -445,314 +348,9 @@
 },
 {
  "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 17,
  "metadata": {},
  "outputs": [
-  {
-   "name": "stderr",
-   "output_type": "stream",
-   "text": [
-    "  0%|          | 0/493 [00:00