Skip to content

Commit

Permalink
Replace SARSingleNode with SAR (#1768)
Browse files (browse the repository at this point in the history)
  • Loading branch information
simonzhaoms authored Jul 1, 2022
1 parent 7381e40 commit 32107f1
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 23 deletions.
4 changes: 2 additions & 2 deletions examples/00_quick_start/sar_movielens_with_azureml.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -341,7 +341,7 @@
"from recommenders.datasets import movielens\n",
"from recommenders.datasets.python_splitters import python_stratified_split\n",
"from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n",
"from recommenders.models.sar.sar_singlenode import SARSingleNode\n",
"from recommenders.models.sar import SAR\n",
"\n",
"\n",
"logging.basicConfig(level=logging.DEBUG, \n",
Expand Down Expand Up @@ -384,7 +384,7 @@
"train, test = python_stratified_split(data, ratio=0.75, col_user=header[\"col_user\"], col_item=header[\"col_item\"], seed=42)\n",
"\n",
"# instantiate the model\n",
"model = SARSingleNode(\n",
"model = SAR(\n",
" similarity_type=\"jaccard\", \n",
" time_decay_coefficient=30, \n",
" time_now=None, \n",
Expand Down
4 changes: 2 additions & 2 deletions examples/02_model_collaborative_filtering/sar_deep_dive.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -133,7 +133,7 @@
"from recommenders.datasets import movielens\n",
"from recommenders.datasets.python_splitters import python_stratified_split\n",
"from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n",
"from recommenders.models.sar.sar_singlenode import SARSingleNode\n",
"from recommenders.models.sar import SAR\n",
"\n",
"print(\"System version: {}\".format(sys.version))\n",
"print(\"Pandas version: {}\".format(pd.__version__))"
Expand Down Expand Up @@ -339,7 +339,7 @@
"logging.basicConfig(level=logging.DEBUG, \n",
" format='%(asctime)s %(levelname)-8s %(message)s')\n",
"\n",
"model = SARSingleNode(\n",
"model = SAR(\n",
" similarity_type=\"jaccard\", \n",
" time_decay_coefficient=30, \n",
" time_now=None, \n",
Expand Down
4 changes: 2 additions & 2 deletions examples/06_benchmarks/benchmark_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from recommenders.utils.timer import Timer
from recommenders.utils.spark_utils import start_or_get_spark
from recommenders.models.sar.sar_singlenode import SARSingleNode
from recommenders.models.sar import SAR
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.models.surprise.surprise_utils import (
Expand Down Expand Up @@ -304,7 +304,7 @@ def prepare_training_sar(train, test):


def train_sar(params, data):
model = SARSingleNode(**params)
model = SAR(**params)
model.set_index(data)
with Timer() as t:
model.fit(data)
Expand Down
34 changes: 17 additions & 17 deletions tests/unit/recommenders/models/test_sar_singlenode.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@
import urllib

from recommenders.utils.constants import DEFAULT_PREDICTION_COL
from recommenders.models.sar.sar_singlenode import SARSingleNode
from recommenders.models.sar import SAR


def _csv_reader_url(url, delimiter=",", encoding="utf-8"):
Expand All @@ -32,7 +32,7 @@ def load_userpred(file, k=10):


def test_init(header):
model = SARSingleNode(similarity_type="jaccard", **header)
model = SAR(similarity_type="jaccard", **header)

assert model.col_user == "UserId"
assert model.col_item == "MovieId"
Expand All @@ -50,7 +50,7 @@ def test_init(header):
"similarity_type, timedecay_formula", [("jaccard", False), ("lift", True)]
)
def test_fit(similarity_type, timedecay_formula, train_test_dummy_timestamp, header):
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type, timedecay_formula=timedecay_formula, **header
)
trainset, testset = train_test_dummy_timestamp
Expand All @@ -63,7 +63,7 @@ def test_fit(similarity_type, timedecay_formula, train_test_dummy_timestamp, hea
def test_predict(
similarity_type, timedecay_formula, train_test_dummy_timestamp, header
):
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type, timedecay_formula=timedecay_formula, **header
)
trainset, testset = train_test_dummy_timestamp
Expand All @@ -78,7 +78,7 @@ def test_predict(


def test_predict_all_items(train_test_dummy_timestamp, header):
model = SARSingleNode(**header)
model = SAR(**header)
trainset, _ = train_test_dummy_timestamp
model.fit(trainset)

Expand Down Expand Up @@ -110,7 +110,7 @@ def test_sar_item_similarity(
threshold, similarity_type, file, demo_usage_data, sar_settings, header
):

model = SARSingleNode(
model = SAR(
similarity_type=similarity_type,
timedecay_formula=False,
time_decay_coefficient=30,
Expand Down Expand Up @@ -160,7 +160,7 @@ def test_sar_item_similarity(

def test_user_affinity(demo_usage_data, sar_settings, header):
time_now = demo_usage_data[header["col_timestamp"]].max()
model = SARSingleNode(
model = SAR(
similarity_type="cooccurrence",
timedecay_formula=True,
time_decay_coefficient=30,
Expand All @@ -185,7 +185,7 @@ def test_user_affinity(demo_usage_data, sar_settings, header):

# Set time_now to 60 days later
two_months = 2 * 30 * (24 * 60 * 60)
model = SARSingleNode(
model = SAR(
similarity_type="cooccurrence",
timedecay_formula=True,
time_decay_coefficient=30,
Expand Down Expand Up @@ -217,7 +217,7 @@ def test_recommend_k_items(
threshold, similarity_type, file, header, sar_settings, demo_usage_data
):
time_now = demo_usage_data[header["col_timestamp"]].max()
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type,
timedecay_formula=True,
time_decay_coefficient=30,
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_recommend_k_items(

def test_get_item_based_topk(header, pandas_dummy):

sar = SARSingleNode(**header)
sar = SAR(**header)
sar.fit(pandas_dummy)

# test with just items provided
Expand Down Expand Up @@ -307,7 +307,7 @@ def test_get_popularity_based_topk(header):
}
)

sar = SARSingleNode(**header)
sar = SAR(**header)
sar.fit(train_df)

expected = pd.DataFrame(dict(MovieId=[4, 1, 2], prediction=[4, 3, 2]))
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_get_normalized_scores(header):
}
)

model = SARSingleNode(**header, timedecay_formula=True, normalize=True)
model = SAR(**header, timedecay_formula=True, normalize=True)
model.fit(train)
actual = model.score(test, remove_seen=True)
expected = np.array(
Expand Down Expand Up @@ -387,7 +387,7 @@ def test_match_similarity_type_from_json_file(header):

params.update(header)

model = SARSingleNode(**params)
model = SAR(**params)

train = pd.DataFrame(
{
Expand All @@ -403,7 +403,7 @@ def test_match_similarity_type_from_json_file(header):


def test_dataset_with_duplicates(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 2],
Expand All @@ -416,7 +416,7 @@ def test_dataset_with_duplicates(header):


def test_get_topk_most_similar_users(header):
model = SARSingleNode(**header)
model = SAR(**header)
# 1, 2, and 4 used the same items, but 1 and 2 have the same ratings also
train = pd.DataFrame(
{
Expand All @@ -441,7 +441,7 @@ def test_get_topk_most_similar_users(header):


def test_item_frequencies(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4],
Expand All @@ -454,7 +454,7 @@ def test_item_frequencies(header):


def test_user_frequencies(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4],
Expand Down

0 comments on commit 32107f1

Please sign in to comment.