Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace SARSingleNode with SAR #1768

Merged
merged 1 commit into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/00_quick_start/sar_movielens_with_azureml.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@
"from recommenders.datasets import movielens\n",
"from recommenders.datasets.python_splitters import python_stratified_split\n",
"from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n",
"from recommenders.models.sar.sar_singlenode import SARSingleNode\n",
"from recommenders.models.sar import SAR\n",
"\n",
"\n",
"logging.basicConfig(level=logging.DEBUG, \n",
Expand Down Expand Up @@ -384,7 +384,7 @@
"train, test = python_stratified_split(data, ratio=0.75, col_user=header[\"col_user\"], col_item=header[\"col_item\"], seed=42)\n",
"\n",
"# instantiate the model\n",
"model = SARSingleNode(\n",
"model = SAR(\n",
" similarity_type=\"jaccard\", \n",
" time_decay_coefficient=30, \n",
" time_now=None, \n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
"from recommenders.datasets import movielens\n",
"from recommenders.datasets.python_splitters import python_stratified_split\n",
"from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n",
"from recommenders.models.sar.sar_singlenode import SARSingleNode\n",
"from recommenders.models.sar import SAR\n",
"\n",
"print(\"System version: {}\".format(sys.version))\n",
"print(\"Pandas version: {}\".format(pd.__version__))"
Expand Down Expand Up @@ -339,7 +339,7 @@
"logging.basicConfig(level=logging.DEBUG, \n",
" format='%(asctime)s %(levelname)-8s %(message)s')\n",
"\n",
"model = SARSingleNode(\n",
"model = SAR(\n",
" similarity_type=\"jaccard\", \n",
" time_decay_coefficient=30, \n",
" time_now=None, \n",
Expand Down
4 changes: 2 additions & 2 deletions examples/06_benchmarks/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from recommenders.utils.timer import Timer
from recommenders.utils.spark_utils import start_or_get_spark
from recommenders.models.sar.sar_singlenode import SARSingleNode
from recommenders.models.sar import SAR
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.models.surprise.surprise_utils import (
Expand Down Expand Up @@ -304,7 +304,7 @@ def prepare_training_sar(train, test):


def train_sar(params, data):
model = SARSingleNode(**params)
model = SAR(**params)
model.set_index(data)
with Timer() as t:
model.fit(data)
Expand Down
34 changes: 17 additions & 17 deletions tests/unit/recommenders/models/test_sar_singlenode.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import urllib

from recommenders.utils.constants import DEFAULT_PREDICTION_COL
from recommenders.models.sar.sar_singlenode import SARSingleNode
from recommenders.models.sar import SAR


def _csv_reader_url(url, delimiter=",", encoding="utf-8"):
Expand All @@ -32,7 +32,7 @@ def load_userpred(file, k=10):


def test_init(header):
model = SARSingleNode(similarity_type="jaccard", **header)
model = SAR(similarity_type="jaccard", **header)

assert model.col_user == "UserId"
assert model.col_item == "MovieId"
Expand All @@ -50,7 +50,7 @@ def test_init(header):
"similarity_type, timedecay_formula", [("jaccard", False), ("lift", True)]
)
def test_fit(similarity_type, timedecay_formula, train_test_dummy_timestamp, header):
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type, timedecay_formula=timedecay_formula, **header
)
trainset, testset = train_test_dummy_timestamp
Expand All @@ -63,7 +63,7 @@ def test_fit(similarity_type, timedecay_formula, train_test_dummy_timestamp, hea
def test_predict(
similarity_type, timedecay_formula, train_test_dummy_timestamp, header
):
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type, timedecay_formula=timedecay_formula, **header
)
trainset, testset = train_test_dummy_timestamp
Expand All @@ -78,7 +78,7 @@ def test_predict(


def test_predict_all_items(train_test_dummy_timestamp, header):
model = SARSingleNode(**header)
model = SAR(**header)
trainset, _ = train_test_dummy_timestamp
model.fit(trainset)

Expand Down Expand Up @@ -110,7 +110,7 @@ def test_sar_item_similarity(
threshold, similarity_type, file, demo_usage_data, sar_settings, header
):

model = SARSingleNode(
model = SAR(
similarity_type=similarity_type,
timedecay_formula=False,
time_decay_coefficient=30,
Expand Down Expand Up @@ -160,7 +160,7 @@ def test_sar_item_similarity(

def test_user_affinity(demo_usage_data, sar_settings, header):
time_now = demo_usage_data[header["col_timestamp"]].max()
model = SARSingleNode(
model = SAR(
similarity_type="cooccurrence",
timedecay_formula=True,
time_decay_coefficient=30,
Expand All @@ -185,7 +185,7 @@ def test_user_affinity(demo_usage_data, sar_settings, header):

# Set time_now to 60 days later
two_months = 2 * 30 * (24 * 60 * 60)
model = SARSingleNode(
model = SAR(
similarity_type="cooccurrence",
timedecay_formula=True,
time_decay_coefficient=30,
Expand Down Expand Up @@ -217,7 +217,7 @@ def test_recommend_k_items(
threshold, similarity_type, file, header, sar_settings, demo_usage_data
):
time_now = demo_usage_data[header["col_timestamp"]].max()
model = SARSingleNode(
model = SAR(
similarity_type=similarity_type,
timedecay_formula=True,
time_decay_coefficient=30,
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_recommend_k_items(

def test_get_item_based_topk(header, pandas_dummy):

sar = SARSingleNode(**header)
sar = SAR(**header)
sar.fit(pandas_dummy)

# test with just items provided
Expand Down Expand Up @@ -307,7 +307,7 @@ def test_get_popularity_based_topk(header):
}
)

sar = SARSingleNode(**header)
sar = SAR(**header)
sar.fit(train_df)

expected = pd.DataFrame(dict(MovieId=[4, 1, 2], prediction=[4, 3, 2]))
Expand Down Expand Up @@ -337,7 +337,7 @@ def test_get_normalized_scores(header):
}
)

model = SARSingleNode(**header, timedecay_formula=True, normalize=True)
model = SAR(**header, timedecay_formula=True, normalize=True)
model.fit(train)
actual = model.score(test, remove_seen=True)
expected = np.array(
Expand Down Expand Up @@ -387,7 +387,7 @@ def test_match_similarity_type_from_json_file(header):

params.update(header)

model = SARSingleNode(**params)
model = SAR(**params)

train = pd.DataFrame(
{
Expand All @@ -403,7 +403,7 @@ def test_match_similarity_type_from_json_file(header):


def test_dataset_with_duplicates(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 2],
Expand All @@ -416,7 +416,7 @@ def test_dataset_with_duplicates(header):


def test_get_topk_most_similar_users(header):
model = SARSingleNode(**header)
model = SAR(**header)
# 1, 2, and 4 used the same items, but 1 and 2 have the same ratings also
train = pd.DataFrame(
{
Expand All @@ -441,7 +441,7 @@ def test_get_topk_most_similar_users(header):


def test_item_frequencies(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4],
Expand All @@ -454,7 +454,7 @@ def test_item_frequencies(header):


def test_user_frequencies(header):
model = SARSingleNode(**header)
model = SAR(**header)
train = pd.DataFrame(
{
header["col_user"]: [1, 1, 2, 2, 3, 3, 3, 3, 4, 4],
Expand Down