Skip to content

Commit

Permalink
add recommend & train utils
Browse files Browse the repository at this point in the history
  • Loading branch information
mdekstrand committed May 6, 2024
1 parent 8ef29be commit cb0d7d2
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 0 deletions.
70 changes: 70 additions & 0 deletions utils/recommend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
"""
Load a trained model and generate recommendations for basic timing info.
Usage:
test-algo.py [options] [-d DATA] MODEL USER...
test-algo.py [options] [-d DATA] MODEL --random-users=N
Options:
-v, --verbose
Enable verbose logging.
-d DATA, --dataset=DATA
Train with DATA [default: ml-latest-small].
-o FILE, --output=FILE
Write recommendations to FILE.
-r N, --random-users=N
Recommend for N random users.
-N N, --num-recs=N
Generate N recommendations per user [default: 10].
"""

import logging
import pickle
import sys

import seedbank
from docopt import docopt

from lenskit import batch
from lenskit.algorithms import Recommender
from lenskit.algorithms.item_knn import ItemItem
from lenskit.datasets import MovieLens

_log = logging.getLogger("test-algo")


def main(args):
    """
    Load a pickled model and generate recommendations for a set of users.

    Args:
        args: The parsed command-line options, indexed by option or argument
            name.  Reads ``--verbose``, ``--dataset``, ``MODEL``,
            ``--random-users``, ``USER``, ``--num-recs`` and ``--output``.

    The users are either the explicit ``USER`` arguments or, when
    ``--random-users`` is given, a random sample of distinct users drawn from
    the data set's ratings.  If ``--output`` is set, the recommendations are
    written to that file as CSV.
    """
    level = logging.DEBUG if args["--verbose"] else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    data = args["--dataset"]
    _log.info("loading data %s", data)
    ml = MovieLens(f"data/{data}")

    _log.info("reading model from %s", args["MODEL"])
    # NOTE: unpickling can execute arbitrary code; only load model files that
    # come from a trusted source (e.g. ones written by train-model.py).
    with open(args["MODEL"], "rb") as f:
        algo = pickle.load(f)

    rng = seedbank.numpy_rng()

    if args["--random-users"]:
        n = int(args["--random-users"])
        all_users = ml.ratings["user"].unique()
        # Sampling without replacement cannot draw more users than exist, so
        # cap the request instead of failing.
        if n > len(all_users):
            _log.warning("only %d users available, requested %d", len(all_users), n)
            n = len(all_users)
        _log.info("selecting %d random users", n)
        # replace=False: the default samples with replacement, which would
        # yield duplicate users and fewer than N distinct ones.
        users = rng.choice(all_users, n, replace=False)
    else:
        _log.info("using %d specified users", len(args["USER"]))
        users = [int(u) for u in args["USER"]]

    recs = batch.recommend(algo, users, int(args["--num-recs"]), n_jobs=1)
    _log.info("recommendation complete")

    outf = args["--output"]
    if outf:
        _log.info("saving %d recs to %s", len(recs), outf)
        recs.to_csv(outf, index=False)


if __name__ == "__main__":
    # Parse the command line against the module usage text, then run.
    main(docopt(__doc__))
53 changes: 53 additions & 0 deletions utils/train-model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
"""
Train a recommendation model and save it to disk.
Usage:
test-algo.py [options] [-d DATA] --item-item FILE
Options:
-v, --verbose
Enable verbose logging.
-d DATA, --dataset=DATA
Train with DATA [default: ml-latest-small].
"""

import logging
import pickle
import sys

from docopt import docopt

from lenskit.algorithms import Recommender
from lenskit.algorithms.item_knn import ItemItem
from lenskit.datasets import MovieLens

_log = logging.getLogger("train-model")


def main(args):
    """
    Train a recommendation model on a data set and pickle it to disk.

    Args:
        args: The parsed command-line options, indexed by option or argument
            name.  Reads ``--verbose``, ``--dataset``, ``--item-item`` and
            ``FILE``.

    Raises:
        SystemExit: With status 2 if no algorithm option was selected.
    """
    # Honor -v/--verbose, which the usage text documents; .get() keeps
    # working for callers whose mapping has no such key.
    level = logging.DEBUG if args.get("--verbose") else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    data = args["--dataset"]
    _log.info("loading data %s", data)
    ml = MovieLens(f"data/{data}")

    if args["--item-item"]:
        algo = ItemItem(20)
    else:
        _log.error("no algorithm specified")
        sys.exit(2)

    algo = Recommender.adapt(algo)
    _log.info("training algorithm")
    algo.fit(ml.ratings)
    _log.info("training complete")

    out_path = args["FILE"]
    _log.info("saving to %s", out_path)
    with open(out_path, "wb") as f:
        pickle.dump(algo, f, protocol=5)


if __name__ == "__main__":
    # Parse the command line against the module usage text, then run.
    main(docopt(__doc__))

0 comments on commit cb0d7d2

Please sign in to comment.