#!/usr/bin/env python3
# utils/recommend.py
"""
Load a pickled model and generate recommendations with it for basic timing info.

Usage:
    recommend.py [options] [-d DATA] MODEL USER...
    recommend.py [options] [-d DATA] MODEL --random-users=N

Options:
    -v, --verbose
        Enable verbose logging.
    -d DATA, --dataset=DATA
        Draw random users from DATA [default: ml-latest-small].
    -o FILE, --output=FILE
        Write recommendations to FILE.
    -r N, --random-users=N
        Recommend for N random users.
    -N N, --num-recs=N
        Generate N recommendations per user [default: 10].
"""

import logging
import pickle
import sys

import seedbank
from docopt import docopt

from lenskit import batch
from lenskit.algorithms import Recommender
from lenskit.algorithms.item_knn import ItemItem
from lenskit.datasets import MovieLens

_log = logging.getLogger("recommend")


def _select_users(args, ml):
    """Return the user IDs to recommend for, as requested on the command line.

    With ``--random-users=N``, N distinct user IDs are drawn from the ``user``
    column of ``ml.ratings``; otherwise the positional ``USER`` arguments are
    converted to integers.
    """
    if args["--random-users"]:
        n = int(args["--random-users"])
        candidates = ml.ratings["user"].unique()
        if n > len(candidates):
            # Sampling without replacement cannot return more users than exist.
            _log.warning("only %d users available, cannot select %d", len(candidates), n)
            n = len(candidates)
        _log.info("selecting %d random users", n)
        rng = seedbank.numpy_rng()
        # replace=False: the default samples with replacement, which could
        # pick the same user several times.
        return rng.choice(candidates, n, replace=False)

    _log.info("using %d specified users", len(args["USER"]))
    return [int(u) for u in args["USER"]]


def main(args):
    """Run the recommender described by the parsed docopt arguments.

    Args:
        args: The dictionary returned by ``docopt(__doc__)``; the keys read are
            ``--verbose``, ``--dataset``, ``MODEL``, ``--random-users``,
            ``USER``, ``--num-recs`` and ``--output``.

    Recommendations are written as CSV only when ``--output`` is given;
    otherwise they are computed and discarded.
    """
    level = logging.DEBUG if args["--verbose"] else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    data = args["--dataset"]
    _log.info("loading data %s", data)
    ml = MovieLens(f"data/{data}")

    _log.info("reading model from %s", args["MODEL"])
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    with open(args["MODEL"], "rb") as f:
        algo = pickle.load(f)

    users = _select_users(args, ml)

    recs = batch.recommend(algo, users, int(args["--num-recs"]), n_jobs=1)
    _log.info("recommendation complete")

    outf = args["--output"]
    if outf:
        _log.info("saving %d recs to %s", len(recs), outf)
        recs.to_csv(outf, index=False)


if __name__ == "__main__":
    args = docopt(__doc__)
    main(args)
#!/usr/bin/env python3
# utils/train-model.py
"""
Train a recommendation model and save it to disk.

Usage:
    train-model.py [options] [-d DATA] --item-item FILE

Options:
    -v, --verbose
        Enable verbose logging.
    -d DATA, --dataset=DATA
        Train with DATA [default: ml-latest-small].
"""

import logging
import pickle
import sys

from docopt import docopt

from lenskit.algorithms import Recommender
from lenskit.algorithms.item_knn import ItemItem
from lenskit.datasets import MovieLens

_log = logging.getLogger("train-model")


def main(args):
    """Train the selected algorithm and pickle the fitted model.

    Args:
        args: The dictionary returned by ``docopt(__doc__)``; the keys read are
            ``--verbose``, ``--dataset``, ``--item-item`` and ``FILE``.

    Exits with status 2 if no algorithm flag is set.
    """
    # Honour the documented -v/--verbose flag instead of hard-coding INFO.
    level = logging.DEBUG if args["--verbose"] else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    data = args["--dataset"]
    _log.info("loading data %s", data)
    ml = MovieLens(f"data/{data}")

    if args["--item-item"]:
        algo = ItemItem(20)
    else:
        _log.error("no algorithm specified")
        sys.exit(2)

    algo = Recommender.adapt(algo)
    _log.info("training algorithm")
    algo.fit(ml.ratings)
    _log.info("training complete")

    out_path = args["FILE"]
    _log.info("saving to %s", out_path)
    with open(out_path, "wb") as f:
        pickle.dump(algo, f, protocol=5)


if __name__ == "__main__":
    args = docopt(__doc__)
    main(args)