From 0e9fa57bf6eff16edab0fd05018c6932a1094d70 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sat, 4 May 2024 08:53:17 -0600 Subject: [PATCH] update bib --- docs/lenskit.bib | 102 +++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/lenskit.bib b/docs/lenskit.bib index 8f3c5a215..39435da54 100644 --- a/docs/lenskit.bib +++ b/docs/lenskit.bib @@ -50,18 +50,18 @@ @inproceedings{caoMakingSystemsForget2015 author = {Cao, Yinzhi and Yang, Junfeng}, year = {2015}, month = may, - publisher = {{IEEE}}, + publisher = {IEEE}, abstract = {Today's systems produce a wealth of data every day, and the data further generates more data, i.e., the derived data, forming into a complex data propagation network, defined as the data's lineage. There are many reasons for users and administrators to forget certain data including the data's lineage. From the privacy perspective, a system may leak private information of certain users, and those users unhappy about privacy leaks naturally want to forget their data and its lineage. From the security perspective, an anomaly detection system can be polluted by adversaries through injecting manually crafted data into the training set. Therefore, we envision forgetting systems, capable of completely forgetting certain data and its lineage. In this paper, we focus on making learning systems forget, the process of which is defined as machine unlearning or unlearning. To perform unlearning upon learning system, we present general unlearning criteria, i.e., converting a learning system or part of it into a summation form of statistical query learning model, and updating all the summations to achieve unlearning. Then, we integrate our unlearning criteria into an unlearning architecture that interacts with all the components of a learning system, such as sample clustering and feature selection. 
To demonstrate our unlearning criteria and architecture, we select four real-world learning systems, including an item-item recommendation system, an online social network spam filter, and a malware detection system. These systems are first exposed to an adversarial environment, e.g., if the system is potentially vulnerable to training data pollution, we first pollute the training data set and show that the detection rate drops significantly. Then, we apply our unlearning technique upon those affected systems, either polluted or leaking private information. Our results show that after unlearning, the detection rate of a polluted system increases back to the one before pollution, and a system leaking a particular user's private information completely forgets that information.}, keywords = {Research Using LensKit,Zotero Import (Mar 30),Zotero Import (Mar 30)/Group Libraries/LensKit} } @inproceedings{carvalhoFAiRFrameworkAnalyses2018, title = {{{FAiR}}: {{A Framework}} for {{Analyses}} and {{Evaluations}} on {{Recommender Systems}}}, - booktitle = {Computational {{Science}} and {{Its Applications}} {\textendash} {{ICCSA}} 2018}, + booktitle = {Computational {{Science}} and {{Its Applications}} -- {{ICCSA}} 2018}, author = {Carvalho, Diego and Silva, N{\'i}collas and Silveira, Thiago and Mour{\~a}o, Fernando and Pereira, Adriano and Dias, Diego and Rocha, Leonardo}, year = {2018}, pages = {383--397}, - publisher = {{Springer International Publishing}}, + publisher = {Springer International Publishing}, doi = {10.1007/978-3-319-95168-3_26}, abstract = {Recommender systems (RSs) have become essential tools in e-commerce applications, helping users in the decision-making process. Evaluation on these tools is, however, a major divergence point nowadays, since there is no consensus regarding which metrics are necessary to consolidate new RSs. For this reason, distinct frameworks have been developed to ease the deployment of RSs in research and/or production environments. 
In the present work, we perform an extensive study of the most popular evaluation metrics, organizing them into three groups: Effectiveness-based, Complementary Dimensions of Quality and Domain Profiling. Further, we consolidate a framework named FAiR to help researchers in evaluating their RSs using these metrics, besides identifying the characteristics of data collections that may intrinsically affect RSs performance. FAiR is compatible with the output format of the main existing RSs libraries (i.e., MyMediaLite and LensKit).}, keywords = {Research Using LensKit} @@ -76,11 +76,11 @@ @article{dacremaTroublingAnalysisReproducibility2021 volume = {39}, number = {2}, pages = {1--49}, - publisher = {{Association for Computing Machinery}}, - address = {{New York, NY, USA}}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, issn = {1094-9224}, doi = {10.1145/3434185}, - abstract = {The design of algorithms that generate personalized ranked item lists is a central topic of research in the field of recommender systems. In the past few years, in particular, approaches based on deep learning (neural) techniques have become dominant in the literature. For all of them, substantial progress over the state-of-the-art is claimed. However, indications exist of certain problems in today's research practice, e.g., with respect to the choice and optimization of the baselines used for comparison, raising questions about the published claims. To obtain a better understanding of the actual progress, we have compared recent results in the area of neural recommendation approaches based on collaborative filtering against a consistent set of existing simple baselines. 
The worrying outcome of the analysis of these recent works{\textemdash}all were published at prestigious scientific conferences between 2015 and 2018{\textemdash}is that 11 of the 12 reproducible neural approaches can be outperformed by conceptually simple methods, e.g., based on the nearest-neighbor heuristic or linear models. None of the computationally complex neural methods was actually consistently better than already existing learning-based techniques, e.g., using matrix factorization or linear models. In our analysis, we discuss common issues in today's research practice, which, despite the many papers that are published on the topic, have apparently led the field to a certain level of stagnation.1}, + abstract = {The design of algorithms that generate personalized ranked item lists is a central topic of research in the field of recommender systems. In the past few years, in particular, approaches based on deep learning (neural) techniques have become dominant in the literature. For all of them, substantial progress over the state-of-the-art is claimed. However, indications exist of certain problems in today's research practice, e.g., with respect to the choice and optimization of the baselines used for comparison, raising questions about the published claims. To obtain a better understanding of the actual progress, we have compared recent results in the area of neural recommendation approaches based on collaborative filtering against a consistent set of existing simple baselines. The worrying outcome of the analysis of these recent works---all were published at prestigious scientific conferences between 2015 and 2018---is that 11 of the 12 reproducible neural approaches can be outperformed by conceptually simple methods, e.g., based on the nearest-neighbor heuristic or linear models. 
None of the computationally complex neural methods was actually consistently better than already existing learning-based techniques, e.g., using matrix factorization or linear models. In our analysis, we discuss common issues in today's research practice, which, despite the many papers that are published on the topic, have apparently led the field to a certain level of stagnation.1}, keywords = {evaluation,LensKit References,reproducibility Recommender systems deep learning} } @@ -132,7 +132,7 @@ @inproceedings{ekstrandExploringAuthorGender2018 booktitle = {Proceedings of the {{Twelfth ACM Conference}} on {{Recommender Systems}}}, author = {Ekstrand, Michael D and Tian, Mucun and Imran Kazi, Mohammed R and Mehrpouyan, Hoda and Kluver, Daniel}, year = {2018}, - publisher = {{ACM}}, + publisher = {ACM}, keywords = {My Papers,Research Using LensKit} } @@ -150,7 +150,7 @@ @techreport{ekstrandRecommenderSystemsNotation2019 author = {Ekstrand, Michael D and Konstan, Joseph A}, year = {2019}, number = {177}, - institution = {{Boise State University}}, + institution = {Boise State University}, doi = {10.18122/cs_facpubs/177/boisestate}, urldate = {2020-05-11}, abstract = {As the field of recommender systems has developed, authors have used a myriad of notations for describing the mathematical workings of recommendation algorithms. These notations appear in research papers, books, lecture notes, blog posts, and software documentation. The disciplinary diversity of the field has not contributed to consistency in notation; scholars whose home base is in information retrieval have different habits and expectations than those in machine learning or human-computer interaction. In the course of years of teaching and research on recommender systems, we have seen the value in adopting a consistent notation across our work. This has been particularly highlighted in our development of the Recommender Systems MOOC on Coursera (Konstan et al. 
2015), as we need to explain a wide variety of algorithms and our learners are not well-served by changing notation between algorithms. In this paper, we describe the notation we have adopted in our work, along with its justification and some discussion of considered alternatives. We present this in hope that it will be useful to others writing and teaching about recommender systems. This notation has served us well for some time now, in research, online education, and traditional classroom instruction. We feel it is ready for broad use.}, @@ -164,7 +164,7 @@ @inproceedings{ekstrandRethinkingRecommenderResearch2011 author = {Ekstrand, Michael and Ludwig, Michael and Konstan, Joseph A. and Riedl, John}, year = {2011}, pages = {133--140}, - publisher = {{ACM}}, + publisher = {ACM}, doi = {10.1145/2043932.2043958}, urldate = {2012-04-07}, abstract = {Recommender systems research is being slowed by the difficulty of replicating and comparing research results. Published research uses various experimental methodologies and metrics that are difficult to compare. It also often fails to sufficiently document the details of proposed algorithms or the evaluations employed. Researchers waste time reimplementing well-known algorithms, and the new implementations may miss key details from the original algorithm or its subsequent refinements. When proposing new algorithms, researchers should compare them against finely-tuned implementations of the leading prior algorithms using state-of-the-art evaluation methodologies. With few exceptions, published algorithmic improvements in our field should be accompanied by working code in a standard framework, including test harnesses to reproduce the described results. To that end, we present the design and freely distributable source code of LensKit, a flexible platform for reproducible recommender systems research. 
LensKit provides carefully tuned implementations of the leading collaborative filtering algorithms, APIs for common recommender system use cases, and an evaluation framework for performing reproducible offline evaluations of algorithms. We demonstrate the utility of LensKit by replicating and extending a set of prior comparative studies of recommender algorithms --- showing limitations in some of the original results --- and by investigating a question recently raised by a leader in the recommender systems community on problems with error-based prediction evaluation.}, @@ -178,7 +178,7 @@ @inproceedings{ekstrandSturgeonCoolKids2017 author = {Ekstrand, Michael D and Mahant, Vaibhav}, year = {2017}, month = may, - publisher = {{AAAI Press}}, + publisher = {AAAI Press}, abstract = {Top-N evaluation of recommender systems, typically carried out using metrics from information retrieval or machine learning, has several challenges. Two of these challenges are popularity bias, where the evaluation intrinsically favors algorithms that recommend popular items, and misclassified decoys, where items for which no user relevance is known are actually relevant to the user, but the evaluation is unaware and penalizes the recommender for suggesting them. One strategy for mitigating the misclassified decoy problem is the one-plus-random evaluation strategy and its generalization, which we call random decoys. 
In this work, we explore the random decoy strategy through both a theoretical treatment and an empirical study, but find little evidence to guide its tuning and show that it has complex and deleterious interactions with popularity bias.}, keywords = {CAREER,Dagstuhl Perspectives IR Eval for RecSys,My Papers,Research Using LensKit,Zotero Import (Mar 30),Zotero Import (Mar 30)/My Library,Zotero Import (Mar 30)/My Library/My Papers} } @@ -190,7 +190,7 @@ @misc{ekstrandTestingRecommenders2016 month = feb, journal = {A Practical Guide to Building Recommender Systems}, urldate = {2017-01-06}, - abstract = {Why Test? When I met fellow GroupLens alum Sean McNee, he had a bit of advice for me: Write tests for your code. It took me some time to grasp the wisdom of this {\textemdash} after all, isn't it just re{\dots}} + abstract = {Why Test? When I met fellow GroupLens alum Sean McNee, he had a bit of advice for me: Write tests for your code. It took me some time to grasp the wisdom of this --- after all, isn't it just re{\dots}} } @inproceedings{ekstrandWhenRecommendersFail2012, @@ -201,8 +201,8 @@ @inproceedings{ekstrandWhenRecommendersFail2012 year = {2012}, series = {{{RecSys}} '12}, pages = {233--236}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2365952.2366002}, urldate = {2012-12-13}, abstract = {Hybrid recommender systems --- systems using multiple algorithms together to improve recommendation quality --- have been well-known for many years and have shown good performance in recent demonstrations such as the NetFlix Prize. Modern hybridization techniques, such as feature-weighted linear stacking, take advantage of the hypothesis that the relative performance of recommenders varies by circumstance and attempt to optimize each item score to maximize the strengths of the component recommenders. 
Less attention, however, has been paid to understanding what these strengths and failure modes are. Understanding what causes particular recommenders to fail will facilitate better selection of the component recommenders for future hybrid systems and a better understanding of how individual recommender personalities can be harnessed to improve the recommender user experience. We present an analysis of the predictions made by several well-known recommender algorithms on the MovieLens 10M data set, showing that for many cases in which one algorithm fails, there is another that will correctly predict the rating.}, @@ -243,8 +243,8 @@ @inproceedings{gantnerMyMediaLiteFreeRecommender2011 year = {2011}, series = {{{RecSys}} '11}, pages = {305--308}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2043932.2043989}, abstract = {MyMediaLite is a fast and scalable, multi-purpose library of recommender system algorithms, aimed both at recommender system researchers and practitioners. It addresses two common scenarios in collaborative filtering: rating prediction (e.g. on a scale of 1 to 5 stars) and item prediction from positive-only implicit feedback (e.g. from clicks or purchase actions). The library offers state-of-the-art algorithms for those two tasks. Programs that expose most of the library's functionality, plus a GUI demo, are included in the package. Efficient data structures and a common API are used by the implemented algorithms, and may be used to implement further algorithms. The API also contains methods for real-time updates and loading/storing of already trained recommender models. MyMediaLite is free/open source software, distributed under the terms of the GNU General Public License (GPL). 
Its methods have been used in four different industrial field trials of the MyMedia project, including one trial involving over 50,000 households.}, isbn = {978-1-4503-0683-6} @@ -315,7 +315,7 @@ @inproceedings{huCollaborativeFilteringImplicit2008a year = {2008}, month = dec, pages = {263--272}, - publisher = {{ieeexplore.ieee.org}}, + publisher = {ieeexplore.ieee.org}, doi = {10.1109/ICDM.2008.22}, abstract = {A common task of recommender systems is to improve customer experience through personalized recommendations based on prior implicit feedback. These systems passively track different sorts of user behavior, such as purchase history, watching habits and browsing activity, in order to model user preferences. Unlike the much more extensively researched explicit feedback, we do not have any direct input from the users regarding their preferences. In particular, we lack substantial evidence on which products consumer dislike. In this work we identify unique properties of implicit feedback datasets. We propose treating the data as indication of positive and negative preference associated with vastly varying confidence levels. This leads to a factor model which is especially tailored for implicit feedback recommenders. We also suggest a scalable optimization procedure, which scales linearly with the data size. The algorithm is used successfully within a recommender system for television shows. It compares favorably with well tuned implementations of other known methods. 
In addition, we offer a novel way to give explanations to recommendations given by this factor model.}, keywords = {browsing activity,collaborative filtering,Collaborative filtering,customer experience,Data mining,Demography,electronic commerce,feedback,Filtering,History,implicit feedback,implicit feedback datasets,International collaboration,LensKit References,Motion pictures,Negative feedback,personalized recommendations,purchase history,recommender system,recommender systems,Recommender systems,scalable optimization procedure,TV,user preferences,Watches,watching habits} @@ -340,7 +340,7 @@ @book{kluverBookLens2014 title = {{{BookLens}}}, author = {Kluver, Daniel and Ludwig, Michael and Davies, Richard T. and Konstan, Joseph A. and Riedl, John T.}, year = {2014}, - publisher = {{GroupLens Research, University of Minnesota}} + publisher = {GroupLens Research, University of Minnesota} } @inproceedings{kluverEvaluatingRecommenderBehavior2014, @@ -349,7 +349,7 @@ @inproceedings{kluverEvaluatingRecommenderBehavior2014 author = {Kluver, Daniel and Konstan, Joseph A.}, year = {2014}, month = oct, - publisher = {{ACM}}, + publisher = {ACM}, doi = {10.1145/2645710.2645742}, keywords = {CAREER,Research Using LensKit,Zotero Import (Mar 30),Zotero Import (Mar 30)/Group Libraries/LensKit} } @@ -361,8 +361,8 @@ @inproceedings{kluverHowManyBits2012 year = {2012}, series = {{{RecSys}} '12}, pages = {99--106}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2365952.2365974}, urldate = {2013-09-12}, abstract = {Most recommender systems assume user ratings accurately represent user preferences. However, prior research shows that user ratings are imperfect and noisy. Moreover, this noise limits the measurable predictive power of any recommender system. We propose an information theoretic framework for quantifying the preference information contained in ratings and predictions. 
We computationally explore the properties of our model and apply our framework to estimate the efficiency of different rating scales for real world datasets. We then estimate how the amount of information predictions give to users is related to the scale ratings are collected on. Our findings suggest a tradeoff in rating scale granularity: while previous research indicates that coarse scales (such as thumbs up / thumbs down) take less time, we find that ratings with these scales provide less predictive value to users. We introduce a new measure, preference bits per second, to quantitatively reconcile this tradeoff.}, @@ -407,8 +407,8 @@ @inproceedings{lamNumbaLLVMbasedPython2015 year = {2015}, series = {{{LLVM}} '15}, pages = {7:1--7:6}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2833157.2833162}, urldate = {2019-05-30}, abstract = {Dynamic, interpreted languages, like Python, are attractive for domain-experts and scientists experimenting with new ideas. However, the performance of the interpreter is often a barrier when scaling to larger data sets. This paper presents a just-in-time compiler for Python that focuses in scientific and array-oriented computing. Starting with the simple syntax of Python, Numba compiles a subset of the language into efficient machine code that is comparable in performance to a traditional compiled language. 
In addition, we share our experience in building a JIT compiler using LLVM[1].}, @@ -430,7 +430,7 @@ @inproceedings{mckinneyDataStructuresStatistical2010 year = {2010}, volume = {445}, pages = {51--56}, - publisher = {{Austin, TX}}, + publisher = {Austin, TX}, keywords = {LensKit References,Software} } @@ -438,7 +438,7 @@ @book{mckinneyPythonDataAnalysis2018 title = {Python for {{Data Analysis}}: {{Data Wrangling}} with Pandas, {{NumPy}}, and {{IPython}}}, author = {McKinney, Wes}, year = {2018}, - publisher = {{O'Reilly}}, + publisher = {O'Reilly}, isbn = {978-1-4919-5766-0}, keywords = {LensKit References,Software} } @@ -452,8 +452,8 @@ @article{ndcg volume = {20}, number = {4}, pages = {422--446}, - publisher = {{Association for Computing Machinery}}, - address = {{New York, NY, USA}}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, issn = {1094-9224}, doi = {10.1145/582415.582418}, keywords = {CAREER,Graded relevance judgments cumulated gain,LensKit References,ndcg} @@ -465,8 +465,8 @@ @inproceedings{ningSLIMSparseLinear2011 author = {Ning, Xia and Karypis, George}, year = {2011}, pages = {497--506}, - publisher = {{IEEE Computer Society}}, - address = {{Washington, DC, USA}}, + publisher = {IEEE Computer Society}, + address = {Washington, DC, USA}, doi = {10.1109/ICDM.2011.134}, urldate = {2017-01-04}, abstract = {This paper focuses on developing effective and efficient algorithms for top-N recommender systems. A novel Sparse Linear Method (SLIM) is proposed, which generates top-N recommendations by aggregating from user purchase/rating profiles. A sparse aggregation coefficient matrix W is learned from SLIM by solving an `1-norm and `2-norm regularized optimization problem. W is demonstrated to produce high quality recommendations and its sparsity allows SLIM to generate recommendations very fast. 
A comprehensive set of experiments is conducted by comparing the SLIM method and other state-of-the-art top-N recommendation methods. The experiments show that SLIM achieves significant improvements both in run time performance and recommendation quality over the best existing methods.}, @@ -477,7 +477,7 @@ @book{oliphantGuideNumPy2006 title = {A {{Guide}} to {{NumPy}}}, author = {Oliphant, Travis E}, year = {2006}, - publisher = {{Trelgol Publishing}}, + publisher = {Trelgol Publishing}, keywords = {LensKit References,Software} } @@ -542,8 +542,8 @@ @inproceedings{pilaszyFastALSbasedMatrix2010 author = {Pil{\'a}szy, Istv{\'a}n and Zibriczky, D{\'a}vid and Tikk, Domonkos}, year = {2010}, pages = {71--78}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/1864708.1864726}, urldate = {2015-06-01}, abstract = {Alternating least squares (ALS) is a powerful matrix factorization (MF) algorithm for both explicit and implicit feedback based recommender systems. As shown in many articles, increasing the number of latent factors (denoted by K) boosts the prediction accuracy of MF based recommender systems, including ALS as well. The price of the better accuracy is paid by the increased running time: the running time of the original version of ALS is proportional to K3. Yet, the running time of model building can be important in recommendation systems; if the model cannot keep up with the changing item portfolio and/or user profile, the prediction accuracy can be degraded. In this paper we present novel and fast ALS variants both for the implicit and explicit feedback datasets, which offers better trade-off between running time and accuracy. Due to the significantly lower computational complexity of the algorithm - linear in terms of K - the model being generated under the same amount of time is more accurate, since the faster training enables to build model with more latent factors. 
We demonstrate the efficiency of our ALS variants on two datasets using two performance measures, RMSE and average relative position (ARP), and show that either a significantly more accurate model can be generated under the same amount of time or a model with similar prediction accuracy can be created faster; for explicit feedback the speed-up factor can be even 5-10.}, @@ -556,8 +556,8 @@ @inproceedings{pilaszyRecommendingNewMovies2009 author = {Pil{\'a}szy, Istv{\'a}n and Tikk, Domonkos}, year = {2009}, pages = {93--100}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/1639714.1639731}, urldate = {2016-12-14}, abstract = {The Netflix Prize (NP) competition gave much attention to collaborative filtering (CF) approaches. Matrix factorization (MF) based CF approaches assign low dimensional feature vectors to users and items. We link CF and content-based filtering (CBF) by finding a linear transformation that transforms user or item descriptions so that they are as close as possible to the feature vectors generated by MF for CF. We propose methods for explicit feedback that are able to handle 140,000 features when feature vectors are very sparse. With movie metadata collected for the NP movies we show that the prediction performance of the methods is comparable to that of CF, and can be used to predict user preferences on new movies. We also investigate the value of movie metadata compared to movie ratings in regards of predictive power. We compare our solely CBF approach with a simple baseline rating-based predictor. 
We show that even 10 ratings of a new movie are more valuable than its metadata for predicting user ratings.}, @@ -573,7 +573,7 @@ @article{rbp volume = {27}, number = {1}, pages = {2:1-27}, - publisher = {{ACM}}, + publisher = {ACM}, issn = {1094-9224}, doi = {10.1145/1416950.1416952}, abstract = {A range of methods for measuring the effectiveness of information retrieval systems has been proposed. These are typically intended to provide a quantitative single-value summary of a document ranking relative to a query. However, many of these measures have failings {\dots}} @@ -585,8 +585,8 @@ @inproceedings{rendleBPRBayesianPersonalized2009 author = {Rendle, Steffen and Freudenthaler, Christoph and Gantner, Zeno and {Schmidt-Thieme}, Lars}, year = {2009}, pages = {452--461}, - publisher = {{AUAI Press}}, - address = {{Arlington, Virginia, United States}}, + publisher = {AUAI Press}, + address = {Arlington, Virginia, United States}, urldate = {2015-10-16}, abstract = {Item recommendation is the task of predicting a personalized ranking on a set of items (e.g. websites, movies, products). In this paper, we investigate the most common scenario with implicit feedback (e.g. clicks, purchases). There are many methods for item recommendation from implicit feedback like matrix factorization (MF) or adaptive k-nearest-neighbor (kNN). Even though these methods are designed for the item prediction task of personalized ranking, none of them is directly optimized for ranking. In this paper we present a generic optimization criterion BPR-Opt for personalized ranking that is the maximum posterior estimator derived from a Bayesian analysis of the problem. We also provide a generic learning algorithm for optimizing models with respect to BPR-Opt. The learning method is based on stochastic gradient descent with bootstrap sampling. We show how to apply our method to two state-of-the-art recommender models: matrix factorization and adaptive kNN. 
Our experiments indicate that for the task of personalized ranking our optimization method outperforms the standard learning techniques for MF and kNN. The results show the importance of optimizing models for the right criterion.}, keywords = {CAREER,Zotero Import (Mar 30),Zotero Import (Mar 30)/My Library,Zotero Import (Mar 30)/My Library/Recommender Systems,Zotero Import (Mar 30)/My Library/Recommender Systems/Class Spring 2017} @@ -600,8 +600,8 @@ @inproceedings{resnickGroupLensOpenArchitecture1994 year = {1994}, series = {{{CSCW}} '94}, pages = {175--186}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/192844.192905}, urldate = {2015-12-02}, abstract = {Collaborative filters help people make choices based on the opinions of other people. GroupLens is a system for collaborative filtering of netnews, to help people find articles they will like in the huge stream of available articles. News reader clients display predicted scores and make it easy for users to rate articles after they read them. Rating servers, called Better Bit Bureaus, gather and disseminate the ratings. The rating servers predict scores based on the heuristic that people who agreed in the past will probably agree again. Users can protect their privacy by entering ratings under pseudonyms, without reducing the effectiveness of the score prediction. 
The entire architecture is open: alternative software for news clients and Better Bit Bureaus can be developed independently and can interoperate with the components we have developed.}, @@ -616,8 +616,8 @@ @inproceedings{sarwarItembasedCollaborativeFiltering2001 year = {2001}, series = {{{WWW}} '01}, pages = {285--295}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/371920.372071}, urldate = {2015-12-02}, isbn = {978-1-58113-348-6}, @@ -646,8 +646,8 @@ @inproceedings{takacsAlternatingLeastSquares2012 year = {2012}, month = sep, pages = {83--90}, - publisher = {{Association for Computing Machinery}}, - address = {{New York, NY, USA}}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, doi = {10.1145/2365952.2365972}, abstract = {Two flavors of the recommendation problem are the explicit and the implicit feedback settings. In the explicit feedback case, users rate items and the user-item preference relationship can be modelled on the basis of the ratings. In the harder but more common implicit feedback case, the system has to infer user preferences from indirect information: presence or absence of events, such as a user viewed an item. One approach for handling implicit feedback is to minimize a ranking objective function instead of the conventional prediction mean squared error. The naive minimization of a ranking objective function is typically expensive. This difficulty is usually overcome by a trade-off: sacrificing the accuracy to some extent for computational efficiency by sampling the objective function. In this paper, we present a computationally effective approach for the direct minimization of a ranking objective function, without sampling. 
We demonstrate by experiments on the Y!Music and Netflix data sets that the proposed method outperforms other implicit feedback recommenders in many cases in terms of the ErrorRate, ARP and Recall evaluation metrics.}, keywords = {collaborative filtering alternating least squares ranking,Exemplars/Introduction,LensKit References} @@ -660,8 +660,8 @@ @inproceedings{takacsApplicationsConjugateGradient2011 year = {2011}, series = {{{RecSys}} '11}, pages = {297--300}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2043932.2043987}, urldate = {2019-07-12}, abstract = {The need for solving weighted ridge regression (WRR) problems arises in a number of collaborative filtering (CF) algorithms. Often, there is not enough time to calculate the exact solution of the WRR problem, or it is not required. The conjugate gradient (CG) method is a state-of-the-art approach for the approximate solution of WRR problems. In this paper, we investigate some applications of the CG method for new and existing implicit feedback CF models. We demonstrate through experiments on the Netflix dataset that CG can be an efficient tool for training implicit feedback CF models.}, @@ -676,8 +676,8 @@ @inproceedings{tammQualityMetricsRecommender2021 year = {2021}, month = sep, pages = {708--713}, - publisher = {{Association for Computing Machinery}}, - address = {{New York, NY, USA}}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, doi = {10.1145/3460231.3478848}, urldate = {2021-10-04}, abstract = {Offline evaluation is a popular approach to determine the best algorithm in terms of the chosen quality metric. However, if the chosen metric calculates something unexpected, this miscommunication can lead to poor decisions and wrong conclusions. In this paper, we thoroughly investigate quality metrics used for recommender systems evaluation. 
We look at the practical aspect of implementations found in modern RecSys libraries and at the theoretical aspect of definitions in academic papers. We find that Precision is the only metric universally understood among papers and libraries, while other metrics may have different interpretations. Metrics implemented in different libraries sometimes have the same name but measure different things, which leads to different results given the same input. When defining metrics in an academic paper, authors sometimes omit explicit formulations or give references that do not contain explanations either. In 47\% of cases, we cannot easily know how the metric is defined because the definition is not clear or absent. These findings highlight yet another difficulty in recommender system evaluation and call for a more detailed description of evaluation protocols.}, @@ -706,8 +706,8 @@ @inproceedings{yagciParallelizingSGDPairwise2017 year = {2017}, series = {{{RecSys}} '17}, pages = {37--41}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/3109859.3109906}, urldate = {2019-05-17}, abstract = {Learning to rank with pairwise loss functions has been found useful in collaborative filtering recommender systems. At web scale, the optimization is often based on matrix factorization with stochastic gradient descent (SGD) which has a sequential nature. We investigate two different shared memory lock-free parallel SGD schemes based on block partitioning and no partitioning for use with pairwise loss functions. To speed up convergence to a solution, we extrapolate simple practical algorithms from their application to pointwise learning to rank.
Experimental results show that the proposed algorithms are quite useful regarding their ranking ability and speedup patterns in comparison to their sequential counterpart.}, @@ -736,8 +736,8 @@ @inproceedings{zhangConferConferenceRecommendation2016 year = {2016}, series = {{{CSCW}} '16 {{Companion}}}, pages = {118--121}, - publisher = {{ACM}}, - address = {{New York, NY, USA}}, + publisher = {ACM}, + address = {New York, NY, USA}, doi = {10.1145/2818052.2874340}, isbn = {978-1-4503-3950-6}, keywords = {collaborative filtering,conference planning,match-making,recommendation} @@ -749,7 +749,7 @@ @inproceedings{zhouLargeScaleParallelCollaborative2008a author = {Zhou, Yunhong and Wilkinson, Dennis and Schreiber, Robert and Pan, Rong}, year = {2008}, pages = {337--348}, - publisher = {{Springer Berlin Heidelberg}}, + publisher = {Springer Berlin Heidelberg}, doi = {10.1007/978-3-540-68880-8_32}, abstract = {Many recommendation systems suggest items to users by utilizing the techniques of collaborative filtering (CF) based on historical records of items that the users have viewed, purchased, or rated. Two major problems that most CF approaches have to contend with are scalability and sparseness of the user profiles. To tackle these issues, in this paper, we describe a CF algorithm alternating-least-squares with weighted-{$\lambda$}-regularization (ALS-WR), which is implemented on a parallel Matlab platform. We show empirically that the performance of ALS-WR (in terms of root mean squared error (RMSE)) monotonically improves with both the number of features and the number of ALS iterations. We applied the ALS-WR algorithm on a large-scale CF problem, the Netflix Challenge, with 1000 hidden features and obtained a RMSE score of 0.8985, which is one of the best results based on a pure method. In addition, combining with the parallel version of other known methods, we achieved a performance improvement of 5.91\% over Netflix's own CineMatch recommendation system.
Our method is simple and scales well to very large datasets.}, keywords = {LensKit References}