diff --git a/.github/workflows/actions/run-tests/action.yml b/.github/workflows/actions/run-tests/action.yml index c32f093d71..86b791fd99 100644 --- a/.github/workflows/actions/run-tests/action.yml +++ b/.github/workflows/actions/run-tests/action.yml @@ -7,21 +7,27 @@ # - uses: actions/checkout@v2 # - uses: ./.github/workflows/actions/run-tests # with: +# tox-env: 'spark' # test-kind: 'unit' # test-marker: 'spark and notebooks' name: 'Run Python tests' description: 'Specify parameters to configure test subsets to run and collect test report for.' inputs: + tox-env: + description: "Name of the tox env. EX) cpu|gpu|spark|all + See tox.ini at root level for more info" + required: true + # Options are "cpu|gpu|spark|all" and any default tox env. + # For more info on default tox env, see https://tox.readthedocs.io/en/latest/config.html#tox-environments + default: 'all' test-kind: - description: - "The kinds of tests to run. EX) unit|integration|smoke + description: "The kinds of tests to run. EX) unit|integration|smoke This maps to those in the 'tests/' folder" required: true default: 'unit' test-marker: - description: - "Finer filter for selecting the tests to run with pytest markers. + description: "Finer filter for selecting the tests to run with pytest markers. See https://docs.pytest.org/en/6.2.x/example/markers.html" default: 'not gpu and not notebooks and not spark' outputs: @@ -44,12 +50,12 @@ runs: # '-e py' will use the default 'python' executable found in system path # for why using tox, see: https://tox.readthedocs.io/en/latest/index.html # tox will do: - # 1. build and install source distribution (sdist) - # 2. run static analysis on the code (not implemented yet) - # 3. run all of the specified test environment (i.e. run tests in different py-versions, etc) + # 1. create a virtual env + # 2. build and install source distribution (sdist) + # 3. run the specified tests + # 4. 
show test reports run: | - tox -e py -- tests/${{ inputs.test-kind }} -m '${{ inputs.test-marker }}' + tox -ve ${{ inputs.tox-env }} -- tests/${{ inputs.test-kind }} -m '${{ inputs.test-marker }}' - name: Prepare Code Coverage Report id: rename-cov-report diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 15ef7ba159..7f87ab1220 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -114,6 +114,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'cpu' # run the cpu tests with the 'recommenders[dev,examples]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} # Currently GitHub workflow cannot call an action from another action @@ -167,6 +168,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'spark' # run the spark tests with the 'recommenders[spark,examples,dev]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} @@ -202,6 +204,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'gpu' # run the gpu tests with the 'recommenders[gpu,examples,dev]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} diff --git a/.github/workflows/pr-gate.yml b/.github/workflows/pr-gate.yml index 5f0320ba7a..16ea4111e7 100644 --- a/.github/workflows/pr-gate.yml +++ b/.github/workflows/pr-gate.yml @@ -93,6 +93,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'cpu' # run the cpu tests with the 'recommenders[dev,examples]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} # Currently GitHub workflow cannot call an action from another action @@ -138,6 +139,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'spark' # run the spark tests with the 'recommenders[spark,examples,dev]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} @@ -173,6 +175,7 @@ jobs: - name: Run ${{ matrix.test-kind }} tests ('${{ matrix.test-marker }}') uses: ./.github/workflows/actions/run-tests with: + tox-env: 'gpu' # run the gpu tests with the 'recommenders[gpu,examples,dev]' dependencies test-kind: ${{ matrix.test-kind }} test-marker: ${{ matrix.test-marker }} diff --git a/.gitignore b/.gitignore index 96c1146f0c..a163274a80 100644 --- a/.gitignore +++ b/.gitignore @@ -87,8 +87,8 @@ celerybeat-schedule *.sage.py # Environments -.env -.venv +.env* +.venv* env/ venv/ ENV/ diff --git a/docs/.readthedocs.yaml b/.readthedocs.yaml similarity index 50% rename from docs/.readthedocs.yaml rename to .readthedocs.yaml index 7fc97323f0..28d6dd565d 100644 --- a/docs/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,11 +1,19 @@ version: 2 -# Build from the docs/ directory with Sphinx -sphinx: - configuration: docs/source/conf.py +# Add necessary apt-get packages +build: + apt_packages: + - cmake # Explicitly set the version of Python and its requirements python: - version: 3.7 + version: "3.7" install: - - requirements: docs/requirements.txt \ No newline at end of file + - method: pip path: . 
+ extra_requirements: + - all + +# Build from the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 8d986c3fd7..0000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,39 +0,0 @@ -numpy>=1.14 -pandas>1.0.3,<2 -scipy>=1.0.0,<2 -tqdm>=4.31.1,<5 -matplotlib>=2.2.2,<4 -scikit-learn>=0.22.1,<1 -numba>=0.38.1,<1 -lightfm>=1.15,<2 -lightgbm>=2.2.1,<3 -memory_profiler>=0.54.0,<1 -nltk>=3.4,<4 -pydocumentdb>=2.3.3<3 -pymanopt>=0.2.5,<1 -seaborn>=0.8.1,<1 -transformers>=2.5.0,<5 -bottleneck>=1.2.1,<2 -category_encoders>=1.3.0,<2 -jinja2>=2,<3 -pyyaml>=5.4.1,<6 -requests>=2.0.0,<3 -cornac>=1.1.2,<2 -scikit-surprise>=0.19.1,<=1.1.1 -retrying>=1.3.3 -azure.mgmt.cosmosdb>=0.8.0,<1 -hyperopt>=0.1.2,<1 -ipykernel>=4.6.1,<5 -jupyter>=1,<2 -locust>=1,<2 -papermill>=2.1.2,<3 -scrapbook>=0.5.0,<1.0.0 -nvidia-ml-py3>=7.352.0 -tensorflow-gpu>=1.15.0,<2 -torch==1.2.0 -fastai>=1.0.46,<2 -databricks_cli>=0.8.6,<1 -pyarrow>=0.8.0,<1.0.0 -pyspark>=2.4.5,<3.0.0 -cmake>=3.18.4.post1 -xlearn==0.40a1 diff --git a/recommenders/evaluation/python_evaluation.py b/recommenders/evaluation/python_evaluation.py index f639930bad..a04fff5450 100644 --- a/recommenders/evaluation/python_evaluation.py +++ b/recommenders/evaluation/python_evaluation.py @@ -20,6 +20,8 @@ DEFAULT_PREDICTION_COL, DEFAULT_RELEVANCE_COL, DEFAULT_SIMILARITY_COL, + DEFAULT_ITEM_FEATURES_COL, + DEFAULT_ITEM_SIM_MEASURE, DEFAULT_K, DEFAULT_THRESHOLD, ) @@ -696,46 +698,131 @@ def get_top_k_items( } # diversity metrics -class PythonDiversityEvaluation: - """Python Diversity Evaluator""" +def check_column_dtypes_diversity_serendipity(func): + """Checks columns of DataFrame inputs + + This includes the checks on: + + * whether the input columns exist in the input DataFrames + * whether the data types of col_user as well as col_item are matched in the two input DataFrames. + * whether reco_df contains any user_item pairs that are already shown in train_df + * check relevance column in reco_df + * check column names in item_feature_df - def __init__( - self, + Args: + func (function): function that will be wrapped + + Returns: + function: Wrapper function for checking dtypes. + """ + + @wraps(func) + def check_column_dtypes_diversity_serendipity_wrapper( train_df, reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, col_relevance=None, + *args, + **kwargs ): - """Initializer. + """Check columns of DataFrame inputs + + Args: + train_df (pandas.DataFrame): Data set with historical data for users and items they + have interacted with; contains col_user, col_item. Assumed to not contain any duplicate rows. + reco_df (pandas.DataFrame): Recommender's prediction output, containing col_user, col_item, + col_relevance (optional). Assumed to not contain any duplicate user-item pairs. + item_feature_df (pandas.DataFrame): (Optional) It is required only when item_sim_measure='item_feature_vector'. It contains two columns: col_item and features (a feature vector). + item_sim_measure (str): (Optional) Indicates which item similarity measure to use. Available measures include item_cooccurrence_count (default choice) and item_feature_vector. + col_item_features (str): item feature column name. + col_user (str): User id column name. + col_item (str): Item id column name. 
+ col_relevance (str): This column indicates whether the recommended item is actually + relevant to the user or not. + """ + + if not has_columns(train_df, [col_user, col_item]): + raise ValueError("Missing columns in train_df DataFrame") + if not has_columns(reco_df, [col_user, col_item]): + raise ValueError("Missing columns in reco_df DataFrame") + if not has_same_base_dtype(train_df, reco_df, columns=[col_user, col_item]): + raise ValueError("Columns in provided DataFrames are not the same datatype") + if col_relevance is None: + col_relevance = DEFAULT_RELEVANCE_COL + # relevance term, default is 1 (relevant) for all + reco_df = reco_df[[col_user, col_item]] + reco_df[col_relevance] = 1.0 + else: + col_relevance = col_relevance + reco_df = reco_df[[col_user, col_item, col_relevance]].astype( + {col_relevance: np.float16} + ) + if item_sim_measure == "item_feature_vector": + required_columns = [col_item, col_item_features] + if item_feature_df is not None: + if not has_columns(item_feature_df, required_columns): + raise ValueError("Missing columns in item_feature_df DataFrame") + else: + raise Exception( + "item_feature_df not specified! item_feature_df must be provided if choosing to use item_feature_vector to calculate item similarity. item_feature_df should have columns:" + + str(required_columns) + ) + # check if reco_df contains any user_item pairs that are already shown in train_df + count_intersection = pd.merge( + train_df, reco_df, how="inner", on=[col_user, col_item] + ).shape[0] + if count_intersection != 0: + raise Exception( + "reco_df should not contain any user_item pairs that are already shown in train_df" + ) - This is the Python version of diversity metrics evaluator. - The methods of this class calculate the following diversity metrics: + return func( + train_df=train_df, + reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure=item_sim_measure, + col_user=col_user, + col_item=col_item, + col_sim=col_sim, + col_relevance=col_relevance, + *args, + **kwargs + ) - * Coverage - it includes two metrics: - 1. catalog_coverage, which measures the proportion of items that get recommended from the item catalog; - 2. distributional_coverage, which measures how unequally different items are recommended in the - recommendations to all users. + return check_column_dtypes_diversity_serendipity_wrapper - * Novelty - A more novel item indicates it is less popular, i.e. it gets recommended less frequently. - * Diversity - The dissimilarity of items being recommended. - * Serendipity - The "unusualness" or "surprise" of recommendations to a user. When 'col_relevance' is used, it indicates how "pleasant surprise" of recommendations is to a user. - The metric definitions/formulations are based on the following references with modification: +def check_column_dtypes_novelty_coverage(func): + """Checks columns of DataFrame inputs - :Citation: + This includes the checks on: - G. Shani and A. Gunawardana, Evaluating Recommendation Systems, - Recommender Systems Handbook pp. 257-297, 2010. + * whether the input columns exist in the input DataFrames + * whether the data types of col_user as well as col_item are matched in the two input DataFrames. + * whether reco_df contains any user_item pairs that are already shown in train_df - Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: introducing - serendipity into music recommendation, WSDM 2012 + Args: + func (function): function that will be wrapped - P. Castells, S. Vargas, and J. 
Wang, Novelty and diversity metrics for recommender systems: - choice, discovery and relevance, ECIR 2011 + Returns: + function: Wrapper function for checking dtypes. + """ - Eugene Yan, Serendipity: Accuracy’s unpopular best friend in Recommender Systems, - eugeneyan.com, April 2020 + @wraps(func) + def check_column_dtypes_novelty_coverage_wrapper( + train_df, + reco_df, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + *args, + **kwargs + ): + """Check columns of DataFrame inputs Args: train_df (pandas.DataFrame): Data set with historical data for users and items they @@ -745,390 +832,580 @@ def __init__( col_relevance (optional). Assumed to not contain any duplicate user-item pairs. col_user (str): User id column name. col_item (str): Item id column name. - col_relevance (str): This column indicates whether the recommended item is actually - relevant to the user or not. + """ - self.train_df = train_df[[col_user, col_item]] - self.col_user = col_user - self.col_item = col_item - self.sim_col = DEFAULT_SIMILARITY_COL - self.df_cosine_similarity = None - self.df_user_item_serendipity = None - self.df_user_serendipity = None - self.avg_serendipity = None - self.df_item_novelty = None - self.avg_novelty = None - self.df_intralist_similarity = None - self.df_user_diversity = None - self.avg_diversity = None + if not has_columns(train_df, [col_user, col_item]): + raise ValueError("Missing columns in train_df DataFrame") + if not has_columns(reco_df, [col_user, col_item]): + raise ValueError("Missing columns in reco_df DataFrame") + if not has_same_base_dtype(train_df, reco_df, columns=[col_user, col_item]): + raise ValueError("Columns in provided DataFrames are not the same datatype") - if col_relevance is None: - self.col_relevance = DEFAULT_RELEVANCE_COL - # relevance term, default is 1 (relevant) for all - self.reco_df = reco_df[[col_user, col_item]] - self.reco_df[self.col_relevance] = 1.0 - else: - self.col_relevance = col_relevance - self.reco_df = reco_df[[col_user, col_item, col_relevance]].astype( - {col_relevance: np.float16} - ) - # check if reco_df contains any user_item pairs that are already shown in train_df count_intersection = pd.merge( - self.train_df, self.reco_df, how="inner", on=[self.col_user, self.col_item] + train_df, reco_df, how="inner", on=[col_user, col_item] ).shape[0] if count_intersection != 0: raise Exception( "reco_df should not contain any user_item pairs that are already shown in train_df" ) - def _get_pairwise_items(self, df): - """Get pairwise combinations of items per user (ignoring duplicate pairs [1,2] == [2,1])""" - df_user_i1 = df[[self.col_user, self.col_item]] - df_user_i1.columns = [self.col_user, "i1"] + return func( + train_df=train_df, + reco_df=reco_df, + col_user=col_user, + col_item=col_item, + *args, + **kwargs + ) + + return check_column_dtypes_novelty_coverage_wrapper - df_user_i2 = df[[self.col_user, self.col_item]] - df_user_i2.columns = [self.col_user, "i2"] - df_user_i1_i2 = pd.merge( - df_user_i1, df_user_i2, how="inner", on=[self.col_user] +@lru_cache_df(maxsize=1) +def _get_pairwise_items( + df, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, +): + """Get pairwise combinations of items per user (ignoring duplicate pairs [1,2] == [2,1])""" + df_user_i1 = df[[col_user, col_item]] + df_user_i1.columns = [col_user, "i1"] + + df_user_i2 = df[[col_user, col_item]] + df_user_i2.columns = [col_user, "i2"] + + df_user_i1_i2 = pd.merge(df_user_i1, df_user_i2, how="inner", on=[col_user]) + + df_pairwise_items = 
df_user_i1_i2[(df_user_i1_i2["i1"] <= df_user_i1_i2["i2"])][ + [col_user, "i1", "i2"] + ].reset_index(drop=True) + return df_pairwise_items + + +@lru_cache_df(maxsize=1) +def _get_cosine_similarity( + train_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, +): + + if item_sim_measure == "item_cooccurrence_count": + # calculate item-item similarity based on item co-occurrence count + df_cosine_similarity = _get_cooccurrence_similarity( + train_df, col_user, col_item, col_sim ) + elif item_sim_measure == "item_feature_vector": + # calculate item-item similarity based on item feature vectors + df_cosine_similarity = _get_item_feature_similarity( + item_feature_df, col_item_features, col_user, col_item, col_sim + ) + else: + raise Exception( + "item_sim_measure not recognized! The available options include 'item_cooccurrence_count' and 'item_feature_vector'." + ) + return df_cosine_similarity - df_pairwise_items = df_user_i1_i2[(df_user_i1_i2["i1"] <= df_user_i1_i2["i2"])][ - [self.col_user, "i1", "i2"] - ].reset_index(drop=True) - return df_pairwise_items - def _get_cosine_similarity(self, n_partitions=200): - """Cosine similarity metric from +@lru_cache_df(maxsize=1) +def _get_cooccurrence_similarity( + train_df, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, +): + """Cosine similarity metric from - :Citation: + :Citation: - Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: - introducing serendipity into music recommendation, WSDM 2012 + Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: + introducing serendipity into music recommendation, WSDM 2012 - The item indexes in the result are such that i1 <= i2. - """ - if self.df_cosine_similarity is None: - pairs = self._get_pairwise_items(df=self.train_df) - pairs_count = pd.DataFrame( - {"count": pairs.groupby(["i1", "i2"]).size()} - ).reset_index() - item_count = pd.DataFrame( - {"count": self.train_df.groupby([self.col_item]).size()} - ).reset_index() - item_count["item_sqrt_count"] = item_count["count"] ** 0.5 - item_co_occur = pairs_count.merge( - item_count[[self.col_item, "item_sqrt_count"]], - left_on=["i1"], - right_on=[self.col_item], - ).drop(columns=[self.col_item]) - - item_co_occur.columns = ["i1", "i2", "count", "i1_sqrt_count"] - - item_co_occur = item_co_occur.merge( - item_count[[self.col_item, "item_sqrt_count"]], - left_on=["i2"], - right_on=[self.col_item], - ).drop(columns=[self.col_item]) - item_co_occur.columns = [ - "i1", - "i2", - "count", - "i1_sqrt_count", - "i2_sqrt_count", - ] - - item_co_occur[self.sim_col] = item_co_occur["count"] / ( - item_co_occur["i1_sqrt_count"] * item_co_occur["i2_sqrt_count"] - ) - self.df_cosine_similarity = ( - item_co_occur[["i1", "i2", self.sim_col]] - .sort_values(["i1", "i2"]) - .reset_index(drop=True) - ) + The item indexes in the result are such that i1 <= i2. 
+ """ + pairs = _get_pairwise_items(train_df, col_user, col_item) + pairs_count = pd.DataFrame( + {"count": pairs.groupby(["i1", "i2"]).size()} + ).reset_index() + item_count = pd.DataFrame( + {"count": train_df.groupby([col_item]).size()} + ).reset_index() + item_count["item_sqrt_count"] = item_count["count"] ** 0.5 + item_co_occur = pairs_count.merge( + item_count[[col_item, "item_sqrt_count"]], + left_on=["i1"], + right_on=[col_item], + ).drop(columns=[col_item]) + + item_co_occur.columns = ["i1", "i2", "count", "i1_sqrt_count"] + + item_co_occur = item_co_occur.merge( + item_count[[col_item, "item_sqrt_count"]], + left_on=["i2"], + right_on=[col_item], + ).drop(columns=[col_item]) + item_co_occur.columns = [ + "i1", + "i2", + "count", + "i1_sqrt_count", + "i2_sqrt_count", + ] - return self.df_cosine_similarity + item_co_occur[col_sim] = item_co_occur["count"] / ( + item_co_occur["i1_sqrt_count"] * item_co_occur["i2_sqrt_count"] + ) + df_cosine_similarity = ( + item_co_occur[["i1", "i2", col_sim]] + .sort_values(["i1", "i2"]) + .reset_index(drop=True) + ) - # Diversity metrics - def _get_intralist_similarity(self, df): - """Intra-list similarity from + return df_cosine_similarity - :Citation: - "Improving Recommendation Lists Through Topic Diversification", - Ziegler, McNee, Konstan and Lausen, 2005. - """ - if self.df_intralist_similarity is None: - pairs = self._get_pairwise_items(df=df) - similarity_df = self._get_cosine_similarity() - # Fillna(0) is needed in the cases where similarity_df does not have an entry for a pair of items. - # e.g. i1 and i2 have never occurred together. - - item_pair_sim = pairs.merge(similarity_df, on=["i1", "i2"], how="left") - item_pair_sim[self.sim_col].fillna(0, inplace=True) - item_pair_sim = item_pair_sim.loc[ - item_pair_sim["i1"] != item_pair_sim["i2"] - ].reset_index(drop=True) - self.df_intralist_similarity = ( - item_pair_sim.groupby([self.col_user]) - .agg({self.sim_col: "mean"}) - .reset_index() - ) - self.df_intralist_similarity.columns = [self.col_user, "avg_il_sim"] +@lru_cache_df(maxsize=1) +def _get_item_feature_similarity( + item_feature_df, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, +): + """Cosine similarity metric based on item feature vectors - return self.df_intralist_similarity + The item indexes in the result are such that i1 <= i2. + """ + df1 = item_feature_df[[col_item, col_item_features]] + df1.columns = ["i1", "f1"] + df1["key"] = 0 + df2 = item_feature_df[[col_item, col_item_features]] + df2.columns = ["i2", "f2"] + df2["key"] = 0 + + df = pd.merge(df1, df2, on="key", how="outer").drop("key", axis=1) + df_item_feature_pair = df[(df["i1"] <= df["i2"])].reset_index(drop=True) + + df_item_feature_pair[col_sim] = df_item_feature_pair.apply( + lambda x: float(x.f1.dot(x.f2)) + / float(np.linalg.norm(x.f1, 2) * np.linalg.norm(x.f2, 2)), + axis=1, + ) - def user_diversity(self): - """Calculate average diversity of recommendations for each user. - The metric definition is based on formula (3) in the following reference: + df_cosine_similarity = df_item_feature_pair[["i1", "i2", col_sim]].sort_values( + ["i1", "i2"] + ) - :Citation: + return df_cosine_similarity - Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: - introducing serendipity into music recommendation, WSDM 2012 - Returns: - pandas.DataFrame: A dataframe with the following columns: col_user, user_diversity. 
- """ - if self.df_user_diversity is None: - self.df_intralist_similarity = self._get_intralist_similarity(self.reco_df) - self.df_user_diversity = self.df_intralist_similarity - self.df_user_diversity["user_diversity"] = ( - 1 - self.df_user_diversity["avg_il_sim"] - ) - self.df_user_diversity = ( - self.df_user_diversity[[self.col_user, "user_diversity"]] - .sort_values(self.col_user) - .reset_index(drop=True) - ) +# Diversity metrics +@lru_cache_df(maxsize=1) +def _get_intralist_similarity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, +): + """Intra-list similarity from - return self.df_user_diversity + :Citation: - def diversity(self): - """Calculate average diversity of recommendations across all users. + "Improving Recommendation Lists Through Topic Diversification", + Ziegler, McNee, Konstan and Lausen, 2005. + """ + pairs = _get_pairwise_items(reco_df, col_user, col_item) + similarity_df = _get_cosine_similarity( + train_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + ) + # Fillna(0) is needed in the cases where similarity_df does not have an entry for a pair of items. + # e.g. i1 and i2 have never occurred together. + + item_pair_sim = pairs.merge(similarity_df, on=["i1", "i2"], how="left") + item_pair_sim[col_sim].fillna(0, inplace=True) + item_pair_sim = item_pair_sim.loc[ + item_pair_sim["i1"] != item_pair_sim["i2"] + ].reset_index(drop=True) + df_intralist_similarity = ( + item_pair_sim.groupby([col_user]).agg({col_sim: "mean"}).reset_index() + ) + df_intralist_similarity.columns = [col_user, "avg_il_sim"] - Returns: - float: diversity. - """ - if self.avg_diversity is None: - self.df_user_diversity = self.user_diversity() - self.avg_diversity = self.df_user_diversity.agg({"user_diversity": "mean"})[ - 0 - ] - return self.avg_diversity - - # Novelty metrics - def historical_item_novelty(self): - """Calculate novelty for each item. Novelty is computed as the minus logarithm of - (number of interactions with item / total number of interactions). The definition of the metric - is based on the following reference using the choice model (eqs. 1 and 6): - - :Citation: - - P. Castells, S. Vargas, and J. Wang, Novelty and diversity metrics for recommender systems: - choice, discovery and relevance, ECIR 2011 - - The novelty of an item can be defined relative to a set of observed events on the set of all items. - These can be events of user choice (item "is picked" by a random user) or user discovery - (item "is known" to a random user). The above definition of novelty reflects a factor of item popularity. - High novelty values correspond to long-tail items in the density function, that few users have interacted - with and low novelty values correspond to popular head items. - - Returns: - pandas.DataFrame: A dataframe with the following columns: col_item, item_novelty. 
- """ - if self.df_item_novelty is None: - n_records = self.train_df.shape[0] - item_count = pd.DataFrame( - {"count": self.train_df.groupby([self.col_item]).size()} - ).reset_index() - item_count["item_novelty"] = -np.log2(item_count["count"] / n_records) - self.df_item_novelty = ( - item_count[[self.col_item, "item_novelty"]] - .sort_values(self.col_item) - .reset_index(drop=True) - ) + return df_intralist_similarity - return self.df_item_novelty - def novelty(self): - """Calculate the average novelty in a list of recommended items (this assumes that the recommendation list - is already computed). Follows section 5 from +@check_column_dtypes_diversity_serendipity +@lru_cache_df(maxsize=1) +def user_diversity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, + col_relevance=None, +): + """Calculate average diversity of recommendations for each user. + The metric definition is based on formula (3) in the following reference: - :Citation: + :Citation: - P. Castells, S. Vargas, and J. Wang, Novelty and diversity metrics for recommender systems: - choice, discovery and relevance, ECIR 2011 + Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: + introducing serendipity into music recommendation, WSDM 2012 - Returns: - float: novelty. - """ - if self.avg_novelty is None: - self.df_item_novelty = self.historical_item_novelty() - n_recommendations = self.reco_df.shape[0] - reco_item_count = pd.DataFrame( - {"count": self.reco_df.groupby([self.col_item]).size()} - ).reset_index() - reco_item_novelty = reco_item_count.merge( - self.df_item_novelty, on=self.col_item - ) - reco_item_novelty["product"] = ( - reco_item_novelty["count"] * reco_item_novelty["item_novelty"] - ) - self.avg_novelty = ( - reco_item_novelty.agg({"product": "sum"})[0] / n_recommendations - ) + Returns: + pandas.DataFrame: A dataframe with the following columns: col_user, user_diversity. + """ + + df_intralist_similarity = _get_intralist_similarity( + train_df, + reco_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + ) + df_user_diversity = df_intralist_similarity + df_user_diversity["user_diversity"] = 1 - df_user_diversity["avg_il_sim"] + df_user_diversity = ( + df_user_diversity[[col_user, "user_diversity"]] + .sort_values(col_user) + .reset_index(drop=True) + ) - return self.avg_novelty + return df_user_diversity - # Serendipity metrics - def user_item_serendipity(self): - """Calculate serendipity of each item in the recommendations for each user. - The metric definition is based on the following references: - :Citation: +@check_column_dtypes_diversity_serendipity +def diversity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, + col_relevance=None, +): + """Calculate average diversity of recommendations across all users. + + Returns: + float: diversity. + """ + df_user_diversity = user_diversity( + train_df, + reco_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + ) + avg_diversity = df_user_diversity.agg({"user_diversity": "mean"})[0] + return avg_diversity - Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. 
Jambor, Auralist: - introducing serendipity into music recommendation, WSDM 2012 - Eugene Yan, Serendipity: Accuracy’s unpopular best friend in Recommender Systems, - eugeneyan.com, April 2020 +# Novelty metrics +@check_column_dtypes_novelty_coverage +@lru_cache_df(maxsize=1) +def historical_item_novelty( + train_df, + reco_df, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, +): + """Calculate novelty for each item. Novelty is computed as the minus logarithm of + (number of interactions with item / total number of interactions). The definition of the metric + is based on the following reference using the choice model (eqs. 1 and 6): - Returns: - pandas.DataFrame: A dataframe with columns: col_user, col_item, user_item_serendipity. - """ - # for every col_user, col_item in reco_df, join all interacted items from train_df. - # These interacted items are repeated for each item in reco_df for a specific user. - if self.df_user_item_serendipity is None: - self.df_cosine_similarity = self._get_cosine_similarity() - reco_user_item = self.reco_df[[self.col_user, self.col_item]] - reco_user_item["reco_item_tmp"] = reco_user_item[self.col_item] - - train_user_item = self.train_df[[self.col_user, self.col_item]] - train_user_item.columns = [self.col_user, "train_item_tmp"] - - reco_train_user_item = reco_user_item.merge( - train_user_item, on=[self.col_user] - ) - reco_train_user_item["i1"] = reco_train_user_item[ - ["reco_item_tmp", "train_item_tmp"] - ].min(axis=1) - reco_train_user_item["i2"] = reco_train_user_item[ - ["reco_item_tmp", "train_item_tmp"] - ].max(axis=1) - - reco_train_user_item_sim = reco_train_user_item.merge( - self.df_cosine_similarity, on=["i1", "i2"], how="left" - ) - reco_train_user_item_sim[self.sim_col].fillna(0, inplace=True) + :Citation: - reco_user_item_avg_sim = ( - reco_train_user_item_sim.groupby([self.col_user, self.col_item]) - .agg({self.sim_col: "mean"}) - .reset_index() - ) - reco_user_item_avg_sim.columns = [ - self.col_user, - self.col_item, - "avg_item2interactedHistory_sim", - ] - - self.df_user_item_serendipity = reco_user_item_avg_sim.merge( - self.reco_df, on=[self.col_user, self.col_item] - ) - self.df_user_item_serendipity["user_item_serendipity"] = ( - 1 - self.df_user_item_serendipity["avg_item2interactedHistory_sim"] - ) * self.df_user_item_serendipity[self.col_relevance] - self.df_user_item_serendipity = ( - self.df_user_item_serendipity[ - [self.col_user, self.col_item, "user_item_serendipity"] - ] - .sort_values([self.col_user, self.col_item]) - .reset_index(drop=True) - ) + P. Castells, S. Vargas, and J. Wang, Novelty and diversity metrics for recommender systems: + choice, discovery and relevance, ECIR 2011 - return self.df_user_item_serendipity + The novelty of an item can be defined relative to a set of observed events on the set of all items. + These can be events of user choice (item "is picked" by a random user) or user discovery + (item "is known" to a random user). The above definition of novelty reflects a factor of item popularity. + High novelty values correspond to long-tail items in the density function, that few users have interacted + with and low novelty values correspond to popular head items. - def user_serendipity(self): - """Calculate average serendipity for each user's recommendations. + Returns: + pandas.DataFrame: A dataframe with the following columns: col_item, item_novelty. + """ - Returns: - pandas.DataFrame: A dataframe with following columns: col_user, user_serendipity. 
- """ - if self.df_user_serendipity is None: - self.df_user_item_serendipity = self.user_item_serendipity() - self.df_user_serendipity = ( - self.df_user_item_serendipity.groupby(self.col_user) - .agg({"user_item_serendipity": "mean"}) - .reset_index() - ) - self.df_user_serendipity.columns = [self.col_user, "user_serendipity"] - self.df_user_serendipity = self.df_user_serendipity.sort_values( - self.col_user - ).reset_index(drop=True) + n_records = train_df.shape[0] + item_count = pd.DataFrame( + {"count": train_df.groupby([col_item]).size()} + ).reset_index() + item_count["item_novelty"] = -np.log2(item_count["count"] / n_records) + df_item_novelty = ( + item_count[[col_item, "item_novelty"]] + .sort_values(col_item) + .reset_index(drop=True) + ) - return self.df_user_serendipity + return df_item_novelty - def serendipity(self): - """Calculate average serendipity for recommendations across all users. - Returns: - float: serendipity. - """ - if self.avg_serendipity is None: - self.df_user_serendipity = self.user_serendipity() - self.avg_serendipity = self.df_user_serendipity.agg( - {"user_serendipity": "mean"} - )[0] - return self.avg_serendipity +@check_column_dtypes_novelty_coverage +def novelty(train_df, reco_df, col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_COL): + """Calculate the average novelty in a list of recommended items (this assumes that the recommendation list + is already computed). Follows section 5 from - # Coverage metrics - def catalog_coverage(self): - """Calculate catalog coverage for recommendations across all users. - The metric definition is based on the "catalog coverage" definition in the following reference: + :Citation: - :Citation: + P. Castells, S. Vargas, and J. Wang, Novelty and diversity metrics for recommender systems: + choice, discovery and relevance, ECIR 2011 - G. Shani and A. Gunawardana, Evaluating Recommendation Systems, - Recommender Systems Handbook pp. 257-297, 2010. + Returns: + float: novelty. + """ - Returns: - float: catalog coverage - """ - # distinct item count in reco_df - count_distinct_item_reco = self.reco_df[self.col_item].nunique() - # distinct item count in train_df - count_distinct_item_train = self.train_df[self.col_item].nunique() + df_item_novelty = historical_item_novelty(train_df, reco_df, col_user, col_item) + n_recommendations = reco_df.shape[0] + reco_item_count = pd.DataFrame( + {"count": reco_df.groupby([col_item]).size()} + ).reset_index() + reco_item_novelty = reco_item_count.merge(df_item_novelty, on=col_item) + reco_item_novelty["product"] = ( + reco_item_novelty["count"] * reco_item_novelty["item_novelty"] + ) + avg_novelty = reco_item_novelty.agg({"product": "sum"})[0] / n_recommendations - # catalog coverage - c_coverage = count_distinct_item_reco / count_distinct_item_train - return c_coverage + return avg_novelty - def distributional_coverage(self): - """Calculate distributional coverage for recommendations across all users. - The metric definition is based on formula (21) in the following reference: - :Citation: +# Serendipity metrics +@check_column_dtypes_diversity_serendipity +@lru_cache_df(maxsize=1) +def user_item_serendipity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, + col_relevance=None, +): + """Calculate serendipity of each item in the recommendations for each user. 
+ The metric definition is based on the following references: - G. Shani and A. Gunawardana, Evaluating Recommendation Systems, - Recommender Systems Handbook pp. 257-297, 2010. + :Citation: - Returns: - float: distributional coverage - """ - # In reco_df, how many times each col_item is being recommended - df_itemcnt_reco = pd.DataFrame( - {"count": self.reco_df.groupby([self.col_item]).size()} - ).reset_index() + Y.C. Zhang, D.Ó. Séaghdha, D. Quercia and T. Jambor, Auralist: + introducing serendipity into music recommendation, WSDM 2012 + + Eugene Yan, Serendipity: Accuracy’s unpopular best friend in Recommender Systems, + eugeneyan.com, April 2020 + + Returns: + pandas.DataFrame: A dataframe with columns: col_user, col_item, user_item_serendipity. + """ + # for every col_user, col_item in reco_df, join all interacted items from train_df. + # These interacted items are repeated for each item in reco_df for a specific user. + df_cosine_similarity = _get_cosine_similarity( + train_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + ) + reco_user_item = reco_df[[col_user, col_item]] + reco_user_item["reco_item_tmp"] = reco_user_item[col_item] + + train_user_item = train_df[[col_user, col_item]] + train_user_item.columns = [col_user, "train_item_tmp"] + + reco_train_user_item = reco_user_item.merge(train_user_item, on=[col_user]) + reco_train_user_item["i1"] = reco_train_user_item[ + ["reco_item_tmp", "train_item_tmp"] + ].min(axis=1) + reco_train_user_item["i2"] = reco_train_user_item[ + ["reco_item_tmp", "train_item_tmp"] + ].max(axis=1) + + reco_train_user_item_sim = reco_train_user_item.merge( + df_cosine_similarity, on=["i1", "i2"], how="left" + ) + reco_train_user_item_sim[col_sim].fillna(0, inplace=True) + + reco_user_item_avg_sim = ( + reco_train_user_item_sim.groupby([col_user, col_item]) + .agg({col_sim: "mean"}) + .reset_index() + ) + reco_user_item_avg_sim.columns = [ + col_user, + col_item, + "avg_item2interactedHistory_sim", + ] + + df_user_item_serendipity = reco_user_item_avg_sim.merge( + reco_df, on=[col_user, col_item] + ) + df_user_item_serendipity["user_item_serendipity"] = ( + 1 - df_user_item_serendipity["avg_item2interactedHistory_sim"] + ) * df_user_item_serendipity[col_relevance] + df_user_item_serendipity = ( + df_user_item_serendipity[[col_user, col_item, "user_item_serendipity"]] + .sort_values([col_user, col_item]) + .reset_index(drop=True) + ) + + return df_user_item_serendipity + + +@lru_cache_df(maxsize=1) +@check_column_dtypes_diversity_serendipity +def user_serendipity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, + col_relevance=None, +): + """Calculate average serendipity for each user's recommendations. + + Returns: + pandas.DataFrame: A dataframe with following columns: col_user, user_serendipity. 
+ """ + df_user_item_serendipity = user_item_serendipity( + train_df, + reco_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + col_relevance, + ) + df_user_serendipity = ( + df_user_item_serendipity.groupby(col_user) + .agg({"user_item_serendipity": "mean"}) + .reset_index() + ) + df_user_serendipity.columns = [col_user, "user_serendipity"] + df_user_serendipity = df_user_serendipity.sort_values(col_user).reset_index( + drop=True + ) + + return df_user_serendipity + + +@check_column_dtypes_diversity_serendipity +def serendipity( + train_df, + reco_df, + item_feature_df=None, + item_sim_measure=DEFAULT_ITEM_SIM_MEASURE, + col_item_features=DEFAULT_ITEM_FEATURES_COL, + col_user=DEFAULT_USER_COL, + col_item=DEFAULT_ITEM_COL, + col_sim=DEFAULT_SIMILARITY_COL, + col_relevance=None, +): + """Calculate average serendipity for recommendations across all users. + + Returns: + float: serendipity. + """ + df_user_serendipity = user_serendipity( + train_df, + reco_df, + item_feature_df, + item_sim_measure, + col_item_features, + col_user, + col_item, + col_sim, + col_relevance, + ) + avg_serendipity = df_user_serendipity.agg({"user_serendipity": "mean"})[0] + return avg_serendipity + + +# Coverage metrics +@check_column_dtypes_novelty_coverage +def catalog_coverage( + train_df, reco_df, col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_COL +): + """Calculate catalog coverage for recommendations across all users. + The metric definition is based on the "catalog coverage" definition in the following reference: + + :Citation: + + G. Shani and A. Gunawardana, Evaluating Recommendation Systems, + Recommender Systems Handbook pp. 257-297, 2010. + + Returns: + float: catalog coverage + """ + # distinct item count in reco_df + count_distinct_item_reco = reco_df[col_item].nunique() + # distinct item count in train_df + count_distinct_item_train = train_df[col_item].nunique() + + # catalog coverage + c_coverage = count_distinct_item_reco / count_distinct_item_train + return c_coverage + + +@check_column_dtypes_novelty_coverage +def distributional_coverage( + train_df, reco_df, col_user=DEFAULT_USER_COL, col_item=DEFAULT_ITEM_COL +): + """Calculate distributional coverage for recommendations across all users. + The metric definition is based on formula (21) in the following reference: + + :Citation: + + G. Shani and A. Gunawardana, Evaluating Recommendation Systems, + Recommender Systems Handbook pp. 257-297, 2010. 
+ + Returns: + float: distributional coverage + """ + # In reco_df, how many times each col_item is being recommended + df_itemcnt_reco = pd.DataFrame( + {"count": reco_df.groupby([col_item]).size()} + ).reset_index() - # the number of total recommendations - count_row_reco = self.reco_df.shape[0] + # the number of total recommendations + count_row_reco = reco_df.shape[0] - df_entropy = df_itemcnt_reco - df_entropy["p(i)"] = df_entropy["count"] / count_row_reco - df_entropy["entropy(i)"] = df_entropy["p(i)"] * np.log2(df_entropy["p(i)"]) + df_entropy = df_itemcnt_reco + df_entropy["p(i)"] = df_entropy["count"] / count_row_reco + df_entropy["entropy(i)"] = df_entropy["p(i)"] * np.log2(df_entropy["p(i)"]) - d_coverage = -df_entropy.agg({"entropy(i)": "sum"})[0] + d_coverage = -df_entropy.agg({"entropy(i)": "sum"})[0] - return d_coverage + return d_coverage diff --git a/setup.py b/setup.py index 54122f29e2..0aba6982be 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "scikit-learn>=0.22.1,<1", "numba>=0.38.1,<1", "lightfm>=1.15,<2", - "lightgbm>=2.2.1,<3", + "lightgbm>=2.2.1", "memory_profiler>=0.54.0,<1", "nltk>=3.4,<4", "pydocumentdb>=2.3.3<3", # TODO: replace with azure-cosmos diff --git a/tests/README.md b/tests/README.md index e69c7a56b5..e6a5cab670 100644 --- a/tests/README.md +++ b/tests/README.md @@ -157,6 +157,28 @@ Example: +### Test execution with tox + +[Tox](https://tox.readthedocs.io/en/latest/) is a great tool for both virtual environment management and test execution. Tox acts as a front-end for our CI workflows: our existing [CI pipelines](https://github.com/microsoft/recommenders/actions) in GitHub leverage it to orchestrate the build, which gives us **parity** between local and remote execution environments when both run tox. Run tox and there will be no more **"tests run fine in my dev box but fail in the remote build"**! + +1. If you haven't already, `pip install tox` +2. To run static analysis: `tox -e flake8` +3. To run any of our test suites: + `tox -e {TOX_ENV} -- {PYTEST_PARAM}` + + where + - `TOX_ENV` can be `cpu|gpu|spark|all`; each env maps to the corresponding "extras" dependencies, e.g. `recommenders[gpu]` or `recommenders[spark]`. It can also be any of the [default envs](https://tox.readthedocs.io/en/latest/config.html#tox-environments): `py|pyNM` + - `PYTEST_PARAM` are any standard parameters to pass to the `pytest` CLI. + + For example: + + 1. `tox -e cpu -- tests/unit -m "not notebooks and not spark and not gpu"` (runs the unit tests with `recommenders[dev,examples]` dependencies) + 2. `tox -e gpu -- tests/unit -m "gpu and notebooks"` (runs the gpu notebook tests with `recommenders[dev,examples,gpu]` dependencies) + 3. `tox -e spark -- tests/unit -m "spark and notebooks"` (runs the spark notebook tests with `recommenders[dev,examples,spark]` dependencies) + 4. `tox -e all -- tests/unit` (runs all of the unit tests with `recommenders[all]` dependencies) + 5. `tox -e py -- tests/unit` (runs the unit tests under the default python interpreter with `recommenders[all]`) + 6. `tox -e py37 -- tests/unit` (runs the unit tests under Python 3.7 with `recommenders[all]`) + ## How to create tests on notebooks with Papermill In the notebooks of this repo, we use [Papermill](https://github.com/nteract/papermill) in unit, smoke and integration tests. Papermill is a tool that enables you to parameterize notebooks, execute and collect metrics across the notebooks, and summarize collections of notebooks. 
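As a rough sketch of this pattern (the notebook path and parameter names below are hypothetical, not taken from a specific test in the repo), a Papermill-driven test executes a parameterized notebook and then reads back the metrics the notebook recorded via scrapbook:

```python
import papermill as pm
import scrapbook as sb


def test_notebook_runs(tmp_path):
    output_notebook = str(tmp_path / "output.ipynb")
    # Inject values into the notebook's "parameters" cell and execute it;
    # papermill writes the executed copy (including any traceback) to output_notebook.
    pm.execute_notebook(
        "examples/my_example.ipynb",  # hypothetical notebook path
        output_notebook,
        kernel_name="python3",
        parameters=dict(MOVIELENS_DATA_SIZE="100k", TOP_K=10),
    )
    # Collect the values the notebook recorded with scrapbook, e.g. sb.glue("map", value).
    results = sb.read_notebook(output_notebook).scraps.dataframe
    map_value = results[results["name"] == "map"]["data"].values[0]
    assert map_value >= 0
```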
diff --git a/tests/unit/recommenders/evaluation/test_python_evaluation.py b/tests/unit/recommenders/evaluation/test_python_evaluation.py index 8586131f15..b25da4aec8 100644 --- a/tests/unit/recommenders/evaluation/test_python_evaluation.py +++ b/tests/unit/recommenders/evaluation/test_python_evaluation.py @@ -28,7 +28,15 @@ map_at_k, auc, logloss, - PythonDiversityEvaluation, + user_diversity, + diversity, + historical_item_novelty, + novelty, + user_item_serendipity, + user_serendipity, + serendipity, + catalog_coverage, + distributional_coverage, ) TOL = 0.0001 @@ -392,6 +400,11 @@ def target_metrics(): "user_diversity": pd.DataFrame( dict(UserId=[1, 2, 3], user_diversity=[0.29289, 1.0, 0.0]) ), + # diversity values when using item features to calculate item similarity + "diversity_item_feature_vector": pytest.approx(0.5000, TOL), + "user_diversity_item_feature_vector": pd.DataFrame( + dict(UserId=[1, 2, 3], user_diversity=[0.5000, 0.5000, 0.5000]) + ), "user_item_serendipity": pd.DataFrame( dict( UserId=[1, 1, 2, 2, 3, 3], @@ -410,6 +423,25 @@ def target_metrics(): dict(UserId=[1, 2, 3], user_serendipity=[0.363915, 0.53455, 0.403775]) ), "serendipity": pytest.approx(0.43408, TOL), + # serendipity values when using item features to calculate item similarity + "user_item_serendipity_item_feature_vector": pd.DataFrame( + dict( + UserId=[1, 1, 2, 2, 3, 3], + ItemId=[3, 5, 2, 5, 1, 2], + user_item_serendipity=[ + 0.5000, + 0.0, + 0.75, + 0.5000, + 0.6667, + 0.0, + ], + ) + ), + "user_serendipity_item_feature_vector": pd.DataFrame( + dict(UserId=[1, 2, 3], user_serendipity=[0.2500, 0.625, 0.3333]) + ), + "serendipity_item_feature_vector": pytest.approx(0.4028, TOL), } @@ -426,69 +458,88 @@ def python_diversity_data(): "Relevance": [1, 0, 1, 1, 1, 0], } ) - return train_df, reco_df + + item_feature_df = pd.DataFrame( + { + "ItemId": [1, 2, 3, 4, 5], + "features": [ + np.array([0.0, 1.0, 1.0, 0.0, 0.0], dtype=float), + np.array([0.0, 1.0, 0.0, 1.0, 0.0], dtype=float), + np.array([0.0, 0.0, 1.0, 1.0, 0.0], dtype=float), + np.array([0.0, 0.0, 1.0, 0.0, 1.0], dtype=float), + np.array([0.0, 0.0, 0.0, 1.0, 1.0], dtype=float), + ], + } + ) + return train_df, reco_df, item_feature_df def test_catalog_coverage(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + c_coverage = catalog_coverage( train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" ) - c_coverage = evaluator.catalog_coverage() assert c_coverage == target_metrics["c_coverage"] def test_distributional_coverage(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + d_coverage = distributional_coverage( train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" ) - d_coverage = evaluator.distributional_coverage() assert d_coverage == target_metrics["d_coverage"] def test_item_novelty(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + actual = historical_item_novelty( train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" ) - actual = evaluator.historical_item_novelty() assert_frame_equal( target_metrics["item_novelty"], actual, check_exact=False, check_less_precise=4 ) assert np.all(actual["item_novelty"].values >= 0) # Test that 
novelty is zero when data includes only one item train_df_new = train_df.loc[train_df["ItemId"] == 3] - evaluator = PythonDiversityEvaluation( + actual = historical_item_novelty( train_df=train_df_new, reco_df=reco_df, col_user="UserId", col_item="ItemId" ) - actual = evaluator.historical_item_novelty() assert actual["item_novelty"].values[0] == 0 def test_novelty(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + actual = novelty( train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" ) - novelty = evaluator.novelty() - assert target_metrics["novelty"] == novelty - assert novelty >= 0 + assert target_metrics["novelty"] == actual + assert actual >= 0 # Test that novelty is zero when data includes only one item train_df_new = train_df.loc[train_df["ItemId"] == 3] reco_df_new = reco_df.loc[reco_df["ItemId"] == 3] - evaluator = PythonDiversityEvaluation( - train_df=train_df_new, reco_df=reco_df_new, col_user="UserId", col_item="ItemId" + assert ( + novelty( + train_df=train_df_new, + reco_df=reco_df_new, + col_user="UserId", + col_item="ItemId", + ) + == 0 ) - assert evaluator.novelty() == 0 def test_user_diversity(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( - train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" + train_df, reco_df, _ = python_diversity_data + actual = user_diversity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=None, + item_sim_measure="item_cooccurrence_count", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance=None, ) - actual = evaluator.user_diversity() assert_frame_equal( target_metrics["user_diversity"], actual, @@ -498,23 +549,31 @@ def test_user_diversity(python_diversity_data, target_metrics): def test_diversity(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( - train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId" + train_df, reco_df, _ = python_diversity_data + assert target_metrics["diversity"] == diversity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=None, + item_sim_measure="item_cooccurrence_count", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance=None, ) - assert target_metrics["diversity"] == evaluator.diversity() def test_user_item_serendipity(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + actual = user_item_serendipity( train_df=train_df, reco_df=reco_df, + item_feature_df=None, + item_sim_measure="item_cooccurrence_count", col_user="UserId", col_item="ItemId", + col_sim="sim", col_relevance="Relevance", ) - actual = evaluator.user_item_serendipity() assert_frame_equal( target_metrics["user_item_serendipity"], actual, @@ -524,15 +583,17 @@ def test_user_item_serendipity(python_diversity_data, target_metrics): def test_user_serendipity(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + actual = user_serendipity( train_df=train_df, reco_df=reco_df, + item_feature_df=None, + item_sim_measure="item_cooccurrence_count", col_user="UserId", col_item="ItemId", + col_sim="sim", col_relevance="Relevance", ) - 
actual = evaluator.user_serendipity() assert_frame_equal( target_metrics["user_serendipity"], actual, @@ -542,12 +603,104 @@ def test_serendipity(python_diversity_data, target_metrics): - train_df, reco_df = python_diversity_data - evaluator = PythonDiversityEvaluation( + train_df, reco_df, _ = python_diversity_data + assert target_metrics["serendipity"] == serendipity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=None, + item_sim_measure="item_cooccurrence_count", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance="Relevance", + ) + + +def test_user_diversity_item_feature_vector(python_diversity_data, target_metrics): + train_df, reco_df, item_feature_df = python_diversity_data + actual = user_diversity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure="item_feature_vector", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance=None, + ) + assert_frame_equal( + target_metrics["user_diversity_item_feature_vector"], + actual, + check_exact=False, + check_less_precise=4, + ) + + +def test_diversity_item_feature_vector(python_diversity_data, target_metrics): + train_df, reco_df, item_feature_df = python_diversity_data + assert target_metrics["diversity_item_feature_vector"] == diversity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure="item_feature_vector", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance=None, + ) + + +def test_user_item_serendipity_item_feature_vector( + python_diversity_data, target_metrics +): + train_df, reco_df, item_feature_df = python_diversity_data + actual = user_item_serendipity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure="item_feature_vector", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance="Relevance", + ) + assert_frame_equal( + target_metrics["user_item_serendipity_item_feature_vector"], + actual, + check_exact=False, + check_less_precise=4, + ) + + +def test_user_serendipity_item_feature_vector(python_diversity_data, target_metrics): + train_df, reco_df, item_feature_df = python_diversity_data + actual = user_serendipity( + train_df=train_df, + reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure="item_feature_vector", + col_user="UserId", + col_item="ItemId", + col_sim="sim", + col_relevance="Relevance", + ) + assert_frame_equal( + target_metrics["user_serendipity_item_feature_vector"], + actual, + check_exact=False, + check_less_precise=4, + ) + + +def test_serendipity_item_feature_vector(python_diversity_data, target_metrics): + train_df, reco_df, item_feature_df = python_diversity_data + assert target_metrics["serendipity_item_feature_vector"] == serendipity( train_df=train_df, reco_df=reco_df, + item_feature_df=item_feature_df, + item_sim_measure="item_feature_vector", col_user="UserId", col_item="ItemId", + col_sim="sim", col_relevance="Relevance", ) - assert target_metrics["serendipity"] == evaluator.serendipity() diff --git a/tox.ini b/tox.ini index 4fd2246c20..815e06dc14 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,11 @@ # py will use whatever the basepython `python` maps to from PATH # you can use py38, for example, to choose a different version # See https://tox.readthedocs.io/en/latest/config.html#tox-environments -envlist = py +envlist = py, cpu, gpu, spark, all +# Default env settings [testenv] -# Reading 
additional dependencies to run the test -# https://tox.readthedocs.io/en/latest/example/basic.html#depending-on-requirements-txt-or-defining-constraints -; deps = -rrequirements-dev.txt # similar to 'pip install recommenders-*.whl[test]' extras = all commands = @@ -20,6 +18,29 @@ commands = # See https://tox.readthedocs.io/en/latest/example/general.html for more details pytest {posargs} +[testenv:cpu] +# i.e: 'pip install recommenders-*.whl[dev,examples]' +# with this dependency subset, we should be able to run the test markers: +# 1. "not notebooks and not spark and not gpu" (tests for general sdk utilities) +# 2. "notebooks and not spark and not gpu" (tests for notebook examples without extra dependencies) +extras = dev,examples + +[testenv:gpu] +# with this dependency subset, we should be able to run the test markers: +# 1. "gpu and not notebooks and not spark" (tests for gpu utilities) +# 2. "gpu and notebooks and not spark" (tests for notebooks needing gpu resources) +extras = dev,gpu,examples + +[testenv:spark] +# with this dependency subset, we should be able to run the test markers: +# 1. "spark and not notebooks and not gpu" (tests for spark utilities) +# 2. "spark and notebooks and not gpu" (tests for notebooks using spark) +extras = dev,spark,examples + +[testenv:all] +# i.e: 'pip install recommenders-*.whl[all]' +# with this, we should be able to run ANY tests +extras = all [testenv:flake8] deps = flake8 @@ -54,10 +75,11 @@ addopts = --cov-append --cov=recommenders --cov-report=term-missing --cov-report=xml --junitxml=junit/test-results.xml +[coverage:report] +skip_empty = true + + [flake8] -; # Configs for flake8-import-order, see https://pypi.org/project/flake8-import-order/ for more info. -; import-order-style=edited -; application-import-names=recommenders, tests # Native flake8 configs max-line-length = 140 exclude =
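For quick reference, the evaluation refactor above turns the `PythonDiversityEvaluation` class into plain functions. A minimal sketch of the new call pattern (the toy data here is invented for illustration; the call style follows the updated unit tests) looks like this:

```python
import pandas as pd
from recommenders.evaluation.python_evaluation import (
    catalog_coverage,
    diversity,
    novelty,
    serendipity,
)

# Toy interaction data. reco_df must not repeat any user-item pair from
# train_df, otherwise the dtype-checking decorators raise an exception.
train_df = pd.DataFrame({"UserId": [1, 1, 2, 2, 3, 3], "ItemId": [1, 2, 3, 4, 4, 5]})
reco_df = pd.DataFrame(
    {
        "UserId": [1, 1, 2, 2, 3, 3],
        "ItemId": [3, 5, 2, 5, 1, 2],
        "Relevance": [1, 0, 1, 1, 1, 0],
    }
)

# item_sim_measure defaults to item co-occurrence counts, so item_feature_df
# is only needed when passing item_sim_measure="item_feature_vector".
print(catalog_coverage(train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId"))
print(novelty(train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId"))
print(diversity(train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId"))
print(serendipity(train_df=train_df, reco_df=reco_df, col_user="UserId", col_item="ItemId", col_relevance="Relevance"))
```

Since there is no shared evaluator object anymore, repeated calls rely on the `lru_cache_df(maxsize=1)` decorators on the internal helpers to avoid recomputing pairwise item similarities.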