FEA: improve save, load and case study function. #939

Merged
merged 5 commits on Aug 23, 2021
6 changes: 3 additions & 3 deletions docs/source/developer_guide/customize_metrics.rst
@@ -7,7 +7,7 @@ Here, it only takes three steps to incorporate a new metric and we introduce the


Sign in Your Metric in Register
-----------------------------
--------------------------------
To begin with, we must add a new line in :obj:`~recbole.evaluator.register.metric_information`:
All the metrics are registered in :obj:`metric_information`, which is a dict. Keys are the names of
the metrics and should be lowercase. Each value is a list containing one or more strings that correspond
@@ -47,7 +47,7 @@ and the total item number, we can sign in the metric as follows.
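
For illustration only (not part of this diff), a hedged sketch of such an entry; the information
strings below, such as ``'rec.items'`` for the recommended item matrix and ``'data.num_items'``
for the total item number, are assumptions and should be checked against
:obj:`~recbole.evaluator.register.metric_information`:

.. code:: python3

    # Hypothetical registration of a metric named 'yourmetric'; the value lists
    # the information this metric needs during evaluation.
    metric_information['yourmetric'] = ['rec.items', 'data.num_items']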


Create a New Metric Class
-----------------------
--------------------------
Then, we create a new class in the file :file:`~recbole.evaluator.metrics` and define its parameters in
``__init__()``.
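
A hedged skeleton of what this can look like; the base class and the config-based constructor are
assumptions modeled on existing metrics such as :class:`ItemCoverage`, so check the file for the
exact interface:

.. code:: python3

    # Hypothetical metric class following the pattern of the other metrics
    # in recbole/evaluator/metrics.py.
    class YourMetric(AbstractMetric):

        def __init__(self, config):
            super().__init__(config)
            self.topk = config['topk']  # read any parameters your metric needs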

@@ -59,7 +59,7 @@ Then, we create a new class in the file :file:`~recbole.evaluator.metrics` and d


Implement calculate_metric(self, dataobject)
------------------------------
---------------------------------------------
The whole computational process is defined in this function. The argument is a packaged data object that
contains all the results above. We can treat it as a dict and get data from it by
``rec_items = dataobject.get('rec.items')``. The returned value should be a dict whose keys are the metric names
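
A minimal sketch of this method, assuming the metric only needs the recommended item matrix
(the actual computation and any rounding are placeholders):

.. code:: python3

    def calculate_metric(self, dataobject):
        # 'rec.items' holds the recommended item matrix for each user.
        rec_items = dataobject.get('rec.items')
        result = float(rec_items.shape[1])  # placeholder computation
        return {'yourmetric': result}       # key is the lowercase metric name
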
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -32,6 +32,7 @@ RecBole v0.2.0
developer_guide/customize_trainers
developer_guide/customize_dataloaders
developer_guide/customize_samplers
developer_guide/customize_metrics


.. toctree::
6 changes: 6 additions & 0 deletions docs/source/user_guide/config_settings.rst
@@ -34,6 +34,12 @@ model training and evaluation.
Defaults to ``'saved/'``.
- ``show_progress (bool)`` : Show the progress of training and evaluation epochs.
Defaults to ``True``.
- ``save_dataset (bool)`` : Whether or not to save the filtered dataset.
If ``True``, the filtered dataset is saved; otherwise it is not.
Defaults to ``False``.
- ``save_dataloaders (bool)`` : Whether or not to save the split dataloaders.
If ``True``, the split dataloaders are saved; otherwise they are not.
Defaults to ``False``. A short example of enabling both is sketched after this list.
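
As a minimal sketch (assuming you pass overrides through a config dict, as in the quick start),
both switches can be enabled like this:

.. code:: python3

    # Hypothetical parameter dict; both values default to False.
    parameter_dict = {
        'save_dataset': True,
        'save_dataloaders': True,
    }
    # e.g. run_recbole(model='BPR', dataset='ml-100k', config_dict=parameter_dict)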

**Training Setting**

4 changes: 3 additions & 1 deletion docs/source/user_guide/usage.rst
@@ -11,4 +11,6 @@ Here we introduce how to use RecBole.
usage/running_new_dataset
usage/running_different_models
usage/qa
usage/load_pretrained_embedding
usage/load_pretrained_embedding
usage/save_and_load_data_and_model
usage/case_study
81 changes: 81 additions & 0 deletions docs/source/user_guide/usage/case_study.rst
@@ -0,0 +1,81 @@
Case study
=============

A case study is an in-depth analysis of the performance of a specific recommendation algorithm;
it examines the recommendation results of particular users.
In RecBole, we implement :meth:`~recbole.utils.case_study.full_sort_scores`
and :meth:`~recbole.utils.case_study.full_sort_topk` for case study purposes.
In this section, we present a typical usage of these two functions.
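
The snippets below assume the following imports; the module paths are taken from the references
above, so treat this as a sketch and adjust it to your installation:

.. code:: python3

    from recbole.quick_start import load_data_and_model
    from recbole.utils.case_study import full_sort_scores, full_sort_topk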

Reload model
-------------

First, we need to reload the recommendation model.
We can use :meth:`~recbole.quick_start.quick_start.load_data_and_model` to load the saved data and model.

.. code:: python3

config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
model_file='../saved/BPR-Aug-20-2021_03-32-13.pth',
) # Here you can replace it with your own model path.

Convert external user id into internal user id
-------------------------------------------------

Then, we need to use :meth:`~recbole.data.dataset.dataset.Dataset.token2id`
to convert the external user ids that we want to study into internal user ids.

.. code:: python3

uid_series = dataset.token2id(dataset.uid_field, ['196', '186'])

Get scores of every user-item pair
-------------------------------------

If we want to calculate the score of every user-item pair for the given users,
we can call the :meth:`~recbole.utils.case_study.full_sort_scores` function to get the score matrix.

.. code:: python3

score = full_sort_scores(uid_series, model, test_data, device=config['device'])
print(score) # score of all items
print(score[0, dataset.token2id(dataset.iid_field, ['242', '302'])])
# scores of items '242' and '302' for user '196'.

The output will be like this:

.. code:: none

tensor([[ -inf, -inf, 0.1074, ..., -0.0966, -0.1217, -0.0966],
[ -inf, -0.0013, -inf, ..., -0.1115, -0.1089, -0.1196]],
device='cuda:0')
tensor([ -inf, 0.1074], device='cuda:0')

Note that the scores of ``[pad]`` and of history items (for non-repeatable recommendation) are set to ``-inf``.

Get the top ranked item for each user
--------------------------------------

If we want to get the top ranked items for the given users,
we can call the :meth:`~recbole.utils.case_study.full_sort_topk` function to get the scores and internal ids of these items.

.. code:: python3

topk_score, topk_iid_list = full_sort_topk(uid_series, model, test_data, k=10, device=config['device'])
print(topk_score) # scores of top 10 items
print(topk_iid_list) # internal id of top 10 items
external_item_list = dataset.id2token(dataset.iid_field, topk_iid_list.cpu())
print(external_item_list) # external tokens of top 10 items

The output will be like this:

.. code:: none

tensor([[0.1985, 0.1947, 0.1850, 0.1849, 0.1822, 0.1770, 0.1770, 0.1765, 0.1752,
0.1744],
[0.2487, 0.2379, 0.2351, 0.2311, 0.2293, 0.2239, 0.2215, 0.2156, 0.2137,
0.2114]], device='cuda:0')
tensor([[ 50, 32, 158, 210, 13, 100, 201, 61, 167, 312],
[102, 312, 358, 100, 32, 53, 167, 472, 162, 201]], device='cuda:0')
[['100' '98' '258' '7' '222' '496' '318' '288' '216' '176']
['174' '176' '50' '496' '98' '181' '216' '28' '172' '318']]
67 changes: 67 additions & 0 deletions docs/source/user_guide/usage/save_and_load_data_and_model.rst
@@ -0,0 +1,67 @@
Save and load data and model
==============================

In this section, we present how to save and load the data and the model.

Save data and model
--------------------

When we use the :meth:`~recbole.quick_start.quick_start.run_recbole` function mentioned in :doc:`run_recbole`,
it saves the best model parameters found during training together with the corresponding config settings.
If you also want to save the filtered dataset and the split dataloaders,
you can set the parameters :attr:`save_dataset` and :attr:`save_dataloaders` to ``True``.

You can refer to :doc:`../config_settings` for more details about :attr:`save_dataset` and :attr:`save_dataloaders`.
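
As a hedged sketch of turning both on from Python (assuming the quick-start entry point accepts
a ``config_dict`` override; ``'BPR'`` and ``'ml-100k'`` match the log below):

.. code:: python3

    from recbole.quick_start import run_recbole

    # Save the filtered dataset and the split dataloaders in addition to
    # the best model checkpoint.
    run_recbole(
        model='BPR',
        dataset='ml-100k',
        config_dict={'save_dataset': True, 'save_dataloaders': True},
    )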

Here we present a typical output when the two parameters above are ``True``:

.. code:: none

21 Aug 13:05 INFO Saving filtered dataset into [saved/ml-100k-dataset.pth]
21 Aug 13:05 INFO ml-100k
The number of users: 944
Average actions of users: 106.04453870625663
The number of items: 1683
Average actions of items: 59.45303210463734
The number of inters: 100000
The sparsity of the dataset: 93.70575143257098%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']
21 Aug 13:05 INFO Saved split dataloaders: saved/ml-100k-for-BPR-dataloader.pth
21 Aug 13:06 INFO BPR(
(user_embedding): Embedding(944, 64)
(item_embedding): Embedding(1683, 64)
(loss): BPRLoss()
)
Trainable parameters: 168128
Train 0: 100%|█████████████████████████| 40/40 [00:01<00:00, 32.52it/s, GPU RAM: 0.01 G/11.91 G]
21 Aug 13:06 INFO epoch 0 training [time: 1.24s, train loss: 27.7228]
Evaluate : 100%|███████████████████████| 472/472 [00:04<00:00, 94.53it/s, GPU RAM: 0.01 G/11.91 G]
21 Aug 13:06 INFO epoch 0 evaluating [time: 5.00s, valid_score: 0.020500]
21 Aug 13:06 INFO valid result:
recall@10 : 0.0067 mrr@10 : 0.0205 ndcg@10 : 0.0086 hit@10 : 0.0732 precision@10 : 0.0081
21 Aug 13:06 INFO Saving current best: saved/BPR-Aug-21-2021_13-06-00.pth

...

As we can see, the filtered dataset is saved to ``saved/ml-100k-dataset.pth``,
the split dataloaders are saved to ``saved/ml-100k-for-BPR-dataloader.pth``,
and the model is saved to ``saved/BPR-Aug-21-2021_13-06-00.pth``.

Load data and model
--------------------

If you want to reload the data and model,
you can call :meth:`~recbole.quick_start.quick_start.load_data_and_model` to get them.
You can also pass :attr:`dataset_file` and :attr:`dataloader_file` to this function to reload the data from files,
which can reduce the time of data filtering and data splitting.

Here we present a typical usage of :meth:`~recbole.quick_start.quick_start.load_data_and_model`:

.. code:: python3

config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
model_file='saved/BPR-Aug-21-2021_13-06-00.pth',
)
# Here you can replace it with your own model path.
# You can also pass 'dataset_file' and 'dataloader_file' to this function,
# as shown in the sketch below.
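
A hedged sketch of also reloading the saved dataset and dataloaders; the file names are taken
from the log above, and the keyword names follow the :attr:`dataset_file` and
:attr:`dataloader_file` parameters mentioned earlier:

.. code:: python3

    config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
        model_file='saved/BPR-Aug-21-2021_13-06-00.pth',
        dataset_file='saved/ml-100k-dataset.pth',
        dataloader_file='saved/ml-100k-for-BPR-dataloader.pth',
    )
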
2 changes: 1 addition & 1 deletion recbole/config/configurator.py
@@ -113,7 +113,7 @@ def _convert_config_dict(self, config_dict):
continue
try:
value = eval(param)
if not isinstance(value, (str, int, float, list, tuple, dict, bool, Enum)):
if value is not None and not isinstance(value, (str, int, float, list, tuple, dict, bool, Enum)):
value = param
except (NameError, SyntaxError, TypeError):
if isinstance(param, str):
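# Illustration (not part of this diff): with the added "value is not None" guard,
# a parameter written as the string "None" is now converted to Python None instead
# of being turned back into the raw string, which is what the old condition did.
from enum import Enum

param = 'None'
value = eval(param)  # -> None
if value is not None and not isinstance(value, (str, int, float, list, tuple, dict, bool, Enum)):
    value = param  # only objects that cannot be kept as config values fall back to the string
print(type(value))  # <class 'NoneType'>; under the old check this was the string 'None'
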
2 changes: 1 addition & 1 deletion recbole/data/dataset/__init__.py
@@ -1,6 +1,6 @@
from recbole.data.dataset.dataset import Dataset
from recbole.data.dataset.sequential_dataset import SequentialDataset
from recbole.data.dataset.kg_dataset import KnowledgeBasedDataset
from recbole.data.dataset.kg_seq_dataset import Kg_Seq_Dataset
from recbole.data.dataset.kg_seq_dataset import KGSeqDataset
from recbole.data.dataset.decisiontree_dataset import DecisionTreeDataset
from recbole.data.dataset.customized_dataset import *
6 changes: 3 additions & 3 deletions recbole/data/dataset/customized_dataset.py
@@ -19,19 +19,19 @@
import numpy as np
import torch

from recbole.data.dataset import Kg_Seq_Dataset, SequentialDataset
from recbole.data.dataset import KGSeqDataset, SequentialDataset
from recbole.data.interaction import Interaction
from recbole.sampler import SeqSampler
from recbole.utils.enum_type import FeatureType


class GRU4RecKGDataset(Kg_Seq_Dataset):
class GRU4RecKGDataset(KGSeqDataset):

def __init__(self, config):
super().__init__(config)


class KSRDataset(Kg_Seq_Dataset):
class KSRDataset(KGSeqDataset):

def __init__(self, config):
super().__init__(config)
12 changes: 3 additions & 9 deletions recbole/data/dataset/dataset.py
@@ -1485,16 +1485,10 @@ def build(self):

return datasets

def save(self, filepath):
"""Saving this :class:`Dataset` object to local path.

Args:
filepath (str): path of saved dir.
def save(self):
"""Saving this :class:`Dataset` object to :attr:`config['checkpoint_dir']`.
"""
if (filepath is None) or (not os.path.isdir(filepath)):
raise ValueError(f'Filepath [{filepath}] need to be a dir.')

file = os.path.join(filepath, f'{self.config["dataset"]}-dataset.pth')
file = os.path.join(self.config['checkpoint_dir'], f'{self.config["dataset"]}-dataset.pth')
self.logger.info(set_color('Saving filtered dataset into ', 'pink') + f'[{file}]')
with open(file, 'wb') as f:
pickle.dump(self, f)
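# Usage sketch (not part of this diff): after this change the caller no longer passes a
# directory; the dataset derives its path from config['checkpoint_dir']. The Config and
# create_dataset helpers below are the usual RecBole entry points, assumed for illustration.
from recbole.config import Config
from recbole.data import create_dataset

config = Config(model='BPR', dataset='ml-100k')
dataset = create_dataset(config)
dataset.save()  # writes <checkpoint_dir>/ml-100k-dataset.pth
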
3 changes: 0 additions & 3 deletions recbole/data/dataset/kg_dataset.py
@@ -148,9 +148,6 @@ def _build_feat_name_list(self):
feat_name_list.append('kg_feat')
return feat_name_list

def save(self, filepath):
raise NotImplementedError()

def _load_kg(self, token, dataset_path):
self.logger.debug(set_color(f'Loading kg from [{dataset_path}].', 'green'))
kg_path = os.path.join(dataset_path, f'{token}.kg')
2 changes: 1 addition & 1 deletion recbole/data/dataset/kg_seq_dataset.py
@@ -10,7 +10,7 @@
from recbole.data.dataset import SequentialDataset, KnowledgeBasedDataset


class Kg_Seq_Dataset(SequentialDataset, KnowledgeBasedDataset):
class KGSeqDataset(SequentialDataset, KnowledgeBasedDataset):
"""Containing both processing of Sequential Models and Knowledge-based Models.

Inherit from :class:`~recbole.data.dataset.sequential_dataset.SequentialDataset` and
20 changes: 10 additions & 10 deletions recbole/evaluator/metrics.py
@@ -8,7 +8,7 @@
# @Author : Kaiyuan Li, Zhichao Feng, Xingyu Pan, Zihan Lin
# @email : [email protected], [email protected], [email protected], [email protected]

"""
r"""
recbole.evaluator.metrics
############################

@@ -227,7 +227,7 @@ class GAUC(AbstractMetric):
the area under the ROC curve grouped by user. We weight the AUC of each user :math:`u` by the number of positive
samples of that user to get the final result.

For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3219819.3219823>`_
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3219819.3219823>`__

Note:
It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC.
@@ -421,8 +421,8 @@ class ItemCoverage(AbstractMetric):

.. _ItemCoverage: https://en.wikipedia.org/wiki/Coverage_(information_systems)

For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/1864708.1864761>`_
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`_.
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/1864708.1864761>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__.

.. math::
\mathrm{Coverage@K}=\frac{\left| \bigcup_{u \in U} \hat{R}(u) \right|}{|I|}
@@ -462,8 +462,8 @@ def get_coverage(self, item_matrix, num_items):
class AveragePopularity(AbstractMetric):
r"""AveragePopularity computes the average popularity of recommended items.

For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`_
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`_.
For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__.

.. math::
\mathrm{AveragePopularity@K}=\frac{1}{|U|} \sum_{u \in U } \frac{\sum_{i \in R_{u}} \phi(i)}{|R_{u}|}
@@ -530,8 +530,8 @@ class ShannonEntropy(AbstractMetric):

.. _ShannonEntropy: https://en.wikipedia.org/wiki/Entropy_(information_theory)

For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`_
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`_
For further details, please refer to the `paper <https://arxiv.org/abs/1205.6700>`__
and `paper <https://link.springer.com/article/10.1007/s13042-017-0762-9>`__

.. math::
\mathrm {ShannonEntropy@K}=-\sum_{i=1}^{|I|} p(i) \log p(i)
@@ -582,7 +582,7 @@ class GiniIndex(AbstractMetric):

.. _GiniIndex: https://en.wikipedia.org/wiki/Gini_coefficient

For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3308560.3317303>`_.
For further details, please refer to the `paper <https://dl.acm.org/doi/10.1145/3308560.3317303>`__.

.. math::
\mathrm {GiniIndex@K}=\left(\frac{\sum_{i=1}^{|I|}(2 i-|I|-1) P{(i)}}{|I| \sum_{i=1}^{|I|} P{(i)}}\right)
@@ -633,7 +633,7 @@ class TailPercentage(AbstractMetric):

.. _TailPercentage: https://en.wikipedia.org/wiki/Long_tail#Criticisms

For further details, please refer to the `paper <https://arxiv.org/pdf/2007.12329.pdf>`_.
For further details, please refer to the `paper <https://arxiv.org/pdf/2007.12329.pdf>`__.

.. math::
\mathrm {TailPercentage@K}=\frac{1}{|U|} \sum_{u \in U} \frac{\sum_{i \in R_{u}} {\delta(i \in T)}}{|R_{u}|}
2 changes: 2 additions & 0 deletions recbole/properties/overall.yaml
@@ -7,6 +7,8 @@ reproducibility: True
data_path: 'dataset/'
checkpoint_dir: 'saved'
show_progress: True
save_dataset: False
save_dataloaders: False

# training settings
epochs: 300
2 changes: 1 addition & 1 deletion recbole/quick_start/__init__.py
@@ -1 +1 @@
from recbole.quick_start.quick_start import run_recbole, objective_function
from recbole.quick_start.quick_start import run_recbole, objective_function, load_data_and_model