Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mar262024-Deleted irrelevant Output Files | Implemented Leave One Out | Tried Uni 20 #66

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .run.slurm.swp
Binary file not shown.
44 changes: 44 additions & 0 deletions config_files/original_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
seed: 2022
field_separator: "\t"
source_domain:
dataset: "/home/dmeher/datasets/AmazonBooks/AmazonBooks"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: rating
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

target_domain:
dataset: "/home/dmeher/datasets/AmazonMov/AmazonMov"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: rating
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

epochs: 5
train_batch_size: 4096
eval_batch_size: 409600
valid_metric: RMSE
embedding_size: 64
eval_setting: TO_LS,full
metrics: ['RMSE', 'MAE']
eval_args:
mode: labeled
  split: {'LS':'valid_and_test'}
61 changes: 61 additions & 0 deletions config_files/ranking_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
seed: 2022
field_separator: "\t"
source_domain:
dataset: "/home/dmeher/datasets/AmazonBooks/AmazonBooks"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: label
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

target_domain:
dataset: "/home/dmeher/datasets/AmazonMov/AmazonMov"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: label
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

epochs: 1
train_batch_size: 4096
eval_batch_size: 409600
valid_metric: NDCG@10
embedding_size: 64

eval_args:
split: {'LS':'valid_and_test'}
mode: 'uni100'
#mode: 'full'



#epochs: 1
#train_batch_size: 4096
#eval_batch_size: 409600
#valid_metric: NDCG@10
#embedding_size: 64
#metrics: ["Recall","MRR","NDCG","Hit","Precision"] # (list or str) Evaluation metrics.
#topk: [10] # (list or int or None) The value of k for topk evaluation metrics.
#valid_metric: MRR@10 # (str) The evaluation metric for early stopping.
#valid_metric_bigger: True
#metric_decimal_place: 4
#eval_args:
# split: {'LS':'valid_and_test'}
# group_by: user
# mode: full
44 changes: 44 additions & 0 deletions config_files/rating_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
seed: 2022
field_separator: "\t"
source_domain:
dataset: "/home/dmeher/datasets/AmazonBooks/AmazonBooks"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: rating
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

target_domain:
dataset: "/home/dmeher/datasets/AmazonMov/AmazonMov"
USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
RATING_FIELD: rating
TIME_FIELD: timestamp
NEG_PREFIX: neg_
LABEL_FIELD: rating
load_col:
inter: [user_id, item_id, rating]
user_inter_num_interval: "[10,inf)"
item_inter_num_interval: "[10,inf)"
val_interval:
rating: "[3,inf)"
drop_filter_field: True

epochs: 5
train_batch_size: 4096
eval_batch_size: 409600
valid_metric: RMSE
embedding_size: 64
metrics: ['RMSE', 'MAE']
eval_setting: TO_LS,full
drop_filter_field: True
eval_args:
mode: labeled
76 changes: 73 additions & 3 deletions recbole_cdr/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,72 @@ def get_history_matrix(self, user_num, item_num, row, value_field=None):

return torch.LongTensor(history_matrix), torch.FloatTensor(history_value), torch.LongTensor(history_len)

def _split_index_by_leave_one_out(self, grouped_index, leave_one_num):
"""Split indexes by strategy leave one out.

Args:
grouped_index (list of list of int): Index to be split.
leave_one_num (int): Number of parts whose length is expected to be ``1``.

Returns:
list: List of index that has been split.
"""
next_index = [[] for _ in range(leave_one_num + 1)]
for index in grouped_index:
index = list(index)
tot_cnt = len(index)
legal_leave_one_num = min(leave_one_num, tot_cnt - 1)
pr = tot_cnt - legal_leave_one_num
next_index[0].extend(index[:pr])
for i in range(legal_leave_one_num):
next_index[-legal_leave_one_num + i].append(index[pr])
pr += 1
return next_index

def leave_one_out(self, group_by, leave_one_mode):
    """Split interaction records by leave one out strategy.

    Args:
        group_by (str): Field name that interaction records should grouped by before splitting.
        leave_one_mode (str): The way to leave one out. It can only take three values:
            'valid_and_test', 'valid_only' and 'test_only'.

    Returns:
        list: List of :class:`~Dataset`, whose interaction features has been split.
    """
    self.logger.debug(
        f"leave one out, group_by=[{group_by}], leave_one_mode=[{leave_one_mode}]"
    )
    if group_by is None:
        raise ValueError("leave one out strategy require a group field")

    # Group interaction row indexes by the grouping field (typically user id).
    grouped = self._grouped_index(self.inter_feat[group_by].numpy())

    # Build a three-way [train, valid, test] index split; unused splits stay empty.
    if leave_one_mode == "valid_and_test":
        split_index = self._split_index_by_leave_one_out(grouped, leave_one_num=2)
    elif leave_one_mode == "valid_only":
        split_index = self._split_index_by_leave_one_out(grouped, leave_one_num=1)
        split_index.append([])  # no test split
    elif leave_one_mode == "test_only":
        train_index, test_index = self._split_index_by_leave_one_out(
            grouped, leave_one_num=1
        )
        split_index = [train_index, [], test_index]  # no valid split
    else:
        raise NotImplementedError(
            f"The leave_one_mode [{leave_one_mode}] has not been implemented."
        )

    self._drop_unused_col()
    # Materialize each index split as a shallow dataset copy over its rows.
    return [self.copy(self.inter_feat[index]) for index in split_index]

def split_train_valid(self):
self._change_feat_format()

Expand All @@ -266,7 +332,7 @@ def split_train_valid(self):
raise NotImplementedError(f'The ordering_method [{ordering_args}] has not been implemented.')

# splitting & grouping
split_args = self.config['eval_args']['split_valid']
split_args = self.config['eval_args']['split']
if split_args is None:
raise ValueError('The split_args in eval_args should not be None.')
if not isinstance(split_args, dict):
Expand All @@ -284,9 +350,13 @@ def split_train_valid(self):
datasets = self.split_by_ratio(split_args['RS'], group_by=self.uid_field)
else:
raise NotImplementedError(f'The grouping method [{group_by}] has not been implemented.')
elif split_mode == "LS":
datasets = self.leave_one_out(
group_by=self.uid_field, leave_one_mode=split_args["LS"]
)
else:
raise NotImplementedError(f'The splitting_method [{split_mode}] has not been implemented.')

return datasets


Expand Down Expand Up @@ -563,7 +633,7 @@ def build(self):
return [source_domain_train_dataset, None, target_domain_train_dataset,
target_domain_valid_dataset, target_domain_test_dataset]
else:
source_domain_train_dataset, source_domain_valid_dataset = self.source_domain_dataset.split_train_valid()
source_domain_train_dataset, source_domain_valid_dataset, source_domain_test_dataset = self.source_domain_dataset.split_train_valid()
return [source_domain_train_dataset, source_domain_valid_dataset, target_domain_train_dataset,
target_domain_valid_dataset, target_domain_test_dataset]

Expand Down