RUCAIBox · chenyushuo · Sep 16, 2021 · Sep 16, 2021 · Sep 16, 2021 · Sep 16, 2021
diff --git a/docs/source/user_guide/config/environment_settings.rst b/docs/source/user_guide/config/environment_settings.rst
@@ -8,6 +8,8 @@ Environment settings are designed to set basic parameters of running environment
 - ``seed (int)`` : Random seed. Defaults to ``2020``.
 - ``state (str)`` : Logging level. Defaults to ``'INFO'``.
   Range in ``['INFO', 'DEBUG', 'WARNING', 'ERROR', 'CRITICAL']``.
+- ``encoding (str)`` : Encoding used when reading atomic files. Defaults to ``'utf-8'``.
+  Available encodings are listed in the `Python standard encodings table <https://docs.python.org/3/library/codecs.html#standard-encodings>`__.
 - ``reproducibility (bool)`` : If True, the tool will use deterministic
   convolution algorithms, which makes the result reproducible. If False,
   the tool will benchmark multiple convolution algorithms and select the fastest one,

diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py
@@ -404,7 +404,8 @@ def _load_feat(self, filepath, source):
         columns = []
         usecols = []
         dtype = {}
-        with open(filepath, 'r') as f:
+        encoding = self.config['encoding']
+        with open(filepath, 'r', encoding=encoding) as f:
             head = f.readline()[:-1]
         for field_type in head.split(field_separator):
             field, ftype = field_type.split(':')
@@ -429,7 +430,9 @@ def _load_feat(self, filepath, source):
             self.logger.warning(f'No columns has been loaded from [{source}]')
             return None
 
-        df = pd.read_csv(filepath, delimiter=self.config['field_separator'], usecols=usecols, dtype=dtype)
+        df = pd.read_csv(
+            filepath, delimiter=self.config['field_separator'], usecols=usecols, dtype=dtype, encoding=encoding
+        )
         df.columns = columns
 
         seq_separator = self.config['seq_separator']
@@ -484,7 +487,7 @@ def _user_item_feat_preparation(self):
         if self.item_feat is not None:
             new_item_df = pd.DataFrame({self.iid_field: np.arange(self.item_num)})
             self.item_feat = pd.merge(new_item_df, self.item_feat, on=self.iid_field, how='left')
-            self.logger.debug(set_color('ordering item features by user id.', 'green'))
+            self.logger.debug(set_color('ordering item features by item id.', 'green'))
 
     def _preload_weight_matrix(self):
         """Transfer preload weight features into :class:`numpy.ndarray` with shape ``[id_token_length]``