Add CMBF model #217

Merged (16 commits, Jun 23, 2022)
Changes from 3 commits
1 change: 1 addition & 0 deletions .git_bin_path
@@ -24,6 +24,7 @@
{"leaf_name": "data/test/inference/tb_multitower_rtp_export/assets", "leaf_file": ["data/test/inference/tb_multitower_rtp_export/assets/pipeline.config"]}
{"leaf_name": "data/test/inference/tb_multitower_rtp_export/variables", "leaf_file": ["data/test/inference/tb_multitower_rtp_export/variables/variables.data-00000-of-00001", "data/test/inference/tb_multitower_rtp_export/variables/variables.index"]}
{"leaf_name": "data/test/latest_ckpt_test", "leaf_file": ["data/test/latest_ckpt_test/model.ckpt-500.data-00000-of-00001", "data/test/latest_ckpt_test/model.ckpt-500.index", "data/test/latest_ckpt_test/model.ckpt-500.meta"]}
{"leaf_name": "data/test/movielens_1m", "leaf_file": ["data/test/movielens_1m/ml_test_data", "data/test/movielens_1m/ml_train_data"]}
{"leaf_name": "data/test/rtp", "leaf_file": ["data/test/rtp/taobao_fg_pred.out", "data/test/rtp/taobao_test_bucketize_feature.txt", "data/test/rtp/taobao_test_feature.txt", "data/test/rtp/taobao_test_input.txt", "data/test/rtp/taobao_train_bucketize_feature.txt", "data/test/rtp/taobao_train_feature.txt", "data/test/rtp/taobao_train_input.txt", "data/test/rtp/taobao_valid.csv", "data/test/rtp/taobao_valid_feature.txt"]}
{"leaf_name": "data/test/tb_data", "leaf_file": ["data/test/tb_data/taobao_ad_feature_gl", "data/test/tb_data/taobao_clk_edge_gl", "data/test/tb_data/taobao_multi_seq_test_data", "data/test/tb_data/taobao_multi_seq_train_data", "data/test/tb_data/taobao_noclk_edge_gl", "data/test/tb_data/taobao_test_data", "data/test/tb_data/taobao_test_data_compress.gz", "data/test/tb_data/taobao_test_data_for_expr", "data/test/tb_data/taobao_test_data_kd", "data/test/tb_data/taobao_train_data", "data/test/tb_data/taobao_train_data_for_expr", "data/test/tb_data/taobao_train_data_kd", "data/test/tb_data/taobao_user_profile_gl"]}
{"leaf_name": "data/test/tb_data_with_time", "leaf_file": ["data/test/tb_data_with_time/taobao_test_data_with_time", "data/test/tb_data_with_time/taobao_train_data_with_time"]}
102 changes: 102 additions & 0 deletions docs/source/models/cmbf.md
@@ -0,0 +1,102 @@
# CMBF

### Introduction

Cross-Modal-Based Fusion Recommendation Algorithm (CMBF) is a model that captures the cross information between multiple modalities. It alleviates the data-sparsity problem and is friendly to cold-start items.

Note: the CMBF model requires all text-side (feature group=text) input features to have the same embedding_dim.

![CMFB_framework_v2](https://cdn.jsdelivr.net/gh/yangxudong/blogimg@master/rec/CMFB_framework_v2.jpg)

CMBF consists of four main modules:

1. Preprocessing module: extracts image and text features
2. Single-modal learning module: learns semantic features of the image and text with Transformers
3. Cross-modal fusion module: learns the cross information between the two modalities
4. Output module: obtains high-order features and predicts the result

The visual feature extraction module is usually a CNN-based model that extracts image or video features to be fed into the subsequent transformer module.

The text features are a concatenation of several other commonly used features, including numeric features, single-value categorical features, and multi-value categorical features. Each feature must be converted into an embedding of the same dimension so that it can be fed into the subsequent transformer module.
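
The per-feature embeddings described above are stacked into a `[batch, num_features, embedding_dim]` tensor before entering the transformer. A minimal numpy sketch of this stacking (the feature names and sizes here are illustrative, not the model's actual internals):

```python
import numpy as np

batch, embedding_dim = 4, 8

# each text-side feature yields a [batch, embedding_dim] embedding
feature_embeddings = {
    'user_id': np.random.rand(batch, embedding_dim),
    'movie_id': np.random.rand(batch, embedding_dim),
    'genres': np.random.rand(batch, embedding_dim),
}

# stack along a new "token" axis: [batch, num_features, embedding_dim]
text_tokens = np.stack(list(feature_embeddings.values()), axis=1)
print(text_tokens.shape)  # (4, 3, 8)
```

This is why all text-side features must share one `embedding_dim`: `np.stack` (and the transformer that follows) requires every token to have the same width.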

The single-modal learning module uses a standard transformer structure, as shown below:
![CMBF_feature_learning](https://cdn.jsdelivr.net/gh/yangxudong/blogimg@master/rec/CMBF_feature_learning.jpg)

The cross-modal fusion module uses a cross-attention structure, as shown below:

![cross-model-fusion-layer](https://cdn.jsdelivr.net/gh/yangxudong/blogimg@master/rec/cross-model-fusion-layer.jpg)
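
In the figure above, one modality supplies the queries while the other supplies the keys and values, and vice versa. A single-head, unparameterized numpy sketch of this idea (the real module is multi-headed, with learned projections, and repeated `cross_modal_layer_num` times):

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def cross_attention(query_tokens, context_tokens):
    # query_tokens: [n_q, d] from one modality;
    # context_tokens: [n_c, d] from the other modality
    d = query_tokens.shape[-1]
    scores = query_tokens @ context_tokens.T / np.sqrt(d)  # [n_q, n_c]
    return softmax(scores) @ context_tokens                # [n_q, d]

img = np.random.rand(5, 16)   # e.g. 5 image regions
txt = np.random.rand(9, 16)   # e.g. 9 text-side feature tokens

img_fused = cross_attention(img, txt)  # image tower attends to text
txt_fused = cross_attention(txt, img)  # text tower attends to image
print(img_fused.shape, txt_fused.shape)  # (5, 16) (9, 16)
```

Each tower keeps its own token count but mixes in information from the other modality, which is what allows the model to learn cross-modal interactions.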

### Configuration

```protobuf
model_config: {
model_class: 'CMBF'
feature_groups: {
group_name: 'image'
feature_names: 'embedding'
wide_deep: DEEP
}
feature_groups: {
group_name: 'text'
feature_names: 'user_id'
feature_names: 'movie_id'
feature_names: 'gender'
feature_names: 'age'
feature_names: 'occupation'
feature_names: 'zip_id'
feature_names: 'genres'
feature_names: 'movie_year_bin'
wide_deep: DEEP
}
feature_groups: {
group_name: 'text_seq'
feature_names: 'title'
wide_deep: DEEP
}
cmbf {
image_feature_dim: 64
multi_head_num: 2
image_head_size: 8
text_head_size: 8
image_self_attention_layer_num: 2
text_self_attention_layer_num: 2
cross_modal_layer_num: 3
image_cross_head_size: 16
text_cross_head_size: 16
final_dnn: {
hidden_units: 64
}
}
embedding_regularization: 0
}
```

- model_class: 'CMBF', which does not need to be modified

- feature_groups:

  - Configure a feature group named `image`, containing a single image feature, or a set of image features with the same embedding_size (corresponding to multiple frames of a video, or multiple regions of an image).
  - Configure a feature group named `text`, containing all features that participate in cross-modal attention; these features must have the same `embedding_dim`.
  - [Optional] Configure a feature group named `other`, containing other features that do not need cross-modal attention, such as various statistical features.

- cmbf: parameters of the CMBF model

  - image_feature_dim: the image feature dimension is adjusted to this value before the single-modal learning module
  - multi_head_num: number of attention heads in the single-modal learning module and the cross-modal fusion module; defaults to 1
  - image_head_size: size of each head of the multi-headed self-attention in the image tower of the single-modal learning module
  - text_head_size: size of each head of the multi-headed self-attention in the text tower of the single-modal learning module
  - image_region_num: [optional, defaults to 1] the number of image regions, or the number of CNN filters. Takes effect only when there is a single image feature, meaning that the image feature is a composite embedding: the flattened result of a feature with shape [image_region_num, embedding_size].
  - image_self_attention_layer_num: number of multi-headed self-attention layers in the image tower of the single-modal learning module
  - text_self_attention_layer_num: number of multi-headed self-attention layers in the text tower of the single-modal learning module
  - cross_modal_layer_num: number of layers in the cross-modal fusion module; a value between 1 and 5 is recommended; defaults to 1
  - image_cross_head_size: size of each head of the multi-headed attention in the image tower of the cross-modal fusion module
  - text_cross_head_size: size of each head of the multi-headed attention in the text tower of the cross-modal fusion module
  - final_dnn: MLP configuration of the output module

- embedding_regularization: regularization applied to the embedding part to prevent overfitting
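
Given the sample config above, the total attention width of each tower is the head count times the per-head size. A sketch of the implied arithmetic (the internal variable names here are an assumption, not EasyRec's actual identifiers):

```python
# values taken from the sample config above
multi_head_num = 2
image_head_size, text_head_size = 8, 8
image_cross_head_size, text_cross_head_size = 16, 16

# total self-attention width of each single-modal tower
image_self_attn_dim = multi_head_num * image_head_size    # 2 * 8 = 16
text_self_attn_dim = multi_head_num * text_head_size      # 2 * 8 = 16

# total attention width of each tower in the cross-modal fusion module
image_cross_dim = multi_head_num * image_cross_head_size  # 2 * 16 = 32
text_cross_dim = multi_head_num * text_cross_head_size    # 2 * 16 = 32

print(image_self_attn_dim, text_self_attn_dim, image_cross_dim, text_cross_dim)
```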

### Example Config

[CMBF_demo.config](https://easyrec.oss-cn-beijing.aliyuncs.com/config/cmbf.config)

### Reference

[CMBF: Cross-Modal-Based Fusion Recommendation Algorithm](https://www.mdpi.com/1424-8220/21/16/5275)
1 change: 1 addition & 0 deletions docs/source/models/dropoutnet.md
@@ -123,3 +123,4 @@ model_config {
### Reference

[DropoutNet.pdf](https://papers.nips.cc/paper/2017/file/dbd22ba3bd0df8f385bdac3e9f8be207-Paper.pdf)
[In-depth Analysis and Improvement of the Cold-Start Recommendation Model DropoutNet](https://zhuanlan.zhihu.com/p/474671484)
2 changes: 1 addition & 1 deletion easy_rec/python/builders/loss_builder.py
@@ -34,7 +34,7 @@ def build(loss_type, label, pred, loss_weight=1.0, num_class=1, **kwargs):
elif loss_type == LossType.F1_REWEIGHTED_LOSS:
beta_square = kwargs['beta_square'] if 'beta_square' in kwargs else 1.0
return f1_reweight_sigmoid_cross_entropy(
pred, label, beta_square, weights=loss_weight)
label, pred, beta_square, weights=loss_weight)
else:
raise ValueError('unsupported loss type: %s' % LossType.Name(loss_type))
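
The argument swap in this hunk matters because sigmoid cross-entropy is not symmetric in its first two arguments: one is treated as the label and the other as the logit. A minimal numpy sketch of the standard numerically stable formula shows that swapping them changes the result (this is an illustration of the bug class, not EasyRec's `f1_reweight_sigmoid_cross_entropy` itself):

```python
import numpy as np

def sigmoid_cross_entropy(labels, logits):
    # stable form: max(x, 0) - x*z + log(1 + exp(-|x|)),
    # where x = logits and z = labels
    x = np.asarray(logits, dtype=float)
    z = np.asarray(labels, dtype=float)
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

labels = np.array([1.0, 0.0])
logits = np.array([2.0, -1.0])

right = sigmoid_cross_entropy(labels, logits)
wrong = sigmoid_cross_entropy(logits, labels)  # swapped, as in the old code
print(np.allclose(right, wrong))  # prints False
```

With the arguments swapped, the labels are pushed through the logit formula (they can even yield negative "losses"), which is why the fix reorders `label, pred`.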

9 changes: 6 additions & 3 deletions easy_rec/python/compat/feature_column/feature_column_v2.py
@@ -817,7 +817,8 @@ def embedding_column(categorical_column,
max_norm=None,
trainable=True,
partitioner=None,
use_embedding_variable=False):
use_embedding_variable=False,
max_seq_len=-1):
"""`DenseColumn` that converts from sparse, categorical input.

Use this when your inputs are sparse, but you want to convert them to a dense
@@ -878,6 +879,7 @@ def model_fn(features, ...):
`None`.
max_norm: If not `None`, embedding values are l2-normalized to this value.
trainable: Whether or not the embedding is trainable. Default is True.
max_seq_len: Max sequence length. Default is -1.

Returns:
`DenseColumn` that converts from sparse input.
@@ -913,7 +915,8 @@ def model_fn(features, ...):
max_norm=max_norm,
trainable=trainable,
partitioner=partitioner,
use_embedding_variable=use_embedding_variable)
use_embedding_variable=use_embedding_variable,
max_seq_len=max_seq_len)


def shared_embedding_columns(categorical_columns,
@@ -3433,7 +3436,7 @@ class EmbeddingColumn(
'EmbeddingColumn',
('categorical_column', 'dimension', 'combiner', 'initializer',
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
'partitioner', 'use_embedding_variable'))):
'partitioner', 'use_embedding_variable', 'max_seq_len'))):
"""See `embedding_column`."""

@property
3 changes: 2 additions & 1 deletion easy_rec/python/feature_column/feature_column.py
@@ -543,7 +543,8 @@ def _add_deep_embedding_column(self, fc, config):
combiner=config.combiner,
initializer=initializer,
partitioner=self._build_partitioner(config),
use_embedding_variable=self._use_embedding_variable or config.use_embedding_variable)
use_embedding_variable=self._use_embedding_variable or config.use_embedding_variable,
max_seq_len=config.max_seq_len if config.HasField('max_seq_len') else -1)
if config.feature_type != config.SequenceFeature:
self._deep_columns[feature_name] = fc
else:
4 changes: 2 additions & 2 deletions easy_rec/python/input/input.py
@@ -447,7 +447,7 @@ def _preprocess(self, field_dict):
# raw values to a vector, it maybe better implemented
# by a ProjectionColumn later
logging.info(
'Not set boundaries or num_buckets or hash_bucket_size, %s will process as two dimentsion raw feature'
'Not set boundaries or num_buckets or hash_bucket_size, %s will process as two dimension raw feature'
% input_0)
parsed_dict[input_0] = tf.sparse_to_dense(
parsed_dict[input_0].indices,
@@ -481,7 +481,7 @@ def _preprocess(self, field_dict):
# raw values to a vector, it maybe better implemented
# by a ProjectionColumn later
logging.info(
'Not set boundaries or num_buckets or hash_bucket_size, %s will process as three dimentsion raw feature'
'Not set boundaries or num_buckets or hash_bucket_size, %s will process as three dimension raw feature'
% input_0)
parsed_dict[input_0] = tf.sparse_to_dense(
parsed_dict[input_0].indices, [
3 changes: 3 additions & 0 deletions easy_rec/python/layers/input_layer.py
@@ -14,6 +14,7 @@
from easy_rec.python.layers import variational_dropout_layer
from easy_rec.python.layers.common_layers import text_cnn
from easy_rec.python.protos.feature_config_pb2 import WideOrDeep
from easy_rec.python.utils import shape_utils

from easy_rec.python.compat.feature_column.feature_column import _SharedEmbeddingColumn # NOQA
from easy_rec.python.compat.feature_column.feature_column_v2 import EmbeddingColumn # NOQA
@@ -278,6 +279,8 @@ def single_call_input_layer(self,
for fc in group_seq_columns:
with tf.variable_scope('input_layer/' + fc.categorical_column.name):
tmp_embedding, tmp_seq_len = fc._get_sequence_dense_tensor(builder)
if fc.max_seq_len > 0:
tmp_embedding, tmp_seq_len = shape_utils.truncate_sequence(tmp_embedding, tmp_seq_len, fc.max_seq_len)
seq_features.append((tmp_embedding, tmp_seq_len))
embedding_reg_lst.append(tmp_embedding)
regularizers.apply_regularization(
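
The truncation added in this hunk clips both the sequence embeddings and the recorded lengths to `max_seq_len`. A minimal numpy sketch of what a helper like `shape_utils.truncate_sequence` presumably does (the actual implementation lives in `easy_rec/python/utils/shape_utils.py` and may differ):

```python
import numpy as np

def truncate_sequence(embeddings, seq_len, max_seq_len):
    # embeddings: [batch, time, dim]; seq_len: [batch]
    # clip the time axis, and cap each per-example length at max_seq_len
    return embeddings[:, :max_seq_len, :], np.minimum(seq_len, max_seq_len)

emb = np.zeros((2, 10, 4))        # two sequences, padded to length 10
lengths = np.array([3, 8])        # true lengths before truncation

emb2, len2 = truncate_sequence(emb, lengths, 5)
print(emb2.shape, len2)  # (2, 5, 4) [3 5]
```

Capping the lengths alongside the embeddings keeps downstream sequence masks consistent with the truncated tensor.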