Commit v0.2.6
- Add IFM and DIFM models
- Support running on multiple GPUs
shenweichen authored Apr 4, 2021
2 parents d18ea26 + ea6bc38 commit 8265c75
Showing 40 changed files with 451 additions and 115 deletions.
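The two headline changes are the new IFM/DIFM models and the `gpus` argument for data-parallel training. A minimal usage sketch follows; the feature columns and data are synthetic placeholders, and only the `IFM`/`DIFM` imports and the `device`/`gpus` arguments come from this release.

```python
import numpy as np
import torch
from deepctr_torch.inputs import SparseFeat, get_feature_names
from deepctr_torch.models import IFM, DIFM  # both new in v0.2.6

# Synthetic feature columns and data, for illustration only.
feature_columns = [SparseFeat('user_id', vocabulary_size=100, embedding_dim=8),
                   SparseFeat('item_id', vocabulary_size=200, embedding_dim=8)]
x = {name: np.random.randint(0, 100, size=1000)
     for name in get_feature_names(feature_columns)}
y = np.random.randint(0, 2, size=1000)

# Multi-GPU support: `gpus[0]` must match `device`, and the batch_size passed
# to fit() is the per-GPU batch size (fit multiplies it by len(gpus)).
multi_gpu = torch.cuda.is_available() and torch.cuda.device_count() > 1
device = 'cuda:0' if multi_gpu else 'cpu'
gpus = [0, 1] if multi_gpu else None

model = DIFM(feature_columns, feature_columns, task='binary',
             device=device, gpus=gpus)  # IFM takes the same arguments
model.compile('adam', 'binary_crossentropy', metrics=['auc'])
model.fit(x, y, batch_size=256, epochs=1, verbose=1)
```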
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -18,7 +18,7 @@ jobs:
strategy:
matrix:
python-version: [3.6,3.7]
torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.0]
torch-version: [1.1.0,1.2.0,1.3.0,1.4.0,1.5.0,1.6.0,1.7.0,1.8.1]

# exclude:
# - python-version: 3.5
12 changes: 7 additions & 5 deletions README.md
@@ -38,7 +38,9 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
| AutoInt | [CIKM 2019][AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) |
| ONN | [arxiv 2019][Operation-aware Neural Networks for User Response Prediction](https://arxiv.org/pdf/1904.12579.pdf) |
| FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) |
| IFM | [IJCAI 2019][An Input-aware Factorization Machine for Sparse Prediction](https://www.ijcai.org/Proceedings/2019/0203.pdf) |
| DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) |
| DIFM | [IJCAI 2020][A Dual Input-aware Factorization Machine for CTR Prediction](https://www.ijcai.org/Proceedings/2020/0434.pdf) |


## Discussion Group & Related Projects
@@ -82,6 +84,11 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
​ <a href="https://github.com/shenweichen">Shen Weichen</a> ​
<p>Core Dev<br> Zhejiang University <br> <br> </p>​
</td>
<td>
​ <a href="https://github.com/zanshuxun"><img width="70" height="70" src="https://github.com/zanshuxun.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/zanshuxun">Zan Shuxun</a>
<p>Core Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
<a href="https://github.com/weberrr"><img width="70" height="70" src="https://github.com/weberrr.png?s=40" alt="pic"></a><br>
<a href="https://github.com/weberrr">Wang Ze</a> ​
@@ -92,11 +99,6 @@ Let's [**Get Started!**](https://deepctr-torch.readthedocs.io/en/latest/Quick-St
<a href="https://github.com/wutongzhang">Zhang Wutong</a>
<p>Core Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
​ <a href="https://github.com/zanshuxun"><img width="70" height="70" src="https://github.com/zanshuxun.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/zanshuxun">Zan Shuxun</a>
<p>Core Dev<br> Beijing University <br> of Posts and <br> Telecommunications</p>​
</td>
<td>
​ <a href="https://github.com/ZhangYuef"><img width="70" height="70" src="https://github.com/ZhangYuef.png?s=40" alt="pic"></a><br>
​ <a href="https://github.com/ZhangYuef">Zhang Yuefeng</a>
2 changes: 1 addition & 1 deletion deepctr_torch/__init__.py
@@ -2,5 +2,5 @@
from . import models
from .utils import check_version

__version__ = '0.2.5'
__version__ = '0.2.6'
check_version(__version__)
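A trivial runtime check of the bump above (assuming the package is installed from this commit):

```python
import deepctr_torch

print(deepctr_torch.__version__)  # expected: '0.2.6'
```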
2 changes: 1 addition & 1 deletion deepctr_torch/layers/activation.py
@@ -12,7 +12,7 @@ class Dice(nn.Module):
Output shape:
- Same shape as input.
References
- [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
- https://github.com/zhougr1993/DeepInterestNetwork, https://github.com/fanoping/DIN-pytorch
49 changes: 30 additions & 19 deletions deepctr_torch/layers/interaction.py
@@ -106,10 +106,11 @@ class BilinearInteraction(nn.Module):
Input shape
- A list of 3D tensor with shape: ``(batch_size,filed_size, embedding_size)``.
Output shape
- 3D tensor with shape: ``(batch_size,filed_size, embedding_size)``.
- 3D tensor with shape: ``(batch_size,filed_size*(filed_size-1)/2, embedding_size)``.
Arguments
- **filed_size** : Positive integer, number of feature groups.
- **str** : String, types of bilinear functions used in this layer.
- **embedding_size** : Positive integer, embedding size of sparse features.
- **bilinear_type** : String, types of bilinear functions used in this layer.
- **seed** : A Python integer to use as random seed.
References
- [FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction
@@ -125,7 +126,7 @@ def __init__(self, filed_size, embedding_size, bilinear_type="interaction", seed
self.bilinear = nn.Linear(
embedding_size, embedding_size, bias=False)
elif self.bilinear_type == "each":
for i in range(filed_size):
for _ in range(filed_size):
self.bilinear.append(
nn.Linear(embedding_size, embedding_size, bias=False))
elif self.bilinear_type == "interaction":
@@ -340,13 +341,14 @@ class InteractingLayer(nn.Module):
- [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921)
"""

def __init__(self, in_features, att_embedding_size=8, head_num=2, use_res=True, seed=1024, device='cpu'):
def __init__(self, in_features, att_embedding_size=8, head_num=2, use_res=True, scaling=False, seed=1024, device='cpu'):
super(InteractingLayer, self).__init__()
if head_num <= 0:
raise ValueError('head_num must be a int > 0')
self.att_embedding_size = att_embedding_size
self.head_num = head_num
self.use_res = use_res
self.scaling = scaling
self.seed = seed

embedding_size = in_features
@@ -388,7 +390,8 @@ def forward(self, inputs):
values, self.att_embedding_size, dim=2))
inner_product = torch.einsum(
'bnik,bnjk->bnij', querys, keys) # head_num None F F

if self.scaling:
inner_product /= self.att_embedding_size ** 0.5
self.normalized_att_scores = F.softmax(
inner_product, dim=-1) # head_num None F F
result = torch.matmul(self.normalized_att_scores,
@@ -428,17 +431,20 @@ def __init__(self, in_features, layer_num=2, parameterization='vector', seed=102
self.parameterization = parameterization
if self.parameterization == 'vector':
# weight in DCN. (in_features, 1)
self.kernels = torch.nn.ParameterList(
[nn.Parameter(nn.init.xavier_normal_(torch.empty(in_features, 1))) for i in range(self.layer_num)])
self.kernels = nn.Parameter(torch.Tensor(self.layer_num, in_features, 1))
elif self.parameterization == 'matrix':
# weight matrix in DCN-M. (in_features, in_features)
self.kernels = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
torch.empty(in_features, in_features))) for i in range(self.layer_num)])
self.kernels = nn.Parameter(torch.Tensor(self.layer_num, in_features, in_features))
else: # error
raise ValueError("parameterization should be 'vector' or 'matrix'")

self.bias = torch.nn.ParameterList(
[nn.Parameter(nn.init.zeros_(torch.empty(in_features, 1))) for i in range(self.layer_num)])
self.bias = nn.Parameter(torch.Tensor(self.layer_num, in_features, 1))

for i in range(self.kernels.shape[0]):
nn.init.xavier_normal_(self.kernels[i])
for i in range(self.bias.shape[0]):
nn.init.zeros_(self.bias[i])

self.to(device)

def forward(self, inputs):
@@ -483,18 +489,23 @@ def __init__(self, in_features, low_rank=32, num_experts=4, layer_num=2, device=
self.num_experts = num_experts

# U: (in_features, low_rank)
self.U_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
torch.empty(num_experts, in_features, low_rank))) for i in range(self.layer_num)])
self.U_list = nn.Parameter(torch.Tensor(self.layer_num, num_experts, in_features, low_rank))
# V: (in_features, low_rank)
self.V_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
torch.empty(num_experts, in_features, low_rank))) for i in range(self.layer_num)])
self.V_list = nn.Parameter(torch.Tensor(self.layer_num, num_experts, in_features, low_rank))
# C: (low_rank, low_rank)
self.C_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
torch.empty(num_experts, low_rank, low_rank))) for i in range(self.layer_num)])
self.C_list = nn.Parameter(torch.Tensor(self.layer_num, num_experts, low_rank, low_rank))
self.gating = nn.ModuleList([nn.Linear(in_features, 1, bias=False) for i in range(self.num_experts)])

self.bias = torch.nn.ParameterList([nn.Parameter(nn.init.zeros_(
torch.empty(in_features, 1))) for i in range(self.layer_num)])
self.bias = nn.Parameter(torch.Tensor(self.layer_num, in_features, 1))

init_para_list = [self.U_list, self.V_list, self.C_list]
for i in range(len(init_para_list)):
for j in range(self.layer_num):
nn.init.xavier_normal_(init_para_list[i][j])

for i in range(len(self.bias)):
nn.init.zeros_(self.bias[i])

self.to(device)

def forward(self, inputs):
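The CrossNet and CrossNetMix changes above replace per-layer `nn.ParameterList`s with single stacked `nn.Parameter` tensors that are initialised slice by slice (plausibly to make the weights easier to replicate under the new nn.DataParallel path, though the diff does not say so). The following is a self-contained sketch of the same pattern; the shapes are illustrative and the loop is the standard DCN "vector" recurrence rather than a copy of the library's forward().

```python
import torch
import torch.nn as nn

layer_num, in_features, batch = 2, 16, 4

# One stacked parameter per weight group instead of a ParameterList,
# initialised layer by layer as in the diff above.
kernels = nn.Parameter(torch.Tensor(layer_num, in_features, 1))
bias = nn.Parameter(torch.Tensor(layer_num, in_features, 1))
for i in range(kernels.shape[0]):
    nn.init.xavier_normal_(kernels[i])
for i in range(bias.shape[0]):
    nn.init.zeros_(bias[i])

# kernels[l] is indexed the same way the old ParameterList entries were.
x_0 = torch.randn(batch, in_features, 1)
x_l = x_0
for l in range(layer_num):
    # DCN "vector" parameterization: x_{l+1} = x_0 (x_l^T w_l) + b_l + x_l
    xl_w = torch.tensordot(x_l, kernels[l], dims=([1], [0]))  # (batch, 1, 1)
    x_l = torch.matmul(x_0, xl_w) + bias[l] + x_l
print(x_l.shape)  # torch.Size([4, 16, 1])
```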
3 changes: 2 additions & 1 deletion deepctr_torch/layers/sequence.py
@@ -39,7 +39,7 @@ def _sequence_mask(self, lengths, maxlen=None, dtype=torch.bool):
# Returns a mask tensor representing the first N positions of each cell.
if maxlen is None:
maxlen = lengths.max()
row_vector = torch.arange(0, maxlen, 1).to(self.device)
row_vector = torch.arange(0, maxlen, 1).to(lengths.device)
matrix = torch.unsqueeze(lengths, dim=-1)
mask = row_vector < matrix

@@ -70,6 +70,7 @@ def forward(self, seq_value_len_list):
hist = torch.sum(hist, dim=1, keepdim=False)

if self.mode == 'mean':
self.eps = self.eps.to(user_behavior_length.device)
hist = torch.div(hist, user_behavior_length.type(torch.float32) + self.eps)

hist = torch.unsqueeze(hist, dim=1)
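Both sequence.py fixes serve the same goal: tensors created inside forward() must follow the device of the incoming batch rather than the module's configured device, which matters once nn.DataParallel replicas run on other GPUs. Below is a simplified, hypothetical stand-in for SequencePoolingLayer._sequence_mask that illustrates the idea on CPU.

```python
import torch

def sequence_mask(lengths, maxlen=None):
    # The row vector follows lengths.device, so each DataParallel replica
    # builds its mask on its own GPU.
    if maxlen is None:
        maxlen = lengths.max()
    row_vector = torch.arange(0, maxlen, 1).to(lengths.device)
    return row_vector < lengths.unsqueeze(-1)

print(sequence_mask(torch.tensor([1, 3, 2])))
# tensor([[ True, False, False],
#         [ True,  True,  True],
#         [ True,  True, False]])
```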
2 changes: 2 additions & 0 deletions deepctr_torch/models/__init__.py
@@ -2,6 +2,8 @@
from .deepfm import DeepFM
from .xdeepfm import xDeepFM
from .afm import AFM
from .difm import DIFM
from .ifm import IFM
from .autoint import AutoInt
from .dcn import DCN
from .dcnmix import DCNMix
5 changes: 3 additions & 2 deletions deepctr_torch/models/afm.py
@@ -27,16 +27,17 @@ class AFM(BaseModel):
:param seed: integer ,to use as random seed.
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:param device: str, ``"cpu"`` or ``"cuda:0"``
:param gpus: list of int or torch.device for multiple GPUs. If None, run on `device`. `gpus[0]` should be the same gpu as `device`.
:return: A PyTorch model instance.
"""

def __init__(self, linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8,
l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, init_std=0.0001, seed=1024,
task='binary', device='cpu'):
task='binary', device='cpu', gpus=None):
super(AFM, self).__init__(linear_feature_columns, dnn_feature_columns, l2_reg_linear=l2_reg_linear,
l2_reg_embedding=l2_reg_embedding, init_std=init_std, seed=seed, task=task,
device=device)
device=device, gpus=gpus)

self.use_attention = use_attention

7 changes: 4 additions & 3 deletions deepctr_torch/models/autoint.py
@@ -32,19 +32,20 @@ class AutoInt(BaseModel):
:param seed: integer ,to use as random seed.
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:param device: str, ``"cpu"`` or ``"cuda:0"``
:param gpus: list of int or torch.device for multiple GPUs. If None, run on `device`. `gpus[0]` should be the same gpu as `device`.
:return: A PyTorch model instance.
"""

def __init__(self, linear_feature_columns, dnn_feature_columns, att_layer_num=3, att_embedding_size=8, att_head_num=2,
att_res=True,
dnn_hidden_units=(256, 128), dnn_activation='relu',
l2_reg_dnn=0, l2_reg_embedding=1e-5, dnn_use_bn=False, dnn_dropout=0, init_std=0.0001, seed=1024,
task='binary', device='cpu'):
task='binary', device='cpu', gpus=None):

super(AutoInt, self).__init__(linear_feature_columns, dnn_feature_columns, l2_reg_linear=0,
l2_reg_embedding=l2_reg_embedding, init_std=init_std, seed=seed, task=task,
device=device)
device=device, gpus=gpus)

if len(dnn_hidden_units) <= 0 and att_layer_num <= 0:
raise ValueError("Either hidden_layer or att_layer_num must > 0")
52 changes: 31 additions & 21 deletions deepctr_torch/models/basemodel.py
@@ -59,7 +59,7 @@ def __init__(self, feature_columns, feature_index, init_std=0.0001, device='cpu'
device))
torch.nn.init.normal_(self.weight, mean=0, std=init_std)

def forward(self, X):
def forward(self, X, sparse_feat_refine_weight=None):

sparse_embedding_list = [self.embedding_dict[feat.embedding_name](
X[:, self.feature_index[feat.name][0]:self.feature_index[feat.name][1]].long()) for
@@ -73,34 +73,37 @@ def forward(self, X):

sparse_embedding_list += varlen_embedding_list

if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
linear_sparse_logit = torch.sum(
torch.cat(sparse_embedding_list, dim=-1), dim=-1, keepdim=False)
linear_dense_logit = torch.cat(
linear_logit = torch.zeros([X.shape[0], 1]).to(sparse_embedding_list[0].device)
if len(sparse_embedding_list) > 0:
sparse_embedding_cat = torch.cat(sparse_embedding_list, dim=-1)
if sparse_feat_refine_weight is not None:
# w_{x,i}=m_{x,i} * w_i (in IFM and DIFM)
sparse_embedding_cat = sparse_embedding_cat * sparse_feat_refine_weight.unsqueeze(1)
sparse_feat_logit = torch.sum(sparse_embedding_cat, dim=-1, keepdim=False)
linear_logit += sparse_feat_logit
if len(dense_value_list) > 0:
dense_value_logit = torch.cat(
dense_value_list, dim=-1).matmul(self.weight)
linear_logit = linear_sparse_logit + linear_dense_logit
elif len(sparse_embedding_list) > 0:
linear_logit = torch.sum(
torch.cat(sparse_embedding_list, dim=-1), dim=-1, keepdim=False)
elif len(dense_value_list) > 0:
linear_logit = torch.cat(
dense_value_list, dim=-1).matmul(self.weight)
else:
linear_logit = torch.zeros([X.shape[0], 1])
linear_logit += dense_value_logit

return linear_logit


class BaseModel(nn.Module):
def __init__(self, linear_feature_columns, dnn_feature_columns, l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
init_std=0.0001, seed=1024, task='binary', device='cpu'):
init_std=0.0001, seed=1024, task='binary', device='cpu', gpus=None):

super(BaseModel, self).__init__()
torch.manual_seed(seed)
self.dnn_feature_columns = dnn_feature_columns

self.reg_loss = torch.zeros((1,), device=device)
self.aux_loss = torch.zeros((1,), device=device)
self.device = device # device
self.device = device
self.gpus = gpus
if gpus and str(self.gpus[0]) not in self.device:
raise ValueError(
"`gpus[0]` should be the same gpu with `device`")

self.feature_index = build_input_features(
linear_feature_columns + dnn_feature_columns)
@@ -192,14 +195,21 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
torch.from_numpy(y))
if batch_size is None:
batch_size = 256
train_loader = DataLoader(
dataset=train_tensor_data, shuffle=shuffle, batch_size=batch_size)

print(self.device, end="\n")
model = self.train()
loss_func = self.loss_func
optim = self.optim

if self.gpus:
print('parallel running on these gpus:', self.gpus)
model = torch.nn.DataParallel(model, device_ids=self.gpus)
batch_size *= len(self.gpus) # input `batch_size` is batch_size per gpu
else:
print(self.device)

train_loader = DataLoader(
dataset=train_tensor_data, shuffle=shuffle, batch_size=batch_size)

sample_num = len(train_tensor_data)
steps_per_epoch = (sample_num - 1) // batch_size + 1

@@ -224,7 +234,7 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
train_result = {}
try:
with tqdm(enumerate(train_loader), disable=verbose != 1) as t:
for index, (x_train, y_train) in t:
for _, (x_train, y_train) in t:
x = x_train.to(self.device).float()
y = y_train.to(self.device).float()

@@ -323,7 +333,7 @@ def predict(self, x, batch_size=256):

pred_ans = []
with torch.no_grad():
for index, x_test in enumerate(test_loader):
for _, x_test in enumerate(test_loader):
x = x_test[0].to(self.device).float()

y_pred = model(x).cpu().data.numpy() # .squeeze()
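The `sparse_feat_refine_weight` hook added to `LinearModel.forward` is what IFM and DIFM use to reweight the linear part per sample: every input x gets a factor m_{x,i} per field, so the effective linear weight becomes w_{x,i} = m_{x,i} * w_i. A shape-only sketch follows; the tensor names and sizes are illustrative and not taken from the library.

```python
import torch

batch, field_num = 4, 3
# Linear "embeddings" are 1-dimensional weights per sparse field, concatenated
# along the last axis inside LinearModel.forward: shape (batch, 1, field_num).
sparse_embedding_cat = torch.randn(batch, 1, field_num)
# Per-sample, per-field refine factors m_{x,i} produced by the IFM/DIFM
# factor-estimating network: shape (batch, field_num).
m_x = torch.rand(batch, field_num)

refined = sparse_embedding_cat * m_x.unsqueeze(1)  # w_{x,i} = m_{x,i} * w_i
linear_logit = torch.sum(refined, dim=-1)          # (batch, 1), as before
print(linear_logit.shape)  # torch.Size([4, 1])
```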
5 changes: 3 additions & 2 deletions deepctr_torch/models/ccpm.py
@@ -34,18 +34,19 @@ class CCPM(BaseModel):
:param seed: integer ,to use as random seed.
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:param device: str, ``"cpu"`` or ``"cuda:0"``
:param gpus: list of int or torch.device for multiple GPUs. If None, run on `device`. `gpus[0]` should be the same gpu as `device`.
:return: A PyTorch model instance.
"""

def __init__(self, linear_feature_columns, dnn_feature_columns, conv_kernel_width=(6, 5),
conv_filters=(4, 4),
dnn_hidden_units=(256,), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0,
init_std=0.0001, seed=1024, task='binary', device='cpu', dnn_use_bn=False, dnn_activation='relu'):
init_std=0.0001, seed=1024, task='binary', device='cpu', dnn_use_bn=False, dnn_activation='relu', gpus=None):

super(CCPM, self).__init__(linear_feature_columns, dnn_feature_columns, l2_reg_linear=l2_reg_linear,
l2_reg_embedding=l2_reg_embedding, init_std=init_std, seed=seed, task=task,
device=device)
device=device, gpus=gpus)

if len(conv_kernel_width) != len(conv_filters):
raise ValueError(