
Commit a87a3c9: "update comment"
dzhwinter committed May 24, 2017
1 parent 561b6c8
Showing 2 changed files with 42 additions and 18 deletions.
34 changes: 28 additions & 6 deletions ltr/lambdaRank.py
@@ -8,6 +8,15 @@


def lambdaRank(input_dim):
"""
lambdaRank is a listwise ranking model; its input data and label must be sequences.
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters:
    input_dim, the dimension of one document's dense feature vector
dense_vector_sequence format (an example layout is sketched below):
[[f, ...], [f, ...], ...], where each f is a float or int number
"""
label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1))
data = paddle.layer.data("data",
@@ -16,14 +25,24 @@ def lambdaRank(input_dim):
# hidden layer
hd1 = paddle.layer.fc(
input=data,
size=128,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))

hd2 = paddle.layer.fc(
input=hd1,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
output = paddle.layer.fc(
input=hd1,
input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))

# evaluator
evaluator = paddle.evaluator.auc(input=output, label=label)
# cost layer
cost = paddle.layer.lambda_cost(
input=output, score=label, NDCG_num=6, max_sort_size=-1)
return cost, output
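
For reference, the docstring above only describes the listwise input layout in words. Below is a minimal sketch (not part of this commit) of what one mq2007-style sample could look like; the feature values and relevance labels are hypothetical.

# one query yields a sequence of documents; each document is a 46-dim dense
# feature vector (mq2007), paired with a single-element relevance label
input_dim = 46  # matches train_lambdaRank below

doc_features = [                          # dense_vector_sequence(input_dim)
    [0.1] * input_dim,
    [0.3] * input_dim,
    [0.0] * input_dim,
]
relevance_labels = [[2.0], [1.0], [0.0]]  # dense_vector_sequence(1), one label per document

# one listwise sample as (label sequence, feature sequence),
# matching the (label, querylist) order yielded by the mq2007 listwise reader
sample = (relevance_labels, doc_features)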
@@ -39,7 +58,7 @@ def train_lambdaRank(num_passes):
paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32)

# mq2007 input_dim = 46, dense format
input_dim = 46
cost, output = lambdaRank(input_dim)
parameters = paddle.parameters.create(cost)
@@ -83,20 +102,23 @@ def lambdaRank_infer(pass_id):

infer_query_id = None
infer_data = []
infer_data_num = 1000
infer_data_num = 1
fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise")
for label, querylist in fill_default_test():
infer_data.append(querylist)
if len(infer_data) == infer_data_num:
break

# predict the scores of the infer_data documents. Re-sort the documents by predicted
# score in descending order, then build the ranked document list
predictions = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
for i, score in enumerate(predictions):
print score
print i, score


if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
train_lambdaRank(100)
lambdaRank_infer(pass_id=2)
train_lambdaRank(2)
lambdaRank_infer(pass_id=1)
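
The comment in lambdaRank_infer promises a re-sort of the documents by predicted score, but the loop above only prints the raw scores. Below is a minimal sketch (not part of this commit) of that re-sorting step; the scores stand in for the values returned by paddle.infer.

predictions = [0.7, 0.2, 0.9]  # hypothetical per-document scores from paddle.infer
ranked = sorted(enumerate(predictions), key=lambda pair: pair[1], reverse=True)
for doc_index, score in ranked:
    print doc_index, score  # highest-scoring document first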
26 changes: 14 additions & 12 deletions ltr/ranknet.py
@@ -1,4 +1,5 @@
import os, sys
import os
import sys
import gzip
import functools
import paddle.v2 as paddle
@@ -37,7 +38,7 @@ def half_ranknet(name_prefix, input_dim):

def ranknet(input_dim):
# label layer
label = paddle.layer.data("label", paddle.data_type.integer_value(1))
label = paddle.layer.data("label", paddle.data_type.dense_vector(1))

# reuse the parameter in half_ranknet
output_left = half_ranknet("left", input_dim)
@@ -56,7 +57,7 @@ def train_ranknet(num_passes):
batch_size=100)
test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100)

# mq2007 feature_dim = 46, dense format
# fc hidden_dim = 128
feature_dim = 46
cost = ranknet(feature_dim)
@@ -106,28 +107,29 @@ def ranknet_infer(pass_id):
gzip.open("ranknet_params_%d.tar.gz" % (pass_id - 1)))

# load documents of the same query together with their relevance scores; ranknet will rank these candidates
infer_query_id = None
infer_query_id = []
infer_data = []
infer_score_list = []
infer_data_num = 1000
infer_doc_index = []

# convert to mq2007 built-in data format
# <query_id> <relevance_score> <feature_vector>
plain_txt_test = functools.partial(
paddle.dataset.mq2007.test, format="plain_txt")

for query_id, relevance_score, feature_vector in plain_txt_test():
if infer_query_id == None:
infer_query_id = query_id
elif infer_query_id != query_id:
break
infer_query_id.append(query_id)
infer_data.append(feature_vector)
predicitons = paddle.infer(

# predict the scores of the infer_data documents. Re-sort the documents by predicted
# score in descending order, then build the ranked document list
scores = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
for query_id, score in zip(infer_query_id, scores):
print "query_id : ", query_id, " ranknet rank document order : ", score


if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
pass_num = 10
pass_num = 2
train_ranknet(pass_num)
ranknet_infer(pass_id=pass_num - 1)
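
Similarly, ranknet_infer prints one (query_id, score) pair per document rather than an explicit ranking. Below is a minimal sketch (not part of this commit) that groups documents by query and sorts each group by predicted score in descending order; the query ids and scores are hypothetical.

from collections import defaultdict

infer_query_id = [10, 10, 10, 12, 12]  # hypothetical query ids
scores = [0.3, 0.9, 0.1, 0.5, 0.4]     # hypothetical ranknet outputs

ranking_per_query = defaultdict(list)
for doc_index, (query_id, score) in enumerate(zip(infer_query_id, scores)):
    ranking_per_query[query_id].append((score, doc_index))

for query_id, scored_docs in sorted(ranking_per_query.items()):
    scored_docs.sort(reverse=True)     # highest score first within each query
    print "query_id :", query_id, "ranked doc indices :", [d for _, d in scored_docs]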
