Skip to content

Commit

Permalink
fix LTR example.
Browse files Browse the repository at this point in the history
  • Loading branch information
lcy-seso committed Nov 23, 2017
1 parent ede5a04 commit f6c3431
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 119 deletions.
114 changes: 76 additions & 38 deletions ltr/lambda_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
import gzip
import functools
import argparse
import logging
import numpy as np

import paddle.v2 as paddle

logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)


def lambda_rank(input_dim, is_infer):
"""
Expand All @@ -26,43 +30,39 @@ def lambda_rank(input_dim, is_infer):
data = paddle.layer.data("data",
paddle.data_type.dense_vector_sequence(input_dim))

# Define hidden layer.
hd1 = paddle.layer.fc(
input=data,
size=128,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))

hd2 = paddle.layer.fc(
input=hd1,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
output = paddle.layer.fc(
input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))
# Define the hidden layer.
hd1 = paddle.layer.fc(input=data,
size=128,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))

hd2 = paddle.layer.fc(input=hd1,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
output = paddle.layer.fc(input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))

if not is_infer:
# Define evaluator.
evaluator = paddle.evaluator.auc(input=output, label=label)
# Define cost layer.
# Define the cost layer.
cost = paddle.layer.lambda_cost(
input=output, score=label, NDCG_num=6, max_sort_size=-1)
return cost, output
return output


def train_lambda_rank(num_passes):
# The input for LambdaRank is a sequence.
def lambda_rank_train(num_passes, model_save_dir):
# The input for LambdaRank must be a sequence.
fill_default_train = functools.partial(
paddle.dataset.mq2007.train, format="listwise")
fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise")

train_reader = paddle.batch(
paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
paddle.reader.shuffle(
fill_default_train, buf_size=100), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32)

# Training dataset: mq2007, input_dim = 46, dense format.
Expand All @@ -78,13 +78,15 @@ def train_lambda_rank(num_passes):
# Define end batch and end pass event handler.
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
print "Pass %d Batch %d Cost %.9f" % (event.pass_id, event.batch_id,
event.cost)
logger.info("Pass %d Batch %d Cost %.9f" %
(event.pass_id, event.batch_id, event.cost))
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
with gzip.open("lambda_rank_params_%d.tar.gz" % (event.pass_id),
"w") as f:
logger.info("\nTest with Pass %d, %s" %
(event.pass_id, result.metrics))
with gzip.open(
os.path.join(model_save_dir, "lambda_rank_params_%d.tar.gz"
% (event.pass_id)), "w") as f:
trainer.save_parameter_to_tar(f)

feeding = {"label": 0, "data": 1}
Expand All @@ -95,17 +97,17 @@ def event_handler(event):
num_passes=num_passes)


def lambda_rank_infer(pass_id):
def lambda_rank_infer(test_model_path):
"""LambdaRank model inference interface.
Parameters:
pass_id : inference model in pass_id
test_model_path : The path of the trained model.
"""
print "Begin to Infer..."
logger.info("Begin to Infer...")
input_dim = 46
output = lambda_rank(input_dim, is_infer=True)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open("lambda_rank_params_%d.tar.gz" % (pass_id - 1)))
gzip.open(test_model_path))

infer_query_id = None
infer_data = []
Expand All @@ -128,15 +130,51 @@ def lambda_rank_infer(pass_id):


def str2bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` with ``type=bool`` simply calls ``bool()`` on the raw
    string, so every non-empty value (including "False" and "0") is parsed
    as True. This converter interprets the usual spellings explicitly.

    Parameters:
        value : a bool, or a string such as "true"/"false", "yes"/"no",
                "1"/"0" (case-insensitive).
    Returns:
        The corresponding bool.
    Raises:
        argparse.ArgumentTypeError : if the value is not a recognized
                                     boolean spelling.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept falsy, as it was under ``type=bool``.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "Expected a boolean value, but got %r." % (value, ))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="PaddlePaddle LambdaRank example.")
    parser.add_argument(
        "--run_type",
        type=str,
        # Reject anything else at parse time with a non-zero exit status
        # instead of logging a message and exiting successfully.
        choices=("train", "infer"),
        help=("A flag indicating to run the training or the inferring task. "
              "Available options are: train or infer."),
        default="train")
    parser.add_argument(
        "--num_passes",
        type=int,
        help="The number of passes to train the model.",
        default=10)
    parser.add_argument(
        "--use_gpu",
        type=str2bool,
        help="A flag indicating whether to use the GPU device in training.",
        default=False)
    parser.add_argument(
        "--trainer_count",
        type=int,
        help="The thread number used in training.",
        default=1)
    parser.add_argument(
        "--model_save_dir",
        type=str,
        required=False,
        help=("The path to save the trained models."),
        default="models")
    parser.add_argument(
        "--test_model_path",
        type=str,
        required=False,
        help=("This parameter works only in inferring task to "
              "specify path of a trained model."),
        default="")

    args = parser.parse_args()

    # Validate the inputs before initializing PaddlePaddle so that a bad
    # command line fails fast. An explicit check is used rather than
    # ``assert`` because assertions are stripped under ``python -O``.
    if args.run_type == "infer" and not os.path.exists(args.test_model_path):
        parser.error(
            "The trained model does not exist. Please set a correct path.")

    paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count)

    if args.run_type == "train":
        # The end-of-pass handler writes parameter archives into this
        # directory, so it has to exist before training starts.
        if not os.path.isdir(args.model_save_dir):
            os.makedirs(args.model_save_dir)
        lambda_rank_train(args.num_passes, args.model_save_dir)
    else:
        lambda_rank_infer(args.test_model_path)
119 changes: 81 additions & 38 deletions ltr/ranknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
import sys
import gzip
import functools
import paddle.v2 as paddle
import argparse
import logging
import numpy as np

import paddle.v2 as paddle
from metrics import ndcg
import argparse

logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)

# RankNet is the classic pairwise learning-to-rank algorithm.
# http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf
Expand All @@ -23,17 +28,18 @@ def half_ranknet(name_prefix, input_dim):
paddle.data_type.dense_vector(input_dim))

# hidden layer
hd1 = paddle.layer.fc(
input=data,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1"))
# fully connect layer/ output layer
output = paddle.layer.fc(
input=hd1,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01, name="output"))
hd1 = paddle.layer.fc(input=data,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(
initial_std=0.01, name="hidden_w1"))

# fully connected layer and output layer
output = paddle.layer.fc(input=hd1,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(
initial_std=0.01, name="output"))
return output


Expand All @@ -45,16 +51,16 @@ def ranknet(input_dim):
output_left = half_ranknet("left", input_dim)
output_right = half_ranknet("right", input_dim)

evaluator = paddle.evaluator.auc(input=output_left, label=label)
# rankcost layer
cost = paddle.layer.rank_cost(
name="cost", left=output_left, right=output_right, label=label)
return cost


def train_ranknet(num_passes):
def ranknet_train(num_passes, model_save_dir):
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mq2007.train, buf_size=100),
paddle.reader.shuffle(
paddle.dataset.mq2007.train, buf_size=100),
batch_size=100)
test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100)

Expand All @@ -75,17 +81,16 @@ def train_ranknet(num_passes):
# Define end batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.9f" % (
event.pass_id, event.batch_id, event.cost)
else:
sys.stdout.write(".")
sys.stdout.flush()
if event.batch_id % 25 == 0:
logger.info("Pass %d Batch %d Cost %.9f" %
(event.pass_id, event.batch_id, event.cost))
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
with gzip.open("ranknet_params_%d.tar.gz" % (event.pass_id),
"w") as f:
logger.info("\nTest with Pass %d, %s" %
(event.pass_id, result.metrics))
with gzip.open(
os.path.join(model_save_dir, "ranknet_params_%d.tar.gz" %
(event.pass_id)), "w") as f:
trainer.save_parameter_to_tar(f)

trainer.train(
Expand All @@ -95,18 +100,17 @@ def event_handler(event):
num_passes=num_passes)


def ranknet_infer(pass_id):
def ranknet_infer(model_path):
"""
load the trained model. And predict with plain txt input
"""
print "Begin to Infer..."
load the trained model. And predict with plain txt input
"""
logger.info("Begin to Infer...")
feature_dim = 46

# we just need half_ranknet to predict a rank score,
# which can be used in sort documents
output = half_ranknet("infer", feature_dim)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open("ranknet_params_%d.tar.gz" % (pass_id)))
parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))

# load data of same query and relevance documents,
# need ranknet to rank these candidates
Expand All @@ -133,16 +137,55 @@ def ranknet_infer(pass_id):
print "query_id : ", query_id, " ranknet rank document order : ", score


def str2bool(value):
    """Convert a command-line string into a real boolean.

    ``argparse`` with ``type=bool`` simply calls ``bool()`` on the raw
    string, so every non-empty value (including "False" and "0") is parsed
    as True. This converter interprets the usual spellings explicitly.

    Parameters:
        value : a bool, or a string such as "true"/"false", "yes"/"no",
                "1"/"0" (case-insensitive).
    Returns:
        The corresponding bool.
    Raises:
        argparse.ArgumentTypeError : if the value is not a recognized
                                     boolean spelling.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept falsy, as it was under ``type=bool``.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "Expected a boolean value, but got %r." % (value, ))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="PaddlePaddle RankNet example.")
    parser.add_argument(
        "--run_type",
        type=str,
        # Reject anything else at parse time with a non-zero exit status
        # instead of logging a message and exiting successfully.
        choices=("train", "infer"),
        help=("A flag indicating to run the training or the inferring task. "
              "Available options are: train or infer."),
        default="train")
    parser.add_argument(
        "--num_passes",
        type=int,
        help="The number of passes to train the model.",
        default=10)
    parser.add_argument(
        "--use_gpu",
        type=str2bool,
        help="A flag indicating whether to use the GPU device in training.",
        default=False)
    parser.add_argument(
        "--trainer_count",
        type=int,
        help="The thread number used in training.",
        default=1)
    parser.add_argument(
        "--model_save_dir",
        type=str,
        required=False,
        help=("The path to save the trained models."),
        default="models")
    parser.add_argument(
        "--test_model_path",
        type=str,
        required=False,
        help=("This parameter works only in inferring task to "
              "specify path of a trained model."),
        default="")

    args = parser.parse_args()

    # Validate the inputs before initializing PaddlePaddle so that a bad
    # command line fails fast. An explicit check is used rather than
    # ``assert`` because assertions are stripped under ``python -O``.
    if args.run_type == "infer" and not os.path.exists(args.test_model_path):
        parser.error("The trained model does not exist.")

    paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count)

    if args.run_type == "train":
        # Only training writes models; ``makedirs`` also copes with a
        # nested ``--model_save_dir`` such as "out/ranknet/models".
        if not os.path.isdir(args.model_save_dir):
            os.makedirs(args.model_save_dir)
        ranknet_train(args.num_passes, args.model_save_dir)
    else:
        ranknet_infer(args.test_model_path)
11 changes: 0 additions & 11 deletions ltr/run_lambdarank.sh

This file was deleted.

11 changes: 0 additions & 11 deletions ltr/run_ranknet.sh

This file was deleted.

Loading

0 comments on commit f6c3431

Please sign in to comment.