Optimizer use init program #5275

Merged
22 changes: 22 additions & 0 deletions python/paddle/v2/framework/framework.py
@@ -7,6 +7,11 @@
__all__ = ['Block', 'Variable', 'Program', 'Operator']


def unique_name(prefix):
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])


class Variable(object):
def __init__(self,
block,
@@ -358,6 +363,23 @@ def create_var(self, *args, **kwargs):
kwargs['initializer'](var, self)
return var

def create_persistable_var(self, init_program, initializer, prefix, *args,
**kwargs):
"""
Create a persistable variable in init_program.global_block() and in the current block.
Review comment (Member):

Why do we need to create the variable in both init_program and main_program? Parameters are created only in init_program. If init_program and main_program are in the same scope, will the same var_name cause a conflict?

Reply (Member, author):

The framework now creates some persistable variables and parameters in both init_program and program, and prepends an init op in init_program to initialize each persistable variable. When the executor creates the real variable, it checks whether the variable is persistable; if so, it creates it only in the root scope of the Executor, so the shared name resolves to a single variable rather than conflicting.

"""
if not isinstance(init_program, Program):
raise ValueError("must have init_program")
if init_program.global_block() == self:
raise ValueError(
"this method should not be called on init_program.global_block()")
var_name = unique_name(prefix)
kwargs['persistable'] = True
init_program.global_block().create_var(
name=var_name, initializer=initializer, *args, **kwargs)
var = Variable(self, name=var_name, *args, **kwargs)
return var

def has_var(self, name):
return name in self.vars

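For orientation, here is a minimal usage sketch of the new helper (not part of the diff). It assumes the default `g_program` / `g_init_program` instances exported by `paddle.v2.framework.framework` in this revision; the `prefix`, dtype, and shape are arbitrary illustration values.

```python
# Minimal sketch, assuming g_program / g_init_program are the default
# Program instances exposed by paddle.v2.framework.framework here.
from paddle.v2.framework.framework import g_program, g_init_program
from paddle.v2.framework.initializer import ConstantInitializer

block = g_program.global_block()

# create_persistable_var creates a uniquely named, persistable variable in
# two places:
#   1. g_init_program.global_block(), together with its initializer op.
#   2. `block` itself, as a plain persistable Variable with the same name.
# The executor later materializes persistable variables only once, in its
# root scope (see the author's reply above).
counter = block.create_persistable_var(
    init_program=g_init_program,
    initializer=ConstantInitializer(0.0),
    prefix="counter",          # hypothetical prefix for illustration
    dtype="float32",
    shape=[1],
    lod_level=0)
```

This mirrors how the optimizer code below allocates its learning-rate and accumulator variables.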
7 changes: 1 addition & 6 deletions python/paddle/v2/framework/layer_helper.py
@@ -4,16 +4,11 @@
import paddle.v2.framework.core as core

from paddle.v2.framework.framework import Variable, g_program, \
g_init_program
g_init_program, unique_name
from paddle.v2.framework.initializer import ConstantInitializer, \
UniformInitializer


def unique_name(prefix):
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])


class LayerHelper(object):
def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs
204 changes: 93 additions & 111 deletions python/paddle/v2/framework/optimizer.py
@@ -1,7 +1,9 @@
from collections import defaultdict

import paddle.v2.framework.framework as framework
from paddle.v2.framework.framework import unique_name, Program
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.framework.initializer import ConstantInitializer
from paddle.v2.framework.regularizer import append_regularization_ops

__all__ = [
@@ -25,6 +27,7 @@ def __init__(self, global_step=None):
# to train. These variables are called accumulators.
# {accum_name : { parameter_name : accumulator_for_parameter, ...}, ...}
self._accumulators = defaultdict(lambda: dict())
self._init_program = None

def _append_optimize_op(self, block, param_and_grad):
""" append optimize operator to block and return all the added optimize_op
@@ -77,22 +80,16 @@ def _add_accumulator(self, block, name, param, dtype=None, fill_value=0.0):
param.name in self._accumulators[name]):
raise Exception("Accumulator {} already exists for parmeter {}".
format(name, param.name))
global_block = block.program.global_block()
main_block = block.program.global_block()
param_shape = list(param.shape)
param_acc = global_block.create_var(
dtype=dtype, shape=param_shape, lod_level=0)

# Initialize the accumulator with fill_value
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
global_block.append_op(
type="fill_constant",
outputs={"Out": param_acc},
attrs={"shape": param_shape,
"value": fill_value})

# Add to accumulators dict
self._accumulators[name][param.name] = param_acc
self._accumulators[name][
param.name] = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(fill_value),
prefix="accumulator",
dtype=dtype,
shape=param_shape,
lod_level=0)

def _get_accumulator(self, name, param):
"""Utility function to fetch an accumulator for a parameter
@@ -130,7 +127,8 @@ def _increment_global_step(self, block):

return increment_op

def create_optimization_pass(self, parameters_and_grads, loss):
def create_optimization_pass(self, parameters_and_grads, loss,
init_program):
"""Add optimization operators to update gradients to variables.

Args:
@@ -151,6 +149,9 @@ def create_optimization_pass(self, parameters_and_grads, loss):
# for parameters and extend _finish_update method to add custom ops.

# Create any accumulators
if not isinstance(init_program, Program):
raise ValueError("init_program should be Program")
self._init_program = init_program
self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads])
# Create any necessary tensors
@@ -177,7 +178,11 @@ def create_optimization_pass(self, parameters_and_grads, loss):
return_ops.append(self._increment_global_step(loss.block))
return return_ops

def minimize(self, loss, parameter_list=None, no_grad_set=None):
def minimize(self,
loss,
init_program,
parameter_list=None,
no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`.

This method combines interface `append_backward_ops()` and
@@ -187,7 +192,8 @@ def minimize(self, loss, parameter_list=None, no_grad_set=None):
set())
# Add regularization if any
params_grads = append_regularization_ops(params_grads)
optimize_ops = self.create_optimization_pass(params_grads, loss)
optimize_ops = self.create_optimization_pass(params_grads, loss,
init_program)
return optimize_ops


@@ -204,18 +210,15 @@ def __init__(self, learning_rate, global_step=None):
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
main_block = block.program.global_block()
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
self._lr = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._learning_rate),
prefix="learning_rate",
dtype="float32",
shape=lr_shape,
lod_level=0)

def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
@@ -254,18 +257,15 @@ def __init__(self,
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
main_block = block.program.global_block()
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
self._lr = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._learning_rate),
prefix="learning_rate",
dtype="float32",
shape=lr_shape,
lod_level=0)

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
@@ -313,18 +313,14 @@ def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
main_block = block.program.global_block()
self._lr = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._learning_rate),
prefix="learning_rate",
dtype="float32",
shape=lr_shape,
lod_level=0)

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
@@ -381,43 +377,36 @@ def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
main_block = block.program.global_block()
self._lr = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._learning_rate),
prefix="learning_rate",
dtype="float32",
shape=lr_shape,
lod_level=0)

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)

global_block = block.program.global_block()
main_block = block.program.global_block()
# Create beta1 and beta2 power tensors
beta_shape = [1]
# Create variables for beta1 and beta2 powers
self._beta1_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)
self._beta2_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)

# Initialize beta1 and beta2 power accumulators
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta1_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta1})
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta2_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta2})
self._beta1_pow_acc = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._beta1),
prefix="beta1_pow_acc",
dtype="float32",
shape=beta_shape,
lod_level=0)

self._beta2_pow_acc = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._beta2),
prefix="beta2_pow_acc",
dtype="float32",
shape=beta_shape,
lod_level=0)

# Create accumulator tensors for first and second moments
for p in parameters:
@@ -460,14 +449,14 @@ def _finish_update(self, block):
"""Update Beta1 and Beta2 Power accumulators
"""
assert isinstance(block, framework.Block)
global_block = block.program.global_block()
scale_beta1 = global_block.append_op(
main_block = block.program.global_block()
scale_beta1 = main_block.append_op(
type="scale",
inputs={"X": self._beta1_pow_acc},
outputs={"Out": self._beta1_pow_acc},
attrs={"scale": self._beta1})

scale_beta2 = global_block.append_op(
scale_beta2 = main_block.append_op(
type="scale",
inputs={"X": self._beta2_pow_acc},
outputs={"Out": self._beta2_pow_acc},
@@ -502,36 +491,29 @@ def __init__(self,
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
main_block = block.program.global_block()
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)

# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
self._lr = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._learning_rate),
prefix="learning_rate",
dtype="float32",
shape=lr_shape,
lod_level=0)

def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)

global_block = block.program.global_block()
main_block = block.program.global_block()
# Create beta1 power accumulator tensor
beta_shape = [1]
self._beta1_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)

# Initialize beta1 power accumulator
# FIXME: Fix when Initialization design has been implemented
# /~https://github.com/PaddlePaddle/Paddle/pull/4852
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta1_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta1})
self._beta1_pow_acc = main_block.create_persistable_var(
init_program=self._init_program,
initializer=ConstantInitializer(self._beta1),
prefix="beta1_pow_acc",
dtype="float32",
shape=beta_shape,
lod_level=0)

# Create accumulator tensors for first moment and infinity norm
for p in parameters:
@@ -572,8 +554,8 @@ def _finish_update(self, block):
"""Update Beta1 Power accumulator
"""
assert isinstance(block, framework.Block)
global_block = block.program.global_block()
scale_beta1 = global_block.append_op(
main_block = block.program.global_block()
scale_beta1 = main_block.append_op(
type="scale",
inputs={"X": self._beta1_pow_acc},
outputs={"Out": self._beta1_pow_acc},
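As a quick summary of the API change above, here is a hedged sketch (not from the PR) of the new `minimize` call. `avg_cost`, `program`, and `init_program` are assumed to come from a network definition such as the tests below, and the inspection lines assume a `Block` exposes its operator list as `.ops` in this revision.

```python
import paddle.v2.framework.optimizer as optimizer

# The optimizer must now be told which Program receives the initializer ops
# for its persistable state (learning rate, accumulators, beta powers, ...).
sgd = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd.minimize(avg_cost, init_program)

# Assumption: a Block keeps its operators in `.ops`. After minimize(), the
# fill/initializer ops should appear in init_program's global block, while
# the main program only gains backward and optimize ops.
print(len(init_program.global_block().ops))
print(len(program.global_block().ops))
```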
2 changes: 1 addition & 1 deletion python/paddle/v2/framework/tests/test_fit_a_line.py
@@ -36,7 +36,7 @@
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)

sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)

BATCH_SIZE = 20

@@ -208,7 +208,7 @@ def conv_block(input,
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)

sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
opts = sgd_optimizer.minimize(avg_cost, init_program)

BATCH_SIZE = 128
PASS_NUM = 1
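Tying it together, here is a hedged sketch (not from the PR) of the run order these tests rely on: `init_program` is executed once so the prepended initializer ops fill every persistable variable (parameters, learning rates, optimizer accumulators), then the main `program` runs per batch. `program`, `init_program`, `avg_cost`, and a `feed_batch()` helper returning LoDTensors for 'x' and 'y' are assumed from a test like the ones above.

```python
import paddle.v2.framework.core as core
from paddle.v2.framework.executor import Executor

place = core.CPUPlace()
exe = Executor(place)

# Run the init program once: this executes the initializer ops that
# create_persistable_var prepended, filling parameters, learning-rate
# variables, and optimizer accumulators in the executor's root scope.
exe.run(init_program, feed={}, fetch_list=[])

# Then run the main program per batch; persistable variables are reused
# from the root scope instead of being re-created (see the author's reply
# in the framework.py diff).
for _ in range(100):
    tensor_x, tensor_y = feed_batch(place)   # assumed helper returning LoDTensors
    outs = exe.run(program,
                   feed={'x': tensor_x, 'y': tensor_y},
                   fetch_list=[avg_cost])
```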