easy parameter sharing problem #3830
This issue describes two related problems: how to give every variable in a scope a unique name automatically, and how to share parameter variables between sub-models.

Here is my solution. It uses a global counter, cur_id, to generate unique names:
cur_id = 0

def unique_name_generator(prefix):
    global cur_id
    tmp = cur_id
    cur_id += 1
    return prefix + str(tmp)

def get_weight_name(layer_name, id=0):
    return layer_name + ".w." + str(id)

def get_bias_name(layer_name, id=0):
    return layer_name + ".bias." + str(id)

def get_temp_name(layer_name, id=0):
    return layer_name + ".tmp." + str(id)

def fc_layer(input, size, has_bias=False, name=None):
    if name is None:
        name = unique_name_generator("fc")
    w_name = get_weight_name(name)
    mul_out = get_temp_name(name, 0)
    create_operator("mul", X=input, Y=w_name, Out=mul_out)
    pre_activation = mul_out
    if has_bias:
        b_name = get_bias_name(name)
        bias_out = get_temp_name(name, 1)
        create_operator("add_bias", X=mul_out, Y=b_name, Out=bias_out)
        pre_activation = bias_out
    create_operator("sigmoid", X=pre_activation, Out=name)
    # return the output name so it can be fed into the next layer
    return name

def main_share_weight():
    data = data_layer()
    hidden1 = fc_layer(data, 200, name="FC")    # 200 is an arbitrary layer size
    hidden2 = fc_layer(hidden1, 200, name="FC")
    # hidden1 and hidden2 share the same weight and bias

def main_not_share_weight():
    data = data_layer()
    hidden1 = fc_layer(data, 200)
    hidden2 = fc_layer(hidden1, 200)
    # hidden1 and hidden2 do not share the same weight and bias
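For clarity, here is a small hypothetical illustration of the names these helpers produce, assuming a fresh counter; sharing works simply because two layers given the same name derive identical parameter names:

print(unique_name_generator("fc"))   # -> "fc0"
print(unique_name_generator("fc"))   # -> "fc1"
print(get_weight_name("FC"))         # -> "FC.w.0"
print(get_bias_name("FC"))           # -> "FC.bias.0"
print(get_temp_name("FC", 1))        # -> "FC.tmp.1"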
The design below borrows many ideas from TF's variable scope mechanism:

class namescope(object):
    namescopes = []

    def __init__(self,
                 name=None,
                 abs_name=None,
                 initializer=None,
                 regularizer=None,
                 reuse=None,
                 dtype=None):
        self.name = name
        prefix = "%s/%s" % ('/'.join(_.name for _ in namescope.namescopes), name)
        self.prefix = prefix if not abs_name else abs_name
        self.initializer = initializer
        self.regularizer = regularizer
        self.reuse = reuse
        self.dtype = dtype

    def namescope(self):
        return self

    def gen_name(self, name):
        return '%s/%s' % (self.prefix, name)

    def __enter__(self):
        namescope.namescopes.append(self)
        return self

    def __exit__(self, type, value, traceback):
        namescope.namescopes.pop()

    def __str__(self):
        return self.prefix

    def __repr__(self):
        return "<namescope %s>" % self.__str__()

class Block(object):
    def __init__(self):
        # NOTE if a block has its own scope, it does not need to inherit the
        # parent's namescope; alternatively, namescope could be a static
        # member of Block.
        self.namescope = namescope()
g_block = Block()
class Variable(object):
    counter = 0

    def __init__(self,
                 name=None,
                 initializer=None,
                 regularizer=None,
                 trainable=None,
                 reuse=None,
                 block=None):
        if not name:
            name = "var-%d" % Variable.counter
            Variable.counter += 1
        if not block:
            block = g_block
        # NOTE need to check uniqueness
        self.block = block
        self.name = block.namescope.gen_name(name)
        # set initializer
        if not initializer and block.namescope.initializer:
            initializer = block.namescope.initializer
        self.initializer = initializer
        # set regularizer
        if not regularizer and block.namescope.regularizer:
            regularizer = block.namescope.regularizer
        self.regularizer = regularizer
        # set reuse
        if reuse is None and block.namescope.reuse is not None:
            reuse = block.namescope.reuse
        self.reuse = reuse
        # set trainable
        self.trainable = trainable
class Operator(object):
    counter = 0

    def __init__(self, type):
        '''
        Each op has a unique name; two ops with the same prefix will be put
        in the same group for visualization.
        '''
        self.type = type
        self.name = g_block.namescope.gen_name('op-%d' % Operator.counter)
        Operator.counter += 1
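To make the naming behaviour of these sketch classes concrete, here is a small hypothetical walk-through; it only uses the classes defined above, and the string "gaussian" merely stands in for a real initializer:

with namescope("model") as outer:
    # constructed inside the "model" scope, so its prefix becomes "model/fc0"
    inner = namescope("fc0", initializer="gaussian")

print(inner.gen_name("W"))   # -> "model/fc0/W"

# A Variable takes its defaults (initializer, regularizer, reuse) from its
# block's namescope when they are not given explicitly.
blk = Block()
blk.namescope = inner
w = Variable("W", block=blk)
print(w.name)          # -> "model/fc0/W"
print(w.initializer)   # -> "gaussian"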
def fc(inputs, num_outputs, activation=None, reuse=False, trainable=True,
       weights_initializer=None, biases_initializer=None, namescope=None):
    '''
    Args:
        inputs: A tensor of at least rank 2.
        num_outputs: int, the number of output units in the layer.
        activation: activation function, default is sigmoid.
        weights_initializer: an initializer for the weights.
        biases_initializer: an initializer for the biases; if None, skip the biases.
        reuse: whether or not the layer and its parameters should be reused. To be
            able to reuse the layer, namescope should be given.
        trainable: if True, the parameters in this layer will be updated during training.
    Returns:
        the variable representing the result of the series of operations.
    '''
    W_shape = [inputs.shape[1], num_outputs]
    b_shape = [num_outputs]
    W_initializer = weights_initializer if weights_initializer else pd.gaussian_random_initializer()
    b_initializer = biases_initializer if biases_initializer else pd.zeros_initializer()
    if not reuse:
        # make unique names for both W and b
        W = pd.Variable(shape=W_shape, initializer=W_initializer)
        b = pd.Variable(shape=b_shape, initializer=b_initializer)
    else:
        assert namescope, "namescope should be provided to help reuse the parameters"
        with namescope:
            W = pd.Variable(name="W", shape=W_shape, initializer=W_initializer, reuse=True)
            b = pd.Variable(name="b", shape=b_shape, initializer=b_initializer, reuse=True)
    # NOTE the activation is not applied in this sketch
    return pd.add_two(pd.mat_mul(inputs, W), b)
if __name__ == '__main__':
    import paddle as pd

    with pd.namescope(initializer=pd.gaussian_random_initializer()):
        a1 = pd.Variable(shape=[20, 20])
        a2 = pd.Variable(shape=[20, 20])
        a3 = pd.Variable(shape=[20, 20])
        a4 = pd.Variable(shape=[20, 20])

    def two_level_fc(x, prefix=None):
        fc_out1 = fc(x, 20, namescope=pd.namescope(prefix + '-level0') if prefix else None)
        fc_out2 = fc(fc_out1, 20, namescope=pd.namescope(prefix + '-level1') if prefix else None)
        return fc_out2

    # the first 3 fcs share parameter variables within namescope "fc0",
    # while the last fc uses its own parameters.
    fc_out1 = two_level_fc(a1, "fc0")
    fc_out2 = two_level_fc(a2, "fc0")
    fc_out3 = two_level_fc(a3, "fc0")
    fc_out4 = two_level_fc(a4)
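Note that in the fc() sketch above, named parameters inside a namescope are only requested when reuse=True is passed; the example relies on the namescope alone. An explicitly shared pair of layers would look something like the following hypothetical usage (x1 and x2 stand for arbitrary input variables, and the actual lookup of an already-created Variable is left unspecified in the sketch):

shared = pd.namescope("shared_fc")

h1 = fc(x1, 200, reuse=True, namescope=shared)
h2 = fc(x2, 200, reuse=True, namescope=shared)
# h1 and h2 are intended to share the W and b created under the "shared_fc" scope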
To recap the problem: PaddlePaddle uses the class Scope to store Variables; a Scope can be viewed as a key-value map from a Variable's name to the corresponding Variable. There might be many variables in a scope, so users have to make sure all the variables in the same scope have unique names. With a light wrapper such as the one at the top of this issue, that bookkeeping is easy to do automatically.

But users often need some parameter variables to be shared between multiple sub-models. Take a simple model as an example: fc_out11, fc_out21, and fc_out31 are all produced by the same FC with shared parameter variables, while fc_out41 has a similar network structure but its parameters are not shared. It would be better to have some helper functions to make such code cleaner.
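The concrete example code is not included above; a hypothetical reconstruction using the fc_layer wrapper from the first sketch (with an arbitrary layer size of 200) could look like this:

def simple_model(data1, data2, data3, data4):
    # giving the three calls the same name makes them derive the same
    # "shared_fc.w.0" / "shared_fc.bias.0" parameters, so they are shared
    fc_out11 = fc_layer(data1, 200, has_bias=True, name="shared_fc")
    fc_out21 = fc_layer(data2, 200, has_bias=True, name="shared_fc")
    fc_out31 = fc_layer(data3, 200, has_bias=True, name="shared_fc")
    # same structure, but an auto-generated name gives it its own parameters
    fc_out41 = fc_layer(data4, 200, has_bias=True)
    return fc_out11, fc_out21, fc_out31, fc_out41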