diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 7ad028714d3b47..93aa5f908ec929 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -299,14 +299,18 @@ def run(self, if feed is None: feed = {} if not isinstance(feed, dict): - raise TypeError("feed should be a map") + raise TypeError( + "feed requires dict as its Parameter. But you passed in %s" % + (type(feed))) if fetch_list is None: fetch_list = [] if program is None: program = default_main_program() if not isinstance(program, Program): - raise TypeError() + raise TypeError( + "Executor requires Program as its Parameter. But you passed in %s" + % (type(program))) if scope is None: scope = global_scope() diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 0a314ddfd7c607..0fc48055220ed8 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -47,6 +47,8 @@ def __init__(self, learning_rate, regularization=None): raise TypeError("learning rate should be float or Variable") self.regularization = regularization self._learning_rate = learning_rate + # the learning rate type should be inferenced from loss + self._dtype = None # each program should have a independent learning rate # program -> Variable(learning_rate) self._learning_rate_map = dict() @@ -77,7 +79,7 @@ def _create_global_learning_rate(self): name=unique_name.generate("learning_rate"), shape=[1], value=float(self._learning_rate), - dtype='float32', + dtype='float32' if self._dtype == None else self._dtype, persistable=True) def global_learning_rate(self, program=None): @@ -200,6 +202,7 @@ def create_optimization_pass(self, # Create any accumulators program = loss.block.program + self._dtype = loss.dtype with program_guard(program, startup_program): global_block = framework.default_main_program().global_block() start = len(global_block.ops) @@ -391,7 +394,7 @@ def _create_accumulators(self, block, parameters): beta_shape = [1] self._beta1_pow_acc = self.helper.create_global_variable( name=unique_name.generate('beta1_pow_acc'), - dtype='float32', + dtype='float32' if self._dtype == None else self._dtype, shape=beta_shape, lod_level=0, persistable=True) @@ -400,7 +403,7 @@ def _create_accumulators(self, block, parameters): self._beta2_pow_acc = self.helper.create_global_variable( name=unique_name.generate('beta2_pow_acc'), - dtype='float32', + dtype='float32' if self._dtype == None else self._dtype, shape=beta_shape, lod_level=0, persistable=True) @@ -493,7 +496,7 @@ def _create_accumulators(self, block, parameters): beta_shape = [1] self._beta1_pow_acc = self.helper.create_global_variable( name=unique_name.generate('beta1_pow_acc'), - dtype='float32', + dtype='float32' if self._dtype == None else self._dtype, shape=beta_shape, lod_level=0, persistable=True) @@ -900,8 +903,10 @@ def _add_average_apply_op(self, block, param_grad): # param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates) tmp = layers.sum(x=[num_accumulates, old_num_accumulates]) sum = layers.sum(x=[sum_1, sum_2, sum_3]) - tmp = layers.cast(x=tmp, dtype='float32') - sum = layers.cast(x=sum, dtype='float32') + tmp = layers.cast( + x=tmp, dtype='float32' if self._dtype == None else self._dtype) + sum = layers.cast( + x=sum, dtype='float32' if self._dtype == None else self._dtype) layers.elementwise_div(x=sum, y=tmp, out=param) def _add_average_restore_op(self, block, param_grad): diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py new file mode 100644 index 00000000000000..baafcdbb802383 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py @@ -0,0 +1,74 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.executor import Executor + +BATCH_SIZE = 20 + + +class TestNetWithDtype(unittest.TestCase): + def setUp(self): + self.dtype = "float64" + self.init_dtype() + self.x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype) + self.y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype) + y_predict = fluid.layers.fc(input=self.x, size=1, act=None) + + cost = fluid.layers.square_error_cost(input=y_predict, label=self.y) + avg_cost = fluid.layers.mean(cost) + self.fetch_list = [avg_cost] + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + def run_net_on_place(self, place): + train_reader = paddle.batch( + paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE) + feeder = fluid.DataFeeder(place=place, feed_list=[self.x, self.y]) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for data in train_reader(): + exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=self.fetch_list) + # the main program is runable, the datatype is fully supported + break + + def init_dtype(self): + pass + + def test_cpu(self): + place = fluid.CPUPlace() + self.run_net_on_place(place) + + def test_gpu(self): + if not core.is_compiled_with_cuda(): + return + place = fluid.CUDAPlace(0) + self.run_net_on_place(place) + + +# TODO(dzhwinter): make sure the fp16 is runable +# class TestFloat16(SimpleNet): +# def init_dtype(self): +# self.dtype = "float16" + +if __name__ == '__main__': + unittest.main()