From 36cc75996087b4671fdba3bdd6e0fd3a9b841816 Mon Sep 17 00:00:00 2001
From: lilong12
Date: Wed, 18 Dec 2019 09:49:47 +0800
Subject: [PATCH] add some docs, requirements.txt, update setup.py and bug fix
 (#3)

* add some docs and requirements.txt

* bug fix
---
 .gitignore                   |   1 +
 README.md                    |   7 +-
 docs/base64_preprocessor.md  |  70 ++++
 docs/custom_models.md        |   1 -
 docs/distributed_params.md   |  48 +++
 docs/export_for_infer.md     |   1 +
 plsc/__init__.py             |   3 +
 plsc/__init__.pyc            | Bin 154 -> 0 bytes
 plsc/config.pyc              | Bin 895 -> 0 bytes
 plsc/do_train.py             | 674 -----------------------------------
 plsc/entry.py                | 222 ++++++------
 plsc/entry.pyc               | Bin 27675 -> 0 bytes
 plsc/models/__init__.pyc     | Bin 285 -> 0 bytes
 plsc/models/base_model.pyc   | Bin 4424 -> 0 bytes
 plsc/models/dist_algo.pyc    | Bin 13719 -> 0 bytes
 plsc/models/resnet.pyc       | Bin 4786 -> 0 bytes
 plsc/run.sh                  |  19 -
 plsc/utils/__init__.pyc      | Bin 109 -> 0 bytes
 plsc/utils/jpeg_reader.pyc   | Bin 9307 -> 0 bytes
 plsc/utils/learning_rate.pyc | Bin 1374 -> 0 bytes
 plsc/utils/verification.pyc  | Bin 5596 -> 0 bytes
 plsc/version.py              |  15 +
 requirements.txt             |   8 +
 setup.py                     |  62 +++-
 24 files changed, 319 insertions(+), 812 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 docs/base64_preprocessor.md
 create mode 100644 docs/distributed_params.md
 delete mode 100644 plsc/__init__.pyc
 delete mode 100644 plsc/config.pyc
 delete mode 100755 plsc/do_train.py
 delete mode 100644 plsc/entry.pyc
 delete mode 100644 plsc/models/__init__.pyc
 delete mode 100644 plsc/models/base_model.pyc
 delete mode 100644 plsc/models/dist_algo.pyc
 delete mode 100644 plsc/models/resnet.pyc
 delete mode 100644 plsc/run.sh
 delete mode 100644 plsc/utils/__init__.pyc
 delete mode 100644 plsc/utils/jpeg_reader.pyc
 delete mode 100644 plsc/utils/learning_rate.pyc
 delete mode 100644 plsc/utils/verification.pyc
 create mode 100644 plsc/version.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000000..0d20b6487c61e7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/README.md b/README.md
index 6b817214d4c1b9..32a84383fd34e7 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ PLSC has the following features:
 ### Basic features
 
 * [API introduction](docs/api_intro.md)
-* [Custom models](docs/custom_modes.md)
+* [Custom models](docs/custom_models.md)
 * [Custom Reader interface]
 
 ### Inference and deployment
@@ -33,6 +33,5 @@ PLSC has the following features:
 
 ### Advanced features
 
-* [Mixed-precision training](docs/mixed_precision.md)
-* [Distributed parameter conversion]
-* [Base64 image preprocessing]
+* [Distributed parameter conversion](docs/distributed_params.md)
+* [Base64 image preprocessing](docs/base64_preprocessor.md)
diff --git a/docs/base64_preprocessor.md b/docs/base64_preprocessor.md
new file mode 100644
index 00000000000000..13eee49db0c328
--- /dev/null
+++ b/docs/base64_preprocessor.md
@@ -0,0 +1,70 @@
+# Base64 image preprocessing
+
+## Introduction
+
+In real applications, training data is commonly stored with the images encoded
+in base64 format: each line of a data file holds the base64 data of one image
+and that image's label, usually separated by a tab character ('\t').
+
+Typically, the list of all training data files is kept in a separate file, and
+the whole training dataset directory is laid out as follows:
+
+```shell
+dataset
+     |-- file_list.txt
+     |-- dataset.part1
+     |-- dataset.part2
+     ...     ....
+     `-- dataset.part10
+```
+
+Here, file_list.txt records the training data files, one file per line. For
+the example above, its content is:
+
+```shell
+dataset.part1
+dataset.part2
+...
+dataset.part10
+```
+
+Each line of a data file contains the base64 representation of one image,
+followed by a tab-separated label.
+
+For distributed training, every GPU card has to process the same number of
+images, and a global shuffle of the training data is usually performed once
+before training.
+
+This document describes the base64 image preprocessing tool, which globally
+shuffles the training data and splits it evenly into as many data files as
+there are GPU cards used for training. When the total number of training
+images is not divisible by the number of GPU cards, the data is padded with
+images randomly picked from the training set, so that the total number of
+training images is an integer multiple of the number of cards.
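+As a rough illustration of what the tool does (a simplified sketch, not the
+tool's actual implementation), the global shuffle, padding, and split steps
+are essentially:
+
+```python
+import base64
+import random
+
+def shuffle_and_split(samples, nranks):
+    """samples: lines of the form 'base64_image\tlabel' from all data files."""
+    random.shuffle(samples)  # global shuffle before training
+    remainder = len(samples) % nranks
+    if remainder:
+        # pad with randomly chosen samples so every rank gets the same count
+        samples += random.sample(samples, nranks - remainder)
+    per_rank = len(samples) // nranks
+    return [samples[i * per_rank:(i + 1) * per_rank] for i in range(nranks)]
+
+# decoding one line back into image bytes and an integer label
+line = base64.b64encode(b"fake-jpeg-bytes").decode() + "\t7"
+image_b64, label = line.split("\t")
+image_bytes, label = base64.b64decode(image_b64), int(label)
+```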
+## Tool usage
+
+The tool lives in the tools directory. Its help message is available via:
+
+```shell
+python tools/process_base64_files.py --help
+```
+
+The tool supports the following command-line options:
+
+* data_dir: the root directory of the training data
+* file_list: the file listing the training data files, e.g. file_list.txt
+* nranks: the number of GPU cards used for training
+
+Run the tool as follows:
+
+```shell
+python tools/process_base64_files.py --data_dir=./dataset --file_list=file_list.txt --nranks=8
+```
+
+This generates 8 data files, each containing the same number of training
+samples.
+
+The final directory layout is:
+
+```shell
+dataset
+     |-- file_list.txt
+     |-- dataset.part1
+     |-- dataset.part2
+     ...     ....
+     `-- dataset.part8
+```
diff --git a/docs/custom_models.md b/docs/custom_models.md
index 53b6cc1b9f87f6..7a3e3896125d03 100644
--- a/docs/custom_models.md
+++ b/docs/custom_models.md
@@ -3,7 +3,6 @@
 By default, the PaddlePaddle large-scale classification library builds its
 training model on ResNet50.
 PLSC provides the model base class plsc.models.base_model.BaseModel, from
 which users can derive their own network models. A user-defined model class
 must inherit from this base class and implement the build_network method,
 which builds the custom model.
-Users should call the class's get_output method, which automatically appends
-the distributed FC layer to the end of the user-defined model.
 The following example shows how to define a custom network model with the
 BaseModel base class, and how to use it.
 
 ```python
diff --git a/docs/distributed_params.md b/docs/distributed_params.md
new file mode 100644
index 00000000000000..422261760e14e4
--- /dev/null
+++ b/docs/distributed_params.md
@@ -0,0 +1,48 @@
+# Distributed parameter conversion
+
+## Introduction
+
+The parameters of the last fully-connected layer (W and b, assuming the bias
+b exists; otherwise W is the layer's only parameter) are usually sharded
+across all training GPU cards. For example, with N GPU cards used during
+training,
+
+$$W = [W_{1}, W_{2}, ..., W_{N}]$$
+$$b = [b_{1}, b_{2}, ..., b_{N}]$$
+
+where the shards $W_{i}$ and $b_{i}$ are stored on the i-th GPU.
+
+When a model is saved, the distributed parameters of every GPU card are saved
+as well.
+
+For warm start or fine-tuning, if the number of training GPU cards differs
+from the number used before the warm start or during pre-training, the
+distributed parameters must be converted so that the number of shards matches
+the number of GPU cards used for training.
+
+By default, the plsc.entry.Entry.train() method performs this conversion
+automatically.
+
+## Tool usage
+
+The conversion tool can also be used standalone; see its help message:
+
+```shell
+python -m plsc.utils.process_distfc_parameter --help
+```
+
+The tool supports the following command-line options:
+
+| Option               | Description |
+| :------------------- | :---------- |
+| name_feature         | The name feature used to recognize distributed parameters. By default, distributed parameter names start with dist@arcface@rank@rankid or dist@softmax@rank@rankid, where rankid identifies the GPU card, and name_feature defaults to @rank@. Users normally do not need to change this option. |
+| pretrain_nranks      | The number of GPU cards used during pre-training |
+| nranks               | The number of GPU cards to be used for this training run |
+| num_classes          | The number of classes |
+| emb_dim              | The output dimension of the second-to-last fully-connected layer, excluding the batch size |
+| pretrained_model_dir | The directory of the pre-trained model |
+| output_dir           | The directory for the converted distributed parameters |
+
+Usually, a pre-trained model contains a meta.pickle file that records the
+number of GPU cards used during pre-training, the number of classes, and the
+output dimension of the second-to-last fully-connected layer, so the
+pretrain_nranks, num_classes, and emb_dim options normally need not be given.
+
+The following command converts distributed parameters:
+
+```shell
+python -m plsc.utils.process_distfc_parameter --nranks=4 --pretrained_model_dir=./output --output_dir=./output_post
+```
+
+Note that the output directory contains only the converted distributed
+parameters and no other model parameters. Hence, the distributed parameters
+in the pre-trained model usually have to be replaced by the converted ones.
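+Conceptually, the conversion concatenates the saved shards along the class
+dimension and re-splits them for the new number of cards. A minimal numpy
+sketch of this idea (the real tool additionally handles parameter names and
+the meta.pickle file):
+
+```python
+import numpy as np
+
+def reshard_fc_weight(shards, new_nranks):
+    """shards: per-rank arrays of shape [emb_dim, classes_on_rank_i]."""
+    full = np.concatenate(shards, axis=1)          # [emb_dim, num_classes]
+    return np.array_split(full, new_nranks, axis=1)
+
+old_shards = [np.zeros((512, 100)) for _ in range(4)]  # 4 pre-training ranks
+new_shards = reshard_fc_weight(old_shards, 8)          # re-split for 8 cards
+```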
diff --git a/docs/export_for_infer.md b/docs/export_for_infer.md
index 27dbed10070cc1..9a3e0aaffbbb1f 100644
--- a/docs/export_for_infer.md
+++ b/docs/export_for_infer.md
@@ -2,6 +2,7 @@
 
 Usually, the models that the PaddlePaddle large-scale classification library
 saves during training contain only the model parameters, not the model
 structure needed for inference. To deploy with the PLSC inference library, a
 pre-trained model has to be exported as an inference model.
+An inference model contains both the parameters and the model structure
+required for inference, and serves subsequent inference tasks (see
+[Using the C++ inference library]).
 
 The following code exports a pre-trained model as an inference model:
 
diff --git a/plsc/__init__.py b/plsc/__init__.py
index e67f93465c86ba..a4341c9e293aaf 100644
--- a/plsc/__init__.py
+++ b/plsc/__init__.py
@@ -12,3 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .entry import Entry
+
+__all__ = ['Entry']
diff --git a/plsc/__init__.pyc b/plsc/__init__.pyc
deleted file mode 100644
index 5a0f0b581d03423120ca6c40ece35a2990783d59..0000000000000000000000000000000000000000
GIT binary patch
(binary delta omitted)

diff --git a/plsc/config.pyc b/plsc/config.pyc
deleted file mode 100644
index 1058bd325986e6a339aa7edb7e892dc4a057b296..0000000000000000000000000000000000000000
GIT binary patch
(binary delta omitted)

diff --git a/plsc/do_train.py b/plsc/do_train.py
deleted file mode 100755
index 36fc5103fd0d7d..00000000000000
--- a/plsc/do_train.py
+++ /dev/null
@@ -1,674 +0,0 @@
-import os
-import sys
-import time
-import argparse
-import functools
-import numpy as np
-
-import paddle
-import paddle.fluid as fluid
-import resnet
-import sklearn
-import reader
-from verification import evaluate
-from utility import add_arguments, print_arguments
-from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
-from paddle.fluid.incubate.fleet.collective import DistFCConfig
-import paddle.fluid.incubate.fleet.base.role_maker as role_maker
-from paddle.fluid.transpiler.details.program_utils import program_to_code
-from paddle.fluid.optimizer import Optimizer
-import paddle.fluid.profiler as profiler
-from fp16_utils import rewrite_program, update_role_var_grad, update_loss_scaling, move_optimize_ops_back
-from fp16_lists import AutoMixedPrecisionLists
-from paddle.fluid.transpiler.details import program_to_code
-import paddle.fluid.layers as layers
-import paddle.fluid.unique_name as unique_name
-
-parser = argparse.ArgumentParser(description="Train parallel face network.")
-add_arg = functools.partial(add_arguments, argparser=parser)
-# yapf: disable
-add_arg('train_batch_size', int, 128, "Minibatch size for training.")
-add_arg('test_batch_size', int, 120, "Minibatch size for test.")
-add_arg('num_epochs', int, 120, "Number of epochs to run.")
-add_arg('image_shape', str, "3,112,112", "Image size in the format of CHW.")
-add_arg('emb_dim', int, 512, "Embedding dim size.")
-add_arg('class_dim', int, 85742, "Number of classes.")
-add_arg('model_save_dir', str, None, "Directory to save model.")
-add_arg('pretrained_model', str, None, "Directory for pretrained model.")
-add_arg('lr', float, 0.1, "Initial learning rate.")
-add_arg('model', str, "ResNet_ARCFACE50", "The network to use.")
-add_arg('loss_type', str, "softmax", "Type of network loss to use.")
-add_arg('margin', float, 0.5, "Parameter of margin for arcface or dist_arcface.")
-add_arg('scale', float, 64.0, "Parameter of scale for arcface or dist_arcface.")
-add_arg('with_test', bool, False, "Whether to do test during training.")
-add_arg('fp16', bool, True, "Whether to do test during training.")
-add_arg('profile', bool, False, "Enable profiler or not."
) -# yapf: enable -args = parser.parse_args() - - -model_list = [m for m in dir(resnet) if "__" not in m] - - - -def optimizer_setting(params, args): - ls = params["learning_strategy"] - step = 1 - bd = [step * e for e in ls["epochs"]] - base_lr = params["lr"] - lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)] - print("bd: {}".format(bd)) - print("lr_step: {}".format(lr)) - step_lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr) - optimizer = fluid.optimizer.Momentum( - learning_rate=step_lr, - momentum=0.9, - regularization=fluid.regularizer.L2Decay(5e-4)) - num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) - if args.loss_type in ["dist_softmax", "dist_arcface"]: - if args.fp16: - wrapper = DistributedClassificationOptimizer( - optimizer, args.train_batch_size * num_trainers, step_lr, - loss_type=args.loss_type, init_loss_scaling=1.0) - else: - wrapper = DistributedClassificationOptimizer(optimizer, args.train_batch_size * num_trainers, step_lr) - elif args.loss_type in ["softmax", "arcface"]: - wrapper = optimizer - - - return wrapper - - -def build_program(args, - main_program, - startup_program, - is_train=True, - use_parallel_test=False, - fleet=None, - strategy=None): - model_name = args.model - assert model_name in model_list, \ - "{} is not in supported lists: {}".format(args.model, model_list) - assert not (is_train and use_parallel_test), \ - "is_train and use_parallel_test cannot be set simultaneously" - - trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) - - image_shape = [int(m) for m in args.image_shape.split(",")] - # model definition - model = resnet.__dict__[model_name]() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - emb, loss = model.net(input=image, - label=label, - is_train=is_train, - emb_dim=args.emb_dim, - class_dim=args.class_dim, - loss_type=args.loss_type, - margin=args.margin, - scale=args.scale) - if args.loss_type in ["dist_softmax", "dist_arcface"]: - shard_prob = loss._get_info("shard_prob") - prob_all = fluid.layers.collective._c_allgather(shard_prob, - nranks=worker_num, use_calc_stream=True) - prob_list = fluid.layers.split(prob_all, dim=0, - num_or_sections=worker_num) - prob = fluid.layers.concat(prob_list, axis=1) - label_all = fluid.layers.collective._c_allgather(label, - nranks=worker_num, use_calc_stream=True) - acc1 = fluid.layers.accuracy(input=prob, label=label_all, k=1) - acc5 = fluid.layers.accuracy(input=prob, label=label_all, k=5) - elif args.loss_type in ["softmax", "arcface"]: - prob = loss[1] - loss = loss[0] - acc1 = fluid.layers.accuracy(input=prob, label=label, k=1) - acc5 = fluid.layers.accuracy(input=prob, label=label, k=5) - optimizer = None - if is_train: - # parameters from model and arguments - params = model.params - params["lr"] = args.lr - params["num_epochs"] = args.num_epochs - params["learning_strategy"]["batch_size"] = args.train_batch_size - # initialize optimizer - optimizer = optimizer_setting(params, args) - dist_optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) - dist_optimizer.minimize(loss) - elif use_parallel_test: - emb = fluid.layers.collective._c_allgather(emb, - nranks=worker_num, use_calc_stream=True) - return emb, loss, acc1, acc5, optimizer - - -def train(args): - pretrained_model = args.pretrained_model - model_save_dir 
= args.model_save_dir - model_name = args.model - - trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) - - role = role_maker.PaddleCloudRoleMaker(is_collective=True) - fleet.init(role) - strategy = DistributedStrategy() - strategy.mode = "collective" - strategy.collective_mode = "grad_allreduce" - - startup_prog = fluid.Program() - train_prog = fluid.Program() - test_program = fluid.Program() - train_emb, train_loss, train_acc1, train_acc5, optimizer = \ - build_program(args, train_prog, startup_prog, True, False, - fleet, strategy) - test_emb, test_loss, test_acc1, test_acc5, _ = \ - build_program(args, test_program, startup_prog, False, True) - - if args.loss_type in ["dist_softmax", "dist_arcface"]: - if not args.fp16: - global_lr = optimizer._optimizer._global_learning_rate( - program=train_prog) - else: - global_lr = optimizer._optimizer._global_learning_rate( - program=train_prog) - elif args.loss_type in ["softmax", "arcface"]: - global_lr = optimizer._global_learning_rate(program=train_prog) - - origin_prog = fleet._origin_program - train_prog = fleet.main_program - if trainer_id == 0: - with open('start.program', 'w') as fout: - program_to_code(startup_prog, fout, True) - with open('main.program', 'w') as fout: - program_to_code(train_prog, fout, True) - with open('origin.program', 'w') as fout: - program_to_code(origin_prog, fout, True) - - gpu_id = int(os.getenv("FLAGS_selected_gpus", 0)) - place = fluid.CUDAPlace(gpu_id) - exe = fluid.Executor(place) - exe.run(startup_prog) - - if pretrained_model: - pretrained_model = os.path.join(pretrained_model, str(trainer_id)) - def if_exist(var): - has_var = os.path.exists(os.path.join(pretrained_model, var.name)) - if has_var: - print('var: %s found' % (var.name)) - return has_var - fluid.io.load_vars(exe, pretrained_model, predicate=if_exist, - main_program=train_prog) - - train_reader = paddle.batch(reader.arc_train(args.class_dim), - batch_size=args.train_batch_size) - if args.with_test: - test_list, test_name_list = reader.test() - test_feeder = fluid.DataFeeder(place=place, feed_list=['image', 'label'], program=test_program) - fetch_list_test = [test_emb.name, test_acc1.name, test_acc5.name] - feeder = fluid.DataFeeder(place=place, feed_list=['image', 'label'], program=train_prog) - - fetch_list_train = [train_loss.name, global_lr.name, train_acc1.name, train_acc5.name,train_emb.name,"loss_scaling_0"] - # test_program = test_program._prune(targets=loss) - - num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) - real_batch_size = args.train_batch_size * num_trainers - real_test_batch_size = args.test_batch_size * num_trainers - local_time = 0.0 - nsamples = 0 - inspect_steps = 100 - step_cnt = 0 - for pass_id in range(args.num_epochs): - train_info = [[], [], [], []] - local_train_info = [[], [], [], []] - for batch_id, data in enumerate(train_reader()): - nsamples += real_batch_size - t1 = time.time() - loss, lr, acc1, acc5, train_embedding, loss_scaling = exe.run(train_prog, feed=feeder.feed(data), - fetch_list=fetch_list_train, use_program_cache=True) - t2 = time.time() - if args.profile and step_cnt == 50: - print("begin profiler") - if trainer_id == 0: - profiler.start_profiler("All") - elif args.profile and batch_id == 55: - print("begin to end profiler") - if trainer_id == 0: - profiler.stop_profiler("total", "./profile_%d" % (trainer_id)) - print("end profiler break!") - args.profile=False - - - period = t2 - t1 - local_time += period - 
train_info[0].append(np.array(loss)[0]) - train_info[1].append(np.array(lr)[0]) - local_train_info[0].append(np.array(loss)[0]) - local_train_info[1].append(np.array(lr)[0]) - if batch_id % inspect_steps == 0: - avg_loss = np.mean(local_train_info[0]) - avg_lr = np.mean(local_train_info[1]) - print("Pass:%d batch:%d lr:%f loss:%f qps:%.2f acc1:%.4f acc5:%.4f" % ( - pass_id, batch_id, avg_lr, avg_loss, nsamples / local_time, - acc1, acc5)) - #print("train_embedding:,",np.array(train_embedding)[0]) - print("train_embedding is nan:",np.isnan(np.array(train_embedding)[0]).sum()) - print("loss_scaling",loss_scaling) - local_time = 0 - nsamples = 0 - local_train_info = [[], [], [], []] - step_cnt += 1 - - if args.with_test and step_cnt % inspect_steps == 0: - test_start = time.time() - for i in xrange(len(test_list)): - data_list, issame_list = test_list[i] - embeddings_list = [] - for j in xrange(len(data_list)): - data = data_list[j] - embeddings = None - parallel_test_steps = data.shape[0] // real_test_batch_size - beg = 0 - end = 0 - for idx in range(parallel_test_steps): - start = idx * real_test_batch_size - offset = trainer_id * args.test_batch_size - begin = start + offset - end = begin + args.test_batch_size - _data = [] - for k in xrange(begin, end): - _data.append((data[k], 0)) - assert len(_data) == args.test_batch_size - [_embeddings, acc1, acc5] = exe.run(test_program, - fetch_list = fetch_list_test, feed=test_feeder.feed(_data), - use_program_cache=True) - if embeddings is None: - embeddings = np.zeros((data.shape[0], _embeddings.shape[1])) - embeddings[start:start+real_test_batch_size, :] = _embeddings[:, :] - beg = parallel_test_steps * real_test_batch_size - - while beg < data.shape[0]: - end = min(beg + args.test_batch_size, data.shape[0]) - count = end - beg - _data = [] - for k in xrange(end - args.test_batch_size, end): - _data.append((data[k], 0)) - [_embeddings, acc1, acc5] = exe.run(test_program, - fetch_list = fetch_list_test, feed=test_feeder.feed(_data), - use_program_cache=True) - _embeddings = _embeddings[0:args.test_batch_size,:] - embeddings[beg:end, :] = _embeddings[(args.test_batch_size-count):, :] - beg = end - embeddings_list.append(embeddings) - - xnorm = 0.0 - xnorm_cnt = 0 - for embed in embeddings_list: - xnorm += np.sqrt((embed * embed).sum(axis=1)).sum(axis=0) - xnorm_cnt += embed.shape[0] - xnorm /= xnorm_cnt - - embeddings = embeddings_list[0] + embeddings_list[1] - if np.isnan(embeddings).sum() > 1: - print("======test np.isnan(embeddings).sum()",np.isnan(embeddings).sum()) - continue - embeddings = sklearn.preprocessing.normalize(embeddings) - _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10) - acc, std = np.mean(accuracy), np.std(accuracy) - - print('[%s][%d]XNorm: %f' % (test_name_list[i], step_cnt, xnorm)) - print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (test_name_list[i], step_cnt, acc, std)) - sys.stdout.flush() - test_end = time.time() - print("test time: {}".format(test_end - test_start)) - - train_loss = np.array(train_info[0]).mean() - print("End pass {0}, train_loss {1}".format(pass_id, train_loss)) - sys.stdout.flush() - - #save model - #if trainer_id == 0: - if model_save_dir: - model_path = os.path.join(model_save_dir + '/' + model_name, - str(pass_id), str(trainer_id)) - if not os.path.isdir(model_path): - os.makedirs(model_path) - fluid.io.save_persistables(exe, model_path) - - -class DistributedClassificationOptimizer(Optimizer): - ''' - A optimizer wrapper to generate backward network for 
distributed - classification training of model parallelism. - ''' - - def __init__(self,optimizer, batch_size, lr, - loss_type='dist_arcface', - amp_lists=None, - init_loss_scaling=1.0, - incr_every_n_steps=1000, - decr_every_n_nan_or_inf=2, - incr_ratio=2.0, - decr_ratio=0.5, - use_dynamic_loss_scaling=True): - super(DistributedClassificationOptimizer, self).__init__( - learning_rate=lr) - self._optimizer = optimizer - self._batch_size = batch_size - self._amp_lists = amp_lists - if amp_lists is None: - self._amp_lists = AutoMixedPrecisionLists() - - self._param_grads = None - self._scaled_loss = None - self._loss_type = loss_type - self._init_loss_scaling = init_loss_scaling - self._loss_scaling = layers.create_global_var( - name=unique_name.generate("loss_scaling"), - shape=[1], - value=init_loss_scaling, - dtype='float32', - persistable=True) - self._use_dynamic_loss_scaling = use_dynamic_loss_scaling - if self._use_dynamic_loss_scaling: - self._incr_every_n_steps = layers.fill_constant( - shape=[1], dtype='int32', value=incr_every_n_steps) - self._decr_every_n_nan_or_inf = layers.fill_constant( - shape=[1], dtype='int32', value=decr_every_n_nan_or_inf) - self._incr_ratio = incr_ratio - self._decr_ratio = decr_ratio - self._num_good_steps = layers.create_global_var( - name=unique_name.generate("num_good_steps"), - shape=[1], - value=0, - dtype='int32', - persistable=True) - self._num_bad_steps = layers.create_global_var( - name=unique_name.generate("num_bad_steps"), - shape=[1], - value=0, - dtype='int32', - persistable=True) - - # Ensure the data type of learning rate vars is float32 (same as the - # master parameter dtype) - if isinstance(optimizer._learning_rate, float): - optimizer._learning_rate_map[fluid.default_main_program()] = \ - layers.create_global_var( - name=unique_name.generate("learning_rate"), - shape=[1], - value=float(optimizer._learning_rate), - dtype='float32', - persistable=True) - - def minimize(self, - loss, - startup_program=None, - parameter_list=None, - no_grad_set=None, - callbacks=None): - assert loss._get_info('shard_logit') - - shard_logit = loss._get_info('shard_logit') - shard_prob = loss._get_info('shard_prob') - shard_label = loss._get_info('shard_label') - shard_dim = loss._get_info('shard_dim') - - op_maker = fluid.core.op_proto_and_checker_maker - op_role_key = op_maker.kOpRoleAttrName() - op_role_var_key = op_maker.kOpRoleVarAttrName() - backward_role = int(op_maker.OpRole.Backward) - loss_backward_role = int(op_maker.OpRole.Loss) | int( - op_maker.OpRole.Backward) - - # minimize a scalar of reduce_sum to generate the backward network - scalar = fluid.layers.reduce_sum(shard_logit) - if not args.fp16: - ret = self._optimizer.minimize(scalar) - with open("fp32_before.program", "w") as f: - program_to_code(block.program,fout=f, skip_op_callstack=False) - - block = loss.block - # remove the unnecessary ops - index = 0 - for i, op in enumerate(block.ops): - if op.all_attrs()[op_role_key] == loss_backward_role: - index = i - break - print("op_role_key: ",op_role_key) - print("loss_backward_role:",loss_backward_role) - # print("\nblock.ops: ",block.ops) - print("block.ops[index - 1].type: ", block.ops[index - 1].type) - print("block.ops[index].type: ", block.ops[index].type) - print("block.ops[index + 1].type: ", block.ops[index + 1].type) - - assert block.ops[index - 1].type == 'reduce_sum' - assert block.ops[index].type == 'fill_constant' - assert block.ops[index + 1].type == 'reduce_sum_grad' - block._remove_op(index + 1) - block._remove_op(index) - 
block._remove_op(index - 1) - - # insert the calculated gradient - dtype = shard_logit.dtype - shard_one_hot = fluid.layers.create_tensor(dtype, name='shard_one_hot') - block._insert_op( - index - 1, - type='one_hot', - inputs={'X': shard_label}, - outputs={'Out': shard_one_hot}, - attrs={ - 'depth': shard_dim, - 'allow_out_of_range': True, - op_role_key: backward_role - }) - shard_logit_grad = fluid.layers.create_tensor( - dtype, name=fluid.backward._append_grad_suffix_(shard_logit.name)) - block._insert_op( - index, - type='elementwise_sub', - inputs={'X': shard_prob, - 'Y': shard_one_hot}, - outputs={'Out': shard_logit_grad}, - attrs={op_role_key: backward_role}) - block._insert_op( - index + 1, - type='scale', - inputs={'X': shard_logit_grad}, - outputs={'Out': shard_logit_grad}, - attrs={ - 'scale': 1.0 / self._batch_size, - op_role_key: loss_backward_role - }) - with open("fp32_after.program", "w") as f: - program_to_code(block.program,fout=f, skip_op_callstack=False) - - # use mixed_precision for training - else: - block = loss.block - rewrite_program(block.program, self._amp_lists) - self._params_grads = self._optimizer.backward( - scalar, startup_program, parameter_list, no_grad_set, - callbacks) - update_role_var_grad(block.program, self._params_grads) - move_optimize_ops_back(block.program.global_block()) - scaled_params_grads = [] - for p, g in self._params_grads: - with fluid.default_main_program()._optimized_guard([p, g]): - scaled_g = g / self._loss_scaling - scaled_params_grads.append([p, scaled_g]) - - index = 0 - for i, op in enumerate(block.ops): - if op.all_attrs()[op_role_key] == loss_backward_role: - index = i - break - fp32 = fluid.core.VarDesc.VarType.FP32 - dtype = shard_logit.dtype - - if self._loss_type == 'dist_arcface': - assert block.ops[index - 2].type == 'fill_constant' - assert block.ops[index - 1].type == 'reduce_sum' - assert block.ops[index].type == 'fill_constant' - assert block.ops[index + 1].type == 'reduce_sum_grad' - assert block.ops[index + 2].type == 'scale' - assert block.ops[index + 3].type == 'elementwise_add_grad' - - block._remove_op(index + 2) - block._remove_op(index + 1) - block._remove_op(index) - block._remove_op(index - 1) - - # insert the calculated gradient - shard_one_hot = fluid.layers.create_tensor(dtype, name='shard_one_hot') - block._insert_op( - index - 1, - type='one_hot', - inputs={'X': shard_label}, - outputs={'Out': shard_one_hot}, - attrs={ - 'depth': shard_dim, - 'allow_out_of_range': True, - op_role_key: backward_role - }) - shard_one_hot_fp32 = fluid.layers.create_tensor(fp32, name=(shard_one_hot.name+".cast_fp32")) - block._insert_op( - index, - type="cast", - inputs={"X": shard_one_hot}, - outputs={"Out": shard_one_hot_fp32}, - attrs={ - "in_dtype": fluid.core.VarDesc.VarType.FP16, - "out_dtype": fluid.core.VarDesc.VarType.FP32, - op_role_key: backward_role - }) - name = 'tmp_3@GRAD' - shard_logit_grad_fp32 = block.var(name) - - block._insert_op( - index+1, - type='elementwise_sub', - inputs={'X': shard_prob, - 'Y': shard_one_hot_fp32}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={op_role_key: backward_role}) - - block._insert_op( - index+2, - type='elementwise_mul', - inputs={'X': shard_logit_grad_fp32, - 'Y': self._loss_scaling}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={op_role_key: backward_role}) - - block._insert_op( - index+3, - type='scale', - inputs={'X': shard_logit_grad_fp32}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={ - 'scale': 1.0 / self._batch_size, - op_role_key: 
loss_backward_role - }) - elif self._loss_type == 'dist_softmax': - print("block.ops[index - 3].type: ", block.ops[index - 3].type) - print("block.ops[index - 2].type: ", block.ops[index - 2].type) - print("block.ops[index-1].type: ", block.ops[index - 1].type) - print("block.ops[index].type: ", block.ops[index].type) - print("block.ops[index + 1].type: ", block.ops[index +1].type) - print("block.ops[index + 2].type: ", block.ops[index +2].type) - print("block.ops[index + 3].type: ", block.ops[index +3].type) - with open("fp16_softmax_before.program", "w") as f: - program_to_code(block.program,fout=f, skip_op_callstack=False) - - assert block.ops[index - 1].type == 'reduce_sum' - assert block.ops[index].type == 'fill_constant' - assert block.ops[index + 1].type == 'reduce_sum_grad' - assert block.ops[index + 2].type == 'cast' - assert block.ops[index + 3].type == 'elementwise_add_grad' - - block._remove_op(index + 1) - block._remove_op(index) - block._remove_op(index - 1) - - # insert the calculated gradient - shard_one_hot = fluid.layers.create_tensor(fp32, name='shard_one_hot') - shard_one_hot_fp32 = fluid.layers.create_tensor(fp32, - name=(shard_one_hot.name+".cast_fp32")) - shard_logit_grad_fp32 = block.var(shard_logit.name+".cast_fp32@GRAD") - block._insert_op( - index - 1, - type='one_hot', - inputs={'X': shard_label}, - outputs={'Out': shard_one_hot_fp32}, - attrs={ - 'depth': shard_dim, - 'allow_out_of_range': True, - op_role_key: backward_role - }) - - block._insert_op( - index, - type='elementwise_sub', - inputs={'X': shard_prob, - 'Y': shard_one_hot_fp32}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={op_role_key: backward_role}) - block._insert_op( - index + 1, - type='elementwise_mul', - inputs={'X': shard_logit_grad_fp32, - 'Y': self._loss_scaling}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={op_role_key: backward_role}) - block._insert_op( - index + 2, - type='scale', - inputs={'X': shard_logit_grad_fp32}, - outputs={'Out': shard_logit_grad_fp32}, - attrs={ - 'scale': 1.0 / self._batch_size, - op_role_key: loss_backward_role - }) - - if self._use_dynamic_loss_scaling: - grads = [layers.reduce_sum(g) for [_, g] in scaled_params_grads] - all_grads = layers.concat(grads) - all_grads_sum = layers.reduce_sum(all_grads) - is_overall_finite = layers.isfinite(all_grads_sum) - - update_loss_scaling(is_overall_finite, self._loss_scaling, - self._num_good_steps, self._num_bad_steps, - self._incr_every_n_steps, - self._decr_every_n_nan_or_inf, self._incr_ratio, - self._decr_ratio) - - with layers.Switch() as switch: - with switch.case(is_overall_finite): - pass - with switch.default(): - for _, g in scaled_params_grads: - layers.assign(layers.zeros_like(g), g) - - optimize_ops = self._optimizer.apply_gradients(scaled_params_grads) - ret = optimize_ops, scaled_params_grads - - with open("fp16_softmax.program", "w") as f: - program_to_code(block.program,fout=f, skip_op_callstack=False) - return ret - - - -def main(): - global args - all_loss_types = ["softmax", "arcface", "dist_softmax", "dist_arcface"] - assert args.loss_type in all_loss_types, \ - "All supported loss types [{}], but give {}.".format( - all_loss_types, args.loss_type) - print_arguments(args) - train(args) - - -if __name__ == '__main__': - main() diff --git a/plsc/entry.py b/plsc/entry.py index bf57948745cc3f..bcd69e39596171 100644 --- a/plsc/entry.py +++ b/plsc/entry.py @@ -24,6 +24,7 @@ import subprocess import shutil import logging +import tempfile import paddle import paddle.fluid as fluid @@ -43,7 +44,8 
@@
 logging.basicConfig(
-    format='[%(asctime)s %(levelname)s line:%(lineno)d] %(message)s',
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
     datefmt='%d %b %Y %H:%M:%S')
 logger = logging.getLogger(__name__)
 
@@ -57,6 +59,9 @@ def _check(self):
         """
         Check the validity of parameters.
         """
+        assert os.getenv("PADDLE_TRAINERS_NUM") is not None, \
+            "Please start the script using the paddle.distributed.launch module."
+
         supported_types = ["softmax", "arcface",
                            "dist_softmax", "dist_arcface"]
         assert self.loss_type in supported_types, \
@@ -70,10 +75,8 @@ def _check(self):
     def __init__(self):
         self.config = config.config
         super(Entry, self).__init__()
-        assert os.getenv("PADDLE_TRAINERS_NUM") is not None, \
-            "Please start script using paddle.distributed.launch module."
-        num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM"))
-        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
+        num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM", 1))
+        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
 
         self.trainer_id = trainer_id
         self.num_trainers = num_trainers
@@ -114,8 +117,15 @@ def __init__(self):
         self.model_save_dir = self.config.model_save_dir
         self.warmup_epochs = self.config.warmup_epochs
 
+        if self.checkpoint_dir:
+            self.checkpoint_dir = os.path.abspath(self.checkpoint_dir)
+        if self.model_save_dir:
+            self.model_save_dir = os.path.abspath(self.model_save_dir)
+        if self.dataset_dir:
+            self.dataset_dir = os.path.abspath(self.dataset_dir)
+
         logger.info('=' * 30)
-        logger.info("Default configuration: ")
+        logger.info("Default configuration:")
         for key in self.config:
             logger.info('\t' + str(key) + ": " + str(self.config[key]))
         logger.info('trainer_id: {}, num_trainers: {}'.format(
@@ -123,18 +133,21 @@ def __init__(self):
         logger.info('=' * 30)
 
     def set_val_targets(self, targets):
+        """
+        Set the names of validation datasets, separated by commas.
+        """
         self.val_targets = targets
-        logger.info("Set val_targets to {} by user.".format(targets))
+        logger.info("Set val_targets to {}.".format(targets))
 
     def set_train_batch_size(self, batch_size):
         self.train_batch_size = batch_size
         self.global_train_batch_size = batch_size * self.num_trainers
-        logger.info("Set train batch size to {} by user.".format(batch_size))
+        logger.info("Set train batch size to {}.".format(batch_size))
 
     def set_test_batch_size(self, batch_size):
         self.test_batch_size = batch_size
         self.global_test_batch_size = batch_size * self.num_trainers
-        logger.info("Set test batch size to {} by user.".format(batch_size))
+        logger.info("Set test batch size to {}.".format(batch_size))
 
     def set_hdfs_info(self, fs_name, fs_ugi, directory):
         """
@@ -153,38 +166,42 @@ def set_hdfs_info(self, fs_name, fs_ugi, directory):
 
     def set_model_save_dir(self, directory):
         """
-        Set the directory to save model.
+        Set the directory to save models.
         """
+        if directory:
+            directory = os.path.abspath(directory)
         self.model_save_dir = directory
-        logger.info("Set model_save_dir to {} by user.".format(directory))
+        logger.info("Set model_save_dir to {}.".format(directory))
 
     def set_dataset_dir(self, directory):
         """
         Set the root directory for datasets.
         """
+        if directory:
+            directory = os.path.abspath(directory)
         self.dataset_dir = directory
-        logger.info("Set dataset_dir to {} by user.".format(directory))
+        logger.info("Set dataset_dir to {}.".format(directory))
 
     def set_train_image_num(self, num):
         """
         Set the total number of images for training.
""" self.train_image_num = num - logger.info("Set train_image_num to {} by user.".format(num)) + logger.info("Set train_image_num to {}.".format(num)) def set_class_num(self, num): """ Set the number of classes. """ self.num_classes = num - logger.info("Set num_classes to {} by user.".format(num)) + logger.info("Set num_classes to {}.".format(num)) def set_emb_size(self, size): """ Set the size of the last hidding layer before the distributed fc-layer. """ self.emb_size = size - logger.info("Set emb_size to {} by user.".format(size)) + logger.info("Set emb_size to {}.".format(size)) def set_model(self, model): """ @@ -194,25 +211,27 @@ def set_model(self, model): if not isinstance(model, base_model.BaseModel): raise ValueError("The parameter for set_model must be an " "instance of BaseModel.") - logger.info("Set model to {} by user.".format(model)) + logger.info("Set model to {}.".format(model)) def set_train_epochs(self, num): """ Set the number of epochs to train. """ self.train_epochs = num - logger.info("Set train_epochs to {} by user.".format(num)) + logger.info("Set train_epochs to {}.".format(num)) def set_checkpoint_dir(self, directory): """ Set the directory for checkpoint loaded before training/testing. """ + if directory: + directory = os.path.abspath(directory) self.checkpoint_dir = directory - logger.info("Set checkpoint_dir to {} by user.".format(directory)) + logger.info("Set checkpoint_dir to {}.".format(directory)) def set_warmup_epochs(self, num): self.warmup_epochs = num - logger.info("Set warmup_epochs to {} by user.".format(num)) + logger.info("Set warmup_epochs to {}.".format(num)) def set_loss_type(self, type): supported_types = ["dist_softmax", "dist_arcface", "softmax", "arcface"] @@ -220,54 +239,53 @@ def set_loss_type(self, type): raise ValueError("All supported loss types: {}".format( supported_types)) self.loss_type = type - logger.info("Set loss_type to {} by user.".format(type)) + logger.info("Set loss_type to {}.".format(type)) def set_image_shape(self, shape): if not isinstance(shape, (list, tuple)): - raise ValueError("shape must be of type list or tuple") + raise ValueError("Shape must be of type list or tuple") self.image_shape = shape - logger.info("Set image_shape to {} by user.".format(shape)) + logger.info("Set image_shape to {}.".format(shape)) def set_optimizer(self, optimizer): if not isinstance(optimizer, Optimizer): - raise ValueError("optimizer must be as type of Optimizer") + raise ValueError("Optimizer must be type of Optimizer") self.optimizer = optimizer logger.info("User manually set optimizer") - def get_optimizer(self): - if self.optimizer: - return self.optimizer - - bd = [step for step in self.lr_steps] - start_lr = self.lr + def _get_optimizer(self): + if not self.optimizer: + bd = [step for step in self.lr_steps] + start_lr = self.lr - global_batch_size = self.global_train_batch_size - train_image_num = self.train_image_num - images_per_trainer = int(math.ceil( - train_image_num * 1.0 / self.num_trainers)) - steps_per_pass = int(math.ceil( - images_per_trainer * 1.0 / self.train_batch_size)) - logger.info("steps per epoch: %d" % steps_per_pass) - warmup_steps = steps_per_pass * self.warmup_epochs - batch_denom = 1024 - base_lr = start_lr * global_batch_size / batch_denom - lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)] - logger.info("lr boundaries: {}".format(bd)) - logger.info("lr_step: {}".format(lr)) - if self.warmup_epochs: - lr_val = lr_warmup(fluid.layers.piecewise_decay(boundaries=bd, - values=lr), warmup_steps, 
-                values=lr), warmup_steps, start_lr, base_lr)
-        else:
-            lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+            global_batch_size = self.global_train_batch_size
+            train_image_num = self.train_image_num
+            images_per_trainer = int(math.ceil(
+                train_image_num * 1.0 / self.num_trainers))
+            steps_per_pass = int(math.ceil(
+                images_per_trainer * 1.0 / self.train_batch_size))
+            logger.info("Steps per epoch: %d" % steps_per_pass)
+            warmup_steps = steps_per_pass * self.warmup_epochs
+            batch_denom = 1024
+            base_lr = start_lr * global_batch_size / batch_denom
+            lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)]
+            logger.info("LR boundaries: {}".format(bd))
+            logger.info("lr_step: {}".format(lr))
+            if self.warmup_epochs:
+                lr_val = lr_warmup(fluid.layers.piecewise_decay(boundaries=bd,
+                    values=lr), warmup_steps, start_lr, base_lr)
+            else:
+                lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
 
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=lr_val, momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(5e-4))
-        self.optimizer = optimizer
+            optimizer = fluid.optimizer.Momentum(
+                learning_rate=lr_val, momentum=0.9,
+                regularization=fluid.regularizer.L2Decay(5e-4))
+            self.optimizer = optimizer
 
         if self.loss_type in ["dist_softmax", "dist_arcface"]:
             self.optimizer = DistributedClassificationOptimizer(
-                self.optimizer, global_batch_size)
+                self.optimizer, self.global_train_batch_size)
+
+        return self.optimizer
 
     def build_program(self,
@@ -302,6 +320,7 @@ def build_program(self,
                               loss_type=self.loss_type,
                               margin=self.margin,
                               scale=self.scale)
+
             if self.loss_type in ["dist_softmax", "dist_arcface"]:
                 shard_prob = loss._get_info("shard_prob")
 
@@ -320,10 +339,12 @@ def build_program(self,
             optimizer = None
             if is_train:
                 # initialize optimizer
-                optimizer = self.get_optimizer()
+                optimizer = self._get_optimizer()
                 dist_optimizer = self.fleet.distributed_optimizer(
                     optimizer, strategy=self.strategy)
                 dist_optimizer.minimize(loss)
+                if "dist" in self.loss_type:
+                    optimizer = optimizer._optimizer
             elif use_parallel_test:
                 emb = fluid.layers.collective._c_allgather(emb,
                     nranks=num_trainers, use_calc_stream=True)
@@ -361,11 +382,7 @@ def put_files_to_hdfs(self, local_dir):
 
     def preprocess_distributed_params(self,
                                       local_dir):
         local_dir = os.path.abspath(local_dir)
-        output_dir = local_dir + "_@tmp"
-        assert not os.path.exists(output_dir), \
-            "The temp directory {} for distributed params exists.".format(
-                output_dir)
-        os.makedirs(output_dir)
+        output_dir = tempfile.mkdtemp()
         cmd = sys.executable + ' -m plsc.utils.process_distfc_parameter '
         cmd += "--nranks {} ".format(self.num_trainers)
         cmd += "--num_classes {} ".format(self.num_classes)
@@ -388,13 +405,11 @@ def preprocess_distributed_params(self,
             file = os.path.join(output_dir, file)
             shutil.move(file, local_dir)
         shutil.rmtree(output_dir)
-        file_name = os.path.join(local_dir, '.lock')
-        with open(file_name, 'w') as f:
-            pass
 
-    def append_broadcast_ops(self, program):
+    def _append_broadcast_ops(self, program):
         """
-        Before test, we broadcast bn-related parameters to all other trainers.
+        Before test, we broadcast batchnorm-related parameters to all
+        other trainers from trainer-0.
         """
         bn_vars = [var for var in program.list_vars()
                    if 'batch_norm' in var.name and var.persistable]
[Y/N]".format(checkpoint_dir)) - if ans.lower() == n: - logger.info("Using the local checkpoint directory, instead" - " of the remote one.") - else: - logger.info("Overwriting the local checkpoint directory.") + if ans.lower() == 'y': + if os.path.exists(checkpoint_dir): + logger.info("Using the local checkpoint directory.") shutil.rmtree(checkpoint_dir) - os.makedirs(checkpoint_dir) - file_name = os.path.join(checkpoint_dir, '.lock') - if self.trainer_id == 0: - self.get_files_from_hdfs(checkpoint_dir) - with open(file_name, 'w') as f: - pass - time.sleep(5) - os.remove(file_name) - else: - while True: - if not os.path.exists(file_name): - time.sleep(1) - else: - break - else: + os.makedirs(checkpoint_dir) + + # sync all trainers to avoid loading checkpoints before + # parameters are downloaded + file_name = os.path.join(checkpoint_dir, '.lock') + if self.trainer_id == 0: self.get_files_from_hdfs(checkpoint_dir) + with open(file_name, 'w') as f: + pass + time.sleep(10) + os.remove(file_name) + else: + while True: + if not os.path.exists(file_name): + time.sleep(1) + else: + break # Preporcess distributed parameters. file_name = os.path.join(checkpoint_dir, '.lock') distributed = self.loss_type in ["dist_softmax", "dist_arcface"] if load_for_train and self.trainer_id == 0 and distributed: self.preprocess_distributed_params(checkpoint_dir) - time.sleep(5) + with open(file_name, 'w') as f: + pass + time.sleep(10) os.remove(file_name) elif load_for_train and distributed: # wait trainer_id (0) to complete @@ -503,11 +520,11 @@ def convert_for_prediction(self): load_for_train=False) assert self.model_save_dir, \ - "Does not set model_save_dir for inference." + "Does not set model_save_dir for inference model converting." if os.path.exists(self.model_save_dir): ans = input("model_save_dir for inference model ({}) exists, " "overwrite it or not? 
[Y/N]".format(model_save_dir)) - if ans.lower() == n: + if ans.lower() == 'n': logger.error("model_save_dir for inference model exists, " "and cannot overwrite it.") exit() @@ -551,17 +568,17 @@ def predict(self): load_for_train=False) if self.train_reader is None: - train_reader = paddle.batch(reader.arc_train( + predict_reader = paddle.batch(reader.arc_train( self.dataset_dir, self.num_classes), batch_size=self.train_batch_size) else: - train_reader = self.train_reader + predict_reader = self.train_reader feeder = fluid.DataFeeder(place=place, feed_list=['image', 'label'], program=main_program) fetch_list = [emb.name] - for data in train_reader(): + for data in predict_reader(): emb = exe.run(main_program, feed=feeder.feed(data), fetch_list=fetch_list, use_program_cache=True) print("emb: ", emb) @@ -684,18 +701,14 @@ def train(self): self.build_program(True, False) if self.with_test: test_emb, test_loss, test_acc1, test_acc5, _ = \ - self.build_program(False, True) + self.build_program(False, self.num_trainers > 1) test_list, test_name_list = reader.test( self.dataset_dir, self.val_targets) test_program = self.test_program - self.append_broadcast_ops(test_program) + self._append_broadcast_ops(test_program) - if self.loss_type in ["dist_softmax", "dist_arcface"]: - global_lr = optimizer._optimizer._global_learning_rate( - program=self.train_program) - else: - global_lr = optimizer._global_learning_rate( - program=self.train_program) + global_lr = optimizer._global_learning_rate( + program=self.train_program) origin_prog = fleet._origin_program train_prog = fleet.main_program @@ -720,10 +733,10 @@ def train(self): fetch_list_test = [test_emb.name, test_acc1.name, test_acc5.name] real_test_batch_size = self.global_test_batch_size - if self.checkpoint_dir == "": - load_checkpoint = False - else: + if self.checkpoint_dir: load_checkpoint = True + else: + load_checkpoint = False if load_checkpoint: self.load_checkpoint(executor=exe, main_program=origin_prog) @@ -839,7 +852,14 @@ def train(self): model_save_dir = os.path.join( self.model_save_dir, str(pass_id)) if not os.path.exists(model_save_dir): - os.makedirs(model_save_dir) + # may be more than one processes trying + # to create the directory + try: + os.makedirs(model_save_dir) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise + pass if trainer_id == 0: fluid.io.save_persistables(exe, model_save_dir, diff --git a/plsc/entry.pyc b/plsc/entry.pyc deleted file mode 100644 index 89228c6c7017a959079d61f1585f5b3237b2d5a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27675 zcmeI5dvILWec#XB1@R*IegNQe`2Y!uB>0d>N)#oLBB+NgLDVG`%8;1NV)p`EaIp*S zy%0$Y8#|U#C$4MvHR&YvbP{(mQ#WXh&9!qa^lUsed*Hs4H+>o0b^65TT8Fq8q+~hW&?st{#Zf=L0 z+~L!!U1g`6+vO&AlLkBku99s~J6(_B zYLxhNKjl+}ILgn}N>Rn;S0T%Il$3mWc)`J_Boet%T=b^EAW zspoGO>T?V8K1)-HUMo}<3Q6R1R+BS3AI;?JQK3Z34CzG3Jza^SB-uo)W4tR#ot86q zecN>u&ev-*^}<{}spX4IvpsTaGKtnXU8~O(lK2eKS4Kt)G2oet4#l~V(Mt4MRH+s$ zRVi1avt;q7T02y_PVrn6$AuZHCOxz*G5Yje5^o?rQp$}?c)n2pFHW2nVstBAmii7U|JpP-V_yk3~3$ zy%ymV_F04@*l!We-)f6+_y#P($y;L)5OJ+VK*x0!0V&s8WTOyNLu?Y7DzaIasmK-= zZFSM0;U~moNQkNYVOQE}k!`LtXp!x%G-Qz-s!yxZuzQmxJ6&m; zr+bqlkaMM7mbu4;S4MXOulOWEe~$mI-iUHVVOTDy<)Uh_Fdr{e*nwQ3QpwfkBX9w% zi^r^TQHU+<>EidIVE!>83D^b9106R&bhFRJdt72*C+92iyK zyV>pH>sGnP&8||8FM1=R-e~KS0Gyijk64mj>vcD~T>KGh*q7GSiS6ga4lt$o6oE}y zYrJ?PD&EW`tdwI>2ESNQt~Q;UFM!oi64le?sji)d%NNGSpSzU58eVvI;!=1eKk?G@ z@uT#3xe{?!b8%9rC%L#-FV82rg}7Xup{r7<5{;GG$9AkzfI{5Jaj+LCG1QK0)5%=n 
zj>QZ0;&h=HC7Q09<(q}AjI>64h#D8zjd)>xzE+1KZQiyqg=H_YidYy?HZK}m_UegWNr~|F7x&S3f zC*(-P zG@r)SKHrn#aK6`Yl`;Bk3=0`=w%?K#swi-qHHFnyo7^;QJYEIH)D?lW_}Lm?SI!2P zY;?1mG+QM%FHLS)n%ufH39<35AT~Y%vGI{@HWg;N-MY0<6W?J;3zghyNeh+SWl0N_ z+-*qLM2BmX`zw_ENP*V2Q6u#lB1TiP{~7< zv{1*y)n=$rkY@CV&;)lLQ#I<3Uq9&@*=FrtmyEeZP$zJM zFpNQ^tQvX4%2eB17c$q<&iW;|2xlOhhFZ;0+QYIRS4PFVC&KlFcCr(3#iisa|Lk~~ zOyRI|kCk^3#)Aao(R5*=z&*N%maEe>LnmZl;~S&O4Yw;05{3H+ ztisLc?n(?I+(X4@`Hd$D986(TFcb`B`hvb-9skw`>n%Rnq7H|Cbq0*VXog;x@!VzB#8Fs_&b$3cL%L%8Zz9&2y%Hr7Pn!EK?)-WqH+5G$ zx<2M1AgLz2q~Iw90;K2D0*#?;iZv?%#Un4+)G^wR(mocD9gw+qH4jqIT8)n&@fqXF zXd{s2;F4Z`yLWYhg;r-LFK0(Qr(?f-iNb<#zXIFj1B$)nVntip)MCZ;N-WUnSx$C7 zqco37X9atiTj0WsX3XB z=GH21b<&(>+Bl+3VIiq$QqbedU9VF~>4=5R&x}8HCHE}TJ{yab^rdV<95@z{R|dSo z(&Djw^=Ph!q6W>4ib<`0S0xh$)==tQs8bt0q+l67Y3DsPZPhcX-RfyY6cg1EiU~0A z{^{CBl_B}x^@y`vtz1Fab={AYeqS&+O)o8UYQmsit0nDC z&w=K-R6gi91ud4+Av)Y22pZ!D0D`G(0HI@?PtxPN1p#T@_dp=2C51{3QDrKsOOH3v z1#$o+>?C)fLyHJ6m2vJ53Te%62?~=p7;{CGXp!Hg!+q*qgvgQmL8J|fR4nWOK~uP( zPE%;S|MF$Hd&}jAA*Cy>y-62r26;yN%)7Yi_uX&RCPc9RBF&hvlH84QsU-4jHVmjW z7=e^Q>3wY+Exd{)KIwQ#%)O6f;>m>x&dnNc2`3X^)yImk5`h=#XmTevjQr*179{0Fa7NfS2C`}~($Rdf5J%4&iH?WpL8JNl4A3Fo zPK@?a8Xh7r^#eP4+DRx(-tL=JA;AK4gJT>2X`EPq-rHgC%odbPEdG8YY z(gf~-K%0e&Ubl6}oJ2eDuX$=kK{?CEq($<#0~MPrzTH-8<^5^eg%u(6j#>QDq5jnm z4kLCr)%|jGrrlnm5ieVe7gt0Det_MU*_(}LuhN-b4yWalkZTPa@ zo6QOd|7MS?pOzIBX&!B`CG&eCl#;8K>d>4stO7 z{MI#LC?T>1!>36ACDCjn+tX%Dj`y4M!T_=aVlA`ta{4Wl(dJCVk|E95)1ta!f*9Fz z6&wjtl(0*o8Z|k8hk0l@m_+Zwmo-DpSIx~}WbHlpWLa-RZn@1tV795O2)d?_*fEj+ zq=p*Z1zVN^PHQye)5tBriPP;8*X!hTDUxA?H3d^s4hr7%W+unfHmE%QdbR9p_sRajTYU8P1_U`s$fNkG?A&*{~ypIg@^1= zs19hxPJrv>*WmFl$Uz;)^ju$`My|~GtN?5DfH~Fz_rn=?YnvTY>Sko3R->$I;BX_t z30@@;0UM#@1I*3dk~7=uwtG>b(cMvQ{tsDta8@+Sd&Hz?@rObRdfZmQoWH6w;5IjK zp)we*4F>MQNJP6vzhk17V+`xn_UCn2ToS32CLns&6@qcc8!7N5X^F2 zo3;e$z&H!Z9cY@;{LuC10?f-;$Y;$hf|HQ3u*IvM%Pf*)M*Z#$Ah5<&x4&ggO4SL= z!9$Bn1~)Hj#UtbMx42pSu4`>?&l0?Ytw%FB zgzM$^n=NJLyPx{@zj=P@{1a`m?a^Md_ntx-pzQ#J=M<9$7wQ~_rd~-QGg^ZgU~KN8gXd0;tD@%+8f{dEuFV`|CpXjiRLMFs=WiZ6y0}&9glHQgZIQU; zQk#67XD^%HP6MZz-7R+>r@I-{Q-N`n6oL`1rbIRMJO9U{i^FS;l49k z1N`zWTJwkW6Q3oJ;j<^R1M9ap+zgT)3}o5v4%P-I%#70qeICke&kVrgwwRqf%ip4Q zLlV9GEk>QX$nlshiW9inMjgL_c(2%Ai)gC`G|7`9$iylU24!|Z3pP4WdKSBNxI_sa zcMVTix*c?dBMfc7+3V^bmhVaKQ=Ds}LUh2vb^(=izyeqQDa%~#s@)cWI8vKxYaGOg zoe=#XhgKQ)@l%;VS=>q5Krim&yM5Y|k>=$ENE{A9u7|wAsb6h#cZ!zWXaxhz0a05=oSafvutEMsNw_WBG~U|mMj>$ zzih#<4d<>g+KVsy5Z)l_Pqi%?V_J4?NnI21Il?lcsni&6E`}sYd9G9|>_JL?uFUfg1WfZ3hCBEtDEY^WM zR_r2vM0}@nMxF5xmg&Qphe_t98g52x)4D)pODWtzz!Vj`S=QuIdYWE0lhE8Aeso01 z#}$aon9I*|2@^=gMohmqhi?e&X?mm0-I_P>g=4BEb4Pegf$-*QIo8ke8%ZAd*OEj!47=EeRzb|%i6>5MsmCDpXY|sOV(vn2H&Hu zVta!9nLUKmgCojLw4V6JV3&7yXb}=kj3fR$5g4BoX3wHXX58QzpHa%XG$x8=cZ^y^ zo;`Ndr9D?jR3CWD+ltEF>1Ja9-BdFDN0pF|A;n0)i1K~njj%0ct~P=Xf2J2n#i_ES zO`3>z65c43YPIKvhFj{fSjXllxSxxK zqUJVh`PhnccDKaPJ5EY9D3QQKG^*D}wP`E6tiqq6il;~mQ(#b$8>4n=E^|e(YB-1^ zF#NP?spBhG$6t8qs-e>DLOJpF6CDIY!AcFvWmge|yv)t0Xxi~I}Mvhavmg8RX5wa+QBSg~yqNbOGk(-e-p<5VYbP1n(a#wBoo6=FouV0k3 zFZp+s$*}7KNx{u{3K0Y;;DlI;j(~}RRUlVMO6Bg{di`9xbFJB}&3yd&Q%k#@>F9PQ zXm`s#w7T6YL7SxDk)h`q{aj1PMkY>y&Cu+g%}2UE^MRBOJ5&=yb?FQ}QcbRzEJ$!= zRL1lPBZB$!$=rPW5NSC~xScTH;jYDqZys9&q4a*gcd5j=NWxJ(W;b4QN9S_7>N>WN zlq>OAN}3>9aWY*r{%!Y`b7p=#deqBha;TVTfQ$}bpK`_7j~;DbRY-l-GMlS-ofSis zh5SC#A7@DbPvxoaAo1=L{PdG}-fZ>Eyh`#`Bgr2JO9ReB}11CS|t;jZV+Y z9w3FhFcn)|r^&;_OMv1MUc4#Xv=LU^%B#;I`9_u}CgeR>CT#tRW~wV4ZHTrBkTnqh zFjl09mpUOCZhA%PB>v16b|OQHLLMVxFgDfeky`qpP`U=aCcTca2}x%id7dHlXSO2I zYza1}e+Q|x1v#cDDO{Xf&^9UNuM?4u&S|i>Gvn9yOQeC4aa0&l_wqT&#L=KEBE=oG z6~aRsNC&D|m%HVrM-7RCe76(ihEz% 
z(uV-rKyQhua+9b`%7qyDoaqb2e5-py5Yw)_Zu7EXl;8T;M3WZ~cLY;}da(8*#QZfK zFN}bC*l{TMV6$^6zrG%xfEUA0Fj4u6{DsCxG%E1e)PM53>a zP0u!;sj3ZP^RRwaM77QIQkVy;OYLUGE| zu!&u-5Kl_nAIbw2?DV%m9c0N}ts zIGoQBdxe_A`H*kfpaFZ<^_OLEp5^|$r|7p{4$`!7vj8O(_T@XUAvSa-=slw}*y(A% zpDuE@UW^R|N^Wd5fXOjmtX50;Lb6%>OBN&emmq+R!c1EMli?nDGU)10x0mP3;)_Ar z&RB79YkP2+-)pyCSvn||h!#+nPn1jjYjrSXGwX8oFIh_tk)J(DOlUrHxV472oNM(7 z>m(}+dOe%DbKK_ry?f^Ur_1L3Rp$NG;2QHp`PA#X)$T7r3i@1PW-7fW2n@uC1nj4g zjyEzH@4gX7nuu=@j;EIZ#c0|L%-qMvFi+%OdEc>#>+yL8d&wU2tlZ%dyWqe}I{A-OHNA(&E10qPFrB=Rj-eT^ z+|^i{CwltwIhvbk#4I>%Ye-StL8Ms<<|ZAjCyBK7KaFGsPGP1Ytxkmro%P3!f)ckP z^VhH|K)>mO7O{s=;BL?@Dx3=b7NPXG@SiI9yn-(%_#lB15qkycZU6sPc_KVF3bCk+ zJrE2pkTJ@yyN{aQOLloWZ|wc6qAAL7P?YXqvre}$IxIQR?C|GEE%z~q75aw;HoY$> zsqw<673@=RQh}_~e)6E6cLm$zE4AB@PX>0?4iVvts`DiUzoy``3cgHWEL)Emt!>Ps z$@%ctRVdo)9S6UunAw-KOXZp;?Jyvtx}&0so<3`#UR8Q24uKj`HhYp&DrJvkN+8)^+=O!zX}AJfW7W zy3t&>XLK=sBW9BUM2huUk$DLU>j?F}iYxFKO(@CqX4YqwzujuA4+dKK>$-=$GjlDy zYDg0+&I2dmBNQQM`T*n)#0P+W1fMXfEm30AQWqw2gZBZvDm;{W6yO8!$SvD^fUqI# zrte;33q<-%dN-e?#C}hxe-xTbmgzZgjgQ~(Y144v8D2DzssrJq1?Ft;_iRKEAr+Co7Q8N>0fYp=OWxgXBN$hX`y+T0w+%c zNz@?OZsZuni_m5yTIL{vkerR)CyCmqOsx1QYM9CKConlY05${T1lwHvlhy!D%^q25 z7TePoUN{lRtbZ#=cvQMc54*rnax^$J8kV}7rMM(#obY#oWRDE&SjwpoB>PmR$J5FF z^r@Eb&$3v@f%J(Mxjb?OLOjuu(*(}wy(b-$mb|B>9CGz@X4XfHu&LPNZ0TG&-V@%` z`$3}Nk0~fB_>cmo;*FG^n*O+wMh9R>&K`O`_1uN0uVAB=i8d0mc+p*b-_bPcv zfzjNoVt-YE{21O4e@rpa$ndWzkYW-34Fxp?f0MwZU2neSN$Krc{iaderwWxg>UdWs z{&4Z7@e7wLdO$0RddU<(-kfUO`a;z+16kz4Zz>REIyhO28HB&5g8!)C5`m3|MUsxh zXaQ6beo=+S6uzMvn+fbCQ)?T&E^Um_Go4?h%BYB)QG5@hpH~F?bltzNykr z5|}Ol7V=!q<}z<9)S}=S;UDnVQ*|QuW2;-qPO;P21@e;IA^2Z@C!oP)HG`@*&EGO} z-)sKfk{N=IL*X-9p!7Xv=9U3mFKX9h`f1^BJ9P&=}+oIAm#6vP@YibwVO5b{-GltD=GTPSFRyBnI!=JeDjW)Lb{|H^n^mo~ zB!t6mOz1)i_7lj2b!wuY84#4_@d=~#tm~BWLtwTv0wz{;tOHU-8m-Nc#%11xIIB*k6Ywt z3-=}WYL_(A{UoSEdLKR>C?`LbJxB+d%!s=)=9r8MjF69CXLSeM;)u!Gto;D0&>E*3 z#`2gn_}T!=6va(@!gv*bE(7SLCHM^l)@&G11ptWq)*JOjst?jI`znw;n4;n$DuL%O zyRylseNUmi=!3R=E@b)Pfr!+wgKELgkAsQL2o;9uN-O1OiyK^4c<$DNnQOqxz_Z)k zSraU>=+;O#5%F$yCEVHZ-~|EoS$Ba#O$xUW6-9{cY{VcD>N0`Kq3YOJT9_A(PO0uPK`TQwFB~0I>DLKN#Ja<@XQB|InCi$y z#pV=@H0yYuOIxZ!kHADZZ^9bieB;|oSu(iaW%Gz{)f3Bz`D#ZG1@TS0)op4tqEOpK zYm1c|i|=2(KMGaXLoF(#w`Y=4C<(58m%_%Jt}Mhi{F@I6J{>M5E-&kwF8h=#3#K8l zGDs{vw@9dQxk`ddj(OhmBY^1zoArce)0G zVY@&#jJsMlr{qlBM0}0?cH^$z&Oco|m(AfoaF+4520N|x&Q{6p;1O~*FoG=7$Ee+p zwR&T4FxZ{h&egmPv@L22Lw-eUO?dwXJ1JIdUhSsv?gb@e2u!n0;T@<;tj25UpUl8M zW$N?-xw)}%vPmGv<4pvC98nzfCPY>zf~3tjJZVANH~Cr;{!$@d71p}?H=8w)z8Mt( znrd**)iV70yWrk$srp{3Mh5nB_BzwQ$;V*six7ZyFUQGVss{KX51YS!b08_%uE9uc zph)-41H|(xdOk&54_}(Er6Xl<@m$@)#EG+# ztV2wZ3_y{{~Y`fSkAXW8N^6 zw~QhO%yfRh6chi2HH7NzO;5$B6db|w6B#fBm4asrfQ<-}ZT;8H`i+Fg@-g!REPeDX z)7;hvnnL-XW&Fp*gd$_MtsW1KX|1y0F!kdsE0ApN6~YIkxvaDjjg&>1!D*s3;3yzO z9VJBFVm-X3Ldn_c8o(8mb&nJY=`@UW!lBRHd8Q{p-Xi&+AUb~iOb@yZ0!aIZE=D?% z6p?el-T6R}JY;gN`TfVQR|Dy0z-`o&r+wUR+v5`Cy;Dq5B5=A_C^PIElHX~El+B}DU4IIKR&GJK#E zHLzWJcu6T%t`*u+xi6-c8eLBm}wRF9H{dCXp0yMbEAXpa9nJQ}TG=U;{ z%*{UTW}i@{<~ezzuMOUlrhA`v$ptB&wBW~d?+2A$e9-IO54pxEVMF7zlkV-S^MT&B z?u`TwUYtSq=G;*6VQus5lLi-5@@b)FyM@bno+4@Tks{>E0~lylE6*V%X`V{CoZ0KtB+lY$p{bY)ziw`%C`R{gXUoLen`bk?pIFRbz z4;x_tB2AUM6*1i#fU#13w%uX_U*mi6ep zy7!~Prd#`&)M?x9hqbN2tzTYsO_Xok5bcub-lx6p4S=}Q%Qijj7KuOQHqSmI#g*d@ zqor}0G>`QedRSyXpK2oUnZ_k|>!sdn^g;ek6`%Il%^!Ao5R^}zb+hkrv+s2v2K_1L zImH)Gxe~N42arwo=JgmqPrJr5uJNpEyr)Sso>nW(@Oigr@l4~ruJN25?LB4-<2=gJ zG~U*|IrUI>%w%a1gFn~Sy*p3;tv6O(YajgQt=D}Uu9Nt7W%-cR43U#Tz1JdqmOh1nq`b} z9CYWDc-a!8J^^!qq#knT5QY{nyAu4w3qvou7&iGAdPe!c;;YezmpZQ09uT2eG()&Bz=bX6rTu-t-h*KGto* 
zPU(6(~j&#JV{Ty#M#zNz2=1%nFyw}S5}2(%D^);x0lS-n}1&XZ@BfQYOA z`e=I;eqGhf;~nZ-w!|Jr7saF+`7YR+4#iYk?T%DWP`s9>vt zK?Oq!h81j6uw6lqD$AzOaqHp>%6nSD=N0^#f^QJmlSD(ejyun>Eu@Kr{z0LxF&JW@ zf8yrz$k#=*v@kc{aqU7HzIQ;jaZA;NTmFHhA&-5Y4Ks;tyGLGoyC_`pNqdU8B$+-) z>{S)nqSV|vGc%Q5KDJWUxHEMs+#`VOQ?Orwu+Qo@am;`Cw*#d*rX~t(_|6@qdn{!_ zHk*%kmKtidz0>y6I2_TCZKTR7`CYZs0{v?^yWXg^Rk4KW<1wCNV1eaol(%VxwgAgM)8@#bNWlXN4l6jK;HZKz1;-Q|S8zhXNd*ro_?UWmNU>82PAfR0 z;9&)yQ0XIzomKFtf^!NCw;xmNqN-dV=HCREHQFAJg}*_`igZ>d(J<}ru=Myj`bceQ zSgzJ!H%tFz_h2Q{b5lK4U60*tK)z|aUFmP+3}ffon*I*AY;Cg0$-*{33w;g0I~caM zou2KDmaki$YQK@^u>JHdTiX`Ge*W!B|E#w7NBd#LenbIcmkT2WA5dV|Ba9gt zTl%c>epSJ5DlisgXY+q5*(JP^jWw(gv{A@n_uk>P@36}>AD<`a&UR<|HuRn9>*`y@ zzjb{*t$+O%>+0+6>*n7`HW)bDx0Zi<`!*3j*LS>cY``BnfTZW%`BCwi6l~KA$wD2l ziM+yv6c|_gCB?o@V6f(cx4H|>jkap7Zv-W>I?q*6t8&PX33q{3De&D_mjK}|PK5T+xKQb->PN>g;Q_mKaYR(kbPds3U4 zh1>Z5w?LLOubkf5F);Ky=MVSe>u70LYx%sp(F)Ovj(>O^L`PNH{wGhvdg#k~=!|YQ QuFG!yp3}cGdNB8Y01?F3u>b%7 diff --git a/plsc/models/__init__.pyc b/plsc/models/__init__.pyc deleted file mode 100644 index f0b714ded9c830cbf9367e808aa83eb751034629..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 285 zcmYL@L2kk@5Jmqufug9W-Sr9)i?ZydLOlTsV1Xi`ArK3tGBm<-h%hA1)}L(Xd<-X@%qVxIw|e}ltefX)JkQ3WMf`4fAXH1|RP&hr q!@tlr&hMw~bw5?DPjygzZN9^Zvv=kfXJ=2;{*ic=OxR{9WBCQ#z%4ES diff --git a/plsc/models/base_model.pyc b/plsc/models/base_model.pyc deleted file mode 100644 index 8c9ced86df0af8d4f75d37309b3f9ca70933802c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4424 zcmb7H?Q$GP6}>Ynt+cDPWy_MSkdT^!6y6Gs3<-oHP)USss$4}>&e*9UDyU&JGrJmj zc4j@@BU!LZQOJMcA^6Kf@C5wiClrs6syqOkb9;9s#|b#Hr?;p3cK7`__uQV==a;&_ z`SWMLPGt7i!taN8%cb!C7}m*knqnaHR6YdrR0e;F#||H0p0-$U1~p=rweNs~oo zJj!eb?>~!8_ED8)#lnIWj^Z&7&}7hE_1*WmCk}wh88r3f8IE7pK9o>Bph3DUX@Dqf=~FLN73E@n#<$cjN9zoLzK1-)@$ zlH1Ntw-4aNNc%hE(~#SK8;|kPh?j5!K774{$6P@J%@EBqPyUGD`cR3hOoK+vXK3!s z{q};}ZUymIxQ*++uIsqQd>E!OmJfR!jiw}5?`0x_Tcs-Pz4OR zBh7R^@F~EI zlq@aqD^eMq8D=&sk3$wJJ$#Tvs&76x)x+^VcPV8w`Ql<&*>B=4x;?VhOLa*+!#oU>qaHG2G;ka=!YHuV8-{ z?r0%q!VRuxfYmA4=)h843TS6sR|hXmS_~5W7*&l01@7QY(JhzSUe8lwdJ6ah|2r~ZEhLTa%_#e`4_rRcNS5V1 zF7n@^TGMb1g~98EaC2NU#5i;i@pWdKx||nlx9!5z`DB|i=SGX#? 
zu{b}A%RWu~BDlFdaDz-5T`-bZG&Vg}aAG=qlS{vgMkydZ9x9e!7NOzmSQ&FqW{dz~ zuj6^m-vBn-inc9p%S)&W6J~+6e}c(ykwIkyzGfZZ$c-pa05exx&peOC5fE{+t`2JL z1EkJ;Z;GS7Y1mpO*N-zz+x*DP8fr=z&o(-fE4Xpr0E>dDP<1#i+?B&Ry9?vED=hW{O;v8@LF}gg03;S+0?-o;STRAK%jI zeMPG`T!6xR9oA|BUX>|c-B~xZ8ehefH@!v~oD*%!1UhWEOx#4c_Y`FSpi4wzFMgVv z9KUj&1Nf|2^(<8^SOA9ve+yUc$p4u=x?2x6x=ts`G$p3XrXYQlrBu9lz^JC3^ zmsCIG4w}xSk6*^eIVvQp#LwfxWR~Y8br#z!(qfC6SgTPXCS1x5vofn8puQS4g)<;Q z6=hN7FV#o2iw-O6jQTMKlmWu`xC`quKEB4$u#VFl4AH0azrY9KPdKKOAy5^FTj9)${P|x_8CvwK}c;0!{Xgw*UYD diff --git a/plsc/models/dist_algo.pyc b/plsc/models/dist_algo.pyc deleted file mode 100644 index 26ad023201a533656b00842aa1a5fcb9f327f347..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13719 zcmeHOOLH98b-q1==MW$O;!QGAhf$)TO@aU=BQlGyNl{W|p@NtuBU^~*W~1j0&_GZ3 zu)Bu*DvljHPF&8bQi&FMtn=Q*t5i1m5m~3aP?di`@_pxa_Y44%f+MEN3jvrr zx9{WJb8nx=cTP8|KOL{#|Nf`_)xe*nLZw=k+OU+x>xv31YNH~3 zRTWlMYd~!blzoG0XGm=fN%w#XhgEAtZH$=iK^2awR!wcxRO^)5IAvOgR5+$u<7#8v zTo0@8v}#SLjS1D7R2!2e=_!)lIiq%_)yA|e0A6N_##yy7TQtrUjq_?_UK&SKxS(2# zYGV=PMwMPvO-nUu>Zwvsl=^7ntZJN6`kb_mm96KwUHuA6{*9}r*z2pRaauJdipEK4 zw0U`_K84c@{s%vGoSw=~qi82Vl)24L_d69kz8+ zuWn_ef(+%)r&tkZVeNxYSKEkj}y18#e`yqRS=0j{zhCoL}& zTDRf|Go!3G{)2WFw1VH#Np=B4-wDzz2{t>KZrlpJG!2@8?~&_E` zZEt%1?w*%4>_}&Oak6VSp7Hj%n_lK`yD5${m09YyvDLoD?qG3QQ5e%5DlV&X zIb2*0$mNL2Dr%>yb_P^;R2__}ok8kUw?~?ksqPk~P zcUm1x(|UllgPK}A7+1R$mHb2EmtME>V-m{f zm%b6{`{vO;%$=A1QR(0B>z`4E_B@!gxR^8?y5P#v08=`vx--Qw%&CJJnk*QqCzjQn zm0ix~^cEUpv`?^10q*56?bB*wQZ8T>r*Lsb1(4x4cd`^BrPMb~XD5>uT9*$xSw_Y6V8~OcHFP`M&d#tc4C6gF?qqJ< zbQ3Sy(!!GtbxTLtUI3eub~c6ZM=~|_y-;sqCVoEsdi@vhWy}+1!e}&OaZ)$a6ixu9 z{@o;r6PefyJAv7pA169Hi;1zgoGf;|sNwqC+TYcQ+wxEf6T2U@ojBC=KkGaX@|(B8 zjg`&ZB3xp|b1*Z+guR`cdO5WB<2024A)R9C%weoUTPZ_ZFm`lX!slif`@6Cw9d%mb zHZm>|w^M1wiMeu;&NrgNqI)*w8utV}fBIMR6M@PK40Sk2CI>WKTS!E)( zQJmA&nZpxxrde=VXBLHUgBxRR1J}bvg4^Kk>PNCpasALsq>Y>u?8`{GQDh4)%(z?A z%o{}U0odSyLI|8U@hxW-L=C+!Viv$wa8c*0_;M&FFu<7D(?;Q69NA}kGv^J=y~z1M zok|VPRwgX@t&CaxJ%iuLD$2@eWym^j%~?a%NX16Ywx+CcYpObGEugK2&p3Xo)-Zlg zRYt8>(8Avue#foTQY*%s#)$Pgl~j(?b*ay;Yb3yh)9v6qT~SomZNxt66rA_GFx3t* zpWNAXXt?m-+v^E7OswzZ-U-$fzN>24hF2H8;@ zW*^32m+T<2_qGFn+s?MN;4hJCLNYc0uw_E6*$KnTu&$BzfjZF+ZMTE8AL0=YLWd2k zgDo%sj(ewkQZNPB{52{s0B{Ebg5=-<;f^YBJ(gO)_gMlyAXNkS1{G`$TLj^tdk!7D zRh4`~xyU;}0zL`T2_HTY*971rhzZVe&JaWm6(FJoUJt4VpQs3iM!{i`<8^U2;&|kE zMd~6sLv8F{1lzuwio~(z!W2_)T{$6lTcQM`|5W+5*JuFoy6|SsbzEl>hor84(D6dF zJ`7UWcIVg8Rp+URHFEB<_8TldM3E6A_LDcp1Gg)2OJCWO>=$BE(TP3KOKA@b76l%y z%nB-?lJ1c6dq9Py=Ltu!PUAdb(!^EQyexf3|fe?N!4&nf*Y5;H+ zCIGP21%9pZgq{fgWC;igI6I*h(Fbmcc864Q#v+p?UDUB2Vk6K6yJihg2jB&DP{gd- zbEww}>f8`;bxk!HVUFOW)^mTZZit5<^bfp{ah&rj=oZ_ATh`WMlf^dC1Bx^Su=0VI z)@h?1hoc8jv^?MoR?-JaW(0#8=7_M+5;@;MA%;Eok&LNCr$wra)G^!z{l56jAx)92 ztuWq%EE;dkR=A^7oMfE>U?h?*MBIK5QI>H$lx-{|9$efxnKxJ50b;!5md0ms9Ko2?-vsd07acW#i=%#ZH4Wq@rwGyM99Cir^=S+mHT^Yw%_&>l_qt7C3Rj)bL%i zCZL`~hl}W8&8JFocVtFXGcu|P>3s^b3+E$h^eQy;NmUXdkWtZA_KAj4*Pt(`!^p!M zL0zDmYVc#I=P*4D%n&jn7@sQ*#zSe~v7qdznBG#Q`g@^&qI@ohz}bi^Vr&on0Y4QB z8i}=%|BznP@dp*gx~J#`IYrbF2?MGY6wl}wmVh3j1!+vlWFi63HTjq^g|&VvBQeZ^vhsRLax=Z_GdY=}A9)xojXaDQa;HuaUiTS9 z6rVfH$Wgm766ZEkq~6e?Olp++?@9Qvc~Ze4z3ZXZ+H81V`*+k+d`#GHB9|sE9ohhA zo5c8Y8I6DLmJYti+KNuh}Vs6ZeGL<MqD&<_SPITn!Yxd2=f6w#vk`2_`POn|L;wAgR!=*Ucy5eo7S61FyURhfH)zz=Q zRZjK``^xXnw6V_bVU+V76m@(~f{ee5VL!#^`Ic2|LyRF}V7z^VrMT#`v(EQ;`F$3D zz~ToejKTbUUS42P0=8!w(eoVgEF;R`SMZqQu|^AN5E(H*ia;6pt)rF_k+VbN>--@L z@dnpf`y&=VVsV#+*v%iJX13+Wsn}k8_h6pjIx()!A9K#1ps3Sk7%=yzto<1Z2~Apn zPDqgwl}J9@l`O8YuYb;wJyS|cz3+xsWgfgt%QXnlRB2V6zu=4#DCThxW{|uL%Ojy4 
diff --git a/plsc/models/resnet.pyc b/plsc/models/resnet.pyc
deleted file mode 100644
index d8d9c88f6695c07a0be23ec05f7e7e7012211c21..0000000000000000000000000000000000000000
Binary files a/plsc/models/resnet.pyc and /dev/null differ
diff --git a/plsc/run.sh b/plsc/run.sh
deleted file mode 100644
index be89ebb8531f26..00000000000000
--- a/plsc/run.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-
-export FLAGS_cudnn_exhaustive_search=true
-export FLAGS_fraction_of_gpu_memory_to_use=0.96
-export FLAGS_eager_delete_tensor_gb=0.0
-selected_gpus="0,1,2,3,4,5,6,7"
-#selected_gpus="4,5,6"
-
-python -m paddle.distributed.launch \
-    --selected_gpus $selected_gpus \
-    --log_dir mylog \
-    do_train.py \
-    --model=ResNet_ARCFACE50 \
-    --loss_type=dist_softmax \
-    --model_save_dir=output \
-    --margin=0.5 \
-    --train_batch_size 32 \
-    --class_dim 85742 \
-    --with_test=True
diff --git a/plsc/utils/__init__.pyc b/plsc/utils/__init__.pyc
deleted file mode 100644
index 1e6cc9491f8f801cfe58c7dc8e9af5826b63f3c4..0000000000000000000000000000000000000000
Binary files a/plsc/utils/__init__.pyc and /dev/null differ
diff --git a/plsc/utils/jpeg_reader.pyc b/plsc/utils/jpeg_reader.pyc
deleted file mode 100644
Binary files a/plsc/utils/jpeg_reader.pyc and /dev/null differ
z$|L)-9KL@XhA*nziVRVz06|w-`2Z~yhVerf#tmB|wxWmuta}gzC@*twi021WQvA}O zm|^vTe^?q8z$SxHRpVnG3*;z%W=^hz;^9~gfvO4NyXc`FKM(&R3fU-q=C~RQ9UKPT zigtaKgFX9E;5f|u(4I%GF z_)U^`Nc6r{(1M<(!>$t-zRQ90Bo{~&)~Ex^`J^5d;T4YPjU`%y-zNDE33ImMi7F&U zh1bZxPI7~Tgpal&9`{e=_V9-dXoq)=Biu5Yul7{OU7Rq?Propl1=ocgLi=*{3t@u9!CiAB{~i{`hS2B9ya(qbA$m$Sc2%pv(51gulJPMo!p~5 zuGerInZPG&6t=ti7T|7Oqe}zpHjdz9>O;2fko*9o&Vzc-s4E!^;YT2vxLwiLf-C7c zEi>ADbYula29={eVZr2^tY^X>Q^98-uk5&R7t+7R*RnbtXW*LIES)l^Op6gs*D?lg zGXH*shW7?A5Sa|`7?4wd%ghttfR^PKB|J63NIrE;STHnF0%S4fjShI%XCmYmID?N* z&{q!l^nuJWt51#U`_XE%qt$TZU#tgjyHwTFDMKX{K2r{n0-<>frpUrbitTAQipLr~ z4e+G9_y+{NjPG&qpTGUv*FW6* zR|UxW2;l*A@6Bf1Z5_R`X!l*ez$=Ro!xA!KgjxWQ;E_cW+GDHb1Rh6vkJTEEv2Oii z#d)oEQ8H(VzCH7=k$9(VD}_9*9limgsk;-o`?_zuwM$$&is@e3f-=W+J=Pj0`V?kU zK56L_j?dwAfz}dgdyVO^4c_D$XzOJK@GPcW^{;E*=TYrjAk&*m2A`k`vsKyBUu~CG O7iyR17OQW}RsRhbU>8*Y diff --git a/plsc/version.py b/plsc/version.py new file mode 100644 index 00000000000000..0b618a61b046c9 --- /dev/null +++ b/plsc/version.py @@ -0,0 +1,15 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" PLSC version string """ +plsc_version = "0.1.0" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000000000..0b5ae2bb08b9ef --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +numpy>=1.12, <=1.16.4 ; python_version<"3.5" +numpy>=1.12 ; python_version>="3.5" +scipy>=0.19.0, <=1.2.1 ; python_version<"3.5" +paddlepaddle>=1.6.2 +scipy ; python_version>="3.5" +Pillow +sklearn +easydict diff --git a/setup.py b/setup.py index b80189e8d1a9a4..944a94f0e09819 100644 --- a/setup.py +++ b/setup.py @@ -11,17 +11,53 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from setuptools import setup, find_packages +"""Setup for pip package.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function -setup(name="plsc", - version="0.1.0", - description="Large Scale Classfication via distributed fc.", - author='lilong', - author_email="lilong.albert@gmail.com", - url="http", - license="Apache", - #packages=['paddleXML'], - packages=find_packages(), - #install_requires=['paddlepaddle>=1.6.1'], - python_requires='>=2' - ) + +from setuptools import find_packages +from setuptools import setup +from plsc.version import plsc_version + + +REQUIRED_PACKAGES = [ + 'sklearn', 'easydict', 'paddlepaddle>=1.6.2', 'Pillow', + 'numpy', 'scipy' +] + + +setup( + name="plsc", + version=plsc_version, + description= + ("PaddlePaddle Large Scale Classfication Package."), + long_description='', + url='/~https://github.com/PaddlePaddle/PLSC', + author='PaddlePaddle Authors', + author_email='paddle-dev@baidu.com', + install_requires=REQUIRED_PACKAGES, + packages=find_packages(), + # PyPI package information. 
diff --git a/setup.py b/setup.py
index b80189e8d1a9a4..944a94f0e09819 100644
--- a/setup.py
+++ b/setup.py
@@ -11,17 +11,53 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from setuptools import setup, find_packages
+"""Setup for pip package."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
 
-setup(name="plsc",
-      version="0.1.0",
-      description="Large Scale Classfication via distributed fc.",
-      author='lilong',
-      author_email="lilong.albert@gmail.com",
-      url="http",
-      license="Apache",
-      #packages=['paddleXML'],
-      packages=find_packages(),
-      #install_requires=['paddlepaddle>=1.6.1'],
-      python_requires='>=2'
-      )
+
+from setuptools import find_packages
+from setuptools import setup
+from plsc.version import plsc_version
+
+
+REQUIRED_PACKAGES = [
+    'sklearn', 'easydict', 'paddlepaddle>=1.6.2', 'Pillow',
+    'numpy', 'scipy'
+]
+
+
+setup(
+    name="plsc",
+    version=plsc_version,
+    description=
+    ("PaddlePaddle Large Scale Classification Package."),
+    long_description='',
+    url='/~https://github.com/PaddlePaddle/PLSC',
+    author='PaddlePaddle Authors',
+    author_email='paddle-dev@baidu.com',
+    install_requires=REQUIRED_PACKAGES,
+    packages=find_packages(),
+    # PyPI package information.
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Scientific/Engineering',
+        'Topic :: Scientific/Engineering :: Mathematics',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Software Development',
+        'Topic :: Software Development :: Libraries',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    license="Apache 2.0",
+    keywords=
+    ('plsc paddlepaddle large-scale classification model-parallelism distributed-training'))
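With setup.py now importing plsc_version from plsc/version.py, the release number lives in a single place instead of being duplicated in setup.py. A minimal sanity check after an install is sketched below; it assumes the package has been installed (for example with `pip install .`) and relies on `pkg_resources`, which ships with setuptools:

```python
# A minimal sketch: the version reported for the installed distribution
# should match the single source of truth in plsc/version.py.
import pkg_resources  # provided by setuptools

from plsc.version import plsc_version

installed = pkg_resources.get_distribution("plsc").version
assert installed == plsc_version == "0.1.0"
print("plsc", installed)
```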