Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1291] solve pylint errors in examples with issue no.12205 #13848

Merged
merged 12 commits into from
Mar 7, 2019
2 changes: 1 addition & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ List of Contributors
* [Piyush Ghai](/~https://github.com/piyushghai)
* [Zach Boldyga](/~https://github.com/zboldyga)
* [Gordon Reid](/~https://github.com/gordon1992)

* [Ming Yang](http://ufoym.com)
* [Neo Chien](/~https://github.com/cchung100m)

Label Bot
---------
Expand Down
1 change: 0 additions & 1 deletion example/bayesian-methods/bdk_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()



def run_mnist_SGD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
Expand Down
4 changes: 2 additions & 2 deletions example/caffe/caffe_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Generate helper functions to load Caffe into MXNet"""
import argparse
import mxnet as mx
Expand Down Expand Up @@ -86,8 +87,7 @@ def get_network_from_json_file(file_name):


def parse_args():
"""Parse the arguments
"""
"""Parse the arguments"""
parser = argparse.ArgumentParser(description='train an image classifier on mnist')
parser.add_argument('--network', type=str, default='lenet',
help='the cnn to use (mlp | lenet | <path to network json file>')
Expand Down
4 changes: 2 additions & 2 deletions example/caffe/train_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Train module with using Caffe operator in MXNet"""
"""Train module using Caffe operator in MXNet"""
import os
import logging
import mxnet as mx


def fit(args, network, data_loader, eval_metrics=None, batch_end_callback=None):
"""Train the model with using Caffe operator in MXNet"""
"""Train the model using Caffe operator in MXNet"""
# kvstore
kv = mx.kvstore.create(args.kv_store)

Expand Down
3 changes: 1 addition & 2 deletions example/cnn_chinese_text_classification/data_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Help functions to support for implementing CNN + Highway Network for Chinese Text Classification in MXNet"""
"""Helper functions for implementing CNN + Highway Network for Chinese Text Classification in MXNet"""

import codecs
import itertools
Expand Down
9 changes: 1 addition & 8 deletions example/cnn_chinese_text_classification/text_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@

def save_model():
"""Save cnn model

Returns
----------
callback: A callback function that can be passed as epoch_end_callback to fit
Expand All @@ -78,11 +77,9 @@ def save_model():

def highway(data):
"""Construct highway net

Parameters
----------
data:

Returns
----------
Highway Networks
Expand All @@ -104,7 +101,6 @@ def highway(data):

def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
"""Construct data iter

Parameters
----------
batch_size: int
Expand Down Expand Up @@ -166,7 +162,6 @@ def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
num_label=2, filter_list=None, num_filter=100,
dropout=0.0, pre_trained_word2vec=False):
"""Generate network symbol

Parameters
----------
batch_size: int
Expand All @@ -178,7 +173,7 @@ def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
num_filter: int
dropout: float
pre_trained_word2vec: boolean
identify the pre-trained layers or not
identify the pre-trained layers or not
Returns
----------
sm: symbol
Expand Down Expand Up @@ -236,7 +231,6 @@ def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,

def train(symbol_data, train_iterator, valid_iterator, data_column_names, target_names):
"""Train cnn model

Parameters
----------
symbol_data: symbol
Expand Down Expand Up @@ -291,7 +285,6 @@ class CustomInit(Initializer):
"""https://mxnet.incubator.apache.org/api/python/optimization.html#mxnet.initializer.register
Create and register a custom initializer that
initializes the weight and bias with custom requirements

"""
weightMethods = ["normal", "uniform", "orthogonal", "xavier"]
biasMethods = ["costant"]
Expand Down
8 changes: 6 additions & 2 deletions example/cnn_text_classification/data_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
# specific language governing permissions and limitations
# under the License.

"""Help functions to support for implementing CNN + Highway Network for Text Classification in MXNet"""
"""
Helper functions for implementing CNN + Highway Network for Text Classification in MXNet
"""

import itertools
import os
Expand Down Expand Up @@ -108,7 +110,9 @@ def build_input_data(sentences, labels, vocabulary):


def build_input_data_with_word2vec(sentences, labels, word2vec_list):
"""Map sentences and labels to vectors based on a pretrained word2vec"""
"""
Map sentences and labels to vectors based on a pretrained word2vec
"""
x_vec = []
for sent in sentences:
vec = []
Expand Down
162 changes: 120 additions & 42 deletions example/cnn_text_classification/text_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@

# -*- coding: utf-8 -*-

import sys
"""Implementing CNN + Highway Network for Text Classification in MXNet"""
cchung100m marked this conversation as resolved.
Show resolved Hide resolved

import os
import mxnet as mx
import numpy as np
import argparse
import logging
import argparse
import numpy as np
import mxnet as mx
import data_helpers

logging.basicConfig(level=logging.DEBUG)
Expand Down Expand Up @@ -54,26 +55,55 @@
parser.add_argument('--save-period', type=int, default=10,
help='save checkpoint for every n epochs')


def save_model():
    """Build the checkpoint callback used at the end of each epoch

    Creates the local "checkpoint" directory when it does not exist yet.
    The checkpoint period is read from the module-level ``args.save_period``.

    Returns
    ----------
    callback: A callback function that can be passed as epoch_end_callback to fit
    """
    checkpoint_dir = "checkpoint"
    directory_missing = not os.path.exists(checkpoint_dir)
    if directory_missing:
        os.mkdir(checkpoint_dir)
    # Checkpoint files are written with the "checkpoint/checkpoint" prefix
    checkpoint_callback = mx.callback.do_checkpoint("checkpoint/checkpoint", args.save_period)
    return checkpoint_callback


def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
"""Construct data iter
Parameters
----------
batch_size: int
num_embed: int
pre_trained_word2vec: boolean
identify the pre-trained layers or not
Returns
----------
train_set: DataIter
Train DataIter
valid: DataIter
Valid DataIter
sentences_size: int
array dimensions
embedded_size: int
array dimensions
vocab_size: int
array dimensions
"""
print('Loading data...')
if pre_trained_word2vec:
word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
x, y = data_helpers.load_data_with_word2vec(word2vec)
# reshpae for convolution input
# reshape for convolution input
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2]))
embed_size = x.shape[-1]
sentence_size = x.shape[2]
vocab_size = -1
embedded_size = x.shape[-1]
sentences_size = x.shape[2]
vocabulary_size = -1
else:
x, y, vocab, vocab_inv = data_helpers.load_data()
embed_size = num_embed
sentence_size = x.shape[1]
vocab_size = len(vocab)
embedded_size = num_embed
sentences_size = x.shape[1]
vocabulary_size = len(vocab)

# randomly shuffle data
np.random.seed(10)
Expand All @@ -87,27 +117,53 @@ def data_iter(batch_size, num_embed, pre_trained_word2vec=False):
print('Train/Valid split: %d/%d' % (len(y_train), len(y_dev)))
print('train shape:', x_train.shape)
print('valid shape:', x_dev.shape)
print('sentence max words', sentence_size)
print('embedding size', embed_size)
print('vocab size', vocab_size)
print('sentence max words', sentences_size)
print('embedding size', embedded_size)
print('vocab size', vocabulary_size)

train = mx.io.NDArrayIter(
train_set = mx.io.NDArrayIter(
x_train, y_train, batch_size, shuffle=True)
valid = mx.io.NDArrayIter(
x_dev, y_dev, batch_size)

return (train, valid, sentence_size, embed_size, vocab_size)
return train_set, valid, sentences_size, embedded_size, vocabulary_size

def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
num_label=2, filter_list=[3, 4, 5], num_filter=100,

def sym_gen(batch_size, sentences_size, num_embed, vocabulary_size,
num_label=2, filter_list=None, num_filter=100,
dropout=0.0, pre_trained_word2vec=False):
"""Generate network symbol
Parameters
----------
batch_size: int
sentences_size: int
num_embed: int
vocabulary_size: int
num_label: int
filter_list: list
num_filter: int
dropout: float
pre_trained_word2vec: boolean
identify the pre-trained layers or not
Returns
----------
sm: symbol
data: list of str
data names
softmax_label: list of str
label names
"""
input_x = mx.sym.Variable('data')
input_y = mx.sym.Variable('softmax_label')

# embedding layer
if not pre_trained_word2vec:
embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size, output_dim=num_embed, name='vocab_embed')
conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentence_size, num_embed))
embed_layer = mx.sym.Embedding(data=input_x,
input_dim=vocabulary_size,
output_dim=num_embed,
name='vocab_embed')
conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentences_size, num_embed))
else:
conv_input = input_x

Expand All @@ -116,7 +172,7 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,
for i, filter_size in enumerate(filter_list):
convi = mx.sym.Convolution(data=conv_input, kernel=(filter_size, num_embed), num_filter=num_filter)
relui = mx.sym.Activation(data=convi, act_type='relu')
pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentence_size - filter_size + 1, 1), stride=(1,1))
pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentences_size - filter_size + 1, 1), stride=(1, 1))
pooled_outputs.append(pooli)

# combine all pooled outputs
Expand All @@ -141,35 +197,57 @@ def sym_gen(batch_size, sentence_size, num_embed, vocab_size,

return sm, ('data',), ('softmax_label',)

def train(symbol, train_iter, valid_iter, data_names, label_names):
devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
mx.gpu(int(i)) for i in args.gpus.split(',')]
module = mx.mod.Module(symbol, data_names=data_names, label_names=label_names, context=devs)
module.fit(train_data = train_iter,
eval_data = valid_iter,
eval_metric = 'acc',
kvstore = args.kv_store,
optimizer = args.optimizer,
optimizer_params = { 'learning_rate': args.lr },
initializer = mx.initializer.Uniform(0.1),
num_epoch = args.num_epochs,
batch_end_callback = mx.callback.Speedometer(args.batch_size, args.disp_batches),
epoch_end_callback = save_model())

def train(symbol_data, train_iterator, valid_iterator, data_column_names, target_names):
    """Train cnn model

    Training settings (gpus, kv_store, optimizer, lr, num_epochs, batch_size,
    disp_batches) are read from the module-level ``args`` namespace, and a
    checkpoint callback from ``save_model()`` is run at the end of each epoch.

    Parameters
    ----------
    symbol_data: symbol
        Network symbol to train
    train_iterator: DataIter
        Train DataIter
    valid_iterator: DataIter
        Valid DataIter
    data_column_names: list of str
        Names of the data inputs, forwarded to mx.mod.Module as data_names
    target_names: list of str
        Names of the label inputs, forwarded to mx.mod.Module as label_names
    """
    # An unset or empty --gpus value means CPU training. Otherwise build one
    # context per listed device id so that every requested GPU is used
    # (previously the per-id contexts were discarded and only the default
    # GPU was selected, and an empty string crashed on int('')).
    if args.gpus:
        devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    else:
        devs = mx.cpu()
    module = mx.mod.Module(symbol_data,
                           data_names=data_column_names,
                           label_names=target_names,
                           context=devs)
    module.fit(train_data=train_iterator,
               eval_data=valid_iterator,
               eval_metric='acc',
               kvstore=args.kv_store,
               optimizer=args.optimizer,
               optimizer_params={'learning_rate': args.lr},
               initializer=mx.initializer.Uniform(0.1),
               num_epoch=args.num_epochs,
               batch_end_callback=mx.callback.Speedometer(args.batch_size, args.disp_batches),
               epoch_end_callback=save_model())


if __name__ == '__main__':
# parse args
args = parser.parse_args()

# data iter
train_iter, valid_iter, sentence_size, embed_size, vocab_size = data_iter(args.batch_size,
args.num_embed,
args.pretrained_embedding)
args.num_embed,
args.pretrained_embedding)
# network symbol
symbol, data_names, label_names = sym_gen(args.batch_size,
sentence_size,
embed_size,
vocab_size,
num_label=2, filter_list=[3, 4, 5], num_filter=100,
dropout=args.dropout, pre_trained_word2vec=args.pretrained_embedding)
sentence_size,
embed_size,
vocab_size,
num_label=2,
filter_list=[3, 4, 5],
num_filter=100,
dropout=args.dropout,
pre_trained_word2vec=args.pretrained_embedding)
# train cnn model
train(symbol, train_iter, valid_iter, data_names, label_names)
Loading