From fd1e4213fc72ee48e609d1c8b9fc7921e220089b Mon Sep 17 00:00:00 2001 From: "Per G. da Silva" Date: Sat, 8 Sep 2018 13:20:25 +0200 Subject: [PATCH] Moves seed_aug parameter to ImageRecParserParam and re-seeds RNG before each augmentation to guarantee reproducibility --- src/io/image_aug_default.cc | 13 +------------ src/io/image_iter_common.h | 4 ++++ src/io/iter_image_recordio_2.cc | 7 +++++++ tests/python/unittest/test_io.py | 14 ++++++++------ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/io/image_aug_default.cc b/src/io/image_aug_default.cc index f31664709bd5..cd06de2b2ad1 100644 --- a/src/io/image_aug_default.cc +++ b/src/io/image_aug_default.cc @@ -97,8 +97,6 @@ struct DefaultImageAugmentParam : public dmlc::Parameter seed_aug; // declare parameters DMLC_DECLARE_PARAMETER(DefaultImageAugmentParam) { @@ -188,8 +186,6 @@ struct DefaultImageAugmentParam : public dmlc::Parameter()) - .describe("Random seed for augmentations."); } }; @@ -208,9 +204,7 @@ std::vector ListDefaultAugParams() { class DefaultImageAugmenter : public ImageAugmenter { public: // contructor - DefaultImageAugmenter() { - seed_init_state = false; - } + DefaultImageAugmenter() {} void Init(const std::vector >& kwargs) override { std::vector > kwargs_left; kwargs_left = param_.InitAllowUnknown(kwargs); @@ -250,10 +244,6 @@ class DefaultImageAugmenter : public ImageAugmenter { } cv::Mat Process(const cv::Mat &src, std::vector *label, common::RANDOM_ENGINE *prnd) override { - if (!seed_init_state && param_.seed_aug.has_value()) { - prnd->seed(param_.seed_aug.value()); - seed_init_state = true; - } using mshadow::index_t; bool is_cropped = false; @@ -558,7 +548,6 @@ class DefaultImageAugmenter : public ImageAugmenter { DefaultImageAugmentParam param_; /*! 
\brief list of possible rotate angle */ std::vector rotate_list_; - bool seed_init_state; }; ImageAugmenter* ImageAugmenter::Create(const std::string& name) { diff --git a/src/io/image_iter_common.h b/src/io/image_iter_common.h index a2324a4b5c5b..c9e3933ade28 100644 --- a/src/io/image_iter_common.h +++ b/src/io/image_iter_common.h @@ -131,6 +131,8 @@ struct ImageRecParserParam : public dmlc::Parameter { size_t shuffle_chunk_size; /*! \brief the seed for chunk shuffling*/ int shuffle_chunk_seed; + /*! \brief random seed for augmentations */ + dmlc::optional seed_aug; // declare parameters DMLC_DECLARE_PARAMETER(ImageRecParserParam) { @@ -165,6 +167,8 @@ struct ImageRecParserParam : public dmlc::Parameter { .describe("The data shuffle buffer size in MB. Only valid if shuffle is true."); DMLC_DECLARE_FIELD(shuffle_chunk_seed).set_default(0) .describe("The random seed for shuffling"); + DMLC_DECLARE_FIELD(seed_aug).set_default(dmlc::optional()) + .describe("Random seed for augmentations."); } }; diff --git a/src/io/iter_image_recordio_2.cc b/src/io/iter_image_recordio_2.cc index b567c729736c..89f7753983db 100644 --- a/src/io/iter_image_recordio_2.cc +++ b/src/io/iter_image_recordio_2.cc @@ -519,6 +519,13 @@ inline size_t ImageRecordIOParser2::ParseChunk(DType* data_dptr, real_t* cv::Mat res; rec.Load(blob.dptr, blob.size); cv::Mat buf(1, rec.content_size, CV_8U, rec.content); + + // If augmentation seed is supplied + // Re-seed RNG to guarantee reproducible results + if (param_.seed_aug.has_value()) { + prnds_[tid]->seed(idx + param_.seed_aug.value() + kRandMagic); + } + switch (param_.data_shape[0]) { case 1: #if MXNET_USE_LIBJPEG_TURBO diff --git a/tests/python/unittest/test_io.py b/tests/python/unittest/test_io.py index 0641f235aa71..f051472d1be7 100644 --- a/tests/python/unittest/test_io.py +++ b/tests/python/unittest/test_io.py @@ -427,7 +427,7 @@ def check_CSVIter_synthetic(dtype='float32'): for dtype in ['int32', 'int64', 'float32']: 
check_CSVIter_synthetic(dtype=dtype) -@unittest.skip("Flaky test: /~https://github.com/apache/incubator-mxnet/issues/11359") +# @unittest.skip("Flaky test: /~https://github.com/apache/incubator-mxnet/issues/11359") def test_ImageRecordIter_seed_augmentation(): get_cifar10() seed_aug = 3 @@ -450,7 +450,8 @@ def test_ImageRecordIter_seed_augmentation(): max_shear_ratio=2, seed_aug=seed_aug) batch = dataiter.next() - data = batch.data[0].asnumpy().astype(np.uint8) + test_index = rnd.randint(0, len(batch.data)) + data = batch.data[test_index].asnumpy().astype(np.uint8) dataiter = mx.io.ImageRecordIter( path_imgrec="data/cifar/train.rec", @@ -469,7 +470,7 @@ def test_ImageRecordIter_seed_augmentation(): max_shear_ratio=2, seed_aug=seed_aug) batch = dataiter.next() - data2 = batch.data[0].asnumpy().astype(np.uint8) + data2 = batch.data[test_index].asnumpy().astype(np.uint8) assert(np.array_equal(data,data2)) # check whether to get different images after change seed_aug @@ -490,7 +491,7 @@ def test_ImageRecordIter_seed_augmentation(): max_shear_ratio=2, seed_aug=seed_aug+1) batch = dataiter.next() - data2 = batch.data[0].asnumpy().astype(np.uint8) + data2 = batch.data[test_index].asnumpy().astype(np.uint8) assert(not np.array_equal(data,data2)) # check whether seed_aug changes the iterator behavior @@ -502,7 +503,8 @@ def test_ImageRecordIter_seed_augmentation(): batch_size=3, seed_aug=seed_aug) batch = dataiter.next() - data = batch.data[0].asnumpy().astype(np.uint8) + test_index = rnd.randint(0, len(batch.data)) + data = batch.data[test_index].asnumpy().astype(np.uint8) dataiter = mx.io.ImageRecordIter( path_imgrec="data/cifar/train.rec", @@ -512,7 +514,7 @@ def test_ImageRecordIter_seed_augmentation(): batch_size=3, seed_aug=seed_aug) batch = dataiter.next() - data2 = batch.data[0].asnumpy().astype(np.uint8) + data2 = batch.data[test_index].asnumpy().astype(np.uint8) assert(np.array_equal(data,data2)) if __name__ == "__main__":