diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h index 747deadd68fe..1ad0016c52bc 100644 --- a/src/operator/quantization/quantize-inl.h +++ b/src/operator/quantization/quantize-inl.h @@ -95,6 +95,10 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs, const QuantizeParam& param = nnvm::get(attrs.parsed); if (param.out_type == mshadow::kUint8) { + if (std::is_same::value) { + LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, " + "please switch to the context of CPU or int8 data type for GPU."; + } Kernel::Launch(s, outputs[0].Size(), outputs[0].dptr(), outputs[1].dptr(), outputs[2].dptr(), inputs[0].dptr(), inputs[1].dptr(), inputs[2].dptr(), diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h index e3c411931eba..02ace6c39fac 100644 --- a/src/operator/quantization/quantize_v2-inl.h +++ b/src/operator/quantization/quantize_v2-inl.h @@ -137,6 +137,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx, Stream *s = ctx.get_stream(); const QuantizeV2Param &param = nnvm::get(attrs.parsed); auto out_type = GetOutputType(param); + if (out_type == mshadow::kUint8 && std::is_same::value) { + LOG(FATAL) << "currently, uint8 quantization is only supported by CPU, " + "please switch to the context of CPU or int8 data type for GPU."; + } if (inputs[0].type_flag_ == mshadow::kUint8 || inputs[0].type_flag_ == mshadow::kInt8) { if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index 3ff4b69302fb..d8c7f08d4ca5 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -450,6 +450,16 @@ def get_fp32_sym_with_multiple_outputs(length=1): @with_seed() def test_quantize_model(): def check_quantize_model(qdtype): + if is_test_for_native_cpu(): + 
print('skipped testing quantize_model for native cpu since it is not supported yet') + return + elif qdtype == 'int8' and is_test_for_mkldnn(): + print('skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet') + return + elif qdtype == 'uint8' and is_test_for_gpu(): + print('skipped testing quantize_model for gpu uint8 since it is not supported yet') + return + def check_params(params, qparams, qsym=None): if qsym is None: assert len(params) == len(qparams) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 0ac530c23d11..6bb815066c80 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4894,11 +4894,11 @@ def test_quantization_op(): min0 = mx.nd.array([0.0]) max0 = mx.nd.array([1.0]) a = mx.nd.array([[0.1392, 0.5928], [0.6027, 0.8579]]) - qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8') + qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='int8') a_ = mx.nd.contrib.dequantize(qa, min1, max1, out_type='float32') - qa_real = mx.nd.array([[35, 151], [154, 219]]) - a_real = mx.nd.array([[0.13725491, 0.59215689], [0.60392159, 0.8588236]]) + qa_real = mx.nd.array([[18, 75], [77, 109]]) + a_real = mx.nd.array([[0.14173228, 0.5905512], [0.6062992, 0.8582677]]) assert same(qa.asnumpy(), qa_real.asnumpy()) assert same(a_.asnumpy(), a_real.asnumpy())