Skip to content
This repository has been archived by the owner on Aug 15, 2019. It is now read-only.

Add packed batchnormalization kernel behind an environment flag #1330

Merged
merged 18 commits into from
Oct 23, 2018
Merged
2 changes: 2 additions & 0 deletions src/environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ export class Environment {
(typeof process.versions.node !== 'undefined');
} else if (feature === 'IS_CHROME') {
return isChrome();
} else if (feature === 'WEBGL_PACK_BATCHNORMALIZATION') {
return false;
} else if (feature === 'WEBGL_CONV_IM2COL') {
return false;
} else if (feature === 'WEBGL_PAGING_ENABLED') {
Expand Down
3 changes: 3 additions & 0 deletions src/environment_util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export interface Features {
'IS_BROWSER'?: boolean;
// Whether we are in the Node.js environment.
'IS_NODE'?: boolean;
// Whether we will pack the batchnormalization op.
'WEBGL_PACK_BATCHNORMALIZATION'?: boolean;
// Whether we will use the im2col algorithm to speed up convolutions.
'WEBGL_CONV_IM2COL'?: boolean;
// Whether we will perform memory paging.
Expand Down Expand Up @@ -79,6 +81,7 @@ export enum Type {
export const URL_PROPERTIES: URLProperty[] = [
{name: 'DEBUG', type: Type.BOOLEAN},
{name: 'IS_BROWSER', type: Type.BOOLEAN},
{name: 'WEBGL_PACK_BATCHNORMALIZATION', type: Type.BOOLEAN},
{name: 'WEBGL_CONV_IM2COL', type: Type.BOOLEAN},
{name: 'WEBGL_MAX_TEXTURE_SIZE', type: Type.NUMBER},
{name: 'WEBGL_PAGING_ENABLED', type: Type.BOOLEAN},
Expand Down
81 changes: 62 additions & 19 deletions src/kernels/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/

import {MemoryInfo, TimingInfo} from '../engine';
import {ENV} from '../environment';
import {ENV, Environment} from '../environment';
import {tidy} from '../globals';
import {warn} from '../log';
import * as array_ops_util from '../ops/array_ops_util';
Expand Down Expand Up @@ -44,6 +44,7 @@ import {topkImpl} from './topk_impl';
import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
import {AvgPool2DBackpropProgram} from './webgl/avg_pool_backprop_gpu';
import {BatchNormProgram} from './webgl/batchnorm_gpu';
import {BatchNormPackedProgram} from './webgl/batchnorm_packed_gpu';
import * as binaryop_complex_gpu from './webgl/binaryop_complex_gpu';
import {BinaryOpComplexProgram} from './webgl/binaryop_complex_gpu';
import * as binaryop_gpu from './webgl/binaryop_gpu';
Expand Down Expand Up @@ -587,15 +588,8 @@ export class MathBackendWebGL implements KernelBackend {
if (a.shape[0] === 1 && b.shape[0] === 1) {
const aSqueezed = a.as2D(a.shape[1], a.shape[2]);
const bSqueezed = b.as2D(b.shape[1], b.shape[2]);
const packProgramA = new PackProgram(aSqueezed.shape);
const packedA = this.compileAndRun<Tensor2D>(
packProgramA, [aSqueezed],
this.makePackedTensor<Tensor2D>(aSqueezed.shape));

const packProgramB = new PackProgram(bSqueezed.shape);
const packedB = this.compileAndRun<Tensor2D>(
packProgramB, [bSqueezed],
this.makePackedTensor<Tensor2D>(bSqueezed.shape));
const packedA = this.packTensor(aSqueezed);
const packedB = this.packTensor(bSqueezed);

const program = new MatMulPackedProgram(
packedA.shape, packedB.shape, [outerShapeA, outerShapeB], transposeA,
Expand All @@ -604,8 +598,7 @@ export class MathBackendWebGL implements KernelBackend {
program, [packedA, packedB],
this.makePackedTensor<Tensor2D>(program.outputShape));

const unpackProgram = new UnpackProgram(result.shape);
const unpacked = this.compileAndRun(unpackProgram, [result]) as Tensor;
const unpacked = this.unpackTensor(result);

packedA.dispose();
packedB.dispose();
Expand Down Expand Up @@ -648,10 +641,53 @@ export class MathBackendWebGL implements KernelBackend {
return this.compileAndRun(program, [a, b], output) as Tensor;
}

/**
 * Batch normalization routed through the packed shader program.
 *
 * Every input is first run through `packTensor`, the packed program is
 * executed into a packed output tensor, and the result is converted back
 * with `unpackTensor`. All packed intermediates are disposed before the
 * unpacked result is returned.
 *
 * @param x Input tensor to normalize.
 * @param mean Mean tensor.
 * @param variance Variance tensor.
 * @param varianceEpsilon Value forwarded to the program and added to the
 *     variance inside the shader.
 * @param scale Optional scale tensor.
 * @param offset Optional offset tensor.
 * @returns The unpacked normalized tensor.
 */
batchNormalizationPacked(
    x: Tensor4D, mean: Tensor4D|Tensor1D, variance: Tensor4D|Tensor1D,
    varianceEpsilon: number, scale?: Tensor4D|Tensor1D,
    offset?: Tensor4D|Tensor1D): Tensor4D {
  const xPacked = this.packTensor(x);
  const meanPacked = this.packTensor(mean);
  const variancePacked = this.packTensor(variance);

  // The order of this list must match the order in which the program
  // registers its variable names: x, mean, variance, [offset], [scale].
  const programInputs = [xPacked, meanPacked, variancePacked];

  const offsetPacked = offset != null ? this.packTensor(offset) : null;
  if (offsetPacked != null) {
    programInputs.push(offsetPacked);
  }
  const offsetShape = offsetPacked != null ? offsetPacked.shape : null;

  const scalePacked = scale != null ? this.packTensor(scale) : null;
  if (scalePacked != null) {
    programInputs.push(scalePacked);
  }
  const scaleShape = scalePacked != null ? scalePacked.shape : null;

  const program = new BatchNormPackedProgram(
      xPacked.shape, meanPacked.shape, variancePacked.shape, offsetShape,
      scaleShape, varianceEpsilon);
  const packedResult = this.compileAndRun(
      program, programInputs,
      this.makePackedTensor<Tensor4D>(xPacked.shape));

  const result = this.unpackTensor(packedResult);

  // Release every packed intermediate; only the unpacked tensor escapes.
  Environment.dispose([programInputs, packedResult]);

  return result;
}

batchNormalization(
x: Tensor4D, mean: Tensor4D|Tensor1D, variance: Tensor4D|Tensor1D,
varianceEpsilon: number, scale?: Tensor4D|Tensor1D,
offset?: Tensor4D|Tensor1D): Tensor4D {
if (ENV.get('WEBGL_PACK_BATCHNORMALIZATION')) {
return this.batchNormalizationPacked(
x, mean, variance, varianceEpsilon, scale, offset);
}

const inputs = [x, mean, variance];

let offsetShape = null;
Expand Down Expand Up @@ -1355,18 +1391,15 @@ export class MathBackendWebGL implements KernelBackend {
const x2ColShape = [sharedDim, numCols];

const xSqueezed = x.squeeze([0]);
const w2Row = filter.reshape([sharedDim, -1]);
const w2Row = filter.reshape([sharedDim, -1]) as Tensor2D;

const im2ColProgram =
new Im2ColProgram(x2ColShape, xSqueezed.shape, convInfo);
const im2Col = this.compileAndRun<Tensor2D>(
im2ColProgram, [xSqueezed],
this.makePackedTensor<Tensor2D>(x2ColShape));

const packedW2RowProgram = new PackProgram(w2Row.shape);
const packedW2Row = this.compileAndRun(
packedW2RowProgram, [w2Row],
this.makePackedTensor<Tensor2D>(w2Row.shape));
const packedW2Row = this.packTensor<Tensor2D>(w2Row);

const matmulProgram = new MatMulPackedProgram(
im2Col.shape, packedW2Row.shape, [numCols, convInfo.outChannels], true,
Expand All @@ -1375,8 +1408,7 @@ export class MathBackendWebGL implements KernelBackend {
matmulProgram, [im2Col, packedW2Row],
this.makePackedTensor<Tensor2D>(matmulProgram.outputShape));

const unpackProgram = new UnpackProgram(product.shape);
const unpacked = this.compileAndRun(unpackProgram, [product]) as Tensor;
const unpacked = this.unpackTensor(product);

im2Col.dispose();
packedW2Row.dispose();
Expand Down Expand Up @@ -1645,6 +1677,17 @@ export class MathBackendWebGL implements KernelBackend {
return packedTensor as T;
}

/**
 * Runs a `PackProgram` over `x`, writing into a freshly created packed
 * tensor of the same shape, and returns that output.
 */
private packTensor<T extends Tensor>(x: T): T {
  const shape = x.shape;
  const packed = this.compileAndRun(
      new PackProgram(shape), [x], this.makePackedTensor(shape));
  return packed as T;
}

/**
 * Runs an `UnpackProgram` built from the shape of `x` over `x` and
 * returns the program's output.
 */
private unpackTensor<T extends Tensor>(x: T): T {
  const unpacked = this.compileAndRun(new UnpackProgram(x.shape), [x]);
  return unpacked as T;
}

public compileAndRun<
K extends {dtype: DataType, size: number, dataId: {}, shape: number[]}>(
program: GPGPUProgram, inputs: TensorHandle[], output?: K,
Expand Down
39 changes: 39 additions & 0 deletions src/kernels/packing_util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

/**
 * Builds the four component accessors of a variable name.
 *
 * @param name Variable name to suffix.
 * @returns `[name.x, name.y, name.z, name.w]`, in that order.
 */
export function getChannels(name: string): string[] {
  const accessors: string[] = [];
  for (const component of ['x', 'y', 'z', 'w']) {
    accessors.push(name + '.' + component);
  }
  return accessors;
}

/**
 * Returns the last (up to) two entries among the first `rank` entries of
 * `dims`.
 *
 * @param rank Number of leading entries of `dims` to consider.
 * @param dims Dimension names.
 * @returns A new array; `dims` is not modified.
 */
export function getInnerDims(rank: number, dims: string[]): string[] {
  const leading = dims.slice(0, rank);
  // Equivalent to taking the trailing two entries, or all of them when
  // fewer than two are available.
  const start = Math.max(leading.length - 2, 0);
  return leading.slice(start);
}

/**
 * Produces the coordinate argument string for a tensor of the given rank.
 *
 * @param rank Number of entries of `dims` to emit.
 * @param dims Coordinate expressions, one per dimension.
 * @returns The literal `'rc'` when `rank` is 1; otherwise the first `rank`
 *     entries of `dims` joined by commas with no spaces.
 */
export function getSourceCoords(rank: number, dims: string[]): string {
  if (rank === 1) {
    return 'rc';
  }

  const parts: string[] = [];
  let index = 0;
  while (index < rank) {
    parts.push(`${dims[index]}`);
    index++;
  }
  return parts.join(',');
}
81 changes: 81 additions & 0 deletions src/kernels/webgl/batchnorm_packed_gpu.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* =============================================================================
*/

import * as broadcast_util from '../../ops/broadcast_util';
import {GPGPUProgram} from './gpgpu_math';

/**
 * Shader program computing batch normalization with `vec4` values.
 *
 * The generated shader outputs
 * `(x - mean) * scale * inversesqrt(variance + varianceEpsilon) + offset`.
 * When no offset shape is given the shader uses `vec4(0.0)`; when no scale
 * shape is given it uses `vec4(1.0)`.
 */
export class BatchNormPackedProgram implements GPGPUProgram {
  variableNames: string[];
  outputShape: number[];
  userCode: string;
  supportsBroadcasting = true;

  /**
   * @param xShape Shape of the input; also used as the output shape.
   * @param meanShape Shape of the mean input.
   * @param varianceShape Shape of the variance input.
   * @param offsetShape Shape of the offset input, or null if absent.
   * @param scaleShape Shape of the scale input, or null if absent.
   * @param varianceEpsilon Number interpolated into the shader and added
   *     to the variance before the inverse square root.
   */
  constructor(
      xShape: number[], meanShape: number[], varianceShape: number[],
      offsetShape: number[]|null, scaleShape: number[]|null,
      varianceEpsilon: number) {
    const names = ['x', 'mean', 'variance'];
    this.variableNames = names;

    // Each auxiliary shape is checked against the input shape.
    broadcast_util.assertAndGetBroadcastShape(xShape, meanShape);
    broadcast_util.assertAndGetBroadcastShape(xShape, varianceShape);

    const meanSnippet = broadcastSample('mean', meanShape.length);
    const varianceSnippet = broadcastSample('variance', varianceShape.length);

    // Optional inputs are registered as variables only when present, offset
    // before scale.
    let offsetSnippet: string;
    if (offsetShape == null) {
      offsetSnippet = 'vec4 offset = vec4(0.0)';
    } else {
      broadcast_util.assertAndGetBroadcastShape(xShape, offsetShape);
      names.push('offset');
      offsetSnippet = broadcastSample('offset', offsetShape.length);
    }

    let scaleSnippet: string;
    if (scaleShape == null) {
      scaleSnippet = 'vec4 scale = vec4(1.0)';
    } else {
      broadcast_util.assertAndGetBroadcastShape(xShape, scaleShape);
      names.push('scale');
      scaleSnippet = broadcastSample('scale', scaleShape.length);
    }

    this.outputShape = xShape;
    this.userCode = `
void main() {
ivec4 rc = getOutputCoords();

${offsetSnippet};
${scaleSnippet};

vec4 x = getX(rc.x, rc.y, rc.z, rc.w);
${meanSnippet};
${varianceSnippet};

vec4 inv = scale * inversesqrt(variance + vec4(${varianceEpsilon}));

setOutput((x - mean) * inv + offset);
}
`;
  }
}

/**
 * Emits the GLSL that declares a `vec4` named `texName`, read through the
 * sampler function `get<TexName>` (first letter upper-cased).
 *
 * @param texName Variable name; also the name of the declared `vec4`.
 * @param rank Rank of the sampled input.
 * @returns For rank 1, two statements: a sample taken at `rc.w` whose `.xy`
 *     pair is repeated to fill the `vec4`. For any other rank, a single
 *     declaration (without trailing semicolon) sampling at
 *     `(rc.x, rc.y, rc.z, rc.w)`.
 */
function broadcastSample(texName: string, rank: number): string {
  const samplerFn =
      'get' + texName.charAt(0).toUpperCase() + texName.slice(1);

  if (rank !== 1) {
    return `vec4 ${texName} = ${samplerFn}(rc.x, rc.y, rc.z, rc.w)`;
  }

  const sampleVar = `${texName}Sample`;
  // Leading and trailing empty entries reproduce the newlines that wrap the
  // two statements.
  return [
    '',
    `vec4 ${sampleVar} = ${samplerFn}(rc.w);`,
    `vec4 ${texName} = vec4(${sampleVar}.xy, ${sampleVar}.xy);`,
    '',
  ].join('\n');
}
2 changes: 1 addition & 1 deletion src/kernels/webgl/mulmat_packed_gpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export class MatMulPackedProgram implements GPGPUProgram {

void main() {
ivec2 rc = getOutputCoords();
gl_FragColor = dot2x2ARowBCol(rc);
setOutput(dot2x2ARowBCol(rc));
}
`;
}
Expand Down
Loading