tensorflow · annxingyuan · Oct 23, 2018 · Oct 18, 2018 · Oct 18, 2018 · Oct 18, 2018
diff --git a/src/environment.ts b/src/environment.ts
@@ -306,6 +306,8 @@ export class Environment {
           (typeof process.versions.node !== 'undefined');
     } else if (feature === 'IS_CHROME') {
       return isChrome();
+    } else if (feature === 'WEBGL_PACK_BATCHNORMALIZATION') {
+      return false;
     } else if (feature === 'WEBGL_CONV_IM2COL') {
       return false;
     } else if (feature === 'WEBGL_PAGING_ENABLED') {

diff --git a/src/environment_util.ts b/src/environment_util.ts
@@ -22,6 +22,8 @@ export interface Features {
   'IS_BROWSER'?: boolean;
   // Whether we are in the Node.js environment.
   'IS_NODE'?: boolean;
+  // Whether we will pack the batch normalization op.
+  'WEBGL_PACK_BATCHNORMALIZATION'?: boolean;
   // Whether we will use the im2col algorithm to speed up convolutions.
   'WEBGL_CONV_IM2COL'?: boolean;
   // Whether we will perform memory paging.
@@ -79,6 +81,7 @@ export enum Type {
 export const URL_PROPERTIES: URLProperty[] = [
   {name: 'DEBUG', type: Type.BOOLEAN},
   {name: 'IS_BROWSER', type: Type.BOOLEAN},
+  {name: 'WEBGL_PACK_BATCHNORMALIZATION', type: Type.BOOLEAN},
   {name: 'WEBGL_CONV_IM2COL', type: Type.BOOLEAN},
   {name: 'WEBGL_MAX_TEXTURE_SIZE', type: Type.NUMBER},
   {name: 'WEBGL_PAGING_ENABLED', type: Type.BOOLEAN},

diff --git a/src/kernels/backend_webgl.ts b/src/kernels/backend_webgl.ts
@@ -16,7 +16,7 @@
  */
 
 import {MemoryInfo, TimingInfo} from '../engine';
-import {ENV} from '../environment';
+import {ENV, Environment} from '../environment';
 import {tidy} from '../globals';
 import {warn} from '../log';
 import * as array_ops_util from '../ops/array_ops_util';
@@ -44,6 +44,7 @@ import {topkImpl} from './topk_impl';
 import {ArgMinMaxProgram} from './webgl/argminmax_gpu';
 import {AvgPool2DBackpropProgram} from './webgl/avg_pool_backprop_gpu';
 import {BatchNormProgram} from './webgl/batchnorm_gpu';
+import {BatchNormPackedProgram} from './webgl/batchnorm_packed_gpu';
 import * as binaryop_complex_gpu from './webgl/binaryop_complex_gpu';
 import {BinaryOpComplexProgram} from './webgl/binaryop_complex_gpu';
 import * as binaryop_gpu from './webgl/binaryop_gpu';
@@ -587,15 +588,8 @@ export class MathBackendWebGL implements KernelBackend {
     if (a.shape[0] === 1 && b.shape[0] === 1) {
       const aSqueezed = a.as2D(a.shape[1], a.shape[2]);
       const bSqueezed = b.as2D(b.shape[1], b.shape[2]);
-      const packProgramA = new PackProgram(aSqueezed.shape);
-      const packedA = this.compileAndRun<Tensor2D>(
-          packProgramA, [aSqueezed],
-          this.makePackedTensor<Tensor2D>(aSqueezed.shape));
-
-      const packProgramB = new PackProgram(bSqueezed.shape);
-      const packedB = this.compileAndRun<Tensor2D>(
-          packProgramB, [bSqueezed],
-          this.makePackedTensor<Tensor2D>(bSqueezed.shape));
+      const packedA = this.packTensor(aSqueezed);
+      const packedB = this.packTensor(bSqueezed);
 
       const program = new MatMulPackedProgram(
           packedA.shape, packedB.shape, [outerShapeA, outerShapeB], transposeA,
@@ -604,8 +598,7 @@ export class MathBackendWebGL implements KernelBackend {
           program, [packedA, packedB],
           this.makePackedTensor<Tensor2D>(program.outputShape));
 
-      const unpackProgram = new UnpackProgram(result.shape);
-      const unpacked = this.compileAndRun(unpackProgram, [result]) as Tensor;
+      const unpacked = this.unpackTensor(result);
 
       packedA.dispose();
       packedB.dispose();
@@ -648,10 +641,53 @@ export class MathBackendWebGL implements KernelBackend {
     return this.compileAndRun(program, [a, b], output) as Tensor;
   }
 
+  // Packed counterpart of batchNormalization: packs the inputs, runs
+  // BatchNormPackedProgram on them and returns the unpacked result.
+  batchNormalizationPacked(
+      x: Tensor4D, mean: Tensor4D|Tensor1D, variance: Tensor4D|Tensor1D,
+      varianceEpsilon: number, scale?: Tensor4D|Tensor1D,
+      offset?: Tensor4D|Tensor1D): Tensor4D {
+    const xPacked = this.packTensor(x);
+    const meanPacked = this.packTensor(mean);
+    const variancePacked = this.packTensor(variance);
+
+    // Same order as BatchNormPackedProgram.variableNames: x, mean, variance,
+    // then offset and scale when they are provided.
+    const programInputs = [xPacked, meanPacked, variancePacked];
+    let offsetShape = null;
+    let scaleShape = null;
+    if (offset != null) {
+      const offsetPacked = this.packTensor(offset);
+      programInputs.push(offsetPacked);
+      offsetShape = offsetPacked.shape;
+    }
+    if (scale != null) {
+      const scalePacked = this.packTensor(scale);
+      programInputs.push(scalePacked);
+      scaleShape = scalePacked.shape;
+    }
+
+    const program = new BatchNormPackedProgram(
+        xPacked.shape, meanPacked.shape, variancePacked.shape, offsetShape,
+        scaleShape, varianceEpsilon);
+    const packedResult = this.compileAndRun(
+        program, programInputs,
+        this.makePackedTensor<Tensor4D>(xPacked.shape));
+    const result = this.unpackTensor(packedResult);
+    // Dispose the packed intermediates; only the unpacked result is returned.
+    Environment.dispose([programInputs, packedResult]);
+    return result;
+  }
+
   batchNormalization(
       x: Tensor4D, mean: Tensor4D|Tensor1D, variance: Tensor4D|Tensor1D,
       varianceEpsilon: number, scale?: Tensor4D|Tensor1D,
       offset?: Tensor4D|Tensor1D): Tensor4D {
+    if (ENV.get('WEBGL_PACK_BATCHNORMALIZATION')) {
+      return this.batchNormalizationPacked(
+          x, mean, variance, varianceEpsilon, scale, offset);
+    }
+
     const inputs = [x, mean, variance];
 
     let offsetShape = null;
@@ -1355,18 +1391,15 @@ export class MathBackendWebGL implements KernelBackend {
     const x2ColShape = [sharedDim, numCols];
 
     const xSqueezed = x.squeeze([0]);
-    const w2Row = filter.reshape([sharedDim, -1]);
+    const w2Row = filter.reshape([sharedDim, -1]) as Tensor2D;
 
     const im2ColProgram =
         new Im2ColProgram(x2ColShape, xSqueezed.shape, convInfo);
     const im2Col = this.compileAndRun<Tensor2D>(
         im2ColProgram, [xSqueezed],
         this.makePackedTensor<Tensor2D>(x2ColShape));
 
-    const packedW2RowProgram = new PackProgram(w2Row.shape);
-    const packedW2Row = this.compileAndRun(
-        packedW2RowProgram, [w2Row],
-        this.makePackedTensor<Tensor2D>(w2Row.shape));
+    const packedW2Row = this.packTensor<Tensor2D>(w2Row);
 
     const matmulProgram = new MatMulPackedProgram(
         im2Col.shape, packedW2Row.shape, [numCols, convInfo.outChannels], true,
@@ -1375,8 +1408,7 @@ export class MathBackendWebGL implements KernelBackend {
         matmulProgram, [im2Col, packedW2Row],
         this.makePackedTensor<Tensor2D>(matmulProgram.outputShape));
 
-    const unpackProgram = new UnpackProgram(product.shape);
-    const unpacked = this.compileAndRun(unpackProgram, [product]) as Tensor;
+    const unpacked = this.unpackTensor(product);
 
     im2Col.dispose();
     packedW2Row.dispose();
@@ -1645,6 +1677,17 @@ export class MathBackendWebGL implements KernelBackend {
     return packedTensor as T;
   }
 
+  // Runs a PackProgram over `x`, writing into a makePackedTensor output.
+  private packTensor<T extends Tensor>(x: T): T {
+    const packer = new PackProgram(x.shape);
+    return this.compileAndRun(packer, [x], this.makePackedTensor(x.shape)) as T;
+  }
+
+  // Runs an UnpackProgram over `x` and returns what compileAndRun produces.
+  private unpackTensor<T extends Tensor>(x: T): T {
+    return this.compileAndRun(new UnpackProgram(x.shape), [x]) as T;
+  }
+
   public compileAndRun<
       K extends {dtype: DataType, size: number, dataId: {}, shape: number[]}>(
       program: GPGPUProgram, inputs: TensorHandle[], output?: K,

diff --git a/src/kernels/packing_util.ts b/src/kernels/packing_util.ts
@@ -0,0 +1,39 @@
+/**
+ * @license
+ * Copyright 2018 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+/** Returns the four strings `name.x`, `name.y`, `name.z`, `name.w`. */
+export function getChannels(name: string): string[] {
+  return [`${name}.x`, `${name}.y`, `${name}.z`, `${name}.w`];
+}
+
+/** Returns the last (up to) two of the first `rank` entries of `dims`. */
+export function getInnerDims(rank: number, dims: string[]): string[] {
+  const leading = dims.slice(0, rank);
+  return leading.slice(-2);
+}
+
+/** Returns 'rc' for rank 1, else the first `rank` dims joined by commas. */
+export function getSourceCoords(rank: number, dims: string[]): string {
+  if (rank === 1) {
+    return 'rc';
+  }
+  const parts: string[] = [];
+  for (let i = 0; i < rank; i++) {
+    parts.push(`${dims[i]}`);
+  }
+  return parts.join(',');
+}
diff --git a/src/kernels/webgl/batchnorm_packed_gpu.ts b/src/kernels/webgl/batchnorm_packed_gpu.ts
@@ -0,0 +1,81 @@
+/**
+ * @license
+ * Copyright 2018 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import * as broadcast_util from '../../ops/broadcast_util';
+import {GPGPUProgram} from './gpgpu_math';
+
+// GPGPU program for batch normalization on vec4 values. offset and scale are
+// optional: the shader uses vec4(0.0) / vec4(1.0) when no shape is given.
+export class BatchNormPackedProgram implements GPGPUProgram {
+  variableNames: string[];
+  outputShape: number[];
+  userCode: string;
+  supportsBroadcasting = true;
+
+  constructor(
+      xShape: number[], meanShape: number[], varianceShape: number[],
+      offsetShape: number[]|null, scaleShape: number[]|null,
+      varianceEpsilon: number) {
+    const assertBroadcastsWithX = (shape: number[]) =>
+        broadcast_util.assertAndGetBroadcastShape(xShape, shape);
+    this.variableNames = ['x', 'mean', 'variance'];
+    assertBroadcastsWithX(meanShape);
+    assertBroadcastsWithX(varianceShape);
+    const readMean = broadcastSample('mean', meanShape.length);
+    const readVariance = broadcastSample('variance', varianceShape.length);
+    let readOffset = 'vec4 offset = vec4(0.0)';
+    if (offsetShape != null) {
+      assertBroadcastsWithX(offsetShape);
+      this.variableNames.push('offset');
+      readOffset = broadcastSample('offset', offsetShape.length);
+    }
+    let readScale = 'vec4 scale = vec4(1.0)';
+    if (scaleShape != null) {
+      assertBroadcastsWithX(scaleShape);
+      this.variableNames.push('scale');
+      readScale = broadcastSample('scale', scaleShape.length);
+    }
+    this.outputShape = xShape;
+    this.userCode = `
+      void main() {
+        ivec4 rc = getOutputCoords();
+
+        ${readOffset};
+        ${readScale};
+
+        vec4 x = getX(rc.x, rc.y, rc.z, rc.w);
+        ${readMean};
+        ${readVariance};
+
+        vec4 inv = scale * inversesqrt(variance + vec4(${varianceEpsilon}));
+
+        setOutput((x - mean) * inv + offset);
+      }
+    `;
+  }
+}
+
+// Emits GLSL that declares `vec4 <texName>` by calling the get<TexName> getter.
+function broadcastSample(texName: string, rank: number): string {
+  const getter = `get${texName.charAt(0).toUpperCase()}${texName.slice(1)}`;
+  if (rank !== 1) return `vec4 ${texName} = ${getter}(rc.x, rc.y, rc.z, rc.w)`;
+  // Rank 1: sample at rc.w only and repeat the .xy pair across the vec4.
+  return `
+      vec4 ${texName}Sample = ${getter}(rc.w);
+      vec4 ${texName} = vec4(${texName}Sample.xy, ${texName}Sample.xy);
+    `;
+}
diff --git a/src/kernels/webgl/mulmat_packed_gpu.ts b/src/kernels/webgl/mulmat_packed_gpu.ts
@@ -52,7 +52,7 @@ export class MatMulPackedProgram implements GPGPUProgram {
 
       void main() {
         ivec2 rc = getOutputCoords();
-        gl_FragColor = dot2x2ARowBCol(rc);
+        setOutput(dot2x2ARowBCol(rc));
       }
     `;
   }