From 93db4b124ab33ef6c6a8fa8b3815e143068f2bea Mon Sep 17 00:00:00 2001
From: lewuathe <lewuathe@me.com>
Date: Tue, 5 Feb 2019 23:13:04 +0900
Subject: [PATCH 1/3] Optimize addN op by limiting the number of textures per shader

---
 src/kernels/backend_webgl.ts  | 24 ++++++++++++++---
 src/kernels/webgl/addn_gpu.ts | 51 +++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 4 deletions(-)
 create mode 100644 src/kernels/webgl/addn_gpu.ts
diff --git a/src/kernels/backend_webgl.ts b/src/kernels/backend_webgl.ts
index 7b174238af..aae176bc5d 100644
--- a/src/kernels/backend_webgl.ts
+++ b/src/kernels/backend_webgl.ts
@@ -110,6 +110,7 @@ import {UnaryOpPackedProgram} from './webgl/unaryop_packed_gpu';
 import {UnpackProgram} from './webgl/unpack_gpu';
 import * as webgl_util from './webgl/webgl_util';
 import {whereImpl} from './where_impl';
+import {AddNProgram} from './webgl/addn_gpu';
 
 type KernelInfo = {
   name: string; query: Promise<number>;
@@ -1335,11 +1336,26 @@ export class MathBackendWebGL implements KernelBackend {
   }
 
   addN<T extends Tensor>(tensors: T[]): T {
-    let res = tensors[0];
-    for (let i = 1; i < tensors.length; i++) {
-      res = this.add(res, tensors[i]) as T;
+    if (tensors.length === 1) {
+      return tensors[0];
     }
-    return res;
+
+    // Over the per-shader texture limit: sum each half, then add the results.
+    if (tensors.length > ENV.get('WEBGL_MAX_TEXTURES_IN_SHADER')) {
+      const midIndex = Math.floor(tensors.length / 2);
+      const leftSide = this.addN(tensors.slice(0, midIndex));
+      const rightSide = this.addN(tensors.slice(midIndex));
+      return this.addN([leftSide, rightSide]);
+    }
+
+    const dtype = tensors
+        .map(t => t.dtype)
+        .reduce((d1, d2) => upcastType(d1, d2));
+    const shapes = tensors.map(t => t.shape);
+    // We can make sure shapes are identical in op level.
+    const program = new AddNProgram(tensors[0].shape, shapes);
+    const output = this.makeOutputArray(program.outputShape, dtype) as T;
+    return this.compileAndRun<T>(program, tensors, output);
   }
 
   subtract(a: Tensor, b: Tensor): Tensor {
diff --git a/src/kernels/webgl/addn_gpu.ts b/src/kernels/webgl/addn_gpu.ts
new file mode 100644
index 0000000000..780c6da287
--- /dev/null
+++ b/src/kernels/webgl/addn_gpu.ts
@@ -0,0 +1,51 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {GPGPUProgram} from './gpgpu_math';
+
+/** Unpacked shader program that sums N input tensors element-wise. */
+export class AddNProgram implements GPGPUProgram {
+  variableNames: string[];
+  outputShape: number[] = [];
+  userCode: string;
+
+  /**
+   * @param outputShape Shape of the summed output.
+   * @param shapes Shapes of the inputs; only the count is used here.
+   */
+  constructor(outputShape: number[], shapes: number[][]) {
+    this.outputShape = outputShape;
+    // One sampler per input, named T0, T1, ...
+    this.variableNames = shapes.map((_, index) => `T${index}`);
+
+    // Declarations that read each input at the output coordinates.
+    const reads = this.variableNames.map(
+        name => `float v${name} = get${name}AtOutCoords();`);
+    // Expression adding up every value read above.
+    const sum = this.variableNames.map(name => `v${name}`).join(' + ');
+    const body = reads.join('\n        ');
+
+    this.userCode = `
+      void main() {
+        ${body}
+
+        float result = ${sum};
+        setOutput(result);
+      }
+    `;
+  }
+}

From 7cff68a852e77061c1182638cb10659c885338cd Mon Sep 17 00:00:00 2001
From: lewuathe <lewuathe@me.com>
Date: Sun, 7 Apr 2019 22:13:40 +0900
Subject: [PATCH 2/3] Support packed operation for addN

---
 src/kernels/webgl/addn_packed_gpu.ts | 52 ++++++++++++++++++++++++++++
 src/kernels/webgl/backend_webgl.ts   | 10 ++++--
 2 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 src/kernels/webgl/addn_packed_gpu.ts

diff --git a/src/kernels/webgl/addn_packed_gpu.ts b/src/kernels/webgl/addn_packed_gpu.ts
new file mode 100644
index 0000000000..b730e3fdfa
--- /dev/null
+++ b/src/kernels/webgl/addn_packed_gpu.ts
@@ -0,0 +1,52 @@
+/**
+ * @license
+ * Copyright 2017 Google Inc. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {GPGPUProgram} from './gpgpu_math';
+
+/** Packed (vec4) shader program that sums N input tensors element-wise. */
+export class AddNPackedProgram implements GPGPUProgram {
+  variableNames: string[];
+  outputShape: number[] = [];
+  userCode: string;
+  usesPackedTextures = true;
+
+  /**
+   * @param outputShape Shape of the summed output.
+   * @param shapes Shapes of the inputs; only the count is used here.
+   */
+  constructor(outputShape: number[], shapes: number[][]) {
+    this.outputShape = outputShape;
+    // One sampler per input, named T0, T1, ...
+    this.variableNames = shapes.map((_, index) => `T${index}`);
+
+    // Declarations that read each input's vec4 at the output coordinates.
+    const reads = this.variableNames.map(
+        name => `vec4 v${name} = get${name}AtOutCoords();`);
+    // Expression adding up every vec4 read above.
+    const sum = this.variableNames.map(name => `v${name}`).join(' + ');
+    const body = reads.join('\n        ');
+
+    this.userCode = `
+      void main() {
+        ${body}
+
+        vec4 result = ${sum};
+        setOutput(result);
+      }
+    `;
+  }
+}
diff --git a/src/kernels/webgl/backend_webgl.ts b/src/kernels/webgl/backend_webgl.ts
index e61ed99054..4183152f11 100644
--- a/src/kernels/webgl/backend_webgl.ts
+++ b/src/kernels/webgl/backend_webgl.ts
@@ -121,6 +121,7 @@ import {UnaryOpPackedProgram} from './unaryop_packed_gpu';
 import {UnpackProgram} from './unpack_gpu';
 import {AddNProgram} from './addn_gpu';
 import * as webgl_util from './webgl_util';
+import {AddNPackedProgram} from './addn_packed_gpu';
 
 type KernelInfo = {
   name: string; query: Promise<number>;
@@ -1486,8 +1487,13 @@ export class MathBackendWebGL implements KernelBackend {
         .reduce((d1, d2) => upcastType(d1, d2));
     const shapes = tensors.map(t => t.shape);
     // We can make sure shapes are identical in op level.
-    const program = new AddNProgram(tensors[0].shape, shapes);
-    const output = this.makeOutputArray(program.outputShape, dtype) as T;
+    const usePackedOp = ENV.getBool('WEBGL_PACK_BINARY_OPERATIONS');
+    const program = usePackedOp ?
+        new AddNPackedProgram(tensors[0].shape, shapes) :
+        new AddNProgram(tensors[0].shape, shapes);
+    const output = usePackedOp ?
+        this.makePackedTensor(program.outputShape, dtype) as T :
+        this.makeOutputArray(program.outputShape, dtype) as T;
     return this.compileAndRun<T>(program, tensors, output);
   }
 

From 247632e7d3c21c2deea958322c1e4308eadc89f5 Mon Sep 17 00:00:00 2001
From: Lewuathe <lewuathe@me.com>
Date: Wed, 10 Apr 2019 20:10:31 +0900
Subject: [PATCH 3/3] Post-review follow-up

---
 src/kernels/webgl/addn_gpu.ts        | 2 +-
 src/kernels/webgl/addn_packed_gpu.ts | 2 +-
 src/kernels/webgl/backend_webgl.ts   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/kernels/webgl/addn_gpu.ts b/src/kernels/webgl/addn_gpu.ts
index 780c6da287..d33fcd326f 100644
--- a/src/kernels/webgl/addn_gpu.ts
+++ b/src/kernels/webgl/addn_gpu.ts
@@ -1,6 +1,6 @@
 /**
  * @license
- * Copyright 2017 Google Inc. All Rights Reserved.
+ * Copyright 2019 Google Inc. All Rights Reserved.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
diff --git a/src/kernels/webgl/addn_packed_gpu.ts b/src/kernels/webgl/addn_packed_gpu.ts
index b730e3fdfa..b7b97266ea 100644
--- a/src/kernels/webgl/addn_packed_gpu.ts
+++ b/src/kernels/webgl/addn_packed_gpu.ts
@@ -1,6 +1,6 @@
 /**
  * @license
- * Copyright 2017 Google Inc. All Rights Reserved.
+ * Copyright 2019 Google Inc. All Rights Reserved.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
diff --git a/src/kernels/webgl/backend_webgl.ts b/src/kernels/webgl/backend_webgl.ts
index 4183152f11..8605c8ca38 100644
--- a/src/kernels/webgl/backend_webgl.ts
+++ b/src/kernels/webgl/backend_webgl.ts
@@ -1487,7 +1487,7 @@ export class MathBackendWebGL implements KernelBackend {
         .reduce((d1, d2) => upcastType(d1, d2));
     const shapes = tensors.map(t => t.shape);
     // We can make sure shapes are identical in op level.
-    const usePackedOp = ENV.getBool('WEBGL_PACK_BINARY_OPERATIONS');
+    const usePackedOp = ENV.getBool('WEBGL_PACK');
     const program = usePackedOp ?
         new AddNPackedProgram(tensors[0].shape, shapes) :
         new AddNProgram(tensors[0].shape, shapes);