diff --git a/src/kernels/backend_webgl.ts b/src/kernels/backend_webgl.ts index dbb5168db6..c5442de9dc 100644 --- a/src/kernels/backend_webgl.ts +++ b/src/kernels/backend_webgl.ts @@ -92,6 +92,7 @@ import {SegmentOpProgram} from './webgl/segment_gpu'; import {SelectProgram} from './webgl/select_gpu'; import {SliceProgram} from './webgl/slice_gpu'; import {StridedSliceProgram} from './webgl/strided_slice_gpu'; +import * as tex_util from './webgl/tex_util'; import {TextureData, TextureUsage} from './webgl/tex_util'; import {TextureManager} from './webgl/texture_manager'; import {TileProgram} from './webgl/tile_gpu'; @@ -301,7 +302,7 @@ export class MathBackendWebGL implements KernelBackend { return new Promise(resolve => subscribers.push(resolve)); } const texData = this.texData.get(dataId); - const {texture, values, texShape} = texData; + const {texture, values, texShape, isPacked, shape} = texData; if (values != null) { return this.convertAndCacheOnCPU(dataId); } @@ -316,8 +317,14 @@ export class MathBackendWebGL implements KernelBackend { } // Possibly copy the texture into a buffer before inserting a fence. - const bufferOrTexture = this.gpgpu.maybeCreateBufferFromTexture( - texture, texShape[0], texShape[1]); + let width = texShape[1]; + let height = texShape[0]; + if (isPacked) { + [width, height] = tex_util.getPackedMatrixTextureShapeWidthHeight( + texShape[0], texShape[1]); + } + const bufferOrTexture = + this.gpgpu.maybeCreateBufferFromTexture(texture, height, width); // Create a fence and wait for it to resolve. await this.gpgpu.createAndWaitForFence(); @@ -327,8 +334,18 @@ export class MathBackendWebGL implements KernelBackend { if (bufferOrTexture instanceof WebGLTexture) { vals = this.getValuesFromTexture(dataId); } else { - vals = this.gpgpu.downloadFloat32MatrixFromBuffer( - bufferOrTexture, texShape[0], texShape[1]); + if (isPacked) { + const batch = this.getBatchDim(shape); + let rows = 1, cols = 1; + if (shape.length) { + [rows, cols] = this.getRowsCols(shape); + } + vals = this.gpgpu.downloadPackedMatrixFromBuffer( + bufferOrTexture, batch, rows, cols, texShape[0], texShape[1]); + } else { + vals = this.gpgpu.downloadFloat32MatrixFromBuffer( + bufferOrTexture, texShape[0], texShape[1]); + } } const dTypeVals = this.convertAndCacheOnCPU(dataId, vals); @@ -1803,31 +1820,29 @@ export class MathBackendWebGL implements KernelBackend { let texData = this.texData.get(input.dataId); - if (texData.texture == null && - !(!texData.isPacked && program.usesPackedTextures) && - util.sizeFromShape(input.shape) <= - ENV.get('WEBGL_SIZE_UPLOAD_UNIFORM')) { - // Upload small tensors that live on the CPU as uniforms, not as - // textures. Do this only when the environment supports 32bit floats due - // to problems when comparing 16bit floats with 32bit floats. - // TODO(/~https://github.com/tensorflow/tfjs/issues/821): Make it possible - // for packed shaders to sample from uniforms. - return { - shape: input.shape, - texData: null, - isUniform: true, - uniformValues: this.readSync(input.dataId) as TypedArray - }; - - // TODO(annyuan): Revive this block once uploading to packed textures is - // fixed. + if (texData.texture == null) { + if (!(!texData.isPacked && program.usesPackedTextures) && + util.sizeFromShape(input.shape) <= + ENV.get('WEBGL_SIZE_UPLOAD_UNIFORM')) { + // Upload small tensors that live on the CPU as uniforms, not as + // textures. Do this only when the environment supports 32bit floats + // due to problems when comparing 16bit floats with 32bit floats. + // TODO(/~https://github.com/tensorflow/tfjs/issues/821): Make it + // possible for packed shaders to sample from uniforms. + return { + shape: input.shape, + texData: null, + isUniform: true, + uniformValues: this.readSync(input.dataId) as TypedArray + }; + } // This ensures that if a packed program's inputs have not yet been // uploaded to the GPU, they get uploaded as packed right off the bat. - // if (program.usesPackedTextures) { - // texData.isPacked = true; - // texData.shape = input.shape; - //} + if (program.usesPackedTextures) { + texData.isPacked = true; + texData.shape = input.shape; + } } else if (!!texData.isPacked !== !!program.usesPackedTextures) { let preProcessProgram: UnpackProgram|PackProgram; let processedInput: Tensor; diff --git a/src/kernels/webgl/gpgpu_context.ts b/src/kernels/webgl/gpgpu_context.ts index 4ae91f8f05..d635646a51 100644 --- a/src/kernels/webgl/gpgpu_context.ts +++ b/src/kernels/webgl/gpgpu_context.ts @@ -199,6 +199,14 @@ export class GPGPUContext { this.gl, rows, columns, this.textureConfig)); } + public downloadPackedMatrixFromBuffer( + buffer: WebGLBuffer, batch: number, rows: number, columns: number, + physicalRows: number, physicalCols: number): Float32Array { + return gpgpu_util.downloadPackedMatrixFromBuffer( + this.gl, buffer, batch, rows, columns, physicalRows, physicalCols, + this.textureConfig); + } + public downloadFloat32MatrixFromBuffer( buffer: WebGLBuffer, rows: number, columns: number): Float32Array { return gpgpu_util.downloadFloat32MatrixFromBuffer( diff --git a/src/kernels/webgl/gpgpu_util.ts b/src/kernels/webgl/gpgpu_util.ts index 15d6486a2d..821656be31 100644 --- a/src/kernels/webgl/gpgpu_util.ts +++ b/src/kernels/webgl/gpgpu_util.ts @@ -381,6 +381,26 @@ export function downloadByteEncodedFloatMatrixFromOutputTexture( return new Float32Array(downloadTarget.buffer); } +export function downloadPackedMatrixFromBuffer( + gl: WebGLRenderingContext, buffer: WebGLBuffer, batch: number, rows: number, + cols: number, physicalRows: number, physicalCols: number, + textureConfig: TextureConfig): Float32Array { + const gl2 = gl as WebGL2RenderingContext; + + const downloadTarget = + new Float32Array(tex_util.getPackedRGBAArraySizeFromMatrixShape( + physicalRows, physicalCols)); + + gl2.bindBuffer(gl.ARRAY_BUFFER, buffer); + gl2.getBufferSubData(gl.ARRAY_BUFFER, 0, downloadTarget); + gl2.bindBuffer(gl.ARRAY_BUFFER, null); + + const matrix = new Float32Array(util.sizeFromShape([batch, rows, cols])); + tex_util.decodeMatrixFromPackedRGBA( + downloadTarget, batch, rows, cols, matrix); + return matrix; +} + export function downloadMatrixFromPackedOutputTexture( gl: WebGLRenderingContext, batch: number, rows: number, cols: number, physicalRows: number, physicalCols: number, diff --git a/src/tensor_test.ts b/src/tensor_test.ts index 9dff97b3b3..c1d684f8d0 100644 --- a/src/tensor_test.ts +++ b/src/tensor_test.ts @@ -108,6 +108,12 @@ describeWithFlags('tensor', ALL_ENVS, () => { expectArraysClose(await a.data(), new Float32Array([1, 2, 3, 4, 5, 6])); }); + it('Tensor.data() packed CPU --> GPU', async () => { + const a = tf.tensor2d([1, 2, 3, 4, 5, 6], [3, 2]); + tf.matMul(a, tf.tensor2d([1, 2], [2, 1])); + expectArraysClose(await a.data(), new Float32Array([1, 2, 3, 4, 5, 6])); + }); + it('Scalar basic methods', () => { const a = tf.scalar(5); expectNumbersClose(a.get(), 5);