tensorflow · mattsoulanille · Apr 20, 2023 · Apr 18, 2023 · Apr 18, 2023 · Apr 18, 2023
diff --git a/tfjs-core/src/io/weights_loader.ts b/tfjs-core/src/io/weights_loader.ts
@@ -18,6 +18,7 @@
 import {env} from '../environment';
 
 import {NamedTensorMap} from '../tensor_types';
+import {TypedArray} from '../types';
 import * as util from '../util';
 import {decodeWeights} from './io_utils';
 import {monitorPromisesProgress} from './progress';
@@ -212,24 +213,13 @@ export function weightsLoaderFactory(
     groupIndicesToFetch.forEach(i => {
       const numBuffers = manifest[i].paths.length;
 
-      let groupBytes = 0;
-      for (let i = 0; i < numBuffers; i++) {
-        groupBytes += buffers[bufferIndexOffset + i].byteLength;
-      }
-
-      // Create a buffer for the whole group.
-      const groupBuffer = new ArrayBuffer(groupBytes);
-      const groupByteBuffer = new Uint8Array(groupBuffer);
-      let groupBufferOffset = 0;
-      for (let i = 0; i < numBuffers; i++) {
-        const buffer = new Uint8Array(buffers[bufferIndexOffset + i]);
-        groupByteBuffer.set(buffer, groupBufferOffset);
-        groupBufferOffset += buffer.byteLength;
-      }
+      const weightsBuffer = new CompositeArrayBuffer(
+        buffers.slice(bufferIndexOffset, bufferIndexOffset + numBuffers));
 
       const weightsEntries = groupWeightsToFetch[i];
+
       weightsEntries.forEach(weightsEntry => {
-        const byteBuffer = groupBuffer.slice(
+        const byteBuffer = weightsBuffer.slice(
             weightsEntry.groupOffset,
             weightsEntry.groupOffset + weightsEntry.sizeBytes);
         const nameToTensorMap =
@@ -245,3 +235,179 @@ export function weightsLoaderFactory(
     return weightsTensorMap;
   };
 }
+
+/**
+ * One input buffer together with the span of composite byte offsets it
+ * covers. `start` is the offset of the buffer's first byte and `end` is the
+ * offset one past its last byte, so `end - start` is the buffer's length.
+ */
+interface BufferRange {
+  buffer: ArrayBuffer;
+  start: number;
+  end: number;
+}
+
+/**
+ * Presents several ArrayBuffers as one contiguous, read-only byte sequence.
+ *
+ * The input buffers are kept as they are rather than being concatenated;
+ * bytes are only copied when `slice` is called.
+ */
+export class CompositeArrayBuffer {
+  // Non-empty input buffers, in order, each tagged with the half-open span
+  // [start, end) of composite byte offsets it covers.
+  private ranges: BufferRange[] = [];
+  // Index of the range returned by the most recent lookup. It is tried first
+  // on the next lookup.
+  private previousRangeIndex = 0;
+  // Byte length shared by every range except possibly the last one, or
+  // undefined when those lengths differ. When set, a lookup is one division.
+  private bufferUniformSize?: number;
+  /** Total number of bytes across all input buffers. */
+  public readonly byteLength: number;
+
+  /**
+   * @param buffers One buffer or an ordered list of buffers. For a TypedArray
+   *     only the bytes it views are used, not its whole underlying buffer.
+   *     An empty list produces a composite with `byteLength` 0.
+   */
+  constructor(
+      buffers: ArrayBuffer | ArrayBuffer[] | TypedArray | TypedArray[]) {
+    const inputs: Array<ArrayBuffer | TypedArray> =
+        Array.isArray(buffers) ? buffers : [buffers];
+
+    let start = 0;
+    for (const input of inputs) {
+      const buffer = CompositeArrayBuffer.toArrayBuffer(input);
+      // An empty buffer contains no byte, so it can never be the answer to
+      // a lookup. Dropping it also keeps the uniform size non-zero.
+      if (buffer.byteLength === 0) {
+        continue;
+      }
+      const end = start + buffer.byteLength;
+      this.ranges.push({buffer, start, end});
+      start = end;
+    }
+    // Always assigned, including when no buffers (or only empty ones) were
+    // given.
+    this.byteLength = start;
+
+    // The sizes count as uniform when every range except the last has the
+    // same length as the first. The last one may be shorter or longer.
+    if (this.ranges.length > 0) {
+      const firstSize = this.ranges[0].end - this.ranges[0].start;
+      const isUniform = this.ranges.slice(0, -1).every(
+          range => range.end - range.start === firstSize);
+      this.bufferUniformSize = isUniform ? firstSize : undefined;
+    }
+  }
+
+  /**
+   * Return an ArrayBuffer holding exactly the bytes `input` refers to.
+   * ArrayBuffers are returned unchanged. A TypedArray that views only part
+   * of its underlying buffer is copied so bytes outside the view are not
+   * included.
+   */
+  private static toArrayBuffer(input: ArrayBuffer | TypedArray): ArrayBuffer {
+    if (!util.isTypedArray(input)) {
+      return input;
+    }
+    const {buffer, byteOffset, byteLength} = input;
+    if (byteOffset === 0 && byteLength === buffer.byteLength) {
+      return buffer as ArrayBuffer;
+    }
+    return buffer.slice(byteOffset, byteOffset + byteLength) as ArrayBuffer;
+  }
+
+  /**
+   * Copy the bytes in [start, end) into a new ArrayBuffer.
+   *
+   * NaN is treated as 0 and fractional values are truncated, as
+   * ArrayBuffer.prototype.slice does. Unlike ArrayBuffer, a negative value is
+   * not counted from the end: `start` is clamped up to 0 and `end` is clamped
+   * down to `byteLength`.
+   *
+   * @param start Offset of the first byte to copy. Defaults to 0.
+   * @param end Offset one past the last byte to copy. Defaults to
+   *     `byteLength`.
+   * @returns A new buffer with the requested bytes, or an empty buffer when
+   *     the clamped range is empty.
+   * @throws Error if no range contains `start`. This is not expected to
+   *     happen because `start` is clamped to the composite's bounds first.
+   */
+  slice(start = 0, end = this.byteLength): ArrayBuffer {
+    start = isNaN(start) ? 0 : Math.trunc(start);
+    end = isNaN(end) ? 0 : Math.trunc(end);
+
+    // Fix the bounds to within the array.
+    start = Math.max(0, start);
+    end = Math.min(this.byteLength, end);
+    if (end <= start) {
+      return new ArrayBuffer(0);
+    }
+
+    const startRangeIndex = this.findRangeForByte(start);
+    if (startRangeIndex === -1) {
+      throw new Error(`Could not find start range for byte ${start}`);
+    }
+
+    const size = end - start;
+    const outputBuffer = new ArrayBuffer(size);
+    const outputArray = new Uint8Array(outputBuffer);
+    let sliced = 0;
+    // Walk consecutive ranges until the requested number of bytes is copied.
+    for (let i = startRangeIndex; sliced < size && i < this.ranges.length;
+         i++) {
+      const range = this.ranges[i];
+
+      // Offsets relative to this range's own buffer. After the first range,
+      // localStart is always 0.
+      const localStart = start + sliced - range.start;
+      const localEnd = Math.min(end, range.end) - range.start;
+      const count = localEnd - localStart;
+
+      // Copy through a view so no intermediate buffer is allocated.
+      outputArray.set(new Uint8Array(range.buffer, localStart, count), sliced);
+      sliced += count;
+    }
+    return outputBuffer;
+  }
+
+  /**
+   * Get the index of the range that contains the byte at `byteIndex`, or -1
+   * if `byteIndex` is outside the composite buffer.
+   */
+  private findRangeForByte(byteIndex: number): number {
+    if (this.ranges.length === 0 || byteIndex < 0 ||
+        byteIndex >= this.byteLength) {
+      return -1;
+    }
+
+    // If the buffers have a uniform size, compute the range directly. The
+    // last range may be longer than the uniform size, so the quotient is
+    // clamped to the last index.
+    if (this.bufferUniformSize != null) {
+      this.previousRangeIndex = Math.min(
+          Math.floor(byteIndex / this.bufferUniformSize),
+          this.ranges.length - 1);
+      return this.previousRangeIndex;
+    }
+
+    // Comparator for `search`: negative when byteIndex lies before the
+    // range, positive when it lies at or after the range's end, and zero
+    // when the range contains it.
+    const check = (range: BufferRange): number => {
+      if (byteIndex < range.start) {
+        return -1;
+      }
+      if (byteIndex >= range.end) {
+        return 1;
+      }
+      return 0;
+    };
+
+    // For efficiency, try the previous range first.
+    if (check(this.ranges[this.previousRangeIndex]) === 0) {
+      return this.previousRangeIndex;
+    }
+
+    // Otherwise fall back to a search over the sorted ranges.
+    const index = search(this.ranges, check);
+    if (index === -1) {
+      return -1;
+    }
+
+    this.previousRangeIndex = index;
+    return index;
+  }
+}
+
+/**
+ * Binary search for an element of a sorted array.
+ *
+ * @param sortedArray The sorted array to search.
+ * @param compare A function to compare the current value against the searched
+ *     value. Return 0 on a match, negative if the searched value is less than
+ *     the value passed to the function, and positive if the searched value is
+ *     greater than the value passed to the function.
+ * @returns The index of a matching element, or -1 if it's not in the array
+ *     (including when the array is empty).
+ */
+function search<T>(sortedArray: T[], compare: (t: T) => number): number {
+  // Candidates are the indices in the half-open interval [min, max).
+  let min = 0;
+  let max = sortedArray.length;
+
+  while (min < max) {
+    // The midpoint must be offset by `min`; otherwise the probe falls back
+    // to the low indices and the loop may never terminate.
+    const middle = min + Math.floor((max - min) / 2);
+    const side = compare(sortedArray[middle]);
+
+    if (side === 0) {
+      return middle;
+    } else if (side < 0) {
+      max = middle;
+    } else {
+      min = middle + 1;
+    }
+  }
+  return -1;
+}
diff --git a/tfjs-core/src/io/weights_loader_test.ts b/tfjs-core/src/io/weights_loader_test.ts
@@ -18,6 +18,7 @@ import * as tf from '../index';
 import {BROWSER_ENVS, describeWithFlags} from '../jasmine_util';
 import {expectArraysClose, expectArraysEqual} from '../test_util';
 import {WeightsManifestConfig} from './types';
+import { CompositeArrayBuffer } from './weights_loader';
 
 describeWithFlags('loadWeights', BROWSER_ENVS, () => {
   const setupFakeWeightFiles = (fileBufferMap: {
@@ -403,7 +404,6 @@ describeWithFlags('loadWeights', BROWSER_ENVS, () => {
 
   it('throws if requested weight has unknown dtype', async () => {
     setupFakeWeightFiles({'./weightfile0': new Float32Array([1, 2, 3])});
-
     const manifest: WeightsManifestConfig = [{
       'paths': ['weightfile0'],
       'weights': [{
@@ -542,3 +542,92 @@ describeWithFlags('loadWeights', BROWSER_ENVS, () => {
     expect(weight2.dtype).toEqual('float32');
   });
 });
+
+describe('CompositeArrayBuffer', () => {
+  // All buffers except the last have the same length (4 bytes).
+  const uniformBuffers = [
+    new Uint8Array([0, 1, 2, 3]).buffer,
+    new Uint8Array([4, 5, 6, 7]).buffer,
+    new Uint8Array([8, 9, 10, 11]).buffer,
+    new Uint8Array([12, 13, 14, 15]).buffer,
+    new Uint8Array([16]).buffer,
+  ];
+
+  // Buffers of differing lengths (3, 5, 4 and 5 bytes).
+  const nonUniformBuffers = [
+    new Uint8Array([0, 1, 2]).buffer,
+    new Uint8Array([3, 4, 5, 6, 7]).buffer,
+    new Uint8Array([8, 9, 10, 11]).buffer,
+    new Uint8Array([12, 13, 14, 15, 16]).buffer,
+  ];
+
+  const bufferTestCases = [
+    ['uniform', uniformBuffers],
+    ['non-uniform', nonUniformBuffers]
+  ] as const;
+
+  // Both layouts concatenate to the bytes 0..16, so the same expectations
+  // apply to each of them.
+  for (const [buffersType, buffers] of bufferTestCases) {
+    // A nested suite keeps each beforeEach scoped to its own layout instead
+    // of running before every test in the outer suite.
+    describe(`with ${buffersType} buffers`, () => {
+      let composite: CompositeArrayBuffer;
+      beforeEach(() => {
+        composite = new CompositeArrayBuffer(buffers);
+      });
+
+      it('slices across multiple buffers', () => {
+        expectArraysEqual(new Uint8Array(composite.slice(1, 13)),
+                          [1,2,3,4,5,6,7,8,9,10,11,12]);
+      });
+
+      it('slices to the end of the array when \'end\' is not specified',
+         () => {
+           expectArraysEqual(new Uint8Array(composite.slice(5)),
+                             [5,6,7,8,9,10,11,12,13,14,15,16]);
+         });
+
+      it('makes a copy when slice() is called with no arguments', () => {
+        expectArraysEqual(new Uint8Array(composite.slice()),
+                          [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]);
+      });
+
+      it('slices from zero when start is negative', () => {
+        expectArraysEqual(new Uint8Array(composite.slice(-4, 5)),
+                          [0,1,2,3,4]);
+      });
+
+      it('slices to the end when end is greater than length', () => {
+        expectArraysEqual(new Uint8Array(composite.slice(7, 1000)),
+                          [7,8,9,10,11,12,13,14,15,16]);
+      });
+    });
+  }
+
+  it('can be passed an empty arraybuffer', () => {
+    const array = new Uint8Array([]);
+    const singleComposite = new CompositeArrayBuffer(array.buffer);
+    expectArraysEqual(new Uint8Array(singleComposite.slice()), []);
+  });
+
+  it('can be created from a single array', () => {
+    const array = new Uint8Array([1,2,3]);
+    const singleComposite = new CompositeArrayBuffer(array.buffer);
+    expectArraysEqual(new Uint8Array(singleComposite.slice()), array);
+  });
+
+  it('treats NaN as zero when passed as the start of slice', () => {
+    const array = new Uint8Array([1,2,3]);
+    const composite = new CompositeArrayBuffer(array.buffer);
+    expectArraysEqual(new Uint8Array(composite.slice(NaN, 2)), [1,2]);
+  });
+
+  it('treats NaN as zero when passed as the end of slice', () => {
+    const array = new Uint8Array([1,2,3]);
+    const composite = new CompositeArrayBuffer(array.buffer);
+    expectArraysEqual(new Uint8Array(composite.slice(0, NaN)), []);
+  });
+
+  it('supports TypedArray input', () => {
+    // This support is necessary for some tests in tfjs-converter. Maybe those
+    // tests are misconfigured?
+    const array = new Uint8Array([1,2,3]);
+    const composite = new CompositeArrayBuffer(array);
+    expectArraysEqual(new Uint8Array(composite.slice(0, 2)), [1,2]);
+  });
+});