Skip to content

Commit

Permalink
Use an even shorter expression for the typed array size.
Browse files Browse the repository at this point in the history
It was realized that the array size doesn't have to be exact.
We now keep at most two significant digits of the number of contexts (rounded up),
so we might use at most 10% more memory than strictly needed (okay)
and could end up using more memory than requested (not okay).
The contextBits calculation has been adjusted to avoid the latter issue.
  • Loading branch information
lifthrasiir committed Sep 11, 2021
1 parent eb74630 commit c6a88ee
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 15 deletions.
2 changes: 1 addition & 1 deletion cli.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ async function compress({ inputs, options, optimize, outputPath, verbose }) {

if (verbose >= 1) {
console.warn(
`Actual memory usage: ${packer.memoryUsageMB < 1 ? '< 1' : packer.memoryUsageMB} MB` +
`Actual memory usage: ${packer.memoryUsageMB < 1 ? '< 1' : packer.memoryUsageMB.toFixed(1)} MB` +
(options.contextBits ? '' : ` (out of ${options.maxMemoryMB || 150} MB)`));
}

Expand Down
44 changes: 32 additions & 12 deletions index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,25 @@ const ceilLog2 = (x, y = 1) => {
return n;
};

// returns [m, e, m * 10^e] where (m-1) * 10^e < v <= m * 10^e, m < 100 and m mod 10 != 0.
// therefore `${m}e${e}` is an upper bound approximation with at most 2 significant digits.
// non-positive inputs yield the special triple [0, 0, 0].
const approximateWithTwoSigDigits = v => {
    if (v <= 0) return [0, 0, 0]; // special case

    // find the power of ten that leaves (at most) two digits in front of it
    let exp = 0;
    let tens = 1;
    while (v >= tens * 100) {
        ++exp;
        tens *= 10;
    }

    // round up so that the approximation never falls below v
    let mant = Math.ceil(v / tens);

    // move every trailing zero of the mantissa into the exponent: 60e6 -> 6e7.
    // this has to loop because rounding up can produce 100 (e.g. 991 -> 100e1),
    // which needs two steps to reach the canonical (and shortest) 1e3.
    // mant >= 1 here because v > 0, so the loop always terminates.
    while (mant % 10 === 0) {
        mant /= 10;
        ++exp;
        tens *= 10;
    }

    return [mant, exp, mant * tens];
};

// Node.js 14 doesn't have a global performance object.
const getPerformanceObject = async () => {
return globalThis.performance || (await import('perf_hooks')).performance;
Expand Down Expand Up @@ -587,7 +606,14 @@ const countBytesPerContext = options => (options.modelMaxCount < 128 ? 1 : optio

// derives the number of context bits from `options.maxMemoryMB` so that the memory
// actually allocated for all models stays within the requested limit.
const contextBitsFromMaxMemory = options => {
    const bytesPerContext = predictionBytesPerContext(options) + countBytesPerContext(options);
    const maxMemoryBytes = options.maxMemoryMB * 1048576;
    let contextBits = floorLog2(maxMemoryBytes, options.sparseSelectors.length * bytesPerContext);

    // the decoder slightly overallocates the memory (~1%) so a naive calculation can exceed the memory limit;
    // recalculate the actual memory usage and decrease contextBits in that case.
    // NOTE(review): `<<` operates on 32-bit integers, so this would wrap for very large
    // context counts; confirm that such configurations are rejected elsewhere.
    const [, , actualNumContexts] = approximateWithTwoSigDigits(options.sparseSelectors.length << contextBits);
    if (actualNumContexts * bytesPerContext > maxMemoryBytes) --contextBits;

    return contextBits;
};

// String.fromCharCode(...array) is short but doesn't work when array.length is "long enough".
Expand Down Expand Up @@ -650,8 +676,9 @@ export class Packer {

get memoryUsageMB() {
const contextBits = this.options.contextBits || contextBitsFromMaxMemory(this.options);
const [, , numContexts] = approximateWithTwoSigDigits(this.options.sparseSelectors.length << contextBits);
const bytesPerContext = predictionBytesPerContext(this.options) + countBytesPerContext(this.options);
return this.options.sparseSelectors.length * bytesPerContext * (1 << contextBits) / 1048576;
return numContexts * bytesPerContext / 1048576;
}

static prepareText(inputs) {
Expand Down Expand Up @@ -959,16 +986,9 @@ export class Packer {
return 'θ';
};

let contextSize;
{
let v = numModels, shift = contextBits;
while (v % 2 == 0) {
v >>= 1;
++shift;
}
contextSize = `${v}<<${shift}`;
// we can also make use of θ, but that wouldn't work in the argument position
}
// only keep two significant digits, rounding up
const [contextMant, contextExp] = approximateWithTwoSigDigits(numModels << contextBits);
const contextSize = `${contextMant}e${contextExp}`;

// 0. first line
// ι: compressed data, where lowest 6 bits are used and higher bits are chosen to avoid escape sequences.
Expand Down
4 changes: 2 additions & 2 deletions tools/demo.html
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
<aside id=$outputmessage></aside>
<footer><ul>
<li><label>Number of contexts: <input id=$numcontexts type=number value=12 min=1 max=64></label> <a href=#num-contexts title=Help>ℹ️</a>
<li><label>Maximum memory usage: <input id=$maxmemory type=number value=150 min=10 max=1024> MB (<span id=$usedmemory>96</span> MB in use)</label> <a href=#max-memory title=Help>ℹ️</a>
<li><label>Maximum memory usage: <input id=$maxmemory type=number value=150 min=10 max=1024> MB (<span id=$usedmemory>-</span> MB in use)</label> <a href=#max-memory title=Help>ℹ️</a>
<li><label><input type=checkbox id=$dirty> Allow the decoder to pollute the global scope</label> <a href=#dirty title=Help>ℹ️</a>
<li><button id=$reset>Reset parameters</button> <button id=$optimize>Optimize parameters</button> <a href=#optimize title=Help>ℹ️</a>
</ul><details><summary>Advanced configuration</summary><ul>
Expand Down Expand Up @@ -415,7 +415,7 @@ <h2>Command-line Usage and API</h2>

let prefix = '';
const packer = new Packer(inputs, options);
$usedmemory.textContent = packer.memoryUsageMB;
$usedmemory.textContent = packer.memoryUsageMB.toFixed(1);

if (optimize) {
if (lastOptimizationLevel < 2) ++lastOptimizationLevel;
Expand Down

0 comments on commit c6a88ee

Please sign in to comment.