Skip to content

Commit

Permalink
Avoid unintended garbage collection of raw data in PreparedDictionary…
Browse files Browse the repository at this point in the history
…Impl (#190)

Motivation:

As described in #189, the garbage collector can currently reclaim the
dictionary's raw data, which results in poor compression ratios because
no dictionary is actually used.
The code to finalize the rawData already exists.

Modification:
Hold a reference to rawData in PreparedDictionaryImpl.

Result:
Fixes #189.

---------

Co-authored-by: Benjamin Wollmer <benni@wollmer.dev>
Co-authored-by: Aayush Atharva <aayush@shieldblaze.com>
  • Loading branch information
3 people authored Nov 27, 2024
1 parent 160890b commit f280107
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ private static class PreparedDictionaryImpl implements PreparedDictionary {

/**
 * Creates a prepared dictionary backed by the two given buffers.
 *
 * @param data    buffer stored in the {@code data} field
 * @param rawData buffer stored in the {@code rawData} field
 */
private PreparedDictionaryImpl(ByteBuffer data, ByteBuffer rawData) {
    // Keeping a reference to rawData ties its reachability to this object,
    // so it is not garbage collected while the dictionary is still in use.
    this.rawData = rawData;
    this.data = data;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@
package com.aayushatharva.brotli4j.encoder;

import com.aayushatharva.brotli4j.Brotli4jLoader;
import com.aayushatharva.brotli4j.common.BrotliCommon;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufUtil;
import io.netty.buffer.Unpooled;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand Down Expand Up @@ -77,4 +80,32 @@ void encodeModeEnumValues() {
assertEquals(Encoder.Mode.TEXT, Encoder.Mode.of(Encoder.Mode.TEXT.ordinal()));
assertEquals(Encoder.Mode.GENERIC, Encoder.Mode.of(Encoder.Mode.GENERIC.ordinal()));
}


/**
 * Verifies that a prepared dictionary still yields the expected compressed output
 * after garbage collection has been requested.
 *
 * <p>The expected bytes are hard-coded rather than produced by a first compression run,
 * because the dictionary data could already have been collected before that run.
 */
@Test
void ensureDictionaryDataRemainsAfterGC() throws IOException, InterruptedException {
    final byte[] expectedCompression = new byte[]{27, 43, 0, -8, 37, 0, -62, -104, -40, -63, 0};
    final String dictionaryData = "This is some data to be used as a dictionary";
    // Pin the charset so the bytes (and thus the hard-coded expectation) do not depend on
    // the platform default. The dictionary text doubles as the payload to keep the test small.
    final byte[] rawBytes = dictionaryData.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final PreparedDictionary dic = Encoder.prepareDictionary(BrotliCommon.makeNative(rawBytes), 0);

    // Create gc pressure to trigger potential collection of dictionary data
    ArrayList<Integer> hashes = new ArrayList<>();
    for (int i = 0; i < 1_000_000; i++) {
        hashes.add(String.valueOf(Math.random()).hashCode());
    }
    hashes = null; // drop the only reference so the allocations above become garbage
    System.gc();   // only a request to the JVM; a collection is not guaranteed

    final ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    // Scope the try to the Brotli stream alone: it is closed exactly once on exit,
    // before the output is inspected below.
    try (BrotliOutputStream brotliOutputStream = new BrotliOutputStream(compressed)) {
        brotliOutputStream.attachDictionary(dic);
        brotliOutputStream.write(rawBytes);
    }

    // A mismatch here means the dictionary was not applied (e.g. its data was already cleared by the GC)
    assertArrayEquals(expectedCompression, compressed.toByteArray());
}
}

0 comments on commit f280107

Please sign in to comment.