Skip to content

Commit

Permalink
Add lzma test
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Oct 27, 2023
1 parent d061b7a commit 9ba2250
Show file tree
Hide file tree
Showing 10 changed files with 20,940 additions and 22 deletions.
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@
"buffer-crc32": "^0.2.13",
"bzip2": "^0.1.1",
"long": "^4.0.0",
"lzma-native": "^8.0.6",
"md5": "^2.2.1",
"pako": "^1.0.4",
"quick-lru": "^4.0.1"
"quick-lru": "^4.0.1",
"xz-decompress": "^0.2.1"
},
"devDependencies": {
"@babel/plugin-transform-modules-commonjs": "^7.18.2",
Expand All @@ -59,6 +61,7 @@
"@types/buffer-crc32": "^0.2.2",
"@types/jest": "^29.2.4",
"@types/long": "^4.0.2",
"@types/lzma-native": "^4.0.3",
"@types/md5": "^2.3.2",
"@types/pako": "^1.0.3",
"@typescript-eslint/eslint-plugin": "^5.46.1",
Expand Down
10 changes: 8 additions & 2 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import crc32 from 'buffer-crc32'
import QuickLRU from 'quick-lru'
// @ts-expect-error
import bzip2 from 'bzip2'
import lzma from 'lzma-native'

import { CramMalformedError, CramUnimplementedError } from '../errors'
import ransuncompress from '../rans'
Expand Down Expand Up @@ -319,7 +320,7 @@ export default class CramFile {
return data
}

_uncompress(
async _uncompress(
compressionMethod: CompressionMethod,
inputBuffer: Buffer,
outputBuffer: Buffer,
Expand All @@ -340,6 +341,11 @@ export default class CramFile {
size -= chunk.length
}
} while (chunk != -1)
} else if (compressionMethod === 'lzma') {
// https://github.com/addaleax/lzma-native#encoding-strings-and-buffer-objects
// @ts-expect-error @types/lzma-native says return type void but it seems promise
const res = (await lzma.decompress(inputBuffer)) as Buffer
res.copy(outputBuffer)
} else if (compressionMethod === 'rans') {
ransuncompress(inputBuffer, outputBuffer)
//htscodecs r4x8 is slower, but compatible.
Expand Down Expand Up @@ -386,7 +392,7 @@ export default class CramFile {
blockContentPosition,
)

this._uncompress(
await this._uncompress(
blockHeader.compressionMethod,
compressedData,
uncompressedData,
Expand Down
Binary file added test/data/hts-specs/0902_comp_bz2.cram
Binary file not shown.
7 changes: 7 additions & 0 deletions test/data/hts-specs/0902_comp_bz2.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd UR:/nfs/users/nfs_j/jkb/work/samtools_master/hts-specs/test/CRAM/passed/ce.fa
@RG ID:rg SM:test
@RG ID:rg2 SM:test
r1 99 CHROMOSOME_I 1000 40 100M = 1200 300 ATTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCGNGCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATT #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg
r1 147 CHROMOSOME_I 1200 40 100M = 1000 -300 TTTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTCNCTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTC #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg
r2 99 CHROMOSOME_I 1000 40 100M = 1200 300 ATTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCGNGCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATT #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg2
r2 147 CHROMOSOME_I 1200 40 100M = 1000 -300 TTTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTCNCTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTC #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg2
Binary file added test/data/hts-specs/0903_comp_lzma.cram
Binary file not shown.
7 changes: 7 additions & 0 deletions test/data/hts-specs/0903_comp_lzma.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd UR:/nfs/users/nfs_j/jkb/work/samtools_master/hts-specs/test/CRAM/passed/ce.fa
@RG ID:rg SM:test
@RG ID:rg2 SM:test
r1 99 CHROMOSOME_I 1000 40 100M = 1200 300 ATTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCGNGCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATT #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg
r1 147 CHROMOSOME_I 1200 40 100M = 1000 -300 TTTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTCNCTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTC #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg
r2 99 CHROMOSOME_I 1000 40 100M = 1200 300 ATTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCGNGCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATT #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg2
r2 147 CHROMOSOME_I 1200 40 100M = 1000 -300 TTTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTCNCTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTC #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:rg2
20,803 changes: 20,803 additions & 0 deletions test/data/hts-specs/ce.fa

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions test/data/hts-specs/ce.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CHROMOSOME_I 1009800 14 50 51
CHROMOSOME_II 5000 1030025 50 51
CHROMOSOME_III 5000 1035141 50 51
CHROMOSOME_IV 5000 1040256 50 51
CHROMOSOME_V 5000 1045370 50 51
CHROMOSOME_X 5000 1050484 50 51
CHROMOSOME_MtDNA 5000 1055602 50 51
67 changes: 50 additions & 17 deletions test/dump.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,58 @@ describe('dumping cram files', () => {
}, 10000)
})
})
// Dumps hard_clipping.cram (reference: volvox.fa) and checks the first read
// feature of the record found at fileData[2].data[1].features[0].
test('works with hard clipping', async () => {
  const reference = new FetchableSmallFasta(testDataFile('volvox.fa'))
  const cram = new CramFile({
    filehandle: testDataFile('hard_clipping.cram'),
    seqFetch: reference.fetch.bind(reference),
  })

  const dumped = await dumpWholeFile(cram)
  const record = dumped[2].data[1].features[0]

  // Both handles point at readFeatures[0], exactly as in the original test.
  const firstFeature = record.readFeatures[0]
  const sameFeatureAgain = record.readFeatures[0]

  expect(firstFeature.refPos).toEqual(737)
  expect(sameFeatureAgain.refPos).toEqual(737)
  // The feature is expected to sit at the record's alignment start.
  expect(firstFeature.refPos).toEqual(record.alignmentStart)
  expect(firstFeature.pos).toEqual(1)
  expect(firstFeature.data).toEqual(803)
})

// Dumps the lzma-compressed hts-specs fixture (reference: hts-specs/ce.fa) and
// checks the first read feature of the record at fileData[2].data[1].features[0].
test('lzma', async () => {
  const reference = new FetchableSmallFasta(testDataFile('hts-specs/ce.fa'))
  const cram = new CramFile({
    filehandle: testDataFile('hts-specs/0903_comp_lzma.cram'),
    seqFetch: reference.fetch.bind(reference),
  })

  const dumped = await dumpWholeFile(cram)
  const record = dumped[2].data[1].features[0]

  // Both handles point at readFeatures[0], exactly as in the original test.
  const firstFeature = record.readFeatures[0]
  const sameFeatureAgain = record.readFeatures[0]

  expect(firstFeature.refPos).toEqual(1050)
  expect(sameFeatureAgain.refPos).toEqual(1050)
  expect(firstFeature.refPos).toEqual(1050)
  expect(firstFeature.pos).toEqual(51)
  expect(firstFeature.data).toEqual(1)
})

describe('works with hard clipping', () => {
it('hard clipped volvox data file', async () => {
const fasta = new FetchableSmallFasta(testDataFile('volvox.fa'))
const seqFetch = fasta.fetch.bind(fasta)
const file = new CramFile({
filehandle: testDataFile('hard_clipping.cram'),
seqFetch,
})
const fileData = await dumpWholeFile(file)
const feat = fileData[2].data[1].features[0]
const hardClip = feat.readFeatures[0]
const nextReadFeature = feat.readFeatures[0]
expect(hardClip.refPos).toEqual(737)
expect(nextReadFeature.refPos).toEqual(737)
expect(hardClip.refPos).toEqual(feat.alignmentStart)
expect(hardClip.pos).toEqual(1)
expect(hardClip.data).toEqual(803)
// Dumps the bzip2-compressed hts-specs fixture (reference: hts-specs/ce.fa) and
// checks the first read feature of the record at fileData[2].data[1].features[0].
//
// The previous expectations (refPos 737, refPos === alignmentStart, pos 1,
// data 803) were copied from the volvox hard-clipping test. They cannot hold
// for this fixture: 0902_comp_bz2.sam only contains reads aligned at 1000 and
// 1200, and it is line-for-line identical to 0903_comp_lzma.sam. The decoded
// record is therefore expected to match the lzma test.
// NOTE(review): expected values mirror the lzma test; confirm by running the
// suite against 0902_comp_bz2.cram.
test('bzip2', async () => {
  const fasta = new FetchableSmallFasta(testDataFile('hts-specs/ce.fa'))
  const seqFetch = fasta.fetch.bind(fasta)
  const file = new CramFile({
    filehandle: testDataFile('hts-specs/0902_comp_bz2.cram'),
    seqFetch,
  })
  const fileData = await dumpWholeFile(file)
  const feat = fileData[2].data[1].features[0]
  const firstFeature = feat.readFeatures[0]

  // pos 51 on a read whose SAM alignment start is 1000 gives refPos 1050.
  expect(firstFeature.refPos).toEqual(1050)
  expect(firstFeature.pos).toEqual(51)
  expect(firstFeature.data).toEqual(1)
})

function isIterable(input) {
Expand Down
56 changes: 54 additions & 2 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,13 @@
resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a"
integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==

"@types/lzma-native@^4.0.3":
version "4.0.3"
resolved "https://registry.yarnpkg.com/@types/lzma-native/-/lzma-native-4.0.3.tgz#88fc4eef19f9997f9fea2f53e678577c90fc24a7"
integrity sha512-jePSWm7fQF8ooItptNqiwUHRYfUPt/WGBum4SOraDcogKlWyugk4/gqde6twAU25G5LQEpxawVVJypXPzU5EgA==
dependencies:
"@types/node" "*"

"@types/md5@^2.3.2":
version "2.3.2"
resolved "https://registry.yarnpkg.com/@types/md5/-/md5-2.3.2.tgz#529bb3f8a7e9e9f621094eb76a443f585d882528"
Expand Down Expand Up @@ -2815,7 +2822,7 @@ inflight@^1.0.4:
once "^1.3.0"
wrappy "1"

inherits@2:
inherits@2, inherits@^2.0.3:
version "2.0.4"
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
Expand Down Expand Up @@ -3645,6 +3652,15 @@ lru-cache@^6.0.0:
resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.0.0.tgz#b9e2a6a72a129d81ab317202d93c7691df727e61"
integrity sha512-svTf/fzsKHffP42sujkO/Rjs37BCIsQVRCeNYIm9WN8rgT7ffoUnRtZCqU+6BqcSBdv8gwJeTz8knJpgACeQMw==

lzma-native@^8.0.6:
version "8.0.6"
resolved "https://registry.yarnpkg.com/lzma-native/-/lzma-native-8.0.6.tgz#3ea456209d643bafd9b5d911781bdf0b396b2665"
integrity sha512-09xfg67mkL2Lz20PrrDeNYZxzeW7ADtpYFbwSQh9U8+76RIzx5QsJBMy8qikv3hbUPfpy6hqwxt6FcGK81g9AA==
dependencies:
node-addon-api "^3.1.0"
node-gyp-build "^4.2.1"
readable-stream "^3.6.0"

magic-string@^0.30.0:
version "0.30.0"
resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.30.0.tgz#fd58a4748c5c4547338a424e90fa5dd17f4de529"
Expand Down Expand Up @@ -4232,6 +4248,16 @@ neo-async@^2.6.2:
resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f"
integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==

node-addon-api@^3.1.0:
version "3.2.1"
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-3.2.1.tgz#81325e0a2117789c0128dab65e7e38f07ceba161"
integrity sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A==

node-gyp-build@^4.2.1:
version "4.6.1"
resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.1.tgz#24b6d075e5e391b8d5539d98c7fc5c210cac8a3e"
integrity sha512-24vnklJmyRS8ViBNI8KbtK/r/DmXQMRiOMXTNz2nrTnAYUwjmEEbnnpB/+kt+yWRv73bPsSPRFddrcIbAxSiMQ==

node-int64@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
Expand Down Expand Up @@ -4609,6 +4635,15 @@ read-pkg@^7.1.0:
parse-json "^5.2.0"
type-fest "^2.0.0"

readable-stream@^3.6.0:
version "3.6.2"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.2.tgz#56a9b36ea965c00c5a93ef31eb111a0f11056967"
integrity sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==
dependencies:
inherits "^2.0.3"
string_decoder "^1.1.1"
util-deprecate "^1.0.1"

readdirp@~3.6.0:
version "3.6.0"
resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.6.0.tgz#74a370bd857116e245b29cc97340cd431a02a6c7"
Expand Down Expand Up @@ -4768,7 +4803,7 @@ sade@^1.7.3:
dependencies:
mri "^1.1.0"

safe-buffer@^5.1.0:
safe-buffer@^5.1.0, safe-buffer@~5.2.0:
version "5.2.1"
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
Expand Down Expand Up @@ -4980,6 +5015,13 @@ string.prototype.trimstart@^1.0.6:
define-properties "^1.1.4"
es-abstract "^1.20.4"

string_decoder@^1.1.1:
version "1.3.0"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
dependencies:
safe-buffer "~5.2.0"

stringify-entities@^4.0.0:
version "4.0.3"
resolved "https://registry.yarnpkg.com/stringify-entities/-/stringify-entities-4.0.3.tgz#cfabd7039d22ad30f3cc435b0ca2c1574fc88ef8"
Expand Down Expand Up @@ -5303,6 +5345,11 @@ url@^0.11.0:
punycode "^1.4.1"
qs "^6.11.0"

util-deprecate@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==

uvu@^0.5.0:
version "0.5.6"
resolved "https://registry.yarnpkg.com/uvu/-/uvu-0.5.6.tgz#2754ca20bcb0bb59b64e9985e84d2e81058502df"
Expand Down Expand Up @@ -5542,6 +5589,11 @@ write-file-atomic@^4.0.2:
imurmurhash "^0.1.4"
signal-exit "^3.0.7"

xz-decompress@^0.2.1:
version "0.2.1"
resolved "https://registry.yarnpkg.com/xz-decompress/-/xz-decompress-0.2.1.tgz#0b1518a3faacf2d983abdb79a4aacb1d8f557c9d"
integrity sha512-vgpc2zPchALa6D2lc+IFRrcAB3L4I40KmgUcPV7G70CfeMWzVdmlfDpyEczvsE435WgtH6WYYT/11Um57u7s6A==

y18n@^5.0.5:
version "5.0.8"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"
Expand Down

0 comments on commit 9ba2250

Please sign in to comment.