diff --git a/package.json b/package.json index 687bcc87..86bf7ea0 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,6 @@ ], "dependencies": { "@gmod/abortable-promise-cache": "^2.0.0", - "@gmod/binary-parser": "^1.3.5", "@jkbonfield/htscodecs": "^0.5.1", "buffer-crc32": "^1.0.0", "bzip2": "^0.1.1", diff --git a/src/cramFile/sectionParsers.ts b/src/cramFile/sectionParsers.ts index 3198b132..a778d51c 100644 --- a/src/cramFile/sectionParsers.ts +++ b/src/cramFile/sectionParsers.ts @@ -1,11 +1,8 @@ -import { Parser } from '@gmod/binary-parser' import { TupleOf } from '../typescript' import { ParsedItem, parseItf8, parseLtf8 } from './util' import { DataSeriesEncodingMap } from './codecs/dataSeriesTypes' import { CramEncoding } from './encoding' -const singleItf8 = new Parser().itf8() - export function getCramFileDefinition(buffer: Buffer, startOffset = 0) { const b = buffer.subarray(startOffset) const dataView = new DataView(b.buffer, b.byteOffset, b.length) @@ -132,11 +129,6 @@ export function getCramTagDictionary( } } -// const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ') -const parseByteAsBool = new Parser().uint8(null, { - formatter: /* istanbul ignore next */ val => !!val, -}) - export interface CramPreservationMap { MI: boolean UI: boolean @@ -633,7 +625,7 @@ function getCramCompressionHeader( } } -function cramContainerHeader1( +function getCramContainerHeader1( majorVersion: number, buffer: Buffer, dataView: DataView, @@ -642,7 +634,8 @@ function cramContainerHeader1( // byte size of the container data (blocks) const length = dataView.getInt32(offset) offset += 4 - // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences + // reference sequence identifier, -1 for unmapped reads, -2 for multiple + // reference sequences const [refSeqId, newOffset1] = parseItf8(buffer, offset) offset += newOffset1 const [refSeqStart, newOffset2] = parseItf8(buffer, offset) @@ -668,17 +661,64 @@ function cramContainerHeader1( let numBases if (majorVersion > 1) { - const [n, newOffset5] = parseItf8(buffer, offset) + const [n, newOffset5] = parseLtf8(buffer, offset) numBases = n offset += newOffset5 maxLength += 9 } - parser = parser - .itf8('numBlocks') // the number of blocks - .itf8('numLandmarks') // the number of landmarks + const [numBlocks, newOffset6] = parseItf8(buffer, offset) + offset += newOffset6 + const [numLandmarks, newOffset7] = parseItf8(buffer, offset) + offset += newOffset7 maxLength += 5 + 5 - return { parser, maxLength } + return { + value: { + length, + refSeqId, + refSeqStart, + alignmentSpan, + numBlocks, + numLandmarks, + numBases, + recordCounter, + numRecords, + }, + maxLength, + offset, + } +} + +function getCramContainerHeader2( + majorVersion: number, + buffer: Buffer, + dataView: DataView, + offset: number, +) { + const [numLandmarks, newOffset1] = parseItf8(buffer, offset) + offset += newOffset1 + const landmarks = [] + for (let i = 0; i < numLandmarks; i++) { + const [landmark, newOffset2] = parseItf8(buffer, offset) + offset += newOffset2 + landmarks.push(landmark) + } + + let crcLength = 0 + let crc32 + if (majorVersion >= 3) { + crc32 = dataView.getUint32(offset) + crcLength = 4 + } + return { + value: { + crc32, + numLandmarks, + landmarks, + }, + maxLength: 5 + numLandmarks * 5 + crcLength, + offset, + } } // each of these is a function of the major and minor version @@ -689,57 +729,8 @@ const versionedParsers = { getCramDataSeriesEncodingMap, getCramTagEncodingMap, getCramCompressionHeader, - - cramContainerHeader1(majorVersion: number) { - let parser = new Parser() - .int32('length') // byte size of the container data (blocks) - .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences - .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads - .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads - .itf8('numRecords') // number of records in the container - let maxLength = 4 + 5 * 4 - - if (majorVersion >= 3) { - parser = parser.ltf8('recordCounter') // 1-based sequential index of records in the file/stream. - maxLength += 9 - } else if (majorVersion === 2) { - parser = parser.itf8('recordCounter') - maxLength += 5 - } - - if (majorVersion > 1) { - parser = parser.ltf8('numBases') // number of read bases - maxLength += 9 - } - parser = parser - .itf8('numBlocks') // the number of blocks - .itf8('numLandmarks') // the number of landmarks - maxLength += 5 + 5 - - return { parser, maxLength } - }, - - cramContainerHeader2(majorVersion: number) { - let parser = new Parser() - .itf8('numLandmarks') // the number of blocks - // Each integer value of this array is a byte offset - // into the blocks byte array. Landmarks are used for - // random access indexing. - .array('landmarks', { - type: new Parser().itf8(), - length: 'numLandmarks', - }) - - let crcLength = 0 - if (majorVersion >= 3) { - parser = parser.uint32('crc32') - crcLength = 4 - } - return { - parser, - maxLength: (numLandmarks: number) => 5 + numLandmarks * 5 + crcLength, - } - }, + getCramContainerHeader1, + getCramContainerHeader2, } export type CompressionMethod = diff --git a/src/typings/binary-parser.d.ts b/src/typings/binary-parser.d.ts deleted file mode 100644 index 4de8b1ab..00000000 --- a/src/typings/binary-parser.d.ts +++ /dev/null @@ -1,44 +0,0 @@ -declare module '@gmod/binary-parser' { - export interface Options { - stripNull?: boolean - formatter?: (item: any) => any - length?: number | string | ((this: { $parent: unknown }) => void) - } - - export class Parser { - public static start(): Parser - - public uint8(name?: string | null, options?: Options): Parser - - public itf8(name?: string | null, options?: Options): Parser - - public ltf8(name?: string | null, options?: Options): Parser - - public uint32(name?: string | null, options?: Options): Parser - - public int32(name?: string | null, options?: Options): Parser - - public buffer(name?: string | null, options?: Options): Parser - - public string(name?: string | null, options?: Options): Parser - - public namely(name: string): Parser - - public nest( - name?: string | null, - options?: { type: Parser | string } & Options, - ): Parser - - public choice( - name?: string | null, - options?: { tag: string; choices: any } & Options, - ): Parser - - public array( - name?: string | null, - options?: { type: string | Parser } & Options, - ): Parser - - parse(bytes: Buffer): { result: T; offset: number } - } -} diff --git a/test/binary-parser.test.ts b/test/binary-parser.test.ts deleted file mode 100644 index b7645905..00000000 --- a/test/binary-parser.test.ts +++ /dev/null @@ -1,60 +0,0 @@ -//@ts-nocheck -import { Parser } from '@gmod/binary-parser' - -import { parseItf8 } from '../src/cramFile/util' - -describe('binary-parser fork', () => { - describe('itf8', () => { - const ip = new Parser().itf8('val') - ;[ - [[0], { result: { val: 0 }, offset: 1 }], - [[0x80, 0xff], { result: { val: 255 }, offset: 2 }], - [[0xff, 0xff, 0xff, 0xff, 0x0f], { result: { val: -1 }, offset: 5 }], - [[0xff, 0xff, 0xff, 0xff, 0xff], { result: { val: -1 }, offset: 5 }], - [[0xff, 0xff, 0xff, 0xff, 0xfe], { result: { val: -2 }, offset: 5 }], - [[192, 170, 130, 140, 174], { result: { val: 43650 }, offset: 3 }], - ].forEach(([input, output]) => { - it(`can parse itf8 [${input.map(n => `0x${n.toString(16)}`)}] - -> ${output.result.val}`, () => { - expect(ip.parse(Buffer.from(input))).toEqual(output) - - const otherParseResult = parseItf8(Buffer.from(input), 0) - expect(otherParseResult[0]).toEqual(output.result.val) - expect(otherParseResult[1]).toEqual(output.offset) - }) - }) - it('can parse several itf8 numbers in a row', () => { - const p = new Parser().itf8('val1').itf8('val2').itf8('val3') - const data = [0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0] - expect(p.parse(Buffer.from(data))).toEqual({ - offset: 8, - result: { val1: 255, val2: -1, val3: 0 }, - }) - }) - }) - - describe('ltf8', () => { - const lp = new Parser().ltf8('val') - ;[ - [ - [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], - { result: { val: -1 }, offset: 9 }, - ], - [ - [0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe], - { result: { val: -2 }, offset: 9 }, - ], - [[0x0], { result: { val: 0 }, offset: 1 }], - ].forEach(([input, output]) => { - it(`can parse ltf8 [${input.map(n => `0x${n.toString(16)}`)}] - -> ${output.result.val}`, () => { - expect(lp.parse(Buffer.from(input))).toEqual(output) - }) - }) - }) - - // describe('itf8 extended', () => { - // it('can parse several itf8 numbers in a row') - - // }) -}) diff --git a/yarn.lock b/yarn.lock index e02a6cc0..e43d15ac 100644 --- a/yarn.lock +++ b/yarn.lock @@ -336,13 +336,6 @@ long "^4.0.0" pako "^1.0.11" -"@gmod/binary-parser@^1.3.5": - version "1.4.2" - resolved "https://registry.yarnpkg.com/@gmod/binary-parser/-/binary-parser-1.4.2.tgz#54f50e7b3437cc0882bd8ba6ef7c8e664ae3e892" - integrity sha512-X/UI86z2l6+qhy3Biai9Mog5/PPT3HR8+xo6qALijQ6FpRovkd/OPWOgy2fZX3vMRuMcyZKUva7V24Iir8HfhQ== - dependencies: - long "^4.0.0" - "@gmod/indexedfasta@^2.1.0": version "2.1.1" resolved "https://registry.yarnpkg.com/@gmod/indexedfasta/-/indexedfasta-2.1.1.tgz#aa8eaf6be21f634f11da90dd8494f3df5ed7e50d"