Skip to content

Commit

Permalink
More misc
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Jul 31, 2024
1 parent b566395 commit ee4e6b7
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 178 deletions.
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
],
"dependencies": {
"@gmod/abortable-promise-cache": "^2.0.0",
"@gmod/binary-parser": "^1.3.5",
"@jkbonfield/htscodecs": "^0.5.1",
"buffer-crc32": "^1.0.0",
"bzip2": "^0.1.1",
Expand Down
123 changes: 57 additions & 66 deletions src/cramFile/sectionParsers.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import { Parser } from '@gmod/binary-parser'
import { TupleOf } from '../typescript'
import { ParsedItem, parseItf8, parseLtf8 } from './util'
import { DataSeriesEncodingMap } from './codecs/dataSeriesTypes'
import { CramEncoding } from './encoding'

const singleItf8 = new Parser().itf8()

export function getCramFileDefinition(buffer: Buffer, startOffset = 0) {
const b = buffer.subarray(startOffset)
const dataView = new DataView(b.buffer, b.byteOffset, b.length)
Expand Down Expand Up @@ -132,11 +129,6 @@ export function getCramTagDictionary(
}
}

// const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
const parseByteAsBool = new Parser().uint8(null, {
formatter: /* istanbul ignore next */ val => !!val,
})

export interface CramPreservationMap {
MI: boolean
UI: boolean
Expand Down Expand Up @@ -633,7 +625,7 @@ function getCramCompressionHeader(
}
}

function cramContainerHeader1(
function getCramContainerHeader1(
majorVersion: number,
buffer: Buffer,
dataView: DataView,
Expand All @@ -642,7 +634,8 @@ function cramContainerHeader1(
// byte size of the container data (blocks)
const length = dataView.getInt32(offset)
offset += 4
// reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
// reference sequence identifier, -1 for unmapped reads, -2 for multiple
// reference sequences
const [refSeqId, newOffset1] = parseItf8(buffer, offset)
offset += newOffset1
const [refSeqStart, newOffset2] = parseItf8(buffer, offset)
Expand All @@ -668,17 +661,64 @@ function cramContainerHeader1(

let numBases
if (majorVersion > 1) {
const [n, newOffset5] = parseItf8(buffer, offset)
const [n, newOffset5] = parseLtf8(buffer, offset)
numBases = n
offset += newOffset5
maxLength += 9
}
parser = parser
.itf8('numBlocks') // the number of blocks
.itf8('numLandmarks') // the number of landmarks
const [numBlocks, newOffset6] = parseItf8(buffer, offset)
offset += newOffset6
const [numLandmarks, newOffset7] = parseItf8(buffer, offset)
offset += newOffset7
maxLength += 5 + 5

return { parser, maxLength }
return {
value: {
length,
refSeqId,
refSeqStart,
alignmentSpan,
numBlocks,
numLandmarks,
numBases,
recordCounter,
numRecords,
},
maxLength,
offset,
}
}

/**
 * Parses the second part of a CRAM container header: the ITF8-encoded
 * landmark count, the landmark values themselves and, for CRAM major
 * version 3 and up, the 4-byte CRC32 that follows them.
 *
 * @param majorVersion - CRAM major version; the CRC32 is only read when >= 3
 * @param buffer - bytes to decode the ITF8 values from
 * @param dataView - view used for the fixed-width CRC32 read
 *   (NOTE(review): presumably covers the same bytes as `buffer`, so both
 *   share `offset` -- confirm at the call site)
 * @param offset - byte position of the landmark count
 * @returns the parsed values, the maximum number of bytes this section can
 *   occupy, and the offset just past the last byte consumed
 */
function getCramContainerHeader2(
  majorVersion: number,
  buffer: Buffer,
  dataView: DataView,
  offset: number,
) {
  // parseItf8 yields [decodedValue, bytesConsumed]
  const [numLandmarks, countBytes] = parseItf8(buffer, offset)
  offset += countBytes

  const landmarks: number[] = []
  for (let i = 0; i < numLandmarks; i++) {
    const [landmark, landmarkBytes] = parseItf8(buffer, offset)
    offset += landmarkBytes
    landmarks.push(landmark)
  }

  const hasCrc = majorVersion >= 3
  const crcLength = hasCrc ? 4 : 0
  let crc32: number | undefined
  if (hasCrc) {
    // CRAM stores fixed-width integers little-endian; DataView defaults to
    // big-endian unless the second argument is true
    crc32 = dataView.getUint32(offset, true)
    // step over the checksum so the returned offset marks the end of the
    // section, as it does for the first header part
    offset += crcLength
  }

  return {
    value: {
      crc32,
      numLandmarks,
      landmarks,
    },
    // each ITF8 value takes at most 5 bytes
    maxLength: 5 + numLandmarks * 5 + crcLength,
    offset,
  }
}

// each of these is a function of the major and minor version
Expand All @@ -689,57 +729,8 @@ const versionedParsers = {
getCramDataSeriesEncodingMap,
getCramTagEncodingMap,
getCramCompressionHeader,

cramContainerHeader1(majorVersion: number) {
let parser = new Parser()
.int32('length') // byte size of the container data (blocks)
.itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
.itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
.itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
.itf8('numRecords') // number of records in the container
let maxLength = 4 + 5 * 4

if (majorVersion >= 3) {
parser = parser.ltf8('recordCounter') // 1-based sequential index of records in the file/stream.
maxLength += 9
} else if (majorVersion === 2) {
parser = parser.itf8('recordCounter')
maxLength += 5
}

if (majorVersion > 1) {
parser = parser.ltf8('numBases') // number of read bases
maxLength += 9
}
parser = parser
.itf8('numBlocks') // the number of blocks
.itf8('numLandmarks') // the number of landmarks
maxLength += 5 + 5

return { parser, maxLength }
},

cramContainerHeader2(majorVersion: number) {
let parser = new Parser()
.itf8('numLandmarks') // the number of blocks
// Each integer value of this array is a byte offset
// into the blocks byte array. Landmarks are used for
// random access indexing.
.array('landmarks', {
type: new Parser().itf8(),
length: 'numLandmarks',
})

let crcLength = 0
if (majorVersion >= 3) {
parser = parser.uint32('crc32')
crcLength = 4
}
return {
parser,
maxLength: (numLandmarks: number) => 5 + numLandmarks * 5 + crcLength,
}
},
getCramContainerHeader1,
getCramContainerHeader2,
}

export type CompressionMethod =
Expand Down
44 changes: 0 additions & 44 deletions src/typings/binary-parser.d.ts

This file was deleted.

60 changes: 0 additions & 60 deletions test/binary-parser.test.ts

This file was deleted.

7 changes: 0 additions & 7 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,6 @@
long "^4.0.0"
pako "^1.0.11"

"@gmod/binary-parser@^1.3.5":
version "1.4.2"
resolved "https://registry.yarnpkg.com/@gmod/binary-parser/-/binary-parser-1.4.2.tgz#54f50e7b3437cc0882bd8ba6ef7c8e664ae3e892"
integrity sha512-X/UI86z2l6+qhy3Biai9Mog5/PPT3HR8+xo6qALijQ6FpRovkd/OPWOgy2fZX3vMRuMcyZKUva7V24Iir8HfhQ==
dependencies:
long "^4.0.0"

"@gmod/indexedfasta@^2.1.0":
version "2.1.1"
resolved "https://registry.yarnpkg.com/@gmod/indexedfasta/-/indexedfasta-2.1.1.tgz#aa8eaf6be21f634f11da90dd8494f3df5ed7e50d"
Expand Down

0 comments on commit ee4e6b7

Please sign in to comment.