Skip to content

Commit

Permalink
Merge pull request #1663 from gchq/feature/backend-stream-files-to-mo…
Browse files Browse the repository at this point in the history
…delscan-api

Stream files to the ModelScan API endpoint rather than hold entire files in memory
  • Loading branch information
PE39806 authored Dec 6, 2024
2 parents 7f494c3 + 697d5e9 commit 1f813f4
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 19 deletions.
13 changes: 11 additions & 2 deletions backend/src/clients/modelScan.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fetch, { Response } from 'node-fetch'
import { Readable } from 'stream'

import config from '../utils/config.js'
import { BadReq, InternalError } from '../utils/error.js'
Expand Down Expand Up @@ -65,13 +66,21 @@ export async function getModelScanInfo() {
return (await res.json()) as ModelScanInfoResponse
}

export async function scanFile(file: Blob, file_name: string) {
export async function scanStream(stream: Readable, fileName: string, fileSize: number) {
const url = `${config.avScanning.modelscan.protocol}://${config.avScanning.modelscan.host}:${config.avScanning.modelscan.port}`
let res: Response

try {
const formData = new FormData()
formData.append('in_file', file, file_name)
formData.append(
'in_file',
{
[Symbol.toStringTag]: 'File',
size: fileSize,
stream: () => stream,
},
fileName,
)

res = await fetch(`${url}/scan/file`, {
method: 'POST',
Expand Down
7 changes: 2 additions & 5 deletions backend/src/connectors/fileScanning/modelScan.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { Response } from 'node-fetch'
import { Readable } from 'stream'

import { getModelScanInfo, scanFile } from '../../clients/modelScan.js'
import { getModelScanInfo, scanStream } from '../../clients/modelScan.js'
import { getObjectStream } from '../../clients/s3.js'
import { FileInterfaceDoc, ScanState } from '../../models/File.js'
import log from '../../services/log.js'
Expand Down Expand Up @@ -39,9 +38,7 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector {

const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
try {
// TODO: see if it's possible to directly send the Readable stream rather than a blob
const fileBlob = await new Response(s3Stream).blob()
const scanResults = await scanFile(fileBlob, file.name)
const scanResults = await scanStream(s3Stream, file.name, file.size)

const issues = scanResults.summary.total_issues
const isInfected = issues > 0
Expand Down
17 changes: 12 additions & 5 deletions backend/test/clients/__snapshots__/modelScan.spec.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ exports[`clients > modelScan > getModelScanInfo > success 1`] = `
]
`;

exports[`clients > modelScan > scanFile > success 1`] = `
exports[`clients > modelScan > scanStream > success 1`] = `
[
[
"undefined://undefined:undefined/scan/file",
Expand All @@ -23,10 +23,17 @@ exports[`clients > modelScan > scanFile > success 1`] = `
Symbol(state): [
{
"name": "in_file",
"value": File {
Symbol(kHandle): Blob {},
Symbol(kLength): 0,
Symbol(kType): "application/x-hdf5",
"value": FileLike {
Symbol(state): {
"blobLike": {
"size": 0,
"stream": [Function],
Symbol(Symbol.toStringTag): "File",
},
"lastModified": 0,
"name": "safe_model.h5",
"type": undefined,
},
},
},
],
Expand Down
19 changes: 12 additions & 7 deletions backend/test/clients/modelScan.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { PassThrough } from 'stream'
import { describe, expect, test, vi } from 'vitest'

import { getModelScanInfo, scanFile } from '../../src/clients/modelScan.js'
import { getModelScanInfo, scanStream } from '../../src/clients/modelScan.js'

const configMock = vi.hoisted(() => ({
avScanning: {
Expand Down Expand Up @@ -59,7 +60,7 @@ describe('clients > modelScan', () => {
expect(() => getModelScanInfo()).rejects.toThrowError(/^Unable to communicate with the ModelScan service./)
})

test('scanFile > success', async () => {
test('scanStream > success', async () => {
const expectedResponse = {
summary: {
total_issues: 0,
Expand Down Expand Up @@ -90,28 +91,32 @@ describe('clients > modelScan', () => {
text: vi.fn(),
json: vi.fn(() => expectedResponse),
})
const response = await scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5')
// force lastModified to be 0
const date = new Date(1970, 0, 1, 0)
vi.setSystemTime(date)

const response = await scanStream(new PassThrough(), 'safe_model.h5', 0)

expect(fetchMock.default).toBeCalled()
expect(fetchMock.default.mock.calls).toMatchSnapshot()
expect(response).toStrictEqual(expectedResponse)
})

test('scanFile > bad response', async () => {
test('scanStream > bad response', async () => {
fetchMock.default.mockResolvedValueOnce({
ok: false,
text: vi.fn(() => 'Unrecognised response'),
json: vi.fn(),
})
expect(() => scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5')).rejects.toThrowError(
expect(() => scanStream(new PassThrough(), 'safe_model.h5', 0)).rejects.toThrowError(
/^Unrecognised response returned by the ModelScan service./,
)
})

test('scanFile > rejected', async () => {
test('scanStream > rejected', async () => {
fetchMock.default.mockRejectedValueOnce('Unable to communicate with the inferencing service.')

expect(() => scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5')).rejects.toThrowError(
expect(() => scanStream(new PassThrough(), 'safe_model.h5', 0)).rejects.toThrowError(
/^Unable to communicate with the ModelScan service./,
)
})
Expand Down

0 comments on commit 1f813f4

Please sign in to comment.