From 26a547f621b5d8f4a28715e478c8494d5bffa768 Mon Sep 17 00:00:00 2001 From: PE39806 <185931318+PE39806@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:33:22 +0000 Subject: [PATCH 1/3] stream file to modelscan API endpoint rather than hold file in memory --- backend/src/clients/modelScan.ts | 13 +++++++++++-- backend/src/connectors/fileScanning/modelScan.ts | 7 ++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/backend/src/clients/modelScan.ts b/backend/src/clients/modelScan.ts index 61928bf57..1e04bdbea 100644 --- a/backend/src/clients/modelScan.ts +++ b/backend/src/clients/modelScan.ts @@ -1,4 +1,5 @@ import fetch, { Response } from 'node-fetch' +import { Readable } from 'stream' import config from '../utils/config.js' import { BadReq, InternalError } from '../utils/error.js' @@ -65,13 +66,21 @@ export async function getModelScanInfo() { return (await res.json()) as ModelScanInfoResponse } -export async function scanFile(file: Blob, file_name: string) { +export async function scanStream(stream: Readable, file_name: string, file_size: number) { const url = `${config.avScanning.modelscan.protocol}://${config.avScanning.modelscan.host}:${config.avScanning.modelscan.port}` let res: Response try { const formData = new FormData() - formData.append('in_file', file, file_name) + formData.append( + 'in_file', + { + [Symbol.toStringTag]: 'File', + size: file_size, + stream: () => stream, + }, + file_name, + ) res = await fetch(`${url}/scan/file`, { method: 'POST', diff --git a/backend/src/connectors/fileScanning/modelScan.ts b/backend/src/connectors/fileScanning/modelScan.ts index 25d03f7e0..ea5323491 100644 --- a/backend/src/connectors/fileScanning/modelScan.ts +++ b/backend/src/connectors/fileScanning/modelScan.ts @@ -1,7 +1,6 @@ -import { Response } from 'node-fetch' import { Readable } from 'stream' -import { getModelScanInfo, scanFile } from '../../clients/modelScan.js' +import { getModelScanInfo, scanStream } from '../../clients/modelScan.js' import { getObjectStream } from '../../clients/s3.js' import { FileInterfaceDoc, ScanState } from '../../models/File.js' import log from '../../services/log.js' @@ -39,9 +38,7 @@ export class ModelScanFileScanningConnector extends BaseFileScanningConnector { const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable try { - // TODO: see if it's possible to directly send the Readable stream rather than a blob - const fileBlob = await new Response(s3Stream).blob() - const scanResults = await scanFile(fileBlob, file.name) + const scanResults = await scanStream(s3Stream, file.name, file.size) const issues = scanResults.summary.total_issues const isInfected = issues > 0 From ca6c4196878ddfc0b718b8b9898d4ba68046ab7f Mon Sep 17 00:00:00 2001 From: PE39806 <185931318+PE39806@users.noreply.github.com> Date: Thu, 5 Dec 2024 14:02:49 +0000 Subject: [PATCH 2/3] Fix tests --- .../__snapshots__/modelScan.spec.ts.snap | 17 ++++++++++++----- backend/test/clients/modelScan.spec.ts | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/backend/test/clients/__snapshots__/modelScan.spec.ts.snap b/backend/test/clients/__snapshots__/modelScan.spec.ts.snap index 84097df6f..022587dc1 100644 --- a/backend/test/clients/__snapshots__/modelScan.spec.ts.snap +++ b/backend/test/clients/__snapshots__/modelScan.spec.ts.snap @@ -14,7 +14,7 @@ exports[`clients > modelScan > getModelScanInfo > success 1`] = ` ] `; -exports[`clients > modelScan > scanFile > success 1`] = ` +exports[`clients > modelScan > scanStream > success 1`] = ` [ [ "undefined://undefined:undefined/scan/file", @@ -23,10 +23,17 @@ exports[`clients > modelScan > scanFile > success 1`] = ` Symbol(state): [ { "name": "in_file", - "value": File { - Symbol(kHandle): Blob {}, - Symbol(kLength): 0, - Symbol(kType): "application/x-hdf5", + "value": FileLike { + Symbol(state): { + "blobLike": { + "size": 0, + "stream": [Function], + Symbol(Symbol.toStringTag): "File", + }, + "lastModified": 0, + "name": "safe_model.h5", + "type": undefined, + }, }, }, ], diff --git a/backend/test/clients/modelScan.spec.ts b/backend/test/clients/modelScan.spec.ts index 6a6485a6a..394a9f33d 100644 --- a/backend/test/clients/modelScan.spec.ts +++ b/backend/test/clients/modelScan.spec.ts @@ -1,6 +1,7 @@ +import { PassThrough } from 'stream' import { describe, expect, test, vi } from 'vitest' -import { getModelScanInfo, scanFile } from '../../src/clients/modelScan.js' +import { getModelScanInfo, scanStream } from '../../src/clients/modelScan.js' const configMock = vi.hoisted(() => ({ avScanning: { @@ -59,7 +60,7 @@ describe('clients > modelScan', () => { expect(() => getModelScanInfo()).rejects.toThrowError(/^Unable to communicate with the ModelScan service./) }) - test('scanFile > success', async () => { + test('scanStream > success', async () => { const expectedResponse = { summary: { total_issues: 0, @@ -90,28 +91,32 @@ describe('clients > modelScan', () => { text: vi.fn(), json: vi.fn(() => expectedResponse), }) - const response = await scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5') + // force lastModified to be 0 + const date = new Date(1970, 0, 1, 0) + vi.setSystemTime(date) + + const response = await scanStream(new PassThrough(), 'safe_model.h5', 0) expect(fetchMock.default).toBeCalled() expect(fetchMock.default.mock.calls).toMatchSnapshot() expect(response).toStrictEqual(expectedResponse) }) - test('scanFile > bad response', async () => { + test('scanStream > bad response', async () => { fetchMock.default.mockResolvedValueOnce({ ok: false, text: vi.fn(() => 'Unrecognised response'), json: vi.fn(), }) - expect(() => scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5')).rejects.toThrowError( + expect(() => scanStream(new PassThrough(), 'safe_model.h5', 0)).rejects.toThrowError( /^Unrecognised response returned by the ModelScan service./, ) }) - test('scanFile > rejected', async () => { + test('scanStream > rejected', async () => { fetchMock.default.mockRejectedValueOnce('Unable to communicate with the inferencing service.') - expect(() => scanFile(new Blob([''], { type: 'application/x-hdf5' }), 'safe_model.h5')).rejects.toThrowError( + expect(() => scanStream(new PassThrough(), 'safe_model.h5', 0)).rejects.toThrowError( /^Unable to communicate with the ModelScan service./, ) }) From 697d5e97fa0d13a73306840fef8403eba90ac4a6 Mon Sep 17 00:00:00 2001 From: PE39806 <185931318+PE39806@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:25:23 +0000 Subject: [PATCH 3/3] change snake_case params to camelCase --- backend/src/clients/modelScan.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/src/clients/modelScan.ts b/backend/src/clients/modelScan.ts index 1e04bdbea..a71d48b8d 100644 --- a/backend/src/clients/modelScan.ts +++ b/backend/src/clients/modelScan.ts @@ -66,7 +66,7 @@ export async function getModelScanInfo() { return (await res.json()) as ModelScanInfoResponse } -export async function scanStream(stream: Readable, file_name: string, file_size: number) { +export async function scanStream(stream: Readable, fileName: string, fileSize: number) { const url = `${config.avScanning.modelscan.protocol}://${config.avScanning.modelscan.host}:${config.avScanning.modelscan.port}` let res: Response @@ -76,10 +76,10 @@ export async function scanStream(stream: Readable, file_name: string, file_size: 'in_file', { [Symbol.toStringTag]: 'File', - size: file_size, + size: fileSize, stream: () => stream, }, - file_name, + fileName, ) res = await fetch(`${url}/scan/file`, {