From 45dc8e21c50c8240ea851bf678d95f18a8d7c1fa Mon Sep 17 00:00:00 2001 From: davidmurray Date: Thu, 9 Nov 2023 11:02:58 -0500 Subject: [PATCH] uploads.socketRoutes.ts: Use node-stream-zip library to unzip the file This does it using a stream rather than loading all files into RAM at once. Fixes #691 --- packages/transition-backend/package.json | 1 + .../src/api/uploads.socketRoutes.ts | 23 ++++++++----------- yarn.lock | 5 ++++ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/packages/transition-backend/package.json b/packages/transition-backend/package.json index f32eb1ac7..25e8d4287 100644 --- a/packages/transition-backend/package.json +++ b/packages/transition-backend/package.json @@ -49,6 +49,7 @@ "lodash": "^4.17.21", "moment": "^2.29.4", "morgan": "^1.10.0", + "node-stream-zip": "^1.15.0", "p-queue": "^6.6.2", "papaparse": "^5.3.1", "pbf": "^3.2.1", diff --git a/packages/transition-backend/src/api/uploads.socketRoutes.ts b/packages/transition-backend/src/api/uploads.socketRoutes.ts index 8d55f5524..40b4cfb63 100644 --- a/packages/transition-backend/src/api/uploads.socketRoutes.ts +++ b/packages/transition-backend/src/api/uploads.socketRoutes.ts @@ -4,8 +4,7 @@ * This file is licensed under the MIT License. 
* License text available at https://opensource.org/licenses/MIT */ -import fs from 'fs'; -import JSZip from 'jszip'; +import StreamZip from 'node-stream-zip'; import SocketIO from 'socket.io'; import { directoryManager } from 'chaire-lib-backend/lib/utils/filesystem/directoryManager'; @@ -29,19 +28,15 @@ const gtfsImportFunction = async (socket: SocketIO.Socket, absoluteUserDir: stri // TODO: Consider moving to an `extract` method if this is needed anywhere else try { - const zipData = fs.readFileSync(filePath); - const zip = new JSZip(); - const zipFileContent = await zip.loadAsync(zipData); - const filePromises = Object.keys(zipFileContent.files).map(async (filename) => { - const fileInfo = zip.file(filename); - if (fileInfo === null) { - return; - } - const content = await fileInfo.async('nodebuffer'); - const dest = gtfsFilesDirectoryPath + filename; - fs.writeFileSync(dest, content); + const zip = new StreamZip.async({ file: filePath }); + zip.on('extract', (entry, file) => { + console.log(`GTFS import: extracted ${entry.name} to ${file}`); }); - Promise.all(filePromises); + // This extracts in a stream-based manner so the data is never fully loaded in RAM. 
+ const count = await zip.extract(null, gtfsFilesDirectoryPath); + console.log(`GTFS import: extracted ${count} entries`); + + await zip.close(); console.log('GTFS zip file upload Complete.'); socket.emit('gtfsImporter.gtfsFileUnzipped'); diff --git a/yarn.lock b/yarn.lock index 5a2ab204e..9c62370d5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10631,6 +10631,11 @@ node-releases@^2.0.1: resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.1.tgz#3d1d395f204f1f2f29a54358b9fb678765ad2fc5" integrity sha512-CqyzN6z7Q6aMeF/ktcMVTzhAHCEpf8SOarwpzpf8pNBY2k5/oM34UHldUwp8VKI7uxct2HxSRdJjBaZeESzcxA== +node-stream-zip@^1.15.0: + version "1.15.0" + resolved "https://registry.yarnpkg.com/node-stream-zip/-/node-stream-zip-1.15.0.tgz#158adb88ed8004c6c49a396b50a6a5de3bca33ea" + integrity sha512-LN4fydt9TqhZhThkZIVQnF9cwjU3qmUH9h78Mx/K7d3VvfRqqwthLwJEUOEL0QPZ0XQmNN7be5Ggit5+4dq3Bw== + nodemailer-mock@^1.5.8: version "1.5.8" resolved "https://registry.yarnpkg.com/nodemailer-mock/-/nodemailer-mock-1.5.8.tgz#22e3d54db735f426fc590a0f975ded3a8cbd6b12"