From d4d400217b0e000b8262febd1ca8dd2f6a2d9783 Mon Sep 17 00:00:00 2001 From: pedr Date: Mon, 22 Jan 2024 11:46:18 -0300 Subject: [PATCH] API: Increase protection of files downloaded via the REST API (#9676) --- .../lib/services/rest/routes/notes.test.ts | 95 ++++++++++++++----- packages/lib/services/rest/routes/notes.ts | 82 ++++++++++------ 2 files changed, 126 insertions(+), 51 deletions(-) diff --git a/packages/lib/services/rest/routes/notes.test.ts b/packages/lib/services/rest/routes/notes.test.ts index 6e49eb11a74..05471976bd5 100644 --- a/packages/lib/services/rest/routes/notes.test.ts +++ b/packages/lib/services/rest/routes/notes.test.ts @@ -1,7 +1,12 @@ import Logger from '@joplin/utils/Logger'; import shim from '../../../shim'; -import uuid from '../../../uuid'; import { downloadMediaFile } from './notes'; +import Setting from '../../../models/Setting'; +import { readFile, readdir, remove, writeFile } from 'fs-extra'; +const md5 = require('md5'); + +const imagePath = `${__dirname}/../../../images/SideMenuHeader.png`; +const jpgBase64Content = '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/wAALCAAFAAUBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EAB8QAAEEAQUBAAAAAAAAAAAAAAQBAgUGAwAREiExM//aAAgBAQAAPwBJarVpGHm7KWbapCSwyZ6FDjkLyYE1W/LHyV2zfOk2TrzX/9k='; describe('routes/notes', () => { @@ -21,44 +26,50 @@ describe('routes/notes', () => { 'https://joplinapp.org/valid/image_url.png', 'http://joplinapp.org/valid/image_url.png', ])('should try to download and return a local path to a valid URL', async (url) => { - const fetchBlobSpy = jest.fn(); - jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy); - jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value'); + const fetchBlobSpy = jest.fn(async (_url, options) => { + await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64')); + }); + const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(fetchBlobSpy); const response = await downloadMediaFile(url); - expect(response.endsWith('mocked_uuid_value.png')).toBe(true); - expect(fetchBlobSpy).toBeCalledTimes(1); + const files = await readdir(Setting.value('tempDir')); + + expect(files.length).toBe(1); + expect(fetchBlobSpy).toHaveBeenCalledTimes(1); + expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`); + await remove(response); + spy.mockRestore(); }); test('should get file from local drive if protocol allows it', async () => { - const url = 'file://valid/image.png'; + const url = `file:///${imagePath}`; + const originalFileContent = await readFile(imagePath); - const fsDriverCopySpy = jest.fn(); - jest.spyOn(shim, 'fsDriver').mockImplementation(() => { - return { - copy: fsDriverCopySpy, - } as any; - }); - jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value'); + const response = await downloadMediaFile(url, null, ['file:']); - const response = await downloadMediaFile(url); + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(1); + expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`); - expect(response.endsWith('mocked_uuid_value.png')).toBe(true); - expect(fsDriverCopySpy).toBeCalledTimes(1); + const responseFileContent = await readFile(response); + expect(md5(responseFileContent)).toBe(md5(originalFileContent)); + await remove(response); }); test('should be able to handle URLs with data', async () => { const url = 'data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl3/zy7////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAkAABAALAAAAAAQABAAAAVVICSOZGlCQAosJ6mu7fiyZeKqNKToQGDsM8hBADgUXoGAiqhSvp5QAnQKGIgUhwFUYLCVDFCrKUE1lBavAViFIDlTImbKC5Gm2hB0SlBCBMQiB0UjIQA7'; - - const imageFromDataUrlSpy = jest.fn(); - jest.spyOn(shim, 'imageFromDataUrl').mockImplementation(imageFromDataUrlSpy); - jest.spyOn(uuid, 'create').mockReturnValue('mocked_uuid_value'); + const originalFileContent = Buffer.from(url.split('data:image/gif;base64,')[1], 'base64'); const response = await downloadMediaFile(url); - expect(response.endsWith('mocked_uuid_value.gif')).toBe(true); - expect(imageFromDataUrlSpy).toBeCalledTimes(1); + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(1); + expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`); + + const responseFileContent = await readFile(response); + expect(md5(responseFileContent)).toBe(md5(originalFileContent)); + await remove(response); }); test('should not process URLs with data that is not image type', async () => { @@ -68,6 +79,8 @@ describe('routes/notes', () => { const response = await downloadMediaFile(url); Logger.globalLogger.enabled = true; + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(0); expect(response).toBe(''); }); @@ -76,6 +89,42 @@ describe('routes/notes', () => { const response = await downloadMediaFile(url); + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(0); expect(response).toBe(''); }); + + test('should not copy content from invalid protocols', async () => { + const url = 'file:///home/user/file.db'; + + const allowedProtocols: string[] = []; + const mediaFilePath = await downloadMediaFile(url, null, allowedProtocols); + + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(0); + expect(mediaFilePath).toBe(''); + }); + + test.each([ + 'https://joplinapp.org/valid/image_url', + 'https://joplinapp.org/valid/image_url.invalid_url', + ])('should correct the file extension in filename from files without or invalid ones', async (url) => { + const spy = jest.spyOn(shim, 'fetchBlob').mockImplementation(async (_url, options) => { + await writeFile(options.path, Buffer.from(jpgBase64Content, 'base64')); + return { + headers: { + 'content-type': 'image/jpg', + }, + }; + }); + + const response = await downloadMediaFile(url); + + const files = await readdir(Setting.value('tempDir')); + expect(files.length).toBe(1); + expect(response).toBe(`${Setting.value('tempDir')}/${files[0]}`); + + await remove(response); + spy.mockRestore(); + }); }); diff --git a/packages/lib/services/rest/routes/notes.ts b/packages/lib/services/rest/routes/notes.ts index 35346890509..ca375a0a1d1 100644 --- a/packages/lib/services/rest/routes/notes.ts +++ b/packages/lib/services/rest/routes/notes.ts @@ -189,67 +189,86 @@ async function tryToGuessExtFromMimeType(response: any, mediaPath: string) { return newMediaPath; } -export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions) { - logger.info('Downloading media file', url); - const tempDir = Setting.value('tempDir'); +const getFileExtension = (url: string, isDataUrl: boolean) => { + let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase()); + if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it. + if (fileExt) fileExt = `.${fileExt}`; - // The URL we get to download have been extracted from the Markdown document - url = markdownUtils.unescapeLinkUrl(url); + return fileExt; +}; - const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0; +const generateMediaPath = (url: string, isDataUrl: boolean, fileExt: string) => { + const tempDir = Setting.value('tempDir'); + const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url); + // Append a UUID because simply checking if the file exists is not enough since + // multiple resources can be downloaded at the same time (race condition). + const mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`; + return mediaPath; +}; + +const isValidUrl = (url: string, isDataUrl: boolean, urlProtocol?: string, allowedProtocols?: string[]) => { + if (!urlProtocol) return false; // PDFs and other heavy resoucres are often served as seperate files insted of data urls, its very unlikely to encounter a pdf as a data url if (isDataUrl && !url.toLowerCase().startsWith('data:image/')) { logger.warn(`Resources in data URL format is only supported for images ${url}`); - return ''; + return false; } - const invalidProtocols = ['cid:']; + const defaultAllowedProtocols = ['http:', 'https:', 'data:']; + const allowed = allowedProtocols ?? defaultAllowedProtocols; + const isAllowedProtocol = allowed.includes(urlProtocol); + + return isAllowedProtocol; +}; + +export async function downloadMediaFile(url: string, fetchOptions?: FetchOptions, allowedProtocols?: string[]) { + logger.info('Downloading media file', url); + + // The URL we get to download have been extracted from the Markdown document + url = markdownUtils.unescapeLinkUrl(url); + + const isDataUrl = url && url.toLowerCase().indexOf('data:') === 0; const urlProtocol = urlUtils.urlProtocol(url)?.toLowerCase(); - if (!urlProtocol || invalidProtocols.includes(urlProtocol)) { + if (!isValidUrl(url, isDataUrl, urlProtocol, allowedProtocols)) { return ''; } - const name = isDataUrl ? md5(`${Math.random()}_${Date.now()}`) : filename(url); - let fileExt = isDataUrl ? mimeUtils.toFileExtension(mimeUtils.fromDataUrl(url)) : safeFileExtension(fileExtension(url).toLowerCase()); - if (!mimeUtils.fromFileExtension(fileExt)) fileExt = ''; // If the file extension is unknown - clear it. - if (fileExt) fileExt = `.${fileExt}`; - - // Append a UUID because simply checking if the file exists is not enough since - // multiple resources can be downloaded at the same time (race condition). - let mediaPath = `${tempDir}/${safeFilename(name)}_${uuid.create()}${fileExt}`; + const fileExt = getFileExtension(url, isDataUrl); + const mediaPath = generateMediaPath(url, isDataUrl, fileExt); + let newMediaPath = undefined; try { if (isDataUrl) { await shim.imageFromDataUrl(url, mediaPath); } else if (urlProtocol === 'file:') { - // Can't think of any reason to disallow this at this point - // if (!allowFileProtocolImages) throw new Error('For security reasons, this URL with file:// protocol cannot be downloaded'); const localPath = fileUriToPath(url); await shim.fsDriver().copy(localPath, mediaPath); } else { const response = await shim.fetchBlob(url, { path: mediaPath, maxRetry: 1, ...fetchOptions }); - // If we could not find the file extension from the URL, try to get it - // now based on the Content-Type header. - if (!fileExt) mediaPath = await tryToGuessExtFromMimeType(response, mediaPath); + if (!fileExt) { + // If we could not find the file extension from the URL, try to get it + // now based on the Content-Type header. + newMediaPath = await tryToGuessExtFromMimeType(response, mediaPath); + } } - return mediaPath; + return newMediaPath ?? mediaPath; } catch (error) { logger.warn(`Cannot download image at ${url}`, error); return ''; } } -async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions) { +async function downloadMediaFiles(urls: string[], fetchOptions?: FetchOptions, allowedProtocols?: string[]) { const PromisePool = require('es6-promise-pool'); const output: any = {}; const downloadOne = async (url: string) => { - const mediaPath = await downloadMediaFile(url, fetchOptions); // , allowFileProtocolImages); + const mediaPath = await downloadMediaFile(url, fetchOptions, allowedProtocols); if (mediaPath) output[url] = { path: mediaPath, originalUrl: url }; }; @@ -374,14 +393,20 @@ async function attachImageFromDataUrl(note: any, imageDataUrl: string, cropRect: return await shim.attachFileToNote(note, tempFilePath); } -export const extractNoteFromHTML = async (requestNote: RequestNote, requestId: number, imageSizes: any, fetchOptions?: FetchOptions) => { +export const extractNoteFromHTML = async ( + requestNote: RequestNote, + requestId: number, + imageSizes: any, + fetchOptions?: FetchOptions, + allowedProtocols?: string[], +) => { const note = await requestNoteToNote(requestNote); const mediaUrls = extractMediaUrls(note.markup_language, note.body); logger.info(`Request (${requestId}): Downloading media files: ${mediaUrls.length}`); - const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions); // , allowFileProtocolImages); + const mediaFiles = await downloadMediaFiles(mediaUrls, fetchOptions, allowedProtocols); logger.info(`Request (${requestId}): Creating resources from paths: ${Object.getOwnPropertyNames(mediaFiles).length}`); @@ -433,7 +458,8 @@ export default async function(request: Request, id: string = null, link: string logger.info('Images:', imageSizes); - const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes); + const allowedProtocolsForDownloadMediaFiles = ['http:', 'https:', 'file:', 'data:']; + const extracted = await extractNoteFromHTML(requestNote, requestId, imageSizes, undefined, allowedProtocolsForDownloadMediaFiles); let note = await Note.save(extracted.note, extracted.saveOptions);