From efa42226d737142cc29d66ddc3fd800559acce41 Mon Sep 17 00:00:00 2001 From: gabo Date: Wed, 1 Sep 2021 15:35:12 +0200 Subject: [PATCH 01/62] WIP --- .../tasksmanager/specs/taskManager.spec.ts | 24 ++++++++++++++ app/api/tasksmanager/taskManager.ts | 14 ++++++++ package.json | 2 ++ yarn.lock | 33 +++++++++++++++++++ 4 files changed, 73 insertions(+) create mode 100644 app/api/tasksmanager/specs/taskManager.spec.ts create mode 100644 app/api/tasksmanager/taskManager.ts diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts new file mode 100644 index 0000000000..07771fd84b --- /dev/null +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -0,0 +1,24 @@ +import { TaskManager } from 'api/tasksmanager/taskManager'; +import RedisSMQ from "rsmq"; + +describe('taskManager', () => { + describe('addTask', () => { + const redis = require('redis-mock'); + const client = redis.createClient(); + const queue = new RedisSMQ({ client: client, ns: 'rsmq' }); + + // jest.mock('redis', () => jest.requireActual('redis-mock')); + + const rsmq = jest.mock('rsmq'); + // const queue = rsmq.RedisSMQ(); + // const queue = { + // sendMessage: jest.fn(), + // }; + + const taskManager = new TaskManager(client); + taskManager.addTask({ task: 1 }); + it('should add a task', async () => { + expect().toHaveBeenCalled(); + }); + }); +}); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts new file mode 100644 index 0000000000..8e0012f5c8 --- /dev/null +++ b/app/api/tasksmanager/taskManager.ts @@ -0,0 +1,14 @@ +import RedisSMQ from 'rsmq'; + +class TaskManager { + private queue; + constructor(queue) { + this.redisClient = redisClient; + } + + addTask(task: string) { + this.queue.sendMessage({ qname: this.queueName, message: task }, () => {}); + } +} + +export { TaskManager }; diff --git a/package.json b/package.json index 4143dcb9d3..21e28750e1 100644 --- a/package.json +++ b/package.json @@ -161,11 +161,13 @@ "react-widgets": "v4.5.0", "recharts": "1.3.6", "redis": "^3.0.2", + "redis-mock": "^0.56.3", "redux": "^3.7.2", "redux-devtools-extension": "^2.13.2", "redux-thunk": "^2.0.1", "reselect": "^4.0.0", "rison-node": "^2.1.1", + "rsmq": "^0.12.4", "rtlcss": "^2.6.0", "sanitize-filename": "^1.6.3", "serialize-javascript": "^5.0.1", diff --git a/yarn.lock b/yarn.lock index 5ea1c89b47..b6b91e3ec4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5051,6 +5051,11 @@ denque@^1.4.1: resolved "https://registry.yarnpkg.com/denque/-/denque-1.4.1.tgz#6744ff7641c148c3f8a69c307e51235c1f4a37cf" integrity sha512-OfzPuSZKGcgr96rf1oODnfjqBFmr1DVoc/TrItj3Ohe0Ah1C5WX5Baquw/9U9KovnQ88EqmJbD66rKYUQYN1tQ== +denque@^1.5.0: + version "1.5.1" + resolved "https://registry.yarnpkg.com/denque/-/denque-1.5.1.tgz#07f670e29c9a78f8faecb2566a1e2c11929c5cbf" + integrity sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw== + depd@2.0.0, depd@~2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" @@ -12592,11 +12597,21 @@ redis-commands@^1.5.0: resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.5.0.tgz#80d2e20698fe688f227127ff9e5164a7dd17e785" integrity sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg== +redis-commands@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.7.0.tgz#15a6fea2d58281e27b1cd1acfb4b293e278c3a89" + integrity sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ== + redis-errors@^1.0.0, redis-errors@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/redis-errors/-/redis-errors-1.2.0.tgz#eb62d2adb15e4eaf4610c04afe1529384250abad" integrity sha1-62LSrbFeTq9GEMBK/hUpOEJQq60= +redis-mock@^0.56.3: + version "0.56.3" + resolved "https://registry.yarnpkg.com/redis-mock/-/redis-mock-0.56.3.tgz#e96471bcc774ddc514c2fc49cdd03cab2baecd89" + integrity sha512-ynaJhqk0Qf3Qajnwvy4aOjS4Mdf9IBkELWtjd+NYhpiqu4QCNq6Vf3Q7c++XRPGiKiwRj9HWr0crcwy7EiPjYQ== + redis-parser@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/redis-parser/-/redis-parser-3.0.0.tgz#b66d828cdcafe6b4b8a428a7def4c6bcac31c8b4" @@ -12614,6 +12629,16 @@ redis@^3.0.0, redis@^3.0.2: redis-errors "^1.2.0" redis-parser "^3.0.0" +redis@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/redis/-/redis-3.1.2.tgz#766851117e80653d23e0ed536254677ab647638c" + integrity sha512-grn5KoZLr/qrRQVwoSkmzdbw6pwF+/rwODtrOr6vuBRiR/f3rjSTGupbF90Zpqm2oenix8Do6RV7pYEkGwlKkw== + dependencies: + denque "^1.5.0" + redis-commands "^1.7.0" + redis-errors "^1.2.0" + redis-parser "^3.0.0" + reduce-css-calc@~1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/reduce-css-calc/-/reduce-css-calc-1.3.0.tgz#747c914e049614a4c9cfbba629871ad1d2927716" @@ -13071,6 +13096,14 @@ router-ips@^1.0.0: resolved "https://registry.yarnpkg.com/router-ips/-/router-ips-1.0.0.tgz#44e00858ebebc0133d58e40b2cd8a1fbb04203f5" integrity sha1-ROAIWOvrwBM9WOQLLNih+7BCA/U= +rsmq@^0.12.4: + version "0.12.4" + resolved "https://registry.yarnpkg.com/rsmq/-/rsmq-0.12.4.tgz#de42490956666aa7ddc245f4a5dff9deea91615e" + integrity sha512-xXs0MudraTG0ndRUo8QIG6/G0/xcPFag/kxbfZZ9Xuz7hTeaQLJKaZH5y0B4fcpLkWBXjC8A9ynIHula65bGBQ== + dependencies: + lodash "^4.17.21" + redis "^3.1.2" + rst-selector-parser@^2.2.3: version "2.2.3" resolved "https://registry.yarnpkg.com/rst-selector-parser/-/rst-selector-parser-2.2.3.tgz#81b230ea2fcc6066c89e3472de794285d9b03d91" From 15640517089094f373449aa7c9e28e2f61d6b483 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 1 Sep 2021 17:50:05 +0200 Subject: [PATCH 02/62] intial test for the taskManager --- .../tasksmanager/specs/taskManager.spec.ts | 29 ++++++++++--------- app/api/tasksmanager/taskManager.ts | 21 ++++++++++---- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 07771fd84b..6d91ad3239 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,24 +1,25 @@ import { TaskManager } from 'api/tasksmanager/taskManager'; -import RedisSMQ from "rsmq"; +import RedisSMQ from 'rsmq'; describe('taskManager', () => { - describe('addTask', () => { - const redis = require('redis-mock'); - const client = redis.createClient(); - const queue = new RedisSMQ({ client: client, ns: 'rsmq' }); + let taskManager: TaskManager; - // jest.mock('redis', () => jest.requireActual('redis-mock')); + let rsmq: Partial; + let queueName: string; - const rsmq = jest.mock('rsmq'); - // const queue = rsmq.RedisSMQ(); - // const queue = { - // sendMessage: jest.fn(), - // }; + beforeEach(() => { + queueName = 'testQueue'; + rsmq = { + createQueue: jest.fn(), + sendMessage: jest.fn(), + }; + taskManager = new TaskManager(rsmq as RedisSMQ, queueName); + }); - const taskManager = new TaskManager(client); - taskManager.addTask({ task: 1 }); + describe('addTask', () => { it('should add a task', async () => { - expect().toHaveBeenCalled(); + taskManager.addTask('hello'); + expect(rsmq.sendMessage).toHaveBeenCalledWith({ qname: queueName, message: 'hello' }); }); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 8e0012f5c8..5256704826 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -1,13 +1,24 @@ import RedisSMQ from 'rsmq'; class TaskManager { - private queue; - constructor(queue) { - this.redisClient = redisClient; + private rsmq: RedisSMQ; + + private queueName: string; + + constructor(rsmq: RedisSMQ, queueName: string) { + this.rsmq = rsmq; + this.queueName = queueName; + + rsmq.createQueue({ qname: queueName }, err => { + if (err && err.name !== 'queueExists') { + // if the error is `queueExists` we can keep going as it tells us that the queue is already there + throw err; + } + }); } - addTask(task: string) { - this.queue.sendMessage({ qname: this.queueName, message: task }, () => {}); + addTask(message: string) { + this.rsmq.sendMessage({ qname: this.queueName, message }); } } From a7a52c98d4810893726174f53b9131a970890b39 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 2 Sep 2021 14:14:17 +0200 Subject: [PATCH 03/62] RedisServer --- .gitignore | 2 + app/api/tasksmanager/RedisServer.ts | 48 +++++++++++ .../tasksmanager/specs/taskManager.spec.ts | 29 +++++-- app/api/tasksmanager/taskManager.ts | 10 +-- package.json | 2 + yarn.lock | 85 +++++++++++++++---- 6 files changed, 144 insertions(+), 32 deletions(-) create mode 100644 app/api/tasksmanager/RedisServer.ts diff --git a/.gitignore b/.gitignore index 8522abf189..8c85df2ac2 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,5 @@ custom_uploads/* test app/api/files/specs/file1 app/api/files/specs/file2 +redis +dump.rdb diff --git a/app/api/tasksmanager/RedisServer.ts b/app/api/tasksmanager/RedisServer.ts new file mode 100644 index 0000000000..b2e4f594e0 --- /dev/null +++ b/app/api/tasksmanager/RedisServer.ts @@ -0,0 +1,48 @@ +import { execSync } from 'child_process'; +import fs from 'fs'; +import Server from 'redis-server'; + +export class RedisServer { + server: Server; + + pathToBin: string; + + constructor() { + this.pathToBin = 'redis/redis-stable/src/redis-server'; + this.downloadRedis(); + } + + downloadRedis() { + if (fs.existsSync(this.pathToBin)) { + return; + } + + execSync( + `mkdir redis && cd redis + curl -O http://download.redis.io/redis-stable.tar.gz + tar xzvf redis-stable.tar.gz`, + { stdio: 'inherit' } + ); + + execSync('cd redis && tar xzvf redis-stable.tar.gz'); + + execSync( + `cd redis + cd redis-stable + make + make install` + ); + } + + async start() { + this.server = new Server({ + port: 6379, + bin: this.pathToBin, + }); + await this.server.open(); + } + + async stop() { + await this.server.close(); + } +} diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 6d91ad3239..4a053566e0 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,25 +1,36 @@ import { TaskManager } from 'api/tasksmanager/taskManager'; import RedisSMQ from 'rsmq'; +import { RedisServer } from '../RedisServer'; +import Redis from 'ioredis'; describe('taskManager', () => { let taskManager: TaskManager; - let rsmq: Partial; + let rsmq: RedisSMQ; let queueName: string; + let redisServer: RedisServer; + let redis: Redis; - beforeEach(() => { + beforeAll(async () => {}); + + afterAll(async () => {}); + + beforeEach(async () => { queueName = 'testQueue'; - rsmq = { - createQueue: jest.fn(), - sendMessage: jest.fn(), - }; - taskManager = new TaskManager(rsmq as RedisSMQ, queueName); }); describe('addTask', () => { it('should add a task', async () => { - taskManager.addTask('hello'); - expect(rsmq.sendMessage).toHaveBeenCalledWith({ qname: queueName, message: 'hello' }); + redisServer = new RedisServer(); + await redisServer.start(); + + redis = new Redis(); + rsmq = await new RedisSMQ({ client: redis }); + // taskManager = new TaskManager(rsmq, queueName); + // taskManager.addTask('hello'); + + // expect(rsmq.sendMessage).toHaveBeenCalledWith({ qname: queueName, message: 'hello' }); + await redisServer.stop(); }); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 5256704826..a0d160a460 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -6,20 +6,20 @@ class TaskManager { private queueName: string; constructor(rsmq: RedisSMQ, queueName: string) { + console.log(rsmq); this.rsmq = rsmq; this.queueName = queueName; + } - rsmq.createQueue({ qname: queueName }, err => { + addTask(message: string) { + this.rsmq.createQueue({ qname: this.queueName }, err => { if (err && err.name !== 'queueExists') { // if the error is `queueExists` we can keep going as it tells us that the queue is already there throw err; } + this.rsmq.sendMessage({ qname: this.queueName, message }, () => {}); }); } - - addTask(message: string) { - this.rsmq.sendMessage({ qname: this.queueName, message }); - } } export { TaskManager }; diff --git a/package.json b/package.json index 21e28750e1..f2bfa1d46e 100644 --- a/package.json +++ b/package.json @@ -111,6 +111,7 @@ "html-to-react": "^1.4.3", "htmlparser2": "^3.9.2", "immutable": "^3.7.6", + "ioredis": "^4.27.9", "is-reachable": "5.0.0", "isomorphic-fetch": "2.2.1", "joi": "^13.6.0", @@ -162,6 +163,7 @@ "recharts": "1.3.6", "redis": "^3.0.2", "redis-mock": "^0.56.3", + "redis-server": "^1.2.2", "redux": "^3.7.2", "redux-devtools-extension": "^2.13.2", "redux-thunk": "^2.0.1", diff --git a/yarn.lock b/yarn.lock index b6b91e3ec4..642b8ec87a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4053,6 +4053,11 @@ clone@^1.0.2: version "1.0.4" resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.4.tgz#da309cc263df15994c688ca902179ca3c7cd7c7e" +cluster-key-slot@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz#30474b2a981fb12172695833052bc0d01336d10d" + integrity sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw== + co@^4.6.0: version "4.6.0" resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184" @@ -4878,7 +4883,7 @@ debug@^3.0.1, debug@^3.2.6: dependencies: ms "^2.1.1" -debug@^4.2.0, debug@^4.3.2: +debug@^4.3.1: version "4.3.2" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" integrity sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw== @@ -5046,16 +5051,16 @@ delegates@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a" +denque@^1.1.0, denque@^1.5.0: + version "1.5.1" + resolved "https://registry.yarnpkg.com/denque/-/denque-1.5.1.tgz#07f670e29c9a78f8faecb2566a1e2c11929c5cbf" + integrity sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw== + denque@^1.4.1: version "1.4.1" resolved "https://registry.yarnpkg.com/denque/-/denque-1.4.1.tgz#6744ff7641c148c3f8a69c307e51235c1f4a37cf" integrity sha512-OfzPuSZKGcgr96rf1oODnfjqBFmr1DVoc/TrItj3Ohe0Ah1C5WX5Baquw/9U9KovnQ88EqmJbD66rKYUQYN1tQ== -denque@^1.5.0: - version "1.5.1" - resolved "https://registry.yarnpkg.com/denque/-/denque-1.5.1.tgz#07f670e29c9a78f8faecb2566a1e2c11929c5cbf" - integrity sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw== - depd@2.0.0, depd@~2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" @@ -7755,6 +7760,23 @@ invert-kv@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/invert-kv/-/invert-kv-2.0.0.tgz#7393f5afa59ec9ff5f67a27620d11c226e3eec02" +ioredis@^4.27.9: + version "4.27.9" + resolved "https://registry.yarnpkg.com/ioredis/-/ioredis-4.27.9.tgz#c27bbade9724f0b8f84c279fb1d567be785ba33d" + integrity sha512-hAwrx9F+OQ0uIvaJefuS3UTqW+ByOLyLIV+j0EH8ClNVxvFyH9Vmb08hCL4yje6mDYT5zMquShhypkd50RRzkg== + dependencies: + cluster-key-slot "^1.1.0" + debug "^4.3.1" + denque "^1.1.0" + lodash.defaults "^4.2.0" + lodash.flatten "^4.4.0" + lodash.isarguments "^3.1.0" + p-map "^2.1.0" + redis-commands "1.7.0" + redis-errors "^1.2.0" + redis-parser "^3.0.0" + standard-as-callback "^2.1.0" + ipaddr.js@1.9.1: version "1.9.1" resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3" @@ -9241,6 +9263,11 @@ lodash.debounce@^4.0.8: version "4.0.8" resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" +lodash.defaults@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz#d09178716ffea4dde9e5fb7b37f6f0802274580c" + integrity sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw= + lodash.escape@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98" @@ -9251,6 +9278,11 @@ lodash.escaperegexp@^4.1.2: resolved "https://registry.yarnpkg.com/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz#64762c48618082518ac3df4ccf5d5886dae20347" integrity sha1-ZHYsSGGAglGKw99Mz11YhtriA0c= +lodash.flatten@^4.4.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f" + integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8= + lodash.flattendeep@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/lodash.flattendeep/-/lodash.flattendeep-4.4.0.tgz#fb030917f86a3134e5bc9bec0d69e0013ddfedb2" @@ -9259,6 +9291,11 @@ lodash.get@^4.4.2, lodash.get@~4.4.2: version "4.4.2" resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99" +lodash.isarguments@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz#2f573d85c6a24289ff00663b491c1d338ff3458a" + integrity sha1-L1c9hcaiQon/AGY7SRwdM4/zRYo= + lodash.isboolean@^3.0.3: version "3.0.3" resolved "https://registry.yarnpkg.com/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz#6c2e171db2a257cd96802fd43b01b20d5f5870f6" @@ -10886,7 +10923,7 @@ p-locate@^4.1.0: dependencies: p-limit "^2.2.0" -p-map@^2.0.0: +p-map@^2.0.0, p-map@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175" integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw== @@ -11736,6 +11773,11 @@ promise-polyfill@^6.0.1: version "6.1.0" resolved "https://registry.yarnpkg.com/promise-polyfill/-/promise-polyfill-6.1.0.tgz#dfa96943ea9c121fca4de9b5868cb39d3472e057" +promise-queue@^2.2.5: + version "2.2.5" + resolved "https://registry.yarnpkg.com/promise-queue/-/promise-queue-2.2.5.tgz#2f6f5f7c0f6d08109e967659c79b88a9ed5e93b4" + integrity sha1-L29ffA9tCBCelnZZx5uIqe1ek7Q= + promise@^7.1.1: version "7.3.1" resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" @@ -12592,26 +12634,21 @@ redent@^1.0.0: indent-string "^2.1.0" strip-indent "^1.0.1" +redis-commands@1.7.0, redis-commands@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.7.0.tgz#15a6fea2d58281e27b1cd1acfb4b293e278c3a89" + integrity sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ== + redis-commands@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.5.0.tgz#80d2e20698fe688f227127ff9e5164a7dd17e785" integrity sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg== -redis-commands@^1.7.0: - version "1.7.0" - resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.7.0.tgz#15a6fea2d58281e27b1cd1acfb4b293e278c3a89" - integrity sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ== - redis-errors@^1.0.0, redis-errors@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/redis-errors/-/redis-errors-1.2.0.tgz#eb62d2adb15e4eaf4610c04afe1529384250abad" integrity sha1-62LSrbFeTq9GEMBK/hUpOEJQq60= -redis-mock@^0.56.3: - version "0.56.3" - resolved "https://registry.yarnpkg.com/redis-mock/-/redis-mock-0.56.3.tgz#e96471bcc774ddc514c2fc49cdd03cab2baecd89" - integrity sha512-ynaJhqk0Qf3Qajnwvy4aOjS4Mdf9IBkELWtjd+NYhpiqu4QCNq6Vf3Q7c++XRPGiKiwRj9HWr0crcwy7EiPjYQ== - redis-parser@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/redis-parser/-/redis-parser-3.0.0.tgz#b66d828cdcafe6b4b8a428a7def4c6bcac31c8b4" @@ -12619,7 +12656,14 @@ redis-parser@^3.0.0: dependencies: redis-errors "^1.0.0" -redis@^3.0.0, redis@^3.0.2: +redis-server@^1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/redis-server/-/redis-server-1.2.2.tgz#d0415b549ee06256248bf728b407ddf19999818a" + integrity sha512-pOaSIeSMVFkEFIuaMtpQ3TOr3uI4sUmEHm4ofGks5vTPRseHUszxyIlC70IFjUR9qSeH8o/ARZEM8dqcJmgGJw== + dependencies: + promise-queue "^2.2.5" + +redis@^3.0.0: version "3.0.2" resolved "https://registry.yarnpkg.com/redis/-/redis-3.0.2.tgz#bd47067b8a4a3e6a2e556e57f71cc82c7360150a" integrity sha512-PNhLCrjU6vKVuMOyFu7oSP296mwBkcE6lrAjruBYG5LgdSqtRBoVQIylrMyVZD/lkF24RSNNatzvYag6HRBHjQ== @@ -13867,6 +13911,11 @@ stackframe@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/stackframe/-/stackframe-0.3.1.tgz#33aa84f1177a5548c8935533cbfeb3420975f5a4" +standard-as-callback@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/standard-as-callback/-/standard-as-callback-2.1.0.tgz#8953fc05359868a77b5b9739a665c5977bb7df45" + integrity sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A== + static-extend@^0.1.1: version "0.1.2" resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" From d13475a1768f38c4910e57d2cfec16bcc4c5a66e Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Sat, 4 Sep 2021 09:36:21 +0200 Subject: [PATCH 04/62] TaskManager WIP --- app/api/config.ts | 2 +- .../tasksmanager/specs/taskManager.spec.ts | 20 +++-- app/api/tasksmanager/taskManager.ts | 34 +++++--- package.json | 4 +- yarn.lock | 78 +++++-------------- 5 files changed, 55 insertions(+), 83 deletions(-) diff --git a/app/api/config.ts b/app/api/config.ts index cde646219b..9b81d95005 100644 --- a/app/api/config.ts +++ b/app/api/config.ts @@ -51,6 +51,6 @@ export const config = { redis: { activated: CLUSTER_MODE, host: process.env.REDIS_HOST || 'localhost', - port: process.env.REDIS_PORT || 6379, + port: parseInt(process.env.REDIS_PORT || '', 10) || 6379, }, }; diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 4a053566e0..436dd48c12 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,15 +1,14 @@ -import { TaskManager } from 'api/tasksmanager/taskManager'; -import RedisSMQ from 'rsmq'; +import { TaskManagerFactory, TaskManager } from 'api/tasksmanager/TaskManager'; +import { config } from 'api/config'; import { RedisServer } from '../RedisServer'; -import Redis from 'ioredis'; +import Redis from 'redis'; describe('taskManager', () => { let taskManager: TaskManager; - let rsmq: RedisSMQ; let queueName: string; let redisServer: RedisServer; - let redis: Redis; + let redis: Redis.RedisClient; beforeAll(async () => {}); @@ -19,17 +18,16 @@ describe('taskManager', () => { queueName = 'testQueue'; }); - describe('addTask', () => { + describe('startTask', () => { it('should add a task', async () => { redisServer = new RedisServer(); await redisServer.start(); - redis = new Redis(); - rsmq = await new RedisSMQ({ client: redis }); - // taskManager = new TaskManager(rsmq, queueName); - // taskManager.addTask('hello'); + redis = await Redis.createClient({ port: config.redis.port, host: config.redis.host }); + taskManager = await TaskManagerFactory.create(redis, queueName); + await taskManager.startTask({}); - // expect(rsmq.sendMessage).toHaveBeenCalledWith({ qname: queueName, message: 'hello' }); + await redis.end(true); await redisServer.stop(); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index a0d160a460..f50d4e1f9a 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -1,25 +1,41 @@ import RedisSMQ from 'rsmq'; +import Redis from 'redis'; -class TaskManager { +export interface Task { + tenant: string; + task: string; +} + +export class TaskManager { private rsmq: RedisSMQ; private queueName: string; constructor(rsmq: RedisSMQ, queueName: string) { - console.log(rsmq); this.rsmq = rsmq; this.queueName = queueName; } - addTask(message: string) { - this.rsmq.createQueue({ qname: this.queueName }, err => { - if (err && err.name !== 'queueExists') { - // if the error is `queueExists` we can keep going as it tells us that the queue is already there + async initQueue() { + try { + await this.rsmq.createQueueAsync({ qname: this.queueName }); + } catch (err) { + if (err.name !== 'queueExists') { throw err; } - this.rsmq.sendMessage({ qname: this.queueName, message }, () => {}); - }); + } + } + + async startTask(message: Task) { + await this.rsmq.sendMessageAsync({ qname: this.queueName, message: JSON.stringify(message) }); } } -export { TaskManager }; +export const TaskManagerFactory = { + create: async (redis: Redis.RedisClient, queueName: string) => { + const rsmq = await new RedisSMQ({ client: redis }); + const manager = await new TaskManager(rsmq, queueName); + await manager.initQueue(); + return manager; + }, +}; diff --git a/package.json b/package.json index f2bfa1d46e..67f8f66e84 100644 --- a/package.json +++ b/package.json @@ -80,6 +80,7 @@ "@types/lodash": "^4.14.170", "@types/react-dropzone": "^4.2.2", "@types/react-modal": "^3.12.0", + "@types/redis": "^2.8.31", "ajv": "^6.12.3", "ajv-keywords": "^3.4.1", "async": "2.6.3", @@ -111,7 +112,6 @@ "html-to-react": "^1.4.3", "htmlparser2": "^3.9.2", "immutable": "^3.7.6", - "ioredis": "^4.27.9", "is-reachable": "5.0.0", "isomorphic-fetch": "2.2.1", "joi": "^13.6.0", @@ -161,8 +161,8 @@ "react-text-selection-handler": "0.1.0", "react-widgets": "v4.5.0", "recharts": "1.3.6", - "redis": "^3.0.2", "redis-mock": "^0.56.3", + "redis": "^3.1.2", "redis-server": "^1.2.2", "redux": "^3.7.2", "redux-devtools-extension": "^2.13.2", diff --git a/yarn.lock b/yarn.lock index 642b8ec87a..4bb5c089e5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2337,6 +2337,13 @@ "@types/d3-shape" "^1" "@types/react" "*" +"@types/redis@^2.8.31": + version "2.8.31" + resolved "https://registry.yarnpkg.com/@types/redis/-/redis-2.8.31.tgz#c11c1b269fec132ac2ec9eb891edf72fc549149e" + integrity sha512-daWrrTDYaa5iSDFbgzZ9gOOzyp2AJmYK59OlG/2KGBgYWF3lfs8GDKm1c//tik5Uc93hDD36O+qLPvzDolChbA== + dependencies: + "@types/node" "*" + "@types/redux-mock-store@^1.0.1": version "1.0.1" resolved "https://registry.yarnpkg.com/@types/redux-mock-store/-/redux-mock-store-1.0.1.tgz#90ca701d640aef7c007f564a9a4f8dc03180b0f7" @@ -4053,11 +4060,6 @@ clone@^1.0.2: version "1.0.4" resolved "https://registry.yarnpkg.com/clone/-/clone-1.0.4.tgz#da309cc263df15994c688ca902179ca3c7cd7c7e" -cluster-key-slot@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz#30474b2a981fb12172695833052bc0d01336d10d" - integrity sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw== - co@^4.6.0: version "4.6.0" resolved "https://registry.yarnpkg.com/co/-/co-4.6.0.tgz#6ea6bdf3d853ae54ccb8e47bfa0bf3f9031fb184" @@ -4883,13 +4885,6 @@ debug@^3.0.1, debug@^3.2.6: dependencies: ms "^2.1.1" -debug@^4.3.1: - version "4.3.2" - resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" - integrity sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw== - dependencies: - ms "2.1.2" - debug@~4.3.1: version "4.3.1" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee" @@ -5051,16 +5046,16 @@ delegates@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a" -denque@^1.1.0, denque@^1.5.0: - version "1.5.1" - resolved "https://registry.yarnpkg.com/denque/-/denque-1.5.1.tgz#07f670e29c9a78f8faecb2566a1e2c11929c5cbf" - integrity sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw== - denque@^1.4.1: version "1.4.1" resolved "https://registry.yarnpkg.com/denque/-/denque-1.4.1.tgz#6744ff7641c148c3f8a69c307e51235c1f4a37cf" integrity sha512-OfzPuSZKGcgr96rf1oODnfjqBFmr1DVoc/TrItj3Ohe0Ah1C5WX5Baquw/9U9KovnQ88EqmJbD66rKYUQYN1tQ== +denque@^1.5.0: + version "1.5.1" + resolved "https://registry.yarnpkg.com/denque/-/denque-1.5.1.tgz#07f670e29c9a78f8faecb2566a1e2c11929c5cbf" + integrity sha512-XwE+iZ4D6ZUB7mfYRMb5wByE8L74HCn30FBN7sWnXksWc1LO1bPDl67pBR9o/kC4z/xSNAwkMYcGgqDV3BE3Hw== + depd@2.0.0, depd@~2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" @@ -7760,23 +7755,6 @@ invert-kv@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/invert-kv/-/invert-kv-2.0.0.tgz#7393f5afa59ec9ff5f67a27620d11c226e3eec02" -ioredis@^4.27.9: - version "4.27.9" - resolved "https://registry.yarnpkg.com/ioredis/-/ioredis-4.27.9.tgz#c27bbade9724f0b8f84c279fb1d567be785ba33d" - integrity sha512-hAwrx9F+OQ0uIvaJefuS3UTqW+ByOLyLIV+j0EH8ClNVxvFyH9Vmb08hCL4yje6mDYT5zMquShhypkd50RRzkg== - dependencies: - cluster-key-slot "^1.1.0" - debug "^4.3.1" - denque "^1.1.0" - lodash.defaults "^4.2.0" - lodash.flatten "^4.4.0" - lodash.isarguments "^3.1.0" - p-map "^2.1.0" - redis-commands "1.7.0" - redis-errors "^1.2.0" - redis-parser "^3.0.0" - standard-as-callback "^2.1.0" - ipaddr.js@1.9.1: version "1.9.1" resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-1.9.1.tgz#bff38543eeb8984825079ff3a2a8e6cbd46781b3" @@ -9263,11 +9241,6 @@ lodash.debounce@^4.0.8: version "4.0.8" resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" -lodash.defaults@^4.2.0: - version "4.2.0" - resolved "https://registry.yarnpkg.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz#d09178716ffea4dde9e5fb7b37f6f0802274580c" - integrity sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw= - lodash.escape@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98" @@ -9278,11 +9251,6 @@ lodash.escaperegexp@^4.1.2: resolved "https://registry.yarnpkg.com/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz#64762c48618082518ac3df4ccf5d5886dae20347" integrity sha1-ZHYsSGGAglGKw99Mz11YhtriA0c= -lodash.flatten@^4.4.0: - version "4.4.0" - resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f" - integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8= - lodash.flattendeep@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/lodash.flattendeep/-/lodash.flattendeep-4.4.0.tgz#fb030917f86a3134e5bc9bec0d69e0013ddfedb2" @@ -9291,11 +9259,6 @@ lodash.get@^4.4.2, lodash.get@~4.4.2: version "4.4.2" resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99" -lodash.isarguments@^3.1.0: - version "3.1.0" - resolved "https://registry.yarnpkg.com/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz#2f573d85c6a24289ff00663b491c1d338ff3458a" - integrity sha1-L1c9hcaiQon/AGY7SRwdM4/zRYo= - lodash.isboolean@^3.0.3: version "3.0.3" resolved "https://registry.yarnpkg.com/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz#6c2e171db2a257cd96802fd43b01b20d5f5870f6" @@ -10923,7 +10886,7 @@ p-locate@^4.1.0: dependencies: p-limit "^2.2.0" -p-map@^2.0.0, p-map@^2.1.0: +p-map@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/p-map/-/p-map-2.1.0.tgz#310928feef9c9ecc65b68b17693018a665cea175" integrity sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw== @@ -12634,16 +12597,16 @@ redent@^1.0.0: indent-string "^2.1.0" strip-indent "^1.0.1" -redis-commands@1.7.0, redis-commands@^1.7.0: - version "1.7.0" - resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.7.0.tgz#15a6fea2d58281e27b1cd1acfb4b293e278c3a89" - integrity sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ== - redis-commands@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.5.0.tgz#80d2e20698fe688f227127ff9e5164a7dd17e785" integrity sha512-6KxamqpZ468MeQC3bkWmCB1fp56XL64D4Kf0zJSwDZbVLLm7KFkoIcHrgRvQ+sk8dnhySs7+yBg94yIkAK7aJg== +redis-commands@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/redis-commands/-/redis-commands-1.7.0.tgz#15a6fea2d58281e27b1cd1acfb4b293e278c3a89" + integrity sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ== + redis-errors@^1.0.0, redis-errors@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/redis-errors/-/redis-errors-1.2.0.tgz#eb62d2adb15e4eaf4610c04afe1529384250abad" @@ -13911,11 +13874,6 @@ stackframe@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/stackframe/-/stackframe-0.3.1.tgz#33aa84f1177a5548c8935533cbfeb3420975f5a4" -standard-as-callback@^2.1.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/standard-as-callback/-/standard-as-callback-2.1.0.tgz#8953fc05359868a77b5b9739a665c5977bb7df45" - integrity sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A== - static-extend@^0.1.1: version "0.1.2" resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" From f7af6f4ce11b3e5c9f9689eed447a07fc67b3e67 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 6 Sep 2021 15:50:55 +0200 Subject: [PATCH 05/62] Taskmanager tests with external dummy server WIP --- app/api/tasksmanager/specs/DummyService.ts | 85 +++++++++++++++++++ .../tasksmanager/specs/taskManager.spec.ts | 74 +++++++++++++--- app/api/tasksmanager/taskManager.ts | 16 +++- 3 files changed, 159 insertions(+), 16 deletions(-) create mode 100644 app/api/tasksmanager/specs/DummyService.ts diff --git a/app/api/tasksmanager/specs/DummyService.ts b/app/api/tasksmanager/specs/DummyService.ts new file mode 100644 index 0000000000..3690aa4124 --- /dev/null +++ b/app/api/tasksmanager/specs/DummyService.ts @@ -0,0 +1,85 @@ +import express from 'express'; +import RedisSMQ from 'rsmq'; +import Redis from 'redis'; +import { Server } from 'http'; + +export class DummyService { + private app: express.Application; + + port: number; + + private rsmqClient: RedisSMQ | undefined; + + queueName = 'KonzNGaboHellKitchen'; + + private server: Server | undefined; + + currentTask: string | undefined; + + materials: string[] = []; + + constructor(port: number) { + this.port = port; + this.app = express(); + + this.app.post('/materials', (_req, res) => { + this.materials.push(_req.body.material); + res.send('ok'); + }); + } + + get rsmq() { + if (!this.rsmqClient) { + throw new Error('rsmq is not initialized'); + } + return this.rsmqClient; + } + + async initQueue() { + try { + await this.rsmq.deleteQueueAsync({ qname: this.queueName }); + } catch (err) { + console.log(err); + if (err.name !== 'queueNotFound') { + throw err; + } + } + + try { + await this.rsmq.createQueueAsync({ qname: this.queueName }); + } catch (err) { + if (err.name !== 'queueExists') { + throw err; + } + } + } + + async read() { + try { + const { qname, message } = await this.rsmq.receiveMessageAsync({ + qname: this.queueName, + }); + this.currentTask = message; + return message; + } catch (e) { + console.log(e); + } + } + + async start(client: Redis.RedisClient) { + this.rsmqClient = await new RedisSMQ({ client }); + await this.initQueue(); + + const start = new Promise(resolve => { + this.server = this.app.listen(this.port, () => { + resolve(); + }); + }); + + return start; + } + + async stop() { + await this.server?.close(); + } +} diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 436dd48c12..383a1dda2d 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,6 +1,7 @@ import { TaskManagerFactory, TaskManager } from 'api/tasksmanager/TaskManager'; -import { config } from 'api/config'; + import { RedisServer } from '../RedisServer'; +import { DummyService } from './DummyService'; import Redis from 'redis'; describe('taskManager', () => { @@ -8,27 +9,74 @@ describe('taskManager', () => { let queueName: string; let redisServer: RedisServer; - let redis: Redis.RedisClient; + let client: Redis.RedisClient; + let externalDummyService: DummyService; - beforeAll(async () => {}); + beforeAll(async () => { + queueName = 'KonzNGaboHellKitchen'; + redisServer = new RedisServer(); + await redisServer.start(); + client = await Redis.createClient('redis://localhost:6379'); + taskManager = await TaskManagerFactory.create(client, queueName); - afterAll(async () => {}); + externalDummyService = new DummyService(1234); + await externalDummyService.start(client); + }); - beforeEach(async () => { - queueName = 'testQueue'; + afterAll(async () => { + await externalDummyService.stop(); + await client.end(true); + await redisServer.stop(); }); describe('startTask', () => { it('should add a task', async () => { - redisServer = new RedisServer(); - await redisServer.start(); + await taskManager.startTask({ + task: 'CheeseBurger', + tenant: 'Rafa', + }); + + await externalDummyService.read(); + + expect(externalDummyService.currentTask).toBe('{"task":"CheeseBurger","tenant":"Rafa"}'); + }); + + describe('when multiple tasks are added', () => { + it('services get them in order', async () => { + await taskManager.startTask({ + task: 'CheeseBurger', + tenant: 'Joan', + }); - redis = await Redis.createClient({ port: config.redis.port, host: config.redis.host }); - taskManager = await TaskManagerFactory.create(redis, queueName); - await taskManager.startTask({}); + await taskManager.startTask({ + task: 'Fries', + tenant: 'Joan', + }); + + await taskManager.startTask({ + task: 'Ribs', + tenant: 'Fede', + }); + + let message = await externalDummyService.read(); + expect(message).toBe('{"task":"CheeseBurger","tenant":"Joan"}'); + + message = await externalDummyService.read(); + expect(message).toBe('{"task":"Fries","tenant":"Joan"}'); + + message = await externalDummyService.read(); + expect(message).toBe('{"task":"Ribs","tenant":"Fede"}'); + }); + }); - await redis.end(true); - await redisServer.stop(); + describe('sending materials', () => { + it('should send materials to the service', async () => { + // const task = { task: 'doit', tenant: 'test' }; + // const materials = { data: '{"someData": "someValue"}' }; + // await taskManager.startTask(task, materials); + // await externalDummyService.read(); + // expect(externalDummyService.materials[0]).toEqual(materials); + }); }); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index f50d4e1f9a..4b6eaf27d1 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -6,14 +6,21 @@ export interface Task { task: string; } +export interface Materials { + data: string; +} + export class TaskManager { private rsmq: RedisSMQ; private queueName: string; - constructor(rsmq: RedisSMQ, queueName: string) { + private serviceUrl: string; + + constructor(rsmq: RedisSMQ, queueName: string, serviceUrl: string) { this.rsmq = rsmq; this.queueName = queueName; + this.serviceUrl = serviceUrl; } async initQueue() { @@ -26,8 +33,11 @@ export class TaskManager { } } - async startTask(message: Task) { - await this.rsmq.sendMessageAsync({ qname: this.queueName, message: JSON.stringify(message) }); + async startTask(message: Task, materials?: Materials) { + await this.rsmq.sendMessageAsync({ + qname: this.queueName, + message: JSON.stringify(message), + }); } } From bd6c1a81a29736437e971a3d1937b4ffa7497401 Mon Sep 17 00:00:00 2001 From: gabo Date: Tue, 7 Sep 2021 19:02:49 +0200 Subject: [PATCH 06/62] Task manager simple version working --- app/api/tasksmanager/RedisServer.ts | 3 +- app/api/tasksmanager/specs/DummyService.ts | 85 ------------ .../specs/ExternalDummyService.ts | 127 ++++++++++++++++++ app/api/tasksmanager/specs/blank.pdf | Bin 0 -> 798 bytes .../tasksmanager/specs/taskManager.spec.ts | 78 +++++++++-- app/api/tasksmanager/taskManager.ts | 92 ++++++++++--- 6 files changed, 266 insertions(+), 119 deletions(-) delete mode 100644 app/api/tasksmanager/specs/DummyService.ts create mode 100644 app/api/tasksmanager/specs/ExternalDummyService.ts create mode 100644 app/api/tasksmanager/specs/blank.pdf diff --git a/app/api/tasksmanager/RedisServer.ts b/app/api/tasksmanager/RedisServer.ts index b2e4f594e0..0a91486327 100644 --- a/app/api/tasksmanager/RedisServer.ts +++ b/app/api/tasksmanager/RedisServer.ts @@ -29,8 +29,7 @@ export class RedisServer { execSync( `cd redis cd redis-stable - make - make install` + make` ); } diff --git a/app/api/tasksmanager/specs/DummyService.ts b/app/api/tasksmanager/specs/DummyService.ts deleted file mode 100644 index 3690aa4124..0000000000 --- a/app/api/tasksmanager/specs/DummyService.ts +++ /dev/null @@ -1,85 +0,0 @@ -import express from 'express'; -import RedisSMQ from 'rsmq'; -import Redis from 'redis'; -import { Server } from 'http'; - -export class DummyService { - private app: express.Application; - - port: number; - - private rsmqClient: RedisSMQ | undefined; - - queueName = 'KonzNGaboHellKitchen'; - - private server: Server | undefined; - - currentTask: string | undefined; - - materials: string[] = []; - - constructor(port: number) { - this.port = port; - this.app = express(); - - this.app.post('/materials', (_req, res) => { - this.materials.push(_req.body.material); - res.send('ok'); - }); - } - - get rsmq() { - if (!this.rsmqClient) { - throw new Error('rsmq is not initialized'); - } - return this.rsmqClient; - } - - async initQueue() { - try { - await this.rsmq.deleteQueueAsync({ qname: this.queueName }); - } catch (err) { - console.log(err); - if (err.name !== 'queueNotFound') { - throw err; - } - } - - try { - await this.rsmq.createQueueAsync({ qname: this.queueName }); - } catch (err) { - if (err.name !== 'queueExists') { - throw err; - } - } - } - - async read() { - try { - const { qname, message } = await this.rsmq.receiveMessageAsync({ - qname: this.queueName, - }); - this.currentTask = message; - return message; - } catch (e) { - console.log(e); - } - } - - async start(client: Redis.RedisClient) { - this.rsmqClient = await new RedisSMQ({ client }); - await this.initQueue(); - - const start = new Promise(resolve => { - this.server = this.app.listen(this.port, () => { - resolve(); - }); - }); - - return start; - } - - async stop() { - await this.server?.close(); - } -} diff --git a/app/api/tasksmanager/specs/ExternalDummyService.ts b/app/api/tasksmanager/specs/ExternalDummyService.ts new file mode 100644 index 0000000000..9c726a3076 --- /dev/null +++ b/app/api/tasksmanager/specs/ExternalDummyService.ts @@ -0,0 +1,127 @@ +import express from 'express'; +import RedisSMQ from 'rsmq'; +import Redis from 'redis'; +import { Server } from 'http'; +import bodyParser from 'body-parser'; +import { uploadMiddleware } from 'api/files'; + +export class ExternalDummyService { + private app: express.Application; + + private readonly port: number; + + private redisSMQ: RedisSMQ | undefined; + + serviceName = 'KonzNGaboHellKitchen'; + + private server: Server | undefined; + + currentTask: string | undefined; + + materials: string[] = []; + + files: Buffer[] = []; + + results: object | undefined; + + constructor(port: number) { + this.port = port; + this.app = express(); + this.app.use(bodyParser.json()); + + this.app.post('/data', (req, res) => { + this.materials.push(req.body); + res.send('ok'); + }); + + this.app.post('/files', uploadMiddleware.multiple(), (req, res) => { + if (req.files.length) { + const files = req.files as { buffer: Buffer }[]; + this.files.push(files[0].buffer); + } + res.send('received'); + }); + + this.app.get('/results', (_req, res) => { + res.send(this.results); + }); + } + + setResults(results: object) { + this.results = results; + } + + get rsmq() { + if (!this.redisSMQ) { + throw new Error('rsmq is not initialized'); + } + return this.redisSMQ; + } + + async initQueue() { + try { + await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_tasks` }); + } catch (err) { + console.log(err); + if (err.name !== 'queueNotFound') { + throw err; + } + } + try { + await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_results` }); + } catch (err) { + console.log(err); + if (err.name !== 'queueNotFound') { + throw err; + } + } + + try { + await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_tasks` }); + } catch (err) { + if (err.name !== 'queueExists') { + throw err; + } + } + + try { + await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_results` }); + } catch (err) { + if (err.name !== 'queueExists') { + throw err; + } + } + } + + async read() { + const { qname, message } = await this.rsmq.receiveMessageAsync({ + qname: this.serviceName + '_tasks', + }); + this.currentTask = message; + return message; + } + + async start(client: Redis.RedisClient) { + this.redisSMQ = await new RedisSMQ({ client }); + await this.initQueue(); + + const start = new Promise(resolve => { + this.server = this.app.listen(this.port, () => { + resolve(); + }); + }); + + return start; + } + + async stop() { + await this.server?.close(); + } + + async sendFinishedMessage(task: { task: string; tenant: string }) { + await this.rsmq.sendMessageAsync({ + qname: `${this.serviceName}_results`, + message: JSON.stringify(task), + }); + } +} diff --git a/app/api/tasksmanager/specs/blank.pdf b/app/api/tasksmanager/specs/blank.pdf new file mode 100644 index 0000000000000000000000000000000000000000..02f8ffa7aa95c8ace1377f825a1ddb4496cbe9fb GIT binary patch literal 798 zcmZWnJ#Q015Dk`s&=ML566H`>ip9s?`R@E7MONfbS;SzSoyu5Zxm!Dz+}$R7Yh-As z==lL@kV3&vprimHK`MGABpM5ej)sa^-YrAJV&C}!c%z^Y;j z0|UW=nTKLIk0mt`E8IW=+9j9Jo{|$!b(x%l1}AD}(~A4GnC^BLVu)qwyqBOscsGr&J)> IY~xY$A4aLk literal 0 HcmV?d00001 diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 383a1dda2d..d25dd5517f 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,25 +1,33 @@ -import { TaskManagerFactory, TaskManager } from 'api/tasksmanager/TaskManager'; +import fs from 'fs'; + +import { TaskManagerFactory, TaskManager, Service } from 'api/tasksmanager/taskManager'; -import { RedisServer } from '../RedisServer'; -import { DummyService } from './DummyService'; import Redis from 'redis'; +import { RedisServer } from '../RedisServer'; +import { ExternalDummyService } from './ExternalDummyService'; describe('taskManager', () => { let taskManager: TaskManager; - let queueName: string; + let service: Service; let redisServer: RedisServer; let client: Redis.RedisClient; - let externalDummyService: DummyService; + let externalDummyService: ExternalDummyService; beforeAll(async () => { - queueName = 'KonzNGaboHellKitchen'; + service = { + serviceName: 'KonzNGaboHellKitchen', + dataUrl: 'http://localhost:1234/data', + filesUrl: 'http://localhost:1234/files', + resultsUrl: 'http://localhost:1234/results', + }; + redisServer = new RedisServer(); await redisServer.start(); client = await Redis.createClient('redis://localhost:6379'); - taskManager = await TaskManagerFactory.create(client, queueName); + taskManager = await TaskManagerFactory.create(client, service); - externalDummyService = new DummyService(1234); + externalDummyService = new ExternalDummyService(1234); await externalDummyService.start(client); }); @@ -71,12 +79,56 @@ describe('taskManager', () => { describe('sending materials', () => { it('should send materials to the service', async () => { - // const task = { task: 'doit', tenant: 'test' }; - // const materials = { data: '{"someData": "someValue"}' }; - // await taskManager.startTask(task, materials); - // await externalDummyService.read(); - // expect(externalDummyService.materials[0]).toEqual(materials); + const task = { task: 'Tofu', tenant: 'Me' }; + const materials1 = { someData: 3 }; + const materials2 = { someData: 2 }; + const materials3 = { someData: 3 }; + await taskManager.sendJSON(materials1); + await taskManager.sendJSON(materials2); + await taskManager.sendJSON(materials3); + + await taskManager.startTask(task); + await externalDummyService.read(); + + expect(externalDummyService.materials.length).toEqual(3); + expect(externalDummyService.materials[0]).toEqual(materials1); + expect(externalDummyService.materials[1]).toEqual(materials2); + expect(externalDummyService.materials[2]).toEqual(materials3); + }); + + it('should send files to the service', async () => { + const task = { task: 'make_food', tenant: 'test' }; + + const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); + + await taskManager.sendFile(file); + await taskManager.sendFile(file); + await taskManager.sendFile(file); + + await taskManager.startTask(task); + await externalDummyService.read(); + + expect(externalDummyService.files.length).toEqual(3); + expect(externalDummyService.files[0]).toEqual(file); + expect(externalDummyService.files[1]).toEqual(file); + expect(externalDummyService.files[2]).toEqual(file); }); }); + + it('should get the results when the task has been finished', async done => { + taskManager.end(); + + const expectedResults = { results: 'Paella' }; + const expectFunction = (results: object) => { + console.log('almost there'); + expect(results).toEqual(expectedResults); + done(); + }; + await TaskManagerFactory.create(client, service, expectFunction); + + const task = { task: 'make_food', tenant: 'test' }; + externalDummyService.setResults(expectedResults); + await externalDummyService.sendFinishedMessage(task); + }); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 4b6eaf27d1..337e27938d 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -1,50 +1,104 @@ -import RedisSMQ from 'rsmq'; +import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis from 'redis'; +import request from 'shared/JSONRequest'; -export interface Task { +export interface TaskMessage { tenant: string; task: string; } -export interface Materials { - data: string; +export interface Service { + serviceName: string; + filesUrl: string; + dataUrl: string; + resultsUrl: string; } export class TaskManager { - private rsmq: RedisSMQ; + private redisSMQ: RedisSMQ; - private queueName: string; + private readonly service: Service; - private serviceUrl: string; + private readonly taskQueue: string; - constructor(rsmq: RedisSMQ, queueName: string, serviceUrl: string) { - this.rsmq = rsmq; - this.queueName = queueName; - this.serviceUrl = serviceUrl; + private readonly resultsQueue: string; + + private readonly processResults?: (results: object) => void; + + private listeningToQueue: NodeJS.Timeout | undefined; + + constructor(redisSMQ: RedisSMQ, service: Service, processResults?: (results: object) => void) { + this.redisSMQ = redisSMQ; + this.service = service; + this.processResults = processResults; + this.taskQueue = `${service.serviceName}_tasks`; + this.resultsQueue = `${service.serviceName}_results`; } async initQueue() { try { - await this.rsmq.createQueueAsync({ qname: this.queueName }); + await this.redisSMQ.createQueueAsync({ qname: this.taskQueue }); + } catch (err) { + if (err.name !== 'queueExists') { + throw err; + } + } + try { + await this.redisSMQ.createQueueAsync({ qname: this.resultsQueue }); } catch (err) { if (err.name !== 'queueExists') { throw err; } } + + this.listeningToQueue = setInterval(() => { + this.redisSMQ.receiveMessage({ qname: this.resultsQueue }, async (err, resp) => { + if (err) { + return; + } + + const message = resp as QueueMessage; + + if (message.id) { + if (this.processResults) { + const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); + this.processResults(results.json); + } + } + }); + }, 1000); } - async startTask(message: Task, materials?: Materials) { - await this.rsmq.sendMessageAsync({ - qname: this.queueName, - message: JSON.stringify(message), + async startTask(taskMessage: TaskMessage) { + await this.redisSMQ.sendMessageAsync({ + qname: this.taskQueue, + message: JSON.stringify(taskMessage), }); } + + async sendJSON(data: object) { + await request.post(this.service.dataUrl, data); + } + + async sendFile(file: Buffer) { + await request.uploadFile(this.service.filesUrl, 'blank.pdf', file); + } + + end() { + if (this.listeningToQueue) { + clearInterval(this.listeningToQueue); + } + } } export const TaskManagerFactory = { - create: async (redis: Redis.RedisClient, queueName: string) => { - const rsmq = await new RedisSMQ({ client: redis }); - const manager = await new TaskManager(rsmq, queueName); + create: async ( + redis: Redis.RedisClient, + service: Service, + processResults?: (results: object) => void + ) => { + const redisSMQ = await new RedisSMQ({ client: redis }); + const manager = await new TaskManager(redisSMQ, service, processResults); await manager.initQueue(); return manager; }, From 563ed501d307f10999e02038a3fdebf84079af1e Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 8 Sep 2021 14:14:51 +0200 Subject: [PATCH 07/62] refactor to the Repeater to be a class and dont share the stop variable --- app/api/utils/Repeater.js | 33 ++++++++++------ app/api/utils/specs/Repeater.spec.js | 56 +++++++++++++++++++++------- app/server.js | 13 ++++--- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/app/api/utils/Repeater.js b/app/api/utils/Repeater.js index bb91f06df9..379535ca68 100644 --- a/app/api/utils/Repeater.js +++ b/app/api/utils/Repeater.js @@ -3,18 +3,27 @@ const timeout = async interval => setTimeout(resolve, interval); }); -export default { - stopped: false, +export class Repeater { + constructor(cb, interval) { + this.cb = cb; + this.interval = interval; + this.stopped = null; + } - async start(cb, interval) { - if (!this.stopped) { - await cb(); - await timeout(interval); - await this.start(cb, interval); + async start() { + if (this.stopped) { + this.stopped(); + return; } - }, - stop() { - this.stopped = true; - }, -}; + await this.cb(); + await timeout(this.interval); + await this.start(this.cb, this.interval); + } + + async stop() { + return new Promise(resolve => { + this.stopped = resolve; + }); + } +} diff --git a/app/api/utils/specs/Repeater.spec.js b/app/api/utils/specs/Repeater.spec.js index 745ea67b9c..f8fc97bfb7 100644 --- a/app/api/utils/specs/Repeater.spec.js +++ b/app/api/utils/specs/Repeater.spec.js @@ -1,21 +1,43 @@ -import repeater from '../Repeater'; +import { Repeater } from '../Repeater'; -describe('repeat', () => { - let callback; - let counter = 0; - const stopOn = 15; +describe('Repeater', () => { + let callbackOne; + let callbackTwo; + let counterOne = 0; + let counterTwo = 0; + const stopOnOne = 15; + const stopOnTwo = 20; + let repeaterOne; + let repeaterTwo; beforeEach(() => { - counter = 1; - callback = jasmine.createSpy('callback').and.callFake( + counterOne = 1; + counterTwo = 1; + + callbackOne = jasmine.createSpy('callbackone').and.callFake( () => new Promise(resolve => { setTimeout(() => { - if (counter === stopOn) { + if (counterOne === stopOnOne) { resolve(); - repeater.stop(); + repeaterOne.stop(); } else { - counter += 1; + counterOne += 1; + resolve(); + } + }, 1); + }) + ); + + callbackTwo = jasmine.createSpy('callbacktwo').and.callFake( + () => + new Promise(resolve => { + setTimeout(() => { + if (counterTwo === stopOnTwo) { + resolve(); + repeaterTwo.stop(); + } else { + counterTwo += 1; resolve(); } }, 1); @@ -23,9 +45,15 @@ describe('repeat', () => { ); }); - it('should repeat callback call when callback finishes', async () => { - await repeater.start(callback, 0); - expect(callback).toHaveBeenCalledTimes(stopOn); - expect(counter).toBe(stopOn); + it('should be able to have two independant repeaters', async () => { + repeaterOne = new Repeater(callbackOne, 1); + await repeaterOne.start(); + expect(callbackOne).toHaveBeenCalledTimes(stopOnOne); + expect(counterOne).toBe(stopOnOne); + + repeaterTwo = new Repeater(callbackTwo, 1); + await repeaterTwo.start(); + expect(callbackTwo).toHaveBeenCalledTimes(stopOnTwo); + expect(counterTwo).toBe(stopOnTwo); }); }); diff --git a/app/server.js b/app/server.js index 8b35e7cf5e..fe84bc760e 100644 --- a/app/server.js +++ b/app/server.js @@ -24,8 +24,8 @@ import { migrator } from './api/migrations/migrator'; import settings from './api/settings'; import syncWorker from './api/sync/syncWorker'; import errorHandlingMiddleware from './api/utils/error_handling_middleware'; -import { handleError } from './api/utils'; -import repeater from './api/utils/Repeater'; +import handleError from './api/utils/handleError.js'; +import { Repeater } from './api/utils/Repeater'; import serverRenderingRoutes from './react/server.js'; import { DB } from './api/odm'; import { tenants } from './api/tenants/tenantContext'; @@ -126,19 +126,21 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { const { evidencesVault, features } = await settings.get(); if (evidencesVault && evidencesVault.token && evidencesVault.template) { console.info('==> 📥 evidences vault config detected, started sync ....'); - repeater.start( + const vaultSyncRepeater = new Repeater( () => vaultSync.sync(evidencesVault.token, evidencesVault.template), 10000 ); + vaultSyncRepeater.start(); } if (features && features.tocGeneration && features.tocGeneration.url) { console.info('==> 🗂️ automatically generating TOCs using external service'); const service = tocService(features.tocGeneration.url); - repeater.start(() => service.processNext(), 10000); + const tocServiceRepeater = new Repeater(() => service.processNext(), 10000); + tocServiceRepeater.start(); } - repeater.start( + const topicClassificationRepeater = new Repeater( () => TaskProvider.runAndWait('TopicClassificationSync', 'TopicClassificationSync', { mode: 'onlynew', @@ -147,6 +149,7 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { }), 10000 ); + topicClassificationRepeater.start(); } }); From 7c5ca675377e80ac26e77fc032a81090907aff45 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 8 Sep 2021 14:16:26 +0200 Subject: [PATCH 08/62] TaskManage using Repeater to listen for messages --- .../tasksmanager/specs/taskManager.spec.ts | 69 +++++++++---------- app/api/tasksmanager/taskManager.ts | 40 +++++------ 2 files changed, 50 insertions(+), 59 deletions(-) diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index d25dd5517f..42b9ad383b 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -32,6 +32,7 @@ describe('taskManager', () => { }); afterAll(async () => { + await taskManager.stop(); await externalDummyService.stop(); await client.end(true); await redisServer.stop(); @@ -76,55 +77,47 @@ describe('taskManager', () => { expect(message).toBe('{"task":"Ribs","tenant":"Fede"}'); }); }); + }); - describe('sending materials', () => { - it('should send materials to the service', async () => { - const task = { task: 'Tofu', tenant: 'Me' }; - const materials1 = { someData: 3 }; - const materials2 = { someData: 2 }; - const materials3 = { someData: 3 }; - await taskManager.sendJSON(materials1); - await taskManager.sendJSON(materials2); - await taskManager.sendJSON(materials3); - - await taskManager.startTask(task); - await externalDummyService.read(); - - expect(externalDummyService.materials.length).toEqual(3); - expect(externalDummyService.materials[0]).toEqual(materials1); - expect(externalDummyService.materials[1]).toEqual(materials2); - expect(externalDummyService.materials[2]).toEqual(materials3); - }); - - it('should send files to the service', async () => { - const task = { task: 'make_food', tenant: 'test' }; - - const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); + describe('sending materials', () => { + it('should send materials to the service', async () => { + const materials1 = { someData: 3 }; + const materials2 = { someData: 2 }; + const materials3 = { someData: 3 }; + await taskManager.sendJSON(materials1); + await taskManager.sendJSON(materials2); + await taskManager.sendJSON(materials3); + + expect(externalDummyService.materials.length).toEqual(3); + expect(externalDummyService.materials[0]).toEqual(materials1); + expect(externalDummyService.materials[1]).toEqual(materials2); + expect(externalDummyService.materials[2]).toEqual(materials3); + }); - await taskManager.sendFile(file); - await taskManager.sendFile(file); - await taskManager.sendFile(file); + it('should send files to the service', async () => { + const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); - await taskManager.startTask(task); - await externalDummyService.read(); + await taskManager.sendFile(file); + await taskManager.sendFile(file); + await taskManager.sendFile(file); - expect(externalDummyService.files.length).toEqual(3); - expect(externalDummyService.files[0]).toEqual(file); - expect(externalDummyService.files[1]).toEqual(file); - expect(externalDummyService.files[2]).toEqual(file); - }); + expect(externalDummyService.files.length).toEqual(3); + expect(externalDummyService.files[0]).toEqual(file); + expect(externalDummyService.files[1]).toEqual(file); + expect(externalDummyService.files[2]).toEqual(file); }); + }); - it('should get the results when the task has been finished', async done => { - taskManager.end(); - + describe('when the task finishes', () => { + it('should get the results', async done => { const expectedResults = { results: 'Paella' }; const expectFunction = (results: object) => { - console.log('almost there'); expect(results).toEqual(expectedResults); done(); }; - await TaskManagerFactory.create(client, service, expectFunction); + + await taskManager.stop(); + taskManager = await TaskManagerFactory.create(client, service, expectFunction); const task = { task: 'make_food', tenant: 'test' }; externalDummyService.setResults(expectedResults); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 337e27938d..88f5a69fa4 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -1,6 +1,7 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis from 'redis'; import request from 'shared/JSONRequest'; +import { Repeater } from 'api/utils/Repeater'; export interface TaskMessage { tenant: string; @@ -25,7 +26,7 @@ export class TaskManager { private readonly processResults?: (results: object) => void; - private listeningToQueue: NodeJS.Timeout | undefined; + private repeater: Repeater; constructor(redisSMQ: RedisSMQ, service: Service, processResults?: (results: object) => void) { this.redisSMQ = redisSMQ; @@ -33,6 +34,7 @@ export class TaskManager { this.processResults = processResults; this.taskQueue = `${service.serviceName}_tasks`; this.resultsQueue = `${service.serviceName}_results`; + this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); } async initQueue() { @@ -51,22 +53,20 @@ export class TaskManager { } } - this.listeningToQueue = setInterval(() => { - this.redisSMQ.receiveMessage({ qname: this.resultsQueue }, async (err, resp) => { - if (err) { - return; - } - - const message = resp as QueueMessage; - - if (message.id) { - if (this.processResults) { - const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); - this.processResults(results.json); - } - } - }); - }, 1000); + this.repeater.start(); + } + + async receiveMessage() { + const message = (await this.redisSMQ.receiveMessageAsync({ + qname: this.resultsQueue, + })) as QueueMessage; + + if (message.id) { + if (this.processResults) { + const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); + this.processResults(results.json); + } + } } async startTask(taskMessage: TaskMessage) { @@ -84,10 +84,8 @@ export class TaskManager { await request.uploadFile(this.service.filesUrl, 'blank.pdf', file); } - end() { - if (this.listeningToQueue) { - clearInterval(this.listeningToQueue); - } + async stop() { + await this.repeater.stop(); } } From 84155289f94d62ae8bb1d9f5f8973b88c713a301 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 9 Sep 2021 11:28:41 +0200 Subject: [PATCH 09/62] Refactored Repeater specs to work with fakeTimers --- app/api/utils/specs/Repeater.spec.js | 70 +++++++++++----------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/app/api/utils/specs/Repeater.spec.js b/app/api/utils/specs/Repeater.spec.js index f8fc97bfb7..83e846f9ed 100644 --- a/app/api/utils/specs/Repeater.spec.js +++ b/app/api/utils/specs/Repeater.spec.js @@ -3,57 +3,41 @@ import { Repeater } from '../Repeater'; describe('Repeater', () => { let callbackOne; let callbackTwo; - let counterOne = 0; - let counterTwo = 0; - const stopOnOne = 15; - const stopOnTwo = 20; + let repeaterOne; let repeaterTwo; + // one does not simply test timeouts + function advanceTime(time) { + jest.advanceTimersByTime(time); + return new Promise(resolve => setImmediate(resolve)); + } + + afterEach(() => { + jest.useRealTimers(); + }); + beforeEach(() => { - counterOne = 1; - counterTwo = 1; - - callbackOne = jasmine.createSpy('callbackone').and.callFake( - () => - new Promise(resolve => { - setTimeout(() => { - if (counterOne === stopOnOne) { - resolve(); - repeaterOne.stop(); - } else { - counterOne += 1; - resolve(); - } - }, 1); - }) - ); - - callbackTwo = jasmine.createSpy('callbacktwo').and.callFake( - () => - new Promise(resolve => { - setTimeout(() => { - if (counterTwo === stopOnTwo) { - resolve(); - repeaterTwo.stop(); - } else { - counterTwo += 1; - resolve(); - } - }, 1); - }) - ); + jest.useFakeTimers(); + + callbackOne = jasmine.createSpy('callbackone').and.callFake(() => Promise.resolve()); + callbackTwo = jasmine.createSpy('callbackone').and.callFake(() => Promise.resolve()); }); it('should be able to have two independant repeaters', async () => { repeaterOne = new Repeater(callbackOne, 1); - await repeaterOne.start(); - expect(callbackOne).toHaveBeenCalledTimes(stopOnOne); - expect(counterOne).toBe(stopOnOne); - repeaterTwo = new Repeater(callbackTwo, 1); - await repeaterTwo.start(); - expect(callbackTwo).toHaveBeenCalledTimes(stopOnTwo); - expect(counterTwo).toBe(stopOnTwo); + + repeaterTwo.start(); + repeaterOne.start(); + + await advanceTime(1); + + repeaterOne.stop(); + + await advanceTime(1); + + expect(callbackOne).toHaveBeenCalledTimes(1); + expect(callbackTwo).toHaveBeenCalledTimes(2); }); }); From 7d8334f6f3e17c844d15240c97c1f66f1b757cdc Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 9 Sep 2021 13:41:40 +0200 Subject: [PATCH 10/62] RepeaterWithLock for locking distributed Uwazi tasks --- app/api/tasksmanager/RepeatWithLock.ts | 53 +++++++++++++++++ .../tasksmanager/specs/repeatWithLock.spec.js | 58 +++++++++++++++++++ package.json | 2 + yarn.lock | 21 ++++++- 4 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 app/api/tasksmanager/RepeatWithLock.ts create mode 100644 app/api/tasksmanager/specs/repeatWithLock.spec.js diff --git a/app/api/tasksmanager/RepeatWithLock.ts b/app/api/tasksmanager/RepeatWithLock.ts new file mode 100644 index 0000000000..64578277ef --- /dev/null +++ b/app/api/tasksmanager/RepeatWithLock.ts @@ -0,0 +1,53 @@ +import Redis from 'redis'; +import Redlock from 'redlock'; + +export class RepeatWithLock { + private lockName: string; + + private task: () => void; + + private redlock: Redlock | undefined; + + private stopTask: ((value: unknown) => void) | undefined; + + private redisClient: Redis.RedisClient | undefined; + + constructor(lockName: string, task: () => void) { + this.lockName = `locks:${lockName}`; + this.task = task; + } + + async start() { + this.redisClient = await Redis.createClient('redis://localhost:6379'); + this.redlock = await new Redlock([this.redisClient]); + + this.redlock.on('clientError', function(err) { + console.error('A redis error has occurred:', err); + }); + + this.lockTask(); + } + + async stop() { + await new Promise(resolve => { + this.stopTask = resolve; + }); + + await this.redlock?.quit(); + await this.redisClient?.end(true); + } + + lockTask() { + this.redlock!.lock(this.lockName, 10000).then(async lock => { + if (this.stopTask) { + await lock.unlock(); + this.stopTask(true); + return; + } + + await this.task(); + await lock.unlock(); + this.lockTask(); + }); + } +} diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js new file mode 100644 index 0000000000..a0de110352 --- /dev/null +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -0,0 +1,58 @@ +import { RepeatWithLock } from '../RepeatWithLock'; +import { RedisServer } from '../RedisServer'; + +describe('RepeatWithLock', () => { + let finishTask; + let task; + let redisServer; + let nodeOne; + let nodeTwo; + + const waitForLockDistribution = async () => { + if (finishTask) { + finishTask(); + } + return new Promise(r => setTimeout(r, 100)); + }; + + beforeAll(async () => { + redisServer = new RedisServer(); + await redisServer.start(); + }); + + afterAll(async () => { + await redisServer.stop(); + }); + + beforeEach(async () => { + task = jasmine.createSpy('callbackone').and.callFake( + () => + new Promise(resolve => { + finishTask = () => { + resolve(); + }; + }) + ); + + nodeOne = new RepeatWithLock('my_locked_task', task); + nodeTwo = new RepeatWithLock('my_locked_task', task); + await nodeOne.start(); + await nodeTwo.start(); + }); + + it('should run the task one at a time', async () => { + await waitForLockDistribution(); + expect(task).toHaveBeenCalledTimes(1); + + await waitForLockDistribution(); + expect(task).toHaveBeenCalledTimes(2); + + await waitForLockDistribution(); + expect(task).toHaveBeenCalledTimes(3); + finishTask(); + + await Promise.all([nodeOne.stop(), nodeTwo.stop()]); + + finishTask(); + }); +}); diff --git a/package.json b/package.json index 67f8f66e84..9b2722161d 100644 --- a/package.json +++ b/package.json @@ -81,6 +81,7 @@ "@types/react-dropzone": "^4.2.2", "@types/react-modal": "^3.12.0", "@types/redis": "^2.8.31", + "@types/redlock": "^4.0.2", "ajv": "^6.12.3", "ajv-keywords": "^3.4.1", "async": "2.6.3", @@ -164,6 +165,7 @@ "redis-mock": "^0.56.3", "redis": "^3.1.2", "redis-server": "^1.2.2", + "redlock": "^4.2.0", "redux": "^3.7.2", "redux-devtools-extension": "^2.13.2", "redux-thunk": "^2.0.1", diff --git a/yarn.lock b/yarn.lock index 4bb5c089e5..e5972bad4a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1820,6 +1820,11 @@ dependencies: "@babel/types" "^7.3.0" +"@types/bluebird@*": + version "3.5.36" + resolved "https://registry.yarnpkg.com/@types/bluebird/-/bluebird-3.5.36.tgz#00d9301d4dc35c2f6465a8aec634bb533674c652" + integrity sha512-HBNx4lhkxN7bx6P0++W8E289foSu8kO8GCk2unhuVggO+cE7rh9DhZUyPhUxNRG9m+5B5BTKxZQ5ZP92x/mx9Q== + "@types/body-parser@*": version "1.17.1" resolved "https://registry.yarnpkg.com/@types/body-parser/-/body-parser-1.17.1.tgz#18fcf61768fb5c30ccc508c21d6fd2e8b3bf7897" @@ -2344,6 +2349,13 @@ dependencies: "@types/node" "*" +"@types/redlock@^4.0.2": + version "4.0.2" + resolved "https://registry.yarnpkg.com/@types/redlock/-/redlock-4.0.2.tgz#3ae94bc236d4cff12815b62b9b9e51a8e7f9f25f" + integrity sha512-3MMTCWOXrfQFU8dLAbQWDOsftnFagQxmUkfR5KK/DB/zKPUh0ZzPFkNV84nfw1yMFYLfd4MgITGT+XolYd8d1w== + dependencies: + "@types/bluebird" "*" + "@types/redux-mock-store@^1.0.1": version "1.0.1" resolved "https://registry.yarnpkg.com/@types/redux-mock-store/-/redux-mock-store-1.0.1.tgz#90ca701d640aef7c007f564a9a4f8dc03180b0f7" @@ -3380,7 +3392,7 @@ bluebird@3.5.1, bluebird@^3.5.1: version "3.5.1" resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.5.1.tgz#d9551f9de98f1fcda1e683d17ee91a0602ee2eb9" -bluebird@^3.5.5: +bluebird@^3.5.5, bluebird@^3.7.2: version "3.7.2" resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== @@ -12646,6 +12658,13 @@ redis@^3.1.2: redis-errors "^1.2.0" redis-parser "^3.0.0" +redlock@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/redlock/-/redlock-4.2.0.tgz#c26590768559afd5fff76aa1133c94b411ff4f5f" + integrity sha512-j+oQlG+dOwcetUt2WJWttu4CZVeRzUrcVcISFmEmfyuwCVSJ93rDT7YSgg7H7rnxwoRyk/jU46kycVka5tW7jA== + dependencies: + bluebird "^3.7.2" + reduce-css-calc@~1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/reduce-css-calc/-/reduce-css-calc-1.3.0.tgz#747c914e049614a4c9cfbba629871ad1d2927716" From e65abf4d844192b103e7f6213701400569f58f03 Mon Sep 17 00:00:00 2001 From: gabo Date: Fri, 10 Sep 2021 12:00:35 +0200 Subject: [PATCH 11/62] Repeat with lock handles redis unavailable --- app/api/tasksmanager/RedisServer.ts | 6 +- app/api/tasksmanager/RepeatWithLock.ts | 27 +++- .../tasksmanager/specs/repeatWithLock.spec.js | 148 ++++++++++++++---- 3 files changed, 145 insertions(+), 36 deletions(-) diff --git a/app/api/tasksmanager/RedisServer.ts b/app/api/tasksmanager/RedisServer.ts index 0a91486327..d58c912606 100644 --- a/app/api/tasksmanager/RedisServer.ts +++ b/app/api/tasksmanager/RedisServer.ts @@ -38,7 +38,11 @@ export class RedisServer { port: 6379, bin: this.pathToBin, }); - await this.server.open(); + try { + await this.server.open(); + } catch (err) { + console.log(err); + } } async stop() { diff --git a/app/api/tasksmanager/RepeatWithLock.ts b/app/api/tasksmanager/RepeatWithLock.ts index 64578277ef..376c7bb043 100644 --- a/app/api/tasksmanager/RepeatWithLock.ts +++ b/app/api/tasksmanager/RepeatWithLock.ts @@ -12,7 +12,7 @@ export class RepeatWithLock { private redisClient: Redis.RedisClient | undefined; - constructor(lockName: string, task: () => void) { + constructor(lockName: string, task: () => void, ) { this.lockName = `locks:${lockName}`; this.task = task; } @@ -21,7 +21,14 @@ export class RepeatWithLock { this.redisClient = await Redis.createClient('redis://localhost:6379'); this.redlock = await new Redlock([this.redisClient]); - this.redlock.on('clientError', function(err) { + this.redisClient.on('error', async error => { + if (error.code !== 'ECONNREFUSED') { + throw error; + } + console.log(error); + }); + + this.redlock.on('error', err => { console.error('A redis error has occurred:', err); }); @@ -37,8 +44,10 @@ export class RepeatWithLock { await this.redisClient?.end(true); } - lockTask() { - this.redlock!.lock(this.lockName, 10000).then(async lock => { + async lockTask() { + try { + const lock = await this.redlock!.lock(this.lockName, 10000); + if (this.stopTask) { await lock.unlock(); this.stopTask(true); @@ -47,7 +56,13 @@ export class RepeatWithLock { await this.task(); await lock.unlock(); - this.lockTask(); - }); + } catch (error) { + if (error.name !== 'LockError') { + throw error; + } + } + + this.lockTask(); + } } diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index a0de110352..f7482e8630 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -1,3 +1,4 @@ +import waitForExpect from 'wait-for-expect'; import { RepeatWithLock } from '../RepeatWithLock'; import { RedisServer } from '../RedisServer'; @@ -5,54 +6,143 @@ describe('RepeatWithLock', () => { let finishTask; let task; let redisServer; - let nodeOne; - let nodeTwo; - - const waitForLockDistribution = async () => { - if (finishTask) { - finishTask(); - } - return new Promise(r => setTimeout(r, 100)); - }; beforeAll(async () => { redisServer = new RedisServer(); - await redisServer.start(); }); afterAll(async () => { - await redisServer.stop(); + if (redisServer.connect) { + await redisServer.end(); + } }); beforeEach(async () => { - task = jasmine.createSpy('callbackone').and.callFake( - () => - new Promise(resolve => { - finishTask = () => { - resolve(); - }; - }) - ); - - nodeOne = new RepeatWithLock('my_locked_task', task); - nodeTwo = new RepeatWithLock('my_locked_task', task); + task = jasmine.createSpy('callbackone').and.callFake(() => { + console.log('start'); + return new Promise(resolve => { + finishTask = () => { + resolve(); + console.log('end'); + + }; + }); + }); + }); + + it('should run the task one at a time', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWithLock('my_locked_task', task); + const nodeTwo = new RepeatWithLock('my_locked_task', task); await nodeOne.start(); await nodeTwo.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); + }); + + finishTask(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(2); + }); + + finishTask(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(3); + }); + + finishTask(); + + await nodeOne.stop(); + await nodeTwo.stop(); + + await redisServer.stop(); }); - it('should run the task one at a time', async () => { - await waitForLockDistribution(); + it('should execute task when the redis server is available', async () => { + const nodeOne = new RepeatWithLock('my_locked_task', task); + await nodeOne.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(0); + }); + + await redisServer.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); + }); + + finishTask(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(2); + }); + + finishTask(); + + await nodeOne.stop(); + + await redisServer.stop(); + }); + + it('should continue executing tasks after redis was unavailable for a while', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWithLock('my_locked_task', task); + await nodeOne.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); + }); + + await redisServer.stop(); + + finishTask(); + + await new Promise(resolve => { + setTimeout(resolve, 200); + }); + expect(task).toHaveBeenCalledTimes(1); - await waitForLockDistribution(); - expect(task).toHaveBeenCalledTimes(2); + await redisServer.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(2); + }); - await waitForLockDistribution(); - expect(task).toHaveBeenCalledTimes(3); finishTask(); - await Promise.all([nodeOne.stop(), nodeTwo.stop()]); + await nodeOne.stop(); + + await redisServer.stop(); + }); + + it('redlock', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWithLock('my_locked_task', task); + const nodeTwo = new RepeatWithLock('my_locked_task', task); + + await nodeOne.start(); + await nodeTwo.start(); + + await new Promise(resolve => { + setTimeout(resolve, 3000); + }); finishTask(); + await nodeOne.stop(); + await new Promise(resolve => { + setTimeout(resolve, 500); + }); + finishTask(); + await nodeTwo.stop(); + + await redisServer.stop(); }); }); From abc31e16234371842702339d7e10c16f580c209e Mon Sep 17 00:00:00 2001 From: gabo Date: Mon, 13 Sep 2021 12:11:48 +0200 Subject: [PATCH 12/62] Handle error in task and add delay between tasks parameter in repeat with lock --- .../{RepeatWithLock.ts => RepeatWith.ts} | 33 +++-- .../tasksmanager/specs/repeatWithLock.spec.js | 118 +++++++++++++++--- app/api/utils/__mocks__/handleError.js | 1 + 3 files changed, 127 insertions(+), 25 deletions(-) rename app/api/tasksmanager/{RepeatWithLock.ts => RepeatWith.ts} (57%) create mode 100644 app/api/utils/__mocks__/handleError.js diff --git a/app/api/tasksmanager/RepeatWithLock.ts b/app/api/tasksmanager/RepeatWith.ts similarity index 57% rename from app/api/tasksmanager/RepeatWithLock.ts rename to app/api/tasksmanager/RepeatWith.ts index 376c7bb043..1d3ec51019 100644 --- a/app/api/tasksmanager/RepeatWithLock.ts +++ b/app/api/tasksmanager/RepeatWith.ts @@ -1,7 +1,8 @@ import Redis from 'redis'; import Redlock from 'redlock'; +import handleError from 'api/utils/handleError'; -export class RepeatWithLock { +export class RepeatWith { private lockName: string; private task: () => void; @@ -12,20 +13,30 @@ export class RepeatWithLock { private redisClient: Redis.RedisClient | undefined; - constructor(lockName: string, task: () => void, ) { + private maxLockTime: number; + + private delayTimeBetweenTasks: number; + + constructor( + lockName: string, + task: () => void, + maxLockTime: number = 10000, + delayTimeBetweenTasks = 0 + ) { + this.maxLockTime = maxLockTime; + this.delayTimeBetweenTasks = delayTimeBetweenTasks; this.lockName = `locks:${lockName}`; this.task = task; } async start() { this.redisClient = await Redis.createClient('redis://localhost:6379'); - this.redlock = await new Redlock([this.redisClient]); + this.redlock = await new Redlock([this.redisClient], { retryJitter: 0, retryDelay: 20 }); this.redisClient.on('error', async error => { if (error.code !== 'ECONNREFUSED') { throw error; } - console.log(error); }); this.redlock.on('error', err => { @@ -46,7 +57,7 @@ export class RepeatWithLock { async lockTask() { try { - const lock = await this.redlock!.lock(this.lockName, 10000); + const lock = await this.redlock!.lock(this.lockName, this.maxLockTime + this.delayTimeBetweenTasks); if (this.stopTask) { await lock.unlock(); @@ -54,15 +65,21 @@ export class RepeatWithLock { return; } - await this.task(); + try { + await this.task(); + } catch (error) { + handleError(error); + } + await new Promise(resolve => { + setTimeout(resolve, this.delayTimeBetweenTasks); + }); await lock.unlock(); } catch (error) { - if (error.name !== 'LockError') { + if (error && error.name !== 'LockError') { throw error; } } this.lockTask(); - } } diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index f7482e8630..d940557371 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -1,10 +1,16 @@ +// const handleErrorSpy = jest.mock('api/utils/handleError.js', () => {}); +// import handleError from 'api/utils/handleError'; +import handleError from 'api/utils/handleError'; import waitForExpect from 'wait-for-expect'; -import { RepeatWithLock } from '../RepeatWithLock'; +import { RepeatWith } from '../RepeatWith'; import { RedisServer } from '../RedisServer'; +jest.mock('api/utils/handleError.js', () => jest.fn()); + describe('RepeatWithLock', () => { let finishTask; let task; + let rejectTask; let redisServer; beforeAll(async () => { @@ -20,21 +26,27 @@ describe('RepeatWithLock', () => { beforeEach(async () => { task = jasmine.createSpy('callbackone').and.callFake(() => { console.log('start'); - return new Promise(resolve => { + return new Promise((resolve, reject) => { + rejectTask = reject; finishTask = () => { resolve(); console.log('end'); - }; }); }); }); + async function sleepTime(time) { + await new Promise(resolve => { + setTimeout(resolve, time); + }); + } + it('should run the task one at a time', async () => { await redisServer.start(); - const nodeOne = new RepeatWithLock('my_locked_task', task); - const nodeTwo = new RepeatWithLock('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task); + const nodeTwo = new RepeatWith('my_locked_task', task); await nodeOne.start(); await nodeTwo.start(); @@ -63,7 +75,7 @@ describe('RepeatWithLock', () => { }); it('should execute task when the redis server is available', async () => { - const nodeOne = new RepeatWithLock('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task); await nodeOne.start(); await waitForExpect(async () => { @@ -92,7 +104,7 @@ describe('RepeatWithLock', () => { it('should continue executing tasks after redis was unavailable for a while', async () => { await redisServer.start(); - const nodeOne = new RepeatWithLock('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task); await nodeOne.start(); await waitForExpect(async () => { @@ -103,12 +115,10 @@ describe('RepeatWithLock', () => { finishTask(); - await new Promise(resolve => { - setTimeout(resolve, 200); + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); }); - expect(task).toHaveBeenCalledTimes(1); - await redisServer.start(); await waitForExpect(async () => { @@ -122,24 +132,98 @@ describe('RepeatWithLock', () => { await redisServer.stop(); }); - it('redlock', async () => { + it('should handle when a lock fails for too many times', async () => { await redisServer.start(); - const nodeOne = new RepeatWithLock('my_locked_task', task); - const nodeTwo = new RepeatWithLock('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task); + const nodeTwo = new RepeatWith('my_locked_task', task); await nodeOne.start(); await nodeTwo.start(); await new Promise(resolve => { - setTimeout(resolve, 3000); + setTimeout(resolve, 2100); + }); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); }); finishTask(); await nodeOne.stop(); - await new Promise(resolve => { - setTimeout(resolve, 500); + finishTask(); + await nodeTwo.stop(); + + await redisServer.stop(); + }); + + it('should handle when a node fails to unlock the lock', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWith('my_locked_task', task, 50); + const nodeTwo = new RepeatWith('my_locked_task', task, 50); + + await nodeOne.start(); + await sleepTime(10); + const firstFinishTask = finishTask; + await nodeTwo.start(); + + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(2); + }); + + firstFinishTask(); + await nodeOne.stop(); + finishTask(); + await nodeTwo.stop(); + + await redisServer.stop(); + }); + + it('should continue executing the task if one task fails', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWith('my_locked_task', task, 500); + + await nodeOne.start(); + console.log(rejectTask); + await sleepTime(25); + console.log(rejectTask); + const someError = { error: 'some error' }; + rejectTask(someError); + await waitForExpect(async () => { + expect(handleError).toHaveBeenLastCalledWith(someError); }); + + finishTask(); + await sleepTime(10); + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(2); + }); + finishTask(); + await nodeOne.stop(); + await redisServer.stop(); + }); + + it('should add a delay between task executions', async () => { + await redisServer.start(); + + const nodeOne = new RepeatWith('my_locked_task', task, 10, 250); + const nodeTwo = new RepeatWith('my_locked_task', task, 10, 250); + + await nodeOne.start(); + await nodeTwo.start(); + + await sleepTime(50); + finishTask(); + await sleepTime(50); + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); + }); + + finishTask(); + await nodeOne.stop(); + finishTask(); await nodeTwo.stop(); diff --git a/app/api/utils/__mocks__/handleError.js b/app/api/utils/__mocks__/handleError.js new file mode 100644 index 0000000000..20e731fd4d --- /dev/null +++ b/app/api/utils/__mocks__/handleError.js @@ -0,0 +1 @@ +export default () => {} \ No newline at end of file From 5f38b0b89263017234be40e24032bd284d8ade29 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 13 Sep 2021 17:54:21 +0200 Subject: [PATCH 13/62] Repeater specs cleanup WIP --- app/api/tasksmanager/RepeatWith.ts | 54 ++++++++++----- .../tasksmanager/specs/repeatWithLock.spec.js | 66 +++++++------------ 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/app/api/tasksmanager/RepeatWith.ts b/app/api/tasksmanager/RepeatWith.ts index 1d3ec51019..ba79c3d2c4 100644 --- a/app/api/tasksmanager/RepeatWith.ts +++ b/app/api/tasksmanager/RepeatWith.ts @@ -17,21 +17,32 @@ export class RepeatWith { private delayTimeBetweenTasks: number; + private retryDelay: number; + + private id: string; + constructor( lockName: string, task: () => void, - maxLockTime: number = 10000, - delayTimeBetweenTasks = 0 + maxLockTime: number = 2000, + delayTimeBetweenTasks: number = 0, + retryDelay: number = 200, + id: string ) { this.maxLockTime = maxLockTime; + this.retryDelay = retryDelay; this.delayTimeBetweenTasks = delayTimeBetweenTasks; this.lockName = `locks:${lockName}`; this.task = task; + this.id = id; } async start() { this.redisClient = await Redis.createClient('redis://localhost:6379'); - this.redlock = await new Redlock([this.redisClient], { retryJitter: 0, retryDelay: 20 }); + this.redlock = await new Redlock([this.redisClient], { + retryJitter: 0, + retryDelay: this.retryDelay, + }); this.redisClient.on('error', async error => { if (error.code !== 'ECONNREFUSED') { @@ -39,40 +50,49 @@ export class RepeatWith { } }); - this.redlock.on('error', err => { - console.error('A redis error has occurred:', err); - }); - this.lockTask(); } + async runTask() { + try { + await this.task(); + } catch (error) { + handleError(error); + } + await new Promise(resolve => { + setTimeout(resolve, this.delayTimeBetweenTasks); + }); + } + async stop() { await new Promise(resolve => { this.stopTask = resolve; }); + console.log('shutting down', this.id); await this.redlock?.quit(); await this.redisClient?.end(true); + console.log('=='); } async lockTask() { try { - const lock = await this.redlock!.lock(this.lockName, this.maxLockTime + this.delayTimeBetweenTasks); + const lock = await this.redlock!.lock( + this.lockName, + this.maxLockTime + this.delayTimeBetweenTasks + ); + + console.log('locked!', this.id); if (this.stopTask) { - await lock.unlock(); this.stopTask(true); + console.log('releasing because of stop', this.id); + await lock.unlock(); return; } - try { - await this.task(); - } catch (error) { - handleError(error); - } - await new Promise(resolve => { - setTimeout(resolve, this.delayTimeBetweenTasks); - }); + await this.runTask(); + console.log('releasing because of finished', this.id); await lock.unlock(); } catch (error) { if (error && error.name !== 'LockError') { diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index d940557371..ffaae63f72 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -7,44 +7,49 @@ import { RedisServer } from '../RedisServer'; jest.mock('api/utils/handleError.js', () => jest.fn()); +/* eslint-disable max-statements */ describe('RepeatWithLock', () => { let finishTask; let task; let rejectTask; let redisServer; + let pendingTasks; beforeAll(async () => { redisServer = new RedisServer(); + await redisServer.start(); }); afterAll(async () => { - if (redisServer.connect) { - await redisServer.end(); - } + await redisServer.stop(); }); beforeEach(async () => { + pendingTasks = []; task = jasmine.createSpy('callbackone').and.callFake(() => { console.log('start'); return new Promise((resolve, reject) => { + pendingTasks.push(resolve); rejectTask = reject; + console.log('end'); finishTask = () => { resolve(); - console.log('end'); }; }); }); }); + afterEach(() => { + pendingTasks.forEach(t => t()); + }); + async function sleepTime(time) { await new Promise(resolve => { setTimeout(resolve, time); }); } - it('should run the task one at a time', async () => { - await redisServer.start(); - + fit('should run the task one at a time', async () => { const nodeOne = new RepeatWith('my_locked_task', task); const nodeTwo = new RepeatWith('my_locked_task', task); await nodeOne.start(); @@ -70,11 +75,10 @@ describe('RepeatWithLock', () => { await nodeOne.stop(); await nodeTwo.stop(); - - await redisServer.stop(); }); - it('should execute task when the redis server is available', async () => { + fit('should execute task when the redis server is available', async () => { + await redisServer.stop(); const nodeOne = new RepeatWith('my_locked_task', task); await nodeOne.start(); @@ -97,13 +101,9 @@ describe('RepeatWithLock', () => { finishTask(); await nodeOne.stop(); - - await redisServer.stop(); }); - it('should continue executing tasks after redis was unavailable for a while', async () => { - await redisServer.start(); - + fit('should continue executing tasks after redis was unavailable for a while', async () => { const nodeOne = new RepeatWith('my_locked_task', task); await nodeOne.start(); @@ -128,38 +128,26 @@ describe('RepeatWithLock', () => { finishTask(); await nodeOne.stop(); - - await redisServer.stop(); }); - it('should handle when a lock fails for too many times', async () => { - await redisServer.start(); - - const nodeOne = new RepeatWith('my_locked_task', task); - const nodeTwo = new RepeatWith('my_locked_task', task); + fit('should handle when a lock fails for too many times', async () => { + const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'one'); + const nodeTwo = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'two'); await nodeOne.start(); await nodeTwo.start(); - await new Promise(resolve => { - setTimeout(resolve, 2100); - }); + await sleepTime(250); - await waitForExpect(async () => { - expect(task).toHaveBeenCalledTimes(1); - }); + expect(task).toHaveBeenCalledTimes(1); finishTask(); await nodeOne.stop(); finishTask(); await nodeTwo.stop(); - - await redisServer.stop(); }); it('should handle when a node fails to unlock the lock', async () => { - await redisServer.start(); - const nodeOne = new RepeatWith('my_locked_task', task, 50); const nodeTwo = new RepeatWith('my_locked_task', task, 50); @@ -176,19 +164,15 @@ describe('RepeatWithLock', () => { await nodeOne.stop(); finishTask(); await nodeTwo.stop(); - - await redisServer.stop(); }); it('should continue executing the task if one task fails', async () => { - await redisServer.start(); - const nodeOne = new RepeatWith('my_locked_task', task, 500); await nodeOne.start(); - console.log(rejectTask); + await sleepTime(25); - console.log(rejectTask); + const someError = { error: 'some error' }; rejectTask(someError); await waitForExpect(async () => { @@ -202,12 +186,10 @@ describe('RepeatWithLock', () => { }); finishTask(); await nodeOne.stop(); - await redisServer.stop(); }); + // eslint-disable-next-line max-statements it('should add a delay between task executions', async () => { - await redisServer.start(); - const nodeOne = new RepeatWith('my_locked_task', task, 10, 250); const nodeTwo = new RepeatWith('my_locked_task', task, 10, 250); @@ -226,7 +208,5 @@ describe('RepeatWithLock', () => { finishTask(); await nodeTwo.stop(); - - await redisServer.stop(); }); }); From 3c3a78ba85c210aee29b0f53542d1cea62439c04 Mon Sep 17 00:00:00 2001 From: gabo Date: Tue, 14 Sep 2021 10:52:05 +0200 Subject: [PATCH 14/62] Clean up tests for repeater with lock --- app/api/tasksmanager/RepeatWith.ts | 15 ++++--- .../tasksmanager/specs/repeatWithLock.spec.js | 39 ++++++++++--------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/app/api/tasksmanager/RepeatWith.ts b/app/api/tasksmanager/RepeatWith.ts index ba79c3d2c4..04e6c02071 100644 --- a/app/api/tasksmanager/RepeatWith.ts +++ b/app/api/tasksmanager/RepeatWith.ts @@ -27,7 +27,7 @@ export class RepeatWith { maxLockTime: number = 2000, delayTimeBetweenTasks: number = 0, retryDelay: number = 200, - id: string + id: string = '1' ) { this.maxLockTime = maxLockTime; this.retryDelay = retryDelay; @@ -40,7 +40,7 @@ export class RepeatWith { async start() { this.redisClient = await Redis.createClient('redis://localhost:6379'); this.redlock = await new Redlock([this.redisClient], { - retryJitter: 0, + retryJitter: 25, retryDelay: this.retryDelay, }); @@ -53,15 +53,20 @@ export class RepeatWith { this.lockTask(); } + async sleepTime(time: number) { + await new Promise(resolve => { + setTimeout(resolve, time); + }); + } + async runTask() { try { await this.task(); } catch (error) { handleError(error); } - await new Promise(resolve => { - setTimeout(resolve, this.delayTimeBetweenTasks); - }); + + await this.sleepTime(this.delayTimeBetweenTasks); } async stop() { diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index ffaae63f72..4215fcaa25 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -49,7 +49,7 @@ describe('RepeatWithLock', () => { }); } - fit('should run the task one at a time', async () => { + it('should run one task at a time', async () => { const nodeOne = new RepeatWith('my_locked_task', task); const nodeTwo = new RepeatWith('my_locked_task', task); await nodeOne.start(); @@ -77,14 +77,14 @@ describe('RepeatWithLock', () => { await nodeTwo.stop(); }); - fit('should execute task when the redis server is available', async () => { + it('should wait until the redis server is available to execute the task', async () => { await redisServer.stop(); - const nodeOne = new RepeatWith('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); await nodeOne.start(); - await waitForExpect(async () => { - expect(task).toHaveBeenCalledTimes(0); - }); + await sleepTime(50); + + expect(task).toHaveBeenCalledTimes(0); await redisServer.start(); @@ -103,8 +103,8 @@ describe('RepeatWithLock', () => { await nodeOne.stop(); }); - fit('should continue executing tasks after redis was unavailable for a while', async () => { - const nodeOne = new RepeatWith('my_locked_task', task); + it('should continue executing tasks after redis was unavailable for a while', async () => { + const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); await nodeOne.start(); await waitForExpect(async () => { @@ -115,9 +115,8 @@ describe('RepeatWithLock', () => { finishTask(); - await waitForExpect(async () => { - expect(task).toHaveBeenCalledTimes(1); - }); + await sleepTime(50); + expect(task).toHaveBeenCalledTimes(1); await redisServer.start(); @@ -130,7 +129,7 @@ describe('RepeatWithLock', () => { await nodeOne.stop(); }); - fit('should handle when a lock fails for too many times', async () => { + it('should handle when a lock fails for too many retries', async () => { const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'one'); const nodeTwo = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'two'); @@ -171,7 +170,9 @@ describe('RepeatWithLock', () => { await nodeOne.start(); - await sleepTime(25); + await waitForExpect(async () => { + expect(task).toHaveBeenCalledTimes(1); + }); const someError = { error: 'some error' }; rejectTask(someError); @@ -180,7 +181,6 @@ describe('RepeatWithLock', () => { }); finishTask(); - await sleepTime(10); await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(2); }); @@ -190,19 +190,20 @@ describe('RepeatWithLock', () => { // eslint-disable-next-line max-statements it('should add a delay between task executions', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 10, 250); - const nodeTwo = new RepeatWith('my_locked_task', task, 10, 250); + const nodeOne = new RepeatWith('my_locked_task', task, 50, 50, 20); + const nodeTwo = new RepeatWith('my_locked_task', task, 50, 50, 20); await nodeOne.start(); await nodeTwo.start(); - await sleepTime(50); - finishTask(); - await sleepTime(50); await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(1); }); + finishTask(); + await sleepTime(25); + expect(task).toHaveBeenCalledTimes(1); + finishTask(); await nodeOne.stop(); From 54c31b87aea5bae1733f7608ee85e81d64ff3f19 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 14 Sep 2021 11:53:36 +0200 Subject: [PATCH 15/62] Cleanup of consoloe logs and fixed some eslint errors --- app/api/tasksmanager/RepeatWith.ts | 25 +++++--------- .../tasksmanager/specs/repeatWithLock.spec.js | 33 ++++++++----------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/app/api/tasksmanager/RepeatWith.ts b/app/api/tasksmanager/RepeatWith.ts index 04e6c02071..16a4a522cd 100644 --- a/app/api/tasksmanager/RepeatWith.ts +++ b/app/api/tasksmanager/RepeatWith.ts @@ -40,11 +40,11 @@ export class RepeatWith { async start() { this.redisClient = await Redis.createClient('redis://localhost:6379'); this.redlock = await new Redlock([this.redisClient], { - retryJitter: 25, + retryJitter: 0, retryDelay: this.retryDelay, }); - this.redisClient.on('error', async error => { + this.redisClient.on('error', error => { if (error.code !== 'ECONNREFUSED') { throw error; } @@ -53,9 +53,9 @@ export class RepeatWith { this.lockTask(); } - async sleepTime(time: number) { + async waitBetweenTasks() { await new Promise(resolve => { - setTimeout(resolve, time); + setTimeout(resolve, this.delayTimeBetweenTasks); }); } @@ -66,7 +66,7 @@ export class RepeatWith { handleError(error); } - await this.sleepTime(this.delayTimeBetweenTasks); + await this.waitBetweenTasks(); } async stop() { @@ -74,31 +74,24 @@ export class RepeatWith { this.stopTask = resolve; }); - console.log('shutting down', this.id); await this.redlock?.quit(); await this.redisClient?.end(true); - console.log('=='); } - async lockTask() { + async lockTask(): Promise { try { const lock = await this.redlock!.lock( this.lockName, this.maxLockTime + this.delayTimeBetweenTasks ); - console.log('locked!', this.id); - if (this.stopTask) { this.stopTask(true); - console.log('releasing because of stop', this.id); await lock.unlock(); - return; + } else { + await this.runTask(); + await lock.unlock(); } - - await this.runTask(); - console.log('releasing because of finished', this.id); - await lock.unlock(); } catch (error) { if (error && error.name !== 'LockError') { throw error; diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index 4215fcaa25..4435bffe99 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -13,7 +13,6 @@ describe('RepeatWithLock', () => { let task; let rejectTask; let redisServer; - let pendingTasks; beforeAll(async () => { redisServer = new RedisServer(); @@ -25,22 +24,15 @@ describe('RepeatWithLock', () => { }); beforeEach(async () => { - pendingTasks = []; - task = jasmine.createSpy('callbackone').and.callFake(() => { - console.log('start'); - return new Promise((resolve, reject) => { - pendingTasks.push(resolve); - rejectTask = reject; - console.log('end'); - finishTask = () => { - resolve(); - }; - }); - }); - }); - - afterEach(() => { - pendingTasks.forEach(t => t()); + task = jasmine.createSpy('callbackone').and.callFake( + () => + new Promise((resolve, reject) => { + rejectTask = reject; + finishTask = () => { + resolve(); + }; + }) + ); }); async function sleepTime(time) { @@ -72,8 +64,9 @@ describe('RepeatWithLock', () => { }); finishTask(); - await nodeOne.stop(); + + finishTask(); await nodeTwo.stop(); }); @@ -130,8 +123,8 @@ describe('RepeatWithLock', () => { }); it('should handle when a lock fails for too many retries', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'one'); - const nodeTwo = new RepeatWith('my_locked_task', task, 2000, 0, 20, 'two'); + const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); + const nodeTwo = new RepeatWith('my_locked_task', task, 2000, 0, 20); await nodeOne.start(); await nodeTwo.start(); From ea7e87e71e972e619e6ea41efd6cf39910b4918b Mon Sep 17 00:00:00 2001 From: gabo Date: Wed, 15 Sep 2021 10:34:12 +0200 Subject: [PATCH 16/62] Task manager refactor to avoid factory --- .../specs/ExternalDummyService.ts | 14 +-- .../tasksmanager/specs/taskManager.spec.ts | 56 ++++++++---- app/api/tasksmanager/taskManager.ts | 91 ++++++++++--------- 3 files changed, 92 insertions(+), 69 deletions(-) diff --git a/app/api/tasksmanager/specs/ExternalDummyService.ts b/app/api/tasksmanager/specs/ExternalDummyService.ts index 9c726a3076..1154c30958 100644 --- a/app/api/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/tasksmanager/specs/ExternalDummyService.ts @@ -58,7 +58,7 @@ export class ExternalDummyService { return this.redisSMQ; } - async initQueue() { + async resetQueue() { try { await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_tasks` }); } catch (err) { @@ -94,16 +94,18 @@ export class ExternalDummyService { } async read() { - const { qname, message } = await this.rsmq.receiveMessageAsync({ - qname: this.serviceName + '_tasks', + const { message } = await this.rsmq.receiveMessageAsync({ + qname: `${this.serviceName}_tasks`, }); this.currentTask = message; return message; } - async start(client: Redis.RedisClient) { - this.redisSMQ = await new RedisSMQ({ client }); - await this.initQueue(); + async start(redisUrl: string) { + const redisClient = await Redis.createClient(redisUrl); + + this.redisSMQ = await new RedisSMQ({ client: redisClient }); + await this.resetQueue(); const start = new Promise(resolve => { this.server = this.app.listen(this.port, () => { diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 42b9ad383b..1f4c9b2fc7 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,13 +1,14 @@ import fs from 'fs'; -import { TaskManagerFactory, TaskManager, Service } from 'api/tasksmanager/taskManager'; +import { TaskManager, Service } from 'api/tasksmanager/taskManager'; import Redis from 'redis'; +import RedisSMQ from 'rsmq'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; describe('taskManager', () => { - let taskManager: TaskManager; + let taskManager: TaskManager | undefined; let service: Service; let redisServer: RedisServer; @@ -20,19 +21,21 @@ describe('taskManager', () => { dataUrl: 'http://localhost:1234/data', filesUrl: 'http://localhost:1234/files', resultsUrl: 'http://localhost:1234/results', + redisUrl: 'redis://localhost:6379', }; redisServer = new RedisServer(); await redisServer.start(); - client = await Redis.createClient('redis://localhost:6379'); - taskManager = await TaskManagerFactory.create(client, service); externalDummyService = new ExternalDummyService(1234); - await externalDummyService.start(client); + await externalDummyService.start(service.redisUrl); + + taskManager = new TaskManager(service); + await taskManager.start(); }); afterAll(async () => { - await taskManager.stop(); + await taskManager?.stop(); await externalDummyService.stop(); await client.end(true); await redisServer.stop(); @@ -40,7 +43,7 @@ describe('taskManager', () => { describe('startTask', () => { it('should add a task', async () => { - await taskManager.startTask({ + await taskManager?.startTask({ task: 'CheeseBurger', tenant: 'Rafa', }); @@ -52,17 +55,17 @@ describe('taskManager', () => { describe('when multiple tasks are added', () => { it('services get them in order', async () => { - await taskManager.startTask({ + await taskManager?.startTask({ task: 'CheeseBurger', tenant: 'Joan', }); - await taskManager.startTask({ + await taskManager?.startTask({ task: 'Fries', tenant: 'Joan', }); - await taskManager.startTask({ + await taskManager?.startTask({ task: 'Ribs', tenant: 'Fede', }); @@ -84,9 +87,9 @@ describe('taskManager', () => { const materials1 = { someData: 3 }; const materials2 = { someData: 2 }; const materials3 = { someData: 3 }; - await taskManager.sendJSON(materials1); - await taskManager.sendJSON(materials2); - await taskManager.sendJSON(materials3); + await taskManager?.sendJSON(materials1); + await taskManager?.sendJSON(materials2); + await taskManager?.sendJSON(materials3); expect(externalDummyService.materials.length).toEqual(3); expect(externalDummyService.materials[0]).toEqual(materials1); @@ -97,9 +100,9 @@ describe('taskManager', () => { it('should send files to the service', async () => { const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); - await taskManager.sendFile(file); - await taskManager.sendFile(file); - await taskManager.sendFile(file); + await taskManager?.sendFile(file); + await taskManager?.sendFile(file); + await taskManager?.sendFile(file); expect(externalDummyService.files.length).toEqual(3); expect(externalDummyService.files[0]).toEqual(file); @@ -115,9 +118,10 @@ describe('taskManager', () => { expect(results).toEqual(expectedResults); done(); }; - - await taskManager.stop(); - taskManager = await TaskManagerFactory.create(client, service, expectFunction); + service.processResults = expectFunction; + await taskManager?.stop(); + taskManager = new TaskManager(service); + await taskManager.start(); const task = { task: 'make_food', tenant: 'test' }; externalDummyService.setResults(expectedResults); @@ -125,3 +129,17 @@ describe('taskManager', () => { }); }); }); + +it('taskManager should fail to start task if redis is unavailable', async () => { + const service = { + serviceName: 'KonzNGaboHellKitchen', + dataUrl: 'http://localhost:1234/data', + filesUrl: 'http://localhost:1234/files', + resultsUrl: 'http://localhost:1234/results', + redisUrl: 'redis://localhost:6379', + }; + + const taskManager = new TaskManager(service); + + await expect(taskManager.start()).rejects.toThrow('I should fail'); +}); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 88f5a69fa4..b0656aa393 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -1,5 +1,5 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; -import Redis from 'redis'; +import Redis, { RedisClient } from 'redis'; import request from 'shared/JSONRequest'; import { Repeater } from 'api/utils/Repeater'; @@ -13,10 +13,12 @@ export interface Service { filesUrl: string; dataUrl: string; resultsUrl: string; + redisUrl: string; + processResults?: (results: object) => void; } export class TaskManager { - private redisSMQ: RedisSMQ; + private redisSMQ: RedisSMQ | undefined; private readonly service: Service; @@ -24,56 +26,68 @@ export class TaskManager { private readonly resultsQueue: string; - private readonly processResults?: (results: object) => void; + private repeater: Repeater | undefined; - private repeater: Repeater; + private redisClient: RedisClient | undefined; - constructor(redisSMQ: RedisSMQ, service: Service, processResults?: (results: object) => void) { - this.redisSMQ = redisSMQ; + constructor(service: Service) { this.service = service; - this.processResults = processResults; this.taskQueue = `${service.serviceName}_tasks`; this.resultsQueue = `${service.serviceName}_results`; - this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); } - async initQueue() { + async createQueue(queueName: string) { try { - await this.redisSMQ.createQueueAsync({ qname: this.taskQueue }); - } catch (err) { - if (err.name !== 'queueExists') { - throw err; + if (this.redisSMQ) { + await this.redisSMQ.createQueueAsync({ qname: queueName }); } - } - try { - await this.redisSMQ.createQueueAsync({ qname: this.resultsQueue }); } catch (err) { - if (err.name !== 'queueExists') { - throw err; + if (err.name === 'queueExists') { + console.log('queueExists'); } } + } + + async start() { + this.redisClient = await Redis.createClient(this.service.redisUrl); + + this.redisClient.on('error', e => { + throw e; + }); + this.redisSMQ = new RedisSMQ({ + client: this.redisClient, + }); + + await this.createQueue(this.taskQueue); + await this.createQueue(this.resultsQueue); + + this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); this.repeater.start(); } async receiveMessage() { - const message = (await this.redisSMQ.receiveMessageAsync({ - qname: this.resultsQueue, - })) as QueueMessage; - - if (message.id) { - if (this.processResults) { - const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); - this.processResults(results.json); + if (this.redisSMQ) { + const message = (await this.redisSMQ.receiveMessageAsync({ + qname: this.resultsQueue, + })) as QueueMessage; + + if (message.id) { + if (this.service.processResults) { + const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); + this.service.processResults(results.json); + } } } } async startTask(taskMessage: TaskMessage) { - await this.redisSMQ.sendMessageAsync({ - qname: this.taskQueue, - message: JSON.stringify(taskMessage), - }); + if (this.redisSMQ) { + await this.redisSMQ.sendMessageAsync({ + qname: this.taskQueue, + message: JSON.stringify(taskMessage), + }); + } } async sendJSON(data: object) { @@ -85,19 +99,8 @@ export class TaskManager { } async stop() { - await this.repeater.stop(); + if (this.repeater) { + await this.repeater.stop(); + } } } - -export const TaskManagerFactory = { - create: async ( - redis: Redis.RedisClient, - service: Service, - processResults?: (results: object) => void - ) => { - const redisSMQ = await new RedisSMQ({ client: redis }); - const manager = await new TaskManager(redisSMQ, service, processResults); - await manager.initQueue(); - return manager; - }, -}; From 2d3266b31d2fc8c8213a20b65fba755caaf62a60 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 15 Sep 2021 13:43:14 +0200 Subject: [PATCH 17/62] TaskManager refactor and redis unavailable test --- .../specs/ExternalDummyService.ts | 9 ++- .../tasksmanager/specs/taskManager.spec.ts | 41 ++++++------ app/api/tasksmanager/taskManager.ts | 64 ++++++++++--------- 3 files changed, 61 insertions(+), 53 deletions(-) diff --git a/app/api/tasksmanager/specs/ExternalDummyService.ts b/app/api/tasksmanager/specs/ExternalDummyService.ts index 1154c30958..090578f09a 100644 --- a/app/api/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/tasksmanager/specs/ExternalDummyService.ts @@ -1,6 +1,6 @@ import express from 'express'; import RedisSMQ from 'rsmq'; -import Redis from 'redis'; +import Redis, { RedisClient } from 'redis'; import { Server } from 'http'; import bodyParser from 'body-parser'; import { uploadMiddleware } from 'api/files'; @@ -24,6 +24,8 @@ export class ExternalDummyService { results: object | undefined; + redisClient: RedisClient | undefined; + constructor(port: number) { this.port = port; this.app = express(); @@ -102,9 +104,9 @@ export class ExternalDummyService { } async start(redisUrl: string) { - const redisClient = await Redis.createClient(redisUrl); + this.redisClient = await Redis.createClient(redisUrl); - this.redisSMQ = await new RedisSMQ({ client: redisClient }); + this.redisSMQ = await new RedisSMQ({ client: this.redisClient }); await this.resetQueue(); const start = new Promise(resolve => { @@ -117,6 +119,7 @@ export class ExternalDummyService { } async stop() { + await this.redisClient?.end(true); await this.server?.close(); } diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 1f4c9b2fc7..0b59f4bd2e 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,9 +1,6 @@ import fs from 'fs'; import { TaskManager, Service } from 'api/tasksmanager/taskManager'; - -import Redis from 'redis'; -import RedisSMQ from 'rsmq'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; @@ -12,7 +9,6 @@ describe('taskManager', () => { let service: Service; let redisServer: RedisServer; - let client: Redis.RedisClient; let externalDummyService: ExternalDummyService; beforeAll(async () => { @@ -31,13 +27,12 @@ describe('taskManager', () => { await externalDummyService.start(service.redisUrl); taskManager = new TaskManager(service); - await taskManager.start(); + await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to be ready }); afterAll(async () => { await taskManager?.stop(); await externalDummyService.stop(); - await client.end(true); await redisServer.stop(); }); @@ -121,25 +116,33 @@ describe('taskManager', () => { service.processResults = expectFunction; await taskManager?.stop(); taskManager = new TaskManager(service); - await taskManager.start(); - const task = { task: 'make_food', tenant: 'test' }; externalDummyService.setResults(expectedResults); + const task = { task: 'make_food', tenant: 'test' }; await externalDummyService.sendFinishedMessage(task); }); }); -}); -it('taskManager should fail to start task if redis is unavailable', async () => { - const service = { - serviceName: 'KonzNGaboHellKitchen', - dataUrl: 'http://localhost:1234/data', - filesUrl: 'http://localhost:1234/files', - resultsUrl: 'http://localhost:1234/results', - redisUrl: 'redis://localhost:6379', - }; + describe('when redis server is not available', () => { + beforeEach(async () => { + await redisServer.stop(); + }); - const taskManager = new TaskManager(service); + afterEach(async () => { + await redisServer.stop(); + }); - await expect(taskManager.start()).rejects.toThrow('I should fail'); + it('taskManager should fail to start task', async () => { + const task = { task: 'make_food', tenant: 'test' }; + + try { + await taskManager?.startTask(task); + fail('It should throw'); + } catch (e) { + expect(e).toEqual(Error('Redis is not connected')); + } + + await redisServer.start(); + }); + }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index b0656aa393..f7c10b9c98 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -34,41 +34,42 @@ export class TaskManager { this.service = service; this.taskQueue = `${service.serviceName}_tasks`; this.resultsQueue = `${service.serviceName}_results`; + this.start(); } - async createQueue(queueName: string) { - try { - if (this.redisSMQ) { - await this.redisSMQ.createQueueAsync({ qname: queueName }); - } - } catch (err) { - if (err.name === 'queueExists') { - console.log('queueExists'); - } - } - } - - async start() { - this.redisClient = await Redis.createClient(this.service.redisUrl); + start() { + this.redisClient = Redis.createClient(this.service.redisUrl); - this.redisClient.on('error', e => { - throw e; + this.redisClient.on('error', error => { + if (error.code !== 'ECONNREFUSED') { + throw error; + } }); - this.redisSMQ = new RedisSMQ({ - client: this.redisClient, - }); + this.redisClient.on('connect', () => { + this.redisSMQ = new RedisSMQ({ + client: this.redisClient, + }); - await this.createQueue(this.taskQueue); - await this.createQueue(this.resultsQueue); + this.redisSMQ?.createQueue({ qname: this.taskQueue }, err => { + if (err.name !== 'queueExists') { + throw err; + } + }); + this.redisSMQ?.createQueue({ qname: this.resultsQueue }, err => { + if (err.name !== 'queueExists') { + throw err; + } + }); + }); this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); this.repeater.start(); } async receiveMessage() { - if (this.redisSMQ) { - const message = (await this.redisSMQ.receiveMessageAsync({ + if (this.redisClient?.connected) { + const message = (await this.redisSMQ?.receiveMessageAsync({ qname: this.resultsQueue, })) as QueueMessage; @@ -82,12 +83,14 @@ export class TaskManager { } async startTask(taskMessage: TaskMessage) { - if (this.redisSMQ) { - await this.redisSMQ.sendMessageAsync({ - qname: this.taskQueue, - message: JSON.stringify(taskMessage), - }); + if (!this.redisClient?.connected) { + throw new Error('Redis is not connected'); } + + await this.redisSMQ?.sendMessageAsync({ + qname: this.taskQueue, + message: JSON.stringify(taskMessage), + }); } async sendJSON(data: object) { @@ -99,8 +102,7 @@ export class TaskManager { } async stop() { - if (this.repeater) { - await this.repeater.stop(); - } + await this.repeater?.stop(); + await this.redisClient?.end(true); } } From 3adb54b0811af48e8c2de4c9c7270c5b0a4e56e4 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 15 Sep 2021 16:43:37 +0200 Subject: [PATCH 18/62] Handling redis errors in taskmanager --- .../tasksmanager/specs/repeatWithLock.spec.js | 2 - .../tasksmanager/specs/taskManager.spec.ts | 76 ++++++++++++++----- app/api/tasksmanager/taskManager.ts | 8 +- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/tasksmanager/specs/repeatWithLock.spec.js index 4435bffe99..89924a071d 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/tasksmanager/specs/repeatWithLock.spec.js @@ -1,5 +1,3 @@ -// const handleErrorSpy = jest.mock('api/utils/handleError.js', () => {}); -// import handleError from 'api/utils/handleError'; import handleError from 'api/utils/handleError'; import waitForExpect from 'wait-for-expect'; import { RepeatWith } from '../RepeatWith'; diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 0b59f4bd2e..351cc1472f 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -1,5 +1,6 @@ +/* eslint-disable max-statements */ import fs from 'fs'; - +import waitForExpect from 'wait-for-expect'; import { TaskManager, Service } from 'api/tasksmanager/taskManager'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; @@ -18,8 +19,8 @@ describe('taskManager', () => { filesUrl: 'http://localhost:1234/files', resultsUrl: 'http://localhost:1234/results', redisUrl: 'redis://localhost:6379', + processResults: jest.fn(), }; - redisServer = new RedisServer(); await redisServer.start(); @@ -36,6 +37,10 @@ describe('taskManager', () => { await redisServer.stop(); }); + afterEach(() => { + jest.clearAllMocks(); + }); + describe('startTask', () => { it('should add a task', async () => { await taskManager?.startTask({ @@ -107,33 +112,26 @@ describe('taskManager', () => { }); describe('when the task finishes', () => { - it('should get the results', async done => { + it('should get the results', async () => { const expectedResults = { results: 'Paella' }; - const expectFunction = (results: object) => { - expect(results).toEqual(expectedResults); - done(); - }; - service.processResults = expectFunction; + await taskManager?.stop(); taskManager = new TaskManager(service); externalDummyService.setResults(expectedResults); - const task = { task: 'make_food', tenant: 'test' }; + const task = { task: 'Tofu', tenant: 'Gabo' }; await externalDummyService.sendFinishedMessage(task); + + await waitForExpect(async () => { + expect(service.processResults).toHaveBeenCalledWith(expectedResults); + }); }); }); describe('when redis server is not available', () => { - beforeEach(async () => { - await redisServer.stop(); - }); - - afterEach(async () => { - await redisServer.stop(); - }); - it('taskManager should fail to start task', async () => { - const task = { task: 'make_food', tenant: 'test' }; + await redisServer.stop(); + const task = { task: 'Spagueti', tenant: 'Kon' }; try { await taskManager?.startTask(task); @@ -141,8 +139,48 @@ describe('taskManager', () => { } catch (e) { expect(e).toEqual(Error('Redis is not connected')); } - await redisServer.start(); }); + + describe('and redis comes back', () => { + it('should send tasks again', async () => { + await redisServer.stop(); + const task = { task: 'Ceviche', tenant: 'Mercy' }; + + try { + await taskManager?.startTask(task); + fail('It should throw'); + } catch (e) { + expect(e).toEqual(Error('Redis is not connected')); + } + + await redisServer.start(); + await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to connect + await taskManager?.startTask(task); + + const message = await externalDummyService.read(); + expect(message).toBe('{"task":"Ceviche","tenant":"Mercy"}'); + }); + + it('should read pending messages', async () => { + const task = { task: 'Ceviche', tenant: 'Mercy' }; + + await taskManager?.stop(); + externalDummyService.setResults({ results: 'Paella' }); + await externalDummyService.sendFinishedMessage(task); + + expect(service.processResults).not.toHaveBeenCalled(); + await redisServer.stop(); + + taskManager?.start(); + await redisServer.start(); + + await waitForExpect(async () => { + expect(service.processResults).toHaveBeenCalledWith({ + results: 'Paella', + }); + }); + }); + }); }); }); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index f7c10b9c98..51f8dfb10d 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -46,11 +46,11 @@ export class TaskManager { } }); - this.redisClient.on('connect', () => { - this.redisSMQ = new RedisSMQ({ - client: this.redisClient, - }); + this.redisSMQ = new RedisSMQ({ + client: this.redisClient, + }); + this.redisClient.on('connect', () => { this.redisSMQ?.createQueue({ qname: this.taskQueue }, err => { if (err.name !== 'queueExists') { throw err; From 2adf80ccfa0f807a0d89b629690d8343d140dab6 Mon Sep 17 00:00:00 2001 From: gabo Date: Thu, 16 Sep 2021 13:29:56 +0200 Subject: [PATCH 19/62] Segmentator task working for one file --- app/api/pdfsegmentation/PdfSegmentation.ts | 56 ++++++++++++++++++ app/api/pdfsegmentation/specs/fixtures.ts | 47 +++++++++++++++ .../specs/pdfSegmentation.spec.ts | 54 +++++++++++++++++ .../f2082bf51b6ef839690485d7153e847a.pdf | Bin 0 -> 7627 bytes .../specs/ExternalDummyService.ts | 20 ++++--- .../tasksmanager/specs/taskManager.spec.ts | 18 ++++-- app/api/tasksmanager/taskManager.ts | 6 +- app/api/utils/fixturesFactory.ts | 15 +++++ app/shared/JSONRequest.js | 2 +- 9 files changed, 203 insertions(+), 15 deletions(-) create mode 100644 app/api/pdfsegmentation/PdfSegmentation.ts create mode 100644 app/api/pdfsegmentation/specs/fixtures.ts create mode 100644 app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts create mode 100644 app/api/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf diff --git a/app/api/pdfsegmentation/PdfSegmentation.ts b/app/api/pdfsegmentation/PdfSegmentation.ts new file mode 100644 index 0000000000..6ada9f1065 --- /dev/null +++ b/app/api/pdfsegmentation/PdfSegmentation.ts @@ -0,0 +1,56 @@ +import { Service, TaskManager } from 'api/tasksmanager/taskManager'; +import { RepeatWith } from 'api/tasksmanager/RepeatWith'; +import { files, uploadsPath } from 'api/files'; +import fs from 'fs'; + +export interface SegmentationParameters { + filesUrl: string; + dataUrl: string; + resultsUrl: string; + redisUrl: string; +} +export const SERVICE_NAME = 'pdfSegmentation'; + +export class PdfSegmentation { + private service: Service; + + private repeatWith: RepeatWith; + + private taskManager: TaskManager; + + constructor(segmentationParameters: SegmentationParameters) { + this.service = { + serviceName: SERVICE_NAME, + ...segmentationParameters, + }; + + this.repeatWith = new RepeatWith(SERVICE_NAME, this.segment.bind(this), 120000, 100); + this.taskManager = new TaskManager(this.service); + } + + async start() { + await this.repeatWith.start(); + } + + async stop() { + await this.repeatWith.stop(); + } + + async segment() { + const nextFilesToProcess = await files.get({ + type: 'document', + }); + + if (nextFilesToProcess.length === 0 || !nextFilesToProcess[0].filename) { + return; + } + + const file = fs.readFileSync(uploadsPath(nextFilesToProcess[0].filename)); + await this.taskManager.sendFile(file, nextFilesToProcess[0].filename); + const task = { + task: nextFilesToProcess[0].filename, + tenant: 'tenant1', + }; + await this.taskManager.startTask(task); + } +} diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts new file mode 100644 index 0000000000..f21e87ad31 --- /dev/null +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -0,0 +1,47 @@ +import db, { DBFixture } from 'api/utils/testing_db'; +import { getFixturesFactory } from 'api/utils/fixturesFactory'; + +const factory = getFixturesFactory(); + +const settings = [ + { + _id: db.id(), + languages: [{ key: 'en', default: true }, { key: 'es' }, { key: 'pt' }], + features: { + 'metadata-extraction': [ + { + id: factory.id('templateToSegmentA'), + properties: ['property1', 'property2'], + }, + { + id: factory.id('templateToSegmentB'), + properties: ['property1'], + }, + ], + }, + }, +]; + +const fixturesPdfName = 'f2082bf51b6ef839690485d7153e847a.pdf'; + +const fixturesOneFile: DBFixture = { + settings, + entities: [factory.entity('A1', 'templateToSegmentA')], + files: [factory.file('F1', 'A1', 'en', 'document', fixturesPdfName)], +}; + +const fixturesUseDefaultPdfPerLanguage: DBFixture = { + settings, + entities: [ + factory.entity('A1', 'templateToSegmentA', {}, { language: 'es' }), + factory.entity('B1', 'templateToSegmentB', {}, { language: 'pt' }), + factory.entity('B2', 'templateNotSegmentC', {}, { language: 'en' }), + ], + files: [ + factory.file('F1', 'A1', 'en', 'document', 'test.pdf'), + factory.file('F2', 'B1', 'es', 'attachment', 'a.png'), + factory.file('F3', 'B2', 'pt', 'document', 'c.pdf'), + ], +}; + +export { fixturesPdfName, fixturesOneFile, fixturesUseDefaultPdfPerLanguage }; diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts new file mode 100644 index 0000000000..efa7ed0274 --- /dev/null +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -0,0 +1,54 @@ +import { ExternalDummyService } from 'api/tasksmanager/specs/ExternalDummyService'; +import { testingDB } from 'api/utils/testing_db'; +import { fixturesOneFile, fixturesPdfName } from 'api/pdfsegmentation/specs/fixtures'; +import { testingEnvironment } from 'api/utils/testingEnvironment'; +import fs from 'fs'; +import { PdfSegmentation, SERVICE_NAME } from 'api/pdfsegmentation/PdfSegmentation'; +import { RedisServer } from 'api/tasksmanager/RedisServer'; +import waitForExpect from 'wait-for-expect'; + +describe('pdfSegmentation', () => { + let redisServer: RedisServer; + let segmentationMockService: ExternalDummyService; + let pdfSegmentation: PdfSegmentation; + + beforeAll(async () => { + redisServer = new RedisServer(); + await redisServer.start(); + }); + + afterAll(async () => { + await testingDB.disconnect(); + await segmentationMockService.stop(); + await pdfSegmentation?.stop(); + await redisServer.stop(); + }); + + it('should send one pdfs to segment', async () => { + await testingEnvironment.setUp(fixturesOneFile); + const segmentationConnectionParameters = { + dataUrl: 'http://localhost:1234/data', + filesUrl: 'http://localhost:1234/files', + resultsUrl: 'http://localhost:1234/results', + redisUrl: 'redis://localhost:6379', + }; + + segmentationMockService = new ExternalDummyService(1234, SERVICE_NAME); + await segmentationMockService.start(segmentationConnectionParameters.redisUrl); + + pdfSegmentation = new PdfSegmentation(segmentationConnectionParameters); + await pdfSegmentation.start(); + + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfName}`); + + await waitForExpect(async () => { + expect(segmentationMockService.files.length).toEqual(1); + }); + + expect(segmentationMockService.files[0]).toEqual(file); + expect(segmentationMockService.filesNames[0]).toEqual(fixturesPdfName); + + const message = await segmentationMockService.read(); + expect(message).toEqual(`{"task":"${fixturesPdfName}","tenant":"tenant1"}`); + }); +}); diff --git a/app/api/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf b/app/api/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1a8032582ed1e9b36bbd424761c4ffdb42d2718b GIT binary patch literal 7627 zcma)h2UJr_7p{UR1`(95lqgc9B>@tuAidX6rG^kX1d`AN1?gRB(xrnSRVkt{_ zRO!-0IwvuE!;bJqGk4sCfwewctTh@&O1(_Q=14}1?Y3`rkV+tHYb zc>ShT*!`SgTZW@`S?|8&li0IuvYE9X3W5^G=;OV)P142^1$qPKuh=K@dx{Nm>lIrC zuxc^H9+2{P&u%K_v*82SL_io9>tBulJ)-X*qRYe%Ob|hQ`%@?NEh`5M zngD_z-K_{jfg%>?41&nYV!cej{6a!tVL>PuAt**{1z=z^5JU-wb$12gf`sVdP%d~^ z6b|Ep_J+tIA@Udxdo)H@Nfx38Cg9wOq4^_vqM-i>9q8||BRvH?r~hLY@V`8klL12o z;2`de@yoVe0s5X)3Tvg_1KWgtgj0Z&~7aTkk_Ub+QGCmrVXDe}3Yv{k>*Z5|- zglb7p39U_n1P5MD(H{KB`+@V+KI^F@1y1}x&s2m2c_Rt<*iQHsVrzeTYER()tit_f zhW57DSR4N#KGKBN^N{fNq2;T&RPfWKA09^YyIueK#{X5^=F)i!nyik=!JDZiN#V+LeLGXO~GR7zM)4qGhtrpS%&6zgcu&U|zH0cB(}(|CV=ANcE{5Lne=T zpVzi0zy0Xq-aWj>lo`MObT zjGEQzF7+gvjHdVR-C5)~MdQl$Zd+s%=@hszP?R^rBHUyQI%y1HtvEvq=sH!{tTx-?BUavOG`yO+Ojv0>>R*VB>s5L64#yk<*D z*5}+AijeaIAqN%~Z#H~}>ZbQExUU+Y%HNp@&~~+Reewp<*Kp55>-*F#vkZR=CEp2` zeD{$a7EhDJv3}E(_cW8pJv|EQP~JWMAi@MhnAV zJnAm*lrmo19#U(3>J?fz#dXw@nx}eY@`B?+nSkaaeGQP>_??%Tx_U`D6Xjw5R^D@yCA znU=lhapc6pH>1izR-1_KGpXkXhPzgnOy5w5flI$TXI*;xJg`1r-OoXfVwcR1rUvb9fn27P68xOm;b;EP=ODTiY#y%x`#mFvoG@F;z32uv+{wElcKE%PMZ zreqM&!7?7=drw~>rTw{2eM{j-SjpsXk^V2Sb@JUtj*0mz)1>+A77Z`cbG{@5-dWuU z2uSTKL*47^PHNG5k(v3e>(M=N$!F&2Syvu%lt|t}>I!Cj)2Y8keS?N!>5bb!_JK6p zKT(NV*TM89(nQngo;=Xi9M%k@35?sd+Qf&sB;(!}5hIXaI9VRXDX4=UPL^&2K=e^JiSPUsvHxbi4G(Yj^mbo}Q-kIkt zn1b@STLmfoq0v^-abG@o{b?6fv!ymtoLpP)HM%iH5n^A2axltyAcEZKw0~ymV)9jh zqAQt^97kuS!HXRMGxxs`E|1OPeJ{eY-VyrN>Ahc5@)OKS=LgRBZrNlAnW6{hCnoGW zhYt%L^y_rfdC0GPidK~gH9^V_&BifD1&@yE1S3q{HfPDVjF~QYP<2fDoeMPCyS;tE zrbm0V;e8Y{;d6K2DTjhP@wBweR$*tJ$;Ta^>2Q`Ge8U3w-FjUTg z)5hElgDxJ#uC8{>`)gtX_Nw3x9C~3^e>k$qvFy8pD^pFLkshZjjqhpHyP;$EnEZDi z#l;%KJSJPGs^2CRXyCrT6&4a$JsWU+I`{Nd6sDxRDU0mu?XKOgiKT?RgMiFT9a@ja z^AjnTytN2-SjpFC26iRur`;es`e!FcZ;dV%7b=-dL`88O+R#eET*3Q~5~Ub2887&2 z+|IuBi9Xd>?$wOi<=6JEPu~_rbx?<2`=Bd*jZ z231!waU125ta!-Wkov>94mI_ML62fLTl*+SAHI;uZisrwP{8SX4Hf3;E zH-WOVP;P6*HH-yOfplD^f+u+Dqraht@%LoR?J8>4qce}*mt-tPnfIj46P%lr%w^`p@t^wm zJcZuX)24bhwNFN_YS9}gT7)>bqn6Ac@ES#S1;}P^!sEE8dn)=bEGf#qWy~ z&f5+Yorqcu^O5`Rb|s~6C8nGXHeHk&W=%tL7qd3(eel&Sn??szTQRHU#}$3YO_$nj zPAV^u>=-J%*$4S?8IctxkbVqHnoy@p0@-T7&&Vb@7{1WHZeW<4kT^z~E$A3t3DL94 zdM5MiZ12cS%9Zq|O$5piN2vA#?FS9Z1w)o-N>#YsvnDYCC0UAaej9fY4l=%c!O=FM z>*TdMiV4@$D0n*^EAqX&*68xN%gr4pp64r6>Cb!#1br`fVK;j4b~FisK=?28WW7Fo zp!T@##7HujYC2e&KTYZ?w-DAHZmKZvyJ5<<){gI$@|)#5;uICVr@s$dc*!c;$?1tY zaOQo&jBqlYrRU{!B)O)PuP=N~HaN%6fxSS)%8UIhBIo%fi_x%_tCgPY0=wt$m>4bd zOMY_}X|~ZxX3(x!Mbzqzo1}_hx2+yjq+G|AmBx)cOT4*{^TkASAA2p9|7PL5E9^eK zvsRrwYSwLWOXypW=lr7jbYJ6dvCMXf{ksQ3Jyk#Ut7B}}CqENB<&U$Z)(%gCMEWY7 ze>itju6dGl6{c@*F-nnoEb9%^B`49B(Z1{CkqfWCLCwP>Y|?q!$?sZ)v~lnqx*CJ* z+KBu(1{$n+ZHU{|lFE7%Zw-*hZitUQyoEH*Ed8OB>sHQ#dmF{s#*XOz& z-9H}N{_uD+%yu#jd(nUY?K)z0orfW|F!Agcv3caoTmH^BNn>j{Ejsm8qcf%$Pdy2e z>@Og03u9LDa9;l4{D8Q6Y8Fjl(JeEslMEW^)z);HT+LGamtFQZxVfvE$1yY(ggJ)# zGnEa=%g1zuY@fv!(oepSWH?cZSE2(Nf2;k}H>fFD-TIM}t8Di2j6A9*+%SFrB~7n{ zTW7dE7msE_x}AGYn+|WXiHA|BkjQX$TI~KaQ4vntEzzY|7~4VO_TogcMkI~#RUY2u zupdWAEx!e|qw!S9lB*^nuYa7Bj8JmDvmAU`Gg4l9eBwv9(%wsXGJC7GeZD}*Ov#U1 z=-d4i%{>e5Ty0%l%WD1ccR2ynVRm%yK>0a*)K4oCM)?rppXnA7?TT8RHw@-dRA~ni z8OvI4bMPdJnuOos$XqJY3m;YuQca(yy==x)FX^514cdEmtDme^-}qjI>xaFdox)GDY)L9TvpFNw4cmIgU>>R; z(W&-JpnA0$);o6Tlt zjvwsDf^jaCoD9EB)-PXG+16yCe?qP3Zgl3H`f^2pwdQigCDqw3ann=uUW%aE)^~URCBSYf&`9h-SBtX`{XF0G7>KYTZZAtVUam%w9Lb90y-y zppX`6EN`(HbvR^T5eR6AP<8hrV-^rEZON}FUXG4wHM*W$Cqkc?*Jt>xT~x~;l(rBjx5@p}WA)_K`nu*+Rts-%_b zR?npo9t6|wuRV%Z@9p^5z4OvIT-r9GFjW`qUF@8k>|Qp#HWq!DsCP42C%5uE951gL zEkGsT+1hq_%#=^ARbKas1EXCWiGx7W2NefF9!de8Z!s~P*I}Abt%72SLCAX1V4rOmlMunf!8wpJbR)JuKHA!<@`e~P_lvTX zj24QXdW3C0s6a$iJ?{Wt3I)p+0zKNSRaN z)oxuXJ1@;xh?hX})cPK)gh#2&M-!Ut9^p952GN2_w+Y1SW%OO z*QnO1YM;@K3z?6b)pg5J-CcpnYAKvg&Ayasr`D&Udvh)}zVXc38C&H@Gkq5kG8Mb~ zRy_ySj@@VWR&@w&Yd(SB;{1d7cH^eV-IB)>E$<^Q@1|o?(XI(|+Tm4KtMcgBQ@E4k z-pU=tj_sq5CBzcUJ_{pFqjw#iEMxPJ+WELTl3E=z=9%eN&&2*9@7K-yu0r=vS)lDG znL{%Qt>cn7#(F*cv;f^qYAL&2M%dIe4-T1rpGZWSwUHF8Xrc*kP(IHW~# z5A#$_MKoL^Y%8^us5U6OZnUvu;yQaz$nB%>(+8t-cg6LlCLT|%GWS|$o9o;{(z~Z3 zC1>XGO`3Pr%U24QRr>L2kq01w42|JgWB9(= zCiI##=IFR|@4(<}3oqWUm?I@~xL7A=-1A7C<^$COM zl$j;=9xf>5#7cF3at^xutW>C)mWI5v5T(zDsUI&`BYC@d>vr#2)1yO5 zDqqYi)hnGrQRZGo9Ys;w7PI)~J3`=?0RW`-ner!Jj1x;$77u2;j$ZcMab z-)z>5?j!C@Dbko$n^TxlSg2V@+ReU*m?jB>_J={F*pcZ#+08(t8)Vf0X=MpsTAanQSC@ih|0N zx}`T~!FZ%vb|!q2ra1*~C$=QMbc;*MzoyB(rn+r!GWH>L*0)+(G4GzC)s-Lb3TH5|F?nl{EQsGRD|EgH8%!l$Tx12ce9Y}15|s9JJs~} z&G(_vE1?Cf;yZDodowBLf*Cj=8lbVaXAOCbmsCAm2>P{plyo&=a%7)FUh(=WI*c5( zv_v>Q4!X!{M_^h0G;?Q67h#C~raQ;2cL~-w<1o+g^X;<;N7{s!xhq zu`BL-KECfFcMKDZYST*BG&VBEdDpkFuI}zG?#AL~8B;wY;?rO5HdI^I(PZUG|DZTg z^ViwJZXVLzi=KG%vj+}WC!u5Row1@uTMLRMsVMcLRsOW?JrR;NB8G#7f+F?oRq2?a# zT6&J#sJFDI4VI!<#%0Hx%3>g;CG7j$X4&g@ESIP-J9`-ccVIS9HnRyZ|0Rd z?OyVhKFZ^vRc{5NjFSzm#*HVL4SOs^wcVHBoPJr={pvs9Ng~KdL{{N2IQ&m6QtU4* z@+Zowh;<>zWAJF4y(<9=z{kK)6XlG7$jT5OUUhpb3=T!G$GRXfIC~oaD|JHI;=#f| z2hjY9#qz_2g#bJjDg=fJiim>6L01JzB=a)(HcKf*>+@G{%Jh78Qem04VnN z0ly#&0fNY(T$M5Qwsr(y9`F)Lz&IO#MS&?q^8loc5d;&_Q(Yn;`v;r~gZ{OLUwEn@ z0{R~)?%x4av$$S|RxowjhE#C-8miw~q``zd;89ha+>gHJRF;HCpy&u#=qo?>Pc?lH zwy!Q8F~31Me;;LH87--;3cao5oDll1%$$V9av+43L;p^QNpNj?Fgu^mk@I<|r_FPB zjZu@Y{7qQG_a|RZc?P}qA~R15UzZw}8vgCeCx#D}Bnx@=MS-x!og1I39|vSRP8W|3 zqb8KPvS$47x7A7`kLm2`_%xL_o7HbS^jy7TQBHX}_he6C3>LMG+%MwGd9?FoR9@hr zmi`>+_CehKV*?&|B4E9$f|Ez{T@pe4`DE}#yMd2~TeEi>w!XAew$l0NA#AK_Ze)R!l z-O%2eV23vm5fK3+fV2q%l_Df61{MUyf^aBU2te_F_JJ{g-xJsPs}{k;Qq=`P^sxFa z_C!7d{4??YTMB<;Llj})pZob=g+G`EfEFN{2Ws?ptH0OkE;4B1rDF2CEr^lv9k7eLlhf~iP{6i2%Egu& zN~ z3-ko}p>QrJld*_n28VAl7dITH__!{7dX9;QV1Kb4&h%OcjRNv2|3xcS)*kHl1pQjZV z$b(J5l5$Wnc@ZElicm3GL71YPn4%0q4lXJT6_giL5EO(;MEDTjpgoz0W%E=%^ { if (req.files.length) { - const files = req.files as { buffer: Buffer }[]; + const files = req.files as { buffer: Buffer; originalname: string }[]; this.files.push(files[0].buffer); + this.filesNames.push(files[0].originalname); } res.send('received'); }); @@ -96,11 +100,13 @@ export class ExternalDummyService { } async read() { - const { message } = await this.rsmq.receiveMessageAsync({ + const messageReceived: RedisSMQ.QueueMessage | {} = await this.rsmq.receiveMessageAsync({ qname: `${this.serviceName}_tasks`, }); - this.currentTask = message; - return message; + + const queueMessage = messageReceived as RedisSMQ.QueueMessage; + this.currentTask = queueMessage?.message; + return this.currentTask; } async start(redisUrl: string) { diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 351cc1472f..4c9b0aba19 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -24,10 +24,12 @@ describe('taskManager', () => { redisServer = new RedisServer(); await redisServer.start(); - externalDummyService = new ExternalDummyService(1234); + externalDummyService = new ExternalDummyService(1234, service.serviceName); await externalDummyService.start(service.redisUrl); taskManager = new TaskManager(service); + taskManager.subscribeToResults(); + await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to be ready }); @@ -100,14 +102,17 @@ describe('taskManager', () => { it('should send files to the service', async () => { const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); - await taskManager?.sendFile(file); - await taskManager?.sendFile(file); - await taskManager?.sendFile(file); + await taskManager?.sendFile(file, 'blank1.pdf'); + await taskManager?.sendFile(file, 'blank2.pdf'); + await taskManager?.sendFile(file, 'blank3.pdf'); expect(externalDummyService.files.length).toEqual(3); expect(externalDummyService.files[0]).toEqual(file); + expect(externalDummyService.filesNames[0]).toEqual('blank1.pdf'); expect(externalDummyService.files[1]).toEqual(file); + expect(externalDummyService.filesNames[1]).toEqual('blank2.pdf'); expect(externalDummyService.files[2]).toEqual(file); + expect(externalDummyService.filesNames[2]).toEqual('blank3.pdf'); }); }); @@ -117,6 +122,7 @@ describe('taskManager', () => { await taskManager?.stop(); taskManager = new TaskManager(service); + taskManager.subscribeToResults(); externalDummyService.setResults(expectedResults); const task = { task: 'Tofu', tenant: 'Gabo' }; @@ -155,7 +161,7 @@ describe('taskManager', () => { } await redisServer.start(); - await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to connect + await new Promise(resolve => setTimeout(resolve, 200)); // wait for redis to connect await taskManager?.startTask(task); const message = await externalDummyService.read(); @@ -173,6 +179,8 @@ describe('taskManager', () => { await redisServer.stop(); taskManager?.start(); + taskManager?.subscribeToResults(); + await redisServer.start(); await waitForExpect(async () => { diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 51f8dfb10d..728c19a368 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -62,7 +62,9 @@ export class TaskManager { } }); }); + } + subscribeToResults() { this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); this.repeater.start(); } @@ -97,8 +99,8 @@ export class TaskManager { await request.post(this.service.dataUrl, data); } - async sendFile(file: Buffer) { - await request.uploadFile(this.service.filesUrl, 'blank.pdf', file); + async sendFile(file: Buffer, fileName: string) { + await request.uploadFile(this.service.filesUrl, fileName, file); } async stop() { diff --git a/app/api/utils/fixturesFactory.ts b/app/api/utils/fixturesFactory.ts index a2b8a04f73..606e4525ef 100644 --- a/app/api/utils/fixturesFactory.ts +++ b/app/api/utils/fixturesFactory.ts @@ -2,6 +2,7 @@ import { ObjectId } from 'mongodb'; import db from 'api/utils/testing_db'; import { EntitySchema } from 'shared/types/entityType'; import { PropertySchema, MetadataSchema } from 'shared/types/commonTypes'; +import { FileType } from 'shared/types/fileType'; export function getIdMapper() { const map = new Map(); @@ -43,6 +44,20 @@ export function getFixturesFactory() { }; }, + file: ( + id: string, + entity: string, + language: string, + type: 'custom' | 'document' | 'thumbnail' | 'attachment' | undefined, + filename: string + ): FileType => ({ + _id: idMapper(`${id}`), + entity, + language, + type, + filename, + }), + inherit(name: string, content: string, property: string, props = {}): PropertySchema { return this.relationshipProp(name, content, { inherit: { property: idMapper(property).toString() }, diff --git a/app/shared/JSONRequest.js b/app/shared/JSONRequest.js index 66da4d5196..1511dd0697 100644 --- a/app/shared/JSONRequest.js +++ b/app/shared/JSONRequest.js @@ -139,7 +139,7 @@ export default { head: (url, data, headers) => _fetch(url, data, 'HEAD', headers), - // TEST!!!! Fully untested function + // TEST!!!!! Fully untested function uploadFile: (url, filename, file, _cookie) => new Promise((resolve, reject) => { superagent From baaf6c2aee6cebd9aa6da8dafa71a6df9132f296 Mon Sep 17 00:00:00 2001 From: gabo Date: Thu, 16 Sep 2021 18:05:53 +0200 Subject: [PATCH 20/62] Segment many files --- app/api/pdfsegmentation/PdfSegmentation.ts | 20 +++++-- app/api/pdfsegmentation/specs/fixtures.ts | 36 ++++++++++- .../specs/pdfSegmentation.spec.ts | 59 +++++++++++++++---- .../specs/ExternalDummyService.ts | 39 ++++++++++-- .../tasksmanager/specs/taskManager.spec.ts | 12 ++-- 5 files changed, 135 insertions(+), 31 deletions(-) diff --git a/app/api/pdfsegmentation/PdfSegmentation.ts b/app/api/pdfsegmentation/PdfSegmentation.ts index 6ada9f1065..d883b0a9f4 100644 --- a/app/api/pdfsegmentation/PdfSegmentation.ts +++ b/app/api/pdfsegmentation/PdfSegmentation.ts @@ -2,6 +2,7 @@ import { Service, TaskManager } from 'api/tasksmanager/taskManager'; import { RepeatWith } from 'api/tasksmanager/RepeatWith'; import { files, uploadsPath } from 'api/files'; import fs from 'fs'; +import { FileType } from 'shared/types/fileType'; export interface SegmentationParameters { filesUrl: string; @@ -12,7 +13,7 @@ export interface SegmentationParameters { export const SERVICE_NAME = 'pdfSegmentation'; export class PdfSegmentation { - private service: Service; + private readonly service: Service; private repeatWith: RepeatWith; @@ -24,6 +25,7 @@ export class PdfSegmentation { ...segmentationParameters, }; + // eslint-disable-next-line @typescript-eslint/no-misused-promises this.repeatWith = new RepeatWith(SERVICE_NAME, this.segment.bind(this), 120000, 100); this.taskManager = new TaskManager(this.service); } @@ -41,14 +43,20 @@ export class PdfSegmentation { type: 'document', }); - if (nextFilesToProcess.length === 0 || !nextFilesToProcess[0].filename) { + // eslint-disable-next-line @typescript-eslint/no-misused-promises + await nextFilesToProcess.forEach(async nextFile => { + await this.segmentOne(nextFile); + }); + } + + private async segmentOne(nextFile: FileType) { + if (!nextFile || !nextFile.filename) { return; } - - const file = fs.readFileSync(uploadsPath(nextFilesToProcess[0].filename)); - await this.taskManager.sendFile(file, nextFilesToProcess[0].filename); + const file = fs.readFileSync(uploadsPath(nextFile.filename)); + await this.taskManager.sendFile(file, nextFile.filename); const task = { - task: nextFilesToProcess[0].filename, + task: nextFile.filename, tenant: 'tenant1', }; await this.taskManager.startTask(task); diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts index f21e87ad31..bb7d089437 100644 --- a/app/api/pdfsegmentation/specs/fixtures.ts +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -30,6 +30,40 @@ const fixturesOneFile: DBFixture = { files: [factory.file('F1', 'A1', 'en', 'document', fixturesPdfName)], }; +const fixturesTwelveFiles: DBFixture = { + settings, + entities: [ + factory.entity('A1', 'templateToSegmentA'), + factory.entity('A2', 'templateToSegmentA'), + factory.entity('A3', 'templateToSegmentA'), + factory.entity('A4', 'templateToSegmentA'), + factory.entity('A5', 'templateToSegmentA'), + factory.entity('A6', 'templateToSegmentA'), + factory.entity('A7', 'templateToSegmentA'), + factory.entity('A8', 'templateToSegmentA'), + factory.entity('A9', 'templateToSegmentA'), + factory.entity('A10', 'templateToSegmentA'), + factory.entity('A11', 'templateToSegmentA'), + factory.entity('A12', 'templateToSegmentA'), + factory.entity('A13', 'templateToSegmentA'), + factory.entity('A14', 'templateToSegmentA'), + ], + files: [ + factory.file('F1', 'A1', 'en', 'document', fixturesPdfName), + factory.file('F2', 'A2', 'en', 'document', fixturesPdfName), + factory.file('F3', 'A3', 'en', 'document', fixturesPdfName), + factory.file('F4', 'A4', 'en', 'document', fixturesPdfName), + factory.file('F5', 'A5', 'en', 'document', fixturesPdfName), + factory.file('F6', 'A6', 'en', 'document', fixturesPdfName), + factory.file('F7', 'A7', 'en', 'document', fixturesPdfName), + factory.file('F8', 'A8', 'en', 'document', fixturesPdfName), + factory.file('F9', 'A9', 'en', 'document', fixturesPdfName), + factory.file('F10', 'A10', 'en', 'document', fixturesPdfName), + factory.file('F11', 'A11', 'en', 'document', fixturesPdfName), + factory.file('F12', 'A12', 'en', 'document', fixturesPdfName), + ], +}; + const fixturesUseDefaultPdfPerLanguage: DBFixture = { settings, entities: [ @@ -44,4 +78,4 @@ const fixturesUseDefaultPdfPerLanguage: DBFixture = { ], }; -export { fixturesPdfName, fixturesOneFile, fixturesUseDefaultPdfPerLanguage }; +export { fixturesPdfName, fixturesOneFile, fixturesTwelveFiles, fixturesUseDefaultPdfPerLanguage }; diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index efa7ed0274..b3c764a0b2 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -1,9 +1,17 @@ import { ExternalDummyService } from 'api/tasksmanager/specs/ExternalDummyService'; import { testingDB } from 'api/utils/testing_db'; -import { fixturesOneFile, fixturesPdfName } from 'api/pdfsegmentation/specs/fixtures'; +import { + fixturesOneFile, + fixturesPdfName, + fixturesTwelveFiles, +} from 'api/pdfsegmentation/specs/fixtures'; import { testingEnvironment } from 'api/utils/testingEnvironment'; import fs from 'fs'; -import { PdfSegmentation, SERVICE_NAME } from 'api/pdfsegmentation/PdfSegmentation'; +import { + PdfSegmentation, + SERVICE_NAME, + SegmentationParameters, +} from 'api/pdfsegmentation/PdfSegmentation'; import { RedisServer } from 'api/tasksmanager/RedisServer'; import waitForExpect from 'wait-for-expect'; @@ -11,10 +19,25 @@ describe('pdfSegmentation', () => { let redisServer: RedisServer; let segmentationMockService: ExternalDummyService; let pdfSegmentation: PdfSegmentation; + let segmentationConnectionParameters: SegmentationParameters; beforeAll(async () => { redisServer = new RedisServer(); + await redisServer.start(); + segmentationConnectionParameters = { + dataUrl: 'http://localhost:1234/data', + filesUrl: 'http://localhost:1234/files', + resultsUrl: 'http://localhost:1234/results', + redisUrl: 'redis://localhost:6379', + }; + + segmentationMockService = new ExternalDummyService(1234, SERVICE_NAME); + await segmentationMockService.start(segmentationConnectionParameters.redisUrl); + }); + + beforeEach(() => { + segmentationMockService.reset(); }); afterAll(async () => { @@ -26,15 +49,6 @@ describe('pdfSegmentation', () => { it('should send one pdfs to segment', async () => { await testingEnvironment.setUp(fixturesOneFile); - const segmentationConnectionParameters = { - dataUrl: 'http://localhost:1234/data', - filesUrl: 'http://localhost:1234/files', - resultsUrl: 'http://localhost:1234/results', - redisUrl: 'redis://localhost:6379', - }; - - segmentationMockService = new ExternalDummyService(1234, SERVICE_NAME); - await segmentationMockService.start(segmentationConnectionParameters.redisUrl); pdfSegmentation = new PdfSegmentation(segmentationConnectionParameters); await pdfSegmentation.start(); @@ -48,7 +62,28 @@ describe('pdfSegmentation', () => { expect(segmentationMockService.files[0]).toEqual(file); expect(segmentationMockService.filesNames[0]).toEqual(fixturesPdfName); - const message = await segmentationMockService.read(); + const message = await segmentationMockService.readFirstTaskMessage(); expect(message).toEqual(`{"task":"${fixturesPdfName}","tenant":"tenant1"}`); }); + + it('should send 12 pdfs to segment', async () => { + await testingEnvironment.setUp(fixturesTwelveFiles); + pdfSegmentation = new PdfSegmentation(segmentationConnectionParameters); + await pdfSegmentation.start(); + + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfName}`); + + await waitForExpect(async () => { + expect(segmentationMockService.files.length).toEqual(12); + }); + + expect(segmentationMockService.files[0]).toEqual(file); + expect(segmentationMockService.filesNames[0]).toEqual(fixturesPdfName); + expect(segmentationMockService.files[9]).toEqual(file); + expect(segmentationMockService.filesNames[9]).toEqual(fixturesPdfName); + + const messages = await segmentationMockService.readAllTaskMessages(); + expect(messages.length).toEqual(12); + expect(messages[0]).toEqual(`{"task":"${fixturesPdfName}","tenant":"tenant1"}`); + }); }); diff --git a/app/api/tasksmanager/specs/ExternalDummyService.ts b/app/api/tasksmanager/specs/ExternalDummyService.ts index ec84cffe30..83f00c2837 100644 --- a/app/api/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/tasksmanager/specs/ExternalDummyService.ts @@ -1,5 +1,5 @@ import express from 'express'; -import RedisSMQ from 'rsmq'; +import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; import { Server } from 'http'; import bodyParser from 'body-parser'; @@ -99,14 +99,36 @@ export class ExternalDummyService { } } - async read() { - const messageReceived: RedisSMQ.QueueMessage | {} = await this.rsmq.receiveMessageAsync({ + async readFirstTaskMessage() { + const message: RedisSMQ.QueueMessage | {} = await this.rsmq.receiveMessageAsync({ qname: `${this.serviceName}_tasks`, }); + const queueMessage = message as QueueMessage; - const queueMessage = messageReceived as RedisSMQ.QueueMessage; - this.currentTask = queueMessage?.message; - return this.currentTask; + if (!queueMessage.id) { + return undefined; + } + + await this.rsmq.deleteMessageAsync({ + qname: `${this.serviceName}_tasks`, + id: queueMessage.id, + }); + + return queueMessage?.message; + } + + async readAllTaskMessages() { + const messages: string[] = []; + while (true) { + // eslint-disable-next-line no-await-in-loop + const message = await this.readFirstTaskMessage(); + if (!message) { + break; + } + messages.push(message); + } + + return messages; } async start(redisUrl: string) { @@ -135,4 +157,9 @@ export class ExternalDummyService { message: JSON.stringify(task), }); } + + reset() { + this.files = []; + this.filesNames = []; + } } diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/tasksmanager/specs/taskManager.spec.ts index 4c9b0aba19..9ed39485b2 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/tasksmanager/specs/taskManager.spec.ts @@ -50,9 +50,9 @@ describe('taskManager', () => { tenant: 'Rafa', }); - await externalDummyService.read(); + const message = await externalDummyService.readFirstTaskMessage(); - expect(externalDummyService.currentTask).toBe('{"task":"CheeseBurger","tenant":"Rafa"}'); + expect(message).toBe('{"task":"CheeseBurger","tenant":"Rafa"}'); }); describe('when multiple tasks are added', () => { @@ -72,13 +72,13 @@ describe('taskManager', () => { tenant: 'Fede', }); - let message = await externalDummyService.read(); + let message = await externalDummyService.readFirstTaskMessage(); expect(message).toBe('{"task":"CheeseBurger","tenant":"Joan"}'); - message = await externalDummyService.read(); + message = await externalDummyService.readFirstTaskMessage(); expect(message).toBe('{"task":"Fries","tenant":"Joan"}'); - message = await externalDummyService.read(); + message = await externalDummyService.readFirstTaskMessage(); expect(message).toBe('{"task":"Ribs","tenant":"Fede"}'); }); }); @@ -164,7 +164,7 @@ describe('taskManager', () => { await new Promise(resolve => setTimeout(resolve, 200)); // wait for redis to connect await taskManager?.startTask(task); - const message = await externalDummyService.read(); + const message = await externalDummyService.readFirstTaskMessage(); expect(message).toBe('{"task":"Ceviche","tenant":"Mercy"}'); }); From 83bafcf1cb244936e8e9d968f5a03f72901e7531 Mon Sep 17 00:00:00 2001 From: gabo Date: Mon, 20 Sep 2021 12:21:43 +0200 Subject: [PATCH 21/62] Use information information extraction for segmenting --- app/api/pdfsegmentation/PdfSegmentation.ts | 64 --------- app/api/pdfsegmentation/segmentPdfs.ts | 75 +++++++++++ app/api/pdfsegmentation/specs/fixtures.ts | 90 +++++++++---- .../specs/pdfSegmentation.spec.ts | 122 +++++++++--------- app/api/utils/fixturesFactory.ts | 4 +- .../MetadataExtractionDashboard.tsx | 6 +- .../specs/MetadataExtractionDashboard.spec.js | 8 +- app/shared/types/settingsType.d.ts | 2 + 8 files changed, 213 insertions(+), 158 deletions(-) delete mode 100644 app/api/pdfsegmentation/PdfSegmentation.ts create mode 100644 app/api/pdfsegmentation/segmentPdfs.ts diff --git a/app/api/pdfsegmentation/PdfSegmentation.ts b/app/api/pdfsegmentation/PdfSegmentation.ts deleted file mode 100644 index d883b0a9f4..0000000000 --- a/app/api/pdfsegmentation/PdfSegmentation.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { Service, TaskManager } from 'api/tasksmanager/taskManager'; -import { RepeatWith } from 'api/tasksmanager/RepeatWith'; -import { files, uploadsPath } from 'api/files'; -import fs from 'fs'; -import { FileType } from 'shared/types/fileType'; - -export interface SegmentationParameters { - filesUrl: string; - dataUrl: string; - resultsUrl: string; - redisUrl: string; -} -export const SERVICE_NAME = 'pdfSegmentation'; - -export class PdfSegmentation { - private readonly service: Service; - - private repeatWith: RepeatWith; - - private taskManager: TaskManager; - - constructor(segmentationParameters: SegmentationParameters) { - this.service = { - serviceName: SERVICE_NAME, - ...segmentationParameters, - }; - - // eslint-disable-next-line @typescript-eslint/no-misused-promises - this.repeatWith = new RepeatWith(SERVICE_NAME, this.segment.bind(this), 120000, 100); - this.taskManager = new TaskManager(this.service); - } - - async start() { - await this.repeatWith.start(); - } - - async stop() { - await this.repeatWith.stop(); - } - - async segment() { - const nextFilesToProcess = await files.get({ - type: 'document', - }); - - // eslint-disable-next-line @typescript-eslint/no-misused-promises - await nextFilesToProcess.forEach(async nextFile => { - await this.segmentOne(nextFile); - }); - } - - private async segmentOne(nextFile: FileType) { - if (!nextFile || !nextFile.filename) { - return; - } - const file = fs.readFileSync(uploadsPath(nextFile.filename)); - await this.taskManager.sendFile(file, nextFile.filename); - const task = { - task: nextFile.filename, - tenant: 'tenant1', - }; - await this.taskManager.startTask(task); - } -} diff --git a/app/api/pdfsegmentation/segmentPdfs.ts b/app/api/pdfsegmentation/segmentPdfs.ts new file mode 100644 index 0000000000..558ac9ab37 --- /dev/null +++ b/app/api/pdfsegmentation/segmentPdfs.ts @@ -0,0 +1,75 @@ +import { TaskManager } from 'api/tasksmanager/taskManager'; +import { files, uploadsPath } from 'api/files'; +import fs from 'fs'; +import { FileType } from 'shared/types/fileType'; +import { config } from 'api/config'; +import { Settings } from 'shared/types/settingsType'; +import settings from 'api/settings/settings'; +import entities from 'api/entities'; + +class SegmentPdfs { + SERVICE_NAME = 'segmentation'; + + public segmentationTaskManager: TaskManager | undefined; + + templatesWithInformationExtraction: string[] | undefined; + + features: Settings | undefined; + + async start() { + const settingsValues = await settings.get(); + const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; + this.templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map(x => + x.template.toString() + ); + const segmentationValues = settingsValues?.features?.segmentation; + this.segmentationTaskManager = new TaskManager({ + serviceName: this.SERVICE_NAME, + dataUrl: segmentationValues.dataUrl, + filesUrl: segmentationValues.filesUrl, + resultsUrl: segmentationValues.resultsUrl, + redisUrl: `redis://${config.redis.host}:${config.redis.host}`, + }); + } + + segmentOnePdf = async (nextFile: FileType) => { + if (!this.segmentationTaskManager) { + return; + } + + if (!nextFile || !nextFile.filename) { + return; + } + const file = fs.readFileSync(uploadsPath(nextFile.filename)); + await this.segmentationTaskManager.sendFile(file, nextFile.filename); + const task = { + task: nextFile.filename, + tenant: 'tenant1', + }; + await this.segmentationTaskManager.startTask(task); + }; + + segmentPdfs = async () => { + if (!this.segmentationTaskManager) { + await this.start(); + } + + console.log(this.templatesWithInformationExtraction); + + const nextEntitiesToProcess = await entities.get({ + template: { $in: this.templatesWithInformationExtraction }, + }); + + const sharedIds = nextEntitiesToProcess.map(x => x.sharedId); + const nextFilesToProcess = await files.get({ + entity: { $in: sharedIds }, + }); + + for (let i = 0; i < 10; i += 1) { + // eslint-disable-next-line no-await-in-loop + await this.segmentOnePdf(nextFilesToProcess[i]); + } + }; +} + +export { SegmentPdfs }; diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts index bb7d089437..270f0c106a 100644 --- a/app/api/pdfsegmentation/specs/fixtures.ts +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -5,29 +5,57 @@ const factory = getFixturesFactory(); const settings = [ { - _id: db.id(), - languages: [{ key: 'en', default: true }, { key: 'es' }, { key: 'pt' }], features: { - 'metadata-extraction': [ + metadataExtraction: [ { - id: factory.id('templateToSegmentA'), + template: factory.id('templateToSegmentA'), properties: ['property1', 'property2'], }, { - id: factory.id('templateToSegmentB'), + template: factory.id('templateToSegmentB'), properties: ['property1'], }, ], + segmentation: { + dataUrl: 'http://localhost:1234/data', + filesUrl: 'http://localhost:1234/files', + resultsUrl: 'http://localhost:1234/results', + }, }, }, ]; -const fixturesPdfName = 'f2082bf51b6ef839690485d7153e847a.pdf'; +const otherSettings = [ + { + _id: db.id(), + features: { + metadataExtraction: [ + { + template: factory.id('templateToSegmentB'), + properties: ['property1'], + }, + ], + segmentation: { + dataUrl: 'http://other-localhost:1234/data', + filesUrl: 'http://other-localhost:1234/files', + resultsUrl: 'http://other-localhost:1234/results', + }, + }, + }, +]; + +const fixturesPdfNameA = 'f2082bf51b6ef839690485d7153e847a.pdf'; const fixturesOneFile: DBFixture = { settings, entities: [factory.entity('A1', 'templateToSegmentA')], - files: [factory.file('F1', 'A1', 'en', 'document', fixturesPdfName)], + files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], +}; + +const fixturesOtherFile: DBFixture = { + settings: otherSettings, + entities: [factory.entity('A1', 'templateToSegmentB')], + files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], }; const fixturesTwelveFiles: DBFixture = { @@ -49,33 +77,41 @@ const fixturesTwelveFiles: DBFixture = { factory.entity('A14', 'templateToSegmentA'), ], files: [ - factory.file('F1', 'A1', 'en', 'document', fixturesPdfName), - factory.file('F2', 'A2', 'en', 'document', fixturesPdfName), - factory.file('F3', 'A3', 'en', 'document', fixturesPdfName), - factory.file('F4', 'A4', 'en', 'document', fixturesPdfName), - factory.file('F5', 'A5', 'en', 'document', fixturesPdfName), - factory.file('F6', 'A6', 'en', 'document', fixturesPdfName), - factory.file('F7', 'A7', 'en', 'document', fixturesPdfName), - factory.file('F8', 'A8', 'en', 'document', fixturesPdfName), - factory.file('F9', 'A9', 'en', 'document', fixturesPdfName), - factory.file('F10', 'A10', 'en', 'document', fixturesPdfName), - factory.file('F11', 'A11', 'en', 'document', fixturesPdfName), - factory.file('F12', 'A12', 'en', 'document', fixturesPdfName), + factory.file('F1', 'A1', 'document', fixturesPdfNameA), + factory.file('F2', 'A2', 'document', fixturesPdfNameA), + factory.file('F3', 'A3', 'document', fixturesPdfNameA), + factory.file('F4', 'A4', 'document', fixturesPdfNameA), + factory.file('F5', 'A5', 'document', fixturesPdfNameA), + factory.file('F6', 'A6', 'document', fixturesPdfNameA), + factory.file('F7', 'A7', 'document', fixturesPdfNameA), + factory.file('F8', 'A8', 'document', fixturesPdfNameA), + factory.file('F9', 'A9', 'document', fixturesPdfNameA), + factory.file('F10', 'A10', 'document', fixturesPdfNameA), + factory.file('F11', 'A11', 'document', fixturesPdfNameA), + factory.file('F12', 'A12', 'document', fixturesPdfNameA), ], }; -const fixturesUseDefaultPdfPerLanguage: DBFixture = { +const fixturesFilesWithoutInformationExtraction: DBFixture = { settings, entities: [ - factory.entity('A1', 'templateToSegmentA', {}, { language: 'es' }), - factory.entity('B1', 'templateToSegmentB', {}, { language: 'pt' }), - factory.entity('B2', 'templateNotSegmentC', {}, { language: 'en' }), + factory.entity('A1', 'templateToSegmentA', {}), + factory.entity('B1', 'templateToSegmentB', {}), + factory.entity('B2', 'templateNotSegmentC', {}), + factory.entity('B3', 'templateNotSegmentC', {}), ], files: [ - factory.file('F1', 'A1', 'en', 'document', 'test.pdf'), - factory.file('F2', 'B1', 'es', 'attachment', 'a.png'), - factory.file('F3', 'B2', 'pt', 'document', 'c.pdf'), + factory.file('F1', 'A1', 'document', fixturesPdfNameA), + factory.file('F2', 'B1', 'document', fixturesPdfNameA), + factory.file('F3', 'B2', 'document', fixturesPdfNameA), + factory.file('F4', 'B3', 'document', fixturesPdfNameA), ], }; -export { fixturesPdfName, fixturesOneFile, fixturesTwelveFiles, fixturesUseDefaultPdfPerLanguage }; +export { + fixturesPdfNameA, + fixturesOneFile, + fixturesOtherFile, + fixturesTwelveFiles, + fixturesFilesWithoutInformationExtraction, +}; diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index b3c764a0b2..98e30d811f 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -1,89 +1,95 @@ -import { ExternalDummyService } from 'api/tasksmanager/specs/ExternalDummyService'; import { testingDB } from 'api/utils/testing_db'; import { + fixturesFilesWithoutInformationExtraction, fixturesOneFile, - fixturesPdfName, + fixturesOtherFile, + fixturesPdfNameA, fixturesTwelveFiles, } from 'api/pdfsegmentation/specs/fixtures'; import { testingEnvironment } from 'api/utils/testingEnvironment'; import fs from 'fs'; -import { - PdfSegmentation, - SERVICE_NAME, - SegmentationParameters, -} from 'api/pdfsegmentation/PdfSegmentation'; -import { RedisServer } from 'api/tasksmanager/RedisServer'; -import waitForExpect from 'wait-for-expect'; +import { TaskManager } from 'api/tasksmanager/taskManager'; +import { config } from 'api/config'; +import { SegmentPdfs } from '../segmentPdfs'; + +jest.mock('api/tasksmanager/taskManager.ts'); describe('pdfSegmentation', () => { - let redisServer: RedisServer; - let segmentationMockService: ExternalDummyService; - let pdfSegmentation: PdfSegmentation; - let segmentationConnectionParameters: SegmentationParameters; + let segmentPdfs: SegmentPdfs; - beforeAll(async () => { - redisServer = new RedisServer(); + afterAll(async () => { + await testingDB.disconnect(); + }); - await redisServer.start(); - segmentationConnectionParameters = { + beforeEach(() => { + segmentPdfs = new SegmentPdfs(); + }); + + it('should send one pdf to segment', async () => { + await testingEnvironment.setUp(fixturesOneFile); + + await segmentPdfs.segmentPdfs(); + + expect(TaskManager).toHaveBeenCalledWith({ + serviceName: 'segmentation', dataUrl: 'http://localhost:1234/data', filesUrl: 'http://localhost:1234/files', resultsUrl: 'http://localhost:1234/results', - redisUrl: 'redis://localhost:6379', - }; + redisUrl: `redis://${config.redis.host}:${config.redis.host}`, + }); - segmentationMockService = new ExternalDummyService(1234, SERVICE_NAME); - await segmentationMockService.start(segmentationConnectionParameters.redisUrl); + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( + file, + fixturesPdfNameA + ); }); - beforeEach(() => { - segmentationMockService.reset(); - }); + it('should send other pdf to segment', async () => { + await testingEnvironment.setUp(fixturesOtherFile); - afterAll(async () => { - await testingDB.disconnect(); - await segmentationMockService.stop(); - await pdfSegmentation?.stop(); - await redisServer.stop(); - }); + await segmentPdfs.segmentPdfs(); - it('should send one pdfs to segment', async () => { - await testingEnvironment.setUp(fixturesOneFile); + expect(TaskManager).toHaveBeenCalledWith({ + serviceName: 'segmentation', + dataUrl: 'http://other-localhost:1234/data', + filesUrl: 'http://other-localhost:1234/files', + resultsUrl: 'http://other-localhost:1234/results', + redisUrl: `redis://${config.redis.host}:${config.redis.host}`, + }); - pdfSegmentation = new PdfSegmentation(segmentationConnectionParameters); - await pdfSegmentation.start(); + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( + file, + fixturesPdfNameA + ); + }); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfName}`); + it('should send 10 pdfs to segment', async () => { + await testingEnvironment.setUp(fixturesTwelveFiles); - await waitForExpect(async () => { - expect(segmentationMockService.files.length).toEqual(1); - }); + await segmentPdfs.segmentPdfs(); - expect(segmentationMockService.files[0]).toEqual(file); - expect(segmentationMockService.filesNames[0]).toEqual(fixturesPdfName); + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( + file, + fixturesPdfNameA + ); - const message = await segmentationMockService.readFirstTaskMessage(); - expect(message).toEqual(`{"task":"${fixturesPdfName}","tenant":"tenant1"}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(10); }); - it('should send 12 pdfs to segment', async () => { - await testingEnvironment.setUp(fixturesTwelveFiles); - pdfSegmentation = new PdfSegmentation(segmentationConnectionParameters); - await pdfSegmentation.start(); - - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfName}`); + it('should send pdfs only from templates with the information extraction on', async () => { + await testingEnvironment.setUp(fixturesFilesWithoutInformationExtraction); - await waitForExpect(async () => { - expect(segmentationMockService.files.length).toEqual(12); - }); + await segmentPdfs.segmentPdfs(); - expect(segmentationMockService.files[0]).toEqual(file); - expect(segmentationMockService.filesNames[0]).toEqual(fixturesPdfName); - expect(segmentationMockService.files[9]).toEqual(file); - expect(segmentationMockService.filesNames[9]).toEqual(fixturesPdfName); + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( + file, + fixturesPdfNameA + ); - const messages = await segmentationMockService.readAllTaskMessages(); - expect(messages.length).toEqual(12); - expect(messages[0]).toEqual(`{"task":"${fixturesPdfName}","tenant":"tenant1"}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); }); diff --git a/app/api/utils/fixturesFactory.ts b/app/api/utils/fixturesFactory.ts index 606e4525ef..e16cc093c7 100644 --- a/app/api/utils/fixturesFactory.ts +++ b/app/api/utils/fixturesFactory.ts @@ -47,9 +47,9 @@ export function getFixturesFactory() { file: ( id: string, entity: string, - language: string, type: 'custom' | 'document' | 'thumbnail' | 'attachment' | undefined, - filename: string + filename: string, + language: string = 'en' ): FileType => ({ _id: idMapper(`${id}`), entity, diff --git a/app/react/MetadataExtraction/MetadataExtractionDashboard.tsx b/app/react/MetadataExtraction/MetadataExtractionDashboard.tsx index 1ec29aecdf..c28a773501 100644 --- a/app/react/MetadataExtraction/MetadataExtractionDashboard.tsx +++ b/app/react/MetadataExtraction/MetadataExtractionDashboard.tsx @@ -28,7 +28,7 @@ export interface MetadataExtractionDashboardStateTypes { function mapStateToProps({ settings, templates }: any) { return { - extractionSettings: settings.collection.get('features')?.get('metadata-extraction'), + extractionSettings: settings.collection.get('features')?.get('metadataExtraction'), templates, }; } @@ -52,8 +52,8 @@ class MetadataExtractionDashboard extends React.Component< const formatted: FormattedSettingsData = {}; this.props.extractionSettings.forEach(setting => { - const template = setting.has('id') - ? this.props.templates.find(temp => temp?.get('_id') === setting.get('id')) + const template = setting.has('template') + ? this.props.templates.find(temp => temp?.get('_id') === setting.get('template')) : this.props.templates.find(temp => temp?.get('name') === setting.get('name')); if (!template) { diff --git a/app/react/MetadataExtraction/specs/MetadataExtractionDashboard.spec.js b/app/react/MetadataExtraction/specs/MetadataExtractionDashboard.spec.js index 81afee4d49..a27aebabd0 100644 --- a/app/react/MetadataExtraction/specs/MetadataExtractionDashboard.spec.js +++ b/app/react/MetadataExtraction/specs/MetadataExtractionDashboard.spec.js @@ -35,17 +35,17 @@ const templates = Immutable.fromJS([ const settings = { collection: Immutable.fromJS({ features: { - 'metadata-extraction': [ + metadataExtraction: [ { - id: factory.id('templateA'), + template: factory.id('templateA'), properties: ['AonlyText', 'ABsharedDate', 'ACsharedMarkdown', 'ABC shared Number'], }, { - id: factory.id('templateB'), + template: factory.id('templateB'), properties: ['BonlyText', 'ABsharedDate', 'BCsharedMarkdown', 'ABC shared number'], }, { - id: factory.id('templateC'), + template: factory.id('templateC'), properties: ['ConlyText', 'ACsharedMarkdown', 'BCsharedMarkdown', 'abc shared number'], }, ], diff --git a/app/shared/types/settingsType.d.ts b/app/shared/types/settingsType.d.ts index b957454f45..9c1fb2ef79 100644 --- a/app/shared/types/settingsType.d.ts +++ b/app/shared/types/settingsType.d.ts @@ -2,6 +2,7 @@ /**AUTO-GENERATED. RUN yarn emit-types to update.*/ import { ObjectIdSchema, LanguagesListSchema, GeolocationSchema } from 'shared/types/commonTypes'; +import { ObjectId } from 'mongodb'; export interface ItemSchema { id?: string; @@ -95,6 +96,7 @@ export interface Settings { topicClassification?: boolean; favorites?: boolean; [k: string]: unknown | undefined; + metadataExtraction: {template: ObjectIdSchema, properties: string[]}[] }; mapStartingPoint?: { label?: string; From 4fcd276339392e59e95dcf14951224182acb3a33 Mon Sep 17 00:00:00 2001 From: gabo Date: Mon, 20 Sep 2021 12:53:08 +0200 Subject: [PATCH 22/62] Multitenant test for pdf segmetnation --- app/api/pdfsegmentation/segmentPdfs.ts | 4 +--- app/api/pdfsegmentation/specs/fixtures.ts | 4 ++++ .../pdfsegmentation/specs/pdfSegmentation.spec.ts | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/app/api/pdfsegmentation/segmentPdfs.ts b/app/api/pdfsegmentation/segmentPdfs.ts index 558ac9ab37..9205b8b01e 100644 --- a/app/api/pdfsegmentation/segmentPdfs.ts +++ b/app/api/pdfsegmentation/segmentPdfs.ts @@ -54,13 +54,11 @@ class SegmentPdfs { await this.start(); } - console.log(this.templatesWithInformationExtraction); - const nextEntitiesToProcess = await entities.get({ template: { $in: this.templatesWithInformationExtraction }, }); - const sharedIds = nextEntitiesToProcess.map(x => x.sharedId); + const sharedIds = nextEntitiesToProcess.map((x: { sharedId: string }) => x.sharedId); const nextFilesToProcess = await files.get({ entity: { $in: sharedIds }, }); diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts index 270f0c106a..32dbeafeb9 100644 --- a/app/api/pdfsegmentation/specs/fixtures.ts +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -108,6 +108,10 @@ const fixturesFilesWithoutInformationExtraction: DBFixture = { ], }; +const fixturesMultiTenant: DBFixture = { + +} + export { fixturesPdfNameA, fixturesOneFile, diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index 98e30d811f..ca3de4a375 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -92,4 +92,18 @@ describe('pdfSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); + + it('should send pdfs from different tenants', async () => { + await testingEnvironment.setUp(fixturesMultitenant); + + await segmentPdfs.segmentPdfs(); + + const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( + file, + fixturesPdfNameA + ); + + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); + }); }); From 4558d537d59f928484e30fe4d663873e3f9d9e72 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 21 Sep 2021 10:33:27 +0200 Subject: [PATCH 23/62] exposing the fixturer --- app/api/utils/testing_db.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/utils/testing_db.ts b/app/api/utils/testing_db.ts index e7e724baae..d217be91ca 100644 --- a/app/api/utils/testing_db.ts +++ b/app/api/utils/testing_db.ts @@ -140,7 +140,7 @@ const testingDB: { }, }; -export { testingDB }; +export { testingDB, fixturer }; // deprecated, for backward compatibility export default testingDB; From 8a6f3656f1ffb00950cb267e8618627aa10f9620 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 21 Sep 2021 10:34:41 +0200 Subject: [PATCH 24/62] PDF segmentation runing multitenant WIP --- app/api/pdfsegmentation/segmentPdfs.ts | 48 ++++++++++------ app/api/pdfsegmentation/specs/fixtures.ts | 12 ++-- .../specs/pdfSegmentation.spec.ts | 55 ++++++++++++++----- 3 files changed, 76 insertions(+), 39 deletions(-) diff --git a/app/api/pdfsegmentation/segmentPdfs.ts b/app/api/pdfsegmentation/segmentPdfs.ts index 9205b8b01e..949761753e 100644 --- a/app/api/pdfsegmentation/segmentPdfs.ts +++ b/app/api/pdfsegmentation/segmentPdfs.ts @@ -5,7 +5,8 @@ import { FileType } from 'shared/types/fileType'; import { config } from 'api/config'; import { Settings } from 'shared/types/settingsType'; import settings from 'api/settings/settings'; -import entities from 'api/entities'; +import { model as entities } from 'api/entities'; +import { tenants } from 'api/tenants/tenantContext'; class SegmentPdfs { SERVICE_NAME = 'segmentation'; @@ -18,6 +19,7 @@ class SegmentPdfs { async start() { const settingsValues = await settings.get(); + const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; this.templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map(x => x.template.toString() @@ -40,6 +42,7 @@ class SegmentPdfs { if (!nextFile || !nextFile.filename) { return; } + const file = fs.readFileSync(uploadsPath(nextFile.filename)); await this.segmentationTaskManager.sendFile(file, nextFile.filename); const task = { @@ -49,25 +52,36 @@ class SegmentPdfs { await this.segmentationTaskManager.startTask(task); }; - segmentPdfs = async () => { - if (!this.segmentationTaskManager) { - await this.start(); - } + segmentPdfs = async () => + Promise.all( + Object.keys(tenants.tenants).map(async tenant => { + await tenants.run(async () => { + if (!this.segmentationTaskManager) { + await this.start(); + } - const nextEntitiesToProcess = await entities.get({ - template: { $in: this.templatesWithInformationExtraction }, - }); + const settingsValues = await settings.get(); + const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; + const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map(x => + x.template.toString() + ); - const sharedIds = nextEntitiesToProcess.map((x: { sharedId: string }) => x.sharedId); - const nextFilesToProcess = await files.get({ - entity: { $in: sharedIds }, - }); + const nextEntitiesToProcess = await entities.getUnrestricted({ + template: { $in: templatesWithInformationExtraction }, + }); - for (let i = 0; i < 10; i += 1) { - // eslint-disable-next-line no-await-in-loop - await this.segmentOnePdf(nextFilesToProcess[i]); - } - }; + const sharedIds = nextEntitiesToProcess.map((x: { sharedId: string }) => x.sharedId); + const nextFilesToProcess = await files.get({ + entity: { $in: sharedIds }, + }); + + for (let i = 0; i < 10; i += 1) { + // eslint-disable-next-line no-await-in-loop + await this.segmentOnePdf(nextFilesToProcess[i]); + } + }, tenant); + }) + ); } export { SegmentPdfs }; diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts index 32dbeafeb9..26c372dc2d 100644 --- a/app/api/pdfsegmentation/specs/fixtures.ts +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -47,14 +47,14 @@ const otherSettings = [ const fixturesPdfNameA = 'f2082bf51b6ef839690485d7153e847a.pdf'; const fixturesOneFile: DBFixture = { - settings, entities: [factory.entity('A1', 'templateToSegmentA')], + settings, files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], }; const fixturesOtherFile: DBFixture = { - settings: otherSettings, entities: [factory.entity('A1', 'templateToSegmentB')], + settings: otherSettings, files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], }; @@ -92,7 +92,7 @@ const fixturesTwelveFiles: DBFixture = { ], }; -const fixturesFilesWithoutInformationExtraction: DBFixture = { +const fixturesFilesWithtMixedInformationExtraction: DBFixture = { settings, entities: [ factory.entity('A1', 'templateToSegmentA', {}), @@ -108,14 +108,12 @@ const fixturesFilesWithoutInformationExtraction: DBFixture = { ], }; -const fixturesMultiTenant: DBFixture = { - -} +const fixturesMultiTenant: DBFixture = {}; export { fixturesPdfNameA, fixturesOneFile, fixturesOtherFile, fixturesTwelveFiles, - fixturesFilesWithoutInformationExtraction, + fixturesFilesWithtMixedInformationExtraction, }; diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index ca3de4a375..7b91796923 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -1,6 +1,6 @@ -import { testingDB } from 'api/utils/testing_db'; +import { testingDB, fixturer } from 'api/utils/testing_db'; import { - fixturesFilesWithoutInformationExtraction, + fixturesFilesWithtMixedInformationExtraction, fixturesOneFile, fixturesOtherFile, fixturesPdfNameA, @@ -10,23 +10,53 @@ import { testingEnvironment } from 'api/utils/testingEnvironment'; import fs from 'fs'; import { TaskManager } from 'api/tasksmanager/taskManager'; import { config } from 'api/config'; +import { tenants } from 'api/tenants/tenantContext'; +import { DB } from 'api/odm'; import { SegmentPdfs } from '../segmentPdfs'; +import { Db } from 'mongodb'; jest.mock('api/tasksmanager/taskManager.ts'); describe('pdfSegmentation', () => { let segmentPdfs: SegmentPdfs; + const tenantOne = { + name: 'tenantOne', + dbName: 'tenantOne', + indexName: 'tenantOne', + uploadedDocuments: `${__dirname}/uploads`, + attachments: `${__dirname}/uploads`, + customUploads: `${__dirname}/uploads`, + temporalFiles: `${__dirname}/uploads`, + }; + + const tenantTwo = { + name: 'tenantTwo', + dbName: 'tenantTwo', + indexName: 'tenantTwo', + uploadedDocuments: `${__dirname}/uploads`, + attachments: `${__dirname}/uploads`, + customUploads: `${__dirname}/uploads`, + temporalFiles: `${__dirname}/uploads`, + }; + + let dbOne: Db; + let dbTwo: Db; + afterAll(async () => { await testingDB.disconnect(); }); - beforeEach(() => { + beforeEach(async () => { segmentPdfs = new SegmentPdfs(); + await DB.connect(); + dbOne = DB.connectionForDB(tenantOne.dbName).db; + dbTwo = DB.connectionForDB(tenantTwo.dbName).db; + tenants.tenants = { tenantOne }; }); it('should send one pdf to segment', async () => { - await testingEnvironment.setUp(fixturesOneFile); + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); await segmentPdfs.segmentPdfs(); @@ -46,7 +76,7 @@ describe('pdfSegmentation', () => { }); it('should send other pdf to segment', async () => { - await testingEnvironment.setUp(fixturesOtherFile); + await fixturer.clearAllAndLoad(dbOne, fixturesOtherFile); await segmentPdfs.segmentPdfs(); @@ -66,8 +96,7 @@ describe('pdfSegmentation', () => { }); it('should send 10 pdfs to segment', async () => { - await testingEnvironment.setUp(fixturesTwelveFiles); - + await fixturer.clearAllAndLoad(dbOne, fixturesTwelveFiles); await segmentPdfs.segmentPdfs(); const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); @@ -80,7 +109,7 @@ describe('pdfSegmentation', () => { }); it('should send pdfs only from templates with the information extraction on', async () => { - await testingEnvironment.setUp(fixturesFilesWithoutInformationExtraction); + await fixturer.clearAllAndLoad(dbOne, fixturesFilesWithtMixedInformationExtraction); await segmentPdfs.segmentPdfs(); @@ -94,16 +123,12 @@ describe('pdfSegmentation', () => { }); it('should send pdfs from different tenants', async () => { - await testingEnvironment.setUp(fixturesMultitenant); + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); + await fixturer.clearAllAndLoad(dbTwo, fixturesOtherFile); + tenants.tenants = { tenantOne, tenantTwo }; await segmentPdfs.segmentPdfs(); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( - file, - fixturesPdfNameA - ); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); }); From b3884028b8f2873fe3e68fae299efe1739d5bd27 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 21 Sep 2021 10:54:36 +0200 Subject: [PATCH 25/62] TODOs segmentation --- .../pdfsegmentation/specs/pdfSegmentation.spec.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index 7b91796923..ee3ce15c40 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -122,7 +122,7 @@ describe('pdfSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); - it('should send pdfs from different tenants', async () => { + it('should send pdfs from different tenants with the information extraction on', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); await fixturer.clearAllAndLoad(dbTwo, fixturesOtherFile); tenants.tenants = { tenantOne, tenantTwo }; @@ -131,4 +131,14 @@ describe('pdfSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); + + //TODO + // - should call start task + // - should store which entities are already segmented + // - should only send pdfs not already segmented + // - should handle tenants without the information extraction on + // - should get the results from the task and store them + // - do a load test to checkl the perfomance + // - make sure onlye one taskmanager is instanced + // - error handling ? task failed ? }); From e36efa5bc39925beda3319ff6ab36aae6763f400 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 5 Oct 2021 09:40:34 +0200 Subject: [PATCH 26/62] PDF segmentation storing the segmentation place holder --- app/api/pdfsegmentation/segmentPdfs.ts | 7 ++++ app/api/pdfsegmentation/segmentationModel.js | 33 +++++++++++++++++++ app/api/pdfsegmentation/specs/fixtures.ts | 1 + .../specs/pdfSegmentation.spec.ts | 31 ++++++++++++++--- app/api/tasksmanager/taskManager.ts | 2 +- 5 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 app/api/pdfsegmentation/segmentationModel.js diff --git a/app/api/pdfsegmentation/segmentPdfs.ts b/app/api/pdfsegmentation/segmentPdfs.ts index 949761753e..ff86555778 100644 --- a/app/api/pdfsegmentation/segmentPdfs.ts +++ b/app/api/pdfsegmentation/segmentPdfs.ts @@ -7,6 +7,8 @@ import { Settings } from 'shared/types/settingsType'; import settings from 'api/settings/settings'; import { model as entities } from 'api/entities'; import { tenants } from 'api/tenants/tenantContext'; +import { SegmentationModel } from './segmentationModel'; +import { ObjectIdSchema } from 'shared/types/commonTypes'; class SegmentPdfs { SERVICE_NAME = 'segmentation'; @@ -50,6 +52,11 @@ class SegmentPdfs { tenant: 'tenant1', }; await this.segmentationTaskManager.startTask(task); + await this.storeProcess(nextFile._id!, nextFile.filename); + }; + + storeProcess = async (fileID: ObjectIdSchema, fileName: string) => { + await SegmentationModel.save({ fileID, fileName }); }; segmentPdfs = async () => diff --git a/app/api/pdfsegmentation/segmentationModel.js b/app/api/pdfsegmentation/segmentationModel.js new file mode 100644 index 0000000000..66edd03234 --- /dev/null +++ b/app/api/pdfsegmentation/segmentationModel.js @@ -0,0 +1,33 @@ +import mongoose from 'mongoose'; +import { instanceModel } from 'api/odm'; + +const props = { + autoexpire: { type: Date, expires: 360000, default: Date.now }, + status: { type: String, enum: ['pending', 'completed', 'error'], default: 'pending' }, + error: { type: String }, + segmentation: { + page_width: Number, + page_height: Number, + paragraphs: [ + { + left: Number, + top: Number, + width: Number, + height: Number, + page_number: Number, + text: String, + }, + ], + }, + fileID: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, + fileName: { type: String }, +}; + +const mongoSchema = new mongoose.Schema(props, { + emitIndexErrors: true, + strict: false, +}); + +const SegmentationModel = instanceModel('captchas', mongoSchema); + +export { SegmentationModel }; diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/pdfsegmentation/specs/fixtures.ts index 26c372dc2d..17d0dedbe7 100644 --- a/app/api/pdfsegmentation/specs/fixtures.ts +++ b/app/api/pdfsegmentation/specs/fixtures.ts @@ -90,6 +90,7 @@ const fixturesTwelveFiles: DBFixture = { factory.file('F11', 'A11', 'document', fixturesPdfNameA), factory.file('F12', 'A12', 'document', fixturesPdfNameA), ], + segmentation: [{}], }; const fixturesFilesWithtMixedInformationExtraction: DBFixture = { diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts index ee3ce15c40..3d0467c82b 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts @@ -6,13 +6,14 @@ import { fixturesPdfNameA, fixturesTwelveFiles, } from 'api/pdfsegmentation/specs/fixtures'; -import { testingEnvironment } from 'api/utils/testingEnvironment'; + import fs from 'fs'; import { TaskManager } from 'api/tasksmanager/taskManager'; import { config } from 'api/config'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; import { SegmentPdfs } from '../segmentPdfs'; +import { SegmentationModel } from '../segmentationModel'; import { Db } from 'mongodb'; jest.mock('api/tasksmanager/taskManager.ts'); @@ -132,9 +133,31 @@ describe('pdfSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); }); - //TODO - // - should call start task - // - should store which entities are already segmented + it('should start the tasks', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); + + await segmentPdfs.segmentPdfs(); + + expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledWith({ + task: 'f2082bf51b6ef839690485d7153e847a.pdf', + tenant: 'tenant1', + }); + }); + + it('should store the segmentation process state', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); + + await segmentPdfs.segmentPdfs(); + await tenants.run(async () => { + const [segmentation] = await SegmentationModel.get(); + expect(segmentation.status).toBe('pending'); + expect(segmentation.fileName).toBe(fixturesPdfNameA); + expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); + }, 'tenantOne'); + }); + + //TODO: + // - precalculate teanants that have the feature toggle on // - should only send pdfs not already segmented // - should handle tenants without the information extraction on // - should get the results from the task and store them diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/tasksmanager/taskManager.ts index 728c19a368..60516139ad 100644 --- a/app/api/tasksmanager/taskManager.ts +++ b/app/api/tasksmanager/taskManager.ts @@ -89,7 +89,7 @@ export class TaskManager { throw new Error('Redis is not connected'); } - await this.redisSMQ?.sendMessageAsync({ + return this.redisSMQ?.sendMessageAsync({ qname: this.taskQueue, message: JSON.stringify(taskMessage), }); From 13ea8227aa66f0248e8a0586c545bd4d0f36431a Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 6 Oct 2021 13:15:22 +0200 Subject: [PATCH 27/62] Moved all files under services folder, and refactor Repeater specs so they can work in paralel with the Taksmanager specs --- .../pdfsegmentation/PDFSegmentation.ts} | 2 +- .../pdfsegmentation/segmentationModel.js | 0 .../specs/PDFSegmentation.spec.ts} | 14 ++++---- .../pdfsegmentation/specs/fixtures.ts | 0 .../f2082bf51b6ef839690485d7153e847a.pdf | Bin .../tasksmanager/RedisServer.ts | 7 ++-- .../{ => services}/tasksmanager/RepeatWith.ts | 26 ++++++++------- .../specs/ExternalDummyService.ts | 0 .../tasksmanager/specs/blank.pdf | Bin .../tasksmanager/specs/repeatWithLock.spec.js | 31 +++++++++--------- .../tasksmanager/specs/taskManager.spec.ts | 9 ++--- .../tasksmanager/taskManager.ts | 0 12 files changed, 47 insertions(+), 42 deletions(-) rename app/api/{pdfsegmentation/segmentPdfs.ts => services/pdfsegmentation/PDFSegmentation.ts} (97%) rename app/api/{ => services}/pdfsegmentation/segmentationModel.js (100%) rename app/api/{pdfsegmentation/specs/pdfSegmentation.spec.ts => services/pdfsegmentation/specs/PDFSegmentation.spec.ts} (89%) rename app/api/{ => services}/pdfsegmentation/specs/fixtures.ts (100%) rename app/api/{ => services}/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf (100%) rename app/api/{ => services}/tasksmanager/RedisServer.ts (90%) rename app/api/{ => services}/tasksmanager/RepeatWith.ts (80%) rename app/api/{ => services}/tasksmanager/specs/ExternalDummyService.ts (100%) rename app/api/{ => services}/tasksmanager/specs/blank.pdf (100%) rename app/api/{ => services}/tasksmanager/specs/repeatWithLock.spec.js (88%) rename app/api/{ => services}/tasksmanager/specs/taskManager.spec.ts (95%) rename app/api/{ => services}/tasksmanager/taskManager.ts (100%) diff --git a/app/api/pdfsegmentation/segmentPdfs.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts similarity index 97% rename from app/api/pdfsegmentation/segmentPdfs.ts rename to app/api/services/pdfsegmentation/PDFSegmentation.ts index ff86555778..07f4d428a2 100644 --- a/app/api/pdfsegmentation/segmentPdfs.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,4 +1,4 @@ -import { TaskManager } from 'api/tasksmanager/taskManager'; +import { TaskManager } from 'api/services/tasksmanager/taskManager'; import { files, uploadsPath } from 'api/files'; import fs from 'fs'; import { FileType } from 'shared/types/fileType'; diff --git a/app/api/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js similarity index 100% rename from app/api/pdfsegmentation/segmentationModel.js rename to app/api/services/pdfsegmentation/segmentationModel.js diff --git a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts similarity index 89% rename from app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts rename to app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 3d0467c82b..93a909c8f6 100644 --- a/app/api/pdfsegmentation/specs/pdfSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -5,18 +5,18 @@ import { fixturesOtherFile, fixturesPdfNameA, fixturesTwelveFiles, -} from 'api/pdfsegmentation/specs/fixtures'; +} from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; -import { TaskManager } from 'api/tasksmanager/taskManager'; +import { TaskManager } from 'api/services/tasksmanager/taskManager'; import { config } from 'api/config'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; -import { SegmentPdfs } from '../segmentPdfs'; +import { SegmentPdfs } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; import { Db } from 'mongodb'; -jest.mock('api/tasksmanager/taskManager.ts'); +jest.mock('api/services/tasksmanager/taskManager.ts'); describe('pdfSegmentation', () => { let segmentPdfs: SegmentPdfs; @@ -43,6 +43,7 @@ describe('pdfSegmentation', () => { let dbOne: Db; let dbTwo: Db; + let file: Buffer; afterAll(async () => { await testingDB.disconnect(); @@ -54,6 +55,7 @@ describe('pdfSegmentation', () => { dbOne = DB.connectionForDB(tenantOne.dbName).db; dbTwo = DB.connectionForDB(tenantTwo.dbName).db; tenants.tenants = { tenantOne }; + file = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); }); it('should send one pdf to segment', async () => { @@ -69,7 +71,6 @@ describe('pdfSegmentation', () => { redisUrl: `redis://${config.redis.host}:${config.redis.host}`, }); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( file, fixturesPdfNameA @@ -89,7 +90,6 @@ describe('pdfSegmentation', () => { redisUrl: `redis://${config.redis.host}:${config.redis.host}`, }); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( file, fixturesPdfNameA @@ -100,7 +100,6 @@ describe('pdfSegmentation', () => { await fixturer.clearAllAndLoad(dbOne, fixturesTwelveFiles); await segmentPdfs.segmentPdfs(); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( file, fixturesPdfNameA @@ -114,7 +113,6 @@ describe('pdfSegmentation', () => { await segmentPdfs.segmentPdfs(); - const file = fs.readFileSync(`app/api/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( file, fixturesPdfNameA diff --git a/app/api/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts similarity index 100% rename from app/api/pdfsegmentation/specs/fixtures.ts rename to app/api/services/pdfsegmentation/specs/fixtures.ts diff --git a/app/api/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf b/app/api/services/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf similarity index 100% rename from app/api/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf rename to app/api/services/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf diff --git a/app/api/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts similarity index 90% rename from app/api/tasksmanager/RedisServer.ts rename to app/api/services/tasksmanager/RedisServer.ts index d58c912606..434e1ad6ef 100644 --- a/app/api/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -5,11 +5,14 @@ import Server from 'redis-server'; export class RedisServer { server: Server; + port: number; + pathToBin: string; - constructor() { + constructor(port = 6379) { this.pathToBin = 'redis/redis-stable/src/redis-server'; this.downloadRedis(); + this.port = port; } downloadRedis() { @@ -35,7 +38,7 @@ export class RedisServer { async start() { this.server = new Server({ - port: 6379, + port: this.port, bin: this.pathToBin, }); try { diff --git a/app/api/tasksmanager/RepeatWith.ts b/app/api/services/tasksmanager/RepeatWith.ts similarity index 80% rename from app/api/tasksmanager/RepeatWith.ts rename to app/api/services/tasksmanager/RepeatWith.ts index 16a4a522cd..37ba55bd4b 100644 --- a/app/api/tasksmanager/RepeatWith.ts +++ b/app/api/services/tasksmanager/RepeatWith.ts @@ -9,7 +9,7 @@ export class RepeatWith { private redlock: Redlock | undefined; - private stopTask: ((value: unknown) => void) | undefined; + private stopTask: Function | undefined; private redisClient: Redis.RedisClient | undefined; @@ -19,7 +19,7 @@ export class RepeatWith { private retryDelay: number; - private id: string; + private port: number; constructor( lockName: string, @@ -27,18 +27,18 @@ export class RepeatWith { maxLockTime: number = 2000, delayTimeBetweenTasks: number = 0, retryDelay: number = 200, - id: string = '1' + port: number = 6379 ) { this.maxLockTime = maxLockTime; this.retryDelay = retryDelay; this.delayTimeBetweenTasks = delayTimeBetweenTasks; this.lockName = `locks:${lockName}`; this.task = task; - this.id = id; + this.port = port; } async start() { - this.redisClient = await Redis.createClient('redis://localhost:6379'); + this.redisClient = await Redis.createClient(`redis://localhost:${this.port}`); this.redlock = await new Redlock([this.redisClient], { retryJitter: 0, retryDelay: this.retryDelay, @@ -50,7 +50,8 @@ export class RepeatWith { } }); - this.lockTask(); + // eslint-disable-next-line no-void + void this.lockTask(); } async waitBetweenTasks() { @@ -86,18 +87,19 @@ export class RepeatWith { ); if (this.stopTask) { - this.stopTask(true); - await lock.unlock(); - } else { - await this.runTask(); - await lock.unlock(); + this.stopTask(); + return; } + + await this.runTask(); + await lock.unlock(); } catch (error) { if (error && error.name !== 'LockError') { throw error; } } - this.lockTask(); + // eslint-disable-next-line no-void + void this.lockTask(); } } diff --git a/app/api/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts similarity index 100% rename from app/api/tasksmanager/specs/ExternalDummyService.ts rename to app/api/services/tasksmanager/specs/ExternalDummyService.ts diff --git a/app/api/tasksmanager/specs/blank.pdf b/app/api/services/tasksmanager/specs/blank.pdf similarity index 100% rename from app/api/tasksmanager/specs/blank.pdf rename to app/api/services/tasksmanager/specs/blank.pdf diff --git a/app/api/tasksmanager/specs/repeatWithLock.spec.js b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js similarity index 88% rename from app/api/tasksmanager/specs/repeatWithLock.spec.js rename to app/api/services/tasksmanager/specs/repeatWithLock.spec.js index 89924a071d..55d5dcc946 100644 --- a/app/api/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js @@ -11,6 +11,7 @@ describe('RepeatWithLock', () => { let task; let rejectTask; let redisServer; + let pendingTasks; beforeAll(async () => { redisServer = new RedisServer(); @@ -22,17 +23,21 @@ describe('RepeatWithLock', () => { }); beforeEach(async () => { + pendingTasks = []; task = jasmine.createSpy('callbackone').and.callFake( () => new Promise((resolve, reject) => { + pendingTasks.push(resolve); rejectTask = reject; - finishTask = () => { - resolve(); - }; + finishTask = resolve; }) ); }); + afterEach(async () => { + await pendingTasks.map(pendingTask => pendingTask()); + }); + async function sleepTime(time) { await new Promise(resolve => { setTimeout(resolve, time); @@ -44,26 +49,19 @@ describe('RepeatWithLock', () => { const nodeTwo = new RepeatWith('my_locked_task', task); await nodeOne.start(); await nodeTwo.start(); - await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(1); }); - finishTask(); - await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(2); }); - finishTask(); - await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(3); }); - finishTask(); await nodeOne.stop(); - finishTask(); await nodeTwo.stop(); }); @@ -95,21 +93,23 @@ describe('RepeatWithLock', () => { }); it('should continue executing tasks after redis was unavailable for a while', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); + const unstableRedisServer = new RedisServer(6371); + await unstableRedisServer.start(); + const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20, 6371); await nodeOne.start(); await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(1); }); - await redisServer.stop(); + await unstableRedisServer.stop(); finishTask(); await sleepTime(50); expect(task).toHaveBeenCalledTimes(1); - await redisServer.start(); + await unstableRedisServer.start(); await waitForExpect(async () => { expect(task).toHaveBeenCalledTimes(2); @@ -118,11 +118,12 @@ describe('RepeatWithLock', () => { finishTask(); await nodeOne.stop(); + await unstableRedisServer.stop(); }); it('should handle when a lock fails for too many retries', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); - const nodeTwo = new RepeatWith('my_locked_task', task, 2000, 0, 20); + const nodeOne = new RepeatWith('my_long_locked_task', task, 2000, 0, 20); + const nodeTwo = new RepeatWith('my_long_locked_task', task, 2000, 0, 20); await nodeOne.start(); await nodeTwo.start(); diff --git a/app/api/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts similarity index 95% rename from app/api/tasksmanager/specs/taskManager.spec.ts rename to app/api/services/tasksmanager/specs/taskManager.spec.ts index 9ed39485b2..c919e2ab22 100644 --- a/app/api/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -1,7 +1,7 @@ /* eslint-disable max-statements */ import fs from 'fs'; import waitForExpect from 'wait-for-expect'; -import { TaskManager, Service } from 'api/tasksmanager/taskManager'; +import { TaskManager, Service } from 'api/services/tasksmanager/taskManager'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; @@ -13,15 +13,16 @@ describe('taskManager', () => { let externalDummyService: ExternalDummyService; beforeAll(async () => { + const port = 6378; service = { serviceName: 'KonzNGaboHellKitchen', dataUrl: 'http://localhost:1234/data', filesUrl: 'http://localhost:1234/files', resultsUrl: 'http://localhost:1234/results', - redisUrl: 'redis://localhost:6379', + redisUrl: `redis://localhost:${port}`, processResults: jest.fn(), }; - redisServer = new RedisServer(); + redisServer = new RedisServer(port); await redisServer.start(); externalDummyService = new ExternalDummyService(1234, service.serviceName); @@ -100,7 +101,7 @@ describe('taskManager', () => { }); it('should send files to the service', async () => { - const file = fs.readFileSync('app/api/tasksmanager/specs/blank.pdf'); + const file = fs.readFileSync('app/api/services/tasksmanager/specs/blank.pdf'); await taskManager?.sendFile(file, 'blank1.pdf'); await taskManager?.sendFile(file, 'blank2.pdf'); diff --git a/app/api/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts similarity index 100% rename from app/api/tasksmanager/taskManager.ts rename to app/api/services/tasksmanager/taskManager.ts From b27d7613f80da3cff2d46a5c55c48b2db689eaa0 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 7 Oct 2021 15:52:35 +0200 Subject: [PATCH 28/62] Aggregation to only get files that need to be processed --- .../pdfsegmentation/PDFSegmentation.ts | 59 ++++++++++++++----- .../pdfsegmentation/segmentationModel.js | 2 +- .../specs/PDFSegmentation.spec.ts | 26 ++++++-- .../pdfsegmentation/specs/fixtures.ts | 20 ++++++- .../tasksmanager/specs/taskManager.spec.ts | 2 +- 5 files changed, 87 insertions(+), 22 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 07f4d428a2..7eceee90b8 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,5 +1,6 @@ -import { TaskManager } from 'api/services/tasksmanager/taskManager'; +import { TaskManager } from 'api/services/tasksmanager/TaskManager'; import { files, uploadsPath } from 'api/files'; +import filesModel from 'api/files/filesModel'; import fs from 'fs'; import { FileType } from 'shared/types/fileType'; import { config } from 'api/config'; @@ -69,22 +70,52 @@ class SegmentPdfs { const settingsValues = await settings.get(); const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; - const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map(x => - x.template.toString() + const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map( + x => x.template ); - const nextEntitiesToProcess = await entities.getUnrestricted({ - template: { $in: templatesWithInformationExtraction }, - }); - - const sharedIds = nextEntitiesToProcess.map((x: { sharedId: string }) => x.sharedId); - const nextFilesToProcess = await files.get({ - entity: { $in: sharedIds }, - }); - - for (let i = 0; i < 10; i += 1) { + const filesToSegment = await filesModel.db.aggregate([ + { + $match: { + type: 'document', + }, + }, + { + $lookup: { + from: 'segmentation', + localField: '_id', + foreignField: 'fileID', + as: 'segmentation', + }, + }, + { + $match: { + segmentation: { + $size: 0, + }, + }, + }, + { + $lookup: { + from: 'entities', + localField: 'entity', + foreignField: 'sharedId', + as: 'entity', + }, + }, + { + $match: { + 'entity.template': { $in: templatesWithInformationExtraction }, + }, + }, + { + $limit: 10, + }, + ]); + + for (let i = 0; i < filesToSegment.length; i += 1) { // eslint-disable-next-line no-await-in-loop - await this.segmentOnePdf(nextFilesToProcess[i]); + await this.segmentOnePdf(filesToSegment[i]); } }, tenant); }) diff --git a/app/api/services/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js index 66edd03234..fcbb98a3af 100644 --- a/app/api/services/pdfsegmentation/segmentationModel.js +++ b/app/api/services/pdfsegmentation/segmentationModel.js @@ -28,6 +28,6 @@ const mongoSchema = new mongoose.Schema(props, { strict: false, }); -const SegmentationModel = instanceModel('captchas', mongoSchema); +const SegmentationModel = instanceModel('segmentation', mongoSchema); export { SegmentationModel }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 93a909c8f6..cbe505fd5a 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -5,18 +5,20 @@ import { fixturesOtherFile, fixturesPdfNameA, fixturesTwelveFiles, + fixturesFiveFiles, } from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; -import { TaskManager } from 'api/services/tasksmanager/taskManager'; +import { TaskManager } from 'api/services/tasksmanager/TaskManager'; import { config } from 'api/config'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; +import { Db } from 'mongodb'; + import { SegmentPdfs } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; -import { Db } from 'mongodb'; -jest.mock('api/services/tasksmanager/taskManager.ts'); +jest.mock('api/services/tasksmanager/TaskManager.ts'); describe('pdfSegmentation', () => { let segmentPdfs: SegmentPdfs; @@ -154,9 +156,23 @@ describe('pdfSegmentation', () => { }, 'tenantOne'); }); + it('should only send pdfs not already segmented or in the process', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesFiveFiles); + await dbOne.collection('segmentation').insertMany([ + { + fileName: fixturesFiveFiles, + fileID: fixturesFiveFiles.files![0]._id, + status: 'pending', + }, + ]); + + await segmentPdfs.segmentPdfs(); + + expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(4); + }); + //TODO: - // - precalculate teanants that have the feature toggle on - // - should only send pdfs not already segmented + // - precalculate teanants that have the feature toggle on ? // - should handle tenants without the information extraction on // - should get the results from the task and store them // - do a load test to checkl the perfomance diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index 17d0dedbe7..a35e85ec8e 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -58,6 +58,24 @@ const fixturesOtherFile: DBFixture = { files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], }; +const fixturesFiveFiles: DBFixture = { + settings, + entities: [ + factory.entity('A1', 'templateToSegmentA'), + factory.entity('A2', 'templateToSegmentA'), + factory.entity('A3', 'templateToSegmentA'), + factory.entity('A4', 'templateToSegmentA'), + factory.entity('A5', 'templateToSegmentA'), + ], + files: [ + factory.file('F1', 'A1', 'document', fixturesPdfNameA), + factory.file('F2', 'A2', 'document', fixturesPdfNameA), + factory.file('F3', 'A3', 'document', fixturesPdfNameA), + factory.file('F4', 'A4', 'document', fixturesPdfNameA), + factory.file('F5', 'A5', 'document', fixturesPdfNameA), + ], +}; + const fixturesTwelveFiles: DBFixture = { settings, entities: [ @@ -90,7 +108,6 @@ const fixturesTwelveFiles: DBFixture = { factory.file('F11', 'A11', 'document', fixturesPdfNameA), factory.file('F12', 'A12', 'document', fixturesPdfNameA), ], - segmentation: [{}], }; const fixturesFilesWithtMixedInformationExtraction: DBFixture = { @@ -117,4 +134,5 @@ export { fixturesOtherFile, fixturesTwelveFiles, fixturesFilesWithtMixedInformationExtraction, + fixturesFiveFiles, }; diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index c919e2ab22..8519f67d9f 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -1,7 +1,7 @@ /* eslint-disable max-statements */ import fs from 'fs'; import waitForExpect from 'wait-for-expect'; -import { TaskManager, Service } from 'api/services/tasksmanager/taskManager'; +import { TaskManager, Service } from 'api/services/tasksmanager/TaskManager'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; From e2673ed3dc8c86ee2b1ae96178385e008ed65749 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 13 Oct 2021 12:20:09 +0200 Subject: [PATCH 29/62] Taskmanager count, and some pending tests for the PDFsegmentation --- .../pdfsegmentation/PDFSegmentation.ts | 4 - .../pdfsegmentation/segmentationModel.js | 2 +- .../specs/PDFSegmentation.spec.ts | 28 +++++-- .../tasksmanager/specs/taskManager.spec.ts | 74 +++++++++---------- app/api/services/tasksmanager/taskManager.ts | 20 +++-- app/shared/types/settingsSchema.ts | 20 +++++ app/shared/types/settingsType.d.ts | 9 ++- 7 files changed, 98 insertions(+), 59 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 7eceee90b8..46905ed57a 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -30,10 +30,6 @@ class SegmentPdfs { const segmentationValues = settingsValues?.features?.segmentation; this.segmentationTaskManager = new TaskManager({ serviceName: this.SERVICE_NAME, - dataUrl: segmentationValues.dataUrl, - filesUrl: segmentationValues.filesUrl, - resultsUrl: segmentationValues.resultsUrl, - redisUrl: `redis://${config.redis.host}:${config.redis.host}`, }); } diff --git a/app/api/services/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js index fcbb98a3af..71391fa50f 100644 --- a/app/api/services/pdfsegmentation/segmentationModel.js +++ b/app/api/services/pdfsegmentation/segmentationModel.js @@ -19,7 +19,7 @@ const props = { }, ], }, - fileID: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, + file: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, fileName: { type: String }, }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index cbe505fd5a..9b5eff5eb3 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -79,6 +79,10 @@ describe('pdfSegmentation', () => { ); }); + it('should send the file', () => { + throw new Error('Not implemented'); + }); + it('should send other pdf to segment', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOtherFile); @@ -171,11 +175,25 @@ describe('pdfSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(4); }); + describe('when there is pending tasks', () => { + it('should not put more', () => { + throw new Error('Not implemented'); + }); + }); + + describe('when there is segmentation config', () => { + it('should do nothing', () => { + throw new Error('Not implemented'); + }); + }); + + describe('when there segmentation finsihes', () => { + it('should store the segmentation', () => { + throw new Error('Not implemented'); + }); + }); + //TODO: - // - precalculate teanants that have the feature toggle on ? - // - should handle tenants without the information extraction on - // - should get the results from the task and store them - // - do a load test to checkl the perfomance - // - make sure onlye one taskmanager is instanced + // - do a load test to checkl the perfomance: Tested in Cejil with 5k files and Plan with 25k and took 0.2s to do an aggregation query // - error handling ? task failed ? }); diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index 8519f67d9f..b7fbef2b5e 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -4,6 +4,7 @@ import waitForExpect from 'wait-for-expect'; import { TaskManager, Service } from 'api/services/tasksmanager/TaskManager'; import { RedisServer } from '../RedisServer'; import { ExternalDummyService } from './ExternalDummyService'; +import { config } from 'api/config'; describe('taskManager', () => { let taskManager: TaskManager | undefined; @@ -14,19 +15,18 @@ describe('taskManager', () => { beforeAll(async () => { const port = 6378; + config.redis.port = port; + + const redisUrl = `redis://${config.redis.host}:${config.redis.port}`; service = { serviceName: 'KonzNGaboHellKitchen', - dataUrl: 'http://localhost:1234/data', - filesUrl: 'http://localhost:1234/files', - resultsUrl: 'http://localhost:1234/results', - redisUrl: `redis://localhost:${port}`, processResults: jest.fn(), }; redisServer = new RedisServer(port); await redisServer.start(); externalDummyService = new ExternalDummyService(1234, service.serviceName); - await externalDummyService.start(service.redisUrl); + await externalDummyService.start(redisUrl); taskManager = new TaskManager(service); taskManager.subscribeToResults(); @@ -50,9 +50,7 @@ describe('taskManager', () => { task: 'CheeseBurger', tenant: 'Rafa', }); - const message = await externalDummyService.readFirstTaskMessage(); - expect(message).toBe('{"task":"CheeseBurger","tenant":"Rafa"}'); }); @@ -85,48 +83,38 @@ describe('taskManager', () => { }); }); - describe('sending materials', () => { - it('should send materials to the service', async () => { - const materials1 = { someData: 3 }; - const materials2 = { someData: 2 }; - const materials3 = { someData: 3 }; - await taskManager?.sendJSON(materials1); - await taskManager?.sendJSON(materials2); - await taskManager?.sendJSON(materials3); - - expect(externalDummyService.materials.length).toEqual(3); - expect(externalDummyService.materials[0]).toEqual(materials1); - expect(externalDummyService.materials[1]).toEqual(materials2); - expect(externalDummyService.materials[2]).toEqual(materials3); - }); + describe('count tasks', () => { + it('should count the pending tasks', async () => { + await taskManager?.startTask({ + task: 'CheeseBurger', + tenant: 'Rafa', + }); - it('should send files to the service', async () => { - const file = fs.readFileSync('app/api/services/tasksmanager/specs/blank.pdf'); + await taskManager?.startTask({ + task: 'Fries', + tenant: 'Joan', + }); - await taskManager?.sendFile(file, 'blank1.pdf'); - await taskManager?.sendFile(file, 'blank2.pdf'); - await taskManager?.sendFile(file, 'blank3.pdf'); + await taskManager?.startTask({ + task: 'Ribs', + tenant: 'Fede', + }); - expect(externalDummyService.files.length).toEqual(3); - expect(externalDummyService.files[0]).toEqual(file); - expect(externalDummyService.filesNames[0]).toEqual('blank1.pdf'); - expect(externalDummyService.files[1]).toEqual(file); - expect(externalDummyService.filesNames[1]).toEqual('blank2.pdf'); - expect(externalDummyService.files[2]).toEqual(file); - expect(externalDummyService.filesNames[2]).toEqual('blank3.pdf'); + const pendingTasks = await taskManager?.countPendingTasks(); + expect(pendingTasks).toBe(3); }); }); describe('when the task finishes', () => { it('should get the results', async () => { - const expectedResults = { results: 'Paella' }; + const expectedResults = { results: 'Tofu' }; await taskManager?.stop(); taskManager = new TaskManager(service); taskManager.subscribeToResults(); externalDummyService.setResults(expectedResults); - const task = { task: 'Tofu', tenant: 'Gabo' }; + const task = { task: 'Tofu', tenant: 'Gabo', resultsUrl: 'http://localhost:1234/results' }; await externalDummyService.sendFinishedMessage(task); await waitForExpect(async () => { @@ -138,7 +126,7 @@ describe('taskManager', () => { describe('when redis server is not available', () => { it('taskManager should fail to start task', async () => { await redisServer.stop(); - const task = { task: 'Spagueti', tenant: 'Kon' }; + const task = { task: 'Spagueti', tenant: 'Konz' }; try { await taskManager?.startTask(task); @@ -151,7 +139,9 @@ describe('taskManager', () => { describe('and redis comes back', () => { it('should send tasks again', async () => { + await externalDummyService.resetQueue(); await redisServer.stop(); + const task = { task: 'Ceviche', tenant: 'Mercy' }; try { @@ -170,10 +160,16 @@ describe('taskManager', () => { }); it('should read pending messages', async () => { - const task = { task: 'Ceviche', tenant: 'Mercy' }; + const task = { + task: 'Ceviche', + tenant: 'Mercy', + resultsUrl: 'http://localhost:1234/results', + }; await taskManager?.stop(); - externalDummyService.setResults({ results: 'Paella' }); + externalDummyService.setResults({ + results: 'Ceviche', + }); await externalDummyService.sendFinishedMessage(task); expect(service.processResults).not.toHaveBeenCalled(); @@ -186,7 +182,7 @@ describe('taskManager', () => { await waitForExpect(async () => { expect(service.processResults).toHaveBeenCalledWith({ - results: 'Paella', + results: 'Ceviche', }); }); }); diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts index 60516139ad..e974c5741f 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/taskManager.ts @@ -2,6 +2,7 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; import request from 'shared/JSONRequest'; import { Repeater } from 'api/utils/Repeater'; +import { config } from 'api/config'; export interface TaskMessage { tenant: string; @@ -10,10 +11,6 @@ export interface TaskMessage { export interface Service { serviceName: string; - filesUrl: string; - dataUrl: string; - resultsUrl: string; - redisUrl: string; processResults?: (results: object) => void; } @@ -38,7 +35,8 @@ export class TaskManager { } start() { - this.redisClient = Redis.createClient(this.service.redisUrl); + const redisUrl = `redis://${config.redis.host}:${config.redis.port}`; + this.redisClient = Redis.createClient(redisUrl); this.redisClient.on('error', error => { if (error.code !== 'ECONNREFUSED') { @@ -52,18 +50,23 @@ export class TaskManager { this.redisClient.on('connect', () => { this.redisSMQ?.createQueue({ qname: this.taskQueue }, err => { - if (err.name !== 'queueExists') { + if (err && err.name !== 'queueExists') { throw err; } }); this.redisSMQ?.createQueue({ qname: this.resultsQueue }, err => { - if (err.name !== 'queueExists') { + if (err && err.name !== 'queueExists') { throw err; } }); }); } + countPendingTasks: () => Promise = async () => { + const queueAttributes = await this.redisSMQ!.getQueueAttributesAsync({ qname: this.taskQueue }); + return queueAttributes.msgs; + }; + subscribeToResults() { this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); this.repeater.start(); @@ -77,7 +80,8 @@ export class TaskManager { if (message.id) { if (this.service.processResults) { - const results = await request.get(this.service.resultsUrl, JSON.parse(message.message)); + const processedMessage = JSON.parse(message.message); + const results = await request.get(processedMessage.resultsUrl, processedMessage); this.service.processResults(results.json); } } diff --git a/app/shared/types/settingsSchema.ts b/app/shared/types/settingsSchema.ts index 400e1f5f08..5bbbbeca92 100644 --- a/app/shared/types/settingsSchema.ts +++ b/app/shared/types/settingsSchema.ts @@ -148,6 +148,26 @@ export const settingsSchema = { }, topicClassification: { type: 'boolean' }, favorites: { type: 'boolean' }, + segmentation: { + type: 'object', + additionalProperties: false, + required: ['url'], + properties: { + url: { type: 'string' }, + }, + }, + metadataExtraction: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + required: ['template', 'properties'], + properties: { + template: objectIdSchema, + properties: { type: 'array', items: { type: 'string' } }, + }, + }, + }, }, }, mapStartingPoint: geolocationSchema, diff --git a/app/shared/types/settingsType.d.ts b/app/shared/types/settingsType.d.ts index 9c1fb2ef79..6d4b460f8a 100644 --- a/app/shared/types/settingsType.d.ts +++ b/app/shared/types/settingsType.d.ts @@ -2,7 +2,6 @@ /**AUTO-GENERATED. RUN yarn emit-types to update.*/ import { ObjectIdSchema, LanguagesListSchema, GeolocationSchema } from 'shared/types/commonTypes'; -import { ObjectId } from 'mongodb'; export interface ItemSchema { id?: string; @@ -95,8 +94,14 @@ export interface Settings { }; topicClassification?: boolean; favorites?: boolean; + segmentation?: { + url: string; + }; + metadataExtraction?: { + template: ObjectIdSchema; + properties: string[]; + }[]; [k: string]: unknown | undefined; - metadataExtraction: {template: ObjectIdSchema, properties: string[]}[] }; mapStartingPoint?: { label?: string; From 2bdb11c6f6e976ac8ee564b9e14fa937323190f3 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 13 Oct 2021 12:56:01 +0200 Subject: [PATCH 30/62] PDFsegmentation doing nothing when there is already pending tasks --- .../pdfsegmentation/PDFSegmentation.ts | 22 ++++++++++--------- .../specs/PDFSegmentation.spec.ts | 19 ++++++++-------- app/api/services/tasksmanager/taskManager.ts | 8 ++++--- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 46905ed57a..c58fe0103c 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -20,20 +20,15 @@ class SegmentPdfs { features: Settings | undefined; - async start() { - const settingsValues = await settings.get(); + batchSize = 10; - const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; - this.templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map(x => - x.template.toString() - ); - const segmentationValues = settingsValues?.features?.segmentation; + async start() { this.segmentationTaskManager = new TaskManager({ serviceName: this.SERVICE_NAME, }); } - segmentOnePdf = async (nextFile: FileType) => { + segmentOnePdf = async (nextFile: FileType, url: string) => { if (!this.segmentationTaskManager) { return; } @@ -64,8 +59,15 @@ class SegmentPdfs { await this.start(); } + const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); + if (pendingTasks > 0) { + return; + } + const settingsValues = await settings.get(); const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; + const segmentationServiceConfig = settingsValues?.features?.segmentation; + const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map( x => x.template ); @@ -105,13 +107,13 @@ class SegmentPdfs { }, }, { - $limit: 10, + $limit: this.batchSize, }, ]); for (let i = 0; i < filesToSegment.length; i += 1) { // eslint-disable-next-line no-await-in-loop - await this.segmentOnePdf(filesToSegment[i]); + await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url); } }, tenant); }) diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 9b5eff5eb3..58e72a1a3b 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -67,10 +67,6 @@ describe('pdfSegmentation', () => { expect(TaskManager).toHaveBeenCalledWith({ serviceName: 'segmentation', - dataUrl: 'http://localhost:1234/data', - filesUrl: 'http://localhost:1234/files', - resultsUrl: 'http://localhost:1234/results', - redisUrl: `redis://${config.redis.host}:${config.redis.host}`, }); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( @@ -90,10 +86,6 @@ describe('pdfSegmentation', () => { expect(TaskManager).toHaveBeenCalledWith({ serviceName: 'segmentation', - dataUrl: 'http://other-localhost:1234/data', - filesUrl: 'http://other-localhost:1234/files', - resultsUrl: 'http://other-localhost:1234/results', - redisUrl: `redis://${config.redis.host}:${config.redis.host}`, }); expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( @@ -176,8 +168,15 @@ describe('pdfSegmentation', () => { }); describe('when there is pending tasks', () => { - it('should not put more', () => { - throw new Error('Not implemented'); + it('should not put more', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesFiveFiles); + segmentPdfs.start(); + + segmentPdfs.segmentationTaskManager!.countPendingTasks = () => Promise.resolve(10); + + await segmentPdfs.segmentPdfs(); + + expect(segmentPdfs.segmentationTaskManager?.startTask).not.toHaveBeenCalled(); }); }); diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts index e974c5741f..7c2124330d 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/taskManager.ts @@ -62,10 +62,12 @@ export class TaskManager { }); } - countPendingTasks: () => Promise = async () => { - const queueAttributes = await this.redisSMQ!.getQueueAttributesAsync({ qname: this.taskQueue }); + async countPendingTasks(): Promise { + const queueAttributes = await this.redisSMQ!.getQueueAttributesAsync({ + qname: this.taskQueue, + }); return queueAttributes.msgs; - }; + } subscribeToResults() { this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); From f8df201c960e4f3326efb541d0b05b52e303d3d2 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 13 Oct 2021 14:43:00 +0200 Subject: [PATCH 31/62] Segmentator sending the PDFs instead of the TasdkManager --- .../pdfsegmentation/PDFSegmentation.ts | 8 +-- .../specs/PDFSegmentation.spec.ts | 64 ++++++++---------- .../pdfsegmentation/specs/fixtures.ts | 14 ++-- .../specs/uploads/documentA.pdf | Bin 0 -> 798 bytes .../specs/uploads/documentB.pdf | Bin 0 -> 798 bytes .../f2082bf51b6ef839690485d7153e847a.pdf | Bin 7627 -> 0 bytes 6 files changed, 38 insertions(+), 48 deletions(-) create mode 100644 app/api/services/pdfsegmentation/specs/uploads/documentA.pdf create mode 100644 app/api/services/pdfsegmentation/specs/uploads/documentB.pdf delete mode 100644 app/api/services/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index c58fe0103c..17720476de 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,15 +1,14 @@ import { TaskManager } from 'api/services/tasksmanager/TaskManager'; -import { files, uploadsPath } from 'api/files'; +import { uploadsPath } from 'api/files'; import filesModel from 'api/files/filesModel'; import fs from 'fs'; import { FileType } from 'shared/types/fileType'; -import { config } from 'api/config'; import { Settings } from 'shared/types/settingsType'; import settings from 'api/settings/settings'; -import { model as entities } from 'api/entities'; import { tenants } from 'api/tenants/tenantContext'; import { SegmentationModel } from './segmentationModel'; import { ObjectIdSchema } from 'shared/types/commonTypes'; +import request from 'shared/JSONRequest'; class SegmentPdfs { SERVICE_NAME = 'segmentation'; @@ -38,7 +37,8 @@ class SegmentPdfs { } const file = fs.readFileSync(uploadsPath(nextFile.filename)); - await this.segmentationTaskManager.sendFile(file, nextFile.filename); + await request.uploadFile(url, nextFile.filename, file); + const task = { task: nextFile.filename, tenant: 'tenant1', diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 58e72a1a3b..66d20e6eab 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -4,19 +4,21 @@ import { fixturesOneFile, fixturesOtherFile, fixturesPdfNameA, + fixturesPdfNameB, fixturesTwelveFiles, fixturesFiveFiles, } from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; import { TaskManager } from 'api/services/tasksmanager/TaskManager'; -import { config } from 'api/config'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; import { Db } from 'mongodb'; import { SegmentPdfs } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; +import request from 'shared/JSONRequest'; +import exp from 'constants'; jest.mock('api/services/tasksmanager/TaskManager.ts'); @@ -45,7 +47,8 @@ describe('pdfSegmentation', () => { let dbOne: Db; let dbTwo: Db; - let file: Buffer; + let fileA: Buffer; + let fileB: Buffer; afterAll(async () => { await testingDB.disconnect(); @@ -57,40 +60,33 @@ describe('pdfSegmentation', () => { dbOne = DB.connectionForDB(tenantOne.dbName).db; dbTwo = DB.connectionForDB(tenantTwo.dbName).db; tenants.tenants = { tenantOne }; - file = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + fileA = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + fileB = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + jest.spyOn(request, 'uploadFile').mockResolvedValue({}); + jest.resetAllMocks(); }); - it('should send one pdf to segment', async () => { + it('should send the pdf', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); await segmentPdfs.segmentPdfs(); - - expect(TaskManager).toHaveBeenCalledWith({ - serviceName: 'segmentation', - }); - - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( - file, - fixturesPdfNameA + expect(request.uploadFile).toHaveBeenCalledWith( + 'http://localhost:1234/files', + fixturesPdfNameA, + fileA ); }); - it('should send the file', () => { - throw new Error('Not implemented'); - }); - it('should send other pdf to segment', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOtherFile); await segmentPdfs.segmentPdfs(); - expect(TaskManager).toHaveBeenCalledWith({ - serviceName: 'segmentation', - }); - - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( - file, - fixturesPdfNameA + await segmentPdfs.segmentPdfs(); + expect(request.uploadFile).toHaveBeenCalledWith( + 'http://localhost:1234/files', + fixturesPdfNameB, + fileB ); }); @@ -98,12 +94,13 @@ describe('pdfSegmentation', () => { await fixturer.clearAllAndLoad(dbOne, fixturesTwelveFiles); await segmentPdfs.segmentPdfs(); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( - file, - fixturesPdfNameA + expect(request.uploadFile).toHaveBeenCalledWith( + 'http://localhost:1234/files', + fixturesPdfNameA, + fileA ); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(10); + expect(request.uploadFile).toHaveBeenCalledTimes(10); }); it('should send pdfs only from templates with the information extraction on', async () => { @@ -111,12 +108,7 @@ describe('pdfSegmentation', () => { await segmentPdfs.segmentPdfs(); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledWith( - file, - fixturesPdfNameA - ); - - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); + expect(request.uploadFile).toHaveBeenCalledTimes(2); }); it('should send pdfs from different tenants with the information extraction on', async () => { @@ -126,7 +118,7 @@ describe('pdfSegmentation', () => { await segmentPdfs.segmentPdfs(); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(2); + expect(request.uploadFile).toHaveBeenCalledTimes(2); }); it('should start the tasks', async () => { @@ -135,7 +127,7 @@ describe('pdfSegmentation', () => { await segmentPdfs.segmentPdfs(); expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledWith({ - task: 'f2082bf51b6ef839690485d7153e847a.pdf', + task: 'documentA.pdf', tenant: 'tenant1', }); }); @@ -164,7 +156,7 @@ describe('pdfSegmentation', () => { await segmentPdfs.segmentPdfs(); - expect(segmentPdfs.segmentationTaskManager?.sendFile).toHaveBeenCalledTimes(4); + expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledTimes(4); }); describe('when there is pending tasks', () => { diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index a35e85ec8e..89ed6e2dfc 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -17,9 +17,7 @@ const settings = [ }, ], segmentation: { - dataUrl: 'http://localhost:1234/data', - filesUrl: 'http://localhost:1234/files', - resultsUrl: 'http://localhost:1234/results', + url: 'http://localhost:1234/files', }, }, }, @@ -36,15 +34,14 @@ const otherSettings = [ }, ], segmentation: { - dataUrl: 'http://other-localhost:1234/data', - filesUrl: 'http://other-localhost:1234/files', - resultsUrl: 'http://other-localhost:1234/results', + url: 'http://localhost:1234/files', }, }, }, ]; -const fixturesPdfNameA = 'f2082bf51b6ef839690485d7153e847a.pdf'; +const fixturesPdfNameA = 'documentA.pdf'; +const fixturesPdfNameB = 'documentB.pdf'; const fixturesOneFile: DBFixture = { entities: [factory.entity('A1', 'templateToSegmentA')], @@ -55,7 +52,7 @@ const fixturesOneFile: DBFixture = { const fixturesOtherFile: DBFixture = { entities: [factory.entity('A1', 'templateToSegmentB')], settings: otherSettings, - files: [factory.file('F1', 'A1', 'document', fixturesPdfNameA)], + files: [factory.file('F1', 'A1', 'document', fixturesPdfNameB)], }; const fixturesFiveFiles: DBFixture = { @@ -130,6 +127,7 @@ const fixturesMultiTenant: DBFixture = {}; export { fixturesPdfNameA, + fixturesPdfNameB, fixturesOneFile, fixturesOtherFile, fixturesTwelveFiles, diff --git a/app/api/services/pdfsegmentation/specs/uploads/documentA.pdf b/app/api/services/pdfsegmentation/specs/uploads/documentA.pdf new file mode 100644 index 0000000000000000000000000000000000000000..02f8ffa7aa95c8ace1377f825a1ddb4496cbe9fb GIT binary patch literal 798 zcmZWnJ#Q015Dk`s&=ML566H`>ip9s?`R@E7MONfbS;SzSoyu5Zxm!Dz+}$R7Yh-As z==lL@kV3&vprimHK`MGABpM5ej)sa^-YrAJV&C}!c%z^Y;j z0|UW=nTKLIk0mt`E8IW=+9j9Jo{|$!b(x%l1}AD}(~A4GnC^BLVu)qwyqBOscsGr&J)> IY~xY$A4aLk literal 0 HcmV?d00001 diff --git a/app/api/services/pdfsegmentation/specs/uploads/documentB.pdf b/app/api/services/pdfsegmentation/specs/uploads/documentB.pdf new file mode 100644 index 0000000000000000000000000000000000000000..02f8ffa7aa95c8ace1377f825a1ddb4496cbe9fb GIT binary patch literal 798 zcmZWnJ#Q015Dk`s&=ML566H`>ip9s?`R@E7MONfbS;SzSoyu5Zxm!Dz+}$R7Yh-As z==lL@kV3&vprimHK`MGABpM5ej)sa^-YrAJV&C}!c%z^Y;j z0|UW=nTKLIk0mt`E8IW=+9j9Jo{|$!b(x%l1}AD}(~A4GnC^BLVu)qwyqBOscsGr&J)> IY~xY$A4aLk literal 0 HcmV?d00001 diff --git a/app/api/services/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf b/app/api/services/pdfsegmentation/specs/uploads/f2082bf51b6ef839690485d7153e847a.pdf deleted file mode 100644 index 1a8032582ed1e9b36bbd424761c4ffdb42d2718b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7627 zcma)h2UJr_7p{UR1`(95lqgc9B>@tuAidX6rG^kX1d`AN1?gRB(xrnSRVkt{_ zRO!-0IwvuE!;bJqGk4sCfwewctTh@&O1(_Q=14}1?Y3`rkV+tHYb zc>ShT*!`SgTZW@`S?|8&li0IuvYE9X3W5^G=;OV)P142^1$qPKuh=K@dx{Nm>lIrC zuxc^H9+2{P&u%K_v*82SL_io9>tBulJ)-X*qRYe%Ob|hQ`%@?NEh`5M zngD_z-K_{jfg%>?41&nYV!cej{6a!tVL>PuAt**{1z=z^5JU-wb$12gf`sVdP%d~^ z6b|Ep_J+tIA@Udxdo)H@Nfx38Cg9wOq4^_vqM-i>9q8||BRvH?r~hLY@V`8klL12o z;2`de@yoVe0s5X)3Tvg_1KWgtgj0Z&~7aTkk_Ub+QGCmrVXDe}3Yv{k>*Z5|- zglb7p39U_n1P5MD(H{KB`+@V+KI^F@1y1}x&s2m2c_Rt<*iQHsVrzeTYER()tit_f zhW57DSR4N#KGKBN^N{fNq2;T&RPfWKA09^YyIueK#{X5^=F)i!nyik=!JDZiN#V+LeLGXO~GR7zM)4qGhtrpS%&6zgcu&U|zH0cB(}(|CV=ANcE{5Lne=T zpVzi0zy0Xq-aWj>lo`MObT zjGEQzF7+gvjHdVR-C5)~MdQl$Zd+s%=@hszP?R^rBHUyQI%y1HtvEvq=sH!{tTx-?BUavOG`yO+Ojv0>>R*VB>s5L64#yk<*D z*5}+AijeaIAqN%~Z#H~}>ZbQExUU+Y%HNp@&~~+Reewp<*Kp55>-*F#vkZR=CEp2` zeD{$a7EhDJv3}E(_cW8pJv|EQP~JWMAi@MhnAV zJnAm*lrmo19#U(3>J?fz#dXw@nx}eY@`B?+nSkaaeGQP>_??%Tx_U`D6Xjw5R^D@yCA znU=lhapc6pH>1izR-1_KGpXkXhPzgnOy5w5flI$TXI*;xJg`1r-OoXfVwcR1rUvb9fn27P68xOm;b;EP=ODTiY#y%x`#mFvoG@F;z32uv+{wElcKE%PMZ zreqM&!7?7=drw~>rTw{2eM{j-SjpsXk^V2Sb@JUtj*0mz)1>+A77Z`cbG{@5-dWuU z2uSTKL*47^PHNG5k(v3e>(M=N$!F&2Syvu%lt|t}>I!Cj)2Y8keS?N!>5bb!_JK6p zKT(NV*TM89(nQngo;=Xi9M%k@35?sd+Qf&sB;(!}5hIXaI9VRXDX4=UPL^&2K=e^JiSPUsvHxbi4G(Yj^mbo}Q-kIkt zn1b@STLmfoq0v^-abG@o{b?6fv!ymtoLpP)HM%iH5n^A2axltyAcEZKw0~ymV)9jh zqAQt^97kuS!HXRMGxxs`E|1OPeJ{eY-VyrN>Ahc5@)OKS=LgRBZrNlAnW6{hCnoGW zhYt%L^y_rfdC0GPidK~gH9^V_&BifD1&@yE1S3q{HfPDVjF~QYP<2fDoeMPCyS;tE zrbm0V;e8Y{;d6K2DTjhP@wBweR$*tJ$;Ta^>2Q`Ge8U3w-FjUTg z)5hElgDxJ#uC8{>`)gtX_Nw3x9C~3^e>k$qvFy8pD^pFLkshZjjqhpHyP;$EnEZDi z#l;%KJSJPGs^2CRXyCrT6&4a$JsWU+I`{Nd6sDxRDU0mu?XKOgiKT?RgMiFT9a@ja z^AjnTytN2-SjpFC26iRur`;es`e!FcZ;dV%7b=-dL`88O+R#eET*3Q~5~Ub2887&2 z+|IuBi9Xd>?$wOi<=6JEPu~_rbx?<2`=Bd*jZ z231!waU125ta!-Wkov>94mI_ML62fLTl*+SAHI;uZisrwP{8SX4Hf3;E zH-WOVP;P6*HH-yOfplD^f+u+Dqraht@%LoR?J8>4qce}*mt-tPnfIj46P%lr%w^`p@t^wm zJcZuX)24bhwNFN_YS9}gT7)>bqn6Ac@ES#S1;}P^!sEE8dn)=bEGf#qWy~ z&f5+Yorqcu^O5`Rb|s~6C8nGXHeHk&W=%tL7qd3(eel&Sn??szTQRHU#}$3YO_$nj zPAV^u>=-J%*$4S?8IctxkbVqHnoy@p0@-T7&&Vb@7{1WHZeW<4kT^z~E$A3t3DL94 zdM5MiZ12cS%9Zq|O$5piN2vA#?FS9Z1w)o-N>#YsvnDYCC0UAaej9fY4l=%c!O=FM z>*TdMiV4@$D0n*^EAqX&*68xN%gr4pp64r6>Cb!#1br`fVK;j4b~FisK=?28WW7Fo zp!T@##7HujYC2e&KTYZ?w-DAHZmKZvyJ5<<){gI$@|)#5;uICVr@s$dc*!c;$?1tY zaOQo&jBqlYrRU{!B)O)PuP=N~HaN%6fxSS)%8UIhBIo%fi_x%_tCgPY0=wt$m>4bd zOMY_}X|~ZxX3(x!Mbzqzo1}_hx2+yjq+G|AmBx)cOT4*{^TkASAA2p9|7PL5E9^eK zvsRrwYSwLWOXypW=lr7jbYJ6dvCMXf{ksQ3Jyk#Ut7B}}CqENB<&U$Z)(%gCMEWY7 ze>itju6dGl6{c@*F-nnoEb9%^B`49B(Z1{CkqfWCLCwP>Y|?q!$?sZ)v~lnqx*CJ* z+KBu(1{$n+ZHU{|lFE7%Zw-*hZitUQyoEH*Ed8OB>sHQ#dmF{s#*XOz& z-9H}N{_uD+%yu#jd(nUY?K)z0orfW|F!Agcv3caoTmH^BNn>j{Ejsm8qcf%$Pdy2e z>@Og03u9LDa9;l4{D8Q6Y8Fjl(JeEslMEW^)z);HT+LGamtFQZxVfvE$1yY(ggJ)# zGnEa=%g1zuY@fv!(oepSWH?cZSE2(Nf2;k}H>fFD-TIM}t8Di2j6A9*+%SFrB~7n{ zTW7dE7msE_x}AGYn+|WXiHA|BkjQX$TI~KaQ4vntEzzY|7~4VO_TogcMkI~#RUY2u zupdWAEx!e|qw!S9lB*^nuYa7Bj8JmDvmAU`Gg4l9eBwv9(%wsXGJC7GeZD}*Ov#U1 z=-d4i%{>e5Ty0%l%WD1ccR2ynVRm%yK>0a*)K4oCM)?rppXnA7?TT8RHw@-dRA~ni z8OvI4bMPdJnuOos$XqJY3m;YuQca(yy==x)FX^514cdEmtDme^-}qjI>xaFdox)GDY)L9TvpFNw4cmIgU>>R; z(W&-JpnA0$);o6Tlt zjvwsDf^jaCoD9EB)-PXG+16yCe?qP3Zgl3H`f^2pwdQigCDqw3ann=uUW%aE)^~URCBSYf&`9h-SBtX`{XF0G7>KYTZZAtVUam%w9Lb90y-y zppX`6EN`(HbvR^T5eR6AP<8hrV-^rEZON}FUXG4wHM*W$Cqkc?*Jt>xT~x~;l(rBjx5@p}WA)_K`nu*+Rts-%_b zR?npo9t6|wuRV%Z@9p^5z4OvIT-r9GFjW`qUF@8k>|Qp#HWq!DsCP42C%5uE951gL zEkGsT+1hq_%#=^ARbKas1EXCWiGx7W2NefF9!de8Z!s~P*I}Abt%72SLCAX1V4rOmlMunf!8wpJbR)JuKHA!<@`e~P_lvTX zj24QXdW3C0s6a$iJ?{Wt3I)p+0zKNSRaN z)oxuXJ1@;xh?hX})cPK)gh#2&M-!Ut9^p952GN2_w+Y1SW%OO z*QnO1YM;@K3z?6b)pg5J-CcpnYAKvg&Ayasr`D&Udvh)}zVXc38C&H@Gkq5kG8Mb~ zRy_ySj@@VWR&@w&Yd(SB;{1d7cH^eV-IB)>E$<^Q@1|o?(XI(|+Tm4KtMcgBQ@E4k z-pU=tj_sq5CBzcUJ_{pFqjw#iEMxPJ+WELTl3E=z=9%eN&&2*9@7K-yu0r=vS)lDG znL{%Qt>cn7#(F*cv;f^qYAL&2M%dIe4-T1rpGZWSwUHF8Xrc*kP(IHW~# z5A#$_MKoL^Y%8^us5U6OZnUvu;yQaz$nB%>(+8t-cg6LlCLT|%GWS|$o9o;{(z~Z3 zC1>XGO`3Pr%U24QRr>L2kq01w42|JgWB9(= zCiI##=IFR|@4(<}3oqWUm?I@~xL7A=-1A7C<^$COM zl$j;=9xf>5#7cF3at^xutW>C)mWI5v5T(zDsUI&`BYC@d>vr#2)1yO5 zDqqYi)hnGrQRZGo9Ys;w7PI)~J3`=?0RW`-ner!Jj1x;$77u2;j$ZcMab z-)z>5?j!C@Dbko$n^TxlSg2V@+ReU*m?jB>_J={F*pcZ#+08(t8)Vf0X=MpsTAanQSC@ih|0N zx}`T~!FZ%vb|!q2ra1*~C$=QMbc;*MzoyB(rn+r!GWH>L*0)+(G4GzC)s-Lb3TH5|F?nl{EQsGRD|EgH8%!l$Tx12ce9Y}15|s9JJs~} z&G(_vE1?Cf;yZDodowBLf*Cj=8lbVaXAOCbmsCAm2>P{plyo&=a%7)FUh(=WI*c5( zv_v>Q4!X!{M_^h0G;?Q67h#C~raQ;2cL~-w<1o+g^X;<;N7{s!xhq zu`BL-KECfFcMKDZYST*BG&VBEdDpkFuI}zG?#AL~8B;wY;?rO5HdI^I(PZUG|DZTg z^ViwJZXVLzi=KG%vj+}WC!u5Row1@uTMLRMsVMcLRsOW?JrR;NB8G#7f+F?oRq2?a# zT6&J#sJFDI4VI!<#%0Hx%3>g;CG7j$X4&g@ESIP-J9`-ccVIS9HnRyZ|0Rd z?OyVhKFZ^vRc{5NjFSzm#*HVL4SOs^wcVHBoPJr={pvs9Ng~KdL{{N2IQ&m6QtU4* z@+Zowh;<>zWAJF4y(<9=z{kK)6XlG7$jT5OUUhpb3=T!G$GRXfIC~oaD|JHI;=#f| z2hjY9#qz_2g#bJjDg=fJiim>6L01JzB=a)(HcKf*>+@G{%Jh78Qem04VnN z0ly#&0fNY(T$M5Qwsr(y9`F)Lz&IO#MS&?q^8loc5d;&_Q(Yn;`v;r~gZ{OLUwEn@ z0{R~)?%x4av$$S|RxowjhE#C-8miw~q``zd;89ha+>gHJRF;HCpy&u#=qo?>Pc?lH zwy!Q8F~31Me;;LH87--;3cao5oDll1%$$V9av+43L;p^QNpNj?Fgu^mk@I<|r_FPB zjZu@Y{7qQG_a|RZc?P}qA~R15UzZw}8vgCeCx#D}Bnx@=MS-x!og1I39|vSRP8W|3 zqb8KPvS$47x7A7`kLm2`_%xL_o7HbS^jy7TQBHX}_he6C3>LMG+%MwGd9?FoR9@hr zmi`>+_CehKV*?&|B4E9$f|Ez{T@pe4`DE}#yMd2~TeEi>w!XAew$l0NA#AK_Ze)R!l z-O%2eV23vm5fK3+fV2q%l_Df61{MUyf^aBU2te_F_JJ{g-xJsPs}{k;Qq=`P^sxFa z_C!7d{4??YTMB<;Llj})pZob=g+G`EfEFN{2Ws?ptH0OkE;4B1rDF2CEr^lv9k7eLlhf~iP{6i2%Egu& zN~ z3-ko}p>QrJld*_n28VAl7dITH__!{7dX9;QV1Kb4&h%OcjRNv2|3xcS)*kHl1pQjZV z$b(J5l5$Wnc@ZElicm3GL71YPn4%0q4lXJT6_giL5EO(;MEDTjpgoz0W%E=%^ Date: Wed, 13 Oct 2021 19:08:22 +0200 Subject: [PATCH 32/62] Handling tenants with no segmentation config --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 9 +++++++++ .../pdfsegmentation/specs/PDFSegmentation.spec.ts | 11 +++++++---- .../services/tasksmanager/specs/taskManager.spec.ts | 4 ++-- app/api/services/tasksmanager/taskManager.ts | 10 +--------- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 17720476de..e9a33e4916 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -51,6 +51,11 @@ class SegmentPdfs { await SegmentationModel.save({ fileID, fileName }); }; + storeResult = async task => { + // const fileName = task.task; + // await SegmentationModel.save({ fileName, segmentation: results, autoexpire: false }); + }; + segmentPdfs = async () => Promise.all( Object.keys(tenants.tenants).map(async tenant => { @@ -68,6 +73,10 @@ class SegmentPdfs { const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; const segmentationServiceConfig = settingsValues?.features?.segmentation; + if (!metadataExtractionFeatureToggle || !segmentationServiceConfig) { + return; + } + const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map( x => x.template ); diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 66d20e6eab..04fdef2453 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -172,13 +172,16 @@ describe('pdfSegmentation', () => { }); }); - describe('when there is segmentation config', () => { - it('should do nothing', () => { - throw new Error('Not implemented'); + describe('when there is NOT segmentation config', () => { + it('should do nothing', async () => { + await fixturer.clearAllAndLoad(dbOne, { ...fixturesOneFile, settings: [{}] }); + await segmentPdfs.segmentPdfs(); + + expect(segmentPdfs.segmentationTaskManager?.startTask).not.toHaveBeenCalled(); }); }); - describe('when there segmentation finsihes', () => { + describe('when the segmentation finsihes', () => { it('should store the segmentation', () => { throw new Error('Not implemented'); }); diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index b7fbef2b5e..0bf8bfbffb 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -114,7 +114,7 @@ describe('taskManager', () => { taskManager.subscribeToResults(); externalDummyService.setResults(expectedResults); - const task = { task: 'Tofu', tenant: 'Gabo', resultsUrl: 'http://localhost:1234/results' }; + const task = { task: 'Tofu', tenant: 'Gabo', results_url: 'http://localhost:1234/results' }; await externalDummyService.sendFinishedMessage(task); await waitForExpect(async () => { @@ -163,7 +163,7 @@ describe('taskManager', () => { const task = { task: 'Ceviche', tenant: 'Mercy', - resultsUrl: 'http://localhost:1234/results', + results_url: 'http://localhost:1234/results', }; await taskManager?.stop(); diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts index 7c2124330d..024e860f61 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/taskManager.ts @@ -83,7 +83,7 @@ export class TaskManager { if (message.id) { if (this.service.processResults) { const processedMessage = JSON.parse(message.message); - const results = await request.get(processedMessage.resultsUrl, processedMessage); + const results = await request.get(processedMessage.results_url, processedMessage); this.service.processResults(results.json); } } @@ -101,14 +101,6 @@ export class TaskManager { }); } - async sendJSON(data: object) { - await request.post(this.service.dataUrl, data); - } - - async sendFile(file: Buffer, fileName: string) { - await request.uploadFile(this.service.filesUrl, fileName, file); - } - async stop() { await this.repeater?.stop(); await this.redisClient?.end(true); From 89a9170a6c90ff2c0e5858cf46d7fbd3ed73c234 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 14 Oct 2021 11:38:28 +0200 Subject: [PATCH 33/62] TaskManager only passing the results message to the connector instead requesting the results --- .../tasksmanager/specs/taskManager.spec.ts | 26 +++--- app/api/services/tasksmanager/taskManager.ts | 79 +++++++++++-------- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index 0bf8bfbffb..07949a0a25 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -21,6 +21,7 @@ describe('taskManager', () => { service = { serviceName: 'KonzNGaboHellKitchen', processResults: jest.fn(), + processRessultsMessageHiddenTime: 1, }; redisServer = new RedisServer(port); await redisServer.start(); @@ -29,7 +30,6 @@ describe('taskManager', () => { await externalDummyService.start(redisUrl); taskManager = new TaskManager(service); - taskManager.subscribeToResults(); await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to be ready }); @@ -106,20 +106,21 @@ describe('taskManager', () => { }); describe('when the task finishes', () => { - it('should get the results', async () => { - const expectedResults = { results: 'Tofu' }; + it('should call process results once and delete the result message', async () => { + const task = { + task: 'Tofu', + tenant: 'Gabo', + results_url: 'http://localhost:1234/results', + }; - await taskManager?.stop(); - taskManager = new TaskManager(service); - taskManager.subscribeToResults(); - - externalDummyService.setResults(expectedResults); - const task = { task: 'Tofu', tenant: 'Gabo', results_url: 'http://localhost:1234/results' }; await externalDummyService.sendFinishedMessage(task); await waitForExpect(async () => { - expect(service.processResults).toHaveBeenCalledWith(expectedResults); + expect(service.processResults).toHaveBeenCalledWith(task); }); + + await new Promise(resolve => setTimeout(resolve, 1001)); // wait for another check for results + expect(service.processResults).toHaveBeenCalledTimes(1); }); }); @@ -176,14 +177,11 @@ describe('taskManager', () => { await redisServer.stop(); taskManager?.start(); - taskManager?.subscribeToResults(); await redisServer.start(); await waitForExpect(async () => { - expect(service.processResults).toHaveBeenCalledWith({ - results: 'Ceviche', - }); + expect(service.processResults).toHaveBeenCalledWith(task); }); }); }); diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts index 024e860f61..15c55a12f7 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/taskManager.ts @@ -1,6 +1,5 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; -import request from 'shared/JSONRequest'; import { Repeater } from 'api/utils/Repeater'; import { config } from 'api/config'; @@ -9,15 +8,25 @@ export interface TaskMessage { task: string; } +/* eslint-disable camelcase */ +export interface ResultsMessage { + tenant: string; + task: string; + data_url?: string; + file_url?: string; +} +/* eslint-enable camelcase */ + export interface Service { serviceName: string; - processResults?: (results: object) => void; + processResults?: (results: ResultsMessage) => Promise; + processRessultsMessageHiddenTime?: number; } export class TaskManager { - private redisSMQ: RedisSMQ | undefined; + redisSMQ: RedisSMQ; - private readonly service: Service; + readonly service: Service; private readonly taskQueue: string; @@ -25,36 +34,34 @@ export class TaskManager { private repeater: Repeater | undefined; - private redisClient: RedisClient | undefined; + redisClient: RedisClient; constructor(service: Service) { this.service = service; this.taskQueue = `${service.serviceName}_tasks`; this.resultsQueue = `${service.serviceName}_results`; - this.start(); - } - - start() { const redisUrl = `redis://${config.redis.host}:${config.redis.port}`; this.redisClient = Redis.createClient(redisUrl); + this.redisSMQ = new RedisSMQ({ client: this.redisClient }); + this.subscribeToEvents(); + this.subscribeToResults(); + } + + subscribeToEvents() { this.redisClient.on('error', error => { if (error.code !== 'ECONNREFUSED') { throw error; } }); - this.redisSMQ = new RedisSMQ({ - client: this.redisClient, - }); - this.redisClient.on('connect', () => { - this.redisSMQ?.createQueue({ qname: this.taskQueue }, err => { + this.redisSMQ.createQueue({ qname: this.taskQueue }, err => { if (err && err.name !== 'queueExists') { throw err; } }); - this.redisSMQ?.createQueue({ qname: this.resultsQueue }, err => { + this.redisSMQ.createQueue({ qname: this.resultsQueue }, err => { if (err && err.name !== 'queueExists') { throw err; } @@ -69,40 +76,46 @@ export class TaskManager { return queueAttributes.msgs; } - subscribeToResults() { - this.repeater = new Repeater(this.receiveMessage.bind(this), 1000); + private subscribeToResults(): void { + this.repeater = new Repeater(this.checkForResults.bind(this), 500); this.repeater.start(); } - async receiveMessage() { - if (this.redisClient?.connected) { - const message = (await this.redisSMQ?.receiveMessageAsync({ - qname: this.resultsQueue, - })) as QueueMessage; + private async checkForResults() { + if (!this.redisClient?.connected) { + return; + } - if (message.id) { - if (this.service.processResults) { - const processedMessage = JSON.parse(message.message); - const results = await request.get(processedMessage.results_url, processedMessage); - this.service.processResults(results.json); - } - } + const message = (await this.redisSMQ.receiveMessageAsync({ + qname: this.resultsQueue, + vt: this.service.processRessultsMessageHiddenTime, + })) as QueueMessage; + + if (message.id && this.service.processResults) { + const processedMessage = JSON.parse(message.message); + + await this.service.processResults(processedMessage); + + await this.redisSMQ?.deleteMessageAsync({ + qname: this.resultsQueue, + id: message.id, + }); } } async startTask(taskMessage: TaskMessage) { - if (!this.redisClient?.connected) { + if (!this.redisClient.connected) { throw new Error('Redis is not connected'); } - return this.redisSMQ?.sendMessageAsync({ + return this.redisSMQ.sendMessageAsync({ qname: this.taskQueue, message: JSON.stringify(taskMessage), }); } async stop() { - await this.repeater?.stop(); - await this.redisClient?.end(true); + await this.repeater!.stop(); + await this.redisClient.end(true); } } From efa9a78a7427193f092561cd3f6f8228c42c6140 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Fri, 15 Oct 2021 12:20:55 +0200 Subject: [PATCH 34/62] Uwazi server using a repeater to Segment PDFs --- .../pdfsegmentation/PDFSegmentation.ts | 55 +++++++++------ .../pdfsegmentation/segmentationModel.js | 2 +- .../specs/PDFSegmentation.spec.ts | 68 ++++++++++++++++--- app/api/services/tasksmanager/RepeatWith.ts | 32 ++++++--- .../specs/ExternalDummyService.ts | 12 ++-- .../tasksmanager/specs/repeatWithLock.spec.js | 48 ++++++++++--- .../tasksmanager/specs/taskManager.spec.ts | 5 +- app/api/services/tasksmanager/taskManager.ts | 18 +++++ app/server.js | 11 +++ 9 files changed, 191 insertions(+), 60 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index e9a33e4916..f0f5096fbb 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,4 +1,4 @@ -import { TaskManager } from 'api/services/tasksmanager/TaskManager'; +import { TaskManager, ResultsMessage } from 'api/services/tasksmanager/TaskManager'; import { uploadsPath } from 'api/files'; import filesModel from 'api/files/filesModel'; import fs from 'fs'; @@ -10,7 +10,7 @@ import { SegmentationModel } from './segmentationModel'; import { ObjectIdSchema } from 'shared/types/commonTypes'; import request from 'shared/JSONRequest'; -class SegmentPdfs { +class PDFSegmentation { SERVICE_NAME = 'segmentation'; public segmentationTaskManager: TaskManager | undefined; @@ -19,7 +19,7 @@ class SegmentPdfs { features: Settings | undefined; - batchSize = 10; + batchSize = 1; async start() { this.segmentationTaskManager = new TaskManager({ @@ -27,33 +27,44 @@ class SegmentPdfs { }); } - segmentOnePdf = async (nextFile: FileType, url: string) => { + segmentOnePdf = async (file: FileType, serviceUrl: string, tenant: string) => { if (!this.segmentationTaskManager) { return; } - if (!nextFile || !nextFile.filename) { + if (!file || !file.filename) { return; } - - const file = fs.readFileSync(uploadsPath(nextFile.filename)); - await request.uploadFile(url, nextFile.filename, file); + console.log('segmentOnePdf', file.filename); + const fileBuffer = fs.readFileSync(uploadsPath(file.filename)); + await request.uploadFile(serviceUrl, file.filename, fileBuffer); const task = { - task: nextFile.filename, - tenant: 'tenant1', + task: file.filename, + tenant, }; + console.log('segmentOnePdf task'); await this.segmentationTaskManager.startTask(task); - await this.storeProcess(nextFile._id!, nextFile.filename); - }; - - storeProcess = async (fileID: ObjectIdSchema, fileName: string) => { - await SegmentationModel.save({ fileID, fileName }); + console.log('store process'); + const segmentationCreated = await this.storeProcess(file._id!, file.filename); + console.log(segmentationCreated); }; - storeResult = async task => { - // const fileName = task.task; - // await SegmentationModel.save({ fileName, segmentation: results, autoexpire: false }); + storeProcess = async (fileID: ObjectIdSchema, fileName: string) => + SegmentationModel.save({ fileID, fileName }); + + processResults = async (message: ResultsMessage) => { + const response = await request.get(message.data_url); + await tenants.run(async () => { + const [segmentation] = await SegmentationModel.get({ fileName: message.task }); + console.log('processing results', message.task); + await SegmentationModel.save({ + ...segmentation, + segmentation: response.json, + autoexpire: null, + status: 'completed', + }); + }, message.tenant); }; segmentPdfs = async () => @@ -66,6 +77,7 @@ class SegmentPdfs { const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); if (pendingTasks > 0) { + console.log(`${pendingTasks} tasks are pending`); return; } @@ -74,6 +86,7 @@ class SegmentPdfs { const segmentationServiceConfig = settingsValues?.features?.segmentation; if (!metadataExtractionFeatureToggle || !segmentationServiceConfig) { + console.log('no configuration'); return; } @@ -120,13 +133,15 @@ class SegmentPdfs { }, ]); + console.log('filesToSegment', filesToSegment.length); + for (let i = 0; i < filesToSegment.length; i += 1) { // eslint-disable-next-line no-await-in-loop - await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url); + await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url, tenant); } }, tenant); }) ); } -export { SegmentPdfs }; +export { PDFSegmentation }; diff --git a/app/api/services/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js index 71391fa50f..13201bb9fb 100644 --- a/app/api/services/pdfsegmentation/segmentationModel.js +++ b/app/api/services/pdfsegmentation/segmentationModel.js @@ -2,7 +2,7 @@ import mongoose from 'mongoose'; import { instanceModel } from 'api/odm'; const props = { - autoexpire: { type: Date, expires: 360000, default: Date.now }, + autoexpire: { type: Date, expires: 86400, default: Date.now }, // 24 hours status: { type: String, enum: ['pending', 'completed', 'error'], default: 'pending' }, error: { type: String }, segmentation: { diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 04fdef2453..101ebf63b7 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -10,20 +10,21 @@ import { } from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; -import { TaskManager } from 'api/services/tasksmanager/TaskManager'; + import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; import { Db } from 'mongodb'; -import { SegmentPdfs } from '../PDFSegmentation'; +import { PDFSegmentation } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; import request from 'shared/JSONRequest'; +import { ExternalDummyService } from '../../tasksmanager/specs/ExternalDummyService'; import exp from 'constants'; jest.mock('api/services/tasksmanager/TaskManager.ts'); -describe('pdfSegmentation', () => { - let segmentPdfs: SegmentPdfs; +describe('PDFSegmentation', () => { + let segmentPdfs: PDFSegmentation; const tenantOne = { name: 'tenantOne', @@ -55,7 +56,7 @@ describe('pdfSegmentation', () => { }); beforeEach(async () => { - segmentPdfs = new SegmentPdfs(); + segmentPdfs = new PDFSegmentation(); await DB.connect(); dbOne = DB.connectionForDB(tenantOne.dbName).db; dbTwo = DB.connectionForDB(tenantTwo.dbName).db; @@ -182,12 +183,57 @@ describe('pdfSegmentation', () => { }); describe('when the segmentation finsihes', () => { - it('should store the segmentation', () => { - throw new Error('Not implemented'); + let segmentationExternalService: ExternalDummyService; + beforeEach(async () => { + segmentationExternalService = new ExternalDummyService(); + await segmentationExternalService.start(); }); - }); - //TODO: - // - do a load test to checkl the perfomance: Tested in Cejil with 5k files and Plan with 25k and took 0.2s to do an aggregation query - // - error handling ? task failed ? + afterEach(async () => { + await segmentationExternalService.stop(); + }); + it('should store the segmentation', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); + const segmentationData = { + page_width: 600, + page_height: 1200, + paragraphs: [ + { + left: 30, + top: 45, + width: 400, + height: 120, + page_number: 1, + text: 'El veloz murciélago hindú comía feliz cardillo y kiwi.', + }, + ], + }; + + segmentationExternalService.setResults(segmentationData); + + await segmentPdfs.segmentPdfs(); + + await segmentPdfs.processResults({ + tenant: tenantOne.name, + task: 'documentA.pdf', + data_url: 'http://localhost:1234/results', + }); + + await tenants.run(async () => { + const segmentations = await SegmentationModel.get(); + const [segmentation] = segmentations; + expect(segmentation.status).toBe('completed'); + expect(segmentation.fileName).toBe(fixturesPdfNameA); + expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); + expect(segmentation.autoexpire).toBe(null); + + expect(segmentation.segmentation).toEqual( + expect.objectContaining({ + ...segmentationData, + paragraphs: [expect.objectContaining(segmentationData.paragraphs[0])], + }) + ); + }, 'tenantOne'); + }); + }); }); diff --git a/app/api/services/tasksmanager/RepeatWith.ts b/app/api/services/tasksmanager/RepeatWith.ts index 37ba55bd4b..71204c3d5b 100644 --- a/app/api/services/tasksmanager/RepeatWith.ts +++ b/app/api/services/tasksmanager/RepeatWith.ts @@ -21,29 +21,41 @@ export class RepeatWith { private port: number; + private host: string; + constructor( lockName: string, task: () => void, - maxLockTime: number = 2000, - delayTimeBetweenTasks: number = 0, - retryDelay: number = 200, - port: number = 6379 + options: { + maxLockTime?: number; + delayTimeBetweenTasks?: number; + retryDelay?: number; + port?: number; + } ) { - this.maxLockTime = maxLockTime; - this.retryDelay = retryDelay; - this.delayTimeBetweenTasks = delayTimeBetweenTasks; + const _options = { + maxLockTime: 2000, + delayTimeBetweenTasks: 1000, + retryDelay: 200, + port: 6379, + host: 'localhost', + ...options, + }; + this.maxLockTime = _options.maxLockTime; + this.retryDelay = _options.retryDelay; + this.delayTimeBetweenTasks = _options.delayTimeBetweenTasks; this.lockName = `locks:${lockName}`; this.task = task; - this.port = port; + this.port = _options.port; + this.host = _options.host; } async start() { - this.redisClient = await Redis.createClient(`redis://localhost:${this.port}`); + this.redisClient = await Redis.createClient(`redis://${this.host}:${this.port}`); this.redlock = await new Redlock([this.redisClient], { retryJitter: 0, retryDelay: this.retryDelay, }); - this.redisClient.on('error', error => { if (error.code !== 'ECONNREFUSED') { throw error; diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index 83f00c2837..bd612b6fcf 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -28,7 +28,7 @@ export class ExternalDummyService { private readonly serviceName: string; - constructor(port: number, serviceName: string) { + constructor(port = 1234, serviceName = 'dummy') { this.port = port; this.serviceName = serviceName; this.app = express(); @@ -131,11 +131,13 @@ export class ExternalDummyService { return messages; } - async start(redisUrl: string) { - this.redisClient = await Redis.createClient(redisUrl); + async start(redisUrl?: string) { + if (redisUrl) { + this.redisClient = await Redis.createClient(redisUrl); - this.redisSMQ = await new RedisSMQ({ client: this.redisClient }); - await this.resetQueue(); + this.redisSMQ = await new RedisSMQ({ client: this.redisClient }); + await this.resetQueue(); + } const start = new Promise(resolve => { this.server = this.app.listen(this.port, () => { diff --git a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js index 55d5dcc946..6da1b3d631 100644 --- a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js @@ -68,7 +68,10 @@ describe('RepeatWithLock', () => { it('should wait until the redis server is available to execute the task', async () => { await redisServer.stop(); - const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20); + const nodeOne = new RepeatWith('my_locked_task', task, { + retryDelay: 20, + delayTimeBetweenTasks: 0, + }); await nodeOne.start(); await sleepTime(50); @@ -95,7 +98,11 @@ describe('RepeatWithLock', () => { it('should continue executing tasks after redis was unavailable for a while', async () => { const unstableRedisServer = new RedisServer(6371); await unstableRedisServer.start(); - const nodeOne = new RepeatWith('my_locked_task', task, 2000, 0, 20, 6371); + const nodeOne = new RepeatWith('my_locked_task', task, { + retryDelay: 20, + delayTimeBetweenTasks: 0, + port: 6371, + }); await nodeOne.start(); await waitForExpect(async () => { @@ -122,8 +129,14 @@ describe('RepeatWithLock', () => { }); it('should handle when a lock fails for too many retries', async () => { - const nodeOne = new RepeatWith('my_long_locked_task', task, 2000, 0, 20); - const nodeTwo = new RepeatWith('my_long_locked_task', task, 2000, 0, 20); + const nodeOne = new RepeatWith('my_long_locked_task', task, { + retryDelay: 20, + delayTimeBetweenTasks: 0, + }); + const nodeTwo = new RepeatWith('my_long_locked_task', task, { + retryDelay: 20, + delayTimeBetweenTasks: 0, + }); await nodeOne.start(); await nodeTwo.start(); @@ -139,8 +152,14 @@ describe('RepeatWithLock', () => { }); it('should handle when a node fails to unlock the lock', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 50); - const nodeTwo = new RepeatWith('my_locked_task', task, 50); + const nodeOne = new RepeatWith('my_locked_task', task, { + maxLockTime: 50, + delayTimeBetweenTasks: 0, + }); + const nodeTwo = new RepeatWith('my_locked_task', task, { + maxLockTime: 50, + delayTimeBetweenTasks: 0, + }); await nodeOne.start(); await sleepTime(10); @@ -158,7 +177,10 @@ describe('RepeatWithLock', () => { }); it('should continue executing the task if one task fails', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 500); + const nodeOne = new RepeatWith('my_locked_task', task, { + maxLockTime: 500, + delayTimeBetweenTasks: 0, + }); await nodeOne.start(); @@ -182,8 +204,16 @@ describe('RepeatWithLock', () => { // eslint-disable-next-line max-statements it('should add a delay between task executions', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, 50, 50, 20); - const nodeTwo = new RepeatWith('my_locked_task', task, 50, 50, 20); + const nodeOne = new RepeatWith('my_locked_task', task, { + maxLockTime: 50, + delayTimeBetweenTasks: 50, + retryDelay: 20, + }); + const nodeTwo = new RepeatWith('my_locked_task', task, { + maxLockTime: 50, + delayTimeBetweenTasks: 50, + retryDelay: 20, + }); await nodeOne.start(); await nodeTwo.start(); diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index 07949a0a25..f9dbd3021a 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -167,16 +167,13 @@ describe('taskManager', () => { results_url: 'http://localhost:1234/results', }; - await taskManager?.stop(); externalDummyService.setResults({ results: 'Ceviche', }); + await redisServer.stop(); await externalDummyService.sendFinishedMessage(task); expect(service.processResults).not.toHaveBeenCalled(); - await redisServer.stop(); - - taskManager?.start(); await redisServer.start(); diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/taskManager.ts index 15c55a12f7..425146a428 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/taskManager.ts @@ -69,6 +69,24 @@ export class TaskManager { }); } + async clearQueue() { + while (true) { + // eslint-disable-next-line no-await-in-loop + const message = (await this.redisSMQ.receiveMessageAsync({ + qname: this.taskQueue, + })) as QueueMessage; + + if (!message.id) { + break; + } + // eslint-disable-next-line no-await-in-loop + await this.redisSMQ.deleteMessageAsync({ + qname: this.taskQueue, + id: message.id, + }); + } + } + async countPendingTasks(): Promise { const queueAttributes = await this.redisSMQ!.getQueueAttributesAsync({ qname: this.taskQueue, diff --git a/app/server.js b/app/server.js index fe84bc760e..b6d4092764 100644 --- a/app/server.js +++ b/app/server.js @@ -10,6 +10,8 @@ import mongoose from 'mongoose'; import path from 'path'; import { TaskProvider } from 'shared/tasks/tasks'; +import { PDFSegmentation } from 'api/services/pdfsegmentation/PDFSegmentation'; +import { RepeatWith } from 'api/services/tasksmanager/RepeatWith'; import { appContextMiddleware } from 'api/utils/appContextMiddleware'; import { requestIdMiddleware } from 'api/utils/requestIdMiddleware'; @@ -150,6 +152,15 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { 10000 ); topicClassificationRepeater.start(); + const segmentationConnector = new PDFSegmentation(); + segmentationConnector.start(); + const segmentationRepeater = new RepeatWith( + 'segmentation_repeat', + segmentationConnector.segmentPdfs, + { port: config.redis.port, host: config.redis.host, delayTimeBetweenTasks: 2000 } + ); + + segmentationRepeater.start(); } }); From 3ec154b76ad68797ddc73ed644160ef68c90a6c5 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 10:02:35 +0200 Subject: [PATCH 35/62] small refactor and rename --- .../tasksmanager/{taskManager.ts => TaskManager.ts} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename app/api/services/tasksmanager/{taskManager.ts => TaskManager.ts} (96%) diff --git a/app/api/services/tasksmanager/taskManager.ts b/app/api/services/tasksmanager/TaskManager.ts similarity index 96% rename from app/api/services/tasksmanager/taskManager.ts rename to app/api/services/tasksmanager/TaskManager.ts index 425146a428..1e44e476b4 100644 --- a/app/api/services/tasksmanager/taskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-await-in-loop */ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; import { Repeater } from 'api/utils/Repeater'; @@ -70,8 +71,7 @@ export class TaskManager { } async clearQueue() { - while (true) { - // eslint-disable-next-line no-await-in-loop + while ((await this.countPendingTasks()) > 0) { const message = (await this.redisSMQ.receiveMessageAsync({ qname: this.taskQueue, })) as QueueMessage; @@ -79,7 +79,7 @@ export class TaskManager { if (!message.id) { break; } - // eslint-disable-next-line no-await-in-loop + await this.redisSMQ.deleteMessageAsync({ qname: this.taskQueue, id: message.id, From a4f556a4c9a9517bf3fc08a26ee10e0367a000c8 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 11:10:50 +0200 Subject: [PATCH 36/62] fixed some eslint errors and specs --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 2 +- .../pdfsegmentation/specs/PDFSegmentation.spec.ts | 4 +--- app/api/services/tasksmanager/RepeatWith.ts | 2 +- .../tasksmanager/specs/repeatWithLock.spec.js | 11 +++++------ app/server.js | 2 +- 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index f0f5096fbb..caefd44002 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -21,7 +21,7 @@ class PDFSegmentation { batchSize = 1; - async start() { + constructor() { this.segmentationTaskManager = new TaskManager({ serviceName: this.SERVICE_NAME, }); diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 101ebf63b7..887b9ac030 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -19,7 +19,6 @@ import { PDFSegmentation } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; import request from 'shared/JSONRequest'; import { ExternalDummyService } from '../../tasksmanager/specs/ExternalDummyService'; -import exp from 'constants'; jest.mock('api/services/tasksmanager/TaskManager.ts'); @@ -163,9 +162,8 @@ describe('PDFSegmentation', () => { describe('when there is pending tasks', () => { it('should not put more', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesFiveFiles); - segmentPdfs.start(); - segmentPdfs.segmentationTaskManager!.countPendingTasks = () => Promise.resolve(10); + segmentPdfs.segmentationTaskManager!.countPendingTasks = async () => Promise.resolve(10); await segmentPdfs.segmentPdfs(); diff --git a/app/api/services/tasksmanager/RepeatWith.ts b/app/api/services/tasksmanager/RepeatWith.ts index 71204c3d5b..b43ac434d4 100644 --- a/app/api/services/tasksmanager/RepeatWith.ts +++ b/app/api/services/tasksmanager/RepeatWith.ts @@ -1,6 +1,6 @@ import Redis from 'redis'; import Redlock from 'redlock'; -import handleError from 'api/utils/handleError'; +import { handleError } from 'api/utils/handleError'; export class RepeatWith { private lockName: string; diff --git a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js index 6da1b3d631..cdc268f8b3 100644 --- a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/services/tasksmanager/specs/repeatWithLock.spec.js @@ -1,10 +1,8 @@ -import handleError from 'api/utils/handleError'; +import * as errorHelper from 'api/utils/handleError'; import waitForExpect from 'wait-for-expect'; import { RepeatWith } from '../RepeatWith'; import { RedisServer } from '../RedisServer'; -jest.mock('api/utils/handleError.js', () => jest.fn()); - /* eslint-disable max-statements */ describe('RepeatWithLock', () => { let finishTask; @@ -45,8 +43,8 @@ describe('RepeatWithLock', () => { } it('should run one task at a time', async () => { - const nodeOne = new RepeatWith('my_locked_task', task); - const nodeTwo = new RepeatWith('my_locked_task', task); + const nodeOne = new RepeatWith('my_locked_task', task, { delayTimeBetweenTasks: 0 }); + const nodeTwo = new RepeatWith('my_locked_task', task, { delayTimeBetweenTasks: 0 }); await nodeOne.start(); await nodeTwo.start(); await waitForExpect(async () => { @@ -177,6 +175,7 @@ describe('RepeatWithLock', () => { }); it('should continue executing the task if one task fails', async () => { + jest.spyOn(errorHelper, 'handleError').mockImplementation(() => {}); const nodeOne = new RepeatWith('my_locked_task', task, { maxLockTime: 500, delayTimeBetweenTasks: 0, @@ -191,7 +190,7 @@ describe('RepeatWithLock', () => { const someError = { error: 'some error' }; rejectTask(someError); await waitForExpect(async () => { - expect(handleError).toHaveBeenLastCalledWith(someError); + expect(errorHelper.handleError).toHaveBeenLastCalledWith(someError); }); finishTask(); diff --git a/app/server.js b/app/server.js index b6d4092764..98380eefc7 100644 --- a/app/server.js +++ b/app/server.js @@ -152,8 +152,8 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { 10000 ); topicClassificationRepeater.start(); + const segmentationConnector = new PDFSegmentation(); - segmentationConnector.start(); const segmentationRepeater = new RepeatWith( 'segmentation_repeat', segmentationConnector.segmentPdfs, From 4ab94f7ce9cf2c96c282bc9a38b908fed1c53fd7 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 11:24:33 +0200 Subject: [PATCH 37/62] fixed some eslint errors --- app/api/services/pdfsegmentation/specs/fixtures.ts | 2 -- app/api/services/tasksmanager/TaskManager.ts | 1 + app/api/services/tasksmanager/specs/taskManager.spec.ts | 1 - app/api/utils/__mocks__/handleError.js | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 app/api/utils/__mocks__/handleError.js diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index 89ed6e2dfc..b9386093ec 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -123,8 +123,6 @@ const fixturesFilesWithtMixedInformationExtraction: DBFixture = { ], }; -const fixturesMultiTenant: DBFixture = {}; - export { fixturesPdfNameA, fixturesPdfNameB, diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index 1e44e476b4..fc9e57f443 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -96,6 +96,7 @@ export class TaskManager { private subscribeToResults(): void { this.repeater = new Repeater(this.checkForResults.bind(this), 500); + // eslint-disable-next-line @typescript-eslint/no-floating-promises this.repeater.start(); } diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index f9dbd3021a..b1df61eed4 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -1,5 +1,4 @@ /* eslint-disable max-statements */ -import fs from 'fs'; import waitForExpect from 'wait-for-expect'; import { TaskManager, Service } from 'api/services/tasksmanager/TaskManager'; import { RedisServer } from '../RedisServer'; diff --git a/app/api/utils/__mocks__/handleError.js b/app/api/utils/__mocks__/handleError.js deleted file mode 100644 index 20e731fd4d..0000000000 --- a/app/api/utils/__mocks__/handleError.js +++ /dev/null @@ -1 +0,0 @@ -export default () => {} \ No newline at end of file From 73543cfc767e9c556ec984d84cabb524141f52b3 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 13:20:58 +0200 Subject: [PATCH 38/62] Fixed instalation of redis server for tests --- .gitignore | 4 +-- .../pdfsegmentation/PDFSegmentation.ts | 4 --- app/api/services/tasksmanager/RedisServer.ts | 36 +++++++++++-------- .../specs/ExternalDummyService.ts | 12 ++++--- .../tasksmanager/specs/taskManager.spec.ts | 7 ++-- 5 files changed, 36 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 8c85df2ac2..0a9a3ea253 100644 --- a/.gitignore +++ b/.gitignore @@ -37,5 +37,5 @@ custom_uploads/* test app/api/files/specs/file1 app/api/files/specs/file2 -redis -dump.rdb +**/redis-bin + diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index caefd44002..b18d13b912 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -71,10 +71,6 @@ class PDFSegmentation { Promise.all( Object.keys(tenants.tenants).map(async tenant => { await tenants.run(async () => { - if (!this.segmentationTaskManager) { - await this.start(); - } - const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); if (pendingTasks > 0) { console.log(`${pendingTasks} tasks are pending`); diff --git a/app/api/services/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts index 434e1ad6ef..d01439d223 100644 --- a/app/api/services/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -1,4 +1,5 @@ -import { execSync } from 'child_process'; +import { execSync, spawn } from 'child_process'; +import path from 'path'; import fs from 'fs'; import Server from 'redis-server'; @@ -9,8 +10,11 @@ export class RedisServer { pathToBin: string; + redisFolder: string; + constructor(port = 6379) { - this.pathToBin = 'redis/redis-stable/src/redis-server'; + this.redisFolder = path.join(__dirname, 'redis-bin'); + this.pathToBin = path.join(this.redisFolder, 'redis-stable/src/redis-server'); this.downloadRedis(); this.port = port; } @@ -21,34 +25,36 @@ export class RedisServer { } execSync( - `mkdir redis && cd redis + `mkdir ${this.redisFolder} && cd ${this.redisFolder} curl -O http://download.redis.io/redis-stable.tar.gz tar xzvf redis-stable.tar.gz`, { stdio: 'inherit' } ); - execSync('cd redis && tar xzvf redis-stable.tar.gz'); + execSync(`cd ${this.redisFolder} && tar xzvf redis-stable.tar.gz`); execSync( - `cd redis - cd redis-stable - make` + `cd ${this.redisFolder} && + cd redis-stable && + make`, + { stdio: 'inherit' } ); } - async start() { - this.server = new Server({ - port: this.port, - bin: this.pathToBin, - }); + start() { try { - await this.server.open(); + this.server = spawn(this.pathToBin, ['--port', this.port.toString()]); } catch (err) { console.log(err); } } - async stop() { - await this.server.close(); + async stop(): Promise { + return new Promise((resolve, _reject) => { + this.server.on('close', () => { + resolve(); + }); + this.server.kill('SIGINT'); + }); } } diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index bd612b6fcf..bf78dfac2b 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -154,10 +154,14 @@ export class ExternalDummyService { } async sendFinishedMessage(task: { task: string; tenant: string }) { - await this.rsmq.sendMessageAsync({ - qname: `${this.serviceName}_results`, - message: JSON.stringify(task), - }); + try { + await this.rsmq.sendMessageAsync({ + qname: `${this.serviceName}_results`, + message: JSON.stringify(task), + }); + } catch (err) { + console.log(err); + } } reset() { diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index b1df61eed4..c2934244c1 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -160,18 +160,21 @@ describe('taskManager', () => { }); it('should read pending messages', async () => { + await taskManager?.stop(); const task = { task: 'Ceviche', tenant: 'Mercy', results_url: 'http://localhost:1234/results', }; - externalDummyService.setResults({ results: 'Ceviche', }); - await redisServer.stop(); + await externalDummyService.sendFinishedMessage(task); + await redisServer.stop(); + + taskManager = new TaskManager(service); expect(service.processResults).not.toHaveBeenCalled(); await redisServer.start(); From 16fd34665049caab5ac50318df62894fbb0b62ad Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 13:22:24 +0200 Subject: [PATCH 39/62] removed redis-server --- app/api/services/tasksmanager/RedisServer.ts | 3 +-- package.json | 3 +-- yarn.lock | 24 ++++++++++---------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/app/api/services/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts index d01439d223..b962729b45 100644 --- a/app/api/services/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -1,10 +1,9 @@ import { execSync, spawn } from 'child_process'; import path from 'path'; import fs from 'fs'; -import Server from 'redis-server'; export class RedisServer { - server: Server; + server: any; port: number; diff --git a/package.json b/package.json index 9b2722161d..6caf79af2d 100644 --- a/package.json +++ b/package.json @@ -162,9 +162,8 @@ "react-text-selection-handler": "0.1.0", "react-widgets": "v4.5.0", "recharts": "1.3.6", - "redis-mock": "^0.56.3", "redis": "^3.1.2", - "redis-server": "^1.2.2", + "redis-mock": "^0.56.3", "redlock": "^4.2.0", "redux": "^3.7.2", "redux-devtools-extension": "^2.13.2", diff --git a/yarn.lock b/yarn.lock index e5972bad4a..f4bd91660e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4897,6 +4897,13 @@ debug@^3.0.1, debug@^3.2.6: dependencies: ms "^2.1.1" +debug@^4.2.0, debug@^4.3.2: + version "4.3.2" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" + integrity sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw== + dependencies: + ms "2.1.2" + debug@~4.3.1: version "4.3.1" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.1.tgz#f0d229c505e0c6d8c49ac553d1b13dc183f6b2ee" @@ -11748,11 +11755,6 @@ promise-polyfill@^6.0.1: version "6.1.0" resolved "https://registry.yarnpkg.com/promise-polyfill/-/promise-polyfill-6.1.0.tgz#dfa96943ea9c121fca4de9b5868cb39d3472e057" -promise-queue@^2.2.5: - version "2.2.5" - resolved "https://registry.yarnpkg.com/promise-queue/-/promise-queue-2.2.5.tgz#2f6f5f7c0f6d08109e967659c79b88a9ed5e93b4" - integrity sha1-L29ffA9tCBCelnZZx5uIqe1ek7Q= - promise@^7.1.1: version "7.3.1" resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" @@ -12624,6 +12626,11 @@ redis-errors@^1.0.0, redis-errors@^1.2.0: resolved "https://registry.yarnpkg.com/redis-errors/-/redis-errors-1.2.0.tgz#eb62d2adb15e4eaf4610c04afe1529384250abad" integrity sha1-62LSrbFeTq9GEMBK/hUpOEJQq60= +redis-mock@^0.56.3: + version "0.56.3" + resolved "https://registry.yarnpkg.com/redis-mock/-/redis-mock-0.56.3.tgz#e96471bcc774ddc514c2fc49cdd03cab2baecd89" + integrity sha512-ynaJhqk0Qf3Qajnwvy4aOjS4Mdf9IBkELWtjd+NYhpiqu4QCNq6Vf3Q7c++XRPGiKiwRj9HWr0crcwy7EiPjYQ== + redis-parser@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/redis-parser/-/redis-parser-3.0.0.tgz#b66d828cdcafe6b4b8a428a7def4c6bcac31c8b4" @@ -12631,13 +12638,6 @@ redis-parser@^3.0.0: dependencies: redis-errors "^1.0.0" -redis-server@^1.2.2: - version "1.2.2" - resolved "https://registry.yarnpkg.com/redis-server/-/redis-server-1.2.2.tgz#d0415b549ee06256248bf728b407ddf19999818a" - integrity sha512-pOaSIeSMVFkEFIuaMtpQ3TOr3uI4sUmEHm4ofGks5vTPRseHUszxyIlC70IFjUR9qSeH8o/ARZEM8dqcJmgGJw== - dependencies: - promise-queue "^2.2.5" - redis@^3.0.0: version "3.0.2" resolved "https://registry.yarnpkg.com/redis/-/redis-3.0.2.tgz#bd47067b8a4a3e6a2e556e57f71cc82c7360150a" From cb4515ddf83d89fd25cc2289493139aa46b1f6e3 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 13:47:47 +0200 Subject: [PATCH 40/62] Some refactor and atempting to fix CI redis instalation --- app/api/services/tasksmanager/RedisServer.ts | 1 + .../specs/ExternalDummyService.ts | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/app/api/services/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts index b962729b45..7fcb1388f0 100644 --- a/app/api/services/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -35,6 +35,7 @@ export class RedisServer { execSync( `cd ${this.redisFolder} && cd redis-stable && + make distclean && make`, { stdio: 'inherit' } ); diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index bf78dfac2b..d0bdbaaf37 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -64,28 +64,28 @@ export class ExternalDummyService { return this.redisSMQ; } - async resetQueue() { + async deleteQueues() { try { await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_tasks` }); } catch (err) { - console.log(err); - if (err.name !== 'queueNotFound') { + if (err instanceof Error && err.name !== 'queueNotFound') { throw err; } } try { await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_results` }); } catch (err) { - console.log(err); - if (err.name !== 'queueNotFound') { + if (err instanceof Error && err.name !== 'queueNotFound') { throw err; } } + } + async createQueues() { try { await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_tasks` }); } catch (err) { - if (err.name !== 'queueExists') { + if (err instanceof Error && err.name !== 'queueExists') { throw err; } } @@ -93,12 +93,17 @@ export class ExternalDummyService { try { await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_results` }); } catch (err) { - if (err.name !== 'queueExists') { + if (err instanceof Error && err.name !== 'queueExists') { throw err; } } } + async resetQueue() { + await this.deleteQueues(); + await this.createQueues(); + } + async readFirstTaskMessage() { const message: RedisSMQ.QueueMessage | {} = await this.rsmq.receiveMessageAsync({ qname: `${this.serviceName}_tasks`, From 277b4a8d74d9331269482f056e99bb6c6061ed3a Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 15:18:12 +0200 Subject: [PATCH 41/62] Moved redis installation to a jest pre hook --- .gitignore | 1 + .../pdfsegmentation/PDFSegmentation.ts | 2 +- .../specs/PDFSegmentation.spec.ts | 6 ++-- app/api/services/tasksmanager/RedisServer.ts | 31 ++----------------- app/api/utils/downloadRedis.js | 29 +++++++++++++++++ app/jestServerGlobalSetup.js | 2 ++ 6 files changed, 38 insertions(+), 33 deletions(-) create mode 100644 app/api/utils/downloadRedis.js diff --git a/.gitignore b/.gitignore index 0a9a3ea253..7dbadf1ce1 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,5 @@ test app/api/files/specs/file1 app/api/files/specs/file2 **/redis-bin +dump.rdb diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index b18d13b912..f9cc903616 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -19,7 +19,7 @@ class PDFSegmentation { features: Settings | undefined; - batchSize = 1; + batchSize = 10; constructor() { this.segmentationTaskManager = new TaskManager({ diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 887b9ac030..2912518189 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -128,7 +128,7 @@ describe('PDFSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledWith({ task: 'documentA.pdf', - tenant: 'tenant1', + tenant: 'tenantOne', }); }); @@ -183,7 +183,7 @@ describe('PDFSegmentation', () => { describe('when the segmentation finsihes', () => { let segmentationExternalService: ExternalDummyService; beforeEach(async () => { - segmentationExternalService = new ExternalDummyService(); + segmentationExternalService = new ExternalDummyService(1235); await segmentationExternalService.start(); }); @@ -214,7 +214,7 @@ describe('PDFSegmentation', () => { await segmentPdfs.processResults({ tenant: tenantOne.name, task: 'documentA.pdf', - data_url: 'http://localhost:1234/results', + data_url: 'http://localhost:1235/results', }); await tenants.run(async () => { diff --git a/app/api/services/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts index 7fcb1388f0..4edef36261 100644 --- a/app/api/services/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -1,6 +1,4 @@ -import { execSync, spawn } from 'child_process'; -import path from 'path'; -import fs from 'fs'; +import { spawn } from 'child_process'; export class RedisServer { server: any; @@ -12,35 +10,10 @@ export class RedisServer { redisFolder: string; constructor(port = 6379) { - this.redisFolder = path.join(__dirname, 'redis-bin'); - this.pathToBin = path.join(this.redisFolder, 'redis-stable/src/redis-server'); - this.downloadRedis(); + this.pathToBin = 'redis-bin/redis-stable/src/redis-server'; this.port = port; } - downloadRedis() { - if (fs.existsSync(this.pathToBin)) { - return; - } - - execSync( - `mkdir ${this.redisFolder} && cd ${this.redisFolder} - curl -O http://download.redis.io/redis-stable.tar.gz - tar xzvf redis-stable.tar.gz`, - { stdio: 'inherit' } - ); - - execSync(`cd ${this.redisFolder} && tar xzvf redis-stable.tar.gz`); - - execSync( - `cd ${this.redisFolder} && - cd redis-stable && - make distclean && - make`, - { stdio: 'inherit' } - ); - } - start() { try { this.server = spawn(this.pathToBin, ['--port', this.port.toString()]); diff --git a/app/api/utils/downloadRedis.js b/app/api/utils/downloadRedis.js new file mode 100644 index 0000000000..7b980a7665 --- /dev/null +++ b/app/api/utils/downloadRedis.js @@ -0,0 +1,29 @@ +import { execSync } from 'child_process'; +import path from 'path'; +import fs from 'fs'; + +export const downloadRedis = () => { + const pathToBin = path.join(__dirname, 'redis-bin/redis-stable/src/redis-server'); + if (fs.existsSync(pathToBin)) { + return; + } + console.log('Downloading redis...'); + execSync( + `mkdir redis-bin && cd redis-bin + curl -O http://download.redis.io/redis-stable.tar.gz + tar xzvf redis-stable.tar.gz`, + { stdio: 'ignore' } + ); + + execSync('cd redis-bin && tar xzvf redis-stable.tar.gz', { stdio: 'ignore' }); + console.log('Downloading redis... Done'); + console.log('Installing redis...'); + execSync( + `cd redis-bin && + cd redis-stable && + make distclean && + make`, + { stdio: 'ignore' } + ); + console.log('Installing redis... Done'); +}; diff --git a/app/jestServerGlobalSetup.js b/app/jestServerGlobalSetup.js index 25c31c8eb7..c8d7efb186 100644 --- a/app/jestServerGlobalSetup.js +++ b/app/jestServerGlobalSetup.js @@ -1,6 +1,8 @@ import { createMongoInstance as checkMongoVersion } from './api/utils/createMongoInstance.js'; +import { downloadRedis } from './api/utils/downloadRedis.js'; module.exports = async () => { const mongod = await checkMongoVersion(); await mongod.stop(); + downloadRedis(); }; From 2fd8a544f0a9953e67d4dd88cd400ef507b1d7a5 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 15:43:01 +0200 Subject: [PATCH 42/62] removed unused variable --- app/api/services/tasksmanager/RedisServer.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/api/services/tasksmanager/RedisServer.ts b/app/api/services/tasksmanager/RedisServer.ts index 4edef36261..a651f2419e 100644 --- a/app/api/services/tasksmanager/RedisServer.ts +++ b/app/api/services/tasksmanager/RedisServer.ts @@ -7,8 +7,6 @@ export class RedisServer { pathToBin: string; - redisFolder: string; - constructor(port = 6379) { this.pathToBin = 'redis-bin/redis-stable/src/redis-server'; this.port = port; From 52fcb9b105735d40a4a2a4ed0daf054ff6f9c5c4 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 18 Oct 2021 17:50:31 +0200 Subject: [PATCH 43/62] fixed an issue with downloadRedis --- .../specs/ExternalDummyService.ts | 30 ++++++------------- app/api/utils/downloadRedis.js | 4 ++- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index d0bdbaaf37..8357f51e26 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -64,16 +64,9 @@ export class ExternalDummyService { return this.redisSMQ; } - async deleteQueues() { + async deleteQueue(qname: string) { try { - await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_tasks` }); - } catch (err) { - if (err instanceof Error && err.name !== 'queueNotFound') { - throw err; - } - } - try { - await this.rsmq.deleteQueueAsync({ qname: `${this.serviceName}_results` }); + await this.rsmq.deleteQueueAsync({ qname }); } catch (err) { if (err instanceof Error && err.name !== 'queueNotFound') { throw err; @@ -81,17 +74,9 @@ export class ExternalDummyService { } } - async createQueues() { - try { - await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_tasks` }); - } catch (err) { - if (err instanceof Error && err.name !== 'queueExists') { - throw err; - } - } - + async createQueue(qname: string) { try { - await this.rsmq.createQueueAsync({ qname: `${this.serviceName}_results` }); + await this.rsmq.createQueueAsync({ qname }); } catch (err) { if (err instanceof Error && err.name !== 'queueExists') { throw err; @@ -100,8 +85,11 @@ export class ExternalDummyService { } async resetQueue() { - await this.deleteQueues(); - await this.createQueues(); + await this.deleteQueue(`${this.serviceName}_tasks`); + await this.deleteQueue(`${this.serviceName}_results`); + + await this.createQueue(`${this.serviceName}_tasks`); + await this.createQueue(`${this.serviceName}_results`); } async readFirstTaskMessage() { diff --git a/app/api/utils/downloadRedis.js b/app/api/utils/downloadRedis.js index 7b980a7665..da9bdea3cd 100644 --- a/app/api/utils/downloadRedis.js +++ b/app/api/utils/downloadRedis.js @@ -1,9 +1,11 @@ +/* eslint-disable no-console */ import { execSync } from 'child_process'; import path from 'path'; import fs from 'fs'; export const downloadRedis = () => { - const pathToBin = path.join(__dirname, 'redis-bin/redis-stable/src/redis-server'); + const pathToBin = path.join(__dirname, '../../../redis-bin/redis-stable/src/redis-server'); + console.log(pathToBin); if (fs.existsSync(pathToBin)) { return; } From ab41bbec7915ade25cbb2981b73e4cd999ca6c3d Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 09:27:28 +0200 Subject: [PATCH 44/62] refactored PDFsegmentation to segment all files regardless of template --- .../pdfsegmentation/PDFSegmentation.ts | 37 +++++-------------- .../specs/PDFSegmentation.spec.ts | 9 ----- .../pdfsegmentation/specs/fixtures.ts | 17 --------- 3 files changed, 10 insertions(+), 53 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index f9cc903616..1e87639c80 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -67,29 +67,24 @@ class PDFSegmentation { }, message.tenant); }; - segmentPdfs = async () => - Promise.all( + segmentPdfs = async () => { + const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); + if (pendingTasks > 0) { + console.log(`${pendingTasks} tasks are pending`); + return; + } + + await Promise.all( Object.keys(tenants.tenants).map(async tenant => { await tenants.run(async () => { - const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); - if (pendingTasks > 0) { - console.log(`${pendingTasks} tasks are pending`); - return; - } - const settingsValues = await settings.get(); - const metadataExtractionFeatureToggle = settingsValues?.features?.metadataExtraction; const segmentationServiceConfig = settingsValues?.features?.segmentation; - if (!metadataExtractionFeatureToggle || !segmentationServiceConfig) { + if (!segmentationServiceConfig) { console.log('no configuration'); return; } - const templatesWithInformationExtraction = metadataExtractionFeatureToggle?.map( - x => x.template - ); - const filesToSegment = await filesModel.db.aggregate([ { $match: { @@ -111,19 +106,6 @@ class PDFSegmentation { }, }, }, - { - $lookup: { - from: 'entities', - localField: 'entity', - foreignField: 'sharedId', - as: 'entity', - }, - }, - { - $match: { - 'entity.template': { $in: templatesWithInformationExtraction }, - }, - }, { $limit: this.batchSize, }, @@ -138,6 +120,7 @@ class PDFSegmentation { }, tenant); }) ); + }; } export { PDFSegmentation }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 2912518189..6890426875 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -1,6 +1,5 @@ import { testingDB, fixturer } from 'api/utils/testing_db'; import { - fixturesFilesWithtMixedInformationExtraction, fixturesOneFile, fixturesOtherFile, fixturesPdfNameA, @@ -103,14 +102,6 @@ describe('PDFSegmentation', () => { expect(request.uploadFile).toHaveBeenCalledTimes(10); }); - it('should send pdfs only from templates with the information extraction on', async () => { - await fixturer.clearAllAndLoad(dbOne, fixturesFilesWithtMixedInformationExtraction); - - await segmentPdfs.segmentPdfs(); - - expect(request.uploadFile).toHaveBeenCalledTimes(2); - }); - it('should send pdfs from different tenants with the information extraction on', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); await fixturer.clearAllAndLoad(dbTwo, fixturesOtherFile); diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index b9386093ec..99cfb77c9d 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -107,28 +107,11 @@ const fixturesTwelveFiles: DBFixture = { ], }; -const fixturesFilesWithtMixedInformationExtraction: DBFixture = { - settings, - entities: [ - factory.entity('A1', 'templateToSegmentA', {}), - factory.entity('B1', 'templateToSegmentB', {}), - factory.entity('B2', 'templateNotSegmentC', {}), - factory.entity('B3', 'templateNotSegmentC', {}), - ], - files: [ - factory.file('F1', 'A1', 'document', fixturesPdfNameA), - factory.file('F2', 'B1', 'document', fixturesPdfNameA), - factory.file('F3', 'B2', 'document', fixturesPdfNameA), - factory.file('F4', 'B3', 'document', fixturesPdfNameA), - ], -}; - export { fixturesPdfNameA, fixturesPdfNameB, fixturesOneFile, fixturesOtherFile, fixturesTwelveFiles, - fixturesFilesWithtMixedInformationExtraction, fixturesFiveFiles, }; From 127ba72ba23e4c01f9abb79770a2bc035b47f149 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 10:19:35 +0200 Subject: [PATCH 45/62] Fixed some errors typing --- app/api/services/tasksmanager/TaskManager.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index fc9e57f443..8735510d2d 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -50,19 +50,19 @@ export class TaskManager { } subscribeToEvents() { - this.redisClient.on('error', error => { - if (error.code !== 'ECONNREFUSED') { + this.redisClient.on('error', (error: any | undefined) => { + if (error && error.code !== 'ECONNREFUSED') { throw error; } }); this.redisClient.on('connect', () => { - this.redisSMQ.createQueue({ qname: this.taskQueue }, err => { + this.redisSMQ.createQueue({ qname: this.taskQueue }, (err: Error | undefined) => { if (err && err.name !== 'queueExists') { throw err; } }); - this.redisSMQ.createQueue({ qname: this.resultsQueue }, err => { + this.redisSMQ.createQueue({ qname: this.resultsQueue }, (err: Error | undefined) => { if (err && err.name !== 'queueExists') { throw err; } From 66925249c449921c71a3b3052c4218bfce5fd3d6 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 13:22:55 +0200 Subject: [PATCH 46/62] General refactor, fixed PDFsegemntation tests database setup --- .../pdfsegmentation/PDFSegmentation.ts | 61 ++++++++++--------- .../pdfsegmentation/segmentationModel.js | 4 +- .../specs/PDFSegmentation.spec.ts | 25 +++++--- .../pdfsegmentation/specs/fixtures.ts | 4 +- .../specs/ExternalDummyService.ts | 2 +- app/server.js | 2 +- 6 files changed, 55 insertions(+), 43 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 1e87639c80..0bf4174674 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -6,14 +6,15 @@ import { FileType } from 'shared/types/fileType'; import { Settings } from 'shared/types/settingsType'; import settings from 'api/settings/settings'; import { tenants } from 'api/tenants/tenantContext'; -import { SegmentationModel } from './segmentationModel'; import { ObjectIdSchema } from 'shared/types/commonTypes'; import request from 'shared/JSONRequest'; +import { handleError } from 'api/utils'; +import { SegmentationModel } from './segmentationModel'; class PDFSegmentation { SERVICE_NAME = 'segmentation'; - public segmentationTaskManager: TaskManager | undefined; + public segmentationTaskManager: TaskManager; templatesWithInformationExtraction: string[] | undefined; @@ -24,53 +25,58 @@ class PDFSegmentation { constructor() { this.segmentationTaskManager = new TaskManager({ serviceName: this.SERVICE_NAME, + processResults: this.processResults, }); } segmentOnePdf = async (file: FileType, serviceUrl: string, tenant: string) => { - if (!this.segmentationTaskManager) { + if (!file.filename) { return; } - if (!file || !file.filename) { + const fileBuffer = fs.readFileSync(uploadsPath(file.filename)); + try { + await request.uploadFile(serviceUrl, file.filename, fileBuffer); + } catch { + handleError(`Error uploading file to segmentation service, tenant: ${tenant}`); return; } - console.log('segmentOnePdf', file.filename); - const fileBuffer = fs.readFileSync(uploadsPath(file.filename)); - await request.uploadFile(serviceUrl, file.filename, fileBuffer); const task = { task: file.filename, tenant, }; - console.log('segmentOnePdf task'); + await this.segmentationTaskManager.startTask(task); - console.log('store process'); - const segmentationCreated = await this.storeProcess(file._id!, file.filename); - console.log(segmentationCreated); + await this.storeProcess(file._id!, file.filename); }; - storeProcess = async (fileID: ObjectIdSchema, fileName: string) => - SegmentationModel.save({ fileID, fileName }); + storeProcess = async (fileID: ObjectIdSchema, filename: string) => + SegmentationModel.save({ fileID, filename }); processResults = async (message: ResultsMessage) => { - const response = await request.get(message.data_url); - await tenants.run(async () => { - const [segmentation] = await SegmentationModel.get({ fileName: message.task }); - console.log('processing results', message.task); - await SegmentationModel.save({ - ...segmentation, - segmentation: response.json, - autoexpire: null, - status: 'completed', - }); - }, message.tenant); + try { + const response = await request.get(message.data_url); + + await tenants.run(async () => { + const [segmentation] = await SegmentationModel.get({ filename: message.task }); + // eslint-disable-next-line camelcase + const { paragraphs, page_height, page_width } = JSON.parse(response.json); + await SegmentationModel.save({ + ...segmentation, + segmentation: { page_height, page_width, paragraphs }, + autoexpire: null, + status: 'completed', + }); + }, message.tenant); + } catch (error) { + handleError(error); + } }; segmentPdfs = async () => { const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); if (pendingTasks > 0) { - console.log(`${pendingTasks} tasks are pending`); return; } @@ -81,7 +87,6 @@ class PDFSegmentation { const segmentationServiceConfig = settingsValues?.features?.segmentation; if (!segmentationServiceConfig) { - console.log('no configuration'); return; } @@ -93,7 +98,7 @@ class PDFSegmentation { }, { $lookup: { - from: 'segmentation', + from: 'segmentations', localField: '_id', foreignField: 'fileID', as: 'segmentation', @@ -111,8 +116,6 @@ class PDFSegmentation { }, ]); - console.log('filesToSegment', filesToSegment.length); - for (let i = 0; i < filesToSegment.length; i += 1) { // eslint-disable-next-line no-await-in-loop await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url, tenant); diff --git a/app/api/services/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js index 13201bb9fb..e7d7aecba0 100644 --- a/app/api/services/pdfsegmentation/segmentationModel.js +++ b/app/api/services/pdfsegmentation/segmentationModel.js @@ -20,7 +20,7 @@ const props = { ], }, file: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, - fileName: { type: String }, + filename: { type: String }, }; const mongoSchema = new mongoose.Schema(props, { @@ -28,6 +28,6 @@ const mongoSchema = new mongoose.Schema(props, { strict: false, }); -const SegmentationModel = instanceModel('segmentation', mongoSchema); +const SegmentationModel = instanceModel('segmentations', mongoSchema); export { SegmentationModel }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 6890426875..02b9512943 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -1,4 +1,5 @@ -import { testingDB, fixturer } from 'api/utils/testing_db'; +import { fixturer, testingDB, createNewMongoDB } from 'api/utils/testing_db'; +import { MongoMemoryServer } from 'mongodb-memory-server'; import { fixturesOneFile, fixturesOtherFile, @@ -48,16 +49,24 @@ describe('PDFSegmentation', () => { let dbTwo: Db; let fileA: Buffer; let fileB: Buffer; + let mongod: MongoMemoryServer; afterAll(async () => { - await testingDB.disconnect(); + await DB.disconnect(); + await mongod.stop(); + }); + + beforeAll(async () => { + mongod = await createNewMongoDB(); + const mongoUri = mongod.getUri(); + await DB.connect(mongoUri); }); beforeEach(async () => { segmentPdfs = new PDFSegmentation(); - await DB.connect(); dbOne = DB.connectionForDB(tenantOne.dbName).db; dbTwo = DB.connectionForDB(tenantTwo.dbName).db; + tenants.tenants = { tenantOne }; fileA = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); fileB = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); @@ -130,16 +139,16 @@ describe('PDFSegmentation', () => { await tenants.run(async () => { const [segmentation] = await SegmentationModel.get(); expect(segmentation.status).toBe('pending'); - expect(segmentation.fileName).toBe(fixturesPdfNameA); + expect(segmentation.filename).toBe(fixturesPdfNameA); expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); }, 'tenantOne'); }); it('should only send pdfs not already segmented or in the process', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesFiveFiles); - await dbOne.collection('segmentation').insertMany([ + await dbOne.collection('segmentations').insertMany([ { - fileName: fixturesFiveFiles, + filename: fixturesFiveFiles.files![0].filename, fileID: fixturesFiveFiles.files![0]._id, status: 'pending', }, @@ -212,7 +221,7 @@ describe('PDFSegmentation', () => { const segmentations = await SegmentationModel.get(); const [segmentation] = segmentations; expect(segmentation.status).toBe('completed'); - expect(segmentation.fileName).toBe(fixturesPdfNameA); + expect(segmentation.filename).toBe(fixturesPdfNameA); expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); expect(segmentation.autoexpire).toBe(null); @@ -222,7 +231,7 @@ describe('PDFSegmentation', () => { paragraphs: [expect.objectContaining(segmentationData.paragraphs[0])], }) ); - }, 'tenantOne'); + }, tenantOne.name); }); }); }); diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index 99cfb77c9d..d5d009609d 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -50,9 +50,9 @@ const fixturesOneFile: DBFixture = { }; const fixturesOtherFile: DBFixture = { - entities: [factory.entity('A1', 'templateToSegmentB')], + entities: [factory.entity('A2', 'templateToSegmentB')], settings: otherSettings, - files: [factory.file('F1', 'A1', 'document', fixturesPdfNameB)], + files: [factory.file('F2', 'A2', 'document', fixturesPdfNameB)], }; const fixturesFiveFiles: DBFixture = { diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index 8357f51e26..3a2dbfc554 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -49,7 +49,7 @@ export class ExternalDummyService { }); this.app.get('/results', (_req, res) => { - res.send(this.results); + res.json(JSON.stringify(this.results)); }); } diff --git a/app/server.js b/app/server.js index 98380eefc7..62ac602fb2 100644 --- a/app/server.js +++ b/app/server.js @@ -26,7 +26,7 @@ import { migrator } from './api/migrations/migrator'; import settings from './api/settings'; import syncWorker from './api/sync/syncWorker'; import errorHandlingMiddleware from './api/utils/error_handling_middleware'; -import handleError from './api/utils/handleError.js'; +import { handleError } from './api/utils/handleError.js'; import { Repeater } from './api/utils/Repeater'; import serverRenderingRoutes from './react/server.js'; import { DB } from './api/odm'; From 9a96c3aaad533c24203f6c5a667d7a070648cf0f Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 13:27:33 +0200 Subject: [PATCH 47/62] deleted unused var --- app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 02b9512943..27106fe656 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -1,4 +1,4 @@ -import { fixturer, testingDB, createNewMongoDB } from 'api/utils/testing_db'; +import { fixturer, createNewMongoDB } from 'api/utils/testing_db'; import { MongoMemoryServer } from 'mongodb-memory-server'; import { fixturesOneFile, From 3825a84052b479b37c6545192172662454ae03c3 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 15:34:29 +0200 Subject: [PATCH 48/62] renamed RepeatWith to DistributedLoop --- .../{RepeatWith.ts => DistributedLoop.ts} | 4 +-- ...thLock.spec.js => distributedLoop.spec.js} | 26 +++++++++---------- app/server.js | 4 +-- 3 files changed, 17 insertions(+), 17 deletions(-) rename app/api/services/tasksmanager/{RepeatWith.ts => DistributedLoop.ts} (96%) rename app/api/services/tasksmanager/specs/{repeatWithLock.spec.js => distributedLoop.spec.js} (84%) diff --git a/app/api/services/tasksmanager/RepeatWith.ts b/app/api/services/tasksmanager/DistributedLoop.ts similarity index 96% rename from app/api/services/tasksmanager/RepeatWith.ts rename to app/api/services/tasksmanager/DistributedLoop.ts index b43ac434d4..4a7c1ea5f1 100644 --- a/app/api/services/tasksmanager/RepeatWith.ts +++ b/app/api/services/tasksmanager/DistributedLoop.ts @@ -2,7 +2,7 @@ import Redis from 'redis'; import Redlock from 'redlock'; import { handleError } from 'api/utils/handleError'; -export class RepeatWith { +export class DistributedLoop { private lockName: string; private task: () => void; @@ -106,7 +106,7 @@ export class RepeatWith { await this.runTask(); await lock.unlock(); } catch (error) { - if (error && error.name !== 'LockError') { + if (error instanceof Error && error.name !== 'LockError') { throw error; } } diff --git a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js b/app/api/services/tasksmanager/specs/distributedLoop.spec.js similarity index 84% rename from app/api/services/tasksmanager/specs/repeatWithLock.spec.js rename to app/api/services/tasksmanager/specs/distributedLoop.spec.js index cdc268f8b3..472b863646 100644 --- a/app/api/services/tasksmanager/specs/repeatWithLock.spec.js +++ b/app/api/services/tasksmanager/specs/distributedLoop.spec.js @@ -1,10 +1,10 @@ import * as errorHelper from 'api/utils/handleError'; import waitForExpect from 'wait-for-expect'; -import { RepeatWith } from '../RepeatWith'; +import { DistributedLoop } from '../DistributedLoop'; import { RedisServer } from '../RedisServer'; /* eslint-disable max-statements */ -describe('RepeatWithLock', () => { +describe('DistributedLoopLock', () => { let finishTask; let task; let rejectTask; @@ -43,8 +43,8 @@ describe('RepeatWithLock', () => { } it('should run one task at a time', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, { delayTimeBetweenTasks: 0 }); - const nodeTwo = new RepeatWith('my_locked_task', task, { delayTimeBetweenTasks: 0 }); + const nodeOne = new DistributedLoop('my_locked_task', task, { delayTimeBetweenTasks: 0 }); + const nodeTwo = new DistributedLoop('my_locked_task', task, { delayTimeBetweenTasks: 0 }); await nodeOne.start(); await nodeTwo.start(); await waitForExpect(async () => { @@ -66,7 +66,7 @@ describe('RepeatWithLock', () => { it('should wait until the redis server is available to execute the task', async () => { await redisServer.stop(); - const nodeOne = new RepeatWith('my_locked_task', task, { + const nodeOne = new DistributedLoop('my_locked_task', task, { retryDelay: 20, delayTimeBetweenTasks: 0, }); @@ -96,7 +96,7 @@ describe('RepeatWithLock', () => { it('should continue executing tasks after redis was unavailable for a while', async () => { const unstableRedisServer = new RedisServer(6371); await unstableRedisServer.start(); - const nodeOne = new RepeatWith('my_locked_task', task, { + const nodeOne = new DistributedLoop('my_locked_task', task, { retryDelay: 20, delayTimeBetweenTasks: 0, port: 6371, @@ -127,11 +127,11 @@ describe('RepeatWithLock', () => { }); it('should handle when a lock fails for too many retries', async () => { - const nodeOne = new RepeatWith('my_long_locked_task', task, { + const nodeOne = new DistributedLoop('my_long_locked_task', task, { retryDelay: 20, delayTimeBetweenTasks: 0, }); - const nodeTwo = new RepeatWith('my_long_locked_task', task, { + const nodeTwo = new DistributedLoop('my_long_locked_task', task, { retryDelay: 20, delayTimeBetweenTasks: 0, }); @@ -150,11 +150,11 @@ describe('RepeatWithLock', () => { }); it('should handle when a node fails to unlock the lock', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, { + const nodeOne = new DistributedLoop('my_locked_task', task, { maxLockTime: 50, delayTimeBetweenTasks: 0, }); - const nodeTwo = new RepeatWith('my_locked_task', task, { + const nodeTwo = new DistributedLoop('my_locked_task', task, { maxLockTime: 50, delayTimeBetweenTasks: 0, }); @@ -176,7 +176,7 @@ describe('RepeatWithLock', () => { it('should continue executing the task if one task fails', async () => { jest.spyOn(errorHelper, 'handleError').mockImplementation(() => {}); - const nodeOne = new RepeatWith('my_locked_task', task, { + const nodeOne = new DistributedLoop('my_locked_task', task, { maxLockTime: 500, delayTimeBetweenTasks: 0, }); @@ -203,12 +203,12 @@ describe('RepeatWithLock', () => { // eslint-disable-next-line max-statements it('should add a delay between task executions', async () => { - const nodeOne = new RepeatWith('my_locked_task', task, { + const nodeOne = new DistributedLoop('my_locked_task', task, { maxLockTime: 50, delayTimeBetweenTasks: 50, retryDelay: 20, }); - const nodeTwo = new RepeatWith('my_locked_task', task, { + const nodeTwo = new DistributedLoop('my_locked_task', task, { maxLockTime: 50, delayTimeBetweenTasks: 50, retryDelay: 20, diff --git a/app/server.js b/app/server.js index 62ac602fb2..2c51007187 100644 --- a/app/server.js +++ b/app/server.js @@ -11,7 +11,7 @@ import path from 'path'; import { TaskProvider } from 'shared/tasks/tasks'; import { PDFSegmentation } from 'api/services/pdfsegmentation/PDFSegmentation'; -import { RepeatWith } from 'api/services/tasksmanager/RepeatWith'; +import { DistributedLoop } from 'api/services/tasksmanager/DistributedLoop'; import { appContextMiddleware } from 'api/utils/appContextMiddleware'; import { requestIdMiddleware } from 'api/utils/requestIdMiddleware'; @@ -154,7 +154,7 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { topicClassificationRepeater.start(); const segmentationConnector = new PDFSegmentation(); - const segmentationRepeater = new RepeatWith( + const segmentationRepeater = new DistributedLoop( 'segmentation_repeat', segmentationConnector.segmentPdfs, { port: config.redis.port, host: config.redis.host, delayTimeBetweenTasks: 2000 } From 0b977f77f23bbe0bc2fc2c0af455179772e90a43 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 19 Oct 2021 17:36:05 +0200 Subject: [PATCH 49/62] Changed task structure to contain params --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 7 +++++-- .../pdfsegmentation/specs/PDFSegmentation.spec.ts | 8 +++++--- app/api/services/tasksmanager/TaskManager.ts | 6 ++++++ app/api/services/tasksmanager/specs/taskManager.spec.ts | 5 ++++- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 0bf4174674..e14c1282b2 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -43,8 +43,11 @@ class PDFSegmentation { } const task = { - task: file.filename, + task: this.SERVICE_NAME, tenant, + params: { + filename: file.filename, + }, }; await this.segmentationTaskManager.startTask(task); @@ -59,7 +62,7 @@ class PDFSegmentation { const response = await request.get(message.data_url); await tenants.run(async () => { - const [segmentation] = await SegmentationModel.get({ filename: message.task }); + const [segmentation] = await SegmentationModel.get({ filename: message.params!.filename }); // eslint-disable-next-line camelcase const { paragraphs, page_height, page_width } = JSON.parse(response.json); await SegmentationModel.save({ diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 27106fe656..cdc1d213fd 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -14,10 +14,10 @@ import fs from 'fs'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; import { Db } from 'mongodb'; +import request from 'shared/JSONRequest'; import { PDFSegmentation } from '../PDFSegmentation'; import { SegmentationModel } from '../segmentationModel'; -import request from 'shared/JSONRequest'; import { ExternalDummyService } from '../../tasksmanager/specs/ExternalDummyService'; jest.mock('api/services/tasksmanager/TaskManager.ts'); @@ -127,8 +127,9 @@ describe('PDFSegmentation', () => { await segmentPdfs.segmentPdfs(); expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledWith({ - task: 'documentA.pdf', + params: { filename: 'documentA.pdf' }, tenant: 'tenantOne', + task: 'segmentation', }); }); @@ -213,8 +214,9 @@ describe('PDFSegmentation', () => { await segmentPdfs.processResults({ tenant: tenantOne.name, - task: 'documentA.pdf', + params: { filename: 'documentA.pdf' }, data_url: 'http://localhost:1235/results', + task: 'segmentation', }); await tenants.run(async () => { diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index 8735510d2d..68aba598dc 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -7,12 +7,18 @@ import { config } from 'api/config'; export interface TaskMessage { tenant: string; task: string; + params?: { + [key: string]: any; + }; } /* eslint-disable camelcase */ export interface ResultsMessage { tenant: string; task: string; + params?: { + [key: string]: any; + }; data_url?: string; file_url?: string; } diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index c2934244c1..af1e1951dd 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -152,7 +152,10 @@ describe('taskManager', () => { } await redisServer.start(); - await new Promise(resolve => setTimeout(resolve, 200)); // wait for redis to connect + + await waitForExpect(async () => { + expect(taskManager?.redisClient.connected).toBe(true); + }); await taskManager?.startTask(task); const message = await externalDummyService.readFirstTaskMessage(); From f260d8b734afb3270c37d035105fbbfd7f53e7da Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 20 Oct 2021 10:51:18 +0200 Subject: [PATCH 50/62] PDFsegmentation service requesting and storing the xml --- .../pdfsegmentation/PDFSegmentation.ts | 58 ++++++++++++------- .../specs/PDFSegmentation.spec.ts | 55 ++++++++++++------ .../pdfsegmentation/specs/uploads/test.xml | 26 +++++++++ .../specs/ExternalDummyService.ts | 24 +++++++- 4 files changed, 122 insertions(+), 41 deletions(-) create mode 100644 app/api/services/pdfsegmentation/specs/uploads/test.xml diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index e14c1282b2..dbb3366aef 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,7 +1,9 @@ import { TaskManager, ResultsMessage } from 'api/services/tasksmanager/TaskManager'; -import { uploadsPath } from 'api/files'; +import { uploadsPath, fileFromReadStream, createDirIfNotExists } from 'api/files'; +import { Readable } from 'stream'; import filesModel from 'api/files/filesModel'; import fs from 'fs'; +import path from 'path'; import { FileType } from 'shared/types/fileType'; import { Settings } from 'shared/types/settingsType'; import settings from 'api/settings/settings'; @@ -57,26 +59,6 @@ class PDFSegmentation { storeProcess = async (fileID: ObjectIdSchema, filename: string) => SegmentationModel.save({ fileID, filename }); - processResults = async (message: ResultsMessage) => { - try { - const response = await request.get(message.data_url); - - await tenants.run(async () => { - const [segmentation] = await SegmentationModel.get({ filename: message.params!.filename }); - // eslint-disable-next-line camelcase - const { paragraphs, page_height, page_width } = JSON.parse(response.json); - await SegmentationModel.save({ - ...segmentation, - segmentation: { page_height, page_width, paragraphs }, - autoexpire: null, - status: 'completed', - }); - }, message.tenant); - } catch (error) { - handleError(error); - } - }; - segmentPdfs = async () => { const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); if (pendingTasks > 0) { @@ -127,6 +109,40 @@ class PDFSegmentation { }) ); }; + + processResults = async (message: ResultsMessage) => { + try { + const response = await request.get(message.data_url); + const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; + + if (!fileStream) { + throw new Error(`Error requesting segmentation results, tenant: ${message.tenant}`); + } + + await tenants.run(async () => { + await createDirIfNotExists(path.join(uploadsPath(), this.SERVICE_NAME)); + const filePath = path.join(uploadsPath(), this.SERVICE_NAME); + const fileName = `${path.basename( + message.params!.filename, + path.extname(message.params!.filename) + )}.xml`; + + await fileFromReadStream(fileName, fileStream, filePath); + + const [segmentation] = await SegmentationModel.get({ filename: message.params!.filename }); + // eslint-disable-next-line camelcase + const { paragraphs, page_height, page_width } = JSON.parse(response.json); + await SegmentationModel.save({ + ...segmentation, + segmentation: { page_height, page_width, paragraphs }, + autoexpire: null, + status: 'completed', + }); + }, message.tenant); + } catch (error) { + handleError(error); + } + }; } export { PDFSegmentation }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index cdc1d213fd..e87c1bb281 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -1,3 +1,4 @@ +/* eslint-disable camelcase */ import { fixturer, createNewMongoDB } from 'api/utils/testing_db'; import { MongoMemoryServer } from 'mongodb-memory-server'; import { @@ -10,6 +11,7 @@ import { } from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; +import path from 'path'; import { tenants } from 'api/tenants/tenantContext'; import { DB } from 'api/odm'; @@ -87,9 +89,6 @@ describe('PDFSegmentation', () => { it('should send other pdf to segment', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesOtherFile); - - await segmentPdfs.segmentPdfs(); - await segmentPdfs.segmentPdfs(); expect(request.uploadFile).toHaveBeenCalledWith( 'http://localhost:1234/files', @@ -101,13 +100,6 @@ describe('PDFSegmentation', () => { it('should send 10 pdfs to segment', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesTwelveFiles); await segmentPdfs.segmentPdfs(); - - expect(request.uploadFile).toHaveBeenCalledWith( - 'http://localhost:1234/files', - fixturesPdfNameA, - fileA - ); - expect(request.uploadFile).toHaveBeenCalledTimes(10); }); @@ -183,17 +175,23 @@ describe('PDFSegmentation', () => { describe('when the segmentation finsihes', () => { let segmentationExternalService: ExternalDummyService; + let segmentationData: { + page_width: number; + page_height: number; + paragraphs: object[]; + }; + let segmentationFolder: string; beforeEach(async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); + await segmentPdfs.segmentPdfs(); + segmentationFolder = path.join(tenantOne.uploadedDocuments, 'segmentation'); + if (fs.existsSync(segmentationFolder)) { + fs.rmdirSync(segmentationFolder, { recursive: true }); + } segmentationExternalService = new ExternalDummyService(1235); await segmentationExternalService.start(); - }); - afterEach(async () => { - await segmentationExternalService.stop(); - }); - it('should store the segmentation', async () => { - await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); - const segmentationData = { + segmentationData = { page_width: 600, page_height: 1200, paragraphs: [ @@ -207,15 +205,23 @@ describe('PDFSegmentation', () => { }, ], }; - segmentationExternalService.setResults(segmentationData); + segmentationExternalService.setFileResults(path.join(__dirname, '/uploads/test.xml')); + }); - await segmentPdfs.segmentPdfs(); + afterEach(async () => { + await segmentationExternalService.stop(); + if (fs.existsSync(segmentationFolder)) { + fs.rmdirSync(segmentationFolder, { recursive: true }); + } + }); + it('should store the segmentation', async () => { await segmentPdfs.processResults({ tenant: tenantOne.name, params: { filename: 'documentA.pdf' }, data_url: 'http://localhost:1235/results', + file_url: 'http://localhost:1235/file', task: 'segmentation', }); @@ -235,5 +241,16 @@ describe('PDFSegmentation', () => { ); }, tenantOne.name); }); + + it('should store the xml file', async () => { + await segmentPdfs.processResults({ + tenant: tenantOne.name, + params: { filename: 'documentA.pdf' }, + data_url: 'http://localhost:1235/results', + file_url: 'http://localhost:1235/file', + task: 'segmentation', + }); + expect(fs.existsSync(path.join(segmentationFolder, 'documentA.xml'))).toBe(true); + }); }); }); diff --git a/app/api/services/pdfsegmentation/specs/uploads/test.xml b/app/api/services/pdfsegmentation/specs/uploads/test.xml new file mode 100644 index 0000000000..3edbcb9b13 --- /dev/null +++ b/app/api/services/pdfsegmentation/specs/uploads/test.xml @@ -0,0 +1,26 @@ + + + Ceviche + $5.95 + Cold shrimps soup + 450 + + + Fideua + $7.95 + Hot shrimps pasta + 600 + + + Tofu + $8.95 + No one really knows + 1 + + + Cheese Burger + $4.50 + Hot beef sandwich + 950 + + \ No newline at end of file diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index 3a2dbfc554..4556d05426 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -1,3 +1,4 @@ +/* eslint-disable camelcase */ import express from 'express'; import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; @@ -26,6 +27,8 @@ export class ExternalDummyService { redisClient: RedisClient | undefined; + fileResults: string | undefined; + private readonly serviceName: string; constructor(port = 1234, serviceName = 'dummy') { @@ -51,12 +54,25 @@ export class ExternalDummyService { this.app.get('/results', (_req, res) => { res.json(JSON.stringify(this.results)); }); + + this.app.get('/file', (_req, res) => { + if (!this.fileResults) { + res.status(404).send('Not found'); + return; + } + + res.sendFile(this.fileResults); + }); } setResults(results: object) { this.results = results; } + setFileResults(file: string) { + this.fileResults = file; + } + get rsmq() { if (!this.redisSMQ) { throw new Error('rsmq is not initialized'); @@ -146,7 +162,13 @@ export class ExternalDummyService { await this.server?.close(); } - async sendFinishedMessage(task: { task: string; tenant: string }) { + async sendFinishedMessage(task: { + task: string; + tenant: string; + params: object; + data_url: string; + file_url: string; + }) { try { await this.rsmq.sendMessageAsync({ qname: `${this.serviceName}_results`, From fd5c112363d4ad0e58254d700897c6b8ce364113 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 20 Oct 2021 12:51:45 +0200 Subject: [PATCH 51/62] some error handling --- .../pdfsegmentation/PDFSegmentation.ts | 104 ++++++++++-------- .../services/tasksmanager/DistributedLoop.ts | 6 +- app/api/services/tasksmanager/TaskManager.ts | 20 ++-- .../specs/ExternalDummyService.ts | 9 +- .../specs/distributedLoop.spec.js | 2 +- .../tasksmanager/specs/taskManager.spec.ts | 38 ++++++- app/api/utils/handleError.js | 6 +- 7 files changed, 114 insertions(+), 71 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index dbb3366aef..e8d8be5f6a 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -36,24 +36,30 @@ class PDFSegmentation { return; } - const fileBuffer = fs.readFileSync(uploadsPath(file.filename)); try { - await request.uploadFile(serviceUrl, file.filename, fileBuffer); + await request.uploadFile( + serviceUrl, + file.filename, + fs.readFileSync(uploadsPath(file.filename)) + ); + + const task = { + task: this.SERVICE_NAME, + tenant, + params: { + filename: file.filename, + }, + }; + + await this.segmentationTaskManager.startTask(task); + await this.storeProcess(file._id!, file.filename); } catch { - handleError(`Error uploading file to segmentation service, tenant: ${tenant}`); - return; - } + handleError(`Error segmenting pdf, tenant: ${tenant}, file: ${file.filename}`); - const task = { - task: this.SERVICE_NAME, - tenant, - params: { - filename: file.filename, - }, - }; - - await this.segmentationTaskManager.startTask(task); - await this.storeProcess(file._id!, file.filename); + await new Promise(resolve => { + setTimeout(resolve, 60000); + }); + } }; storeProcess = async (fileID: ObjectIdSchema, filename: string) => @@ -110,38 +116,46 @@ class PDFSegmentation { ); }; - processResults = async (message: ResultsMessage) => { - try { - const response = await request.get(message.data_url); - const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; + requestResults = async (message: ResultsMessage) => { + const response = await request.get(message.data_url); + const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; - if (!fileStream) { - throw new Error(`Error requesting segmentation results, tenant: ${message.tenant}`); - } + return { data: JSON.parse(response.json), fileStream }; + }; - await tenants.run(async () => { - await createDirIfNotExists(path.join(uploadsPath(), this.SERVICE_NAME)); - const filePath = path.join(uploadsPath(), this.SERVICE_NAME); - const fileName = `${path.basename( - message.params!.filename, - path.extname(message.params!.filename) - )}.xml`; - - await fileFromReadStream(fileName, fileStream, filePath); - - const [segmentation] = await SegmentationModel.get({ filename: message.params!.filename }); - // eslint-disable-next-line camelcase - const { paragraphs, page_height, page_width } = JSON.parse(response.json); - await SegmentationModel.save({ - ...segmentation, - segmentation: { page_height, page_width, paragraphs }, - autoexpire: null, - status: 'completed', - }); - }, message.tenant); - } catch (error) { - handleError(error); - } + storeXML = async (filename: string, fileStream: Readable) => { + await createDirIfNotExists(path.join(uploadsPath(), this.SERVICE_NAME)); + const filePath = path.join(uploadsPath(), this.SERVICE_NAME); + const xmlname = `${path.basename(filename, path.extname(filename))}.xml`; + + await fileFromReadStream(xmlname, fileStream, filePath); + }; + + saveSegmentation = async (filename: string, data: any) => { + const [segmentation] = await SegmentationModel.get({ filename }); + // eslint-disable-next-line camelcase + const { paragraphs, page_height, page_width } = data; + await SegmentationModel.save({ + ...segmentation, + segmentation: { page_height, page_width, paragraphs }, + autoexpire: null, + status: 'completed', + }); + }; + + processResults = async (message: ResultsMessage): Promise => { + let processed = true; + await tenants.run(async () => { + try { + const { data, fileStream } = await this.requestResults(message); + await this.storeXML(message.params!.filename, fileStream); + await this.saveSegmentation(message.params!.filename, data); + } catch (error) { + handleError(error); + processed = false; + } + }, message.tenant); + return processed; }; } diff --git a/app/api/services/tasksmanager/DistributedLoop.ts b/app/api/services/tasksmanager/DistributedLoop.ts index 4a7c1ea5f1..ad9a4ba315 100644 --- a/app/api/services/tasksmanager/DistributedLoop.ts +++ b/app/api/services/tasksmanager/DistributedLoop.ts @@ -66,9 +66,9 @@ export class DistributedLoop { void this.lockTask(); } - async waitBetweenTasks() { + async waitBetweenTasks(delay = this.delayTimeBetweenTasks) { await new Promise(resolve => { - setTimeout(resolve, this.delayTimeBetweenTasks); + setTimeout(resolve, delay); }); } @@ -76,7 +76,7 @@ export class DistributedLoop { try { await this.task(); } catch (error) { - handleError(error); + handleError(error, { useContext: false }); } await this.waitBetweenTasks(); diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index 68aba598dc..30e55a6ce5 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -3,6 +3,7 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; import { Repeater } from 'api/utils/Repeater'; import { config } from 'api/config'; +import { handleError } from 'api/utils'; export interface TaskMessage { tenant: string; @@ -26,7 +27,7 @@ export interface ResultsMessage { export interface Service { serviceName: string; - processResults?: (results: ResultsMessage) => Promise; + processResults?: (results: ResultsMessage) => Promise; processRessultsMessageHiddenTime?: number; } @@ -35,9 +36,9 @@ export class TaskManager { readonly service: Service; - private readonly taskQueue: string; + readonly taskQueue: string; - private readonly resultsQueue: string; + readonly resultsQueue: string; private repeater: Repeater | undefined; @@ -119,12 +120,13 @@ export class TaskManager { if (message.id && this.service.processResults) { const processedMessage = JSON.parse(message.message); - await this.service.processResults(processedMessage); - - await this.redisSMQ?.deleteMessageAsync({ - qname: this.resultsQueue, - id: message.id, - }); + const processed = await this.service.processResults(processedMessage); + if (processed) { + await this.redisSMQ.deleteMessageAsync({ + qname: this.resultsQueue, + id: message.id, + }); + } } } diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index 4556d05426..4bd4692bcc 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -5,6 +5,7 @@ import Redis, { RedisClient } from 'redis'; import { Server } from 'http'; import bodyParser from 'body-parser'; import { uploadMiddleware } from 'api/files'; +import { ResultsMessage } from '../TaskManager'; export class ExternalDummyService { private app: express.Application; @@ -162,13 +163,7 @@ export class ExternalDummyService { await this.server?.close(); } - async sendFinishedMessage(task: { - task: string; - tenant: string; - params: object; - data_url: string; - file_url: string; - }) { + async sendFinishedMessage(task: ResultsMessage) { try { await this.rsmq.sendMessageAsync({ qname: `${this.serviceName}_results`, diff --git a/app/api/services/tasksmanager/specs/distributedLoop.spec.js b/app/api/services/tasksmanager/specs/distributedLoop.spec.js index 472b863646..5772294728 100644 --- a/app/api/services/tasksmanager/specs/distributedLoop.spec.js +++ b/app/api/services/tasksmanager/specs/distributedLoop.spec.js @@ -190,7 +190,7 @@ describe('DistributedLoopLock', () => { const someError = { error: 'some error' }; rejectTask(someError); await waitForExpect(async () => { - expect(errorHelper.handleError).toHaveBeenLastCalledWith(someError); + expect(errorHelper.handleError).toHaveBeenLastCalledWith(someError, { useContext: false }); }); finishTask(); diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index af1e1951dd..7e2dc06c5a 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -19,7 +19,7 @@ describe('taskManager', () => { const redisUrl = `redis://${config.redis.host}:${config.redis.port}`; service = { serviceName: 'KonzNGaboHellKitchen', - processResults: jest.fn(), + processResults: jest.fn().mockImplementation(async () => true), processRessultsMessageHiddenTime: 1, }; redisServer = new RedisServer(port); @@ -33,6 +33,10 @@ describe('taskManager', () => { await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to be ready }); + beforeEach(() => { + service.processResults = jest.fn().mockImplementation(async () => true); + }); + afterAll(async () => { await taskManager?.stop(); await externalDummyService.stop(); @@ -109,7 +113,7 @@ describe('taskManager', () => { const task = { task: 'Tofu', tenant: 'Gabo', - results_url: 'http://localhost:1234/results', + data_url: 'http://localhost:1234/results', }; await externalDummyService.sendFinishedMessage(task); @@ -118,8 +122,33 @@ describe('taskManager', () => { expect(service.processResults).toHaveBeenCalledWith(task); }); - await new Promise(resolve => setTimeout(resolve, 1001)); // wait for another check for results - expect(service.processResults).toHaveBeenCalledTimes(1); + const queueAttributes = await taskManager?.redisSMQ!.getQueueAttributesAsync({ + qname: taskManager.resultsQueue, + }); + + expect(queueAttributes!.msgs).toBe(0); + }); + + describe('if the processing goes wrong', () => { + it('should not delete the message', async () => { + const task = { + task: 'Tofu', + tenant: 'Gabo', + data_url: 'http://localhost:1234/results', + }; + service.processResults = jest.fn().mockImplementation(async () => false); + await externalDummyService.sendFinishedMessage(task); + + await waitForExpect(async () => { + expect(service.processResults).toHaveBeenCalledWith(task); + }); + + const queueAttributes = await taskManager?.redisSMQ!.getQueueAttributesAsync({ + qname: taskManager.resultsQueue, + }); + + expect(queueAttributes!.msgs).toBe(1); + }); }); }); @@ -134,6 +163,7 @@ describe('taskManager', () => { } catch (e) { expect(e).toEqual(Error('Redis is not connected')); } + await redisServer.start(); }); diff --git a/app/api/utils/handleError.js b/app/api/utils/handleError.js index c019113fcf..e2d6997c4a 100644 --- a/app/api/utils/handleError.js +++ b/app/api/utils/handleError.js @@ -132,7 +132,7 @@ function simplifyError(result, error) { return simplifiedError; } -const handleError = (_error, { req = undefined, uncaught = false } = {}) => { +const handleError = (_error, { req = undefined, uncaught = false, useContext = true } = {}) => { const errorData = typeof _error === 'string' ? createError(_error, 500) : _error; const error = errorData || new Error('Unexpected error has occurred'); @@ -141,7 +141,9 @@ const handleError = (_error, { req = undefined, uncaught = false } = {}) => { } const result = prettifyError(error, { req, uncaught }); - result.requestId = appContext.get('requestId'); + if (useContext) { + result.requestId = appContext.get('requestId'); + } sendLog(result, error, {}); From 10ecc2091853f073d7dffa23585bfc870c5740cb Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 20 Oct 2021 14:27:45 +0200 Subject: [PATCH 52/62] deleted unused var --- app/api/services/tasksmanager/TaskManager.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index 30e55a6ce5..f4e7c28684 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -3,7 +3,6 @@ import RedisSMQ, { QueueMessage } from 'rsmq'; import Redis, { RedisClient } from 'redis'; import { Repeater } from 'api/utils/Repeater'; import { config } from 'api/config'; -import { handleError } from 'api/utils'; export interface TaskMessage { tenant: string; From 866d020a3ada7594862c2ff6e6fde72d7b687b70 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 20 Oct 2021 14:29:00 +0200 Subject: [PATCH 53/62] fixed type --- app/api/services/tasksmanager/TaskManager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index f4e7c28684..351933da4a 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -26,7 +26,7 @@ export interface ResultsMessage { export interface Service { serviceName: string; - processResults?: (results: ResultsMessage) => Promise; + processResults?: (results: ResultsMessage) => Promise; processRessultsMessageHiddenTime?: number; } From 48cce933233344c46d1d51e10b9c0155bb55cce0 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 21 Oct 2021 10:01:04 +0200 Subject: [PATCH 54/62] Always deleting result message when an error happens --- .../pdfsegmentation/PDFSegmentation.ts | 19 ++++++++++--- .../specs/PDFSegmentation.spec.ts | 2 ++ app/api/services/tasksmanager/TaskManager.ts | 17 +++++------ .../tasksmanager/specs/taskManager.spec.ts | 28 +------------------ 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index e8d8be5f6a..d158639caa 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -143,19 +143,30 @@ class PDFSegmentation { }); }; - processResults = async (message: ResultsMessage): Promise => { - let processed = true; + saveSegmentationError = async (filename: string) => { + const [segmentation] = await SegmentationModel.get({ filename }); + await SegmentationModel.save({ + ...segmentation, + autoexpire: null, + status: 'error', + }); + }; + + processResults = async (message: ResultsMessage): Promise => { await tenants.run(async () => { try { + if (!message.success) { + await this.saveSegmentationError(message.params!.filename); + return; + } + const { data, fileStream } = await this.requestResults(message); await this.storeXML(message.params!.filename, fileStream); await this.saveSegmentation(message.params!.filename, data); } catch (error) { handleError(error); - processed = false; } }, message.tenant); - return processed; }; } diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index e87c1bb281..78eefd7ab4 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -223,6 +223,7 @@ describe('PDFSegmentation', () => { data_url: 'http://localhost:1235/results', file_url: 'http://localhost:1235/file', task: 'segmentation', + success: true, }); await tenants.run(async () => { @@ -249,6 +250,7 @@ describe('PDFSegmentation', () => { data_url: 'http://localhost:1235/results', file_url: 'http://localhost:1235/file', task: 'segmentation', + success: true, }); expect(fs.existsSync(path.join(segmentationFolder, 'documentA.xml'))).toBe(true); }); diff --git a/app/api/services/tasksmanager/TaskManager.ts b/app/api/services/tasksmanager/TaskManager.ts index 351933da4a..33e12c946a 100644 --- a/app/api/services/tasksmanager/TaskManager.ts +++ b/app/api/services/tasksmanager/TaskManager.ts @@ -21,12 +21,14 @@ export interface ResultsMessage { }; data_url?: string; file_url?: string; + success?: boolean; + error?: string; } /* eslint-enable camelcase */ export interface Service { serviceName: string; - processResults?: (results: ResultsMessage) => Promise; + processResults?: (results: ResultsMessage) => Promise; processRessultsMessageHiddenTime?: number; } @@ -119,13 +121,12 @@ export class TaskManager { if (message.id && this.service.processResults) { const processedMessage = JSON.parse(message.message); - const processed = await this.service.processResults(processedMessage); - if (processed) { - await this.redisSMQ.deleteMessageAsync({ - qname: this.resultsQueue, - id: message.id, - }); - } + await this.service.processResults(processedMessage); + + await this.redisSMQ.deleteMessageAsync({ + qname: this.resultsQueue, + id: message.id, + }); } } diff --git a/app/api/services/tasksmanager/specs/taskManager.spec.ts b/app/api/services/tasksmanager/specs/taskManager.spec.ts index 7e2dc06c5a..81829238d6 100644 --- a/app/api/services/tasksmanager/specs/taskManager.spec.ts +++ b/app/api/services/tasksmanager/specs/taskManager.spec.ts @@ -19,7 +19,7 @@ describe('taskManager', () => { const redisUrl = `redis://${config.redis.host}:${config.redis.port}`; service = { serviceName: 'KonzNGaboHellKitchen', - processResults: jest.fn().mockImplementation(async () => true), + processResults: jest.fn(), processRessultsMessageHiddenTime: 1, }; redisServer = new RedisServer(port); @@ -33,10 +33,6 @@ describe('taskManager', () => { await new Promise(resolve => setTimeout(resolve, 100)); // wait for redis to be ready }); - beforeEach(() => { - service.processResults = jest.fn().mockImplementation(async () => true); - }); - afterAll(async () => { await taskManager?.stop(); await externalDummyService.stop(); @@ -128,28 +124,6 @@ describe('taskManager', () => { expect(queueAttributes!.msgs).toBe(0); }); - - describe('if the processing goes wrong', () => { - it('should not delete the message', async () => { - const task = { - task: 'Tofu', - tenant: 'Gabo', - data_url: 'http://localhost:1234/results', - }; - service.processResults = jest.fn().mockImplementation(async () => false); - await externalDummyService.sendFinishedMessage(task); - - await waitForExpect(async () => { - expect(service.processResults).toHaveBeenCalledWith(task); - }); - - const queueAttributes = await taskManager?.redisSMQ!.getQueueAttributesAsync({ - qname: taskManager.resultsQueue, - }); - - expect(queueAttributes!.msgs).toBe(1); - }); - }); }); describe('when redis server is not available', () => { From 12e9a9e921e1feb5ebdad71c7288709da114e707 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Fri, 22 Oct 2021 18:37:51 +0200 Subject: [PATCH 55/62] only save the error when the segmentation placeholder existed to avoid saving malformed responses --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index d158639caa..3d7f9c2330 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -145,11 +145,13 @@ class PDFSegmentation { saveSegmentationError = async (filename: string) => { const [segmentation] = await SegmentationModel.get({ filename }); - await SegmentationModel.save({ - ...segmentation, - autoexpire: null, - status: 'error', - }); + if (segmentation) { + await SegmentationModel.save({ + ...segmentation, + autoexpire: null, + status: 'error', + }); + } }; processResults = async (message: ResultsMessage): Promise => { From 62ff8cf7b9b346f03030036bf1008bc63cbb9089 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 26 Oct 2021 09:26:26 +0200 Subject: [PATCH 56/62] Using getFileContents instead of readFileSync --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 3d7f9c2330..eb7195696a 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,5 +1,5 @@ import { TaskManager, ResultsMessage } from 'api/services/tasksmanager/TaskManager'; -import { uploadsPath, fileFromReadStream, createDirIfNotExists } from 'api/files'; +import { uploadsPath, fileFromReadStream, createDirIfNotExists, getFileContent } from 'api/files'; import { Readable } from 'stream'; import filesModel from 'api/files/filesModel'; import fs from 'fs'; @@ -37,11 +37,8 @@ class PDFSegmentation { } try { - await request.uploadFile( - serviceUrl, - file.filename, - fs.readFileSync(uploadsPath(file.filename)) - ); + const fileContent = await getFileContent(file.filename); + await request.uploadFile(serviceUrl, file.filename, fileContent); const task = { task: this.SERVICE_NAME, From 9187b5406098be588f038be5c0ce8fd5d6144ce9 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Tue, 26 Oct 2021 13:56:57 +0200 Subject: [PATCH 57/62] Segementation type and using filesystem instead of fs --- app/api/files/filesystem.ts | 3 ++ .../pdfsegmentation/PDFSegmentation.ts | 36 ++++++------- .../pdfsegmentation/segmentationModel.js | 33 ------------ .../pdfsegmentation/segmentationModel.ts | 18 +++++++ .../specs/PDFSegmentation.spec.ts | 6 +-- app/shared/types/segmentationSchema.ts | 52 +++++++++++++++++++ app/shared/types/segmentationType.d.ts | 24 +++++++++ 7 files changed, 118 insertions(+), 54 deletions(-) delete mode 100644 app/api/services/pdfsegmentation/segmentationModel.js create mode 100644 app/api/services/pdfsegmentation/segmentationModel.ts create mode 100644 app/shared/types/segmentationSchema.ts create mode 100644 app/shared/types/segmentationType.d.ts diff --git a/app/api/files/filesystem.ts b/app/api/files/filesystem.ts index 9777d642af..c5e7ebe5c5 100644 --- a/app/api/files/filesystem.ts +++ b/app/api/files/filesystem.ts @@ -136,6 +136,8 @@ const streamToString = async (stream: Readable): Promise => const getFileContent = async (fileName: FilePath): Promise => asyncFS.readFile(uploadsPath(fileName), 'utf8'); +const readFile = async (fileName: FilePath): Promise => asyncFS.readFile(fileName); + export { setupTestUploadedPaths, deleteUploadedFiles, @@ -154,4 +156,5 @@ export { activityLogPath, writeFile, appendFile, + readFile, }; diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index eb7195696a..b909cf8846 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,8 +1,7 @@ import { TaskManager, ResultsMessage } from 'api/services/tasksmanager/TaskManager'; -import { uploadsPath, fileFromReadStream, createDirIfNotExists, getFileContent } from 'api/files'; +import { uploadsPath, fileFromReadStream, createDirIfNotExists, readFile } from 'api/files'; import { Readable } from 'stream'; import filesModel from 'api/files/filesModel'; -import fs from 'fs'; import path from 'path'; import { FileType } from 'shared/types/fileType'; import { Settings } from 'shared/types/settingsType'; @@ -31,13 +30,13 @@ class PDFSegmentation { }); } - segmentOnePdf = async (file: FileType, serviceUrl: string, tenant: string) => { - if (!file.filename) { - return; - } - + segmentOnePdf = async ( + file: FileType & { filename: string; _id: ObjectIdSchema }, + serviceUrl: string, + tenant: string + ) => { try { - const fileContent = await getFileContent(file.filename); + const fileContent = await readFile(uploadsPath(file.filename)); await request.uploadFile(serviceUrl, file.filename, fileContent); const task = { @@ -49,7 +48,7 @@ class PDFSegmentation { }; await this.segmentationTaskManager.startTask(task); - await this.storeProcess(file._id!, file.filename); + await this.storeProcess(file._id, file.filename); } catch { handleError(`Error segmenting pdf, tenant: ${tenant}, file: ${file.filename}`); @@ -63,7 +62,7 @@ class PDFSegmentation { SegmentationModel.save({ fileID, filename }); segmentPdfs = async () => { - const pendingTasks = await this.segmentationTaskManager!.countPendingTasks(); + const pendingTasks = await this.segmentationTaskManager.countPendingTasks(); if (pendingTasks > 0) { return; } @@ -78,10 +77,11 @@ class PDFSegmentation { return; } - const filesToSegment = await filesModel.db.aggregate([ + const filesToSegment = (await filesModel.db.aggregate([ { $match: { type: 'document', + filename: { $exists: true }, }, }, { @@ -102,7 +102,7 @@ class PDFSegmentation { { $limit: this.batchSize, }, - ]); + ])) as FileType & { filename: string; _id: ObjectIdSchema }[]; for (let i = 0; i < filesToSegment.length; i += 1) { // eslint-disable-next-line no-await-in-loop @@ -115,17 +115,17 @@ class PDFSegmentation { requestResults = async (message: ResultsMessage) => { const response = await request.get(message.data_url); - const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; + const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; // investigart mas sobre esto, instalar typos de node fetch ? return { data: JSON.parse(response.json), fileStream }; }; storeXML = async (filename: string, fileStream: Readable) => { - await createDirIfNotExists(path.join(uploadsPath(), this.SERVICE_NAME)); - const filePath = path.join(uploadsPath(), this.SERVICE_NAME); + const folderPath = uploadsPath(this.SERVICE_NAME); + await createDirIfNotExists(folderPath); const xmlname = `${path.basename(filename, path.extname(filename))}.xml`; - await fileFromReadStream(xmlname, fileStream, filePath); + await fileFromReadStream(xmlname, fileStream, folderPath); }; saveSegmentation = async (filename: string, data: any) => { @@ -136,7 +136,7 @@ class PDFSegmentation { ...segmentation, segmentation: { page_height, page_width, paragraphs }, autoexpire: null, - status: 'completed', + status: 'ready', }); }; @@ -146,7 +146,7 @@ class PDFSegmentation { await SegmentationModel.save({ ...segmentation, autoexpire: null, - status: 'error', + status: 'failed', }); } }; diff --git a/app/api/services/pdfsegmentation/segmentationModel.js b/app/api/services/pdfsegmentation/segmentationModel.js deleted file mode 100644 index e7d7aecba0..0000000000 --- a/app/api/services/pdfsegmentation/segmentationModel.js +++ /dev/null @@ -1,33 +0,0 @@ -import mongoose from 'mongoose'; -import { instanceModel } from 'api/odm'; - -const props = { - autoexpire: { type: Date, expires: 86400, default: Date.now }, // 24 hours - status: { type: String, enum: ['pending', 'completed', 'error'], default: 'pending' }, - error: { type: String }, - segmentation: { - page_width: Number, - page_height: Number, - paragraphs: [ - { - left: Number, - top: Number, - width: Number, - height: Number, - page_number: Number, - text: String, - }, - ], - }, - file: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, - filename: { type: String }, -}; - -const mongoSchema = new mongoose.Schema(props, { - emitIndexErrors: true, - strict: false, -}); - -const SegmentationModel = instanceModel('segmentations', mongoSchema); - -export { SegmentationModel }; diff --git a/app/api/services/pdfsegmentation/segmentationModel.ts b/app/api/services/pdfsegmentation/segmentationModel.ts new file mode 100644 index 0000000000..92e3d0c1d2 --- /dev/null +++ b/app/api/services/pdfsegmentation/segmentationModel.ts @@ -0,0 +1,18 @@ +import mongoose from 'mongoose'; +import { instanceModel } from 'api/odm'; +import { SegmentationType } from 'shared/types/segmentationType'; + +const props = { + autoexpire: { type: Date, expires: 86400, default: Date.now }, // 24 hours + file: { type: mongoose.Schema.Types.ObjectId, ref: 'File' }, + status: { type: String, enum: ['processing', 'failed', 'ready'], default: 'processing' }, +}; + +const mongoSchema = new mongoose.Schema(props, { + emitIndexErrors: true, + strict: false, +}); + +const SegmentationModel = instanceModel('segmentations', mongoSchema); + +export { SegmentationModel }; diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 78eefd7ab4..194f77f20d 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -131,7 +131,7 @@ describe('PDFSegmentation', () => { await segmentPdfs.segmentPdfs(); await tenants.run(async () => { const [segmentation] = await SegmentationModel.get(); - expect(segmentation.status).toBe('pending'); + expect(segmentation.status).toBe('processing'); expect(segmentation.filename).toBe(fixturesPdfNameA); expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); }, 'tenantOne'); @@ -143,7 +143,7 @@ describe('PDFSegmentation', () => { { filename: fixturesFiveFiles.files![0].filename, fileID: fixturesFiveFiles.files![0]._id, - status: 'pending', + status: 'processing', }, ]); @@ -229,7 +229,7 @@ describe('PDFSegmentation', () => { await tenants.run(async () => { const segmentations = await SegmentationModel.get(); const [segmentation] = segmentations; - expect(segmentation.status).toBe('completed'); + expect(segmentation.status).toBe('ready'); expect(segmentation.filename).toBe(fixturesPdfNameA); expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); expect(segmentation.autoexpire).toBe(null); diff --git a/app/shared/types/segmentationSchema.ts b/app/shared/types/segmentationSchema.ts new file mode 100644 index 0000000000..6e0f14e28d --- /dev/null +++ b/app/shared/types/segmentationSchema.ts @@ -0,0 +1,52 @@ +import Ajv from 'ajv'; +import { objectIdSchema } from 'shared/types/commonSchemas'; +import { wrapValidator } from 'shared/tsUtils'; +import { SegmentationType } from './segmentationType'; + +export const emitSchemaTypes = true; + +const ajv = Ajv({ allErrors: true, removeAdditional: true }); + +export const segmentationSchema = { + $schema: 'http://json-schema.org/schema#', + $async: true, + type: 'object', + additionalProperties: false, + title: 'SegmentationType', + definitions: { objectIdSchema }, + properties: { + _id: objectIdSchema, + autoexpire: { oneOf: [{ type: 'number' }, { type: 'null' }] }, + fileID: objectIdSchema, + filename: { type: 'string', minLength: 1 }, + status: { type: 'string', enum: ['processing', 'failed', 'ready'] }, + segmentation: { + type: 'object', + additionalProperties: false, + properties: { + page_width: { type: 'number' }, + page_height: { type: 'number' }, + paragraphs: { + type: 'array', + items: { + type: 'object', + additionalProperties: false, + properties: { + left: { type: 'number' }, + top: { type: 'number' }, + width: { type: 'number' }, + height: { type: 'number' }, + page_number: { type: 'number' }, + text: { type: 'string' }, + }, + }, + }, + }, + }, + }, +}; + +const validate = wrapValidator(ajv.compile(segmentationSchema)); + +export const validateFile = async (file: SegmentationType): Promise => + validate({ ...file }); diff --git a/app/shared/types/segmentationType.d.ts b/app/shared/types/segmentationType.d.ts new file mode 100644 index 0000000000..7c4be57d77 --- /dev/null +++ b/app/shared/types/segmentationType.d.ts @@ -0,0 +1,24 @@ +/* eslint-disable */ +/**AUTO-GENERATED. RUN yarn emit-types to update.*/ + +import { ObjectIdSchema } from 'shared/types/commonTypes'; + +export interface SegmentationType { + _id?: ObjectIdSchema; + autoexpire?: number | null; + fileID?: ObjectIdSchema; + filename?: string; + status?: 'processing' | 'failed' | 'ready'; + segmentation?: { + page_width?: number; + page_height?: number; + paragraphs?: { + left?: number; + top?: number; + width?: number; + height?: number; + page_number?: number; + text?: string; + }[]; + }; +} From 2bd25d0a862307693b0b79bd453dd0b3f4b29544 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Wed, 27 Oct 2021 10:58:02 +0200 Subject: [PATCH 58/62] Error handling for missing files --- .../pdfsegmentation/PDFSegmentation.ts | 142 +++++++++++------- .../specs/PDFSegmentation.spec.ts | 15 ++ .../pdfsegmentation/specs/fixtures.ts | 7 + 3 files changed, 107 insertions(+), 57 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index b909cf8846..d8d79a1f35 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -9,6 +9,7 @@ import settings from 'api/settings/settings'; import { tenants } from 'api/tenants/tenantContext'; import { ObjectIdSchema } from 'shared/types/commonTypes'; import request from 'shared/JSONRequest'; + import { handleError } from 'api/utils'; import { SegmentationModel } from './segmentationModel'; @@ -39,27 +40,61 @@ class PDFSegmentation { const fileContent = await readFile(uploadsPath(file.filename)); await request.uploadFile(serviceUrl, file.filename, fileContent); - const task = { + await this.segmentationTaskManager.startTask({ task: this.SERVICE_NAME, tenant, params: { filename: file.filename, }, - }; + }); - await this.segmentationTaskManager.startTask(task); await this.storeProcess(file._id, file.filename); - } catch { - handleError(`Error segmenting pdf, tenant: ${tenant}, file: ${file.filename}`); + } catch (err) { + if (err.code === 'ENOENT') { + await this.storeProcess(file._id, file.filename, false); + handleError(err); + return; + } - await new Promise(resolve => { - setTimeout(resolve, 60000); - }); + throw err; } }; - storeProcess = async (fileID: ObjectIdSchema, filename: string) => - SegmentationModel.save({ fileID, filename }); + storeProcess = async (fileID: ObjectIdSchema, filename: string, proccessing = true) => { + if (!proccessing) { + await SegmentationModel.save({ fileID, filename, status: 'failed' }); + } + + await SegmentationModel.save({ fileID, filename }); + }; + + getFilesToSegment = async (): Promise => + filesModel.db.aggregate([ + { + $match: { + type: 'document', + filename: { $exists: true }, + }, + }, + { + $lookup: { + from: 'segmentations', + localField: '_id', + foreignField: 'fileID', + as: 'segmentation', + }, + }, + { + $match: { + segmentation: { + $size: 0, + }, + }, + }, + { + $limit: this.batchSize, + }, + ]); segmentPdfs = async () => { const pendingTasks = await this.segmentationTaskManager.countPendingTasks(); @@ -67,57 +102,49 @@ class PDFSegmentation { return; } - await Promise.all( - Object.keys(tenants.tenants).map(async tenant => { - await tenants.run(async () => { - const settingsValues = await settings.get(); - const segmentationServiceConfig = settingsValues?.features?.segmentation; - - if (!segmentationServiceConfig) { - return; - } - - const filesToSegment = (await filesModel.db.aggregate([ - { - $match: { - type: 'document', - filename: { $exists: true }, - }, - }, - { - $lookup: { - from: 'segmentations', - localField: '_id', - foreignField: 'fileID', - as: 'segmentation', - }, - }, - { - $match: { - segmentation: { - $size: 0, - }, - }, - }, - { - $limit: this.batchSize, - }, - ])) as FileType & { filename: string; _id: ObjectIdSchema }[]; - - for (let i = 0; i < filesToSegment.length; i += 1) { - // eslint-disable-next-line no-await-in-loop - await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url, tenant); - } - }, tenant); - }) - ); + try { + await Promise.all( + Object.keys(tenants.tenants).map(async tenant => { + await tenants.run(async () => { + const settingsValues = await settings.get(); + const segmentationServiceConfig = settingsValues?.features?.segmentation; + + if (!segmentationServiceConfig) { + return; + } + + const filesToSegment = await this.getFilesToSegment(); + + for (let i = 0; i < filesToSegment.length; i += 1) { + // eslint-disable-next-line no-await-in-loop + await this.segmentOnePdf(filesToSegment[i], segmentationServiceConfig.url, tenant); + } + }, tenant); + }) + ); + } catch (err) { + if (err.code === 'ECONNREFUSED') { + await new Promise(resolve => { + setTimeout(resolve, 60000); + }); + } + handleError(err, { useContext: false }); + } }; requestResults = async (message: ResultsMessage) => { const response = await request.get(message.data_url); - const fileStream = ((await fetch(message.file_url!)).body as unknown) as Readable; // investigart mas sobre esto, instalar typos de node fetch ? - - return { data: JSON.parse(response.json), fileStream }; + const fileStream = (await fetch(message.file_url!)).body; + + if (!fileStream) { + throw new Error( + `Error requesting for segmentation file: ${message.params!.filename}, tenant: ${ + message.tenant + }` + ); + } + const stream = Readable.from(fileStream.toString()); + return { data: JSON.parse(response.json), fileStream: stream }; }; storeXML = async (filename: string, fileStream: Readable) => { @@ -145,6 +172,7 @@ class PDFSegmentation { if (segmentation) { await SegmentationModel.save({ ...segmentation, + filename, autoexpire: null, status: 'failed', }); diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 194f77f20d..90b15d2d07 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -8,6 +8,7 @@ import { fixturesPdfNameB, fixturesTwelveFiles, fixturesFiveFiles, + fixturesMissingPdf, } from 'api/services/pdfsegmentation/specs/fixtures'; import fs from 'fs'; @@ -152,6 +153,20 @@ describe('PDFSegmentation', () => { expect(segmentPdfs.segmentationTaskManager?.startTask).toHaveBeenCalledTimes(4); }); + describe('if the file is missing', () => { + it('should throw an error and store the segmentation as failed', async () => { + await fixturer.clearAllAndLoad(dbOne, fixturesMissingPdf); + + await segmentPdfs.segmentPdfs(); + + await tenants.run(async () => { + const [segmentation] = await SegmentationModel.get(); + expect(segmentation.status).toBe('failed'); + expect(segmentation.filename).toBe(fixturesMissingPdf.files![0].filename); + }, 'tenantOne'); + }); + }); + describe('when there is pending tasks', () => { it('should not put more', async () => { await fixturer.clearAllAndLoad(dbOne, fixturesFiveFiles); diff --git a/app/api/services/pdfsegmentation/specs/fixtures.ts b/app/api/services/pdfsegmentation/specs/fixtures.ts index d5d009609d..7b3e1787fe 100644 --- a/app/api/services/pdfsegmentation/specs/fixtures.ts +++ b/app/api/services/pdfsegmentation/specs/fixtures.ts @@ -55,6 +55,12 @@ const fixturesOtherFile: DBFixture = { files: [factory.file('F2', 'A2', 'document', fixturesPdfNameB)], }; +const fixturesMissingPdf: DBFixture = { + entities: [factory.entity('A1', 'templateToSegmentA')], + settings, + files: [factory.file('F1', 'A1', 'document', 'missing.pdf')], +}; + const fixturesFiveFiles: DBFixture = { settings, entities: [ @@ -114,4 +120,5 @@ export { fixturesOtherFile, fixturesTwelveFiles, fixturesFiveFiles, + fixturesMissingPdf, }; From 18bb88e582bad00825fb0d9fb8e779fc19a396da Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 28 Oct 2021 10:57:05 +0200 Subject: [PATCH 59/62] Storing only one process when the segmentation fails --- .../pdfsegmentation/PDFSegmentation.ts | 13 ++++----- .../specs/PDFSegmentation.spec.ts | 29 ++++++++++++++++++- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index d8d79a1f35..27ed80cc32 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -60,13 +60,12 @@ class PDFSegmentation { } }; - storeProcess = async (fileID: ObjectIdSchema, filename: string, proccessing = true) => { - if (!proccessing) { - await SegmentationModel.save({ fileID, filename, status: 'failed' }); - } - - await SegmentationModel.save({ fileID, filename }); - }; + storeProcess = async (fileID: ObjectIdSchema, filename: string, proccessing = true) => + SegmentationModel.save({ + fileID, + filename, + status: proccessing ? 'processing' : 'failed', + }); getFilesToSegment = async (): Promise => filesModel.db.aggregate([ diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 90b15d2d07..14fca8bdd3 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -1,4 +1,6 @@ /* eslint-disable camelcase */ +/* eslint-disable max-lines */ + import { fixturer, createNewMongoDB } from 'api/utils/testing_db'; import { MongoMemoryServer } from 'mongodb-memory-server'; import { @@ -160,9 +162,11 @@ describe('PDFSegmentation', () => { await segmentPdfs.segmentPdfs(); await tenants.run(async () => { - const [segmentation] = await SegmentationModel.get(); + const segmentations = await SegmentationModel.get(); + const [segmentation] = segmentations; expect(segmentation.status).toBe('failed'); expect(segmentation.filename).toBe(fixturesMissingPdf.files![0].filename); + expect(segmentations.length).toBe(1); }, 'tenantOne'); }); }); @@ -269,5 +273,28 @@ describe('PDFSegmentation', () => { }); expect(fs.existsSync(path.join(segmentationFolder, 'documentA.xml'))).toBe(true); }); + + describe('if the segmentation fails', () => { + it('should store it as failed', async () => { + await segmentPdfs.processResults({ + tenant: tenantOne.name, + params: { filename: 'documentA.pdf' }, + data_url: 'http://localhost:1235/results', + file_url: 'http://localhost:1235/file', + task: 'segmentation', + success: false, + }); + + await tenants.run(async () => { + const segmentations = await SegmentationModel.get(); + const [segmentation] = segmentations; + expect(segmentation.status).toBe('failed'); + expect(segmentation.filename).toBe(fixturesPdfNameA); + expect(segmentation.fileID).toEqual(fixturesOneFile.files![0]._id); + expect(segmentation.autoexpire).toBe(null); + expect(segmentations.length).toBe(1); + }, tenantOne.name); + }); + }); }); }); From 94696693c2e3bfdb3daee88adeaa9706562f1293 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 28 Oct 2021 11:30:33 +0200 Subject: [PATCH 60/62] adding tenant to service url --- .../pdfsegmentation/PDFSegmentation.ts | 6 ++-- .../specs/PDFSegmentation.spec.ts | 31 +++++++++++++------ .../specs/ExternalDummyService.ts | 2 +- app/api/utils/async-fs.js | 1 + 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 27ed80cc32..4f9dba77c5 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -12,6 +12,7 @@ import request from 'shared/JSONRequest'; import { handleError } from 'api/utils'; import { SegmentationModel } from './segmentationModel'; +import urljoin from 'url-join'; class PDFSegmentation { SERVICE_NAME = 'segmentation'; @@ -38,7 +39,7 @@ class PDFSegmentation { ) => { try { const fileContent = await readFile(uploadsPath(file.filename)); - await request.uploadFile(serviceUrl, file.filename, fileContent); + await request.uploadFile(urljoin(serviceUrl, tenant), file.filename, fileContent); await this.segmentationTaskManager.startTask({ task: this.SERVICE_NAME, @@ -142,8 +143,7 @@ class PDFSegmentation { }` ); } - const stream = Readable.from(fileStream.toString()); - return { data: JSON.parse(response.json), fileStream: stream }; + return { data: JSON.parse(response.json), fileStream: (fileStream as unknown) as Readable }; }; storeXML = async (filename: string, fileStream: Readable) => { diff --git a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts index 14fca8bdd3..5c1006cc58 100644 --- a/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts +++ b/app/api/services/pdfsegmentation/specs/PDFSegmentation.spec.ts @@ -13,7 +13,7 @@ import { fixturesMissingPdf, } from 'api/services/pdfsegmentation/specs/fixtures'; -import fs from 'fs'; +import asyncFS from 'api/utils/async-fs'; import path from 'path'; import { tenants } from 'api/tenants/tenantContext'; @@ -73,8 +73,12 @@ describe('PDFSegmentation', () => { dbTwo = DB.connectionForDB(tenantTwo.dbName).db; tenants.tenants = { tenantOne }; - fileA = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); - fileB = fs.readFileSync(`app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}`); + fileA = await asyncFS.readFile( + `app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}` + ); + fileB = await asyncFS.readFile( + `app/api/services/pdfsegmentation/specs/uploads/${fixturesPdfNameA}` + ); jest.spyOn(request, 'uploadFile').mockResolvedValue({}); jest.resetAllMocks(); }); @@ -84,7 +88,7 @@ describe('PDFSegmentation', () => { await segmentPdfs.segmentPdfs(); expect(request.uploadFile).toHaveBeenCalledWith( - 'http://localhost:1234/files', + 'http://localhost:1234/files/tenantOne', fixturesPdfNameA, fileA ); @@ -94,7 +98,7 @@ describe('PDFSegmentation', () => { await fixturer.clearAllAndLoad(dbOne, fixturesOtherFile); await segmentPdfs.segmentPdfs(); expect(request.uploadFile).toHaveBeenCalledWith( - 'http://localhost:1234/files', + 'http://localhost:1234/files/tenantOne', fixturesPdfNameB, fileB ); @@ -204,8 +208,8 @@ describe('PDFSegmentation', () => { await fixturer.clearAllAndLoad(dbOne, fixturesOneFile); await segmentPdfs.segmentPdfs(); segmentationFolder = path.join(tenantOne.uploadedDocuments, 'segmentation'); - if (fs.existsSync(segmentationFolder)) { - fs.rmdirSync(segmentationFolder, { recursive: true }); + if (await asyncFS.exists(segmentationFolder)) { + await asyncFS.rmdir(segmentationFolder, { recursive: true }); } segmentationExternalService = new ExternalDummyService(1235); await segmentationExternalService.start(); @@ -231,8 +235,8 @@ describe('PDFSegmentation', () => { afterEach(async () => { await segmentationExternalService.stop(); - if (fs.existsSync(segmentationFolder)) { - fs.rmdirSync(segmentationFolder, { recursive: true }); + if (await asyncFS.exists(segmentationFolder)) { + await asyncFS.rmdir(segmentationFolder, { recursive: true }); } }); it('should store the segmentation', async () => { @@ -271,7 +275,14 @@ describe('PDFSegmentation', () => { task: 'segmentation', success: true, }); - expect(fs.existsSync(path.join(segmentationFolder, 'documentA.xml'))).toBe(true); + const fileExists = await asyncFS.exists(path.join(segmentationFolder, 'documentA.xml')); + const fileContents = await asyncFS.readFile( + path.join(segmentationFolder, 'documentA.xml'), + 'utf8' + ); + expect(fileExists).toBe(true); + const xml = 'Cold shrimps soup'; + await expect(fileContents.includes(xml)).toBe(true); }); describe('if the segmentation fails', () => { diff --git a/app/api/services/tasksmanager/specs/ExternalDummyService.ts b/app/api/services/tasksmanager/specs/ExternalDummyService.ts index 4bd4692bcc..94186a619a 100644 --- a/app/api/services/tasksmanager/specs/ExternalDummyService.ts +++ b/app/api/services/tasksmanager/specs/ExternalDummyService.ts @@ -43,7 +43,7 @@ export class ExternalDummyService { res.send('ok'); }); - this.app.post('/files', uploadMiddleware.multiple(), (req, res) => { + this.app.post('/files/*', uploadMiddleware.multiple(), (req, res) => { if (req.files.length) { const files = req.files as { buffer: Buffer; originalname: string }[]; this.files.push(files[0].buffer); diff --git a/app/api/utils/async-fs.js b/app/api/utils/async-fs.js index 396fedac43..86cd045d9d 100644 --- a/app/api/utils/async-fs.js +++ b/app/api/utils/async-fs.js @@ -12,4 +12,5 @@ export default { readFile: promisify(fs.readFile), readdir: promisify(fs.readdir), mkdir: promisify(fs.mkdir), + rmdir: promisify(fs.rmdir), }; From cc84a09ef42f5044211c88b93d4052eac34fe391 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Thu, 28 Oct 2021 15:10:20 +0200 Subject: [PATCH 61/62] fixed types error --- app/api/services/pdfsegmentation/PDFSegmentation.ts | 3 +-- package.json | 1 + yarn.lock | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/app/api/services/pdfsegmentation/PDFSegmentation.ts b/app/api/services/pdfsegmentation/PDFSegmentation.ts index 4f9dba77c5..0f6e06ce3f 100644 --- a/app/api/services/pdfsegmentation/PDFSegmentation.ts +++ b/app/api/services/pdfsegmentation/PDFSegmentation.ts @@ -1,6 +1,7 @@ import { TaskManager, ResultsMessage } from 'api/services/tasksmanager/TaskManager'; import { uploadsPath, fileFromReadStream, createDirIfNotExists, readFile } from 'api/files'; import { Readable } from 'stream'; +import urljoin from 'url-join'; import filesModel from 'api/files/filesModel'; import path from 'path'; import { FileType } from 'shared/types/fileType'; @@ -9,10 +10,8 @@ import settings from 'api/settings/settings'; import { tenants } from 'api/tenants/tenantContext'; import { ObjectIdSchema } from 'shared/types/commonTypes'; import request from 'shared/JSONRequest'; - import { handleError } from 'api/utils'; import { SegmentationModel } from './segmentationModel'; -import urljoin from 'url-join'; class PDFSegmentation { SERVICE_NAME = 'segmentation'; diff --git a/package.json b/package.json index 6caf79af2d..c7e6d63db8 100644 --- a/package.json +++ b/package.json @@ -82,6 +82,7 @@ "@types/react-modal": "^3.12.0", "@types/redis": "^2.8.31", "@types/redlock": "^4.0.2", + "@types/url-join": "^4.0.1", "ajv": "^6.12.3", "ajv-keywords": "^3.4.1", "async": "2.6.3", diff --git a/yarn.lock b/yarn.lock index f4bd91660e..90a13641b7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2439,6 +2439,11 @@ dependencies: source-map "^0.6.1" +"@types/url-join@^4.0.1": + version "4.0.1" + resolved "https://registry.yarnpkg.com/@types/url-join/-/url-join-4.0.1.tgz#4989c97f969464647a8586c7252d97b449cdc045" + integrity sha512-wDXw9LEEUHyV+7UWy7U315nrJGJ7p1BzaCxDpEoLr789Dk1WDVMMlf3iBfbG2F8NdWnYyFbtTxUn2ZNbm1Q4LQ== + "@types/webpack-env@1.15.2": version "1.15.2" resolved "https://registry.yarnpkg.com/@types/webpack-env/-/webpack-env-1.15.2.tgz#927997342bb9f4a5185a86e6579a0a18afc33b0a" From 7b9e3b5eaa30f1d3ebf40f041c38f329099e38c9 Mon Sep 17 00:00:00 2001 From: Alberto Casado Torres Date: Mon, 8 Nov 2021 09:49:45 +0100 Subject: [PATCH 62/62] sgementation service behind a config flag --- app/api/config.ts | 1 + app/server.js | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/app/api/config.ts b/app/api/config.ts index 9b81d95005..0b2f58e3dd 100644 --- a/app/api/config.ts +++ b/app/api/config.ts @@ -47,6 +47,7 @@ export const config = { customUploads: CUSTOM_UPLOADS_FOLDER || `${rootPath}/custom_uploads/`, temporalFiles: TEMPORAL_FILES_FOLDER || `${rootPath}/temporal_files/`, }, + externalServices: Boolean(process.env.EXTERNAL_SERVICES) || false, redis: { activated: CLUSTER_MODE, diff --git a/app/server.js b/app/server.js index 2c51007187..d19f834ae9 100644 --- a/app/server.js +++ b/app/server.js @@ -153,14 +153,16 @@ DB.connect(config.DBHOST, dbAuth).then(async () => { ); topicClassificationRepeater.start(); - const segmentationConnector = new PDFSegmentation(); - const segmentationRepeater = new DistributedLoop( - 'segmentation_repeat', - segmentationConnector.segmentPdfs, - { port: config.redis.port, host: config.redis.host, delayTimeBetweenTasks: 2000 } - ); + if (config.externalServices) { + const segmentationConnector = new PDFSegmentation(); + const segmentationRepeater = new DistributedLoop( + 'segmentation_repeat', + segmentationConnector.segmentPdfs, + { port: config.redis.port, host: config.redis.host, delayTimeBetweenTasks: 2000 } + ); - segmentationRepeater.start(); + segmentationRepeater.start(); + } } });