Skip to content

Commit

Permalink
Merge branch 'master-COLLAUDO' into features/kb-scrape-types-coll1
Browse files Browse the repository at this point in the history
  • Loading branch information
Giovanni Troisi committed Sep 18, 2024
2 parents f050e2f + 239c46d commit 3f4f78e
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 1 deletion.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"@tiledesk/tiledesk-rasa-connector": "^1.0.10",
"@tiledesk/tiledesk-telegram-connector": "^0.1.14",
"@tiledesk/tiledesk-train-jobworker": "^0.0.11",
"@tiledesk/tiledesk-tybot-connector": "^0.2.96",
"@tiledesk/tiledesk-tybot-connector": "^0.2.98",
"@tiledesk/tiledesk-whatsapp-connector": "^0.1.72",
"@tiledesk/tiledesk-whatsapp-jobworker": "^0.0.8",
"@tiledesk/tiledesk-sms-connector": "^0.1.10",
Expand Down
112 changes: 112 additions & 0 deletions routes/kb.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ var mongoose = require('mongoose');
const faq = require('../models/faq');
const faq_kb = require('../models/faq_kb');
let Integration = require('../models/integrations');
var parsecsv = require("fast-csv");

const { MODELS_MULTIPLIER } = require('../utils/aiUtils');

Expand Down Expand Up @@ -1007,6 +1008,117 @@ router.post('/multi', upload.single('uploadFile'), async (req, res) => {

})

/**
 * POST /csv?namespace=<namespace_id>
 *
 * Imports FAQs from an uploaded CSV file (multipart field 'uploadFile') into the
 * given namespace. The CSV is parsed without a header row: column 0 is used as
 * the question and column 1 as the answer. The column delimiter is read from the
 * 'delimiter' body field and defaults to ";".
 *
 * Responses sent by this handler:
 *  - 200: the value resolved by saveBulk
 *  - 400: missing upload, missing 'namespace' query param, or CSV parsing error
 *  - 403: no namespace for the project, namespace not owned by the project,
 *         plan limit reached/exceeded, or more than 300 rows in the file
 *  - 500: database / quota lookup failure or bulk save failure
 */
router.post('/csv', upload.single('uploadFile'), async (req, res) => {

  let project_id = req.projectid;

  // Without this guard a request with no attachment throws on req.file.buffer
  // inside the async handler and never gets a response.
  if (!req.file || !req.file.buffer) {
    return res.status(400).send({ success: false, error: "The 'uploadFile' attachment is missing" });
  }

  let csv = req.file.buffer.toString('utf8');
  winston.debug("csv: ", csv);

  let delimiter = (req.body && req.body.delimiter) || ";";
  winston.debug("delimiter: ", delimiter);

  let namespace_id = req.query.namespace;
  if (!namespace_id) {
    return res.status(400).send({ success: false, error: "queryParam 'namespace' is not defined" })
  }

  let namespaces;
  try {
    namespaces = await Namespace.find({ id_project: project_id });
  } catch (err) {
    winston.error("find namespaces error: ", err)
    // Return here: the handler must not go on and try to answer a second time.
    return res.status(500).send({ success: false, error: err })
  }

  if (!namespaces || namespaces.length == 0) {
    let alert = "No namespace found for the selected project " + project_id + ". Cannot add content to a non-existent namespace."
    winston.warn(alert);
    return res.status(403).send({ success: false, error: alert });
  }

  let namespaceIds = namespaces.map(namespace => namespace.id);

  if (!namespaceIds.includes(namespace_id)) {
    return res.status(403).send({ success: false, error: "Not allowed. The namespace does not belong to the current project." })
  }

  let kbs_limit;
  let kbs_count;
  try {
    let quoteManager = req.app.get('quote_manager');
    let limits = await quoteManager.getPlanLimits(req.project);
    kbs_limit = limits.kbs;
    winston.verbose("Limit of kbs for current plan: " + kbs_limit);

    kbs_count = await KB.countDocuments({ id_project: project_id }).exec();
    winston.verbose("Kbs count: " + kbs_count);
  } catch (err) {
    winston.error("Unable to check the kbs limit for project " + project_id + ": ", err);
    return res.status(500).send({ success: false, error: err });
  }

  if (kbs_count >= kbs_limit) {
    return res.status(403).send({ success: false, error: "Maximum number of resources reached for the current plan", plan_limit: kbs_limit })
  }

  let webhook = apiUrl + '/webhook/kb/status?token=' + KB_WEBHOOK_TOKEN;

  let kbs = [];

  let parser;
  try {
    // Invalid parser options (e.g. an unsupported delimiter) are rejected when
    // the parser is created, before any stream event is emitted.
    parser = parsecsv.parseString(csv, { headers: false, delimiter: delimiter });
  } catch (err) {
    winston.error("CSV parsing error: ", err);
    return res.status(400).send({ success: false, error: err });
  }

  parser
    .on("data", (data) => {

      let question = data[0];
      // A row with a single column has no answer: use an empty string instead of
      // letting the literal text "undefined" end up in the content.
      let answer = data[1] || "";

      // A row without a question cannot be turned into a faq.
      if (!question) {
        winston.debug("Skipping CSV row without question: ", data);
        return;
      }

      kbs.push({
        id_project: project_id,
        name: question,
        source: question,
        type: 'faq',
        content: question + "\n" + answer,
        namespace: namespace_id,
        status: -1
      })
    })
    .on("end", () => {
      // A response may already have been sent by the "error" handler.
      if (res.headersSent) {
        return;
      }

      winston.debug("kbs after CSV parsing: ", kbs);

      let total_count = kbs_count + kbs.length;
      if (total_count >= kbs_limit) {
        return res.status(403).send({ success: false, error: "Cannot exceed the number of resources in the current plan", plan_limit: kbs_limit })
      }

      if (kbs.length > 300) {
        return res.status(403).send({ success: false, error: "Too many faqs. Can't index more than 300 urls at a time." })
      }

      // One upsert per faq, matched on project + type + source (the question).
      // NOTE(review): the filter does not include the namespace, so the same
      // question in two namespaces of one project targets the same document — confirm this is intended.
      let operations = kbs.map(doc => {
        return {
          updateOne: {
            filter: { id_project: doc.id_project, type: 'faq', source: doc.source },
            update: doc,
            upsert: true,
            returnOriginal: false
          }
        }
      })

      saveBulk(operations, kbs, project_id).then((result) => {
        // Drop the attributes that are not forwarded, then rename _id to id and
        // attach the status webhook url.
        let resources = result.map(({ name, status, __v, createdAt, updatedAt, id_project, ...keepAttrs }) => keepAttrs)
        resources = resources.map(({ _id, ...rest }) => {
          // Key was misspelled "webhooh"; presumably the consumer reads "webhook" — confirm against the worker.
          return { id: _id, webhook: webhook, ...rest };
        })
        winston.verbose("resources to be sent to worker: ", resources);
        // Scheduling only happens when NODE_ENV is unset.
        if (!process.env.NODE_ENV) {
          scheduleScrape(resources);
        }
        res.status(200).send(result);
      }).catch((err) => {
        winston.error("Unabled to saved kbs in bulk " + err);
        res.status(500).send(err);
      })

    })
    .on("error", (err) => {
      winston.error("CSV parsing error: ", err);
      if (res.headersSent) {
        return;
      }
      res.status(400).send({ success: false, error: err });
    })

})

router.post('/sitemap', async (req, res) => {

let sitemap_url = req.body.sitemap;
Expand Down
2 changes: 2 additions & 0 deletions test/example-kb-faqs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Question 1;Question 1 Answer 1
Question 2;Question 2 Answer 2
45 changes: 45 additions & 0 deletions test/kbRoute.js
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,51 @@ describe('KbRoute', () => {
})
}).timeout(20000)

/**
 * Uploads test/example-kb-faqs.csv to the /kb/csv endpoint of a freshly created
 * project, using the id of the single namespace returned by /kb/namespace/all,
 * and expects a 200 response.
 */
it('add-multiple-faqs-with-csv', (done) => {

  var email = "test-signup-" + Date.now() + "@email.com";
  var pwd = "pwd";

  userService.signup(email, pwd, "Test Firstname", "Test lastname").then(function (savedUser) {
    // Returned so that a failure while creating the project reaches the catch below.
    return projectService.create("test-faqkb-create", savedUser._id).then(function (savedProject) {

      chai.request(server)
        .get('/' + savedProject._id + '/kb/namespace/all')
        .auth(email, pwd)
        .end((err, res) => {

          if (err) { console.error("err: ", err); }
          if (log) { console.log("res.body: ", res.body) }

          res.should.have.status(200)
          expect(res.body.length).to.equal(1);

          let namespace_id = res.body[0].id;

          chai.request(server)
            .post('/' + savedProject._id + '/kb/csv?namespace=' + namespace_id)
            .auth(email, pwd)
            .set('Content-Type', 'text/csv')
            .attach('uploadFile', fs.readFileSync(path.resolve(__dirname, './example-kb-faqs.csv')), 'example-kb-faqs.csv')
            .field('delimiter', ';')
            .end((err, res) => {

              if (err) { console.error("err: ", err); }
              if (log) { console.log("res.body: ", res.body) }

              res.should.have.status(200);

              done();

            })
        })
    });
  }).catch(done); // fail immediately instead of waiting for the timeout when setup rejects

}).timeout(10000)


/**
* If you try to add content to a project that has no namespace, it returns 403 forbidden.
*/
Expand Down

0 comments on commit 3f4f78e

Please sign in to comment.