Skip to content

Commit

Permalink
Rewrite search (#926)
Browse files Browse the repository at this point in the history
* added rewritten trie implementation, manipulation functions, and search data, maintaining functions and writing function

* integrate rewriteSearch into index and parseXmlJson

* added testcase for the index search

* debugged and fixed the issue of not parsing index nodes within snippets

* some cleaning

* fixed all the errors indicated in the failedTest.txt

* support urls to exercise

* commented out testcode, deleted temp files

* add test for correctness of urls; ran and passed the test

* fix multiple bugs for text search

* support order, open close, and simplify the frontend's parsing job
  • Loading branch information
yiwen101 authored Oct 19, 2023
1 parent ca6ce37 commit 0f53ae6
Show file tree
Hide file tree
Showing 5 changed files with 639 additions and 2 deletions.
7 changes: 6 additions & 1 deletion javascript/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ import { setupSnippetsJs } from "./processingFunctions/processSnippetJs";
import { getAnswers } from "./processingFunctions/processExercisePdf";

// json (for cadet frontend)
import {testIndexSearch} from "./searchRewriteTest";
import { parseXmlJson } from "./parseXmlJson";
import {writeRewritedSearchData} from "./searchRewrite";
import { setupSnippetsJson } from "./processingFunctions/processSnippetJson";
import { createTocJson } from "./generateTocJson";
import { setupReferencesJson } from "./processingFunctions/processReferenceJson";
Expand Down Expand Up @@ -360,7 +362,10 @@ async function main() {
await recursiveXmlToHtmlInOrder("setupSnippet");
console.log("setup snippets and references done\n");

recursiveXmlToHtmlInOrder("parseXml");
await recursiveXmlToHtmlInOrder("parseXml");
writeRewritedSearchData();
// This is meant to be temporary; the original "generateSearchData" will be removed once the frontend update is completed.
//testIndexSearch();
}
}

Expand Down
47 changes: 46 additions & 1 deletion javascript/parseXmlJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ import {
recursivelyProcessTextSnippetJson
} from "./processingFunctions";

import {getIdForExerciseJson} from "./processingFunctions/processExerciseJson";

import {
generateSearchData
} from "./generateSearchData";

import {parseAndInsertToIndexTrie, parseAndInsertToIdToContentMap} from "./searchRewrite";

let paragraph_count = 0;
let heading_count = 0;
let footnote_count = 0;
Expand Down Expand Up @@ -120,6 +124,31 @@ const processLatex = (node, obj, inline) => {
obj["body"] = math;
obj["tag"] = "LATEX";
};
// Anchor id ("#ex-...") of the most recently processed EXERCISE node, or
// undefined when none has been seen yet or its id could not be resolved.
let latest_exercise_json_id = undefined;

/**
 * Maps a node name to a function returning the URL fragment (anchor) of the
 * most recently processed element of that kind. Each function reads the
 * module-level counters at call time, so the result reflects the parser's
 * current position in the document.
 *
 * Every entry returns a string; an empty string means "no anchor, link to the
 * page itself". findParentID prefixes the result with the chapter index.
 *
 * NOTE(review): SNIPPET is deliberately not listed (its entry was commented
 * out), so a lookup starting inside a snippet resolves to the nearest
 * enclosing tag that is listed here.
 */
const tagsWithIds = {
  "#document": () => "",
  SUBSUBSECTION: () =>
    subsubsection_count > 0 ? `#sec${chapterIndex}.${subsubsection_count}` : "",
  TEXT: () => "#p" + paragraph_count,
  SUBHEADING: () => `#h${heading_count}`,
  SUBSUBHEADING: () => `#h${heading_count}`,
  SECTION: () => `#h${heading_count}`,
  FOOTNOTE: () => `#footnote-link-${footnote_count}`,
  DISPLAYFOOTNOTE: () => `#footnote-${display_footnote_count}`,
  // Fall back to "" when the exercise id is unknown; returning undefined here
  // would be string-concatenated by the caller into an id ending in "undefined".
  EXERCISE: () =>
    latest_exercise_json_id === undefined ? "" : latest_exercise_json_id,
};
/**
 * Walks up from `node` to the closest ancestor whose node name has an entry
 * in `tagsWithIds` and builds an id from the current chapter index followed
 * by that ancestor's anchor.
 *
 * @param node - node whose ancestors are searched (the node itself is not checked)
 * @returns the id string, or undefined when no ancestor has an entry
 */
const findParentID = (node) => {
  for (let ancestor = node.parentNode; ancestor; ancestor = ancestor.parentNode) {
    const tagName = ancestor.nodeName;
    if (tagsWithIds[tagName]) {
      return `${chapterIndex}` + tagsWithIds[tagName]();
    }
  }
  return undefined;
};

const processTextFunctions = {
// Text tags: tag that is parsed as text
Expand All @@ -132,6 +161,10 @@ const processTextFunctions = {
}
}
},
INDEX: (node, obj) => {
const id = findParentID(node);
parseAndInsertToIndexTrie(node, {id});
},

AMP: (_node, obj) => {
processText("&", obj);
Expand Down Expand Up @@ -181,6 +214,7 @@ const processTextFunctions = {
},

// Handles an EXERCISE node: remembers its anchor id, bumps the exercise
// counter, then delegates the actual conversion to processExerciseJson.
EXERCISE: (node, obj) => {
// Stored in module state so that tagsWithIds.EXERCISE can report it for
// nodes nested inside this exercise; may be undefined if no id was resolved.
latest_exercise_json_id = getIdForExerciseJson(node);
// Incremented before the call below, so the count passed on includes this exercise.
exercise_count += 1;
processExerciseJson(node, obj, chapArrIndex, exercise_count);
},
Expand Down Expand Up @@ -310,6 +344,8 @@ const processTextFunctions = {
},

SNIPPET: (node, obj) => {
const indexNodes = node.getElementsByTagName("INDEX");

if (node.getAttribute("HIDE") == "yes") {
return;
} else if (node.getAttribute("LATEX") == "yes") {
Expand Down Expand Up @@ -349,6 +385,10 @@ const processTextFunctions = {
obj["body"] = obj["body"].replace(matchStr, newStr);
}
}

for (let i = 0; i < indexNodes.length; i++) {
processTextJson(indexNodes[i], {});
}

return;
}
Expand All @@ -358,6 +398,9 @@ const processTextFunctions = {
obj["latex"] = false;
obj["id"] = snippet_count;
processSnippetJson(node, obj);
for (let i = 0; i < indexNodes.length; i++) {
processTextJson(indexNodes[i], {});
}
},

SUBINDEX: (node, obj) => {
Expand Down Expand Up @@ -520,7 +563,8 @@ export const parseXmlJson = (doc, arr, filename) => {
} else {
displayTitle = chapterIndex + "\u00A0\u00A0" + chapterTitle;
}


latest_exercise_json_id = undefined;
paragraph_count = 0;
footnote_count = 0;
display_footnote_count = 0;
Expand Down Expand Up @@ -571,6 +615,7 @@ export const parseXmlJson = (doc, arr, filename) => {
recursiveProcessTextJson(name.nextSibling, arr, title);
}

parseAndInsertToIdToContentMap(arr,chapterIndex);
generateSearchData(doc, filename);

};
19 changes: 19 additions & 0 deletions javascript/processingFunctions/processExerciseJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,23 @@ const processExerciseJson = (node, obj) => {
}
};

/**
 * Computes the anchor id of an exercise node.
 *
 * The reference-store key is the NAME attribute of the node's first LABEL
 * descendant, or "ex:unlabeled" followed by the current `unlabeledEx` value
 * when the node has no LABEL.
 *
 * @param node - the EXERCISE element
 * @returns `#ex-<displayName>` for a known key; otherwise undefined, after
 *          reporting the key through missingExerciseWarning
 */
export const getIdForExerciseJson = (node) => {
  const labelNode = node.getElementsByTagName("LABEL")[0];
  const key = labelNode
    ? labelNode.getAttribute("NAME")
    : "ex:unlabeled" + unlabeledEx;

  const entry = referenceStore[key];
  if (entry) {
    return `#ex-${entry.displayName}`;
  }

  missingExerciseWarning(key);
  return undefined;
};

export default processExerciseJson;
Loading

0 comments on commit 0f53ae6

Please sign in to comment.