diff --git a/javascript/index.js b/javascript/index.js
index 89fef6feb..24f568ebb 100644
--- a/javascript/index.js
+++ b/javascript/index.js
@@ -36,7 +36,9 @@ import { setupSnippetsJs } from "./processingFunctions/processSnippetJs";
 import { getAnswers } from "./processingFunctions/processExercisePdf";
 
 // json (for cadet frontend)
+import { testIndexSearch } from "./searchRewriteTest";
 import { parseXmlJson } from "./parseXmlJson";
+import { writeRewritedSearchData } from "./searchRewrite";
 import { setupSnippetsJson } from "./processingFunctions/processSnippetJson";
 import { createTocJson } from "./generateTocJson";
 import { setupReferencesJson } from "./processingFunctions/processReferenceJson";
@@ -360,7 +362,10 @@ async function main() {
     await recursiveXmlToHtmlInOrder("setupSnippet");
     console.log("setup snippets and references done\n");
 
-    recursiveXmlToHtmlInOrder("parseXml");
+    await recursiveXmlToHtmlInOrder("parseXml");
+    writeRewritedSearchData();
+    // Temporary wiring: the original "generateSearchData" will be removed once the frontend has been updated to consume the rewritten search data.
+    //testIndexSearch();
   }
 }
diff --git a/javascript/parseXmlJson.js b/javascript/parseXmlJson.js
index e27e1a516..0355d53c1 100644
--- a/javascript/parseXmlJson.js
+++ b/javascript/parseXmlJson.js
@@ -12,10 +12,14 @@ import {
   recursivelyProcessTextSnippetJson
 } from "./processingFunctions";
 
+import { getIdForExerciseJson } from "./processingFunctions/processExerciseJson";
+
 import { generateSearchData } from "./generateSearchData";
 
+import { parseAndInsertToIndexTrie, parseAndInsertToIdToContentMap } from "./searchRewrite";
+
 let paragraph_count = 0;
 let heading_count = 0;
 let footnote_count = 0;
@@ -120,6 +124,31 @@ const processLatex = (node, obj, inline) => {
   obj["body"] = math;
   obj["tag"] = "LATEX";
 };
+let latest_exercise_json_id = undefined;
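+// maps a node name to a function that returns the URL fragment (e.g. "#p3", "#h2")
+// for that element; findParentID below prefixes the fragment with the chapter index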
+const tagsWithIds = {
+  "#document": () => "",
+  SUBSUBSECTION: () =>
+    subsubsection_count > 0 ? `#sec${chapterIndex}.${subsubsection_count}` : "",
+  TEXT: () => "#p" + paragraph_count,
+  SUBHEADING: () => `#h${heading_count}`,
+  SUBSUBHEADING: () => `#h${heading_count}`,
+  SECTION: () => `#h${heading_count}`,
+  FOOTNOTE: () => `#footnote-link-${footnote_count}`,
+  DISPLAYFOOTNOTE: () => `#footnote-${display_footnote_count}`,
+  //SNIPPET: () => `${snippet_count}`,
+
+  EXERCISE: () => latest_exercise_json_id,
+};
+const findParentID = (node) => {
+  let parent = node.parentNode;
+  while (parent) {
+    if (tagsWithIds[parent.nodeName]) {
+      return `${chapterIndex}` + tagsWithIds[parent.nodeName]();
+    } else {
+      parent = parent.parentNode;
+    }
+  }
+};
 
 const processTextFunctions = {
   // Text tags: tag that is parsed as text
@@ -132,6 +161,10 @@ const processTextFunctions = {
       }
     }
   },
+  INDEX: (node, obj) => {
+    const id = findParentID(node);
+    parseAndInsertToIndexTrie(node, { id });
+  },
 
   AMP: (_node, obj) => {
     processText("&", obj);
@@ -181,6 +214,7 @@ const processTextFunctions = {
   },
 
   EXERCISE: (node, obj) => {
+    latest_exercise_json_id = getIdForExerciseJson(node);
     exercise_count += 1;
     processExerciseJson(node, obj, chapArrIndex, exercise_count);
   },
@@ -310,6 +344,8 @@ const processTextFunctions = {
   },
 
   SNIPPET: (node, obj) => {
+    const indexNodes = node.getElementsByTagName("INDEX");
+
     if (node.getAttribute("HIDE") == "yes") {
       return;
     } else if (node.getAttribute("LATEX") == "yes") {
@@ -349,6 +385,10 @@ const processTextFunctions = {
           obj["body"] = obj["body"].replace(matchStr, newStr);
         }
       }
+
+      for (let i = 0; i < indexNodes.length; i++) {
+        processTextJson(indexNodes[i], {});
+      }
       return;
     }
 
@@ -358,6 +398,9 @@ const processTextFunctions = {
     obj["latex"] = false;
     obj["id"] = snippet_count;
     processSnippetJson(node, obj);
+    for (let i = 0; i < indexNodes.length; i++) {
+      processTextJson(indexNodes[i], {});
+    }
   },
 
   SUBINDEX: (node, obj) => {
@@ -520,7 +563,8 @@ export const parseXmlJson = (doc, arr, filename) => {
   } else {
     displayTitle = chapterIndex + "\u00A0\u00A0" + chapterTitle;
   }
-
+
+  latest_exercise_json_id = undefined;
   paragraph_count = 0;
   footnote_count = 0;
   display_footnote_count = 0;
@@ -571,6 +615,7 @@ export const parseXmlJson = (doc, arr, filename) => {
     recursiveProcessTextJson(name.nextSibling, arr, title);
   }
 
+  parseAndInsertToIdToContentMap(arr, chapterIndex);
   generateSearchData(doc, filename);
 };
diff --git a/javascript/processingFunctions/processExerciseJson.js b/javascript/processingFunctions/processExerciseJson.js
index 42c96af75..29712842d 100755
--- a/javascript/processingFunctions/processExerciseJson.js
+++ b/javascript/processingFunctions/processExerciseJson.js
@@ -35,4 +35,23 @@ const processExerciseJson = (node, obj) => {
   }
 };
 
+export const getIdForExerciseJson = (node) => {
+  const label = node.getElementsByTagName("LABEL")[0];
+  let labelName = "";
+
+  if (!label) {
+    labelName = "ex:unlabeled" + unlabeledEx;
+  } else {
+    labelName = label.getAttribute("NAME");
+  }
+
+  if (!referenceStore[labelName]) {
+    missingExerciseWarning(labelName);
+    return undefined;
+  }
+
+  const displayName = referenceStore[labelName].displayName;
+  return `#ex-${displayName}`;
+};
+
 export default processExerciseJson;
diff --git a/javascript/searchRewrite.js b/javascript/searchRewrite.js
new file mode 100644
index 000000000..a14706db4
--- /dev/null
+++ b/javascript/searchRewrite.js
@@ -0,0 +1,295 @@
+import fs from "fs";
+
+// trie implementation and search functions
+class trieNode {
+  constructor() {
+    this.children = {};
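+    // children maps one character to the next trieNode; value collects every payload
+    // stored under the key that ends at this node, and key keeps the full key string
+    // so that autoComplete can return it directly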
+    this.value = [];
+    this.key = "";
+  }
+}
+
+export function insert(keyStr, value, trie) {
+  const keys = [...keyStr];
+  let node = trie;
+  for (let i = 0; i < keys.length; i++) {
+    if (!node.children[keys[i]]) {
+      node.children[keys[i]] = new trieNode();
+    }
+    node = node.children[keys[i]];
+  }
+  node.value.push(value);
+  node.key = keyStr;
+}
+
+export function search(keyStr, trie) {
+  const keys = [...keyStr];
+  let node = trie;
+  for (let i = 0; i < keys.length; i++) {
+    if (node === undefined || node.children === undefined) {
+      console.log("when searching, got undefined node or node.children at index " + i);
+      return null;
+    }
+
+    if (!node.children[keys[i]]) {
+      return null;
+    }
+    node = node.children[keys[i]];
+  }
+  return node.value;
+}
+
+export function autoComplete(incompleteKeys, trie, n = 30) {
+  let node = trie;
+  for (let i = 0; i < incompleteKeys.length; i++) {
+    if (!node.children[incompleteKeys[i]]) {
+      return [];
+    }
+    node = node.children[incompleteKeys[i]];
+  }
+  const result = [];
+  const queue = [node];
+  while (queue.length > 0 && result.length < n) {
+    const node = queue.shift();
+    if (node.value.length > 0) {
+      result.push(node.key);
+    }
+    for (const child of Object.values(node.children)) {
+      queue.push(child);
+    }
+  }
+  return result;
+}
+
+export const getUrl = searchResult =>
+  `https://sourceacademy.nus.edu.sg/sicpjs/${searchResult.id}`;
+
+// search data stores, update helpers, and the write function from this line onwards
+export const idToContentMap = {};
+export const textTrie = new trieNode();
+export const indexTrie = new trieNode();
+
+const parseIndexSearchEntryTo = (node, json) => {
+  if (node === null) return;
+  if (indexParsers[node.nodeName]) {
+    indexParsers[node.nodeName](node, json);
+    return;
+  }
+};
+
+const indexParsers = {
+  "#text": (node, json) => {
+    json["text"] += node.nodeValue;
+  },
+
+  OPERATOR: (node, json) => {
+    json["text"] += "operators";
+  },
+  PARSING: (node, json) => {
+    json["text"] += "parsing JavaScript";
+  },
+  FUNCTION: (node, json) => {
+    json["text"] += "function (JavaScript)";
+  },
+  PRIMITIVE: (node, json) => {
+    json["text"] += "primitive functions (ECMAScript equivalent in parentheses if they are in the ECMAScript standard)";
+  },
+  ENDASH: (node, json) => {
+    json["text"] += "–";
+  },
+  APOS: (node, json) => {
+    json["text"] += "'";
+  },
+  EACUTE_LOWER: (node, json) => {
+    json["text"] += "é";
+  },
+  AACUTE_LOWER: (node, json) => {
+    json["text"] += "á";
+  },
+  AACUTE_UPPER: (node, json) => {
+    json["text"] += "Á";
+  },
+  SPACE: (node, json) => {
+    json["text"] += " ";
+  },
+  // parsers whose first and only child is expected to be a text node
+  ECMA: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing ECMA, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["text"] += ` (${node.firstChild.nodeValue})`;
+  },
+  JAVASCRIPTINLINE: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing JAVASCRIPTINLINE, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["text"] += node.firstChild.nodeValue;
+  },
+  QUOTE: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing QUOTE, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["text"] += `"${node.firstChild.nodeValue}"`;
+  },
+  USE: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing USE, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["text"] += node.firstChild.nodeValue;
+    json["ORDER"] = node.firstChild.nodeValue;
+  },
+  DECLARATION: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing DECLARATION, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["text"] += node.firstChild.nodeValue;
+    json["ORDER"] = node.firstChild.nodeValue;
+  },
+  ORDER: (node, json) => {
+    if (node.firstChild.nodeName !== "#text") {
+      console.log("when parsing ORDER, got unknown node name: " + node.firstChild.nodeName);
+      return;
+    }
+    json["ORDER"] = node.firstChild.nodeValue;
+  },
+  // other nodes
+  CLOSE: (node, json) => {
+    json["CLOSE"] = true;
+  },
+  OPEN: (node, json) => {
+    json["OPEN"] = true;
+  },
+  SUBINDEX: (node, json) => {
+    const newJson = { "text": "" };
+    json["SUBINDEX"] = newJson;
+    for (let i = 0; i < node.childNodes.length; i++) {
+      const child = node.childNodes[i];
+      parseIndexSearchEntryTo(child, newJson);
+    }
+  },
+  SPLITINLINE: (node, json) => {
+    const javascriptNode = node.getElementsByTagName("JAVASCRIPT")[0];
+    if (!javascriptNode) {
+      console.log("when parsing SPLITINLINE, got no JAVASCRIPT node");
+      return;
+    }
+    // only the JAVASCRIPT variant of the split is indexed
+    for (let i = 0; i < javascriptNode.childNodes.length; i++) {
+      const child = javascriptNode.childNodes[i];
+      parseIndexSearchEntryTo(child, json);
+    }
+  },
+  INDEX: (node, json) => {
+    json["text"] = "";
+    for (let i = 0; i < node.childNodes.length; i++) {
+      const child = node.childNodes[i];
+      parseIndexSearchEntryTo(child, json);
+    }
+    if (node.getElementsByTagName("PRIMITIVE")[0]) {
+      json.SUBINDEX.text = json.SUBINDEX.text.replace(" (\\textit{ns})", "");
+    }
+  },
+
+  // todo: handle LaTeX properly
+  LATEXINLINE: (node, json) => {
+    json["text"] += `LATEX: ${node.firstChild.nodeValue}`;
+  },
+};
+
+const maintainOpenClose = (json, writeTo) => {
+  if (json.OPEN) {
+    writeTo["text"] += " (beginning of range)";
+    if (!writeTo["id"].includes("#")) {
+      writeTo["id"] += "#begin";
+    }
+  }
+
+  if (json.CLOSE) {
+    writeTo["text"] += " (end of range)";
+    if (!writeTo["id"].includes("#")) {
+      writeTo["id"] += "#end";
+    }
+  }
+};
+
+export const parseAndInsertToIndexTrie = (node, json) => {
+  parseIndexSearchEntryTo(node, json);
+  const frontEndDisplayable = { text: "", order: "", id: "", hasSubindex: false };
+  frontEndDisplayable["id"] = json.id;
+  // build the text for frontend display: prefix, main text, and subindex text separately
+  let chapterId = json.id.split("#")[0];
+  const num = chapterId.split(".").length;
+  if (num === 1) {
+    chapterId = " " + chapterId;
+  } else if (num === 2) {
+    chapterId = " " + chapterId;
+  }
+  frontEndDisplayable["text"] = chapterId + ": " + json.text;
+  if (json.SUBINDEX) {
+    frontEndDisplayable["hasSubindex"] = true;
+    frontEndDisplayable["text"] += ` :: ${json.SUBINDEX.text}`;
+    if (json.SUBINDEX.ORDER) {
+      frontEndDisplayable["order"] = json.SUBINDEX.ORDER;
+    } else {
+      frontEndDisplayable["order"] = json.SUBINDEX.text;
+    }
+
+    maintainOpenClose(json.SUBINDEX, frontEndDisplayable);
+  }
+  maintainOpenClose(json, frontEndDisplayable);
+  insert(json.text, frontEndDisplayable, indexTrie);
+};
+
+export const parseAndInsertToIdToContentMap = (json, chapterIndex, parentID = chapterIndex) => {
+  if (Array.isArray(json)) {
+    for (let i = 0; i < json.length; i++) {
+      parseAndInsertToIdToContentMap(json[i], chapterIndex, parentID);
+    }
+    return;
+  }
+
+  if (json.id && json.tag !== "SNIPPET") {
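+    // an id that already contains the chapter index collapses to the bare chapter
+    // index; any other id is namespaced by prefixing the chapter index to it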
+    const id = json.id.includes(chapterIndex) ? chapterIndex : chapterIndex + json.id;
+    parentID = id;
+    idToContentMap[id] = "";
+  }
+  if (json.body) {
+    idToContentMap[parentID] += json.body;
+  }
+  if (json.child) {
+    parseAndInsertToIdToContentMap(json.child, chapterIndex, parentID);
+  }
+};
+
+const buildTextTrie = () => {
+  console.log("enter buildTextTrie");
+  for (const [key, value] of Object.entries(idToContentMap)) {
+    const temp = value.match(/\b\w+\b/g);
+    if (temp === null) {
+      // some json nodes have no text content: their id is stored but there is no text value, so skip them here
+      continue;
+    }
+    const words = Array.from(new Set(temp.map(word => word.toLowerCase())));
+    words.forEach(word => insert(word, key, textTrie));
+  }
+};
+
+export const writeRewritedSearchData = () => {
+  buildTextTrie();
+
+  const searchData = { indexTrie, textTrie, idToContentMap };
+  fs.writeFile("json/rewritedSearchData.json", JSON.stringify(searchData), (err) => {
+    if (err) {
+      console.log(err);
+    }
+  });
+};
diff --git a/javascript/searchRewriteTest.js b/javascript/searchRewriteTest.js
new file mode 100644
index 000000000..e2a23ffd8
--- /dev/null
+++ b/javascript/searchRewriteTest.js
@@ -0,0 +1,273 @@
+// Running yarn test on my device fails with a binding error (cause unknown),
+// so this file is meant to be a temporary stand-in for proper tests.
+
+/* todos and issues:
+  probably need to modify the frontend to enable links to snippets
+  works fine for entries like <= (numeric comparison operator), but not for || (logical disjunction); could not test for " (double quote)
+  did not process the "seexml" file, so there are no "see also" entries in the index
+  did not process the LaTeX, roman, italic, etc. markup
+*/
+
+/*
+abs, 14
+absolute value, 13
+abstract data, 72, see also data abstraction abstraction, see also data abstraction;
+higher-order functions; means of
+abstraction
+common pattern and, 50
+functional, 22
+metalinguistic, 318
+in register-machine design, 456–457 of search in nondeterministic
+programming, 378
+abstraction barriers, 71, 76–78, 147
+in complex-number system, 148
+in generic arithmetic system, 164
+in query language, 437
+in representing JavaScript syntax, 329
+abstract models for data, 78 n abstract syntax
+in metacircular evaluator, 322
+in query interpreter, 425 accelerated_sequence, 297 accumulate, 53 (ex. 1.32), 100
+same as fold_right, 105 (ex. 2.38) accumulate_n, 104 (ex. 2.36) accumulator, 100, 196 (ex. 3.1) Ackermann’s function, 31 (ex. 1.10) acquire a mutex, 276
+actions, in register machine, 454–455
+actual_value, 364
+Ada, 411 (ex. 4.61)
+Adams, Norman I., IV, 356 n add (generic), 165
+used for polynomial coefficients, 179, 180
+add_action, 244, 247
+add_complex, 150 add_complex_to_javascript_num, 169 addend, 128
+adder (primitive constraint), 256 adder
+full, 243
+half, 242
+ripple-carry, 245 (ex. 3.30)
+add_interval, 81
+additivity, 72, 147, 156–162, 166 add_lists, 371
+add_poly, 178
+add_rat, 73
+address, 488
+address arithmetic, 488 add_rule_or_assertion, 434 add_streams, 290
+add_terms, 179 add_to_agenda, 248, 251 add_vect, 118 (ex. 2.46) adjoin_arg, 505 n
+adjoining to a list with pair, 88 adjoin_set, 131
+binary-tree representation, 136 ordered-list representation, 135 (ex.
+2.61) +unordered-list representation, 132 for weighted sets, 145 +adjoin_term, 179, 182 +Adleman, Leonard, 46 n +administrative assistant, importance of, +403 advance_pc, 479 +after_delay, 244, 248 +agenda, see digital-circuit simulation A’h-mose, 40 n +algebra, symbolic, see symbolic algebra algebraic expression, 176 +differentiating, 126–131 representing, 128–131 simplifying, 129–130 +algebraic specification for data, 79 n +Algol +block structure, 26 +call-by-name argument passing, 286 n, +363 n +thunks, 286 n, 363 n +algorithm +optimal, 104 n probabilistic, 45–46, 188 n +aliasing, 204 n +Al-Karaji, 36 n +Allen, John, 494 n all_regs (compiler), 542 n alternative +of conditional expression, 14 +of conditional statement, 57 always_true, 428 +amb, 374 +amb evaluator, see nondeterministic +evaluator ambeval, 388 +analog computer, 306 (fig. 3.34) +analyze +metacircular, 356 +nondeterministic, 387 +analyze_... +metacircular, 356–359, 360 (ex. 4.21) +nondeterministic, 389–392 analyze_amb, 394 +analyzing evaluator, 355–360 +as basis for nondeterministic evaluator, 386 +and (query language), 405 +evaluation of, 413, 426, 446 (ex. 4.73) +and-gate, 241 and_gate, 245 +an_element_of, 375 angle +data-directed, 160 +polar representation, 152 rectangular representation, 151 with tagged data, 154 +angle_polar, 154 angle_rectangular, 153 an_integer_starting_from, 375 Appel, Andrew W., 541 n +append, 88, 225 (ex. 3.12) +as accumulation, 103 (ex. 2.33) append_mutator vs., 225 (ex. 3.12) as register machine, 492 (ex. 5.21) “what is” (rules) vs. “how to” +(function), 399–400 +append_instruction_sequences, 524, 544 +append_mutator, 225 (ex. 3.12) +as register machine, 492 (ex. 5.21) +append_to_form (rules), 410 applicative-order evaluation, 13 +in JavaScript, 13 +normal order vs., 17 (ex. 1.5), 43 (ex. +1.20), 361–362 apply (lazy), 364 +apply (metacircular), 324 tail recursion and, 324 n +apply (primitive method), 346 n apply_a_rule, 430 apply_dispatch, 507 +modified for compiled code, 558 apply_generic, 160 +with coercion, 171, 174 (ex. 2.81) +with coercion by raising, 175 (ex. 2.84) with coercion of multiple arguments, +175 (ex. 2.82) +with coercion to simplify, 176 (ex. +2.85) +with message passing, 163 with tower of types, 173 +apply_in_underlying_javascript, 159 n, 346 n +apply_primitive_function, 324, 340, 346 +apply_rules, 430 arbiter, 278 n arctangent, 151 n arg_expressions, 331 argl register, 500 argument(s), 9 +arbitrary number of, 276 +delayed, 306 +argument passing, see call-by-name +argument passing; call-by-need +argument passing Aristotle’s De caelo (Buridan’s +commentary on), 278 n arithmetic +address arithmetic, 488 generic, 163, see also generic +arithmetic operations +on complex numbers, 148 +on intervals, 81–84 +on polynomials, see polynomial +arithmetic +on power series, 294 (ex. 3.60), 295 +(ex. 
+(ex. 3.62)
+on rational numbers, 72–76
+operators for, 4
+array, see vector (data structure) arrow function, see lambda expression articles, 381
+ASCII code, 140
+assemble, 474, 475 n
+assembler, 470, 474–477
+assert (query interpreter), 419 assertion, 401
+implicit, 407 assertion_body, 443
+assign (in register machine), 453
+instruction constructor, 478 simulating, 478
+storing label in register, 459
+assignment, 190–206
+assignment expression, 192 assignment operation, 190
+benefits of, 197–200
+bugs associated with, 204 n, 205 constant/variable declaration vs., 192 n costs of, 200–206
+equality test vs., 192 n
+evaluation of, 210
+parsing of, 333
+value of, 192 n
+assignment_symbol, 333 assignment_value_expression, 333 assign_reg_name, 478 assign_symbol_value, 341, 342 assign_value_exp, 478
+assoc, 236 associativity
+of conditional expression, 14
+of operators, 5
+atomic operations supported in hardware,
+278 n
+atomic requirement for test_and_set, 277 attach_tag, 152
+using JavaScript data types, 168 (ex. 2.78)
+augend, 128
+automagically, 376
+automatic search, 373, see also search
+history of, 376 n
+automatic storage allocation, 487 average, 19
+average_damp, 63
+average damping, 61
+averager (constraint), 261 (ex. 3.33)
+*/
+
+// The block comment above includes 2 of the 5 index columns for entries starting with A.
+
+// The exercise URLs for accumulate and accumulate_n were checked manually; they are correct.
+import { indexTrie, search, getUrl, autoComplete } from "./searchRewrite";
+import fs from "fs";
+
+const indexSearchTestCase = {
+  "abs": 1,
+  "absolute value": 1,
+  "abstract data": 1,
+  "abstraction": 6,
+  "abstraction barriers": 8,
+  "abstract models for data": 1,
+  "abstract syntax": 2,
+  "accelerated_sequence": 1,
+  "accumulate": 3,
+  "accumulate_n": 1,
+  "accumulator": 2,
+  "Ackermann's function": 1,
+  "acquire a mutex": 1,
+  "actions, in register machine": 2,
+  "actual_value": 1,
+  "Ada": 1,
+  "Adams, Norman I., IV": 1,
+  "add (generic)": 1,
+  "add_action": 2,
+  "add_complex": 1,
+  "add_complex_to_javascript_num": 1,
+  "addend": 1,
+  "adder (primitive constraint)": 1,
+  "adder": 3,
+  "add_interval": 1,
+  "additivity": 5,
+  "add_lists": 1,
+  "add_poly": 1,
+  "add_rat": 1,
+  "address": 1,
+  "address arithmetic": 1,
+  "add_rule_or_assertion": 1,
+  "add_streams": 1,
+  "add_terms": 1,
+  "add_to_agenda": 2,
+  "add_vect": 1,
+  "adjoin_arg": 1,
+  "adjoining to a list with pair": 1,
+  "adjoin_set": 5,
+  "adjoin_term": 2,
+  "Adleman, Leonard": 1,
+  "administrative assistant, importance of": 1,
+  "advance_pc": 1,
+  "after_delay": 2,
+  "A'h-mose": 1,
+  "algebraic expression": 7,
+  "algebraic specification for data": 1,
+};
+
+const failedTests = [];
+const urls = {};
+const writeFailureMessage = (key, searchResult) => {
+  failedTests.push(`${key}: result is ${searchResult}, expected occurrence count is: ${indexSearchTestCase[key]}`);
+};
+
+export async function testIndexSearch() {
+  for (const [key, value] of Object.entries(indexSearchTestCase)) {
+    const result = search(key, indexTrie);
+    //console.log(result);
+    if (result === null) {
+      writeFailureMessage(key, "null");
+      continue;
+    }
+
+    urls[key] = result.map(getUrl);
+
+    if (result.length < value) {
+      writeFailureMessage(key, result.length);
+      continue;
+    }
+  }
+
+  console.log(autoComplete("||", indexTrie));
+  console.log(search("|| (logical disjunction)", indexTrie));
+
+  async function testURLs() {
+    console.log("Testing urls");
+    for (const [key, urlArray] of Object.entries(urls)) {
+      for (const url of urlArray) {
+        try {
+          const response = await fetch(url);
+
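+          // fetch only rejects on network failures; HTTP errors such as 404 still
+          // resolve normally, hence the explicit response.ok check below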
+          if (!response.ok) {
+            console.log(key + ": " + url + " is not working");
+          }
+
+        } catch (error) {
+          console.error(key + ": " + url + " is not working");
+        }
+      }
+    }
+    console.log("Done testing urls");
+  }
+
+  await testURLs();
+
+  fs.writeFileSync("failedTests.txt", failedTests.join("\n"));
+  fs.writeFileSync("urls.txt", JSON.stringify(urls));
+}
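+
+// Minimal consumption sketch, not wired up anywhere: it illustrates how a frontend
+// could look up an index entry after JSON.parse-ing json/rewritedSearchData.json,
+// where the tries arrive as plain objects rather than trieNode instances. The function
+// name and the shape of the searchData argument are assumptions for illustration only.
+export function sketchFrontendIndexLookup(searchData, query) {
+  let node = searchData.indexTrie;
+  for (const ch of query) {
+    if (!node.children[ch]) {
+      return []; // no index entry starts with this prefix
+    }
+    node = node.children[ch];
+  }
+  // exact match only: value is non-empty just at nodes where an entry terminates
+  return node.value.map(getUrl);
+}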