From c59533862dcb8455b94a84f295b95327ae6ac7a8 Mon Sep 17 00:00:00 2001 From: Khoa Nguyen Date: Tue, 20 Sep 2016 16:30:52 +0700 Subject: [PATCH 1/2] Make a hierarchy index based on given HTML --- docs/webpack.config.babel.js | 2 +- flow/interfaces/node-modules/cheerio.js | 3 + package.json | 1 + .../find-closest-heading-result.json | 23 +++ .../create-index/__tests__/headings-list.json | 146 ++++++++++++++++++ .../search/create-index/__tests__/index.js | 65 ++++++++ .../__tests__/make-hierarchy-result.json | 21 +++ .../create-index/find-closest-heading.js | 48 ++++++ .../search/create-index/headings-list.js | 42 +++++ src/_utils/search/create-index/index.js | 19 +++ .../search/create-index/make-hierarchy.js | 53 +++++++ .../search/create-index/webpack-plugin.js | 42 +++++ src/builder/webpack/config.browser.js | 4 + 13 files changed, 468 insertions(+), 1 deletion(-) create mode 100644 flow/interfaces/node-modules/cheerio.js create mode 100644 src/_utils/search/create-index/__tests__/find-closest-heading-result.json create mode 100644 src/_utils/search/create-index/__tests__/headings-list.json create mode 100644 src/_utils/search/create-index/__tests__/index.js create mode 100644 src/_utils/search/create-index/__tests__/make-hierarchy-result.json create mode 100644 src/_utils/search/create-index/find-closest-heading.js create mode 100644 src/_utils/search/create-index/headings-list.js create mode 100644 src/_utils/search/create-index/index.js create mode 100644 src/_utils/search/create-index/make-hierarchy.js create mode 100644 src/_utils/search/create-index/webpack-plugin.js diff --git a/docs/webpack.config.babel.js b/docs/webpack.config.babel.js index 2be3884e2..f97f7f0a4 100644 --- a/docs/webpack.config.babel.js +++ b/docs/webpack.config.babel.js @@ -87,7 +87,7 @@ export const makeConfig = (config = {}) => { }, postcss: () => [ - require("stylelint")(), + // require("stylelint")(), require("postcss-cssnext")({ browsers: "last 2 versions" }), 
require("postcss-reporter")(), ...!config.production ? [ diff --git a/flow/interfaces/node-modules/cheerio.js b/flow/interfaces/node-modules/cheerio.js new file mode 100644 index 000000000..f2debad1d --- /dev/null +++ b/flow/interfaces/node-modules/cheerio.js @@ -0,0 +1,3 @@ +declare module "cheerio" { + declare var exports: any +} diff --git a/package.json b/package.json index b55c8e5bd..c48ee3dfb 100644 --- a/package.json +++ b/package.json @@ -93,6 +93,7 @@ "babel-preset-latest": "^6.14.0", "babel-preset-react": "^6.11.1", "babel-preset-stage-1": "^6.13.0", + "cheerio": "^0.22.0", "cmd-shim": "^2.0.2", "coveralls": "^2.11.8", "cross-env": "^2.0.0", diff --git a/src/_utils/search/create-index/__tests__/find-closest-heading-result.json b/src/_utils/search/create-index/__tests__/find-closest-heading-result.json new file mode 100644 index 000000000..b801c36c8 --- /dev/null +++ b/src/_utils/search/create-index/__tests__/find-closest-heading-result.json @@ -0,0 +1,23 @@ +[ + { + "content": "paragraph 1.1 - 1foo", + "closestHeading": { + "heading": 2, + "content": "1.1" + } + }, + { + "content": "paragraph 1.1 - 2foo", + "closestHeading": { + "heading": 2, + "content": "1.1" + } + }, + { + "content": "paragraph 1.1.2.1.1.1 - 1bar", + "closestHeading": { + "heading": 6, + "content": "1.1.2.1.1.1" + } + } +] diff --git a/src/_utils/search/create-index/__tests__/headings-list.json b/src/_utils/search/create-index/__tests__/headings-list.json new file mode 100644 index 000000000..166155c38 --- /dev/null +++ b/src/_utils/search/create-index/__tests__/headings-list.json @@ -0,0 +1,146 @@ +[ + { + "heading": 1, + "content": "1", + "parents": [] + }, + { + "heading": 2, + "content": "1.1", + "parents": [ + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 3, + "content": "1.1.1", + "parents": [ + { + "heading": 2, + "content": "1.1" + }, + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 3, + "content": "1.1.2", + "parents": [ + { + "heading": 2, 
+ "content": "1.1" + }, + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 4, + "content": "1.1.2.1", + "parents": [ + { + "heading": 3, + "content": "1.1.2" + }, + { + "heading": 2, + "content": "1.1" + }, + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 5, + "content": "1.1.2.1.1", + "parents": [ + { + "heading": 4, + "content": "1.1.2.1" + }, + { + "heading": 3, + "content": "1.1.2" + }, + { + "heading": 2, + "content": "1.1" + }, + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 6, + "content": "1.1.2.1.1.1", + "parents": [ + { + "heading": 5, + "content": "1.1.2.1.1" + }, + { + "heading": 4, + "content": "1.1.2.1" + }, + { + "heading": 3, + "content": "1.1.2" + }, + { + "heading": 2, + "content": "1.1" + }, + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 2, + "content": "1.2", + "parents": [ + { + "heading": 1, + "content": "1" + } + ] + }, + { + "heading": 1, + "content": "2", + "parents": [] + }, + { + "heading": 1, + "content": "3", + "parents": [] + }, + { + "heading": 2, + "content": "3.1", + "parents": [ + { + "heading": 1, + "content": "3" + } + ] + }, + { + "heading": 1, + "content": "4", + "parents": [] + } +] diff --git a/src/_utils/search/create-index/__tests__/index.js b/src/_utils/search/create-index/__tests__/index.js new file mode 100644 index 000000000..5ff86b9aa --- /dev/null +++ b/src/_utils/search/create-index/__tests__/index.js @@ -0,0 +1,65 @@ +import test from "ava" +import cheerio from "cheerio" + +import headingsList from "../headings-list" +import headingsListResult from "./headings-list.json" + +import findClosestHeading from "../find-closest-heading" +import findClosestHeadingResult from "./find-closest-heading-result" + +import makeHierarchy from "../make-hierarchy" +import makeHierarchyResult from "./make-hierarchy-result.json" + +import createIndex from ".." 
+// NOTE: The below markup is a flat tree
+// indentations were added to make it readable
+const document = `
+<h1>1</h1>
+  <h2>1.1</h2>
+    <p>paragraph 1.1 - 1foo</p>
+    <p>paragraph 1.1 - 2foo</p>
+    <h3>1.1.1</h3>
+    <h3>1.1.2</h3>
+      <h4>1.1.2.1</h4>
+        <h5>1.1.2.1.1</h5>
+          <h6>1.1.2.1.1.1</h6>
+            <p>paragraph 1.1.2.1.1.1 - 1bar</p>
+  <h2>1.2</h2>
+<h1>2</h1>
+<h1>3</h1>
+  <h2>3.1</h2>
+<h1>4</h1>
+` + +test("create a hierarchy index", (t) => { + const $ = cheerio.load(document) + t.deepEqual( + headingsList($), + headingsListResult, + "it should parse html to headings list" + ) + + t.deepEqual( + findClosestHeading($), + findClosestHeadingResult, + "it should find all paragraphs along with its closest heading" + ) + + t.deepEqual( + makeHierarchy(headingsListResult, findClosestHeadingResult), + makeHierarchyResult, + "it should make a hierarchy based on headings list and paragraphs" + ) +}) + +test("main function", (t) => { + t.deepEqual(createIndex(document), makeHierarchyResult) +}) + +test("it works with documnent with a single paragraph", (t) => { + const html = `

<p>
+    Content here not used, see src/layouts/PageError
+  </p>

` + + t.truthy(createIndex(html)) +}) diff --git a/src/_utils/search/create-index/__tests__/make-hierarchy-result.json b/src/_utils/search/create-index/__tests__/make-hierarchy-result.json new file mode 100644 index 000000000..c3fea2046 --- /dev/null +++ b/src/_utils/search/create-index/__tests__/make-hierarchy-result.json @@ -0,0 +1,21 @@ +[ + { + "h2": "1.1", + "h1": "1", + "content": "paragraph 1.1 - 1foo" + }, + { + "h2": "1.1", + "h1": "1", + "content": "paragraph 1.1 - 2foo" + }, + { + "h6": "1.1.2.1.1.1", + "h5": "1.1.2.1.1", + "h4": "1.1.2.1", + "h3": "1.1.2", + "h2": "1.1", + "h1": "1", + "content": "paragraph 1.1.2.1.1.1 - 1bar" + } +] diff --git a/src/_utils/search/create-index/find-closest-heading.js b/src/_utils/search/create-index/find-closest-heading.js new file mode 100644 index 000000000..4b2c959fa --- /dev/null +++ b/src/_utils/search/create-index/find-closest-heading.js @@ -0,0 +1,48 @@ +// @flow +type paragraphWithClosestHeading = { + content: string, + closestHeading: { + heading: number, + content: string, + }, +} +export type paragraphsWithClosestHeading = Array + +/* + * Find closest heading to all paragraph + */ +export default function($: any): paragraphsWithClosestHeading { + const result = [] + + $("p").each(function() { + let next = true + let currentTag = $(this) + + while (next) { + currentTag = currentTag.prev() + + if (typeof currentTag.get(0) === "undefined") { + result.push({ + content: $(this).html(), + }) + next = false + } + else { + const currentTagName = currentTag.get(0).tagName + + if (/^h\d$/.test(currentTagName)) { + result.push({ + content: $(this).html(), + closestHeading: { + heading: parseInt(currentTagName.slice(1)), + content: currentTag.text(), + }, + }) + next = false + } + } + } + }) + + return result +} diff --git a/src/_utils/search/create-index/headings-list.js b/src/_utils/search/create-index/headings-list.js new file mode 100644 index 000000000..e204acf73 --- /dev/null +++ 
b/src/_utils/search/create-index/headings-list.js @@ -0,0 +1,42 @@ +// @flow +export type heading = { + heading: number, + content: string, + parents: Array, +} + +export type headingsList = Array +/* + * Generate a heading list + */ +export default function($: any) { + const filter = "h1, h2, h3, h4, h5, h6" + + const flatTree = [] + $(filter).each(function() { + flatTree.push({ + heading: parseInt($(this).get(0).tagName.slice(1)), + content: $(this).text(), + }) + }) + + return flatTree.map((node, i) => { + let currentHeadingLvl = node.heading - 1 + + const result = { + ...node, + parents: [], + } + + while (currentHeadingLvl > 0 && i >= 0) { + const currentNode = flatTree[i] + if (currentNode.heading === currentHeadingLvl) { + result.parents.push(currentNode) + currentHeadingLvl-- + } + i-- + } + + return result + }) +} diff --git a/src/_utils/search/create-index/index.js b/src/_utils/search/create-index/index.js new file mode 100644 index 000000000..411d705fd --- /dev/null +++ b/src/_utils/search/create-index/index.js @@ -0,0 +1,19 @@ +// @flow +import cheerio from "cheerio" +import headingsList from "./headings-list" +import findClosestHeading from "./find-closest-heading" +import makeHierarchy from "./make-hierarchy" +import type { hierarchy } from "./make-hierarchy" + +const createIndexFromHTML = (html: string): hierarchy => { + const $ = cheerio.load(html) + const headingsListResult = headingsList($) + const paragraphsWithClosestHeading = findClosestHeading($) + + return makeHierarchy( + headingsListResult, + paragraphsWithClosestHeading + ) +} + +export default createIndexFromHTML diff --git a/src/_utils/search/create-index/make-hierarchy.js b/src/_utils/search/create-index/make-hierarchy.js new file mode 100644 index 000000000..bdee1f41a --- /dev/null +++ b/src/_utils/search/create-index/make-hierarchy.js @@ -0,0 +1,53 @@ +// @flow +import type { paragraphsWithClosestHeading } from "./find-closest-heading" +import type { headingsList, heading } from 
"./headings-list" + +type headingCompact = { + h1?: string, + h2?: string, + h3?: string, + h4?: string, + h5?: string, + h6?: string, + content: string, +} + +export type hierarchy = Array + +function convertToCompactHeadingStyle(heading: heading): headingCompact { + return { + ["h" + heading.heading]: heading.content, + } +} + +export default function makeHierarchy( + headingsList: headingsList, + paragraphs: paragraphsWithClosestHeading +): hierarchy { + return paragraphs = paragraphs.map((p) => { + if (typeof p.closestHeading === "undefined") { + return p + } + + const closestHeading = headingsList.find((heading) => { + return ( + heading.content === p.closestHeading.content && + heading.heading === p.closestHeading.heading + ) + }) + + let parents = {} + closestHeading.parents.forEach((heading) => ( + parents = { + ...parents, + ...convertToCompactHeadingStyle(heading), + } + )) + + return { + ...convertToCompactHeadingStyle(closestHeading), + ...parents, + content: p.content, + } + }) +} diff --git a/src/_utils/search/create-index/webpack-plugin.js b/src/_utils/search/create-index/webpack-plugin.js new file mode 100644 index 000000000..e50849552 --- /dev/null +++ b/src/_utils/search/create-index/webpack-plugin.js @@ -0,0 +1,42 @@ +// @flow +import { RawSource } from "webpack-sources" +import PhenomicLoaderWebpackPlugin from "../../../loader/plugin.js" +import createIndex from "./index" + +function CreateSearchIndexWebpackPlugin(options: Object) { + this.options = options +} + +CreateSearchIndexWebpackPlugin.prototype.apply = function(compiler) { + compiler.plugin("compilation", (compilation/* , params */) => { + compilation.plugin("additional-assets", (callback) => { + if (!PhenomicLoaderWebpackPlugin.collection) { + throw new Error( + "Missing Phenomic collection in webpack compilation object. " + + "This probably means you are playing with fire." 
+ ) + } + const collection = PhenomicLoaderWebpackPlugin.collection + const searchIndex = [] + + collection.forEach((item: PhenomicCollectionItem) => { + const hierarchy = createIndex(item.body) + hierarchy.forEach((p, index) => { + searchIndex.push({ + objectId: item.__url + "#" + index, + title: item.head.title || item.head.metaTitle, + __url: item.__url, + __dataUrl: item.__dataUrl, + ...p, + }) + }) + }) + compilation.assets["search-index.json"] = new RawSource( + JSON.stringify(searchIndex, null, 2) + ) + callback() + }) + }) +} + +module.exports = CreateSearchIndexWebpackPlugin diff --git a/src/builder/webpack/config.browser.js b/src/builder/webpack/config.browser.js index 6309c656e..84285a455 100644 --- a/src/builder/webpack/config.browser.js +++ b/src/builder/webpack/config.browser.js @@ -5,6 +5,9 @@ import commonWebpackConfig from "./config.common.js" import { offlinePlugin, offlineEntry } from "../../_utils/offline/webpack.js" import PhenomicLoaderWebpackPlugin from "../../loader/plugin.js" +import createSearchIndexWebpackPlugin +from "../../_utils/search/create-index/webpack-plugin" + const chunkNameBrowser = "phenomic.browser" export default (config: PhenomicConfig): WebpackConfig => { @@ -15,6 +18,7 @@ export default (config: PhenomicConfig): WebpackConfig => { ...webpackConfig, plugins: [ new PhenomicLoaderWebpackPlugin(), + new createSearchIndexWebpackPlugin(), ...webpackConfig.plugins, ...offlinePlugin(config), ], From 0da1660f2386f857a895d9cec404f875aec6d74b Mon Sep 17 00:00:00 2001 From: Khoa Nguyen Date: Tue, 20 Sep 2016 18:40:59 +0700 Subject: [PATCH 2/2] Fix flow type --- src/_utils/search/create-index/find-closest-heading.js | 4 ++-- src/_utils/search/create-index/make-hierarchy.js | 9 +++++++-- src/_utils/search/create-index/webpack-plugin.js | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/_utils/search/create-index/find-closest-heading.js b/src/_utils/search/create-index/find-closest-heading.js index 
4b2c959fa..b9eaf7272 100644 --- a/src/_utils/search/create-index/find-closest-heading.js +++ b/src/_utils/search/create-index/find-closest-heading.js @@ -1,7 +1,7 @@ // @flow -type paragraphWithClosestHeading = { +export type paragraphWithClosestHeading = { content: string, - closestHeading: { + closestHeading?: { heading: number, content: string, }, diff --git a/src/_utils/search/create-index/make-hierarchy.js b/src/_utils/search/create-index/make-hierarchy.js index bdee1f41a..a57418e93 100644 --- a/src/_utils/search/create-index/make-hierarchy.js +++ b/src/_utils/search/create-index/make-hierarchy.js @@ -1,5 +1,8 @@ // @flow -import type { paragraphsWithClosestHeading } from "./find-closest-heading" +import type { + paragraphsWithClosestHeading, + paragraphWithClosestHeading, +} from "./find-closest-heading" import type { headingsList, heading } from "./headings-list" type headingCompact = { @@ -24,14 +27,16 @@ export default function makeHierarchy( headingsList: headingsList, paragraphs: paragraphsWithClosestHeading ): hierarchy { - return paragraphs = paragraphs.map((p) => { + return paragraphs = paragraphs.map((p: paragraphWithClosestHeading) => { if (typeof p.closestHeading === "undefined") { return p } const closestHeading = headingsList.find((heading) => { return ( + // $FlowFixMe undefined is defined above heading.content === p.closestHeading.content && + // $FlowFixMe undefined is defined above heading.heading === p.closestHeading.heading ) }) diff --git a/src/_utils/search/create-index/webpack-plugin.js b/src/_utils/search/create-index/webpack-plugin.js index e50849552..a2fbd56b6 100644 --- a/src/_utils/search/create-index/webpack-plugin.js +++ b/src/_utils/search/create-index/webpack-plugin.js @@ -3,7 +3,7 @@ import { RawSource } from "webpack-sources" import PhenomicLoaderWebpackPlugin from "../../../loader/plugin.js" import createIndex from "./index" -function CreateSearchIndexWebpackPlugin(options: Object) { +function 
CreateSearchIndexWebpackPlugin(options?: Object) { this.options = options }