Skip to content

Commit

Permalink
Use markdown parser based on commonmark-java (#668)
Browse files Browse the repository at this point in the history
Upgrades the `nextjournal/markdown` library to `{:mvn/version 0.6.157}`, switching markdown parsing from `markdown-it` via GraalJS to `commonmark-java` (roughly 10 times faster).

Makes Clerk compatible with Java 22 or later (#642).
  • Loading branch information
zampino authored Sep 30, 2024
1 parent 47d0e27 commit 0dbd116
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 26 deletions.
6 changes: 4 additions & 2 deletions deps.edn
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{:paths ["src" "resources" "bb"]
:mvn/repos {"jitpack.io" {:url "https://jitpack.io"}}
:deps {org.clojure/clojure {:mvn/version "1.10.3"}
org.clojure/java.classpath {:mvn/version "1.0.0"}
org.clojure/tools.analyzer {:mvn/version "1.1.0"}
Expand All @@ -8,8 +9,9 @@
weavejester/dependency {:mvn/version "0.2.1"}
com.nextjournal/beholder {:mvn/version "1.0.2"}
org.flatland/ordered {:mvn/version "1.15.12"}

io.github.nextjournal/markdown {:mvn/version "0.5.148"}
io.github.nextjournal/markdown {:mvn/version "0.6.157"
;; only used in cljs
:exclusions [applied-science/js-interop]}
babashka/process {:mvn/version "0.4.16"}
io.github.nextjournal/dejavu {:git/sha "4980e0cc18c9b09fb220874ace94ba6b57a749ca"}
io.github.babashka/sci.nrepl {:mvn/version "0.0.2"}
Expand Down
1 change: 1 addition & 0 deletions notebooks/markdown.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This notebook demoes feeding Clerk with markdown files. We currently make no ass
Nextjournal Markdown library is able to ingest a markdown string

```clojure
^{::clerk/visibility {:code :fold :result :hide}}
(def markdown-syntax
"Markdown: Syntax
================
Expand Down
25 changes: 4 additions & 21 deletions src/nextjournal/clerk/parser.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
[clojure.string :as str]
[clojure.zip]
[nextjournal.markdown :as markdown]
[nextjournal.markdown.parser :as markdown.parser]
[nextjournal.markdown.transform :as markdown.transform]
[rewrite-clj.node :as n]
[rewrite-clj.parser :as p]
Expand Down Expand Up @@ -291,33 +290,17 @@
#_(text-with-clerk-metadata-removed "(def ^::clerk/no-cache random-thing (rand-int 1000))" {'clerk 'nextjournal.clerk})
#_(text-with-clerk-metadata-removed "^::clerk/bar [] ;; keep me" {'clerk 'nextjournal.clerk})

(defn markdown-context []
(update markdown.parser/empty-doc
:text-tokenizers (partial map markdown.parser/normalize-tokenizer)))

#_(markdown-context)

(defn parse-markdown
"Like `n.markdown.parser/parse` but allows to reuse the same context in successive calls"
[ctx md]
(markdown.parser/apply-tokens ctx (markdown/tokenize md)))

(defn update-markdown-blocks [{:as state :keys [md-context]} md]
(let [{::markdown.parser/keys [path]} md-context
doc (parse-markdown md-context md)
[_ index] path]
(let [doc (markdown/parse* (assoc md-context :content []) md)]
(-> state
(assoc :md-context doc)
(update :blocks conj {:type :markdown
:doc (-> doc
(select-keys [:type :content :footnotes])
;; take only new nodes, keep context intact
(update :content subvec (inc index)))}))))
:doc (select-keys doc [:type :content :footnotes])}))))

(defn parse-clojure-string
([s] (parse-clojure-string {} s))
([{:as opts :keys [doc?]} s]
(let [doc (parse-clojure-string opts {:blocks [] :md-context (markdown-context)} s)]
(let [doc (parse-clojure-string opts {:blocks [] :md-context markdown/empty-doc} s)]
(select-keys (cond-> doc doc? (merge (:md-context doc)))
[:blocks :title :toc :footnotes])))
([{:as _opts :keys [doc?]} initial-state s]
Expand Down Expand Up @@ -382,7 +365,7 @@
(update doc :blocks #(filterv (some-fn :form (complement code?)) %)))

(defn parse-markdown-string [{:as opts :keys [doc?]} s]
(let [{:as ctx :keys [content]} (parse-markdown (markdown-context) s)]
(let [{:as ctx :keys [content]} (markdown/parse* markdown/empty-doc s)]
(loop [{:as state :keys [nodes] ::keys [md-slice]} {:blocks [] ::md-slice [] :nodes content :md-context ctx}]
(if-some [node (first nodes)]
(recur
Expand Down
15 changes: 12 additions & 3 deletions src/nextjournal/clerk/viewer.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
[applied-science.js-interop :as j]])
[nextjournal.clerk.parser :as parser]
[nextjournal.markdown :as md]
[nextjournal.markdown.parser :as md.parser]
[nextjournal.markdown.utils :as md.utils]
[nextjournal.markdown.transform :as md.transform])
#?(:clj (:import (com.pngencoder PngEncoder)
(clojure.lang IDeref IAtom)
Expand Down Expand Up @@ -585,7 +585,7 @@

(defn process-sidenotes [cell-doc {:keys [footnotes]}]
(if (seq footnotes)
(md.parser/insert-sidenote-containers (assoc cell-doc :footnotes footnotes))
(md.utils/insert-sidenote-containers (assoc cell-doc :footnotes footnotes))
cell-doc))

(defn process-image-source [src {:as doc :keys [file package]}]
Expand Down Expand Up @@ -674,6 +674,14 @@
:transform-fn (update-val transform-cell)
:render-fn '(fn [xs opts] (into [:<>] (nextjournal.clerk.render/inspect-children opts) xs))})

(defn lift-block-images
  "Returns a lazy seq of `md-nodes` in which every `:paragraph` node whose
  content consists of exactly one `:image` node is replaced by that image
  node. All other nodes are passed through unchanged."
  [md-nodes]
  (letfn [(image-only-paragraph? [{:keys [type content]}]
            ;; a paragraph wrapping a single image and nothing else
            (and (= :paragraph type)
                 (= 1 (count content))
                 (= :image (:type (first content)))))]
    (map (fn [md-node]
           (if (image-only-paragraph? md-node)
             (first (:content md-node))
             md-node))
         md-nodes)))

(defn with-block-viewer [doc {:as cell :keys [type id]}]
(case type
:markdown (let [{:keys [content]} (:doc cell)
Expand All @@ -685,7 +693,8 @@
(process-sidenotes {:type :doc
:content (vec fragment)
::doc doc} doc))]))
(partition-by (comp #{:image} :type) content)))
(partition-by (comp #{:image} :type)
(lift-block-images content))))

:code (if (cell-visible? cell)
[(with-viewer `cell-viewer (assoc cell ::doc doc))]
Expand Down

0 comments on commit 0dbd116

Please sign in to comment.