Skip to content

Commit

Permalink
Change AST representation
Browse files Browse the repository at this point in the history
* Remove the record wrapper for each node and inline the attribute field
* Make the block type parametric with the goal of using this later to have
  a version of the AST with source locations and whatever information we might
  want per node.
  • Loading branch information
sonologico committed Apr 26, 2021
1 parent 885eda4 commit 2bba77e
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 213 deletions.
135 changes: 63 additions & 72 deletions src/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,105 +9,96 @@ type list_spacing =
| Loose
| Tight

let same_block_list_kind k1 k2 =
match k1, k2 with
| Ordered (_, c1), Ordered (_, c2)
| Bullet c1, Bullet c2 -> c1 = c2
| _ -> false

type link_def =
type 'attr link_def =
{
label: string;
destination: string;
title: string option;
attributes: attributes;
attributes: 'attr;
}

module type T = sig
type t
type 'a t
end

module MakeBlock (I : T) = struct
type def_elt =
{
term: I.t;
defs: I.t list;
}

and block =
module MakeBlock(I : T) = struct
type 'attr def_elt =
{
bl_desc: block_desc;
bl_attributes: attributes;
term: 'attr I.t;
defs: 'attr I.t list;
}

and block_desc =
| Paragraph of I.t
| List of list_type * list_spacing * block list list
| Blockquote of block list
| Thematic_break
| Heading of int * I.t
| Code_block of string * string
| Html_block of string
| Definition_list of def_elt list
(* A value of type 'attr is present in all variants of this type. We use it to associate
extra information to each node in the AST. In the common case, the attributes type defined
above is used. We might eventually have an alternative function to parse blocks while keeping
concrete information such as source location and we'll use it for that as well. *)
type 'attr block =
| Paragraph of 'attr * 'attr I.t
| List of 'attr * list_type * list_spacing * 'attr block list list
| Blockquote of 'attr * 'attr block list
| Thematic_break of 'attr
| Heading of 'attr * int * 'attr I.t
| Code_block of 'attr * string * string
| Html_block of 'attr * string
| Definition_list of 'attr * 'attr def_elt list
end

type link =
type 'attr link =
{
label: inline;
label: 'attr inline;
destination: string;
title: string option;
}

and inline =
{
il_desc: inline_desc;
il_attributes: attributes;
}

and inline_desc =
| Concat of inline list
| Text of string
| Emph of inline
| Strong of inline
| Code of string
| Hard_break
| Soft_break
| Link of link
| Image of link
| Html of string
(* See comment on the block type above about the 'attr parameter *)
and 'attr inline =
| Concat of 'attr * 'attr inline list
| Text of 'attr * string
| Emph of 'attr * 'attr inline
| Strong of 'attr * 'attr inline
| Code of 'attr * string
| Hard_break of 'attr
| Soft_break of 'attr
| Link of 'attr * 'attr link
| Image of 'attr * 'attr link
| Html of 'attr * string

module Raw = MakeBlock (String)
module StringT = struct type 'attr t = string end
module InlineT = struct type 'attr t = 'attr inline end

module Inline = struct type t = inline end
module Raw = MakeBlock(StringT)
module Inline = MakeBlock(InlineT)

include MakeBlock (Inline)
include Inline

module MakeMapper (Src : T) (Dst : T) = struct
module SrcBlock = MakeBlock(Src)
module DstBlock = MakeBlock(Dst)

let rec map (f : Src.t -> Dst.t) : SrcBlock.block -> DstBlock.block =
fun {bl_desc; bl_attributes} ->
let bl_desc =
match bl_desc with
| SrcBlock.Paragraph x -> DstBlock.Paragraph (f x)
| List (ty, sp, bl) ->
List (ty, sp, List.map (List.map (map f)) bl)
| Blockquote xs ->
Blockquote (List.map (map f) xs)
| Thematic_break ->
Thematic_break
| Heading (level, text) ->
Heading (level, f text)
| Definition_list l ->
let rec map (f : 'attr Src.t -> 'attr Dst.t) : 'attr SrcBlock.block -> 'attr DstBlock.block =
function
| SrcBlock.Paragraph (attr, x) -> DstBlock.Paragraph (attr, f x)
| List (attr, ty, sp, bl) ->
List (attr, ty, sp, List.map (List.map (map f)) bl)
| Blockquote (attr, xs) ->
Blockquote (attr, List.map (map f) xs)
| Thematic_break attr ->
Thematic_break attr
| Heading (attr, level, text) ->
Heading (attr, level, f text)
| Definition_list (attr, l) ->
let f {SrcBlock.term; defs} = {DstBlock.term = f term; defs = List.map f defs} in
Definition_list (List.map f l)
| Code_block (label, code) ->
Code_block (label, code)
| Html_block x ->
Html_block x
in
{bl_desc; bl_attributes}
Definition_list (attr, List.map f l)
| Code_block (attr, label, code) ->
Code_block (attr, label, code)
| Html_block (attr, x) ->
Html_block (attr, x)
end

module Mapper = MakeMapper (String) (Inline)
module Mapper = MakeMapper (StringT) (InlineT)

let same_block_list_kind k1 k2 =
match k1, k2 with
| Ordered (_, c1), Ordered (_, c2)
| Bullet c1, Bullet c2 -> c1 = c2
| _ -> false
33 changes: 15 additions & 18 deletions src/block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@ open Ast

module Sub = Parser.Sub

let mk ?(attr = []) desc =
{Ast.Raw.bl_desc = desc; bl_attributes = attr}

module Pre = struct
type container =
| Rblockquote of t
| Rlist of list_type * list_spacing * bool * int * Raw.block list list * t
| Rlist of list_type * list_spacing * bool * int * attributes Raw.block list list * t
| Rparagraph of string list
| Rfenced_code of int * int * Parser.code_block_kind * (string * string) * string list * attributes
| Rindented_code of string list
Expand All @@ -18,7 +15,7 @@ module Pre = struct

and t =
{
blocks: Raw.block list;
blocks: attributes Raw.block list;
next: container;
}

Expand All @@ -43,31 +40,31 @@ module Pre = struct
let finish = finish link_defs in
match next with
| Rblockquote state ->
mk (Raw.Blockquote (finish state)) :: blocks
Raw.Blockquote ([], (finish state)) :: blocks
| Rlist (ty, sp, _, _, closed_items, state) ->
mk (List (ty, sp, List.rev (finish state :: closed_items))) :: blocks
List ([], ty, sp, List.rev (finish state :: closed_items)) :: blocks
| Rparagraph l ->
let s = concat (List.map trim_left l) in
let defs, off = Parser.link_reference_definitions (Parser.P.of_string s) in
let s = String.sub s off (String.length s - off) |> String.trim in
link_defs := defs @ !link_defs;
if s = "" then blocks else mk (Paragraph s) :: blocks
if s = "" then blocks else Paragraph ([], s) :: blocks
| Rfenced_code (_, _, _kind, (label, _other), [], attr) ->
mk ~attr (Code_block (label, "")) :: blocks
Code_block (attr, label, "") :: blocks
| Rfenced_code (_, _, _kind, (label, _other), l, attr) ->
mk ~attr (Code_block (label, concat l)) :: blocks
Code_block (attr, label, concat l) :: blocks
| Rdef_list (term, defs) ->
let l, blocks =
match blocks with
| {Raw.bl_desc = Definition_list l; _} :: b -> l, b
| Definition_list (_, l) :: b -> l, b
| b -> [], b
in
mk (Raw.Definition_list (l @ [{ Raw.term; defs = List.rev defs}])) :: blocks
Definition_list ([], l @ [{ term; defs = List.rev defs}]) :: blocks
| Rindented_code l -> (* TODO: trim from the right *)
let rec loop = function "" :: l -> loop l | _ as l -> l in
mk (Code_block ("", concat (loop l))) :: blocks
Code_block ([], "", concat (loop l)) :: blocks
| Rhtml (_, l) ->
mk (Html_block (concat l)) :: blocks
Html_block ([], concat l) :: blocks
| Rempty ->
blocks

Expand All @@ -90,11 +87,11 @@ module Pre = struct
| Rempty, Lblockquote s ->
{blocks; next = Rblockquote (process empty s)}
| Rempty, Lthematic_break ->
{blocks = mk Thematic_break :: blocks; next = Rempty}
{blocks = Thematic_break [] :: blocks; next = Rempty}
| Rempty, Lsetext_heading (2, n) when n >= 3 ->
{blocks = mk Thematic_break :: blocks; next = Rempty}
{blocks = Thematic_break [] :: blocks; next = Rempty}
| Rempty, Latx_heading (level, text, attr) ->
{blocks = mk ~attr (Heading (level, text)) :: blocks; next = Rempty}
{blocks = Heading (attr, level, text) :: blocks; next = Rempty}
| Rempty, Lfenced_code (ind, num, q, info, a) ->
{blocks; next = Rfenced_code (ind, num, q, info, [], a)}
| Rempty, Lhtml (_, kind) ->
Expand All @@ -117,7 +114,7 @@ module Pre = struct
process {blocks = close {blocks; next}; next = Rempty} s
| Rparagraph (_ :: _ as lines), Lsetext_heading (level, _) ->
let text = String.trim (String.concat "\n" (List.rev lines)) in
{blocks = mk (Heading (level, text)) :: blocks; next = Rempty}
{blocks = Heading ([], level, text) :: blocks; next = Rempty}
| Rparagraph lines, _ ->
{blocks; next = Rparagraph (Sub.to_string s :: lines)}
| Rfenced_code (_, num, q, _, _, _), Lfenced_code (_, num', q1, ("", _), _) when num' >= num && q = q1 ->
Expand Down
7 changes: 5 additions & 2 deletions src/block.mli
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
open Ast

module Pre : sig
val of_channel: in_channel -> Ast.Raw.block list * Ast.link_def list
val of_string: string -> Ast.Raw.block list * Ast.link_def list

val of_channel: in_channel -> attributes Raw.block list * attributes link_def list
val of_string: string -> attributes Raw.block list * attributes link_def list
end
46 changes: 22 additions & 24 deletions src/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -121,37 +121,35 @@ and img label destination title attrs =
in
elt Inline "img" attrs None

and inline {il_desc; il_attributes = attr} =
match il_desc with
| Concat l ->
and inline = function
| Ast.Concat (_, l) ->
concat_map inline l
| Text t ->
| Text (_, t) ->
text t
| Emph il ->
| Emph (attr, il) ->
elt Inline "em" attr (Some (inline il))
| Strong il ->
| Strong (attr, il) ->
elt Inline "strong" attr (Some (inline il))
| Code s ->
| Code (attr, s) ->
elt Inline "code" attr (Some (text s))
| Hard_break ->
| Hard_break attr ->
concat (elt Inline "br" attr None) nl
| Soft_break ->
| Soft_break _ ->
nl
| Html body ->
| Html (_, body) ->
raw body
| Link {label; destination; title} ->
| Link (attr, {label; destination; title}) ->
url label destination title attr
| Image {label; destination; title} ->
| Image (attr, {label; destination; title}) ->
img label destination title attr

let rec block {bl_desc; bl_attributes = attr} =
match bl_desc with
| Blockquote q ->
let rec block = function
| Blockquote (attr, q) ->
elt Block "blockquote" attr
(Some (concat nl (concat_map block q)))
| Paragraph md ->
| Paragraph (attr, md) ->
elt Block "p" attr (Some (inline md))
| List (ty, sp, bl) ->
| List (attr, ty, sp, bl) ->
let name = match ty with Ordered _ -> "ol" | Bullet _ -> "ul" in
let attr =
match ty with
Expand All @@ -162,26 +160,26 @@ let rec block {bl_desc; bl_attributes = attr} =
in
let li t =
let block' t =
match t.bl_desc, sp with
| Paragraph t, Tight -> concat (inline t) nl
match t, sp with
| Paragraph (_, t), Tight -> concat (inline t) nl
| _ -> block t
in
let nl = if sp = Tight then Null else nl in
elt Block "li" [] (Some (concat nl (concat_map block' t))) in
elt Block name attr (Some (concat nl (concat_map li bl)))
| Code_block (label, code) ->
| Code_block (attr, label, code) ->
let code_attr =
if String.trim label = "" then []
else ["class", "language-" ^ label]
in
let c = text code in
elt Block "pre" attr
(Some (elt Inline "code" code_attr (Some c)))
| Thematic_break ->
| Thematic_break attr ->
elt Block "hr" attr None
| Html_block body ->
| Html_block (_, body) ->
raw body
| Heading (level, text) ->
| Heading (attr, level, text) ->
let name =
match level with
| 1 -> "h1"
Expand All @@ -193,7 +191,7 @@ let rec block {bl_desc; bl_attributes = attr} =
| _ -> "p"
in
elt Block name attr (Some (inline text))
| Definition_list l ->
| Definition_list (attr, l) ->
let f {term; defs} =
concat
(elt Block "dt" [] (Some (inline term)))
Expand Down
2 changes: 1 addition & 1 deletion src/html.mli
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type t =
| Null
| Concat of t * t

val of_doc : block list -> t
val of_doc : attributes block list -> t

val to_string : t -> string

Expand Down
4 changes: 2 additions & 2 deletions src/omd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ module Pre = Block.Pre

include Ast

type doc = block list
type doc = attributes block list

let parse_inline defs s =
Parser.inline defs (Parser.P.of_string s)

let parse_inlines (md, defs) =
let defs =
let f (def : link_def) = {def with label = Parser.normalize def.label} in
let f (def : attributes link_def) = {def with label = Parser.normalize def.label} in
List.map f defs
in
List.map (Mapper.map (parse_inline defs)) md
Expand Down
Loading

0 comments on commit 2bba77e

Please sign in to comment.