Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial cleanup and refactoring of the parser #286

Merged
merged 31 commits into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
e436c57
Add some documentation to cryptic Sub functions
shonfeder Sep 3, 2022
3adc1da
Factor out string slice access functions
shonfeder Sep 3, 2022
5955a32
Rename `Sub.tails` to `Sub.drop`
shonfeder Sep 3, 2022
147b7d4
Rename "heads" to "take"
shonfeder Sep 4, 2022
153bd13
Move `Sub` module into strSlice module file
shonfeder Sep 4, 2022
2aaddb3
Add some comments and improve organization
shonfeder Sep 4, 2022
f7bc5b0
Factor out repeated length checks
shonfeder Sep 4, 2022
9b98a62
Define peek_exn via peek
shonfeder Sep 4, 2022
63a3709
Document the parser helpers
shonfeder Sep 5, 2022
ed6f2ae
Clean up sp3
shonfeder Sep 5, 2022
dabd83c
Remove (most) duplicates of ws testing logic
shonfeder Sep 5, 2022
16747cd
Move char preds together
shonfeder Sep 5, 2022
e842b90
Add drop_while and drop_last_while
shonfeder Sep 5, 2022
ec0e52d
Factor out white space trimming
shonfeder Sep 5, 2022
fa6ae6c
Clarify comment
shonfeder Sep 5, 2022
75805d8
Refactor thematic_break parsing
shonfeder Sep 5, 2022
d3535dc
Clean up setext_heading
shonfeder Sep 5, 2022
60f750b
Add stdcompat dependency
shonfeder Sep 5, 2022
28b38e5
Remove unneeded disabled warning
shonfeder Sep 5, 2022
46ee44c
Add index, split_at, and fold_left
shonfeder Sep 6, 2022
7f88563
Further refactor thematic_break
shonfeder Sep 6, 2022
4a17175
Simplify setext_heading again
shonfeder Sep 6, 2022
4527c6a
Replace custom Compat module with Stdcompat
shonfeder Sep 6, 2022
3cafd16
Fix formatting
shonfeder Sep 6, 2022
d2666ce
Use inline records for Lsetext_heading
shonfeder Sep 8, 2022
6611e25
Remove ensure_chars_remain
shonfeder Sep 8, 2022
ff1799d
Drop use of "Sub" alias for StrSlice
shonfeder Sep 8, 2022
7b1ea60
Add Compat module back
shonfeder Sep 8, 2022
606e2c0
Try to fix name clash
shonfeder Sep 8, 2022
e1f1f94
remove compat module
tatchi Sep 9, 2022
612f35a
Raise minimum ocaml version to 4.08
shonfeder Sep 12, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ jobs:
- windows-latest
ocaml-compiler:
# Decision on version matrix informed by https://discuss.ocaml.org/t/which-ocaml-compiler-versions-should-we-run-against-in-ci/7933/2
- 4.05.0
# But has gradually inched up due to signs of bitrot on earlier versions
# such as https://github.com/thierry-martinez/stdcompat/issues/26
- 4.08.0
- 4.14.x
runs-on: ${{ matrix.os }}
steps:
Expand Down
3 changes: 2 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ Additionally, OMD implements a few Github markdown features, an
extension mechanism, and some other features. Note that the opam
package installs both the OMD library and the command line tool `omd`.")
(tags (org:ocamllabs org:mirage))
(depends (ocaml (>= 4.05))
(depends (ocaml (>= 4.08))
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that I'm suggesting raising the minimum OCaml version here. I think this library is high-level enough, and has few enough dependencies, that we don't need to weigh ourselves down trying to maintain backwards compatibility with OCaml versions that much more central libraries have already dropped support for.

stdcompat
uutf
uucp
uunf
Expand Down
3 changes: 2 additions & 1 deletion omd.opam
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ homepage: "https://github.com/ocaml/omd"
bug-reports: "https://github.com/ocaml/omd/issues"
depends: [
"dune" {>= "2.7"}
"ocaml" {>= "4.05"}
"ocaml" {>= "4.08"}
"stdcompat"
"uutf"
"uucp"
"uunf"
Expand Down
43 changes: 22 additions & 21 deletions src/block.ml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
open Ast
module Sub = Parser.Sub

module Pre = struct
type container =
Expand Down Expand Up @@ -90,34 +89,34 @@ module Pre = struct
| Rempty, Lblockquote s -> { blocks; next = Rblockquote (process empty s) }
| Rempty, Lthematic_break ->
{ blocks = Thematic_break [] :: blocks; next = Rempty }
| Rempty, Lsetext_heading (2, n) when n >= 3 ->
| Rempty, Lsetext_heading { level = 2; len } when len >= 3 ->
{ blocks = Thematic_break [] :: blocks; next = Rempty }
| Rempty, Latx_heading (level, text, attr) ->
{ blocks = Heading (attr, level, text) :: blocks; next = Rempty }
| Rempty, Lfenced_code (ind, num, q, info, a) ->
{ blocks; next = Rfenced_code (ind, num, q, info, [], a) }
| Rempty, Lhtml (_, kind) -> process { blocks; next = Rhtml (kind, []) } s
| Rempty, Lindented_code s ->
{ blocks; next = Rindented_code [ Sub.to_string s ] }
{ blocks; next = Rindented_code [ StrSlice.to_string s ] }
| Rempty, Llist_item (kind, indent, s) ->
{ blocks
; next = Rlist (kind, Tight, false, indent, [], process empty s)
}
| Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _) ->
{ blocks; next = Rparagraph [ Sub.to_string s ] }
{ blocks; next = Rparagraph [ StrSlice.to_string s ] }
| Rparagraph [ h ], Ldef_list def ->
{ blocks; next = Rdef_list (h, [ def ]) }
| Rdef_list (term, defs), Ldef_list def ->
{ blocks; next = Rdef_list (term, def :: defs) }
| Rparagraph _, Llist_item ((Ordered (1, _) | Bullet _), _, s1)
when not (Parser.is_empty (Parser.P.of_string (Sub.to_string s1))) ->
when not (Parser.is_empty (Parser.P.of_string (StrSlice.to_string s1))) ->
process { blocks = close { blocks; next }; next = Rempty } s
| ( Rparagraph _
, ( Lempty | Lblockquote _ | Lthematic_break | Latx_heading _
| Lfenced_code _
| Lhtml (true, _) ) ) ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rparagraph (_ :: _ as lines), Lsetext_heading (level, _) ->
| Rparagraph (_ :: _ as lines), Lsetext_heading { level; _ } ->
let text = concat (List.map trim_left lines) in
let defs, text = link_reference_definitions text in
link_defs := defs @ !link_defs;
Expand All @@ -130,44 +129,46 @@ module Pre = struct

In that case, there's nothing to turn into a Heading. We can simply add `===` as an Rparagraph
*)
{ blocks; next = Rparagraph [ Sub.to_string s ] }
{ blocks; next = Rparagraph [ StrSlice.to_string s ] }
else { blocks = Heading ([], level, text) :: blocks; next = Rempty }
| Rparagraph lines, _ ->
{ blocks; next = Rparagraph (Sub.to_string s :: lines) }
{ blocks; next = Rparagraph (StrSlice.to_string s :: lines) }
| Rfenced_code (_, num, q, _, _, _), Lfenced_code (_, num', q1, ("", _), _)
when num' >= num && q = q1 ->
{ blocks = close { blocks; next }; next = Rempty }
| Rfenced_code (ind, num, q, info, lines, a), _ ->
let s =
let ind = min (Parser.indent s) ind in
if ind > 0 then Sub.offset ind s else s
if ind > 0 then StrSlice.offset ind s else s
in
{ blocks
; next = Rfenced_code (ind, num, q, info, Sub.to_string s :: lines, a)
; next =
Rfenced_code (ind, num, q, info, StrSlice.to_string s :: lines, a)
}
| Rdef_list (term, d :: defs), Lparagraph ->
{ blocks
; next = Rdef_list (term, (d ^ "\n" ^ Sub.to_string s) :: defs)
; next = Rdef_list (term, (d ^ "\n" ^ StrSlice.to_string s) :: defs)
}
| Rdef_list _, _ ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rindented_code lines, Lindented_code s ->
{ blocks; next = Rindented_code (Sub.to_string s :: lines) }
{ blocks; next = Rindented_code (StrSlice.to_string s :: lines) }
| Rindented_code lines, Lempty ->
let n = min (Parser.indent s) 4 in
let s = Sub.offset n s in
{ blocks; next = Rindented_code (Sub.to_string s :: lines) }
let s = StrSlice.offset n s in
{ blocks; next = Rindented_code (StrSlice.to_string s :: lines) }
| Rindented_code _, _ ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rhtml ((Hcontains l as k), lines), _
when List.exists (fun t -> Sub.contains t s) l ->
{ blocks = close { blocks; next = Rhtml (k, Sub.to_string s :: lines) }
when List.exists (fun t -> StrSlice.contains t s) l ->
{ blocks =
close { blocks; next = Rhtml (k, StrSlice.to_string s :: lines) }
; next = Rempty
}
| Rhtml (Hblank, _), Lempty ->
{ blocks = close { blocks; next }; next = Rempty }
| Rhtml (k, lines), _ ->
{ blocks; next = Rhtml (k, Sub.to_string s :: lines) }
{ blocks; next = Rhtml (k, StrSlice.to_string s :: lines) }
| Rblockquote state, Lblockquote s ->
{ blocks; next = Rblockquote (process state s) }
| Rlist (kind, style, _, ind, items, state), Lempty ->
Expand All @@ -179,7 +180,7 @@ module Pre = struct
process { blocks = close { blocks; next }; next = Rempty } s
| Rlist (kind, style, prev_empty, ind, items, state), _
when Parser.indent s >= ind ->
let s = Sub.offset ind s in
let s = StrSlice.offset ind s in
let state = process state s in
let style =
let rec new_block = function
Expand Down Expand Up @@ -221,17 +222,17 @@ module Pre = struct
| Rparagraph (_ :: _ as lines) -> (
match classify_line s with
| Parser.Lparagraph | Lindented_code _
| Lsetext_heading (1, _)
| Lsetext_heading { level = 1; _ }
| Lhtml (false, _) ->
Some (Rparagraph (Sub.to_string s :: lines))
Some (Rparagraph (StrSlice.to_string s :: lines))
| _ -> None)
| _ -> None
in
match loop next with
| Some next -> { blocks; next }
| None -> process { blocks = close { blocks; next }; next = Rempty } s)

let process link_defs state s = process link_defs state (Sub.of_string s)
let process link_defs state s = process link_defs state (StrSlice.of_string s)

let of_channel ic =
let link_defs = ref [] in
Expand Down
52 changes: 0 additions & 52 deletions src/compat.ml

This file was deleted.

3 changes: 1 addition & 2 deletions src/dune
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
(library
(name omd)
(public_name omd)
(libraries uutf uucp uunf)
(flags :standard -w -30))
(libraries uutf uucp uunf stdcompat))

(rule
(with-stdout-to
Expand Down
Loading