From fba19f78635a60088ac2e98c69c530d3c8f39c24 Mon Sep 17 00:00:00 2001 From: Gabriel Scherer Date: Tue, 29 Dec 2020 22:37:17 +0100 Subject: [PATCH] reimplement whitespace handling for standalone tokens Whitespace handling was previously done by a post-processor on the token stream produced by the lexer. We reimplement it as a post-processor on the *output* stream after rendering. This means that information from the fully-parsed AST can be used to make rendering decisions. For example, non-rendered sections (because their corresponding json data is empty) are now handled as standalone tokens. This gives nicer output, consider for example: {{#foo}}Foo is present.{{/foo}} {{^foo}}Foo is absent. {{/foo}} This used to produce two lines of output, one being empty. Now this only produces one line of input (if 'foo' is a scalar or object, not a list). (Interestingly, such examples are not tested in the Mustache specification testsuite. Both the previous implementation and the current implementation pass all the specification tests, which suggests that the tests are not comprehensive enough.) Note: The real motivation for this change is not inverted sections, but rather indentation and whitespace handling for partial parameters -- when implementing template inheritance. --- bin/test/errors/parsing-errors.t | 2 +- bin/test/errors/render-errors.t/run.t | 1 - lib/mustache.ml | 276 +++++++++++++++++++------- lib/mustache.mli | 26 +-- lib/mustache_lexer.mll | 138 +++++-------- lib/mustache_parser.mly | 21 +- lib/mustache_types.ml | 12 +- lib_test/test_mustache.ml | 15 +- 8 files changed, 295 insertions(+), 196 deletions(-) diff --git a/bin/test/errors/parsing-errors.t b/bin/test/errors/parsing-errors.t index 36c66f1..77a7d94 100644 --- a/bin/test/errors/parsing-errors.t +++ b/bin/test/errors/parsing-errors.t @@ -111,7 +111,7 @@ Mismatch between section-start and section-end: $ echo "{{#bar}} {{#foo}} {{.}} {{/bar}} {{/foo}}" > $PROBLEM $ mustache foo.json $PROBLEM Template parse error: - File "wrong-nesting.mustache", lines 1-2, characters 41-0: + File "wrong-nesting.mustache", line 1, characters 32-33: Section mismatch: {{#foo}} is closed by {{/bar}}. [3] diff --git a/bin/test/errors/render-errors.t/run.t b/bin/test/errors/render-errors.t/run.t index 55abe4e..fae9076 100644 --- a/bin/test/errors/render-errors.t/run.t +++ b/bin/test/errors/render-errors.t/run.t @@ -13,7 +13,6 @@ reference.json and reference.mustache work well together, there is no error. The variable "name" has value "Some Name". - Last line. diff --git a/lib/mustache.ml b/lib/mustache.ml index abde6ac..02c1e6b 100644 --- a/lib/mustache.ml +++ b/lib/mustache.ml @@ -68,7 +68,7 @@ let dummy_loc = let rec erase_locs { Locs.desc; _ } = erase_locs_desc desc and erase_locs_desc = function - | Locs.String s -> No_locs.String s + | Locs.String (ty, s) -> No_locs.String (ty, s) | Locs.Escaped s -> No_locs.Escaped s | Locs.Section s -> No_locs.Section (erase_locs_section s) | Locs.Unescaped s -> No_locs.Unescaped s @@ -78,16 +78,15 @@ and erase_locs_desc = function | Locs.Comment s -> No_locs.Comment s and erase_locs_section { Locs.name; Locs.contents } = { No_locs.name; No_locs.contents = erase_locs contents } -and erase_locs_partial { Locs.indent; Locs.name; Locs.contents } = - { No_locs.indent; - No_locs.name; +and erase_locs_partial { Locs.name; Locs.contents } = + { No_locs.name; No_locs.contents = lazy (option_map (Lazy.force contents) erase_locs) } let rec add_dummy_locs t = { Locs.loc = dummy_loc; Locs.desc = add_dummy_locs_desc t } and add_dummy_locs_desc = function - | No_locs.String s -> Locs.String s + | No_locs.String (ty, s) -> Locs.String (ty, s) | No_locs.Escaped s -> Locs.Escaped s | No_locs.Section s -> Locs.Section (add_dummy_locs_section s) | No_locs.Unescaped s -> Locs.Unescaped s @@ -98,9 +97,8 @@ and add_dummy_locs_desc = function | No_locs.Comment s -> Locs.Comment s and add_dummy_locs_section { No_locs.name; No_locs.contents } = { Locs.name; Locs.contents = add_dummy_locs contents } -and add_dummy_locs_partial { No_locs.indent; No_locs.name; No_locs.contents } = - { Locs.indent; - Locs.name; +and add_dummy_locs_partial { No_locs.name; No_locs.contents } = + { Locs.name; Locs.contents = lazy (option_map (Lazy.force contents) add_dummy_locs) } (* Printing: defined on the ast without locations. *) @@ -108,7 +106,7 @@ and add_dummy_locs_partial { No_locs.indent; No_locs.name; No_locs.contents } = let rec pp fmt = let open No_locs in function - | String s -> + | String (_ty, s) -> Format.pp_print_string fmt s | Escaped s -> @@ -164,9 +162,9 @@ let parse_lx (lexbuf: Lexing.lexbuf) : Locs.t = raise (Template_parse_error { loc; kind }) in try - MenhirLib.Convert.Simplified.traditional2revised - Mustache_parser.mustache - Mustache_lexer.(handle_standalone mustache lexbuf) + Mustache_parser.mustache + Mustache_lexer.mustache + lexbuf with | Mustache_lexer.Error msg -> raise_err lexbuf (Lexing msg) @@ -383,6 +381,147 @@ module Render = struct open Locs + (* Per-line whitespace handling. + + The Mustache specification is careful with its treatment of + whitespace. In particular, tags that do not themselves expand to + visible content are defined as "standalone", with the requirement + that if one or several standalone tags "stand alone" in a line + (there is nothing else but whitespace), the whitespace of this + line should be ommitted. + + For example, this means that: + + {{#foo}} + I can access {{var}} inside the section. + {{/foo} + + takes, once rendered, only 1 line instead of 3: the newlines + after {{#foo}} and {{/foo}} are part of the "standalone + whitespace", so they are not included in the output. This is what + the user expects. + + We implement this logic by adding rendering the template into + a "line buffer", which stores elements to be printed until the + line is full, and at this point decides whether whitespace should + be checked or not. + + The line buffer is also used to keep track of the line + indentation, which is useful for the rendering of {{>partial}} + tags: the spec mandates that when a partial is used standalone, + then its indentation on the line should be added to all the lines + of its rendered content. + *) + module Line : sig + type state + + type output_token = + | Raw of string_type * string + | Data of string + | Partial of (state -> unit) + + val init : Buffer.t -> global_indentation:int -> state + + val print : state -> output_token -> unit + + val standalone_tag : state -> unit + + val standalone_block : state -> (unit -> unit) -> unit + + val end_of_input : state -> unit + end = struct + type output_token = + | Raw of string_type * string + | Data of string + | Partial of (state -> unit) + + and state = { + buf: Buffer.t; (* output buffer *) + global_indentation: int; (* global indentation of the current printing context *) + mutable indentation: int; (* number of spaces at the beginning of the line *) + mutable standalone: bool; (* have we seen any standalone token? *) + mutable visible: bool; (* have we seen any visible token? *) + mutable rev_buffer: output_token list; (* delayed printing to flush *) + } + + let init buf ~global_indentation = { + buf; + global_indentation; + indentation = 0; + standalone = false; + visible = false; + rev_buffer = []; + } + + let reset ls = + ls.indentation <- 0; + ls.standalone <- false; + ls.visible <- false; + ls.rev_buffer <- []; + () + + let flush line = + let standalone_line = + line.standalone && not line.visible in + let tokens = + let rec rev acc = function + | [] -> acc + | Raw ((Blank | Newline), _) :: rest when standalone_line -> rev acc rest + | token :: rest -> rev (token :: acc) rest + in + rev [] line.rev_buffer + in + let output_token = function + | Raw (_, s) -> Buffer.add_string line.buf s + | Data s -> + Buffer.add_string line.buf s + | Partial output_partial -> + let partial_indentation = + if standalone_line + then line.global_indentation + line.indentation + else line.global_indentation + in + let partial_line = + (* The partial template comes from a different source file, + so it uses a different line state (with the same output buffer). *) + init line.buf ~global_indentation:partial_indentation + in + output_partial partial_line + in + begin + if not (standalone_line || tokens = []) then + Buffer.add_string line.buf (String.make line.global_indentation ' '); + List.iter output_token tokens; + reset line; + end + + let standalone_tag line = + line.standalone <- true + + let standalone_block line output_fun = + standalone_tag line; + output_fun (); + standalone_tag line + + let print (line : state) token = + line.rev_buffer <- token :: line.rev_buffer; + begin match token with + | Raw (Visible, _) | Data _ -> + line.visible <- true; + | Partial _ -> + line.standalone <- true; + | Raw (Blank, s) -> + let beginning_of_line = not (line.standalone || line.visible) in + if beginning_of_line then + line.indentation <- line.indentation + String.length s; + | Raw (Newline, _) -> + flush line; + end + + let end_of_input line = + flush line + end + (* Render a template whose partials have already been expanded. Note: the reason we expand partials once before rendering, @@ -397,75 +536,60 @@ module Render = struct ?(strict = true) (buf : Buffer.t) (m : Locs.t) (js : Json.t) = - let print_indent indent = - for _ = 0 to indent - 1 do - Buffer.add_char buf ' ' - done - in - - let beginning_of_line = ref true in - - let align indent = - if !beginning_of_line then ( - print_indent indent; - beginning_of_line := false - ) - in - - let print_indented_string indent s = - let lines = String.split_on_char '\n' s in - align indent; Buffer.add_string buf (List.hd lines); - List.iter (fun line -> - Buffer.add_char buf '\n'; - beginning_of_line := true; - if line <> "" then ( - align indent; - Buffer.add_string buf line; - ) - ) (List.tl lines) - in - - let rec render indent m (ctxs : Contexts.t) = + let rec render (line : Line.state) m (ctxs : Contexts.t) = let loc = m.loc in match m.desc with - | String s -> - print_indented_string indent s + | String (string_type, s) -> + Line.print line (Raw (string_type, s)) | Escaped name -> - align indent; - Buffer.add_string buf (escape_html (Lookup.str ~strict ~loc ~key:name ctxs)) + let data = escape_html (Lookup.str ~strict ~loc ~key:name ctxs) in + Line.print line (Data data) | Unescaped name -> - align indent; - Buffer.add_string buf (Lookup.str ~strict ~loc ~key:name ctxs) + let data = Lookup.str ~strict ~loc ~key:name ctxs in + Line.print line (Data data) | Inverted_section s -> - if Lookup.inverted ctxs ~loc ~key:s.name - then render indent s.contents ctxs + Line.standalone_block line (fun () -> + if Lookup.inverted ctxs ~loc ~key:s.name + then render line s.contents ctxs; + ) | Section s -> - let enter ctx = render indent s.contents (Contexts.add ctxs ctx) in + let enter ctx = + Line.standalone_block line (fun () -> + render line s.contents (Contexts.add ctxs ctx) + ) + in begin match Lookup.section ~strict ctxs ~loc ~key:s.name with | `Bool false -> () | `A elems -> List.iter enter elems | elem -> enter elem - end + end; - | Partial { indent = partial_indent; name; contents } -> + | Partial { name; contents } -> + Line.standalone_tag line; begin match (Lazy.force contents, strict) with - | Some p, _ -> render (indent + partial_indent) p ctxs + | Some p, _ -> + Line.print line (Partial (fun partial_line -> + render partial_line p ctxs; + Line.end_of_input partial_line)) | None, false -> () | None, true -> raise_err loc (Missing_partial { name }) - end + end; - | Comment _c -> () + | Comment _c -> + Line.standalone_tag line; | Concat templates -> - List.iter (fun x -> render indent x ctxs) templates - - in render 0 m (Contexts.start (Json.value js)) + List.iter (fun x -> render line x ctxs) templates; + in + let line = Line.init buf ~global_indentation:0 in + render line m (Contexts.start (Json.value js)); + Line.end_of_input line end (* Packing up everything in two modules of similar signature: @@ -488,7 +612,7 @@ module Without_locations = struct let rec fold ~string ~section ~escaped ~unescaped ~partial ~comment ~concat t = let go = fold ~string ~section ~escaped ~unescaped ~partial ~comment ~concat in match t with - | String s -> string s + | String (ty, s) -> string ty s | Escaped s -> escaped s | Unescaped s -> unescaped s | Comment s -> comment s @@ -498,35 +622,36 @@ module Without_locations = struct section ~inverted:true name (go contents) | Concat ms -> concat (List.map ms ~f:go) - | Partial p -> partial p.indent p.name p.contents + | Partial p -> partial p.name p.contents module Infix = struct let (^) y x = Concat [x; y] end - let raw s = String s + let string ty s = String (ty, s) + let concat t = Concat t + let raw s = concat (Mustache_lexer.process_string string s) let escaped s = Escaped s let unescaped s = Unescaped s let section n c = Section { name = n ; contents = c } let inverted_section n c = Inverted_section { name = n ; contents = c } - let partial ?(indent = 0) n c = Partial { indent ; name = n ; contents = c } - let concat t = Concat t + let partial n c = Partial { name = n ; contents = c } let comment s = Comment s let rec expand_partials (partials : name -> t option) : t -> t = let section ~inverted = if inverted then inverted_section else section in - let partial indent name contents = + let partial name contents = let contents' = lazy ( match Lazy.force contents with | None -> option_map (partials name) (expand_partials partials) | Some t_opt -> Some t_opt ) in - partial ~indent name contents' + partial name contents' in - fold ~string:raw ~section ~escaped ~unescaped ~partial ~comment ~concat + fold ~string ~section ~escaped ~unescaped ~partial ~comment ~concat let render_buf ?strict ?(partials = fun _ -> None) buf (m : t) (js : Json.t) = @@ -563,7 +688,7 @@ module With_locations = struct let go = fold ~string ~section ~escaped ~unescaped ~partial ~comment ~concat in let { desc; loc } = t in match desc with - | String s -> string ~loc s + | String (ty, s) -> string ~loc ty s | Escaped s -> escaped ~loc s | Unescaped s -> unescaped ~loc s | Comment s -> comment ~loc s @@ -573,13 +698,15 @@ module With_locations = struct section ~loc ~inverted:true name (go contents) | Concat ms -> concat ~loc (List.map ms ~f:go) - | Partial p -> partial ~loc p.indent p.name p.contents + | Partial p -> partial ~loc p.name p.contents module Infix = struct let (^) t1 t2 = { desc = Concat [t1; t2]; loc = dummy_loc } end - let raw ~loc s = { desc = String s; loc } + let string ~loc ty s = { desc = String (ty, s); loc } + let concat ~loc t = { desc = Concat t; loc } + let raw ~loc s = concat ~loc (Mustache_lexer.process_string (string ~loc) s) let escaped ~loc s = { desc = Escaped s; loc } let unescaped ~loc s = { desc = Unescaped s; loc } let section ~loc n c = @@ -588,26 +715,25 @@ module With_locations = struct let inverted_section ~loc n c = { desc = Inverted_section { name = n; contents = c }; loc } - let partial ~loc ?(indent = 0) n c = - { desc = Partial { indent; name = n; contents = c }; + let partial ~loc n c = + { desc = Partial { name = n; contents = c }; loc } - let concat ~loc t = { desc = Concat t; loc } let comment ~loc s = { desc = Comment s; loc } let rec expand_partials (partials : name -> t option) : t -> t = let section ~loc ~inverted = if inverted then inverted_section ~loc else section ~loc in - let partial ~loc indent name contents = + let partial ~loc name contents = let contents' = lazy ( match Lazy.force contents with | None -> option_map (partials name) (expand_partials partials) | Some t_opt -> Some t_opt ) in - partial ~loc ~indent name contents' + partial ~loc name contents' in - fold ~string:raw ~section ~escaped ~unescaped ~partial ~comment ~concat + fold ~string ~section ~escaped ~unescaped ~partial ~comment ~concat let render_buf ?strict ?(partials = fun _ -> None) buf (m : t) (js : Json.t) = let m = expand_partials partials m in diff --git a/lib/mustache.mli b/lib/mustache.mli index 6e179f8..1c16145 100644 --- a/lib/mustache.mli +++ b/lib/mustache.mli @@ -19,8 +19,10 @@ end type name = string type dotted_name = string list +type string_type = Newline | Blank | Visible + type t = - | String of string + | String of string_type * string | Escaped of dotted_name | Section of section | Unescaped of dotted_name @@ -32,8 +34,7 @@ and section = { name: dotted_name; contents: t } and partial = - { indent: int; - name: name; + { name: name; contents: t option Lazy.t } type loc = @@ -129,11 +130,12 @@ val render : @param unescaped Applied to ["name"] for occurrences of [{{{name}}}]. @param partial Applied to ["box"] for occurrences of [{{> box}}]. @param comment Applied to ["comment"] for occurrences of [{{! comment}}]. *) -val fold : string: (string -> 'a) -> +val fold : + string: (string_type -> string -> 'a) -> section: (inverted:bool -> dotted_name -> 'a -> 'a) -> escaped: (dotted_name -> 'a) -> unescaped: (dotted_name -> 'a) -> - partial: (int -> name -> t option Lazy.t -> 'a) -> + partial: (name -> t option Lazy.t -> 'a) -> comment: (string -> 'a) -> concat:('a list -> 'a) -> t -> 'a @@ -169,7 +171,7 @@ val inverted_section : dotted_name -> t -> t val section : dotted_name -> t -> t (** [{{> box}}] *) -val partial : ?indent:int -> name -> t option Lazy.t -> t +val partial : name -> t option Lazy.t -> t (** [{{! this is a comment}}] *) val comment : string -> t @@ -185,7 +187,7 @@ module With_locations : sig type nonrec loc = loc = { loc_start: Lexing.position; loc_end: Lexing.position } type desc = - | String of string + | String of string_type * string | Escaped of dotted_name | Section of section | Unescaped of dotted_name @@ -197,8 +199,7 @@ module With_locations : sig { name: dotted_name; contents: t } and partial = - { indent: int; - name: name; + { name: name; contents: t option Lazy.t } and t = { loc : loc; @@ -266,11 +267,12 @@ module With_locations : sig @param unescaped Applied to ["name"] for occurrences of [{{{name}}}]. @param partial Applied to ["box"] for occurrences of [{{> box}}]. @param comment Applied to ["comment"] for occurrences of [{{! comment}}]. *) - val fold : string: (loc:loc -> string -> 'a) -> + val fold : + string: (loc:loc -> string_type -> string -> 'a) -> section: (loc:loc -> inverted:bool -> dotted_name -> 'a -> 'a) -> escaped: (loc:loc -> dotted_name -> 'a) -> unescaped: (loc:loc -> dotted_name -> 'a) -> - partial: (loc:loc -> int -> name -> t option Lazy.t -> 'a) -> + partial: (loc:loc -> name -> t option Lazy.t -> 'a) -> comment: (loc:loc -> string -> 'a) -> concat:(loc:loc -> 'a list -> 'a) -> t -> 'a @@ -304,7 +306,7 @@ module With_locations : sig val section : loc:loc -> dotted_name -> t -> t (** [{{> box}}] *) - val partial : loc:loc -> ?indent:int -> name -> t option Lazy.t -> t + val partial : loc:loc -> name -> t option Lazy.t -> t (** [{{! this is a comment}}] *) val comment : loc:loc -> string -> t diff --git a/lib/mustache_lexer.mll b/lib/mustache_lexer.mll index d32e462..f5a8fef 100644 --- a/lib/mustache_lexer.mll +++ b/lib/mustache_lexer.mll @@ -22,6 +22,7 @@ { open Lexing open Mustache_parser + open Mustache_types exception Error of string @@ -47,6 +48,52 @@ let check_mustaches ~expected ~lexed = if expected <> lexed then raise (Error (Printf.sprintf "'%s' expected" expected)) + + let is_blank s = + let blank = ref true in + let i = ref 0 in + let len = String.length s in + while !blank && !i < len do + begin match s.[!i] with + | ' ' | '\t' -> () + | _ -> blank := false + end; + incr i + done; + !blank + + let string_type s = + if s = "\n" || s = "\r\n" then Newline + else if is_blank s then Blank + else Visible + + (* take a user-provided string, which may contain newlines, + and split it into raw chunks following the structure + expected by the rendering pass *) + let process_string (f : string_type -> string -> 'a) s : 'a list = + let len = String.length s in + let chunk_type chunk = if is_blank chunk then Blank else Visible in + let rec process acc start_pos = + if start_pos >= len then List.rev acc + else match String.index_from_opt s start_pos '\n' with + | None -> + let chunk = String.sub s start_pos (len - start_pos) in + process (f (chunk_type chunk) chunk :: acc) len + | Some newline_pos -> + let newline_start = + if newline_pos > 0 && s.[newline_pos - 1] = '\r' + then newline_pos - 1 + else newline_pos + in + let newline = String.sub s newline_start (newline_pos - newline_start + 1) in + if newline_start = start_pos then + process (f Newline newline :: acc) (newline_pos + 1) + else + let chunk = String.sub s start_pos (newline_start - start_pos) in + process (f Newline newline :: f (chunk_type chunk) chunk :: acc) (newline_pos + 1) + in + process [] 0 + } let blank = [' ' '\t']* @@ -90,9 +137,9 @@ and end_on expected = parse and comment acc = parse | "}}" { String.concat "" (List.rev acc) } - | raw newline { new_line lexbuf; comment ((lexeme lexbuf) :: acc) lexbuf } - | raw { comment ((lexeme lexbuf) :: acc) lexbuf } | ['{' '}'] { comment ((lexeme lexbuf) :: acc) lexbuf } + | raw { comment ((lexeme lexbuf) :: acc) lexbuf } + | newline { new_line lexbuf; comment ((lexeme lexbuf) :: acc) lexbuf } | eof { raise (Error "non-terminated comment") } and mustache = parse @@ -102,86 +149,11 @@ and mustache = parse | "{{#" { OPEN_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) } | "{{^" { OPEN_INVERTED_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) } | "{{/" { CLOSE_SECTION (lex_tag lexbuf space ident (end_on "}}") |> split_ident) } - | "{{>" { PARTIAL (0, lex_tag lexbuf space partial_name (end_on "}}")) } + | "{{>" { PARTIAL (lex_tag lexbuf space partial_name (end_on "}}")) } | "{{!" { COMMENT (tok_arg lexbuf (comment [])) } - | raw newline { new_line lexbuf; RAW (lexeme lexbuf) } - | raw { RAW (lexeme lexbuf) } - | ['{' '}'] { RAW (lexeme lexbuf) } + | ['{' '}'] { RAW (Visible, lexeme lexbuf) } + | raw { let raw = lexeme lexbuf in + let ty = if is_blank raw then Blank else Visible in + RAW (ty, raw) } + | newline { new_line lexbuf; RAW (Newline, lexeme lexbuf) } | eof { EOF } - -{ - let handle_standalone lexer lexbuf = - let ends_with_newline s = - String.length s > 0 && - s.[String.length s - 1] = '\n' - in - let get_loc () = lexbuf.Lexing.lex_curr_p in - let get_tok () = - let loc_start = get_loc () in - let tok = lexer lexbuf in - let loc_end = get_loc () in - (tok, loc_start, loc_end) - in - let slurp_line () = - let rec loop acc = - let tok = get_tok () in - match tok with - | EOF, _, _ -> tok :: acc - | RAW s, _, _ when ends_with_newline s -> tok :: acc - | _ -> loop (tok :: acc) - in - List.rev (loop []) - in - let is_blank s = - let ret = ref true in - for i = 0 to String.length s - 1 do - if not (List.mem s.[i] [' '; '\t'; '\r'; '\n']) then - ret := false - done; - !ret - in - let skip_blanks l = - let rec loop skipped = function - | (RAW s, _, _) :: toks when is_blank s -> - loop (skipped + String.length s) toks - | toks -> (skipped, toks) - in - loop 0 l - in - let is_standalone toks = - let (skipped, toks) = skip_blanks toks in - match toks with - | ((OPEN_SECTION _ - | OPEN_INVERTED_SECTION _ - | CLOSE_SECTION _ - | PARTIAL _ - | COMMENT _), _, _) as tok :: toks' -> - let (_, toks_rest) = skip_blanks toks' in - begin match toks_rest with - | [] | [(EOF, _, _)] -> - let tok = - match tok with - | (PARTIAL (_, p), loc1, loc2) -> - (PARTIAL (skipped, p), loc1, loc2) - | _ -> tok - in - Some (tok, toks_rest) - | _ -> None - end - | _ -> None - in - - let buffer = ref [] in - fun () -> - match !buffer with - | tok :: toks -> - buffer := toks; tok - | [] -> - let toks = slurp_line () in - match is_standalone toks with - | Some (tok_standalone, toks_rest) -> - buffer := toks_rest; - tok_standalone - | None -> - buffer := List.tl toks; List.hd toks -} diff --git a/lib/mustache_parser.mly b/lib/mustache_parser.mly index cfe484b..5d31743 100644 --- a/lib/mustache_parser.mly +++ b/lib/mustache_parser.mly @@ -24,7 +24,7 @@ open Mustache_types open Mustache_types.Locs - let parse_section start_name end_name contents = + let parse_section start_name end_name contents : section = if start_name <> end_name then raise (Mismatched_section { start_name; end_name }); { contents; name = start_name } @@ -37,15 +37,12 @@ %} %token EOF -%token ESCAPE -%token UNESCAPE -%token OPEN_INVERTED_SECTION -%token OPEN_SECTION -%token CLOSE_SECTION -%token PARTIAL +%token ESCAPE UNESCAPE +%token OPEN_SECTION OPEN_INVERTED_SECTION CLOSE_SECTION +%token PARTIAL %token COMMENT -%token RAW +%token RAW %start mustache %type mustache @@ -71,18 +68,18 @@ mustache_element: | elt = ESCAPE { with_loc $sloc (Escaped elt) } | elt = PARTIAL { with_loc $sloc - (Partial { indent = fst elt; - name = snd elt; + (Partial { name = elt; contents = lazy None }) } | s = COMMENT { with_loc $sloc (Comment s) } | sec = section { sec } - | s = RAW { with_loc $sloc (String s) } + | r = RAW { let (string_type, s) = r in + with_loc $sloc (String (string_type, s)) } mustache_expr: | elts = list(mustache_element) { match elts with - | [] -> with_loc $sloc (String "") + | [] -> with_loc $sloc (String (Blank, "")) | [x] -> x | xs -> with_loc $sloc (Concat xs) } diff --git a/lib/mustache_types.ml b/lib/mustache_types.ml index 1d42411..a81c34d 100644 --- a/lib/mustache_types.ml +++ b/lib/mustache_types.ml @@ -37,11 +37,13 @@ let pp_dotted_name fmt = function let string_of_dotted_name n = Format.asprintf "%a" pp_dotted_name n +type string_type = Newline | Blank | Visible + module Locs = struct [@@@warning "-30"] type desc = - | String of string + | String of string_type * string | Escaped of dotted_name | Section of section | Unescaped of dotted_name @@ -53,8 +55,7 @@ module Locs = struct { name: dotted_name; contents: t } and partial = - { indent: int; - name: name; + { name: name; contents: t option Lazy.t } and t = { loc : loc; @@ -65,7 +66,7 @@ module No_locs = struct [@@@warning "-30"] type t = - | String of string + | String of string_type * string | Escaped of dotted_name | Section of section | Unescaped of dotted_name @@ -77,8 +78,7 @@ module No_locs = struct { name: dotted_name; contents: t } and partial = - { indent: int; - name: name; + { name: name; contents: t option Lazy.t } end diff --git a/lib_test/test_mustache.ml b/lib_test/test_mustache.ml index ed2bbec..13383a9 100644 --- a/lib_test/test_mustache.ml +++ b/lib_test/test_mustache.ml @@ -122,11 +122,11 @@ let tests_with_locs = With_locations.[ , concat ~loc:(mkloc (1, 0, 0, 4, 24, 31)) [ raw ~loc:(mkloc (1, 0, 0, 1, 0, 1)) " "; section ~loc:(mkloc (1, 0, 1, 4, 24, 31)) ["a"] ( - concat ~loc:(mkloc (2, 8, 12, 4, 24, 24)) [ + concat ~loc:(mkloc (2, 8, 12, 4, 24, 25)) [ raw ~loc:(mkloc (2, 8, 12, 2, 8, 13)) " "; escaped ~loc:(mkloc (2, 8, 13, 3, 17, 23)) ["x"]; raw ~loc:(mkloc (3, 17, 23, 4, 24, 24)) "\n"; - (* raw ~loc:(mkloc (4, 24, 24, 4, 24, 25)) " " *) + raw ~loc:(mkloc (4, 24, 24, 4, 24, 25)) " " ] ) ] @@ -141,7 +141,11 @@ let tests_with_locs = With_locations.[ , [ ( `A [], "" )]); (" {{!x}} " - , comment ~loc:(mkloc (1, 0, 2, 1, 0, 8)) "x" + , concat ~loc:(mkloc (1, 0, 0, 1, 0, 11)) [ + raw " " ~loc:(mkloc (1, 0, 0, 1, 0, 2)); + comment ~loc:(mkloc (1, 0, 2, 1, 0, 8)) "x"; + raw " " ~loc:(mkloc (1, 0, 8, 1, 0, 11)); + ] , [ (`A [], "" )]); ] @@ -154,7 +158,6 @@ let () = fun _ -> assert_equal ~printer a b in "Mustache test suite" >::: - (List.mapi (fun i (input, expected_parsing, rendering_tests) -> let template = @@ -167,7 +170,7 @@ let () = in (Printf.sprintf "%d - erase_locs/add_dummy_locs roundtrip" i >:: assert_equal (roundtrip template) template) - :: (Printf.sprintf "%d - parsing" i + :: (Printf.sprintf "%d - parsing (%S)" i input >:: assert_equal expected_parsing template) :: List.mapi (fun j (data, expected) -> let rendered = @@ -193,7 +196,7 @@ let () = i (Printexc.to_string exn) ) in - (Printf.sprintf "%d with locations - parsing" i + (Printf.sprintf "%d with locations - parsing (%S)" i input >:: assert_equal expected_parsing template) :: List.mapi (fun j (data, expected) -> let rendered =