Skip to content

Commit

Permalink
Merge branch 'main' into dual-libs-names
Browse files Browse the repository at this point in the history
Signed-off-by: Javier Chávarri <javier.chavarri@gmail.com>
  • Loading branch information
jchavarri committed Mar 20, 2024
2 parents 531e4d3 + 171c231 commit 1a3558a
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 26 deletions.
2 changes: 1 addition & 1 deletion boot/libs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ let local_libraries =
; ("vendor/fiber/src", Some "Fiber", false, None)
; ("src/dune_console", Some "Dune_console", false, None)
; ("src/memo", Some "Memo", false, None)
; ("vendor/uutf", None, false, None)
; ("src/dune_sexp", Some "Dune_sexp", false, None)
; ("src/ocaml-config", Some "Ocaml_config", false, None)
; ("src/ocaml", Some "Ocaml", false, None)
Expand All @@ -23,7 +24,6 @@ let local_libraries =
; ("otherlibs/dune-rpc/private", Some "Dune_rpc_private", false, None)
; ("src/dune_config", Some "Dune_config", false, None)
; ("vendor/sha", None, false, None)
; ("vendor/uutf", None, false, None)
; ("vendor/opam/src/core", None, false, None)
; ("vendor/opam-file-format", None, false, None)
; ("vendor/opam/src/format", None, false, None)
Expand Down
2 changes: 2 additions & 0 deletions doc/changes/10113.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- dune file formatting: output utf8 if input is correctly encoded (#10113,
fixes #9728, @moyodiallo)
2 changes: 1 addition & 1 deletion src/dune_sexp/dune
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
(library
(name dune_sexp)
(synopsis "[Internal] S-expression library")
(libraries stdune)
(libraries stdune dune_uutf)
(instrumentation
(backend bisect_ppx)))

Expand Down
92 changes: 71 additions & 21 deletions src/dune_sexp/escape.ml
Original file line number Diff line number Diff line change
@@ -1,26 +1,64 @@
open! Stdune

(** [next_valid_utf8_uchar_len s i] is [Some n] when the bytes of [s] starting
    at index [i] begin with one well-formed UTF-8 encoded character occupying
    [n] bytes. It is [None] when that byte sequence is malformed or is cut
    short by the end of [s] (including when [i] is past the end of [s]).

    Note: on OCaml >= 4.14, this can be switched to the following (and the
    dependency to [Uutf] can be removed)
    {[
      let next_valid_utf8_uchar_len s i =
        let decode = String.get_utf_8_uchar s i in
        Option.some_if (Uchar.utf_decode_is_valid decode) (Uchar.utf_decode_length decode)
      ;;
    ]} *)
let next_valid_utf8_uchar_len s i =
  let len = String.length s in
  if i + 2 < len && s.[i] = '\xEF' && s.[i + 1] = '\xBB' && s.[i + 2] = '\xBF'
  then
    (* U+FEFF is answered without going through the decoder. A [Uutf] decoder
       created with an explicit UTF-8 encoding can absorb a leading byte order
       mark (cf. [Uutf.decoder_removed_bom]) and carry on with the following
       character; the byte count computed below would then span more than one
       character, or be [None] when the mark is the last thing in [s]. *)
    Some 3
  else (
    (* Index of the next byte of [s] to hand to the decoder. *)
    let pos = ref i in
    (* The decoder is fed one byte at a time through this buffer, so that
       [!pos - i] is the number of bytes consumed when it yields a result. *)
    let buf = Bytes.create 1 in
    let decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
    let rec go () =
      match Uutf.decode decoder with
      | `Await ->
        if !pos >= len
        then (* Input exhausted in the middle of a sequence. *)
          None
        else (
          Bytes.set buf 0 s.[!pos];
          incr pos;
          Uutf.Manual.src decoder buf 0 1;
          go ())
      | `Uchar _ -> Some (!pos - i)
      | `Malformed _ -> None
      | `End ->
        (* The decoder is never given an empty source, which is how the end of
           input is signalled in manual mode, so this is not expected. *)
        Code_error.raise "next_valid_utf8_uchar: `End" []
    in
    go ())
;;

let quote_length s =
let n = ref 0 in
let len = String.length s in
for i = 0 to len - 1 do
n
:= !n
+
match String.unsafe_get s i with
| '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
| '%' -> if i + 1 < len && s.[i + 1] = '{' then 2 else 1
| ' ' .. '~' -> 1
| _ -> 4
let i = ref 0 in
while !i < len do
(n
:= !n
+
match String.unsafe_get s !i with
| '\"' | '\\' | '\n' | '\t' | '\r' | '\b' -> 2
| '%' -> if !i + 1 < len && s.[!i + 1] = '{' then 2 else 1
| ' ' .. '~' -> 1
| _ ->
(match next_valid_utf8_uchar_len s !i with
| Some uchar_len ->
i := !i + uchar_len - 1;
uchar_len
| None -> 4));
incr i
done;
!n
;;

let escape_to s ~dst:s' ~ofs =
let n = ref ofs in
let len = String.length s in
for i = 0 to len - 1 do
(match String.unsafe_get s i with
let i = ref 0 in
while !i < len do
(match String.unsafe_get s !i with
| ('\"' | '\\') as c ->
Bytes.unsafe_set s' !n '\\';
incr n;
Expand All @@ -41,21 +79,33 @@ let escape_to s ~dst:s' ~ofs =
Bytes.unsafe_set s' !n '\\';
incr n;
Bytes.unsafe_set s' !n 'b'
| '%' when i + 1 < len && s.[i + 1] = '{' ->
| '%' when !i + 1 < len && s.[!i + 1] = '{' ->
Bytes.unsafe_set s' !n '\\';
incr n;
Bytes.unsafe_set s' !n '%'
| ' ' .. '~' as c -> Bytes.unsafe_set s' !n c
| c ->
let a = Char.code c in
Bytes.unsafe_set s' !n '\\';
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 100)));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 10 mod 10)));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a mod 10))));
incr n
(match next_valid_utf8_uchar_len s !i with
| Some uchar_len ->
Bytes.unsafe_set s' !n (String.unsafe_get s !i);
Bytes.unsafe_set s' (!n + 1) (String.unsafe_get s (!i + 1));
if uchar_len > 2
then Bytes.unsafe_set s' (!n + 2) (String.unsafe_get s (!i + 2));
if uchar_len > 3
then Bytes.unsafe_set s' (!n + 3) (String.unsafe_get s (!i + 3));
n := !n + uchar_len - 1;
i := !i + uchar_len - 1
| None ->
let a = Char.code c in
Bytes.unsafe_set s' !n '\\';
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 100)));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a / 10 mod 10)));
incr n;
Bytes.unsafe_set s' !n (Char.unsafe_chr (48 + (a mod 10)))));
incr n;
incr i
done
;;

Expand Down
28 changes: 25 additions & 3 deletions test/blackbox-tests/test-cases/formatting/non-ascii-characters.t
Original file line number Diff line number Diff line change
@@ -1,13 +1,35 @@
How the non-ASCII characters are handled, this is also related to the issue #9728
UTF-8 characters are handled for now; this is also related to issue #9728.

$ dune format-dune-file <<EOF
> ("É")
> ("Éff ĎúÑȨ")
> EOF
("\195\137")
("Éff ĎúÑȨ")

$ dune format-dune-file <<EOF
> (run foo %{bin:é})
> EOF
File "", line 1, characters 15-16:
Error: The character '\195' is not allowed inside %{...} forms
[1]

$ dune format-dune-file <<EOF
> (echo "hÉllo")
> EOF
(echo "hÉllo")

$ dune format-dune-file <<EOF
> (echo "É")
> EOF
(echo "É")

$ dune format-dune-file <<EOF
> (Écho "hello")
> EOF
File "", line 1, characters 1-1:
Error: Invalid . file
[1]

$ bash -c "printf '(echo \"%b\")' '\xc0'"| dune format-dune-file
(echo "\192")
$ bash -c "printf '(echo \"%b\")' '\xf0'"| dune format-dune-file
(echo "\240")
61 changes: 61 additions & 0 deletions test/blackbox-tests/test-cases/melange/library-include-subdirs.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
Test moving modules in a library with `(include_subdirs unqualified)`

$ cat > dune-project <<EOF
> (lang dune 3.8)
> (using melange 0.1)
> EOF

$ cat > dune <<EOF
> (melange.emit
> (target output)
> (alias mel)
> (libraries foo)
> (emit_stdlib false)
> (preprocess (pps melange.ppx)))
> EOF

$ mkdir lib
$ cat > lib/dune <<EOF
> (include_subdirs unqualified)
> (library
> (name foo)
> (modes melange))
> EOF
$ cat > lib/foo.ml <<EOF
> let name = Bar.name
> EOF

$ mkdir lib/init
$ cat > lib/init/bar.ml <<EOF
> let name = "Zoe"
> EOF

$ dune build @mel

Melange shows the proper path to `bar.js`

$ cat _build/default/output/lib/foo.js | grep bar.js
let Foo__Bar = require("./init/bar.js");

$ mv lib/init lib/end

$ dune build @mel

The import in `foo.js` still shows the initial path to `bar.js`, but the file is not there anymore

$ cat _build/default/output/lib/foo.js | grep bar.js
let Foo__Bar = require("./init/bar.js");

$ test -f _build/default/output/lib/init/bar.js
[1]

$ test -f _build/default/output/lib/end/bar.js

After removal of the js artifact, the path in `bar.js` import is correct

$ rm _build/default/output/lib/foo.js

$ dune build @mel

$ cat _build/default/output/lib/foo.js | grep bar.js
let Foo__Bar = require("./end/bar.js");

0 comments on commit 1a3558a

Please sign in to comment.