From 4ecee332ed658d15405b9b99bb1f8f3d8f60ae5a Mon Sep 17 00:00:00 2001 From: Zolisa Bleki Date: Wed, 30 Oct 2024 17:32:35 +0200 Subject: [PATCH] Add improvements to ZipStore example. This commit uses features of camplzip v1.13 to implement a fully featured ZipStore that allows deletion and rename operations. --- README.md | 2 +- examples/dune | 4 +- examples/readonly_zipstore.ml | 106 --------------------- examples/zipstore.ml | 172 ++++++++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 109 deletions(-) delete mode 100644 examples/readonly_zipstore.ml create mode 100644 examples/zipstore.ml diff --git a/README.md b/README.md index 50ea71c..97e5110 100644 --- a/README.md +++ b/README.md @@ -143,5 +143,5 @@ FilesystemStore.Array.rename store anode "new_name";; [6]: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html [7]: https://zoj613.github.io/zarr-ml/zarr/Zarr/index.html#examples [8]: https://github.com/ocaml-multicore/eio -[9]: https://github.com/zoj613/zarr-ml/tree/main/examples/inmemory_zipstore.ml +[9]: https://github.com/zoj613/zarr-ml/tree/main/examples/zipstore.ml [10]: https://github.com/zoj613/zarr-ml/tree/main/examples/picos_fs_store.ml diff --git a/examples/dune b/examples/dune index b706910..f110b85 100644 --- a/examples/dune +++ b/examples/dune @@ -1,6 +1,6 @@ (executable - (name readonly_zipstore) - (modules readonly_zipstore) + (name zipstore) + (modules zipstore) (ocamlopt_flags (:standard -O3)) (libraries zarr-eio camlzip)) diff --git a/examples/readonly_zipstore.ml b/examples/readonly_zipstore.ml deleted file mode 100644 index d1edd9d..0000000 --- a/examples/readonly_zipstore.ml +++ /dev/null @@ -1,106 +0,0 @@ -(* This module implements a Read-only Zip file zarr store that is Eio-aware. - The main requirement is to implement the signature of Zarr.Types.IO. - We use Zarr_eio's Deferred module for `Deferred` so that the store can be - Eio-aware. We only implement the get_* and list_* family of functions and - raise a Not_implemented exception for the set_* and erase_* family of - functions. This effectively allows us to create a read-only store since - calling any of the following functions would result in error: - - ZipStore.Group.create - - ZipStore.Array.create - - ZipStore.Group.delete - - ZipStore.Array.delete - - ZipStore.clear - - ZipStore.Array.write - - ZipStore.Array.reshape - - ZipStore.Array.rename - - ZipStore.Group.rename - Below we show how to implement this custom Zarr Store. - - To compile & run this example execute the command - dune exec -- examples/zipstore.exe - in your shell at the root of this project. *) - -module ZipStore : sig - exception Not_implemented - include Zarr.Storage.STORE with module Deferred = Zarr_eio.Deferred - val with_open : string -> (t -> 'a) -> 'a -end = struct - exception Not_implemented - - module Z = struct - module Deferred = Zarr_eio.Deferred - - type t = Zip.in_file - - let is_member t key = - match Zip.find_entry t key with - | exception Not_found -> false - | _ -> true - - let size t key = - match Zip.find_entry t key with - | e -> e.uncompressed_size - | exception Not_found -> 0 - - let get t key = - match Zip.find_entry t key with - | e -> Zip.read_entry t e - | exception Not_found -> raise (Zarr.Storage.Key_not_found key) - - let get_partial_values t key ranges = - let data = get t key in - let size = String.length data in - ranges |> Eio.Fiber.List.map @@ fun (ofs, len) -> - let f v = String.sub data ofs v in - Option.fold ~none:(f (size - ofs)) ~some:f len - - let list t = - Zip.entries t |> Eio.Fiber.List.filter_map @@ function - | (e : Zip.entry) when not e.is_directory -> Some e.filename - | _ -> None - - let list_dir t prefix = - let module S = Set.Make(String) in - let n = String.length prefix in - let prefs, keys = - List.fold_left - (fun ((l, r) as acc) -> function - | (e : Zip.entry) when e.is_directory -> acc - | e when not @@ String.starts_with ~prefix e.filename -> acc - | e when String.contains_from e.filename n '/' -> - let key = e.filename in - let pre = String.sub key 0 @@ 1 + String.index_from key n '/' in - S.add pre l, r - | e -> l, e.filename :: r) (S.empty, []) @@ Zip.entries t - in keys, S.elements prefs - - let set _ = raise Not_implemented - - let set_partial_values _ = raise Not_implemented - - let erase _ = raise Not_implemented - - let erase_prefix _ = raise Not_implemented - - let rename _ = raise Not_implemented - end - - include Zarr.Storage.Make(Z) - - let with_open path f = - let x = Zip.open_in path in - Fun.protect ~finally:(fun () -> Zip.close_in x) @@ fun () -> f x -end - -let _ = - Eio_main.run @@ fun _ -> - let open Zarr in - let open Zarr.Ndarray in - - ZipStore.with_open "examples/data/testdata.zip" @@ fun store -> - let xs, _ = ZipStore.hierarchy store in - let anode = List.hd @@ Eio.Fiber.List.filter - (fun node -> Node.Array.to_path node = "/some/group/name") xs in - let arr = ZipStore.Array.read store anode [||] Char in - try ZipStore.Array.write store anode [||] arr with - | ZipStore.Not_implemented -> print_endline "Store is read-only" diff --git a/examples/zipstore.ml b/examples/zipstore.ml new file mode 100644 index 0000000..d328172 --- /dev/null +++ b/examples/zipstore.ml @@ -0,0 +1,172 @@ +(* This module implements a Zip file zarr store that uses the Eio library for + non-blocking I/O operations. The main requirement is to implement the signature + of Zarr.Types.IO. Below we show how to implement this custom Zarr Store. + + To compile & run this example execute the command + dune exec -- examples/zipstore.exe + in your shell at the root of this project. *) + +module ZipStore : sig + include Zarr.Storage.STORE with module Deferred = Zarr_eio.Deferred + val with_open : ?clevel:int -> string -> (t -> 'a) -> 'a +end = struct + + module Z = struct + module Deferred = Zarr_eio.Deferred + + type t = {path : string; level : int option} + + let with_open_in path f = + let ic = Zip.open_in path in + Fun.protect ~finally:(fun () -> Zip.close_in ic) (fun () -> f ic) + + let with_open_out path f = + let oc = Zip.open_update path in + Fun.protect ~finally:(fun () -> Zip.close_out oc) (fun () -> f oc) + + let is_member t key = + let entry_exists ~key ic = match Zip.find_entry ic key with + | exception Not_found -> false + | _ -> true + in + with_open_in t.path (entry_exists ~key) + + let size t key = + let entry_size ~key ic = match Zip.find_entry ic key with + | exception Not_found -> 0 + | e -> e.uncompressed_size + in + with_open_in t.path (entry_size ~key) + + let get t key = + let read_entry ~key ic = match Zip.find_entry ic key with + | exception Not_found -> raise (Zarr.Storage.Key_not_found key) + | e -> Zip.read_entry ic e + in + with_open_in t.path (read_entry ~key) + + let get_partial_values t key ranges = + let read_range ~data ~size (ofs, len) = match len with + | Some l -> String.sub data ofs l + | None -> String.sub data ofs (size - ofs) + in + let data = get t key in + let size = String.length data in + List.map (read_range ~data ~size) ranges + + let list t = + let entry_filename = function + | (e : Zip.entry) when not e.is_directory -> Some e.filename + | _ -> None + in + let entries = with_open_in t.path (fun ic -> Zip.entries ic) in + List.filter_map entry_filename entries + + let list_dir t prefix = + let module S = Set.Make(String) in + let n = String.length prefix in + let add_entry_with_prefix ((l, r) as acc) = function + | (e : Zip.entry) when e.is_directory -> acc + | e when not (String.starts_with ~prefix e.filename) -> acc + | e when String.contains_from e.filename n '/' -> + let key = e.filename in + let pre = String.sub key 0 (1 + String.index_from key n '/') in + S.add pre l, r + | e -> l, e.filename :: r + in + let entries = with_open_in t.path (fun ic -> Zip.entries ic) in + let prefs, keys = List.fold_left add_entry_with_prefix (S.empty, []) entries in + keys, S.elements prefs + + let set t key value = + with_open_out t.path (fun oc -> Zip.add_entry ?level:t.level value oc key) + + let set_partial_values t key ?(append=false) rvs = + let ov = try get t key with + | Zarr.Storage.Key_not_found _ -> String.empty + in + let f = if append || ov = String.empty then + fun acc (_, v) -> acc ^ v else + fun acc (rs, v) -> + let s = Bytes.unsafe_of_string acc in + Bytes.blit_string v 0 s rs String.(length v); + Bytes.unsafe_to_string s + in + set t key (List.fold_left f ov rvs) + + let add_to_zip ~oc ~level (path, v) = Zip.add_entry ?level v oc path + + let rename t prefix new_prefix = + let add_pair ~ic ~prefix ~new_prefix acc = function + | (e : Zip.entry) when not (String.starts_with ~prefix e.filename) -> + (e.filename, Zip.read_entry ic e) :: acc + | e -> + let l = String.length prefix in + let path = new_prefix ^ String.sub e.filename l (String.length e.filename - l) in + (path, Zip.read_entry ic e) :: acc + in + let rename_entries ic = + List.fold_left (add_pair ~ic ~prefix ~new_prefix) [] (Zip.entries ic) + in + let pairs = with_open_in t.path rename_entries in + let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) + with_open_out t.path @@ fun oc -> List.iter (add_to_zip ~oc ~level:t.level) pairs + + let erase t key = + let filter ~ic acc = function + | (e : Zip.entry) when e.filename = key -> acc + | e -> (e.filename, Zip.read_entry ic e) :: acc + in + let filter_entries ic = List.fold_left (filter ~ic) [] (Zip.entries ic) in + let pairs = with_open_in t.path filter_entries in + let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) + with_open_out t.path @@ fun oc -> List.iter (add_to_zip ~oc ~level:t.level) pairs + + let erase_prefix t prefix = + let filter ~ic ~prefix acc = function + | (e : Zip.entry) when String.starts_with ~prefix e.filename -> acc + | e -> (e.filename, Zip.read_entry ic e) :: acc + in + let filter_entries ic = List.fold_left (filter ~ic ~prefix) [] (Zip.entries ic) in + let pairs = with_open_in t.path filter_entries in + let oc = Zip.open_out t.path in Zip.close_out oc; (* truncate the old zip file *) + with_open_out t.path @@ fun oc -> List.iter (add_to_zip ~oc ~level:t.level) pairs + end + + include Zarr.Storage.Make(Z) + + let with_open ?clevel path f = + if not @@ Sys.file_exists path then begin + Zip.(close_out @@ open_out path) + end; + let level = match clevel with + | Some l when l < 0 || l > 9 -> + raise @@ invalid_arg (Printf.sprintf "wrong compression level: %d" l) + | l -> l + in + f Z.{path; level} +end + +let _ = + Eio_main.run @@ fun _ -> + let open Zarr in + let open Zarr.Ndarray in + let open Zarr.Indexing in + + let test_functionality store = + let xs, _ = ZipStore.hierarchy store in + let anode = List.hd @@ List.filter + (fun node -> Node.Array.to_path node = "/some/group/name") xs in + let slice = [|R [|0; 20|]; I 10; R [||]|] in + let x = ZipStore.Array.read store anode slice Char in + let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x in + ZipStore.Array.write store anode slice x'; + let y = ZipStore.Array.read store anode slice Char in + assert (Zarr.Ndarray.equal x' y); + ZipStore.Array.rename store anode "name2"; + let exists = ZipStore.Array.exists store @@ Node.Array.of_path "/some/group/name2" in + assert exists; + ZipStore.clear store (* deletes all zip entries *) + in + ZipStore.with_open "examples/data/testdata.zip" test_functionality; + print_endline "Zip store has been updated."