jimexist · jimexist · Feb 1, 2024 · Jan 31, 2024 · Feb 1, 2024 · Feb 1, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -4,6 +4,7 @@ version = "0.3.0"
 edition = "2021"
 description = "Surya is a multilingual document OCR toolkit, original implementation in Python and PyTorch"
 license = "Apache-2.0"
+authors = ["Jiayu Liu <jiayu@hey.com>"]
 repository = "https://github.com/jimexist/surya-rs"
 default-run = "surya"
 
@@ -21,10 +22,21 @@ opencv = { version = "0.88.8", default-features = false, features = [
 ] }
 serde = { version = "1.0.196" }
 serde_json = { version = "1.0.112" }
-thiserror = "1.0.56"
+accelerate-src = { version = "0.3.2", optional = true }
+intel-mkl-src = { version = "0.8.1", features = [
+  "mkl-static-lp64-iomp",
+], optional = true }
+thiserror = { version = "1.0.56" }
 
 [features]
+default = ["cli"]
 metal = ["candle-core/metal", "candle-nn/metal"]
+accelerate = [
+  "accelerate-src",
+  "candle-core/accelerate",
+  "candle-nn/accelerate",
+]
+mkl = ["intel-mkl-src", "candle-core/mkl", "candle-nn/mkl"]
 cli = ["clap", "anyhow"]
 
 [[bin]]

diff --git a/README.md b/README.md
@@ -31,17 +31,21 @@ Setup rust toolchain if you haven't yet:
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 ```
 
+Install `llvm` and `opencv` (example on Mac):
+
+```bash
+brew install llvm opencv
+```
+
 Build and install the binary:
 
 ```bash
 # run this first on Mac if you have a M1 chip
 export DYLD_FALLBACK_LIBRARY_PATH="$(xcode-select --print-path)/usr/lib/"
 # run this first on other Mac
 export DYLD_FALLBACK_LIBRARY_PATH="$(xcode-select --print-path)/Toolchains/XcodeDefault.xctoolchain/"
-# run this if you have a mac with Metal support
-cargo install --path . --features=cli,metal --bin surya
-# run this on other architectures
-cargo install --path . --features=cli --bin surya
+# optionally you can include features like accelerate, metal, mkl, etc.
+cargo install --path . --features=cli
 ```
 
 The binary when built does _not_ include the weights file itself, and will instead download via the HuggingFace Hub API. Once downloaded, the weights file will be cached in the HuggingFace cache directory.
@@ -57,32 +61,44 @@ Arguments:
   <IMAGE>  path to image
 
 Options:
+      --batch-size <BATCH_SIZE>
+          detection batch size, if not supplied defaults to 2 on CPU and 16 on GPU
       --model-repo <MODEL_REPO>
-          model's hugging face repo [default: vikp/line_detector]
+          detection model's hugging face repo [default: vikp/line_detector]
       --weights-file-name <WEIGHTS_FILE_NAME>
-          model's weights file name [default: model.safetensors]
+          detection model's weights file name [default: model.safetensors]
       --config-file-name <CONFIG_FILE_NAME>
-          model's config file name [default: config.json]
-      --generate-bbox-image
+          detection model's config file name [default: config.json]
+      --non-max-suppression-threshold <NON_MAX_SUPPRESSION_THRESHOLD>
+          a value between 0.0 and 1.0 to filter low density part of heatmap [default: 0.35]
+      --extract-text-threshold <EXTRACT_TEXT_THRESHOLD>
+          a value between 0.0 and 1.0 to filter out bbox with low heatmap density [default: 0.6]
+      --bbox-area-threshold <BBOX_AREA_THRESHOLD>
+          a pixel threshold to filter out small area bbox [default: 10]
+      --polygons
+          whether to output polygons json file
+      --image
           whether to generate bbox image
-      --generate-heatmap
+      --heatmap
           whether to generate heatmap
-      --generate-affinity-map
+      --affinity-map
           whether to generate affinity map
       --output-dir <OUTPUT_DIR>
-          output directory, each file will be generating a subdirectory under this directory [default: ./surya_output]
-      --device-type <DEVICE_TYPE>
+          output directory, under which the input image will be generating a subdirectory [default: ./surya_output]
+      --device <DEVICE_TYPE>
           [default: cpu] [possible values: cpu, gpu, metal]
+      --verbose
+          whether to enable verbose mode
   -h, --help
           Print help
   -V, --version
           Print version
 ```
 
-You can use this to control logging level:
+You can also set this environment variable to control the logging level:
 
 ```bash
-export RUST_LOG=info # or debug, warn, etc.
+export SURYA_LOG=warn # or debug, info, etc.
 ```
 
 ## Library

diff --git a/src/bbox.rs b/src/bbox.rs
@@ -159,7 +159,7 @@ pub fn draw_bboxes<P: AsRef<Path>>(
     image: &mut Mat,
     heatmap_size: Size,
     image_with_padding_size: Size,
-    bboxes: Vec<BBox>,
+    bboxes: &[BBox],
     output_file: P,
 ) -> crate::Result<()> {
     debug!(
@@ -185,7 +185,7 @@ pub fn draw_bboxes<P: AsRef<Path>>(
 pub fn generate_bbox(
     heatmap: &Mat,
     non_max_suppression_threshold: f64,
-    text_threshold: f64,
+    extract_text_threshold: f64,
     bbox_area_threshold: i32,
 ) -> crate::Result<Vec<BBox>> {
     let labels = image_threshold(heatmap, non_max_suppression_threshold)?;
@@ -212,7 +212,7 @@ pub fn generate_bbox(
             continue;
         }
         let max_value = heatmap_label_max(heatmap, &labels, label)?;
-        if max_value < text_threshold {
+        if max_value < extract_text_threshold {
             continue;
         }
         let polygon = connected_area_to_bbox(&labels, stats_row, label)?;