aphp · percevalw · Jul 27, 2023 · Jul 7, 2023 · Jul 7, 2023 · Jul 7, 2023
diff --git a/docs/pipes/embeddings/huggingface-embedding.md b/docs/pipes/embeddings/huggingface-embedding.md
@@ -0,0 +1,7 @@
+# HuggingfaceEmbedding {: #edspdf.pipes.embeddings.huggingface_embedding.HuggingfaceEmbedding }
+
+::: edspdf.pipes.embeddings.huggingface_embedding.HuggingfaceEmbedding
+    options:
+        heading_level: 2
+        show_bases: false
+        show_source: false
diff --git a/docs/pipes/embeddings/index.md b/docs/pipes/embeddings/index.md
@@ -10,13 +10,14 @@ td:nth-child(1), td:nth-child(2) {
 }
 </style>
 
-| Factory name                                                                                 |  Description                                                       |
-|----------------------------------------------------------------------------------------------|--------------------------------------------------------------------|
-| [`simple-text-embedding`][edspdf.pipes.embeddings.simple_text_embedding.SimpleTextEmbedding] |  A module that embeds the textual features of the blocks.          |
-| [`embedding-combiner`][edspdf.pipes.embeddings.embedding_combiner.EmbeddingCombiner]         |  Encodes boxes using a combination of multiple encoders            |
-| [`sub-box-cnn-pooler`][edspdf.pipes.embeddings.sub_box_cnn_pooler.SubBoxCNNPooler]           |  Pools the output of a CNN over the elements of a box (like words) |
-| [`box-layout-embedding`][edspdf.pipes.embeddings.box_layout_embedding.BoxLayoutEmbedding]    |  Encodes the layout of the boxes                                   |
-| [`box-transformer`][edspdf.pipes.embeddings.box_transformer.BoxTransformer]                  |  Contextualizes box representations using a transformer            |
+| Factory name                                                                                  | Description                                                       |
+|-----------------------------------------------------------------------------------------------|-------------------------------------------------------------------|
+| [`simple-text-embedding`][edspdf.pipes.embeddings.simple_text_embedding.SimpleTextEmbedding]  | A module that embeds the textual features of the blocks.          |
+| [`embedding-combiner`][edspdf.pipes.embeddings.embedding_combiner.EmbeddingCombiner]          | Encodes boxes using a combination of multiple encoders            |
+| [`sub-box-cnn-pooler`][edspdf.pipes.embeddings.sub_box_cnn_pooler.SubBoxCNNPooler]            | Pools the output of a CNN over the elements of a box (like words) |
+| [`box-layout-embedding`][edspdf.pipes.embeddings.box_layout_embedding.BoxLayoutEmbedding]     | Encodes the layout of the boxes                                   |
+| [`box-transformer`][edspdf.pipes.embeddings.box_transformer.BoxTransformer]                   | Contextualizes box representations using a transformer            |
+| [`huggingface-embedding`][edspdf.pipes.embeddings.huggingface_embedding.HuggingfaceEmbedding] | Encodes boxes using a Huggingface multi-modal model.              |
 
 <!-- --8<-- [end:components] -->
 

diff --git a/docs/recipes/training.md b/docs/recipes/training.md
@@ -63,7 +63,6 @@ model to decrease a given loss. The process of training a pipeline with EDS-PDF
         config={
             "embedding": model.get_pipe("embedding"),
             "labels": [],
-            "activation": "relu",
         },
     )
     ```
@@ -190,10 +189,10 @@ def segmentation_adapter(
 
 ## Full example
 
-Let's wrap the training code in a function, and make it callable from the command line !
+Let's wrap the training code in a function, and make it callable from the command line using [confit](https://github.com/aphp/confit)!
 
 ???+ example "train.py"
-    ```python linenums="1" hl_lines="16-27"
+    ```python linenums="1"
     import itertools
     import json
     from pathlib import Path
@@ -309,7 +308,6 @@ Let's wrap the training code in a function, and make it callable from the comman
             config={
                 "embedding": model.get_pipe("embedding"),
                 "labels": [],
-                "activation": "relu",
             },
         )
 
@@ -483,7 +481,6 @@ def train_my_model(
 -       config={
 -           "embedding": model.get_pipe("embedding"),
 -           "labels": [],
--           "activation": "relu",
 -       },
 -   )
 

diff --git a/edspdf/pipeline.py b/edspdf/pipeline.py
@@ -588,24 +588,28 @@ def collate(
         return batch
 
     def parameters(self):
+        """Returns an iterator over the Pytorch parameters of the components in the
+        pipeline"""
+        return (p for n, p in self.named_parameters())
+
+    def named_parameters(self):
         """Returns an iterator over the Pytorch parameters of the components in the
         pipeline"""
         seen = set()
         for name, component in self.pipeline:
-            if hasattr(component, "parameters"):
-                for param in component.parameters():
+            if hasattr(component, "named_parameters"):
+                for param_name, param in component.named_parameters():
                     if param in seen:
                         continue
                     seen.add(param)
-                    yield param
+                    yield f"{name}.{param_name}", param
 
     def to(self, device: Optional[torch.device] = None):
         """Moves the pipeline to a given device"""
         for name, component in self.trainable_pipes():
             component.to(device)
         return self
 
-    @contextmanager
     def train(self, mode=True):
         """
         Enables training mode on pytorch modules
@@ -616,12 +620,19 @@ def train(self, mode=True):
             Whether to enable training or not
         """
 
+        class context:
+            def __enter__(self):
+                pass
+
+            def __exit__(ctx_self, type, value, traceback):
+                for name, proc in self.trainable_pipes():
+                    proc.train(was_training[name])
+
         was_training = {name: proc.training for name, proc in self.trainable_pipes()}
         for name, proc in self.trainable_pipes():
             proc.train(mode)
-        yield
-        for name, proc in self.trainable_pipes():
-            proc.train(was_training[name])
+
+        return context()
 
     def score(self, docs: Sequence[PDFDoc], batch_size: int = None) -> Dict[str, Any]:
         """

diff --git a/edspdf/pipes/classifiers/trainable.py b/edspdf/pipes/classifiers/trainable.py
@@ -15,7 +15,6 @@
 from edspdf.registry import registry
 from edspdf.structures import PDFDoc
 from edspdf.trainable_pipe import Scorer, TrainablePipe
-from edspdf.utils.torch import ActivationFunction, get_activation_function
 
 
 def classifier_scorer(pairs):
@@ -70,7 +69,6 @@ class TrainableClassifier(TrainablePipe[Dict[str, Any]]):
                     },
                 },
                 "labels": ["body", "pollution"],
-                "activation": "relu",
             },
         )
         ```
@@ -81,7 +79,6 @@ class TrainableClassifier(TrainablePipe[Dict[str, Any]]):
         [components.classifier]
         @factory = "trainable-classifier"
         labels = ["body", "pollution"]
-        activation = "relu"
 
         [components.classifier.embedding]
         @factory = "sub-box-cnn-pooler"
@@ -99,8 +96,6 @@ class TrainableClassifier(TrainablePipe[Dict[str, Any]]):
         Initial labels of the classifier (will be completed during initialization)
     embedding: TrainablePipe[EmbeddingOutput]
         Embedding module to encode the PDF boxes
-    activation: ActivationFunction
-        Name of the activation function
     dropout_p: float
         Dropout probability used on the output of the box and textual encoders
     scorer: Scorer
@@ -111,8 +106,6 @@ def __init__(
         self,
         embedding: TrainablePipe[EmbeddingOutput],
         labels: Sequence[str] = ("pollution",),
-        activation: ActivationFunction = "gelu",
-        dropout_p: float = 0.0,
         scorer: Scorer = classifier_scorer,
         pipeline: Pipeline = None,
         name: str = "trainable-classifier",
@@ -128,9 +121,6 @@ def __init__(
             in_features=self.embedding.output_size,
             out_features=len(self.label_voc),
         )
-        self.activation = get_activation_function(activation)
-        self.dropout = torch.nn.Dropout(dropout_p)
-
         # Scoring function
         self.score = scorer
 

diff --git a/edspdf/pipes/embeddings/box_layout_preprocessor.py b/edspdf/pipes/embeddings/box_layout_preprocessor.py
@@ -1,16 +1,15 @@
-from typing import Any, Dict, Sequence
+from typing import Any, Dict
 
 import torch
 from foldedtensor import FoldedTensor, as_folded_tensor
 from typing_extensions import TypedDict
 
 from edspdf import Pipeline, TrainablePipe, registry
-from edspdf.structures import PDFDoc, TextBox
+from edspdf.structures import PDFDoc
 
 BoxLayoutBatch = TypedDict(
     "BoxLayoutBatch",
     {
-        "page": FoldedTensor,
         "xmin": FoldedTensor,
         "ymin": FoldedTensor,
         "xmax": FoldedTensor,
@@ -60,39 +59,19 @@ def __init__(
     ):
         super().__init__(pipeline, name)
 
-    def preprocess_boxes(self, boxes: Sequence[TextBox]):
-        box_pages = [box.page.page_num for box in boxes]
-
-        last_page = max(box_pages, default=0)
-
-        return {
-            "page": box_pages,
-            "xmin": [b.x0 for b in boxes],
-            "ymin": [b.y0 for b in boxes],
-            "xmax": [b.x1 for b in boxes],
-            "ymax": [b.y1 for b in boxes],
-            "width": [(b.x1 - b.x0) for b in boxes],
-            "height": [(b.y1 - b.y0) for b in boxes],
-            "first_page": [b.page_num == 0 for b in boxes],
-            "last_page": [b.page_num == last_page for b in boxes],
-        }
-
     def preprocess(self, doc: PDFDoc, supervision: bool = False):
         pages = doc.pages
-        box_pages = [[b.page.page_num for b in page.text_boxes] for page in pages]
-        last_page = max(box_pages, default=0)
+        [[b.page_num for b in page.text_boxes] for page in pages]
+        last_p = doc.num_pages - 1
         return {
-            "page": box_pages,
             "xmin": [[b.x0 for b in p.text_boxes] for p in pages],
             "ymin": [[b.y0 for b in p.text_boxes] for p in pages],
             "xmax": [[b.x1 for b in p.text_boxes] for p in pages],
             "ymax": [[b.y1 for b in p.text_boxes] for p in pages],
             "width": [[(b.x1 - b.x0) for b in p.text_boxes] for p in pages],
             "height": [[(b.y1 - b.y0) for b in p.text_boxes] for p in pages],
-            "first_page": [[b.page.page_num == 0 for b in p.text_boxes] for p in pages],
-            "last_page": [
-                [b.page.page_num == last_page for b in p.text_boxes] for p in pages
-            ],
+            "first_page": [[b.page_num == 0 for b in p.text_boxes] for p in pages],
+            "last_page": [[b.page_num == last_p for b in p.text_boxes] for p in pages],
         }
 
     def collate(self, batch, device: torch.device) -> BoxLayoutBatch:
@@ -103,13 +82,12 @@ def collate(self, batch, device: torch.device) -> BoxLayoutBatch:
         }
 
         return {
-            "page": as_folded_tensor(batch["page"], dtype=torch.long, **kw),
-            "xmin": as_folded_tensor(batch["xmin"], dtype=torch.long, **kw),
-            "ymin": as_folded_tensor(batch["ymin"], dtype=torch.long, **kw),
-            "xmax": as_folded_tensor(batch["xmax"], dtype=torch.long, **kw),
-            "ymax": as_folded_tensor(batch["ymax"], dtype=torch.long, **kw),
-            "width": as_folded_tensor(batch["width"], dtype=torch.long, **kw),
-            "height": as_folded_tensor(batch["height"], dtype=torch.long, **kw),
+            "xmin": as_folded_tensor(batch["xmin"], dtype=torch.float, **kw),
+            "ymin": as_folded_tensor(batch["ymin"], dtype=torch.float, **kw),
+            "xmax": as_folded_tensor(batch["xmax"], dtype=torch.float, **kw),
+            "ymax": as_folded_tensor(batch["ymax"], dtype=torch.float, **kw),
+            "width": as_folded_tensor(batch["width"], dtype=torch.float, **kw),
+            "height": as_folded_tensor(batch["height"], dtype=torch.float, **kw),
             "first_page": as_folded_tensor(batch["first_page"], dtype=torch.bool, **kw),
             "last_page": as_folded_tensor(batch["last_page"], dtype=torch.bool, **kw),
         }