Skip to content

Commit

Permalink
Merge pull request #11 from BharatSahAIyak/feat/table-parser
Browse files: browse the repository at this point in the history
Table Detection and Parser
  • Loading branch information
Gautam-Rajeev authored Aug 21, 2024
2 parents 3d07842 + ee9069f commit bbf24fd
Show file tree
Hide file tree
Showing 7 changed files with 791 additions and 105 deletions.
9 changes: 4 additions & 5 deletions marker/convert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,11 +4,11 @@
"ignore", category=UserWarning
) # Filter torch pytree user warnings

from marker.tables.table import table_detection
import pypdfium2 as pdfium # Needs to be at the top to avoid warnings
from PIL import Image

from marker.utils import flush_cuda_memory
from marker.tables.table import format_tables
from marker.debug.data import dump_bbox_debug_data
from marker.layout.layout import surya_layout, annotate_block_types
from marker.layout.order import surya_order, sort_blocks_in_reading_order
Expand All @@ -32,7 +32,6 @@

from typing import List, Dict, Tuple, Optional
from marker.settings import settings

def convert_single_pdf(
fname: str,
model_lst: List,
Expand Down Expand Up @@ -156,7 +155,7 @@ def convert_single_pdf(

# Dump debug data if flags are set
dump_bbox_debug_data(doc, fname, pages)

table_detection(fname, pages, max_pages=max_pages)
# Find reading order for blocks
# Sort blocks by reading order
surya_order(doc, pages, order_model, batch_multiplier=batch_multiplier)
Expand All @@ -169,8 +168,8 @@ def convert_single_pdf(
indent_blocks(pages)

# Fix table blocks
table_count = format_tables(pages)
out_meta["block_stats"]["table"] = table_count
# table_count = format_tables(pages)
# out_meta["block_stats"]["table"] = table_count

from marker.schema.block import Span, Line, Block

Expand Down
97 changes: 0 additions & 97 deletions marker/tables/cells.py

This file was deleted.

Loading

0 comments on commit bbf24fd

Please sign in to comment.