Skip to content

Commit

Permalink
Fix for issue #18
Browse files Browse the repository at this point in the history
Fixes an issue with verse sub-parts, i.e. verse numbers that carry a letter suffix. Examples: MSG MAT.1.2-6a and MAT.1.6b-11
  • Loading branch information
martijnlentink committed May 4, 2024
1 parent b99102d commit d5dfcf7
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions bible_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import zipfile
import io
import pathlib
import string
from tqdm import tqdm

headers = {
Expand Down Expand Up @@ -198,8 +199,8 @@ def parse_paragraph(parent, paragraph, style):
paragraph_el = SubElement(parent, 'para', style=style)
verse_el = SubElement(paragraph_el, 'verse', number=verse_number_label, style='v')

# retrieve all verse text components
verse_number_classes = ' '. join([f"v{x}" for x in verse_numbers])
# retrieve all verse text components - strip the trailing lowercase letter suffix (e.g. "6a" -> "6")
verse_number_classes = ' '. join(["v" + str(x).rstrip(string.ascii_lowercase) for x in verse_numbers])
verse_texts = verse.xpath('//*[@class="verse ' + verse_number_classes + '"]//span[@class="content"]')
# group by div-tag, these will be text blocks for a single line
groups = itertools.groupby(verse_texts, lambda x: next(an for an in x.iterancestors() if an.tag == 'div'))
Expand Down

0 comments on commit d5dfcf7

Please sign in to comment.