Skip to content

Commit

Permalink
Merge pull request #134 from kba/page-to-alto-python
Browse files Browse the repository at this point in the history
Page to ALTO Python
  • Loading branch information
stweil authored Jun 25, 2021
2 parents 290b202 + 4ddf261 commit e0b42ff
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 8 deletions.
14 changes: 7 additions & 7 deletions script/transform/page__alto
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENDORDIR="$(cd $SCRIPTDIR/../../vendor/; pwd)"
JAR="$VENDORDIR/JPageConverter/PageConverter.jar"
INFILE="$1"
OUTFILE="$2"
ARGUMENT="$3"
Expand All @@ -16,17 +15,18 @@ if [[ "$2" = "-" ]]; then
OUTFILE="$(mktemp)"
fi

java -jar "$JAR" -neg-coords toZero -source-xml "$INFILE" -target-xml "$OUTFILE" -convert-to ALTO 2>&1
page-to-alto $ARGUMENT "$INFILE" > "$OUTFILE"; retval="$?"

if [[ "$1" = "-" ]]; then
rm "$INFILE"
fi

if (( retval > 0 ));then
rm "$OUTFILE"
exit $retval
fi

if [[ "$2" = "-" ]]; then
if [[ -z "$ARGUMENT" ]]; then
cat "$OUTFILE"
else
java -cp "$VENDORDIR/saxon9he.jar" net.sf.saxon.Query -s:"$OUTFILE" -qs:/ "$ARGUMENT"
fi
cat "$OUTFILE"
rm "$OUTFILE"
fi
32 changes: 32 additions & 0 deletions script/transform/page__alto_legacy
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Convert an XML file to ALTO using the vendored JPageConverter jar.
#
# Usage: page__alto_legacy INFILE OUTFILE [ARGUMENT]
#   INFILE    source XML path, or "-" to read the document from stdin
#   OUTFILE   target XML path, or "-" to write the result to stdout
#   ARGUMENT  optional; only used when OUTFILE is "-". When non-empty, the
#             converted file is passed through Saxon's Query class with this
#             value as a trailing argument instead of being printed as-is.
#
# Exit status: the converter's status if the conversion fails; otherwise the
# status of the last command run.

SCRIPTDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Quote the path (it may contain spaces) and chain with && so that a failed
# cd does not silently report the current directory as the vendor directory.
VENDORDIR="$(cd "$SCRIPTDIR/../../vendor/" && pwd)"
JAR="$VENDORDIR/JPageConverter/PageConverter.jar"
INFILE="$1"
OUTFILE="$2"
ARGUMENT="$3"

# The converter is given file paths, so stdin is spooled to a temp file.
if [[ "$1" = "-" ]]; then
    INFILE="$(mktemp)"
    cat >"$INFILE"
fi

# Likewise, stdout output is produced via a temp file that is printed below.
if [[ "$2" = "-" ]]; then
    OUTFILE="$(mktemp)"
fi

# The converter's stderr is merged into stdout (2>&1).
java -jar "$JAR" -neg-coords toZero -source-xml "$INFILE" -target-xml "$OUTFILE" -convert-to ALTO 2>&1
retval="$?"

if [[ "$1" = "-" ]]; then
    rm "$INFILE"
fi

# Propagate a conversion failure instead of printing a missing/partial result
# and exiting 0. Only the temp file created here is removed; an output path
# supplied by the caller is left untouched.
if (( retval > 0 )); then
    if [[ "$2" = "-" ]]; then
        rm -f "$OUTFILE"
    fi
    exit "$retval"
fi

if [[ "$2" = "-" ]]; then
    if [[ -z "$ARGUMENT" ]]; then
        cat "$OUTFILE"
    else
        # NOTE(review): the query string is fixed to "/" and ARGUMENT is passed
        # as an additional Saxon argument - confirm the intended usage.
        java -cp "$VENDORDIR/saxon9he.jar" net.sf.saxon.Query -s:"$OUTFILE" -qs:/ "$ARGUMENT"
    fi
    rm "$OUTFILE"
fi
11 changes: 10 additions & 1 deletion vendor/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ RM = rm -rfv
UNZIP = unzip -o
WGET = wget --progress=bar:force --no-verbose
GIT_CLONE = git clone --depth 1
PIP = pip3

SAXON_HE_VERSION_MAJOR = 9
SAXON_HE_VERSION_MINOR = 9
Expand Down Expand Up @@ -45,6 +46,9 @@ ALTO2PAGE_ZIP = JPageConverter.zip
ALTO2PAGE_URL = https://github.com/PRImA-Research-Lab/prima-page-converter/releases/download/$(ALTO2PAGE_VERSION)/JPageConverter_$(ALTO2PAGE_VERSION).zip
ALTO2PAGE_DIR = JPageConverter

# page-to-alto: name of the local checkout directory (also used as the make
# target that creates it) and the Git URL it is cloned from.
PAGE_TO_ALTO_REPO = page-to-alto
PAGE_TO_ALTO_URL = https://github.com/kba/page-to-alto

# {{{
# SAXON_BROWSER_VERSION = 1.1
# SAXON_BROWSER_ZIP = Saxon-CE_$(SAXON_BROWSER_VERSION).zip
Expand All @@ -69,7 +73,8 @@ all:\
$(PAGE2HOCR_REPO)\
$(HOCR_SPEC_REPO)\
$(SAXON_HE_JAR) \
$(ALTO2PAGE_DIR)
$(ALTO2PAGE_DIR) \
$(PAGE_TO_ALTO_REPO)

clean:
$(RM) $(ALTO_SCHEMA_REPO)
Expand Down Expand Up @@ -133,3 +138,7 @@ $(ALTO2PAGE_ZIP):
# Unpack the JPageConverter archive (first prerequisite) with $(UNZIP), then
# rename the "JPageConverter <major.minor>" directory to the target name.
# NOTE(review): presumably that versioned directory is the archive's top-level
# folder - confirm against the release zip.
$(ALTO2PAGE_DIR): $(ALTO2PAGE_ZIP)
$(UNZIP) "$<"
mv "JPageConverter $(ALTO2PAGE_VERSION_MAJOR_MINOR)" "$@"

# Clone page-to-alto into the target directory and install it with $(PIP).
$(PAGE_TO_ALTO_REPO):
	$(GIT_CLONE) "$(PAGE_TO_ALTO_URL)" "$@"
	# Use && so pip never runs outside the checkout if cd fails. If the
	# install fails, remove the checkout: otherwise the directory would
	# exist, make would consider the target built, and the install would
	# never be retried.
	cd "$@" && $(PIP) install . || { $(RM) "$@"; exit 1; }

0 comments on commit e0b42ff

Please sign in to comment.