From 144485d48a9378392b8d20448acca6ed6c44a34c Mon Sep 17 00:00:00 2001 From: Jean Connelly Date: Mon, 13 May 2024 12:18:13 -0400 Subject: [PATCH] Use Python to make text report from html (#441) * Use Python to make text from html --------- Co-authored-by: Tom Aldcroft --- starcheck/src/starcheck.pl | 38 +++----------------------------------- starcheck/utils.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 35 deletions(-) diff --git a/starcheck/src/starcheck.pl b/starcheck/src/starcheck.pl index bed0204a..2c2c7584 100755 --- a/starcheck/src/starcheck.pl +++ b/starcheck/src/starcheck.pl @@ -33,7 +33,6 @@ use Cwd qw( abs_path ); -use HTML::TableExtract; use Carp 'verbose'; $SIG{__DIE__} = sub { Carp::confess(@_) }; @@ -965,41 +964,10 @@ sub json_obsids { if ($par{text}) { - my $textout = io("${STARCHECK}.txt"); - - my $te = HTML::TableExtract->new(); - $te->parse($out); - - my %table; - foreach my $ts ($te->table_states) { - - # print "Table (", join(',', $ts->coords), "):\n"; - my $table_text = qq{}; - my ($depth, $count) = $ts->coords; - foreach my $row ($ts->rows) { - $table_text .= $row->[0] . "\n" if defined $row->[0]; - } - if ($table_text =~ /OBSID/s) { - $table{$depth}{$count} = $table_text; - } - } - - # use Data::Dumper; - # print Dumper %table; - for my $depth (sort { $a <=> $b } (keys %table)) { - for my $count (sort { $a <=> $b } (keys %{ $table{$depth} })) { - - # print " $depth $count \n"; - my $chunk = $table{$depth}{$count}; - chomp($chunk); - $chunk =~ s/\s+$/\n/; - $textout->print("$chunk"); - $textout->print( -"==================================================================================== \n" - ); - } - } + my $textout = io("${STARCHECK}.txt"); + $textout->print(call_python("utils.prehtml2text", [ $out ])); + $textout->close; print STDERR "Wrote text report to $STARCHECK.txt\n"; } diff --git a/starcheck/utils.py b/starcheck/utils.py index e5a765e0..6694b256 100644 --- a/starcheck/utils.py +++ b/starcheck/utils.py @@ -2,6 +2,7 @@ import os from pathlib import Path import warnings +from bs4 import BeautifulSoup import agasc import cxotime @@ -45,6 +46,17 @@ message=r"\nModel .* computed between .* clipping input mag\(s\) outside that range\.", ) +def prehtml2text(html_text): + """Convert the starcheck report html to plain text.""" + + soup = BeautifulSoup(html_text, "lxml") + + # All of the report is basically in the pre tags, so write those out with a separator line. + section_separator = "\n" + "=" * 84 + outs = [pre.get_text() + section_separator for pre in soup.find_all("pre")] + + return "\n".join(outs) + def date2secs(val): """Convert date to seconds since 1998.0"""