Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: first pass at html output #88

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion blark/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pathlib

from . import _version, plain
from . import _version, html, plain
from .parse import get_parser, new_parser, parse_project, parse_source_code
from .solution import Project, Solution, TwincatTsProject
from .transform import GrammarTransformer
Expand All @@ -13,6 +13,7 @@
GRAMMAR_FILENAME = MODULE_PATH / "iec.lark"

plain._register()
html._register()

__all__ = [
"GRAMMAR_FILENAME",
Expand Down
177 changes: 177 additions & 0 deletions blark/html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Syntax-highlighted HTML file writer."""
from __future__ import annotations

import collections
import dataclasses
import pathlib
from typing import Any, DefaultDict, Dict, Generator, List, Optional

import lark

from .output import OutputBlock, register_output_handler


@dataclasses.dataclass(frozen=True)
class HighlighterAnnotation:
"""A single HTML tag annotation which applies to a position range of source code."""

name: str
terminal: bool
is_open_tag: bool
other_tag_pos: int

def __str__(self) -> str:
return self.as_string()

def as_string(self, tag: str = "span") -> str:
# some options here?
if not self.is_open_tag:
return f'</{tag}>'

if self.terminal:
classes = " ".join(("term", self.name))
else:
classes = " ".join(("rule", self.name))

return f'<{tag} class="{classes}">'


def _add_annotation_pair(
annotations: DefaultDict[int, List[HighlighterAnnotation]],
name: str,
start_pos: int,
end_pos: int,
terminal: bool,
) -> None:
"""
Add a pair of HTML tag annotations to the position-indexed list.

Parameters
----------
annotations : DefaultDict[int, List[HighlighterAnnotation]]
Annotations keyed by 0-indexed string position.
name : str
Name of the annotation.
start_pos : int
String index position which the annotation applies to.
end_pos : int
String index position which the annotation ends at.
terminal : bool
Whether this is a TERMINAL (True) or a rule (false).
"""
annotations[start_pos].append(
HighlighterAnnotation(
name=name,
terminal=terminal,
is_open_tag=True,
other_tag_pos=end_pos,
)
)
annotations[end_pos].append(
HighlighterAnnotation(
name=name,
terminal=terminal,
is_open_tag=False,
other_tag_pos=start_pos,
)
)


def get_annotations(tree: lark.Tree) -> DefaultDict[int, List[HighlighterAnnotation]]:
"""Get annotations for syntax elements in the given parse tree."""
annotations: DefaultDict[int, List[HighlighterAnnotation]] = collections.defaultdict(
list
)

for subtree in tree.iter_subtrees():
if hasattr(subtree.meta, "start_pos"):
_add_annotation_pair(
annotations,
name=subtree.data,
terminal=False,
start_pos=subtree.meta.start_pos,
end_pos=subtree.meta.end_pos,
)
for child in subtree.children:
if isinstance(child, lark.Token):
if child.start_pos is not None and child.end_pos is not None:
_add_annotation_pair(
annotations,
name=child.type,
terminal=True,
start_pos=child.start_pos,
end_pos=child.end_pos,
)
return annotations


def apply_annotations_to_code(
code: str,
annotations: Dict[int, List[HighlighterAnnotation]]
) -> str:
def annotate() -> Generator[str, None, None]:
pos = 0
for pos, ch in enumerate(code):
for ann in reversed(annotations.get(pos, [])):
yield str(ann)
if ch == " ":
yield "&nbsp;"
else:
yield ch

for ann in annotations.get(pos + 1, []):
yield str(ann)

return "".join(annotate())


@dataclasses.dataclass
class HtmlWriter:
user: Any
source_filename: Optional[pathlib.Path]
block: OutputBlock

@property
def source_code(self) -> str:
"""The source code associated with the block."""
assert self.block.origin is not None
return self.block.origin.source_code

def to_html(self) -> str:
"""HTML tag-annotated source code."""
assert self.block.origin is not None
assert self.block.origin.tree is not None
annotations = get_annotations(self.block.origin.tree)

for comment in self.block.origin.comments:
if comment.start_pos is not None and comment.end_pos is not None:
_add_annotation_pair(
annotations,
name=comment.type,
start_pos=comment.start_pos,
end_pos=comment.end_pos,
terminal=True,
)

return apply_annotations_to_code(self.source_code, annotations)

@staticmethod
def save(
user: Any,
source_filename: Optional[pathlib.Path],
parts: List[OutputBlock],
) -> str:
"""Convert the source code block to HTML and return it."""
result = []
for part in parts:
writer = HtmlWriter(user, source_filename, part)
result.append(writer.to_html())

return "\n\n".join(result)


def _register():
"""Register the HTML output file handlers."""
register_output_handler("html", HtmlWriter.save)
register_output_handler(".htm", HtmlWriter.save)
register_output_handler(".html", HtmlWriter.save)
4 changes: 4 additions & 0 deletions blark/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ def test_blark_main_help(monkeypatch, args: List[str]):
["format", "--debug", "filename"],
id="format-debug",
),
param(
["format", "-o", "html", "filename"],
id="format-html",
),
]
)
def test_blark_main(monkeypatch, input_filename: str, args: List[str], skip_summary: bool):
Expand Down
Loading