Skip to content

Commit

Permalink
Merge pull request #957 from samjwu/articleinfoos
Browse files Browse the repository at this point in the history
Modify OS info for article info
  • Loading branch information
samjwu authored Dec 3, 2024
2 parents 5c18041 + 585d08e commit 0355e44
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 215 deletions.
16 changes: 12 additions & 4 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
external_projects_current_project = "rocm-docs-core"

setting_all_article_info = True

all_article_info_os = ["linux", "windows"]

all_article_info_os = []
all_article_info_author = ""
# specific settings override any general settings (eg: all_article_info_<field>)
article_pages = [
{
Expand All @@ -27,7 +26,16 @@
"date": "2024-07-03",
"read-time": "2 min read",
},
{"file": "developer_guide/commitizen"},
{
"file": "user_guide/article_info",
"os": [],
"author": "",
"date": "",
"read-time": "",
},
{
"file": "developer_guide/commitizen",
},
]

html_theme = "rocm_docs_theme"
Expand Down
216 changes: 216 additions & 0 deletions src/rocm_docs/article_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
"""Logic to add article info to a page.
For all options see the user guide:
https://rocm.docs.amd.com/projects/rocm-docs-core/en/latest/user_guide/article_info.html
"""

from typing import Any, cast

import importlib.resources
import os
from pathlib import Path

import bs4
import git.repo
from sphinx.application import Sphinx
from sphinx.config import Config


def set_article_info(app: Sphinx, _: Config) -> None:
    """Insert the article-info header into the generated HTML pages.

    Pages listed in the ``article_pages`` configuration value are processed
    first with their page-specific settings. When
    ``setting_all_article_info`` is ``True`` the remaining pages are then
    processed with the general settings.
    """
    config = app.config
    apply_to_all = config.setting_all_article_info

    # Nothing to do: the global switch is off and no single page is configured.
    if apply_to_all is False and len(config.article_pages) == 0:
        return

    # HTML snippet shipped inside the ``rocm_docs`` package; its placeholders
    # are substituted per page by the page-level functions.
    template_file = importlib.resources.files("rocm_docs").joinpath(
        "rocm_docs_theme/components/article-info.html"
    )
    article_info = template_file.read_text(encoding="utf-8")

    # Filled in by _set_page_article_info so that the "all pages" pass can
    # skip pages which already received their specific header.
    handled_pages: list[str] = []
    _set_page_article_info(app, article_info, handled_pages)

    if apply_to_all is True:
        _set_all_article_info(app, article_info, handled_pages)


def _remove_article_info_icon(snippet: str, css_class: str) -> str:
    """Return *snippet* re-serialized without the first ``<span>`` of *css_class*.

    The snippet is always round-tripped through the ``html.parser`` backend,
    whether or not a matching ``<span>`` is found.
    """
    soup = bs4.BeautifulSoup(snippet, "html.parser")
    icon = soup.find("span", class_=css_class)
    if isinstance(icon, bs4.Tag):
        icon.decompose()
    return str(soup)


def _set_page_article_info(
    app: Sphinx, article_info: str, specific_pages: list[str]
) -> None:
    """Add article info headers to the configured HTML pages.

    The pages can be set in "article_pages" of the Sphinx configuration.

    Args:
        app: The running Sphinx application; its configuration, project,
            source directory and output directory are read.
        article_info: HTML template containing the ``<!--os-info-->``,
            ``<!--author-info-->``, ``<!--date-info-->`` and
            ``<!--read-info-->`` placeholders.
        specific_pages: Output list; the ``"file"`` value of every page that
            was processed is appended to it.
    """
    repo = git.repo.Repo(app.srcdir, search_parent_directories=True)
    for page in app.config.article_pages:
        path_rel = app.project.doc2path(page["file"], False)
        path_html = Path(app.outdir, path_rel).with_suffix(".html")
        path_source = Path(app.srcdir, path_rel)

        # FIXME: This will silently skip all files when not building the default
        # `html` format (e.g `htmlzip`, `epub` or `pdf`)
        if not path_html.is_file():
            continue

        # Use get() rather than setdefault(): the fallback must not be written
        # back into the user's configuration dictionary.
        page_os = page.get("os", app.config.all_article_info_os)
        os_list = [
            label
            for key, label in (("linux", "Linux"), ("windows", "Windows"))
            if key in page_os
        ]
        article_os_info = " and ".join(os_list)
        if os_list:
            article_os_info = f"Applies to {article_os_info}"
        modified_info = article_info.replace("<!--os-info-->", article_os_info)

        author = page.get("author", app.config.all_article_info_author)
        modified_info = modified_info.replace("<!--author-info-->", author)

        date_info: str | None
        if "date" in page:
            date_info = page["date"]
        else:
            date_info = _get_time_last_modified(repo, path_source)

        # An explicitly empty date hides the date icon as well.
        if date_info == "":
            modified_info = _remove_article_info_icon(
                modified_info, "article-info-date-svg"
            )

        # With no date at all (None) the placeholder, itself an HTML comment,
        # is left in place.
        if date_info is not None:
            modified_info = modified_info.replace("<!--date-info-->", date_info)

        if "read-time" in page:
            read_time = page["read-time"]
        else:
            read_time = _estimate_read_time(path_html)

        # An explicitly empty read time hides the read-time icon as well.
        if read_time == "":
            modified_info = _remove_article_info_icon(
                modified_info, "article-info-read-time-svg"
            )

        if read_time is not None:
            modified_info = modified_info.replace("<!--read-info-->", read_time)

        specific_pages.append(page["file"])
        _write_article_info(path_html, modified_info)


def _set_all_article_info(
    app: Sphinx, article_info: str, specific_pages: list[str]
) -> None:
    """Add article info headers with general settings to all HTML pages.

    Pages that have specific settings (configured by "article_pages") are
    skipped.

    Args:
        app: The running Sphinx application; its configuration, project,
            source directory and output directory are read.
        article_info: HTML template containing the ``<!--os-info-->``,
            ``<!--author-info-->``, ``<!--date-info-->`` and
            ``<!--read-info-->`` placeholders.
        specific_pages: Document names that must be skipped.
    """
    repo = git.repo.Repo(app.srcdir, search_parent_directories=True)

    # The OS line and the author are identical for every page, so substitute
    # them once instead of once per document.
    configured_os = app.config.all_article_info_os
    os_list = [
        label
        for key, label in (("linux", "Linux"), ("windows", "Windows"))
        if key in configured_os
    ]
    article_os_info = " and ".join(os_list)
    if os_list:
        article_os_info = f"Applies to {article_os_info}"

    base_info = article_info.replace("<!--os-info-->", article_os_info)
    base_info = base_info.replace(
        "<!--author-info-->", app.config.all_article_info_author
    )

    # Set membership keeps the per-document skip check constant time.
    pages_to_skip = set(specific_pages)

    for docname in app.project.docnames:
        # skip pages with specific settings
        if docname in pages_to_skip:
            continue

        page_rel = app.project.doc2path(docname, False)
        page = Path(app.outdir, page_rel).with_suffix(".html")

        # FIXME: This will silently skip all files when not building the default
        # `html` format (e.g `htmlzip`, `epub` or `pdf`)
        if not page.is_file():
            continue

        # Fall back to the configured date when git has no commit for the file.
        date_info = _get_time_last_modified(repo, Path(app.srcdir, page_rel))
        if not date_info:
            date_info = cast(str, app.config.all_article_info_date)

        modified_info = base_info.replace("<!--date-info-->", date_info)
        modified_info = modified_info.replace(
            "<!--read-info-->", _estimate_read_time(page)
        )

        _write_article_info(page, modified_info)


def _get_time_last_modified(repo: git.repo.Repo, path: Path) -> str | None:
    """Return the date of the newest commit that touches *path*.

    The date is formatted as ``YYYY-MM-DD``. ``None`` is returned when the
    repository yields no commit for *path*.
    """
    latest_commit = next(repo.iter_commits(paths=path, max_count=1), None)
    if latest_commit is None:
        return None
    return latest_commit.committed_datetime.strftime("%Y-%m-%d")


def _estimate_read_time(file_name: Path) -> str:
    """Estimate how long the HTML page at *file_name* takes to read.

    The number of visible characters is divided by an average word length of
    5 to approximate the word count, which is then divided by a reading speed
    of 200 words per minute. The result is rounded and never below one minute.

    Args:
        file_name: Path of the generated HTML file (read as UTF-8).

    Returns:
        A string of the form ``"<minutes> min read time"``.
    """

    def is_visible(element: bs4.element.NavigableString) -> bool:
        # Text inside these containers is not part of the readable content.
        if element.parent.name in [
            "style",
            "script",
            "[document]",
            "head",
            "title",
        ]:
            return False
        if isinstance(element, bs4.element.Comment):
            return False
        # Lone newlines between tags are not counted.
        return element.string != "\n"

    words_per_minute = 200
    average_word_length = 5

    with open(file_name, encoding="utf-8") as file:
        html = file.read()
    soup = bs4.BeautifulSoup(html, "html.parser")
    # find_all(string=True) is the current spelling of the deprecated
    # findAll(text=True); both return every text node of the document.
    page_text = soup.find_all(string=True)
    visible_page_text = filter(is_visible, page_text)
    average_word_count = (
        sum(len(line) for line in visible_page_text) / average_word_length
    )
    time_minutes = int(max(1, round(average_word_count / words_per_minute)))
    return f"{time_minutes} min read time"


def _write_article_info(path: os.PathLike[Any], article_info: str) -> None:
    """Insert *article_info* after the first ``<h1>`` of the page's article.

    The file at *path* is rewritten in place. It is left unchanged when it
    already contains the ``rocm-docs-core-article-info`` element, or when it
    has no ``<article>`` element with an ``<h1>`` inside.
    """
    with open(path, "r+", encoding="utf8") as html_file:
        document = bs4.BeautifulSoup(html_file.read(), "html.parser")

        # Do not insert the header a second time.
        existing = document.find("div", id="rocm-docs-core-article-info")
        if existing is not None:
            return

        # The header is anchored to the article heading; without one there is
        # no place to put it.
        article = document.article
        if article is None:
            return
        heading = article.h1
        if heading is None:
            return

        heading.insert_after(bs4.BeautifulSoup(article_info, "html.parser"))

        # Overwrite the file from the start with the updated markup.
        html_file.seek(0)
        html_file.truncate(0)
        html_file.write(str(document))
Loading

0 comments on commit 0355e44

Please sign in to comment.