From d3d205625655b78a92224296bb98786218f5277a Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Mon, 9 Sep 2024 15:35:59 +1000 Subject: [PATCH 01/36] make md2html.py a standalone script in user_docs that can be used by more than just SCons. --- projectDocs/dev/developerGuide/sconscript | 15 +++- sconstruct | 27 ++++-- .../site_tools => user_docs}/md2html.py | 85 ++++++------------- 3 files changed, 61 insertions(+), 66 deletions(-) rename {site_scons/site_tools => user_docs}/md2html.py (68%) diff --git a/projectDocs/dev/developerGuide/sconscript b/projectDocs/dev/developerGuide/sconscript index 590727221f8..3a9a9beeeb6 100644 --- a/projectDocs/dev/developerGuide/sconscript +++ b/projectDocs/dev/developerGuide/sconscript @@ -12,7 +12,20 @@ env = env.Clone() devDocsOutputDir = outputDir.Dir("devDocs") # Build the developer guide and move it to the output directory -htmlFile = env.md2html("developerGuide.md") +mdFile = env.File("developerGuide.md") +# first substitute some variables such as NvDA version and URL into the markdown file +mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + }, +) +htmlFile = env.Command( + target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], +) devGuide = env.Command( target=devDocsOutputDir.File("developerGuide.html"), source=htmlFile, action=Move("$TARGET", "$SOURCE") ) diff --git a/sconstruct b/sconstruct index 0b9c6a401b2..171ca86d488 100755 --- a/sconstruct +++ b/sconstruct @@ -149,7 +149,6 @@ env = Environment( tools=[ "textfile", "gettextTool", - "md2html", "doxygen", "recursiveInstall", "m4", @@ -346,7 +345,21 @@ for xliffFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.xliff")): ) # Allow all markdown files to be converted to html in user_docs for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): - htmlFile = 
env.md2html(mdFile) + # first substitute some variables such as NvDA version and URL into the markdown file + mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + "NVDA_URL": versionInfo.url, + "NVDA_COPYRIGHT_YEARS": versionInfo.copyrightYears, + }, + ) + htmlFile = env.Command( + target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], + ) styleInstallPath = os.path.dirname(mdFile.abspath) installedStyle = env.Install(styleInstallPath, styles) installedHeadingsStyle = env.Install(styleInstallPath, numberedHeadingsStyle) @@ -362,11 +375,13 @@ for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): env.Depends(htmlFile, mdFile) # Create key commands files -for userGuideFile in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md")): - keyCommandsHtmlFile = env.md2html( - userGuideFile.abspath.replace("userGuide.md", "keyCommands.html"), userGuideFile +for userGuideFileSub in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md.sub")): + keyCommandsHtmlFile = env.Command( + target=userGuideFileSub.abspath.replace("userGuide.md.sub", "keyCommands.html"), + source=userGuideFileSub, + action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], ) - env.Depends(keyCommandsHtmlFile, userGuideFile) + env.Depends(keyCommandsHtmlFile, userGuideFileSub) # Build unicode CLDR dictionaries env.SConscript("cldrDict_sconscript", exports=["env", "sourceDir"]) diff --git a/site_scons/site_tools/md2html.py b/user_docs/md2html.py similarity index 68% rename from site_scons/site_tools/md2html.py rename to user_docs/md2html.py index 929df794782..9647d29115b 100644 --- a/site_scons/site_tools/md2html.py +++ b/user_docs/md2html.py @@ -3,16 +3,13 @@ # This file may be used under the terms of the GNU General Public License, version 2 or later. 
# For more details see: https://www.gnu.org/licenses/gpl-2.0.html +import sys from copy import deepcopy -from importlib.util import find_spec import io import pathlib import re import shutil -import SCons.Node.FS -import SCons.Environment - DEFAULT_EXTENSIONS = frozenset( { # Supports tables, HTML mixed with markdown, code blocks, custom attributes and more @@ -25,7 +22,7 @@ "markdown_link_attr_modifier", # Adds links to GitHub authors, issues and PRs "mdx_gh_links", - }, + } ) EXTENSIONS_CONFIG = { @@ -55,16 +52,6 @@ """.strip() -def _replaceNVDATags(md: str, env: SCons.Environment.Environment) -> str: - import versionInfo - - # Replace tags in source file - md = md.replace("NVDA_VERSION", env["version"]) - md = md.replace("NVDA_URL", versionInfo.url) - md = md.replace("NVDA_COPYRIGHT_YEARS", versionInfo.copyrightYears) - return md - - def _getTitle(mdBuffer: io.StringIO, isKeyCommands: bool = False) -> str: if isKeyCommands: TITLE_RE = re.compile(r"^$") @@ -122,7 +109,7 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: extensions = set(DEFAULT_EXTENSIONS) if isKeyCommands: - from user_docs.keyCommandsDoc import KeyCommandsExtension + from keyCommandsDoc import KeyCommandsExtension extensions.add(KeyCommandsExtension()) @@ -145,36 +132,30 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: return htmlOutput -def md2html_actionFunc( - target: list[SCons.Node.FS.File], - source: list[SCons.Node.FS.File], - env: SCons.Environment.Environment, -): - isKeyCommands = target[0].path.endswith("keyCommands.html") - isUserGuide = target[0].path.endswith("userGuide.html") - isDevGuide = target[0].path.endswith("developerGuide.html") - isChanges = target[0].path.endswith("changes.html") +def main(cmd: str, source: str, dest: str): + if cmd not in ("check", "convert"): + raise ValueError(f"Unknown command {cmd}") + print(f"{cmd} {source} {dest}") + isKeyCommands = dest.endswith("keyCommands.html") + isUserGuide = 
dest.endswith("userGuide.html") + isDevGuide = dest.endswith("developerGuide.html") + isChanges = dest.endswith("changes.html") - with open(source[0].path, "r", encoding="utf-8") as mdFile: + with open(source, "r", encoding="utf-8") as mdFile: mdStr = mdFile.read() - mdStr = _replaceNVDATags(mdStr, env) - with io.StringIO() as mdBuffer: mdBuffer.write(mdStr) title = _getTitle(mdBuffer, isKeyCommands) - lang = pathlib.Path(source[0].path).parent.name - if isDevGuide and lang == "developerGuide": - # Parent folder in this case is the developerGuide folder in project docs - lang = "en" + lang = pathlib.Path(source).parent.name if isUserGuide or isDevGuide: - extraStylesheet = '' + extraStylesheet = "" elif isChanges or isKeyCommands: extraStylesheet = "" else: - raise ValueError(f"Unknown target type for {target[0].path}") + raise ValueError(f"Unknown target type for {dest}") htmlBuffer = io.StringIO() htmlBuffer.write( @@ -183,7 +164,7 @@ def md2html_actionFunc( dir="rtl" if lang in RTL_LANG_CODES else "ltr", title=title, extraStylesheet=extraStylesheet, - ), + ) ) htmlOutput = _generateSanitizedHTML(mdStr, isKeyCommands) @@ -195,31 +176,17 @@ def md2html_actionFunc( htmlBuffer.seek(0, io.SEEK_END) htmlBuffer.write("\n\n\n") - with open(target[0].path, "w", encoding="utf-8") as targetFile: - # Make next read at start of buffer - htmlBuffer.seek(0) - shutil.copyfileobj(htmlBuffer, targetFile) + if cmd == "convert": + with open(dest, "w", encoding="utf-8") as targetFile: + # Make next read at start of buffer + htmlBuffer.seek(0) + shutil.copyfileobj(htmlBuffer, targetFile) htmlBuffer.close() -def exists(env: SCons.Environment.Environment) -> bool: - for ext in [ - "markdown", - "markdown_link_attr_modifier", - "mdx_truly_sane_lists", - "mdx_gh_links", - "nh3", - "user_docs.keyCommandsDoc", - ]: - if find_spec(ext) is None: - return False - return True - - -def generate(env: SCons.Environment.Environment): - env["BUILDERS"]["md2html"] = env.Builder( - 
action=env.Action(md2html_actionFunc, lambda t, s, e: f"Converting {s[0].path} to {t[0].path}"), - suffix=".html", - src_suffix=".md", - ) +if __name__ == "__main__": + cmd = sys.argv[1] + source = sys.argv[2] + dest = sys.argv[3] + main(cmd, source, dest) From 552cba5ca986ce5ad1b4562e1ea61328a03a4976 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 11 Sep 2024 17:31:30 +1000 Subject: [PATCH 02/36] markdownTranslate translateXliff: include line number on exceptions. Fail if purely structural lines do not match the skeleton. --- user_docs/markdownTranslate.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index c70353a0e60..d9d87890a9c 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -369,19 +369,27 @@ def translateXliff( f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}' ) translation = pretranslatedLine[len(prefix) : len(pretranslatedLine) - len(suffix)] - unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not None: - segment = unit.find("./xliff:segment", namespaces=namespace) - if segment is not None: - target = lxml.etree.Element("target") - target.text = xmlEscape(translation) - target.tail = "\n" - segment.append(target) - res.numTranslatedStrings += 1 + try: + unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) + if unit is not None: + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is not None: + target = lxml.etree.Element("target") + target.text = xmlEscape(translation) + target.tail = "\n" + segment.append(target) + res.numTranslatedStrings += 1 + else: + raise ValueError(f"No segment found for unit {ID}") else: - raise ValueError(f"No segment found for unit {ID}") - else: - raise ValueError(f"Cannot locate Unit {ID} in xliff file") + raise 
ValueError(f"Cannot locate Unit {ID} in xliff file") + except Exception as e: + e.add_note( + f"Line {lineNo}: {pretranslatedLine=}, {skelLine=}" + ) + raise + elif skelLine != pretranslatedLine: + raise ValueError(f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}") xliff.write(outputPath, encoding="utf8", xml_declaration=True) print(f"Translated xliff file with {res.numTranslatedStrings} translated strings") return res From 6cb9d0720a040a7f96e562eff42814b9ea126e01 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 11 Sep 2024 17:33:07 +1000 Subject: [PATCH 03/36] Add a utility script for translators called nvdaL10nUtil which does a few things such as xliff to markdown, and markdown to html. This required slightly refactoring md2html.py and sconstruct. --- sconstruct | 14 ++++++++++++-- user_docs/md2html.py | 38 ++++++++++++++++++++------------------ user_docs/nvdaL10nUtil.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 20 deletions(-) create mode 100644 user_docs/nvdaL10nUtil.py diff --git a/sconstruct b/sconstruct index 171ca86d488..4bbba5c006b 100755 --- a/sconstruct +++ b/sconstruct @@ -355,10 +355,12 @@ for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): "NVDA_COPYRIGHT_YEARS": versionInfo.copyrightYears, }, ) + lang = os.path.split(os.path.dirname(mdFile.path))[-1] + docType = os.path.basename(mdFile.path).split(".")[0] htmlFile = env.Command( target=mdFile.abspath.replace(".md", ".html"), source=mdFileSub, - action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t {docType} "$SOURCE" "$TARGET"'], ) styleInstallPath = os.path.dirname(mdFile.abspath) installedStyle = env.Install(styleInstallPath, styles) @@ -376,10 +378,11 @@ for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): # Create key commands files for userGuideFileSub in 
env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md.sub")): + lang = os.path.split(os.path.dirname(userGuideFileSub.path))[-1] keyCommandsHtmlFile = env.Command( target=userGuideFileSub.abspath.replace("userGuide.md.sub", "keyCommands.html"), source=userGuideFileSub, - action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t keyCommands "$SOURCE" "$TARGET"'], ) env.Depends(keyCommandsHtmlFile, userGuideFileSub) @@ -728,3 +731,10 @@ source = env.Dir(os.path.join(os.getcwd(), "dist")) # Putting the target in the output dir automatically causes AppVeyor to package it as an artefact target = env.File(os.path.join(outputDir.abspath, "library_modules.txt")) env.Alias("moduleList", env.GenerateModuleList(target, source)) + +nvdaL10nUtil = env.Command( + target=outputDir.File("nvdaL10nUtil.exe"), + source="user_docs/nvdaL10nUtil.py", + ENV=os.environ, + action=f"nuitka --standalone --onefile --output-dir=./output --include-module=mdx_truly_sane_lists --include-module=markdown_link_attr_modifier --include-module=mdx_gh_links user_docs/nvdaL10nUtil.py", +) diff --git a/user_docs/md2html.py b/user_docs/md2html.py index 9647d29115b..c3d6ccaf559 100644 --- a/user_docs/md2html.py +++ b/user_docs/md2html.py @@ -4,6 +4,7 @@ # For more details see: https://www.gnu.org/licenses/gpl-2.0.html import sys +import argparse from copy import deepcopy import io import pathlib @@ -132,15 +133,14 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: return htmlOutput -def main(cmd: str, source: str, dest: str): - if cmd not in ("check", "convert"): - raise ValueError(f"Unknown command {cmd}") - print(f"{cmd} {source} {dest}") - isKeyCommands = dest.endswith("keyCommands.html") - isUserGuide = dest.endswith("userGuide.html") - isDevGuide = dest.endswith("developerGuide.html") - isChanges = dest.endswith("changes.html") - +def main(source: str, dest: str, lang="en", docType=None): 
+ print(f"Converting {docType or 'document'} at {source} to {dest}, {lang=}") + isUserGuide = docType == "userGuide" + isDevGuide = docType == "developerGuide" + isChanges = docType == "changes" + isKeyCommands = docType == "keyCommands" + if docType and not any([isUserGuide, isDevGuide, isChanges, isKeyCommands]): + raise ValueError(f"Unknown docType {docType}") with open(source, "r", encoding="utf-8") as mdFile: mdStr = mdFile.read() @@ -176,17 +176,19 @@ def main(cmd: str, source: str, dest: str): htmlBuffer.seek(0, io.SEEK_END) htmlBuffer.write("\n\n\n") - if cmd == "convert": - with open(dest, "w", encoding="utf-8") as targetFile: - # Make next read at start of buffer - htmlBuffer.seek(0) - shutil.copyfileobj(htmlBuffer, targetFile) + with open(dest, "w", encoding="utf-8") as targetFile: + # Make next read at start of buffer + htmlBuffer.seek(0) + shutil.copyfileobj(htmlBuffer, targetFile) htmlBuffer.close() if __name__ == "__main__": - cmd = sys.argv[1] - source = sys.argv[2] - dest = sys.argv[3] - main(cmd, source, dest) + args = argparse.ArgumentParser() + args.add_argument("-l", "--lang", help="Language code", action="store", default="en") + args.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + args.add_argument("source", help="Path to the markdown file") + args.add_argument("dest", help="Path to the resulting html file") + args = args.parse_args() + main(source=args.source, dest=args.dest, lang=args.lang, docType=args.docType) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py new file mode 100644 index 00000000000..d4a754085dd --- /dev/null +++ b/user_docs/nvdaL10nUtil.py @@ -0,0 +1,31 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2024 NV Access Limited. +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. 
+ + +import os +import argparse +import markdownTranslate +import md2html + +if __name__ == "__main__": + args = argparse.ArgumentParser() + commands = args.add_subparsers(title="commands", dest="command", required=True) + command_xliff2md = commands.add_parser("xliff2md", help="Convert xliff to markdown") + command_xliff2md.add_argument("-u", "--untranslated", help="Produce the untranslated markdown file", action="store_true", default=False) + command_xliff2md.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2md.add_argument("mdPath", help="Path to the resulting markdown file") + command_md2html = commands.add_parser("md2html", help="Convert markdown to html") + command_md2html.add_argument("-l", "--lang", help="Language code", action="store", default="en") + command_md2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + command_md2html.add_argument("mdPath", help="Path to the markdown file") + command_md2html.add_argument("htmlPath", help="Path to the resulting html file") + args = args.parse_args() + match args.command: + case "xliff2md": + markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=args.mdPath, translated=not args.untranslated) + case "md2html": + md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) + case _: + raise ValueError(f"Unknown command {args.command}") From 4ba0f602fdf0f04bd9cf3df15d0ad7fae80f87e0 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 11 Sep 2024 17:34:13 +1000 Subject: [PATCH 04/36] nuitka is now a requirement, for building the standalone nvdaL10nUtil executable. 
--- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index ab96d3cf03f..db493975438 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,8 @@ pycaw==20240210 # Packaging NVDA py2exe==0.13.0.2 +# xliff2html is packaged with nuitka +nuitka==2.4.8 # Creating XML unit test reports unittest-xml-reporting==3.2.0 From 7bab7b1825ffc96ba54392e42d20d8c03c06337e Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Tue, 17 Sep 2024 14:24:53 +1000 Subject: [PATCH 05/36] markdownTranslate generateMarkdown: ignore bad translations containing '' where Crowdin has taken empty target tags as literal translations. --- user_docs/markdownTranslate.py | 61 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index d9d87890a9c..2babfe05468 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -384,12 +384,12 @@ def translateXliff( else: raise ValueError(f"Cannot locate Unit {ID} in xliff file") except Exception as e: - e.add_note( - f"Line {lineNo}: {pretranslatedLine=}, {skelLine=}" - ) + e.add_note(f"Line {lineNo}: {pretranslatedLine=}, {skelLine=}") raise elif skelLine != pretranslatedLine: - raise ValueError(f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}") + raise ValueError( + f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}" + ) xliff.write(outputPath, encoding="utf8", xml_declaration=True) print(f"Translated xliff file with {res.numTranslatedStrings} translated strings") return res @@ -400,6 +400,7 @@ class Result_generateMarkdown: numTotalLines = 0 numTranslatableStrings = 0 numTranslatedStrings = 0 + numBadTranslationStrings = 0 def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) -> Result_generateMarkdown: @@ -422,30 +423,44 @@ def 
generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - prefix, ID, suffix = m.groups() res.numTranslatableStrings += 1 unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not None: - segment = unit.find("./xliff:segment", namespaces=namespace) - if segment is not None: - source = segment.find("./xliff:source", namespaces=namespace) - if translated: - target = segment.find("./xliff:target", namespaces=namespace) - else: - target = None - if target is not None and target.text: - res.numTranslatedStrings += 1 - translation = xmlUnescape(target.text) - elif source is not None and source.text: - translation = xmlUnescape(source.text) - else: - raise ValueError(f"No source or target found for unit {ID}") - else: - raise ValueError(f"No segment found for unit {ID}") - else: + if unit is None: raise ValueError(f"Cannot locate Unit {ID} in xliff file") + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + raise ValueError(f"No segment found for unit {ID}") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + raise ValueError(f"No source found for unit {ID}") + translation = "" + if translated: + target = segment.find("./xliff:target", namespaces=namespace) + if target is not None: + targetText = target.text + if targetText: + translation = xmlUnescape(targetText) + # Crowdin treats empty targets () as a literal translation. + # Filter out such strings and count them as bad translations. 
+ if translation in ( + "", + "<target/>", + "", + "<target></target>", + ): + res.numBadTranslationStrings += 1 + translation = "" + else: + res.numTranslatedStrings += 1 + # If we have no translation, use the source text + if not translation: + sourceText = source.text + if sourceText is None: + raise ValueError(f"No source text found for unit {ID}") + translation = xmlUnescape(sourceText) outputFile.write(f"{prefix}{translation}{suffix}\n") else: outputFile.write(line) print( - f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings" + f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings. Ignoring {res.numBadTranslationStrings} bad translated strings" ) return res From 45886d82a7d49911bc75286fcdaa89717dcddabe Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 18 Sep 2024 12:45:50 +1000 Subject: [PATCH 06/36] markdownTranslate translateXliff: do not xmlEscape the pretranslated string - lxml does this itself. We were doing it twice. 
--- user_docs/markdownTranslate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index 2babfe05468..1cfd025f8e9 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -375,7 +375,7 @@ def translateXliff( segment = unit.find("./xliff:segment", namespaces=namespace) if segment is not None: target = lxml.etree.Element("target") - target.text = xmlEscape(translation) + target.text = translation target.tail = "\n" segment.append(target) res.numTranslatedStrings += 1 From ba832ab4d416b6e9474362dddd07c4525ded61c7 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 18 Sep 2024 15:31:30 +1000 Subject: [PATCH 07/36] markdownTranslate generateMarkdown: warn for corrupt or escaped lines --- user_docs/markdownTranslate.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index 1cfd025f8e9..10a0fbd483f 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -416,8 +416,8 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = xmlUnescape(skeletonNode.text).strip() - for line in skeletonContent.splitlines(keepends=True): + skeletonContent = skeletonNode.text).strip() + for lineNum, line in enumerate(skeletonContent.splitlines(keepends=True), 1): res.numTotalLines += 1 if m := re_translationID.match(line): prefix, ID, suffix = m.groups() @@ -437,7 +437,11 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - if target is not None: targetText = target.text if targetText: - translation = xmlUnescape(targetText) + translation = targetText + unescapedTargetText = xmlUnescape(targetText) + if unescapedTargetText != 
targetText: + print(f"Warning: line {lineNum} contained escaped characters. Unescaped: {unescapedTargetText}") + translation = unescapedTargetText # Crowdin treats empty targets () as a literal translation. # Filter out such strings and count them as bad translations. if translation in ( @@ -447,6 +451,7 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - "<target></target>", ): res.numBadTranslationStrings += 1 + print(f"Warning: line {lineNum} contained a corrupt empty translation. Using source") translation = "" else: res.numTranslatedStrings += 1 From 536c0684bd920b696f3b5e4ea48dbe7586c53898 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Sun, 22 Sep 2024 15:46:41 +1000 Subject: [PATCH 08/36] markdownTranslate: It is not correct to escape / unescape text when setting / getting from lxml as this automatically happens. Otherwise, content is doubly escaped. --- user_docs/markdownTranslate.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index 10a0fbd483f..c0a1a03bfc9 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -13,7 +13,6 @@ import re from itertools import zip_longest from xml.sax.saxutils import escape as xmlEscape -from xml.sax.saxutils import unescape as xmlUnescape import difflib from dataclasses import dataclass import subprocess @@ -158,7 +157,7 @@ def extractSkeleton(xliffPath: str, outputPath: str): if skeletonNode is None: raise ValueError("No skeleton found in xliff file") skeletonContent = skeletonNode.text.strip() - outputFile.write(xmlUnescape(skeletonContent)) + outputFile.write(skeletonContent) print(f"Extracted skeleton to {prettyPathString(outputPath)}") @@ -347,7 +346,7 @@ def translateXliff( skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = 
xmlUnescape(skeletonNode.text).strip() + skeletonContent = skeletonNode.text.strip() for lineNo, (skelLine, pretranslatedLine) in enumerate( zip_longest(skeletonContent.splitlines(), pretranslatedMdFile.readlines()), start=1, @@ -416,7 +415,7 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = skeletonNode.text).strip() + skeletonContent = skeletonNode.text.strip() for lineNum, line in enumerate(skeletonContent.splitlines(keepends=True), 1): res.numTotalLines += 1 if m := re_translationID.match(line): @@ -438,10 +437,6 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - targetText = target.text if targetText: translation = targetText - unescapedTargetText = xmlUnescape(targetText) - if unescapedTargetText != targetText: - print(f"Warning: line {lineNum} contained escaped characters. Unescaped: {unescapedTargetText}") - translation = unescapedTargetText # Crowdin treats empty targets () as a literal translation. # Filter out such strings and count them as bad translations. if translation in ( @@ -460,7 +455,7 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - sourceText = source.text if sourceText is None: raise ValueError(f"No source text found for unit {ID}") - translation = xmlUnescape(sourceText) + translation = sourceText outputFile.write(f"{prefix}{translation}{suffix}\n") else: outputFile.write(line) From e163f4d116cb8ca91becccfdfbe23baa1f45e0ae Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Sun, 22 Sep 2024 15:52:23 +1000 Subject: [PATCH 09/36] nvdaL10nUtil: add stripXliff command which removes everything from an xliff except for valid target translations, ready for upload to Crowdin. 
It also optionally ignores all existing target translations from an existing xliff file so that only a delta can be uploaded. --- user_docs/nvdaL10nUtil.py | 79 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py index d4a754085dd..031139f1e5b 100644 --- a/user_docs/nvdaL10nUtil.py +++ b/user_docs/nvdaL10nUtil.py @@ -4,11 +4,84 @@ # See the file COPYING for more details. +import lxml.etree import os import argparse import markdownTranslate import md2html + +def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): + namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + skeletonNode = xliffRoot.find(f'./xliff:file/xliff:skeleton', namespaces=namespace) + if skeletonNode is not None: + skeletonNode.getparent().remove(skeletonNode) + if oldXliffPath: + oldXliff = lxml.etree.parse(oldXliffPath) + oldXliffRoot = oldXliff.getroot() + if oldXliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {oldXliffPath}") + else: + oldXliffRoot = None + file = xliffRoot.find(f'./xliff:file', namespaces=namespace) + units = file.findall(f'./xliff:unit', namespaces=namespace) + segmentCount = 0 + emptyTargetCount = 0 + corruptTargetcount = 0 + sourceTargetcount = 0 + existingTranslationCount = 0 + for unit in units: + notes = unit.find("./xliff:notes", namespaces=namespace) + if notes is not None: + unit.remove(notes) + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + print("Warning: No segment element in unit") + continue + state = segment.get("state") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + print("Warning: No source element in segment") + 
continue + sourceText = source.text + segmentCount += 1 + target = segment.find("./xliff:target", namespaces=namespace) + if target is None: + continue + targetText = target.text + # remove empty / self-closing target tags + if not targetText: + emptyTargetCount += 1 + file.remove(unit) + # remove corrupt target tags + elif targetText in ( + "", + "<target/>", + "", + "<target></target>" + ): + corruptTargetcount += 1 + file.remove(unit) + # remove target tags pre-filled with source text + elif (not state or state == "initial") and targetText == sourceText: + sourceTargetcount += 1 + file.remove(unit) + # remove translations that already exist in the old xliff file + elif oldXliffRoot is not None: + unitId = unit.get("id") + oldTarget = oldXliffRoot.find(f'./xliff:file/xliff:unit[@id="{unitId}"]/xliff:segment/xliff:target', namespaces=namespace) + if oldTarget is not None and oldTarget.text == targetText: + existingTranslationCount += 1 + file.remove(unit) + xliff.write(outputPath) + keptTranslations = segmentCount - emptyTargetCount - corruptTargetcount - sourceTargetcount - existingTranslationCount + print(f"Processed {segmentCount} segments, removing {emptyTargetCount} empty targets, {corruptTargetcount} corrupt targets, {sourceTargetcount} source targets, and {existingTranslationCount} existing translations, resulting in {keptTranslations} translations kept") + + if __name__ == "__main__": args = argparse.ArgumentParser() commands = args.add_subparsers(title="commands", dest="command", required=True) @@ -21,11 +94,17 @@ command_md2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) command_md2html.add_argument("mdPath", help="Path to the markdown file") command_md2html.add_argument("htmlPath", help="Path to the resulting html file") + command_stripXliff = commands.add_parser("stripXliff", help="Remove prefilled, empty or corrupt target tags from an xliff file before 
upload to Crowdin. Optionally also remove translations that already exist in an old xliff file") + command_stripXliff.add_argument('-o', '--oldXliffPath', help="Path to the old xliff file containing existing translations that should be stripped", action="store", default=None) + command_stripXliff.add_argument("xliffPath", help="Path to the xliff file") + command_stripXliff.add_argument("outputPath", help="Path to the resulting xliff file") args = args.parse_args() match args.command: case "xliff2md": markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=args.mdPath, translated=not args.untranslated) case "md2html": md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) + case "stripXliff": + stripXliff(args.xliffPath, args.outputPath, args.oldXliffPath) case _: raise ValueError(f"Unknown command {args.command}") From 496b2d0b5f3cd885a4276709549e78295cae3411 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Mon, 23 Sep 2024 08:15:42 +1000 Subject: [PATCH 10/36] gitignore .md.sub files now generated by scons while generating html from markdown. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e1f9e47b924..2f61662a1ae 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ source/louis dlldata.c *.pdb .sconsign.dblite +user_docs/*/*.md.sub user_docs/*/*.html user_docs/*/*.css extras/controllerClient/x86/nvdaController.h From b75d77b338f16fd62e2814faa73d14ecbb5c5c2e Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 25 Sep 2024 09:01:10 +1000 Subject: [PATCH 11/36] nvdaL10nutil: add xliff2html command which incorporates xliff2md and md2html. --- user_docs/nvdaL10nUtil.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py index 031139f1e5b..04b80ecf7df 100644 --- a/user_docs/nvdaL10nUtil.py +++ b/user_docs/nvdaL10nUtil.py @@ -4,6 +4,7 @@ # See the file COPYING for more details. 
+import tempfile import lxml.etree import os import argparse @@ -12,6 +13,7 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): + print(f"Creating stripped xliff at {outputPath} from {xliffPath}") namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} xliff = lxml.etree.parse(xliffPath) xliffRoot = xliff.getroot() @@ -94,6 +96,12 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): command_md2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) command_md2html.add_argument("mdPath", help="Path to the markdown file") command_md2html.add_argument("htmlPath", help="Path to the resulting html file") + command_xliff2html = commands.add_parser("xliff2html", help="Convert xliff to html") + command_xliff2html.add_argument("-l", "--lang", help="Language code", action="store", default="en") + command_xliff2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + command_xliff2html.add_argument("-u", "--untranslated", help="Produce the untranslated markdown file", action="store_true", default=False) + command_xliff2html.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2html.add_argument("htmlPath", help="Path to the resulting html file") command_stripXliff = commands.add_parser("stripXliff", help="Remove prefilled, empty or corrupt target tags from an xliff file before upload to Crowdin. 
Optionally also remove translations that already exist in an old xliff file") command_stripXliff.add_argument('-o', '--oldXliffPath', help="Path to the old xliff file containing existing translations that should be stripped", action="store", default=None) command_stripXliff.add_argument("xliffPath", help="Path to the xliff file") @@ -104,6 +112,14 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=args.mdPath, translated=not args.untranslated) case "md2html": md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) + case "xliff2html": + temp_mdFile = tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w", encoding="utf-8") + temp_mdFile.close() + try: + markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=temp_mdFile.name, translated=not args.untranslated) + md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=args.lang, docType=args.docType) + finally: + os.remove(temp_mdFile.name) case "stripXliff": stripXliff(args.xliffPath, args.outputPath, args.oldXliffPath) case _: From 1418cab5121ac9509d6ba51ce14aa18b15101ed7 Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Wed, 25 Sep 2024 13:17:54 +1000 Subject: [PATCH 12/36] Update translation documentation to remove no longer relevant info and to provide clearer info about Crowdin. --- projectDocs/translating/crowdin.md | 93 ++++++++++++++++++++---------- projectDocs/translating/readme.md | 13 ++--- 2 files changed, 65 insertions(+), 41 deletions(-) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index b59899a358f..6c48091843c 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -1,9 +1,9 @@ # Translating using Crowdin -Crowdin is used to translate the main NVDA interface. +Crowdin is used to translate the main NVDA interface and user documentation. 
NVDA's Crowdin project: . -This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface using Crowdin and PoEdit. +This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface and user documentation using Crowdin and PoEdit. ## Setup @@ -20,44 +20,26 @@ Alternatively, you can use the [Crowdin web interface](https://support.crowdin.c As PoEdit only supports viewing approved strings, large translators team need to co-ordinate submitting unapproved strings to prevent conflicts. Using Crowdin's interface avoids this problem. -PoEdit supports connecting with Crowdin directly. PoEdit's homepage is: 1. Download the latest Windows PoEdit version at 1. Install it by following the on-screen instructions, the default options should be sufficient. -1. When launching PoEdit: - 1. Choose "Translate cloud project" - 1. Connect your Crowdin account - 1. Select NVDA and the language you wish to translate ### Translation reviews - -Translated strings will need to be reviewed and approved by a proofreader before being included in NVDA. -A proofreader is required for each language. -Proofreader status is granted on a case-by-case basis by messaging the [translators mailing list](https://groups.io/g/nvda-translations) or - -Proofreaders approve strings using the [Crowdin web interface](https://support.crowdin.com/online-editor/). -PoEdit does not support viewing unapproved strings from other translators. -When manually uploading to Crowdin from PoEdit, proofreaders are able to auto-approve all submitted strings. +Due to accessibility issues, for now translation approvals have been disabled on Crowdin. +Any translation uploaded to Crowdin is automatically available in the project. +However, joining the project as a translator is by invitation only. ## Translation workflows -There are 3 common workflows for translating with Crowdin: - -1. 
Only on Crowdin's web interface, either with: - - only one proofreader approving their own translations, - - or with many translators making suggestions and a proofreader approving them. -1. Multiple translators translating on PoEdit. - - Using Crowdin cloud synchronization. - - Proofreaders approve the translations on Crowdin's web interface. -1. Translating on PoEdit without cloud synchronization and performing manual uploads to Crowdin. - - Translators with proofreader status can upload strings manually with automatic approval. - As such, this may be a preference for single or small-team translators using PoEdit. - - Manual uploads without cloud synchronization means conflicts can occur, translator teams must be co-ordinated if following this approach. +There are 2 common workflows for translating with Crowdin: +1. Translating strings directly via Crowdin's interface. Or +1. Downloading from Crowdin, translating with Poedit and uploading again. + ## Translating using PoEdit -After opening a .po file you will be placed on a list with all of the strings to translate. +After opening a .po or .xliff file you will be placed on a list with all of the strings to translate. You can read the status bar to see how many strings have already been translated, the number of untranslated messages, and how many are fuzzy. A fuzzy string is a message which has been automatically translated, thus it may be wrong. @@ -77,12 +59,15 @@ Each time you press this key, PoEdit saves the po file, and if you check compile NVDA provides additional shortcuts for PoEdit which are described in [the User Guide](https://www.nvaccess.org/files/nvda/documentation/userGuide.html#Poedit). -If you are unsure of meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. 
+If you are unsure of the meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. Some comments provide an example output message to help you understand what NVDA will say when speaking or brailling such messages. -## Translating the interface +## Translating NVDA's interface + +* Download nvda.po from the Files section of your language on Crowdin. +* Open the po file in Poedit, translate, and save the file. +* Upload the po file back to Crowdin. -Open "nvda.po" for the language you want to translate in PoEdit. Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). ### Messages with formatting strings @@ -160,7 +145,7 @@ In Crowdin, this information appears at the end of the context section. ### Testing the interface translation -1. To test the current interface messages, save the current nvda.po file, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` +1. To test the current interface messages, save the current nvda.po file in Poedit, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` - `nvdadir`: the directory where NVDA has been installed - `langcode`: the ISO 639-1 language code for your language (e.g. en for English, es for Spanish, etc.) 1. Restart NVDA, then go to the NVDA menu, go to Preferences and choose General Settings, or press `NVDA+control+g` to open General Settings. @@ -168,3 +153,47 @@ In Crowdin, this information appears at the end of the context section. 1. The messages you have translated should now be heard or brailled in your native language provided that the synthesizer you are using supports your language or a braille code for your language exists. Whenever you add or update your interface translations, repeat the steps above (copying the updated .mo file and restarting NVDA) to test your updated translation messages. 
+ +## Translating NvDA' s user documentation + +Documentation available for translation includes: +* The NVDA user guide (userGuide.xliff) +* The NVDA What's New document (changes.xliff) + +To translate any of these files: + +* Download the xliff file from the Files section of your language on Crowdin. + * Make sure to choose "Download" not "Export xliff". +* Make a copy of this file. +* Open the po file in Poedit, translate, and save the file. +* Use the nvdaL10nUtil program to strip the xliff so that it only contains translations that were added / changed. E.g. +``` +nvdaL10nUtil stripXliff -o +``` +* Upload the xliff file back to Crowdin. If it is a stripped xliff file, it is safe to check the `allow target to match source` checkbox. + +Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). + +### Translating markdown +The English NVDA user documentation is written in markdown syntax. +The xliff file you are directly translating has been generated from that markdown file. +It contains the content of any line that requires translation, shown in the order it appears in the original markdown file. + +Structural lines that do not contain any translatable content (such as blank lines, hidden table header rows, table header body separator lines etc) are not included here. + +Structural syntax from the beginning and end of lines (such as heading prefix like `###`, heading anchors like `{#Introduction}`, and initial and final vertical bars on table rows) has been removed from the content to translate, but is available to view in the translator notes for that line. +Content may still however contain inline markdown syntax such as links, inline code fences (`\``), and table column separators (`|`). +This syntax must be kept in tact when translating. + +All strings for translation contain translator notes which include: +* Line: the original line number in the markdown file. 
+* prefix: any structural markdown on the line before this content. +* Suffix: any structural markdown on the line after this content. + +### Verifying your translation +When ever you have saved the xliff file with Poedit, you can use the nvdaL10nUtil program to generate the html version of the documentation file. E.g. +``` +nvdaL10nUtil xliff2html -t [userGuide|changes|keyCommands] -l +``` + + diff --git a/projectDocs/translating/readme.md b/projectDocs/translating/readme.md index 4c4f5cf77ae..da65c7498ed 100644 --- a/projectDocs/translating/readme.md +++ b/projectDocs/translating/readme.md @@ -23,19 +23,14 @@ For further information please see the [Release Process page](https://github.com You can view [Crowdin](https://crowdin.com/project/nvda) for an up to date report on the status of translating the NVDA interface. If you would like to improve or would like to work on a new language, please write to the [NVDA translations mailing list](https://groups.io/g/nvda-translations). -The translation status of user documentation (User Guide and Changes) can only be checked by translators. - ## New Localization Start by subscribing to the translation list above so that you can get help and advice. The current process for translation is split between multiple processes: -- Crowdin for the NVDA interface -- The legacy SVN translation system for the User Guide and Changes files. -This is planned to move to Crowdin. -- The legacy SVN translation system for Character Descriptions, Symbols and Gestures. -This is planned to move to GitHub. +- Crowdin for the NVDA interface and user documentation +- Github for Character Descriptions, Symbols and Gestures. Read [Files to be Localized](#files-to-be-localized) to learn the translation for process for these. 
@@ -57,6 +52,6 @@ Note that linked guides may be out of date, as the translation system is undergo - characterDescriptions.dic: names of characters in your language, see [Translating Character Descriptions](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#characterDescriptions) for more info. - symbols.dic: names of symbols and punctuation in your language, see [Translating Symbols](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#symbolPronunciation) for more information. - gestures.ini: remapping of gestures for your language, see [Translating Gestures](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#TranslatingGestures) for more information. -- userGuide.md: the User Guide, see [Translating the User Guide](https://github.com/nvaccess/nvda/wiki/TranslatingUserGuide) for more information. -- changes.md (optional): a list of changes between releases, see [Translating Changes](https://github.com/nvaccess/nvda/wiki/TranslatingChanges) for more information. +- userGuide.md: the User Guide, see [Translating using Crowdin](./crowdin.md) for more information. +- changes.md (optional): a list of changes between releases, see [Translating using Crowdin](./crowdin.md) for more information. - Add-ons (optional): a set of optional features that users can install, see [Translating Addons](https://github.com/nvaccess/nvda/wiki/TranslatingAddons) for more information. 
From 66f634acf8ea86c3605b9b8e9715ba4d1c2fd658 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 03:44:57 +0000 Subject: [PATCH 13/36] Pre-commit auto-fix --- projectDocs/translating/crowdin.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index 6c48091843c..7fbe919b7c5 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -36,7 +36,7 @@ There are 2 common workflows for translating with Crowdin: 1. Translating strings directly via Crowdin's interface. Or 1. Downloading from Crowdin, translating with Poedit and uploading again. - + ## Translating using PoEdit After opening a .po or .xliff file you will be placed on a list with all of the strings to translate. @@ -154,7 +154,7 @@ In Crowdin, this information appears at the end of the context section. Whenever you add or update your interface translations, repeat the steps above (copying the updated .mo file and restarting NVDA) to test your updated translation messages. -## Translating NvDA' s user documentation +## Translating NvDA' s user documentation Documentation available for translation includes: * The NVDA user guide (userGuide.xliff) @@ -170,10 +170,10 @@ To translate any of these files: ``` nvdaL10nUtil stripXliff -o ``` -* Upload the xliff file back to Crowdin. If it is a stripped xliff file, it is safe to check the `allow target to match source` checkbox. +* Upload the xliff file back to Crowdin. If it is a stripped xliff file, it is safe to check the `allow target to match source` checkbox. Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). - + ### Translating markdown The English NVDA user documentation is written in markdown syntax. The xliff file you are directly translating has been generated from that markdown file. 
@@ -195,5 +195,3 @@ When ever you have saved the xliff file with Poedit, you can use the nvdaL10nUti ``` nvdaL10nUtil xliff2html -t [userGuide|changes|keyCommands] -l ``` - - From 40dd89e37f69f8a537ec3384b8a30d992bccb1ea Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Fri, 27 Sep 2024 12:13:15 +1000 Subject: [PATCH 14/36] Apply suggestions from code review Co-authored-by: Cyrille Bougot --- projectDocs/translating/crowdin.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index 7fbe919b7c5..9a6828d05b2 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -183,7 +183,7 @@ Structural lines that do not contain any translatable content (such as blank lin Structural syntax from the beginning and end of lines (such as heading prefix like `###`, heading anchors like `{#Introduction}`, and initial and final vertical bars on table rows) has been removed from the content to translate, but is available to view in the translator notes for that line. Content may still however contain inline markdown syntax such as links, inline code fences (`\``), and table column separators (`|`). -This syntax must be kept in tact when translating. +This syntax must be kept intact when translating. All strings for translation contain translator notes which include: * Line: the original line number in the markdown file. From 90f938579bba7eed90d29615c51f159b7723806f Mon Sep 17 00:00:00 2001 From: Michael Curran Date: Tue, 1 Oct 2024 08:41:32 +1000 Subject: [PATCH 15/36] nvdaL10nUtil xliff2html: autodetect language from the xliff file. 
--- user_docs/nvdaL10nUtil.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py index 04b80ecf7df..35ff90fe8c1 100644 --- a/user_docs/nvdaL10nUtil.py +++ b/user_docs/nvdaL10nUtil.py @@ -12,6 +12,26 @@ import md2html +def fetchLanguageFromXliff(xliffPath: str, source: bool = False) -> str: + """ + Fetch the language from an xliff file. + :param xliffPath: Path to the xliff file + :param source: If True, fetch the source language, otherwise fetch the target language + :return: The language code + """ + namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + lang = xliffRoot.get("srcLang" if source else "trgLang") + if lang is None: + print(f"Could not detect language for xliff file {xliffPath}, {source=}") + else: + print(f"Detected language {lang} for xliff file {xliffPath}, {source=}") + return lang + + def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): print(f"Creating stripped xliff at {outputPath} from {xliffPath}") namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} @@ -97,7 +117,7 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): command_md2html.add_argument("mdPath", help="Path to the markdown file") command_md2html.add_argument("htmlPath", help="Path to the resulting html file") command_xliff2html = commands.add_parser("xliff2html", help="Convert xliff to html") - command_xliff2html.add_argument("-l", "--lang", help="Language code", action="store", default="en") + command_xliff2html.add_argument("-l", "--lang", help="Language code", action="store", required=False) command_xliff2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", 
"developerGuide", "changes", "keyCommands"]) command_xliff2html.add_argument("-u", "--untranslated", help="Produce the untranslated markdown file", action="store_true", default=False) command_xliff2html.add_argument("xliffPath", help="Path to the xliff file") @@ -113,11 +133,12 @@ def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): case "md2html": md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) case "xliff2html": + lang = args.lang or fetchLanguageFromXliff(args.xliffPath, source=args.untranslated) temp_mdFile = tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w", encoding="utf-8") temp_mdFile.close() try: markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=temp_mdFile.name, translated=not args.untranslated) - md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=args.lang, docType=args.docType) + md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=lang, docType=args.docType) finally: os.remove(temp_mdFile.name) case "stripXliff": From 410e71738d91d20435bbb7bec01306d0237ae524 Mon Sep 17 00:00:00 2001 From: Sean Budd Date: Wed, 2 Oct 2024 10:36:36 +1000 Subject: [PATCH 16/36] Apply suggestions from code review --- projectDocs/translating/crowdin.md | 5 ++++- sconstruct | 2 +- user_docs/md2html.py | 2 +- user_docs/nvdaL10nUtil.py | 8 ++++---- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index 9a6828d05b2..a01b3d31420 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -157,6 +157,7 @@ Whenever you add or update your interface translations, repeat the steps above ( ## Translating NvDA' s user documentation Documentation available for translation includes: + * The NVDA user guide (userGuide.xliff) * The NVDA What's New document (changes.xliff) @@ -175,6 +176,7 @@ nvdaL10nUtil stripXliff -o