diff --git a/.gitignore b/.gitignore index e1f9e47b924..2f61662a1ae 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ source/louis dlldata.c *.pdb .sconsign.dblite +user_docs/*/*.md.sub user_docs/*/*.html user_docs/*/*.css extras/controllerClient/x86/nvdaController.h diff --git a/projectDocs/dev/developerGuide/sconscript b/projectDocs/dev/developerGuide/sconscript index 590727221f8..3a9a9beeeb6 100644 --- a/projectDocs/dev/developerGuide/sconscript +++ b/projectDocs/dev/developerGuide/sconscript @@ -12,7 +12,20 @@ env = env.Clone() devDocsOutputDir = outputDir.Dir("devDocs") # Build the developer guide and move it to the output directory -htmlFile = env.md2html("developerGuide.md") +mdFile = env.File("developerGuide.md") +# first substitute some variables such as NvDA version and URL into the markdown file +mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + }, +) +htmlFile = env.Command( + target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py convert "$SOURCE" "$TARGET"'], +) devGuide = env.Command( target=devDocsOutputDir.File("developerGuide.html"), source=htmlFile, action=Move("$TARGET", "$SOURCE") ) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index b59899a358f..9a6828d05b2 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -1,9 +1,9 @@ # Translating using Crowdin -Crowdin is used to translate the main NVDA interface. +Crowdin is used to translate the main NVDA interface and user documentation. NVDA's Crowdin project: . -This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface using Crowdin and PoEdit. +This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface and user documentation using Crowdin and PoEdit. 
## Setup @@ -20,44 +20,26 @@ Alternatively, you can use the [Crowdin web interface](https://support.crowdin.c As PoEdit only supports viewing approved strings, large translators team need to co-ordinate submitting unapproved strings to prevent conflicts. Using Crowdin's interface avoids this problem. -PoEdit supports connecting with Crowdin directly. PoEdit's homepage is: 1. Download the latest Windows PoEdit version at 1. Install it by following the on-screen instructions, the default options should be sufficient. -1. When launching PoEdit: - 1. Choose "Translate cloud project" - 1. Connect your Crowdin account - 1. Select NVDA and the language you wish to translate ### Translation reviews - -Translated strings will need to be reviewed and approved by a proofreader before being included in NVDA. -A proofreader is required for each language. -Proofreader status is granted on a case-by-case basis by messaging the [translators mailing list](https://groups.io/g/nvda-translations) or - -Proofreaders approve strings using the [Crowdin web interface](https://support.crowdin.com/online-editor/). -PoEdit does not support viewing unapproved strings from other translators. -When manually uploading to Crowdin from PoEdit, proofreaders are able to auto-approve all submitted strings. +Due to accessibility issues, for now translation approvals have been disabled on Crowdin. +Any translation uploaded to Crowdin is automatically available in the project. +However, joining the project as a translator is by invitation only. ## Translation workflows -There are 3 common workflows for translating with Crowdin: +There are 2 common workflows for translating with Crowdin: -1. Only on Crowdin's web interface, either with: - - only one proofreader approving their own translations, - - or with many translators making suggestions and a proofreader approving them. -1. Multiple translators translating on PoEdit. - - Using Crowdin cloud synchronization. 
- - Proofreaders approve the translations on Crowdin's web interface. -1. Translating on PoEdit without cloud synchronization and performing manual uploads to Crowdin. - - Translators with proofreader status can upload strings manually with automatic approval. - As such, this may be a preference for single or small-team translators using PoEdit. - - Manual uploads without cloud synchronization means conflicts can occur, translator teams must be co-ordinated if following this approach. +1. Translating strings directly via Crowdin's interface. Or +1. Downloading from Crowdin, translating with Poedit and uploading again. ## Translating using PoEdit -After opening a .po file you will be placed on a list with all of the strings to translate. +After opening a .po or .xliff file you will be placed on a list with all of the strings to translate. You can read the status bar to see how many strings have already been translated, the number of untranslated messages, and how many are fuzzy. A fuzzy string is a message which has been automatically translated, thus it may be wrong. @@ -77,12 +59,15 @@ Each time you press this key, PoEdit saves the po file, and if you check compile NVDA provides additional shortcuts for PoEdit which are described in [the User Guide](https://www.nvaccess.org/files/nvda/documentation/userGuide.html#Poedit). -If you are unsure of meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. +If you are unsure of the meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. Some comments provide an example output message to help you understand what NVDA will say when speaking or brailling such messages. -## Translating the interface +## Translating NVDA's interface + +* Download nvda.po from the Files section of your language on Crowdin. +* Open the po file in Poedit, translate, and save the file. 
+* Upload the po file back to Crowdin. -Open "nvda.po" for the language you want to translate in PoEdit. Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). ### Messages with formatting strings @@ -160,7 +145,7 @@ In Crowdin, this information appears at the end of the context section. ### Testing the interface translation -1. To test the current interface messages, save the current nvda.po file, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` +1. To test the current interface messages, save the current nvda.po file in Poedit, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` - `nvdadir`: the directory where NVDA has been installed - `langcode`: the ISO 639-1 language code for your language (e.g. en for English, es for Spanish, etc.) 1. Restart NVDA, then go to the NVDA menu, go to Preferences and choose General Settings, or press `NVDA+control+g` to open General Settings. @@ -168,3 +153,45 @@ In Crowdin, this information appears at the end of the context section. 1. The messages you have translated should now be heard or brailled in your native language provided that the synthesizer you are using supports your language or a braille code for your language exists. Whenever you add or update your interface translations, repeat the steps above (copying the updated .mo file and restarting NVDA) to test your updated translation messages. + +## Translating NVDA's user documentation + +Documentation available for translation includes: +* The NVDA user guide (userGuide.xliff) +* The NVDA What's New document (changes.xliff) + +To translate any of these files: + +* Download the xliff file from the Files section of your language on Crowdin. + * Make sure to choose "Download" not "Export xliff". +* Make a copy of this file. +* Open the xliff file in Poedit, translate, and save the file. 
+* Use the nvdaL10nUtil program to strip the xliff so that it only contains translations that were added / changed. E.g. +``` +nvdaL10nUtil stripXliff -o <oldXliffPath> <xliffPath> <outputPath> +``` +* Upload the xliff file back to Crowdin. If it is a stripped xliff file, it is safe to check the `allow target to match source` checkbox. + +Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). + +### Translating markdown +The English NVDA user documentation is written in markdown syntax. +The xliff file you are directly translating has been generated from that markdown file. +It contains the content of any line that requires translation, shown in the order it appears in the original markdown file. + +Structural lines that do not contain any translatable content (such as blank lines, hidden table header rows, table header body separator lines etc) are not included here. + +Structural syntax from the beginning and end of lines (such as heading prefix like `###`, heading anchors like `{#Introduction}`, and initial and final vertical bars on table rows) has been removed from the content to translate, but is available to view in the translator notes for that line. +Content may still however contain inline markdown syntax such as links, inline code fences (`\``), and table column separators (`|`). +This syntax must be kept intact when translating. + +All strings for translation contain translator notes which include: +* Line: the original line number in the markdown file. +* prefix: any structural markdown on the line before this content. +* Suffix: any structural markdown on the line after this content. + +### Verifying your translation +Whenever you have saved the xliff file with Poedit, you can use the nvdaL10nUtil program to generate the html version of the documentation file. E.g. 
+``` +nvdaL10nUtil xliff2html -t [userGuide|changes|keyCommands] -l <lang> <xliffPath> <htmlPath> +``` diff --git a/projectDocs/translating/readme.md b/projectDocs/translating/readme.md index 4c4f5cf77ae..da65c7498ed 100644 --- a/projectDocs/translating/readme.md +++ b/projectDocs/translating/readme.md @@ -23,19 +23,14 @@ For further information please see the [Release Process page](https://github.com You can view [Crowdin](https://crowdin.com/project/nvda) for an up to date report on the status of translating the NVDA interface. If you would like to improve or would like to work on a new language, please write to the [NVDA translations mailing list](https://groups.io/g/nvda-translations). -The translation status of user documentation (User Guide and Changes) can only be checked by translators. - ## New Localization Start by subscribing to the translation list above so that you can get help and advice. The current process for translation is split between multiple processes: -- Crowdin for the NVDA interface -- The legacy SVN translation system for the User Guide and Changes files. -This is planned to move to Crowdin. -- The legacy SVN translation system for Character Descriptions, Symbols and Gestures. -This is planned to move to GitHub. +- Crowdin for the NVDA interface and user documentation +- GitHub for Character Descriptions, Symbols and Gestures. Read [Files to be Localized](#files-to-be-localized) to learn the translation for process for these. @@ -57,6 +52,6 @@ Note that linked guides may be out of date, as the translation system is undergo - characterDescriptions.dic: names of characters in your language, see [Translating Character Descriptions](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#characterDescriptions) for more info. - symbols.dic: names of symbols and punctuation in your language, see [Translating Symbols](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#symbolPronunciation) for more information. 
- gestures.ini: remapping of gestures for your language, see [Translating Gestures](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#TranslatingGestures) for more information. -- userGuide.md: the User Guide, see [Translating the User Guide](https://github.com/nvaccess/nvda/wiki/TranslatingUserGuide) for more information. -- changes.md (optional): a list of changes between releases, see [Translating Changes](https://github.com/nvaccess/nvda/wiki/TranslatingChanges) for more information. +- userGuide.md: the User Guide, see [Translating using Crowdin](./crowdin.md) for more information. +- changes.md (optional): a list of changes between releases, see [Translating using Crowdin](./crowdin.md) for more information. - Add-ons (optional): a set of optional features that users can install, see [Translating Addons](https://github.com/nvaccess/nvda/wiki/TranslatingAddons) for more information. diff --git a/requirements.txt b/requirements.txt index ab96d3cf03f..db493975438 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,8 @@ pycaw==20240210 # Packaging NVDA py2exe==0.13.0.2 +# xliff2html is packaged with nuitka +nuitka==2.4.8 # Creating XML unit test reports unittest-xml-reporting==3.2.0 diff --git a/sconstruct b/sconstruct index 8bbf1708f6c..8636052ab5d 100755 --- a/sconstruct +++ b/sconstruct @@ -149,7 +149,6 @@ env = Environment( tools=[ "textfile", "gettextTool", - "md2html", "doxygen", "recursiveInstall", "m4", @@ -346,7 +345,23 @@ for xliffFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.xliff")): ) # Allow all markdown files to be converted to html in user_docs for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): - htmlFile = env.md2html(mdFile) + # first substitute some variables such as NvDA version and URL into the markdown file + mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + "NVDA_URL": versionInfo.url, 
+ "NVDA_COPYRIGHT_YEARS": versionInfo.copyrightYears, + }, + ) + lang = os.path.split(os.path.dirname(mdFile.path))[-1] + docType = os.path.basename(mdFile.path).split(".")[0] + htmlFile = env.Command( + target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t {docType} "$SOURCE" "$TARGET"'], + ) styleInstallPath = os.path.dirname(mdFile.abspath) installedStyle = env.Install(styleInstallPath, styles) installedHeadingsStyle = env.Install(styleInstallPath, numberedHeadingsStyle) @@ -362,11 +377,14 @@ for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): env.Depends(htmlFile, mdFile) # Create key commands files -for userGuideFile in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md")): - keyCommandsHtmlFile = env.md2html( - userGuideFile.abspath.replace("userGuide.md", "keyCommands.html"), userGuideFile +for userGuideFileSub in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md.sub")): + lang = os.path.split(os.path.dirname(userGuideFileSub.path))[-1] + keyCommandsHtmlFile = env.Command( + target=userGuideFileSub.abspath.replace("userGuide.md.sub", "keyCommands.html"), + source=userGuideFileSub, + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t keyCommands "$SOURCE" "$TARGET"'], ) - env.Depends(keyCommandsHtmlFile, userGuideFile) + env.Depends(keyCommandsHtmlFile, userGuideFileSub) # Build unicode CLDR dictionaries env.SConscript("cldrDict_sconscript", exports=["env", "sourceDir"]) @@ -713,3 +731,10 @@ source = env.Dir(os.path.join(os.getcwd(), "dist")) # Putting the target in the output dir automatically causes AppVeyor to package it as an artefact target = env.File(os.path.join(outputDir.abspath, "library_modules.txt")) env.Alias("moduleList", env.GenerateModuleList(target, source)) + +nvdaL10nUtil = env.Command( + target=outputDir.File("nvdaL10nUtil.exe"), + source="user_docs/nvdaL10nUtil.py", + ENV=os.environ, + action=f"nuitka --standalone 
--onefile --output-dir=./output --include-module=mdx_truly_sane_lists --include-module=markdown_link_attr_modifier --include-module=mdx_gh_links user_docs/nvdaL10nUtil.py", +) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index c70353a0e60..c0a1a03bfc9 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -13,7 +13,6 @@ import re from itertools import zip_longest from xml.sax.saxutils import escape as xmlEscape -from xml.sax.saxutils import unescape as xmlUnescape import difflib from dataclasses import dataclass import subprocess @@ -158,7 +157,7 @@ def extractSkeleton(xliffPath: str, outputPath: str): if skeletonNode is None: raise ValueError("No skeleton found in xliff file") skeletonContent = skeletonNode.text.strip() - outputFile.write(xmlUnescape(skeletonContent)) + outputFile.write(skeletonContent) print(f"Extracted skeleton to {prettyPathString(outputPath)}") @@ -347,7 +346,7 @@ def translateXliff( skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = xmlUnescape(skeletonNode.text).strip() + skeletonContent = skeletonNode.text.strip() for lineNo, (skelLine, pretranslatedLine) in enumerate( zip_longest(skeletonContent.splitlines(), pretranslatedMdFile.readlines()), start=1, @@ -369,19 +368,27 @@ def translateXliff( f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}' ) translation = pretranslatedLine[len(prefix) : len(pretranslatedLine) - len(suffix)] - unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not None: - segment = unit.find("./xliff:segment", namespaces=namespace) - if segment is not None: - target = lxml.etree.Element("target") - target.text = xmlEscape(translation) - target.tail = "\n" - segment.append(target) - res.numTranslatedStrings += 1 + try: + unit = 
xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) + if unit is not None: + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is not None: + target = lxml.etree.Element("target") + target.text = translation + target.tail = "\n" + segment.append(target) + res.numTranslatedStrings += 1 + else: + raise ValueError(f"No segment found for unit {ID}") else: - raise ValueError(f"No segment found for unit {ID}") - else: - raise ValueError(f"Cannot locate Unit {ID} in xliff file") + raise ValueError(f"Cannot locate Unit {ID} in xliff file") + except Exception as e: + e.add_note(f"Line {lineNo}: {pretranslatedLine=}, {skelLine=}") + raise + elif skelLine != pretranslatedLine: + raise ValueError( + f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}" + ) xliff.write(outputPath, encoding="utf8", xml_declaration=True) print(f"Translated xliff file with {res.numTranslatedStrings} translated strings") return res @@ -392,6 +399,7 @@ class Result_generateMarkdown: numTotalLines = 0 numTranslatableStrings = 0 numTranslatedStrings = 0 + numBadTranslationStrings = 0 def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) -> Result_generateMarkdown: @@ -407,37 +415,52 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = xmlUnescape(skeletonNode.text).strip() - for line in skeletonContent.splitlines(keepends=True): + skeletonContent = skeletonNode.text.strip() + for lineNum, line in enumerate(skeletonContent.splitlines(keepends=True), 1): res.numTotalLines += 1 if m := re_translationID.match(line): prefix, ID, suffix = m.groups() res.numTranslatableStrings += 1 unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not 
None: - segment = unit.find("./xliff:segment", namespaces=namespace) - if segment is not None: - source = segment.find("./xliff:source", namespaces=namespace) - if translated: - target = segment.find("./xliff:target", namespaces=namespace) - else: - target = None - if target is not None and target.text: - res.numTranslatedStrings += 1 - translation = xmlUnescape(target.text) - elif source is not None and source.text: - translation = xmlUnescape(source.text) - else: - raise ValueError(f"No source or target found for unit {ID}") - else: - raise ValueError(f"No segment found for unit {ID}") - else: + if unit is None: raise ValueError(f"Cannot locate Unit {ID} in xliff file") + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + raise ValueError(f"No segment found for unit {ID}") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + raise ValueError(f"No source found for unit {ID}") + translation = "" + if translated: + target = segment.find("./xliff:target", namespaces=namespace) + if target is not None: + targetText = target.text + if targetText: + translation = targetText + # Crowdin treats empty targets () as a literal translation. + # Filter out such strings and count them as bad translations. + if translation in ( + "", + "<target/>", + "", + "<target></target>", + ): + res.numBadTranslationStrings += 1 + print(f"Warning: line {lineNum} contained a corrupt empty translation. 
Using source") + translation = "" + else: + res.numTranslatedStrings += 1 + # If we have no translation, use the source text + if not translation: + sourceText = source.text + if sourceText is None: + raise ValueError(f"No source text found for unit {ID}") + translation = sourceText outputFile.write(f"{prefix}{translation}{suffix}\n") else: outputFile.write(line) print( - f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings" + f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings. Ignoring {res.numBadTranslationStrings} bad translated strings" ) return res diff --git a/site_scons/site_tools/md2html.py b/user_docs/md2html.py similarity index 69% rename from site_scons/site_tools/md2html.py rename to user_docs/md2html.py index 929df794782..c3d6ccaf559 100644 --- a/site_scons/site_tools/md2html.py +++ b/user_docs/md2html.py @@ -3,16 +3,14 @@ # This file may be used under the terms of the GNU General Public License, version 2 or later. 
# For more details see: https://www.gnu.org/licenses/gpl-2.0.html +import sys +import argparse from copy import deepcopy -from importlib.util import find_spec import io import pathlib import re import shutil -import SCons.Node.FS -import SCons.Environment - DEFAULT_EXTENSIONS = frozenset( { # Supports tables, HTML mixed with markdown, code blocks, custom attributes and more @@ -25,7 +23,7 @@ "markdown_link_attr_modifier", # Adds links to GitHub authors, issues and PRs "mdx_gh_links", - }, + } ) EXTENSIONS_CONFIG = { @@ -55,16 +53,6 @@ """.strip() -def _replaceNVDATags(md: str, env: SCons.Environment.Environment) -> str: - import versionInfo - - # Replace tags in source file - md = md.replace("NVDA_VERSION", env["version"]) - md = md.replace("NVDA_URL", versionInfo.url) - md = md.replace("NVDA_COPYRIGHT_YEARS", versionInfo.copyrightYears) - return md - - def _getTitle(mdBuffer: io.StringIO, isKeyCommands: bool = False) -> str: if isKeyCommands: TITLE_RE = re.compile(r"^$") @@ -122,7 +110,7 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: extensions = set(DEFAULT_EXTENSIONS) if isKeyCommands: - from user_docs.keyCommandsDoc import KeyCommandsExtension + from keyCommandsDoc import KeyCommandsExtension extensions.add(KeyCommandsExtension()) @@ -145,36 +133,29 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: return htmlOutput -def md2html_actionFunc( - target: list[SCons.Node.FS.File], - source: list[SCons.Node.FS.File], - env: SCons.Environment.Environment, -): - isKeyCommands = target[0].path.endswith("keyCommands.html") - isUserGuide = target[0].path.endswith("userGuide.html") - isDevGuide = target[0].path.endswith("developerGuide.html") - isChanges = target[0].path.endswith("changes.html") - - with open(source[0].path, "r", encoding="utf-8") as mdFile: +def main(source: str, dest: str, lang="en", docType=None): + print(f"Converting {docType or 'document'} at {source} to {dest}, {lang=}") + isUserGuide = docType 
== "userGuide" + isDevGuide = docType == "developerGuide" + isChanges = docType == "changes" + isKeyCommands = docType == "keyCommands" + if docType and not any([isUserGuide, isDevGuide, isChanges, isKeyCommands]): + raise ValueError(f"Unknown docType {docType}") + with open(source, "r", encoding="utf-8") as mdFile: mdStr = mdFile.read() - mdStr = _replaceNVDATags(mdStr, env) - with io.StringIO() as mdBuffer: mdBuffer.write(mdStr) title = _getTitle(mdBuffer, isKeyCommands) - lang = pathlib.Path(source[0].path).parent.name - if isDevGuide and lang == "developerGuide": - # Parent folder in this case is the developerGuide folder in project docs - lang = "en" + lang = pathlib.Path(source).parent.name if isUserGuide or isDevGuide: - extraStylesheet = '' + extraStylesheet = "" elif isChanges or isKeyCommands: extraStylesheet = "" else: - raise ValueError(f"Unknown target type for {target[0].path}") + raise ValueError(f"Unknown target type for {dest}") htmlBuffer = io.StringIO() htmlBuffer.write( @@ -183,7 +164,7 @@ def md2html_actionFunc( dir="rtl" if lang in RTL_LANG_CODES else "ltr", title=title, extraStylesheet=extraStylesheet, - ), + ) ) htmlOutput = _generateSanitizedHTML(mdStr, isKeyCommands) @@ -195,7 +176,7 @@ def md2html_actionFunc( htmlBuffer.seek(0, io.SEEK_END) htmlBuffer.write("\n\n\n") - with open(target[0].path, "w", encoding="utf-8") as targetFile: + with open(dest, "w", encoding="utf-8") as targetFile: # Make next read at start of buffer htmlBuffer.seek(0) shutil.copyfileobj(htmlBuffer, targetFile) @@ -203,23 +184,11 @@ def md2html_actionFunc( htmlBuffer.close() -def exists(env: SCons.Environment.Environment) -> bool: - for ext in [ - "markdown", - "markdown_link_attr_modifier", - "mdx_truly_sane_lists", - "mdx_gh_links", - "nh3", - "user_docs.keyCommandsDoc", - ]: - if find_spec(ext) is None: - return False - return True - - -def generate(env: SCons.Environment.Environment): - env["BUILDERS"]["md2html"] = env.Builder( - 
action=env.Action(md2html_actionFunc, lambda t, s, e: f"Converting {s[0].path} to {t[0].path}"), - suffix=".html", - src_suffix=".md", - ) +if __name__ == "__main__": + args = argparse.ArgumentParser() + args.add_argument("-l", "--lang", help="Language code", action="store", default="en") + args.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + args.add_argument("source", help="Path to the markdown file") + args.add_argument("dest", help="Path to the resulting html file") + args = args.parse_args() + main(source=args.source, dest=args.dest, lang=args.lang, docType=args.docType) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py new file mode 100644 index 00000000000..35ff90fe8c1 --- /dev/null +++ b/user_docs/nvdaL10nUtil.py @@ -0,0 +1,147 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2024 NV Access Limited. +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. + + +import tempfile +import lxml.etree +import os +import argparse +import markdownTranslate +import md2html + + +def fetchLanguageFromXliff(xliffPath: str, source: bool = False) -> str: + """ + Fetch the language from an xliff file. 
+ :param xliffPath: Path to the xliff file + :param source: If True, fetch the source language, otherwise fetch the target language + :return: The language code + """ + namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + lang = xliffRoot.get("srcLang" if source else "trgLang") + if lang is None: + print(f"Could not detect language for xliff file {xliffPath}, {source=}") + else: + print(f"Detected language {lang} for xliff file {xliffPath}, {source=}") + return lang + + +def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None= None): + print(f"Creating stripped xliff at {outputPath} from {xliffPath}") + namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + skeletonNode = xliffRoot.find(f'./xliff:file/xliff:skeleton', namespaces=namespace) + if skeletonNode is not None: + skeletonNode.getparent().remove(skeletonNode) + if oldXliffPath: + oldXliff = lxml.etree.parse(oldXliffPath) + oldXliffRoot = oldXliff.getroot() + if oldXliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {oldXliffPath}") + else: + oldXliffRoot = None + file = xliffRoot.find(f'./xliff:file', namespaces=namespace) + units = file.findall(f'./xliff:unit', namespaces=namespace) + segmentCount = 0 + emptyTargetCount = 0 + corruptTargetcount = 0 + sourceTargetcount = 0 + existingTranslationCount = 0 + for unit in units: + notes = unit.find("./xliff:notes", namespaces=namespace) + if notes is not None: + unit.remove(notes) + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + print("Warning: No segment 
element in unit") + continue + state = segment.get("state") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + print("Warning: No source element in segment") + continue + sourceText = source.text + segmentCount += 1 + target = segment.find("./xliff:target", namespaces=namespace) + if target is None: + continue + targetText = target.text + # remove empty / self-closing target tags + if not targetText: + emptyTargetCount += 1 + file.remove(unit) + # remove corrupt target tags + elif targetText in ( + "", + "<target/>", + "", + "<target></target>" + ): + corruptTargetcount += 1 + file.remove(unit) + # remove target tags pre-filled with source text + elif (not state or state == "initial") and targetText == sourceText: + sourceTargetcount += 1 + file.remove(unit) + # remove translations that already exist in the old xliff file + elif oldXliffRoot is not None: + unitId = unit.get("id") + oldTarget = oldXliffRoot.find(f'./xliff:file/xliff:unit[@id="{unitId}"]/xliff:segment/xliff:target', namespaces=namespace) + if oldTarget is not None and oldTarget.text == targetText: + existingTranslationCount += 1 + file.remove(unit) + xliff.write(outputPath) + keptTranslations = segmentCount - emptyTargetCount - corruptTargetcount - sourceTargetcount - existingTranslationCount + print(f"Processed {segmentCount} segments, removing {emptyTargetCount} empty targets, {corruptTargetcount} corrupt targets, {sourceTargetcount} source targets, and {existingTranslationCount} existing translations, resulting in {keptTranslations} translations kept") + + +if __name__ == "__main__": + args = argparse.ArgumentParser() + commands = args.add_subparsers(title="commands", dest="command", required=True) + command_xliff2md = commands.add_parser("xliff2md", help="Convert xliff to markdown") + command_xliff2md.add_argument("-u", "--untranslated", help="Produce the untranslated markdown file", action="store_true", default=False) + 
command_xliff2md.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2md.add_argument("mdPath", help="Path to the resulting markdown file") + command_md2html = commands.add_parser("md2html", help="Convert markdown to html") + command_md2html.add_argument("-l", "--lang", help="Language code", action="store", default="en") + command_md2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + command_md2html.add_argument("mdPath", help="Path to the markdown file") + command_md2html.add_argument("htmlPath", help="Path to the resulting html file") + command_xliff2html = commands.add_parser("xliff2html", help="Convert xliff to html") + command_xliff2html.add_argument("-l", "--lang", help="Language code", action="store", required=False) + command_xliff2html.add_argument("-t", "--docType", help="Type of document", action="store", choices=["userGuide", "developerGuide", "changes", "keyCommands"]) + command_xliff2html.add_argument("-u", "--untranslated", help="Produce the untranslated markdown file", action="store_true", default=False) + command_xliff2html.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2html.add_argument("htmlPath", help="Path to the resulting html file") + command_stripXliff = commands.add_parser("stripXliff", help="Remove prefilled, empty or corrupt target tags from an xliff file before upload to Crowdin. 
Optionally also remove translations that already exist in an old xliff file") + command_stripXliff.add_argument('-o', '--oldXliffPath', help="Path to the old xliff file containing existing translations that should be stripped", action="store", default=None) + command_stripXliff.add_argument("xliffPath", help="Path to the xliff file") + command_stripXliff.add_argument("outputPath", help="Path to the resulting xliff file") + args = args.parse_args() + match args.command: + case "xliff2md": + markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=args.mdPath, translated=not args.untranslated) + case "md2html": + md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) + case "xliff2html": + lang = args.lang or fetchLanguageFromXliff(args.xliffPath, source=args.untranslated) + temp_mdFile = tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w", encoding="utf-8") + temp_mdFile.close() + try: + markdownTranslate.generateMarkdown(xliffPath=args.xliffPath, outputPath=temp_mdFile.name, translated=not args.untranslated) + md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=lang, docType=args.docType) + finally: + os.remove(temp_mdFile.name) + case "stripXliff": + stripXliff(args.xliffPath, args.outputPath, args.oldXliffPath) + case _: + raise ValueError(f"Unknown command {args.command}")