Skip to content
This repository has been archived by the owner on Mar 15, 2020. It is now read-only.

Commit

Permalink
Added xslt extension to properly (and easily) handle direct speech tra…
Browse files Browse the repository at this point in the history
…nsformations
  • Loading branch information
rupor-github committed Feb 20, 2015
1 parent dfc7cf6 commit 9671f79
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 133 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@ Here is brief list of changes:

* Renamed from fb2conv to fb2mobi (MyHomeLib integration)
* Ported to Python 3.4.2
* Lost UI
* Lost "Send To Kindle" functionality
* All messages got translated to English
* All profile descriptions got translated to English
* Added processing of some HTML entities which XML parser normally ignores (nbsp, acirc)
* Lost UI and "Send To Kindle" functionality
* All messages and profile descriptions got translated to English
* Fixed all problems I was aware of at the moment (see git log)
* Added ability to apply xslt transformation from external file to fb2 before further processing
* Added xslt extension "katz_tr" to speed up and simplify transformation of direct speech in dialogs

Enjoy!
43 changes: 40 additions & 3 deletions fb2mobi.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
<debug>False</debug>
<logFile/>
<logLevel>INFO</logLevel>
<outputFormat>mobi</outputFormat>
<outputFormat>azw3</outputFormat>
<kindleCompressionLevel>1</kindleCompressionLevel>
<noDropcapsSymbols>'"-.…0123456789‒–—«»</noDropcapsSymbols>
<transliterate>False</transliterate>
<defaultProfile>default</defaultProfile>
<defaultProfile>spaces</defaultProfile>
<profiles>
<profile description="Default profile" name="default">
<hyphens>True</hyphens>
Expand All @@ -22,7 +22,7 @@
<annotationTitle>Annotation</annotationTitle>
<tocTitle>Content</tocTitle>
<notesMode>default</notesMode>
<notesBodies>inline</notesBodies>
<notesBodies>notes</notesBodies>
<generateTOCPage>True</generateTOCPage>
<generateAnnotationPage>True</generateAnnotationPage>
<generateOPFGuide>True</generateOPFGuide>
Expand All @@ -45,6 +45,43 @@
</vignette>
</vignettes>
</profile>
<profile description="SIX-PER-EM SPACE in dialogs" name="spaces">
<hyphens>True</hyphens>
<dropcaps>False</dropcaps>
<tocMaxLevel>1000</tocMaxLevel>
<tocBeforeBody>False</tocBeforeBody>
<flatTOC>False</flatTOC>
<css parse="False">profiles/default.css</css>
<xslt>spaces.xsl</xslt>
<chapterOnNewPage>True</chapterOnNewPage>
<authorFormat>#l #f #m</authorFormat>
<bookTitleFormat>(#number) #title</bookTitleFormat>
<annotationTitle>Annotation</annotationTitle>
<tocTitle>Content</tocTitle>
<notesMode>default</notesMode>
<notesBodies>notes</notesBodies>
<generateTOCPage>False</generateTOCPage>
<generateAnnotationPage>False</generateAnnotationPage>
<generateOPFGuide>True</generateOPFGuide>
<kindleRemovePersonalLabel>True</kindleRemovePersonalLabel>
<vignettes>
<vignette level="default">
<beforeTitle>profiles/vignettes/title_before.png</beforeTitle>
<afterTitle>profiles/vignettes/title_after.png</afterTitle>
<chapterEnd>profiles/vignettes/chapter_end.png</chapterEnd>
</vignette>
<vignette level="h0">
<beforeTitle>None</beforeTitle>
<afterTitle>None</afterTitle>
<chapterEnd>None</chapterEnd>
</vignette>
<vignette level="h1">
<beforeTitle>profiles/vignettes/title_before.png</beforeTitle>
<afterTitle>profiles/vignettes/title_after.png</afterTitle>
<chapterEnd>profiles/vignettes/chapter_end.png</chapterEnd>
</vignette>
</vignettes>
</profile>
<profile description="Using Liberation fonts" name="liberation">
<hyphens>False</hyphens>
<dropcaps>False</dropcaps>
Expand Down
29 changes: 25 additions & 4 deletions modules/fb2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

import os
from lxml import etree, html
from lxml import etree, html, objectify
import re
import shutil
import io
Expand All @@ -14,6 +14,7 @@
import html

from hyphenator import Hyphenator
from copy import deepcopy

SOFT_HYPHEN = u'\u00AD' # Символ 'мягкого' переноса

Expand Down Expand Up @@ -177,7 +178,7 @@ def copy_file(src, dest):

def indent(elem, level=0):
'''Функция для улучшения вида xml/html.
Вставляет символы табуляции согласно уровня вложенности тэга
Вставляет символы табуляции согласно уровню вложенности тэга
'''

i = '\n' + level*'\t'
Expand Down Expand Up @@ -275,8 +276,29 @@ def __init__(self, fb2file, mobifile, tempdir, config):
self.tree = etree.parse(fb2file, parser=etree.XMLParser(recover=True))

if 'xslt' in config.current_profile:

# rupor - this allows for smaller xsl, quicker replacement and allows handling of tags in the paragraphs
class MyExtElement(etree.XSLTExtension):
    """XSLT extension element that rewrites the opening of a matched node.

    The text of the extension element in the stylesheet is the replacement
    prefix. A deep copy of the matched input node is emitted in which the
    first character of its leading text, plus any whitespace directly after
    it, is replaced by that prefix. Nested elements are preserved, and the
    namespace part is removed from every element tag in the copy.
    """

    def execute(self, context, self_node, input_node, output_parent):
        """Append the rewritten copy of ``input_node`` to ``output_parent``.

        context       -- XSLT context passed in by lxml (not used here).
        self_node     -- the extension element from the stylesheet; its
                         text is the replacement prefix.
        input_node    -- the node being transformed (left untouched).
        output_parent -- output element that receives the rewritten copy.
        """
        child = deepcopy(input_node)
        # An empty extension element has text None; treat it as "no prefix".
        replacement = self_node.text or ''

        # A stylesheet may match on the node's string value, so the leading
        # character can live inside a nested first child instead of in the
        # node's own text (where .text is None). Follow first-child elements
        # until some leading text is found.
        target = child
        while (not target.text and len(target)
               and isinstance(target[0].tag, str)):
            target = target[0]

        old_text = target.text
        if old_text:
            # Drop the first character and the whitespace run after it,
            # then put the replacement prefix in front of the remainder.
            target.text = replacement + old_text[1:].lstrip()
        # Otherwise no leading text was found along the first-child chain;
        # the copy is emitted unchanged rather than failing on None.

        for e in child.iter():
            # Comments and processing instructions have a non-string tag.
            if not isinstance(e.tag, str):
                continue
            # Tags are in Clark notation ("{namespace}local"); keep only
            # the local name.
            _, brace, local_name = e.tag.partition('}')
            if brace:
                e.tag = local_name
        objectify.deannotate(child, cleanup_namespaces=True)
        output_parent.append(child)

config.log.info(u'Applying XSLT transformations "{0}"'.format(config.current_profile['xslt']))
self.transform = etree.XSLT(etree.parse(config.current_profile['xslt']))
self.transform = etree.XSLT(etree.parse(config.current_profile['xslt']), extensions = { ('fb2mobi_ns', 'katz_tr') : MyExtElement() })
self.tree = self.transform(self.tree)
for entry in self.transform.error_log:
self.log.warning(entry)
Expand Down Expand Up @@ -756,7 +778,6 @@ def parse_format(self, elem, tag = None, css = None, href=None):
self.links_location[elem.attrib['id']] = self.current_file
if href:
self.buff.append(' href="%s"' % save_html(href))
if tag:
if css == 'section':
self.buff.append(' />')
else:
Expand Down
128 changes: 8 additions & 120 deletions spaces.xsl
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
xmlns:rupor="fb2mobi_ns" extension-element-prefixes="rupor"
exclude-result-prefixes="fb">

<xsl:output method="xml" encoding="UTF-8" indent="no"/>

<xsl:template match="node()|@*">
Expand All @@ -8,125 +13,8 @@
</xsl:copy>
</xsl:template>

<xsl:template match="fb:p">
<xsl:choose>
<!-- Non-breaking space (c2a0) -->
<xsl:when test="starts-with(.,'‐ ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'‑ ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'− ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'– ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'— ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'― ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<!-- Standard space (0020) -->
<xsl:when test="starts-with(.,'‐ ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'‑ ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'− ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'– ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'— ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'― ')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,3)"/>
</xsl:element>
</xsl:when>
<!-- No space -->
<xsl:when test="starts-with(.,'‐')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'‑')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'−')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'–')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'—')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:when test="starts-with(.,'―')">
<xsl:element name="p" namespace="http://www.gribuser.ru/xml/fictionbook/2.0">
<xsl:text disable-output-escaping="yes">–&#8198;</xsl:text>
<xsl:value-of select="substring(.,2)"/>
</xsl:element>
</xsl:when>
<xsl:otherwise>
<xsl:copy>
<xsl:apply-templates/>
</xsl:copy>
</xsl:otherwise>
</xsl:choose>
<!-- Dialog paragraphs: any fb:p whose string value starts with one of the listed dash-like characters
     is handed to the rupor:katz_tr extension element (namespace fb2mobi_ns).
     The element's text, a dash followed by U+2006 SIX-PER-EM SPACE, is the replacement prefix. -->
<xsl:template match="fb:p[starts-with(.,'‐') or starts-with(.,'‑') or starts-with(.,'−') or starts-with(.,'–') or starts-with(.,'—') or starts-with(.,'―')]">
<rupor:katz_tr>–&#8198;</rupor:katz_tr>
</xsl:template>

</xsl:stylesheet>
2 changes: 1 addition & 1 deletion version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: utf-8 -*-

VERSION = u'2.1.2'
VERSION = u'2.2.0'

0 comments on commit 9671f79

Please sign in to comment.