From d103f61ccd70101476a642c5d204c461896fab3a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 5 Apr 2017 17:13:42 +1000 Subject: [PATCH] Refactoring and initial tests for CSS to Excel --- pandas/formats/format.py | 373 +++++++++++++++----------- pandas/tests/formats/test_to_excel.py | 178 ++++++++++++ pandas/tests/io/test_excel.py | 4 + 3 files changed, 404 insertions(+), 151 deletions(-) create mode 100644 pandas/tests/formats/test_to_excel.py diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 16158663fb94a..34e59b2977319 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1758,15 +1758,213 @@ def __init__(self, row, col, val, style=None, mergestart=None, "vertical": "top"}} -class CSSParseWarning(Warning): +class CSSWarning(UserWarning): """This CSS syntax cannot currently be parsed""" pass +class CSSResolver(object): + """A callable for parsing and resolving CSS to atomic properties + + """ + + INITIAL_STYLE = { + } + + def __call__(self, declarations_str, inherited=None): + """ the given declarations to atomic properties + + Parameters + ---------- + declarations_str : str + A list of CSS declarations + inherited : dict, optional + Atomic properties indicating the inherited style context in which + declarations_str is to be resolved. ``inherited`` should already + be resolved, i.e. valid output of this method. + + Returns + ------- + props : dict + Atomic CSS 2.2 properties + + Examples + -------- + >>> resolve = CSSResolver() + >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'} + >>> out = resolve(''' + ... border-color: BLUE RED; + ... font-size: 1em; + ... font-size: 2em; + ... font-weight: normal; + ... font-weight: inherit; + ... ''', inherited) + >>> sorted(out.items()) # doctest: +NORMALIZE_WHITESPACE + [('border-bottom-color', 'blue'), + ('border-left-color', 'red'), + ('border-right-color', 'red'), + ('border-top-color', 'blue'), + ('font-family', 'serif'), + ('font-size', '24pt'), + ('font-weight', 'bold')] + """ + + props = dict(self.atomize(self.parse(declarations_str))) + if inherited is None: + inherited = {} + + # 1. resolve inherited, initial + for prop, val in inherited.items(): + if prop not in props: + props[prop] = val + + for prop, val in list(props.items()): + if val == 'inherit': + val = inherited.get(prop, 'initial') + if val == 'initial': + val = self.INITIAL_STYLE.get(prop) + + if val is None: + # we do not define a complete initial stylesheet + del props[val] + else: + props[prop] = val + + # 2. resolve relative font size + if props.get('font-size'): + if 'font-size' in inherited: + em_pt = inherited['font-size'] + assert em_pt[-2:] == 'pt' + em_pt = float(em_pt[:-2]) + else: + em_pt = None + font_size = self.font_size_to_pt(props['font-size'], em_pt) + if font_size == int(font_size): + size_fmt = '%d' + else: + size_fmt = '%f' + props['font-size'] = (size_fmt + 'pt') % font_size + + # 3. TODO: resolve other font-relative units + # 4. TODO: resolve other relative styles (e.g. ?) + return props + + UNIT_CONVERSIONS = { + 'rem': ('pt', 12), + 'ex': ('em', .5), + # 'ch': + 'px': ('pt', .75), + 'pc': ('pt', 12), + 'in': ('pt', 72), + 'cm': ('in', 1 / 2.54), + 'mm': ('in', 1 / 25.4), + 'q': ('mm', .25), + } + + FONT_SIZE_CONVERSIONS = UNIT_CONVERSIONS.copy() + FONT_SIZE_CONVERSIONS.update({ + '%': ('em', 1), + 'xx-small': ('rem', .5), + 'x-small': ('rem', .625), + 'small': ('rem', .8), + 'medium': ('rem', 1), + 'large': ('rem', 1.125), + 'x-large': ('rem', 1.5), + 'xx-large': ('rem', 2), + 'smaller': ('em', 1 / 1.2), + 'larger': ('em', 1.2), + }) + + def font_size_to_pt(self, val, em_pt=None): + try: + val, unit = re.match('(.*?)([a-zA-Z%].*)', val).groups() + except AttributeError: + warnings.warn('Unhandled font size: %r' % val, CSSWarning) + return + if val == '': + # hack for 'large' etc. + val = 1 + else: + try: + val = float(val) + except ValueError: + warnings.warn('Unhandled font size: %r' % val + unit, + CSSWarning) + + while unit != 'pt': + if unit == 'em': + if em_pt is None: + unit = 'rem' + else: + val *= em_pt + unit = 'pt' + continue + + unit, mul = self.FONT_SIZE_CONVERSIONS[unit] + val *= mul + return val + + def atomize(self, declarations): + for prop, value in declarations: + attr = 'expand_' + prop.replace('-', '_') + try: + expand = getattr(self, attr) + except AttributeError: + yield prop, value + else: + for prop, value in expand(prop, value): + yield prop, value + + DIRECTION_SHORTHANDS = { + 1: [0, 0, 0, 0], + 2: [0, 1, 0, 1], + 3: [0, 1, 2, 1], + 4: [0, 1, 2, 3], + } + DIRECTIONS = ('top', 'right', 'bottom', 'left') + + def _direction_expander(prop_fmt): + def expand(self, prop, value): + tokens = value.split() + try: + mapping = self.DIRECTION_SHORTHANDS[len(tokens)] + except KeyError: + warnings.warn('Could not expand "%s: %s"' % (prop, value), + CSSWarning) + return + for key, idx in zip(self.DIRECTIONS, mapping): + yield prop_fmt % key, tokens[idx] + + return expand + + expand_border_color = _direction_expander('border-%s-color') + expand_border_style = _direction_expander('border-%s-style') + expand_border_width = _direction_expander('border-%s-width') + expand_margin = _direction_expander('margin-%s') + expand_padding = _direction_expander('padding-%s') + + def parse(self, declarations_str): + """Generates (prop, value) pairs from declarations + + In a future version may generate parsed tokens from tinycss/tinycss2 + """ + for decl in declarations_str.split(';'): + if not decl.strip(): + continue + prop, sep, val = decl.partition(':') + prop = prop.strip().lower() + # TODO: don't lowercase case sensitive parts of values (strings) + val = val.strip().lower() + if not sep: + warnings.warn('Ill-formatted attribute: expected a colon ' + 'in %r' % decl, CSSWarning) + yield prop, val + + class CSSToExcelConverter(object): - """Converts CSS declarations to ExcelWriter styles + """A callable for converting CSS declarations to ExcelWriter styles - Supports parts of CSS2, with minimal CSS3 support (e.g. text-shadow), + Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), focusing on font styling, backgrounds, borders and alignment. Operates by first computing CSS styles in a fairly generic @@ -1790,8 +1988,7 @@ def __init__(self, inherited=None): self.inherited = inherited - INITIAL_STYLE = { - } + compute_css = CSSResolver() def __call__(self, declarations_str): """Convert CSS declarations to ExcelWriter style @@ -1809,7 +2006,7 @@ def __call__(self, declarations_str): def build_xlstyle(self, props): out = { 'alignment': self.build_alignment(props), - 'borders': self.build_borders(props), + 'border': self.build_border(props), 'fill': self.build_fill(props), 'font': self.build_font(props), } @@ -1839,14 +2036,14 @@ def remove_none(d): } def build_alignment(self, props): - # TODO: text-indent, margin-left -> alignment.indent + # TODO: text-indent, padding-left -> alignment.indent return {'horizontal': props.get('text-align'), 'vertical': self.VERTICAL_MAP.get(props.get('vertical-align')), - 'wrapText': (props['white-space'] not in (None, 'nowrap') - if 'white-space' in props else None), + 'wrap_text': (props['white-space'] not in (None, 'nowrap') + if 'white-space' in props else None), } - def build_borders(self, props): + def build_border(self, props): return {side: { # TODO: convert styles and widths to openxml, one of: # 'dashDot' @@ -1879,11 +2076,11 @@ def build_fill(self, props): 'patternType': 'solid', } - BOLD_MAP = {k: True for k in - ['bold', 'bolder', '600', '700', '800', '900']} - ITALIC_MAP = {'italic': True, 'oblique': True} - UNDERLINE_MAP = {'underline': True} - STRIKE_MAP = {'line-through': True} + BOLD_MAP = {'bold': True, 'bolder': True, '600': True, '700': True, + '800': True, '900': True, + 'normal': False, 'lighter': False, '100': False, '200': False, + '300': False, '400': False, '500': False} + ITALIC_MAP = {'normal': False, 'italic': True, 'oblique': True} def build_font(self, props): size = props.get('font-size') @@ -1907,14 +2104,20 @@ def build_font(self, props): family = 5 # decorative break + decoration = props.get('text-decoration') + if decoration is not None: + decoration = decoration.split() + return { 'name': font_names[0] if font_names else None, 'family': family, 'size': size, 'bold': self.BOLD_MAP.get(props.get('font-weight')), 'italic': self.ITALIC_MAP.get(props.get('font-style')), - 'underline': self.UNDERLINE_MAP.get(props.get('text-decoration')), - 'strike': self.STRIKE_MAP.get(props.get('text-decoration')), + 'underline': (None if decoration is None + else 'underline' in decoration), + 'strike': (None if decoration is None + else 'line-through' in decoration), 'color': self.color_to_excel(props.get('font-color')), # shadow if nonzero digit before shadow colour 'shadow': (bool(re.search('^[^#(]*[1-9]', @@ -1957,139 +2160,7 @@ def color_to_excel(self, val): try: return self.NAMED_COLORS[val] except KeyError: - warnings.warn('Unhandled colour format: %r' % val, CSSParseWarning) - - UNIT_CONVERSIONS = { - 'rem': ('pt', 12), - 'ex': ('em', .5), - # 'ch': - 'px': ('pt', .75), - 'pc': ('pt', 12), - 'in': ('pt', 72), - 'cm': ('in', 1 / 2.54), - 'mm': ('in', 1 / 25.4), - 'q': ('mm', .25), - } - - FONT_SIZE_CONVERSIONS = UNIT_CONVERSIONS.copy() - FONT_SIZE_CONVERSIONS.update({ - '%': ('em', 1), - 'xx-small': ('rem', .5), - 'x-small': ('rem', .625), - 'small': ('rem', .8), - 'medium': ('rem', 1), - 'large': ('rem', 1.125), - 'x-large': ('rem', 1.5), - 'xx-large': ('rem', 2), - 'smaller': ('em', 1 / 1.2), - 'larger': ('em', 1.2), - }) - - def font_size_to_pt(self, val, em_pt=None): - val, unit = re.split('(?=[a-zA-Z%])', val, 1).groups() - if val == '': - # hack for 'large' etc. - val = 1 - - while unit != 'pt': - if unit == 'em': - if em_pt is None: - unit = 'rem' - else: - val *= em_pt - unit = 'pt' - continue - - unit, mul = self.FONT_SIZE_CONVERSIONS[unit] - val *= mul - return val - - def compute_css(self, declarations_str, inherited=None): - props = dict(self.atomize(self.parse(declarations_str))) - if inherited is None: - inherited = {} - - # 1. resolve inherited, initial - for prop, val in list(props.items()): - if val == 'inherited': - val = inherited.get(prop, 'initial') - if val == 'initial': - val = self.INITIAL_STYLE.get(prop) - - if val is None: - # we do not define a complete initial stylesheet - del props[val] - else: - props[prop] = val - - # 2. resolve relative font size - if props.get('font-size'): - if 'font-size' in inherited: - em_pt = inherited['font-size'] - assert em_pt[-2:] == 'pt' - em_pt = float(em_pt[:-2]) - font_size = self.font_size_to_pt(props['font-size'], em_pt) - props['font-size'] = '%fpt' % font_size - - # 3. TODO: resolve other font-relative units - # 4. TODO: resolve other relative styles (e.g. ?) - return props - - def atomize(self, declarations): - for prop, value in declarations: - attr = 'expand_' + prop.replace('-', '_') - try: - expand = getattr(self, attr) - except AttributeError: - yield prop, value - else: - for prop, value in expand(prop, value): - yield prop, value - - DIRECTION_SHORTHANDS = { - 1: [0, 0, 0, 0], - 2: [0, 1, 0, 1], - 3: [0, 1, 2, 1], - 4: [0, 1, 2, 3], - } - DIRECTIONS = ('top', 'right', 'bottom', 'left') - - def _direction_expander(prop_fmt): - def expand(self, prop, value): - tokens = value.split() - try: - mapping = self.DIRECTION_SHORTHANDS[len(tokens)] - except KeyError: - warnings.warn('Could not expand "%s: %s"' % (prop, value), - CSSParseWarning) - return - for key, idx in zip(self.DIRECTIONS, mapping): - yield prop_fmt % key, tokens[idx] - - return expand - - expand_border_color = _direction_expander('border-%s-color') - expand_border_style = _direction_expander('border-%s-style') - expand_border_width = _direction_expander('border-%s-width') - expand_margin = _direction_expander('margin-%s') - expand_padding = _direction_expander('padding-%s') - - def parse(self, declarations_str): - """Generates (prop, value) pairs from declarations - - In a future version may generate parsed tokens from tinycss/tinycss2 - """ - for decl in sum((l.split(';') for l in declarations_str), []): - if not decl.strip(): - continue - prop, sep, val = decl.partition(':') - prop = prop.strip().lower() - # TODO: don't lowercase case sensitive parts of values (strings) - val = val.strip().lower() - if not sep: - raise ValueError('Ill-formatted attribute: expected a colon ' - 'in %r' % decl) - yield prop, val + warnings.warn('Unhandled colour format: %r' % val, CSSWarning) class ExcelFormatter(object): @@ -2374,7 +2445,7 @@ def _generate_body(self, coloffset): series = self.df.iloc[:, colidx] for i, val in enumerate(series): if styles is not None: - xlstyle = self.style_converter(styles[i, colidx]) + xlstyle = self.style_converter(';'.join(styles[i, colidx])) yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) diff --git a/pandas/tests/formats/test_to_excel.py b/pandas/tests/formats/test_to_excel.py new file mode 100644 index 0000000000000..de24e62f2d2a1 --- /dev/null +++ b/pandas/tests/formats/test_to_excel.py @@ -0,0 +1,178 @@ +"""Tests formatting as writer-agnostic ExcelCells + +Most of the conversion to Excel is tested in pandas/tests/io/test_excel.py +""" + +import pytest + +from pandas.formats.format import CSSResolver, CSSWarning, CSSToExcelConverter + + +# Test parsing and normalising of CSS + + +def assert_resolves(css, props, inherited=None): + resolve = CSSResolver() + actual = resolve(css, inherited=inherited) + assert props == actual + + +def test_css_parse_whitespace(): + pass # TODO + + +def test_css_parse_case(): + pass # TODO + + +def test_css_parse_empty(): + pass # TODO + + +def test_css_parse_invalid(): + pass # TODO + + +@pytest.mark.xfail +def test_css_parse_comments(): + pass # TODO + + +@pytest.mark.xfail +def test_css_parse_strings(): + pass # TODO + + +@pytest.mark.parametrize( + 'shorthand,expansions', + [('margin', ['margin-top', 'margin-right', + 'margin-bottom', 'margin-left']), + ('padding', ['padding-top', 'padding-right', + 'padding-bottom', 'padding-left']), + ('border-width', ['border-top-width', 'border-right-width', + 'border-bottom-width', 'border-left-width']), + ('border-color', ['border-top-color', 'border-right-color', + 'border-bottom-color', 'border-left-color']), + ('border-style', ['border-top-style', 'border-right-style', + 'border-bottom-style', 'border-left-style']), + ]) +def test_css_direction_shorthands(shorthand, expansions): + top, right, bottom, left = expansions + + assert_resolves('%s: thin' % shorthand, + {top: 'thin', right: 'thin', + bottom: 'thin', left: 'thin'}) + + assert_resolves('%s: thin thick' % shorthand, + {top: 'thin', right: 'thick', + bottom: 'thin', left: 'thick'}) + + assert_resolves('%s: thin thick medium' % shorthand, + {top: 'thin', right: 'thick', + bottom: 'medium', left: 'thick'}) + + assert_resolves('%s: thin thick medium none' % shorthand, + {top: 'thin', right: 'thick', + bottom: 'medium', left: 'none'}) + + with pytest.warns(CSSWarning): + assert_resolves('%s: thin thick medium none medium' % shorthand, + {}) + + +@pytest.mark.xfail +@pytest.mark.parametrize('css,props', [ + ('font: italic bold 12pt helvetica,sans-serif', + {'font-family': 'helvetica,sans-serif', + 'font-style': 'italic', + 'font-weight': 'bold', + 'font-size': '12pt'}), + ('font: bold italic 12pt helvetica,sans-serif', + {'font-family': 'helvetica,sans-serif', + 'font-style': 'italic', + 'font-weight': 'bold', + 'font-size': '12pt'}), +]) +def test_css_font_shorthand(css, props): + assert_resolves(css, props) + + +@pytest.mark.xfail +def test_css_background_shorthand(): + pass # TODO + + +def test_css_override(): + pass # TODO + + +def test_css_override_inherited(): + pass # TODO + + +def test_css_default_inherited(): + pass # TODO + + +def test_css_none_absent(): + pass # TODO + + +def test_css_font_size(): + pass # TODO + + +def test_css_font_size_invalid(): + pass # TODO + + +# Test translation of CSS to ExcelCell.style values + + +@pytest.mark.parametrize('css,expected', [ + # FONT + # - name + # - family + # - size + # - bold + ('font-weight: 100', {'font': {'bold': False}}), + ('font-weight: 200', {'font': {'bold': False}}), + ('font-weight: 300', {'font': {'bold': False}}), + ('font-weight: 400', {'font': {'bold': False}}), + ('font-weight: normal', {'font': {'bold': False}}), + ('font-weight: lighter', {'font': {'bold': False}}), + ('font-weight: bold', {'font': {'bold': True}}), + ('font-weight: bolder', {'font': {'bold': True}}), + ('font-weight: 700', {'font': {'bold': True}}), + ('font-weight: 800', {'font': {'bold': True}}), + ('font-weight: 900', {'font': {'bold': True}}), + # - italic + # - underline + ('text-decoration: underline', + {'font': {'underline': True, 'strike': False}}), + ('text-decoration: overline', + {'font': {'underline': False, 'strike': False}}), + ('text-decoration: none', + {'font': {'underline': False, 'strike': False}}), + # - strike + ('text-decoration: line-through', + {'font': {'strike': True, 'underline': False}}), + ('text-decoration: underline line-through', + {'font': {'strike': True, 'underline': True}}), + ('text-decoration: underline; text-decoration: line-through', + {'font': {'strike': True, 'underline': False}}), + # - color + # - shadow + # FILL + # - color, fillType + # BORDER + # - style + # - color + # ALIGNMENT + # - horizontal + # - vertical + # - wrap_text +]) +def test_css_to_excel(css, expected): + convert = CSSToExcelConverter() + assert expected == convert(css) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 256a37e922177..6735d69b47fac 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -27,6 +27,10 @@ import pandas.util.testing as tm +# FIXME: run all/some tests with plain Styler instead of DataFrame +# FIXME: run some tests with styled Styler + + def _skip_if_no_xlrd(): try: import xlrd