Skip to content

Commit

Permalink
Modify the TensorBoard text dashboard so that it can support non-scal…
Browse files Browse the repository at this point in the history
…ar string tensors.

If the string tensor passed to the text dashboard is not a scalar, then the data is automatically organized into a table.
Change: 152081823
  • Loading branch information
Dandelion Mané authored and tensorflower-gardener committed Apr 4, 2017
1 parent 438c13e commit b657f5a
Show file tree
Hide file tree
Showing 5 changed files with 411 additions and 40 deletions.
15 changes: 8 additions & 7 deletions tensorflow/python/summary/text_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,17 @@ def text_summary(name, tensor, collections=None):
"""Summarizes textual data.
Text data summarized via this plugin will be visible in the Text Dashboard
in TensorBoard.
in TensorBoard. The standard TensorBoard Text Dashboard will render markdown
in the strings, and will automatically organize 1d and 2d tensors into tables.
If a tensor with more than 2 dimensions is provided, a 2d subarray will be
displayed along with a warning message. (Note that this behavior is not
intrinsic to the text summary api, but rather to the default TensorBoard text
plugin.)
Args:
name: A name for the generated node. Will also serve as a series name in
TensorBoard.
tensor: a scalar string-type Tensor to summarize.
tensor: a string-type Tensor to summarize.
collections: Optional list of ops.GraphKeys. The collections to add the
summary to. Defaults to [_ops.GraphKeys.SUMMARIES]
Expand All @@ -49,16 +54,12 @@ def text_summary(name, tensor, collections=None):
type `string` which contains `Summary` protobufs.
Raises:
ValueError: If tensor has the wrong shape or type.
ValueError: If tensor has the wrong type.
"""
if tensor.dtype != dtypes.string:
raise ValueError("Expected tensor %s to have dtype string, got %s" %
(tensor.name, tensor.dtype))

if tensor.shape.ndims != 0:
raise ValueError("Expected tensor %s to be scalar, has shape %s" %
(tensor.name, tensor.shape))

t_summary = tensor_summary(name, tensor, collections=collections)
text_assets = plugin_asset.get_plugin_asset(TextSummaryPluginAsset)
text_assets.register_tensor(t_summary.op.name)
Expand Down
16 changes: 9 additions & 7 deletions tensorflow/python/summary/text_summary_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,19 @@ def testTextSummaryAPI(self):
num = array_ops.constant(1)
text_summary.text_summary("foo", num)

with self.assertRaises(ValueError):
arr = array_ops.constant(["one", "two", "three"])
text_summary.text_summary("foo", arr)
# The API accepts vectors.
arr = array_ops.constant(["one", "two", "three"])
summ = text_summary.text_summary("foo", arr)
self.assertEqual(summ.op.type, "TensorSummary")

# the API accepts scalars
summ = text_summary.text_summary("foo", array_ops.constant("one"))
self.assertEqual(summ.op.type, "TensorSummary")

text_summary.text_summary("bar", array_ops.constant("2"), collections=[])
summaries = framework_ops.get_collection(
framework_ops.GraphKeys.SUMMARIES)
self.assertEqual(len(summaries), 1)
def testTextSummaryCollections(self):
text_summary.text_summary("bar", array_ops.constant("2"), collections=[])
summaries = framework_ops.get_collection(framework_ops.GraphKeys.SUMMARIES)
self.assertEqual(len(summaries), 0)


if __name__ == "__main__":
Expand Down
5 changes: 1 addition & 4 deletions tensorflow/tensorboard/plugins/projector/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ py_library(
name = "projector_plugin",
srcs = ["projector_plugin.py"],
srcs_version = "PY2AND3",
visibility = [
"//learning/vis/projector:__subpackages__",
"//tensorflow:internal",
],
visibility = ["//tensorflow:internal"],
deps = [
":protos_all_py",
"//tensorflow/python:errors",
Expand Down
147 changes: 146 additions & 1 deletion tensorflow/tensorboard/plugins/text/text_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
from __future__ import print_function

import json
import textwrap

# pylint: disable=g-bad-import-order
# Necessary for an internal test with special behavior for numpy.
import numpy as np
# pylint: enable=g-bad-import-order

import bleach
# pylint: disable=g-bad-import-order
Expand All @@ -27,6 +33,7 @@
# pylint: enable=g-bad-import-order
from werkzeug import wrappers

from tensorflow.python.framework import tensor_util
from tensorflow.python.summary import text_summary
from tensorflow.tensorboard.backend import http_util
from tensorflow.tensorboard.plugins import base_plugin
Expand Down Expand Up @@ -68,6 +75,10 @@

# Per-tag attribute whitelist; passed as `attributes=` to bleach.clean when
# sanitizing the html produced from markdown.
ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'img': ['src', 'title', 'alt']}

# Markdown warning that text_array_to_html prefixes to the table when the
# summarized array has more than two dimensions. The single `%d` placeholder is
# filled with the array's number of dimensions. The backslashes at the ends of
# the lines inside the literal are line continuations, so the message contains
# no embedded newlines.
WARNING_TEMPLATE = textwrap.dedent("""\
**Warning:** This text summary contained data of dimensionality %d, but only \
2d tables are supported. Showing a 2d slice of the data instead.""")


def markdown_and_sanitize(markdown_string):
"""Takes a markdown string and converts it into sanitized html.
Expand All @@ -87,19 +98,153 @@ def markdown_and_sanitize(markdown_string):
# Convert to utf-8 because we get a bytearray in python3
if not isinstance(markdown_string, str):
markdown_string = markdown_string.decode('utf-8')

string_html = markdown.markdown(
markdown_string, extensions=['markdown.extensions.tables'])
string_sanitized = bleach.clean(
string_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
return string_sanitized


def make_table_row(contents, tag='td'):
  """Render an iterable of strings as one html table row.

  Each item is wrapped in an opening/closing `tag` pair on its own line, and
  the cells are enclosed in a `<tr>` element. The contents are inserted
  verbatim; no escaping is performed here.

  Args:
    contents: An iterable yielding strings.
    tag: The element name used for each cell. Defaults to 'td'; pass 'th' for
      a header row.

  Returns:
    A string holding the row markup, terminated by a newline.

  Example:
    make_table_row(['one', 'two']) ==
        '<tr>\\n<td>one</td>\\n<td>two</td>\\n</tr>\\n'
  """
  pieces = ['<tr>\n']
  for cell_text in contents:
    pieces.append('<%s>%s</%s>\n' % (tag, cell_text, tag))
  pieces.append('</tr>\n')
  return ''.join(pieces)


def make_table(contents, headers=None):
  """Given a numpy ndarray of strings, concatenate them into a html table.

  Args:
    contents: A np.ndarray of strings. May be 1d or 2d. In the 1d case, the
      table is laid out vertically (one element per row, single column).
    headers: Optional list, tuple, or 1d np.ndarray of string header names for
      the table. None or an empty sequence/array means "no header row".

  Returns:
    A string containing all of the content strings, organized into a table.
    If contents has zero rows, the table has an empty <tbody>.

  Raises:
    ValueError: If contents is not a np.ndarray.
    ValueError: If contents is not 1d or 2d.
    ValueError: If headers is present and not a list, tuple, or ndarray.
    ValueError: If headers is not 1d.
    ValueError: If number of elements in headers does not correspond to number
      of columns in contents.
  """
  if not isinstance(contents, np.ndarray):
    raise ValueError('make_table contents must be a numpy ndarray')

  if contents.ndim not in [1, 2]:
    raise ValueError('make_table requires a 1d or 2d numpy array, was %dd' %
                     contents.ndim)

  # A plain truthiness test (`if headers:`) raises "truth value of an array
  # with more than one element is ambiguous" for ndarray headers, so arrays
  # are checked by size. Everything else keeps ordinary truthiness, so None
  # and empty lists/tuples still mean "no header".
  if isinstance(headers, np.ndarray):
    has_headers = headers.size > 0
  else:
    has_headers = bool(headers)

  if has_headers:
    if isinstance(headers, (list, tuple)):
      headers = np.array(headers)
    if not isinstance(headers, np.ndarray):
      raise ValueError('Could not convert headers %s into np.ndarray' % headers)
    if headers.ndim != 1:
      raise ValueError('Headers must be 1d, is %dd' % headers.ndim)
    expected_n_columns = contents.shape[1] if contents.ndim == 2 else 1
    if headers.shape[0] != expected_n_columns:
      raise ValueError('Number of headers %d must match number of columns %d' %
                       (headers.shape[0], expected_n_columns))
    header = '<thead>\n%s</thead>\n' % make_table_row(headers, tag='th')
  else:
    header = ''

  if contents.ndim == 1:
    # If it's a vector, we need to wrap each element in a new list, otherwise
    # we would turn the string itself into a row (one cell per character).
    rows = (make_table_row([cell]) for cell in contents)
  else:
    # Iterating a 2d array yields its rows.
    rows = (make_table_row(row) for row in contents)

  return '<table>\n%s<tbody>\n%s</tbody>\n</table>' % (header, ''.join(rows))


def reduce_to_2d(arr):
  """Given a np.ndarray with at least 2 dimensions, reduce it to 2d.

  It does this by selecting the zeroth coordinate along every leading
  dimension, keeping only the last two dimensions, i.e. arr[0, 0, ..., 0, :, :].
  A 2d input is returned as a full 2d slice of itself.

  Args:
    arr: a numpy ndarray of dimension at least 2.

  Returns:
    A two-dimensional subarray from the input array.

  Raises:
    ValueError: If the argument is not a numpy ndarray, or the dimensionality
      is too low.
  """
  if not isinstance(arr, np.ndarray):
    raise ValueError('reduce_to_2d requires a numpy.ndarray')

  ndims = arr.ndim
  if ndims < 2:
    raise ValueError('reduce_to_2d requires an array of dimensionality >=2')
  # slice(None) is equivalent to `:`, so we take arr[0,0,...0,:,:].
  # The index must be a tuple: indexing with a *list* that contains slices is
  # deprecated in NumPy and rejected by current releases.
  index = (0,) * (ndims - 2) + (slice(None), slice(None))
  return arr[index]


def text_array_to_html(text_arr):
  """Take a numpy.ndarray containing strings, and convert it into html.

  If the ndarray contains a single scalar string, that string is converted to
  html via our sanitized markdown parser. If it contains an array of strings,
  the strings are individually converted to html and then composed into a table
  using make_table. If the array contains dimensionality greater than 2,
  all but two of the dimensions are removed, and a warning message is prefixed
  to the table.

  Args:
    text_arr: A numpy.ndarray containing strings.

  Returns:
    The array converted to html.
  """
  if not text_arr.shape:
    # It is a scalar. No need to put it in a table, just apply markdown.
    # `.item()` yields the element as a plain Python str/bytes object. The
    # earlier `astype(str).tostring()` returned the array's raw memory buffer,
    # which on Python 3 is not the UTF-8 text that markdown_and_sanitize
    # expects, and `tostring` is deprecated in NumPy.
    return markdown_and_sanitize(text_arr.item())

  warning = ''
  if len(text_arr.shape) > 2:
    # Only 2d tables can be rendered; tell the reader that data was dropped.
    warning = markdown_and_sanitize(WARNING_TEMPLATE % len(text_arr.shape))
    text_arr = reduce_to_2d(text_arr)

  # Convert each cell independently, then restore the original 1d/2d layout.
  html_arr = [markdown_and_sanitize(x) for x in text_arr.reshape(-1)]
  html_arr = np.array(html_arr).reshape(text_arr.shape)

  return warning + make_table(html_arr)


def process_string_tensor_event(event):
  """Convert a TensorEvent into a JSON-compatible response.

  The event's tensor proto is decoded into a numpy array and rendered to html
  by text_array_to_html.

  Args:
    event: An event object exposing `tensor_proto`, `wall_time`, and `step`.

  Returns:
    A dict with keys 'wall_time', 'step', and 'text', where 'text' holds the
    html rendering of the tensor.
  """
  string_arr = tensor_util.MakeNdarray(event.tensor_proto)
  html = text_array_to_html(string_arr)
  # Only one 'text' entry: a duplicate key that rendered string_val[0] was
  # evaluated and then overwritten, and indexing [0] fails on an empty tensor.
  return {
      'wall_time': event.wall_time,
      'step': event.step,
      'text': html,
  }


Expand Down
Loading

0 comments on commit b657f5a

Please sign in to comment.