diff --git a/gensim/visualization/gensim_wrap.py b/gensim/visualization/gensim_wrap.py new file mode 100644 index 0000000000..fa2e7a6c8b --- /dev/null +++ b/gensim/visualization/gensim_wrap.py @@ -0,0 +1,135 @@ +""" +pyLDAvis Gensim +=============== +Helper functions to visualize LDA models trained by Gensim +""" + +from __future__ import absolute_import +import funcy as fp +import numpy as np +import pandas as pd +from scipy.sparse import issparse +from past.builtins import xrange +from . import prepare as vis_prepare + + +def _extract_data(topic_model, corpus, dictionary, texts=None, doc_topic_dists=None): + import gensim + + if not gensim.matutils.ismatrix(corpus): + corpus_csc = gensim.matutils.corpus2csc(corpus, num_terms=len(dictionary)) + else: + corpus_csc = corpus + # Need corpus to be a streaming gensim list corpus for len and inference functions below: + corpus = gensim.matutils.Sparse2Corpus(corpus_csc) + + doc_word_dists = corpus_csc.todense() + doc_word_dists = doc_word_dists / doc_word_dists.sum(axis=1) + + vocab = list(dictionary.token2id.keys()) + # TODO: add the hyperparam to smooth it out? no beta in online LDA impl.. hmm.. + # for now, I'll just make sure we don't ever get zeros... + beta = 0.01 + fnames_argsort = np.asarray(list(dictionary.token2id.values()), dtype=np.int_) + term_freqs = corpus_csc.sum(axis=1).A.ravel()[fnames_argsort] + term_freqs[term_freqs == 0] = beta + doc_lengths = corpus_csc.sum(axis=0).A.ravel() + + assert term_freqs.shape[0] == len(dictionary), 'Term frequencies and dictionary have different shape {} != {}'.format(term_freqs.shape[0], len(dictionary)) + assert doc_lengths.shape[0] == len(corpus), 'Document lengths and corpus have different sizes {} != {}'.format(doc_lengths.shape[0], len(corpus)) + + if hasattr(topic_model, 'lda_alpha'): + num_topics = len(topic_model.lda_alpha) + else: + num_topics = topic_model.num_topics + + if doc_topic_dists is None: + # If its an HDP model. 
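+        # An HDP-style model is assumed to expose `lda_alpha`/`lda_beta` and an
+        # inference() that returns only gamma, while LdaModel.inference() returns
+        # the tuple (gamma, sstats); hence the two branches below.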
+ if hasattr(topic_model, 'lda_beta'): + gamma = topic_model.inference(corpus) + else: + gamma, _ = topic_model.inference(corpus) + doc_topic_dists = gamma / gamma.sum(axis=1)[:, None] + else: + if isinstance(doc_topic_dists, list): + doc_topic_dists = gensim.matutils.corpus2dense(doc_topic_dists, num_topics).T + elif issparse(doc_topic_dists): + doc_topic_dists = doc_topic_dists.T.todense() + doc_topic_dists = doc_topic_dists / doc_topic_dists.sum(axis=1) + + assert doc_topic_dists.shape[1] == num_topics, 'Document topics and number of topics do not match {} != {}'.format(doc_topic_dists.shape[1], num_topics) + + term_topic_dists = [] + for word_id in list(dictionary.token2id.values()): + values = [] + for topic_id in range(0, num_topics): + values.append((topic_id, topic_model.expElogbeta[topic_id][word_id])) + term_topic_dists.append(values) + term_topic_dists = gensim.matutils.corpus2dense(term_topic_dists, num_topics).T + term_topic_dists = term_topic_dists / term_topic_dists.sum(axis=1) + + # get the topic-term distribution straight from gensim without + # iterating over tuples + if hasattr(topic_model, 'lda_beta'): + topic = topic_model.lda_beta + else: + topic = topic_model.state.get_lambda() + topic = topic / topic.sum(axis=1)[:, None] + topic_term_dists = topic[:, fnames_argsort] + + assert topic_term_dists.shape[0] == doc_topic_dists.shape[1] + + # convert tokenised texts of documents to list of strings + texts = [' '.join(doc) for doc in texts] + + return {'doc_topic_dists': doc_topic_dists, 'doc_word_dists': doc_word_dists, 'topic_word_dists': topic_term_dists, + 'word_topic_dists': term_topic_dists, 'doc_tag': range(0, doc_topic_dists.shape[0]), 'doc_texts': texts, + 'doc_lengths': doc_lengths, 'vocab': vocab, 'term_frequency': term_freqs} + +def prepare(topic_model, corpus, dictionary, texts=None, doc_topic_dist=None, **kwargs): + """Transforms the Gensim TopicModel and related corpus and dictionary into + the data structures needed for the visualization. + + Parameters + ---------- + topic_model : gensim.models.ldamodel.LdaModel + An already trained Gensim LdaModel. The other gensim model types are + not supported (PRs welcome). + + corpus : array-like list of bag of word docs in tuple form or scipy CSC matrix + The corpus in bag of word form, the same docs used to train the model. + The corpus is transformed into a csc matrix internally, if you intend to + call prepare multiple times it is a good idea to first call + `gensim.matutils.corpus2csc(corpus)` and pass in the csc matrix instead. + + For example: [(50, 3), (63, 5), ....] + + dictionary: gensim.corpora.Dictionary + The dictionary object used to create the corpus. Needed to extract the + actual terms (not ids). + + doc_topic_dist (optional): Document topic distribution from LDA (default=None) + The document topic distribution that is eventually visualised, if you will + be calling `prepare` multiple times it's a good idea to explicitly pass in + `doc_topic_dist` as inferring this for large corpora can be quite + expensive. + + **kwargs : + additional keyword arguments are passed through to :func:`pyldavis.prepare`. + + Returns + ------- + prepared_data : PreparedData + the data structures used in the visualization + + Example + -------- + For example usage please see this notebook: + http://nbviewer.ipython.org/github/bmabey/pyLDAvis/blob/master/notebooks/Gensim%20Newsgroup.ipynb + + See + ------ + See `pyLDAvis.prepare` for **kwargs. 
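+
+    Notes
+    -----
+    `texts` (optional, default=None) is expected to be the tokenised documents of
+    the corpus (a list of lists of tokens); each document is joined into a single
+    string and passed to the visualization as `doc_texts`, which is shown when a
+    document circle is double-clicked.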
+ """ + opts = fp.merge(_extract_data(topic_model, corpus, dictionary, texts, doc_topic_dist), kwargs) + return vis_prepare(**opts) diff --git a/gensim/visualization/prepare.py b/gensim/visualization/prepare.py new file mode 100644 index 0000000000..14f5f88f77 --- /dev/null +++ b/gensim/visualization/prepare.py @@ -0,0 +1,198 @@ +# code modified from https://github.com/bmabey/pyLDAvis + +from __future__ import absolute_import +from past.builtins import basestring +from collections import namedtuple +import json +import logging +from joblib import Parallel, delayed, cpu_count +import numpy as np +import pandas as pd +from scipy.stats import entropy +from scipy.spatial.distance import pdist, squareform +from .utils import NumPyEncoder +try: + from sklearn.manifold import MDS, TSNE + sklearn_present = True +except ImportError: + sklearn_present = False + + + +def _jensen_shannon(_P, _Q): + _M = 0.5 * (_P + _Q) + return 0.5 * (entropy(_P, _M) + entropy(_Q, _M)) + + +def _pcoa(pair_dists, n_components=2): + """Principal Coordinate Analysis, + aka Classical Multidimensional Scaling + """ + # code referenced from skbio.stats.ordination.pcoa + # https://github.com/biocore/scikit-bio/blob/0.5.0/skbio/stats/ordination/_principal_coordinate_analysis.py + + # pairwise distance matrix is assumed symmetric + pair_dists = np.asarray(pair_dists, np.float64) + + # perform SVD on double centred distance matrix + n = pair_dists.shape[0] + H = np.eye(n) - np.ones((n, n)) / n + B = - H.dot(pair_dists ** 2).dot(H) / 2 + eigvals, eigvecs = np.linalg.eig(B) + + # Take first n_components of eigenvalues and eigenvectors + # sorted in decreasing order + ix = eigvals.argsort()[::-1][:n_components] + eigvals = eigvals[ix] + eigvecs = eigvecs[:, ix] + + # replace any remaining negative eigenvalues and associated eigenvectors with zeroes + # at least 1 eigenvalue must be zero + eigvals[np.isclose(eigvals, 0)] = 0 + if np.any(eigvals < 0): + ix_neg = eigvals < 0 + eigvals[ix_neg] = np.zeros(eigvals[ix_neg].shape) + eigvecs[:, ix_neg] = np.zeros(eigvecs[:, ix_neg].shape) + + return np.sqrt(eigvals) * eigvecs + + +def js_PCoA(distributions): + """Dimension reduction via Jensen-Shannon Divergence & Principal Coordinate Analysis + (aka Classical Multidimensional Scaling) + Parameters + ---------- + distributions : array-like, shape (`n_dists`, `k`) + Matrix of distributions probabilities. + Returns + ------- + pcoa : array, shape (`n_dists`, 2) + """ + dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) + return _pcoa(dist_matrix) + +def js_MMDS(distributions, **kwargs): + """Dimension reduction via Jensen-Shannon Divergence & Metric Multidimensional Scaling + Parameters + ---------- + distributions : array-like, shape (`n_dists`, `k`) + Matrix of distributions probabilities. + **kwargs : Keyword argument to be passed to `sklearn.manifold.MDS()` + Returns + ------- + mmds : array, shape (`n_dists`, 2) + """ + dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) + model = MDS(n_components=2, random_state=0, dissimilarity='precomputed', **kwargs) + return model.fit_transform(dist_matrix) + +def js_TSNE(distributions, **kwargs): + """Dimension reduction via Jensen-Shannon Divergence & t-distributed Stochastic Neighbor Embedding + Parameters + ---------- + distributions : array-like, shape (`n_dists`, `k`) + Matrix of distributions probabilities. 
+ **kwargs : Keyword argument to be passed to `sklearn.manifold.TSNE()` + Returns + ------- + tsne : array, shape (`n_dists`, 2) + """ + dist_matrix = squareform(pdist(distributions, metric=_jensen_shannon)) + model = TSNE(n_components=2, random_state=0, metric='precomputed', **kwargs) + return model.fit_transform(dist_matrix) + + +def _doc_coordinates(mds, doc_topic_dists, doc_tag, doc_texts): + K = doc_topic_dists.shape[0] + mds_res = mds(doc_topic_dists) + assert mds_res.shape == (K, 2) + mds_df = pd.DataFrame({'x': mds_res[:,0], 'y': mds_res[:,1], 'docs': doc_tag, 'doc_texts':doc_texts}) + return mds_df + +def _topic_coordinates(mds, topic_word_dists, topic_proportion): + K = topic_word_dists.shape[0] + mds_res = mds(topic_word_dists) + assert mds_res.shape == (K, 2) + mds_df = pd.DataFrame({'x': mds_res[:,0], 'y': mds_res[:,1], 'topics': range(1, K + 1), 'Freq': topic_proportion * 100}) + return mds_df + +def _word_coordinates(mds, word_topic_dists, vocab, word_proportion): + K = word_topic_dists.shape[0] + mds_res = mds(word_topic_dists) + assert mds_res.shape == (K, 2) + mds_df = pd.DataFrame({'x': mds_res[:,0], 'y': mds_res[:,1], 'vocab': vocab, 'Freq': word_proportion * 100}) + return mds_df + + +def _info(dists, fst, scnd): + dists = dists / dists.sum() + pd_data = pd.DataFrame(dists) + pd_data = pd_data.stack().reset_index().rename(columns={'level_0':fst,'level_1':scnd, 0:'Freq'}) + + return pd_data + + +def prepare(doc_topic_dists, doc_word_dists, topic_word_dists, word_topic_dists, + vocab, doc_tag, doc_texts, doc_lengths, mds=js_PCoA): + """Transforms the topic model distributions and related corpus data into + the data structures needed for the visualization. + """ + # parse mds + if isinstance(mds, basestring): + mds = mds.lower() + if mds == 'pcoa': + mds = js_PCoA + elif mds in ('mmds', 'tsne'): + if sklearn_present: + mds_opts = {'mmds': js_MMDS, 'tsne': js_TSNE} + mds = mds_opts[mds] + else: + logging.warning('sklearn not present, switch to PCoA') + mds = js_PCoA + else: + logging.warning('Unknown mds `%s`, switch to PCoA' % mds) + mds = js_PCoA + + vocab = pd.Series(vocab, name='vocab') + doc_tag = pd.Series(doc_tag, name='docs') + doc_texts = pd.Series(doc_texts, name='doc_texts') + + topic_freq = np.dot(doc_topic_dists.T, doc_lengths) + topic_proportion = topic_freq / topic_freq.sum() + + word_freq = np.dot(topic_word_dists.T, topic_freq) + word_proportion = word_freq / word_freq.sum() + + word_doc_dists = doc_word_dists.T + topic_doc_dists = doc_topic_dists.T + + doc_topic_info = _info(doc_topic_dists, 'Doc', 'Topic') + doc_word_info = _info(doc_word_dists, 'Doc', 'Word') + topic_doc_info = _info(topic_doc_dists, 'Topic', 'Doc') + topic_word_info = _info(topic_word_dists, 'Topic', 'Word') + word_doc_info = _info(word_doc_dists, 'Word', 'Doc') + word_topic_info = _info(word_topic_dists, 'Word', 'Topic') + + doc_coordinates = _doc_coordinates(mds, doc_topic_dists, doc_tag) + topic_coordinates = _topic_coordinates(mds, topic_word_dists, topic_proportion) + word_coordinates = _word_coordinates(mds, word_topic_dists, vocab, word_proportion) + + return PreparedData(doc_coordinates, topic_coordinates, word_coordinates, doc_topic_info, doc_word_info, topic_doc_info, topic_word_info, word_doc_info, word_topic_info) + + +class PreparedData(namedtuple('PreparedData', ['doc_coordinates', 'topic_coordinates', 'word_coordinates', 'doc_topic_info', 'doc_word_info', + 'topic_doc_info', 'topic_word_info', 'word_doc_info', 'word_topic_info'])): + def to_dict(self): + return 
{'doc_mds': self.doc_coordinates.to_dict(orient='list'), + 'topic_mds': self.topic_coordinates.to_dict(orient='list'), + 'word_mds': self.word_coordinates.to_dict(orient='list'), + 'doc_topic.info': self.doc_topic_info.to_dict(orient='list'), + 'doc_word.info': self.doc_word_info.to_dict(orient='list'), + 'topic_doc.info': self.topic_doc_info.to_dict(orient='list'), + 'topic_word.info': self.topic_word_info.to_dict(orient='list'), + 'word_doc.info': self.word_doc_info.to_dict(orient='list'), + 'word_topic.info': self.word_topic_info.to_dict(orient='list') + } + + def to_json(self): + return json.dumps(self.to_dict(), cls=NumPyEncoder) \ No newline at end of file diff --git a/gensim/visualization/vis.js b/gensim/visualization/vis.js new file mode 100644 index 0000000000..4bf5567963 --- /dev/null +++ b/gensim/visualization/vis.js @@ -0,0 +1,1263 @@ +// code modified from https://github.com/bmabey/pyLDAvis + +var TopicModelVis = function(to_select, data_or_file_name) { + + // This section sets up the logic for event handling + var current_clicked = { + what: "nothing", + element: undefined + }, + current_hover = { + what: "nothing", + element: undefined + }, + old_winning_state = { + what: "nothing", + element: undefined + }, + vis_state = { + doc: 0, + topic: 0, + word: 0 + }; + + // Set up a few 'global' variables to hold the data: + var D, // number of docs + T, // number of topics + W, // number of words + docMdsData, // (x,y) locations and topic proportions + topicMdsData, + wordMdsData, + doc_topic_info, // topic proportions for all docs in the viz + doc_word_info, + topic_doc_info, + topic_word_info, + word_doc_info, + word_topic_info, + color1 = "#1f77b4", // baseline color for default topic circles and overall word frequencies + color2 = "#d62728"; // 'highlight' color for selected topics and word-topic frequencies + + // Set the duration of each half of the transition: + var duration = 750; + + // Set global margins used for everything + var margin = { + top: 30, + right: 30, + bottom: 70, + left: 30 + }, + mdswidth = 390, + mdsheight = 530, + mdsarea = mdsheight * mdswidth; + // controls how big the maximum circle can be + // doesn't depend on data, only on mds width and height: + var rMax = 40; + + // proportion of area of MDS plot to which the sum of default topic circle areas is set + var circle_prop = 0.25; + var word_prop = 0.25; + + // opacity of topic circles: + var base_opacity = 0.2, + highlight_opacity = 0.6; + + // doc/topic/word selection names are specific to *this* vis + var doc_select = to_select + "-doc"; + var topic_select = to_select + "-topic"; + var word_select = to_select + "-word"; + + // get rid of the # in the to_select (useful) for setting ID values + var visID = to_select.replace("#", ""); + var topID = visID + "-top"; + var docID = visID + "-doc"; + var topicID = visID + "-topic"; + var wordID = visID + "-word"; + // --------- + var docDown = docID + "-down"; + var docUp = docID + "-up"; + var docClear = docID + "-clear"; + var topicDown = topicID + "-down"; + var topicUp = topicID + "-up"; + var topicClear = topicID + "-clear"; + var wordDown = wordID + "-down"; + var wordUp = wordID + "-up"; + var wordClear = wordID + "-clear"; + + var docPanelID = visID + "-docPanel"; + var topicPanelID = visID + "-topicPanel"; + var wordPanelID = visID + "-wordPanel"; + + ////////////////////////////////////////////////////////////////////////////// + + + function visualize(data) { + + // set the number of documents to global variable D: + D = 
data['doc_mds'].x.length; + // // set the number of topics to global variable T: + T = data['topic_mds'].x.length; + // set the number of words to global variable W: + W = data['word_mds'].x.length; + + // a (D x 3) matrix with columns x, y, doc_tag + docMdsData = []; + for (var i = 0; i < D; i++) { + var obj = {}; + for (var key in data['doc_mds']) { + obj[key] = data['doc_mds'][key][i]; + } + docMdsData.push(obj); + } + + // a (T x 4) matrix with columns x, y, topics id, Freq + topicMdsData = []; + for (var i = 0; i < T; i++) { + var obj = {}; + for (var key in data['topic_mds']) { + obj[key] = data['topic_mds'][key][i]; + } + topicMdsData.push(obj); + } + + // a (W x 4) matrix with columns x, y, vocab word, Freq + wordMdsData = []; + for (var i = 0; i < W; i++) { + var obj = {}; + for (var key in data['word_mds']) { + obj[key] = data['word_mds'][key][i]; + } + wordMdsData.push(obj); + } + + + doc_topic_info = []; + for (var i = 0; i < data['doc_topic.info'].Doc.length; i++) { + var obj = {}; + for (var key in data['doc_topic.info']) { + obj[key] = data['doc_topic.info'][key][i]; + } + doc_topic_info.push(obj); + } + + doc_word_info = []; + for (var i = 0; i < data['doc_word.info'].Doc.length; i++) { + var obj = {}; + for (var key in data['doc_word.info']) { + obj[key] = data['doc_word.info'][key][i]; + } + doc_word_info.push(obj); + } + + topic_doc_info = []; + for (var i = 0; i < data['topic_doc.info'].Topic.length; i++) { + var obj = {}; + for (var key in data['topic_doc.info']) { + obj[key] = data['topic_doc.info'][key][i]; + } + topic_doc_info.push(obj); + } + + topic_word_info = []; + for (var i = 0; i < data['topic_word.info'].Topic.length; i++) { + var obj = {}; + for (var key in data['topic_word.info']) { + obj[key] = data['topic_word.info'][key][i]; + } + topic_word_info.push(obj); + } + + word_doc_info = []; + for (var i = 0; i < data['word_doc.info'].Word.length; i++) { + var obj = {}; + for (var key in data['word_doc.info']) { + obj[key] = data['word_doc.info'][key][i]; + } + word_doc_info.push(obj); + } + + word_topic_info = []; + for (var i = 0; i < data['word_topic.info'].Word.length; i++) { + var obj = {}; + for (var key in data['word_topic.info']) { + obj[key] = data['word_topic.info'][key][i]; + } + word_topic_info.push(obj); + } + + + // Create the doc/topic/word input forms + init_forms(docID, topicID, wordID); + + d3.select("#" + docID) + .on("keyup", function() { + // remove topic selection if it exists (from a saved URL) + var topicElem = document.getElementById(topicID + vis_state.topic); + if (topicElem !== undefined) topic_off(topicElem); + vis_state.topic = ""; + // remove word selection if it exists (from a saved URL) + var wordElem = document.getElementById(wordID + vis_state.word); + if (wordElem !== undefined) word_off(wordElem); + vis_state.word = ""; + doc_off(document.getElementById(docID + vis_state.doc)); + var value_new = document.getElementById(docID).value; + if (!isNaN(value_new) && value_new > 0) { + value_new = Math.min(D, Math.max(1, value_new)); + doc_on(document.getElementById(docID + value_new)); + vis_state.doc = value_new; + state_save(true); + document.getElementById(docID).value = vis_state.doc; + } + }); + + d3.select("#" + docClear) + .on("click", function() { + state_reset(); + state_save(true); + }); + + d3.select("#" + topicID) + .on("keyup", function() { + // remove doc selection if it exists (from a saved URL) + var docElem = document.getElementById(docID + vis_state.doc); + if (docElem !== undefined) doc_off(docElem); + 
vis_state.doc = ""; + // remove word selection if it exists (from a saved URL) + var wordElem = document.getElementById(wordID + vis_state.word); + if (wordElem !== undefined) word_off(wordElem); + vis_state.word = ""; + topic_off(document.getElementById(topicID + vis_state.topic)); + var value_new = document.getElementById(topicID).value; + if (!isNaN(value_new) && value_new > 0) { + value_new = Math.min(T, Math.max(1, value_new)); + topic_on(document.getElementById(topicID + value_new)); + vis_state.topic = value_new; + state_save(true); + document.getElementById(topicID).value = vis_state.topic; + } + }); + + d3.select("#" + topicClear) + .on("click", function() { + state_reset(); + state_save(true); + }); + + d3.select("#" + wordID) + .on("keyup", function() { + // remove doc selection if it exists (from a saved URL) + var docElem = document.getElementById(docID + vis_state.doc); + if (docElem !== undefined) doc_off(docElem); + vis_state.doc = ""; + // remove topic selection if it exists (from a saved URL) + var topicElem = document.getElementById(topicID + vis_state.topic); + if (topicElem !== undefined) topic_off(topicElem); + vis_state.topic = ""; + word_off(document.getElementById(wordID + vis_state.word)); + var value_new = document.getElementById(wordID).value; + if (!isNaN(value_new) && value_new > 0) { + value_new = Math.min(W, Math.max(1, value_new)); + word_on(document.getElementById(wordID + value_new)); + vis_state.word = value_new; + state_save(true); + document.getElementById(wordID).value = vis_state.word; + } + }); + + d3.select("#" + wordClear) + .on("click", function() { + state_reset(); + state_save(true); + }); + + + // create linear scaling to pixels (and add some padding on outer region of scatterplot) + var doc_xrange = d3.extent(docMdsData, function(d) { + return d.x; + }); //d3.extent returns min and max of an array + var doc_xdiff = doc_xrange[1] - doc_xrange[0], + doc_xpad = 0.05; + var doc_yrange = d3.extent(docMdsData, function(d) { + return d.y; + }); + var doc_ydiff = doc_yrange[1] - doc_yrange[0], + doc_ypad = 0.05; + + if (doc_xdiff > doc_ydiff) { + var doc_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([doc_xrange[0] - doc_xpad * doc_xdiff, doc_xrange[1] + doc_xpad * doc_xdiff]); + + var doc_yScale = d3.scale.linear() + .range([mdsheight, 0]) + .domain([doc_yrange[0] - 0.5*(doc_xdiff - doc_ydiff) - doc_ypad*doc_xdiff, doc_yrange[1] + 0.5*(doc_xdiff - doc_ydiff) + doc_ypad*doc_xdiff]); + } else { + var doc_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([doc_xrange[0] - 0.5*(doc_ydiff - doc_xdiff) - doc_xpad*doc_ydiff, doc_xrange[1] + 0.5*(doc_ydiff - doc_xdiff) + doc_xpad*doc_ydiff]); + + var doc_yScale = d3.scale.linear() + .range([mdsheight, 0]) + .domain([doc_yrange[0] - doc_ypad * doc_ydiff, doc_yrange[1] + doc_ypad * doc_ydiff]); + } + + // create linear scaling to pixels (and add some padding on outer region of scatterplot) + var topic_xrange = d3.extent(topicMdsData, function(d) { + return d.x; + }); //d3.extent returns min and max of an array + var topic_xdiff = topic_xrange[1] - topic_xrange[0], + topic_xpad = 0.05; + var topic_yrange = d3.extent(topicMdsData, function(d) { + return d.y; + }); + var topic_ydiff = topic_yrange[1] - topic_yrange[0], + topic_ypad = 0.05; + + if (topic_xdiff > topic_ydiff) { + var topic_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([topic_xrange[0] - topic_xpad * topic_xdiff, topic_xrange[1] + topic_xpad * topic_xdiff]); + + var topic_yScale = d3.scale.linear() + 
.range([mdsheight, 0]) + .domain([topic_yrange[0] - 0.5*(topic_xdiff - topic_ydiff) - topic_ypad*topic_xdiff, topic_yrange[1] + 0.5*(topic_xdiff - topic_ydiff) + topic_ypad*topic_xdiff]); + } else { + var topic_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([topic_xrange[0] - 0.5*(topic_ydiff - topic_xdiff) - topic_xpad*topic_ydiff, topic_xrange[1] + 0.5*(topic_ydiff - topic_xdiff) + topic_xpad*topic_ydiff]); + + var topic_yScale = d3.scale.linear() + .range([mdsheight, 0]) + .domain([topic_yrange[0] - topic_ypad * topic_ydiff, topic_yrange[1] + topic_ypad * topic_ydiff]); + } + + // create linear scaling to pixels (and add some padding on outer region of scatterplot) + var word_xrange = d3.extent(wordMdsData, function(d) { + return d.x; + }); //d3.extent returns min and max of an array + var word_xdiff = word_xrange[1] - word_xrange[0], + word_xpad = 0.05; + var word_yrange = d3.extent(wordMdsData, function(d) { + return d.y; + }); + var word_ydiff = word_yrange[1] - word_yrange[0], + word_ypad = 0.05; + + if (word_xdiff > word_ydiff) { + var word_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([word_xrange[0] - word_xpad * word_xdiff, word_xrange[1] + word_xpad * word_xdiff]); + + var word_yScale = d3.scale.linear() + .range([mdsheight, 0]) + .domain([word_yrange[0] - 0.5*(word_xdiff - word_ydiff) - word_ypad*word_xdiff, word_yrange[1] + 0.5*(word_xdiff - word_ydiff) + word_ypad*word_xdiff]); + } else { + var word_xScale = d3.scale.linear() + .range([0, mdswidth]) + .domain([word_xrange[0] - 0.5*(word_ydiff - word_xdiff) - word_xpad*word_ydiff, word_xrange[1] + 0.5*(word_ydiff - word_xdiff) + word_xpad*word_ydiff]); + + var word_yScale = d3.scale.linear() + .range([mdsheight, 0]) + .domain([word_yrange[0] - word_ypad * word_ydiff, word_yrange[1] + word_ypad * word_ydiff]); + } + + + // Create new svg element (that will contain everything): + var svg = d3.select(to_select).append("svg") + .attr("width", 3 * (mdswidth + margin.left) + margin.right) + .attr("height", mdsheight + 2 * margin.top + margin.bottom + 2 * rMax); + + // Add a group for the doc plot + var doc_plot = svg.append("g") + .attr("id", docPanelID) + .attr("class", "docpoints") + .attr("transform", "translate(" + margin.left + "," + 2 * margin.top + ")"); + + // Create line element b/w doc and topic plot + var doc_topic_partition = doc_plot.append("line") + .attr("x1", mdswidth) + .attr("x2", mdswidth) + .attr("y1", 20) + .attr("y2", mdsheight) + .attr("stroke", "black") + + // Create a group for the topic plot + var topic_plot = svg.append("g") + .attr("id", topicPanelID) + .attr("class", "topicpoints") + // .attr("align","center") + .attr("transform", "translate(" + (mdswidth + 2 * margin.left) + "," + 2 * margin.top + ")"); + + // Create line element b/w topic and word plot + var topic_word_partition = topic_plot.append("line") + .attr("x1", mdswidth) + .attr("x2", mdswidth) + .attr("y1", 20) + .attr("y2", mdsheight) + .attr("stroke", "black") + + // Add a group for the word plot + var word_plot = svg.append("g") + .attr("id", wordPanelID) + .attr("class", "wordpoints") + // .attr("align","right") + .attr("transform", "translate(" + (2 * mdswidth + 3 * margin.left) + "," + 2 * margin.top + ")"); + + + // Clicking on the doc_plot should clear the selection + doc_plot + .append("rect") + .attr("x", 0) + .attr("y", 0) + .attr("height", mdsheight) + .attr("width", mdswidth) + .style("fill", color1) + .attr("opacity", 0) + .on("click", function() { + state_reset(); + state_save(true); + }); + + // 
Clicking on the topic_plot should clear the selection + topic_plot + .append("rect") + .attr("x", 0) + .attr("y", 0) + .attr("height", mdsheight) + .attr("width", mdswidth) + .style("fill", color1) + .attr("opacity", 0) + .on("click", function() { + state_reset(); + state_save(true); + }); + + // Clicking on the word_plot should clear the selection + word_plot + .append("rect") + .attr("x", 0) + .attr("y", 0) + .attr("height", mdsheight) + .attr("width", mdswidth) + .style("fill", color1) + .attr("opacity", 0) + .on("click", function() { + state_reset(); + state_save(true); + }); + + + // bind mdsData to the points in the doc panel: + var docpoints = doc_plot.selectAll("docpoints") + .data(docMdsData) + .enter(); + + var docs_tooltip = d3.select("body") + .append("div") + .style("position", "absolute") + .style("z-index", "10") + .style("visibility", "hidden") + .attr("stroke", "black") + .text("docs_tooltip"); + + var docs_text_tooltip = d3.select("body") + .append("div") + .style("position", "relative") + .style("z-index", "100") + .style("height", "100vh") + .style("overflow", "scroll") + .style("visibility", "hidden") + .attr("stroke", "black") + .text("docs_text_tooltip"); + + // draw circles + docpoints.append("circle") + .attr("class", "docdot") + .style("opacity", function(d) { + return ((d.Freq/10)*0.2); + }) + .style("fill", color1) + .attr("r", Math.sqrt(mdswidth*mdsheight*circle_prop/Math.PI)/(1.5*D)) + .attr("cx", function(d) { + return (doc_xScale(+d.x)); + }) + .attr("cy", function(d) { + return (doc_yScale(+d.y)); + }) + .attr("stroke", "black") + .attr("id", function(d) { + return (docID + d.docs); + }) + .text(function(d) { + return d.docs; + }) + .on("mouseover", function(d) { + docs_tooltip.text(d.docs); + docs_tooltip.style("visibility", "visible"); + var old_doc = docID + vis_state.doc; + if (vis_state.doc > 0 && old_doc!= this.id) { + doc_off(document.getElementById(old_doc)); + } + doc_on(this); + }) + .on("click", function(d) { + // prevent click event defined on the div container from firing + // http://bl.ocks.org/jasondavies/3186840 + d3.event.stopPropagation(); + var old_doc = docID + vis_state.doc; + if (vis_state.doc > 0 && old_doc != this.id) { + doc_off(document.getElementById(old_doc)); + } + // make sure doc input box value and fragment reflects clicked selection + document.getElementById(docID).value = vis_state.doc = d.docs; + state_save(true); + doc_on(this); + }) + .on("dblclick", function(d) { + docs_text_tooltip.text(d.doc_texts); + docs_text_tooltip.style("visibility", "visible"); + }) + .on("mousemove", function(){ + docs_tooltip.style("top", (d3.event.pageY-10)+"px").style("left",(d3.event.pageX+10)+"px"); + docs_text_tooltip.style("top", (d3.event.pageY-10)+"px").style("left",(d3.event.pageX+10)+"px"); + }) + .on("mouseout", function(d) { + docs_tooltip.style("visibility", "hidden"); + docs_text_tooltip.style("visibility", "hidden"); + if (vis_state.doc != d.docs) doc_off(this); + if (vis_state.doc > 0) doc_on(document.getElementById(docID + vis_state.doc)); + }); + + // bind mdsData to the points in the topic panel: + var topicpoints = topic_plot.selectAll("topicpoints") + .data(topicMdsData) + .enter(); + + // text to indicate topic + topicpoints.append("text") + .attr("class", "topic_txt") + .attr("x", function(d) { + return (topic_xScale(+d.x)); + }) + .attr("y", function(d) { + return (topic_yScale(+d.y) + 4); + }) + .attr("stroke", "black") + .attr("opacity", 1) + .style("text-anchor", "middle") + .style("font-size", "11px") + 
.style("fontWeight", 100) + .text(function(d) { + return d.topics; + }); + + // draw circles + topicpoints.append("circle") + .attr("class", "topicdot") + .style("opacity", function(d) { + return ((d.Freq/10)*0.2); + }) + .style("fill", color1) + .attr("r", Math.sqrt(mdswidth*mdsheight*circle_prop/Math.PI)/(1.5*T)) + .attr("cx", function(d) { + return (topic_xScale(+d.x)); + }) + .attr("cy", function(d) { + return (topic_yScale(+d.y)); + }) + .attr("stroke", "black") + .attr("id", function(d) { + return (topicID + d.topics); + }) + .on("mouseover", function(d) { + var old_topic = topicID + vis_state.topic; + if (vis_state.topic > 0 && old_topic!= this.id) { + topic_off(document.getElementById(old_topic)); + } + topic_on(this); + }) + .on("click", function(d) { + // prevent click event defined on the div container from firing + // http://bl.ocks.org/jasondavies/3186840 + d3.event.stopPropagation(); + var old_topic = topicID + vis_state.topic; + if (vis_state.topic > 0 && old_topic != this.id) { + topic_off(document.getElementById(old_topic)); + } + // make sure topic input box value and fragment reflects clicked selection + document.getElementById(topicID).value = vis_state.topic = d.topics; + state_save(true); + topic_on(this); + }) + .on("mouseout", function(d) { + if (vis_state.topic != d.topics) topic_off(this); + if (vis_state.topic > 0) topic_on(document.getElementById(topicID + vis_state.topic)); + }); + + // bind mdsData to the points in the word panel: + var wordpoints = word_plot.selectAll("wordpoints") + .data(wordMdsData) + .enter(); + + var tooltip = d3.select("body") + .append("div") + .style("position", "absolute") + .style("z-index", "10") + .style("visibility", "hidden") + .attr("stroke", "black") + .text("a simple tooltip"); + + // draw circles + wordpoints.append("circle") + .attr("class", "worddot") + .style("opacity", function(d) { + return ((d.Freq/10)*0.2); + }) + .style("fill", color1) + .attr("r", Math.sqrt(mdswidth*mdsheight*circle_prop/Math.PI)/(1.5*W)) + .attr("cx", function(d) { + return (word_xScale(+d.x)); + }) + .attr("cy", function(d) { + return (word_yScale(+d.y)); + }) + .attr("stroke", "black") + .attr("id", function(d) { + return (wordID + d.vocab); + }) + .text(function(d) { + return d.vocab; + }) + .on("mouseover", function(d) { + tooltip.text(d.vocab); + tooltip.style("visibility", "visible"); + var old_word = wordID + vis_state.word; + if (vis_state.word > 0 && old_word!= this.id) { + word_off(document.getElementById(old_word)); + } + word_on(this); + }) + .on("click", function(d) { + // prevent click event defined on the div container from firing + // http://bl.ocks.org/jasondavies/3186840 + d3.event.stopPropagation(); + var old_word = wordID + vis_state.word; + if (vis_state.word > 0 && old_word != this.id) { + word_off(document.getElementById(old_word)); + } + // make sure word input box value and fragment reflects clicked selection + document.getElementById(wordID).value = vis_state.word = d.vocab; + state_save(true); + word_on(this); + }) + .on("mousemove", function(){ + return tooltip.style("top", (d3.event.pageY-10)+"px").style("left",(d3.event.pageX+10)+"px"); + }) + .on("mouseout", function(d) { + if (vis_state.word != d.vocab) word_off(this); + if (vis_state.word > 0) word_on(document.getElementById(wordID + vis_state.word)); + return tooltip.style("visibility", "hidden"); + }); + + + // dynamically create the doc/topic/word input forms at the top of the page + function init_forms(docID, topicID, wordID) { + + // create container div for 
topic and lambda input: + var inputDiv = document.createElement("div"); + inputDiv.setAttribute("id", topID); + inputDiv.setAttribute("style", "width: 1210px"); // to match the width of the main svg element + document.getElementById(visID).appendChild(inputDiv); + + // doc input container: + var docDiv = document.createElement("div"); + docDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; display: inline-block; width: " + mdswidth + "px; height: 50px; float: left"); + inputDiv.appendChild(docDiv); + + var docLabel = document.createElement("label"); + docLabel.setAttribute("for", docID); + docLabel.setAttribute("style", "font-family: sans-serif; font-size: 14px"); + docLabel.innerHTML = "Document: "; + docDiv.appendChild(docLabel); + + var docInput = document.createElement("input"); + docInput.setAttribute("style", "width: 50px"); + docInput.type = "text"; + docInput.min = "0"; + docInput.max = D; // assumes the data has already been read in + docInput.step = "1"; + docInput.value = "0"; // a value of 0 indicates no topic is selected + docInput.id = docID; + docDiv.appendChild(docInput); + + var clear = document.createElement("button"); + clear.setAttribute("id", docClear); + clear.setAttribute("style", "margin-left: 5px"); + clear.innerHTML = "Clear Document"; + docDiv.appendChild(clear); + + // topic input container: + var topicDiv = document.createElement("div"); + topicDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; display: inline-block; width: " + mdswidth + "px; height: 50px; float: left; margin-left: 450px"); + inputDiv.appendChild(topicDiv); + + var topicLabel = document.createElement("label"); + topicLabel.setAttribute("for", topicID); + topicLabel.setAttribute("style", "font-family: sans-serif; font-size: 14px"); + topicLabel.innerHTML = "Topic: "; + topicDiv.appendChild(topicLabel); + + var topicInput = document.createElement("input"); + topicInput.setAttribute("style", "width: 50px"); + topicInput.type = "text"; + topicInput.min = "0"; + topicInput.max = T; // assumes the data has already been read in + topicInput.step = "1"; + topicInput.value = "0"; // a value of 0 indicates no topic is selected + topicInput.id = topicID; + topicDiv.appendChild(topicInput); + + var clear = document.createElement("button"); + clear.setAttribute("id", topicClear); + clear.setAttribute("style", "margin-left: 5px"); + clear.innerHTML = "Clear Topic"; + topicDiv.appendChild(clear); + + // word input container: + var wordDiv = document.createElement("div"); + wordDiv.setAttribute("style", "padding: 5px; background-color: #e8e8e8; display: inline-block; width: " + mdswidth + "px; height: 50px; float: right; margin-right: 30px"); + inputDiv.appendChild(wordDiv); + + var wordLabel = document.createElement("label"); + wordLabel.setAttribute("for", wordID); + wordLabel.setAttribute("style", "font-family: sans-serif; font-size: 14px"); + wordLabel.innerHTML = "Word: "; + wordDiv.appendChild(wordLabel); + + var wordInput = document.createElement("input"); + wordInput.setAttribute("style", "width: 50px"); + wordInput.type = "text"; + wordInput.min = "0"; + wordInput.max = W; // assumes the data has already been read in + wordInput.step = "1"; + wordInput.value = "0"; // a value of 0 indicates no word is selected + wordInput.id = wordID; + wordDiv.appendChild(wordInput); + + var clear = document.createElement("button"); + clear.setAttribute("id", wordClear); + clear.setAttribute("style", "margin-left: 5px"); + clear.innerHTML = "Clear Word"; + 
wordDiv.appendChild(clear); + + } + + + ////////////////////////////////////////////////////////////////////////////// + + // function to update topic/word plot when a doc is selected + // the circle argument should be the appropriate circle element + function doc_on(circle) { + if (circle == null) return null; + + // grab data bound to this element + var d = circle.__data__; + var docs = d.docs; + + // change opacity and fill of the selected circle + circle.style.opacity = highlight_opacity; + circle.style.fill = color2; + + + // word interactions + + // grab the word-plot data for this doc only: + var dat1 = doc_word_info.filter(function(d) { + return d.Doc == docs; + }); + + var w = dat1.length; // number of words for this doc + + // freq depicted using color intensity rather than radius (T = total vocab) + var word_radius = []; + for (var i = 0; i < W; ++i) { + word_radius[i] = 0; + } + for (i = 0; i < w; i++) { + word_radius[dat1[i].Word] = dat1[i].Freq; + } + + var size = []; + for (var i = 0; i < W; ++i) { + size[i] = 0; + } + for (i = 0; i < w; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. + size[dat1[i].Word] = 11; + } + + // var rScaleCond = d3.scale.sqrt() + // .domain([0, 1]).range([0, rMax]); + + // Change color of bubbles according to the doc's distribution over words + d3.selectAll(to_select + " .worddot") + .data(word_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .worddot") + .data(wordMdsData); + + + // topic interactions + + var dat2 = doc_topic_info.filter(function(d) { + return d.Doc == docs; + }); + + var t = dat2.length; // number of topics for this doc + + var topic_radius = []; + for (var i = 0; i < T; ++i) { + topic_radius[i] = 0; + } + for (i = 0; i < t; i++) { + topic_radius[dat2[i].Topic] = dat2[i].Freq; + } + + var size2 = []; + for (var i = 0; i < T; ++i) { + size2[i] = 0; + } + for (i = 0; i < t; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. 
+ size2[dat2[i].Topic] = 11; + } + + // Change color of bubbles according to the doc's distribution over topics + d3.selectAll(to_select + " .topicdot") + .data(topic_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .topicdot") + .data(topicMdsData); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .topic_txt") + // .data(size2) + // .transition() + // .style("font-size", function(d) { + // return +d; + // }); + } + + // function to update doc/word plot when a topic is selected + // the circle argument should be the appropriate circle element + function topic_on(circle) { + if (circle == null) return null; + + // grab data bound to this element + var d = circle.__data__; + var topics = d.topics; + + // change opacity and fill of the selected circle + circle.style.opacity = highlight_opacity; + circle.style.fill = color2; + + // doc interactions + + // grab the doc-plot data for this topic only: + var dat1 = topic_doc_info.filter(function(d) { + return d.Topic == topics; + }); + + var dd = dat1.length; // number of docs for this topic + + // freq depicted using color intensity rather than radius (T = total vocab) + var doc_radius = []; + for (var i = 0; i < D; ++i) { + doc_radius[i] = 0; + } + for (i = 0; i < dd; i++) { + doc_radius[dat1[i].Doc] = dat1[i].Freq; + } + + var size = []; + for (var i = 0; i < D; ++i) { + size[i] = 0; + } + for (i = 0; i < dd; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. + size[dat1[i].Doc] = 11; + } + + // var rScaleCond = d3.scale.sqrt() + // .domain([0, 1]).range([0, rMax]); + + // Change color of bubbles according to the doc's distribution over words + d3.selectAll(to_select + " .docdot") + .data(doc_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .docdot") + .data(docMdsData); + + + // word interactions + + var dat2 = topic_word_info.filter(function(d) { + return d.Topic == topics; + }); + + var w = dat2.length; // number of words for this topic + + var word_radius = []; + for (var i = 0; i < T; ++i) { + word_radius[i] = 0; + } + for (i = 0; i < w; i++) { + word_radius[dat2[i].Word] = dat2[i].Freq; + } + + var size2 = []; + for (var i = 0; i < W; ++i) { + size2[i] = 0; + } + for (i = 0; i < w; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. 
+ size2[dat2[i].Word] = 11; + } + + // Change color of bubbles according to the topic's distribution over word + d3.selectAll(to_select + " .worddot") + .data(word_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .worddot") + .data(wordMdsData); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .topic_txt") + // .data(size2) + // .transition() + // .style("font-size", function(d) { + // return +d; + // }); + } + + // function to update doc/topic plot when a word is selected + // the circle argument should be the appropriate circle element + function word_on(circle) { + if (circle == null) return null; + + // grab data bound to this element + var d = circle.__data__; + var vocab = d.vocab; + + // change opacity and fill of the selected circle + circle.style.opacity = highlight_opacity; + circle.style.fill = color2; + + // doc interactions + + // grab the doc-plot data for this word only: + var dat1 = word_doc_info.filter(function(d) { + return d.Word == vocab; + }); + + var dd = dat1.length; // number of docs for this word + + // freq depicted using color intensity rather than radius (T = total vocab) + var doc_radius = []; + for (var i = 0; i < D; ++i) { + doc_radius[i] = 0; + } + for (i = 0; i < dd; i++) { + doc_radius[dat1[i].Doc] = dat1[i].Freq; + } + + var size = []; + for (var i = 0; i < D; ++i) { + size[i] = 0; + } + for (i = 0; i < dd; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. + size[dat1[i].Doc] = 11; + } + + // var rScaleCond = d3.scale.sqrt() + // .domain([0, 1]).range([0, rMax]); + + // Change color of bubbles according to the word's distribution over docs + d3.selectAll(to_select + " .docdot") + .data(doc_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .docdot") + .data(docMdsData); + + + // topic interactions + // grab the topic-plot data for this word only: + var dat2 = word_topic_info.filter(function(d) { + return d.Word == vocab; + }); + + var t = dat2.length; // number of topics for this word + + var topic_radius = []; + for (var i = 0; i < T; ++i) { + topic_radius[i] = 0; + } + for (i = 0; i < t; i++) { + topic_radius[dat2[i].Topic] = dat2[i].Freq; + } + + var size2 = []; + for (var i = 0; i < T; ++i) { + size2[i] = 0; + } + for (i = 0; i < t; i++) { + // If we want to also re-size the topic number labels, do it here + // 11 is the default, so leaving this as 11 won't change anything. 
+ size2[dat2[i].Topic] = 11; + } + + // Change color of bubbles according to the doc's distribution over topics + d3.selectAll(to_select + " .topicdot") + .data(topic_radius) + .transition() + .attr("r", function(d) { + //return (rScaleCond(d)); + return (Math.sqrt(d*mdswidth*mdsheight*word_prop/Math.PI)); + }); + + // re-bind mdsData so we can handle multiple selection + d3.selectAll(to_select + " .topicdot") + .data(topicMdsData); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .topic_txt") + // .data(size2) + // .transition() + // .style("font-size", function(d) { + // return +d; + // }); + } + + function doc_off(circle) { + if (circle == null) return circle; + // go back to original opacity/fill + circle.style.opacity = base_opacity; + circle.style.fill = color1; + + d3.selectAll(to_select + " .topicdot") + .data(topicMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + d3.selectAll(to_select + " .worddot") + .data(wordMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .txt") + // .transition() + // .style("font-size", "11px"); + + } + + function topic_off(circle) { + if (circle == null) return circle; + // go back to original opacity/fill + circle.style.opacity = base_opacity; + circle.style.fill = color1; + + d3.selectAll(to_select + " .docdot") + .data(docMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + d3.selectAll(to_select + " .worddot") + .data(wordMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .txt") + // .transition() + // .style("font-size", "11px"); + + } + + function word_off(circle) { + if (circle == null) return circle; + // go back to original opacity/fill + circle.style.opacity = base_opacity; + circle.style.fill = color1; + + d3.selectAll(to_select + " .docdot") + .data(docMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + d3.selectAll(to_select + " .topicdot") + .data(topicMdsData) + .transition() + .attr("r", function(d) { + //return (rScaleMargin(+d.Freq)); + return (Math.sqrt((d.Freq/100)*mdswidth*mdsheight*circle_prop/Math.PI)); + }); + + // // Change sizes of topic numbers: + // d3.selectAll(to_select + " .txt") + // .transition() + // .style("font-size", "11px"); + + } + + + // serialize the visualization state using fragment identifiers -- http://en.wikipedia.org/wiki/Fragment_identifier + // location.hash holds the address information + + var params = location.hash.split("&"); + if (params.length > 1) { + vis_state.doc = params[0].split("=")[1]; + vis_state.topic = params[1].split("=")[1]; + vis_state.word = params[2].split("=")[1]; + + // Idea: write a function to parse the URL string + // only accept values in [0,1] for lambda, {0, 1, ..., K} for topics (any string is OK for term) + // Allow for subsets of the three to be entered: + // (1) topic only (lambda = 1 term = "") + // (2) lambda only 
(topic = 0 term = "") visually the same but upon hovering a topic, the effect of lambda will be seen + // (3) term only (topic = 0 lambda = 1) only fires when the term is among the R most salient + // (4) topic + lambda (term = "") + // (5) topic + term (lambda = 1) + // (6) lambda + term (topic = 0) visually lambda doesn't make a difference unless a topic is hovered + // (7) topic + lambda + term + + // Short-term: assume format of "#topic=k&lambda=l&term=s" where k, l, and s are strings (b/c they're from a URL) + + // Force t (doc identifier) to be an integer between 0 and D: + vis_state.doc = Math.round(Math.min(D, Math.max(0, vis_state.doc))); + // Force t (topic identifier) to be an integer between 0 and T: + vis_state.topic = Math.round(Math.min(T, Math.max(0, vis_state.topic))); + // Force w (word identifier) to be an integer between 0 and W: + vis_state.word = Math.round(Math.min(W, Math.max(0, vis_state.word))); + + // select the doc + if (!isNaN(vis_state.doc)) { + document.getElementById(docID).value = vis_state.doc; + if (vis_state.doc > 0) { + doc_on(document.getElementById(docID + vis_state.doc)); + } + } + + // select the topic + if (!isNaN(vis_state.topic)) { + document.getElementById(topicID).value = vis_state.topic; + if (vis_state.topic > 0) { + topic_on(document.getElementById(topicID + vis_state.topic)); + } + } + + // select the word + if (!isNaN(vis_state.word)) { + document.getElementById(wordID).value = vis_state.word; + if (vis_state.word > 0) { + word_on(document.getElementById(wordID + vis_state.word)); + } + } + } + + + function state_url() { + return location.origin + location.pathname + "#doc=" + vis_state.doc + + "&topic=" + vis_state.topic + "&word=" + vis_state.word; + } + + function state_save(replace) { + if (replace) + history.replaceState(vis_state, "Query", state_url()); + else + history.pushState(vis_state, "Query", state_url()); + } + + function state_reset() { + if (vis_state.doc > 0) { + doc_off(document.getElementById(docID + vis_state.doc)); + } + if (vis_state.topic > 0) { + topic_off(document.getElementById(topicID + vis_state.topic)); + } + if (vis_state.word > 0) { + word_off(document.getElementById(wordID + vis_state.word)); + } + + document.getElementById(docID).value = vis_state.doc = 0; + document.getElementById(topicID).value = vis_state.topic = 0; + document.getElementById(wordID).value = vis_state.word = 0; + state_save(true); + } + + } + + if (typeof data_or_file_name === 'string') + d3.json(data_or_file_name, function(error, data) {visualize(data);}); + else + visualize(data_or_file_name); + + // var current_clicked = { + // what: "nothing", + // element: undefined + // }, + + //debugger; + +}; \ No newline at end of file
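For reference, a minimal usage sketch of the intended call sequence (illustrative, not part of the diff above): it assumes the new modules are importable as `gensim.visualization.gensim_wrap`, that `texts` holds the tokenised documents used to build the dictionary, and that the JSON written by `PreparedData.to_json()` is what `vis.js` loads through its file-name code path (`d3.json`). The toy corpus and the name `topic_vis.json` are placeholders.

# Minimal sketch, assuming the package layout added in this patch.
from gensim.corpora import Dictionary
from gensim.models import LdaModel
from gensim.visualization import gensim_wrap

texts = [["human", "machine", "interface"],
         ["graph", "minors", "trees", "survey"],
         ["human", "trees", "graph"]]          # toy tokenised documents
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(doc) for doc in texts]
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)

prepared = gensim_wrap.prepare(lda, corpus, dictionary, texts=texts)
with open("topic_vis.json", "w") as fout:
    fout.write(prepared.to_json())
# In the page: new TopicModelVis("#vis", "topic_vis.json");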