-
Notifications
You must be signed in to change notification settings - Fork 0
/
stack-decoder.js
745 lines (631 loc) · 23.1 KB
/
stack-decoder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
/****************************************************************
* VARIABLES
****************************************************************/
// the chart containing hypotheses
var CHART = new Object();
CHART.size = function() {
var size = -1;
for (var key in this)
size++;
return size;
};
// this maps DP state signatures to valid CSS element IDS
var IDS = new Object();
IDS.size = function() {
var size = -1;
for (var key in this)
size++;
return size;
}
// number of hypotheses added
var NUM_HYPOTHESES = 0;
// start- and end-of-sentence
var SOS = "<s>";
var EOS = "</s>";
// the stacks that hypotheses are placed in
var STACKS = Array();
// lowest LM score
var LM_FLOOR = -100;
// how fast to fade chart items in and out
var FADE_SPEED = 1000;
var AUTOMATE_DELAY = 1000; // ms
/****************************************************************
* INITIALIZATION CODE
****************************************************************/
/*
* Build lists of source- and target-language words.
*/
var row = $("<div></div>").attr('id', 'sourcebox');
for (i = 0; i < WORDS.length; i++) {
var word = WORDS[i][0];
// This function creates the list of translations when a source
// word is clicked. It has to be a separate function like this
// due to Javascripts lexical binding.
var clickfunc = function(index) {
return function() {
// make sure the list is created
var list = create_translations_list(index);
// animate it
if (list.is(":visible")) {
list.slideUp();
$("#source" + index).find('p').css({border: "1px solid #dddddd",
'border-radius': '3px'});
} else {
list.slideDown();
$("#source" + index).find('p').css({border: "1px solid black",
'border-radius': '3px'});
}
}
};
var label = "source" + i;
var td = $("<div></div>")
.addClass("source")
.attr("id",label)
.append($("<p></p>")
.append(word)
.click(clickfunc(i)));
row.append(td);
// document.write("<td><p class='source' id='" + label + "'>" + word + "</p></td>");
// $("td#" + label).click(function() { translation_options(i); });
}
$("#content :first")
.before(row)
.before($("<div></div>").css({"clear":"both"}))
.before($("<div></div>").
attr("id","stacks"))
;
// document.writeln("</tr></table></p>");
$("#automate").click(function() { automate() });
/****************************************************************
* FUNCTIONS
****************************************************************/
/*
* Builds the list of target-language translations of a given source
* word, creating it if necessary, and inserts it into the source word div.
*/
function create_translations_list(i) {
var id = "targetlist" + i;
var list = $("#" + id);
if (list.size() == 0) {
list = $("<ul></ul>")
.attr("id", "targetlist" + i)
.addClass("translation")
.hide();
var num_candidates = min($("#numcandidates").val(), WORDS[i].length - 1);
for (j = 1; j <= num_candidates; j++) {
var word, score;
if (WORDS[i][j] instanceof Array) {
word = WORDS[i][j][0];
score = WORDS[i][j][1];
} else {
word = WORDS[i][j];
score = 0;
}
var label = "target" + i + "-" + j;
var item = $("<li></li>")
.attr("id", label)
.addClass("translation")
.text(word + " ")
.append($("<span></span>").addClass("score").text(sprintf('%.2f',score)))
.data('word', word)
.data('pos', i)
.data('score', score)
.data('itemno', j)
.click(function() {
/* Use the word to extend a hypothesis if one is
* selected.
*/
if (count_selected() == 1) {
var hypothesis = $(".selected");
if (is_legal($(".selected"), $(this))) {
var pos = $(this).data('pos');
if (hypothesis.data('pos')[pos] != 1) {
extend_item(hypothesis, $(this));
// get_stack(item.data('stack')).append(item.fadeIn());
}
}
}
})
.hover(function(e) {
/* On hovering, we highlight the word if one other
* item is selected and this word is a valid
* extension of that hypothesis (according to
* various constraints).
*/
var num_selected = count_selected();
switch(num_selected) {
case 0:
// nothing can be done if nothing is selected
$(this).addClass('illegal');
message("Select a hypothesis to extend.");
break;
case 1:
if (is_legal($(".selected"), $(this))) {
$(this).addClass('hilite');
} else {
$(this).addClass('illegal');
}
}
},function(e) {
$(this).removeClass("hilite illegal");
});
// .draggable({
// cancel: "a.ui-icon",
// revert: function(dropped) {
// return true;
// },
// cursor: "move",
// });
list.append(item);
// document.writeln("<p class='target' id='" + label + "'>" + word + "</p>");
// document.write(p.html());
}
// list.append($("<br></br>").css({"clear":"both"}));
$("#source" + i).append(list);
}
return list;
}
/*
* Takes two JQuery objects representing a hypothesis and a word, and
* return true if the extension is legal under the current set of
* constraints.
*/
function is_legal(hypothesis, word) {
// only highlight if this is a valid extension of that hyp.
// a word is illegal if it is already covered
if (hypothesis.data('pos')[word.data('pos')]) {
return false;
} else {
var permitted_distance = $("#constraints").val();
var lastpos = hypothesis.data('lastpos');
var curpos = word.data('pos');
// permitted
if (permitted_distance == "0")
return true;
else if (permitted_distance == "+1") {
if (curpos == lastpos + 1)
return true;
else
return false;
} else {
// if we're extending the empty hypothesis, or the
// distance is within the permitted distance, we can
// extend
if (lastpos == -1 || (abs(curpos - lastpos) <= permitted_distance) ){
return true;
} else {
return false;
}
}
}
}
/*
* Returns the requested stack, adding it if it doesn't already exist.
* Handles different stack scenarios (single, word-based,
* coverage-based).
*/
function get_stack(which) {
// If we're doing just one stack, make sure it exists and return it
if ($("#numstacks").val() == "one") {
// create the stack if it doesn't exist
if (STACKS.length == 0) {
var stackdiv = $("<div></div>")
.attr("id", "stack" + i)
.addClass('stack-header')
.append($("<h3></h3>")
.text("Stack"))
.append("<div></div>");
$("div#stacks").append(stackdiv);
STACKS.push(stackdiv);
}
return STACKS[0];
} else {
for (i = STACKS.length; i <= which; i++) {
var stackdiv = $("<div></div>")
.attr("id", "stack" + i)
.addClass('stack-header')
.append($("<h3></h3>")
.click(function() {
if ($(this).next().is(':visible'))
$(this).next().slideUp();
else
$(this).next().slideDown();
})
.text("Stack (" + i + ")"))
.append("<div></div>");
if (i == WORDS.length)
stackdiv.addClass("stack-complete");
if (i == 0)
$("div#stacks").append(stackdiv);
else
$("div#stacks :first").before(stackdiv);
STACKS.push(stackdiv);
// $("#debug").append("<p>creating stack " + i + "</p>");
// debug("creating stack " + i)
}
return STACKS[which].children(':last');
}
}
$(".source")
.click(function() {
make_start_item();
});
function compute_dpstate(phrase) {
if ($("#dp").attr('checked')) {
var histsize = 1;
var words = phrase.split(' ');
if (words.length > histsize) {
phrase = "...";
for (i = words.length - histsize; i < words.length; i++)
phrase += " " + words[i];
}
}
// debug("returning " + phrase);
return phrase;
}
function make_start_item() {
// disable options once the user has started clicking around
$(".options").attr('disabled', 'disabled');
var empty_word_item = $("<div></div>")
.data('word', SOS)
.data('pos', -1)
.data('score', 0);
var item = make_item(empty_word_item);
var key = item.data('key');
if (! (key in CHART)) {
// create the chart entry
CHART[key] = item;
// update the chart size display
$("#chartsize").text(CHART.size());
// display it
if (! item.is(':visible')) {
var stack = get_stack(item.data('stack'));
stack.append(item.fadeIn());
}
}
return CHART[key];
}
/*
* This function maps from cell signatures to unique names, which
* names are valid as CSS Id identifiers. This allows us to easily
* select cells visually by referring to this latter name.
*/
function id(key) {
if (! (key in IDS)) {
IDS[key] = "cell" + IDS.size();
}
return IDS[key];
}
function make_item(worditem, olditem) {
var obj = $("<div></div>")
.addClass("stack");
var words = (olditem ? (olditem.data('words') + " ") : "") + worditem.data('word');
var pos = worditem.data('pos');
obj.data('words', compute_dpstate(words));
obj.data('pos', olditem ? olditem.data('pos').slice(0) : new Array());
if (pos != -1)
obj.data('pos')[pos] = 1;
obj.data('covered', create_coverage_display(obj.data('pos')));
obj.data('key', obj.data('words') + " ||| " + obj.data('covered'));
obj.data('lastpos', -1);
obj.data('stack', olditem ? (olditem.data('stack') + 1) : 0);
obj.data('backpointer', olditem ? olditem : null);
obj.data('word', worditem);
obj.attr('id', id(obj.data('key')));
// scoring
var score = worditem.data('score');
if (olditem) {
score += olditem.data('score');
score += bigram_score(olditem.data('words'), worditem.data('word'));
}
obj.data('score', score);
// debug('make_item(' + words + ',' + pos + ')');
// debug("obj stack = " + obj.data('stack') + " wordslen = " + WORDS.length);
// check if the item is complete, and if so, extend it
if (obj.data('stack') == WORDS.length) {
obj.data('words', obj.data('words') + " " + EOS);
obj.data('score', obj.data('score') + bigram_score(obj.data('words'), EOS));
obj.data('complete', true);
var translation = follow_backpointers(obj);
message("Translation: '" + translation + "' (" + sprintf('%.2f', obj.data('score')) + ")");
} else {
obj.data('complete', false);
}
obj.append(obj.data('words'))
.append($("<br></br>"))
.append(sprintf('%.2f', obj.data('score')))
.append($("<br></br>"))
.append(obj.data('covered'))
.hide()
.click(function () {
var obj = this; toggle_selection(obj);
})
// .droppable({
// accept: ".translation",
// hoverClass: "highlight",
// tolerance: 'intersect',
// drop: function(event, ui) {
// extend_item($(this), ui.draggable);
// // get_stack(item.data('stack')).append(item.fadeIn());
// },
// })
.hover(function () {
$(this).addClass("stackhilite");
// highlight backpointers
$("." + $(this).attr('id')).addClass("dp-hilite");
var obj = $(this).data('backpointer');
while (obj) {
obj.addClass('dp-hilite');
obj = obj.data('backpointer');
}
}, function () {
if (! ($(this).hasClass("selected")))
$(this).removeClass("stackhilite");
// un-hilite DP backpointers
$("." + $(this).attr('id')).removeClass('dp-hilite');
var obj = $(this).data('backpointer');
while (obj) {
obj.removeClass('dp-hilite');
obj = obj.data('backpointer');
}
});
if (obj.data('complete'))
obj.addClass("item-complete");
return obj;
// over: function(event, ui) {
// var item = ui.draggable.data('item');
// var word = item.words;
// var pos = item.pos;
// if (item.pos[pos] == 1)
// }
}
/**
* Takes an existing item and a new word and creates a new item that
* also covers that word.
*/
function extend_item(olditem,worditem) {
var item = make_item(worditem, olditem);
// If the item is not in the chart or has a better score, add it
var key = item.data('key');
if (! (key in CHART) || CHART[key].data('score') < item.data('score')) {
// if there is an old item, delete it
if (key in CHART) {
var olditem = CHART[key];
// remove it
olditem.fadeOut(FADE_SPEED);
// remove anything tagged with it
var id = olditem.attr('id');
$("." + id).removeClass(id);
message("Replacing existing, lower-scoring stack item.");
// } else {
// message("Adding new item to the stack.");
}
// record the current item
CHART[key] = item;
// mark the backpointer items with this item's class id, so
// that we can highlight them
item.data('backpointer').addClass(item.attr('id'));
// add this item's class to the word
item.data('word').addClass(item.attr('id'));
// update the chart size display
$("#chartsize").text(CHART.size());
if (! item.is(":visible")) {
var stack = get_stack(item.data('stack'));
// if the stack is empty, just append it (empty means that
// it just has the title element, so it's of size 1)
var num_children = stack.children().size();
if (num_children == 0) {
stack.append(item.fadeIn(FADE_SPEED));
} else {
// otherwise, insert it into the appropriate position on the stack
var itemscore = item.data('score');
stack.children().each(function(index) {
// If we find an element we're greater than,
// insert before it. This maintains a sorted list
// so long as the long was sorted before
var score = $(this).data('score');
if (itemscore > score) {
$(this).before(item.fadeIn(FADE_SPEED));
// returning false exits the each()
return false;
} else if (index + 1 == num_children) {
// end of the list
$(this).after(item.fadeIn(FADE_SPEED));
} else {
return true;
}
});
// remove entries that fall outside the beam
var stacksize = $("#stacksize").val();
// if (stack.children().size() - 1 > stacksize)
// message("Pruning the stack of " + (stack.children.size() - stacksize - 1) + " candidates");
stack.children().each(function(index) {
// skip the first element (the stack title)
if (index >= stacksize) {
var key = $(this).data('key');
$(this).fadeOut(FADE_SPEED);
delete CHART[key];
}
});
}
}
} else {
// add the item only to remove it
CHART[key].after(item.fadeIn(FADE_SPEED).addClass('X').text('X').fadeOut(FADE_SPEED));
message("Deleting item, since it has a lower probability than an existing item in the chart.");
}
// update the chart size display
$("#hypotheses").text(++NUM_HYPOTHESES);
return CHART[key];
}
// This function takes an array with 1s denoting source-language words
// that have been consumed. It returns a nice HTML display of it. It
// assumes access to the global "words" array (to determine sentence
// length only).
function create_coverage_display(array) {
var covered = "";
for (i = 0; i < WORDS.length; i++) {
if (array[i] == 1) {
covered += "◉";
} else {
covered += "◎";
}
}
return covered;
}
function translation_options() {
// for (i = 1; i < WORDS[index].length; i++) {
// $("div#debug").append("<p>" + i + "/" + j + "</p>");
ensure_stack_exists(2);
$("div#debug").append("<p>MATT</p>");
// }
}
// Converts an ID to the word it represents.
function id2word(label) {
var matches = label.match(/(\d+)-(\d+)/);
var i = matches[1];
var j = matches[2];
return WORDS[i][j];
}
function id2index(label) {
var matches = label.match(/(\d+)-(\d+)/);
var i = matches[1];
return i;
}
function deselect_item(div) {
// deselect this item
$(div).removeClass("selected stackhilite");
// debug("DESELECT: num=" + count_selected());
}
function select_item(div) {
// deselect all other items
$(".selected").removeClass("selected stackhilite");
// select this item
$(div).addClass("stackhilite selected");
}
function toggle_selection(div) {
if ($(div).data('complete')) {
log("Translation: " + follow_backpointers($(div)));
}
if (! ($(div).hasClass("selected")))
select_item(div);
else
deselect_item(div);
}
function highlight(o) {
$(o).addClass('highlight');
debug("highlighting DIV:'" + $(o).id + "'");
}
// returns the number of current selected objects
function count_selected() {
var num = $(".selected").size();
return num;
}
function log(message) {
$("#debug > div").prepend("<p>" + message + "</p>");
}
function debug(message) {
$("#debug > div").prepend("<p>" + message + "</p>");
}
function min(a,b) {
if (a < b)
return a;
else
return b;
}
function abs(a) {
if (a < 0)
return -a;
return a;
}
function bigram_score(history, word) {
var oldword = history.split(' ').pop();
if (BIGRAM != undefined) {
if (oldword in BIGRAM) {
if (word in BIGRAM[oldword]) {
return BIGRAM[oldword][word];
}
}
}
return LM_FLOOR;
}
function message(text) {
$("#message").text(text);
log(text);
}
/*
* Automates the visualization by simulating click events. This is
* somewhat complicated by the fact that calls to click() are not
* blocking; they are passed to some event model and return
* immediately, so you don't direct control over timing, which is
* crucial to have.
*
* Here I implemented the following nasty workaround to accommodate
* this. We break down the steps of the algorithm into click events
* wrapped in functions that call execute the click and then schedule
* the next event. We therefore create a new function each time that
* has enough information to compute the next move.
*/
function automate() {
AUTOMATE_DELAY = $("#delay").val();
$("#automate").attr('disabled', 'disabled');
// expand all the source boxes
for (var i = 0; i < WORDS.length; i++)
$("#source" + i + " p").click();
// schedule the first event
// the arguments are: automate_click(stackno, hypno, sourceno, transno)
window.setTimeout(automate_click(get_stack(0).children(':first'),0,1), AUTOMATE_DELAY);
}
function automate_click(item, i, j) {
// log('automate_click(' + item.html() + ',' + i + ',' + j);
// select the hypothesis (if not selected)
var stackno = item.data('stack');
// quit if this is the last stack
if (stackno == WORDS.length)
return;
var stack = get_stack(item.data('stack'));
if (! item.hasClass("selected")) {
item.click();
item.addClass('stackhilite');
}
// click on the word
var wordobj = $("#target" + i + "-" + j);
wordobj.click();
wordobj.addClass('hilite');
setTimeout(function() { wordobj.removeClass('hilite') }, AUTOMATE_DELAY);
// if there's another translation of this word, do that next
if (wordobj.next().length != 0) {
setTimeout(function() { automate_click(item, i, j+1) }, AUTOMATE_DELAY);
} else {
// otherwise, find the next valid source word, moving through
// the options until we find one that's not covered
var nexti = i + 1;
while (nexti < WORDS.length && item.data('pos')[nexti] == 1)
nexti++;
// if there's an uncovered word, do that next
if (nexti < WORDS.length) {
setTimeout(function() { automate_click(item, nexti, 1) }, AUTOMATE_DELAY);
} else {
// otherwise, move on to the next hypothesis if possible
var nextitem = item.next();
while (nextitem.length != 0 && nextitem.is(":hidden"))
nextitem = nextitem.next();
// if we ended up at a valid one, run it
if (nextitem.length != 0) {
setTimeout(function() { automate_click(nextitem, 0, 1) }, AUTOMATE_DELAY);
} else {
// otherwise, move on to the next stack
var newitem = get_stack(stackno+1).children(':first');
setTimeout(function() { automate_click(newitem, 0, 1) }, AUTOMATE_DELAY);
}
}
}
}
/*
* Builds up the translation by following the backpointers.
*/
function follow_backpointers(item) {
if (item.data('backpointer'))
return follow_backpointers(item.data('backpointer')) + " " + item.data('word').data('word');
return "";
}