-
Notifications
You must be signed in to change notification settings - Fork 28
/
index.js
166 lines (140 loc) · 5.3 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
var Combinatorics = require('js-combinatorics');
var Numbered = require('numbered');
// Util functions for generating schema and utterances
// ===================================================
// Expand an inclusive numeric range (for example 5 to 10) into an array of
// words, one entry per step.
//   start - first number of the range
//   end   - last number of the range (included when a step lands on it)
//   by    - step between numbers; a missing, zero or NaN step becomes 1
// Each number is passed through Numbered.stringify and any hyphens in the
// result are replaced by spaces. Returns an empty array when start > end.
function expandNumberRange(start, end, by) {
  // A falsy step would never advance the loop, so fall back to 1
  var step = by ? by : 1;
  var words = [];
  var current = start;
  while (current <= end) {
    var spelled = Numbered.stringify(current);
    words.push(spelled.replace(/-/g,' '));
    current += step;
  }
  return words;
}
// Tell whether a curly brace expression is a slot name literal.
// A slot literal is written {-|Name}; the check only looks at the first three
// characters of the expression. Returns true for a literal, false otherwise.
function isSlotLiteral(braceExpression) {
  var literalPrefix = "{-|";
  var head = braceExpression.substring(0, 3);
  return head === literalPrefix;
}
// Recognize shortcuts in an utterance value and swap them for the actual values.
//   str        - one "|"-separated value taken from a {...} expression
//   slots      - accepted for interface compatibility; not used here
//   dictionary - optional object mapping a shortcut name to its replacement
//                values; null or a non-object is treated as "no dictionary"
// Returns, in priority order:
//   1. the dictionary entry stored under str (only own, defined properties
//      count, so words such as "constructor" or "toString" are not mistaken
//      for entries inherited from Object.prototype),
//   2. the expanded number range when str contains "N-M" or "N-M by S"
//      (the pattern is matched anywhere in str; text around it is dropped),
//   3. otherwise a one-element array holding str itself.
function expandShortcuts(str, slots, dictionary) {
  // typeof null is "object", so null has to be excluded explicitly
  var hasEntry = dictionary !== null &&
    typeof dictionary == "object" &&
    Object.prototype.hasOwnProperty.call(dictionary, str) &&
    typeof dictionary[str] != "undefined";
  if (hasEntry) {
    return dictionary[str];
  }
  // Numbered ranges, ex: 5-100 by 5
  var match = str.match(/(\d+)\s*-\s*(\d+)(\s+by\s+(\d+))?/);
  if (match) {
    // match[4] is undefined without a "by" clause; unary plus turns that into
    // NaN, which expandNumberRange replaces with a step of 1
    return expandNumberRange(+match[1], +match[2], +match[4]);
  }
  return [str];
}
// Index of the sample value most recently used for each slot name. It lives at
// module level so the rotation continues across calls instead of restarting
// at the first sample value every time.
// A prototype-less object is used so that slot names such as "length",
// "values" or "constructor" are ordinary keys rather than inherited members.
var slotIndexes = Object.create(null);

// Replace the "slot-<name>" placeholder tokens inside each variation with
// sample values, cycling through every slot's sample values in order.
//   variations       - array of variations; each variation is an array of values
//   slotSampleValues - map of slot name -> array of sample values
// Returns a new array of variations; the array passed in is not modified.
function expandSlotValues (variations, slotSampleValues) {
  var result = variations.slice();
  // Nothing to substitute into (and padding below would divide by zero)
  if (result.length === 0) {
    return result;
  }
  Object.keys(slotSampleValues || {}).forEach(function (slot) {
    var sampleValues = slotSampleValues[slot];
    // Without sample values there is nothing to rotate through; leave the
    // placeholder token untouched rather than storing NaN as the slot's index.
    if (sampleValues.length === 0) {
      return;
    }
    var placeholder = "slot-" + slot;
    var idx = (typeof slotIndexes[slot] !== "undefined") ? slotIndexes[slot] : -1;

    // make sure we have enough variations that we can get through the sample values
    // at least once for each alexa-app utterance... this isn't strictly as
    // minimalistic as it could be.
    //
    // our *real* objective is to make sure that each sampleValue gets used once per
    // intent, but each intent spans multiple utterances; it would require heavy
    // restructuring of the way the utterances are constructed to keep track of
    // whether every slot was given each sample value once within an Intent's set
    // of utterances. So we take the easier route, which generates more utterances
    // in the output (but still many fewer than we would get if we did the full
    // cartesian product).
    var baseCount = result.length;
    var padFrom = 0;
    while (result.length < sampleValues.length) {
      result.push(result[padFrom]);
      padFrom = (padFrom + 1) % baseCount;
    }

    result = result.map(function (variation) {
      return variation.map(function (value) {
        if (value !== placeholder) {
          return value;
        }
        // advance the rotation and remember where we are for the next call
        idx = (idx + 1) % sampleValues.length;
        slotIndexes[slot] = idx;
        return sampleValues[idx];
      });
    });
  });
  return result;
}
// Generate a list of utterances from a template.
//   str                  - template; {a|b} lists alternatives, {a|b|SLOT} lists
//                          sample values for a slot, {-|SLOT} is a slot literal
//   slots                - object whose own keys are the known slot names
//   dictionary           - optional shortcut dictionary handed to expandShortcuts
//   exhaustiveUtterances - truthy: slot sample values take part in the full
//                          cartesian product; falsy: slot values are filled in
//                          afterwards by expandSlotValues
// Returns an array of utterance strings in which slots appear as {value|SLOT}
// and slot literals as {SLOT}.
function generateUtterances(str, slots, dictionary, exhaustiveUtterances) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var placeholders = [];
  // Both maps are keyed by slot name. They are prototype-less objects so that
  // a slot called "length" or "constructor" is stored like any other name.
  var slotmap = Object.create(null);
  var slotValues = Object.create(null);

  // First extract sample placeholder values from the string
  var template = str.replace(/\{([^\}]+)\}/g, function(match, p1) {
    if (isSlotLiteral(match)) {
      return match;
    }
    var expandedValues = [];
    var slot;
    var values = p1.split("|");
    var lastValue = values[values.length - 1];
    // If the last of the values is a SLOT name, we need to keep the name in the utterances.
    // Only own properties of slots count, so a plain word that happens to match
    // an Object.prototype member is not mistaken for a slot.
    if (values.length > 1 && slots && hasOwn.call(slots, lastValue) && typeof slots[lastValue] != "undefined") {
      slot = values.pop();
    }
    values.forEach(function(val) {
      Array.prototype.push.apply(expandedValues, expandShortcuts(val, slots, dictionary));
    });
    if (slot) {
      slotmap[slot] = placeholders.length;
    }
    // if we're dealing with minimal utterances, we will delay the expansion of the
    // values for the slots; all the non-slot expansions need to be fully expanded
    // in the cartesian product
    if (!exhaustiveUtterances && slot) {
      placeholders.push(["slot-" + slot]);
      slotValues[slot] = expandedValues;
    } else {
      placeholders.push(expandedValues);
    }
    // Slots keep their name in the template; everything else gets its index
    return "{" + (slot || placeholders.length - 1) + "}";
  });

  var utterances;
  if (placeholders.length > 0) {
    // Generate all possible combinations using the cartesian product
    var variations = Combinatorics.cartesianProduct.apply(Combinatorics, placeholders).toArray();
    if (!exhaustiveUtterances) {
      variations = expandSlotValues(variations, slotValues);
    }
    // Substitute each combination back into the template
    utterances = variations.map(function(values) {
      // Replace numeric placeholders
      var utterance = template.replace(/\{(\d+)\}/g, function(match, p1) {
        return values[p1];
      });
      // Replace slot placeholders
      return utterance.replace(/\{(.*?)\}/g, function(match, p1) {
        return isSlotLiteral(match) ? match : "{" + values[slotmap[p1]] + "|" + p1 + "}";
      });
    });
  } else {
    utterances = [template];
  }

  // Convert all {-|Name} to {Name} to accommodate slot literals
  return utterances.map(function(utterance) {
    return utterance.replace(/\{\-\|/g, "{");
  });
}
module.exports = generateUtterances;