-
Notifications
You must be signed in to change notification settings - Fork 2
/
tedbot.py
168 lines (137 loc) · 4.41 KB
/
tedbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
from __future__ import print_function
import markovify
import sys
import math
import random
import re
import flickrapi
import yaml
import json
from pattern.en import parse
from pattern.en import tag
MAX_TITLE = 50
GRAF_LENGTH = 8
GRAF_RANGE = 3
DEBUG = True
MARKOVSTATESIZE = 3
with open("config.yml", 'r') as ymlfile:
cfg = yaml.load(ymlfile)
flickr_key = cfg['flickr']['api_key']
flickr_secret = cfg['flickr']['api_key']
# generate a target length for a paragraph, of the range LENGTH +/- [0-RANGE]
def initTarget():
return GRAF_LENGTH + int(math.floor(random.triangular(0-GRAF_RANGE, GRAF_RANGE)))
def getImage(refstring):
tagged = tag(refstring)
nouns = [word for word,pos in tagged if pos == 'NNP' or pos == 'NP' or pos == 'NN']
try:
query = random.choice(nouns)
except IndexError:
#somehow this string has no nouns!
if DEBUG: print("Paragraph with no nouns:\n" + refstring, file=sys.stderr)
return None
if DEBUG: print(query, file=sys.stderr)
flickr = flickrapi.FlickrAPI(flickr_key, flickr_secret, format='parsed-json')
result = flickr.photos_search(api_key = flickr_key, text = query, privacy_filter = 1, safe_search=1, sort='interestingness-desc', orientation="landscape")
try:
pick = random.choice(result['photos']['photo'])
url = 'https://farm' + str(pick['farm']) + '.staticflickr.com/' + str(pick['server']) + '/' + str(pick['id']) + '_' + str(pick['secret']) + '_z.jpg'
except IndexError:
# there were no results, so the random.choice call failed above. This is OK, we'll just move on.
url = None
image = {}
image['url'] = url
image['noun'] = query
return image
#given a list of sentences, return a list of paragraphs
def graphize(sentences):
grafs = []
currentgraf = ""
counter = 0
target = initTarget()
for sentence in sentences:
currentgraf += sentence
if counter == target:
if DEBUG: print("Ending a graf after " + str(target) + " lines.", file=sys.stderr)
grafs.append(currentgraf)
counter = 0
target = initTarget()
currentgraf = ""
else:
currentgraf += " "
counter += 1
return grafs
def createTitle():
with open("./titles.txt") as f:
titles = f.read()
model = markovify.Text(titles)
title = model.make_short_sentence(MAX_TITLE)
if title is None:
print("No title generated; try futzing with the overlap function.", file=sys.stderr)
return "No title available"
else:
title = title[:1].upper() + title[1:].lower()
return title
#get a list of sentences to make into a speech, then call a utility to format it
def createTalk(seed=None):
with open("./transcripts.txt") as f:
text = f.read()
markovmodel = markovify.Text(text, state_size=MARKOVSTATESIZE)
speech = ""
if DEBUG: print(seed, file=sys.stderr)
if seed:
beginning = seed
words = seed.split()
if len(words) >= MARKOVSTATESIZE:
seed = words[0-MARKOVSTATESIZE]
else:
if len(words) == 1:
seed += " " + seed
else:
seed = None
try:
firstsentence = markovmodel.make_sentence_with_start(beginning=seed)
except KeyError:
firstsentence = None
if firstsentence is None:
print("Seed generated no results. Falling back.", file=sys.stderr)
firstsentence = markovmodel.make_sentence()
else:
firstsentence = beginning + " " + firstsentence
else:
firstsentence = markovmodel.make_sentence()
speechlength = len(firstsentence.split())
sentences = [firstsentence]
while speechlength < 1800:
newsentence = markovmodel.make_sentence()
if (newsentence is not None):
speechlength += len(newsentence.split())
sentences.append(newsentence)
if DEBUG: print(str(speechlength) + " - " + str(len(sentences)), file=sys.stderr)
grafs = graphize(sentences)
slides = []
thumb = None
for graf in grafs:
image = getImage(graf)
if (thumb is None) and (image['url'] is not None):
# assign the first valid image to thumb
thumb = image['url']
slides.append({"image": image, "text": graf})
talk = {}
talk['title'] = createTitle()
talk['slides'] = slides
talk['thumb'] = thumb
return talk
def main():
if len(sys.argv) == 1:
print("TEDBot initiated without seed sentence. To start a speech with a specified opening, try " + sys.argv[0] + " \"Here is my opening sentence\".", file=sys.stderr)
talk = createTalk(None)
else:
if len(sys.argv) > 2:
print("usage: " + sys.argv[0] + "[\"opening sentence is optional\"]", file=sys.stderr)
sys.exit()
else:
talk = createTalk(sys.argv[1])
print(json.dumps(talk))
if __name__ == "__main__":
main()