-
Notifications
You must be signed in to change notification settings - Fork 2
/
gettweets.py
152 lines (114 loc) · 4.76 KB
/
gettweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import requests
from requests_oauthlib import OAuth1
import json
import urllib
import MySQLdb as mdb
import sys
from datetime import datetime, timedelta
import calendar
# Consumer and access keys/secrets that identify this application to the
# Twitter API.
# NOTE(review): these credentials (and the MySQL password further down) are
# committed in source. Consider rotating them and loading them from the
# environment or a config file that is kept out of version control.
consumer_key = 'ex8R74QkZc2dSMnqa23UVCo00'
consumer_secret = 'w5eyduTNznYqEwyNy2hpuYNlrfrNGF3aSLPR9LETN1UKmNtx2H'
access_token = '796895774449733634-2HDV8yzn7BgfH5jM5kCVCdCQLAsKFHl'
access_secret = 'FUSPKORsxwdrlfDUWyr6fHXHK0DchqYXSNHMsVi8DoBo7'

# Every request below is authenticated with this OAuth1 object.
auth = OAuth1(consumer_key, consumer_secret, access_token, access_secret)
print(auth)

# If the authentication was successful, the name of the account is printed.
url_1 = 'https://api.twitter.com/1.1/account/verify_credentials.json'
res = requests.get(url_1, auth=auth)
print("My name is", res.json()["name"])

# Search query: the exact phrase "the internet is", with the term RT negated.
query = '-RT "the internet is"'
# Pass the raw query string: requests URL-encodes the values in `params`
# itself, so quoting it beforehand double-encodes it ("%20" -> "%2520") and
# the API receives the percent-escapes as literal text.
params = {
    "count": 100,
    "lang": 'en',
    "q": query,
}
url_2 = 'https://api.twitter.com/1.1/search/tweets.json'
res = requests.get(url_2, auth=auth, params=params)
print(res, res.status_code, res.headers['content-type'])
print(res.url)

# Parsed search response; the insert functions below read tweets['statuses'].
tweets = res.json()

# Module-level MySQL connection shared by all functions below. No default
# database is selected, so every statement qualifies its table names.
con = mdb.connect(host='localhost',
                  user='root',
                  passwd='dwdstudent2015',
                  charset='utf8', use_unicode=True)
def createDatabase():
    """Create the ``tweets_v8`` database if it does not exist yet.

    The statement is run on the module-level connection ``con`` and sets
    ``utf8`` as the database's default character set.
    """
    statement = "CREATE DATABASE IF NOT EXISTS {db} DEFAULT CHARACTER SET 'utf8'".format(
        db='tweets_v8'
    )
    db_cursor = con.cursor()
    db_cursor.execute(statement)
    db_cursor.close()
def createTable_tweets():
    """Create the ``tweets_v8.tweets`` table if it does not exist yet.

    The table holds one row per stored tweet: ``primary_id`` (the primary
    key) and the tweet ``text``. Runs on the module-level connection ``con``.
    """
    # Assembled line by line; the resulting SQL text is unchanged.
    ddl_template = (
        "CREATE TABLE IF NOT EXISTS {db}.{table}\n"
        "(\n"
        "primary_id varchar(250),\n"
        "text varchar(250),\n"
        "PRIMARY KEY(primary_id)\n"
        ")"
    )
    ddl = ddl_template.format(db='tweets_v8', table='tweets')

    table_cursor = con.cursor()
    table_cursor.execute(ddl)
    table_cursor.close()
def createTable_metadata():
    """Create the ``tweets_v8.metadata`` table if it does not exist yet.

    Each row describes one stored tweet (tweet id, author, date, location).
    ``primary_id`` is both the primary key and a foreign key into
    ``tweets_v8.tweets``. Runs on the module-level connection ``con``.
    """
    schema = 'tweets_v8'
    # Assembled line by line; the resulting SQL text is unchanged.
    ddl_template = (
        "CREATE TABLE IF NOT EXISTS {db}.{table}\n"
        "(\n"
        "primary_id varchar(250),\n"
        "tweet_id varchar(250),\n"
        "author varchar(250),\n"
        "date datetime,\n"
        "location varchar(250),\n"
        "PRIMARY KEY(primary_id),\n"
        "FOREIGN KEY(primary_id)\n"
        "REFERENCES {db}.tweets(primary_id)\n"
        ")"
    )
    ddl = ddl_template.format(db=schema, table='metadata')

    table_cursor = con.cursor()
    table_cursor.execute(ddl)
    table_cursor.close()
def insertTweets(timestamp):
    """Store the text of every fetched tweet in ``tweets_v8.tweets``.

    Reads the statuses from the module-level ``tweets`` search response and
    writes them through the module-level connection ``con``. Rows whose
    ``primary_id`` already exists are skipped (``INSERT IGNORE``).

    Args:
        timestamp: Value appended to each tweet id, as ``"<id>-<timestamp>"``,
            to form the row's ``primary_id``.

    Raises:
        KeyError: If the response has no ``'statuses'`` entry or a status
            lacks ``'id'`` or ``'text'``.
    """
    query_template = (
        "INSERT IGNORE INTO tweets_v8.tweets(primary_id, text)\n"
        "VALUES (%s, %s)"
    )
    rows = [
        ('-'.join([str(tweet['id']), str(timestamp)]), tweet['text'])
        for tweet in tweets['statuses']
    ]

    cursor = con.cursor()
    try:
        # One batched call instead of one execute() per tweet.
        cursor.executemany(query_template, rows)
        con.commit()
    finally:
        # Release the cursor even if an insert fails.
        cursor.close()
def insertMetadata(timestamp):
    """Store per-tweet metadata in ``tweets_v8.metadata``.

    Reads the statuses from the module-level ``tweets`` search response and
    writes them through the module-level connection ``con``. Rows whose
    ``primary_id`` already exists are skipped (``INSERT IGNORE``).

    Args:
        timestamp: Value appended to each tweet id, as ``"<id>-<timestamp>"``,
            to form the row's ``primary_id``. It must match the value passed
            to ``insertTweets`` so the ids line up.

    Raises:
        KeyError: If the response has no ``'statuses'`` entry or a status
            lacks one of the fields read below.
        ValueError: If a ``created_at`` value does not match the expected
            ``'%a %b %d %H:%M:%S %z %Y'`` format.
    """
    query_template = (
        "INSERT IGNORE INTO tweets_v8.metadata(primary_id, tweet_id, author, date, location)\n"
        "VALUES (%s, %s, %s, %s, %s)"
    )

    rows = []
    for tweet in tweets['statuses']:
        # Converting the date to EST -- 'hacky' approximation: a fixed
        # five-hour shift with no daylight-saving handling.
        # NOTE(review): assumes created_at is reported in UTC -- confirm.
        created_at = datetime.strptime(tweet['created_at'],
                                       '%a %b %d %H:%M:%S %z %Y')
        est_date = (created_at - timedelta(hours=5)).strftime('%Y-%m-%d %H:%M:%S')

        user = tweet['user']
        rows.append((
            '-'.join([str(tweet['id']), str(timestamp)]),  # primary_id
            tweet['id'],                                   # tweet_id
            user['screen_name'],                           # author
            est_date,                                      # date
            user['location'],                              # location
        ))

    cursor = con.cursor()
    try:
        # One batched call instead of one execute() per tweet.
        cursor.executemany(query_template, rows)
        con.commit()
    finally:
        # Release the cursor even if an insert fails.
        cursor.close()
# Current Unix timestamp in whole seconds, used as the second part of each
# primary_id. datetime.timestamp() interprets the naive local time as local
# time; the previous calendar.timegm(now.utctimetuple()) read the local
# wall-clock fields as if they were UTC, so the value was off by the
# machine's UTC offset.
now = datetime.now()
timestamp = int(now.timestamp())

# Build the schema and load this run's tweets. The tweets table is created
# and filled before metadata because metadata.primary_id is a foreign key
# into tweets.primary_id.
createDatabase()
createTable_tweets()
insertTweets(timestamp)
createTable_metadata()
insertMetadata(timestamp)