Skip to content
This repository has been archived by the owner on Feb 1, 2022. It is now read-only.

Created Workers #3

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions workers/DatabaseConnection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import psycopg2
import requests
import os
import worker
from dotenv import load_dotenv
load_dotenv()

# PostgreSQL connection settings, loaded from the .env file (see README).
DB_NAME = os.getenv('DB_NAME')      # database name
DB_HOST = os.getenv('DB_HOST')      # server host (default: localhost)
DB_PORT = os.getenv('DB_PORT')      # server port (default: 5432)
USERNAME = os.getenv('USERNAME')    # PostgreSQL role/user
PASSWORD = os.getenv('PASSWORD')    # password for that role

class DatabaseConnection:
    """Fetch recent tweets from the Twitter v1.1 search API and store them
    in a PostgreSQL ``tweets`` table via psycopg2.

    Connection settings come from the module-level env-var constants.
    """

    def __init__(self):
        """Open a psycopg2 connection and cursor to the configured database."""
        try:
            # Connect to the postgres server. autocommit=True means every
            # execute() is committed immediately, so no explicit commit()
            # is ever needed on this connection.
            self.connection = psycopg2.connect(
                database=DB_NAME,
                user=USERNAME,
                password=PASSWORD,
                host=DB_HOST,
                port=DB_PORT,
            )
            self.connection.autocommit = True
            self.cursor = self.connection.cursor()
            print('CONNECTED!')
        except psycopg2.Error as err:
            # Bug fix: was a bare `except:` that swallowed every exception
            # (including KeyboardInterrupt) with no detail. Catch only
            # database errors and report the cause.
            print('connection unsuccessful:', err)

    def twitter_conn(self):
        """Query the Twitter search API for recent 'Makeup' tweets.

        Returns the raw ``requests.Response`` (not yet parsed or validated);
        BEARER_TOKEN is read from the environment.
        """
        base_url = 'https://api.twitter.com/'
        search_url = '{}1.1/search/tweets.json'.format(base_url)
        access_token = os.getenv('BEARER_TOKEN')

        search_headers = {
            'Authorization': 'Bearer {}'.format(access_token)
        }

        search_params = {
            'q': 'Makeup',
            'result_type': 'recent',
            'count': 3
        }

        # Send request to Twitter API
        search_resp = requests.get(search_url, headers=search_headers, params=search_params)

        return search_resp

    def data_get_and_insert(self, search_resp):
        """Insert each tweet in *search_resp* into the ``tweets`` table.

        Closes the cursor and connection when finished, so the instance
        cannot be reused afterwards.
        """
        # Postgres query and params
        sql_query = """INSERT INTO tweets(name, text, time) VALUES ( %s, %s, %s) RETURNING id ;"""
        inserted_id = None  # renamed from `id`, which shadowed the builtin

        if search_resp.status_code == 200:
            # Convert the response body into a Python dict
            tweet_data = search_resp.json()

            # Get data from Twitter API
            for tweet in tweet_data["statuses"]:
                text = tweet['text']
                time = tweet['created_at']
                name = tweet['user']['screen_name']
                print('FIRST DATA', text + '\n' + time + '\n' + name)

                # Insert data into postgres; each insert is committed
                # immediately because autocommit is enabled in __init__.
                self.cursor.execute(sql_query, (name, text, time))
                inserted_id = self.cursor.fetchone()[0]
                print('SECOND DATA', text + '\n' + time + '\n' + name)

            # Close cursor and connection. The original also called
            # connection.commit(), which is a no-op under autocommit,
            # so it has been dropped.
            self.cursor.close()
            self.connection.close()
        else:
            # Bug fix: the original printed `search_url`, which is only a
            # local variable inside twitter_conn() — this branch raised
            # NameError on any non-200 response. Use the response's URL.
            print('Result for', search_resp.url, 'is unsuccesful')

if __name__ == '__main__':
    # Bug fixes: the class is named DatabaseConnection (the original called
    # DatabaseCon(), a NameError), and twitter_conn() was called twice,
    # doubling the Twitter API request and discarding the first response.
    db = DatabaseConnection()
    search_resp = db.twitter_conn()
    db.data_get_and_insert(search_resp)
67 changes: 67 additions & 0 deletions workers/ORMConnection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import sqlalchemy as db
import psycopg2
import os
import requests
from dotenv import load_dotenv
load_dotenv()

URL_ADDON = os.getenv('DATABASE_URL_ADDON')

def orm_config():
    """Open an ORM connection and reflect the existing ``tweets`` table.

    Returns a ``(metadata, tweets, connection)`` tuple for use by
    ``data_get_and_insert``.
    """
    # ORM connection; URL_ADDON is USER:PASS@HOST:PORT/DBNAME from the .env
    engine = db.create_engine('postgresql+psycopg2://{}'.format(URL_ADDON))
    connection = engine.connect()
    metadata = db.MetaData()
    # Reflect the table schema from the live database. Passing
    # `autoload_with` already implies reflection, so the redundant
    # (and deprecated) `autoload=True` flag has been removed.
    tweets = db.Table('tweets', metadata, autoload_with=engine)

    return metadata, tweets, connection

def twitter_conn():
    """Hit the Twitter v1.1 search endpoint for recent 'Makeup' tweets.

    Returns the raw ``requests.Response``; BEARER_TOKEN is read from the
    environment.
    """
    access_token = os.getenv('BEARER_TOKEN')
    search_url = '{}1.1/search/tweets.json'.format('https://api.twitter.com/')

    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }
    params = {
        'q': 'Makeup',
        'result_type': 'recent',
        'count': 10,
    }

    # Fire the search request and hand back the unparsed response.
    return requests.get(search_url, headers=headers, params=params)

def data_get_and_insert(search_resp, metadata, tweets, connection):
    """Insert every tweet from *search_resp* into *tweets* via the ORM.

    *tweets* and *connection* come from ``orm_config()``; *metadata* is
    accepted for interface compatibility but not used directly here.
    """
    # Tracks the id of the most recently inserted row (renamed from `id`,
    # which shadowed the builtin and was never updated).
    inserted_id = None
    print(search_resp.status_code)
    if search_resp.status_code == 200:

        # convert text into a data object (dictionary) for Python
        tweet_data = search_resp.json()

        # Get data from Twitter API
        for tweet in tweet_data["statuses"]:
            text = tweet['text']
            time = tweet['created_at']
            name = tweet['user']['screen_name']
            print('FIRST DATA', text + '\n' + time + '\n' + name + '\n')

            # ORM insert; consume the RETURNING clause so the generated id
            # is actually read (the original fetched `result` but never
            # used it, discarding the returned id).
            query = tweets.insert().values(name=name, text=text, time=time).returning(tweets.c.id)
            result = connection.execute(query)
            inserted_id = result.scalar()

    else:
        print('Request unsuccesful')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dont commit changes in ur py file when the commit is about ENV vars in README


if __name__ == '__main__':
    # Bug fix: orm_config() was called twice — the first call opened a
    # database engine/connection whose result was thrown away, leaking
    # that connection. Call it once and use its return values.
    metadata, tweets, connection = orm_config()
    search_resp = twitter_conn()
    data_get_and_insert(search_resp, metadata, tweets, connection)
15 changes: 15 additions & 0 deletions workers/Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
requests = "*"
psycopg2 = "*"
python-dotenv = "*"
sqlalchemy = "*"

[requires]
python_version = "3.8"
91 changes: 91 additions & 0 deletions workers/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 85 additions & 0 deletions workers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Worker

## Installation

Ensure that pip and Python are installed

install pipenv for dependencies management. List of dependencies will be listed on Pipfile. Make sure the pipenv path is added to the system.
```sh
pip install --user pipenv
```

install Requests library for HTTP request
```sh
pipenv install requests
```

install psycopg2 for library to access PostgreSQL Database
```sh
pipenv install psycopg2
```

install python-dotenv to add environment variables into the app
```sh
pipenv install python-dotenv
```

install SQLAlchemy for ORM database
```sh
pipenv install SQLAlchemy
```

## Authentication for Twitter
Create an account followed by application creation on Twitter Developer Account. More information can be found in https://developer.twitter.com/en/account/get-started
Get your application tokens at "Keys and Access Tokens"
- consumer key
- consumer secret key
- access token
- access token secret

Bearer Token can be generated from get_bearer_token.py
Bearer Token is required to use the Request lib for Twitter API

## Fetch Data from API
- Insert the Bearer Token into worker.py
- Insert params for the Twitter API
Info about the endpoint of Twitter API https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets#

## Environment variables set up
- Create a .env file and input the variables
- Call dotenv library and os to get the desired variables
variables description :
1. BEARER_TOKEN
A token consisting of letters, digits, and special characters.
An OAuth 2.0 token generated from the Account Token and Account Token Secret on the Twitter developer page. Once these two tokens are available, use get_bearer_token.py to generate a Bearer Token for the account.
2. ACC_TOKEN
A token consisting of letters, digits, and special characters.
This token can be generated after the user creates an account and a new application in the developer page.
3. ACC_TOKEN_SECRET
A token consisting of letters, digits, and special characters.
This token can be generated after the user creates an account and a new application in the developer page.
4. USERNAME
consist of string/int that the user created in PostgreSQL
5. PASSWORD
consist of string/int that the user created in PostgreSQL
6. DB_NAME
consist of string/int that the user created in PostgreSQL
7. DB_PORT and DB_HOST
default value in PostgreSQL are DB_PORT = 5432 , DB_HOST = localhost
8. DATABASE_URL_ADDON
an URL for ORM endpoint of PostgreSQL that consist of
USERNAME:PASSWORD@HOST:PORT/DATABASE_NAME

## Connect to PostgreSQL server and queries
- start postgres server with psql -U <user> <database_name>
- insert all the params to psycopg2.connect
- use INSERT query to insert data each time it is fetched
- commit and close connection and its cursor

## ORM with SQLAlchemy setup and usage
SQLAlchemy is an Object-Relational Mapper that interacts with the database using the Python programming language
- set up the engine and connection with the URL (with params to Postgres)
- keep all database information into Metadata object
- query with the Python language. More info can be found here https://docs.sqlalchemy.org/en/13/core/tutorial.html#connecting


36 changes: 36 additions & 0 deletions workers/get_bearer_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import base64
import requests
import urllib.parse

OAUTH2_TOKEN = 'https://api.twitter.com/oauth2/token'


def get_bearer_token(consumer_key, consumer_secret):
    """Exchange a Twitter consumer key/secret for an OAuth2 bearer token.

    Prints and returns the access token string. Raises
    ``requests.HTTPError`` on an auth failure, so a bad key/secret fails
    loudly instead of with a confusing KeyError on the JSON body.
    """
    # URL-encode both credentials as required by the OAuth2 spec
    consumer_key = urllib.parse.quote(consumer_key)
    consumer_secret = urllib.parse.quote(consumer_secret)
    # "key:secret", base64-encoded, forms the HTTP Basic credential
    bearer_token = consumer_key + ':' + consumer_secret
    base64_encoded_bearer_token = base64.b64encode(bearer_token.encode('utf-8'))
    # Bug fix: the original hard-coded Content-Length to "29", which is
    # fragile (only correct because the body happens to be 29 bytes);
    # requests computes the header from the body automatically.
    headers = {
        "Authorization": "Basic " + base64_encoded_bearer_token.decode('utf-8'),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
    }

    response = requests.post(OAUTH2_TOKEN, headers=headers,
                             data={'grant_type': 'client_credentials'})
    response.raise_for_status()  # surface HTTP/auth failures clearly
    to_json = response.json()
    print("token_type = %s\naccess_token = %s" % (to_json['token_type'], to_json['access_token']))
    # Backward-compatible addition: callers previously received None.
    return to_json['access_token']


def main():
    """Entry point: plug in your app credentials, then request a token."""
    key = 'Enter your consumer key'
    secret = 'Enter your consumer secret'
    print("***** ***** ***** *****")
    get_bearer_token(key, secret)


if __name__ == "__main__":
    # Run only when executed as a script, not when imported as a module.
    main()