Skip to content
This repository has been archived by the owner on Jun 23, 2024. It is now read-only.

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
quiver committed Jan 25, 2018
1 parent 21a36f6 commit dc1bf04
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 0 deletions.
58 changes: 58 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,60 @@
# athena-dl
CLI to query Amazon Athena and save the results

# Usage

First, create the SQL files you want to run against Amazon Athena.

```
$ cat foo.sql
SELECT count(*)
FROM elb_logs
$ cat bar.sql
SELECT user_agent
,count(*)
FROM elb_logs
GROUP BY user_agent
```
Then pass the following to `athena-dl`:
- S3 bucket for Athena (Athena uses `aws-athena-query-results-<AWS-ACCOUNT_ID>-<REGION>` as its default S3 bucket name)
- database name
- SQL files
```
$ athena-dl --help
Usage: athena-dl [OPTIONS] [ARGS]...
Options:
--s3bucket TEXT athena log bucket [required]
--database TEXT athena database name [required]
--help Show this message and exit.
$ athena-dl \
--s3bucket aws-athena-query-results-123456789012-us-west-1 \
--database ec-prd \
foo.sql bar.sql
```
Finally, check the results:
```
$ ls -1
athena.log # application log
bar.sql # sql file
bar.sql.csv # query result for bar.sql
bar.sql.log # AWS API response for bar.sql
...
```


# Installation

## install via pip
```
$ pip install athena-dl
```
## install from GitHub

```
$ git clone https://github.com/quiver/athena-dl.git
$ cd athena-dl
$ pip install --editable .
```
61 changes: 61 additions & 0 deletions athena_batch_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import logging
import pprint
import sys
import traceback

import boto3
from retrying import retry

# Module-level AWS handles, created once at import time with no explicit
# region or credentials passed to boto3.
# Athena client: used by poll_status() and query_to_athena().
athena = boto3.client('athena')
# S3 resource: used by query_to_athena() to download the query result file.
s3 = boto3.resource('s3')

@retry(stop_max_attempt_number=10,
       wait_exponential_multiplier=30 * 1000,
       wait_exponential_max=10 * 60 * 1000)
def poll_status(_id):
    '''
    Poll Athena until the query identified by `_id` reaches a terminal state.

    Each call fetches the query execution once and logs it. If the query is
    not finished yet an exception is raised on purpose so that the `retry`
    decorator calls this function again (up to 10 attempts, with exponential
    backoff; the `retrying` library takes its wait values in milliseconds).

    :param _id: Athena QueryExecutionId returned by start_query_execution.
    :returns: the full get_query_execution response once the state is
        SUCCEEDED, FAILED or CANCELLED. Callers must inspect
        result['QueryExecution']['Status']['State'] to tell them apart.
    :raises Exception: while the query is still in a non-terminal state;
        once the retry attempts are exhausted the failure propagates to
        the caller.
    '''
    result = athena.get_query_execution(
        QueryExecutionId=_id
    )

    logging.info(pprint.pformat(result['QueryExecution']))
    state = result['QueryExecution']['Status']['State']

    # CANCELLED is terminal as well: retrying on it would only burn through
    # the whole backoff schedule before failing anyway.
    if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
        return result

    # Any other state means the query is still in progress; raising here is
    # what triggers the next retry attempt.
    raise Exception('query %s has not finished yet (state: %s)' % (_id, state))

def query_to_athena(s3bucket, database, sqlfile):
    '''
    Run the SQL stored in `sqlfile` on Athena and save the outcome beside it.

    Two files are written next to the SQL file:
      * `<sqlfile>.log` - pretty-printed get_query_execution response
      * `<sqlfile>.csv` - query result downloaded from S3 (only when the
        query state is SUCCEEDED)

    :param s3bucket: name of the S3 bucket Athena writes results to; it is
        used both as the query OutputLocation and as the download source.
    :param database: Athena database name used as the query context.
    :param sqlfile: path to a file containing the SQL statement to run.
    '''
    logging.info("sqlfile:" + sqlfile)
    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(sqlfile, 'r') as sql_source:
        sql = sql_source.read()
    logging.info("SQL:" + sql)

    result = athena.start_query_execution(
        QueryString=sql,
        QueryExecutionContext={
            'Database': database
        },
        ResultConfiguration={
            'OutputLocation': 's3://' + s3bucket,
        }
    )

    logging.info(pprint.pformat(result))

    QueryExecutionId = result['QueryExecutionId']
    # Blocks (with retries) until the query reaches a terminal state.
    result = poll_status(QueryExecutionId)

    # save response
    with open(sqlfile + '.log', 'w') as f:
        f.write(pprint.pformat(result, indent=4))

    # save query result from S3
    if result['QueryExecution']['Status']['State'] == 'SUCCEEDED':
        # The result object is looked up at the bucket root under the
        # execution id, matching the OutputLocation passed above.
        s3_key = QueryExecutionId + '.csv'
        query_result = sqlfile + '.csv'
        s3.Bucket(s3bucket).download_file(s3_key, query_result)

    logging.info('FINISHED')
26 changes: 26 additions & 0 deletions athena_dl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python
# vim: set fileencoding=utf8 :

import logging

import click
import athena_batch_query

# Send INFO-and-above records to athena.log (relative to the working
# directory), appending to the file rather than truncating it.
logging.basicConfig(
    format='%(levelname)s:%(asctime)s:%(message)s',
    level=logging.INFO,
    filemode='a',
    filename='athena.log',
)

# Command-line entry point: runs every SQL file given in `args` against
# Athena, one after another, using the given result bucket and database.
# NOTE: documented with comments rather than a docstring on purpose, since
# click would show a docstring in the `--help` output.
@click.command()
@click.option('--s3bucket', help='athena log bucket', required=True)
@click.option('--database', help='athena database name', required=True)
@click.argument('args', nargs=-1)
def cli(s3bucket, database, args):
    logging.info("s3bucket:" + s3bucket)
    logging.info("database:" + database)
    for arg in args:
        try:
            athena_batch_query.query_to_athena(s3bucket, database, arg)
        # `except X as e` works on Python 2.6+ and 3; the old comma form is a
        # SyntaxError on Python 3.
        except Exception as err:
            # Best effort per file: log the failure with its traceback and
            # carry on with the remaining SQL files.
            logging.error(err, exc_info=True)
18 changes: 18 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env python
# vim: set fileencoding=utf8 :

from setuptools import setup

setup(
    name='Athena-DL',
    version='0.1',
    # Ship the modules that actually exist: the console script below imports
    # `athena_dl`, which in turn imports `athena_batch_query`.
    py_modules=['athena_dl', 'athena_batch_query'],
    # Runtime dependencies as imported by the code: boto3, click, and
    # `retrying` (the package that provides `from retrying import retry`).
    install_requires=[
        'boto3',
        'Click',
        'retrying',
    ],
    entry_points='''
        [console_scripts]
        athena-dl=athena_dl:cli
    '''
)

0 comments on commit dc1bf04

Please sign in to comment.