-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit dfd12e2
Showing
10 changed files
with
3,574 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
.DS_Store | ||
|
||
# Created by https://www.gitignore.io/api/python | ||
|
||
### Python ### | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
.pytest_cache/ | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule.* | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
|
||
# End of https://www.gitignore.io/api/python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Simple Visual Geocoder | ||
|
||
[Download from GitHub](https://github.com/jsoma/simple-visual-geocoder/releases/download/v0.1/SimpleVisualGeocoder.zip) | ||
|
||
## Batch geocode CSV files _on your own machine_ using the Census API | ||
|
||
Geocoding is a pain in the neck. If you're just looking at US addresses, though, Simple Visual Geocoder is here to help! **Latitude and longitude are on the way.** | ||
|
||
![](screenshots/main.png) | ||
|
||
Simple Visual Geocoder takes in CSV files and geocodes them with the [Census Bureau's Batch Geocoding service](https://www.documentcloud.org/documents/3894452-Census-Geocoding-Services-API.html) - **all without knowing Python or using the command line!** | ||
|
||
It's more or less a very thin layer on top of the LA Times' [Census Batch Geocoder](https://github.com/datadesk/python-censusbatchgeocoder), with the addition of a few simple tools to help you break out street addresses and city names. | ||
|
||
As a fun bonus, you don't need to upload your data into 🌪 THE CLOUD 🌪. | ||
|
||
## Using Simple Visual Geocoder | ||
|
||
### Downloading | ||
|
||
You can download the latest release [from GitHub](https://github.com/jsoma/simple-visual-geocoder/releases/download/v0.1/SimpleVisualGeocoder.zip). | ||
|
||
### Simple How-To | ||
|
||
1. Click `Browse...` to select your file | ||
2. Pick your columns that contain address, city, state and zipcode (state and zipcode are optional) | ||
3. Click `Geocode` button, pick a destination for your new geocoded data | ||
4. Wait and wait and wait (don't worry, it isn't frozen!) | ||
5. A brand-new CSV file shows up, full of latitudes and longitudes! | ||
6. 🎉🎉🎉 | ||
|
||
### Cleaning your data with "Adjustments" | ||
|
||
The Census Bureau requires you to split `address`, `city`, `state`, and `zipcode` into separate fields. But what if your addresses are all in one column and look like this? | ||
|
||
``` | ||
540 Streetsway Avenue, Townsville, NC | ||
101 Rock and Roll St, Apt 3, Guitar City, NV | ||
``` | ||
|
||
Oh no! Do we need to split it apart? Do we need to get rid of that apartment? No! No! **Don't worry, Simple Visual Geocoder is here!** It has built-in data cleaning tools called Adjustments to make splitting long addresses up easy. | ||
|
||
When you pick your column for `address` you can add an Adjustment: "Split up the _big long full address_ according to the commas, and only give me the _first section_." That would give us `540 Streetsway Avenue` and `101 Rock and Roll St`. | ||
|
||
You can also pluck out the states from the end by saying "Split it up by commas and give me the _last_ piece." `NC` and `NV`, delivered to your doorstep. | ||
|
||
There are options to use this on both commas as well as newlines/linebreaks. | ||
|
||
## Troubleshooting | ||
|
||
### I think maybe it only works on OS X? | ||
|
||
But it shouldn't be tough to make it work on PCs, too. | ||
|
||
### Help! I told it to geocode and it froze up! | ||
|
||
It didn't freeze, it's just busy geocoding for you! It should finish eventually. Just be patient. Very, very patient. | ||
|
||
_At some point it would be nice to implement threads so that this doesn't happen._ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
from parsedatetime.pdt_locales import ( | ||
de_DE, en_AU, en_US, | ||
es, nl_NL, pt_BR, | ||
ru_RU, fr_FR) | ||
|
||
from PyQt5.QtWidgets import (QMainWindow, QTextEdit, | ||
QAction, QFileDialog, QApplication, QWidget, QLabel, | ||
QComboBox, QApplication, QHBoxLayout, QVBoxLayout, QPushButton, | ||
QTableWidget,QTableWidgetItem, QGridLayout) | ||
from PyQt5.QtGui import QIcon | ||
from PyQt5.QtCore import Qt | ||
import sys | ||
import os | ||
import pandas as pd | ||
from collections import OrderedDict | ||
import censusbatchgeocoder | ||
import traceback | ||
|
||
# Number of rows shown in the "API Preview" table.
ROW_COUNT = 20


def _piece_picker(separator, position):
    """Return a callable that splits its argument on `separator` and
    returns the piece at index `position` (negative indexes count from
    the end). The callable raises IndexError when there is no such piece.
    """
    def pick(text):
        return text.split(separator)[position]
    return pick


# Display name -> callable applied to a raw cell value. Insertion order is
# the order in which the names appear in the Adjustment drop-downs.
ADJUSTMENTS = OrderedDict([("None", lambda x: x)])
for _label, _separator in (("Comma", ","), ("Newline", "\n")):
    for _suffix, _position in (
        ("first section", 0),
        ("second", 1),
        ("third", 2),
        ("second-to-last", -2),
        ("last", -1),
    ):
        ADJUSTMENTS["%s split: %s" % (_label, _suffix)] = _piece_picker(
            _separator, _position
        )
|
||
class VisualCensusGeocoder(QWidget):
    """Single-window GUI that geocodes a CSV file with censusbatchgeocoder.

    The user picks a CSV, maps its columns onto the four fields in
    ``self.field_names`` (optionally applying one of the module-level
    ``ADJUSTMENTS`` to each), previews the first ``ROW_COUNT`` rows, and
    then writes the geocoded result to a new CSV.
    """

    def __init__(self):
        super().__init__()

        # All three stay None until the user picks a file / a destination,
        # so every slot can cheaply check whether there is anything to do.
        self.filename = None
        self.df = None
        self.target_filename = None

        self.initUI()

    def initUI(self):
        """Build all widgets and layouts, wire up the signals and show the window."""
        vbox = QVBoxLayout()

        # File picker
        file_picker = QPushButton("Browse...")
        file_picker.clicked.connect(self.showFilePicker)

        row = QHBoxLayout()
        row.addWidget(file_picker)
        self.file_label = QLabel("Please select a CSV file to geocode")
        row.addWidget(self.file_label)
        row.addStretch(1)
        vbox.addLayout(row)

        # Not using .keys() to keep them in order when drawing
        self.field_names = ['address', 'city', 'state', 'zipcode']
        self.fields = {
            'address': {'title': 'Address', 'column': 0},
            'city': {'title': 'City', 'column': 1},
            'state': {'title': 'State', 'column': 2},
            'zipcode': {'title': 'Zipcode', 'column': 3},
        }

        for key in self.field_names:
            field_selector = QComboBox(self)
            field_selector.currentIndexChanged.connect(self.comboChanged)
            self.fields[key]['combo'] = field_selector

            adjustment_selector = QComboBox(self)
            # Items are added before connecting so that populating the
            # combo does not fire comboChanged.
            adjustment_selector.addItems(ADJUSTMENTS.keys())
            adjustment_selector.currentIndexChanged.connect(self.comboChanged)
            self.fields[key]['adjustment'] = adjustment_selector

            self.fields[key]['sample'] = QLabel("")

        grid = QGridLayout()
        grid.addWidget(QLabel("<strong>Column Name</strong>"), 0, 1, 1, 2)
        grid.addWidget(QLabel("<strong>Adjustments</strong>"), 0, 3, 1, 2)
        grid.addWidget(QLabel("<strong>Column Sample</strong>"), 0, 5, 1, 2)

        for i, key in enumerate(self.field_names):
            field = self.fields[key]
            grid.addWidget(QLabel(field['title']), i+1, 0, 1, 1, alignment=Qt.AlignRight)
            grid.addWidget(field['combo'], i+1, 1, 1, 2)
            grid.addWidget(field['adjustment'], i+1, 3, 1, 2)
            grid.addWidget(field['sample'], i+1, 5, 1, 2)
        vbox.addLayout(grid)

        # Preview
        vbox.addWidget(QLabel("<strong>API Preview</strong>"))

        self.table = QTableWidget()
        self.table.setRowCount(ROW_COUNT)
        self.table.setColumnCount(4)
        self.table.setHorizontalHeaderLabels(["Address", "City", "State", "ZIP"])
        self.table.verticalHeader().setVisible(False)

        row = QHBoxLayout()
        row.addWidget(self.table)
        vbox.addLayout(row)

        # Geocode button
        geo_button = QPushButton("Geocode")
        geo_button.clicked.connect(self.doGeocode)

        row = QHBoxLayout()
        row.addWidget(geo_button)
        vbox.addLayout(row)

        self.setLayout(vbox)

        self.setGeometry(300, 300, 750, 500)
        self.setWindowTitle('Visual Census Geocoder')

        header = self.table.horizontalHeader()
        # The minimum section size is a QHeaderView setting and has to be
        # *called*; assigning to an attribute of the table has no effect.
        header.setMinimumSectionSize(150)
        # Resize modes are passed as QHeaderView.ResizeMode integers:
        # 1 is Stretch (address, city), 2 is Fixed (state, zip).
        header.setSectionResizeMode(0, 1)
        header.setSectionResizeMode(1, 1)
        header.setSectionResizeMode(2, 2)
        header.setSectionResizeMode(3, 2)
        self.show()

    def _adjustedValue(self, field, row):
        """Return the value of `row` for `field` after its Adjustment.

        `field` is one of the dicts in ``self.fields``; `row` is a pandas
        Series for one CSV row. Returns "" when the selected column does not
        exist in the row (e.g. the '-' placeholder) or when the Adjustment
        cannot be applied (e.g. not enough comma-separated pieces).
        """
        try:
            adjustment = ADJUSTMENTS[field['adjustment'].currentText()]
            return adjustment(row[field['combo'].currentText()])
        except (KeyError, IndexError, AttributeError, TypeError):
            return ""

    def prepareForGeocoding(self, row):
        """Convert one DataFrame row into the dict handed to the geocoder.

        Returns a dict with 'id' (the row's index label) plus one entry per
        name in ``self.field_names``; fields that cannot be extracted are "".
        """
        data = {'id': row.name}
        for key in self.field_names:
            data[key] = self._adjustedValue(self.fields[key], row)
        return data

    def doGeocode(self):
        """Ask for an output path, geocode every row and write the merged CSV.

        Does nothing when no file is loaded or the save dialog is cancelled.
        Blocks the GUI while censusbatchgeocoder runs.
        """
        if self.df is None:
            # Nothing to geocode yet; previously this crashed on self.filename.
            return

        self.pickTargetFilename()
        if not self.target_filename:
            return

        # A plain list of dicts is passed to the geocoder.
        for_geocoding = [
            self.prepareForGeocoding(row)
            for _, row in self.df.fillna("").iterrows()
        ]
        results = censusbatchgeocoder.geocode(for_geocoding)
        results_df = pd.DataFrame(results)
        self.df.merge(results_df,
                      left_index=True,
                      right_on='id',
                      suffixes=("", "_geo")
                      ).to_csv(self.target_filename)
        self._revealInFinder(self.target_filename)

    def _revealInFinder(self, path):
        """Highlight `path` in Finder; a no-op on anything but macOS."""
        if sys.platform != "darwin":
            return
        # Imported here so the file's import block does not need to change.
        import subprocess
        # Argument list instead of a shell string: the path is user-chosen
        # and may contain quotes or other shell metacharacters.
        subprocess.call(["open", "-R", path])

    def updateColumn(self, field):
        """Refresh one preview-table column and its "Column Sample" label.

        `field` is one of the dicts in ``self.fields``. Does nothing until a
        file has been loaded.
        """
        if self.df is None:
            # An Adjustment combo can be changed before any file is picked.
            return

        colnum = field['column']
        data_col_name = field['combo'].currentText()
        preview = self.df.head(ROW_COUNT).fillna("")
        rows = [row for _, row in preview.iterrows()]

        # The sample is the raw, unadjusted first value of the chosen column;
        # it is cleared when no real column is selected so it never goes stale.
        if rows and data_col_name in preview.columns:
            sample = rows[0][data_col_name]
        else:
            sample = ""
        field['sample'].setText("<small>" + sample + "</small>")

        # Always write all ROW_COUNT cells so that rows left over from a
        # previously loaded, longer file are blanked out.
        for position in range(ROW_COUNT):
            if position < len(rows):
                val = self._adjustedValue(field, rows[position])
            else:
                val = ""
            self.table.setItem(position, colnum, QTableWidgetItem(val))

    def comboChanged(self):
        """Slot for every combo box: redraw the whole preview."""
        self.updateTable()

    def updateComboBoxes(self):
        """Fill the column combos from the loaded file and reset the Adjustments."""
        options = ['-'] + list(self.df.columns.values) + ['custom', 'none']
        for key in self.field_names:
            combo = self.fields[key]['combo']
            combo.clear()
            combo.addItems(options)
            self.fields[key]['adjustment'].setCurrentIndex(0)

    def updateTable(self):
        """Redraw every column of the preview table."""
        for key in self.field_names:
            self.updateColumn(self.fields[key])

    def pickTargetFilename(self):
        """Show a save dialog and store the choice in ``self.target_filename``.

        ``self.target_filename`` is set to None when the dialog is cancelled.
        The suggested name is the input path with "-geocoded" inserted before
        the extension.
        """
        if self.filename:
            # splitext only touches the real extension, and never proposes
            # the input file itself when it is not named *.csv.
            root, ext = os.path.splitext(self.filename)
            suggestion = root + "-geocoded" + (ext or ".csv")
        else:
            suggestion = ""

        fname = QFileDialog.getSaveFileName(self,
                                            'Select output filename',
                                            suggestion)

        if fname[0]:
            self.target_filename = fname[0]
        else:
            self.target_filename = None

    def showFilePicker(self):
        """Show an open dialog, load the chosen CSV and refresh the combos.

        Every column is read as text. When the file cannot be read the
        previous state is kept and the error is shown in the file label.
        """
        fname = QFileDialog.getOpenFileName(self,
                                            'Select file')
        if not fname[0]:
            return

        try:
            # pandas' parser and empty-data errors, and decode errors, are
            # all ValueError subclasses.
            loaded = pd.read_csv(fname[0], dtype='str')
        except (OSError, ValueError) as err:
            self.file_label.setText("Could not read %s: %s" % (fname[0], err))
            return

        self.filename = fname[0]
        self.df = loaded
        self.file_label.setText(self.filename)
        self.updateComboBoxes()

    def onActivated(self, text):
        """Show `text` in ``self.lbl``, if such a label exists.

        Nothing in this class creates ``self.lbl``, so this is a no-op
        unless a caller has attached one.
        """
        label = getattr(self, 'lbl', None)
        if label is None:
            return
        label.setText(text)
        label.adjustSize()
|
||
|
||
if __name__ == '__main__':
    # The QApplication has to exist before any widget is constructed.
    application = QApplication(sys.argv)
    # Hold a reference for the lifetime of the event loop; the constructor
    # builds the UI and shows the window itself.
    main_window = VisualCensusGeocoder()
    exit_code = application.exec_()
    sys.exit(exit_code)
Oops, something went wrong.