Skip to content

Commit

Permalink
Added threading for geocoding
Browse files Browse the repository at this point in the history
  • Loading branch information
jsoma committed Feb 23, 2018
1 parent 832fea2 commit 31299b7
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 163 deletions.
16 changes: 5 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

[Download the latest release](https://github.com/jsoma/simple-visual-geocoder/releases/latest)

## Batch geocode CSV files _on your own machine_ using the Census API
## Batch geocode CSV files in a nice visual interface

Geocoding is a pain in the neck. If you're just looking at US addresses, though, Simple Visual Geocoder is here to help! **Latitude and longitude are on the way.**
Geocoding is a pain in the neck. If you're just looking at US addresses, though, Simple Visual Geocoder is here to help! **Latitude and longitude, just for you.**

![](screenshots/main.png)

Simple Visual Geocoder takes in CSV files and geocodes them with the [Census Bureau's Batch Geocoding service](https://www.documentcloud.org/documents/3894452-Census-Geocoding-Services-API.html) - **all without knowing Python or using the command line!**

It's more or less a very thin layer on top of the LA Times' [Census Batch Geocoder](https://github.com/datadesk/python-censusbatchgeocoder), with the addition of a few simple tools to help you break out street addresses and city names.

As a fun bonus, you don't need to upload your data into 🌪 THE CLOUD 🌪.
As a fun bonus, you don't need to upload your data into ✨🌪✨ The Cloud ✨🌪✨ (well, the addresses go to the Census Bureau, sure, but not the rest of it).

## Using Simple Visual Geocoder

Expand All @@ -25,13 +25,13 @@ You can download the latest release [from GitHub](https://github.com/jsoma/simpl
1. Click `Browse...` to select your file
2. Pick your columns that contain address, city, state and zipcode (state and zipcode are optional)
3. Click the `Geocode` button and pick a destination for your new geocoded CSV
4. Wait and wait and wait (don't worry, it isn't frozen!)
4. Wait and wait and wait
5. A brand-new CSV file shows up, full of latitudes and longitudes!
6. 🎉🎉🎉

### How long does it take?

It's a mystery to me, but it took a little under ten minutes for the 3000-row `schools.csv` file.
It's a mystery to me, but it took a little under ten minutes for the 3000-row `schools.csv` file, and a little over 1 minute for the 30-row `schools_sample.csv`.

### What platforms does it work on?

Expand Down Expand Up @@ -70,12 +70,6 @@ Try to right-click (control+click) and select **Open**.

You can also go into `System Preferences > Security & Privacy` and change **Allow apps downloaded from:** to be `App Store and identified developers`. You might need to click the lock on the bottom-left corner of the window before it'll let you change anything.

### Help! I told it to geocode and it froze up!

It didn't freeze, it's just busy geocoding for you! It should finish eventually. Just be patient. It takes me about 10 minutes to run 3000.

_At some point it would be nice to implement threads so that this doesn't happen._

### Help! I'm building this app on my own and it's HUGE!

Anaconda on Windows installs versions of numpy & friends that include something called MKL. MKL is big. Anaconda doesn't play nice with `venv` so the only way I could figure out to get around it was to just uninstall Anaconda and install everything from Python.org instead.
Expand Down
94 changes: 80 additions & 14 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
QTableWidget,QTableWidgetItem, QGridLayout, QStyleFactory)
import PyQt5.QtGui
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import Qt
import sys
import os
from PyQt5.QtCore import (Qt, QObject, QRunnable, pyqtSignal,
pyqtSlot, QThreadPool, QTimer)
import os, traceback, sys, signal
import pandas as pd
from collections import OrderedDict
import censusbatchgeocoder
Expand Down Expand Up @@ -39,26 +39,56 @@ def resource_path(path):
current_dir = os.path.dirname(os.path.realpath(__file__))
return os.path.join(current_dir, path)

class WorkerSignals(QObject):
    """Signals a GeocodeWorker uses to report back to whoever started it.

    finished -- carries no payload; emitted when the worker's run() ends.
    error    -- carries one tuple: (exception type, exception value,
                formatted traceback string).
    result   -- carries the object returned by the geocoding call.
    """

    finished = pyqtSignal()
    error = pyqtSignal(tuple)
    result = pyqtSignal(object)

class GeocodeWorker(QRunnable):
    """Runnable that geocodes one batch of rows and reports via signals.

    The outcome is published on ``self.signals`` (a WorkerSignals):
    ``result`` on success, ``error`` on failure, and ``finished`` in
    both cases.
    """

    def __init__(self, data):
        """Store the rows to geocode and create the signal carrier.

        data -- passed unchanged to ``censusbatchgeocoder.geocode``.
        """
        super().__init__()

        self.signals = WorkerSignals()
        self.data = data

    @pyqtSlot()
    def run(self):
        """Geocode ``self.data`` and emit the matching signals."""
        try:
            geocoded = censusbatchgeocoder.geocode(self.data)
        except BaseException as exc:
            # Catch everything so a failure is always reported through the
            # error signal rather than escaping run().
            traceback.print_exc()
            self.signals.error.emit((type(exc), exc, traceback.format_exc()))
        else:
            self.signals.result.emit(geocoded)
        finally:
            # Emitted on success and on failure alike.
            self.signals.finished.emit()

class VisualCensusGeocoder(QWidget):

def __init__(self):
    """Create the thread pool, the once-a-second wait timer, and the UI."""
    super().__init__()

    self.filename = None

    # Pool that background geocoding workers are started on.
    self.threadpool = QThreadPool()

    # Calls geoWaitTick every 1000 ms while running; it is only
    # configured here, not started.
    timer = QTimer()
    timer.setInterval(1000)
    timer.timeout.connect(self.geoWaitTick)
    self.wait_timer = timer

    icon = QIcon(resource_path("worldwide.png"))
    self.setWindowIcon(icon)

    self.initUI()



def initUI(self):
vbox = QVBoxLayout()
# File picker

file_picker = QPushButton("Browse...")
file_picker.clicked.connect(self.showFilePicker)
self.file_picker = QPushButton("Browse...")
self.file_picker.clicked.connect(self.showFilePicker)

row = QHBoxLayout()
row.addWidget(file_picker)
row.addWidget(self.file_picker)
self.file_label = QLabel("Please select a CSV file to geocode")
row.addWidget(self.file_label)

Expand Down Expand Up @@ -115,11 +145,11 @@ def initUI(self):
vbox.addLayout(row)

# Geocode button
geo_button = QPushButton("Geocode")
geo_button.clicked.connect(self.doGeocode)
self.geo_button = QPushButton("Geocode")
self.geo_button.clicked.connect(self.startGeocode)

row = QHBoxLayout()
row.addWidget(geo_button)
row.addWidget(self.geo_button)

vbox.addLayout(row)

Expand Down Expand Up @@ -150,15 +180,46 @@ def prepareForGeocoding(self, row):
data[key] = ""
return data

def doGeocode(self):
def formEnabled(self, state):
    """Enable or disable every input control on the form.

    state -- value forwarded to ``setEnabled`` on the file picker, the
             geocode button, and each field's 'combo' and 'adjustment'
             widgets.
    """
    for button in (self.file_picker, self.geo_button):
        button.setEnabled(state)

    for name in self.field_names:
        field = self.fields[name]
        for part in ('combo', 'adjustment'):
            field[part].setEnabled(state)

def geoWaitStart(self):
    """Enter the waiting state: reset the counter, lock the form, start the timer."""
    # Elapsed ticks; geoWaitTick increments this on each timer timeout.
    self.tick_counter = 0
    self.formEnabled(False)
    self.wait_timer.start()

def geoWaitEnd(self):
    """Leave the waiting state: stop the timer and restore the form."""
    timer = self.wait_timer
    timer.stop()

    # Put the button's idle caption back; geoWaitTick overwrites it with
    # progress text while the timer runs.
    button = self.geo_button
    button.setText("Geocode")

    self.formEnabled(True)

def geoWaitTick(self):
    """Advance the elapsed-time counter and show progress on the button.

    The button text becomes "Processing <row count> rows: <M>m<S>s", where
    the row count is ``len(self.df)`` and the time is derived from the
    number of ticks counted so far.
    """
    self.tick_counter += 1
    # divmod yields (whole minutes, leftover seconds) in a single step.
    elapsed = "%im%is" % divmod(self.tick_counter, 60)
    caption = "Processing %s rows: %s" % (len(self.df), elapsed)
    self.geo_button.setText(caption)

def startGeocode(self):
    """Ask for an output file, then geocode the loaded data in the background.

    Does nothing if no target filename was picked. Otherwise the form is put
    into its waiting state and a GeocodeWorker is started on the thread pool.
    Its ``result`` signal is delivered to ``processGeocodeResult``.
    """
    self.pickTargetFilename()

    if not self.target_filename:
        return

    self.geoWaitStart()

    prepared_data = self.df.fillna("").apply(self.prepareForGeocoding, axis=1)

    # Geocoding must happen only inside the worker. Calling
    # censusbatchgeocoder.geocode() here as well would block the UI thread
    # and run the whole batch a second time.
    worker = GeocodeWorker(prepared_data)
    # Hook into signals
    worker.signals.result.connect(self.processGeocodeResult)
    # The worker emits `finished` whether or not geocoding succeeded, so
    # this guarantees the timer stops and the form is re-enabled even when
    # the worker fails and no result ever arrives. geoWaitEnd only stops the
    # timer, resets the button text and re-enables the form, so running it
    # again after a successful result is harmless.
    worker.signals.finished.connect(self.geoWaitEnd)
    self.threadpool.start(worker)

def processGeocodeResult(self, results):
# Combine geodata with original dataframe, same results
results_df = pd.DataFrame(results)
self.df.merge(results_df,
Expand All @@ -177,6 +238,8 @@ def doGeocode(self):
except:
pass

self.geoWaitEnd()

def updateColumn(self, field):
colnum = field['column']
data_col_name = field['combo'].currentText()
Expand All @@ -197,7 +260,8 @@ def comboChanged(self):
self.updateColumn(self.fields[key])

def updateComboBoxes(self):
options = ['-'] + list(self.df.columns.values) + ['custom', 'none']
# options = ['-'] + list(self.df.columns.values) + ['custom', 'none']
options = ['-'] + list(self.df.columns.values)
for index, key in enumerate(self.field_names):
combo = self.fields[key]['combo']
combo.clear()
Expand Down Expand Up @@ -238,6 +302,8 @@ def onActivated(self, text):


if __name__ == '__main__':
signal.signal(signal.SIGINT, signal.SIG_DFL)

app = QApplication(sys.argv)
if 'windowsvista' in QStyleFactory.keys():
app.setStyle(QStyleFactory.create('windowsvista'))
Expand Down
Binary file modified release/SimpleVisualGeocoder.OSX.zip
Binary file not shown.
Loading

0 comments on commit 31299b7

Please sign in to comment.