-
Notifications
You must be signed in to change notification settings - Fork 3
/
Choropleth-bangladesh.py
192 lines (143 loc) · 4.73 KB
/
Choropleth-bangladesh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %% [markdown]
# # Plotting Choropleth Bangladesh Map using Python
# ---
# A choropleth map is a type of thematic map in which areas are shaded or patterned in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic within each area, such as population density or per-capita income.
#
# %% [markdown]
# Load all districts from the GeoJSON file
# %%
from json import load
# Parse the district boundaries inside a context manager so the file handle
# is closed as soon as loading finishes. The encoding is given explicitly so
# the result does not depend on the platform's default text encoding.
with open(
    'bangladesh_geojson_adm2_64_districts_zillas.json', 'r', encoding='utf-8'
) as geojson_file:
    bd_districts = load(geojson_file)
# %% [markdown]
# Let's check the keys available on a feature in the GeoJSON file
# %%
# Display the keys of a single feature (index 61 looks like an arbitrary sample — TODO confirm).
bd_districts['features'][61].keys()
# %%
# Display that same feature's 'properties' entry; a later cell reads its "ADM2_EN" field.
bd_districts["features"][61]['properties']
# %% [markdown]
# To get population data from Wikipedia, we can use the pandas read_html function
# %%
import pandas as pd
# Parse every HTML table on the Wikipedia page into a list of DataFrames.
wiki_url = 'https://en.wikipedia.org/wiki/Districts_of_Bangladesh'
dfs = pd.read_html(wiki_url)
# %% [markdown]
# The page contains many tables, and read_html returns them as a list. Checking the length of that list helps when picking out the table we need.
# %%
table_count = len(dfs)
print("No of lists are:", table_count)
# %% [markdown]
# Store the data in CSV format for offline use
# %%
# One shared path keeps the write and the read below in agreement.
CSV_PATH = "Districts_of_Bangladesh.csv"

# The districts table is recognised by its population column header, which
# includes the footnote marker "[28]" exactly as scraped from the page.
for table in dfs:
    if "Population (thousands)[28]" in table:
        # to_csv() returns None when given a path, so its result is not kept.
        # index=False avoids a spurious "Unnamed: 0" column on reload.
        table.to_csv(CSV_PATH, index=False)
# %% [markdown]
# Load the saved CSV data back into a dataframe
# %%
df = pd.read_csv(CSV_PATH)
# %% [markdown]
# Check the first rows of the dataframe
# %%
df.head()
# %% [markdown]
# Remove the " District" suffix from each row, because the district names in the GeoJSON data do not carry that suffix.
# %%
df.District
# %%
# Vectorised literal replace (regex=False): unlike apply(lambda x: x.replace(...)),
# it leaves missing values (NaN) untouched instead of raising AttributeError.
df["District"] = df["District"].str.replace(" District", "", regex=False)
# %%
df.District
# %% [markdown]
# Now it is time to link this dataframe to the GeoJSON file. To do that, we build a lookup from each district name to the id of its GeoJSON feature.
# %%
# Look-up table from each district's English name ("ADM2_EN" in the feature's
# properties) to that feature's existing top-level "id". The ids are used
# as-is; nothing in the GeoJSON is modified.
district_id_map = {
    feature["properties"]["ADM2_EN"]: feature["id"]
    for feature in bd_districts["features"]
}
# %%
district_id_map
# %% [markdown]
# Add the matching GeoJSON id to each row of the dataframe
# %%
# The two data sources are not guaranteed to spell every district the same
# way, so report all unmatched names at once (still as a KeyError) instead of
# stopping at the first one with no context.
unmatched = [
    name for name in df["District"].unique() if name not in district_id_map
]
if unmatched:
    raise KeyError(f"Districts missing from the GeoJSON: {unmatched}")
df["id"] = df["District"].map(district_id_map)
# %% [markdown]
# Now we can see an id column in the dataframe
# %%
df.head()
# %% [markdown]
# Rename the columns so that they read cleanly, without the footnote markers
# %%
# Drop the "[28]" footnote marker from the two column headers.
footnote_free_names = {
    'Population (thousands)[28]': 'Population (thousands)',
    'Area (km2)[28]': 'Area (km2)',
}
df = df.rename(columns=footnote_free_names)
# %% [markdown]
# A bar plot can be used to show population level in each district
# %%
import numpy as np
from matplotlib import cm
import matplotlib.pyplot as plt

# df is rebound with 'District' as its index; the bars are labelled from it.
df = df.set_index('District')

# One colour per row, sampled from the 0.3-0.7 span of the reversed inferno
# colormap, so the palette always matches the number of bars instead of
# assuming exactly 64 rows.
bar_colors = cm.inferno_r(np.linspace(.3, .7, len(df)))

ax = plt.figure(figsize=(20, 10)).add_subplot(1, 1, 1)
ax.bar(df.index, df["Population (thousands)"], color=bar_colors)
# The categorical x axis already labels each bar with its index value, so
# only the label styling is adjusted here. Matplotlib discourages calling
# set_xticklabels() unless the tick positions have been fixed first.
ax.tick_params(axis='x', labelrotation=90, labelsize=7)
ax.set_title("Population level in each district")
ax.set_ylabel('Population (thousands)')
plt.show()
# %% [markdown]
# Now let's make a choropleth map of Bangladesh coloured by district population
# %%
import plotly.express as px
import plotly.io as pio
#pio.renderers.default = 'vscode'
# %%
# Colour each district polygon by its raw population figure, then fit the
# view to the plotted locations and hide the base map.
fig = px.choropleth(
    data_frame=df,
    geojson=bd_districts,
    locations='id',
    color='Population (thousands)',
    title='Bangladesh Population',
).update_geos(fitbounds="locations", visible=False)
fig.show()
# %% [markdown]
# Because Dhaka has by far the largest population, it appears yellow, while the other districts are hard to tell apart because their populations are small compared with Dhaka's. We can use a log scale to solve this.
# %%
# Base-10 logarithm of the population column, stored as a new column.
population = df['Population (thousands)']
df['Population scale'] = np.log10(population)
# %% [markdown]
# The dataframe now has a new column named "Population scale"
# %%
df.head()
# %% [markdown]
# By changing color to 'Population scale' and adding hover_name and hover_data, we get a more informative graph.
# %%
# Same map as before, coloured by the log-scaled column; the hover box is
# titled from the 'Bengali' column and lists the raw population and area.
hover_columns = ['Population (thousands)', 'Area (km2)']
fig = px.choropleth(
    data_frame=df,
    geojson=bd_districts,
    locations='id',
    color='Population scale',
    hover_name='Bengali',
    hover_data=hover_columns,
    title='Bangladesh Population',
)
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
# %% [markdown]
# Customizing the choropleth with a Mapbox base map looks even better.
# %%
# Draw the log-scaled choropleth over a 'carto-positron' tile base map.
# NOTE(review): the centre coordinates are presumably chosen to frame
# Bangladesh at this zoom level — confirm.
map_center = {'lat': 23.6850, 'lon': 90.3563}
hover_columns = ['Population (thousands)', 'Area (km2)']
fig = px.choropleth_mapbox(
    data_frame=df,
    geojson=bd_districts,
    locations='id',
    color='Population scale',
    hover_name='Bengali',
    hover_data=hover_columns,
    title='Bangladesh Population',
    mapbox_style='carto-positron',
    center=map_center,
    zoom=4.8,
    opacity=0.6,
)
fig.show()