Ookla open data data preparation and mapping exercise¶

The purpose of the below notebook is to open and aggregate mobile download speeds available across districts in Budapest, Hungary. Data is sourced from Ookla's github page and Open Street Maps.

For the final dashboard that visualises the end product of the analysis, please visit gabordata.pythonanywhere.com.

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import contextily as ctx
In [2]:
minx=18.9264959
miny=47.33765
maxx=19.35269
maxy=47.62049
In [3]:
data = gpd.read_file('./data/ookla/gps_mobile_tiles.shp', bbox=(minx,miny,maxx,maxy))
In [4]:
data.shape
Out[4]:
(2708, 7)
In [5]:
data.head()
Out[5]:
quadkey avg_d_kbps avg_u_kbps avg_lat_ms tests devices geometry
0 1202310303310301 9921 7938 14 1 1 POLYGON ((18.92395 47.62098, 18.92944 47.62098...
1 1202310303310312 134509 19991 23 3 2 POLYGON ((18.92944 47.61727, 18.93494 47.61727...
2 1202310303310330 214577 49852 22 2 2 POLYGON ((18.92944 47.61357, 18.93494 47.61357...
3 1202310303312110 8855 9563 23 5 2 POLYGON ((18.92944 47.60616, 18.93494 47.60616...
4 1202310303312121 9693 10353 22 2 2 POLYGON ((18.92395 47.59876, 18.92944 47.59876...
In [6]:
data.plot(column='avg_d_kbps', cmap='Reds');
No description has been provided for this image
In [7]:
data.crs
Out[7]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
In [8]:
data = data.to_crs(epsg=3857)
In [9]:
# Plot the map
ax = data.boundary.plot(figsize=(20, 20), alpha=1.0, edgecolor='blue')
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
ax.set_axis_off()
No description has been provided for this image
In [10]:
dist = gpd.read_file('./data/Budapest/kozighatarok/kozighatarok/admin9.shp')
In [11]:
dist.plot()
Out[11]:
<Axes: >
No description has been provided for this image
In [12]:
dist.head()
Out[12]:
NAME ADMIN_LEVE geometry
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580...
1 II. kerület 9 POLYGON ((2106733.130 6031349.000, 2107187.900...
2 XXII. kerület 9 POLYGON ((2109653.470 6006386.150, 2109664.680...
3 XI. kerület 9 POLYGON ((2111719.240 6016018.040, 2112558.130...
4 XXI. kerület 9 POLYGON ((2117179.530 6005981.940, 2118192.650...
In [13]:
dist.NAME.value_counts()
Out[13]:
NAME
XII. kerület      1
II. kerület       1
XVI. kerület      1
X. kerület        1
Alag              1
XV. kerület       1
IV. kerület       1
III. kerület      1
XIV. kerület      1
VII. kerület      1
VIII. kerület     1
IX. kerület       1
XIII. kerület     1
VI. kerület       1
V. kerület        1
I. kerület        1
XVIII. kerület    1
XIX. kerület      1
XX. kerület       1
XXIII. kerület    1
XXI. kerület      1
XI. kerület       1
XXII. kerület     1
XVII. kerület     1
Name: count, dtype: int64
In [14]:
dist = dist[dist.NAME != 'Alag']
In [15]:
dist = dist.to_crs(epsg=3857)
# Plot the map
ax = dist.plot(figsize=(10, 10), alpha=1.0, edgecolor='blue')
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
ax.set_axis_off()
No description has been provided for this image
In [16]:
# Create figure
fig, ax = plt.subplots(1, figsize=(10, 10))
#Set aspect to equal
ax.set_aspect('equal')

# PLOTTING: Specify layers to plot how to format each layer (colours, transparency, etc.):
# Layer 1:
dist.boundary.plot(ax=ax, color='blue', edgecolor='k', alpha=0.5, zorder=1)
# Layer 2:
data.plot(ax=ax, zorder=2, column='avg_d_kbps', cmap='Reds')
# ADD LAYERS here as needed:
#args[2].plot(ax=ax, color='red', alpha=0.3, zorder=3)

# Contextily basemap:
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

# Turn off axis
ax.axis('off')
# Save as file
fig.savefig('layered_map.png', dpi=300) #Comment out or delete if you don't want a PNG image saved
layered_map = plt.show()
No description has been provided for this image
In [17]:
x = dist.sjoin(data)
In [18]:
x.head()
Out[18]:
NAME ADMIN_LEVE geometry index_right quadkey avg_d_kbps avg_u_kbps avg_lat_ms tests devices
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580... 1265 1202310330002021 58752 51849 20 2 2
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580... 1263 1202310330002013 38566 14285 39 3 2
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580... 1278 1202310330002113 200111 35990 17 9 8
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580... 1277 1202310330002112 82260 24369 18 3 2
0 XII. kerület 9 POLYGON ((2106932.500 6027738.090, 2106933.580... 1274 1202310330002103 87295 27168 13 1 1
In [19]:
df_x = x[['NAME','geometry','avg_d_kbps']].groupby(['NAME','geometry']).mean().round(2).reset_index()
In [20]:
df_x.head()
Out[20]:
NAME geometry avg_d_kbps
0 I. kerület POLYGON ((2117715.190 6023350.030, 2117715.480... 159369.26
1 II. kerület POLYGON ((2106733.130 6031349.000, 2107187.900... 96410.06
2 III. kerület POLYGON ((2112491.940 6036412.580, 2112566.970... 80094.32
3 IV. kerület POLYGON ((2122550.720 6034481.710, 2122573.980... 119782.45
4 IX. kerület POLYGON ((2121171.190 6021730.280, 2121511.040... 102067.03
In [21]:
df_x.shape
Out[21]:
(23, 3)
In [22]:
df_x = gpd.GeoDataFrame(
    df_x, geometry='geometry'
)
In [23]:
ax = df_x.plot(figsize=(10, 10),column='avg_d_kbps', cmap='Reds')
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
ax.set_axis_off()
No description has been provided for this image
In [24]:
df_x.to_crs(epsg=4326).to_file('./processed_data.json', driver="GeoJSON")  
In [ ]: