import requests
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = """
[out:json];
area["ISO3166-1"="DE"][admin_level=2]->.search;
node[amenity="restaurant"](area.search);
out count;"""
response = requests.get(overpass_url, params={'data': overpass_query})
response.json()
{'version': 0.6, 'generator': 'Overpass API 0.7.55.5 2ca3f387', 'osm3s': {'timestamp_osm_base': '2019-03-11T19:14:02Z', 'timestamp_areas_base': '2019-03-11T18:08:02Z', 'copyright': 'The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.'}, 'elements': [{'type': 'count', 'id': 0, 'tags': {'nodes': '74361', 'ways': '0', 'relations': '0', 'areas': '0', 'total': '74361'}}]}
import osmium
class RestaurantHandler(osmium.SimpleHandler):
def __init__(self):
osmium.SimpleHandler.__init__(self)
self.num_restaurants = 0
def node(self, n):
if n.tags.get('amenity') == 'restaurant':
self.num_restaurants += 1
handler = RestaurantHandler()
handler.apply_file('data/liechtenstein-latest.osm.pbf')
print('Number of Restaurants: ', handler.num_restaurants)
Number of Restaurants: 45
conda install -c conda-forge geopandas
import psycopg2
import geopandas as gpd
with psycopg2.connect(database="osm_data_science", user="postgres",
password='password', host='localhost') as connection:
gdf = gpd.GeoDataFrame.from_postgis(
"""SELECT osm_id, amenity, state, geom FROM osm_amenities_areas""",
connection, geom_col='geom')
gdf.head()
osm_id | amenity | state | geom | |
---|---|---|---|---|
0 | 476 | university | Steiermark | POINT (15.4498326391065 47.0777236761512) |
1 | 11101 | townhall | Wien | POINT (16.3573412415296 48.2108412144281) |
2 | 11936 | school | Wien | POINT (16.4103468335947 48.2842460993475) |
3 | 14604 | school | Niederösterreich | POINT (16.778381402358 48.0259891360606) |
4 | 27945 | school | Wien | POINT (16.2909158154218 48.1690868739775) |
gdf.to_crs({'init': 'epsg:31287'}).plot(
figsize=(20, 10), alpha=0.4, markersize=0.5, color='k');
import psycopg2
import geopandas as gpd
with psycopg2.connect(database="osm_data_science", user="postgres",
password='postgres', host='localhost') as connection:
gdf = gpd.GeoDataFrame.from_postgis(
"SELECT * FROM city_amenity_counts",
connection, geom_col='geom')
gdf.loc[:, 'parking':'veterinary'].head()
parking | place_of_worship | school | bench | restaurant | fuel | cafe | fast_food | bank | waste_basket | ... | waste_disposal | marketplace | bus_station | university | college | parking_entrance | swimming_pool | theatre | taxi | veterinary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.001411 | 0.000246 | 0.000305 | 0.001454 | 0.001369 | 0.000116 | 0.000587 | 0.000420 | 0.000180 | 0.000943 | ... | 0.000027 | 0.000023 | 0.000004 | 0.000054 | 0.000005 | 0.000097 | 0.000023 | 0.000039 | 0.000086 | 0.000043 |
1 | 0.005232 | 0.000507 | 0.000468 | 0.005668 | 0.001579 | 0.000232 | 0.001196 | 0.000636 | 0.000329 | 0.003164 | ... | 0.000014 | 0.000039 | 0.000018 | 0.000314 | 0.000007 | 0.000468 | 0.000079 | 0.000089 | 0.000143 | 0.000079 |
2 | 0.010564 | 0.000431 | 0.000573 | 0.002580 | 0.001526 | 0.000238 | 0.000532 | 0.000517 | 0.000421 | 0.000710 | ... | 0.000015 | 0.000010 | 0.000005 | 0.000056 | 0.000005 | 0.001485 | 0.000010 | 0.000041 | 0.000066 | 0.000041 |
3 | 0.004517 | 0.000559 | 0.000441 | 0.004504 | 0.001775 | 0.000230 | 0.000750 | 0.000434 | 0.000506 | 0.000947 | ... | 0.000000 | 0.000013 | 0.000020 | 0.000230 | 0.000026 | 0.000460 | 0.000000 | 0.000132 | 0.000171 | 0.000046 |
4 | 0.002205 | 0.000756 | 0.000654 | 0.002331 | 0.001921 | 0.000260 | 0.001252 | 0.000614 | 0.000378 | 0.000472 | ... | 0.000031 | 0.000024 | 0.000016 | 0.000394 | 0.000055 | 0.000189 | 0.000000 | 0.000087 | 0.000094 | 0.000047 |
5 rows × 50 columns
# Get only Germany and France
gdf_DE_FR = gdf[(gdf['country_code'] == 'DE') | \
(gdf['country_code'] == 'FR')].copy()
X = gdf_DE_FR.loc[:, 'parking':'veterinary'].values
y = gdf_DE_FR['country_code'].map({'DE': 0, 'FR': 1}).values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state=1000)
from sklearn.linear_model import LogisticRegressionCV
clf = LogisticRegressionCV(cv=5)
clf.fit(X_train, y_train)
LogisticRegressionCV(Cs=10, class_weight=None, cv=5, dual=False, fit_intercept=True, intercept_scaling=1.0, max_iter=100, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, refit=True, scoring=None, solver='lbfgs', tol=0.0001, verbose=0)
print('Training score : {:.4f}'.format(clf.score(X_train, y_train)))
print('Testing score : {:.4f}'.format(clf.score(X_test, y_test)))
Training score : 0.8936 Testing score : 0.8293
gdf['frenchness'] = gdf.loc[:, 'parking':'veterinary'].apply(
lambda row: clf.predict_proba([row.values])[:, 1][0], axis=1)
import folium
import matplotlib
cmap = matplotlib.cm.get_cmap('cool', 5)
m = folium.Map(location=[49.5734053, 7.588576], zoom_start=5)
for i, row in gdf.iterrows():
rgb = cmap(row['frenchness'])[:3]
hex_color = matplotlib.colors.rgb2hex(rgb)
folium.CircleMarker([row['geom'].y, row['geom'].x],
radius=5, popup=row['city'],
fill_color=hex_color, fill_opacity=1,
opacity=0.0).add_to(m)
m.save('maps/folium_map.html')
from IPython.display import IFrame
IFrame('maps/folium_map.html', width=1024, height=600)