#Package Loading 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import descartes 
import geopandas as gpd 
from shapely.geometry import Point, Polygon
# import os

%matplotlib inline


# Data loading from a personal Dropbox share.
# The UBC syzygy hub (http://ubc.syzygy.ca) could not read the data in and
# needed the packages reinstalled at every login, so this notebook is run on
# a local PC instead.
csv_urls = {
    "mon_total": "https://www.dropbox.com/s/tg0op6xld39hun8/Mon_crime_Total.csv?dl=1",
    "mon_case": "https://www.dropbox.com/s/o500n6mo1a4xbeg/Mon_Kaggle.csv?dl=1",
    "van_total": "https://www.dropbox.com/s/yzu999n44900xbm/Van_crime_Total.csv?dl=1",
    "van_case": "https://www.dropbox.com/s/n51fww2op22jovk/Van_Kaggle.csv?dl=1",
}
mon_total = pd.read_csv(csv_urls["mon_total"])
mon_case = pd.read_csv(csv_urls["mon_case"])
van_total = pd.read_csv(csv_urls["van_total"])
van_case = pd.read_csv(csv_urls["van_case"])


mon_total.head()
van_total.head()


# The two "total" tables have the same variables; keep only the ones that
# are useful for the analysis below.
kept_columns = ["REF_DATE", "Violations and calls for service", "VALUE"]
mon_total = mon_total.loc[:, kept_columns]
van_total = van_total.loc[:, kept_columns]


mon_case.head()


# Remove the columns at positions 0, 3 and 7, then rename "date" to "DATE"
# so the column name matches the one built for van_case.
unused_columns = mon_case.columns[[0, 3, 7]]
mon_case = mon_case.drop(columns=unused_columns).rename(columns={"date": "DATE"})
mon_case.head()


van_case.head()


# Join YEAR, MONTH and DAY into one DATE column, as mon_case has.
van_case["DATE"] = pd.to_datetime(van_case[["YEAR", "MONTH", "DAY"]])
# Positions 1-3 are the YEAR, MONTH and DAY columns that DATE now replaces.
van_case = van_case.drop(columns=van_case.columns[[1, 2, 3]])
van_total = van_total.rename(columns={"REF_DATE": "DATE"})
van_case.head()


mon_total = mon_total.rename(columns={"REF_DATE": "DATE"})


# Plot the monthly Montreal totals, one line per violation category.
mon_total = mon_total.dropna(axis='rows')
fig, ax = plt.subplots(figsize=(20, 10))
mon_total.set_index('DATE', inplace=True)
# Draw explicitly on `ax` instead of relying on the current axes. The legend
# flag is a real boolean, and the x/y arguments are dropped because they have
# no effect on a Series plot (the index is used for x).
pmont = mon_total.groupby('Violations and calls for service')['VALUE'].plot(
    ax=ax, legend=True, marker=".", linewidth=3.0)

# Move the legend outside the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), borderpad=2, labelspacing=2,
          fontsize="large", shadow=True)

# Reference lines and shaded periods for the annotated events.
# x values are positions along the monthly DATE index.
events = [
    # (start, end, shade, label, date label)
    (9.5, 12.2, 'blue', 'Covid-19', '2019-12'),
    (12.2, 18, 'red', 'EI Started', '2020-Mar'),
    (18, 30, 'yellow', 'CBR Started', '2020-Sep'),
]
for start, end, shade, label, when in events:
    ax.axvline(x=start)
    ax.axvspan(start, end, alpha=0.1, color=shade)
    # Both text rows sit just to the right of the reference line.
    ax.text(start + 0.1, 1400, label, fontsize=10, weight='bold')
    ax.text(start + 0.1, 1365, when, fontsize=10, weight='bold')


ax.set_title("Montreal Crime Cases Reported 2019-03 to 2021-09", fontsize=20, weight='bold')
for side in ("right", "top", "left"):
    ax.spines[side].set_visible(False)


# Restore DATE as a regular column for the later cells.
mon_total = mon_total.reset_index()


# Plot the monthly Vancouver totals, one line per category.
fig1, ax1 = plt.subplots(figsize=(17, 13))
van_total.set_index('DATE', inplace=True)
# Draw explicitly on `ax1` instead of relying on the current axes. The legend
# flag is a real boolean, and the x/y arguments are dropped because they have
# no effect on a Series plot (the index is used for x).
pmont = van_total.groupby('Violations and calls for service')['VALUE'].plot(
    ax=ax1, legend=True, marker=".", linewidth=3.0)

# Move the legend outside the plot area.
ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5), borderpad=2, labelspacing=2,
           fontsize="large", shadow=True)

# Covid
ax1.axvline(x=9)
ax1.text(9.2, 1200, 'Covid-19 Starts', fontsize=12, weight='bold')
ax1.text(9.2, 1245, '2019-12', fontsize=12, weight='bold')
ax1.axvspan(9, 12.2, alpha=0.1, color='blue')

ax1.set_title("Vancouver Crime Cases Reported 2019-03 to 2021-09", fontsize=20, weight='bold')
for side in ("right", "top", "left"):
    ax1.spines[side].set_visible(False)


# EI
ax1.axvline(x=12.2)
ax1.axvspan(12.2, 18, alpha=0.1, color='red')
ax1.text(12.3, 1400, 'EI Started', fontsize=10, weight='bold')
ax1.text(12.3, 1365, '2020-Mar', fontsize=10, weight='bold')

# CBR
ax1.axvline(x=18)
ax1.axvspan(18, 30, alpha=0.1, color='yellow')
ax1.text(18.1, 1400, 'CBR Started', fontsize=10, weight='bold')
ax1.text(18.1, 1365, '2020-Sep', fontsize=10, weight='bold')

# Restore DATE as a regular column for the later cells.
van_total = van_total.reset_index()


# Call-for-service categories that are left out of the "real crime" subset.
excluded_calls = [
    "Calls for service, domestic disturbances / disputes",
    "Calls for service, Mental Health Act (MHA) apprehension",
    "Calls for service, mental health - other",
    "Calls for service, suicide/attempted suicide",
    "Calls for service, overdose",
    "Calls for service, child welfare check",
    "Calls for service, check welfare - general",
    "Calls for service, child custody matter - domestic",
]
# Keep every row whose category is not one of the excluded labels.
van_total_realcrime = van_total[~van_total["Violations and calls for service"].isin(excluded_calls)]
    

# Plot the filtered Vancouver data (van_total_realcrime), one line per category.
fig1, ax1 = plt.subplots(figsize=(17, 10))
van_total_realcrime = van_total_realcrime.set_index('DATE')
# Draw explicitly on `ax1` instead of relying on the current axes. The legend
# flag is a real boolean, and the x/y arguments are dropped because they have
# no effect on a Series plot (the index is used for x).
pvan_total_realcrime = van_total_realcrime.groupby('Violations and calls for service')['VALUE'].plot(
    ax=ax1, legend=True, marker=".", linewidth=3.0)

# Move the legend outside the plot area.
ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5), borderpad=2, labelspacing=2,
           fontsize="large", shadow=True)


# Covid
ax1.axvline(x=9)
ax1.text(9.2, 560, 'Covid-19', fontsize=12, weight='bold')
ax1.text(9.2, 570, '2019-12', fontsize=12, weight='bold')
ax1.axvspan(9, 12.2, alpha=0.05, color='blue')

# EI
ax1.axvline(x=12.2)
ax1.axvspan(12.2, 18, alpha=0.1, color='red')
ax1.text(12.3, 560, 'EI Started', fontsize=10, weight='bold')
ax1.text(12.3, 570, '2020-Mar', fontsize=10, weight='bold')

# CBR
ax1.axvline(x=18)
ax1.axvspan(18, 30, alpha=0.1, color='yellow')
ax1.text(18.1, 560, 'CBR Started', fontsize=10, weight='bold')
ax1.text(18.1, 570, '2020-Sep', fontsize=10, weight='bold')


# The plotted frame is Vancouver data, so the title names Vancouver
# (it previously read "Motreal", copied from the Montreal figure).
ax1.set_title("Vancouver Crime Cases Reported 2019-03 to 2021-09", fontsize=20, weight='bold')
for side in ("right", "top", "left"):
    ax1.spines[side].set_visible(False)


# Restore DATE as a regular column for the later cells.
van_total_realcrime = van_total_realcrime.reset_index()


#!pip install osmnx       # NOTE: to run the following code, first run this line in a terminal to install the package
import osmnx as ox        #    on your local PC. UBC syzygy does not work: the kernel always dies when the code takes a long time to run.
import folium
import random 
import folium.plugins
import bokeh
from folium.plugins import MarkerCluster


# Turn each Metro Montreal city into "<city>, Canada" so it can be geocoded.
list_mon_city = [city + ", Canada" for city in mon_case["city"].unique()]

# Find the cities that cannot be geocoded and so cannot serve as boundaries.
un_coded_city = []
for place in list_mon_city:
    try:
        ox.geocode_to_gdf(place)
    except Exception:
        # Any geocoding failure marks the city as unusable. Unlike a bare
        # `except:`, KeyboardInterrupt/SystemExit still propagate, so this
        # long-running loop can be interrupted.
        un_coded_city.append(place)
# Report the rejected cities (a bare expression in mid-cell shows nothing).
print("Cities that could not be geocoded:", un_coded_city)

# Drop the cities that could not be geocoded.
list_mon_city = [place for place in list_mon_city if place not in un_coded_city]

# Draw the base map from the valid cities.
image = ox.graph_from_place(list_mon_city, simplify=True)
fig, ax = ox.plot_graph(image, bgcolor='#123555', node_color='b', edge_color="#E1EED2",
                        figsize=(18, 24), node_size=0, edge_linewidth=0.5, show=False, close=True)


# Give each type of crime its own colour.
dim_high = mon_case.shape[0]
# mon_case_map is an alias of mon_case (not a copy), so the "color" column
# added below also appears on mon_case.
mon_case_map = mon_case
type_crime = mon_case["category"].unique()

# Colours are paired with categories in order of first appearance in the data.
crime_palette = ["#D1494E", "#E69B03", "#bb84c9", "#FFDE0A", "#F40D64", "#23EBB9"]
color_by_crime = {
    crime: shade
    for crime, shade in zip(type_crime, crime_palette)
    if pd.notna(crime)
}

# A single pass with a dictionary lookup replaces the per-row `iloc`
# comparisons. Categories without a palette entry (more than six categories,
# or a missing value) keep the 'place-holder' marker, and fewer than six
# categories no longer raises IndexError.
mon_case_map_color = [
    color_by_crime.get(crime, 'place-holder') for crime in mon_case_map["category"]
]
mon_case_map["color"] = mon_case_map_color


# Plot every crime on the street graph at its coordinates.
position_mon_cases = mon_case_map[["longitude", "latitude", "color"]]
# One vectorised scatter call draws the same markers as a call per row,
# without the per-row Python overhead.
ax.scatter(
    position_mon_cases["longitude"],
    position_mon_cases["latitude"],
    c=position_mon_cases["color"].tolist(),
    alpha=0.2,
    s=35,
)
fig


# Separate the cases into a pre-covid set and a post-covid set.
mon_case['color'] = mon_case_map["color"]
# NOTE(review): both comparisons are strict, so rows with year == 2019 land
# in neither subset — confirm this is intended.
is_before = mon_case["year"] < 2019
is_after = mon_case["year"] > 2019
before_covid = mon_case.loc[is_before]
post_covid = mon_case.loc[is_after]
print("Number of Items:", len(before_covid), len(post_covid))

Number of Items: 89659 28333


# Randomly down-sample the pre-covid rows to the size of the post-covid set.
# The population size comes from the data rather than a hard-coded row count,
# so the sampling stays valid if the dataset changes.
sample_size = min(len(before_covid), len(post_covid))
index = random.sample(range(len(before_covid)), sample_size)
before_covid = before_covid.iloc[index]

# Pop-up information: one dictionary per row, shaped like
#     {'DATE': date,
#      'category': category,
#      'neighbourhood': neighbourhood}
popup_columns = ["DATE", "category", "neighbourhood"]
before_dic_pop = before_covid[popup_columns].to_dict(orient='records')
# Built from the selected columns (previously the whole post_covid frame was
# converted, ignoring the column selection).
post_dic_pop = post_covid[popup_columns].to_dict(orient='records')

# Position lists: one (latitude, longitude) tuple per row.
post_covid_position_list = post_covid[["latitude", "longitude"]].apply(tuple, axis=1).tolist()
before_covid_position_list = before_covid[["latitude", "longitude"]].apply(tuple, axis=1).tolist()


# Before covid: draw the first n sampled cases on an interactive map.
n = 3000
# NOTE(review): the "Stamen Toner" tile set may not be available in newer
# folium releases — confirm before upgrading.
before_map = folium.Map(location=[45.567, -73.626], tiles="Stamen Toner", zoom_start=12, width='100%', height='100%')
before_covid_position_list = [list(x) for x in before_covid_position_list]
# The colour is read by column name instead of a positional index.
before_colors = before_covid["color"].tolist()
# Slicing to n and zipping stops at the shortest input, so fewer than n rows
# no longer raises IndexError.
for spot, info, shade in zip(before_covid_position_list[:n], before_dic_pop, before_colors):
    folium.Circle(
        radius=10,
        location=spot,
        # Date and category are separated so the popup is readable.
        popup=f"date:{info['DATE']} category:{info['category']}",
        color=shade,
        fill=True,
    ).add_to(before_map)
before_map


# After covid
post_covid_position_list = [list(x) for x in post_covid_position_list]

# Showing more than 20000 points at once on one map brings the PC down,
# so draw a random sample of at most n cases instead.
# The population size comes from the data rather than a hard-coded row count.
post_sample_size = min(n, len(post_covid))
index_post = random.sample(range(len(post_covid)), post_sample_size)
subset_post = post_covid.iloc[index_post]
sublist_post = [post_covid_position_list[i] for i in index_post]
subdiction_post = [post_dic_pop[i] for i in index_post]


post_map = folium.Map(location=[45.567, -73.626], tiles="Stamen Toner", zoom_start=12, width='100%', height='100%')
# The colour is read by column name instead of a positional index.
for spot, info, shade in zip(sublist_post, subdiction_post, subset_post["color"]):
    folium.Circle(
        radius=10,
        location=spot,
        popup=f"date:{info['DATE']} category:{info['category']}",
        color=shade,
        fill=True,
    ).add_to(post_map)
post_map


from matplotlib.patches import Patch
def grouped_dataset_factory(data, group_criterior):
    """Group ``data`` by ``group_criterior`` and return the groupby object.

    Args:
        data: Object exposing ``groupby`` (a pandas DataFrame at the call
            site in this file).
        group_criterior: Grouping key(s), passed to ``groupby`` unchanged.

    Returns:
        The result of ``data.groupby(group_criterior)``.
    """
    grouped = data.groupby(group_criterior)
    return grouped
# Colour used for each crime category in the stacked bar chart.
col_map = {
    "Motor vehicle theft": "#D1494E",
    'Home Invasion': "#E69B03",
    'Mischief': "#bb84c9",
    'Theft in / from a motor vehicle': "#FFDE0A",
    'Confirmed Theft': "#F40D64",
    'Offenses resulting in death': "#23EBB9",
}

# Count post-covid cases per neighbourhood and category, stacked by category.
a = ["neighbourhood"]
post_covid_grouped = grouped_dataset_factory(post_covid, a)
post_covid_grouped["category"].value_counts().unstack().plot(
    kind='bar', figsize=(20, 20), stacked=True, color=col_map)
# Legend patches and labels both come from col_map, so each label is always
# shown next to its own colour regardless of the order in which categories
# appear in the data.
plt.legend(
    [Patch(facecolor=shade) for shade in col_map.values()],
    list(col_map),
    borderpad=2, labelspacing=2, fontsize="large",
)

<matplotlib.legend.Legend at 0x7f847a5f3880>

	REF_DATE	GEO	DGUID	Violations and calls for service	Statistics	UOM	UOM_ID	SCALAR_FACTOR	VECTOR	COORDINATE	VALUE	STATUS	SYMBOL	TERMINATED
0	2019-03	Vancouver, British Columbia, municipal [59023]	59023	Total assaults (levels 1, 2, 3) [141]	Actual incidents	Number	223	units	v1210497781	15.1.1	395.0	NaN	NaN	NaN
1	2019-04	Vancouver, British Columbia, municipal [59023]	59023	Total assaults (levels 1, 2, 3) [141]	Actual incidents	Number	223	units	v1210497781	15.1.1	357.0	NaN	NaN	NaN
2	2019-05	Vancouver, British Columbia, municipal [59023]	59023	Total assaults (levels 1, 2, 3) [141]	Actual incidents	Number	223	units	v1210497781	15.1.1	423.0	NaN	NaN	NaN
3	2019-06	Vancouver, British Columbia, municipal [59023]	59023	Total assaults (levels 1, 2, 3) [141]	Actual incidents	Number	223	units	v1210497781	15.1.1	419.0	NaN	NaN	NaN
4	2019-07	Vancouver, British Columbia, municipal [59023]	59023	Total assaults (levels 1, 2, 3) [141]	Actual incidents	Number	223	units	v1210497781	15.1.1	449.0	NaN	NaN	NaN

	Unnamed: 0	category	date	postal_code	city	neighbourhood	year	count	longitude	latitude
0	0	Motor vehicle theft	2018-09-13	H1Z 1S9	MONTREAL	Saint-Michel	2018	1	-73.626	45.567
1	1	Motor vehicle theft	2018-04-30	H1Z 1S9	MONTREAL	Saint-Michel	2018	1	-73.626	45.567
2	2	Home Invasion	2018-01-10	H1Z 2V6	MONTREAL	Saint-Michel	2018	1	-73.629	45.569
3	3	Mischief	2018-11-12	H1Z 2V6	MONTREAL	Saint-Michel	2018	1	-73.629	45.569
4	4	Mischief	2018-08-15	H1Z 2V6	MONTREAL	Saint-Michel	2018	1	-73.629	45.569

	category	DATE	city	neighbourhood	year	longitude	latitude
0	Motor vehicle theft	2018-09-13	MONTREAL	Saint-Michel	2018	-73.626	45.567
1	Motor vehicle theft	2018-04-30	MONTREAL	Saint-Michel	2018	-73.626	45.567
2	Home Invasion	2018-01-10	MONTREAL	Saint-Michel	2018	-73.629	45.569
3	Mischief	2018-11-12	MONTREAL	Saint-Michel	2018	-73.629	45.569
4	Mischief	2018-08-15	MONTREAL	Saint-Michel	2018	-73.629	45.569

	TYPE	YEAR	MONTH	DAY	HOUR	MINUTE	HUNDRED_BLOCK	NEIGHBOURHOOD	X	Y
0	Theft from Vehicle	2006	3	4	20	30	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06
1	Theft from Vehicle	2006	3	5	11	30	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06
2	Theft from Vehicle	2006	4	16	0	1	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06
3	Theft from Vehicle	2006	6	11	17	45	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06
4	Theft from Vehicle	2006	8	5	20	0	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06

	TYPE	HOUR	MINUTE	HUNDRED_BLOCK	NEIGHBOURHOOD	X	Y	DATE
0	Theft from Vehicle	20	30	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06	2006-03-04
1	Theft from Vehicle	11	30	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06	2006-03-05
2	Theft from Vehicle	0	1	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06	2006-04-16
3	Theft from Vehicle	17	45	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06	2006-06-11
4	Theft from Vehicle	20	0	DAVIE ST / HOWE ST	Central Business District	490748.5904	5.458346e+06	2006-08-05

ECON 323 FINAL TERM PROJECT¶

An Investigation into the Criminal Situation in Vancouver and Montreal¶

Dec/21/2021 Shihao Tong¶

Introduction¶

1. Data Investigation¶

Data Cleaning and Restructuring¶

Distribution Illustration¶

2. Map Exploration for Montreal Crime Occurrence¶

NOTICE: Do not try to run the code in the next block. It will take about 30 minutes to produce the plot.¶

Conclusion & Summary¶

Reference¶