#IMPORT MODULES ------

#Common modules
import pandas as pd #Dataframe support
import matplotlib.pyplot as plt   #Plotting support

#Statistics
from statsmodels.stats.proportion import proportions_ztest

#SQL
import sqlalchemy
from sqlalchemy import create_engine

#Mapping
import branca
import folium
from folium.plugins import HeatMap

#IPython - HTML notebook formatting
from IPython.core.display import HTML

#Suppress warnings in notebook
#NOTE: with no category argument this ignores every warning for the rest of the
#session, including any raised by the pandas and plotting cells further down
import warnings
warnings.filterwarnings('ignore')


#Load the `sql` IPython extension, which provides the %sql and %%sql magics used in the cells below
%load_ext sql


#Notebook-wide stylesheet: centers image outputs, styles rendered result tables,
#and defines the helper classes (.toc, .nest-one, .nest-two, .faint)
notebook_css = """
<style>
.jp-needs-light-background {
    display: block;
    margin: auto;
}

.jp-OutputArea-output.jp-RenderedHTMLCommon table {
    margin: 2em auto;
    background: #eae9e9;
    border: 1px solid #000;
    font-size: 14px;
}

.toc {
    font-size: 16px;
}

.nest-one {
    margin-left: 1em;
    font-style: italic;
    font-size: 14px;
}

.nest-two {
    margin-left: 3em;
    font-style: italic;
    font-size: 12px;
}

.faint {
    opacity: 0.2;
}
</style>
"""

#Leave the HTML object as the cell's final expression so the notebook renders it
HTML(notebook_css)


#Point the %sql magic at the SQLite database file (the path is relative, so it depends on the notebook's working directory)
%sql sqlite:///../data/dallas-ois.sqlite


%%sql

-- List the tables stored in the database, in alphabetical order.
SELECT name AS "Table Name"
FROM sqlite_schema
WHERE type = 'table'
ORDER BY name;

 * sqlite:///../data/dallas-ois.sqlite
Done.


%%sql

-- annual_shootings view
-- One row per (year, outcome, race, gender) combination, where shootings is the
-- number of subject rows joined to an incident on case_number.
-- The view lives in the database file, so it is dropped first. Without this,
-- re-running the cell fails because the view already exists.
DROP VIEW IF EXISTS annual_shootings;

CREATE VIEW annual_shootings AS 
SELECT
   STRFTIME('%Y', ic.date) year,
   COUNT(*) shootings,
   ic.subject_statuses outcome,
   sb.race,
   sb.gender 
FROM
   incidents ic 
   INNER JOIN
      subjects sb 
      ON sb.case_number = ic.case_number 
GROUP BY
   year,
   ic.subject_statuses,
   sb.race,
   sb.gender;

 * sqlite:///../data/dallas-ois.sqlite
(sqlite3.OperationalError) view annual_shootings already exists
[SQL: CREATE VIEW annual_shootings AS 
SELECT
   SUBSTR(STRFTIME('%Y-%m-%d', ic.date), 1, 4) year,
   COUNT(*) shootings,
   ic.subject_statuses outcome,
   sb.race,
   sb.gender 
FROM
   incidents ic 
   INNER JOIN
      subjects sb 
      ON sb.case_number = ic.case_number 
GROUP BY
   year,
   ic.subject_statuses,
   sb.race,
   sb.gender;]
(Background on this error at: https://sqlalche.me/e/20/e3q8)


%%sql

-- Headline figures for all shootings, derived from per-year totals.
WITH yearly_totals AS (
    SELECT
        year,
        SUM(shootings) AS n
    FROM annual_shootings
    GROUP BY year
)
SELECT
    SUM(n) AS "Total Shootings",
    ROUND(CAST(SUM(n) AS REAL) / COUNT(DISTINCT year), 1) AS "Avg. Shootings Per Year",
    MAX(n) AS "Max Shootings Per Year",
    MIN(n) AS "Min Shootings Per Year"
FROM yearly_totals;

 * sqlite:///../data/dallas-ois.sqlite
Done.


%%sql

-- Share of all shootings by subject race.
-- The ratio is scaled to 0-100 before rounding. Rounding the fraction first and
-- multiplying afterwards can leave floating-point residue in the displayed value.
SELECT
    race "Race",
    SUM(shootings) "Total Shootings",
    ROUND(100.0 * SUM(shootings) / (
        SELECT
            SUM(shootings)
        FROM
            annual_shootings),
    0) "% of Total"
FROM
    annual_shootings
GROUP BY
    race;

 * sqlite:///../data/dallas-ois.sqlite
Done.


#One-sample z-test on the share of race "B" subjects (111 of the 223 subjects counted above)
#against a hypothesised proportion of 0.24
#NOTE(review): 0.24 is presumably the matching population share - confirm the source
race_b_count = 111
subject_total = 223
null_proportion = 0.24
proportions_ztest(count=race_b_count, nobs=subject_total, value=null_proportion)

(7.698368402575708, 1.3781456281035084e-14)


%%sql

-- Share of all shootings by subject gender.
-- The denominator is read from the view instead of being typed in as a constant,
-- so the percentages stay correct if the underlying data changes.
-- The ratio is scaled to 0-100 before rounding to avoid floating-point residue.
SELECT
    gender "Gender",
    SUM(shootings) "Total Shootings",
    ROUND(100.0 * SUM(shootings) / (
        SELECT
            SUM(shootings)
        FROM
            annual_shootings),
    0) "% of Total"
FROM
    annual_shootings
GROUP BY
    gender;

 * sqlite:///../data/dallas-ois.sqlite
Done.


%%sql

-- fatal_shootings view
-- Per (year, race, gender) counts restricted to rows whose outcome is exactly
-- Deceased. Combined statuses such as 1 Deceased 1 Injured are not matched.
-- The view is dropped first so the cell can be re-run without an
-- already-exists error.
DROP VIEW IF EXISTS fatal_shootings;

-- The status is a string literal, so it is single-quoted. Double quotes denote
-- an identifier in SQL and only work here through a SQLite compatibility quirk.
CREATE VIEW fatal_shootings AS 
SELECT
   year,
   race,
   gender,
   SUM(shootings) shootings 
FROM
   annual_shootings 
WHERE
   outcome = 'Deceased' 
GROUP BY
   year,
   race,
   gender;

 * sqlite:///../data/dallas-ois.sqlite
(sqlite3.OperationalError) view fatal_shootings already exists
[SQL: CREATE VIEW fatal_shootings AS 
    SELECT
       year,
       race,
       gender,
       SUM(shootings) shootings 
FROM
   annual_shootings 
WHERE
   outcome = "Deceased" 
GROUP BY
   year,
   race,
   gender;]
(Background on this error at: https://sqlalche.me/e/20/e3q8)


%%sql

-- Headline figures for fatal shootings.
-- fatal_shootings holds one row per (year, race, gender), so the rows are first
-- collapsed to one total per year. Taking MAX and MIN straight from the view
-- would report the largest and smallest subgroup count, not the per-year values
-- the column titles promise.
-- Years with no fatal shooting have no row in the view and are therefore not
-- part of the average or the minimum.
WITH annual_fatal AS 
(
   SELECT
      year,
      SUM(shootings) shootings 
   FROM
      fatal_shootings 
   GROUP BY
      year 
)
SELECT
    SUM(shootings) "Total Fatal Shootings",
    ROUND(CAST(SUM(shootings) AS Float) / COUNT(DISTINCT(year)),
    1) "Avg. Fatal Shootings Per Year",
    MAX(shootings) "Max Fatal Shootings Per Year",
    MIN(shootings) "Min Fatal Shootings Per Year"   
FROM
    annual_fatal;

 * sqlite:///../data/dallas-ois.sqlite
Done.


%%sql

-- Share of fatal shootings by subject race.
-- The ratio is scaled to 0-100 before rounding. Rounding the fraction first and
-- multiplying afterwards can leave floating-point residue in the displayed value.
SELECT
    race "Race",
    SUM(shootings) "Fatal Shootings",
    ROUND(100.0 * SUM(shootings) / (
        SELECT
            SUM(shootings)
        FROM
            fatal_shootings),
    0) "% of Total"
FROM
    fatal_shootings
GROUP BY
    race;

 * sqlite:///../data/dallas-ois.sqlite
Done.


%%sql

-- Share of fatal shootings by subject gender.
-- The ratio is scaled to 0-100 before rounding. Rounding the fraction first and
-- multiplying afterwards can leave floating-point residue in the displayed value.
SELECT
    gender "Gender",
    SUM(shootings) "Fatal Shootings",
    ROUND(100.0 * SUM(shootings) / (
        SELECT
            SUM(shootings)
        FROM
            fatal_shootings),
    0) "% of Total"
FROM
    fatal_shootings
GROUP BY
    gender;

 * sqlite:///../data/dallas-ois.sqlite
Done.


#Open a second handle on the same SQLite file for pandas:
#a raw DBAPI connection obtained from a SQLAlchemy engine
sql_engine = create_engine(
    'sqlite:///../data/dallas-ois.sqlite',
    echo=False,
)
conn = sql_engine.raw_connection()


#Load per-year totals (all shootings and fatal shootings) into a pandas dataframe.
#Two details matter for the trend analysis that follows:
# - rows are explicitly ordered by year, because the rolling averages and the
#   line plot both assume chronological order and SQL guarantees no row order
#   without ORDER BY
# - every year that has any shooting is kept; a year with no fatal shooting gets
#   a fatal count of 0 instead of being dropped from the result by an inner join
ann_trends = pd.read_sql_query(
    """
    WITH ann_sums AS 
        (
           SELECT
              year,
              SUM(shootings) shootings 
           FROM
              annual_shootings 
           GROUP BY
              year 
        ),
    ann_fats AS 
        (
           SELECT
              year,
              SUM(shootings) shootings 
           FROM
              fatal_shootings 
           GROUP BY
              year 
        )
        
    SELECT
       ans.year,
       ans.shootings tot_shootings,
       COALESCE(afs.shootings, 0) fat_shootings
    FROM
       ann_sums ans 
    LEFT JOIN
       ann_fats afs 
       ON afs.year = ans.year
    ORDER BY
       ans.year;
    """, con = conn)


#Add a centered 3-row rolling mean next to each annual count column
#(total shootings first, then fatal shootings)
for count_col in ("tot_shootings", "fat_shootings"):
    smoothed = ann_trends[count_col].rolling(window=3, center=True).mean()
    ann_trends[count_col + "_roll"] = smoothed


#Time series plot: for each series draw the rolling average as a solid, labelled
#line and the raw annual counts as a faint line of the same color
plt.figure(figsize=(6, 4), dpi=100)

trend_lines = (
    ("tot_shootings", "All Shootings (Rolling Avg)", "#0000ca"),
    ("fat_shootings", "Fatal Shootings (Rolling Avg)", "#cd0000"),
)
for count_col, legend_label, line_color in trend_lines:
    plt.plot(ann_trends['year'], ann_trends[count_col + "_roll"], label=legend_label, color=line_color)
    plt.plot(ann_trends['year'], ann_trends[count_col], color=line_color, alpha=0.1)

plt.suptitle("Annual Officer-Involved Shootings in Dallas, TX (2003-2016)", fontsize=12, y=0.97)

#Years are plotted as strings, so the tick positions are given as strings too
plt.xlabel("Year")
plt.xticks([str(tick_year) for tick_year in range(2003, 2016, 2)])

plt.ylabel("Number of Shootings")
plt.yticks(list(range(0, 31, 5)))

plt.legend(loc="upper left")
plt.show()


%%sql
-- Clean up the helper views. IF EXISTS keeps this cell from failing when it is
-- run twice or when a view was never created.
DROP VIEW IF EXISTS annual_shootings;
DROP VIEW IF EXISTS fatal_shootings;

 * sqlite:///../data/dallas-ois.sqlite
Done.
Done.

[]


#Load the date, subject status and coordinates of every incident into a dataframe
incident_query = """
    SELECT date, subject_statuses, latitude, longitude
    FROM incidents
"""
locs = pd.read_sql_query(incident_query, con=conn)


#Show the incidents that have no recorded latitude
no_latitude = locs["latitude"].isna()
locs[no_latitude]


#Drop every row that has a missing value in any column.
#An explicit copy is taken because later cells add and overwrite columns on
#locs_filt; this makes it an independent dataframe rather than a derived selection
locs_filt = locs.dropna(axis=0).copy()


#Collect the distinct subject statuses, in order of first appearance
statuses = pd.unique(locs_filt["subject_statuses"])
statuses

array(['Deceased', 'Shoot and Miss', 'Injured', 'Other',
       '1 Deceased 1 Injured', '2 Injured', 'Deceased Injured'],
      dtype=object)


#Tally the incidents per subject status
status_column = locs_filt["subject_statuses"]
status_column.value_counts()

Shoot and Miss          81
Deceased                67
Injured                 59
Other                    2
1 Deceased 1 Injured     1
2 Injured                1
Deceased Injured         1
Name: subject_statuses, dtype: int64


#Recategorize outlier statuses.
#The rare combined statuses are folded into the main categories by label, not
#by their position in `statuses`, so the result does not depend on row order.
#The column is replaced in a single assignment: chained indexing such as
#df[col][mask] = value is not guaranteed to write back to the dataframe
status_fixes = {
    "1 Deceased 1 Injured": "Deceased",
    "Deceased Injured": "Deceased",
    "2 Injured": "Injured",
}
locs_filt["subject_statuses"] = locs_filt["subject_statuses"].replace(status_fixes)
locs_filt["subject_statuses"].value_counts()

Shoot and Miss    81
Deceased          69
Injured           60
Other              2
Name: subject_statuses, dtype: int64


#Assign a marker color to each incident in a new column.
#A single lookup replaces the per-status chained assignments, which are not
#guaranteed to write back to the dataframe. Statuses without an entry keep
#the empty-string default
status_colors = {
    "Other": "#cfcfcf",
    "Shoot and Miss": "green",
    "Deceased": "red",
    "Injured": "gold",
}
locs_filt["colors"] = locs_filt["subject_statuses"].map(status_colors).fillna('')


#Derive the incident year from the leading part of the date string, then map it
#linearly to an opacity. The scale is padded by 2.5 years on both ends, so the
#earliest year stays above 0 and the latest stays below 1
locs_filt["year"] = pd.to_numeric(locs_filt["date"].str.split('-').str[0])

incident_year = locs_filt["year"]
scale_start = incident_year.min() - 2.5
scale_end = incident_year.max() + 2.5
locs_filt["opacity"] = (incident_year - scale_start) / (scale_end - scale_start)


#Point-map basemap with the zoom range limited to levels 11-13
m1 = folium.Map(
    location=[32.766328, -96.787865],
    tiles='CartoDB positron',
    zoom_start=11,
    min_zoom=11,
    max_zoom=13,
)

#Initial view: fit the map to a fixed bounding box given as two [lat, lon] corners
m1_corner_sw = [32.601042, -96.973948]
m1_corner_ne = [33.051101, -96.583247]
m1.fit_bounds(bounds=[m1_corner_sw, m1_corner_ne])

#Add markers to map with positions, tooltips, and colors based on shooting data
def add_markers(x):
    """Add one circle marker for a single incident row to the point map ``m1``.

    x: a row of ``locs_filt``; its latitude, longitude, subject_statuses, year,
       colors and opacity values are read.
    """
    folium.CircleMarker(location=[x["latitude"], x["longitude"]],
                        radius=6,
                        tooltip=x["subject_statuses"] + ' (' + str(x['year']) + ')',
                        fill=True,
                        fill_color=x["colors"],
                        fill_opacity=x["opacity"],
                        #Disable the outline through the stroke option rather
                        #than passing a boolean where a color is expected
                        stroke=False).add_to(m1)

#Plain loop: the markers are added for their side effect on m1, so there is no
#result worth collecting with apply()
for _, incident in locs_filt.iterrows():
    add_markers(incident)

#Add a custom legend to map w/ HTML.
#The entries mirror the marker colors and status names used for the points,
#including the grey "Other" markers, which previously had no legend entry
legend_html = '''
{% macro html(this, kwargs) %}
<div style="position: absolute; left: 30px; bottom: 30px; background: rgba(255,255,255,0.7); z-index:9999; padding: 16px; font-size: 12px;">
    <p style="position: relative; margin: auto"><span style="background:red; display: inline-block; height: 12px; width: 12px; border-radius: 50%; vertical-align:middle; margin-right: 8px;"></span><span style="display:inline-block; vertical-align:middle">Deceased</span></p>
    <p style="position: relative; margin: auto"><span style="background:gold; display: inline-block; height: 12px; width: 12px; border-radius: 50%; vertical-align:middle; margin-right: 8px;"></span><span style="display:inline-block; vertical-align:middle">Injured</span></p>
    <p style="position: relative; margin: auto"><span style="background:green; display: inline-block; height: 12px; width: 12px; border-radius: 50%; vertical-align:middle; margin-right: 8px;"></span><span style="display:inline-block; vertical-align:middle">Shoot and Miss</span></p>
    <p style="position: relative; margin: auto"><span style="background:#cfcfcf; display: inline-block; height: 12px; width: 12px; border-radius: 50%; vertical-align:middle; margin-right: 8px;"></span><span style="display:inline-block; vertical-align:middle">Other</span></p>
</div>
{% endmacro %}
'''
#Wrap the template in a branca macro element and attach it to the map's root
legend = branca.element.MacroElement()
legend._template = branca.element.Template(legend_html)
m1.get_root().add_child(legend)

#Display map
m1


#Define a map centered over Dallas, TX
m2 = folium.Map(location=[32.766328, -96.787865],
               tiles = 'CartoDB positron',
               zoom_start = 11,
               max_zoom = 12,
               min_zoom = 11)

#Define initial bounding box to show all data when user loads map
m2.fit_bounds(bounds=[[32.601042, -96.973948],[33.051101, -96.583247]])

#Generate a heat map from point data and add to map.
#The coordinate columns are selected by name: a positional slice silently picks
#the wrong columns as soon as a column is added to or removed from locs_filt
HeatMap(locs_filt[["latitude", "longitude"]]).add_to(m2)

#Display map
m2

Race	Total Shootings	% of Total
A	2	1.0
B	111	50.0
L	72	32.0
W	38	17.0

Officer-Involved Shootings - Dallas, TX¶

Summary¶

Import Modules and Data¶

Modules¶

Data¶

Database Structure¶

Exploring Officer-Involved Shootings (2003-2016)¶

All Shootings¶

By Subject Race¶

By Subject Gender¶

Fatal Shootings¶

By Subject Race¶

By Subject Gender¶

Annual Trends¶

Locations¶

Point Map¶

Heatmap¶

Insights¶

Trends¶

Race¶

Gender¶

Location¶

	date	subject_statuses	latitude	longitude
0	2013-02-23	Injured	NaN	NaN
1	2010-05-03	Injured	NaN	NaN
2	2007-08-12	Other	NaN	NaN
3	2007-05-26	Shoot and Miss	NaN	NaN
4	2006-04-03	Injured	NaN	NaN
5	2005-05-09	Shoot and Miss	NaN	NaN
6	2003-07-24	Deceased	NaN	NaN

Table Name
incidents
officers
subjects

Gender	Total Shootings	% of Total
F	8	4.0
M	215	96.0

Gender	Fatal Shootings	% of Total
F	2	3.0
M	67	97.0