In [31]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[31]:
In [33]:
import pandas as pd
import matplotlib
from IPython.display import Image
from IPython.core.display import HTML 

%matplotlib inline

311 Data Exploration

Reading Standardized Data

Data Table: (Rows, Columns)

In [34]:
df=pd.read_csv('311_geo_report.csv',parse_dates=['requested_datetime','updated_datetime','closed_date'])
df.shape
Out[34]:
(47834, 14)

Sample Data

In [35]:
df.head()
Out[35]:
service_request_id service_name requested_datetime source description status_description updated_datetime service_subtype neighborhood_district closed_date location address lat long
0 400578 Trash questions 2015-07-02 12:17:00 33891 DPW Sanitation Closed 2017-06-26 20:17:00 information calls Ward 7 2017-06-26 20:17:00 80 Electric Ave\nSomerville, MA\n(42.403581, -... 80 Electric Ave 42.403581 -71.125199
1 400606 Request tree on public property 2015-07-02 12:59:00 43103 OSPCD Closed 2017-06-08 12:35:00 Service Requests Ward 5 2017-06-08 12:35:00 32 Robinson St\nSomerville, MA\n(42.3928953, -... 32 Robinson St 42.392895 -71.101951
2 401540 Temporary no parking sign posting 2015-07-07 10:08:00 43542 Traffic and Parking Closed 2017-06-29 15:58:00 internally generated Ward 6 2017-06-29 15:58:00 5 Windsor Rd\nSomerville, MA\n(42.3930438, -71... 5 Windsor Rd 42.393044 -71.115367
3 402710 Request tree on public property 2015-07-10 11:12:00 27228 OSPCD Closed 2016-02-23 10:08:00 Service Requests Ward 3 2016-02-23 10:08:00 56 Lowell St\nSomerville, MA\n(42.3863771, -71... 56 Lowell St 42.386377 -71.110644
4 403238 Arborist and tree maintenance 2015-07-13 15:20:00 31420 DPW-Highway Closed 2017-05-09 14:03:00 Service Requests Ward 7 2017-05-09 14:03:00 17 Sunset Rd\nSomerville, MA\n(42.4087858, -71... 17 Sunset Rd 42.408786 -71.124307

Number of requests per year

In [36]:
df['count']=1
df['year']=df.requested_datetime.dt.year
df[['year','count']].groupby('year').count().plot(kind='bar')
Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x11af7d310>

Limiting data to 2017

Number of requests per month (2017)

In [39]:
df=df[df['year']==2017]
df['month']=df.requested_datetime.dt.month
df[['month','count']].groupby('month').count().plot(kind='bar')
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a8caf90>

Limiting data to Feb 2017 to June 2017

Total number of requests from Feb to June (2017)

In [40]:
df=df[df.month>2]
df.shape[0]
Out[40]:
46389

Most frequent request types (top 10)

In [41]:
top_services=df.service_name.value_counts().head(10)
top_services.plot(kind='barh',figsize=(10,5))
Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x1133da350>

Total Number of frequent requests (top 10) from Feb to June (2017)

In [44]:
df=df[df.service_name.isin(top_services.keys())]
df.shape[0]
Out[44]:
17269

Request types by month (March 2017 - July 2017)

In [45]:
dg=df[['service_name','month','count']].groupby(['month','service_name']).count().unstack()
dg.columns=dg.columns.droplevel()
dg=dg[list(top_services.keys())]
dg.plot(figsize=(10,7),kind='bar',stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5))
Out[45]:
<matplotlib.legend.Legend at 0x11a922550>

Request types by hours

In [46]:
df['hour']=df.requested_datetime.dt.hour
In [47]:
dg=df[['service_name','hour','count']].groupby(['hour','service_name']).count().unstack()
dg.columns=dg.columns.droplevel()
dg=dg[list(top_services.keys())]
dg.plot(figsize=(17,10),kind='bar',stacked=True)
Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x118c108d0>

Calculating response times

In [51]:
df['response_time']=df.closed_date-df.requested_datetime
df['response_days']=df.response_time.dt.days
In [52]:
df=df[df.response_time.dt.seconds>0]
In [53]:
dg=df[['service_request_id','service_name','response_days']].pivot(index='service_request_id',columns='service_name')
dg.columns=dg.columns.droplevel(0)

Response times by types (in days)

In [54]:
l=list(dg.median().sort_values(ascending=False).index)
l=[s for s in l if s not in ['CS-Lost call (wrong #, hang up, dead air)',
                             'Ticket info. (balance, how many, citation #, etc.)',
                             'Welcome desk information',
                             'ISD-Buildings'
                          ]]
dg[l].boxplot(figsize=(15,5),vert=False,grid=False,showfliers=False,return_type='axes')
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x11aa458d0>

* boxplot guide

In [55]:
Image(url= "boxplot.png", width=400, height=400)
Out[55]: