A Developing Blog

Feb 2016

Code

Code…where the heck am I keeping my code notes?

here
Comments

Charts


Comments

More Montco

Seems like all I do is work on this site….okay, there are a lot of cool features for analyzing this data. But, it's a lot of time spent on the icons, etc.

Comments

Tribute

Nice tribute by Brian Mulcahy to a friend who passed away…

Screenshot 2016-02-15 13.07.02

Comments

Vomit -- more pandas

I spent the day working on charting auto accidents and calls to 911 for vomiting. Ugh — it seems like an endless amount of time trying to get the format correct…



Screenshot 2016-02-14 14.03.29





#!/usr/bin/env python
"""
import matplotlib
matplotlib.use('Agg')



"""

import requests
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend; must be selected before pyplot import
import matplotlib.pyplot as plt
import io
import numpy as np
import datetime

# Fetch the 911-call dataset and load it into a DataFrame.
url = "https://storage.googleapis.com/montco-stats/tz.csv"
d = requests.get(url).content
d = pd.read_csv(io.StringIO(d.decode('utf-8')))
d = pd.DataFrame(d)

# Index rows by their call timestamp.
d.index = pd.DatetimeIndex(d.timeStamp)



# Create the axes and hide all four spines (frame lines) for a cleaner,
# chartjunk-free look.  The original pasted this whole spine/tick block
# twice verbatim; the duplicate has been removed.
ax = plt.subplot(111)
for side in ("top", "bottom", "right", "left"):
    ax.spines[side].set_visible(False)

# Ensure that the axis ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# Small tick labels keep the focus on the data.
plt.xticks(fontsize=8)
plt.yticks(fontsize=7)



def createPivot(hr=24, n_top=2):
    """Pivot call counts for the busiest call titles in a recent window.

    Looks back *hr* hours from now, keeps the *n_top* titles with the
    highest total call count in that window (previously hard-coded to 2),
    and pivots their counts by timestamp, resampled into 24-hour bins.

    NOTE: reads and mutates the module-level DataFrame ``d`` — its
    timeStamp column is coerced to a DatetimeIndex as a side effect.

    Returns (jj, j): ``j`` is the resampled pivot table; ``jj`` is the same
    data round-tripped through j.csv, so its timeStamp comes back as a
    plain string column (which is what the plotting code below expects).
    """
    d.timeStamp = pd.DatetimeIndex(d.timeStamp)
    cutoff = datetime.datetime.now() - datetime.timedelta(hours=hr)
    tz = d[(d.timeStamp >= cutoff)]
    # Total events per title in the window; keep the busiest n_top titles.
    g = tz.groupby(['title']).agg({'e': sum})
    h = g.sort_values(by='e', ascending=False).head(n_top)['e'].to_dict()
    tz = tz[tz['title'].isin(h.keys())]
    # One column per title, one row per timestamp.
    p = pd.pivot_table(tz, values='e', index=['timeStamp'],
                       columns=['title'], aggfunc=np.sum)
    p.fillna(0, inplace=True)
    # 'how=' is the old pandas resample API this script was written against.
    j = p.resample('24H', how='sum')
    j.fillna(0, inplace=True)
    # j.index=j.index-pd.offsets.Hour(j.index.min().hour) - pd.offsets.Minute(j.index.min().minute) -pd.offsets.Second(j.index.min().second)
    # Round-trip through CSV so the caller also gets a string-typed copy.
    j.to_csv('j.csv', index=True, header=True)
    jj = pd.read_csv("./j.csv")
    return (jj, j)

(j, gj) = createPivot(hr=1000)

# These are the "Tableau 20" colors as RGB.  The original code assigned the
# stock palette and then immediately overwrote it (dead code); only the
# active palette — with the first color swapped for pure green — is kept.
# Stock first entry for reference: (31, 119, 180).
tableau20 = [(0, 255, 0), (174, 199, 232), (255, 127, 14), (255, 187, 120),
             (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
             (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
             (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
             (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]

# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.
tableau20 = [(r / 255., g / 255., b / 255.) for (r, g, b) in tableau20]

def fixTime(x):
    """Parse a timestamp string, accepting date+time or a bare date.

    Returns a datetime.datetime; raises ValueError if *x* matches neither
    '%Y-%m-%d %H:%M:%S' nor '%Y-%m-%d'.  The original bare ``except:``
    would also have swallowed TypeError, KeyboardInterrupt, etc.
    """
    try:
        return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        # Daily-resampled rows come back from the CSV without a time part.
        return datetime.datetime.strptime(x, '%Y-%m-%d')



# Marker styles for the first plotting pass, indexed by column rank.
tics = ['ro', 'bs', 'g^', 'ro', 'bo', 'yo', 'r^', 'b--', 'g--']

# Column 0 of j holds the timeStamp strings read back from CSV; parse them.
timeHrs = [fixTime(x) for x in j.timeStamp.values]

# First pass: draw each title's series as markers.
for rank, col in enumerate(list(j.columns)[1::]):
    plt.plot(timeHrs, j[col].values, tics[rank],
             lw=0.9, color=tableau20[rank])

# Second pass: overlay connecting lines in the same palette colors.
for rank, col in enumerate(list(j.columns)[1::]):
    plt.plot(timeHrs, j[col].values,
             lw=0.9, color=tableau20[rank])

#plt.savefig("junk.svg",format="svg", bbox_inches="tight")
plt.savefig("junk.png", format="png", bbox_inches="tight")



The plan was to get something anyone could download from Cloud9 and start analyzing the data.


#!/usr/bin/env python
"""
src: https://github.com/mchirico/montcoalert/raw/master/src/python/vomit.py
You can run this on cloud9
wget https://github.com/mchirico/montcoalert/raw/master/src/python/vomit.py

Install Anaconda:

Step 1:

Get the latest version of Anaconda2
$ wget https://repo.continuum.io/archive/Anaconda2-2.5.0-Linux-x86_64.sh
$ bash ./Anaconda2-2.5.0-Linux-x86_64.sh


"""


import requests
import pandas as pd
import matplotlib.pyplot as plt
import io
import numpy as np

# Read in the data
url = "https://storage.googleapis.com/montco-stats/tz.csv"
d = requests.get(url).content
d = pd.read_csv(io.StringIO(d.decode('utf-8')))
d = pd.DataFrame(d)

# Set index
d.index = pd.DatetimeIndex(d.timeStamp)

# Take a look at just 'EMS: NAUSEA/VOMITING' for this year
# set to temporary variable tz
tz = d[(d.title == 'EMS: NAUSEA/VOMITING') & (d.timeStamp >= '2016-01-01 00:00:00')]

# Parenthesized single-argument print works on both Python 2 and Python 3
# (the original bare print statement was Python-2-only).
print(tz.title.count())  # prints 187 currently

# Group all calls by their title.
g = d.groupby(['title'])

# Aggregate the data every 100 hours. Note 60T = 1hr, so 60*100 = 6000T.
kt100 = g['e'].resample('6000T', how=[np.sum, np.mean, np.median, len])
kt100.fillna(0, inplace=True)

# Pull out the nausea/vomiting sums and write them to .csv.
vomit = kt100.ix['EMS: NAUSEA/VOMITING']['sum']
vomit.to_csv('vomit100hr.csv', index=True, header=True)

"""
This is what we see in vomit100hr.csv
...
2016-01-29 00:00:00,23.0
2016-02-02 04:00:00,15.0
2016-02-06 08:00:00,26.0
2016-02-10 12:00:00,17.0

Note that the last entry will almost always be a lower
sum, since it doesn't contain the full 100hrs.

"""


# Let's look at every 50 hours... shorten the time
kt50 = g['e'].resample('3000T', how=[np.sum, np.mean, np.median, len])
kt50.fillna(0, inplace=True)

# Write it out to .csv
vomit50 = kt50.ix['EMS: NAUSEA/VOMITING']['sum']
vomit50.to_csv('vomit50hr.csv', index=True, header=True)

# Parenthesized prints run on both Python 2 and Python 3 (originals were
# Python-2-only print statements).
print(vomit50.tail())
print("\n\nStats on 50hr")
print("Max: %d Mean:% 6.2f Median:% 6.2f" % (vomit50.max(), vomit50.mean(), vomit50.median()))
# Label fixed: five quantiles are printed, so list all five (the original
# label ran "90% 100%" together with no comma).
s = "Quantiles: 25%, 50%, 75%, 90%, 100%\n"
s += " % 6.2f % 6.2f % 6.2f % 6.2f % 6.2f" % (vomit50.quantile(0.25),
                                              vomit50.quantile(0.50),
                                              vomit50.quantile(0.75),
                                              vomit50.quantile(0.9),
                                              vomit50.quantile(1))
print(s)

# If you want to see the display
# vomit50.plot()
# plt.savefig('vomit50.png', bbox_inches='tight')

"""
Now we get the following
...
2016-02-04 06:00:00,7.0
2016-02-06 08:00:00,6.0
2016-02-08 10:00:00,20.0 <---- A slight spike
2016-02-10 12:00:00,13.0
2016-02-12 14:00:00,4.0

Stats on 50hr
Max: 20 Mean: 8.31 Median: 7.00
Quantiles: 25%, 50%, 75%, 90%
6.00 7.00 10.00 13.00


"""


# Not using this
"""
# Group by title and township
g = d.groupby(['title','twp'])

# Look at the data every 100 hours. Note 60T = 1hr, 60*100=6000
k100=g['e'].resample('6000T', how=[np.sum,np.mean,np.median, len])
k100.fillna(0, inplace=True)
"""




# Create pivot table
# Start with the group we want
# You need this for pivots
d.timeStamp = pd.DatetimeIndex(d.timeStamp)

def createPivot(title='EMS: NAUSEA/VOMITING', hr='72H'):
    """Write a per-township pivot of call counts for *title* to a CSV.

    *hr* is a pandas resample rule (e.g. '100H') giving the bin width.
    The output filename is the title with ' ', '/' and ':' replaced by
    underscores, e.g. pivotEMS__NAUSEA_VOMITING.csv.

    Side effects only: reads the module-level DataFrame ``d`` (coercing its
    timeStamp column), and writes the CSV.  Returns None.
    """
    d.timeStamp = pd.DatetimeIndex(d.timeStamp)
    tz = d[(d.title == title)]
    # NOTE(review): assigning into a slice like this can trigger pandas'
    # SettingWithCopy warning; behavior is unchanged here, but verify on
    # any pandas upgrade.
    tz.index = pd.DatetimeIndex(tz.timeStamp)
    # tz[(tz.title=='EMS: ABDOMINAL PAINS') & (tz.twp=='CHELTENHAM')]
    p = pd.pivot_table(tz, values='e', index=['timeStamp'],
                       columns=['twp'], aggfunc=np.sum)
    # Make sure you don't do this
    #j=p.resample('4D',how='sum', fill_method='pad')
    j = p.resample(hr, how='sum')  # old pandas 'how=' API, kept for compat
    j.fillna(0, inplace=True)
    # Build the filename (a duplicate, no-op .replace(' ','_') was removed).
    s = title.replace(' ', '_').replace('/', '_').replace(':', '_')
    out_file = "pivot%s.csv" % (s)  # renamed from 'file': shadowed a builtin
    # Align bins to midnight of the first day in the index.
    j.index = (j.index - pd.offsets.Hour(j.index.min().hour)
               - pd.offsets.Minute(j.index.min().minute)
               - pd.offsets.Second(j.index.min().second))
    j.to_csv(out_file, index=True, header=True)

createPivot(title='EMS: NAUSEA/VOMITING', hr='100H')
createPivot(title='EMS: ABDOMINAL PAINS', hr='100H')



Comments

Machine Learning Courses


Recommended reading list:
https://redd.it/1jeawf



Screenshot 2016-02-08 19.52.09





https://www.coursera.org/learn/practical-machine-learning/lecture/HZKcr/cross-validation


http://golang.org/s/oracle-user-manual


Comments