A Developing Blog

Python

Pandas

I keep doing the same simple things with data, over and over:
summarizing, grouping, and resampling time series in Pandas…


#!/usr/bin/env python

#!/usr/bin/env python

import pandas as pd
import datetime
import numpy as np

def getTZ(path='allEvents.csv'):
    """Load the events CSV into a DataFrame with every column read as text.

    The file is read without a header row; its six columns are named
    lat, lng, desc, zip, title and timeStamp, in that order.

    Args:
        path: Location of the CSV file. Defaults to 'allEvents.csv',
            resolved against the current working directory.

    Returns:
        A pandas DataFrame holding the six named columns as strings.
    """
    columns = ['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp']
    # datetime.datetime is not a dtype read_csv understands, so timeStamp is
    # kept as text here and converted by the caller (e.g. pd.DatetimeIndex).
    # read_csv already returns a DataFrame, so no extra wrapping is needed.
    return pd.read_csv(path, header=None, names=columns,
                       dtype=dict.fromkeys(columns, str))

# Load the events, then index the rows by their parsed timeStamp values so
# that time-based operations can be applied to the frame.
tz = getTZ()
tz.index = pd.DatetimeIndex(tz['timeStamp'])
def f(x):
    """Return the second ';'-separated field of a row's third value.

    Args:
        x: A row whose third element (position 2) is a ';'-delimited string.
            May be a pandas Series (as passed by DataFrame.apply with
            axis=1) or any plain sequence such as a list or tuple.

    Returns:
        The second field of that string with surrounding whitespace removed.

    Raises:
        IndexError: If the string contains no ';' and so has no second field.
    """
    # Integer keys on a label-indexed Series are deprecated as positional
    # lookups, so go through .iloc when the row provides it.
    text = x.iloc[2] if hasattr(x, 'iloc') else x[2]
    fields = [piece.strip() for piece in text.split(';')]
    return fields[1]

# Derive a 'twp' column by applying f to every row of the frame.
tz['twp'] = tz.apply(f, axis=1)
# Constant marker column: aggregating 'e' within a bucket counts its events.
tz['e'] = 1

# Bucket events into 300-minute windows for each (title, twp) pair.
# The old resample(..., how=[...]) form was removed from pandas; the
# aggregations are requested through .agg() on the resampler instead, and
# 'min' is the supported spelling of the minute alias.
g = tz.groupby(['title', 'twp'])
k = g['e'].resample('300min').agg(['sum', 'mean', 'median', len])

# Write the per-bucket summary and the enriched event table.
k.to_csv('TitleTmp.csv')
tz.to_csv('tz.csv', index=False)



Comments