#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
__all__ = ['climatology','apply_climatology','climatology_quantiles']
[docs]def climatology(ts,freq,nsmooth=None):
"""" Create a climatology on the columns of ts
Parameters
----------
ts: DataFrame or Series
DataStructure to be analyzed. Must have a length of at least 2*freq
freq: period ["day","month"]
Period over which the climatology is analyzed
nsmooth: int
window size (number of values) of pre-smoothing. This may not make sense for series that are not approximately regular. An odd number is usually best.
Returns:
out: DataFrame or Series
Data structure of the same type as ts, with Integer index representing month (Jan=1) or day of year (1:365).
"""
if nsmooth is None:
ts_mean = ts
else:
ts_mean=ts.rolling(nsmooth,center=True,min_periods=nsmooth//2).mean() # moving average
by=[ts_mean.index.month, ts_mean.index.day]
if(freq=="month"):
by=[ts_mean.index.month]
elif not(freq=="day"):
raise ValueError("invalid frequency, must be 'month' or 'day'")
mean_data=[]
mean_data_size=[]
for name, group in ts_mean.groupby(by):
if (len(by)==2):
(mo, dy) = name
if not((mo==2) and (dy==29)):
mean_data.append(group.mean(axis=0))
mean_data_size.append(group.count())
else:
mean_data.append(group.mean(axis=0))
mean_data_size.append(group.count())
climatology_data=pd.concat(mean_data,axis=1).transpose()
indexvals = list(range(1,13) if freq == "month" else range(1,len(climatology_data)+1))
climatology_data.index = list(indexvals)
climatology_data.index.name = "month" if freq=="month" else "dayofyear"
return climatology_data
def climatology_quantiles(ts,min_day_year,max_day_year,
window_width,
quantiles=[0.05,0.25,0.5,0.75,0.95]):
"""" Create windowed quantiles across years on a time series
Parameters
----------
ts: DataFrame or Series
DataStructure to be analyzed.
min_day_year: int
Minimum Julian day to be considered
freq: period ["day","month"]
Maximum Julian day to be considered
window_width: int
Number of days to include, including the central day and days on each side. So for instance window_width=15 would span the central date and 7 days on each side
quantiles: array-like
quantiles requested
Returns:
out: DataFrame or Series
Data structure with Julian day as the index and quantiles as columns.
"""
if (min_day_year < window_width) or (max_day_year > (365-window_width)):
raise NotImplementedError("Time brackets that cross January 1 not implemented yet")
if window_width % 2 == 0:
raise ValueError("window_width must be odd")
window_half = (window_width - 1)/2
day_year = ts.index.dayofyear
nquant = len(quantiles)
clim = pd.DataFrame(columns=quantiles,index=range(min_day_year,max_day_year))
for imid in range(min_day_year,max_day_year):
iend = imid + window_half + 1 # The plus one centers the estimate, equal on each side plus one for center
istart = imid - window_half
usets = ts[(day_year > istart) & (day_year < iend)]
qs = usets.quantile(quantiles)
clim.loc[imid,:] = qs.values.flatten()
clim.index.name='day_of_year'
return clim
if __name__=='__main__':
import matplotlib.pyplot as plt
from vtools.datastore.read_ts import read_ts
fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/des_twi_405_turbidity_*.csv"
fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/usgs_lib*turbidity*.rdb"
selector = "16127_63680"
ts = read_ts(fname,selector=selector)
window = 19 # central day plus 9 on each side
clim=climatology_quantiles(ts,182,305,window)
clim.plot()
plt.show()
def apply_climatology(climate, index):
""" Apply daily or monthly climatology to a new index
Parameters
----------
climate: DataFrame with integer index representing month of year (Jan=1) or day of year. Must be of size 12 365,366. Day 366 will be inferred from day 365 value
index: DatetimeIndex representing locations to be inferred
Returns
-------
DataFrame or Series as given by climate with values extracted from climatology for the month or day
"""
import numpy as np
if len(climate) not in [12,365,366]:
raise ValueError("Length of climatology must be 12,365 or 366")
if len(climate) == 365:
climate.loc[366,:] = climate.loc[365,:]
freq = "month" if len(climate) == 12 else "day"
df = pd.DataFrame(index = index,data=np.zeros(len(index),dtype='d'))
def rowFunc1(row):
return climate.loc[row.name.dayofyear,:]
def rowFunc2(row):
return climate.loc[row.name.month,:]
if freq == "day":
out=df.apply(rowFunc1,axis=1)
else:
out=df.apply(rowFunc2,axis=1)
return out