Source code for vtools.functions.climatology

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd

__all__ = ['climatology','apply_climatology','climatology_quantiles']


[docs]def climatology(ts,freq,nsmooth=None):
    """" Create a climatology on the columns of ts

        Parameters
        ----------
        
        ts: DataFrame or Series
        DataStructure to be analyzed. Must have a length of at least 2*freq

        freq: period ["day","month"]
        Period over which the climatology is analyzed

        nsmooth: int
           window size (number of values) of pre-smoothing. This may not make sense for series that are not approximately regular. An odd number is usually best.

        Returns:
           out: DataFrame or Series
           Data structure of the same type as ts, with Integer index representing month (Jan=1) or day of year (1:365).
           
    """
    if nsmooth is None:
        ts_mean = ts
    else:        
        ts_mean=ts.rolling(nsmooth,center=True,min_periods=nsmooth//2).mean() # moving average
    
    by=[ts_mean.index.month, ts_mean.index.day]
    if(freq=="month"):
        by=[ts_mean.index.month]
    elif not(freq=="day"):
        raise ValueError("invalid frequency, must be 'month' or 'day'") 
        
    
    mean_data=[]
    mean_data_size=[]
    for name, group in ts_mean.groupby(by): 
        if (len(by)==2):
            (mo, dy) = name
            if not((mo==2) and (dy==29)):
                mean_data.append(group.mean(axis=0))
                mean_data_size.append(group.count())
        else:
             mean_data.append(group.mean(axis=0))
             mean_data_size.append(group.count())
             
    climatology_data=pd.concat(mean_data,axis=1).transpose()
    indexvals = list(range(1,13) if freq == "month" else range(1,len(climatology_data)+1))

    climatology_data.index = list(indexvals)
    climatology_data.index.name = "month" if freq=="month" else "dayofyear"
    return climatology_data
            

def climatology_quantiles(ts,min_day_year,max_day_year,
                          window_width,
                          quantiles=[0.05,0.25,0.5,0.75,0.95]):
    """" Create windowed quantiles across years on a time series

        Parameters
        ----------
        
        ts: DataFrame or Series
        DataStructure to be analyzed. 
        
        min_day_year: int
        Minimum Julian day to be considered

        freq: period ["day","month"]
        Maximum Julian day to be considered 
  
        window_width: int
        Number of days to include, including the central day and days on each side. So for instance window_width=15 would span the central date and 7 days on each side

        quantiles: array-like
           quantiles requested

        Returns:
           out: DataFrame or Series
           Data structure with Julian day as the index and quantiles as columns.
           
    """

    if (min_day_year < window_width) or (max_day_year > (365-window_width)): 
        raise NotImplementedError("Time brackets that cross January 1 not implemented yet")
    if window_width % 2 == 0: 
        raise ValueError("window_width must be odd")
    window_half = (window_width - 1)/2
    day_year = ts.index.dayofyear
    nquant = len(quantiles)
    clim = pd.DataFrame(columns=quantiles,index=range(min_day_year,max_day_year))
    
    for imid in range(min_day_year,max_day_year):
        iend = imid + window_half + 1 # The plus one centers the estimate, equal on each side plus one for center
        istart = imid - window_half
        usets = ts[(day_year > istart) & (day_year < iend)]
        qs = usets.quantile(quantiles)        
        clim.loc[imid,:] = qs.values.flatten()
    clim.index.name='day_of_year'
    return clim

if __name__=='__main__':
    import matplotlib.pyplot as plt
    from vtools.datastore.read_ts import read_ts
    fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/des_twi_405_turbidity_*.csv"
    fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/usgs_lib*turbidity*.rdb"
    selector = "16127_63680"
    ts = read_ts(fname,selector=selector)
    window = 19 # central day plus 9 on each side
    clim=climatology_quantiles(ts,182,305,window)
    clim.plot()
    plt.show()


def apply_climatology(climate, index):

    """ Apply daily or monthly climatology to a new index 

    Parameters
    ----------

    climate: DataFrame with integer index representing month of year (Jan=1) or day of year. Must be of size 12 365,366. Day 366 will be inferred from day 365 value

    index: DatetimeIndex representing locations to be inferred

    Returns
    -------
    DataFrame or Series as given by climate with values extracted from climatology for the month or day   
    """
    import numpy as np
    if len(climate) not in [12,365,366]:
        raise ValueError("Length of climatology must be 12,365 or 366")
    if len(climate) == 365:
        climate.loc[366,:] = climate.loc[365,:]
     
    freq = "month" if len(climate) == 12 else "day"
    
    
    df = pd.DataFrame(index = index,data=np.zeros(len(index),dtype='d'))
    def rowFunc1(row):
        return climate.loc[row.name.dayofyear,:]
    def rowFunc2(row):
        return climate.loc[row.name.month,:]
    
    if freq == "day":
        out=df.apply(rowFunc1,axis=1)
    else:
        out=df.apply(rowFunc2,axis=1)
               
    
    return out
Navigation

Related Topics

Source code for vtools.functions.climatology