Source code for vtools.functions.climatology

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd


__all__ = ["climatology", "apply_climatology", "climatology_quantiles"]


[docs] def climatology(ts, freq, nsmooth=None): """ " Create a climatology on the columns of ts Parameters ---------- ts: DataFrame or Series DataStructure to be analyzed. Must have a length of at least 2*freq freq: period ["day","month"] Period over which the climatology is analyzed nsmooth: int window size (number of values) of pre-smoothing. This may not make sense for series that are not approximately regular. An odd number is usually best. Returns: out: DataFrame or Series Data structure of the same type as ts, with Integer index representing month (Jan=1) or day of year (1:365). """ if nsmooth is None: ts_mean = ts else: ts_mean = ts.rolling( nsmooth, center=True, min_periods=nsmooth // 2 ).mean() # moving average by = [ts_mean.index.month, ts_mean.index.day] if freq == "month": by = [ts_mean.index.month] elif not (freq == "day"): raise ValueError("invalid frequency, must be 'month' or 'day'") mean_data = [] mean_data_size = [] for name, group in ts_mean.groupby(by): if len(by) == 2: (mo, dy) = name if not ((mo == 2) and (dy == 29)): mean_data.append(group.mean(axis=0)) mean_data_size.append(group.count()) else: mean_data.append(group.mean(axis=0)) mean_data_size.append(group.count()) climatology_data = pd.concat(mean_data, axis=1).transpose() indexvals = list( range(1, 13) if freq == "month" else range(1, len(climatology_data) + 1) ) climatology_data.index = list(indexvals) climatology_data.index.name = "month" if freq == "month" else "dayofyear" return climatology_data
[docs] def climatology_quantiles( ts, min_day_year, max_day_year, window_width, quantiles=[0.05, 0.25, 0.5, 0.75, 0.95], ): """ " Create windowed quantiles across years on a time series Parameters ---------- ts: DataFrame or Series DataStructure to be analyzed. min_day_year: int Minimum Julian day to be considered freq: period ["day","month"] Maximum Julian day to be considered window_width: int Number of days to include, including the central day and days on each side. So for instance window_width=15 would span the central date and 7 days on each side quantiles: array-like quantiles requested Returns: out: DataFrame or Series Data structure with Julian day as the index and quantiles as columns. """ if (min_day_year < window_width) or (max_day_year > (365 - window_width)): raise NotImplementedError( "Time brackets that cross January 1 not implemented yet" ) if window_width % 2 == 0: raise ValueError("window_width must be odd") window_half = (window_width - 1) / 2 day_year = ts.index.dayofyear nquant = len(quantiles) clim = pd.DataFrame(columns=quantiles, index=range(min_day_year, max_day_year)) for imid in range(min_day_year, max_day_year): iend = ( imid + window_half + 1 ) # The plus one centers the estimate, equal on each side plus one for center istart = imid - window_half usets = ts[(day_year > istart) & (day_year < iend)] qs = usets.quantile(quantiles) clim.loc[imid, :] = qs.values.flatten() clim.index.name = "day_of_year" return clim
if __name__ == "__main__": import matplotlib.pyplot as plt from dms_datastore.read_ts import read_ts fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/des_twi_405_turbidity_*.csv" fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/usgs_lib*turbidity*.rdb" selector = "16127_63680" ts = read_ts(fname, selector=selector) window = 19 # central day plus 9 on each side clim = climatology_quantiles(ts, 182, 305, window) clim.plot() plt.show()
[docs] def apply_climatology(climate, index): """Apply daily or monthly climatology to a new index Parameters ---------- climate: DataFrame with integer index representing month of year (Jan=1) or day of year. Must be of size 12 365,366. Day 366 will be inferred from day 365 value index: DatetimeIndex representing locations to be inferred Returns ------- DataFrame or Series as given by climate with values extracted from climatology for the month or day """ if len(climate) not in [12, 365, 366]: raise ValueError("Length of climatology must be 12,365 or 366") if len(climate) == 365: climate.loc[366, :] = climate.loc[365, :] freq = "month" if len(climate) == 12 else "day" # Vectorized lookup if freq == "day": dayofyear = index.dayofyear # Ensure day 366 is handled dayofyear = np.where(dayofyear > 365, 366, dayofyear) out = climate.loc[dayofyear].set_index(index) else: month = index.month out = climate.loc[month].set_index(index) return out