#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Time series module
Helpers for creating regular and irregular time series, transforming irregular to regular
and analyzing gaps.
"""
import sys
import numpy as np
import pandas as pd
import datetime as _datetime
from typing import Union, Sequence, Mapping, Callable
from numbers import Number as numtype
from vtools.data.vtime import *
# Public names exported by ``from <this module> import *``.
# NOTE(review): "rts_formula" and "extrapolate_ts" are not defined in the
# visible part of this module -- confirm they are supplied elsewhere (e.g. by
# the ``vtools.data.vtime`` star import); a name listed here that does not
# exist makes a star import of this module raise AttributeError.
__all__ = [
    "to_dataframe",
    "rename_columns",
    "time_overlap",
    "rts",
    "rts_formula",
    "extrapolate_ts",
    "datetime_elapsed",
    "elapsed_datetime",
    "is_regular",
]
def to_dataframe(ts):
    """Return `ts` as a DataFrame.

    A DataFrame is handed back unchanged (the same object, not a copy).
    Anything else is converted by calling its ``to_frame()`` method, which is
    what a pandas Series provides.
    """
    return ts if isinstance(ts, pd.DataFrame) else ts.to_frame()
def rename_columns(
    ts: Union[pd.Series, pd.DataFrame],
    colnames: Union[str, Sequence[str], Mapping[str, str], Callable[[str], str]],
    convert_df: bool = True
) -> Union[pd.Series, pd.DataFrame]:
    """
    Give new column names to a DataFrame, or a new name to a Series.

    Parameters
    ----------
    ts
        The pandas Series or DataFrame whose name(s) should change.
    colnames
        How to rename. The interpretation depends on its type:

        - str
            * Series: becomes `Series.name`.
            * DataFrame: the one and only column gets this name; a DataFrame
              with any other number of columns is rejected.
        - Sequence[str] (other than str/bytes)
            * Series: must hold exactly one item, which becomes `Series.name`.
            * DataFrame: must hold one item per column; assigned in order.
        - Mapping[str, str]
            * DataFrame: forwarded to `DataFrame.rename(columns=...)`.
            * Series: if the current name is a key, the mapped value becomes
              the new name; otherwise the name is untouched. A `None` key
              matches a Series that has no name.
        - Callable[[str], str]
            * DataFrame: forwarded to `DataFrame.rename(columns=...)`.
            * Series: invoked on the current name; the result is the new name.
    convert_df : bool, default True
        When True, a Series input is first turned into a one-column DataFrame
        and then handled by the DataFrame rules.

    Returns
    -------
    pandas Series or DataFrame
        A renamed copy; the input object is not modified.

    Raises
    ------
    TypeError
        `ts` is neither a Series nor a DataFrame, the DataFrame has MultiIndex
        columns, or `colnames` is of an unsupported type.
    ValueError
        A sequence has the wrong length (not the column count for a DataFrame,
        not 1 for a Series), or a single string was supplied for a DataFrame
        that does not have exactly one column.
    """
    if convert_df and isinstance(ts, pd.Series):
        ts = ts.to_frame()

    given_text = isinstance(colnames, str)
    given_listlike = isinstance(colnames, Sequence) and not isinstance(
        colnames, (str, bytes)
    )

    if isinstance(ts, pd.DataFrame):
        # This helper only understands flat column labels.
        if isinstance(ts.columns, pd.MultiIndex):
            raise TypeError("rename_columns does not support MultiIndex columns")

        if given_text:
            if ts.shape[1] != 1:
                raise ValueError(
                    "rename_columns: single string provided for a DataFrame with "
                    f"{ts.shape[1]} columns; provide a list of names or a mapper."
                )
            renamed = ts.copy()
            renamed.columns = [colnames]
            return renamed

        if given_listlike:
            colnames = list(colnames)
            if len(colnames) != ts.shape[1]:
                raise ValueError(
                    f"rename_columns: list length ({len(colnames)}) must match DataFrame columns ({ts.shape[1]})"
                )
            renamed = ts.copy()
            renamed.columns = colnames
            return renamed

        if isinstance(colnames, Mapping) or callable(colnames):
            return ts.rename(columns=colnames)

        raise TypeError("rename_columns: 'colnames' must be str, list/sequence, dict/mapping, or callable for DataFrame")

    if not isinstance(ts, pd.Series):
        raise TypeError("rename_columns: expected a pandas Series or DataFrame")

    # Series path: work on a copy and only touch its name.
    renamed = ts.copy()
    if given_text:
        renamed.name = colnames
    elif given_listlike:
        colnames = list(colnames)
        if len(colnames) != 1:
            raise ValueError(
                f"rename_columns: list length for Series must be 1, got {len(colnames)}"
            )
        renamed.name = colnames[0]
    elif isinstance(colnames, Mapping):
        current = renamed.name
        # A nameless Series has name None, so a None key is matched by the
        # same membership test; unknown names are left as they are.
        if current in colnames:
            renamed.name = colnames[current]
    elif callable(colnames):
        renamed.name = colnames(renamed.name)
    else:
        raise TypeError("rename_columns: 'colnames' must be str, list/sequence, dict/mapping, or callable for Series")
    return renamed
def time_overlap(ts0, ts1, valid=True):
    """Find the time span covered by both series.

    Only the first and last time stamps of each input are compared; the
    function does not check that individual time stamps line up.

    Parameters
    ----------
    ts0, ts1 : pandas Series or DataFrame
        The two time series to compare.
    valid : bool, default True
        If True, each series' extent is taken from ``first_valid_index()`` /
        ``last_valid_index()``, so leading and trailing missing data are
        ignored. If False, the first and last index entries are used as is.

    Returns
    -------
    tuple or None
        ``(start, end)`` of the common span, or None when there is no overlap
        of positive length. None is also returned when either series has no
        usable extent (no valid data when `valid` is True, or an empty index
        when `valid` is False).
    """
    if valid:
        starts = (ts0.first_valid_index(), ts1.first_valid_index())
        ends = (ts0.last_valid_index(), ts1.last_valid_index())
        # first/last_valid_index() give None for an empty or all-missing
        # series; comparing None with a time stamp would raise TypeError.
        if any(stamp is None for stamp in starts + ends):
            return None
    else:
        # Indexing [0] / [-1] on an empty index would raise IndexError.
        if len(ts0.index) == 0 or len(ts1.index) == 0:
            return None
        starts = (ts0.index[0], ts1.index[0])
        ends = (ts0.index[-1], ts1.index[-1])

    start = max(starts)
    end = min(ends)
    return (start, end) if end > start else None
def rts(data, start, freq, columns=None, props=None):
    """Create a regular or calendar time series from data and time parameters

    Parameters
    ----------
    data : array_like
        Values of the series. A list or tuple is converted to a numpy array;
        any other input must expose ``shape`` (e.g. a numpy array). The length
        of the first axis sets the number of time steps. There is no
        restriction on data type, but not all functionality like addition or
        interpolation will work on all data.
    start : :class:`pandas.Timestamp`
        Time stamp of the first value, or a string or other type that
        :func:`pandas.date_range` can coerce to one.
    freq : str or offset
        Spacing of the index, passed to :func:`pandas.date_range` as `freq`
        (for example ``"15min"``).
    columns : optional
        Column labels passed to the :class:`pandas.DataFrame` constructor.
    props : optional
        Reserved for a future implementation; must be None.

    Returns
    -------
    result : :class:`pandas.DataFrame`
        A DataFrame indexed by a ``date_range`` that begins at `start`, has
        one entry per row of `data` and uses frequency `freq`.

    Raises
    ------
    NotImplementedError
        If `props` is not None.
    """
    if props is not None:
        raise NotImplementedError(
            "Props reserved for future implementation using xarray"
        )

    # Plain Python sequences have no ``shape``; make them arrays first.
    if isinstance(data, (list, tuple)):
        data = np.array(data)

    tslen = data.shape[0]
    ndx = pd.date_range(start, freq=freq, periods=tslen)
    return pd.DataFrame(data, index=ndx, columns=columns)
def datetime_elapsed(index_or_ts, reftime=None, dtype="d", inplace=False):
    """Express the time stamps of a series or index as elapsed seconds

    Parameters
    ----------
    index_or_ts : :class:`DatetimeIndex <pandas:pandas.DatetimeIndex>` or :class:`DataFrame <pandas:pandas.DataFrame>`
        Either a datetime index, or a time series whose ``index`` attribute
        holds the time stamps to transform.

    reftime : :class:`Timestamp <pandas:pandas.Timestamp>` or something convertible
        Time from which elapsed time is measured; it is passed through
        ``pandas.Timestamp``. The default of None uses the first time stamp.

    dtype : str such as 'i' or 'd', or a type such as `int` or `float`
        Output data type. Elapsed seconds are computed as floating point and
        then cast with ``astype(dtype)``.

    inplace : `bool`
        Only used when the input is a time series. If True, the index of the
        input object itself is replaced and that same object is returned;
        otherwise a copy carrying the new index is returned.

    Returns
    -------
    result :
        For an index input, a new index of elapsed seconds since `reftime`
        with type `dtype`. For a series input, the series (or a copy of it,
        see `inplace`) re-indexed by those elapsed seconds.
    """
    try:
        stamps = index_or_ts.index
    except AttributeError:
        # No ``index`` attribute: the argument is itself the index.
        stamps = index_or_ts
        bare_index = True
    else:
        bare_index = False

    origin = stamps[0] if reftime is None else pd.Timestamp(reftime)
    seconds = (stamps - origin).total_seconds().astype(dtype)

    if bare_index:
        return seconds

    target = index_or_ts if inplace else index_or_ts.copy()
    target.index = seconds
    return target
def elapsed_datetime(index_or_ts, reftime=None, time_unit="s", inplace=False):
    """Convert a time series or numerical Index to a Datetime index or series

    Parameters
    ----------
    index_or_ts : numeric :class:`Index <pandas:pandas.Index>`, :class:`TimedeltaIndex <pandas:pandas.TimedeltaIndex>` or :class:`DataFrame <pandas:pandas.DataFrame>`
        An index of elapsed times, or a time series whose ``index`` attribute
        holds elapsed times measured from `reftime`.

    reftime : :class:`Timestamp <pandas:pandas.Timestamp>` or something convertible
        The reference time to which the elapsed times are added. Required;
        it is coerced with ``pandas.Timestamp`` so strings and ``datetime``
        objects are accepted, matching `datetime_elapsed`.

    time_unit : str, default 's'
        Unit of a numeric index: 's' (seconds), 'h' (hours) or 'd' (days),
        case-insensitive. Ignored when the index is a TimedeltaIndex.

    inplace : `bool`
        Only used when the input is a time series. If True, the index of the
        input object itself is replaced and that same object is returned;
        otherwise a copy carrying the new index is returned.

    Returns
    -------
    result :
        For an index input, the datetime index obtained by adding the elapsed
        times to `reftime`. For a series input, the series (or a copy of it,
        see `inplace`) re-indexed by those datetimes.

    Raises
    ------
    TypeError
        If `reftime` is None.
    ValueError
        If `time_unit` is not one of 's', 'h', 'd' (numeric index only).
    """
    if reftime is None:
        # Adding None to an index fails with an obscure operand TypeError;
        # keep the exception type but say what is actually wrong.
        raise TypeError(
            "elapsed_datetime: 'reftime' is required to convert elapsed time to datetimes"
        )
    ref = pd.Timestamp(reftime)

    try:
        ndx = index_or_ts.index
    except AttributeError:
        # No ``index`` attribute: the argument is itself the index.
        ndx = index_or_ts
        input_index = True
    else:
        input_index = False

    if isinstance(ndx, pd.TimedeltaIndex):
        dtndx = ref + ndx
    else:
        unit = time_unit.lower()
        if unit == "h":
            ndx = ndx * 3600.0
        elif unit == "d":
            ndx = ndx * 86400.0
        elif unit != "s":
            raise ValueError("time unit must be 's','h',or 'd'")
        dtndx = ref + pd.to_timedelta(ndx, unit="s")

    if input_index:
        return dtndx

    result = index_or_ts if inplace else index_or_ts.copy()
    result.index = dtndx
    return result
def is_regular(ts, raise_exception=False):
    """
    Check if a pandas DataFrame, Series, or xarray object with a time axis (axis 0)
    has a regular time index.

    Regular means:
      - The index is unique.
      - The index equals a date_range spanning from the first to the last value with
        the frequency taken from the index's ``freq`` attribute or, failing that,
        inferred with ``pandas.infer_freq``.

    An empty or single-element index is considered regular. An index without a
    ``freq`` for which no frequency can be inferred (pandas needs at least three
    time stamps to infer one) is reported as not regular.

    Parameters:
      ts : DataFrame, Series, or xarray object.
          Series to evaluate
      raise_exception : bool
          If True, raises a ValueError when the index is not regular.
          Otherwise, returns False.

    Returns:
      bool : True if the time index is regular; False otherwise.

    Raises:
      ValueError : only when `raise_exception` is True and the index is missing,
          not unique, not convertible to datetime, or not regular.
    """

    def _not_regular(msg, cause=None):
        # Single exit for every failure so `raise_exception` is always honored.
        if raise_exception:
            if cause is None:
                raise ValueError(msg)
            raise ValueError(msg) from cause
        return False

    # Determine the index from the object
    if hasattr(ts, "index"):
        idx = ts.index
    # For xarray objects, assume the first dimension is time.
    elif hasattr(ts, "coords") and ts.dims:
        time_dim = ts.dims[0]
        # Try to convert coordinate to a pandas Index
        coord = ts.coords[time_dim]
        if hasattr(coord, "to_index"):
            idx = coord.to_index()
        else:
            idx = pd.Index(coord.values)
    else:
        return _not_regular(
            "The provided object does not have an accessible time index."
        )

    # An empty or single-element index is considered regular.
    if len(idx) <= 1:
        return True

    # Check if the index has duplicate values.
    if not idx.is_unique:
        return _not_regular("Index contains duplicate values.")

    # Ensure we are working with a DatetimeIndex. If not, attempt conversion.
    if not isinstance(idx, pd.DatetimeIndex):
        try:
            idx = pd.to_datetime(idx)
        except Exception as e:
            return _not_regular("Index could not be converted to datetime.", e)

    # Attempt to get the frequency. First check the .freq attribute.
    freq = getattr(idx, "freq", None)

    # If not set, try to infer it. This can often produce false negatives with messy data
    # but will not fail in this case because every timestamp is checked
    if freq is None:
        try:
            freq = pd.infer_freq(idx)
        except (ValueError, TypeError):
            # infer_freq raises instead of returning None for inputs it cannot
            # handle (e.g. fewer than three time stamps); treat that the same
            # as "no frequency found" so the caller's flag decides the outcome.
            freq = None

    if freq is None:
        return _not_regular(
            "Could not infer a frequency from the index; it may not be regular."
        )

    # Build the expected index using the determined frequency.
    expected_index = pd.date_range(start=idx[0], end=idx[-1], freq=freq)
    if not expected_index.equals(idx):
        return _not_regular("Index is not regular based on the inferred frequency.")

    return True
def example():
    """Demonstrate a round trip between datetime and elapsed-time indexes.

    Prints the elapsed-seconds index of a 15-minute datetime index and its
    type, then converts a small DataFrame to elapsed time and back and prints
    the difference from the original frame.
    """
    stamps = pd.date_range(pd.Timestamp(2017, 1, 1, 12), freq="15min", periods=10)
    elapsed = datetime_elapsed(stamps, dtype="i")
    print(elapsed)
    print(type(elapsed))

    reference = pd.Timestamp(2017, 1, 1, 11, 59)
    frame = pd.DataFrame(
        {"vals": np.arange(0.0, 10.0, dtype="d")}, index=stamps.copy()
    )
    as_elapsed = datetime_elapsed(frame, reftime=reference, dtype=int)
    print(elapsed_datetime(as_elapsed, reftime=reference) - frame)
# Run the demonstration when this file is executed as a script.
if __name__ == "__main__":
    example()