Source code for vtools.functions.skill_metrics

import vtools
from vtools.data.timeseries import *
from scipy.stats import trim_mean
import numpy as np



[docs]
def mse(predictions, targets):
    """Mean squared error

    Parameters
    ----------
    predictions, targets : array_like
        Time series or arrays to analyze

    Returns
    -------
    mse : vtools.data.timeseries.TimeSeries
        Mean squared error between predictions and targets
    """
    return ((predictions - targets) ** 2.0).mean()




[docs]
def rmse(predictions, targets):
    """Root mean squared error

    Parameters
    ----------
    predictions, targets : array_like
        Time series or arrays to analyze

    Returns
    -------
    mse : float
        Mean squared error
    """
    return ((predictions - targets) ** 2).mean() ** 0.5




[docs]
def median_error(predictions, targets):
    """Calculate the median error, discounting nan values

    Parameters
    ----------
    predictions, targets : array_like

        Time series or arrays to be analyzed

    Returns
    -------
    med : float
        Median error
    """
    return (predictions - targets).median()




[docs]
def mean_error(predictions, targets, proportiontocut):
    """Calculate the untrimmed mean error, discounting nan values

    Parameters
    ----------
    predictions, targets : array_like

        Time series or arrays to be analyzed

    Returns
    -------
    med : float
        Median error
    """
    trim = (predictions - targets).mean()
    return trim




[docs]
def skill_score(predictions, targets, ref=None):
    """Calculate a Nash-Sutcliffe-like skill score based on mean squared error

    As per the discussion in Murphy (1988) other reference forecasts (climatology,
    harmonic tide, etc.) are possible.

    Parameters
    ----------
    predictions, targets : array_like
        Time series or arrays to be analyzed

    Returns
    -------
    rmse : float
        Root mean squared error
    """
    if not ref:
        ref = targets.mean()

    return 1.0 - (mse(predictions, targets) / mse(ref, targets))




[docs]
def willmott_score(predictions, targets, ref=None):
    """Calculate a Nash-Sutcliffe-like skill score based on mean squared error

    As per the discussion in Murphy (1988) other reference forecasts (climatology,
    harmonic tide, etc.) are possible.

    Parameters
    ----------
    predictions, targets : array_like
        Time series or arrays to be analyzed

    Returns
    -------
    rmse : float
        Root mean squared error
    """
    if not ref:
        ref = targets.mean()
    n = len(predictions)
    a = mse(predictions, targets)
    b = mse(ref, targets)
    c = mse(ref, predictions)
    e = np.abs(mean_error(predictions, ref, None))
    f = np.abs(mean_error(targets, ref, None))

    score = 1.0 - a / (b + c + 2 * e * f * n)
    return score




[docs]
def tmean_error(predictions, targets, limits=None, inclusive=[True, True]):
    """Calculate the (possibly trimmed) mean error, discounting nan values

    Parameters
    ----------
    predictions, targets : array_like
        Time series or arrays to be analyzed

    limits : tuple(float)
        Low and high limits for trimming

    inclusive : [boolean, boolean]
        True if clipping is inclusive on the low/high end

    Returns
    -------
    mean : float
        Trimmed mean error
    """
    import scipy

    y = np.ma.masked_invalid(predictions)
    z = np.ma.masked_invalid(targets)
    return scipy.stats.mstats.tmean(y - z, limits)




[docs]
def corr_coefficient(predictions, targets, method="pearson"):
    """Calculates the correlation coefficient (the 'r' in '-squared' between two series.

    For time series where the targets are serially correlated and may span only a fraction
    of the natural variability, this statistic may not be appropriate and Murphy (1988) explains
    why caution should be exercised in using this statistic.

    Parameters
    ----------
    predictions, targets : array_like
        Time series to analyze

    method : pearson’, ‘kendall’, ‘spearman’
        Method compatilble with pandasa

    Returns
    -------
    r : float
        Correlation coefficient
    """

    return predictions.corr(targets, method)




[docs]
def _main():
    from statsmodels.tsa.arima_process import arma_generate_sample
    import matplotlib.pyplot as plt

    # from vtools.data.sample_series import arma
    start = pd.Timestamp(2009, 3, 12)
    intvl = minutes(15)
    lag = minutes(37)
    index = pd.date_range(start=start, freq=intvl, periods=4000)
    x = np.linspace(0, 500.0, 4000)
    arparams = np.array([0.975])
    maparams = np.array([0.5, 0.25, 0.25])
    ar = np.r_[1, -arparams]  # add zero-lag and negate
    ma = np.r_[1, maparams]  # add zero-lag
    noise = arma_generate_sample(ar, ma, nsample=4000)
    base = np.cos(x * 2.0 * np.pi / 12.4) + 0.4 * np.cos(x * 2.0 * np.pi / 24.0 - 1.2)
    scale = 1.30
    y0 = base + noise / 20.0
    y1 = scale * base + x / 400.0

    ts0 = pd.Series(y0, index=index)
    ts1 = pd.Series(y1, index=index)
    ts1 = ts1.shift(2)

    ts0.plot()
    ts1.plot()
    plt.show()

    # lag_sec = calculate_lag(ts0,ts1,minutes(60),res=seconds(1))
    # print("lag = {}".format((lag_sec/60.)))
    # print("skill = {}".format(skill_score(ts0, ts1)))

    dr = pd.date_range(start=pd.Timestamp(2009, 2, 10), freq="15min", periods=4)
    x = pd.Series([2.0, 4.0, 6.0, 8.0], index=dr)
    y = x * 1.5

    print(mse(x, y))
    print(rmse(x, y))
    print(skill_score(x, y))
    print(median_error(x, y))

    z0 = np.array([1.0, np.nan, 7.0, 5.0, 7.0, 10.0, 12.0, 14.0])
    z1 = np.array([2.0, 3.0, 5.0, 6.0, 8.0, 11.0, 13.0, 36.0])
    dr2 = pd.date_range(pd.Timestamp(2009, 2, 10), freq="15min", periods=8)

    zts0 = pd.Series(z0, index=dr2)
    zts1 = pd.Series(z1, index=dr2)
    print("r {}".format(corr_coefficient(zts0, zts1)))



all = [mse, rmse, median_error, tmean_error, corr_coefficient, skill_score]

if __name__ == "__main__":
    _main()
Navigation

Related Topics

Source code for vtools.functions.skill_metrics