Source code for vtools.functions.colname_align

"""
Column naming alignment utilities for time series composition functions.

This module provides decorators that standardize how functions like
``ts_merge``, ``ts_splice``, and ``transition_ts`` handle their ``names``
argument and enforce column consistency across multiple time series inputs.

Main features
-------------
- **Column consistency enforcement**:
  Ensures that when ``names=None`` (default), all input DataFrames share
  identical columns. This prevents accidental creation of staggered or
  mismatched columns.

- **Centralized naming behavior**:
  Applies uniform handling of ``names`` values:

  * ``None`` — require identical columns across all inputs and keep them.
  * ``str`` — require univariate inputs (single column each); output is
    a single-column DataFrame (or Series if all inputs were Series) with
    this name.
  * ``Iterable[str]`` — treated as a column selector: these columns are
    selected (and ordered) from the final output and must exist in every
    input.

- **Support for both list-style and pairwise APIs**:
  Works for functions that accept a sequence of time series (like
  ``ts_merge``/``ts_splice``) or two explicit series arguments
  (like ``transition_ts``).

Usage pattern
-------------
Decorate your functions as follows::

    @align_inputs_strict(seq_arg=0, names_kw="names")
    def ts_splice(series, names=None, ...):
        ...

    @align_inputs_pair_strict(ts0_kw="ts0", ts1_kw="ts1", names_kw="names")
    def transition_ts(ts0, ts1, names=None, ...):
        ...

This ensures consistent semantics for all multi-series combination tools.
"""

# colname_align.py
import pandas as pd
from functools import wraps
from inspect import signature

# module level error so test can access it
ERR_MULTI_NAMES_SERIES = "Cannot assign multiple names to a Series; pass a single name."



[docs]
def align_names(result, names):
    """
    Apply the ``names`` policy to the output of a composition function.

    Parameters
    ----------
    result : pandas.Series or pandas.DataFrame
        Object to rename or subset.
    names : None, str, or iterable
        * ``None``, an empty string, or an empty iterable -- ``result`` is
          returned unchanged (same object).
        * ``str`` -- a Series is copied and given this name; for a DataFrame
          only the *first* column is renamed, other columns are untouched.
        * iterable -- a Series requires exactly one entry, which becomes the
          name of a copy; for a DataFrame the entries are used as a column
          selection (``result[list(names)]``), which also fixes the order.
        * any other non-iterable value -- ``result`` is returned unchanged.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        The renamed / subset object, or ``result`` itself for a no-op.

    Raises
    ------
    ValueError
        If ``result`` is a Series and ``names`` is an iterable whose length
        is not exactly one.

    Notes
    -----
    ``names`` is never evaluated for truthiness directly: array-likes such
    as ``pandas.Index`` raise ``ValueError`` from ``bool()``, so emptiness is
    checked only after the iterable has been materialised into a list.
    """
    if names is None:
        return result

    if isinstance(names, str):
        if not names:
            return result
        if isinstance(result, pd.Series):
            renamed = result.copy()
            renamed.name = names
            return renamed
        return result.rename(columns={result.columns[0]: names})

    if not hasattr(names, "__iter__"):
        return result  # unsupported type: deliberate no-op

    labels = list(names)
    if not labels:
        return result

    if isinstance(result, pd.Series):
        if len(labels) != 1:
            raise ValueError(ERR_MULTI_NAMES_SERIES)
        renamed = result.copy()
        renamed.name = labels[0]
        return renamed

    return result[labels]




[docs]
def _coerce_inputs_strict(seq, names):
    """
    Strict input alignment policy:
    - names is None  -> all inputs must have identical column lists (no unions/intersections).
    - names is str   -> leave inputs as-is; final renaming happens via align_names(...).
    - names is list  -> for each DF, select exactly those columns; for a Series, only len==1 allowed.
    """
    out = []

    if names is None:
        # Promote Series->DataFrame for apples-to-apples checks
        tmp = [s.to_frame(name=s.name) if isinstance(s, pd.Series) else s for s in seq]

        # 1) Same number of columns across all inputs
        ncols0 = tmp[0].shape[1] if isinstance(tmp[0], pd.DataFrame) else 1
        for t in tmp[1:]:
            ncols = t.shape[1] if isinstance(t, pd.DataFrame) else 1
            if ncols != ncols0:
                raise ValueError(
                    "All inputs must have the same number of columns when `names` is None."
                )

        # 2) Exact column-name equality (order matters) if DataFrames
        if isinstance(tmp[0], pd.DataFrame):
            cols0 = list(tmp[0].columns)
            for t in tmp[1:]:
                if not isinstance(t, pd.DataFrame) or list(t.columns) != cols0:
                    raise ValueError(
                        "All input columns must be identical when `names` is None"
                    )

        return seq  # keep original types; they already match strictly

    # colname_align.py  (_coerce_inputs_strict)
    elif isinstance(names, str):
        # If ALL inputs are univariate, pre-rename their single column to `names`
        def is_uni(x):
            return (isinstance(x, pd.Series)) or (
                isinstance(x, pd.DataFrame) and x.shape[1] == 1
            )

        if all(is_uni(s) for s in seq):
            out = []
            for s in seq:
                if isinstance(s, pd.Series):
                    out.append(s.rename(names))
                else:  # 1-col DataFrame
                    only = s.columns[0]
                    out.append(s.rename(columns={only: names}))
            return out
        # Otherwise leave inputs as-is; final rename happens on the output
        return seq

    else:
        # Iterable of names: enforce and select exactly these columns
        req = list(names)
        if not req:
            raise ValueError("`names` selection is empty.")
        for s in seq:
            if isinstance(s, pd.DataFrame):
                missing = set(req) - set(s.columns)
                if missing:
                    raise ValueError(
                        f"DataFrame missing requested columns: {sorted(missing)}"
                    )
                out.append(s[req])
            else:  # Series
                if len(req) != 1:
                    raise ValueError(ERR_MULTI_NAMES_SERIES)  # <-- updated
                out.append(s.rename(req[0]))
        return out




[docs]
def align_inputs_strict(seq_arg=0, names_kw="names"):
    """
    Decorator factory for functions that take a *sequence* of time series.

    The wrapped call is processed in three steps:

    1. the sequence argument and ``names`` are located, whether they were
       passed positionally or by keyword;
    2. the sequence is validated/reshaped by ``_coerce_inputs_strict`` and
       substituted back into the call;
    3. the function's return value is post-processed by ``align_names``.

    Parameters
    ----------
    seq_arg : int, default 0
        Index of the sequence parameter in the wrapped function's signature.
    names_kw : str, default "names"
        Name of the parameter carrying the naming policy. When the caller
        does not supply it, ``None`` is assumed.

    Returns
    -------
    callable
        A decorator that wraps the target function.

    Notes
    -----
    If the sequence argument is absent, is not a ``list``/``tuple``, or is
    empty, the call is forwarded untouched (and its result is returned
    without ``align_names``) so the wrapped function can raise its own,
    more specific error.
    """

    def deco(fn):
        # The signature is fixed at decoration time; inspect it once.
        sig_params = signature(fn).parameters
        param_order = list(sig_params)
        param_name = param_order[seq_arg]  # e.g., "series"

        # Positional slot of `names`, or None when it can only be a keyword.
        names_pos = None
        names_param = sig_params.get(names_kw)
        if names_param is not None and names_param.kind in (
            names_param.POSITIONAL_ONLY,
            names_param.POSITIONAL_OR_KEYWORD,
        ):
            names_pos = param_order.index(names_kw)

        @wraps(fn)
        def wrapper(*args, **kwargs):
            # `names` may arrive by keyword or in its positional slot.
            if names_kw in kwargs:
                names = kwargs[names_kw]
            elif names_pos is not None and names_pos < len(args):
                names = args[names_pos]
            else:
                names = None

            # Read the sequence regardless of positional/keyword passing.
            if param_name in kwargs:
                seq = kwargs[param_name]
            else:
                try:
                    seq = args[seq_arg]
                except IndexError:
                    # Argument not supplied: let fn raise its own TypeError.
                    return fn(*args, **kwargs)

            # Early passthrough on bad/empty input so the wrapped function
            # reports the problem with its own message.
            if not isinstance(seq, (list, tuple)) or len(seq) == 0:
                return fn(*args, **kwargs)

            coerced = _coerce_inputs_strict(seq, names)

            # Write the coerced sequence back where it came from, without
            # mutating the caller's args/kwargs containers.
            if param_name in kwargs:
                kwargs = dict(kwargs)
                kwargs[param_name] = coerced
            else:
                new_args = list(args)
                new_args[seq_arg] = coerced
                args = tuple(new_args)

            out = fn(*args, **kwargs)
            return align_names(out, names)

        return wrapper

    return deco




[docs]
def align_inputs_pair_strict(ts0_kw="ts0", ts1_kw="ts1", names_kw="names"):
    """
    Decorator factory for functions that take *two* explicit time series.

    The two series are located (positionally or by keyword), passed together
    through ``_coerce_inputs_strict``, substituted back into the call, and the
    wrapped function's return value is post-processed by ``align_names``.

    Parameters
    ----------
    ts0_kw, ts1_kw : str, default "ts0" / "ts1"
        Parameter names of the two series in the wrapped function. If a name
        is not found in the signature, positional slots 0 and 1 are used.
    names_kw : str, default "names"
        Name of the parameter carrying the naming policy. When the caller
        does not supply it, ``None`` is assumed.

    Returns
    -------
    callable
        A decorator that wraps the target function.

    Notes
    -----
    If either series is not supplied by the caller, the call is forwarded
    untouched so the wrapped function raises its own ``TypeError`` (or
    applies its own defaults, in which case no alignment is performed).
    """

    def deco(fn):
        # The signature is fixed at decoration time; inspect it once.
        sig_params = signature(fn).parameters
        param_names = list(sig_params)
        idx0 = param_names.index(ts0_kw) if ts0_kw in param_names else 0
        idx1 = param_names.index(ts1_kw) if ts1_kw in param_names else 1

        # Positional slot of `names`, or None when it can only be a keyword.
        names_pos = None
        names_param = sig_params.get(names_kw)
        if names_param is not None and names_param.kind in (
            names_param.POSITIONAL_ONLY,
            names_param.POSITIONAL_OR_KEYWORD,
        ):
            names_pos = param_names.index(names_kw)

        missing = object()  # sentinel: None may be a legitimate argument value

        @wraps(fn)
        def wrapper(*args, **kwargs):
            def lookup(kw, idx):
                # Keyword first, then positional. The positional slot is only
                # indexed when actually needed, so keyword-only calls no
                # longer trip over an out-of-range `args[idx]`.
                if kw in kwargs:
                    return kwargs[kw]
                if idx < len(args):
                    return args[idx]
                return missing

            if names_kw in kwargs:
                names = kwargs[names_kw]
            elif names_pos is not None and names_pos < len(args):
                names = args[names_pos]
            else:
                names = None

            ts0 = lookup(ts0_kw, idx0)
            ts1 = lookup(ts1_kw, idx1)
            if ts0 is missing or ts1 is missing:
                # Incomplete call: defer to the wrapped function's own error.
                return fn(*args, **kwargs)

            ts0_new, ts1_new = _coerce_inputs_strict([ts0, ts1], names)

            # Write back on copies so the caller's containers stay untouched.
            new_args = list(args)
            new_kwargs = dict(kwargs)
            for kw, idx, value in (
                (ts0_kw, idx0, ts0_new),
                (ts1_kw, idx1, ts1_new),
            ):
                if kw in kwargs:
                    new_kwargs[kw] = value
                else:
                    new_args[idx] = value

            out = fn(*new_args, **new_kwargs)
            return align_names(out, names)

        return wrapper

    return deco
Navigation

Related Topics

Source code for vtools.functions.colname_align