Source code for bdschism.barrier_status

#!/usr/bin/env python3
"""
make_south_delta_barrier_status.py

Reads four barrier installation files (space-separated, #-commented) with at least:
  - datetime
  - install

And for Grant Line, an additional weir elevation column (accepts either 'elev_weir' or 'weir_elev').

Computes the per-barrier installation status at midnight (last known value strictly
BEFORE each midnight) and writes a single CSV with per-barrier status plus an ag_count
column.

Special rule for Grant Line:
  If the weir-elevation column exists and is < 0 at a timestamp where 'install' is 1,
  the instantaneous 'install' value is treated as 0.5 (partial). Rows where 'install'
  is 0 are left at 0. This is applied before computing the midnight state.
  (So daily 'grantline' may be 0, 0.5, or 1.0.)

ag_count:
  Sum of the three ag barriers (oldr_tracy, midr, grantline) using their numeric
  daily values (so totals like 2.5 are possible). oldr_head is excluded from ag_count.

Output:
  south_delta_barrier_install_and_count_daily.csv
  Columns: date, oldr_tracy, midr, grantline, oldr_head, ag_count
"""

from pathlib import Path
import pandas as pd

# Input file mapping (edit paths as needed).
# Keys are the barrier names, which main() also uses as the output column
# names; values are the paths of the matching installation files. main()
# iterates this dict in order, so the key order here is the column order of
# the CSV. The 'grantline' key is special-cased in read_install_series().
PATHS = {
    "oldr_tracy": "oldr_tracy_barrier.th",
    "midr": "midr_weir.th",
    "grantline": "grantline_barrier.th",
    "oldr_head": "oldr_head_barrier.th",
}

def _grantline_adjust(df: pd.DataFrame) -> pd.DataFrame:
    """Apply the Grant Line partial-installation rule to ``df['install']``.

    Rule, evaluated row by row:
      - install == 0                     -> stays 0
      - install == 1 and weir elev < 0   -> 0.5 (partial)
      - install == 1 and weir elev >= 0  -> stays 1

    The weir elevation is taken from the 'elev_weir' column, or from
    'weir_elev' when 'elev_weir' is absent. When neither column exists no
    adjustment is made. Rows whose elevation is missing (NaN) are left as-is.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with an 'install' column and, optionally, a weir-elevation column.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object; its 'install' column is converted to float
        and modified in place.

    Raises
    ------
    ValueError
        If 'install' or the elevation column holds non-numeric text
        (raised by ``pd.to_numeric``).
    """
    # Cast to float up front so that writing 0.5 never has to upcast an
    # integer column.
    df["install"] = pd.to_numeric(df["install"]).astype(float)

    elev_col = next(
        (col for col in ("elev_weir", "weir_elev") if col in df.columns), None
    )
    if elev_col is None:
        # No weir-elevation information: nothing to adjust.
        return df

    elev = pd.to_numeric(df[elev_col])
    partial = (df["install"] == 1) & (elev < 0.0)
    df.loc[partial, "install"] = 0.5
    return df


[docs]
def read_install_series(path: str, name: str) -> pd.Series:
    """Read one barrier installation file into a time-indexed Series.

    The file is whitespace-separated, may contain ``#`` comments, and must
    have 'datetime' and 'install' header columns. When ``name`` is
    'grantline' the parsed frame is passed through ``_grantline_adjust``
    before the series is built.

    Parameters
    ----------
    path : str
        Location of the file (anything ``pd.read_csv`` accepts).
    name : str
        Barrier name; only 'grantline' changes the processing.

    Returns
    -------
    pd.Series
        Float install values indexed by timestamp, sorted ascending, with
        unique timestamps. Rows with an unparseable timestamp or a missing
        install value are discarded. When a timestamp occurs more than once,
        the row appearing last in the file wins.

    Raises
    ------
    ValueError
        If the 'datetime' or 'install' column is missing.
    """
    df = pd.read_csv(path, sep=r"\s+", comment="#", engine="python")
    if "datetime" not in df.columns or "install" not in df.columns:
        raise ValueError(f"{path} must contain 'datetime' and 'install' columns.")
    df["install"] = df["install"].astype(float)
    if name == "grantline":
        df = _grantline_adjust(df)

    s = pd.Series(
        df["install"].values,
        index=pd.to_datetime(df["datetime"], errors="coerce"),
    )
    # errors="coerce" turns bad timestamps into NaT *index labels*; dropna()
    # only looks at values, so those rows must be filtered out explicitly.
    s = s[s.index.notna()]
    s = s.dropna()
    # De-duplicate while the rows are still in file order so "last" really
    # means the last row in the file, then sort (the default sort is not
    # guaranteed to be stable).
    s = s[~s.index.duplicated(keep="last")]
    return s.sort_index()



[docs]
def status_at_midnight(s: pd.Series, days: pd.DatetimeIndex) -> pd.Series:
    """Return, for every entry of ``days``, the value of ``s`` in force just before it.

    Each day is looked up one nanosecond earlier, so a record stamped exactly
    at 00:00 does not count for that day. Days that have no earlier record
    (or whose lookup yields NaN) are reported as 0.0. The result is indexed
    by ``days``.
    """
    one_ns = pd.Timedelta(nanoseconds=1)
    just_before = days - one_ns
    looked_up = s.asof(just_before)
    # Re-label the looked-up values with the midnight they belong to.
    return pd.Series(looked_up.values, index=days).fillna(0.0)



[docs]
def main():
    """Build the daily barrier-status table and write it to CSV.

    Reads every file listed in ``PATHS``, evaluates each barrier's status at
    midnight for every day from the earliest record's day through the day
    after the latest record, adds ``ag_count`` (row sum of oldr_tracy, midr
    and grantline), and writes the table to
    ``south_delta_barrier_install_and_count_daily.csv`` in the current
    directory with a 'date' index column formatted YYYY-MM-DD.
    """
    installs = {}
    for barrier, filename in PATHS.items():
        installs[barrier] = read_install_series(filename, barrier)

    # Overall time span covered by any of the input files.
    earliest = min(ser.index.min() for ser in installs.values())
    latest = max(ser.index.max() for ser in installs.values())
    first_day = earliest.floor("D")
    last_day = (latest + pd.Timedelta(days=1)).floor("D")
    days = pd.date_range(first_day, last_day, freq="D")

    daily = pd.DataFrame(
        {barrier: status_at_midnight(ser, days) for barrier, ser in installs.items()},
        index=days,
    )

    # oldr_head is deliberately left out of the ag-barrier total.
    ag_columns = ["oldr_tracy", "midr", "grantline"]
    daily["ag_count"] = daily[ag_columns].sum(axis=1)

    daily.index = daily.index.strftime("%Y-%m-%d").rename("date")

    out_path = Path("south_delta_barrier_install_and_count_daily.csv")
    daily.to_csv(out_path)
    print(f"Wrote {out_path.resolve()}")


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()