Source code for bdschism.barrier_status

#!/usr/bin/env python3
"""
make_south_delta_barrier_status.py

Reads four barrier installation files (space-separated, #-commented) with at least:
  - datetime
  - install

And for Grant Line, an additional weir elevation column (accepts either 'elev_weir' or 'weir_elev').

Computes the per-barrier installation status at midnight (last known value strictly
BEFORE each midnight) and writes a single CSV with per-barrier status plus an ag_count
column.

Special rule for Grant Line:
  If the weir-elevation column exists and is < 0 at a timestamp, treat the instantaneous
  'install' value as 0.5 (partial). This is applied before computing the midnight state.
  (So daily 'grantline' may be 0, 0.5, or 1.0.)

ag_count:
  Sum of the three ag barriers (oldr_tracy, midr, grantline) using their numeric
  daily values (so totals like 2.5 are possible). oldr_head is excluded from ag_count.

Output:
  south_delta_barrier_install_and_count_daily.csv
  Columns: date, oldr_tracy, midr, grantline, oldr_head, ag_count
"""

from pathlib import Path
import pandas as pd

# Input file mapping (edit paths as needed)
PATHS = {
    "oldr_tracy": "oldr_tracy_barrier.th",
    "midr": "midr_weir.th",
    "grantline": "grantline_barrier.th",
    "oldr_head": "oldr_head_barrier.th",
}

def _grantline_adjust(df: pd.DataFrame) -> pd.DataFrame:
    """Grant Line rule:
       - install == 0 → stays 0
       - install == 1 & weir_elev < 0 → 0.5
       - install == 1 & weir_elev > 0 → 1
    """
    df["install"] = pd.to_numeric(df["install"])
    elev = pd.to_numeric(df["elev_weir"])


    df.loc[(df["install"] == 1) & (elev < 0.), "install"] = 0.5
    df.loc[(df["install"] == 1) & (elev > 0.), "install"] = 1.0
    return df

[docs] def read_install_series(path: str, name: str) -> pd.Series: """Read a barrier file with required headers 'datetime' and 'install'. For 'grantline', apply the weir-elevation adjustment (0.5 when elev < 0) if present. Returns a time-indexed Series of numeric install values. """ df = pd.read_csv( path, sep=r"\s+", comment="#", engine="python" ) if "datetime" not in df.columns or "install" not in df.columns: raise ValueError(f"{path} must contain 'datetime' and 'install' columns.") df["install"] = df["install"].astype(float) if name == "grantline": df = _grantline_adjust(df) s = pd.Series(df["install"].values, index=pd.to_datetime(df["datetime"], errors="coerce")) s = s.dropna().sort_index() s = s[~s.index.duplicated(keep="last")] return s
[docs] def status_at_midnight(s: pd.Series, days: pd.DatetimeIndex) -> pd.Series: """Status at 00:00 of each day = last known value strictly BEFORE that instant.""" query_times = days - pd.Timedelta(nanoseconds=1) vals = s.asof(query_times) out = pd.Series(vals.values, index=days) return out.fillna(0.0)
[docs] def main(): series_map = {name: read_install_series(path, name) for name, path in PATHS.items()} min_dt = min(s.index.min() for s in series_map.values()) max_dt = max(s.index.max() for s in series_map.values()) days = pd.date_range(min_dt.floor("D"), (max_dt + pd.Timedelta(days=1)).floor("D"), freq="D") daily = pd.DataFrame(index=days) for name, s in series_map.items(): daily[name] = status_at_midnight(s, days) daily["ag_count"] = daily[["oldr_tracy", "midr", "grantline"]].sum(axis=1) daily.index = daily.index.strftime("%Y-%m-%d") daily.index.name = "date" out_path = Path("south_delta_barrier_install_and_count_daily.csv") daily.to_csv(out_path) print(f"Wrote {out_path.resolve()}")
if __name__ == "__main__": main()