Source code for bdschism.hotstart_nudging_data

#!/usr/bin/env python
# -*- coding: utf-8 -*-import pandas as pd

# Use example as command-line interface function:
# python hotstart_nudging_data.py --start_date 2018-02-19  --nudge_len 300 --dest_dir . --repo_dir $repo_path
# or with bdschism in your environment::
# hot_nudge_data --start_date 2018-02-19  --nudge_len 300 --dest_dir . --repo_dir $repo_path

import matplotlib.pyplot as plt
from dms_datastore.read_ts import *
from dms_datastore.dstore_config import *
from dms_datastore.read_multi import *
from vtools.functions.unit_conversions import *
from vtools.data.vtime import days
import glob
import pandas as pd
import os
import logging
import click

logger = logger = logging.getLogger(__name__)

logging.basicConfig(filename="hotstart_nudge_data.log", level=logging.INFO)

stations = [
    "anh",
    "benbr",
    "hsl",
    "bts",
    "snc",
    "ibs",
    "cyg",
    "hun",
    "bdl",
    "fmb",
    "msl",
    "cse",
    "gzl",
    "ryc",
    "hon",
    "c24",
    "pct",
    "flt",
    "mrz",
    "pct",
    "mal",
    "pts",
    "carqb",
    "benbr",
    "co5",
    "ssi",
    "emm",
    "sdi",
    "blp",
    "jer",
    "sjj",
    "dsj",
    "frp",
    "fal",
    "bet",
    "hol2",
    "hll",
    "orq2",
    "frk",
    "holm",
    "bac",
    "mdm",
    "dbi",
    "ori",
    "oh4",
    "mab",
    "pri",
    "ppt",
    "trn",
    "rindg",
    "sjc",
    "rri",
    "sjg",
    "bdt",
    "dvi",
    "sjr",
    "orx",
    "pdup",
    "tpp",
    "uni",
    "sga",
    "gle",
    "old",
    "twa",
    "orm",
    "oad",
    "trp",
    "glc2",
    "wci",
    "vcu",
    "mab",
    "mtb",
    "rri2",
    "mdmzq",
    "sdc",
    "ges",
    "swe",
    "gss",
    "nmr",
    "sus",
    "sss",
    "sut",
    "snod",
    "gln",
    "rye",
    "ryf",
    "rvb",
    "mir",
    "dws",
    "lib",
    "ucs",
    "has",
    "srh",
    "awb",
    "afo",
    "hst",
    "ist",
    "ssw",
    "von",
    "few",
    "fre",
    "wlk",
    "gys",
    "god",
    "sal",
]

# Stations where an "upper" and "lower" sublocation occur and we must distinguish the upper
add_upper = ["anh", "cse", "mrz", "emm", "mal", "pts"]
repo = "//cnrastore-bdo/Modeling_Data/jenkins_repo_staging/continuous/formatted"


[docs] def hotstart_nudge_data(sdate, ndays, dest, repo_dir): t0 = sdate nudgelen = pd.Timedelta(days=ndays) repo = repo_dir ## station_df = station_dbase() buf = days(5) sdata = t0 - buf edata = t0 + nudgelen + buf station_df = station_df.loc[stations] no_such_file = [] tndx = pd.date_range(t0, t0 + nudgelen, freq="h") all_vars = ["temperature", "salinity"] used_stations = set() nudging_dfs = {} accepted_loc = [] for label_var in all_vars: var = {"temperature": "temp", "salinity": "ec"}[ label_var ] # working variable for data print(f"Working on variable: {label_var},{var}") logger.info(f"Working on variable: {label_var},{var}") vals = [] accepted = {} for ndx, row in station_df.iterrows(): x = row.x y = row.y fndx = ndx + "@upper" if ndx in add_upper else ndx pat = f"*_{fndx}_*_{var}*_20??.csv" pat = os.path.join(repo, pat) matches = glob.glob(pat) if len(matches) == 0: no_such_file.append((ndx, var)) continue try: subloc = "upper" if ndx in add_upper else None ts = read_ts_repo( ndx, var, subloc=subloc, repo=repo, src_priority="infer" ) ts = ts.loc[sdata:edata] ts = ts.interpolate(limit=4) if ts.shape[1] > 1: ts = ts.mean(axis=1) ts.name = "value" else: ts = ts.squeeze() if var == "temp": topquant = ts.quantile(q=0.25) if topquant > 35: print("Transforming F to C based on 25% qyantuke > 35deg") print("Transforming F to C based on 25% qyantuke > 35deg") ts = fahrenheit_to_celsius(ts) if ndx in ["clc"] and (ts < 0.0).all(): ts = celsius_to_farenheit(ts) elif var == "ec": ts = ec_psu_25c(ts) else: raise ValueError( f"Haven't worked out transforms needed except for {var}, only salt/temp" ) val = ts.at[t0] if not np.isnan(val): vals.append((ndx, x, y, val)) # This is the fraction of missing data ts = ts.reindex(tndx) gap_frac = ts.isnull().sum() / len(ts) print(f"Fraction of mssing data for {ndx} {var} is {gap_frac}") if gap_frac < 0.25: print(f"Accepted {ndx} {var}") ts.columns = [ndx] ts = ts.fillna(-9999.0) accepted[ndx] = ts if ndx not in used_stations: accepted_loc.append((ndx, x, y)) used_stations.add(ndx) except Exception as err: print("Exception") print(str(err)) print(ndx, var) print(err) var_df = pd.DataFrame(data=vals, columns=("station", "x", "y", f"{label_var}")) var_df.set_index("station") var_df.to_csv( os.path.join(dest, f"hotstart_data_{label_var}.csv"), sep=",", float_format="%.2f", ) # Check if accepted is empty. If so, throw an exception if accepted == {}: raise ValueError(f"No data found for {label_var} in {repo}") else: nudging_df = pd.concat(accepted, axis=1) nudging_df.index.name = "datetime" nudging_dfs[label_var] = nudging_df print(nudging_df) logger.info(nudging_df) obs_xy = pd.DataFrame(data=accepted_loc, columns=["site", "x", "y"]) print("reindexing and printing") logger.info("reindexing and printing") for label_var in all_vars: nudging_dfs[label_var].to_csv( os.path.join(dest, f"nudging_data_{label_var}.csv"), sep=",", float_format="%.2f", ) # Deprecated # nudging_dfs[label_var].reindex(columns=obs_xy.site).to_csv(f"nudging_data_{label_var}_b.csv",sep=",",float_format="%.2f") obs_xy = obs_xy.set_index("site", drop=True) obs_xy.to_csv(os.path.join(dest, f"obs_xy.csv"), sep=",", float_format="%.2f") print("No such file") logger.info("No such files") for item in no_such_file: print(item) logger.info(item)
@click.command( help="""\n Download station data from repo and save in csv format for hotstart nudging.\n Usage:\n bds hot_nudge_data --start_date 2018-02-19 --nudge_len 300 --dest_dir . --repo_dir $repo_path\n """ ) @click.option( "--start_date", required=True, help="starting date of SCHISM model, must be \ format like 2018-02-19", ) @click.option( "--nudge_len", required=True, type=int, help="number of days to be downloaded \ from starting date", ) @click.option( "--dest_dir", default=".", help="folder to store downloaded obs nudging data. \ Default is current folder", ) @click.option( "--repo_dir", default=repo, help=f"path to the repo of observed time series. \ Default is {repo}", ) @click.help_option("-h", "--help") def hotstart_nudge_data_cli(start_date, nudge_len, dest_dir, repo_dir): """ Command-line interface for the hotstart_nudge_data function. """ try: sdate = pd.to_datetime(start_date) except ValueError: raise ValueError("Invalid date format. Use YYYY-MM-DD.") hotstart_nudge_data(sdate, nudge_len, dest_dir, repo_dir) if __name__ == "__main__": hotstart_nudge_data_cli()