Source code for vtools.data.vis_gap

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import timedelta


def generate_sample_data():
    """Build a synthetic 15-minute, 12-column DataFrame with injected gaps.

    The frame spans 2010-01-01 through 2012-01-01 (inclusive) and has columns
    ``Series1`` .. ``Series12`` filled with standard-normal noise.  Missing
    data is then injected in three passes:

    1. Every column receives 20 gaps of 1-200 samples at random positions.
    2. Six randomly chosen columns get a trailing gap of up to two days.
    3. The first column is blanked for the whole first year.

    The gap layout is drawn from a generator seeded with 0 and is therefore
    reproducible; the noise values come from NumPy's global random state and
    are not.

    Returns
    -------
    pandas.DataFrame
        The sample data, indexed by a regular 15-minute ``DatetimeIndex``.
    """
    # "15min" is the supported spelling of the minute alias; the single-letter
    # "T" form is deprecated in recent pandas releases.
    freq = "15min"
    start = pd.Timestamp("2010-01-01")
    end = pd.Timestamp("2012-01-01")
    index = pd.date_range(start, end, freq=freq)
    n = len(index)
    cols = [f"Series{i + 1}" for i in range(12)]
    # Values use the global NumPy state on purpose left unseeded; only the
    # gap positions below are tied to the seeded generator.
    data = np.random.randn(n, 12)
    df = pd.DataFrame(data, index=index, columns=cols)

    rng = np.random.default_rng(seed=0)

    # Pass 1: scatter 20 short gaps through every column.
    for col in df.columns:
        for _ in range(20):
            start_idx = rng.integers(0, n - 1)
            gap_length = rng.integers(1, 201)
            # Clamp so a gap starting near the end cannot run past the index.
            end_idx = min(start_idx + gap_length, n)
            df.loc[df.index[start_idx:end_idx], col] = np.nan

    # Pass 2: half of the columns end in a gap of at most two days.
    gap_end_candidates = rng.choice(df.columns, size=6, replace=False)
    intervals_per_day = int((24 * 60) / 15)
    for col in gap_end_candidates:
        gap_length = rng.integers(1, 2 * intervals_per_day + 1)
        df.loc[df.index[-gap_length:], col] = np.nan

    # Pass 3: the first column has no data at all during the first year.
    df.loc[df.index < (start + pd.DateOffset(years=1)), df.columns[0]] = np.nan
    return df
def _missing_runs(mask):
    """Return ``(first_label, last_label)`` for each run of True in *mask*."""
    flags = mask.to_numpy(dtype=bool)
    # Pad with False on both sides so runs touching either end still produce
    # a rising (+1) and a falling (-1) edge.
    edges = np.diff(np.concatenate(([False], flags, [False])).astype(np.int8))
    first_pos = np.flatnonzero(edges == 1)
    last_pos = np.flatnonzero(edges == -1) - 1
    labels = mask.index
    return list(zip(labels[first_pos], labels[last_pos]))


def plot_missing_data(
    df,
    ax,
    min_gap_duration,
    overall_start,
    overall_end,
    sample_interval=pd.Timedelta(minutes=15),
):
    """Draw one horizontal bar per column with missing spans highlighted.

    The axes are cleared first.  Each column of *df* gets a translucent
    background bar covering ``overall_start``..``overall_end`` and an opaque
    bar for every run of consecutive NaN values.  Runs shorter than
    *min_gap_duration* are widened symmetrically to that duration (clamped to
    the overall range) so they stay visible when zoomed out.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to inspect; NaN marks a missing sample.
    ax : matplotlib.axes.Axes
        Axes to draw on.  Its previous content is discarded.
    min_gap_duration : timedelta
        Minimum drawn width of a gap.
    overall_start, overall_end : datetime-like
        Extent of the background bar and clamp limits for widened gaps.
    sample_interval : pandas.Timedelta, optional
        Duration covered by one sample.  A gap is drawn from its first
        missing timestamp to its last missing timestamp plus this interval.
        Defaults to 15 minutes.
    """
    bar_height = 0.8
    overall_color = "skyblue"
    gap_color = "orange"
    boundary_gap_color = "darkorange"

    overall_start_num = mdates.date2num(overall_start)
    overall_end_num = mdates.date2num(overall_end)
    full_span = [(overall_start_num, overall_end_num - overall_start_num)]

    ax.cla()
    y_ticks = []
    y_labels = []
    for i, col in enumerate(df.columns):
        y_range = (i - bar_height / 2, bar_height)

        # Full-range background bar
        ax.broken_barh(full_span, y_range, facecolors=overall_color, alpha=0.6)

        series = df[col]
        mask = series.isna()
        for gs, last_missing in _missing_runs(mask):
            ge = last_missing + sample_interval
            actual = ge - gs
            if actual < min_gap_duration:
                # Widen short gaps equally on both sides, never past the
                # overall range.
                extra = min_gap_duration - actual
                gs = max(gs - extra / 2, overall_start)
                ge = min(ge + extra / 2, overall_end)
            # The test uses the (possibly widened) extent, so a gap padded up
            # to a boundary is coloured as a boundary gap as well.
            touches_boundary = gs <= overall_start or ge >= overall_end
            color = boundary_gap_color if touches_boundary else gap_color
            gs_num = mdates.date2num(gs)
            ax.broken_barh(
                [(gs_num, mdates.date2num(ge) - gs_num)],
                y_range,
                facecolors=color,
            )

        y_ticks.append(i)

        # Missing percentage label
        perc = mask.mean() * 100
        if perc == 0:
            lbl = f"{col} (0%)"
        elif perc < 0.01:
            lbl = f"{col} (<0.01%)"
        else:
            lbl = f"{col} ({perc:.2f}%)"
        y_labels.append(lbl)

    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_labels)
    ax.set_xlabel("Time")
    ax.set_title(f"Missing Data (Min gap = {min_gap_duration})")
    ax.xaxis_date()
    ax.figure.autofmt_xdate()
    # Fix y-limits so zooming acts on the x-axis only
    ax.set_ylim(-0.5, len(df.columns) - 0.5)
def interactive_gap_plot(df):
    """Show a missing-data chart whose minimum gap width follows the zoom.

    A figure is created and drawn once with a 4-day minimum gap.  After each
    canvas draw, if the x-limits differ from the last ones seen, the visible
    span is converted to years, a minimum gap width is picked from that span,
    the bars are redrawn and the x-limits are restored.  The call blocks in
    ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data passed to ``plot_missing_data``; its index supplies the overall
        range (first label to last label plus 15 minutes).
    """
    span_start = df.index[0]
    span_end = df.index[-1] + pd.Timedelta(minutes=15)

    seconds_per_year = 365.25 * 24 * 3600
    # (minimum visible span in years, gap width to use), widest view first.
    gap_tiers = (
        (12, timedelta(days=12)),
        (8, timedelta(days=4)),
        (4, timedelta(hours=18)),
        (1, timedelta(hours=4)),
    )
    finest_gap = timedelta(hours=1)

    fig, ax = plt.subplots(figsize=(12, 6))

    # Initial draw
    plot_missing_data(df, ax, timedelta(days=4), span_start, span_end)

    # x-limits handled by the most recent redraw; None until the first draw.
    seen_lo = None
    seen_hi = None

    def on_draw(event):
        nonlocal seen_lo, seen_hi

        lo, hi = ax.get_xlim()
        # Nothing to do when the view has not moved since the last redraw.
        if seen_lo == lo and seen_hi == hi:
            return
        seen_lo, seen_hi = lo, hi

        # Pick the gap width for the visible span.
        lo_dt = mdates.num2date(lo)
        hi_dt = mdates.num2date(hi)
        span_years = (hi_dt - lo_dt).total_seconds() / seconds_per_year
        gap = next(
            (width for floor, width in gap_tiers if span_years >= floor),
            finest_gap,
        )

        # Redrawing clears the axes, so put the user's zoom back afterwards.
        plot_missing_data(df, ax, gap, span_start, span_end)
        ax.set_xlim(lo, hi)

    # draw_event is used for reliable detection after toolbar zoom/pan
    fig.canvas.mpl_connect("draw_event", on_draw)

    plt.tight_layout()
    plt.show()
if __name__ == "__main__":
    # Demo: build the synthetic data set and open the interactive gap viewer.
    df = generate_sample_data()
    interactive_gap_plot(df)