# Source code for synoptic.namespace

from typing import Literal

import polars as pl
import polars.selectors as cs


@pl.api.register_dataframe_namespace("synoptic")
class SynopticFrame:
    """Custom Polars namespace for SynopticPy DataFrames.

    The class is registered with Polars under the name ``synoptic``, so its
    methods are reached as ``df.synoptic.<method>()``.
    """

    def __init__(self, df: pl.DataFrame) -> None:
        """Keep a reference to the DataFrame the namespace methods act on."""
        self._df = df

    def with_local_timezone(self) -> pl.DataFrame | dict[str, pl.DataFrame]:
        """Convert datetime columns from UTC to local timezone.

        The target timezone is read from the ``timezone`` column and applied
        to every datetime column of the frame.

        Returns
        -------
        DataFrame if only one unique timezone is present, else returns
        a dict of DataFrames, one item for each timezone (keyed by the
        timezone value). An empty DataFrame is returned unchanged.
        """
        df = self._df

        # Compute the distinct timezones once; every branch below needs them.
        timezones = df["timezone"].unique()

        if len(timezones) == 0:
            # No rows means there is no timezone to convert to, and calling
            # ``.item()`` on an empty Series would raise. Hand the frame back.
            return df

        if len(timezones) == 1:
            tz = timezones.item()
            return df.with_columns(cs.datetime().dt.convert_time_zone(tz))

        # Several timezones: a datetime column can carry only one timezone,
        # so split the frame and convert each partition on its own.
        frames = {}
        for part in df.partition_by("timezone"):
            # Every row of a partition shares the same timezone value.
            tz = part["timezone"][0]
            frames[tz] = part.with_columns(cs.datetime().dt.convert_time_zone(tz))
        return frames

    def pivot(self, *, sensor_index: int = 1, **kwargs) -> pl.DataFrame:
        """Pivot a long-form SynopticPy DataFrame to wide-form.

        This likely gets what you're looking for, but it is recommended to
        use Polars directly to accomplish more customized pivots.

        - Long-form DataFrame has a row for each observation.
        - Wide-form DataFrame has a separate column for each variable.

        Parameters
        ----------
        sensor_index : int
            Sensor index to filter prior to the pivot.
        **kwargs :
            Keyword arguments for Polars `pivot`. Any of the following that
            are not supplied fall back to these defaults:

            - ``on="variable"``
            - ``index=["date_time", "stid", "latitude", "longitude", "elevation"]``
            - ``values="value"``
            - ``aggregate_function="mean"``

        Returns
        -------
        The pivoted DataFrame.
        """
        kwargs.setdefault("on", "variable")
        kwargs.setdefault(
            "index", ["date_time", "stid", "latitude", "longitude", "elevation"]
        )
        kwargs.setdefault("values", "value")
        kwargs.setdefault("aggregate_function", "mean")

        # Keep only the requested sensor before pivoting.
        selected = self._df.filter(pl.col("sensor_index") == sensor_index)
        return selected.pivot(**kwargs)

    def with_wind_uv(self) -> pl.DataFrame:
        """Provide ``wind_u`` and ``wind_v`` columns from wind speed and direction.

        **IMPORTANT** Requires a wide-form DataFrame (pivoted) with
        columns `wind_speed` and `wind_direction`.

        Returns
        -------
        The DataFrame with ``wind_u`` and ``wind_v`` columns added.

        Raises
        ------
        ValueError
            If either `wind_speed` or `wind_direction` is not a column.
        """
        df = self._df
        columns = df.columns
        if "wind_speed" not in columns or "wind_direction" not in columns:
            raise ValueError(
                "Must pivot the DataFrame with columns 'wind_speed' and 'wind_direction'."
            )

        # ``wind_direction`` is converted from degrees to radians first.
        # NOTE(review): the leading minus sign presumably reflects the
        # meteorological convention (direction the wind blows *from*) -- confirm.
        direction = pl.col("wind_direction").radians()
        speed = pl.col("wind_speed")
        return df.with_columns(
            wind_u=-speed * direction.sin(),
            wind_v=-speed * direction.cos(),
        )

    def with_network_name(
        self, which: Literal["short", "long"] = "short"
    ) -> pl.DataFrame:
        """Provide DataFrame with ``network_name`` column.

        The network names are looked up with ``synoptic.services.Networks``
        for the unique ``mnet_id`` values in the frame and joined on
        ``mnet_id``.

        Parameters
        ----------
        which : {'short', 'long'}
            Specify if the network shortname or longname is joined.

        Returns
        -------
        The DataFrame joined with a ``network_name`` column.

        Raises
        ------
        ValueError
            If the DataFrame has no ``mnet_id`` column.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from synoptic.services import Networks

        df = self._df
        if "mnet_id" not in df.columns:
            raise ValueError("Column 'mnet_id' is not in the DataFrame.")

        name_column = f"{which}name"
        network_ids = df["mnet_id"].unique().to_list()
        networks = (
            Networks(id=network_ids, verbose=False)
            .df()
            .select("mnet_id", name_column)
            .rename({name_column: "network_name"})
        )
        return df.join(networks, on="mnet_id")