{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 💽 Archive Long Dataset\n",
"\n",
"A single Synoptic API request (and therefore a single SynopticPy call) can't return more than a month of data. But what if we want all of WBB's data for a year?\n",
"\n",
"1. Make multiple API requests.\n",
"1. Write each month of data into its own Parquet file.\n",
"1. Read from Parquet files for fast loading!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"from synoptic import TimeSeries\n",
"\n",
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🚚💨 Speedy delivery from Synoptic timeseries service.\n"
]
},
{
"ename": "SynopticAPIError",
"evalue": "\n🛑 FATAL: Not a valid Synoptic API request.\n ├─ message: RECENT must be less than or equal to a month (43200 minutes).\n └─ url: https://api.synopticdata.com/v2/stations/timeseries?stid=wbb&recent=525600&token=REDACTED\nSee https://docs.synopticdata.com/services/weather-data-api for help.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mSynopticAPIError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Can't request more than a month of data at a time...\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mTimeSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstid\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwbb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrecent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimedelta\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdays\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m365\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mdf()\n",
"File \u001b[0;32m~/GITHUB/SynopticPy/synoptic/services.py:399\u001b[0m, in \u001b[0;36mTimeSeries.__init__\u001b[0;34m(self, **params)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams):\n\u001b[0;32m--> 399\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtimeseries\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/GITHUB/SynopticPy/synoptic/services.py:347\u001b[0m, in \u001b[0;36mSynopticAPI.__init__\u001b[0;34m(self, service, token, verbose, **params)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# -------------------\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;66;03m# Check returned data\u001b[39;00m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;66;03m# Note: SUMMARY is always returned in the JSON.\u001b[39;00m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRESPONSE_CODE\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SynopticAPIError(\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m🛑 FATAL: Not a valid Synoptic API request.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ├─ message: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRESPONSE_MESSAGE\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 351\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m └─ url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSee 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhelp_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for help.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 353\u001b[0m )\n\u001b[1;32m 355\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 357\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m📦 Received data from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNUMBER_OF_OBJECTS\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m,\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m stations.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 358\u001b[0m )\n",
"\u001b[0;31mSynopticAPIError\u001b[0m: \n🛑 FATAL: Not a valid Synoptic API request.\n ├─ message: RECENT must be less than or equal to a month (43200 minutes).\n └─ url: https://api.synopticdata.com/v2/stations/timeseries?stid=wbb&recent=525600&token=REDACTED\nSee https://docs.synopticdata.com/services/weather-data-api for help."
]
}
],
"source": [
"# Can't request more than a month of data at a time: asking for a full year\n",
"# makes the API reject the request.\n",
"#\n",
"# The failure is the point of this cell, so catch it and display the message;\n",
"# an uncaught exception would halt \"Restart & Run All\" before the cells below.\n",
"try:\n",
"    df = TimeSeries(stid=\"wbb\", recent=timedelta(days=365)).df()\n",
"except Exception as err:  # the recorded run of this cell raised SynopticAPIError\n",
"    # The error text includes the request URL, which embeds the API token;\n",
"    # drop that line so the token is not saved in the notebook output.\n",
"    safe_lines = [line for line in str(err).splitlines() if \"token=\" not in line]\n",
"    print(type(err).__name__)\n",
"    print(\"\\n\".join(safe_lines).strip())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
shape: (13,)| literal |
|---|
| datetime[μs] |
| 2023-01-01 00:00:00 |
| 2023-02-01 00:00:00 |
| 2023-03-01 00:00:00 |
| 2023-04-01 00:00:00 |
| 2023-05-01 00:00:00 |
| … |
| 2023-09-01 00:00:00 |
| 2023-10-01 00:00:00 |
| 2023-11-01 00:00:00 |
| 2023-12-01 00:00:00 |
| 2024-01-01 00:00:00 |
"
],
"text/plain": [
"shape: (13,)\n",
"Series: 'literal' [datetime[μs]]\n",
"[\n",
"\t2023-01-01 00:00:00\n",
"\t2023-02-01 00:00:00\n",
"\t2023-03-01 00:00:00\n",
"\t2023-04-01 00:00:00\n",
"\t2023-05-01 00:00:00\n",
"\t…\n",
"\t2023-09-01 00:00:00\n",
"\t2023-10-01 00:00:00\n",
"\t2023-11-01 00:00:00\n",
"\t2023-12-01 00:00:00\n",
"\t2024-01-01 00:00:00\n",
"]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Month-start timestamps from 2023-01-01 through 2024-01-01 (inclusive).\n",
"# Each consecutive pair (dates[k], dates[k + 1]) bounds one monthly request.\n",
"\n",
"dates = pl.datetime_range(\n",
"    start=datetime(2023, 1, 1),\n",
"    end=datetime(2024, 1, 1),\n",
"    interval=\"1mo\",\n",
"    eager=True,  # return a Series right away instead of a lazy expression\n",
")\n",
"dates"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7.7 s, sys: 909 ms, total: 8.61 s 12, 1, 0, 0), end=datetime.datetime(2024, 1, 1, 0, 0))\n",
"Wall time: 40.7 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Make one API request per month and archive each month in its own Parquet file.\n",
"# Consecutive entries of `dates` give the (start, end) bounds of each request.\n",
"for i, (start, end) in enumerate(zip(dates, dates[1:]), start=1):\n",
"    # The trailing carriage return lets each progress message overwrite the last.\n",
"    print(f\"Working on loop {i}: {start=}, {end=}\", end=\"\\r\")\n",
"    df = TimeSeries(\n",
"        stid=\"wbb\",\n",
"        vars=\"air_temp,relative_humidity,wind_speed,wind_direction\",\n",
"        start=start,\n",
"        # End one microsecond before the next month's first timestamp\n",
"        # (presumably so the boundary observation is not archived twice).\n",
"        end=end - timedelta(microseconds=1),\n",
"        verbose=False,\n",
"    ).df()\n",
"    df.write_parquet(\n",
"        f\"sample_long_timeseries_{start:%Y%m%d%H%M%S}_{end:%Y%m%d%H%M%S}.parquet\"\n",
"    )\n",
"\n",
"# Terminate the progress line; otherwise the %%time report is written over\n",
"# the last progress message and the two end up interleaved in the output.\n",
"print()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 799 ms, sys: 881 ms, total: 1.68 s\n",
"Wall time: 564 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
"
shape: (2_100_848, 20)| date_time | variable | sensor_index | is_derived | value | units | id | stid | name | elevation | latitude | longitude | mnet_id | state | timezone | elev_dem | period_of_record_start | period_of_record_end | is_restricted | is_active |
|---|
| datetime[μs, UTC] | str | u32 | bool | f64 | str | u32 | str | str | f64 | f64 | f64 | u32 | str | str | f64 | datetime[μs, UTC] | datetime[μs, UTC] | bool | bool |
| 2023-01-01 00:00:00 UTC | "wind_direction" | 1 | false | 56.97 | "Degrees" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-01-01 00:01:00 UTC | "wind_direction" | 1 | false | 58.74 | "Degrees" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-01-01 00:02:00 UTC | "wind_direction" | 1 | false | 60.34 | "Degrees" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-01-01 00:03:00 UTC | "wind_direction" | 1 | false | 57.05 | "Degrees" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-01-01 00:04:00 UTC | "wind_direction" | 1 | false | 61.8 | "Degrees" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 2023-12-31 23:55:00 UTC | "wind_speed" | 1 | false | 0.761 | "m/s" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-12-31 23:56:00 UTC | "wind_speed" | 1 | false | 0.689 | "m/s" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-12-31 23:57:00 UTC | "wind_speed" | 1 | false | 0.628 | "m/s" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-12-31 23:58:00 UTC | "wind_speed" | 1 | false | 0.422 | "m/s" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
| 2023-12-31 23:59:00 UTC | "wind_speed" | 1 | false | 0.54 | "m/s" | 1 | "WBB" | "U of U William Browning Buildi… | 4806.0 | 40.76623 | -111.84755 | 153 | "UT" | "America/Denver" | 4727.7 | 1997-01-01 00:00:00 UTC | 2024-10-17 05:50:00 UTC | false | true |
"
],
"text/plain": [
"shape: (2_100_848, 20)\n",
"┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n",
"│ date_time ┆ variable ┆ sensor_in ┆ is_derive ┆ … ┆ period_of ┆ period_of ┆ is_restri ┆ is_activ │\n",
"│ --- ┆ --- ┆ dex ┆ d ┆ ┆ _record_s ┆ _record_e ┆ cted ┆ e │\n",
"│ datetime[ ┆ str ┆ --- ┆ --- ┆ ┆ tart ┆ nd ┆ --- ┆ --- │\n",
"│ μs, UTC] ┆ ┆ u32 ┆ bool ┆ ┆ --- ┆ --- ┆ bool ┆ bool │\n",
"│ ┆ ┆ ┆ ┆ ┆ datetime[ ┆ datetime[ ┆ ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ μs, UTC] ┆ μs, UTC] ┆ ┆ │\n",
"╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n",
"│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 00:00:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 00:01:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 00:02:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 00:03:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 00:04:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 23:55:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 23:56:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 23:57:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 23:58:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n",
"│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n",
"│ 23:59:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n",
"│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n",
"└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"\n",
"# Load every archived Parquet file with a single glob pattern; look how fast this is.\n",
"wbb_2023 = pl.read_parquet(source=\"sample_long_timeseries_*.parquet\")\n",
"wbb_2023"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "synoptic2",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}