{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 💽 Archive Long Dataset\n", "\n", "SynopticPy doesn't let you request more than a month of data. But what if we want all of WBB's data for a year? \n", "\n", "1. Make multiple API requests.\n", "1. Write each month of data into it's own Parquet file.\n", "1. Read from Parquet files for fast loading!" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from datetime import datetime, timedelta\n", "from synoptic import TimeSeries\n", "\n", "import polars as pl" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🚚💨 Speedy delivery from Synoptic timeseries service.\n" ] }, { "ename": "SynopticAPIError", "evalue": "\n🛑 FATAL: Not a valid Synoptic API request.\n ├─ message: RECENT must be less than or equal to a month (43200 minutes).\n └─ url: https://api.synopticdata.com/v2/stations/timeseries?stid=wbb&recent=525600&token=0bbe0e9fda7945a68951cc1bdebb2b0d\nSee https://docs.synopticdata.com/services/weather-data-api for help.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mSynopticAPIError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Can't request more than a month of data at a time...\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mTimeSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstid\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwbb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mrecent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimedelta\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdays\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m365\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mdf()\n", "File \u001b[0;32m~/GITHUB/SynopticPy/synoptic/services.py:399\u001b[0m, in \u001b[0;36mTimeSeries.__init__\u001b[0;34m(self, **params)\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams):\n\u001b[0;32m--> 399\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtimeseries\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/GITHUB/SynopticPy/synoptic/services.py:347\u001b[0m, in \u001b[0;36mSynopticAPI.__init__\u001b[0;34m(self, service, token, verbose, **params)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# -------------------\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;66;03m# Check returned data\u001b[39;00m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;66;03m# Note: SUMMARY is always returned in the JSON.\u001b[39;00m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRESPONSE_CODE\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SynopticAPIError(\n\u001b[1;32m 348\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m🛑 FATAL: Not a valid Synoptic API request.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ├─ message: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mRESPONSE_MESSAGE\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 351\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m └─ url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSee \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhelp_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for help.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 353\u001b[0m )\n\u001b[1;32m 355\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 357\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m📦 Received data from 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mSUMMARY\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNUMBER_OF_OBJECTS\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m,\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m stations.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 358\u001b[0m )\n", "\u001b[0;31mSynopticAPIError\u001b[0m: \n🛑 FATAL: Not a valid Synoptic API request.\n ├─ message: RECENT must be less than or equal to a month (43200 minutes).\n └─ url: https://api.synopticdata.com/v2/stations/timeseries?stid=wbb&recent=525600&token=0bbe0e9fda7945a68951cc1bdebb2b0d\nSee https://docs.synopticdata.com/services/weather-data-api for help." ] } ], "source": [ "# Can't request more than a month of data at a time...\n", "\n", "df = TimeSeries(stid=\"wbb\", recent=timedelta(days=365)).df()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (13,)
literal
datetime[μs]
2023-01-01 00:00:00
2023-02-01 00:00:00
2023-03-01 00:00:00
2023-04-01 00:00:00
2023-05-01 00:00:00
2023-09-01 00:00:00
2023-10-01 00:00:00
2023-11-01 00:00:00
2023-12-01 00:00:00
2024-01-01 00:00:00
" ], "text/plain": [ "shape: (13,)\n", "Series: 'literal' [datetime[μs]]\n", "[\n", "\t2023-01-01 00:00:00\n", "\t2023-02-01 00:00:00\n", "\t2023-03-01 00:00:00\n", "\t2023-04-01 00:00:00\n", "\t2023-05-01 00:00:00\n", "\t…\n", "\t2023-09-01 00:00:00\n", "\t2023-10-01 00:00:00\n", "\t2023-11-01 00:00:00\n", "\t2023-12-01 00:00:00\n", "\t2024-01-01 00:00:00\n", "]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create month intervals for a year datetime range\n", "\n", "dates = pl.datetime_range(\n", " datetime(2023, 1, 1),\n", " datetime(2024, 1, 1),\n", " interval=\"1mo\",\n", " eager=True,\n", ")\n", "dates" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 7.7 s, sys: 909 ms, total: 8.61 s 12, 1, 0, 0), end=datetime.datetime(2024, 1, 1, 0, 0))\n", "Wall time: 40.7 s\n" ] } ], "source": [ "%%time\n", "\n", "for i, (start, end) in enumerate(zip(dates, dates[1:]), start=1):\n", " print(f\"Working on loop {i}: {start=}, {end=}\", end=\"\\r\")\n", " df = TimeSeries(\n", " stid=\"wbb\",\n", " vars=\"air_temp,relative_humidity,wind_speed,wind_direction\",\n", " start=start,\n", " end=end - timedelta(microseconds=1),\n", " verbose=False,\n", " ).df()\n", " df.write_parquet(\n", " f\"sample_long_timeseries_{start:%Y%m%d%H%M%S}_{end:%Y%m%d%H%M%S}.parquet\"\n", " )" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 799 ms, sys: 881 ms, total: 1.68 s\n", "Wall time: 564 ms\n" ] }, { "data": { "text/html": [ "
\n", "shape: (2_100_848, 20)
date_timevariablesensor_indexis_derivedvalueunitsidstidnameelevationlatitudelongitudemnet_idstatetimezoneelev_demperiod_of_record_startperiod_of_record_endis_restrictedis_active
datetime[μs, UTC]stru32boolf64stru32strstrf64f64f64u32strstrf64datetime[μs, UTC]datetime[μs, UTC]boolbool
2023-01-01 00:00:00 UTC"wind_direction"1false56.97"Degrees"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-01-01 00:01:00 UTC"wind_direction"1false58.74"Degrees"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-01-01 00:02:00 UTC"wind_direction"1false60.34"Degrees"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-01-01 00:03:00 UTC"wind_direction"1false57.05"Degrees"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-01-01 00:04:00 UTC"wind_direction"1false61.8"Degrees"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-12-31 23:55:00 UTC"wind_speed"1false0.761"m/s"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-12-31 23:56:00 UTC"wind_speed"1false0.689"m/s"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-12-31 23:57:00 UTC"wind_speed"1false0.628"m/s"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-12-31 23:58:00 UTC"wind_speed"1false0.422"m/s"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
2023-12-31 23:59:00 UTC"wind_speed"1false0.54"m/s"1"WBB""U of U William Browning Buildi…4806.040.76623-111.84755153"UT""America/Denver"4727.71997-01-01 00:00:00 UTC2024-10-17 05:50:00 UTCfalsetrue
" ], "text/plain": [ "shape: (2_100_848, 20)\n", "┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐\n", "│ date_time ┆ variable ┆ sensor_in ┆ is_derive ┆ … ┆ period_of ┆ period_of ┆ is_restri ┆ is_activ │\n", "│ --- ┆ --- ┆ dex ┆ d ┆ ┆ _record_s ┆ _record_e ┆ cted ┆ e │\n", "│ datetime[ ┆ str ┆ --- ┆ --- ┆ ┆ tart ┆ nd ┆ --- ┆ --- │\n", "│ μs, UTC] ┆ ┆ u32 ┆ bool ┆ ┆ --- ┆ --- ┆ bool ┆ bool │\n", "│ ┆ ┆ ┆ ┆ ┆ datetime[ ┆ datetime[ ┆ ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ μs, UTC] ┆ μs, UTC] ┆ ┆ │\n", "╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡\n", "│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 00:00:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 00:01:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 00:02:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 00:03:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-01-0 ┆ wind_dire ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ ction ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 00:04:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 23:55:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 
1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 23:56:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 23:57:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 23:58:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "│ 2023-12-3 ┆ wind_spee ┆ 1 ┆ false ┆ … ┆ 1997-01-0 ┆ 2024-10-1 ┆ false ┆ true │\n", "│ 1 ┆ d ┆ ┆ ┆ ┆ 1 ┆ 7 ┆ ┆ │\n", "│ 23:59:00 ┆ ┆ ┆ ┆ ┆ 00:00:00 ┆ 05:50:00 ┆ ┆ │\n", "│ UTC ┆ ┆ ┆ ┆ ┆ UTC ┆ UTC ┆ ┆ │\n", "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "# Read all the Parquet files at once; look how fast this is\n", "wbb_2023 = pl.read_parquet(\"sample_long_timeseries_*.parquet\")\n", "wbb_2023" ] } ], "metadata": { "kernelspec": { "display_name": "synoptic2", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 2 }