Source code for goes2go.NEW

## Brian Blaylock
## June 4, 2022

"""
==========
GOES Class
==========
"""

import itertools
import logging
import re
from pathlib import Path

import numpy as np
import pandas as pd
import s3fs
import toml

from goes2go import config
from goes2go.data import _goes_file_df, goes_latest, goes_nearesttime, goes_timerange

# Module-level logger; the GOES class uses it to emit non-fatal warnings.
log = logging.getLogger(__name__)

# Directory containing this module; the bundled product table lives here.
tables_dir = Path(__file__).parent

# Connect anonymously to the public AWS buckets.
fs = s3fs.S3FileSystem(anon=True)

# Table of product descriptions indexed by product name. The first two rows
# of the text file are skipped and the column names are supplied here.
product_table = pd.read_csv(
    tables_dir / "product_table.txt",
    skiprows=2,
    names=["product", "description"],
    index_col=0,
)
# Remove surrounding whitespace from both the index labels and descriptions.
product_table.index = product_table.index.str.strip()
product_table["description"] = product_table["description"].str.strip()


# Set of product names: the last path component of every entry listed in the
# "noaa-goes16" bucket, ignoring entries whose extension is "pdf" or "html".
# NOTE: this lists the bucket when the module is imported.
# Assume goes17 and goes18 have same products as goes16
_product = {
    name
    for name in (entry.split("/")[-1] for entry in fs.ls("noaa-goes16"))
    if name.rsplit(".", 1)[-1] not in ("pdf", "html")
}

# you can be unspecific and request any mesoscale domain (M),
# or by number (M1, M2)
_domains = {"F", "C", "M", "M1", "M2"}


class GOES:
    """The GOES satellite class.

    Holds a normalized satellite bucket name, product name, domain and band
    selection, and forwards them to the ``goes2go.data`` download helpers.
    """

    def __init__(
        self,
        satellite=config["timerange"].get("satellite"),
        product=config["timerange"].get("product"),
        domain=config["timerange"].get("domain"),
        bands=None,
        channel=None,
    ):
        """Initialize a GOES object for a desired satellite and product.

        Parameters
        ----------
        satellite : {16, 17, 18}
            The satellite number. May also use the following
            aliases {'G16', "G17", "EAST", "WEST"}
        product : str
            The product to acquire. A full list of products is here
            https://github.com/blaylockbk/goes2go/blob/main/goes2go/product_table.txt

            - GLM = alias for Geostationary Lightning Mapper
            - ABI = alias for ABI multi-channel cloud moisture imagery
        domain : {None, 'F', 'C', "M", "M1", "M2"}
            Only needed for ABI products.

            - F = Full Disk
            - C = CONUS
            - M = Mesoscale sector (both)
            - M1 = Mesoscale sector 1
            - M2 = Mesoscale sector 2
        bands : None, int, or list
            Specify the ABI channels to retrieve. *Only used if the
            product requested has unique bands.*
            For example, ``bands=2`` for channel 2 or ``bands=[1,2,3]``
            for channels 1, 2 and 3.
        channel : None, int, or list
            Alias for "bands" argument. If channel is not None, then
            "bands" is not used.

        Raises
        ------
        ValueError
            If the satellite, product, or domain cannot be resolved.

        Notes
        -----
        The defaults for satellite, product and domain are read from
        ``config["timerange"]`` once, when the class is defined.

        I don't like the name of "bands", but the NetCDF file uses the
        term "band" instead of "channel" in reference to the ABI products,
        so I'll stick with "band" for now.
        """
        self.satellite = satellite
        self.product = product
        self.domain = domain
        # ``channel`` is an alias that takes precedence over ``bands``.
        self.bands = bands if channel is None else channel

        if self.product.startswith("ABI") and self.product in _product:
            # Sometimes the user might inadvertently give the domain
            # in the product name. Split that trailing letter off.
            suffix = self.product[-1]
            # Keep a caller-supplied domain when it is valid and names the
            # same sector (e.g. "M1" with an "...M" product); otherwise the
            # letter from the product name wins. Without this check a
            # requested "M1"/"M2" was silently widened to "M".
            same_sector = (
                isinstance(self.domain, str)
                and self.domain in _domains
                and re.sub("[0-9]", "", self.domain) == suffix
            )
            if not same_sector:
                self.domain = suffix
            self.product = self.product[:-1]

        self._check_satellite()
        self._check_product()

    def _check_satellite(self):
        """Normalize ``self.satellite`` to a bucket name like "noaa-goes16".

        Raises
        ------
        ValueError
            If the value is neither an integer nor a string, or is a string
            that is not "EAST"/"WEST" and contains no digits.
        """
        requested = self.satellite
        error = ValueError(
            f"Could not figure out what satellite you want from `{requested}`"
        )

        if isinstance(requested, (int, np.integer)):
            self.satellite = f"noaa-goes{requested}"
        elif isinstance(requested, str):
            alias = requested.upper()
            if alias == "EAST":
                self.satellite = "noaa-goes16"
            elif alias == "WEST":
                self.satellite = "noaa-goes17"
            else:
                # look for the satellite number in the string
                # (e.g., 'G16', 'goes16')
                number = re.sub("[^0-9]", "", requested)
                if not number:
                    # No digits at all would yield the bogus name "noaa-goes".
                    raise error
                self.satellite = f"noaa-goes{number}"
        else:
            raise error

    def _check_product(self):
        """Resolve product aliases and domain, then set ``self.description``.

        Raises
        ------
        ValueError
            If the domain is not valid for an ABI product, or the resolved
            product name is not in the set of known products.
        """
        if self.product == "GLM":
            # Alias for Geostationary Lightning Mapper
            self.product = "GLM-L2-LCFA"
        elif self.product == "ABI":
            # Alias for multi-channel cloud moisture imagery;
            # the domain falls back to "C" when none is given.
            if self.domain is None:
                self.product = "ABI-L2-MCMIP" + "C"
            else:
                self.product = "ABI-L2-MCMIP" + re.sub("[0-9]", "", self.domain)
        elif self.product.startswith("ABI"):
            if self.domain is None:
                # Take the domain from the product name if it ends with one.
                if self.product[-1] in _domains:
                    self.domain = self.product[-1]
            else:
                if self.domain not in _domains:
                    raise ValueError(
                        f"domain for ABI products must be None or one of {_domains}"
                    )
                # Product names carry only the domain letter, never the
                # mesoscale sector number.
                self.product = self.product + re.sub("[0-9]", "", self.domain)
                if self.product not in _product:
                    raise ValueError(f"{self.product} is not a valid product.")
        else:
            if self.domain is not None:
                log.warning("domain argument is ignored for non-ABI products")

        if self.product in _product:
            self.description = product_table.loc[self.product].description
        else:
            raise ValueError(f"{self.product} is not an available product.")

    def __repr__(self):
        """Return a boxed, multi-line summary of the object's attributes."""
        msg = [
            "╭───────────────────────────────╌┄┈",
            "│ 🌎 GOES Object ",
            "│ ───────────────",
            f"│ {self.satellite=}",
            f"│ {self.product=}",
            f"│ {self.domain=}",
            f"│ {self.bands=}",
            f"│ {self.description=}",
            "╰───────────────────────────────╌┄┈",
        ]
        return "\n".join(msg)

    def latest(self, **kwargs):
        """Get the latest available GOES data.

        Extra keyword arguments are forwarded to ``goes_latest``, whose
        return value is returned unchanged.
        """
        return goes_latest(
            satellite=self.satellite,
            product=self.product,
            domain=self.domain,
            bands=self.bands,
            **kwargs,
        )

    def nearesttime(
        self,
        attime,
        # Lowercase "h": the uppercase hour alias is deprecated in pandas.
        within=pd.to_timedelta(config["nearesttime"].get("within", "1h")),
        **kwargs,
    ):
        """Get the GOES data nearest a specified time.

        Parameters
        ----------
        attime : datetime
            Time to find the nearest observation for.
            May also use a pandas-interpretable datetime string.
        within : timedelta or pandas-parsable timedelta str
            Time range within which the nearest observation must fall.
            The default is read from ``config["nearesttime"]`` once, when
            the class is defined, falling back to one hour.

        Extra keyword arguments are forwarded to ``goes_nearesttime``, whose
        return value is returned unchanged.
        """
        return goes_nearesttime(
            attime,
            within,
            satellite=self.satellite,
            product=self.product,
            domain=self.domain,
            bands=self.bands,
            **kwargs,
        )

    def timerange(self, start=None, end=None, recent=None, **kwargs):
        """Get GOES data for a time range.

        Parameters
        ----------
        start, end : datetime
            Required if recent is None.
        recent : timedelta or pandas-parsable timedelta str
            Required if start and end are None. If timedelta(hours=1), will
            get the most recent files for the past hour.

        Extra keyword arguments are forwarded to ``goes_timerange``, whose
        return value is returned unchanged.
        """
        return goes_timerange(
            start,
            end,
            recent,
            satellite=self.satellite,
            product=self.product,
            domain=self.domain,
            bands=self.bands,
            **kwargs,
        )

    def df(self, start, end, refresh=True):
        """Get list of requested GOES files as pandas.DataFrame.

        Parameters
        ----------
        start : datetime
        end : datetime
        refresh : bool
            Refresh the s3fs.S3FileSystem object when files are listed.
            Default True will refresh and not use a cached list.

        Notes
        -----
        ``self.domain`` is not passed to ``_goes_file_df``; only the
        satellite, product and bands are.
        """
        return _goes_file_df(
            self.satellite,
            self.product,
            start=start,
            end=end,
            bands=self.bands,
            refresh=refresh,
        )