# Source code for utdquake.core.config

import os
from typing import Dict, Optional
from pathlib import Path
from dataclasses import dataclass, replace

UTDQUAKE_ROOT: str = "UTDQUAKE_ROOT"
"""Environment variable name for UTDQuake cache root."""

UTDQUAKE_DAS_ROOT: str = "UTDQUAKE_DAS_ROOT"
"""Environment variable name for UTDQuake DAS cache root."""

HF_REPO_ID: str = "ecastillot/UTDQuake"
"""Hugging Face repository ID for UTDQuake dataset."""

HF_REPO_TYPE: str = "dataset"
"""Type of Hugging Face repository (default: 'dataset')."""

CORE_DIR: Path = Path(__file__).resolve().parent
"""Path to the core directory of the UTDQuake package."""

# Kilometers per degree conversion factor. 111.19 is approximately
# 2 * pi * 6371 km / 360, i.e. presumably the length of one degree of
# great-circle arc on a spherical Earth -- TODO confirm intended usage.
KM_PER_DEG: float = 111.19

@dataclass(frozen=True)
class HFEntry:
    """
    Immutable description of one Hugging Face dataset component.

    Attributes
    ----------
    name : str or None
        Dataset name or identifier (e.g., ``'0_networks'``), or ``None``
        when the component has no dataset name.
    split : str or None
        Dataset split (e.g., ``'metadata'``), or ``None`` when the
        component has no split.
    path : str
        Relative path pattern for the dataset file.
    """

    # Dataset identifier; optional.
    name: Optional[str]
    # Dataset split; optional.
    split: Optional[str]
    # Relative path pattern of the component.
    path: str
HF_CONFIG: Dict[str, HFEntry] = {
    # Zipped banks, one archive per network (no dataset name/split).
    "banks": HFEntry(name=None, split=None, path="banks/{network}.zip"),
    # Parquet tables that carry a dataset name and the "metadata" split.
    "networks": HFEntry(
        name="0_networks",
        split="metadata",
        path="network/network.parquet",
    ),
    "stations": HFEntry(
        name="1_stations",
        split="metadata",
        path="stations/network={network}.parquet",
    ),
    "events": HFEntry(
        name="2_events",
        split="metadata",
        path="events/network={network}.parquet",
    ),
    "picks": HFEntry(
        name="3_picks",
        split="metadata",
        path="picks/network={network}.parquet",
    ),
    # Entries located under the ".utdquake" directory (no name/split).
    ".utdquake/travel_time": HFEntry(
        name=None,
        split=None,
        path=".utdquake/travel_time/{network}.parquet",
    ),
    ".utdquake/stats": HFEntry(
        name=None,
        split=None,
        path=".utdquake/stats/{network}.npz",
    ),
    ".utdquake/export": HFEntry(name=None, split=None, path=".utdquake/export"),
    ".utdquake/logs": HFEntry(name=None, split=None, path=".utdquake/logs"),
}
"""Mapping from configuration key to its :class:`HFEntry` (name, split, path pattern)."""
def get_hf_entry(key: str, das: bool = False) -> HFEntry:
    """
    Retrieve a Hugging Face dataset configuration entry.

    Parameters
    ----------
    key : str
        Configuration key defined in ``HF_CONFIG``.
    das : bool, default=False
        If ``True``, return a DAS-specific copy of the entry with the
        dataset name and/or path adjusted as described in the Notes.
        If ``False``, the entry stored in ``HF_CONFIG`` is returned as is.

    Returns
    -------
    HFEntry
        Configuration entry containing the dataset name, split, and
        relative file path.

    Raises
    ------
    KeyError
        If ``key`` is not present in ``HF_CONFIG``.

    Notes
    -----
    For DAS datasets:

    - ``banks``: the parent directory of the path gets a ``_DAS`` suffix
      while the filename is preserved (``banks/{network}.zip`` becomes
      ``banks_DAS/{network}.zip``). The name is left unchanged.
    - Keys containing ``.utdquake``: the first path component gets a
      ``_DAS`` suffix and the remaining components are preserved.
      The name is left unchanged.
    - All other entries: the path becomes ``{key}_DAS/<filename>`` and the
      dataset name is suffixed with ``"_DAS"`` when a name exists.

    Returned paths always use forward slashes, independent of the
    operating system, so every branch produces the same separator style.
    """
    entry = HF_CONFIG[key]
    if not das:
        return entry

    rel_path = Path(entry.path)

    if key == "banks":
        # Rename the parent directory ("banks" -> "banks_DAS") while
        # preserving the filename.
        das_dir = rel_path.parent.with_name(f"{rel_path.parent.name}_DAS")
        # as_posix() instead of str(): str() would emit backslashes on
        # Windows, unlike the hard-coded "/" used in the default branch.
        return replace(entry, path=(das_dir / rel_path.name).as_posix())

    if ".utdquake" in key:
        # Suffix only the leading component and keep the rest of the path.
        first_part, *rest_parts = rel_path.parts
        das_path = Path(f"{first_part}_DAS", *rest_parts)
        return replace(entry, path=das_path.as_posix())

    # Store DAS files in a dedicated "{key}_DAS" directory and append
    # "_DAS" to the dataset name when one exists.
    return replace(
        entry,
        name=f"{entry.name}_DAS" if entry.name else None,
        path=f"{key}_DAS/{rel_path.name}",
    )
def get_root(das: bool = False) -> Path:
    """
    Return the root directory for cached UTDQuake data.

    The location is read from the environment variable ``UTDQUAKE_ROOT``
    (or ``UTDQUAKE_DAS_ROOT`` when ``das`` is true). When that variable is
    unset, empty, or contains only whitespace, a directory in the user's
    home is used instead:

    - ``~/.utdquake``
    - ``~/.utdquake_das`` (for DAS data)

    Parameters
    ----------
    das : bool, optional
        If True, use the DAS cache root environment variable
        (`UTDQUAKE_DAS_ROOT`). Otherwise use the standard cache root
        (`UTDQUAKE_ROOT`).

    Returns
    -------
    Path
        Resolved path to the root cache directory.

    Examples
    --------
    >>> import os
    >>> os.environ["UTDQUAKE_ROOT"] = "/my/custom/cache"
    >>> root_path = get_root()
    >>> print(root_path)
    /my/custom/cache

    >>> os.environ["UTDQUAKE_DAS_ROOT"] = "/my/das/cache"
    >>> das_root = get_root(das=True)
    >>> print(das_root)
    /my/das/cache
    """
    # Select the environment variable and the fallback directory name together.
    if das:
        variable, fallback_name = UTDQUAKE_DAS_ROOT, ".utdquake_das"
    else:
        variable, fallback_name = UTDQUAKE_ROOT, ".utdquake"

    configured = os.environ.get(variable)
    if configured is not None and configured.strip():
        location = Path(configured)
    else:
        # Unset or blank variable: fall back to the home-directory default.
        location = Path.home() / fallback_name

    return location.expanduser().resolve()
def get_utdq_paths(network: str, das: bool = False) -> Dict[str, Path]:
    """
    Return standardized UTDQuake paths for a given network.

    This helper constructs the filesystem paths used by UTDQuake for
    storing and accessing data products associated with a network, and
    creates the required directories on disk.

    Parameters
    ----------
    network : str
        Network code (e.g., ``"tx"``, ``"AK"``, etc.).
    das : bool, optional
        If True, use the DAS cache root and the DAS variants of the
        configuration entries. Otherwise use the standard ones.

    Returns
    -------
    dict of str to pathlib.Path
        Dictionary containing the following keys:

        - ``"banks"``: bank directory (the bank path without ``.zip``).
        - ``"events"``: event file.
        - ``"stations"``: station file.
        - ``"picks"``: pick file.
        - ``".utdquake/travel_time"``: travel-time file.
        - ``".utdquake/stats"``: stats file.
        - ``".utdquake/export/db"``: export ``db`` directory.
        - ``".utdquake/logs/banks"``: per-network ``banks`` log directory.

    Notes
    -----
    - Paths are constructed relative to the root directory returned by
      :func:`get_root`.
    - Subdirectory templates come from :func:`get_hf_entry`.
    - Side effect: directory entries are created, and for file entries the
      parent directory is created (``parents=True, exist_ok=True``).
    """
    root = get_root(das=das)

    def _template(key: str) -> str:
        # Relative path template of the (optionally DAS) entry.
        return get_hf_entry(key, das).path

    # Strip only a trailing ".zip"; splitting on ".zip" would truncate at
    # the first occurrence anywhere in the string.
    zip_ext = ".zip"
    bank_rel = _template("banks").format(network=network)
    if bank_rel.endswith(zip_ext):
        bank_rel = bank_rel[: -len(zip_ext)]

    utdq_paths = {
        "banks": root / bank_rel,
        "events": root / _template("events").format(network=network),
        "stations": root / _template("stations").format(network=network),
        "picks": root / _template("picks").format(network=network),
        ".utdquake/travel_time": root
        / _template(".utdquake/travel_time").format(network=network),
        ".utdquake/stats": root
        / _template(".utdquake/stats").format(network=network),
        # not for the public API
        ".utdquake/export/db": root / _template(".utdquake/export") / "db",
        ".utdquake/logs/banks": root
        / _template(".utdquake/logs")
        / f"{network}"
        / "banks",
    }

    # Entries that are directories. Listed explicitly instead of inferring
    # from ``path.suffix``, which misclassifies a directory whose name
    # contains a dot (e.g. a network code such as "a.b").
    directory_keys = {"banks", ".utdquake/export/db", ".utdquake/logs/banks"}

    # Ensure directories exist: the path itself for directory entries,
    # the parent directory for file entries.
    for key, path in utdq_paths.items():
        target = path if key in directory_keys else path.parent
        target.mkdir(parents=True, exist_ok=True)

    return utdq_paths