Source code for oggm.shop.w5e5

import logging

# External libs
import numpy as np
import xarray as xr
import pandas as pd

# Optional libs
try:
    import salem
except ImportError:
    pass

# Locals
from oggm import cfg
from oggm import utils
from oggm import entity_task
from oggm.shop import ecmwf
from oggm.exceptions import InvalidParamsError

# Module logger
log = logging.getLogger(__name__)

GSWP3_W5E5_SERVER = 'https://cluster.klima.uni-bremen.de/~oggm/climate/'

_base = 'gswp3-w5e5/flattened/2025.11.25/monthly/'

BASENAMES = {
    'GSWP3_W5E5': {
        'inv': f'{_base}gswp3-w5e5_glacier_invariant_flat_v2025.11.25.nc',
        'tmp': f'{_base}gswp3-w5e5_obsclim_tas_global_monthly_1901_2019_flat_glaciers_v2025.11.25.nc',
        'temp_std': f'{_base}gswp3-w5e5_obsclim_temp_std_global_monthly_1901_2019_flat_glaciers_v2025.11.25.nc',
        'prcp': f'{_base}gswp3-w5e5_obsclim_pr_global_monthly_1901_2019_flat_glaciers_v2025.11.25.nc'
    }
}


def _get_w5e5_server():
    return cfg.PARAMS.get('gswp3_w5e5_server', GSWP3_W5E5_SERVER)


def _get_w5e5_basenames():
    return cfg.PARAMS.get('gswp3_w5e5_basenames', BASENAMES)


def get_gswp3_w5e5_file(dataset='GSWP3_W5E5', var=None):
    """Returns the path to the desired GSWP3-W5E5 baseline climate file.

    It is the observed climate dataset used for ISIMIP3a.
    For OGGM, it was preprocessed by selecting only those gridpoints
    with glaciers nearby.

    If the file is not present, downloads it.

    var : str
        'inv' for invariant
        'tmp' for temperature
        'prcp' for precipitation
        'temp_std' for mean of daily temperature standard deviation
    dataset : str
        default option in OGGM is 'GSWP3_W5E5' (could also just use 'W5E5_monthly',
        but this is the same for a shorter time period, and we only use it for testing)
    """
    # check if input makes sense
    basenames = _get_w5e5_basenames()

    if var not in basenames[dataset].keys():
        raise InvalidParamsError('GSWP3-W5E5 variable {} not '
                                 'in {}'.format(var,
                                                basenames[dataset].keys()))

    # File to look for
    return utils.file_downloader(_get_w5e5_server() + basenames[dataset][var])


@entity_task(log, writes=['climate_historical'])
def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the GSWP3-W5E5+W5E5 baseline climate data for a glacier.

    The data is the same as W5E5 after 79, and is GSWP3 before that.

    Data source: https://www.isimip.org/gettingstarted/input-data-bias-adjustment/details/80/
    If y0>=1979, the temperature lapse rate gradient from ERA5dr is added.

    Extracts the nearest timeseries and writes everything to a NetCDF file.

    Parameters
    ----------
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data). If y0>=1979,
        it only uses W5E5 data!
    y1 : int
        the end year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str, optional
         None by default
    """
    dataset = 'GSWP3_W5E5'  # 'W5E5_monthly'
    tvar = 'tas'
    pvar = 'pr'

    # get the central longitude/latitudes of the glacier
    lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
    lat = gdir.cenlat

    path_tmp = get_gswp3_w5e5_file(dataset, 'tmp')
    path_prcp = get_gswp3_w5e5_file(dataset, 'prcp')
    path_inv = get_gswp3_w5e5_file(dataset, 'inv')

    # Use xarray to read the data
    # would go faster with only netCDF -.-, but easier with xarray
    # first temperature dataset
    with xr.open_dataset(path_tmp) as ds:
        # sanity checks, safeguarding for unwanted future side effects
        # we do it only for temp and assume no mistake on the data prep side
        info = utils.climate_file_info(ds)
        assert info['lon_bounds'][0] >= 0
        assert info['is_flat']

        yrs = ds['time.year'].data
        y0 = yrs[0] if y0 is None else y0
        y1 = yrs[-1] if y1 is None else y1

        if y1 > 2019 or y0 < 1901:
            text = 'The climate files only go from 1901--2019'
            raise InvalidParamsError(text)
        ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))

        # computing all the distances and choose the nearest gridpoint
        c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
        ds = ds.isel(points=np.argmin(c.data))

        # Feth lon and lat
        ds['longitude'] = ds.longitude
        ds['latitude'] = ds.latitude

        # temperature should be in degree Celsius for the glacier climate files
        temp = ds[tvar].data - 273.15
        time = ds.time.data

        ref_lon = float(ds['longitude'])
        ref_lat = float(ds['latitude'])

        ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon

    # precipitation: similar as temperature
    with xr.open_dataset(path_prcp) as ds:

        # here we take the same y0 and y1 as given from the
        # tmp dataset
        ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))

        c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
        ds = ds.isel(points=np.argmin(c.data))

        # convert kg m-2 s-1 monthly mean into monthly sum!!!
        prcp = ds[pvar].data*cfg.SEC_IN_DAY*ds['time.daysinmonth']

    # w5e5 invariant file
    with xr.open_dataset(path_inv) as ds:
        ds = ds.isel(time=0)

        c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
        ds = ds.isel(points=np.argmin(c.data))

        # w5e5 inv ASurf/hgt is already in hgt coordinates
        hgt = ds['ASurf'].data

    path_temp_std = get_gswp3_w5e5_file(dataset, 'temp_std')
    with xr.open_dataset(path_temp_std) as ds:
        ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
        c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
        ds = ds.isel(points=np.argmin(c.data))
        temp_std = ds['temp_std'].data

    # OK, ready to write
    gdir.write_monthly_climate_file(time, prcp, temp, hgt, ref_lon, ref_lat,
                                    filesuffix=output_filesuffix,
                                    temp_std=temp_std,
                                    source=dataset)


[docs] def process_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None): """Processes and writes the W5E5 baseline climate data for a glacier. Internally, this is actually calling process_gswp3_w5e5_data but only for the W5E5 part. data source: https://data.isimip.org/10.48364/ISIMIP.342217 The temperature lapse rate gradient from ERA5dr is added. Same as process_gswp3_w5e5_data, except that y0 is set to 1979 and can not be set to lower values. Extracts the nearest timeseries and writes everything to a NetCDF file. Parameters ---------- y0 : int the starting year of the timeseries to write. The default is to take the entire time period available in the file (1979-2019), but with this kwarg you can shorten it (to save space or to crop bad data). y1 : int the end year of the timeseries to write. The default is to take the entire time period available in the file (1979-2019), but with this kwarg you can shorten it (to save space or to crop bad data) output_filesuffix : str, optional None by default """ y0 = 1979 if y0 is None else y0 y1 = 2019 if y1 is None else y1 if y0 < 1979: text = ('The W5E5 climate only goes from 1979-2019,' 'if you want older climate data,' 'use instead "process_gswp3_w5e5_data()"') raise InvalidParamsError(text) process_gswp3_w5e5_data(gdir, y0=y0, y1=y1, output_filesuffix=output_filesuffix)