Source code for oggm.shop.millan22

import logging
import os
import warnings

import numpy as np
import pandas as pd
import xarray as xr
import shapely.geometry as shpg

try:
    import salem
except ImportError:
    pass

try:
    import geopandas as gpd
except ImportError:
    pass

try:
    import rasterio
except ImportError:
    pass

from oggm import utils, cfg
from oggm.exceptions import InvalidWorkflowError

# Module logger
log = logging.getLogger(__name__)

default_base_url = 'https://cluster.klima.uni-bremen.de/~oggm/velocities/millan22/'

_lookup_thickness = None
_lookup_velocity = None


def _get_lookup_thickness():
    global _lookup_thickness
    if _lookup_thickness is None:
        fname = default_base_url + 'millan22_thickness_lookup_shp_20220902.zip'
        _lookup_thickness = gpd.read_file('zip://' + utils.file_downloader(fname))
    return _lookup_thickness


def _get_lookup_velocity():
    global _lookup_velocity
    if _lookup_velocity is None:
        fname = default_base_url + 'millan22_velocity_lookup_shp_20231127.zip'
        _lookup_velocity = gpd.read_file('zip://' + utils.file_downloader(fname))
    return _lookup_velocity


def _filter(ds):
    # Read the data and prevent bad surprises
    data = ds.get_vardata().astype(np.float64)

    with rasterio.Env():
        with rasterio.open(ds.file) as src:
            try:
                nodata = src.nodatavals[0]
                data[data == nodata] = np.nan
            except AttributeError:
                pass

    # NaNs
    data[~ np.isfinite(data)] = np.nan
    data[data > 1e5] = np.nan
    return data


def _filter_and_reproj(gdir, var, gdf):
    """ Same code for thickness and error

    Parameters
    ----------
    var : 'thickness' or 'err'
    gdf : the lookup
    """

    # We may have more than one file
    total_data = None
    grids_used = []
    files_used = []
    for i, s in gdf.iterrows():
        # Fetch it
        url = default_base_url + s[var]
        input_file = utils.file_downloader(url)

        # Subset to avoid mega files
        dsb = salem.GeoTiff(input_file)
        x0, x1, y0, y1 = gdir.grid.extent_in_crs(dsb.grid.proj)
        with warnings.catch_warnings():
            # This can trigger an out of bounds warning
            warnings.filterwarnings("ignore", category=RuntimeWarning,
                                    message='.*out of bounds.*')
            dsb.set_subset(corners=((x0, y0), (x1, y1)),
                           crs=dsb.grid.proj,
                           margin=5)

        data = _filter(dsb)

        if np.nansum(data) == 0:
            # No need to continue
            continue

        # Reproject now
        with warnings.catch_warnings():
            # This can trigger an out of bounds warning
            warnings.filterwarnings("ignore", category=RuntimeWarning,
                                    message='.*out of bounds.*')
            r_data = gdir.grid.map_gridded_data(data, dsb.grid, interp='linear')

        if total_data is None:
            total_data = r_data.filled(np.nan)
        else:
            r_data = r_data.filled(np.nan)
            pok = np.isfinite(r_data)
            total_data[pok] = r_data[pok]

        grids_used.append(dsb)
        files_used.append(s.file_id)

    return total_data, files_used, grids_used


[docs] @utils.entity_task(log, writes=['gridded_data']) def millan_thickness_to_gdir(gdir, add_error=False): """Add the Millan 22 thickness data to this glacier directory. Parameters ---------- gdir : :py:class:`oggm.GlacierDirectory` the glacier directory to process add_error : bool add the error data or not """ # Find out which file(s) we need gdf = _get_lookup_thickness() cp = shpg.Point(gdir.cenlon, gdir.cenlat) sel = gdf.loc[gdf.contains(cp)] if len(sel) == 0: raise InvalidWorkflowError(f'There seems to be no Millan file for this ' f'glacier: {gdir.rgi_id}') total_thick, files_used, _ = _filter_and_reproj(gdir, 'thickness', sel) if add_error: total_err, _, _ = _filter_and_reproj(gdir, 'err', sel) total_err[~ np.isfinite(total_thick)] = np.nan # Write with utils.ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc: vn = 'millan_ice_thickness' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice thickness from Millan et al. 2022' v.long_name = ln data_str = ' '.join(files_used) if len(files_used) > 1 else files_used[0] v.data_source = data_str v[:] = total_thick.astype(np.float32) if add_error: vn = 'millan_ice_thickness_err' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice thickness error from Millan et al. 2022' v.long_name = ln v.data_source = data_str v[:] = total_err.astype(np.float32)
[docs] @utils.entity_task(log, writes=['gridded_data']) def millan_velocity_to_gdir(gdir, add_error=False): """Add the Millan 22 velocity data to this glacier directory. Parameters ---------- gdir : :py:class:`oggm.GlacierDirectory` the glacier directory to process add_error : bool add the error data or not """ if gdir.rgi_region in ['05']: raise NotImplementedError('Millan 22 does not provide velocity ' 'products for Greenland - we would need to ' 'implement MEASURES in the shop for this.') # Find out which file(s) we need gdf = _get_lookup_velocity() cp = shpg.Point(gdir.cenlon, gdir.cenlat) sel = gdf.loc[gdf.contains(cp)] if len(sel) == 0: raise InvalidWorkflowError(f'There seems to be no Millan file for this ' f'glacier: {gdir.rgi_id}') vel, files, grids = _filter_and_reproj(gdir, 'v', sel) if len(grids) == 0: raise RuntimeError('There is no velocity data for this glacier') all_vx = None all_vy = None all_err_vx = None all_err_vy = None for i, _ in sel.iterrows(): subsel = sel.loc[[i]] vx, _, gridsx = _filter_and_reproj(gdir, 'vx', subsel) vy, _, gridsy = _filter_and_reproj(gdir, 'vy', subsel) if len(gridsx) == 0: continue dsx = gridsx[0] dsy = gridsy[0] grid_vel = dsx.grid proj_vel = grid_vel.proj grid_gla = gdir.grid # Get the coords at t0 xx0, yy0 = grid_vel.center_grid.xy_coordinates # Compute coords at t1 xx1 = _filter(dsx) yy1 = _filter(dsy) xx1 += xx0 yy1 += yy0 # Transform both to glacier proj xx0, yy0 = salem.transform_proj(proj_vel, grid_gla.proj, xx0, yy0) xx1, yy1 = salem.transform_proj(proj_vel, grid_gla.proj, xx1, yy1) # Compute velocities from there vx = xx1 - xx0 vy = yy1 - yy0 # And transform to local map vx = grid_gla.map_gridded_data(vx, grid=grid_vel, interp='linear') vy = grid_gla.map_gridded_data(vy, grid=grid_vel, interp='linear') # Scale back to match velocity with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) new_vel = np.sqrt(vx**2 + vy**2) p_ok = np.isfinite(new_vel) & (new_vel > 0) # avoid div by zero scaler = vel[p_ok] / new_vel[p_ok] vx[p_ok] = vx[p_ok] * scaler vy[p_ok] = vy[p_ok] * scaler if all_vx is None: all_vx = vx.filled(np.nan) all_vy = vy.filled(np.nan) else: vx = vx.filled(np.nan) vy = vy.filled(np.nan) locuptate = np.isfinite(vx) all_vx[locuptate] = vx[locuptate] all_vy[locuptate] = vy[locuptate] if add_error: err_vx, _, _ = _filter_and_reproj(gdir, 'err_vx', subsel) err_vy, _, _ = _filter_and_reproj(gdir, 'err_vy', subsel) err_vx[p_ok] = err_vx[p_ok] * scaler err_vy[p_ok] = err_vy[p_ok] * scaler if all_err_vx is None: all_err_vx = err_vx.filled(np.nan) all_err_vy = err_vy.filled(np.nan) else: err_vx = err_vx.filled(np.nan) err_vy = err_vy.filled(np.nan) all_err_vx[locuptate] = err_vx[locuptate] all_err_vy[locuptate] = err_vy[locuptate] # Write with utils.ncDataset(gdir.get_filepath('gridded_data'), 'a') as nc: vn = 'millan_v' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice velocity from Millan et al. 2022' v.long_name = ln v.data_source = files[0] v[:] = vel.astype(np.float32) vn = 'millan_vx' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice velocity in map x direction from Millan et al. 2022' v.long_name = ln v.data_source = files[0] v[:] = all_vx.astype(np.float32) vn = 'millan_vy' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x', ), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice velocity in map y direction from Millan et al. 2022' v.long_name = ln v.data_source = files[0] v[:] = all_vy.astype(np.float32) if add_error: vn = 'millan_err_vx' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice velocity error in map x direction from Millan et al. 2022' v.long_name = ln v.data_source = files[0] v[:] = all_err_vx.astype(np.float32) vn = 'millan_err_vy' if vn in nc.variables: v = nc.variables[vn] else: v = nc.createVariable(vn, 'f4', ('y', 'x',), zlib=True, fill_value=np.nan) v.units = 'm' ln = 'Ice velocity error in map y direction from Millan et al. 2022' v.long_name = ln v.data_source = files[0] v[:] = all_err_vy.astype(np.float32)
@utils.entity_task(log) def millan_statistics(gdir): """Gather statistics about the Millan data interpolated to this glacier. """ d = dict() # Easy stats - this should always be possible d['rgi_id'] = gdir.rgi_id d['rgi_region'] = gdir.rgi_region d['rgi_subregion'] = gdir.rgi_subregion d['rgi_area_km2'] = gdir.rgi_area_km2 d['millan_vol_km3'] = 0 d['millan_area_km2'] = 0 d['millan_perc_cov'] = 0 try: with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: thick = ds['millan_ice_thickness'].where(ds['glacier_mask'], np.nan).load() gridded_area = ds['glacier_mask'].sum() * gdir.grid.dx ** 2 * 1e-6 with warnings.catch_warnings(): # For operational runs we ignore the warnings warnings.filterwarnings('ignore', category=RuntimeWarning) d['millan_vol_km3'] = float(thick.sum() * gdir.grid.dx ** 2 * 1e-9) d['millan_area_km2'] = float((~thick.isnull()).sum() * gdir.grid.dx ** 2 * 1e-6) d['millan_perc_cov'] = float(d['millan_area_km2'] / gridded_area) if 'millan_ice_thickness_err' in ds: err = ds['millan_ice_thickness_err'].where(ds['glacier_mask'], np.nan).load() d['millan_vol_err_km3'] = float(err.sum() * gdir.grid.dx ** 2 * 1e-9) except (FileNotFoundError, AttributeError, KeyError): pass try: with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: v = ds['millan_v'].where(ds['glacier_mask'], np.nan).load() gridded_area = ds['glacier_mask'].sum() * gdir.grid.dx ** 2 * 1e-6 with warnings.catch_warnings(): # For operational runs we ignore the warnings warnings.filterwarnings('ignore', category=RuntimeWarning) d['millan_avg_vel'] = np.nanmean(v) d['millan_max_vel'] = np.nanmax(v) d['millan_vel_perc_cov'] = float(((~v.isnull()).sum() * gdir.grid.dx ** 2 * 1e-6) / gridded_area) if 'millan_err_vx' in ds: err_vx = ds['millan_err_vx'].where(ds['glacier_mask'], np.nan).load() err_vy = ds['millan_err_vy'].where(ds['glacier_mask'], np.nan).load() err = (err_vx**2 + err_vy**2)**0.5 d['millan_avg_err_vel'] = np.nanmean(err) except (FileNotFoundError, AttributeError, KeyError): pass return d
[docs] @utils.global_task(log) def compile_millan_statistics(gdirs, filesuffix='', path=True): """Gather as much statistics as possible about a list of glaciers. It can be used to do result diagnostics and other stuffs. If the data necessary for a statistic is not available (e.g.: flowlines length) it will simply be ignored. Parameters ---------- gdirs : list of :py:class:`oggm.GlacierDirectory` objects the glacier directories to process filesuffix : str add suffix to output file path : str, bool Set to "True" in order to store the info in the working directory Set to a path to store the file to your chosen location """ from oggm.workflow import execute_entity_task out_df = execute_entity_task(millan_statistics, gdirs) out = pd.DataFrame(out_df).set_index('rgi_id') if path: if path is True: out.to_csv(os.path.join(cfg.PATHS['working_dir'], ('millan_statistics' + filesuffix + '.csv'))) else: out.to_csv(path) return out