import logging
import warnings
# External libs
import numpy as np
import pandas as pd
import xarray as xr
from scipy import stats
# Optional libs
try:
import salem
except ImportError:
pass
# Locals
from oggm import cfg
from oggm import utils
from oggm import entity_task
from oggm.exceptions import MassBalanceCalibrationError, InvalidParamsError
# Module logger
log = logging.getLogger(__name__)
CRU_SERVER = ('https://crudata.uea.ac.uk/cru/data/hrg/cru_ts_4.04/'
'cruts.2004151855.v4.04/')
CRU_BASE = 'cru_ts4.04.1901.2019.{}.dat.nc'
CRU_CL = ('https://cluster.klima.uni-bremen.de/~oggm/climate/cru/'
'cru_cl2.nc.zip')
def set_cru_url(url):
"""If you want to use a different server for CRU (for testing, etc)."""
global CRU_SERVER
CRU_SERVER = url
@utils.locked_func
def get_cru_cl_file():
"""Returns the path to the unpacked CRU CL file."""
return utils.file_extractor(utils.file_downloader(CRU_CL))
[docs]
@utils.locked_func
def get_cru_file(var=None):
"""Returns a path to the desired CRU baseline climate file.
If the file is not present, download it.
Parameters
----------
var : str
'tmp' for temperature
'pre' for precipitation
Returns
-------
str
path to the CRU file
"""
# Be sure input makes sense
if var not in ['tmp', 'pre']:
raise InvalidParamsError('CRU variable {} does not exist!'.format(var))
# Download
cru_filename = CRU_BASE.format(var)
cru_url = CRU_SERVER + '{}/'.format(var) + cru_filename + '.gz'
return utils.file_extractor(utils.file_downloader(cru_url))
[docs]
@entity_task(log, writes=['climate_historical'])
def process_cru_data(gdir, tmp_file=None, pre_file=None, y0=None, y1=None,
output_filesuffix=None):
"""Processes and writes the CRU baseline climate data for this glacier.
Interpolates the CRU TS data to the high-resolution CL2 climatologies
(provided with OGGM) and writes everything to a NetCDF file.
Parameters
----------
gdir : :py:class:`oggm.GlacierDirectory`
the glacier directory to process
tmp_file : str
path to the CRU temperature file (defaults to the current OGGM chosen
CRU version)
pre_file : str
path to the CRU precip file (defaults to the current OGGM chosen
CRU version)
y0 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
y1 : int
the end year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
output_filesuffix : str
this add a suffix to the output file (useful to avoid overwriting
previous experiments)
"""
if cfg.PARAMS['baseline_climate'] != 'CRU':
raise InvalidParamsError("cfg.PARAMS['baseline_climate'] should be "
"set to CRU")
# read the climatology
ncclim = salem.GeoNetcdf(get_cru_cl_file())
# and the TS data
if tmp_file is None:
tmp_file = get_cru_file('tmp')
if pre_file is None:
pre_file = get_cru_file('pre')
nc_ts_tmp = salem.GeoNetcdf(tmp_file, monthbegin=True)
nc_ts_pre = salem.GeoNetcdf(pre_file, monthbegin=True)
# set temporal subset for the ts data (hydro years)
yrs = nc_ts_pre.time.year
y0 = yrs[0] if y0 is None else y0
y1 = yrs[-1] if y1 is None else y1
nc_ts_tmp.set_period(t0=f'{y0}-01-01', t1=f'{y1}-12-01')
nc_ts_pre.set_period(t0=f'{y0}-01-01', t1=f'{y1}-12-01')
time = nc_ts_pre.time
ny, r = divmod(len(time), 12)
assert r == 0
lon = gdir.cenlon
lat = gdir.cenlat
# This is guaranteed to work because I prepared the file (I hope)
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
# get climatology data
loc_hgt = ncclim.get_vardata('elev')
loc_tmp = ncclim.get_vardata('temp')
loc_pre = ncclim.get_vardata('prcp')
loc_lon = ncclim.get_vardata('lon')
loc_lat = ncclim.get_vardata('lat')
# see if the center is ok
if not np.isfinite(loc_hgt[1, 1]):
# take another candidate where finite
isok = np.isfinite(loc_hgt)
# wait: some areas are entirely nans, make the subset larger
_margin = 1
while not np.any(isok):
_margin += 1
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
loc_hgt = ncclim.get_vardata('elev')
isok = np.isfinite(loc_hgt)
if _margin > 1:
log.debug('(%s) I had to look up for far climate pixels: %s',
gdir.rgi_id, _margin)
# Take the first candidate (doesn't matter which)
lon, lat = ncclim.grid.ll_coordinates
lon = lon[isok][0]
lat = lat[isok][0]
# Resubset
ncclim.set_subset()
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
loc_hgt = ncclim.get_vardata('elev')
loc_tmp = ncclim.get_vardata('temp')
loc_pre = ncclim.get_vardata('prcp')
loc_lon = ncclim.get_vardata('lon')
loc_lat = ncclim.get_vardata('lat')
assert np.isfinite(loc_hgt[1, 1])
isok = np.isfinite(loc_hgt)
hgt_f = loc_hgt[isok].flatten()
assert len(hgt_f) > 0.
# maybe this will throw out of bounds warnings
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
# compute monthly anomalies
# of temp
ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
# of precip
ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
ts_pre_ano = ts_pre.groupby('time.month') - ts_pre_avg
# scaled anomalies is the default. Standard anomalies above
# are used later for where ts_pre_avg == 0
ts_pre = ts_pre.groupby('time.month') / ts_pre_avg
# interpolate to HR grid
if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
# Extreme case, middle pix is not valid
# take any valid pix from the 3*3 (and hope there's one)
found_it = False
for idi in range(2):
for idj in range(2):
if np.all(np.isfinite(ts_tmp[:, idj, idi])):
ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
ts_pre_ano[:, 1, 1] = ts_pre_ano[:, idj, idi]
found_it = True
if not found_it:
msg = '({}) there is no climate data'.format(gdir.rgi_id)
raise MassBalanceCalibrationError(msg)
elif np.any(~np.isfinite(ts_tmp)):
# maybe the side is nan, but we can do nearest
ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
interp='nearest')
ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
interp='nearest')
ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
nc_ts_pre.grid,
interp='nearest')
else:
# We can do bilinear
ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
interp='linear')
ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
interp='linear')
ts_pre_ano = ncclim.grid.map_gridded_data(ts_pre_ano.values,
nc_ts_pre.grid,
interp='linear')
# take the center pixel and add it to the CRU CL clim
# for temp
loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
coords={'month': ts_tmp_avg.month})
ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
coords={'time': time})
ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
# for prcp
loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
coords={'month': ts_pre_avg.month})
ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
coords={'time': time})
ts_pre_ano = xr.DataArray(ts_pre_ano[:, 1, 1], dims=['time'],
coords={'time': time})
# scaled anomalies
ts_pre = ts_pre.groupby('time.month') * loc_pre
# standard anomalies
ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
# Correct infinite values with standard anomalies
ts_pre.values = np.where(np.isfinite(ts_pre.values),
ts_pre.values,
ts_pre_ano.values)
# The last step might create negative values (unlikely). Clip them
ts_pre.values = utils.clip_min(ts_pre.values, 0)
# done
loc_hgt = loc_hgt[1, 1]
loc_lon = loc_lon[1]
loc_lat = loc_lat[1]
assert np.isfinite(loc_hgt)
assert np.all(np.isfinite(ts_pre.values))
assert np.all(np.isfinite(ts_tmp.values))
gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
loc_hgt, loc_lon, loc_lat,
filesuffix=output_filesuffix,
source=nc_ts_tmp._nc.title[:10])
ncclim._nc.close()
nc_ts_tmp._nc.close()
nc_ts_pre._nc.close()
[docs]
@entity_task(log, writes=['climate_historical'])
def process_dummy_cru_file(gdir, sigma_temp=2, sigma_prcp=0.5, seed=None,
y0=None, y1=None, output_filesuffix=None):
"""Create a simple baseline climate file for this glacier - for testing!
This simply reproduces the climatology with a little randomness in it.
TODO: extend the functionality by allowing a monthly varying sigma
Parameters
----------
gdir : GlacierDirectory
the glacier directory
sigma_temp : float
the standard deviation of the random timeseries (set to 0 for constant
ts)
sigma_prcp : float
the standard deviation of the random timeseries (set to 0 for constant
ts)
seed : int
the RandomState seed
y0 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
y1 : int
the starting year of the timeseries to write. The default is to take
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
output_filesuffix : str
this add a suffix to the output file (useful to avoid overwriting
previous experiments)
"""
# read the climatology
clfile = get_cru_cl_file()
ncclim = salem.GeoNetcdf(clfile)
y0 = 1901 if y0 is None else y0
y1 = 2021 if y1 is None else y1
time = pd.date_range(start=f'{y0}-01-01', end=f'{y1}-12-01', freq='MS')
ny, r = divmod(len(time), 12)
assert r == 0
lon = gdir.cenlon
lat = gdir.cenlat
# This is guaranteed to work because I prepared the file (I hope)
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
# get climatology data
loc_hgt = ncclim.get_vardata('elev')
loc_tmp = ncclim.get_vardata('temp')
loc_pre = ncclim.get_vardata('prcp')
loc_lon = ncclim.get_vardata('lon')
loc_lat = ncclim.get_vardata('lat')
# see if the center is ok
if not np.isfinite(loc_hgt[1, 1]):
# take another candidate where finite
isok = np.isfinite(loc_hgt)
# wait: some areas are entirely nans, make the subset larger
_margin = 1
while not np.any(isok):
_margin += 1
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
loc_hgt = ncclim.get_vardata('elev')
isok = np.isfinite(loc_hgt)
if _margin > 1:
log.debug('(%s) I had to look up for far climate pixels: %s',
gdir.rgi_id, _margin)
# Take the first candidate (doesn't matter which)
lon, lat = ncclim.grid.ll_coordinates
lon = lon[isok][0]
lat = lat[isok][0]
# Resubset
ncclim.set_subset()
ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
loc_hgt = ncclim.get_vardata('elev')
loc_tmp = ncclim.get_vardata('temp')
loc_pre = ncclim.get_vardata('prcp')
loc_lon = ncclim.get_vardata('lon')
loc_lat = ncclim.get_vardata('lat')
assert np.isfinite(loc_hgt[1, 1])
isok = np.isfinite(loc_hgt)
hgt_f = loc_hgt[isok].flatten()
assert len(hgt_f) > 0.
# Make DataArrays
rng = np.random.RandomState(seed)
loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
coords={'month': np.arange(1, 13)})
ts_tmp = rng.randn(len(time)) * sigma_temp
ts_tmp = xr.DataArray(ts_tmp, dims=['time'],
coords={'time': time})
loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
coords={'month': np.arange(1, 13)})
ts_pre = utils.clip_min(rng.randn(len(time)) * sigma_prcp + 1, 0)
ts_pre = xr.DataArray(ts_pre, dims=['time'],
coords={'time': time})
# Create the time series
ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
ts_pre = ts_pre.groupby('time.month') * loc_pre
# done
loc_hgt = loc_hgt[1, 1]
loc_lon = loc_lon[1]
loc_lat = loc_lat[1]
assert np.isfinite(loc_hgt)
gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
loc_hgt, loc_lon, loc_lat,
filesuffix=output_filesuffix,
source='CRU CL2 and some randomness')
ncclim._nc.close()