"""Climate data and mass balance computations"""
# Built ins
import logging
import os
import datetime
import json
import warnings
# External libs
import numpy as np
import xarray as xr
import netCDF4
import pandas as pd
from scipy import stats
from scipy import optimize
# Optional libs
try:
import salem
except ImportError:
pass
# Locals
from oggm import cfg
from oggm import utils
from oggm.core import centerlines
from oggm import entity_task, global_task
from oggm.exceptions import (MassBalanceCalibrationError, InvalidParamsError,
InvalidWorkflowError)
# Module logger
log = logging.getLogger(__name__)
@entity_task(log, writes=['climate_historical'])
def process_custom_climate_data(gdir, y0=None, y1=None, output_filesuffix=None):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format
    (see https://github.com/OGGM/oggm-sample-data
    ->test-files/histalp_merged_hef.nc for an example).

    This is the way OGGM used to do it for HISTALP before it got added to
    the shop.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data). This year
        will be included, i.e. 2019 means the data will end at 2019-12-01
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)

    Raises
    ------
    InvalidParamsError
        if the climate file is missing, `PARAMS['baseline_climate']` is
        inconsistent, the selected period does not cover full years, or
        the file's variable units are not among the expected ones.
    """
    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise InvalidParamsError('Custom climate file not found')

    if cfg.PARAMS['baseline_climate'] not in ['', 'CUSTOM']:
        raise InvalidParamsError("When using custom climate data please set "
                                 "PARAMS['baseline_climate'] to an empty "
                                 "string or `CUSTOM`. Note also that you can "
                                 "now use the `process_histalp_data` task for "
                                 "automated HISTALP data processing.")

    # Read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)
    # try/finally so the netCDF handle is closed even if a check below raises
    try:
        # Avoid reading all data: subset to a small box around the glacier
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            nc_ts.set_subset(((gdir.cenlon, gdir.cenlat),
                              (gdir.cenlon, gdir.cenlat)),
                             margin=2)  # 2 is to be sure - also on small files

        yrs = nc_ts.time.year
        mths = nc_ts.time.month
        # Only shift the start/end year when it defaults to the file bounds:
        # a user-provided y0/y1 already designates a full calendar year and
        # must not be shifted just because the *file* starts/ends mid-year.
        if y0 is None:
            y0 = yrs[0]
            if mths[0] != 1:
                # The file starts mid-year: skip the incomplete first year
                y0 += 1
        if y1 is None:
            y1 = yrs[-1]
            if mths[-1] != 12:
                # The file ends mid-year: skip the incomplete last year
                y1 -= 1

        nc_ts.set_period(t0=f'{y0}-01-01', t1=f'{y1}-12-01')
        time = nc_ts.time
        _, r = divmod(len(time), 12)
        if r != 0:
            raise InvalidParamsError('Climate data should be full years')

        # Units checks - raise instead of assert (asserts vanish under `-O`)
        if nc_ts._nc.variables['hgt'].units.lower() not in \
                ['m', 'meters', 'meter', 'metres', 'metre']:
            raise InvalidParamsError('`hgt` units not understood')
        if nc_ts._nc.variables['temp'].units.lower() not in \
                ['degc', 'degrees', 'degree', 'c']:
            raise InvalidParamsError('`temp` units not understood')
        if nc_ts._nc.variables['prcp'].units.lower() not in \
                ['kg m-2', 'l m-2', 'mm', 'millimeters', 'millimeter']:
            raise InvalidParamsError('`prcp` units not understood')

        # Geolocation: grid point closest to the glacier center
        lon = nc_ts.get_vardata('lon')
        lat = nc_ts.get_vardata('lat')
        ilon = np.argmin(np.abs(lon - gdir.cenlon))
        ilat = np.argmin(np.abs(lat - gdir.cenlat))
        ref_pix_lon = lon[ilon]
        ref_pix_lat = lat[ilat]

        # Read the data at the reference pixel (center of the 3x3 box)
        temp = nc_ts.get_vardata('temp')
        prcp = nc_ts.get_vardata('prcp')
        hgt = nc_ts.get_vardata('hgt')
        ttemp = temp[:, ilat-1:ilat+2, ilon-1:ilon+2]
        itemp = ttemp[:, 1, 1]
        thgt = hgt[ilat-1:ilat+2, ilon-1:ilon+2]
        ihgt = thgt[1, 1]
        iprcp = prcp[:, ilat, ilon]
    finally:
        nc_ts.close()

    gdir.write_monthly_climate_file(time, iprcp, itemp, ihgt,
                                    ref_pix_lon, ref_pix_lat,
                                    filesuffix=output_filesuffix,
                                    source=fpath)
@entity_task(log)
def process_climate_data(gdir, y0=None, y1=None, output_filesuffix=None,
                         **kwargs):
    """Adds the selected climate data to this glacier directory.

    Short wrapper deciding on which task to run based on
    `cfg.PARAMS['baseline_climate']`.

    If you want to make it explicit, simply call the relevant task
    (e.g. oggm.shop.cru.process_cru_data).

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        the glacier directory to process
    y0 : int
        the starting year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    y1 : int
        the ending year of the timeseries to write. The default is to take
        the entire time period available in the file, but with this kwarg
        you can shorten it (to save space or to crop bad data)
    output_filesuffix : str
        this adds a suffix to the output file (useful to avoid overwriting
        previous experiments)
    **kwargs :
        any other argument relevant to the task that will be called.

    Raises
    ------
    ValueError
        if `cfg.PARAMS['baseline_climate']` is not a known dataset name.
    """
    # Which climate should we use?
    baseline = cfg.PARAMS['baseline_climate']

    if baseline == 'CRU':
        from oggm.shop.cru import process_cru_data
        process_cru_data(gdir, output_filesuffix=output_filesuffix,
                         y0=y0, y1=y1, **kwargs)
    elif baseline == 'HISTALP':
        from oggm.shop.histalp import process_histalp_data
        process_histalp_data(gdir, output_filesuffix=output_filesuffix,
                             y0=y0, y1=y1, **kwargs)
    elif baseline == 'W5E5':
        from oggm.shop.w5e5 import process_w5e5_data
        process_w5e5_data(gdir, output_filesuffix=output_filesuffix,
                          y0=y0, y1=y1, **kwargs)
    elif baseline == 'GSWP3_W5E5':
        from oggm.shop.w5e5 import process_gswp3_w5e5_data
        process_gswp3_w5e5_data(gdir, output_filesuffix=output_filesuffix,
                                y0=y0, y1=y1, **kwargs)
    elif baseline in ['ERA5', 'ERA5L', 'CERA', 'ERA5dr', 'ERA5L-HMA']:
        from oggm.shop.ecmwf import process_ecmwf_data
        process_ecmwf_data(gdir, output_filesuffix=output_filesuffix,
                           dataset=baseline, y0=y0, y1=y1, **kwargs)
    elif ('+' in baseline) or ('|' in baseline):
        # 'HIST+REF' or 'HIST|REF': bias-correct the historical dataset
        # towards the reference one via the delta method.
        # '+' pastes the reference data over the overlap period, while
        # '|' keeps a consistent (bias-corrected only) timeseries.
        # This bit below assumes ECMWF only datasets, but it should be
        # quite easy to extend for HISTALP+ERA5L for example
        from oggm.shop.ecmwf import process_ecmwf_data
        sep = '+' if '+' in baseline else '|'
        his, ref = baseline.split(sep)
        s = 'tmp_'
        process_ecmwf_data(gdir, output_filesuffix=s+his, dataset=his,
                           y0=y0, y1=y1, **kwargs)
        process_ecmwf_data(gdir, output_filesuffix=s+ref, dataset=ref,
                           y0=y0, y1=y1, **kwargs)
        historical_delta_method(gdir,
                                ref_filesuffix=s+ref,
                                hist_filesuffix=s+his,
                                output_filesuffix=output_filesuffix,
                                replace_with_ref_data=(sep == '+'))
    elif baseline == 'CUSTOM':
        process_custom_climate_data(gdir, y0=y0, y1=y1,
                                    output_filesuffix=output_filesuffix,
                                    **kwargs)
    else:
        raise ValueError("cfg.PARAMS['baseline_climate'] not understood")
@entity_task(log, writes=['climate_historical'])
def historical_delta_method(gdir, ref_filesuffix='', hist_filesuffix='',
                            output_filesuffix='', ref_year_range=None,
                            delete_input_files=True, scale_stddev=True,
                            replace_with_ref_data=True):
    """Applies the anomaly method to historical climate data.

    This function can be used to prolongate historical time series,
    for example by bias-correcting CERA-20C to ERA5 or ERA5-Land.

    The timeseries must be already available in the glacier directory.

    Parameters
    ----------
    gdir : :py:class:`oggm.GlacierDirectory`
        where to write the data
    ref_filesuffix : str
        the filesuffix of the historical climate data to take as reference
    hist_filesuffix : str
        the filesuffix of the historical climate data to apply to the
        reference
    output_filesuffix : str
        the filesuffix of the output file (usually left empty - i.e. this
        file will become the default)
    ref_year_range : tuple of str
        the year range for which you want to compute the anomalies. The
        default is to take the entire reference data period, but you could
        also choose `('1961', '1990')` for example
    delete_input_files : bool
        delete the input files after use - useful for operational runs
        where you don't want to carry too many files
    scale_stddev : bool
        whether to scale the temperature standard deviation as well
        (you probably want to do that)
    replace_with_ref_data : bool
        the default is to paste the bias-corrected data where no reference
        data is available, i.e. creating timeseries which are not consistent
        in time but "better" for recent times (e.g. CERA-20C until 1980,
        then ERA5). Set this to False to prevent this and make a consistent
        time series of CERA-20C (but bias corrected to the reference data,
        so "better" than CERA-20C out of the box).

    Raises
    ------
    NotImplementedError
        if `ref_year_range` is given (not supported yet).
    """
    if ref_year_range is not None:
        # Restricting the anomaly period is not implemented yet
        raise NotImplementedError()

    # Read the reference series and its metadata (height, location, source)
    f_ref = gdir.get_filepath('climate_historical', filesuffix=ref_filesuffix)
    with xr.open_dataset(f_ref) as ds:
        ref_temp = ds['temp']
        ref_prcp = ds['prcp']
        ref_hgt = float(ds.ref_hgt)
        ref_lon = float(ds.ref_pix_lon)
        ref_lat = float(ds.ref_pix_lat)
        source = ds.attrs.get('climate_source')

    # Read the historical series to be bias-corrected
    f_his = gdir.get_filepath('climate_historical', filesuffix=hist_filesuffix)
    with xr.open_dataset(f_his) as ds:
        hist_temp = ds['temp']
        hist_prcp = ds['prcp']
        # To differentiate both cases in the output metadata:
        # '+' means ref data was pasted in, '|' means bias-corrected only
        if replace_with_ref_data:
            source = ds.attrs.get('climate_source') + '+' + source
        else:
            source = ds.attrs.get('climate_source') + '|' + source

    # Common time period: xarray aligns arithmetic on the intersection of
    # coordinates, so the sum's 'time' axis is the overlap of both series
    cmn_time = (ref_temp + hist_temp)['time']
    assert len(cmn_time) // 12 == len(cmn_time) / 12
    # We need an even number of years for this to work
    # (drop the first year of the overlap when the count is odd)
    if ((len(cmn_time) // 12) % 2) == 1:
        cmn_time = cmn_time.isel(time=slice(12, len(cmn_time)))
    assert len(cmn_time) // 12 == len(cmn_time) / 12
    assert ((len(cmn_time) // 12) % 2) == 0
    cmn_time_range = cmn_time.values[[0, -1]]

    # Reference data restricted to the common period
    sref_temp = ref_temp.sel(time=slice(*cmn_time_range))
    sref_prcp = ref_prcp.sel(time=slice(*cmn_time_range))

    # See if we need to scale the variability
    if scale_stddev:
        # This is a bit more arithmetic
        tmp_sel = hist_temp.sel(time=slice(*cmn_time_range))
        tmp_std = tmp_sel.groupby('time.month').std(dim='time')
        # Per-calendar-month ratio of reference to historical std dev
        std_fac = sref_temp.groupby('time.month').std(dim='time') / tmp_std
        # Repeat the 12 monthly factors over the full historical length so
        # each timestep gets the factor of its own calendar month
        std_fac = np.tile(std_fac.data, len(hist_temp) // 12)
        # Window of overlap-length + 1 (odd, since the overlap is an even
        # number of years) so each timestep sits at the window center
        win_size = len(cmn_time) + 1

        def roll_func(x, axis=None):
            # NOTE(review): x appears to be (time, window); every 12th
            # sample picks the same calendar month within the window -
            # confirm against xarray's rolling.reduce contract
            x = x[:, ::12]
            n = len(x[0, :]) // 2
            xm = np.nanmean(x, axis=axis)
            # Scale each value's deviation from its same-month window mean
            return xm + (x[:, n] - xm) * std_fac

        hist_temp = hist_temp.rolling(time=win_size, center=True,
                                      min_periods=1).reduce(roll_func)

    # compute monthly anomalies
    # of temp (additive anomalies w.r.t. the overlap-period monthly means)
    ts_tmp_sel = hist_temp.sel(time=slice(*cmn_time_range))
    ts_tmp_avg = ts_tmp_sel.groupby('time.month').mean(dim='time')
    ts_tmp = hist_temp.groupby('time.month') - ts_tmp_avg
    # of precip -- scaled anomalies
    ts_pre_avg = hist_prcp.sel(time=slice(*cmn_time_range))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre_ano = hist_prcp.groupby('time.month') - ts_pre_avg
    # scaled anomalies is the default. Standard anomalies above
    # are used later for where ts_pre_avg == 0
    ts_pre = hist_prcp.groupby('time.month') / ts_pre_avg

    # reference averages
    # for temp
    loc_tmp = sref_temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = sref_prcp.groupby('time.month').mean()
    # scaled anomalies
    ts_pre = ts_pre.groupby('time.month') * loc_pre
    # standard anomalies
    ts_pre_ano = ts_pre_ano.groupby('time.month') + loc_pre
    # Correct infinite values with standard anomalies
    # (dividing by a zero monthly mean above produces inf/nan)
    ts_pre.values = np.where(np.isfinite(ts_pre.values),
                             ts_pre.values,
                             ts_pre_ano.values)
    # The previous step might create negative values (unlikely). Clip them
    ts_pre.values = utils.clip_min(ts_pre.values, 0)

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))

    if not replace_with_ref_data:
        # Just write what we have: the full bias-corrected historical series
        gdir.write_monthly_climate_file(ts_tmp.time.values,
                                        ts_pre.values, ts_tmp.values,
                                        ref_hgt, ref_lon, ref_lat,
                                        filesuffix=output_filesuffix,
                                        source=source)
    else:
        # Select all hist data before the ref. Label slicing is inclusive
        # in xarray, so the isel(0, -1) below drops the first ref month
        # that would otherwise be duplicated at the junction
        ts_tmp = ts_tmp.sel(time=slice(ts_tmp.time[0], ref_temp.time[0]))
        ts_tmp = ts_tmp.isel(time=slice(0, -1))
        ts_pre = ts_pre.sel(time=slice(ts_tmp.time[0], ref_temp.time[0]))
        ts_pre = ts_pre.isel(time=slice(0, -1))
        # Concatenate (corrected hist first, then raw ref) and write
        gdir.write_monthly_climate_file(np.append(ts_pre.time, ref_prcp.time),
                                        np.append(ts_pre, ref_prcp),
                                        np.append(ts_tmp, ref_temp),
                                        ref_hgt, ref_lon, ref_lat,
                                        filesuffix=output_filesuffix,
                                        source=source)

    if delete_input_files:
        # Only delete the temporary *suffixed* input files - never the
        # default (suffix-less) climate_historical file
        if ref_filesuffix:
            os.remove(f_ref)
        if hist_filesuffix:
            os.remove(f_his)