Source code for oggm.core.climate

"""Climate data and mass-balance computations"""
# Built ins
import logging
import os
import datetime
import warnings
# External libs
import numpy as np
import pandas as pd
import xarray as xr
from scipy import stats
import netCDF4
import salem
from scipy import optimize as optimization
# Locals
from oggm import cfg
from oggm import utils
from oggm.core import centerlines
from oggm import entity_task, global_task

# Module logger
log = logging.getLogger(__name__)


@global_task
def process_histalp_nonparallel(gdirs, fpath=None):
    """This is the way OGGM used to do it (deprecated).

    It requires an input file with a specific format, and uses lazy
    optimisation (computing time dependant gradients can be slow)
    """

    # Did the user specify a specific climate data file?
    if fpath is None:
        if 'climate_file' in cfg.PATHS:
            fpath = cfg.PATHS['climate_file']

    if not os.path.exists(fpath):
        raise IOError('Custom climate file not found')

    log.info('process_histalp_nonparallel')

    # read the file and data entirely (faster than many I/O)
    with netCDF4.Dataset(fpath, mode='r') as nc:
        lon = nc.variables['lon'][:]
        lat = nc.variables['lat'][:]

        # Time
        time = nc.variables['time']
        time = netCDF4.num2date(time[:], time.units)
        ny, r = divmod(len(time), 12)
        if r != 0:
            raise ValueError('Climate data should be N full years exclusively')
        y0, y1 = time[0].year, time[-1].year

        # Units
        assert nc.variables['hgt'].units == 'm'
        assert nc.variables['temp'].units == 'degC'
        assert nc.variables['prcp'].units == 'kg m-2'

    # Gradient defaults
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    for gdir in gdirs:
        ilon = np.argmin(np.abs(lon - gdir.cenlon))
        ilat = np.argmin(np.abs(lat - gdir.cenlat))
        ref_pix_lon = lon[ilon]
        ref_pix_lat = lat[ilat]
        iprcp, itemp, igrad, ihgt = utils.joblib_read_climate(fpath, ilon,
                                                              ilat, def_grad,
                                                              g_minmax,
                                                              use_grad)
        gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                        ref_pix_lon, ref_pix_lat)
        # metadata
        out = {'climate_source': fpath,
               'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
        gdir.write_pickle(out, 'climate_info')


[docs]@entity_task(log, writes=['climate_monthly'])
def process_custom_climate_data(gdir):
    """Processes and writes the climate data from a user-defined climate file.

    The input file must have a specific format (see
    oggm-sample-data/test-files/histalp_merged_hef.nc for an example).

    Uses caching for faster retrieval.

    This is the way OGGM does it for the Alps (HISTALP).
    """

    if not (('climate_file' in cfg.PATHS) and
            os.path.exists(cfg.PATHS['climate_file'])):
        raise IOError('Custom climate file not found')

    # read the file
    fpath = cfg.PATHS['climate_file']
    nc_ts = salem.GeoNetcdf(fpath)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                     t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts.time
    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')

    # Units
    assert nc_ts._nc.variables['hgt'].units.lower() in ['m', 'meters', 'meter',
                                                        'metres', 'metre']
    assert nc_ts._nc.variables['temp'].units.lower() in ['degc', 'degrees',
                                                         'degree', 'c']
    assert nc_ts._nc.variables['prcp'].units.lower() in ['kg m-2', 'l m-2',
                                                         'mm', 'millimeters',
                                                         'millimeter']

    # geoloc
    lon = nc_ts._nc.variables['lon'][:]
    lat = nc_ts._nc.variables['lat'][:]

    # Gradient defaults
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    ilon = np.argmin(np.abs(lon - gdir.cenlon))
    ilat = np.argmin(np.abs(lat - gdir.cenlat))
    ref_pix_lon = lon[ilon]
    ref_pix_lat = lat[ilat]

    # read the data
    temp = nc_ts.get_vardata('temp')
    prcp = nc_ts.get_vardata('prcp')
    hgt = nc_ts.get_vardata('hgt')
    igrad = np.zeros(len(time)) + def_grad
    ttemp = temp[:, ilat-1:ilat+2, ilon-1:ilon+2]
    itemp = ttemp[:, 1, 1]
    thgt = hgt[ilat-1:ilat+2, ilon-1:ilon+2]
    ihgt = thgt[1, 1]
    thgt = thgt.flatten()
    iprcp = prcp[:, ilat, ilon]
    nc_ts.close()

    # Now the gradient
    if use_grad:
        for t, loct in enumerate(ttemp):
            slope, _, _, p_val, _ = stats.linregress(thgt,
                                                     loct.flatten())
            igrad[t] = slope if (p_val < 0.01) else def_grad
        # dont exagerate too much
        igrad = np.clip(igrad, g_minmax[0], g_minmax[1])

    gdir.write_monthly_climate_file(time, iprcp, itemp, igrad, ihgt,
                                    ref_pix_lon, ref_pix_lat)
    # metadata
    out = {'climate_source': fpath, 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')


[docs]@entity_task(log, writes=['cesm_data'])
def process_cesm_data(gdir, filesuffix='', fpath_temp=None, fpath_precc=None,
                      fpath_precl=None):
    """Processes and writes the climate data for this glacier.

    This function is made for interpolating the Community
    Earth System Model Last Millenial Ensemble (CESM-LME) climate simulations,
    from Otto-Bliesner et al. (2016), to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.

    Parameters
    ----------
    filesuffix : str
        append a suffix to the filename (useful for ensemble experiments).
    fpath_temp : str
        path to the temp file (default: cfg.PATHS['gcm_temp_file'])
    fpath_precc : str
        path to the precc file (default: cfg.PATHS['gcm_precc_file'])
    fpath_precl : str
        path to the precl file (default: cfg.PATHS['gcm_precl_file'])
    """

    # GCM temperature and precipitation data
    if fpath_temp is None:
        if not ('gcm_temp_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_temp_file']")
        fpath_temp = cfg.PATHS['gcm_temp_file']
    if fpath_precc is None:
        if not ('gcm_precc_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precc_file']")
        fpath_precc = cfg.PATHS['gcm_precc_file']
    if fpath_precl is None:
        if not ('gcm_precl_file' in cfg.PATHS):
            raise ValueError("Need to set cfg.PATHS['gcm_precl_file']")
        fpath_precl = cfg.PATHS['gcm_precl_file']

    # read the files
    with warnings.catch_warnings():
        # Long time series are currently a pain pandas
        warnings.filterwarnings("ignore", message='Unable to decode time axis')
        tempds = xr.open_dataset(fpath_temp)
    precpcds = xr.open_dataset(fpath_precc, decode_times=False)
    preclpds = xr.open_dataset(fpath_precl, decode_times=False)

    # select for location
    lon = gdir.cenlon
    lat = gdir.cenlat

    # CESM files are in 0-360
    if lon <= 0:
        lon += 360

    # take the closest
    # TODO: consider GCM interpolation?
    temp = tempds.TREFHT.sel(lat=lat, lon=lon, method='nearest')
    precp = precpcds.PRECC.sel(lat=lat, lon=lon, method='nearest') + \
            preclpds.PRECL.sel(lat=lat, lon=lon, method='nearest')

    # from normal years to hydrological years
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    # TODO: we don't check if the files actually start in January but we should
    precp = precp[sm-1:sm-13].load()
    temp = temp[sm-1:sm-13].load()
    y0 = int(temp.time.values[0].strftime('%Y'))
    y1 = int(temp.time.values[-1].strftime('%Y'))
    time = pd.period_range('{}-{:02d}'.format(y0, sm),
                           '{}-{:02d}'.format(y1, em), freq='M')
    temp['time'] = time
    precp['time'] = time
    # Workaround for https://github.com/pydata/xarray/issues/1565
    temp['month'] = ('time', time.month)
    precp['month'] = ('time', time.month)
    temp['year'] = ('time', time.year)
    precp['year'] = ('time', time.year)
    ny, r = divmod(len(time), 12)
    assert r == 0

    # Convert m s-1 to mm mth-1
    ndays = np.tile(np.roll(cfg.DAYS_IN_MONTH, 13-sm), y1 - y0)
    precp = precp * ndays * (60 * 60 * 24 * 1000)

    # compute monthly anomalies
    # of temp
    ts_tmp_avg = temp.sel(time=(temp.year >= 1961) & (temp.year <= 1990))
    ts_tmp_avg = ts_tmp_avg.groupby(ts_tmp_avg.month).mean(dim='time')
    ts_tmp = temp.groupby(temp.month) - ts_tmp_avg
    # of precip
    ts_pre_avg = precp.isel(time=(precp.year >= 1961) & (precp.year <= 1990))
    ts_pre_avg = ts_pre_avg.groupby(ts_pre_avg.month).mean(dim='time')
    ts_pre = precp.groupby(precp.month) - ts_pre_avg

    # Get CRU to apply the anomaly to
    fpath = gdir.get_filepath('climate_monthly')
    ds_cru = xr.open_dataset(fpath)

    # Here we assume the gradient is a monthly average
    ts_grad = np.tile(ds_cru.grad[0:12], ny)

    # Add climate anomaly to CRU clim
    dscru = ds_cru.sel(time=slice('1961', '1990'))
    # for temp
    loc_tmp = dscru.temp.groupby('time.month').mean()
    ts_tmp = ts_tmp.groupby(ts_tmp.month) + loc_tmp
    # for prcp
    loc_pre = dscru.prcp.groupby('time.month').mean()
    ts_pre = ts_pre.groupby(ts_pre.month) + loc_pre

    # load dates in right format to save
    dsindex = salem.GeoNetcdf(fpath_temp, monthbegin=True)
    time1 = dsindex.variables['time']
    time2 = time1[sm-1:sm-13] - ndays  # to hydrological years
    time2 = netCDF4.num2date(time2, time1.units, calendar='noleap')

    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))

    # back to -180 - 180
    loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360

    gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values,
                                    ts_grad, float(dscru.ref_hgt),
                                    loc_lon, precp.lat.values,
                                    time_unit=time1.units,
                                    file_name='cesm_data',
                                    filesuffix=filesuffix)

    dsindex._nc.close()
    tempds.close()
    precpcds.close()
    preclpds.close()
    ds_cru.close()


[docs]@entity_task(log, writes=['climate_monthly'])
def process_cru_data(gdir):
    """Processes and writes the climate data for this glacier.

    Interpolates the CRU TS data to the high-resolution CL2 climatologies
    (provided with OGGM) and writes everything to a NetCDF file.
    """

    # read the climatology
    clfile = utils.get_cru_cl_file()
    ncclim = salem.GeoNetcdf(clfile)
    # and the TS data
    nc_ts_tmp = salem.GeoNetcdf(utils.get_cru_file('tmp'), monthbegin=True)
    nc_ts_pre = salem.GeoNetcdf(utils.get_cru_file('pre'), monthbegin=True)

    # set temporal subset for the ts data (hydro years)
    sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
    em = sm - 1 if (sm > 1) else 12
    yrs = nc_ts_pre.time.year
    y0, y1 = yrs[0], yrs[-1]
    nc_ts_tmp.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    nc_ts_pre.set_period(t0='{}-{:02d}-01'.format(y0, sm),
                         t1='{}-{:02d}-01'.format(y1, em))
    time = nc_ts_pre.time
    ny, r = divmod(len(time), 12)
    assert r == 0

    # gradient default params
    use_grad = cfg.PARAMS['temp_use_local_gradient']
    def_grad = cfg.PARAMS['temp_default_gradient']
    g_minmax = cfg.PARAMS['temp_local_gradient_bounds']

    lon = gdir.cenlon
    lat = gdir.cenlat

    # This is guaranteed to work because I prepared the file (I hope)
    ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # get climatology data
    loc_hgt = ncclim.get_vardata('elev')
    loc_tmp = ncclim.get_vardata('temp')
    loc_pre = ncclim.get_vardata('prcp')
    loc_lon = ncclim.get_vardata('lon')
    loc_lat = ncclim.get_vardata('lat')

    # see if the center is ok
    if not np.isfinite(loc_hgt[1, 1]):
        # take another candidate where finite
        isok = np.isfinite(loc_hgt)

        # wait: some areas are entirely NaNs, make the subset larger
        _margin = 1
        while not np.any(isok):
            _margin += 1
            ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=_margin)
            loc_hgt = ncclim.get_vardata('elev')
            isok = np.isfinite(loc_hgt)
        if _margin > 1:
            log.debug('(%s) I had to look up for far climate pixels: %s',
                      gdir.rgi_id, _margin)

        # Take the first candidate (doesn't matter which)
        lon, lat = ncclim.grid.ll_coordinates
        lon = lon[isok][0]
        lat = lat[isok][0]
        # Resubset
        ncclim.set_subset()
        ncclim.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
        loc_hgt = ncclim.get_vardata('elev')
        loc_tmp = ncclim.get_vardata('temp')
        loc_pre = ncclim.get_vardata('prcp')
        loc_lon = ncclim.get_vardata('lon')
        loc_lat = ncclim.get_vardata('lat')

    assert np.isfinite(loc_hgt[1, 1])
    isok = np.isfinite(loc_hgt)
    hgt_f = loc_hgt[isok].flatten()
    assert len(hgt_f) > 0.
    ts_grad = np.zeros(12) + def_grad
    if use_grad and len(hgt_f) >= 5:
        for i in range(12):
            loc_tmp_mth = loc_tmp[i, ...][isok].flatten()
            slope, _, _, p_val, _ = stats.linregress(hgt_f, loc_tmp_mth)
            ts_grad[i] = slope if (p_val < 0.01) else def_grad
    # ... but dont exaggerate too much
    ts_grad = np.clip(ts_grad, g_minmax[0], g_minmax[1])
    # convert to timeserie and hydroyears
    ts_grad = ts_grad.tolist()
    ts_grad = ts_grad[9:] + ts_grad[0:9]
    ts_grad = np.asarray(ts_grad * ny)

    # maybe this will throw out of bounds warnings
    nc_ts_tmp.set_subset(corners=((lon, lat), (lon, lat)), margin=1)
    nc_ts_pre.set_subset(corners=((lon, lat), (lon, lat)), margin=1)

    # compute monthly anomalies
    # of temp
    ts_tmp = nc_ts_tmp.get_vardata('tmp', as_xarray=True)
    ts_tmp_avg = ts_tmp.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_tmp_avg = ts_tmp_avg.groupby('time.month').mean(dim='time')
    ts_tmp = ts_tmp.groupby('time.month') - ts_tmp_avg
    # of precip
    ts_pre = nc_ts_pre.get_vardata('pre', as_xarray=True)
    ts_pre_avg = ts_pre.sel(time=slice('1961-01-01', '1990-12-01'))
    ts_pre_avg = ts_pre_avg.groupby('time.month').mean(dim='time')
    ts_pre = ts_pre.groupby('time.month') - ts_pre_avg

    # interpolate to HR grid
    if np.any(~np.isfinite(ts_tmp[:, 1, 1])):
        # Extreme case, middle pix is not valid
        # take any valid pix from the 3*3 (and hope there's one)
        found_it = False
        for idi in range(2):
            for idj in range(2):
                if np.all(np.isfinite(ts_tmp[:, idj, idi])):
                    ts_tmp[:, 1, 1] = ts_tmp[:, idj, idi]
                    ts_pre[:, 1, 1] = ts_pre[:, idj, idi]
                    found_it = True
        if not found_it:
            msg = '({}) there is no climate data'.format(gdir.rgi_id)
            raise RuntimeError(msg)
    elif np.any(~np.isfinite(ts_tmp)):
        # maybe the side is nan, but we can do nearest
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='nearest')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='nearest')
    else:
        # We can do bilinear
        ts_tmp = ncclim.grid.map_gridded_data(ts_tmp.values, nc_ts_tmp.grid,
                                              interp='linear')
        ts_pre = ncclim.grid.map_gridded_data(ts_pre.values, nc_ts_pre.grid,
                                              interp='linear')

    # take the center pixel and add it to the CRU CL clim
    # for temp
    loc_tmp = xr.DataArray(loc_tmp[:, 1, 1], dims=['month'],
                           coords={'month': ts_tmp_avg.month})
    ts_tmp = xr.DataArray(ts_tmp[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_tmp = ts_tmp.groupby('time.month') + loc_tmp
    # for prcp
    loc_pre = xr.DataArray(loc_pre[:, 1, 1], dims=['month'],
                           coords={'month': ts_pre_avg.month})
    ts_pre = xr.DataArray(ts_pre[:, 1, 1], dims=['time'],
                          coords={'time': time})
    ts_pre = ts_pre.groupby('time.month') + loc_pre

    # done
    loc_hgt = loc_hgt[1, 1]
    loc_lon = loc_lon[1]
    loc_lat = loc_lat[1]
    assert np.isfinite(loc_hgt)
    assert np.all(np.isfinite(ts_pre.values))
    assert np.all(np.isfinite(ts_tmp.values))
    assert np.all(np.isfinite(ts_grad))
    gdir.write_monthly_climate_file(time, ts_pre.values, ts_tmp.values,
                                    ts_grad, loc_hgt, loc_lon, loc_lat)
    ncclim._nc.close()
    nc_ts_tmp._nc.close()
    nc_ts_pre._nc.close()
    # metadata
    out = {'climate_source': 'CRU data', 'hydro_yr_0': y0+1, 'hydro_yr_1': y1}
    gdir.write_pickle(out, 'climate_info')


def mb_climate_on_height(gdir, heights, prcp_fac,
                         time_range=None, year_range=None):
    """Mass-balance climate of the glacier at a specific height

    Reads the glacier's monthly climate data file and computes the
    temperature "energies" (temp above 0) and solid precipitation at the
    required height.

    Parameters:
    -----------
    gdir: the glacier directory
    heights: a 1D array of the heights (in meter) where you want the data
    prcp_fac: the correction factor for precipitation
    time_range (optional): default is to read all data but with this you
    can provide a [datetime, datetime] bounds (inclusive).
    year_range (optional): maybe more useful than the time bounds above.
    Provide a [y0, y1] year range to get the data for specific (hydrological)
    years only

    Returns:
    --------
    (time, tempformelt, prcpsol)::
        - time: array of shape (nt,)
        - tempformelt:  array of shape (len(heights), nt)
        - prcpsol:  array of shape (len(heights), nt)
    """

    if year_range is not None:
        sm = cfg.PARAMS['hydro_month_' + gdir.hemisphere]
        em = sm - 1 if (sm > 1) else 12
        t0 = datetime.datetime(year_range[0]-1, sm, 1)
        t1 = datetime.datetime(year_range[1], em, 1)
        return mb_climate_on_height(gdir, heights, prcp_fac,
                                    time_range=[t0, t1])

    # Parameters
    temp_all_solid = cfg.PARAMS['temp_all_solid']
    temp_all_liq = cfg.PARAMS['temp_all_liq']
    temp_melt = cfg.PARAMS['temp_melt']

    # Read file
    with netCDF4.Dataset(gdir.get_filepath('climate_monthly'), mode='r') as nc:
        # time
        time = nc.variables['time']
        time = netCDF4.num2date(time[:], time.units)
        if time_range is not None:
            p0 = np.where(time == time_range[0])[0]
            try:
                p0 = p0[0]
            except IndexError:
                raise RuntimeError('time_range[0] not found in file')
            p1 = np.where(time == time_range[1])[0]
            try:
                p1 = p1[0]
            except IndexError:
                raise RuntimeError('time_range[1] not found in file')
        else:
            p0 = 0
            p1 = len(time)-1

        time = time[p0:p1+1]

        # Read timeseries
        itemp = nc.variables['temp'][p0:p1+1]
        iprcp = nc.variables['prcp'][p0:p1+1]
        igrad = nc.variables['grad'][p0:p1+1]
        ref_hgt = nc.ref_hgt

    # Correct precipitation
    iprcp *= prcp_fac

    # For each height pixel:
    # Compute temp and tempformelt (temperature above melting threshold)
    npix = len(heights)
    grad_temp = np.atleast_2d(igrad).repeat(npix, 0)
    grad_temp *= (heights.repeat(len(time)).reshape(grad_temp.shape) - ref_hgt)
    temp2d = np.atleast_2d(itemp).repeat(npix, 0) + grad_temp
    temp2dformelt = temp2d - temp_melt
    temp2dformelt = np.clip(temp2dformelt, 0, temp2dformelt.max())
    # Compute solid precipitation from total precipitation
    prcpsol = np.atleast_2d(iprcp).repeat(npix, 0)
    fac = 1 - (temp2d - temp_all_solid) / (temp_all_liq - temp_all_solid)
    fac = np.clip(fac, 0, 1)
    prcpsol = prcpsol * fac

    return time, temp2dformelt, prcpsol


def mb_yearly_climate_on_height(gdir, heights, prcp_fac,
                                year_range=None, flatten=False):
    """Yearly mass-balance climate of the glacier at a specific height

    The precipitation time series are not corrected!

    Parameters:
    -----------
    gdir: the glacier directory
    heights: a 1D array of the heights (in meter) where you want the data
    prcp_fac: the correction factor for precipitation
    year_range (optional): a [y0, y1] year range to get the data for specific (
    hydrological) years only
    flatten: for some applications (glacier average MB) it's ok to flatten  the
    data (average over height) prior to annual summing.

    Returns:
    --------
    (years, tempformelt, prcpsol)::
        - years: array of shape (ny,)
        - tempformelt:  array of shape (len(heights), ny) (or ny if flatten
        is set)
        - prcpsol:  array of shape (len(heights), ny) (or ny if flatten
        is set)
    """

    time, temp, prcp = mb_climate_on_height(gdir, heights, prcp_fac,
                                            year_range=year_range)

    ny, r = divmod(len(time), 12)
    if r != 0:
        raise ValueError('Climate data should be N full years exclusively')
    # Last year gives the tone of the hydro year
    years = np.arange(time[-1].year-ny+1, time[-1].year+1, 1)

    if flatten:
        # Spatial average
        temp_yr = np.zeros(len(years))
        prcp_yr = np.zeros(len(years))
        temp = np.mean(temp, axis=0)
        prcp = np.mean(prcp, axis=0)
        for i, y in enumerate(years):
            temp_yr[i] = np.sum(temp[i*12:(i+1)*12])
            prcp_yr[i] = np.sum(prcp[i*12:(i+1)*12])
    else:
        # Annual prcp and temp for each point (no spatial average)
        temp_yr = np.zeros((len(heights), len(years)))
        prcp_yr = np.zeros((len(heights), len(years)))
        for i, y in enumerate(years):
            temp_yr[:, i] = np.sum(temp[:, i*12:(i+1)*12], axis=1)
            prcp_yr[:, i] = np.sum(prcp[:, i*12:(i+1)*12], axis=1)

    return years, temp_yr, prcp_yr


def mb_yearly_climate_on_glacier(gdir, prcp_fac, year_range=None):
    """Yearly mass-balance climate at all glacier heights,
    multiplied with the flowlines widths. (all in pix coords.)

    The precipitation time series are not corrected!

    Parameters:
    -----------
    gdir : the glacier directory
    prcp_fac: the correction factor for precipitation
    year_range (optional): a [y0, y1] year range to get the data for specific
    (hydrological) years only

    Returns:
    --------
    (years, tempformelt, prcpsol)::
        - years: array of shape (ny,)
        - tempformelt:  array of shape (len(heights), ny)
        - prcpsol:  array of shape (len(heights), ny) (not corrected!)
    """

    flowlines = gdir.read_pickle('inversion_flowlines')

    heights = np.array([])
    widths = np.array([])
    for fl in flowlines:
        heights = np.append(heights, fl.surface_h)
        widths = np.append(widths, fl.widths)

    years, temp, prcp = mb_yearly_climate_on_height(gdir, heights, prcp_fac,
                                                    year_range=year_range,
                                                    flatten=False)

    temp = np.average(temp, axis=0, weights=widths)
    prcp = np.average(prcp, axis=0, weights=widths)

    return years, temp, prcp


[docs]@entity_task(log, writes=['mu_candidates'])
def mu_candidates(gdir, prcp_sf=None):
    """Computes the mu candidates.

    For each 31 year-period centered on the year of interest, mu is is the
    temperature sensitivity necessary for the glacier with its current shape
    to be in equilibrium with its climate.

    For glaciers with MB data only!

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
    prcp_sf : float (optional)
        force to a certain prcp scaling factor
    """

    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])

    # Only get the years were we consider looking for tstar
    y0, y1 = cfg.PARAMS['tstar_search_window']
    ci = gdir.read_pickle('climate_info')
    y0 = y0 or ci['hydro_yr_0']
    y1 = y1 or ci['hydro_yr_1']

    years, temp_yr, prcp_yr = mb_yearly_climate_on_glacier(gdir, 1.,
                                                           year_range=[y0, y1])

    # Be sure we have no marine terminating glacier
    assert gdir.terminus_type == 'Land-terminating'

    # prcp scaling factor - one or more?
    if cfg.PARAMS['prcp_scaling_factor'] == 'stddev_perglacier':
        sf = np.arange(0.5, 5.01, 0.05)
    elif cfg.PARAMS['prcp_scaling_factor'] == 'stddev':
        sf = [float(prcp_sf)]
    else:
        sf = np.asarray([cfg.PARAMS['prcp_scaling_factor']])

    # Compute mu for each 31-yr climatological period and each prcp factor
    ny = len(years)
    nsf = len(sf)
    mu_yr_clim = np.zeros((ny, nsf)) * np.NaN
    for j, fac in enumerate(sf):
        for i, y in enumerate(years):
            # Ignore begin and end
            if ((i-mu_hp) < 0) or ((i+mu_hp) >= ny):
                continue
            t_avg = np.mean(temp_yr[i-mu_hp:i+mu_hp+1])
            if t_avg > 1e-3:  # if too cold no melt possible
                prcp_ts = prcp_yr[i-mu_hp:i+mu_hp+1] * fac
                mu_yr_clim[i, j] = np.mean(prcp_ts) / t_avg

    # Check that we found a least one mustar
    if np.sum(np.isfinite(mu_yr_clim)) < 1:
        raise RuntimeError('({}) no mustar candidates found.'
                           .format(gdir.rgi_id))

    # Write
    df = pd.DataFrame(data=mu_yr_clim, index=years, columns=sf)
    gdir.write_pickle(df, 'mu_candidates')


def t_star_from_refmb(gdir, mbdf):
    """Computes the t* for the glacier, given a series of MB measurements.

    Could be multiprocessed but its probably not necessary.

    Parameters
    ----------
    gdirs: the list of oggm.GlacierDirectory objects where to write the data.
    mbdf: a pd.Series containing the observed MB data indexed by year

    Returns
    -------
    A dict: {t_star:[], bias:[], bias_std:[], prcp_fac:float}
    """

    # Only divide 0, we believe the original RGI entities to be the ref...
    years, temp_yr_ts, prcp_yr_ts = mb_yearly_climate_on_glacier(gdir, 1.)

    # which years to look at
    selind = np.searchsorted(years, mbdf.index)
    temp_yr_ts = temp_yr_ts[selind]
    prcp_yr_ts = prcp_yr_ts[selind]
    temp_yr = np.mean(temp_yr_ts)
    prcp_yr = np.mean(prcp_yr_ts)

    # Average oberved mass-balance
    ref_mb = np.mean(mbdf)
    ref_mb_std = np.std(mbdf)

    # Average mass-balance per mu and fac
    mu_yr_clim_df = gdir.read_pickle('mu_candidates')

    odf = pd.DataFrame(index=mu_yr_clim_df.columns)
    out = dict()
    for prcp_fac in mu_yr_clim_df:

        mu_yr_clim = mu_yr_clim_df[prcp_fac]
        nmu = len(mu_yr_clim)
        mb_per_mu = prcp_yr * prcp_fac - mu_yr_clim * temp_yr
        mbts_per_mu = np.atleast_2d(prcp_yr_ts * prcp_fac).repeat(nmu, 0) - \
                      np.atleast_2d(mu_yr_clim).T * \
                      np.atleast_2d(temp_yr_ts).repeat(nmu, 0)
        std_per_mu = mb_per_mu*0 + np.std(mbts_per_mu, axis=1)

        # Diff to reference
        diff = (mb_per_mu - ref_mb).dropna()
        diff_std = (std_per_mu - ref_mb_std).dropna()
        signchange = utils.signchange(diff)

        # If sign changes save them
        # TODO: ideas to do this better:
        #  - apply a smooth (take noise into account)
        #  - take longer stable periods into account
        # these stuffs could be proven better (or not) with cross-val
        pchange = np.where(signchange == 1)[0]
        years = diff.index
        diff = diff.values
        diff_std = diff_std.values
        if len(pchange) > 0:
            t_stars = []
            bias = []
            std_bias = []
            for p in pchange:
                # Check the side with the smallest bias
                ide = p-1 if np.abs(diff[p-1]) < np.abs(diff[p]) else p
                if years[ide] not in t_stars:
                    t_stars.append(years[ide])
                    bias.append(diff[ide])
                    std_bias.append(diff_std[ide])
        else:
            amin = np.argmin(np.abs(diff))
            t_stars = [years[amin]]
            bias = [diff[amin]]
            std_bias = [diff_std[amin]]

        # (prcp_fac, t_stars, bias, std_bias)
        odf.loc[prcp_fac, 'avg_bias'] = np.mean(bias)
        odf.loc[prcp_fac, 'avg_std_bias'] = np.mean(std_bias)
        odf.loc[prcp_fac, 'n_tstar'] = len(std_bias)
        out[prcp_fac] = {'t_star': t_stars, 'bias': bias, 'std_bias': std_bias,
                         'prcp_fac': prcp_fac}

    # write
    gdir.write_pickle(odf, 'prcp_fac_optim')

    # we take the closest result and see later if it needs cleverer handling
    amin = odf.avg_std_bias.abs().idxmin()  # this gives back an index!
    return out[amin]


def calving_mb(gdir):
    """Calving mass-loss in specific MB equivalent.

    This is necessary to compute mu star.

    TODO: currently this is hardcoded for Columbia, but we should come-up with
    somthing better!
    """

    if not gdir.is_tidewater:
        return 0.

    # Ok. Just take the caving rate from cfg and change its units
    # Original units: km3 a-1, to change to mm a-1 (units of specific MB)
    return gdir.inversion_calving_rate * 1e9 * cfg.RHO / gdir.rgi_area_m2


[docs]@entity_task(log, writes=['local_mustar'])
def local_mustar(gdir, tstar=None, bias=None, prcp_fac=None,
                 minimum_mustar=0.):
    """Compute the local mustar from interpolated tstars.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
    tstar: int
        the year where the glacier should be equilibrium
    bias: int
        the associated reference bias
    prcp_fac: int
        the associated precipitation factor
    minimum_mustar: float
        if mustar goes below this threshold, clip it to that value.
        If you want this to happen with `minimum_mustar=0.` you will have
        to set `cfg.PARAMS['allow_negative_mustar']=True` first.
    """

    assert bias is not None
    assert prcp_fac is not None
    assert tstar is not None

    # Climate period
    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])
    yr = [tstar-mu_hp, tstar+mu_hp]

    # Do we have a calving glacier?
    cmb = calving_mb(gdir)

    log.info('(%s) local mu* for t*=%d', gdir.rgi_id, tstar)

    # Get the corresponding mu
    years, temp_yr, prcp_yr = mb_yearly_climate_on_glacier(gdir, prcp_fac,
                                                           year_range=yr)
    assert len(years) == (2 * mu_hp + 1)

    # mustar is taking calving into account (units of specific MB)
    mustar = (np.mean(prcp_yr) - cmb) / np.mean(temp_yr)
    if not np.isfinite(mustar):
        raise RuntimeError('{} has a non finite mu'.format(gdir.rgi_id))
    if not cfg.PARAMS['allow_negative_mustar']:
        if mustar < 0:
            raise RuntimeError('{} has a negative mu'.format(gdir.rgi_id))
    # For the calving param it might be useful to clip the mu
    mustar = np.clip(mustar, minimum_mustar, np.max(mustar))

    # Scalars in a small dataframe for later
    df = pd.DataFrame()
    df['rgi_id'] = [gdir.rgi_id]
    df['t_star'] = [tstar]
    df['mu_star'] = [mustar]
    df['prcp_fac'] = [prcp_fac]
    df['bias'] = [bias]
    df.to_csv(gdir.get_filepath('local_mustar'), index=False)


[docs]@entity_task(log, writes=['inversion_flowlines'])
def apparent_mb(gdir):
    """Compute the apparent mb from the calibrated mustar.

    Parameters
    ----------
    """

    # Calibrated data
    df = pd.read_csv(gdir.get_filepath('local_mustar')).iloc[0]
    tstar = df['t_star']
    prcp_fac = df['prcp_fac']
    mu_star = df['mu_star']
    bias = df['bias']

    # Climate period
    mu_hp = int(cfg.PARAMS['mu_star_halfperiod'])
    yr = [tstar-mu_hp, tstar+mu_hp]

    # Do we have a calving glacier?
    cmb = calving_mb(gdir)

    # For each flowline compute the apparent MB
    fls = gdir.read_pickle('inversion_flowlines')

    # Reset flux
    for fl in fls:
        fl.flux = np.zeros(len(fl.surface_h))

    # Flowlines in order to be sure
    for fl in fls:
        y, t, p = mb_yearly_climate_on_height(gdir, fl.surface_h, prcp_fac,
                                              year_range=yr,
                                              flatten=False)
        fl.set_apparent_mb(np.mean(p, axis=1) - mu_star*np.mean(t, axis=1))

    # Sometimes, low lying tributaries have a non-physically consistent
    # Mass-balance. We should remove these, and start all over again until
    # all tributaries are consistent
    do_filter = [fl.flux_needed_correction for fl in fls]
    if cfg.PARAMS['filter_for_neg_flux'] and np.any(do_filter):
        assert not do_filter[-1]  # This should not happen
        # Keep only the good lines
        heads = [fl.orig_head for fl in fls if not fl.flux_needed_correction]
        centerlines.compute_centerlines(gdir, heads=heads, reset=True)
        centerlines.initialize_flowlines(gdir, reset=True)
        if gdir.has_file('downstream_line'):
            centerlines.compute_downstream_line(gdir, reset=True)
            centerlines.compute_downstream_bedshape(gdir, reset=True)
        centerlines.catchment_area(gdir, reset=True)
        centerlines.catchment_intersections(gdir, reset=True)
        centerlines.catchment_width_geom(gdir, reset=True)
        centerlines.catchment_width_correction(gdir, reset=True)
        local_mustar(gdir, tstar=tstar, bias=bias, prcp_fac=prcp_fac,
                     reset=True)
        # Ok, re-call ourselves
        return apparent_mb(gdir, reset=True)

    # Check and write
    aflux = fls[-1].flux[-1] * 1e-9 / cfg.RHO * gdir.grid.dx**2
    # If not marine and a bit far from zero, warning
    if cmb == 0 and not np.allclose(fls[-1].flux[-1], 0., atol=0.01):
        log.warning('(%s) flux should be zero, but is: '
                    '%.4f km3 ice yr-1', gdir.rgi_id, aflux)
    # If not marine and quite far from zero, error
    if cmb == 0 and not np.allclose(fls[-1].flux[-1], 0., atol=1):
        msg = ('({}) flux should be zero, but is: {:.4f} km3 ice yr-1'
               .format(gdir.rgi_id, aflux))
        raise RuntimeError(msg)
    gdir.write_pickle(fls, 'inversion_flowlines')


[docs]@entity_task(log, writes=['inversion_flowlines', 'linear_mb_params'])
def apparent_mb_from_linear_mb(gdir, mb_gradient=3.):
    """Compute apparent mb from a linear mass-balance assumption (for testing).

    This is for testing currently, but could be used as alternative method
    for the inversion quite easily.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
    """

    # Do we have a calving glacier?
    cmb = calving_mb(gdir)

    # Get the height and widths along the fls
    h, w = gdir.get_inversion_flowline_hw()

    # Now find the ELA till the integrated mb is zero
    from oggm.core.massbalance import LinearMassBalance
    def to_minimize(ela_h):
        mbmod = LinearMassBalance(ela_h[0], grad=mb_gradient)
        smb = mbmod.get_specific_mb(h, w)
        return (smb - cmb)**2

    ela_h = optimization.minimize(to_minimize, [0.], bounds=((0, 10000), ))
    ela_h = ela_h['x'][0]
    mbmod = LinearMassBalance(ela_h, grad=mb_gradient)

    # For each flowline compute the apparent MB
    fls = gdir.read_pickle('inversion_flowlines')

    # Reset flux
    for fl in fls:
        fl.flux = np.zeros(len(fl.surface_h))

    # Flowlines in order to be sure
    for fl in fls:
        mbz = mbmod.get_annual_mb(fl.surface_h) * cfg.SEC_IN_YEAR * cfg.RHO
        fl.set_apparent_mb(mbz)

    # Check and write
    aflux = fls[-1].flux[-1] * 1e-9 / cfg.RHO * gdir.grid.dx**2
    # If not marine and a bit far from zero, warning
    if cmb == 0 and not np.allclose(fls[-1].flux[-1], 0., atol=0.01):
        log.warning('(%s) flux should be zero, but is: '
                    '%.4f km3 ice yr-1', gdir.rgi_id, aflux)
    # If not marine and quite far from zero, error
    if cmb == 0 and not np.allclose(fls[-1].flux[-1], 0., atol=1):
        msg = ('({}) flux should be zero, but is: {:.4f} km3 ice yr-1'
               .format(gdir.rgi_id, aflux))
        raise RuntimeError(msg)
    gdir.write_pickle(fls, 'inversion_flowlines')
    gdir.write_pickle({'ela_h': ela_h, 'grad': mb_gradient},
                      'linear_mb_params')


def _get_optimal_scaling_factor(ref_gdirs):
    """Get the precipitation scaling factor that minimizes the std dev error.
    """

    from scipy import optimize as optimization

    def to_optimize(sf):
        abs_std = []
        for gdir in ref_gdirs:
            # all possible mus
            mu_candidates(gdir, prcp_sf=sf, reset=True)
            # list of mus compatibles with refmb
            mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
            res = t_star_from_refmb(gdir, mbdf)
            abs_std.append(np.mean(res['std_bias']))
        return np.mean(abs_std)**2

    with utils.DisableLogger():
        opti = optimization.minimize(to_optimize, [1.],
                                     bounds=((0.01, 10),),
                                     tol=1)

    fac = opti['x'][0]
    log.info('Optimal prcp factor: {:.2f}'.format(fac))
    return fac


[docs]@global_task
def compute_ref_t_stars(gdirs):
    """ Detects the best t* for the reference glaciers.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    if not cfg.PARAMS['run_mb_calibration']:
        raise RuntimeError('Are you sure you want to calibrate the reference '
                           't*? There is a pre-calibrated version available. '
                           'If you know what you are doing and still want to '
                           'calibrate, set the `run_mb_calibration` parameter '
                           'to `True`.')

    log.info('Compute the reference t* and mu* for WGMS glaciers')

    # Reference glaciers only if in the list and period is good
    ref_gdirs = utils.get_ref_mb_glaciers(gdirs)

    prcp_sf = None
    if cfg.PARAMS['prcp_scaling_factor'] == 'stddev':
        prcp_sf = _get_optimal_scaling_factor(ref_gdirs)

    # Loop
    only_one = []  # start to store the glaciers with just one t*
    per_glacier = dict()
    for gdir in ref_gdirs:
        # all possible mus
        mu_candidates(gdir, prcp_sf=prcp_sf, reset=True)
        # list of mus compatibles with refmb
        mbdf = gdir.get_ref_mb_data()['ANNUAL_BALANCE']
        res = t_star_from_refmb(gdir, mbdf)

        # if we have just one candidate this is good
        if len(res['t_star']) == 1:
            only_one.append(gdir.rgi_id)
        # this might be more than one, we'll have to select them later
        per_glacier[gdir.rgi_id] = (gdir, res['t_star'], res['bias'],
                                    res['prcp_fac'])

    # At least one of the glaciers should have a single t*, otherwise we don't
    # know how to start
    if len(only_one) == 0:
        # TODO: hardcoded stuff here, for the test workflow
        for v in ['4', '5', '6']:
            rid = 'RGI{}0-11.00897'.format(v)
            if rid in per_glacier:
                only_one.append(rid)
                gdir, t_star, res_bias, prcp_fac = per_glacier[rid]
                per_glacier[rid] = (gdir, [t_star[-1]], [res_bias[-1]],
                                    prcp_fac)
    if len(only_one) == 0:
        raise RuntimeError('We need at least one glacier with one '
                           'tstar only.')

    log.info('%d out of %d have only one possible t*. Start from here',
             len(only_one), len(ref_gdirs))

    # Ok. now loop over the nearest glaciers until all have a unique t*
    while True:
        ids_left = [id for id in per_glacier.keys() if id not in only_one]
        if len(ids_left) == 0:
            break

        # Compute the summed distance to all glaciers with one t*
        distances = []
        for id in ids_left:
            gdir = per_glacier[id][0]
            lon, lat = gdir.cenlon, gdir.cenlat
            ldis = 0.
            for id_o in only_one:
                ogdir = per_glacier[id_o][0]
                ldis += utils.haversine(lon, lat, ogdir.cenlon, ogdir.cenlat)
            distances.append(ldis)

        # Take the shortest and choose the best t*
        pg = per_glacier[ids_left[np.argmin(distances)]]
        gdir, t_star, res_bias, prcp_fac = pg
        distances = []
        for tt in t_star:
            ldis = 0.
            for id_o in only_one:
                _, ot_star, _, _ = per_glacier[id_o]
                ldis += np.abs(tt - ot_star)
            distances.append(ldis)
        amin = np.argmin(distances)
        per_glacier[gdir.rgi_id] = (gdir, [t_star[amin]], [res_bias[amin]],
                                    prcp_fac)
        only_one.append(gdir.rgi_id)

    # Write out the data
    rgis_ids, t_stars, prcp_facs,  biases, lons, lats, n_mb = ([], [], [], [],
                                                               [], [], [])
    for id, (gdir, t_star, res_bias, prcp_fac) in per_glacier.items():
        rgis_ids.append(id)
        t_stars.append(t_star[0])
        prcp_facs.append(prcp_fac)
        biases.append(res_bias[0])
        lats.append(gdir.cenlat)
        lons.append(gdir.cenlon)
        n_mb.append(len(gdir.get_ref_mb_data()))
    df = pd.DataFrame(index=rgis_ids)
    df['lon'] = lons
    df['lat'] = lats
    df['n_mb_years'] = n_mb
    df['tstar'] = t_stars
    df['prcp_fac'] = prcp_facs
    df['bias'] = biases
    file = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
    df.sort_index().to_csv(file)


[docs]@global_task
def distribute_t_stars(gdirs, ref_df=None, minimum_mustar=0.):
    """After the computation of the reference tstars, apply
    the interpolation to each individual glacier.

    Parameters
    ----------
    gdirs : []
        list of oggm.GlacierDirectory objects
    ref_df : pd.Dataframe
        replace the default calibration list
    minimum_mustar: float
        if mustar goes below this threshold, clip it to that value.
        If you want this to happen with `minimum_mustar=0.` you will have
        to set `cfg.PARAMS['allow_negative_mustar']=True` first.
    """

    log.info('Distribute t* and mu*')

    if ref_df is None:
        fp = os.path.join(cfg.PATHS['working_dir'], 'ref_tstars.csv')
        if not cfg.PARAMS['run_mb_calibration']:
            # Make some checks and use the default one
            if (('climate_file' in cfg.PATHS) and
                    os.path.exists(cfg.PATHS['climate_file'])):
                raise RuntimeError('If you are using a custom climate file '
                                   'you should run your own MB calibration.')
            fn = 'oggm_ref_tstars_rgi{}_cru4.csv'.format(gdirs[0].rgi_version)
            fp = utils.get_demo_file(fn)
        ref_df = pd.read_csv(fp)

    for gdir in gdirs:

        # Compute the distance to each glacier
        distances = utils.haversine(gdir.cenlon, gdir.cenlat,
                                    ref_df.lon, ref_df.lat)

        # Take the 10 closests
        aso = np.argsort(distances)[0:9]
        amin = ref_df.iloc[aso]
        distances = distances[aso]**2

        # If really close no need to divide, else weighted average
        if distances.iloc[0] <= 0.1:
            tstar = amin.tstar.iloc[0]
            prcp_fac = amin.prcp_fac.iloc[0]
            bias = amin.bias.iloc[0]
        else:
            tstar = int(np.average(amin.tstar, weights=1./distances))
            prcp_fac = np.average(amin.prcp_fac, weights=1./distances)
            bias = np.average(amin.bias, weights=1./distances)

        # Go
        local_mustar(gdir, tstar=tstar, bias=bias, prcp_fac=prcp_fac,
                     reset=True, minimum_mustar=minimum_mustar)


[docs]@global_task
def crossval_t_stars(gdirs):
    """Cross-validate the interpolation of tstar to each individual glacier.

    This is a thorough check (redoes many calculations) because it recomputes
    the chosen tstars at each time. You should use quick_crossval_t_stars
    instead.

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Cross-validate the t* and mu* determination')

    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'), index_col=0)

    rgdirs = utils.get_ref_mb_glaciers(gdirs)
    n = len(full_ref_df)
    for i, rid in enumerate(full_ref_df.index):

        log.info('Cross-validation iteration {} of {}'.format(i+1, n))

        # the glacier to look at
        gdir = [g for g in rgdirs if g.rgi_id == rid][0]

        # the reference glaciers
        ref_gdirs = [g for g in rgdirs if g.rgi_id != rid]

        # redo the computations
        with utils.DisableLogger():
            compute_ref_t_stars(ref_gdirs)
            distribute_t_stars([gdir])

        # store
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
        full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
        full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]
        full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]

    # write
    file = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(file)


@global_task
def quick_crossval_t_stars(gdirs):
    """Cross-validate the interpolation of tstar to each individual glacier.

    This version does NOT recompute the precipitation scaling factor at each
    round (this quite OK to do so)

    Parameters
    ----------
    gdirs: list of oggm.GlacierDirectory objects
    """

    log.info('Cross-validate the t* and mu* determination')

    rgdirs = utils.get_ref_mb_glaciers(gdirs)

    # This might be redundant but we redo the calc here
    with utils.DisableLogger():
        compute_ref_t_stars(rgdirs)
    full_ref_df = pd.read_csv(os.path.join(cfg.PATHS['working_dir'],
                                           'ref_tstars.csv'), index_col=0)
    with utils.DisableLogger():
        distribute_t_stars(rgdirs)

    n = len(full_ref_df)
    for i, rid in enumerate(full_ref_df.index):

        # log.info('Cross-validation iteration {} of {}'.format(i+1, n))

        # the glacier to look at
        gdir = [g for g in rgdirs if g.rgi_id == rid][0]

        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # before the cross-val we can get the info about "real" mustar
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'mustar'] = rdf['mu_star'].values[0]

        # redo the computations
        with utils.DisableLogger():
            distribute_t_stars([gdir], ref_df=tmp_ref_df)

        # store
        rdf = pd.read_csv(gdir.get_filepath('local_mustar'))
        full_ref_df.loc[rid, 'cv_tstar'] = int(rdf['t_star'].values[0])
        full_ref_df.loc[rid, 'cv_mustar'] = rdf['mu_star'].values[0]
        full_ref_df.loc[rid, 'cv_prcp_fac'] = rdf['prcp_fac'].values[0]
        full_ref_df.loc[rid, 'cv_bias'] = rdf['bias'].values[0]

    # Reproduce Ben's figure
    for i, rid in enumerate(full_ref_df.index):
        # the glacier to look at
        gdir = full_ref_df.loc[full_ref_df.index == rid]
        # the reference glaciers
        tmp_ref_df = full_ref_df.loc[full_ref_df.index != rid]

        # Compute the distance
        distances = utils.haversine(gdir.lon.values[0], gdir.lat.values[0],
                                    tmp_ref_df.lon, tmp_ref_df.lat)

        # Take the 10 closests
        aso = np.argsort(distances)[0:9]
        amin = tmp_ref_df.iloc[aso]
        distances = distances[aso] ** 2
        interp = np.average(amin.mustar, weights=1. / distances)
        full_ref_df.loc[rid, 'interp_mustar'] = interp

    # write
    file = os.path.join(cfg.PATHS['working_dir'], 'crossval_tstars.csv')
    full_ref_df.to_csv(file)