Source code for aneris._io

"""
Provides helper functions for reading input data and configuration files.

The default configuration values are provided in aneris.RC_DEFAULTS.
"""

import os
from collections import abc

import pandas as pd
import yaml

from aneris.utils import iamc_idx, isnum, isstr, pd_read

# Built-in run-control defaults, kept as YAML text rather than a dict.
# RunControl falls back to this string when it is constructed without an
# explicit ``defaults`` argument and parses it through its YAML loader.
RC_DEFAULTS = """
config:
    default_luc_method: reduce_ratio_2150_cov
    default_ratio_method: reduce_ratio_2080
    default_offset_method: reduce_offset_2080
    cov_threshold: 20
    harmonize_year: 2015
    global_harmonization_only: false
    replace_suffix: Harmonized-DB
prefix: CEDS+|9+ Sectors
suffix: Unharmonized
add_5regions: true
"""


def _read_data(indfs):
    """
    Stack every "data*" entry of a sheet mapping into one data frame.

    Parameters
    ----------
    indfs : dict-like
        mapping of sheet name to data frame; only keys starting with
        ``"data"`` are used, taken in sorted key order

    Returns
    -------
    df : pd.DataFrame
        the concatenated frame with string column labels (capitalized) and
        every column whose label passes ``isnum`` cast to float
    """
    sheet_names = sorted(name for name in indfs if name.startswith("data"))
    frames = [indfs[name] for name in sheet_names]
    combined = pd.concat(frames)

    # reading from excel can leave non-string labels and non-float values
    # behind, so both are normalised explicitly here
    combined.columns = combined.columns.astype(str)
    numeric_labels = [label for label in combined.columns if isnum(label)]
    combined[numeric_labels] = combined[numeric_labels].astype(float)

    # input files do not always follow a consistent column naming style
    combined.columns = combined.columns.str.capitalize()

    return combined


def _recursive_update(d, u):
    for k, v in u.items():
        if isinstance(v, abc.Mapping):
            r = _recursive_update(d.get(k, {}), v)
            d[k] = r
        else:
            d[k] = u[k]
    return d


def read_excel(f):
    """
    Load model data, overrides and configuration from an excel input file.

    Parameters
    ----------
    f : string
        path to input file

    Returns
    -------
    model : pd.DataFrame
        model data frame in IAMC format
    overrides : pd.DataFrame
        overrides data frame in IAMC format
    config : dictionary
        configuration overrides (if any)
    """
    sheets = pd_read(f, sheet_name=None)
    model = _read_data(sheets)

    # without a "harmonization" sheet, fall back to an empty frame that
    # downstream code recognises as "no overrides"
    if "harmonization" in sheets:
        overrides = sheets["harmonization"]
    else:
        overrides = pd.DataFrame([], columns=iamc_idx + ["Unit"])

    # run control settings live in two extra columns of the overrides sheet
    config = {}
    if "Configuration" in overrides:
        rc_columns = ["Configuration", "Value"]
        settings = overrides[rc_columns].dropna()
        config = settings.set_index("Configuration").to_dict()["Value"]
        overrides = overrides.drop(rc_columns, axis=1)

    # exactly one row holding nothing but NaN means the sheet carried
    # configuration only, so hand back the empty frame instead
    config_only = len(overrides) == 1 and overrides.isnull().all(axis=None)
    if config_only:
        overrides = pd.DataFrame([], columns=iamc_idx + ["Unit"])

    return model, overrides, config
class RunControl(abc.Mapping):
    """
    A thin wrapper around a Python Dictionary to support configuration of
    harmonization execution.

    Input can be provided as dictionaries or YAML files. The merged
    configuration is kept in ``self.store`` and exposed through the
    ``Mapping`` interface.
    """

    def __init__(self, rc=None, defaults=None):
        """
        Parameters
        ----------
        rc : string, file, dictionary, optional
            a path to a YAML file, a file handle for a YAML file, or a
            dictionary describing run control configuration
        defaults : string, file, dictionary, optional
            a path to a YAML file, a file handle for a YAML file, or a
            dictionary describing **default** run control configuration;
            falls back to ``RC_DEFAULTS`` when omitted or empty
        """
        # local import: keeps this class independent of the module's
        # top-level import block
        import copy

        rc = self._load_yaml(rc or {})
        defaults = self._load_yaml(defaults or RC_DEFAULTS)
        # merge into a private copy so that a dictionary passed in as
        # ``defaults`` is not modified behind the caller's back
        self.store = _recursive_update(copy.deepcopy(defaults), rc)

    def __getitem__(self, k):
        return self.store[k]

    def __iter__(self):
        return iter(self.store)

    def __len__(self):
        return len(self.store)

    def __repr__(self):
        return self.store.__repr__()

    def _get_path(self, key, fyaml, fname):
        """
        Resolve ``fname`` as given, or relative to the directory of ``fyaml``.

        Raises
        ------
        OSError
            if neither location exists
        """
        if os.path.exists(fname):
            return fname

        _fname = os.path.join(os.path.dirname(fyaml), fname)
        if not os.path.exists(_fname):
            msg = (
                "YAML key '{}' in {}: {} is not a valid relative "
                + "or absolute path"
            )
            raise OSError(msg.format(key, fyaml, fname))
        return _fname

    def _fill_relative_paths(self, fyaml, d):
        """
        Replace the file names stored under known file keys of ``d`` (in
        place) with paths resolved by ``_get_path``.
        """
        file_keys = [
            "exogenous",
        ]
        for k in file_keys:
            if k not in d:
                continue
            fnames = d[k]
            if isinstance(fnames, str):
                # a single file name would otherwise be iterated character
                # by character; treat it as a one-element list
                fnames = [fnames]
            d[k] = [self._get_path(k, fyaml, fname) for fname in fnames]

    def _load_yaml(self, obj):
        """
        Turn a dictionary, file handle, file path or YAML string into a
        configuration object.

        Dictionaries are returned unchanged. Relative paths are resolved
        only when ``obj`` is the path of an existing file.
        """
        if isinstance(obj, dict):
            return obj

        fname = None
        if hasattr(obj, "read"):
            # it's a file; its content is YAML text and is never
            # re-interpreted as a path
            obj = obj.read()
        elif isstr(obj) and os.path.exists(obj):
            fname = obj
            with open(fname) as f:
                obj = f.read()

        loaded = yaml.safe_load(obj)
        if loaded is None:
            # an empty YAML document parses to None; use an empty
            # configuration so the merge in __init__ still works
            loaded = {}
        if fname is not None:
            self._fill_relative_paths(fname, loaded)
        return loaded

    def recursive_update(self, k, d):
        """
        Recursively update a top-level option in the run control.

        Parameters
        ----------
        k : string
            the top-level key
        d : dictionary or similar
            the dictionary to use for updating
        """
        u = self.__getitem__(k)
        self.store[k] = _recursive_update(u, d)