"""
Provides helper functions for reading input data and configuration files.
The default configuration values are provided in aneris.RC_DEFAULTS.
"""
import os
from collections import abc
import pandas as pd
import yaml
from aneris.utils import iamc_idx, isnum, isstr, pd_read
# Default run-control configuration, kept as YAML text. RunControl falls back
# to this string when no ``defaults`` argument is supplied and parses it with
# ``yaml.safe_load``.
# NOTE(review): ``config:`` has no value of its own and every key below sits
# at column 0, so the nesting of the options under ``config`` looks lost in
# this copy of the file -- confirm the intended indentation before relying on
# the parsed structure.
RC_DEFAULTS = """
config:
default_luc_method: reduce_ratio_2150_cov
default_ratio_method: reduce_ratio_2080
default_offset_method: reduce_offset_2080
cov_threshold: 20
harmonize_year: 2015
global_harmonization_only: false
replace_suffix: Harmonized-DB
prefix: CEDS+|9+ Sectors
suffix: Unharmonized
add_5regions: true
"""
def _read_data(indfs):
    """
    Combine all "data*" sheets into a single normalized data frame.

    Parameters
    ----------
    indfs : mapping of str to pd.DataFrame
        sheets keyed by name; only keys starting with "data" are used,
        concatenated in sorted key order

    Returns
    -------
    pd.DataFrame
        the concatenated data with string column labels, the columns
        accepted by ``isnum`` cast to float, and capitalized column names
    """
    sheet_names = sorted(name for name in indfs if name.startswith("data"))
    frames = [indfs[name] for name in sheet_names]
    combined = pd.concat(frames)

    # Reading from Excel can leave non-string column labels and non-float
    # value columns behind, so both are reset explicitly here.
    combined.columns = combined.columns.astype(str)
    # presumably the year columns -- whatever label ``isnum`` accepts
    numeric_cols = [col for col in combined.columns if isnum(col)]
    combined[numeric_cols] = combined[numeric_cols].astype(float)

    # Not every team provides standardized column names and styles.
    combined.columns = combined.columns.str.capitalize()
    return combined
def _recursive_update(d, u):
for k, v in u.items():
if isinstance(v, abc.Mapping):
r = _recursive_update(d.get(k, {}), v)
d[k] = r
else:
d[k] = u[k]
return d
def read_excel(f):
    """
    Read an excel-based input file for harmonization.

    Parameters
    ----------
    f : string
        path to input file

    Returns
    -------
    model : pd.DataFrame
        model data frame in IAMC format
    overrides : pd.DataFrame
        overrides data frame in IAMC format, taken from the
        "harmonization" sheet when it exists and empty otherwise
    config : dictionary
        configuration overrides (if any), read from the "Configuration"
        and "Value" columns of the overrides sheet
    """

    def _empty_overrides():
        # an empty frame which will be caught later
        return pd.DataFrame([], columns=iamc_idx + ["Unit"])

    # sheet_name=None is expected to yield every sheet keyed by its name
    sheets = pd_read(f, sheet_name=None)
    model = _read_data(sheets)

    if "harmonization" in sheets:
        overrides = sheets["harmonization"]
    else:
        overrides = _empty_overrides()

    # get run control
    config = {}
    if "Configuration" in overrides:
        settings = overrides[["Configuration", "Value"]].dropna()
        config = settings.set_index("Configuration").to_dict()["Value"]
        overrides = overrides.drop(["Configuration", "Value"], axis=1)

    # a single row of nans implies only configs were provided; in that
    # case hand back the empty frame instead
    only_blank_row = len(overrides) == 1 and overrides.isnull().all(axis=None)
    if only_blank_row:
        overrides = _empty_overrides()

    return model, overrides, config
class RunControl(abc.Mapping):
    """
    A thin wrapper around a Python Dictionary to support configuration of
    harmonization execution.

    Input can be provided as dictionaries or YAML files. The merged
    configuration is held in ``self.store`` and exposed read-only through
    the mapping interface.
    """

    def __init__(self, rc=None, defaults=None):
        """
        Parameters
        ----------
        rc : string, file, dictionary, optional
            a path to a YAML file, a file handle for a YAML file, or a
            dictionary describing run control configuration
        defaults : string, file, dictionary, optional
            a path to a YAML file, a file handle for a YAML file, or a
            dictionary describing **default** run control configuration;
            ``RC_DEFAULTS`` is used when omitted
        """
        rc = self._load_yaml(rc or {})
        defaults = self._load_yaml(defaults or RC_DEFAULTS)
        # Merge into a fresh dictionary so that a dictionary handed in as
        # ``defaults`` is neither modified nor aliased by the store.
        self.store = _recursive_update(_recursive_update({}, defaults), rc)

    def __getitem__(self, k):
        return self.store[k]

    def __iter__(self):
        return iter(self.store)

    def __len__(self):
        return len(self.store)

    def __repr__(self):
        return repr(self.store)

    def _get_path(self, key, fyaml, fname):
        """
        Return ``fname`` if it exists as given, otherwise resolve it
        relative to the directory of ``fyaml``.

        Raises
        ------
        OSError
            if neither location exists
        """
        if os.path.exists(fname):
            return fname

        candidate = os.path.join(os.path.dirname(fyaml), fname)
        if os.path.exists(candidate):
            return candidate

        msg = "YAML key '{}' in {}: {} is not a valid relative " "or absolute path"
        raise OSError(msg.format(key, fyaml, fname))

    def _fill_relative_paths(self, fyaml, d):
        """
        Replace, in place, the file lists stored under known keys of ``d``
        with paths resolved by ``_get_path`` against ``fyaml``.
        """
        file_keys = [
            "exogenous",
        ]
        for k in file_keys:
            if k in d:
                d[k] = [self._get_path(k, fyaml, fname) for fname in d[k]]

    def _load_yaml(self, obj):
        """
        Turn a dictionary, file handle, path or YAML text into a dictionary.

        Dictionaries are returned unchanged. Objects with a ``read``
        attribute are read first. A string naming an existing path is opened
        and its file entries are resolved relative to that path. An empty
        YAML document yields an empty dictionary.
        """
        if isinstance(obj, dict):
            # already parsed; nothing to read and no paths to resolve
            return obj

        check_rel_paths = False
        if hasattr(obj, "read"):  # it's a file
            obj = obj.read()
        if isstr(obj) and os.path.exists(obj):
            check_rel_paths = True
            fname = obj
            with open(fname) as f:
                obj = f.read()
        if not isinstance(obj, dict):
            obj = yaml.safe_load(obj)
            if obj is None:
                # safe_load gives None for empty input; callers need a mapping
                obj = {}
        if check_rel_paths:
            self._fill_relative_paths(fname, obj)
        return obj

    def recursive_update(self, k, d):
        """
        Recursively update a top-level option in the run control.

        Parameters
        ----------
        k : string
            the top-level key; it must already be present in the run control
        d : dictionary or similar
            the dictionary to use for updating
        """
        self.store[k] = _recursive_update(self[k], d)