Source code for aneris.utils

import logging
import os
from pathlib import Path
from typing import TypeAlias

import pandas as pd
import pycountry

# Anything accepted as a filesystem path: a plain string or a pathlib.Path.
Pathy: TypeAlias = str | Path

# Module-level cache for the shared logger; it stays None until logger()
# is called for the first time.
_logger = None

# Index column names for IAMC data
iamc_idx = ["Model", "Scenario", "Region", "Variable"]

# Default dataframe index column names
df_idx = ["region", "gas", "sector", "unit"]

# paths to data dependencies
# Directory that contains this module; the data folders below live next to it.
here = os.path.dirname(os.path.realpath(__file__))


def hist_path(f):
    """Return the path of file ``f`` inside the ``historical`` folder."""
    return os.path.join(here, "historical", f)


def iamc_path(f):
    """Return the path of file ``f`` inside the ``iamc_template`` folder."""
    return os.path.join(here, "iamc_template", f)


def region_path(f):
    """Return the path of file ``f`` inside the ``regional_definitions`` folder."""
    return os.path.join(here, "regional_definitions", f)


def logger():
    """
    Global Logger used for aneris.

    The logger is created on first use and cached in the module-level
    ``_logger`` variable, so every later call returns the same object.
    """
    global _logger
    if _logger is not None:
        return _logger

    # First call: install the default logging configuration, then take the
    # root logger and have it emit INFO-level messages and above.
    logging.basicConfig()
    _logger = logging.getLogger()
    _logger.setLevel("INFO")
    return _logger
def numcols(df):
    """
    Returns all columns in df that have data types of floats or ints.

    Parameters
    ----------
    df : pd.DataFrame
        the frame whose columns are inspected

    Returns
    -------
    cols : list
        column labels, in column order, whose dtype name starts with
        "float" or "int" (e.g. float64, int32); a label that occurs several
        times is reported once per matching column
    """
    # Walk (label, dtype) pairs instead of looking every label up with
    # ``dtypes.loc[label]``: for duplicated column labels that lookup yields
    # a Series rather than a single dtype and the name check breaks.
    return [
        col
        for col, dtype in df.dtypes.items()
        if dtype.name.startswith(("float", "int"))
    ]
def isstr(x):
    """
    Returns True if x is a string.

    Parameters
    ----------
    x : object
        the value to test

    Returns
    -------
    bool
        True for ``str`` instances (including subclasses), False otherwise
    """
    # The former Python 2 ``unicode`` check never succeeds on Python 3 (it
    # raised and swallowed a NameError on every call), so test ``str`` directly.
    return isinstance(x, str)
def isnum(s):
    """
    Returns True if s is a number.

    Anything that ``float()`` can convert counts as a number, which
    includes numeric strings such as "1.5" as well as "nan" and "inf".

    Parameters
    ----------
    s : object
        the value to test

    Returns
    -------
    bool
        True if ``float(s)`` succeeds, False otherwise
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number.
        # TypeError: an object float() cannot convert at all (None, list, ...).
        return False
    return True
def pd_read(f, str_cols=False, *args, **kwargs):
    """
    Try to read a file with pandas, supports CSV and XLSX.

    Parameters
    ----------
    f : string or pathlib.Path
        the file to read in; a name ending in "csv" (any letter case) is
        read as CSV, everything else is handed to the Excel reader
    str_cols : bool, optional
        turn all columns into strings (numerical column names are sometimes
        read in as numerical dtypes)
    args, kwargs : sent directly to the Pandas read function

    Returns
    -------
    df : pd.DataFrame
    """
    # Compare on a lower-cased string so that pathlib.Path inputs (which have
    # no ``endswith``) and upper-case extensions such as ".CSV" are handled.
    if str(f).lower().endswith("csv"):
        df = pd.read_csv(f, *args, **kwargs)
    else:
        df = pd.read_excel(f, *args, **kwargs)

    if str_cols:
        df.columns = [str(x) for x in df.columns]

    return df
def pd_write(df, f, *args, **kwargs):
    """
    Try to write a file with pandas, supports CSV and XLSX.

    Parameters
    ----------
    df : pd.DataFrame
        the data to write
    f : string or pathlib.Path
        the file to write to; a name ending in "csv" (any letter case) is
        written as CSV, everything else through the Excel writer
    args, kwargs : sent directly to the Pandas write function; an ``index``
        keyword overrides the default described below
    """
    # guess whether to use index, unless we're told otherwise: a MultiIndex
    # is written out, any other index is dropped
    index = kwargs.pop("index", isinstance(df.index, pd.MultiIndex))

    # Compare on a lower-cased string so that pathlib.Path targets (which
    # have no ``endswith``) and upper-case extensions are handled.
    if str(f).lower().endswith("csv"):
        df.to_csv(f, *args, index=index, **kwargs)
    else:
        with pd.ExcelWriter(f) as writer:
            df.to_excel(writer, *args, index=index, **kwargs)
def normalize(s):
    """
    Returns s divided by the sum of its entries.
    """
    total = s.sum()
    return s / total


def country_name(iso: str):
    """
    Returns the pycountry name for a three-letter country code.

    If pycountry has no entry for the code, the code itself is returned.
    """
    match = pycountry.countries.get(alpha_3=iso)
    if match is None:
        return iso
    return match.name


def skipempty(*dfs):
    """
    Returns a list with the given frames that are not empty, in order.
    """
    return list(filter(lambda frame: not frame.empty, dfs))