Source code for aneris.utils

import logging
import os
from pathlib import Path
from typing import TypeAlias

import pandas as pd
import pycountry

Pathy: TypeAlias = str | Path

_logger = None

# Index for iamc
iamc_idx = ["Model", "Scenario", "Region", "Variable"]

# default dataframe index
df_idx = ["region", "gas", "sector", "unit"]

# paths to data dependencies
here = os.path.join(os.path.dirname(os.path.realpath(__file__)))
hist_path = lambda f: os.path.join(here, "historical", f)
iamc_path = lambda f: os.path.join(here, "iamc_template", f)
region_path = lambda f: os.path.join(here, "regional_definitions", f)



[docs]
def logger():
    """
    Global Logger used for aneris.
    """
    global _logger
    if _logger is None:
        logging.basicConfig()
        _logger = logging.getLogger()
        _logger.setLevel("INFO")
    return _logger




[docs]
def numcols(df):
    """
    Returns all columns in df that have data types of floats or ints.
    """
    dtypes = df.dtypes
    return [i for i in dtypes.index if dtypes.loc[i].name.startswith(("float", "int"))]




[docs]
def isstr(x):
    """
    Returns True if x is a string.
    """
    try:
        return isinstance(x, (str, unicode))
    except NameError:
        return isinstance(x, str)




[docs]
def isnum(s):
    """
    Returns True if s is a number.
    """
    try:
        float(s)
        return True
    except ValueError:
        return False




[docs]
def pd_read(f, str_cols=False, *args, **kwargs):
    """
    Try to read a file with pandas, supports CSV and XLSX.

    Parameters
    ----------
    f : string
        the file to read in
    str_cols : bool, optional
        turn all columns into strings (numerical column names are sometimes
        read in as numerical dtypes)
    args, kwargs : sent directly to the Pandas read function

    Returns
    -------
    df : pd.DataFrame
    """
    if f.endswith("csv"):
        df = pd.read_csv(f, *args, **kwargs)
    else:
        df = pd.read_excel(f, *args, **kwargs)

    if str_cols:
        df.columns = [str(x) for x in df.columns]

    return df




[docs]
def pd_write(df, f, *args, **kwargs):
    """
    Try to write a file with pandas, supports CSV and XLSX.
    """
    # guess whether to use index, unless we're told otherwise
    index = kwargs.pop("index", isinstance(df.index, pd.MultiIndex))

    if f.endswith("csv"):
        df.to_csv(f, index=index, *args, **kwargs)
    else:
        with pd.ExcelWriter(f) as writer:
            df.to_excel(writer, index=index, *args, **kwargs)



def normalize(s):
    return s / s.sum()


def country_name(iso: str):
    country_obj = pycountry.countries.get(alpha_3=iso)
    return iso if country_obj is None else country_obj.name


def skipempty(*dfs):
    return [df for df in dfs if not df.empty]