trnslator/translater/utils.py
2020-06-24 14:22:21 -04:00

874 lines
26 KiB
Python

################################################################################
# Module: utils.py
# Description: Utility functions for configuration, logging
# License: MIT, see full license in LICENSE.txt
# Web: https://github.com/louisleroy5/translater
################################################################################
# OSMnx
#
# Copyright (c) 2019 Geoff Boeing https://geoffboeing.com/
#
# Part of the following code is a derivative work of the code from the OSMnx
# project, which is licensed MIT License. This code therefore is also
# licensed under the terms of the The MIT License (MIT).
################################################################################
import contextlib
import datetime as dt
import json
import logging as lg
import multiprocessing
import os
import platform
import re
import sys
import time
import unicodedata
import warnings
from collections import OrderedDict
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
from path import Path
from translater import settings
from translater.settings import ep_version
def config(
data_folder=settings.data_folder,
logs_folder=settings.logs_folder,
imgs_folder=settings.imgs_folder,
cache_folder=settings.cache_folder,
use_cache=settings.use_cache,
log_file=settings.log_file,
log_console=settings.log_console,
log_level=settings.log_level,
log_name=settings.log_name,
log_filename=settings.log_filename,
useful_idf_objects=settings.useful_idf_objects,
umitemplate=settings.umitemplate,
trnsys_default_folder=settings.trnsys_default_folder,
default_weight_factor="area",
ep_version=settings.ep_version,
):
"""Package configurations. Call this method at the beginning of script or at the
top of an interactive python environment to set package-wide settings.
Args:
data_folder (str): where to save and load data files.
logs_folder (str): where to write the log files.
imgs_folder (str): where to save figures.
cache_folder (str): where to save the simulation results.
use_cache (bool): if True, use a local cache to save/retrieve many of
translater outputs such as EnergyPlus simulation results. This can
save a lot of time by not calling the simulation and DataPortal APIs
repetitively for the same requests.
log_file (bool): if true, save log output to a log file in logs_folder.
log_console (bool): if true, print log output to the console.
log_level (int): one of the logger.level constants.
log_name (str): name of the logger.
log_filename (str): name of the log file.
useful_idf_objects (list): a list of useful idf objects.
umitemplate (str): where the umitemplate is located.
trnsys_default_folder (str): root folder of TRNSYS install.
default_weight_factor:
ep_version (str): EnergyPlus version to use. eg. "9-2-0".
Returns:
None
"""
# set each global variable to the passed-in parameter value
settings.use_cache = use_cache
settings.cache_folder = Path(cache_folder).makedirs_p()
settings.data_folder = Path(data_folder).makedirs_p()
settings.imgs_folder = Path(imgs_folder).makedirs_p()
settings.logs_folder = Path(logs_folder).makedirs_p()
settings.log_console = log_console
settings.log_file = log_file
settings.log_level = log_level
settings.log_name = log_name
settings.log_filename = log_filename
settings.useful_idf_objects = useful_idf_objects
settings.umitemplate = umitemplate
settings.trnsys_default_folder = validate_trnsys_folder(trnsys_default_folder)
settings.zone_weight.set_weigth_attr(default_weight_factor)
settings.ep_version = validate_epversion(ep_version)
# if logging is turned on, log that we are configured
if settings.log_file or settings.log_console:
log("Configured translater")
def validate_epversion(ep_version):
"""Validates the ep_version form"""
if "." in ep_version:
raise NameError('Enter the EnergyPlus version in the form "9-2-0"')
return ep_version
def validate_trnsys_folder(trnsys_default_folder):
"""
Args:
trnsys_default_folder:
"""
if sys.platform == "win32":
if os.path.isdir(trnsys_default_folder):
return trnsys_default_folder
else:
warnings.warn(
"The TRNSYS path does not exist. Please set the TRNSYS "
"path with the --trnsys-default-folder option".format(
trnsys_default_folder
)
)
return trnsys_default_folder
else:
return trnsys_default_folder
def log(
message, level=None, name=None, filename=None, avoid_console=False, log_dir=None
):
"""Write a message to the log file and/or print to the the console.
Args:
message (str): the content of the message to log
level (int): one of the logger.level constants
name (str): name of the logger
filename (str): name of the log file
avoid_console (bool): If True, don't print to console for this message
only
log_dir (str, optional): directory of log file. Defaults to
settings.log_folder
"""
if level is None:
level = settings.log_level
if name is None:
name = settings.log_name
if filename is None:
filename = settings.log_filename
logger = None
# if logging to file is turned on
if settings.log_file:
# get the current logger (or create a new one, if none), then log
# message at requested level
logger = get_logger(level=level, name=name, filename=filename, log_dir=log_dir)
if level == lg.DEBUG:
logger.debug(message)
elif level == lg.INFO:
logger.info(message)
elif level == lg.WARNING:
logger.warning(message)
elif level == lg.ERROR:
logger.error(message)
# if logging to console is turned on, convert message to ascii and print to
# the console
if settings.log_console and not avoid_console:
# capture current stdout, then switch it to the console, print the
# message, then switch back to what had been the stdout. this prevents
# logging to notebook - instead, it goes to console
standard_out = sys.stdout
sys.stdout = sys.__stdout__
# convert message to ascii for console display so it doesn't break
# windows terminals
message = (
unicodedata.normalize("NFKD", make_str(message))
.encode("ascii", errors="replace")
.decode()
)
print(message)
sys.stdout = standard_out
if level == lg.WARNING:
warnings.warn(message)
return logger
def get_logger(level=None, name=None, filename=None, log_dir=None):
"""Create a logger or return the current one if already instantiated.
Args:
level (int): one of the logger.level constants.
name (str): name of the logger.
filename (str): name of the log file.
log_dir (str, optional): directory of the log file. Defaults to
settings.log_folder.
Returns:
logging.Logger: a Logger
"""
if isinstance(log_dir, str):
log_dir = Path(log_dir)
if level is None:
level = settings.log_level
if name is None:
name = settings.log_name
if filename is None:
filename = settings.log_filename
logger = lg.getLogger(name)
# if a logger with this name is not already set up
if not getattr(logger, "handler_set", None):
# get today's date and construct a log filename
todays_date = dt.datetime.today().strftime("%Y_%m_%d")
if not log_dir:
log_dir = settings.logs_folder
log_filename = log_dir / "{}_{}.log".format(filename, todays_date)
# if the logs folder does not already exist, create it
if not log_dir.exists():
log_dir.makedirs_p()
# create file handler and log formatter and set them up
try:
handler = lg.FileHandler(log_filename, encoding="utf-8")
except:
handler = lg.StreamHandler()
formatter = lg.Formatter(
"%(asctime)s [%(process)d] %(levelname)s - %(name)s - %(" "message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(level)
logger.handler_set = True
return logger
def close_logger(logger=None, level=None, name=None, filename=None, log_dir=None):
"""
Args:
logger:
level:
name:
filename:
log_dir:
"""
if not logger:
# try get logger by name
logger = get_logger(level=level, name=name, filename=filename, log_dir=log_dir)
handlers = logger.handlers[:]
for handler in handlers:
handler.close()
logger.removeHandler(handler)
def make_str(value):
"""Convert a passed-in value to unicode if Python 2, or string if Python 3.
Args:
value (any): the value to convert to unicode/string
Returns:
unicode or string
"""
try:
# for python 2.x compatibility, use unicode
return np.unicode(value)
except NameError:
# python 3.x has no unicode type, so if error, use str type
return str(value)
def newrange(previous, following):
"""Takes the previous DataFrame and calculates a new Index range. Returns a
DataFrame with a new index
Args:
previous (pandas.DataFrame): previous DataFrame
following (pandas.DataFrame): following DataFrame
Returns:
pandas.DataFrame: DataFrame with an incremented new index
"""
if not previous.empty:
from_index = previous.iloc[[-1]].index.values + 1
to_index = from_index + len(following)
following.index = np.arange(from_index, to_index)
following.rename_axis("$id", inplace=True)
return following
else:
# If previous dataframe is empty, return the orginal DataFrame
return following
def date_transform(date_str):
"""Simple function transforming one-based hours (1->24) into zero-based
hours (0->23)
Args:
date_str (str): a date string of the form 'HH:MM'
Returns:
datetime.datetime: datetime object
"""
if date_str[0:2] != "24":
return datetime.strptime(date_str, "%H:%M") - timedelta(hours=1)
return datetime.strptime("23:00", "%H:%M")
def weighted_mean(series, df, weighting_variable):
"""Compute the weighted average while ignoring NaNs. Implements
:func:`numpy.average`.
Args:
series (pandas.Series): the *series* on which to compute the mean.
df (pandas.DataFrame): the *df* containing weighting variables.
weighting_variable (str or list or tuple): Name of weights to use in
*df*. If multiple values given, the values are multiplied together.
Returns:
numpy.ndarray: the weighted average
"""
# get non-nan values
index = ~np.isnan(series.values.astype("float"))
# Returns weights. If multiple `weighting_variable`, df.prod will take care
# of multipling them together.
if not isinstance(weighting_variable, list):
weighting_variable = [weighting_variable]
try:
weights = df.loc[series.index, weighting_variable].astype("float").prod(axis=1)
except Exception:
raise
# Try to average
try:
wa = np.average(series[index].astype("float"), weights=weights[index])
except ZeroDivisionError:
log("Cannot aggregate empty series {}".format(series.name), lg.WARNING)
return np.NaN
except Exception:
raise
else:
return wa
def top(series, df, weighting_variable):
"""Compute the highest ranked value weighted by some other variable.
Implements
:func:`pandas.DataFrame.nlargest`.
Args:
series (pandas.Series): the *series* on which to compute the ranking.
df (pandas.DataFrame): the *df* containing weighting variables.
weighting_variable (str or list or tuple): Name of weights to use in
*df*. If multiple values given, the values are multiplied together.
Returns:
numpy.ndarray: the weighted top ranked variable
"""
# Returns weights. If multiple `weighting_variable`, df.prod will take care
# of multipling them together.
if not isinstance(series, pd.Series):
raise TypeError(
'"top()" only works on Series, ' "not DataFrames\n{}".format(series)
)
if not isinstance(weighting_variable, list):
weighting_variable = [weighting_variable]
try:
idx_ = (
df.loc[series.index]
.groupby(series.name)
.apply(lambda x: safe_prod(x, df, weighting_variable))
)
if not idx_.empty:
idx = idx_.nlargest(1).index
else:
log('No such names "{}"'.format(series.name))
return np.NaN
except KeyError:
log("Cannot aggregate empty series {}".format(series.name), lg.WARNING)
return np.NaN
except Exception:
raise
else:
if idx.isnull().any():
return np.NaN
else:
return pd.to_numeric(idx, errors="ignore").values[0]
def safe_prod(x, df, weighting_variable):
"""
Args:
x:
df:
weighting_variable:
"""
df_ = df.loc[x.index, weighting_variable]
if not df_.empty:
return df_.astype("float").prod(axis=1).sum()
else:
return 0
def copy_file(files, where=None):
"""Handles a copy of test idf files
Args:
files (str or list): path(s) of the file(s) to copy
where (str): path where to save the copy(ies)
"""
import shutil, os
if isinstance(files, str):
files = [files]
files = {os.path.basename(k): k for k in files}
# defaults to cache folder
if where is None:
where = settings.cache_folder
for file in files:
dst = os.path.join(where, file)
output_folder = where
if not os.path.isdir(output_folder):
os.makedirs(output_folder)
shutil.copyfile(files[file], dst)
files[file] = dst
return _unpack_tuple(list(files.values()))
class EnergyPlusProcessError(Exception):
"""EnergyPlus Process call error"""
def __init__(self, cmd, stderr, idf):
"""
Args:
cmd:
stderr:
idf:
"""
super().__init__(stderr)
self.cmd = cmd
self.idf = idf
self.stderr = stderr
def __str__(self):
"""Override that only returns the stderr"""
msg = ":\n".join([self.idf, self.stderr])
return msg
class EnergyPlusVersionError(Exception):
"""EnergyPlus Version call error"""
def __init__(self, idf_file, idf_version, ep_version):
super(EnergyPlusVersionError, self).__init__(None)
self.idf_file = idf_file
self.idf_version = idf_version
self.ep_version = ep_version
def __str__(self):
"""Override that only returns the stderr"""
if tuple(self.idf_version.split("-")) > tuple(self.ep_version.split("-")):
compares_ = "higher"
else:
compares_ = "lower"
msg = (
"The version of the idf file {} (v{}) is {} than the specified "
"EnergyPlus version (v{}). Specify the default EnergyPlus version "
"with :func:`config` that corresponds with the one installed on your machine"
" or specify the version in related module functions, e.g. :func:`run_eplus`.".format(
self.idf_file.basename(), self.idf_version, compares_, self.ep_version
)
)
return msg
@contextlib.contextmanager
def cd(path):
"""
Args:
path:
"""
log("initially inside {0}".format(os.getcwd()))
CWD = os.getcwd()
os.chdir(path)
log("inside {0}".format(os.getcwd()))
try:
yield
finally:
os.chdir(CWD)
log("finally inside {0}".format(os.getcwd()))
def rmse(data, targets):
"""calculate rmse with target values
# Todo : write de description of the args
Args:
data:
targets:
"""
y = piecewise(data)
predictions = y
error = np.sqrt(np.mean((predictions - targets) ** 2))
return error
def piecewise(data):
"""returns a piecewise function from an array of the form [hour1, hour2,
..., value1, value2, ...]
# Todo : write de description of the args
Args:
data:
"""
nb = int(len(data) / 2)
bins = data[0:nb]
sf = data[nb:]
x = np.linspace(0, 8760, 8760)
# build condition array
conds = [x < bins[0]]
conds.extend([np.logical_and(x >= i, x < j) for i, j in zip(bins[0:], bins[1:])])
# build function array. This is the value of y when the condition is met.
funcs = sf
y = np.piecewise(x, conds, funcs)
return y
def checkStr(datafile, string, begin_line=0):
"""Find the first occurrence of a string and return its line number
Returns: the list index containing the string
Args:
datafile (list-like): a list-like object
string (str): the string to find in the txt file
"""
value = []
count = 0
for line in datafile:
if count < begin_line:
count += 1
continue
count += 1
match = re.search(string, str(line))
if match:
return count
break
def write_lines(file_path, lines):
"""Delete file if exists, then write lines in it
Args:
file_path (str): path of the file
lines (list of str): lines to be written in file
"""
# Delete temp file if exists
if os.path.exists(file_path):
os.remove(file_path)
# Save lines in temp file
temp_idf_file = open(file_path, "w+")
for line in lines:
temp_idf_file.write("%s" % line)
temp_idf_file.close()
def check_unique_name(first_letters, count, name, unique_list, suffix=False):
"""Making sure new_name does not already exist
Args:
first_letters (str): string at the beginning of the name, giving a hint
on what the variable is.
count (int): increment to create a unique id in the name.
name (str): name that was just created. To be verified that it is unique
in this function.
unique_list (list): list where unique names are stored.
suffix (bool):
Returns:
new_name (str): name that is unique
"""
if suffix:
while name in unique_list:
count += 1
end_count = "%03d" % count
name = name[:-3] + end_count
else:
while name in unique_list:
count += 1
end_count = "%06d" % count
name = first_letters + "_" + end_count
return name, count
def angle(v1, v2, acute=True):
"""Calculate the angle between 2 vectors
Args:
v1 (Vector3D): vector 1
v2 (Vector3D): vector 2
acute (bool): If True, give the acute angle, else gives the obtuse one.
Returns:
angle (float): angle between the 2 vectors in degree
"""
angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
if acute == True:
return angle
else:
return 2 * np.pi - angle
def float_round(num, n):
"""Makes sure a variable is a float and round it at "n" decimals
Args:
num (str, int, float): number we want to make sure is a float
n (int): number of decimals
Returns:
num (float): a float rounded number
"""
num = float(num)
num = round(num, n)
return num
def get_eplus_dirs(version=ep_version):
"""Returns EnergyPlus root folder for a specific version.
Returns (Path): The folder path.
Args:
version (str): Version number in the form "9-2-0" to search for.
"""
from eppy.runner.run_functions import install_paths
eplus_exe, eplus_weather = install_paths(version)
return Path(eplus_exe).dirname()
def warn_if_not_compatible():
"""Checks if an EnergyPlus install is detected. If the latest version
detected is higher than the one specified by translater, a warning is also
raised.
"""
eplus_homes = get_eplus_basedirs()
if not eplus_homes:
warnings.warn(
"No installation of EnergyPlus could be detected on this "
"machine. Please install EnergyPlus from https://energyplus.net before using translater"
)
if len(eplus_homes) > 1:
# more than one installs
warnings.warn(
"There are more than one versions of EnergyPlus on this machine. Make "
"sure you provide the appropriate version number when possible. "
)
def get_eplus_basedirs():
"""Returns a list of possible E+ install paths"""
if platform.system() == "Windows":
eplus_homes = Path("C:\\").glob("EnergyPlusV*")
return eplus_homes
elif platform.system() == "Linux":
eplus_homes = Path("/usr/local/").glob("EnergyPlus-*")
return eplus_homes
elif platform.system() == "Darwin":
eplus_homes = Path("/Applications").glob("EnergyPlus-*")
return eplus_homes
else:
warnings.warn(
"translater is not compatible with %s. It is only compatible "
"with Windows, Linux or MacOs" % platform.system()
)
def timeit(method):
"""Use this method as a decorator on a function to calculate the time it
take to complete. Uses the :func:`log` method.
Examples:
>>> @timeit
>>> def myfunc():
>>> return 'is a function'
Args:
method (function): A function.
"""
def timed(*args, **kwargs):
ts = time.time()
log("Executing %r..." % method.__qualname__)
result = method(*args, **kwargs)
te = time.time()
tt = te - ts
try:
try:
name = result.Name
except:
name = result.__qualname__
except:
name = str(result)
if tt > 0.001:
log("Completed %r for %r in %.3f s" % (method.__qualname__, name, tt))
else:
log(
"Completed %r for %r in %.3f ms"
% (method.__qualname__, name, tt * 1000)
)
return result
return timed
def lcm(x, y):
"""This function takes two integers and returns the L.C.M.
Args:
x:
y:
"""
# choose the greater number
if x > y:
greater = x
else:
greater = y
while True:
if (greater % x == 0) and (greater % y == 0):
lcm = greater
break
greater += 1
return lcm
def _unpack_tuple(x):
"""Unpacks one-element tuples for use as return values
Args:
x:
"""
if len(x) == 1:
return x[0]
else:
return x
def recursive_len(item):
"""Calculate the number of elements in nested list
Args:
item (list): list of lists (i.e. nested list)
Returns:
Total number of elements in nested list
"""
if type(item) == list:
return sum(recursive_len(subitem) for subitem in item)
else:
return 1
def rotate(l, n):
"""Shift list elements to the left
Args:
l (list): list to rotate
n (int): number to shift list to the left
Returns:
list: shifted list.
"""
return l[n:] + l[:n]
def parallel_process(in_dict, function, processors=-1, use_kwargs=True):
"""A parallel version of the map function with a progress btr.
Examples:
>>> import translater as tr
>>> files = ['tests/input_data/problematic/nat_ventilation_SAMPLE0.idf',
>>> 'tests/input_data/regular/5ZoneNightVent1.idf']
>>> wf = 'tests/input_data/CAN_PQ_Montreal.Intl.AP.716270_CWEC.epw'
>>> files = tr.copy_file(files)
>>> rundict = {file: dict(eplus_file=file, weather_file=wf,
>>> ep_version=ep_version, annual=True,
>>> prep_outputs=True, expandobjects=True,
>>> verbose='q', output_report='sql')
>>> for file in files}
>>> result = parallel_process(rundict, tr.run_eplus, use_kwargs=True)
Args:
in_dict (dict-like): A dictionary to iterate over.
function (function): A python function to apply to the elements of
in_dict
processors (int): The number of cores to use
use_kwargs (bool): If True, pass the kwargs as arguments to `function` .
Returns:
[function(array[0]), function(array[1]), ...]
"""
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
if processors == -1:
processors = min(len(in_dict), multiprocessing.cpu_count())
if processors == 1:
kwargs = {
"desc": function.__name__,
"total": len(in_dict),
"unit": "runs",
"unit_scale": True,
"leave": True,
}
if use_kwargs:
futures = {a: function(**in_dict[a]) for a in tqdm(in_dict, **kwargs)}
else:
futures = {a: function(in_dict[a]) for a in tqdm(in_dict, **kwargs)}
else:
with ProcessPoolExecutor(max_workers=processors) as pool:
if use_kwargs:
futures = {pool.submit(function, **in_dict[a]): a for a in in_dict}
else:
futures = {pool.submit(function, in_dict[a]): a for a in in_dict}
kwargs = {
"desc": function.__name__,
"total": len(futures),
"unit": "runs",
"unit_scale": True,
"leave": True,
}
# Print out the progress as tasks complete
for f in tqdm(as_completed(futures), **kwargs):
pass
out = {}
# Get the results from the futures.
for key in futures:
try:
if processors > 1:
out[futures[key]] = key.result()
else:
out[key] = futures[key]
except Exception as e:
log(str(e), lg.ERROR)
out[futures[key]] = e
return out