import os
import re
import sys
import yaml
from loguru import logger
import esm_parser
# Suffixes appended, in this order, to a path when looking for a YAML file.
# The empty string comes first so the path is tried exactly as given before
# any extension is added.
YAML_AUTO_EXTENSIONS = ["", ".yml", ".yaml", ".YML", ".YAML"]
class EsmConfigFileError(Exception):
    """
    Exception for yaml file containing tabs or other syntax issues.

    An exception used when yaml.load() throws a yaml.scanner.ScannerError.
    This error occurs mainly when there are tabs inside a yaml file or
    when the syntax is incorrect. If tabs are found, the exception message
    is a user-friendly report indicating where the tabs are located in the
    yaml file. If no tabs are found, the message carries the text of the
    original yaml error instead.

    Parameters
    ----------
    fpath : str
        Path to the yaml file
    yaml_error : Exception
        The error raised while loading ``fpath``; only its string form is used.

    Attributes
    ----------
    message : str
        The text passed to ``Exception.__init__`` (always set).
    """

    def __init__(self, fpath, yaml_error):
        # Collect every line that contains a tab. Line numbers are 1-based so
        # they match what an editor shows; tabs are rendered as "____" to make
        # them visible in the report.
        with open(fpath) as yaml_file:
            tab_lines = [
                str(line_number) + ":" + line.replace("\t", "____") + "\n"
                for line_number, line in enumerate(yaml_file, start=1)
                if "\t" in line
            ]
        report = "".join(tab_lines)

        if report:
            # Tabs found: tell the user exactly where they are
            self.message = (
                "\n\n\n"
                f"Your file {fpath} has tabs, please use ONLY spaces!\n"
                "Tabs are in following lines:\n" + report
            )
        else:
            # No tabs: the problem is something else, so hand the original
            # error text to the user. ``str()`` is required because
            # ``yaml_error`` is an exception object, not a string.
            self.message = "\n\n\n" + str(yaml_error)
        super().__init__(self.message)
# The environment-variable loader below is adapted from:
# https://medium.com/swlh/python-yaml-configuration-with-environment-variables-parsing-77930f4273ac
def create_env_loader(tag="!ENV", loader=yaml.SafeLoader):
    """
    Teach a yaml loader class to expand ``${VAR}`` environment variables.

    The ``tag`` marks scalars in which ``${word}`` patterns are replaced,
    e.g. ``somekey: !ENV somestring${MYENVVAR}blah blah blah``. The tag is
    also registered as an implicit resolver for the same pattern.

    Every variable that gets expanded is recorded as a ``(name, value)``
    tuple in ``loader.env_variables``, where ``value`` is the value that was
    substituted for that variable.

    Parameters
    ----------
    tag : str
        The yaml tag that triggers the substitution.
    loader : class
        The yaml loader class to extend. It is modified in place: the
        resolver and constructor are added to it and its ``env_variables``
        attribute is reset to an empty list.

    Returns
    -------
    loader
        The same loader class that was passed in.
    """
    # pattern for global vars: look for ${word}
    # (raw string: ``\$`` and ``\w`` are regex escapes, not string escapes)
    pattern = re.compile(r"\${(\w+)}")

    # the tag will be used to mark where to start searching for the pattern
    # NOTE(review): PyYAML presumably applies implicit resolvers with
    # ``match``, i.e. only plain scalars *starting* with ``${...}`` are tagged
    # implicitly; anything else needs the explicit tag -- confirm.
    loader.add_implicit_resolver(tag, pattern, None)
    loader.env_variables = []

    def constructor_env_variables(loader, node):
        """
        Extracts the environment variable from the node's value

        :param yaml.Loader loader: the yaml loader
        :param node: the current node in the yaml
        :return: the parsed string that contains the value of the environment
            variable
        """
        value = loader.construct_scalar(node)
        full_value = value
        # Replace every ${NAME} found in the scalar
        for name in pattern.findall(value):
            # An unset variable falls back to its own name
            env_value = os.environ.get(name, name)
            full_value = full_value.replace(f"${{{name}}}", env_value)
            # Record the variable together with *its own* value, not with the
            # (possibly only partially substituted) surrounding string.
            loader.env_variables.append((name, env_value))
        return full_value

    loader.add_constructor(tag, constructor_env_variables)
    return loader
def yaml_file_to_dict(filepath):
    """
    Given a yaml file, returns a corresponding dictionary.

    If you do not give an extension, tries again after appending one.
    It raises an EsmConfigFileError exception if yaml files contain tabs.

    The returned dictionary additionally contains ``debug_info`` with the name
    of the file it was loaded from. If the loader recorded any environment
    variables, each ``(name, value)`` pair is appended as ``"name=value"`` to
    ``general.runtime_environment_changes.add_export_vars``.

    Parameters
    ----------
    filepath : str
        Where to get the YAML file from

    Returns
    -------
    dict
        A dictionary representation of the yaml file.

    Raises
    ------
    EsmConfigFileError
        Raised when YAML file contains tabs or other syntax issues.
    FileNotFoundError
        Raised when the YAML file cannot be found and all extensions have been tried.
    SystemExit
        Any other exception is logged and the process exits with status 1.
    """
    loader = create_env_loader()
    for extension in YAML_AUTO_EXTENSIONS:
        candidate = filepath + extension
        try:
            with open(candidate) as yaml_file:
                # Check for duplicates
                check_duplicates(yaml_file)
                # Back to the beginning of the file
                yaml_file.seek(0, 0)
                # Actually load the file
                yaml_load = yaml.load(yaml_file, Loader=loader)
                # Check for incompatible ``_changes`` (no more than one ``_changes``
                # type should be accessible simultaneously)
                check_changes_duplicates(yaml_load, candidate)
                # Add the file name you loaded from to track it back:
                yaml_load["debug_info"] = {"loaded_from_file": yaml_file.name}
                if loader.env_variables:
                    # Walk down to ``add_export_vars``, creating any level that
                    # is missing or empty (``None``) on the way.
                    general = yaml_load.get("general") or {}
                    env_changes = general.get("runtime_environment_changes") or {}
                    add_export_vars = env_changes.get("add_export_vars", [])
                    for env_var_name, env_var_value in loader.env_variables:
                        add_export_vars.append(f"{env_var_name}={env_var_value}")
                    env_changes["add_export_vars"] = add_export_vars
                    general["runtime_environment_changes"] = env_changes
                    yaml_load["general"] = general
                return yaml_load
        except IOError as error:
            # loguru formats its messages with ``{}`` placeholders, not ``%s``
            logger.debug(
                "IOError ({}) File not found with {}, trying another extension pattern.",
                error.errno,
                candidate,
            )
        except yaml.scanner.ScannerError as yaml_error:
            logger.debug("Your file {} has syntax issues!", candidate)
            raise EsmConfigFileError(candidate, yaml_error) from yaml_error
        except Exception as error:
            print("Something else went wrong")
            print(f"Serious issue with {filepath}, goodbye...")
            logger.exception(error)
            # Non-zero status: this is a failure, not a clean exit
            sys.exit(1)
    raise FileNotFoundError(
        "All file extensions tried and none worked for %s" % filepath
    )
def check_changes_duplicates(yamldict_all, fpath):
    """
    Checks for duplicates and conflicting ``_changes`` and ``add_``:

    1. Finds variables containing ``_changes`` (but excluding ``add_``) and checks
       if they are compatible with the same ``_changes`` inside the same file. If they
       are not compatible returns an error where the conflicting variable paths are
       specified. More than one ``_changes`` type in a file are allowed but they need
       to be part of the same ``_choose`` and not be accessible simultaneously in any
       situation.

    2. Checks if there is any variable containing ``add_`` in the main sections of
       a file and labels it as incompatible if the same variable is found inside a
       ``choose_`` block. ``add_<variable>``s are compatible as long as they are inside
       ``choose_`` blocks, but if you want to include something as a default, please just
       do it inside the ``<variable>``.

       .. warning:: ``add_<variable>``s are not checked for incompatibility when they
          are included inside ``choose_`` blocks. Merging of these ``add_<variable>``s
          is done using ``deep_update``, meaning that the merge is arbitrary (i.e. if
          two ``choose_`` blocks are modifying the same variable using ``add_``, the
          final value would be decided arbitrarily). It is up to the developer/user to
          make good use of ``add_``s inside ``choose_`` blocks.

    Errors are reported through ``esm_parser.user_error``.

    Parameters
    ----------
    yamldict_all : dict
        Dictionary read from the yaml file
    fpath : str
        Path to the yaml file
    """
    changes_note = (
        "Note that if there are more than one ``_changes`` in the "
        "file, they need to be placed inside different cases of the "
        "same ``choose`` and these options need to be compatible "
        "(only one ``_changes`` can be reached at a time).\n"
        "Use ``add_<variable>_changes`` if you want to add/overwrite "
        "variables inside the main ``_changes``."
    )
    add_note = (
        "Note that multiple ``add_<variable>`` in a single file are compatible "
        "as long as they are included inside ``choose_`` blocks. An "
        "``add_<variable>`` out of a ``choose_`` block and the same "
        "``add_<variable>`` inside of a ``choose_`` block are considered "
        "incompatible. If the general ``add_<variable>`` should be added "
        "as a default, please include it to ``<variable>`` instead."
    )

    # A file with a ``general`` key is checked separately for each of its
    # top-level entries; any other file is checked as a single unit.
    if "general" not in yamldict_all:
        yamldict_all = {"main": yamldict_all}

    # Loop through the components or main
    for yamldict in yamldict_all.values():
        # Perform the check only for the dictionary objects
        if not isinstance(yamldict, dict):
            continue

        # Paths (joined with ",") to every <variable>_changes / add_<variable>
        changes_list = esm_parser.find_key(
            yamldict, "_changes", "add_", paths2finds=[], sep=","
        )
        add_list = esm_parser.find_key(yamldict, ["add_"], "", paths2finds=[], sep=",")
        # Nothing to check for this component
        if (len(changes_list) + len(add_list)) == 0:
            continue

        # Find ``_changes`` types
        changes_types = {
            y for x in changes_list for y in x.split(",") if "_changes" in y
        }
        # Find ``add_`` types
        add_types = {y for x in add_list for y in x.split(",") if "add_" in y}

        # Define ``_changes`` groups: paths that end in the same ``_changes`` type
        changes_groups = [
            [x for x in changes_list if change_type == x.split(",")[-1]]
            for change_type in changes_types
        ]
        # Define ``add_`` groups: paths that end in the same ``add_`` type
        add_groups = [
            [x for x in add_list if add_type == x.split(",")[-1]]
            for add_type in add_types
        ]

        # Loop through the different ``_changes`` groups
        for changes_group in changes_groups:
            # Check for ``_changes`` without ``choose_``, "there can be only one"
            changes_no_choose = [x for x in changes_group if "choose_" not in x]
            # If more than one ``_changes`` without ``choose_`` return error
            if len(changes_no_choose) > 1:
                changes_no_choose = [x.replace(",", ".") for x in changes_no_choose]
                esm_parser.user_error(
                    "YAML syntax",
                    "More than one ``_changes`` out of a ``choose_`` in "
                    + fpath + ":\n - " + "\n - ".join(changes_no_choose)
                    + "\n" + changes_note + "\n\n",
                )
            # If only one ``_changes`` without ``choose_`` check for ``_changes`` inside
            # ``choose_`` and return error if any is found
            elif len(changes_no_choose) == 1:
                changes_group.remove(changes_no_choose[0])
                if len(changes_group) > 0:
                    changes_group = [x.replace(",", ".") for x in changes_group]
                    esm_parser.user_error(
                        "YAML syntax",
                        "The general ``" + changes_no_choose[0]
                        + "`` and ``_changes`` in ``choose_`` are not compatible in "
                        + fpath + ":\n - "
                        + "\n - ".join(changes_group) + "\n"
                        + "\n" + changes_note + "\n\n",
                    )

            # If you reach this point all ``_changes`` are inside
            # some number of ``choose_`` (there are no ``_changes``
            # outside of a ``choose_``)
            # Check for incompatible ``_changes`` inside ``choose_``:
            # Split the path of the variables
            changes_group_split = [x.split(",") for x in changes_group]
            # Loop through the paths of the ``_changes`` in the group
            for count, changes in enumerate(changes_group_split):
                # Find the path of the last ``choose_`` in ``changes`` and
                # its case
                path2choose, case = find_last_choose(changes)
                # Loop through the changes following the current one
                for other_changes in changes_group_split[count + 1:]:
                    # Find the path of the last ``choose_`` in
                    # ``other_changes`` and its case
                    sub_path2choose, sub_case = find_last_choose(other_changes)
                    # If one ``choose_`` is contained into the other
                    # find the common ``choose_`` and compare the cases.
                    # If the case is the same, duplicates exist and error
                    # is returned (i.e. choose_lresume.True.namelist_changes
                    # and choose_lresume.True.choose_another_switch
                    # False.namelist_changes)
                    # NOTE(review): containment is tested on the ","-joined
                    # strings, so this is a substring test rather than a
                    # path-prefix test -- confirm this is intended.
                    if path2choose in sub_path2choose or sub_path2choose in path2choose:
                        # Work on per-pair copies so that the case of
                        # ``changes`` is not overwritten for the comparisons
                        # with the remaining ``other_changes``.
                        pair_case, pair_sub_case = case, sub_case
                        if path2choose in sub_path2choose:
                            pair_sub_case = sub_path2choose.replace(
                                path2choose + ",", ""
                            ).split(",")[0]
                        elif sub_path2choose in path2choose:
                            pair_case = path2choose.replace(
                                sub_path2choose + ",", ""
                            ).split(",")[0]
                        conflict = pair_case == pair_sub_case
                    else:
                        # If these ``choose_`` are different they can be accessed
                        # simultaneously, then it returns an error
                        conflict = True

                    if conflict:
                        esm_parser.user_error(
                            "YAML syntax",
                            "The following ``_changes`` can be accessed "
                            + "simultaneously in " + fpath + ":\n"
                            + " - " + ".".join(changes) + "\n"
                            + " - " + ".".join(other_changes) + "\n"
                            + "\n" + changes_note + "\n\n",
                        )

        # Loop through the different ``add_`` groups
        for add_group in add_groups:
            # Count ``add_`` occurrences out of a ``choose_``
            add_no_choose = [x for x in add_group if "choose_" not in x]
            # If one ``add_`` without ``choose_`` check for ``add_`` inside
            # ``choose_`` and return error if any is found (incompatible ``add_``s)
            if len(add_no_choose) == 1:
                add_group.remove(add_no_choose[0])
                if len(add_group) > 0:
                    add_group = [x.replace(",", ".") for x in add_group]
                    esm_parser.user_error(
                        "YAML syntax",
                        "The general ``"
                        + add_no_choose[0]
                        + "`` and ``add_`` in ``choose_`` are not compatible in "
                        + fpath
                        + ":\n - "
                        + "\n - ".join(add_group)
                        + "\n\n"
                        + add_note
                        + "\n\n",
                    )
def find_last_choose(var_path):
    """
    Locates the last ``choose_`` in the path to a variable and returns the
    path to that ``choose_`` (joined with ",") and the case that follows it.

    Parameters
    ----------
    var_path : list of str
        The path to a variable, one element per level (e.g. the result of
        splitting a ","-separated path).

    Returns
    -------
    path2choose : str
        Path to the last ``choose_``, elements joined with ",".
    case : str
        Case after the choose (the element right after the last ``choose_``).

    Raises
    ------
    IndexError
        If ``var_path`` contains no ``choose_`` element, or if the last
        ``choose_`` is the final element (there is no case after it).
    """
    # Positions of every element containing ``choose_``. Working with
    # positions (not values) keeps this correct when the same ``choose_`` key
    # appears more than once in the path.
    choose_indices = [
        index for index, element in enumerate(var_path) if "choose_" in element
    ]
    if not choose_indices:
        raise IndexError("no ``choose_`` found in path: " + ",".join(var_path))
    choose_index = choose_indices[-1]
    # Defines the path to the last ``choose_``
    path2choose = ",".join(var_path[:choose_index + 1])
    # Defines the case of the last ``choose_``
    case = var_path[choose_index + 1]
    return path2choose, case
def check_duplicates(src):
    """
    Loads ``src`` with a yaml loader that reports repeated mapping keys.

    When a key appears twice in the same mapping, an error naming the key and
    the position of its second occurrence is reported through
    ``esm_parser.user_error``.

    Parameters
    ----------
    src : object
        Source file object

    Returns
    -------
    object
        Whatever ``yaml.load`` returns for ``src`` with the duplicate-aware
        loader.
    """

    class PreserveDuplicatesLoader(yaml.loader.Loader):
        # A throwaway subclass local to this call: ``add_constructor`` is a
        # class method, and registering on a fresh class avoids mutating the
        # pyyaml classes themselves.
        # NOTE(review): ``yaml.loader.Loader`` is PyYAML's full loader --
        # consider a ``SafeLoader`` base if ``src`` can be untrusted.
        pass

    def map_constructor(loader, node, deep=False):
        """
        Builds a mapping node, reporting any key that shows up twice.
        """
        seen = {}
        for key_node, value_node in node.value:
            key = loader.construct_object(key_node, deep=deep)
            value = loader.construct_object(value_node, deep=deep)
            if key in seen:
                # First comma-separated chunk of the mark, spaces removed
                position = str(key_node.start_mark).replace(" ", "").split(",")[0]
                esm_parser.user_error(
                    "Duplicated variables",
                    "Key ``{0}`` is duplicated {1}\n\n".format(key, position),
                )
            seen[key] = value
        # The actual mapping is built by the stock constructor
        return loader.construct_mapping(node, deep)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    PreserveDuplicatesLoader.add_constructor(mapping_tag, map_constructor)
    # Same ``!ENV`` handling as the real load, on top of the duplicate check
    duplicate_aware_loader = create_env_loader(loader=PreserveDuplicatesLoader)
    return yaml.load(src, Loader=duplicate_aware_loader)
class EsmConfigFileError(Exception):
    """
    Exception for yaml file containing tabs or other syntax issues.

    An exception used when yaml.load() throws a yaml.scanner.ScannerError.
    This error occurs mainly when there are tabs inside a yaml file or
    when the syntax is incorrect. If tabs are found, the exception message
    is a user-friendly report indicating where the tabs are located in the
    yaml file. If no tabs are found, the message carries the text of the
    original yaml error instead.

    Parameters
    ----------
    fpath : str
        Path to the yaml file
    yaml_error : Exception
        The error raised while loading ``fpath``; only its string form is used.

    Attributes
    ----------
    message : str
        The text passed to ``Exception.__init__`` (always set).
    """

    def __init__(self, fpath, yaml_error):
        # Gather the offending lines. Numbering starts at 1 to agree with
        # editors, and each tab is shown as "____" so it can be seen.
        tab_lines = []
        with open(fpath) as yaml_file:
            for line_number, line in enumerate(yaml_file, start=1):
                if "\t" in line:
                    tab_lines.append(
                        str(line_number) + ":" + line.replace("\t", "____") + "\n"
                    )
        report = "".join(tab_lines)

        if not report:
            # No tabs in the file: forward the original error text.
            # ``yaml_error`` is an exception object, hence the ``str()``.
            self.message = "\n\n\n" + str(yaml_error)
        else:
            # Tabs found: report where they are
            self.message = (
                "\n\n\n"
                f"Your file {fpath} has tabs, please use ONLY spaces!\n"
                "Tabs are in following lines:\n" + report
            )
        super().__init__(self.message)