Source code for esm_utilities.utils

import pandas as pd
import io
import datetime
import numpy as np
import semver

import esm_tools


def _first_event_time(group, state):
    """Return the timestamp of the first row in ``group`` whose State equals ``state``."""
    # NOTE(review): ``.values[0]`` yields a ``numpy.datetime64`` when the Date
    # column has a datetime64 dtype, and that type has no ``astimezone``. This
    # expression is kept exactly as written; it presumably relies on the
    # column holding timezone-aware Python ``datetime`` objects -- confirm
    # against real log files before changing it.
    return group.Date[group.State == state].values[0].astimezone(None)


def logfile_stats(logfile_to_read):
    """
    Summarise queueing, compute and tidy durations recorded in a run logfile.

    The file is parsed into three columns (``Date``, ``Stuff``, ``State``)
    using the regular-expression separator ``" : | -"``; ``Stuff`` is then
    split on single spaces into ``Jobtype``, ``Job Number``, ``Job Date`` and
    ``Job PID``. Each line is therefore presumably of the form
    ``<date> : <jobtype> <job number> <job date> <job PID> - <state>``.

    The resulting table, its column means and a theoretical throughput (one
    day divided by the mean ``Compute`` duration, skipping the first row) are
    printed to standard output.

    Parameters
    ----------
    logfile_to_read : str
        Path of the logfile to analyse.

    Returns
    -------
    pandas.DataFrame
        One row per job number with the columns ``Queue Time``, ``Submit``,
        ``Compute`` and ``Tidy``.
    """
    with open(logfile_to_read, "r") as f:
        table = f.read()
    df = pd.read_table(
        io.StringIO(table),
        sep=" : | -",
        engine="python",
        names=["Date", "Stuff", "State"],
    )
    # Remove commented rows
    df = df[~df.Stuff.str.startswith("#")]
    # Clean up Date; unparseable dates become NaT instead of raising
    df["Date"] = pd.to_datetime(df.Date, errors="coerce")
    # Clean up Stuff
    ## Split columns
    df[["Jobtype", "Job Number", "Job Date", "Job PID"]] = df.Stuff.str.split(
        " ", expand=True
    )
    del df["Stuff"]
    ## Fix types
    df["Jobtype"] = df.Jobtype.str.strip()
    df["Job Number"] = df["Job Number"].astype(int)
    df["Job Date"] = pd.to_datetime(df["Job Date"])
    df["Job PID"] = df["Job PID"].astype(int)
    # Clean up State
    df["State"] = df.State.str.strip()

    # Drop trailing rows until the log ends on a "done" entry, so that an
    # unfinished job at the end of the file is ignored.
    # NOTE(review): a log without any "done" entry empties the frame and
    # ``iloc[-1]`` then raises IndexError.
    while "done" not in df.iloc[-1]["State"]:
        df = df.head(-1)

    gb = df.groupby(["Job Number", "Jobtype"])

    compute_submit_to_start = []
    compute_start_to_end = []
    tidy_start_to_end = []
    queue_time_list = []
    for (jobnumber, jobtype), subdf in gb:
        if "compute" in jobtype:
            submit = _first_event_time(subdf, "submitted")
            end = _first_event_time(subdf, "done")
            start = _first_event_time(subdf, "start")
            compute_submit_to_start.append((jobnumber, start - submit))
            compute_start_to_end.append((jobnumber, end - start))
            if jobnumber > 1:
                # Only the previous compute job is needed, and only here.
                # Looking up other groups of the previous run would raise
                # KeyError whenever that run lacks them.
                prev_compute = gb.get_group((jobnumber - 1, "compute"))
                queue_time = start - _first_event_time(prev_compute, "done")
            else:
                # The first job has no predecessor to measure against
                queue_time = None
            queue_time_list.append((jobnumber, queue_time))
        if "tidy" in jobtype:
            end = _first_event_time(subdf, "done")
            start = _first_event_time(subdf, "start")
            tidy_start_to_end.append((jobnumber, end - start))

    queue_time_df = (
        pd.DataFrame(queue_time_list, columns=["Job Number", "Queue Time"])
        .set_index("Job Number")
        .transpose()
    )
    compute_start_to_end_df = (
        pd.DataFrame(compute_start_to_end, columns=["Job Number", "Compute"])
        .set_index("Job Number")
        .transpose()
    )
    compute_submit_to_start_df = (
        pd.DataFrame(compute_submit_to_start, columns=["Job Number", "Submit"])
        .set_index("Job Number")
        .transpose()
    )
    tidy_df = (
        pd.DataFrame(tidy_start_to_end, columns=["Job Number", "Tidy"])
        .set_index("Job Number")
        .transpose()
    )
    # Stack the one-row frames, then flip so that each job number is a row
    df = pd.concat(
        [queue_time_df, compute_submit_to_start_df, compute_start_to_end_df, tidy_df]
    )
    df = df.transpose()
    print(df)
    print(df.mean())
    one_day = datetime.timedelta(1)
    # The first row is left out of the mean compute time
    throughput = one_day / df["Compute"][1:].mean()
    print(
        f"Theoretical Throughput assuming no queueing time: {np.round(throughput, 2)} runs per day"
    )
    return df
def check_valid_version(versionrange, version=""):
    """
    Returns ``True`` if the ``version`` provided matches the condition of
    ``versionrange``.

    Parameters
    ----------
    versionrange : str
        Condition for the version range, expressed as a comparison operator
        (``<=``, ``>=``, ``==``, ``!=``, ``<`` or ``>``) followed by a version
        number in the format ``X.Y.Z``.
    version : str
        String specifying the version number with the format ``X.Y.Z``. When
        empty, ``esm_tools.__version__`` is used.

    Returns
    -------
    True, False : bool
        ``True`` if the condition is met, ``False`` if not.

    Raises
    ------
    ValueError
        If ``versionrange`` does not start with a known comparison operator.
    """
    if not version:
        version = esm_tools.__version__
    version = semver.VersionInfo.parse(version)

    # Two-character operators must be tested before their one-character
    # prefixes, otherwise "<=1.0.0" is taken for "<" followed by "=1.0.0".
    comparisons = (
        ("<=", version.__le__),
        (">=", version.__ge__),
        ("==", version.__eq__),
        ("!=", version.__ne__),
        ("<", version.__lt__),
        (">", version.__gt__),
    )
    for prefix, compare in comparisons:
        if versionrange.startswith(prefix):
            # Strip only the leading operator, not every occurrence of it
            other_version = semver.VersionInfo.parse(
                versionrange[len(prefix):].strip()
            )
            return compare(other_version)

    # The exception class named here before is not defined in this module,
    # so reaching this point raised NameError instead of the message below.
    raise ValueError(f"Unknown version range specified: {versionrange}")