"""
To parse logs or generated files, you can use the ``Parser`` class. Here is an
example parser for the FF planner:
.. literalinclude:: ../examples/ff/ff_parser.py
:caption:
You can add a parser to all runs with :meth:`add_parser()
<lab.experiment.Experiment.add_parser>`:
>>> from pathlib import Path
>>> from lab.experiment import Experiment
>>> parser = Parser()
>>> parser.add_pattern("exitcode", "retcode: (.+)\\n", type=int, file="run.log")
>>> exp = Experiment()
>>> exp.add_parser(parser)
Parsers are run in the order in which they were added.
"""
import logging
import os
import re
from collections import defaultdict
from pathlib import Path
from lab import tools
def _get_pattern_flags(s):
    """Translate a string of flag letters into a combined ``re`` flag value.

    Each character of *s* is looked up as an attribute of the ``re``
    module (e.g. ``"M"`` -> ``re.M``) and the results are OR-ed together.
    An empty string yields ``0``.

    Raises:
        ValueError: if a character does not name an attribute of ``re``.
    """
    combined = 0
    for letter in s:
        try:
            flag = getattr(re, letter)
        except AttributeError:
            # Hide the AttributeError; the caller only cares about the bad flag.
            raise ValueError(f"Unknown pattern flag: {letter}") from None
        combined |= flag
    return combined
class _Function:
    """Plain record bundling a callable with a file name."""

    def __init__(self, function, filename):
        # Both values are stored unchanged; no validation happens here.
        self.filename = filename
        self.function = function
class _Pattern:
    """A compiled regular expression whose first group fills one attribute."""

    def __init__(self, attribute, regex, required, type_, flags):
        """Compile *regex* with the flags named by the string *flags*.

        *attribute* is the key under which a found value is reported,
        *type_* is the callable used to convert the matched text, and
        *required* controls whether a missing match is reported as an error.
        """
        self.attribute = attribute
        self.type_ = type_
        self.required = required
        # Always the first capturing group is extracted.
        self.group = 1
        self.regex = re.compile(regex, _get_pattern_flags(flags))

    def search(self, filename, content, props):
        """Search *content* and return ``{attribute: converted_value}``.

        Returns an empty dict if the regex does not match or has no
        capturing group. In the latter case, and when a required pattern
        does not match, an error is added to *props* through
        ``tools.add_unexplained_error``. *filename* is only used in the
        error messages.
        """
        hit = self.regex.search(content)
        if hit is None:
            if self.required:
                tools.add_unexplained_error(
                    props, f'Pattern "{self}" not found in {filename}'
                )
            return {}

        try:
            raw = hit.group(self.group)
        except IndexError:
            # The regex matched but defines no group to extract.
            tools.add_unexplained_error(
                props,
                f"Attribute {self.attribute} not found for pattern {self} in "
                f"file {filename}.",
            )
            return {}

        return {self.attribute: self.type_(raw)}

    def __str__(self):
        """Return the source text of the regular expression."""
        return self.regex.pattern
class _FileParser:
    """
    Private class that searches a given file for the added patterns.
    """

    def __init__(self):
        # Patterns are kept in insertion order.
        self.patterns = []

    def add_pattern(self, pattern):
        """Register *pattern* to be searched for in this file."""
        self.patterns.append(pattern)

    def search_patterns(self, filename, content, props):
        """Run every registered pattern on *content* and merge hits into *props*.

        *props* is updated after each pattern, so later patterns see (and
        may overwrite) the values found by earlier ones.
        """
        for entry in self.patterns:
            found = entry.search(filename, content, props)
            props.update(found)
[docs]
class Parser:
    """
    Parse logs or files in a given directory and write results into the
    ``properties`` file.
    """

    def __init__(self):
        # Maps a file name to the _FileParser holding the patterns for it.
        self.file_parsers = defaultdict(_FileParser)
        # _Function entries, applied in the order in which they were added.
        self.functions = []

    def add_pattern(
        self, attribute, regex, file="run.log", type=int, flags="", required=False
    ):
        r"""
        Look for *regex* in *file*, cast what is found in brackets to
        *type* and store it in the properties dictionary under
        *attribute*. During parsing roughly the following code will be
        executed::

            contents = open(file).read()
            match = re.compile(regex).search(contents)
            properties[attribute] = type(match.group(1))

        *flags* must be a string of Python regular expression flags (see
        https://docs.python.org/3/library/re.html). E.g., ``flags="M"``
        lets "^" and "$" match at the beginning and end of each line,
        respectively.

        If *required* is True and the pattern is not found in *file*
        (or *file* itself is missing), an error is recorded in the
        properties via ``tools.add_unexplained_error``.

        >>> parser = Parser()
        >>> parser.add_pattern("facts", r"Facts: (\d+)", type=int)

        """
        if type is bool:
            logging.warning(
                "Casting any non-empty string to boolean will always "
                "evaluate to true. Are you sure you want to use type=bool?"
            )
        self.file_parsers[file].add_pattern(
            _Pattern(attribute, regex, required, type, flags)
        )

    def add_function(self, function, file="run.log"):
        r"""Call ``function(open(file).read(), properties)`` during parsing.

        Functions are applied **after** all patterns have been
        evaluated and in the order in which they are added to the parser.

        The function is passed the file contents and the properties
        dictionary. It must manipulate the passed properties
        dictionary. The return value is ignored.

        Example:

        >>> import re
        >>> from lab.parser import Parser
        >>> def parse_states_over_time(content, props):
        ...     matches = re.findall(r"(.+)s: (\d+) states\n", content)
        ...     props["states_over_time"] = [(float(t), int(s)) for t, s in matches]
        ...
        >>> parser = Parser()
        >>> parser.add_function(parse_states_over_time)

        You can use ``props.add_unexplained_error("message")`` when your
        parsing function detects that something went wrong during the
        run.

        """
        self.functions.append(_Function(function, file))

    def parse(self, run_dir, props):
        """Search all patterns and apply all functions.

        Add the found values to *props*.

        File names are interpreted relative to *run_dir*. Each file is
        read at most once per call. Functions are called with the
        current working directory set to *run_dir*; the previous
        working directory is restored afterwards, even if a function
        raises an exception.
        """
        run_dir = Path(run_dir).resolve()
        content_cache = {}

        def get_content(path):
            # Cache file contents; a missing file is remembered as None.
            if path not in content_cache:
                try:
                    content_cache[path] = path.read_text()
                except FileNotFoundError:
                    content_cache[path] = None
            return content_cache[path]

        for filename, file_parser in self.file_parsers.items():
            # If filename is absolute, path is set to filename.
            path = run_dir / filename
            content = get_content(path)
            if content is None:
                # A missing file is only an error if a pattern requires it.
                if any(pattern.required for pattern in file_parser.patterns):
                    tools.add_unexplained_error(
                        props, f'Required file "{path}" is missing.'
                    )
            else:
                file_parser.search_patterns(str(path), content, props)

        for function in self.functions:
            path = run_dir / function.filename
            # Call function with empty string if file is missing.
            content = get_content(path) or ""
            # Run function in the run directory.
            old_cwd = Path.cwd()
            os.chdir(run_dir)
            try:
                function.function(content, props)
            finally:
                # Restore the working directory even if the function fails.
                os.chdir(old_cwd)