Run Singularity images

The script below shows how to run Singularity planner images using Downward Lab.

../examples/singularity/singularity-exp.py
#! /usr/bin/env python

"""
Example experiment for running Singularity planner images.

The time and memory limits set with Lab can be circumvented by solvers
that fork child processes. Their resource usage is not checked. If you're
running solvers that don't check their resource usage like Fast Downward,
we recommend using cgroups or the "runsolver" tool to enforce resource
limits. Since setting time limits for solvers with cgroups is difficult,
the experiment below uses the "runsolver" tool, which has been used in
multiple SAT competitions to enforce resource limits. For the experiment
to run, the runsolver binary needs to be on the PATH. You can obtain a
runsolver copy from https://github.com/jendrikseipp/runsolver.

A note on running Singularity on clusters: reading large Singularity files
over the network is not optimal, so we recommend copying the images to a
local filesystem (e.g., /tmp/) before running experiments.
"""

import os
from pathlib import Path
import platform
import sys

from downward import suites
from downward.reports.absolute import AbsoluteReport
from lab.environments import BaselSlurmEnvironment, LocalEnvironment
from lab.experiment import Experiment


# Create custom report class with suitable info and error attributes.
class BaseReport(AbsoluteReport):
    INFO_ATTRIBUTES = []
    ERROR_ATTRIBUTES = [
        "domain",
        "problem",
        "algorithm",
        "unexplained_errors",
        "error",
        "node",
    ]


NODE = platform.node()
RUNNING_ON_CLUSTER = NODE.endswith((".scicore.unibas.ch", ".cluster.bc2.ch"))
DIR = Path(__file__).resolve().parent
REPO = DIR.parent
IMAGES_DIR = Path(os.environ["SINGULARITY_IMAGES"])
assert IMAGES_DIR.is_dir(), IMAGES_DIR
BENCHMARKS_DIR = os.environ["DOWNWARD_BENCHMARKS"]
MEMORY_LIMIT = 3584  # MiB
if RUNNING_ON_CLUSTER:
    SUITE = ["depot", "freecell", "gripper", "zenotravel"]
    ENVIRONMENT = BaselSlurmEnvironment(
        partition="infai_1",
        email="my.name@unibas.ch",
        memory_per_cpu="3872M",
        export=["PATH"],
        setup=BaselSlurmEnvironment.DEFAULT_SETUP,
        # Until recently, we had to load the Singularity module here
        # by adding "module load Singularity/2.6.1 2> /dev/null".
    )
    TIME_LIMIT = 1800
else:
    SUITE = ["depot:p01.pddl", "gripper:prob01.pddl", "mystery:prob07.pddl"]
    ENVIRONMENT = LocalEnvironment(processes=2)
    TIME_LIMIT = 5

ATTRIBUTES = [
    "cost",
    "coverage",
    "error",
    "g_values_over_time",
    "run_dir",
    "raw_memory",
    "runtime",
    "virtual_memory",
]

exp = Experiment(environment=ENVIRONMENT)
exp.add_step("build", exp.build)
exp.add_step("start", exp.start_runs)
exp.add_fetcher(name="fetch")
exp.add_parser(DIR / "singularity-parser.py")


def get_image(name):
    planner = name.replace("-", "_")
    image = IMAGES_DIR / (name + ".img")
    assert image.is_file(), image
    return planner, image


IMAGES = [get_image("fd1906-lama-first")]

for planner, image in IMAGES:
    exp.add_resource(planner, image, symlink=True)

exp.add_resource("run_singularity", DIR / "run-singularity.sh")
exp.add_resource("filter_stderr", DIR / "filter-stderr.py")

for planner, _ in IMAGES:
    for task in suites.build_suite(BENCHMARKS_DIR, SUITE):
        run = exp.add_run()
        run.add_resource("domain", task.domain_file, "domain.pddl")
        run.add_resource("problem", task.problem_file, "problem.pddl")
        # Use runsolver to limit time and memory. It must be on the system
        # PATH. Important: we cannot use time_limit and memory_limit of
        # Lab's add_command() because setting the same memory limit with
        # runsolver again using setrlimit fails.
        run.add_command(
            "run-planner",
            [
                "runsolver",
                "-C",
                TIME_LIMIT,
                "-V",
                MEMORY_LIMIT,
                "-w",
                "watch.log",
                "-v",
                "values.log",
                "{run_singularity}",
                f"{{{planner}}}",
                "{domain}",
                "{problem}",
                "sas_plan",
            ],
        )
        # Remove temporary files from old Fast Downward versions.
        run.add_command("rm-tmp-files", ["rm", "-f", "output.sas", "output"])
        run.add_command("filter-stderr", [sys.executable, "{filter_stderr}"])

        run.set_property("domain", task.domain)
        run.set_property("problem", task.problem)
        run.set_property("algorithm", planner)
        run.set_property("id", [planner, task.domain, task.problem])

report = Path(exp.eval_dir) / f"{exp.name}.html"
exp.add_report(BaseReport(attributes=ATTRIBUTES), outfile=report)

exp.run_steps()

The experiment script needs a parser and a helper script:

../examples/singularity/singularity-parser.py
#! /usr/bin/env python

import re
import sys

from lab.parser import Parser


def coverage(content, props):
    props["coverage"] = int("cost" in props)


def unsolvable(content, props):
    # Note that this naive test may easily generate false positives.
    props["unsolvable"] = int(
        not props["coverage"]
        and "Completely explored state space -- no solution!" in content
    )


def parse_g_value_over_time(content, props):
    """Example line: "[g=6, 16 evaluated, 15 expanded, t=0.00328561s, 22300 KB]" """
    matches = re.findall(
        r"\[g=(\d+), \d+ evaluated, \d+ expanded, t=(.+)s, \d+ KB\]\n", content
    )
    props["g_values_over_time"] = [(float(t), int(g)) for g, t in matches]


def set_outcome(content, props):
    lines = content.splitlines()
    solved = props["coverage"]
    unsolvable = props["unsolvable"]
    out_of_time = int("TIMEOUT=true" in lines)
    out_of_memory = int("MEMOUT=true" in lines)
    # runsolver decides "out of time" based on CPU rather than (cumulated)
    # WCTIME.
    if (
        not solved
        and not unsolvable
        and not out_of_time
        and not out_of_memory
        and props["runtime"] > props["time_limit"]
    ):
        out_of_time = 1
    # In cases where CPU time is very slightly above the threshold so that
    # runsolver didn't kill the planner yet and the planner solved a task
    # just within the limit, runsolver will still record an "out of time".
    # We remove this record. This case also applies to iterative planners.
    # If such planners solve the task, we don't treat them as running out
    # of time.
    if (solved or unsolvable) and (out_of_time or out_of_memory):
        print("task solved however runsolver recorded an out_of_*")
        print(props)
        out_of_time = 0
        out_of_memory = 0

    if not solved and not unsolvable:
        props["runtime"] = None

    if solved ^ unsolvable ^ out_of_time ^ out_of_memory:
        if solved:
            props["error"] = "solved"
        elif unsolvable:
            props["error"] = "unsolvable"
        elif out_of_time:
            props["error"] = "out_of_time"
        elif out_of_memory:
            props["error"] = "out_of_memory"
    else:
        print(f"unexpected error: {props}", file=sys.stderr)
        props["error"] = "unexpected-error"


def main():
    print("Running singularity parser")
    parser = Parser()
    parser.add_pattern(
        "planner_exit_code",
        r"run-planner exit code: (.+)\n",
        type=int,
        file="driver.log",
        required=True,
    )
    parser.add_pattern(
        "node", r"node: (.+)\n", type=str, file="driver.log", required=True
    )
    parser.add_pattern(
        "planner_wall_clock_time",
        r"run-planner wall-clock time: (.+)s",
        type=float,
        file="driver.log",
        required=True,
    )
    parser.add_pattern("runtime", r"Singularity runtime: (.+?)s", type=float)
    parser.add_pattern(
        "time_limit",
        r"Enforcing CPUTime limit \(soft limit, will send "
        r"SIGTERM then SIGKILL\): (\d+) seconds",
        type=int,
        file="watch.log",
        required=True,
    )
    # Cumulative runtime and virtual memory of the solver and all child processes.
    parser.add_pattern(
        "runtime", r"WCTIME=(.+)", type=float, file="values.log", required=True
    )
    parser.add_pattern(
        "virtual_memory", r"MAXVM=(\d+)", type=int, file="values.log", required=True
    )
    parser.add_pattern("raw_memory", r"Peak memory: (\d+) KB", type=int)
    parser.add_pattern("cost", r"\nFinal value: (.+)\n", type=int)
    parser.add_function(coverage)
    parser.add_function(unsolvable)
    parser.add_function(parse_g_value_over_time)
    parser.add_function(set_outcome, file="values.log")
    parser.parse()


if __name__ == "__main__":
    main()
../examples/singularity/run-singularity.sh
#!/bin/bash

set -euo pipefail

if [[ $# != 4 ]]; then
    echo "usage: $(basename "$0") image domain_file problem_file plan_file" 1>&2
    exit 2
fi

if [ -f $PWD/$4 ]; then
    echo "Error: remove $PWD/$4" 1>&2
    exit 2
fi

# Ensure that strings like "CPU time limit exceeded" and "Killed" are in English.
export LANG=C

set +e
singularity run -C -H "$PWD" "$1" "$PWD/$2" "$PWD/$3" "$4"
set -e

printf "\nRun VAL\n\n"

if [ -f $PWD/$4 ]; then
    echo "Found plan file."
    validate -v "$PWD/$2" "$PWD/$3" "$PWD/$4"
    exit 0
else
    echo "No plan file."
    validate -v "$PWD/$2" "$PWD/$3"
    exit 99
fi