From 56e5b1175660ccd0835275efbbd7248dc814fc23 Mon Sep 17 00:00:00 2001
From: Gunnar Kreitz <gkreitz@kattis.com>
Date: Wed, 22 Apr 2026 13:47:01 +0200
Subject: [PATCH 1/2] Create a Diagnostics interface with a default
 implementation to extract logic from verifyproblem #398

---
 problemtools/diagnostics.py   | 153 ++++++++++++++++++++++++++++++++++
 problemtools/verifyproblem.py | 148 ++++++++++++--------------------
 tests/test_verify_hello.py    |  11 ++-
 3 files changed, 215 insertions(+), 97 deletions(-)
 create mode 100644 problemtools/diagnostics.py

diff --git a/problemtools/diagnostics.py b/problemtools/diagnostics.py
new file mode 100644
index 00000000..8ce2532a
--- /dev/null
+++ b/problemtools/diagnostics.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import dataclasses
+import logging
+import sys
+from abc import ABC, abstractmethod
+from typing import NoReturn
+
+import colorlog
+
+
+class VerifyError(Exception):
+    pass
+
+
+class Diagnostics(ABC):
+    """Interface for emitting and recording verification diagnostics."""
+
+    @abstractmethod
+    def error(self, msg: str, additional_info: str | None = None) -> None: ...
+
+    @abstractmethod
+    def warning(self, msg: str, additional_info: str | None = None) -> None: ...
+
+    @abstractmethod
+    def info(self, msg: str) -> None: ...
+
+    @abstractmethod
+    def debug(self, msg: str) -> None: ...
+
+    @abstractmethod
+    def child(self, name: str) -> Diagnostics:
+        """Return a Diagnostics scoped to a named sub-component."""
+        ...
+
+    @property
+    @abstractmethod
+    def errors(self) -> int: ...
+
+    @property
+    @abstractmethod
+    def warnings(self) -> int: ...
+
+    def fatal(self, msg: str, additional_info: str | None = None) -> NoReturn:
+        """Report a fatal error and unconditionally stop verification."""
+        self.error(msg, additional_info)
+        raise VerifyError(msg)
+
+
+@dataclasses.dataclass
+class _Counts:
+    errors: int = 0
+    warnings: int = 0
+
+
+class LoggingDiagnostics(Diagnostics):
+    """Diagnostics implementation that emits messages via Python's logging module."""
+
+    def __init__(
+        self,
+        logger: logging.Logger,
+        counts: _Counts,
+        bail_on_error: bool,
+        warnings_as_errors: bool,
+        max_additional_info: int,
+    ) -> None:
+        self._log = logger
+        self._counts = counts
+        self._bail_on_error = bail_on_error
+        self._warnings_as_errors = warnings_as_errors
+        self._max_additional_info = max_additional_info
+
+    @classmethod
+    def create(
+        cls,
+        name: str,
+        log_level: int = logging.WARNING,
+        bail_on_error: bool = False,
+        warnings_as_errors: bool = False,
+        max_additional_info: int = 15,
+    ) -> LoggingDiagnostics:
+        """Create a root LoggingDiagnostics instance.
+
+        Args:
+            name: Logger name; becomes the root of the child logger hierarchy.
+            log_level: A logging level constant (e.g. logging.DEBUG, logging.WARNING).
+            bail_on_error: Raise VerifyError on the first error, rather than continuing.
+            warnings_as_errors: Treat warnings as errors.
+            max_additional_info: Maximum number of lines of additional context (e.g.
+                compiler output or validator feedback) to include when reporting an error
+                or warning. Set to 0 to suppress additional info entirely.
+        """
+        colorlog.basicConfig(
+            stream=sys.stdout,
+            format='%(log_color)s%(levelname)s %(message)s',
+            level=log_level,
+        )
+        return cls(
+            logger=logging.getLogger(name),
+            counts=_Counts(),
+            bail_on_error=bail_on_error,
+            warnings_as_errors=warnings_as_errors,
+            max_additional_info=max_additional_info,
+        )
+
+    def child(self, name: str) -> LoggingDiagnostics:
+        return LoggingDiagnostics(
+            logger=self._log.getChild(name),
+            counts=self._counts,
+            bail_on_error=self._bail_on_error,
+            warnings_as_errors=self._warnings_as_errors,
+            max_additional_info=self._max_additional_info,
+        )
+
+    @property
+    def errors(self) -> int:
+        return self._counts.errors
+
+    @property
+    def warnings(self) -> int:
+        return self._counts.warnings
+
+    def _format(self, msg: str, additional_info: str | None) -> str:
+        if additional_info is None or self._max_additional_info <= 0:
+            return msg
+        additional_info = additional_info.rstrip()
+        if not additional_info:
+            return msg
+        lines = additional_info.split('\n')
+        if len(lines) == 1:
+            return f'{msg} ({lines[0]})'
+        if len(lines) > self._max_additional_info:
+            lines = lines[: self._max_additional_info] + [f'[.....truncated to {self._max_additional_info} lines.....]']
+        return f'{msg}:\n' + '\n'.join(' ' * 8 + line for line in lines)
+
+    def error(self, msg: str, additional_info: str | None = None) -> None:
+        self._counts.errors += 1
+        self._log.error(self._format(msg, additional_info))
+        if self._bail_on_error:
+            raise VerifyError(msg)
+
+    def warning(self, msg: str, additional_info: str | None = None) -> None:
+        if self._warnings_as_errors:
+            self.error(msg, additional_info)
+            return
+        self._counts.warnings += 1
+        self._log.warning(self._format(msg, additional_info))
+
+    def info(self, msg: str) -> None:
+        self._log.info(msg)
+
+    def debug(self, msg: str) -> None:
+        self._log.debug(msg)
diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py
index 08164df9..7e6db1d0 100644
--- a/problemtools/verifyproblem.py
+++ b/problemtools/verifyproblem.py
@@ -26,7 +26,6 @@
 import difflib
 from pathlib import Path
 
-import colorlog
 import yaml
 
 from . import config
@@ -36,6 +35,7 @@
 from . import problem2pdf
 from . import run
 from . import statement_util
+from .diagnostics import Diagnostics, LoggingDiagnostics, VerifyError
 from .formatversion import FormatVersion, get_format_version
 from .version import add_version_arg
 
@@ -45,8 +45,6 @@
 
 random.seed(42)
 
-log = logging.getLogger(__name__)
-
 Verdict = Literal['AC', 'TLE', 'OLE', 'MLE', 'RTE', 'WA', 'PAC', 'JE']
 
 
@@ -98,10 +96,6 @@ def __str__(self) -> str:
         return f'{verdict} [{", ".join(details)}]'
 
 
-class VerifyError(Exception):
-    pass
-
-
 _T = TypeVar('_T')
 _P = ParamSpec('_P')
 
@@ -123,61 +117,45 @@ def wait_for_background_work(self) -> None:
 
 
 class ProblemAspect(ABC):
-    errors: int = 0
-    warnings: int = 0
     _check_res: bool | None = None
     problem: Problem
-
-    def __append_additional_info(self, msg: str, additional_info: str | None) -> str:
-        max_additional_info = self.problem.max_additional_info()
-        if additional_info is None or max_additional_info <= 0:
-            return msg
-        additional_info = additional_info.rstrip()
-        if not additional_info:
-            return msg
-        lines = additional_info.split('\n')
-        if len(lines) == 1:
-            return f'{msg} ({lines[0]})'
-        if len(lines) > max_additional_info:
-            lines = lines[:max_additional_info] + [f'[.....truncated to {max_additional_info} lines.....]']
-
-        return f'{msg}:\n' + '\n'.join(' ' * 8 + line for line in lines)
+    _diag: Diagnostics
 
     def __init__(self, name: str, problem: Problem) -> None:
-        self.log = log.getChild(name)
+        if self is not problem:
+            self._diag = problem._diag.child(name)
         self.problem = problem
 
-    def fatal(self, msg: str, additional_info: str | None = None, *args) -> None:
+    @property
+    def errors(self) -> int:
+        return self._diag.errors
+
+    @property
+    def warnings(self) -> int:
+        return self._diag.warnings
+
+    def fatal(self, msg: str, additional_info: str | None = None) -> None:
         self._check_res = False
-        self._add_error()
-        self.log.critical(self.__append_additional_info(msg, additional_info), *args)
-        raise VerifyError(msg)
+        self._diag.fatal(msg, additional_info)
 
-    def error(self, msg: str, additional_info: str | None = None, *args) -> None:
+    def error(self, msg: str, additional_info: str | None = None) -> None:
         self._check_res = False
-        self._add_error()
-        self.log.error(self.__append_additional_info(msg, additional_info), *args)
-        if self.problem.bail_on_error():
-            raise VerifyError(msg)
-
-    def warning(self, msg: str, additional_info: str | None = None, *args) -> None:
-        if self.problem.consider_warnings_errors():
-            self.error(msg, additional_info, *args)
-            return
-        self._add_warning()
-        self.log.warning(self.__append_additional_info(msg, additional_info), *args)
+        self._diag.error(msg, additional_info)
 
-    def error_in_2023_07(self, msg: str, additional_info: str | None = None, *args) -> None:
+    def warning(self, msg: str, additional_info: str | None = None) -> None:
+        self._diag.warning(msg, additional_info)
+
+    def error_in_2023_07(self, msg: str, additional_info: str | None = None) -> None:
         if self.problem.format is FormatVersion.LEGACY:
-            self.warning(msg, additional_info, *args)
+            self.warning(msg, additional_info)
         else:
-            self.error(msg, additional_info, *args)
+            self.error(msg, additional_info)
 
-    def info(self, msg: str, *args) -> None:
-        self.log.info(msg, *args)
+    def info(self, msg: str) -> None:
+        self._diag.info(msg)
 
-    def debug(self, msg: str, *args) -> None:
-        self.log.debug(msg, *args)
+    def debug(self, msg: str) -> None:
+        self._diag.debug(msg)
 
     def msg(self, msg):
         print(msg)
@@ -191,16 +169,6 @@ def warn_directory(self, name: str, prop: str) -> None:
             if (problem_root / directory).exists():
                 self.warning(f'Found directory "{directory}". Version {self.problem.format} looks for {name} in "{good_dir}"')
 
-    def _add_error(self) -> None:
-        self.errors += 1
-        if self.problem is not self:
-            self.problem._add_error()
-
-    def _add_warning(self) -> None:
-        self.warnings += 1
-        if self.problem is not self:
-            self.problem._add_warning()
-
 
 class ProblemPart(ProblemAspect):
     """Baseclass for all parts that can be included in a problem-format."""
@@ -213,7 +181,7 @@ class ProblemPart(ProblemAspect):
     def __init__(self, problem: Problem) -> None:
         if self.PART_NAME is None:
             raise NotImplementedError('Every problem-part must override PART_NAME')
-        super().__init__(f'{problem.shortname}.{self.PART_NAME}', problem)
+        super().__init__(self.PART_NAME, problem)
         self.setup()
 
     def setup(self) -> None:
@@ -230,7 +198,7 @@ class TestCase(ProblemAspect):
     Result = tuple[SubmissionResult, SubmissionResult, SubmissionResult]
 
     def __init__(self, problem: Problem, base: str, testcasegroup: TestCaseGroup) -> None:
-        super().__init__(f'{problem.shortname}.test.{testcasegroup.name}.{os.path.basename(base)}', problem)
+        super().__init__(f'test.{testcasegroup.name}.{os.path.basename(base)}', problem)
         self._base = base
         self.infile = f'{base}.in'
         self.ansfile = f'{base}.ans'
@@ -367,7 +335,7 @@ def run_normal(self, sub, infile: Path, time_limit: float, feedback_dir: Path) -
                 with open(errfile, mode='rt') as f:
                     info = f.read()
             except IOError:
-                self.info('Failed to read error file %s', errfile)
+                self.info(f'Failed to read error file {errfile}')
                 info = None
             res_high = SubmissionResult('RTE', additional_info=info)
         else:
@@ -484,10 +452,10 @@ def __init__(self, problem: Problem, datadir: str | None = None, parent: TestCas
         self._datadir = datadir
         self.name = os.path.relpath(os.path.abspath(self._datadir), os.path.abspath(self._problem.probdir)).replace('/', '.')
 
-        super().__init__(f'{problem.shortname}.test.{self.name}', problem)
+        super().__init__(f'test.{self.name}', problem)
 
         self._seen_oob_scores = False
-        self.debug('Loading test data group %s', datadir)
+        self.debug(f'Loading test data group {datadir}')
         configfile = os.path.join(self._datadir, 'testdata.yaml')
         self.config: dict[str, Any] = {}
         if os.path.isfile(configfile):
@@ -1637,18 +1605,17 @@ def validate(
                     outfile=outfile,
                     errfile=errfile,
                 )
-                if self.log.isEnabledFor(logging.DEBUG):
-                    try:
-                        with open(outfile, mode='rt') as f:
-                            output = f.read()
-                        if output:
-                            self.log.debug('Validator output:\n%s', output)
-                        with open(errfile, mode='rt') as f:
-                            error = f.read()
-                        if error:
-                            self.log.debug('Validator stderr:\n%s', error)
-                    except IOError as e:
-                        self.info('Failed to read validator output: %s', e)
+                try:
+                    with open(outfile, mode='rt') as f:
+                        output = f.read()
+                    if output:
+                        self.debug(f'Validator output:\n{output}')
+                    with open(errfile, mode='rt') as f:
+                        error = f.read()
+                    if error:
+                        self.debug(f'Validator stderr:\n{error}')
+                except IOError as e:
+                    self.info(f'Failed to read validator output: {e}')
                 res = self._parse_validator_results(val, status, feedbackdir, testcase)
                 shutil.rmtree(validator_output)
                 if feedback_dir_path is None:
@@ -1956,9 +1923,10 @@ def _f_n(number: float | None) -> str:
 class Problem(ProblemAspect):
     """Represents a checkable problem"""
 
-    def __init__(self, probdir: str, args: argparse.Namespace):
+    def __init__(self, probdir: str, args: argparse.Namespace, diagnostics: Diagnostics):
         self.probdir = os.path.realpath(probdir)
         self.shortname: str = os.path.basename(self.probdir)
+        self._diag = diagnostics
         super().__init__(self.shortname, self)
         self.language_config = languages.load_language_config(Path(self.probdir).parent)
         self.testcase_by_infile: dict[str, TestCase] = {}
@@ -2178,15 +2146,6 @@ def _special_case_allowed_dirs(directory: str, reldir: str) -> bool:
                 if not regex.match(directory) and not _special_case_allowed_dirs(directory, reldir):
                     self.error(f"Invalid directory name '{directory}' in {reldir}, should match {regex.pattern}")
 
-    def bail_on_error(self) -> bool:
-        return self._args.bail_on_error
-
-    def consider_warnings_errors(self) -> bool:
-        return self._args.werror
-
-    def max_additional_info(self) -> int:
-        return self._args.max_additional_info
-
 
 def re_argument(s: str) -> Pattern[str]:
     try:
@@ -2262,21 +2221,22 @@ def argparser() -> argparse.ArgumentParser:
     return parser
 
 
-def initialize_logging(args: argparse.Namespace) -> None:
-    fmt = '%(log_color)s%(levelname)s %(message)s'
-    colorlog.basicConfig(stream=sys.stdout, format=fmt, level=getattr(logging, args.log_level.upper()))
-
-
 def main() -> None:
     args = argparser().parse_args()
 
-    initialize_logging(args)
-
     total_errors = 0
     try:
         for problemdir in args.problemdir:
-            print(f'Loading problem {os.path.basename(os.path.realpath(problemdir))}')
-            with Problem(problemdir, args) as prob:
+            shortname = os.path.basename(os.path.realpath(problemdir))
+            print(f'Loading problem {shortname}')
+            diag = LoggingDiagnostics.create(
+                shortname,
+                log_level=getattr(logging, args.log_level.upper()),
+                bail_on_error=args.bail_on_error,
+                warnings_as_errors=args.werror,
+                max_additional_info=args.max_additional_info,
+            )
+            with Problem(problemdir, args, diag) as prob:
                 errors, warnings = prob.check()
 
                 def p(x: int) -> str:
diff --git a/tests/test_verify_hello.py b/tests/test_verify_hello.py
index 6f5ac3c4..659dd850 100644
--- a/tests/test_verify_hello.py
+++ b/tests/test_verify_hello.py
@@ -1,5 +1,11 @@
+import logging
 import pathlib
 import problemtools.verifyproblem as verify
+from problemtools.diagnostics import LoggingDiagnostics
+
+
+def _make_diag(shortname: str) -> LoggingDiagnostics:
+    return LoggingDiagnostics.create(shortname, log_level=logging.WARNING)
 
 
 def test_load_hello():
@@ -7,10 +13,9 @@ def test_load_hello():
     string = str(directory.resolve())
 
     args = verify.argparser().parse_args([string])
-    verify.initialize_logging(args)
     context = verify.Context(args, None)
 
-    with verify.Problem(string, args) as p:
+    with verify.Problem(string, args, _make_diag('hello')) as p:
         p.load()
         assert p.shortname == 'hello'
         # pytest and fork don't go along very well, so just run aspects that work without run
@@ -28,6 +33,6 @@ def test_load_twice():
     string = str(directory.resolve())
 
     args = verify.argparser().parse_args([string])
-    with verify.Problem(string, args) as p:
+    with verify.Problem(string, args, _make_diag('hello')) as p:
         p.load()
         p.load()

From d6d07f6b4ee7f7fefb7d4bd491a12f04d050b394 Mon Sep 17 00:00:00 2001
From: Gunnar Kreitz <gkreitz@kattis.com>
Date: Wed, 22 Apr 2026 14:31:17 +0200
Subject: [PATCH 2/2] Remove Problem constructor taking an ArgumentParser
 instance

---
 problemtools/verifyproblem.py | 45 ++++++++++++++++++++++-------------
 tests/test_verify_hello.py    |  8 +++----
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py
index 7e6db1d0..4e842bf9 100644
--- a/problemtools/verifyproblem.py
+++ b/problemtools/verifyproblem.py
@@ -101,11 +101,20 @@ def __str__(self) -> str:
 
 
 class Context:
-    def __init__(self, args: argparse.Namespace, executor: ThreadPoolExecutor | None) -> None:
-        self.data_filter: Pattern[str] = args.data_filter
-        self.submission_filter: Pattern[str] = args.submission_filter
-        self.fixed_timelim: float | None = args.fixed_timelim
-        self.executor = executor
+    # Default values here must be kept in sync with the defaults in argparser().
+    def __init__(
+        self,
+        data_filter: Pattern[str] = re.compile('.*'),
+        submission_filter: Pattern[str] = re.compile('.*'),
+        fixed_timelim: float | None = None,
+        parts: list[str] | None = None,
+        threads: int = 1,
+    ) -> None:
+        self.data_filter = data_filter
+        self.submission_filter = submission_filter
+        self.fixed_timelim = fixed_timelim
+        self.parts: list[str] = parts if parts is not None else list(PROBLEM_PARTS)
+        self.executor: ThreadPoolExecutor | None = ThreadPoolExecutor(threads) if threads > 1 else None
         self._background_work: list[concurrent.futures.Future[object]] = []
 
     def submit_background_work(self, job: Callable[_P, _T], *args: _P.args, **kwargs: _P.kwargs) -> None:
@@ -1923,7 +1932,7 @@ def _f_n(number: float | None) -> str:
 class Problem(ProblemAspect):
     """Represents a checkable problem"""
 
-    def __init__(self, probdir: str, args: argparse.Namespace, diagnostics: Diagnostics):
+    def __init__(self, probdir: str, diagnostics: Diagnostics):
         self.probdir = os.path.realpath(probdir)
         self.shortname: str = os.path.basename(self.probdir)
         self._diag = diagnostics
@@ -1932,7 +1941,6 @@ def __init__(self, probdir: str, args: argparse.Namespace, diagnostics: Diagnost
         self.testcase_by_infile: dict[str, TestCase] = {}
         self.loaded = False
         self._metadata: metadata.Metadata | None = None
-        self._args = args
         self._timelim: float | None = None
 
     # Unfortunately must be before metadata, otherwise mypy gets confused about the type metadata.Metadata (feels like a bug)
@@ -2012,7 +2020,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
     def __str__(self) -> str:
         return str(self.shortname)
 
-    def check(self) -> tuple[int, int]:
+    def check(self, context: Context) -> tuple[int, int]:
         """Loads and checks the problem package
 
         Loads the problem package and runs checks. After this has completed,
@@ -2030,9 +2038,6 @@ def check(self) -> tuple[int, int]:
         except VerifyError:
             return self.errors, self.warnings
 
-        executor = ThreadPoolExecutor(self._args.threads) if self._args.threads > 1 else None
-        context = Context(self._args, executor)
-
         try:
             part_mapping: dict[str, list] = {
                 'config': [self.config],
@@ -2056,9 +2061,9 @@ def check(self) -> tuple[int, int]:
             run.limit.check_limit_capabilities(self)
 
             parts = [
-                part for part in part_mapping if part in self._args.parts
-            ]  # Parts from _args in the order they appear in part_mapping
-            if executor:
+                part for part in part_mapping if part in context.parts
+            ]  # Parts from context in the order they appear in part_mapping
+            if context.executor:
                 for part in parts:
                     for item in part_mapping[part]:
                         item.start_background_work(context)
@@ -2174,6 +2179,7 @@ def argparser_basic_arguments(parser: argparse.ArgumentParser) -> None:
 
 
 def argparser() -> argparse.ArgumentParser:
+    # Default values here must be kept in sync with the defaults in Context.__init__().
     parser = argparse.ArgumentParser(description='Validate a problem package in the Kattis problem format.')
     parser.add_argument(
         '-s',
@@ -2226,6 +2232,13 @@ def main() -> None:
 
     total_errors = 0
     try:
+        context = Context(
+            data_filter=args.data_filter,
+            submission_filter=args.submission_filter,
+            fixed_timelim=args.fixed_timelim,
+            parts=args.parts,
+            threads=args.threads,
+        )
         for problemdir in args.problemdir:
             shortname = os.path.basename(os.path.realpath(problemdir))
             print(f'Loading problem {shortname}')
@@ -2236,8 +2249,8 @@ def main() -> None:
                 warnings_as_errors=args.werror,
                 max_additional_info=args.max_additional_info,
             )
-            with Problem(problemdir, args, diag) as prob:
-                errors, warnings = prob.check()
+            with Problem(problemdir, diag) as prob:
+                errors, warnings = prob.check(context)
 
                 def p(x: int) -> str:
                     return '' if x == 1 else 's'
diff --git a/tests/test_verify_hello.py b/tests/test_verify_hello.py
index 659dd850..180a7f7e 100644
--- a/tests/test_verify_hello.py
+++ b/tests/test_verify_hello.py
@@ -12,10 +12,9 @@ def test_load_hello():
     directory = pathlib.Path(__file__).parent / 'hello'
     string = str(directory.resolve())
 
-    args = verify.argparser().parse_args([string])
-    context = verify.Context(args, None)
+    context = verify.Context()
 
-    with verify.Problem(string, args, _make_diag('hello')) as p:
+    with verify.Problem(string, _make_diag('hello')) as p:
         p.load()
         assert p.shortname == 'hello'
         # pytest and fork don't go along very well, so just run aspects that work without run
@@ -32,7 +31,6 @@ def test_load_twice():
     directory = pathlib.Path(__file__).parent / 'hello'
     string = str(directory.resolve())
 
-    args = verify.argparser().parse_args([string])
-    with verify.Problem(string, args, _make_diag('hello')) as p:
+    with verify.Problem(string, _make_diag('hello')) as p:
         p.load()
         p.load()