# Source code for proteobench.validation.validator

"""
Central submission-validation API.

:func:`validate_submission` resolves the module's validation profile, builds a
:class:`~proteobench.validation.context.ValidationContext`, and runs the
profile's checks, returning a single structured
:class:`~proteobench.validation.report.ValidationReport`. The caller decides
what to do with the report (typically: block public submission when
``report.has_errors`` is true, but allow it through with warnings).

The orchestrator is generic: it does not know about any particular module type.
Which checks run is determined entirely by the resolved profile
(:mod:`proteobench.validation.profiles`). Adding a new module of an existing
category needs no code; adding a new category needs only a new registered
profile.

The function is framework-agnostic and performs no I/O: any reference data (a
FASTA, a ground-truth table) is supplied via the arguments / context. Front ends
are responsible for obtaining the standardized DataFrame and the reference.
"""

from __future__ import annotations

from typing import Any, Optional

import pandas as pd

from proteobench.validation.config import ModuleValidationConfig
from proteobench.validation.context import ValidationContext
from proteobench.validation.fasta import FastaReference
from proteobench.validation.profiles import Check, available_profiles, get_profile
from proteobench.validation.report import ValidationReport


def validate_submission(
    standard_df: pd.DataFrame,
    parameters: Any = None,
    fasta: Optional[FastaReference] = None,
    config: Optional[ModuleValidationConfig] = None,
    input_format: Optional[str] = None,
    profile: Optional[str] = None,
) -> ValidationReport:
    """
    Run the resolved validation profile against a submission.

    The profile is looked up by name, taking the explicit ``profile``
    argument when it is given and ``config.validation_profile`` otherwise.
    Every check of that profile is executed through :func:`_run_check`, which
    turns an unexpected exception inside a check into a warning so that
    validation never crashes the submission flow.

    Parameters
    ----------
    standard_df : pandas.DataFrame
        The standardized result DataFrame produced by the module parser.
    parameters : Any, optional
        Parsed parameters (a :class:`ProteoBenchParameters` or any object with
        the same attributes). Forwarded unchanged to the validation context.
    fasta : FastaReference, optional
        Reference protein identifiers, for profiles that validate against a
        sequence database.
    config : ModuleValidationConfig, optional
        Module validation configuration. A default-constructed
        ``ModuleValidationConfig`` is used when none is supplied.
    input_format : str, optional
        The selected software tool, used for run-consistency checks.
    profile : str, optional
        Explicit profile name, overriding ``config.validation_profile``.
        Mostly useful for testing.

    Returns
    -------
    ValidationReport
        The aggregated report. It holds a single ``empty_results`` error when
        ``standard_df`` is not a non-empty DataFrame, and a single
        ``unknown_validation_profile`` warning when no profile is registered
        under the resolved name; in both cases no checks are run.
    """
    if not config:
        config = ModuleValidationConfig()
    report = ValidationReport()

    # Guard: without a non-empty DataFrame there is nothing to validate.
    has_results = isinstance(standard_df, pd.DataFrame) and not standard_df.empty
    if not has_results:
        report.add_error(
            "empty_results",
            "The standardized results are empty; nothing could be validated.",
            "input",
        )
        return report

    # An explicit profile name wins over the one configured for the module.
    selected_name = profile if profile else config.validation_profile
    selected_profile = get_profile(selected_name)
    if selected_profile is None:
        report.add_warning(
            "unknown_validation_profile",
            f"No validation profile named '{selected_name}' is registered "
            f"(available: {available_profiles()}); no checks were run.",
            "input",
        )
        return report

    # The FASTA is handed to the context both as ``fasta`` and as the generic
    # ``reference``.
    context = ValidationContext(
        standard_df=standard_df,
        parameters=parameters,
        config=config,
        fasta=fasta,
        input_format=input_format,
        reference=fasta,
    )
    for profile_check in selected_profile.checks:
        _run_check(report, profile_check, context)
    return report


def _run_check(report: ValidationReport, check: Check, ctx: ValidationContext) -> None:
    """
    Execute one check, downgrading any unexpected exception to a warning.

    Parameters
    ----------
    report : ValidationReport
        The report that receives the check's issues, or a ``check_failed``
        warning when the check raises.
    check : Check
        The check to execute.
    ctx : ValidationContext
        The validation context handed to ``check.run``.
    """
    try:
        issues = check.run(ctx)
        report.extend(issues)
    except Exception as failure:  # noqa: BLE001 - validation must never crash the flow
        failure_kind = type(failure).__name__
        message = f"The '{check.name}' validation check could not be completed ({failure_kind}: {failure})."
        report.add_warning("check_failed", message, check.name)