"""
Validation checks, profiles, and the profile registry.
This module is the extensibility surface of the validation layer. It models
validation as two composable pieces:
* a :class:`Check` wraps a single ``ctx -> list[ValidationIssue]`` function with
a stable name and description;
* a :class:`ValidationProfile` is an ordered list of checks that applies to one
category of module.
Profiles are looked up by name in a module-level registry. A module declares
which profile it uses (via ``[validation].profile`` in ``module_settings.toml``,
or by inference from its parser class); the orchestrator then runs that
profile's checks. Adding a new module of an existing category requires no code.
Adding a genuinely new category requires only registering a new profile here (or
from third-party code via :func:`register_profile`), without touching the
orchestrator.
Checks are reusable across profiles: for example ``run_consistency`` is shared
by both the quant and de novo profiles.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional
from proteobench.validation.checks import (
check_charge_range,
check_enzyme,
check_fdr_psm,
check_mass_tolerances,
check_max_modifications,
check_modifications,
check_peptide_length,
check_protein_ids,
check_run_consistency,
)
from proteobench.validation.context import ValidationContext
from proteobench.validation.report import ValidationIssue, ValidationReport
#: Type alias for a check function: takes a context, returns a list of issues.
CheckFunc = Callable[[ValidationContext], List[ValidationIssue]]
[docs]
@dataclass
class Check:
"""
A single, named validation check.
Attributes
----------
name : str
Stable identifier used in fallback error messages and progress display.
func : callable
A function ``ctx -> list[ValidationIssue]``.
description : str, optional
Human-readable description of what the check verifies.
"""
name: str
func: CheckFunc
description: str = ""
[docs]
def run(self, ctx: ValidationContext) -> List[ValidationIssue]:
"""
Execute the check against a validation context.
Parameters
----------
ctx : ValidationContext
The inputs available to the check.
Returns
-------
list of ValidationIssue
Issues produced by the check (possibly empty).
"""
return self.func(ctx)
[docs]
@dataclass
class ValidationProfile:
"""
An ordered set of checks that applies to one category of module.
Attributes
----------
name : str
Unique profile name (the routing key declared by modules).
checks : list of Check
Checks to run, in order.
description : str, optional
Human-readable description of the profile.
"""
name: str
checks: List[Check] = field(default_factory=list)
description: str = ""
@property
def check_names(self) -> List[str]:
"""
Return the names of the checks in this profile.
Returns
-------
list of str
The ordered check names.
"""
return [check.name for check in self.checks]
# --------------------------------------------------------------------------- #
# Registry
# --------------------------------------------------------------------------- #
_PROFILES: Dict[str, ValidationProfile] = {}
[docs]
def register_profile(profile: ValidationProfile, overwrite: bool = False) -> None:
"""
Register a validation profile under its name.
Parameters
----------
profile : ValidationProfile
The profile to register.
overwrite : bool, optional
If ``False`` (default), registering a name that already exists raises.
Set ``True`` to replace an existing profile.
Raises
------
ValueError
If a profile with the same name is already registered and
``overwrite`` is ``False``.
"""
if profile.name in _PROFILES and not overwrite:
raise ValueError(
f"A validation profile named '{profile.name}' is already registered. " "Pass overwrite=True to replace it."
)
_PROFILES[profile.name] = profile
[docs]
def unregister_profile(name: str) -> None:
"""
Remove a profile from the registry if present.
Parameters
----------
name : str
Name of the profile to remove.
"""
_PROFILES.pop(name, None)
[docs]
def get_profile(name: str) -> Optional[ValidationProfile]:
"""
Look up a registered profile by name.
Parameters
----------
name : str
Profile name.
Returns
-------
ValidationProfile or None
The registered profile, or ``None`` if no profile has that name (or if
``name`` is not a string).
"""
if not isinstance(name, str):
return None
return _PROFILES.get(name)
[docs]
def available_profiles() -> List[str]:
"""
List the names of all registered profiles.
Returns
-------
list of str
Sorted profile names.
"""
return sorted(_PROFILES)
# --------------------------------------------------------------------------- #
# Check adapters
#
# Trivial pass-throughs that simply forward context fields to the underlying
# pure check functions in ``checks.py`` are defined inline as lambdas in the
# profile definitions below. Checks that need extra orchestration logic (such
# as deciding whether a reference is available) are defined as named functions
# here.
# --------------------------------------------------------------------------- #
def _protein_ids_check(ctx: ValidationContext) -> List[ValidationIssue]:
"""
Validate protein identifiers against the reference FASTA, if available.
Parameters
----------
ctx : ValidationContext
The validation context (uses ``fasta``, ``standard_df``, ``config``).
Returns
-------
list of ValidationIssue
Protein-ID issues, or an info message if no FASTA reference is available.
"""
if ctx.fasta is not None and len(ctx.fasta) > 0:
return check_protein_ids(ctx.standard_df, ctx.fasta, ctx.config)
report = ValidationReport()
report.add_info(
"no_fasta_reference",
"No reference FASTA was available; protein-identifier validation was skipped.",
"protein_ids",
)
return report.issues
def _denovo_pending_check(ctx: ValidationContext) -> List[ValidationIssue]:
"""
Emit an informational notice that de novo checks are not yet implemented.
Parameters
----------
ctx : ValidationContext
The validation context (unused; present for the uniform signature).
Returns
-------
list of ValidationIssue
A single info issue.
"""
report = ValidationReport()
report.add_info(
"denovo_validation_pending",
"De novo content checks are not yet implemented. Quant checks (protein IDs, charge, "
"peptide length) do not apply to this module type. Implement de novo checks and add "
"them to the 'denovo' profile in proteobench/validation/profiles.py.",
"input",
)
return report.issues
# --------------------------------------------------------------------------- #
# Built-in profiles
# --------------------------------------------------------------------------- #
QUANT_LFQ_PROFILE = ValidationProfile(
name="quant_lfq",
description="LFQ quantification modules (HYE/PYE): protein IDs, charge, peptide length, enzyme, mods.",
checks=[
Check("protein_ids", _protein_ids_check, "Protein identifiers present in the reference FASTA."),
Check(
"charge_range",
lambda ctx: check_charge_range(ctx.standard_df, ctx.parameters, ctx.config),
"Precursor charges within the searched charge range.",
),
Check(
"peptide_length",
lambda ctx: check_peptide_length(ctx.standard_df, ctx.parameters, ctx.config),
"Peptide lengths within the searched length range.",
),
Check(
"enzyme",
lambda ctx: check_enzyme(ctx.standard_df, ctx.parameters, ctx.config),
"Trypsin-family missed-cleavage heuristic (warning only).",
),
Check(
"modifications",
lambda ctx: check_modifications(ctx.standard_df, ctx.parameters, ctx.config),
"Observed modifications declared in the parameter file (warning only).",
),
Check(
"max_modifications",
lambda ctx: check_max_modifications(ctx.standard_df, ctx.parameters, ctx.config),
"Number of modifications per peptidoform within max_mods (warning only).",
),
Check(
"mass_tolerances",
lambda ctx: check_mass_tolerances(ctx.standard_df, ctx.parameters, ctx.config),
"Precursor/fragment mass tolerances are present and positive; the plausibility "
"ceiling is checked only when configured (warning only).",
),
Check(
"fdr_psm",
lambda ctx: check_fdr_psm(ctx.standard_df, ctx.parameters, ctx.config),
"PSM-level FDR within the valid range and recommended maximum (warning only).",
),
Check(
"run_consistency",
lambda ctx: check_run_consistency(ctx.standard_df, ctx.parameters, ctx.input_format, ctx.config),
"Parameter file matches the submitted run (software identity).",
),
],
)
DENOVO_PROFILE = ValidationProfile(
name="denovo",
description="De novo sequencing modules. Reuses run-consistency; content checks are pending.",
checks=[
Check(
"run_consistency",
lambda ctx: check_run_consistency(ctx.standard_df, ctx.parameters, ctx.input_format, ctx.config),
"Parameter file matches the submitted run (software identity).",
),
Check("denovo_pending", _denovo_pending_check, "Placeholder for future de novo content checks."),
],
)
register_profile(QUANT_LFQ_PROFILE, overwrite=True)
register_profile(DENOVO_PROFILE, overwrite=True)