# Source code for webinterface.pages.base_pages.utils.validation_ui

"""
Streamlit glue for the submission-validation layer.

This module bridges the framework-agnostic core validator
(:mod:`proteobench.validation`) and the Streamlit submission flow. It:

* re-derives the standardized result DataFrame from the already-parsed input
  DataFrame by reusing the existing parser (no duplicated tool logic);
* downloads and caches the module reference FASTA;
* runs :func:`proteobench.validation.validate_submission`;
* renders the resulting report in a curator- and user-friendly way.

All network access and Streamlit calls live here, keeping the core validation
library free of UI and I/O dependencies.
"""

from __future__ import annotations

from typing import Any, Optional

import pandas as pd
import streamlit as st

from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.validation import (
    FastaReference,
    ModuleValidationConfig,
    Severity,
    ValidationReport,
    validate_submission,
)


@st.cache_data(show_spinner="Downloading reference FASTA for validation ...")
def _load_fasta_reference(fasta_url: str, fasta_filename: Optional[str]) -> FastaReference:
    """
    Build the module's ``FastaReference`` from a URL.

    The call is wrapped in ``st.cache_data`` so Streamlit can reuse the result
    for repeated calls with the same arguments; a spinner with the message
    above is shown while the function body runs.

    Parameters
    ----------
    fasta_url : str
        Location of the reference FASTA resource (plain FASTA, zip or gzip).
    fasta_filename : str, optional
        Forwarded as ``member_filename``: the preferred FASTA member when the
        resource is an archive.

    Returns
    -------
    FastaReference
        The object returned by ``FastaReference.from_url``.
    """
    reference = FastaReference.from_url(fasta_url, member_filename=fasta_filename)
    return reference


def _build_standard_dataframe(ionmodule: Any, input_format: str, input_df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert the raw tool output into the standardized result DataFrame.

    No tool-specific logic lives here: a parser is obtained from
    ``ParseSettingsBuilder`` for the selected tool and asked to do the
    conversion.

    Parameters
    ----------
    ionmodule : Any
        Benchmarking module instance; its ``parse_settings_dir`` and
        ``module_id`` attributes configure the settings builder.
    input_format : str
        Name of the selected software tool, used to pick the parser.
    input_df : pandas.DataFrame
        The raw parsed tool output (as stored in session state).

    Returns
    -------
    pandas.DataFrame
        First element of the pair returned by the parser's
        ``convert_to_standard_format``; the second element is discarded.
    """
    settings_builder = ParseSettingsBuilder(
        parse_settings_dir=ionmodule.parse_settings_dir,
        module_id=ionmodule.module_id,
    )
    tool_parser = settings_builder.build_parser(input_format)
    # The parser returns a pair; only the standardized frame is needed here.
    standardized, _unused = tool_parser.convert_to_standard_format(input_df)
    return standardized


def _resolve_input_df(variables):
    """
    Look up the parsed input DataFrame in Streamlit session state.

    The submission-specific key is tried first; the general input key is only
    consulted when the first lookup yields ``None``.

    Parameters
    ----------
    variables : Any
        The module's ``Variables`` dataclass instance; its
        ``input_df_submission`` and ``input_df`` attributes hold the
        session-state keys.

    Returns
    -------
    pandas.DataFrame or None
        The stored input DataFrame, or ``None`` if neither key has a value.
    """
    submission_df = st.session_state.get(variables.input_df_submission)
    # Compare against None explicitly: DataFrame truthiness is ambiguous.
    if submission_df is not None:
        return submission_df
    return st.session_state.get(variables.input_df)


def _build_config(ionmodule, input_format: str) -> ModuleValidationConfig:
    """
    Resolve the validation config for a module/tool pair.

    Parameters
    ----------
    ionmodule : Any
        Benchmarking module instance; provides ``parse_settings_dir`` and
        ``module_id``.
    input_format : str
        The selected software tool.

    Returns
    -------
    ModuleValidationConfig
        The config built from the parse settings, or a default-constructed
        ``ModuleValidationConfig`` when building it raises any ``Exception``.
    """
    try:
        # Attribute reads stay inside the try so a malformed module object
        # also falls back to the default config.
        resolved = ModuleValidationConfig.from_parse_settings(
            parse_settings_dir=ionmodule.parse_settings_dir,
            module_id=ionmodule.module_id,
            input_format=input_format,
        )
    except Exception:  # noqa: BLE001
        return ModuleValidationConfig()
    else:
        return resolved


def _acquire_fasta(config: ModuleValidationConfig, report: ValidationReport):
    """
    Return the reference FASTA, or ``None`` with an explanatory report entry.

    Parameters
    ----------
    config : ModuleValidationConfig
        Module configuration; ``fasta_url`` and ``fasta_filename`` are read.
    report : ValidationReport
        Receives an info entry when no URL is configured, or a warning entry
        when loading the reference raises.

    Returns
    -------
    FastaReference or None
        The loaded reference, or ``None`` when no URL is configured or the
        load raised an ``Exception``.
    """
    if config.fasta_url:
        try:
            reference = _load_fasta_reference(config.fasta_url, config.fasta_filename)
        except Exception as exc:  # noqa: BLE001
            # Loading failed: record why and let validation continue without it.
            skipped_message = (
                f"Could not download or parse the reference FASTA ({type(exc).__name__}: {exc}); "
                "protein-identifier validation was skipped."
            )
            report.add_warning(
                "fasta_unavailable",
                skipped_message,
                "protein_ids",
                field=config.fasta_url,
            )
            reference = None
        return reference

    # Nothing configured for this module: note it as informational only.
    report.add_info(
        "no_fasta_configured",
        "No reference FASTA is configured for this module ([reference_database] in "
        "module_settings.toml); protein-identifier validation was skipped.",
        "protein_ids",
    )
    return None


def run_submission_validation(variables, ionmodule, user_input, params) -> ValidationReport:
    """
    Validate a submission and return the structured report.

    Designed to be fault-tolerant: a missing input DataFrame, a failure while
    re-standardizing the results, a FASTA that cannot be obtained, or an
    exception inside the core validator is recorded as a warning/info entry
    on the report instead of being raised, so validation does not interrupt
    the submission flow.

    Parameters
    ----------
    variables : Any
        The module's ``Variables`` dataclass instance (session-state keys).
    ionmodule : Any
        The benchmarking module instance.
    user_input : dict
        The submission's user input (provides ``"input_format"``).
    params : Any
        The parsed/edited :class:`ProteoBenchParameters` to be submitted.

    Returns
    -------
    ValidationReport
        The aggregated validation report.
    """
    report = ValidationReport()
    input_format = user_input.get("input_format")

    input_df = _resolve_input_df(variables)
    if input_df is None:
        report.add_warning(
            "no_input_dataframe",
            "Could not run submission validation because the parsed result data was not available in the session.",
            "input",
        )
        return report

    # Re-derive the standardized DataFrame (reuses existing parsing; no duplication).
    try:
        standard_df = _build_standard_dataframe(ionmodule, input_format, input_df)
    except Exception as exc:  # noqa: BLE001 - never block submission on a validation infra error
        report.add_warning(
            "standardization_failed",
            f"Could not re-standardize the results for validation ({type(exc).__name__}: {exc}); "
            "protein/charge/length checks were skipped.",
            "input",
        )
        return report

    config = _build_config(ionmodule, input_format)
    # May add its own info/warning entry to ``report`` and return None.
    fasta = _acquire_fasta(config, report)

    try:
        core_report = validate_submission(
            standard_df,
            parameters=params,
            fasta=fasta,
            config=config,
            input_format=input_format,
        )
        # Merge the core validator's findings into the entries collected above.
        report.extend(core_report.issues)
    except Exception as exc:  # noqa: BLE001 - never block submission on a validation infra error
        report.add_warning(
            "validation_failed",
            f"Submission validation could not be completed ({type(exc).__name__}: {exc}); "
            "no automated consistency checks were applied.",
            "input",
        )
    return report


def _render_issue(issue) -> None:
    """
    Render a single validation issue with its details.

    The issue message is shown in bold inside an error, warning or info box
    chosen from ``issue.severity``. Any of ``expected``, ``observed`` and
    ``examples`` that are present are listed below it as markdown bullets.

    Parameters
    ----------
    issue : ValidationIssue
        The issue to render; ``message``, ``severity``, ``expected``,
        ``observed`` and ``examples`` are read.
    """
    header = f"**{issue.message}**"
    # Icons must be valid emoji; anything else is neither ERROR nor WARNING
    # and falls through to the info box.
    if issue.severity == Severity.ERROR:
        st.error(header, icon="🚫")
    elif issue.severity == Severity.WARNING:
        st.warning(header, icon="⚠️")
    else:
        st.info(header, icon="ℹ️")

    details = []
    if issue.expected is not None:
        details.append(f"- Expected: `{issue.expected}`")
    if issue.observed is not None:
        details.append(f"- Observed: `{issue.observed}`")
    if issue.examples:
        shown = ", ".join(f"`{e}`" for e in issue.examples)
        details.append(f"- Examples: {shown}")
    if details:
        st.markdown("\n".join(details))


def render_validation_report(report: ValidationReport) -> None:
    """
    Render a full validation report in the Streamlit UI.

    The checks never block submission; the report is shown so the submitter can
    review the findings, which are also included in the pull-request description.

    A success banner is shown when there are no errors or warnings; otherwise
    a summary note is followed by every error and then every warning.
    Info-level entries, if any, are placed in a collapsible expander.

    Parameters
    ----------
    report : ValidationReport
        The report to display; ``errors``, ``warnings`` and ``infos`` are read.
    """
    n_flagged = len(report.errors) + len(report.warnings)
    n_info = len(report.infos)

    st.subheader("Submission checks")
    if n_flagged == 0:
        st.success("All automated submission checks passed.", icon="✅")
    else:
        st.info(
            f"We flagged {n_flagged} point(s) to review below. You can still submit your results, and "
            "these notes will be included in the pull request for the reviewers.",
            icon="📝",
        )

    # Most severe findings first.
    for issue in report.errors:
        _render_issue(issue)
    for issue in report.warnings:
        _render_issue(issue)

    if report.infos:
        with st.expander(f"More details ({n_info})"):
            for issue in report.infos:
                _render_issue(issue)