"""
Streamlit glue for the submission-validation layer.
This module bridges the framework-agnostic core validator
(:mod:`proteobench.validation`) and the Streamlit submission flow. It:
* re-derives the standardized result DataFrame from the already-parsed input
DataFrame by reusing the existing parser (no duplicated tool logic);
* downloads and caches the module reference FASTA;
* runs :func:`proteobench.validation.validate_submission`;
* renders the resulting report in a curator- and user-friendly way.
All network access and Streamlit calls live here, keeping the core validation
library free of UI and I/O dependencies.
"""
from __future__ import annotations
from typing import Any, Optional
import pandas as pd
import streamlit as st
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.validation import (
FastaReference,
ModuleValidationConfig,
Severity,
ValidationReport,
validate_submission,
)
@st.cache_data(show_spinner="Downloading reference FASTA for validation ...")
def _load_fasta_reference(fasta_url: str, fasta_filename: Optional[str]) -> FastaReference:
"""
Download and parse the module reference FASTA (cached).
Parameters
----------
fasta_url : str
URL of the reference FASTA / zip / gzip resource.
fasta_filename : str, optional
Preferred FASTA member name when the resource is an archive.
Returns
-------
FastaReference
Reference protein identifiers.
"""
return FastaReference.from_url(fasta_url, member_filename=fasta_filename)
def _build_standard_dataframe(ionmodule: Any, input_format: str, input_df: pd.DataFrame) -> pd.DataFrame:
"""
Re-derive the standardized result DataFrame by reusing the module parser.
Parameters
----------
ionmodule : Any
The benchmarking module instance (provides ``parse_settings_dir`` and ``module_id``).
input_format : str
The selected software tool.
input_df : pandas.DataFrame
The raw parsed tool output (as stored in session state).
Returns
-------
pandas.DataFrame
The standardized result DataFrame.
"""
parser = ParseSettingsBuilder(
parse_settings_dir=ionmodule.parse_settings_dir,
module_id=ionmodule.module_id,
).build_parser(input_format)
standard_df, _ = parser.convert_to_standard_format(input_df)
return standard_df
def _resolve_input_df(variables):
"""
Fetch the parsed input DataFrame from session state for validation.
Parameters
----------
variables : Any
The module's ``Variables`` dataclass instance (session-state keys).
Returns
-------
pandas.DataFrame or None
The submission input DataFrame, or ``None`` if unavailable.
"""
input_df = st.session_state.get(variables.input_df_submission)
if input_df is None:
input_df = st.session_state.get(variables.input_df)
return input_df
def _build_config(ionmodule, input_format: str) -> ModuleValidationConfig:
"""
Build the module validation config, falling back to defaults on failure.
Parameters
----------
ionmodule : Any
The benchmarking module instance.
input_format : str
The selected software tool.
Returns
-------
ModuleValidationConfig
The resolved configuration (never raises).
"""
try:
return ModuleValidationConfig.from_parse_settings(
parse_settings_dir=ionmodule.parse_settings_dir,
module_id=ionmodule.module_id,
input_format=input_format,
)
except Exception: # noqa: BLE001
return ModuleValidationConfig()
def _acquire_fasta(config: ModuleValidationConfig, report: ValidationReport):
"""
Obtain the reference FASTA, degrading to a report message on any problem.
Parameters
----------
config : ModuleValidationConfig
The module configuration (provides ``fasta_url`` / ``fasta_filename``).
report : ValidationReport
Report to which a warning/info is added when no FASTA is available.
Returns
-------
FastaReference or None
The reference, or ``None`` when not configured or not downloadable.
"""
if not config.fasta_url:
report.add_info(
"no_fasta_configured",
"No reference FASTA is configured for this module ([reference_database] in "
"module_settings.toml); protein-identifier validation was skipped.",
"protein_ids",
)
return None
try:
return _load_fasta_reference(config.fasta_url, config.fasta_filename)
except Exception as exc: # noqa: BLE001
report.add_warning(
"fasta_unavailable",
f"Could not download or parse the reference FASTA ({type(exc).__name__}: {exc}); "
"protein-identifier validation was skipped.",
"protein_ids",
field=config.fasta_url,
)
return None
[docs]
def run_submission_validation(variables, ionmodule, user_input, params) -> ValidationReport:
"""
Validate a submission and return the structured report.
Designed to be fault-tolerant: any infrastructure problem (missing input,
parser failure, FASTA download failure) is converted into a warning so that
validation never crashes the submission flow. Only genuine consistency
problems produce errors.
Parameters
----------
variables : Any
The module's ``Variables`` dataclass instance (session-state keys).
ionmodule : Any
The benchmarking module instance.
user_input : dict
The submission's user input (provides ``"input_format"``).
params : Any
The parsed/edited :class:`ProteoBenchParameters` to be submitted.
Returns
-------
ValidationReport
The aggregated validation report.
"""
report = ValidationReport()
input_format = user_input.get("input_format")
input_df = _resolve_input_df(variables)
if input_df is None:
report.add_warning(
"no_input_dataframe",
"Could not run submission validation because the parsed result data was not available in the session.",
"input",
)
return report
# Re-derive the standardized DataFrame (reuses existing parsing; no duplication).
try:
standard_df = _build_standard_dataframe(ionmodule, input_format, input_df)
except Exception as exc: # noqa: BLE001 - never block submission on a validation infra error
report.add_warning(
"standardization_failed",
f"Could not re-standardize the results for validation ({type(exc).__name__}: {exc}); "
"protein/charge/length checks were skipped.",
"input",
)
return report
config = _build_config(ionmodule, input_format)
fasta = _acquire_fasta(config, report)
try:
core_report = validate_submission(
standard_df,
parameters=params,
fasta=fasta,
config=config,
input_format=input_format,
)
report.extend(core_report.issues)
except Exception as exc: # noqa: BLE001 - never block submission on a validation infra error
report.add_warning(
"validation_failed",
f"Submission validation could not be completed ({type(exc).__name__}: {exc}); "
"no automated consistency checks were applied.",
"input",
)
return report
def _render_issue(issue) -> None:
"""
Render a single validation issue with its details.
Parameters
----------
issue : ValidationIssue
The issue to render.
"""
header = f"**{issue.message}**"
if issue.severity == Severity.ERROR:
st.error(header, icon="đĢ")
elif issue.severity == Severity.WARNING:
st.warning(header, icon="â ī¸")
else:
st.info(header, icon="âšī¸")
details = []
if issue.expected is not None:
details.append(f"- Expected: `{issue.expected}`")
if issue.observed is not None:
details.append(f"- Observed: `{issue.observed}`")
if issue.examples:
shown = ", ".join(f"`{e}`" for e in issue.examples)
details.append(f"- Examples: {shown}")
if details:
st.markdown("\n".join(details))
[docs]
def render_validation_report(report: ValidationReport) -> None:
"""
Render a full validation report in the Streamlit UI.
The checks never block submission; the report is shown so the submitter can
review the findings, which are also included in the pull-request description.
Parameters
----------
report : ValidationReport
The report to display.
"""
n_flagged = len(report.errors) + len(report.warnings)
n_info = len(report.infos)
st.subheader("Submission checks")
if n_flagged == 0:
st.success("All automated submission checks passed.", icon="â
")
else:
st.info(
f"We flagged {n_flagged} point(s) to review below. You can still submit your results, and "
"these notes will be included in the pull request for the reviewers.",
icon="đ",
)
for issue in report.errors:
_render_issue(issue)
for issue in report.warnings:
_render_issue(issue)
if report.infos:
with st.expander(f"More details ({n_info})"):
for issue in report.infos:
_render_issue(issue)