"""Streamlit-based web interface for ProteoBench."""
import copy
import logging
import uuid
from typing import Any, Dict
import pages.texts.proteobench_builder as pbb
import pandas as pd
import streamlit as st
from pages.pages_variables.Entrapment.Entrapment_DIA_ion_Astral_variables import (
VariablesDIAEntrapmentAstral,
)
from proteobench.modules.entrapment.entrapment_ion_DIA_Astral import (
DIAEntrapmentIonModuleAstral as IonModule,
)
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.exceptions import DatasetAlreadyExistsOnServerError
from proteobench.github.gh import get_submission_source, is_official_server
from proteobench.io.params import ProteoBenchParameters
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.modules.entrapment.entrapment_ion_DIA_Astral import (
DIAEntrapmentIonModuleAstral as IonModule,
)
from proteobench.utils.server_io import dataset_folder_exists
from .base import BaseUIModule
from .tabs_entrapment import (
tab1_view_public_results,
tab2_upload_results,
tab3_view_single_result,
tab4_view_public_and_new_results,
tab6_submit_results,
)
[docs]
class EntrapmentUIObjects(BaseUIModule):
    """Streamlit front end for the ProteoBench entrapment module.

    Builds the Streamlit UI elements of the page: the main layout, the input
    forms, the result displays and the data submission elements.

    Parameters
    ----------
    variables : VariablesDIAEntrapmentAstral
        The variables for the entrapment module.
    ionmodule : IonModule
        The entrapment module.
    parsesettingsbuilder : ParseSettingsBuilder
        The parse settings builder.
    page_name : str, optional
        Forwarded unchanged to ``BaseUIModule.__init__``; defaults to ``"/"``.
    """

    def __init__(
        self,
        variables: VariablesDIAEntrapmentAstral,
        ionmodule: IonModule,
        parsesettingsbuilder: ParseSettingsBuilder,
        page_name: str = "/",
    ) -> None:
        """
        Set up the Streamlit UI objects for the entrapment module.

        Parameters
        ----------
        variables : VariablesDIAEntrapmentAstral
            The variables for the entrapment module.
        ionmodule : IonModule
            The entrapment module.
        parsesettingsbuilder : ParseSettingsBuilder
            The parse settings builder.
        page_name : str, optional
            Forwarded unchanged to the base class; defaults to ``"/"``.
        """
        super().__init__(
            variables=variables,
            ionmodule=ionmodule,
            parsesettingsbuilder=parsesettingsbuilder,
            page_name=page_name,
        )

        # Per-instance UI flags, both start out cleared.
        self.first_point_plotted = False
        self.stop_duplicating = False

        # The submit flag in the session state is reset on every construction.
        st.session_state[self.variables.submit] = False
[docs]
def display_indepth_plots(self) -> None:
    """
    Display the dataset selection dropdown and plot the selected dataset (Tab 3).

    Reads the datapoints DataFrame stored in the session state under
    ``self.variables.all_datapoints``, offers the uploaded dataset plus one
    entry per stored datapoint in a selectbox, and delegates the plotting to
    ``tab3_view_single_result.generate_indepth_plots``. Shows an error and
    returns early when the session state holds no datapoints entry.
    """
    # The key is a string and links to a pandas.DataFrame.
    datapoints_key = self.variables.all_datapoints
    if datapoints_key not in st.session_state:
        st.error("No data available for plotting.", icon="🚨")
        return
    datapoints = st.session_state[datapoints_key]

    # (id, intermediate_hash) pairs offered next to the uploaded dataset.
    downloads_df = pd.DataFrame()
    if not datapoints.empty:
        # Non-inplace set_index: avoids mutating a slice of the stored frame.
        downloads_df = datapoints[["id", "intermediate_hash"]].set_index("intermediate_hash", drop=False)

    placeholder_key = self.variables.placeholder_dataset_selection_container
    if placeholder_key not in st.session_state:
        st.session_state[placeholder_key] = st.empty()

    # The widget key is created once and then reused, so the selectbox keeps
    # its selection across reruns.
    selector_key = self.variables.dataset_selector_id_uuid
    if selector_key not in st.session_state:
        st.session_state[selector_key] = uuid.uuid4()

    st.subheader("Select dataset to plot")

    dataset_options = [("Uploaded dataset", None)]
    if not downloads_df.empty:
        dataset_options.extend(zip(downloads_df["id"], downloads_df["intermediate_hash"]))

    public_id, selected_hash = st.selectbox(
        "Select dataset",
        dataset_options,
        index=0,
        key=st.session_state[selector_key],
        format_func=lambda option: option[0],
    )

    # For the uploaded dataset, reuse the fdp_curve already computed during
    # benchmarking rather than recomputing it from the intermediate DataFrame.
    plot_kwargs = {}
    if public_id == "Uploaded dataset":
        has_columns = "fdp_curve" in datapoints.columns and "old_new" in datapoints.columns
        if not datapoints.empty and has_columns:
            new_rows = datapoints[datapoints["old_new"] == "new"]
            if not new_rows.empty:
                candidate = new_rows.iloc[0]["fdp_curve"]
                # Only a non-empty dict is forwarded.
                if isinstance(candidate, dict) and candidate:
                    plot_kwargs["fdp_curve"] = candidate

    tab3_view_single_result.generate_indepth_plots(
        module=self.ionmodule,
        variables=self.variables,
        parsesettingsbuilder=self.parsesettingsbuilder,
        user_input=self.user_input,
        public_id=public_id,
        public_hash=selected_hash,
        **plot_kwargs,
    )
[docs]
def display_public_submission_ui(self) -> None:
    """
    Display the public submission section of the page.

    Warns when not running on the official server, refuses to continue when a
    folder for the uploaded dataset's intermediate hash already exists on the
    server, renders the submission form elements, and submits to the
    repository once the confirmation checkbox is ticked. A success message is
    shown when the submission returned a pull-request URL.
    """
    submission_source = get_submission_source()
    if not is_official_server():
        st.warning(
            "You are running ProteoBench locally. Submissions from local installs "
            "will be labeled as 'local' and will NOT be merged into the public dataset. "
            "To submit data for public inclusion, please use the official web server at "
            "https://proteobench.cubimed.rub.de/"
        )

    # Duplicate pre-check: look up the hash of the newly uploaded row(s) and
    # ask the server whether a folder for it already exists.
    resolved_hash = None
    already_submitted = False
    try:
        datapoints = st.session_state[self.variables.all_datapoints]
        # "old_new" is a column of the datapoints frame, not a session-state key.
        new_rows = datapoints[datapoints["old_new"] == "new"]
        if not new_rows.empty:
            resolved_hash = new_rows["intermediate_hash"].values[0]
        already_submitted = bool(resolved_hash) and dataset_folder_exists(resolved_hash)
    except Exception:
        # Fail-soft; backend will still enforce protection. The failure is
        # logged instead of being dropped silently.
        logging.getLogger(__name__).debug("Duplicate-submission pre-check failed", exc_info=True)

    # Reported outside the try block so a rendering error cannot be swallowed
    # and let the submission proceed.
    if already_submitted:
        st.error(
            f":no_entry: This dataset was already submitted. A folder for hash '{resolved_hash}' exists on the server. Submission disabled.",
            icon="🚫",
        )
        return

    # Initialize Unchecked submission box variable
    if self.variables.check_submission not in st.session_state:
        st.session_state[self.variables.check_submission] = False

    if self.variables.first_new_plot:
        self.submission_ready = tab6_submit_results.generate_submission_ui_elements(
            variables=self.variables,
            user_input=self.user_input,
        )

    if self.user_input[self.variables.meta_data]:
        params = tab6_submit_results.load_user_parameters(
            variables=self.variables,
            ionmodule=self.ionmodule,
            user_input=self.user_input,
        )
        st.session_state[self.variables.params_file_dict] = params.__dict__
        # Deep copy: keeps the parameters as parsed from the file, independent
        # of the dict stored in the session state.
        self.params_file_dict_copy = copy.deepcopy(params.__dict__)

        tab6_submit_results.generate_additional_parameters_fields_submission(
            variables=self.variables,
            user_input=self.user_input,
        )
        tab6_submit_results.generate_comments_section(
            variables=self.variables,
            user_input=self.user_input,
        )
        # ? stop_duplicating is not used?
        self.stop_duplicating = tab6_submit_results.generate_confirmation_checkbox(
            check_submission=self.variables.check_submission
        )
    else:
        params = None

    pr_url = None
    if st.session_state[self.variables.check_submission] and params is not None:
        form_values = tab6_submit_results.get_form_values(
            variables=self.variables,
        )
        params = ProteoBenchParameters(**form_values, filename=self.variables.additional_params_json)
        try:
            pr_url = tab6_submit_results.submit_to_repository(
                variables=self.variables,
                ionmodule=self.ionmodule,
                user_input=self.user_input,
                params_from_file=self.params_file_dict_copy,
                params=params,
                submission_source=submission_source,
            )
        except DatasetAlreadyExistsOnServerError as e:
            st.error(str(e), icon="🚫")
            return

    # submission_ready is only assigned above when first_new_plot is truthy;
    # treat a missing attribute as "not ready" instead of raising.
    if not getattr(self, "submission_ready", False):
        return

    if (
        st.session_state[self.variables.check_submission]
        and params is not None
        and self.variables.submit in st.session_state
        and pr_url is not None
    ):
        tab6_submit_results.show_submission_success_message(
            variables=self.variables,
            pr_url=pr_url,
        )
def _render_fdp_ratio_plot(
    self, session_key: str, metric: str, colorblind_mode: bool, is_public_only: bool = False
) -> None:
    """Render the FDP / reported-FDR ratio plot from the given session-state key.

    Parameters
    ----------
    session_key : str
        Session-state key expected to hold a ``pandas.DataFrame`` of datapoints.
    metric : str
        Passed through to ``plot_fdp_ratio``.
    colorblind_mode : bool
        Passed through to ``plot_fdp_ratio``.
    is_public_only : bool, optional
        When True, a hint is shown if the submitted-datapoints frame in the
        session state contains rows flagged ``old_new == "new"``.
    """
    all_data = st.session_state.get(session_key)
    # isinstance also rejects None (key missing from the session state).
    if not isinstance(all_data, pd.DataFrame):
        st.info("No data loaded yet — please wait for public results to load.", icon="ℹ️")
        return

    st.subheader("FDP / Reported FDR by Workflow")
    st.markdown(
        "Ratio of empirical false discovery proportion (FDP) to the FDR threshold "
        "declared for each workflow. A ratio ≤ 1 means the empirical FDP does not "
        "exceed the claimed FDR. Workflows are coloured by validity category: "
        ":green[valid] (upper bound ≤ reported FDR), "
        ":red[invalid] (lower bound > reported FDR), "
        ":orange[inconclusive] (lower bound ≤ reported FDR < upper bound). "
        "Newly uploaded results are shown with larger open markers (⬤)."
    )

    if is_public_only:
        submitted = st.session_state.get(self.variables.all_datapoints_submitted)
        if (
            isinstance(submitted, pd.DataFrame)
            and "old_new" in submitted.columns
            and (submitted["old_new"] == "new").any()
        ):
            st.info(
                "You have an uploaded result. Switch to **Tab 4 (View Public + New Results)** "
                "to see it in this plot.",
                icon="ℹ️",
            )

    if all_data.empty:
        st.info("No datapoints available to plot.", icon="ℹ️")
        return

    # Warn when a newly uploaded row has a missing, non-numeric or zero FDR.
    if "old_new" in all_data.columns:
        new_rows = all_data[all_data["old_new"] == "new"]
        if not new_rows.empty and "reported_fdr_parsed_from_input" in new_rows.columns:
            fdr_vals = pd.to_numeric(new_rows["reported_fdr_parsed_from_input"], errors="coerce")
            if (fdr_vals.isna() | (fdr_vals == 0)).any():
                st.warning(
                    "Your uploaded result has no FDR threshold set — the plot uses 0.01 as a fallback. "
                    "To see the correct ratio, re-upload and fill in the **FDR PSM** field in the form.",
                    icon="⚠️",
                )

    try:
        fdp_ratio_fig = self.ionmodule.get_plot_generator().plot_fdp_ratio(
            all_data,
            metric=metric,
            colorblind_mode=colorblind_mode,
        )
        st.plotly_chart(fdp_ratio_fig, use_container_width=True)
    except Exception as e:
        # A plotting failure is reported in the UI instead of propagating.
        st.error(f"Could not render FDP ratio plot: {e}", icon="🚨")
def _render_category_strip(self, session_key: str) -> None:
    """Render the category strip plot from the given session-state key.

    Renders nothing when the session state holds no non-empty
    ``pandas.DataFrame`` under ``session_key``.
    """
    all_data = st.session_state.get(session_key)
    # isinstance also rejects None (key missing from the session state).
    if not isinstance(all_data, pd.DataFrame) or all_data.empty:
        return

    st.subheader("Identified Features by Validity Category")
    st.markdown(
        "Points are grouped into the three validity categories based on the paired FDP bounds "
        "vs each workflow's declared PSM-level FDR. "
        "**Point colour** indicates the software tool. "
        "Within each category, points are spread horizontally for readability and sorted "
        "by number of identified features."
    )
    try:
        fig = self.ionmodule.get_plot_generator().plot_category_strip(all_data)
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        # A plotting failure is reported in the UI instead of propagating.
        st.error(f"Could not render category strip plot: {e}", icon="🚨")
def _render_fdp_id_scatter(self, session_key: str) -> None:
    """Render the FDP/FDR ratio vs number of IDs scatter from the given session-state key.

    Renders nothing when the session state holds no non-empty
    ``pandas.DataFrame`` under ``session_key``.
    """
    all_data = st.session_state.get(session_key)
    # isinstance also rejects None (key missing from the session state).
    if not isinstance(all_data, pd.DataFrame) or all_data.empty:
        return

    st.subheader("FDP/FDR Ratio vs Identified Features")
    # NOTE(review): the shape glyphs were restored from mis-decoded UTF-8. The
    # triangle is certain; the valid/invalid glyphs are a best guess — confirm
    # against the marker symbols used by plot_fdp_id_scatter.
    st.markdown(
        "Each point is one submitted workflow. "
        "**Colour** indicates the software tool; **shape** indicates the validity category "
        "(○ valid, △ inconclusive, ✕ invalid). "
        "The x-axis shows the ratio of the paired upper FDP bound to the declared PSM-level FDR; "
        "a ratio below 1 (left of the dashed line) means the empirical FDP is within the claimed threshold."
    )
    try:
        fig = self.ionmodule.get_plot_generator().plot_fdp_id_scatter(all_data)
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        # A plotting failure is reported in the UI instead of propagating.
        st.error(f"Could not render FDP/FDR scatter: {e}", icon="🚨")
def _render_forest_plot(self, session_key: str) -> None:
    """Render the FDP interval forest plot from the given session-state key.

    Renders nothing when the session state holds no non-empty
    ``pandas.DataFrame`` under ``session_key``. A radio widget, keyed per
    ``session_key``, selects the sort direction passed to ``plot_forest``.
    """
    all_data = st.session_state.get(session_key)
    # isinstance also rejects None (key missing from the session state).
    if not isinstance(all_data, pd.DataFrame) or all_data.empty:
        return

    st.subheader("FDP Interval Plot")
    # NOTE(review): the diamond, star and arrow glyphs were restored from
    # mis-decoded UTF-8; the diamond (filled vs open) is a best guess.
    st.markdown(
        "Each row is one submitted workflow. The thick horizontal bar spans the FDP "
        "uncertainty interval from the **lower bound** (left) to the **paired upper bound** "
        "(right); open circles mark both endpoints. "
        "The diamond marker (◆) shows the workflow's declared FDR threshold "
        "(``reported_fdr_parsed_from_input``); a star (★) marks newly uploaded results. "
        "Bar colour indicates the validity category: "
        "**green** = valid, **orange** = inconclusive, **red** = invalid."
    )

    sort_dir = st.radio(
        "Sort by number of identified features",
        ["Ascending ↑", "Descending ↓"],
        index=1,
        horizontal=True,
        key=f"forest_sort_{session_key}",
    )
    # Only the leading word is inspected, so the arrow glyph does not matter here.
    sort_ascending = sort_dir.startswith("Ascending")

    try:
        fig = self.ionmodule.get_plot_generator().plot_forest(
            all_data,
            sort_ascending=sort_ascending,
        )
        st.plotly_chart(fig, use_container_width=True)
    except Exception as e:
        # A plotting failure is reported in the UI instead of propagating.
        st.error(f"Could not render forest plot: {e}", icon="🚨")
[docs]
@st.fragment
def display_all_data_results_main(self) -> None:
    """Display the results for all data in Tab 1.

    Initializes the datapoints and the selectbox state, renders the intro
    expander, the forest plot, the plot options and the main results plot,
    and puts the remaining plots into a "Show more" expander.
    """
    tab1_view_public_results.initialize_main_data_points(
        variables=self.variables,
        ionmodule=self.ionmodule,
    )
    tab1_view_public_results.initialize_main_selectbox(
        selectbox_id_uuid=self.variables.selectbox_id_uuid,
        default_value="None",
    )

    with st.expander("Quick Intro", expanded=False):
        st.markdown(
            "**Entrapment benchmarking** assesses whether FDR control is well-calibrated in a "
            "proteomics database search. A set of *entrapment* peptides — derived from organisms "
            "that are absent from the sample — is added to the search database. Because these "
            "sequences cannot be present in the sample, every entrapment identification is a false "
            "positive by construction. Comparing the fraction of entrapment identifications to the "
            "declared FDR threshold reveals whether the reported FDR is accurate. "
            "The approach is described in "
            "[Wen et al. (2025), *Nature Methods*]"
            "(https://www.nature.com/articles/s41592-025-02719-x)."
        )
        st.markdown("##### FDP estimates")
        st.markdown("Two bounds on the false discovery proportion (FDP) are computed from the search results:")
        st.markdown("**Lower FDP bound** — fraction of identified entrapment peptides among all identifications:")
        st.latex(
            r"\widehat{\underline{\mathrm{FDP}}}_{\mathcal{T}\cup\mathcal{E}_{\mathcal{T}}}"
            r"= \frac{N_{\mathcal{E}}}{N_{\mathcal{T}}+N_{\mathcal{E}}}"
        )
        st.markdown(
            "**Upper FDP bound (paired method)** — additionally accounts for entrapment peptides "
            "that outscore their paired target:"
        )
        st.latex(
            r"\widehat{\mathrm{FDP}}_{\mathcal{T}\cup\mathcal{E}_{\mathcal{T}}}^{\,*}"
            r"= \frac{N_{\mathcal{E}} + N_{\mathcal{E}\,\ge\,s\,>\,\mathcal{T}}"
            r"+ 2\,N_{\mathcal{E}\,>\,\mathcal{T}\,\ge\,s}}{N_{\mathcal{T}}+N_{\mathcal{E}}}"
        )
        # NOTE(review): the separator between the symbol definitions was
        # restored from mis-decoded UTF-8 as an em dash — confirm.
        st.markdown(
            r"$N_\mathcal{E}$: identified entrapments — "
            r"$N_\mathcal{T}$: identified targets — "
            r"$N_{\mathcal{E}\,\ge\,s\,>\,\mathcal{T}}$: entrapments with no identified paired target — "
            r"$N_{\mathcal{E}\,>\,\mathcal{T}\,\ge\,s}$: pairs where the entrapment outscores the target."
        )
        st.markdown("##### Interpreting results")
        st.markdown(
            "The FDP interval is compared to the declared FDR threshold of the workflow:\n\n"
            "- **Valid** (green bar): both FDP bounds are lower than the declared FDR threshold — "
            "FDR control is valid (but overestimated).\n"
            "- **Inconclusive** (orange bar): the FDP interval includes the threshold — "
            "insufficient evidence to confirm or reject FDR calculation.\n"
            "- **Invalid** (red bar): both FDP bounds exceed the declared threshold — "
            "the reported FDR is invalid and underestimated."
        )

    self._render_forest_plot(session_key=self.variables.all_datapoints)

    st.subheader("FDR Control and Sensitivity Overview")
    st.markdown(
        "Each point represents a submitted workflow. "
        "The x-axis shows the **false discovery proportion (FDP)** — an empirical estimate "
        "of the actual FDR computed from entrapment peptides. "
        "The y-axis shows the number of identified precursor ions. "
        "Workflows to the left have better-calibrated FDR control. "
        "Use the options below to select the FDP metric and adjust the display."
    )

    # Callbacks handed to the plot options expander.
    def render_selectbox():
        tab1_view_public_results.generate_main_selectbox(
            self.variables, selectbox_id_uuid=self.variables.selectbox_id_uuid
        )

    def render_metric_selector():
        return tab1_view_public_results.display_metric_selector(self.variables)

    def render_colorblind_selector():
        return tab1_view_public_results.display_colorblindmode_selector(self.variables)

    # Render plot options expander and capture return values
    results = self.render_plot_options_expander(
        filter_callbacks=[render_selectbox],
        selector_callbacks=[render_metric_selector, render_colorblind_selector],
        filter_cols_spec=1,
        selector_cols_spec=[1, 1],
    )

    # Positions 1 and 2 of the returned sequence are read as metric and
    # colorblind flag; defaults apply when the sequence is shorter.
    metric = results[1] if len(results) > 1 else "Upper FDP bound - Paired method"
    colorblind_mode = results[2] if len(results) > 2 else False

    # The session state stores the selectbox widget key under
    # selectbox_id_uuid; the widget value is then read under that key.
    label = st.session_state.get(st.session_state.get(self.variables.selectbox_id_uuid, ""), "None")

    tab1_view_public_results.display_existing_results(
        variables=self.variables,
        ionmodule=self.ionmodule,
        plot_params={
            "metric": metric,
            "colorblind_mode": colorblind_mode,
            "label": label,
            "alpha_warning": getattr(self.variables, "alpha_warning", False),
            "beta_warning": getattr(self.variables, "beta_warning", False),
        },
    )

    with st.expander("Show more"):
        self._render_fdp_id_scatter(session_key=self.variables.all_datapoints)
        self._render_fdp_ratio_plot(
            session_key=self.variables.all_datapoints,
            metric=metric,
            colorblind_mode=colorblind_mode,
            is_public_only=True,
        )
        self._render_category_strip(session_key=self.variables.all_datapoints)
[docs]
@st.fragment
def display_all_data_results_submitted(self) -> None:
    """Display the results for all data in Tab 4.

    Initializes the submitted datapoints and the selectbox state, renders the
    forest plot, the plot options and the results plot for the submitted
    datapoints, and puts the remaining plots into a "Show more" expander.
    """
    st.title("Results (All Data)")

    tab4_view_public_and_new_results.initialize_submitted_data_points(
        variables=self.variables,
        ionmodule=self.ionmodule,
    )
    # Initialize plot options controls (same as tab 1)
    tab1_view_public_results.initialize_main_selectbox(
        selectbox_id_uuid=self.variables.selectbox_id_submitted_uuid,
        default_value="None",
    )

    self._render_forest_plot(session_key=self.variables.all_datapoints_submitted)

    st.subheader("FDR Control and Sensitivity Overview")
    st.markdown(
        "Each point represents a submitted workflow. "
        "The x-axis shows the **false discovery proportion (FDP)** — an empirical estimate "
        "of the actual FDR computed from entrapment peptides. "
        "The y-axis shows the number of identified precursor ions. "
        "Workflows to the left have better-calibrated FDR control. "
        "Use the options below to select the FDP metric and adjust the display."
    )

    # Callbacks handed to the plot options expander.
    def render_selectbox():
        tab1_view_public_results.generate_main_selectbox(
            self.variables,
            selectbox_id_uuid=self.variables.selectbox_id_submitted_uuid,
        )

    def render_metric_selector():
        # The radio widget key is created once and kept in the session state.
        key = self.variables.metric_selector_submitted_uuid
        if key not in st.session_state:
            st.session_state[key] = uuid.uuid4()
        metric_uuid = st.session_state[key]

        help_text = (
            getattr(self.variables.texts.Help, "radio_metric", None) if hasattr(self.variables, "texts") else None
        )
        return st.radio(
            "Select metric to show in x axis",
            ["Lower FDP bound", "Upper FDP bound - Paired method"],
            help=help_text,
            horizontal=True,
            key=metric_uuid,
        )

    def render_colorblind_selector():
        return tab1_view_public_results.display_colorblindmode_selector(self.variables, use_submitted=True)

    # Render plot options expander and capture return values
    results = self.render_plot_options_expander(
        filter_callbacks=[render_selectbox],
        selector_callbacks=[render_metric_selector, render_colorblind_selector],
        filter_cols_spec=1,
        selector_cols_spec=[1, 1],
    )

    # Positions 1 and 2 of the returned sequence are read as metric and
    # colorblind flag; defaults apply when the sequence is shorter.
    metric = results[1] if len(results) > 1 else "Upper FDP bound - Paired method"
    colorblind_mode = results[2] if len(results) > 2 else False

    # The session state stores the selectbox widget key under
    # selectbox_id_submitted_uuid; the widget value is then read under that key.
    label = st.session_state.get(st.session_state.get(self.variables.selectbox_id_submitted_uuid, ""), "None")

    tab4_view_public_and_new_results.display_submitted_results(
        variables=self.variables,
        ionmodule=self.ionmodule,
        plot_params={
            "metric": metric,
            "colorblind_mode": colorblind_mode,
            "label": label,
            "alpha_warning": getattr(self.variables, "alpha_warning", False),
            "beta_warning": getattr(self.variables, "beta_warning", False),
        },
    )

    with st.expander("Show more"):
        self._render_fdp_id_scatter(session_key=self.variables.all_datapoints_submitted)
        self._render_fdp_ratio_plot(
            session_key=self.variables.all_datapoints_submitted,
            metric=metric,
            colorblind_mode=colorblind_mode,
        )
        self._render_category_strip(session_key=self.variables.all_datapoints_submitted)