Source code for webinterface.pages.base_pages.denovo

"""Streamlit-based web interface for ProteoBench."""

import copy
import glob
import json
import logging
import os
import tempfile
import uuid
import zipfile
from datetime import datetime
from pprint import pformat
from typing import Any, Dict, Optional

import pages.texts.proteobench_builder as pbb
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
import streamlit_utils
from pages.pages_variables.DeNovo.DDA_HCD_variables import VariablesDDADeNovo
from streamlit_extras.let_it_rain import rain

from proteobench.exceptions import DatasetAlreadyExistsOnServerError
from proteobench.github.gh import get_submission_source, is_official_server
from proteobench.io.params import ProteoBenchParameters
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.io.parsing.utils import add_maxquant_fixed_modifications
from proteobench.modules.denovo.denovo_DDA_HCD import DDAHCDDeNovoModule as IonModule
from proteobench.utils.server_io import dataset_folder_exists

from .base import BaseUIModule
from .tabs import tab1_view_public_results as tab1
from .tabs import tab2_upload_results as tab2
from .tabs import tab2_upload_results as tab2_quant
from .tabs import tab4_view_public_and_new_results as tab4
from .tabs import tab5_compare_results
from .tabs import tab6_submit_results as tab5_quant

logger: logging.Logger = logging.getLogger(__name__)


class DeNovoUIObjects(BaseUIModule):
    """
    Main class for the Streamlit interface of ProteoBench de novo identification.

    This class handles the creation of the Streamlit UI elements, including the main page
    layout, input forms, results display, and data submission elements.

    Parameters
    ----------
    variables : VariablesDDADeNovo
        The variables for the de novo module.
    ionmodule : IonModule
        The de novo module.
    parsesettingsbuilder : ParseSettingsBuilder
        The parse settings builder.
    page_name : str, optional
        Passed through unchanged to ``BaseUIModule.__init__``. Defaults to ``"/"``.
    """

    def __init__(
        self,
        variables: VariablesDDADeNovo,
        ionmodule: IonModule,
        parsesettingsbuilder: ParseSettingsBuilder,
        page_name: str = "/",
    ) -> None:
        """
        Initialize the Streamlit UI objects for the de novo modules.

        Parameters
        ----------
        variables : VariablesDDADeNovo
            The variables for the de novo module.
        ionmodule : IonModule
            The de novo module.
        parsesettingsbuilder : ParseSettingsBuilder
            The parse settings builder.
        page_name : str, optional
            Passed through unchanged to ``BaseUIModule.__init__``. Defaults to ``"/"``.
        """
        super().__init__(
            variables=variables,
            ionmodule=ionmodule,
            parsesettingsbuilder=parsesettingsbuilder,
            page_name=page_name,
        )
        # Specific to the 'de novo' module: map the labels shown in the radio widgets
        # to the keyword values handed to the plotting code.
        self.level_mapping = {"Precision": "precision", "Recall": "recall"}
        self.level_mapping_submitted = {"Precision": "precision", "Recall": "recall"}
        self.evaluation_type_mapping = {"Exact": "exact", "Mass-based": "mass"}

    @st.fragment
    def display_all_data_results_main(self):
        """Display the results for all data in Tab 1."""
        st.title("Results (All Data)")

        # Initialize selectbox
        tab1.initialize_main_selectbox(
            selectbox_id_uuid=self.variables.selectbox_id_uuid,
            default_value="None",
        )
        # Radio for level (Precision or Recall)
        tab1.initialize_radio(
            radio_id_uuid=self.variables.radio_level_id_uuid,
            default_value=self.variables.default_level,
        )
        # Radio for evaluation type (Exact or Mass-Based)
        tab1.initialize_radio(
            radio_id_uuid=self.variables.radio_evaluation_id_uuid,
            default_value=self.variables.default_evaluation,
        )

        # Define callbacks for plot options
        def render_selectbox():
            tab1.generate_main_selectbox(self.variables, selectbox_id_uuid=self.variables.selectbox_id_uuid)

        def render_level_radio():
            tab1.generate_main_radio(
                radio_id_uuid=self.variables.radio_level_id_uuid,
                description="Select the classification metric",
                options=["Precision", "Recall"],
                help=self.variables.texts.Help.radio_level,
            )

        def render_evaluation_radio():
            tab1.generate_main_radio(
                radio_id_uuid=self.variables.radio_evaluation_id_uuid,
                description="Select the stringency of evaluation",
                options=["Exact", "Mass-based"],
                help=self.variables.texts.Help.radio_evaluation,
            )

        def render_colorblind_selector():
            return tab1.display_colorblindmode_selector(self.variables)

        # Render plot options expander
        results = self.render_plot_options_expander(
            filter_callbacks=[render_selectbox],
            selector_callbacks=[render_level_radio, render_evaluation_radio, render_colorblind_selector],
            filter_cols_spec=1,
            selector_cols_spec=[1, 1, 1, 1],
        )

        # Extract colorblind mode from results (fourth entry when present)
        colorblind_mode = results[3] if len(results) > 3 else False

        # Each *_uuid variable names a session-state entry that holds the actual widget key,
        # hence the two-step lookup; the fallbacks apply when a widget has no stored value.
        selected_label = st.session_state.get(st.session_state.get(self.variables.selectbox_id_uuid, ""), "None")
        selected_level = st.session_state.get(
            st.session_state.get(self.variables.radio_level_id_uuid, ""), "Precision"
        )
        selected_evaluation = st.session_state.get(
            st.session_state.get(self.variables.radio_evaluation_id_uuid, ""), "Exact"
        )

        tab1.display_existing_results(
            variables=self.variables,
            ionmodule=self.ionmodule,
            plot_params={
                "label": selected_label,
                "level": self.level_mapping[selected_level],
                "evaluation_type": self.evaluation_type_mapping[selected_evaluation],
                "colorblind_mode": colorblind_mode,
                "alpha_warning": getattr(self.variables, "alpha_warning", False),
                "beta_warning": getattr(self.variables, "beta_warning", False),
            },
            use_slider=False,
        )

    def display_submission_form(self) -> None:
        """Create the main submission form for the Streamlit UI in Tab 2."""
        # Display software selector and AlphaDIA info outside the form so it updates immediately
        tab2_quant.show_software_selector_and_alphadia_info(
            variables=self.variables,
            parsesettingsbuilder=self.parsesettingsbuilder,
            user_input=self.user_input,
        )

        with st.form(key="main_form"):
            tab2_quant.generate_input_fields(
                user_input=self.user_input,
            )
            # TODO: Investigate the necessity of generating additional parameters fields in the first tab.
            tab2_quant.generate_additional_parameters_fields(
                variables=self.variables,
                user_input=self.user_input,
            )
            st.markdown(self.variables.texts.ShortMessages.run_instructions)
            submit_button = st.form_submit_button(
                "Parse and bench",
                help=self.variables.texts.Help.parse_button,
            )

        # NOTE(review): the submit handling is placed after the form block; the scraped
        # source lost its indentation, so confirm this nesting against the repository.
        if submit_button:
            # Clear any previously uploaded parameter file and widget state so Tab 5
            # starts fresh for the new tool/file combination.
            st.session_state[self.variables.params_file_dict] = {}

            # Erase the old file uploader's stored file before cycling the UUID,
            # so the uploader cannot resurrect the old file even if the key persists.
            previous_uploader_key = st.session_state.get(self.variables.meta_file_uploader_uuid)
            if previous_uploader_key is not None:
                st.session_state.pop(previous_uploader_key, None)
            st.session_state[self.variables.meta_file_uploader_uuid] = uuid.uuid4()

            # Drop the stored value of every parameter widget listed in the JSON schema.
            with open(self.variables.additional_params_json, encoding="utf-8") as schema_file:
                param_config = json.load(schema_file)
            for param_key in param_config:
                st.session_state.pop(self.variables.prefix_params + param_key, None)

            st.info(
                f"Calculating metrics for {self.user_input['input_format']}. "
                "This will take around two minutes. Please be patient."
            )
            self.first_point_plotted = tab2.process_submission_form(
                variables=self.variables,
                ionmodule=self.ionmodule,
                user_input=self.user_input,
            )

    # Almost entirely unique to denovo module
    def display_indepth_plots(self) -> None:
        """
        Display the dataset selection dropdown and plot the selected dataset (Tab 3).
        """
        if self.variables.all_datapoints_submitted not in st.session_state:
            tab2.initialize_main_data_points(variables=self.variables, ionmodule=self.ionmodule)
            st.session_state[self.variables.all_datapoints_submitted] = self.ionmodule.obtain_all_data_points(
                all_datapoints=st.session_state[self.variables.all_datapoints]
            )

        # The key is guaranteed to exist from here on, so only emptiness needs checking.
        all_datapoints_df = st.session_state[self.variables.all_datapoints_submitted]
        if all_datapoints_df.empty:
            st.error("No data available for plotting.", icon="🚨")
            return

        # Non-inplace set_index: avoids mutating a column slice of the session-state frame.
        downloads_df = all_datapoints_df[["id", "intermediate_hash"]].set_index("intermediate_hash", drop=False)

        # Create the selector key only once so the multiselect keeps its state across reruns.
        if self.variables.placeholder_dataset_selection_container not in st.session_state:
            st.session_state[self.variables.placeholder_dataset_selection_container] = st.empty()
            st.session_state[self.variables.dataset_selector_id_uuid] = uuid.uuid4()

        st.subheader("Select dataset to plot")

        # Options are (display name, hash) pairs; a hash of None marks the uploaded dataset.
        dataset_options = [("Uploaded dataset", None)] + list(
            zip(downloads_df["id"], downloads_df["intermediate_hash"])
        )
        dataset_selection = st.multiselect(
            label="Select datasets",
            options=dataset_options,
            key=st.session_state[self.variables.dataset_selector_id_uuid],
            format_func=lambda x: x[0],
            default=[dataset_options[0]],
            help=self.variables.texts.Help.dataset_selection_indepth,
        )

        # Use default values for plot rendering (no user controls on this tab)
        levels = ["precision", "recall"]
        evaluation_types = ["exact", "mass"]
        colorblind_mode = False
        modifications = [
            "M-Oxidation",
            "Q-Deamidation",
            "N-Deamidation",
            "N-term Acetylation",
            "N-term Carbamylation",
            "N-term Ammonia-loss",
        ]
        feature_names = ["Missing Fragmentation Sites", "Peptide Length", "% Explained Intensity"]

        # Handle dataset selection - separate uploaded data from public data.
        # Frames are collected first and concatenated once, instead of growing an
        # initially empty DataFrame inside the loop.
        selected_frames = []
        for _dtp_id, dtp_hash in dataset_selection:
            if dtp_hash is None:
                # "Uploaded dataset" case: rows marked as "new"
                row_mask = all_datapoints_df["old_new"] == "new"
            else:
                # Public dataset, looked up by hash
                row_mask = all_datapoints_df["intermediate_hash"] == dtp_hash
            selected_frames.append(all_datapoints_df[row_mask])
        selected_dtps = pd.concat(selected_frames, ignore_index=True) if selected_frames else pd.DataFrame()

        if selected_dtps.empty:
            st.info("No datasets selected for plotting.")
            return

        # Generate in-depth plots using plot generator
        plot_generator = self.ionmodule.get_plot_generator()

        # De Novo-specific parameters (the fixed defaults defined above)
        plot_kwargs = {
            "mod_labels": modifications,
            "feature": feature_names,
            "level": levels,
            "evaluation_type": evaluation_types,
            "colorblind_mode": colorblind_mode,
        }

        try:
            # Generate all plots
            plots = plot_generator.generate_in_depth_plots(selected_dtps, **plot_kwargs)

            # Display plots using layout from plot generator
            layout = plot_generator.get_in_depth_plot_layout()
            descriptions = plot_generator.get_in_depth_plot_descriptions()

            for section in layout:
                st.subheader(section.get("title", ""))
                for plot_name in section["plots"]:
                    if plot_name not in plots:
                        continue
                    if plot_name in descriptions:
                        st.caption(descriptions[plot_name])
                    self._display_indepth_plot(plot_name=plot_name, figs=plots[plot_name])
        except Exception as e:
            # UI boundary: report the failure in the page instead of crashing the app.
            st.error(f"Error generating in-depth plots: {e}", icon="🚨")
            import traceback

            with st.expander("Error details"):
                st.code(traceback.format_exc())

    def _display_ptm_overview(self, figs) -> None:
        """Render the overview PTM figure below its collapsible description."""
        with st.expander("Description"):
            st.markdown(self.variables.texts.Description.ptm_overview)
        st.plotly_chart(figs, use_container_width=True)

    def _display_ptm_specific(self, figs) -> None:
        """Render one tab per modification; ``figs`` maps modification label to figure."""
        with st.expander("Description"):
            st.markdown(self.variables.texts.Description.ptm_specific)

        modification_labels = list(figs.keys())
        tabs = st.tabs(modification_labels)
        for mod_label, tab in zip(modification_labels, tabs):
            with tab:
                st.header(mod_label)
                st.plotly_chart(
                    figs[mod_label],
                    key=f"ptm_plot_{mod_label}",
                    use_container_width=True,
                )

    def _display_spectrum_features(self, figs) -> None:
        """Render one tab per spectrum feature; ``figs[feature]`` is keyed by ``"exact"``/``"mass"``."""
        feature_names = list(figs.keys())

        exact_mode = st.toggle(
            label="Exact evaluation mode",
            value=False,
            key=self.variables.evaluation_mode_toggle_tab3_features,
        )
        evaluation_type = "exact" if exact_mode else "mass"

        with st.expander("Description"):
            st.markdown(self.variables.texts.Description.spectrum_features_overview)

        tabs = st.tabs(feature_names)
        for feature_name, tab in zip(feature_names, tabs):
            with tab:
                st.header(feature_name)
                st.plotly_chart(figs[feature_name][evaluation_type], use_container_width=True)

    def _display_species_overview(self, figs) -> None:
        """Render the species overview figure; ``figs`` is keyed by ``"exact"``/``"mass"``."""
        with st.expander("Description"):
            st.markdown(self.variables.texts.Description.species)

        exact_mode = st.toggle(
            label="Exact evaluation mode",
            value=False,
            key=self.variables.evaluation_mode_toggle_tab3_species,
        )
        evaluation_type = "exact" if exact_mode else "mass"

        st.plotly_chart(figs[evaluation_type], use_container_width=True, key=self.variables.fig_species_overview)

    def _display_indepth_plot(self, plot_name: str, figs) -> None:
        """
        Dispatch an in-depth plot to the renderer registered for its name.

        Parameters
        ----------
        plot_name : str
            One of ``"ptm_overview"``, ``"ptm_specific"``, ``"spectrum_feature"`` or
            ``"species_overview"``.
        figs : Any
            The figure, or mapping of figures, handed to the matching renderer.

        Raises
        ------
        NotImplementedError
            If ``plot_name`` has no renderer.
        """
        renderers = {
            "ptm_overview": self._display_ptm_overview,
            "ptm_specific": self._display_ptm_specific,
            "spectrum_feature": self._display_spectrum_features,
            "species_overview": self._display_species_overview,
        }
        renderer = renderers.get(plot_name)
        if renderer is None:
            raise NotImplementedError("Cannot display non-implemented in-depth plot.")
        renderer(figs)

    @st.fragment
    def display_all_data_results_submitted(self) -> None:
        """Display the results for all data in Tab 4."""
        st.title("Results (All Data)")

        # Initialize selectbox
        tab1.initialize_main_selectbox(
            selectbox_id_uuid=self.variables.selectbox_id_submitted_uuid,
            default_value="None",
        )
        # Radio one for precision or recall
        tab1.initialize_radio(
            radio_id_uuid=self.variables.radio_level_id_submitted_uuid,
            default_value=self.variables.default_level,
        )
        # Radio two for evaluation stringency
        tab1.initialize_radio(
            radio_id_uuid=self.variables.radio_evaluation_id_submitted_uuid,
            default_value=self.variables.default_evaluation,
        )

        # Define callbacks for plot options
        def render_selectbox():
            tab1.generate_main_selectbox(
                variables=self.variables, selectbox_id_uuid=self.variables.selectbox_id_submitted_uuid
            )

        def render_level_radio():
            tab1.generate_main_radio(
                radio_id_uuid=self.variables.radio_level_id_submitted_uuid,
                description="Select the classification metric",
                options=["Precision", "Recall"],
                help=self.variables.texts.Help.radio_level,
            )

        def render_evaluation_radio():
            tab1.generate_main_radio(
                radio_id_uuid=self.variables.radio_evaluation_id_submitted_uuid,
                description="Select the stringency of evaluation",
                options=["Exact", "Mass-based"],
                help=self.variables.texts.Help.radio_evaluation,
            )

        def render_colorblind_selector():
            return tab1.display_colorblindmode_selector(self.variables, use_submitted=True)

        # Render plot options expander
        results = self.render_plot_options_expander(
            filter_callbacks=[render_selectbox],
            selector_callbacks=[render_level_radio, render_evaluation_radio, render_colorblind_selector],
            filter_cols_spec=1,
            selector_cols_spec=[1, 1, 1, 1],
        )

        # Extract colorblind mode from results (fourth entry when present)
        colorblind_mode = results[3] if len(results) > 3 else False

        # Get current selections from session state
        label = st.session_state.get(st.session_state.get(self.variables.selectbox_id_submitted_uuid, ""), "None")
        level = self.level_mapping[
            st.session_state.get(st.session_state.get(self.variables.radio_level_id_submitted_uuid, ""), "Precision")
        ]
        evaluation_type = self.evaluation_type_mapping[
            st.session_state.get(st.session_state.get(self.variables.radio_evaluation_id_submitted_uuid, ""), "Exact")
        ]

        # Plot the datapoints
        tab4.display_submitted_results(
            self.variables,
            self.ionmodule,
            plot_params={
                "label": label,
                "level": level,
                "evaluation_type": evaluation_type,
                "colorblind_mode": colorblind_mode,
            },
        )

        # A fresh key is generated on every run; the edit callback reads it back
        # from session state to find the editor's recorded changes.
        st.session_state[self.variables.table_id_uuid] = uuid.uuid4()
        st.data_editor(
            st.session_state[self.variables.all_datapoints_submitted],
            key=st.session_state[self.variables.table_id_uuid],
            on_change=self._handle_submitted_table_edits,
        )

        st.title("Public submission")
        st.markdown(
            "If you want to make this point — and the associated data — publicly available, "
            "please go to “Public Submission”"
        )

    def display_workflow_comparison(self) -> None:
        """Display the workflow comparison tab."""
        tab5_compare_results.display_workflow_comparison(
            variables=self.variables,
            ionmodule=self.ionmodule,
        )

    @staticmethod
    def _sanitize_param_value(value, missing):
        """
        Return ``value`` in a form that can be stored for a Streamlit text widget.

        Parameters
        ----------
        value : Any
            Raw parameter value; may be ``None``/``np.nan`` for a missing field.
        missing : Any
            Placeholder returned when ``value`` is missing.

        Returns
        -------
        Any
            ``missing`` for missing values, ``value`` itself if it is a ``str``,
            otherwise ``str(value)``.
        """
        try:
            # bool() is inside the try on purpose: pd.isna() returns an array for
            # list-like input, and its truth value raises ValueError.
            is_missing = bool(pd.isna(value))
        except (TypeError, ValueError):
            is_missing = False
        if is_missing:
            return missing
        return value if isinstance(value, str) else str(value)

    def display_public_submission_ui(self) -> None:
        """Display the public submission section of the page in Tab 5."""
        submission_source = get_submission_source()

        if not is_official_server():
            st.warning(
                "You are running ProteoBench locally. Submissions from local installs "
                "will be labeled as 'local' and will NOT be merged into the public dataset. "
                "To submit data for public inclusion, please use the official web server at "
                "https://proteobench.cubimed.rub.de/"
            )

        try:
            # Select the newly benchmarked row(s) with a boolean mask on the frame itself.
            # NOTE(review): Tab 3 reads "new" rows from `all_datapoints_submitted`;
            # confirm `all_datapoints` is the intended frame here.
            all_datapoints = st.session_state[self.variables.all_datapoints]
            new_points = all_datapoints[all_datapoints["old_new"] == "new"]
            resolved_hash = new_points["intermediate_hash"].values[0]
            if resolved_hash and dataset_folder_exists(resolved_hash):
                st.error(
                    f":no_entry: This dataset was already submitted. A folder for hash '{resolved_hash}' exists on the server. Submission disabled.",
                    icon="🚫",
                )
                return
        except Exception:
            # Fail-soft; backend will still enforce protection
            logger.debug("Could not pre-check whether the dataset already exists on the server.", exc_info=True)

        # Initialize Unchecked submission box variable
        if self.variables.check_submission not in st.session_state:
            st.session_state[self.variables.check_submission] = False

        self.submission_ready = tab5_quant.generate_submission_ui_elements(
            variables=self.variables,
            user_input=self.user_input,
        )

        # Parse parameter file if uploaded so parsed values pre-populate the fields below.
        # If no file is provided the fields render with schema defaults for manual entry.
        self.params_file_dict_copy = {}
        if self.user_input[self.variables.meta_data]:
            params_from_file = tab5_quant.load_user_parameters(
                variables=self.variables,
                ionmodule=self.ionmodule,
                user_input=self.user_input,
            )
            if params_from_file is not None:
                parsed_params = vars(params_from_file)
                st.session_state[self.variables.params_file_dict] = parsed_params
                # Snapshot taken before `software_name` is overridden further down.
                self.params_file_dict_copy = copy.deepcopy(parsed_params)
                # Directly update widget session state keys so Streamlit uses the parsed values.
                # Without this, Streamlit ignores the `value` arg on widgets whose keys already exist
                # (registered from the pre-upload render of the always-visible fields).
                # Values must be sanitized: ProteoBenchParameters stores np.nan for missing fields,
                # which Streamlit cannot assign to a protobuf string field.
                for key, val in parsed_params.items():
                    st.session_state[self.variables.prefix_params + key] = self._sanitize_param_value(
                        val, missing=None
                    )

        # Make sure the dict exists (e.g. Tab 5 opened before any form submission).
        if self.variables.params_file_dict not in st.session_state:
            st.session_state[self.variables.params_file_dict] = {}

        # Always override software_name with the active input_format. This must come after
        # the parameter file re-application above, which does a full dict replacement and
        # would otherwise overwrite this value.
        st.session_state[self.variables.params_file_dict]["software_name"] = self.user_input["input_format"]

        # Explicitly write every parameter widget's session state to the desired value
        # (YAML-parsed value if available, otherwise JSON default for the current tool).
        # This is necessary because generate_additional_parameters_fields_submission contains
        # `on_change=func(args)` — Python evaluates these arguments immediately on every render,
        # reading stale browser-sent session state and rewriting params_file_dict with old values.
        # By explicitly owning the session state keys here, before the widgets render, we
        # prevent any browser-side stale value from slipping through.
        with open(self.variables.additional_params_json, encoding="utf-8") as schema_file:
            param_schema = json.load(schema_file)

        file_dict = st.session_state.get(self.variables.params_file_dict, {})
        for field_key, field_schema in param_schema.items():
            widget_key = self.variables.prefix_params + field_key
            if field_key in file_dict:
                # Missing values become "" and non-strings are stringified,
                # matching what st.text_input expects.
                st.session_state[widget_key] = self._sanitize_param_value(file_dict[field_key], missing="")
            else:
                default = field_schema.get("value", {}).get(self.user_input.get("input_format", ""), None)
                st.session_state[widget_key] = default if default is not None else ""

        # Always show parameter fields, comments, and confirmation checkbox.
        tab5_quant.generate_additional_parameters_fields_submission(
            variables=self.variables,
            user_input=self.user_input,
        )
        tab5_quant.generate_comments_section(
            variables=self.variables,
            user_input=self.user_input,
        )

        # ? stop_duplicating is not used?
        self.stop_duplicating = tab5_quant.generate_confirmation_checkbox(
            check_submission=self.variables.check_submission
        )

        params = None
        pr_url = None
        if st.session_state[self.variables.check_submission]:
            form_values = tab5_quant.get_form_values(
                variables=self.variables,
            )
            params = ProteoBenchParameters(**form_values, filename=self.variables.additional_params_json)
            try:
                pr_url = tab5_quant.submit_to_repository(
                    variables=self.variables,
                    ionmodule=self.ionmodule,
                    user_input=self.user_input,
                    params_from_file=self.params_file_dict_copy,
                    params=params,
                    submission_source=submission_source,
                )
            except DatasetAlreadyExistsOnServerError as e:
                st.error(str(e), icon="🚫")
                return

        if not self.submission_ready:
            return

        if (
            st.session_state[self.variables.check_submission]
            and params is not None
            and self.variables.submit in st.session_state
            and pr_url is not None
        ):
            tab5_quant.show_submission_success_message(
                variables=self.variables,
                pr_url=pr_url,
            )

    #####################
    ### TAB 4 METHODS ###
    #####################

    def _handle_submitted_table_edits(self) -> None:
        """
        Callback function for handling edits made to the data table in the UI.

        Writes the edited cells back into the submitted-datapoints frame, refreshes the
        highlight list and table placeholder, then regenerates the main metric figure.
        If plotting fails, the error is shown and the stored figure is left untouched.
        """
        table_key = st.session_state[self.variables.table_id_uuid]
        edited_rows = st.session_state[table_key]["edited_rows"]
        submitted_df = st.session_state[self.variables.all_datapoints_submitted]

        for row_position, changes in edited_rows.items():
            try:
                # Single .iloc write per cell: chained `df[col].iloc[k] = v` may assign
                # to a temporary copy and leave the stored frame unchanged.
                for column, new_value in changes.items():
                    submitted_df.iloc[row_position, submitted_df.columns.get_loc(column)] = new_value
            except TypeError:
                return

        st.session_state[self.variables.highlight_list_submitted] = list(submitted_df["Highlight"])
        st.session_state[self.variables.placeholder_table] = submitted_df

        if len(st.session_state[self.variables.all_datapoints]) == 0:
            st.error("No datapoints available for plotting", icon="🚨")

        try:
            # Get plot generator from module (following Quant pattern)
            plot_generator = self.ionmodule.get_plot_generator()

            # Get colorblind mode from session state
            colorblind_key = self.variables.colorblind_mode_selector_uuid
            if colorblind_key in st.session_state:
                colorblind_mode = st.session_state.get(st.session_state[colorblind_key], False)
            else:
                colorblind_mode = False

            # NOTE(review): this Tab 4 callback reads the Tab 1 widget keys and
            # `all_datapoints`, not their *_submitted counterparts — confirm intent.
            fig_metric = plot_generator.plot_main_metric(
                result_df=st.session_state[self.variables.all_datapoints],
                hide_annot=False,
                label=st.session_state[st.session_state[self.variables.selectbox_id_uuid]],
                level=self.level_mapping[st.session_state[st.session_state[self.variables.radio_level_id_uuid]]],
                evaluation_type=self.evaluation_type_mapping[
                    st.session_state[st.session_state[self.variables.radio_evaluation_id_uuid]]
                ],
                colorblind_mode=colorblind_mode,
            )
        except Exception as e:
            st.error(f"Unable to plot the datapoints: {e}", icon="🚨")
        else:
            # Store the figure only on success; otherwise `fig_metric` would be unbound here.
            st.session_state[self.variables.fig_metric] = fig_metric