# Source code for webinterface.pages.base_pages.tab1_results

import os
import uuid
from typing import Callable

import streamlit as st
import pandas as pd
import re

from proteobench.plotting.plot_quant import PlotDataPoint
from st_aggrid import GridOptionsBuilder, AgGrid, JsCode

from .filter import filter_data_using_slider

# === Table Color Constants ===
# Cell background colors (hex strings), one per column category. They are
# consumed by ``configure_aggrid``, which passes the matching constant to
# ``get_style_js`` for every column. The values alternate between two shades
# from one category to the next.
COLOR_IDENTIFIER = "#F0F2F6"  # identifier columns ("selected", "id")
COLOR_PARAMETER = "#FFFFFF"  # columns listed as parameters in configure_aggrid
COLOR_RESULT = "#F0F2F6"  # columns listed as results in configure_aggrid
COLOR_TECHNICAL = "#FFFFFF"  # columns listed as technical in configure_aggrid
COLOR_ADDITIONAL = "#F0F2F6"  # any column that is in none of the lists above


def initialize_main_slider(slider_id_uuid: str, default_val_slider: float) -> None:
    """
    Make sure the main slider has both a widget key and a stored value.

    Two session-state entries are involved: ``slider_id_uuid`` maps to a
    generated UUID, and that UUID in turn maps to the slider value. Each entry
    is only created when it is missing, so existing state is left untouched.

    Parameters
    ----------
    slider_id_uuid : str
        Session-state key under which the slider's UUID is stored.
    default_val_slider : float
        Value stored for the slider when it has no value yet.
    """
    state = st.session_state

    # First level: the UUID that identifies the slider widget.
    if slider_id_uuid not in state:
        state[slider_id_uuid] = uuid.uuid4()

    # Second level: the value stored under that UUID.
    slider_key = state[slider_id_uuid]
    if slider_key not in state:
        state[slider_key] = default_val_slider
def generate_main_slider(slider_id_uuid: str, description_slider_md: str, default_val_slider: float) -> None:
    """
    Render the slider description followed by the slider input.

    Parameters
    ----------
    slider_id_uuid : str
        Session-state key under which the slider's UUID is stored. A new UUID
        is generated when the key is not present yet.
    description_slider_md : str
        Path to a Markdown file whose content is rendered above the slider.
    default_val_slider : float
        Initial slider value, used when session state holds no value for the
        slider yet.

    Raises
    ------
    OSError
        If the description file cannot be opened.
    """
    # key for slider_uuid in session state
    if slider_id_uuid not in st.session_state:
        st.session_state[slider_id_uuid] = uuid.uuid4()
    slider_key = st.session_state[slider_id_uuid]

    # Read the description inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(description_slider_md, "r") as description_file:
        description = description_file.read()
    st.markdown(description)

    # Prefer a value already stored for this slider over the configured default.
    default_value = st.session_state.get(slider_key, default_val_slider)
    st.select_slider(
        label="Minimal precursor quantifications (# samples)",
        options=[1, 2, 3, 4, 5, 6],
        value=default_value,
        key=slider_key,
    )
def generate_main_selectbox(variables_quant, selectbox_id_uuid) -> None:
    """
    Render the selectbox used to choose the label to plot.

    Parameters
    ----------
    variables_quant : object
        Provides ``metric_plot_labels``, the options offered by the selectbox.
    selectbox_id_uuid : str
        Session-state key under which the selectbox's UUID is stored. A new
        UUID is generated when the key is not present yet.

    Notes
    -----
    Any exception raised while building the widget is reported with
    ``st.error`` instead of being propagated.
    """
    state = st.session_state
    if selectbox_id_uuid not in state:
        state[selectbox_id_uuid] = uuid.uuid4()

    try:
        # TODO: Other labels based on different modules, e.g. mass tolerances are less relevant for DIA
        labels = variables_quant.metric_plot_labels
        widget_key = state[selectbox_id_uuid]
        st.selectbox("Select label to plot", labels, key=widget_key)
    except Exception as e:
        st.error(f"Unable to create the selectbox: {e}", icon="🚨")
def display_download_section(variables_quant, reset_uuid=False) -> None:
    """
    Render the selector and area for raw data download.

    A selectbox lists the ``intermediate_hash`` of every data point, ordered
    by and labelled with the data point's ``id``. Once a dataset is selected,
    one download button is shown for every regular file found in
    ``<storage dir>/<intermediate_hash>``.

    Parameters
    ----------
    variables_quant : object
        Provides the session-state keys ``all_datapoints``,
        ``placeholder_downloads_container`` and ``download_selector_id_uuid``.
    reset_uuid : bool, optional
        Whether to reset the UUID, by default False.
    """
    all_datapoints = st.session_state[variables_quant.all_datapoints]
    if len(all_datapoints) == 0:
        st.error("No data available for download.", icon="🚨")
        return

    downloads_df = all_datapoints[["id", "intermediate_hash"]]

    if variables_quant.placeholder_downloads_container not in st.session_state or reset_uuid:
        # The placeholder is only stored in session state here; the section
        # below is rendered directly on the page (wrapping it in the
        # placeholder's container is currently disabled).
        st.session_state[variables_quant.placeholder_downloads_container] = st.empty()
        st.session_state[variables_quant.download_selector_id_uuid] = uuid.uuid4()

    st.subheader("Download raw datasets")

    # Pair every id with its hash and order the pairs by id. ``sorted`` is
    # stable, so rows with equal ids keep their original relative order.
    datasets = sorted(
        zip(downloads_df["id"], downloads_df["intermediate_hash"]),
        key=lambda pair: pair[0],
    )
    sorted_intermediate_hash = [intermediate_hash for _, intermediate_hash in datasets]

    # Constant-time label lookup; ``setdefault`` keeps the first id for a
    # repeated hash, which is what a ``list.index`` based lookup would return.
    id_by_hash = {}
    for dataset_id, intermediate_hash in datasets:
        id_by_hash.setdefault(intermediate_hash, dataset_id)

    selector_key = st.session_state[variables_quant.download_selector_id_uuid]
    st.selectbox(
        "Select dataset",
        sorted_intermediate_hash,
        index=None,
        key=selector_key,
        format_func=lambda x: id_by_hash[x],
    )

    selected_hash = st.session_state[selector_key]
    if selected_hash is None:
        return
    storage_dir = st.secrets["storage"]["dir"]
    if storage_dir is None:
        return

    dataset_path = os.path.join(storage_dir, selected_hash)
    if not os.path.isdir(dataset_path):
        st.write(
            "Directory for this dataset does not exist, this should not happen"
            " on the server, but is expected in the local development environment."
        )
        return

    for file_name in os.listdir(dataset_path):
        path_to_file = os.path.join(dataset_path, file_name)
        if not os.path.isfile(path_to_file):
            # Sub-directories cannot be opened for reading; skip them rather
            # than failing the whole section.
            continue
        with open(path_to_file, "rb") as file:
            st.download_button(file_name, file, file_name=file_name)
def display_existing_results(variables_quant, ionmodule) -> None:
    """
    Display the existing quantification results.

    The steps run in a fixed order: load and filter the data points, show the
    metric selector and the metric plot, render the results table with a
    CSV export button, and finally show the raw-data download section.

    Parameters
    ----------
    variables_quant : object
        Holds the session-state keys and configuration used by the individual
        steps; its ``filtered_data`` attribute is set by the filtering step
        and read back here.
    ionmodule : object
        Module object handed to the data loading and filtering step.
    """
    initialize_and_filter_data(variables_quant, ionmodule)
    filtered_points = variables_quant.filtered_data

    # Plot: the point selected in the plot (if any) is highlighted in the table.
    selected_metric = display_metric_selector()
    highlighted_id = render_metric_plot(filtered_points, variables_quant, selected_metric)

    # Table and its CSV export.
    table = prepare_display_dataframe(filtered_points, highlighted_id)
    render_aggrid(table, configure_aggrid(table))
    offer_download(table)

    display_download_section(variables_quant=variables_quant)
# === Modular Functions ===
def initialize_and_filter_data(variables_quant, ionmodule):
    """
    Load the data points and store the slider-filtered subset.

    Parameters
    ----------
    variables_quant : object
        Provides the ``all_datapoints`` and ``slider_id_uuid`` keys. The
        filtered result is written to its ``filtered_data`` attribute.
    ionmodule : object
        Provides the ``obtain_all_data_points`` and ``filter_data_point``
        callables.
    """
    initialize_main_data_points(
        all_datapoints=variables_quant.all_datapoints,
        obtain_all_data_points=ionmodule.obtain_all_data_points,
    )

    filtered = filter_data_using_slider(
        slider_id_uuid=variables_quant.slider_id_uuid,
        all_datapoints=variables_quant.all_datapoints,
        filter_data_point=ionmodule.filter_data_point,
    )
    variables_quant.filtered_data = filtered
def display_metric_selector() -> str:
    """
    Show a radio button group for choosing the metric to plot.

    Returns
    -------
    str
        The value returned by ``st.radio`` for the options "Median" and
        "Mean".
    """
    metric_choices = ["Median", "Mean"]
    chosen_metric = st.radio(
        "Select metric to plot",
        options=metric_choices,
        help="Toggle between median and mean absolute difference metrics.",
    )
    return chosen_metric
def render_metric_plot(data: pd.DataFrame, variables_quant, metric: str) -> str | None:
    """
    Show the metric plot and report which point, if any, is selected in it.

    Parameters
    ----------
    data : pd.DataFrame
        The filtered dataset to plot.
    variables_quant : object
        Provides ``selectbox_id_uuid``, the session-state key that leads to
        the currently selected plot label.
    metric : str
        Metric to plot ("Median" or "Mean").

    Returns
    -------
    str or None
        The ProteoBench ID parsed from the hover text of the first selected
        point. None when there is no data, no selection, no ID in the hover
        text, or when plotting failed.

    Notes
    -----
    Any exception raised while plotting or reading the selection is reported
    with ``st.error`` instead of being propagated.
    """
    if len(data) == 0:
        st.error("No datapoints available for plotting", icon="🚨")
        return None

    highlight_point_id = None
    try:
        fig_metric = PlotDataPoint.plot_metric(
            data,
            label=st.session_state[st.session_state[variables_quant.selectbox_id_uuid]],
            metric=metric,
        )
        event_dict = st.plotly_chart(
            fig_metric,
            use_container_width=True,
            on_select="rerun",
            selection_mode="points",
        )

        # Only the first selected point is considered.
        selected_point = None
        if "selection" in event_dict and "points" in event_dict["selection"]:
            if event_dict["selection"]["points"]:
                selected_point = event_dict["selection"]["points"][0]

        if selected_point:
            hover = selected_point.get("hovertext", "")
            # The ID runs up to the next whitespace character or "<".
            match = re.search(r"ProteoBench ID: ([^\s<]+)", hover)
            if match:
                highlight_point_id = match.group(1)
    except Exception as e:
        st.error(f"Unable to plot the datapoints: {e}", icon="🚨")

    return highlight_point_id
[docs] def prepare_display_dataframe(df: pd.DataFrame, highlight_id: str | None) -> pd.DataFrame: """ Prepares the DataFrame for display, including column filtering, ordering, row highlighting, and numeric formatting. Parameters ---------- df : pd.DataFrame The filtered dataset for display. highlight_id : str or None The ProteoBench ID to highlight (adds a marker in the 'selected' column). Returns ------- pd.DataFrame A formatted and sorted DataFrame ready for rendering. """ df = df.copy() df["selected"] = df["id"].apply(lambda x: "➡️" if x == highlight_id else "") identifier_cols = ["selected", "id"] parameter_cols = [ "software_name", "software_version", "search_engine", "search_engine_version", "ident_fdr_psm", "ident_fdr_protein", "ident_fdr_peptide", "enable_match_between_runs", "precursor_mass_tolerance", "fragment_mass_tolerance", "enzyme", "allowed_miscleavages", "min_peptide_length", "max_peptide_length", "fixed_mods", "variable_mods", "max_mods", "min_precursor_charge", "max_precursor_charge", "quantification_method", "protein_inference", "abundance_normalization_ions", "submission_comments", ] result_cols = ["median_abs_epsilon", "mean_abs_epsilon", "nr_prec", "results"] technical_cols = [ "proteobench_version", "intermediate_hash", "hover_text", "color", "old_new", "is_temporary", "comments", "scatter_size", ] # Define display column order cols = identifier_cols + parameter_cols + result_cols + technical_cols cols = [col for col in cols if col in df.columns] additional_cols = [col for col in df.columns if col not in cols] # remove boring columns cols = [ col for col in cols if col not in ["comments", "scatter_size", "old_new", "is_temporary", "color", "hover_text"] ] df = df[cols + additional_cols] # Clean up values df["results"] = df["results"].apply(str) numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns df[numeric_cols] = df[numeric_cols].round(3) df.sort_values(by="id", inplace=True) return df
def configure_aggrid(df: pd.DataFrame):
    """
    Build the AgGrid options, coloring each column by its category.

    Every column of ``df`` gets a ``cellStyle`` produced by ``get_style_js``.
    The background color depends on whether the column is an identifier,
    parameter, result or technical column; any other column gets the color
    for additional columns.

    Parameters
    ----------
    df : pd.DataFrame
        The display-ready DataFrame.

    Returns
    -------
    dict
        AgGrid gridOptions dictionary.
    """
    gb = GridOptionsBuilder.from_dataframe(df)

    identifier_cols = ["selected", "id"]
    parameter_cols = [
        "software_name",
        "software_version",
        "search_engine",
        "search_engine_version",
        "ident_fdr_psm",
        "ident_fdr_protein",
        "ident_fdr_peptide",
        "enable_match_between_runs",
        "precursor_mass_tolerance",
        "fragment_mass_tolerance",
        "enzyme",
        "allowed_miscleavages",
        "min_peptide_length",
        "max_peptide_length",
        "fixed_mods",
        "variable_mods",
        "max_mods",
        "min_precursor_charge",
        "max_precursor_charge",
        "quantification_method",
        "protein_inference",
        "abundance_normalization_ions",
        "submission_comments",
    ]
    result_cols = ["median_abs_epsilon", "mean_abs_epsilon", "nr_prec", "results"]
    technical_cols = [
        "proteobench_version",
        "intermediate_hash",
        "hover_text",
        "color",
        "old_new",
        "is_temporary",
        "comments",
        "scatter_size",
    ]

    # Checked in this order; the first category containing the column wins.
    categories = (
        (identifier_cols, COLOR_IDENTIFIER),
        (parameter_cols, COLOR_PARAMETER),
        (result_cols, COLOR_RESULT),
        (technical_cols, COLOR_TECHNICAL),
    )

    for col in df.columns:
        bg_color = next(
            (color for members, color in categories if col in members),
            COLOR_ADDITIONAL,
        )
        gb.configure_column(col, cellStyle=get_style_js(bg_color))

    return gb.build()
def render_aggrid(df: pd.DataFrame, grid_options):
    """
    Render the DataFrame as an AgGrid table.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to show.
    grid_options : dict
        The gridOptions passed to AgGrid, e.g. the result of
        ``configure_aggrid``.
    """
    display_settings = {
        "gridOptions": grid_options,
        "theme": "alpine",
        "fit_columns_on_grid_load": False,
        "height": 600,
        # The cell styles are JsCode objects, so JS code must be allowed.
        "allow_unsafe_jscode": True,
    }
    AgGrid(df, **display_settings)
def offer_download(df: pd.DataFrame, filename: str = "quantification_results.csv") -> None:
    """
    Show a download button that exports the displayed DataFrame as CSV.

    The CSV is written without the index and encoded as UTF-8.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to be downloaded.
    filename : str, optional
        The name of the file to download, by default "quantification_results.csv".
    """
    csv_text = df.to_csv(index=False)
    st.download_button(
        label="📥 Download table as CSV",
        data=csv_text.encode("utf-8"),
        file_name=filename,
        mime="text/csv",
    )
def get_style_js(bg_color: str) -> JsCode:
    """
    Build the JavaScript cell-style callback for a background color.

    The generated function ignores its ``params`` argument and always returns
    the same style object: the given background color, black text and normal
    font weight.

    Parameters
    ----------
    bg_color : str
        Hex color string to use as the background.

    Returns
    -------
    JsCode
        A JavaScript code block that defines the style.
    """
    # Doubled braces are literal braces in the f-string; only bg_color is
    # interpolated.
    style_function = f"""
        function(params) {{
            return {{
                'backgroundColor': '{bg_color}',
                'color': 'black',
                'fontWeight': 'normal'
            }}
        }}
        """
    return JsCode(style_function)
def initialize_main_data_points(all_datapoints: str, obtain_all_data_points: Callable) -> None:
    """
    Initialize the all_datapoints entry in the session state.

    Nothing happens when the entry already exists. Otherwise the entry is
    first set to None and then replaced by the result of
    ``obtain_all_data_points``, which receives that None placeholder as its
    ``all_datapoints`` argument.

    Parameters
    ----------
    all_datapoints : str
        Session-state key under which the data points are stored.
    obtain_all_data_points : Callable
        Callable accepting an ``all_datapoints`` keyword argument and
        returning the data points to store.
    """
    # NOTE(review): the load is treated as part of the first-time
    # initialisation (it only runs when the key is missing) - confirm.
    if all_datapoints in st.session_state:
        return

    st.session_state[all_datapoints] = None
    current_value = st.session_state[all_datapoints]
    st.session_state[all_datapoints] = obtain_all_data_points(all_datapoints=current_value)