Source code for webinterface.pages.base_pages.tabs.tab3_view_single_result

"""
Generic Tab 3: In-Depth Plots

This module provides a unified interface for displaying in-depth analysis plots
across all ProteoBench module types (Quant, De Novo, etc.).
"""

import glob
import logging
import os
import subprocess
import uuid
import zipfile
from datetime import datetime
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional

import pandas as pd
import streamlit as st
import streamlit_utils
from plotly import graph_objects as go

from ..utils.general import clean_dataframe_for_export

logger: logging.Logger = logging.getLogger(__name__)


[docs] def generate_indepth_plots( module, variables, parsesettingsbuilder, user_input, public_id: Optional[str], public_hash: Optional[str], metric: str = "Median", mode: str = "Global", colorblind_mode: bool = False, ) -> Optional[go.Figure]: """ Generate and display in-depth plots for the selected dataset. Works generically across all module types by using the plot_generator interface. Parameters ---------- module : object The module instance (Quant, De Novo, etc.). variables : object Variables object containing session state keys. parsesettingsbuilder : ParseSettingsBuilder Parse settings builder for the module. user_input : dict User input parameters. public_id : Optional[str] The dataset identifier ("Uploaded dataset" or public run name). public_hash : Optional[str] The hash of the selected public dataset. metric : str, optional The metric to use for plotting (e.g., "Median", "Mean"). Defaults to "Median". mode : str, optional The mode for metric calculation (e.g., "Global", "Species-specific"). Defaults to "Global". colorblind_mode : bool, optional Whether to use colorblind-friendly colors. Defaults to False. Returns ------- Optional[go.Figure] The first generated plot or None if generation fails. """ plot_generator = module.get_plot_generator() # Validate that we have data to plot if variables.result_perf not in st.session_state: if public_hash is None: st.error(":x: Please submit a result file or select a public run for display", icon="🚨") return None elif public_id == "Uploaded dataset": st.error(":x: Please submit a result file in the Submit New Data Tab", icon="🚨") return None # Load performance data if public_id == "Uploaded dataset": performance_data = st.session_state[variables.result_perf] else: performance_data = load_public_performance_data(public_hash) if performance_data is None: return None # Generate parse settings parse_settings = parsesettingsbuilder.build_parser(user_input["input_format"]) # Generate plots using module's plot generator try: plots = plot_generator.generate_in_depth_plots( performance_data, parse_settings=parse_settings, metric=metric, mode=mode, colorblind_mode=colorblind_mode, ) except Exception as e: st.error(f"Error generating in-depth plots: {e}", icon="🚨") import traceback with st.expander("Error details"): st.code(traceback.format_exc()) return None # Store plots in session state for plot_name, fig in plots.items(): st.session_state[f"{variables.fig_prefix}_{plot_name}"] = fig # Display plots using module's layout configuration display_plots_with_layout(plots, plot_generator, variables, public_id) # Display performance data table display_performance_table(performance_data, variables, user_input, public_id, public_hash) return plots.get(next(iter(plots))) if plots else None
[docs] def load_public_performance_data(public_hash: str) -> Optional[pd.DataFrame]: """ Load performance data for a public dataset from storage. Parameters ---------- public_hash : str The hash identifier of the public dataset. Returns ------- Optional[pd.DataFrame] The loaded performance data or None if loading fails. """ if "storage" not in st.secrets or st.secrets["storage"]["dir"] is None: st.error(":x: Storage directory not configured", icon="🚨") return None dataset_path = os.path.join(st.secrets["storage"]["dir"], public_hash) pattern = os.path.join(dataset_path, "*_data.zip") zip_files = glob.glob(pattern) if not zip_files: st.error(":x: Could not find the files on the server", icon="🚨") return None zip_path = zip_files[0] try: with zipfile.ZipFile(zip_path) as z: with z.open("result_performance.csv") as f: return pd.read_csv(f) except Exception as e: st.error(f":x: Error loading performance data: {e}", icon="🚨") return None
[docs] def display_plots_with_layout(plots: dict, plot_generator, variables, public_id: str) -> None: """ Display plots using the module's layout configuration. Parameters ---------- plots : dict Dictionary of plot names to plotly figures. plot_generator : PlotGeneratorBase The plot generator instance. variables : object Variables object. public_id : str The dataset identifier for display in titles. """ layout_config = plot_generator.get_in_depth_plot_layout() descriptions = plot_generator.get_in_depth_plot_descriptions() for section in layout_config: # Handle section title if provided if "title" in section and section["title"]: st.markdown(f"## {section['title']}") # Create columns based on section configuration cols = st.columns(section["columns"]) # Display plots in columns for i, plot_name in enumerate(section["plots"]): if plot_name not in plots: continue col = cols[i % section["columns"]] with col: # Add plot title if available in titles dict if "titles" in section and plot_name in section["titles"]: st.subheader(section["titles"][plot_name]) elif plot_name in descriptions: # Use first line of description as title title = descriptions[plot_name].split(".")[0] st.subheader(title) # Add description if plot_name in descriptions: desc = descriptions[plot_name] st.markdown(f"{desc}") if public_id: st.caption(f"Data source: {public_id}") # Display plot st.plotly_chart(plots[plot_name], use_container_width=True) # Add separator after each section (except last) if section != layout_config[-1] and len(section["plots"]) > 0: st.markdown("---")
[docs] def display_performance_table( performance_data: pd.DataFrame, variables, user_input: dict, public_id: str, public_hash: Optional[str] = None ) -> None: """ Display the performance data table with download option. Parameters ---------- performance_data : pd.DataFrame The performance data to display. variables : object Variables object. user_input : dict User input parameters. public_id : str The dataset identifier. public_hash : Optional[str] The hash of the public dataset (for stable cache key). """ st.subheader(f"Sample of the processed file for {public_id}") # Display description if available if hasattr(variables, "description_table_md") and os.path.exists(variables.description_table_md): st.markdown(open(variables.description_table_md, "r", encoding="utf-8").read()) # Display table st.dataframe(performance_data.head(100)) # Generate sample name (used for user-facing filenames) if public_id == "Uploaded dataset": sample_name = generate_sample_name(user_input.get("input_format", "unknown")) else: sample_name = public_id # Generate stable cache key (for session state, independent of reruns) if public_id == "Uploaded dataset": cache_key = user_input.get("input_format", "unknown") else: cache_key = public_hash or public_id # Download button random_uuid = uuid.uuid4() # Clean data for CSV export (replace newlines with spaces) cleaned_data = clean_dataframe_for_export(performance_data) st.download_button( label="Download table", data=streamlit_utils.save_dataframe(cleaned_data), file_name=f"{sample_name}.csv", mime="text/csv", key=f"{random_uuid}", icon=":material/download:", ) display_pmultiqc_report(performance_data=performance_data, sample_name=sample_name, cache_key=cache_key)
[docs] def generate_sample_name(input_format: str) -> str: """ Generate a unique sample name based on input format and timestamp. Parameters ---------- input_format : str The input format/software name. Returns ------- str The generated sample name. """ time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S") return f"{input_format}_{time_stamp}"
[docs] def display_in_depth_plots_generic(variables, ionmodule, performance_data: pd.DataFrame, **kwargs) -> None: """ Generic function to display in-depth plots for any module type. This is a simpler alternative to generate_indepth_plots when you already have the performance data loaded. Parameters ---------- variables : object Variables object. ionmodule : object The module instance. performance_data : pd.DataFrame The performance data to visualize. **kwargs : dict Additional module-specific parameters to pass to generate_in_depth_plots. """ if performance_data is None or len(performance_data) == 0: st.warning("No data available for in-depth analysis.", icon="⚠️") return # Get plot generator from module plot_generator = ionmodule.get_plot_generator() # Generate plots try: plots = plot_generator.generate_in_depth_plots(performance_data=performance_data, **kwargs) except Exception as e: st.error(f"Error generating in-depth plots: {e}", icon="🚨") import traceback with st.expander("Error details"): st.code(traceback.format_exc()) return # Display plots with layout display_plots_with_layout(plots, plot_generator, variables, "Current Dataset")
[docs] def display_pmultiqc_report(performance_data: pd.DataFrame, sample_name: str, cache_key: str) -> None: """ Display the pMultiQC report section. Parameters ---------- performance_data : pd.DataFrame The performance data to generate the report from. sample_name : str The name of the sample for the report (used in filenames). cache_key : str Stable identifier for caching (independent of reruns). """ st.subheader("pMultiQC Report") st.markdown( "pMultiQC Reports contain additional QC plots for e.g. missing values, CV distributions, and intensity distributions. Report generation might take up to a minute." ) session_key = "tab31_pmultiqc_html_content_" + cache_key html_content = st.session_state.get(session_key, "") if not html_content: html_content = create_pmultiqc_report_section(performance_data) st.session_state[session_key] = html_content logger.info( "pMultiQC report generated.", ) else: logger.info('using cached pMultiQC report from session_state["{}"]'.format(session_key)) download_disactivate = True if html_content: download_disactivate = False show_download_button(html_content, disabled=download_disactivate, sample_name=sample_name)
[docs] def show_download_button(html_content: str, disabled: bool, sample_name: str) -> None: """ Display a download button for the pMultiQC report. Parameters ---------- html_content : str The HTML content of the report. disabled : bool Whether the download button should be disabled. sample_name : str The name of the sample for the report filename. """ st.markdown("Download the pMultiQC report generated from the intermediate data.") # components.html(html_content, height=800, scrolling=True) st.download_button( label="Download pMultiQC Report", file_name="pMultiQC_report_{}.html".format(sample_name), data=html_content, disabled=disabled, mime="text/html", )
[docs] def create_pmultiqc_report_section(performance_data: pd.DataFrame) -> str: """ Create a section in the Streamlit app to display the pMultiQC report. Parameters ---------- performance_data : pd.DataFrame The performance data to generate the report from. Returns ------- str The HTML content of the generated report. """ html_content = "" if st.button("Generate pMultiQC Report"): df_intermediate_results = performance_data with TemporaryDirectory() as tmp_dir: tmp_dir = Path(tmp_dir) tmp_data = (tmp_dir / "data").resolve() tmp_data.mkdir(parents=True, exist_ok=True) df_intermediate_results.to_csv(tmp_data / "result_performance.csv", index=False) file_out = tmp_dir try: ret_code = subprocess.run( [ "multiqc", "--parse_proteobench", f"{tmp_data}", "-o", f"{file_out}", "-f", "--clean-up", ], check=False, capture_output=True, text=True, timeout=500, # Set a timeout to prevent hanging ) html_path = Path(file_out) / "multiqc_report.html" if html_path.exists() and ret_code.returncode == 0: with open(html_path, "r", encoding="utf-8") as f: html_content = f.read() st.success("pMultiQC report generated successfully.") else: error_msg = ret_code.stderr if ret_code.stderr else "Unknown error" logger.error(f"pMultiQC failed with return code {ret_code.returncode}: {error_msg}") st.error(f"Error generating pMultiQC report: {error_msg}") except subprocess.TimeoutExpired: logger.error("pMultiQC report generation timed out after 500 seconds") st.error("pMultiQC report generation timed out. The analysis may be too complex for the current input.") except Exception as e: logger.error(f"Unexpected error during pMultiQC report generation: {str(e)}") st.error(f"Unexpected error generating pMultiQC report: {str(e)}") return html_content