"""
Generic Tab 3: In-Depth Plots
This module provides a unified interface for displaying in-depth analysis plots
across all ProteoBench module types (Quant, De Novo, etc.).
"""
import glob
import logging
import os
import subprocess
import uuid
import zipfile
from datetime import datetime
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional
import pandas as pd
import streamlit as st
import streamlit_utils
from plotly import graph_objects as go
from ..utils.general import clean_dataframe_for_export
# Module-level logger, named after this module via ``__name__``.
logger: logging.Logger = logging.getLogger(__name__)
[docs]
def generate_indepth_plots(
    module,
    variables,
    parsesettingsbuilder,
    user_input,
    public_id: Optional[str],
    public_hash: Optional[str],
    metric: str = "Median",
    mode: str = "Species-weighted",
    colorblind_mode: bool = False,
) -> Optional[go.Figure]:
    """
    Generate and display in-depth plots for the selected dataset.

    Works generically across module types by delegating to the plot generator
    returned by ``module.get_plot_generator()``. Besides rendering the plots
    and the performance table, every generated figure is stored in
    ``st.session_state`` under ``f"{variables.fig_prefix}_{plot_name}"``.

    Parameters
    ----------
    module : object
        The module instance (Quant, De Novo, etc.). Must provide
        ``get_plot_generator()``.
    variables : object
        Variables object containing session state keys. ``result_perf`` and
        ``fig_prefix`` are read by this function.
    parsesettingsbuilder : ParseSettingsBuilder
        Parse settings builder for the module. ``build_parser`` is called with
        ``user_input["input_format"]``.
    user_input : dict
        User input parameters. Must contain the key ``"input_format"``.
    public_id : Optional[str]
        The dataset identifier ("Uploaded dataset" or public run name).
    public_hash : Optional[str]
        The hash of the selected public dataset.
    metric : str, optional
        The metric to use for plotting (e.g., "Median", "Mean"). Defaults to "Median".
    mode : str, optional
        The mode for metric calculation. Defaults to "Species-weighted".
    colorblind_mode : bool, optional
        Whether to use colorblind-friendly colors. Defaults to False.

    Returns
    -------
    Optional[go.Figure]
        The first generated plot, or None if validation fails, no performance
        data is available, plot generation raises, or no plots were produced.
    """
    plot_generator = module.get_plot_generator()
    is_uploaded = public_id == "Uploaded dataset"

    # Validate that we have data to plot
    if variables.result_perf not in st.session_state:
        if public_hash is None:
            st.error(":x: Please submit a result file or select a public run for display", icon="🚨")
            return None
        if is_uploaded:
            st.error(":x: Please submit a result file in the Submit New Data Tab", icon="🚨")
            return None

    # Load performance data
    if is_uploaded:
        performance_data = st.session_state[variables.result_perf]
    else:
        performance_data = load_public_performance_data(public_hash)
    # Nothing to plot when no data could be obtained from either source.
    if performance_data is None:
        return None

    # Generate parse settings
    parse_settings = parsesettingsbuilder.build_parser(user_input["input_format"])

    # Generate plots using module's plot generator
    try:
        plots = plot_generator.generate_in_depth_plots(
            performance_data,
            parse_settings=parse_settings,
            metric=metric,
            mode=mode,
            colorblind_mode=colorblind_mode,
        )
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the
        # page, and keep the traceback in the application log as well.
        import traceback

        logger.exception("Error generating in-depth plots")
        st.error(f"Error generating in-depth plots: {e}", icon="🚨")
        with st.expander("Error details"):
            st.code(traceback.format_exc())
        return None

    # Treat a missing result like an empty one so the steps below do not fail.
    plots = plots or {}

    # Store plots in session state
    for plot_name, fig in plots.items():
        st.session_state[f"{variables.fig_prefix}_{plot_name}"] = fig

    # Display plots using module's layout configuration
    display_plots_with_layout(plots, plot_generator, variables, public_id)

    # Display performance data table
    display_performance_table(performance_data, variables, user_input, public_id, public_hash)

    # First figure in insertion order, or None when nothing was generated.
    return next(iter(plots.values()), None)
[docs]
def display_plots_with_layout(plots: dict, plot_generator, variables, public_id: str) -> None:
    """
    Display plots using the module's layout configuration.

    The layout is a sequence of section dicts obtained from
    ``plot_generator.get_in_depth_plot_layout()``. Each section is read for
    the keys ``"columns"`` and ``"plots"`` and, optionally, ``"title"`` and
    ``"titles"``.

    Parameters
    ----------
    plots : dict
        Dictionary of plot names to plotly figures. Names listed in the
        layout but missing here are skipped.
    plot_generator : PlotGeneratorBase
        The plot generator instance; provides the layout and the plot
        descriptions.
    variables : object
        Variables object. Not used by this function.
    public_id : str
        The dataset identifier shown in the "Data source" caption.
    """
    layout_config = plot_generator.get_in_depth_plot_layout()
    descriptions = plot_generator.get_in_depth_plot_descriptions()
    last_section_index = len(layout_config) - 1

    for section_index, section in enumerate(layout_config):
        # Handle section title if provided
        if section.get("title"):
            st.markdown(f"## {section['title']}")

        # Create columns based on section configuration
        cols = st.columns(section["columns"])
        # Use the number of created columns rather than the raw spec, so a
        # list of width ratios works as well as a plain integer.
        n_cols = len(cols)
        section_titles = section.get("titles") or {}

        # Display plots in columns
        for i, plot_name in enumerate(section["plots"]):
            if plot_name not in plots:
                continue

            # The slot is derived from the layout position, so a skipped plot
            # leaves its column slot empty instead of shifting later plots.
            with cols[i % n_cols]:
                has_description = plot_name in descriptions

                # Prefer an explicit title; otherwise fall back to the text
                # before the first "." of the description.
                if plot_name in section_titles:
                    st.subheader(section_titles[plot_name])
                elif has_description:
                    st.subheader(descriptions[plot_name].split(".")[0])

                # Add description
                if has_description:
                    st.markdown(f"{descriptions[plot_name]}")
                    if public_id:
                        st.caption(f"Data source: {public_id}")

                # Display plot
                st.plotly_chart(plots[plot_name], use_container_width=True)

        # Add separator after each section (except last). Compare by position:
        # comparing section dicts by value would also match an earlier section
        # that happens to equal the last one.
        if section_index < last_section_index and len(section["plots"]) > 0:
            st.markdown("---")
[docs]
def generate_sample_name(input_format: str) -> str:
    """
    Build a sample name from the input format and the current local time.

    The name has the form ``<input_format>_<YYYYMMDD>_<HHMMSS>``, so two calls
    with the same format within the same second yield the same name.

    Parameters
    ----------
    input_format : str
        The input format/software name used as the prefix.

    Returns
    -------
    str
        The generated sample name.
    """
    # datetime supports strftime directives directly in a format spec.
    return f"{input_format}_{datetime.now():%Y%m%d_%H%M%S}"
[docs]
def display_in_depth_plots_generic(variables, ionmodule, performance_data: pd.DataFrame, **kwargs) -> None:
    """
    Render in-depth plots for already-loaded performance data.

    A lighter alternative to ``generate_indepth_plots`` for callers that hold
    the performance data themselves. Shows a warning and stops when the data
    is missing or empty; shows an error with an expandable traceback when the
    plot generator raises.

    Parameters
    ----------
    variables : object
        Variables object, forwarded to ``display_plots_with_layout``.
    ionmodule : object
        The module instance; must provide ``get_plot_generator()``.
    performance_data : pd.DataFrame
        The performance data to visualize.
    **kwargs : dict
        Additional module-specific parameters passed through to
        ``generate_in_depth_plots``.
    """
    data_present = performance_data is not None and len(performance_data) != 0
    if not data_present:
        st.warning("No data available for in-depth analysis.", icon="⚠️")
        return

    # The module decides which generator builds its figures.
    generator = ionmodule.get_plot_generator()

    try:
        figures = generator.generate_in_depth_plots(performance_data=performance_data, **kwargs)
    except Exception as exc:
        import traceback

        st.error(f"Error generating in-depth plots: {exc}", icon="🚨")
        with st.expander("Error details"):
            st.code(traceback.format_exc())
    else:
        # Only reached when generation succeeded.
        display_plots_with_layout(figures, generator, variables, "Current Dataset")
[docs]
def display_pmultiqc_report(performance_data: pd.DataFrame, sample_name: str, cache_key: str) -> None:
    """
    Display the pMultiQC report section.

    The report HTML is cached in ``st.session_state`` under
    ``"tab31_pmultiqc_html_content_" + cache_key``. When no non-empty cached
    value exists, ``create_pmultiqc_report_section`` is called and its result
    is stored under that key. The download button is always shown and is
    disabled while no report content is available.

    Parameters
    ----------
    performance_data : pd.DataFrame
        The performance data to generate the report from.
    sample_name : str
        The name of the sample for the report (used in filenames).
    cache_key : str
        Stable identifier for caching (independent of reruns).
    """
    st.subheader("pMultiQC Report")
    st.markdown(
        "pMultiQC Reports contain additional QC plots for e.g. missing values, CV distributions, and intensity distributions. Report generation might take up to a minute."
    )

    session_key = "tab31_pmultiqc_html_content_" + cache_key
    html_content = st.session_state.get(session_key, "")

    if html_content:
        logger.info('using cached pMultiQC report from session_state["%s"]', session_key)
    else:
        html_content = create_pmultiqc_report_section(performance_data)
        st.session_state[session_key] = html_content
        # The helper returns an empty string when nothing was generated, so
        # only report success when there is actual content.
        if html_content:
            logger.info("pMultiQC report generated.")

    show_download_button(html_content, disabled=not html_content, sample_name=sample_name)
[docs]
def create_pmultiqc_report_section(performance_data: pd.DataFrame) -> str:
    """
    Show the "Generate pMultiQC Report" button and build the report on click.

    On click, the performance data is written as ``result_performance.csv``
    into a temporary directory and the ``multiqc`` command-line tool is run on
    it with ``--parse_proteobench``. The resulting ``multiqc_report.html`` is
    read back before the temporary directory is removed.

    Parameters
    ----------
    performance_data : pd.DataFrame
        The performance data to generate the report from.

    Returns
    -------
    str
        The HTML content of the generated report, or an empty string when the
        button was not clicked or report generation failed (failures are
        logged and shown via ``st.error``).
    """
    if not st.button("Generate pMultiQC Report"):
        return ""

    # Single source for the limit used by both the call and the log message.
    timeout_seconds = 500
    html_content = ""

    with TemporaryDirectory() as tmp_name:
        out_dir = Path(tmp_name)
        data_dir = (out_dir / "data").resolve()
        data_dir.mkdir(parents=True, exist_ok=True)
        performance_data.to_csv(data_dir / "result_performance.csv", index=False)

        command = [
            "multiqc",
            "--parse_proteobench",
            str(data_dir),
            "-o",
            str(out_dir),
            "-f",
            "--clean-up",
        ]
        try:
            # check=False: the return code is inspected below so that stderr
            # can be surfaced to the user instead of raising.
            result = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True,
                timeout=timeout_seconds,  # Set a timeout to prevent hanging
            )
            report_path = out_dir / "multiqc_report.html"
            if result.returncode == 0 and report_path.exists():
                html_content = report_path.read_text(encoding="utf-8")
                st.success("pMultiQC report generated successfully.")
            else:
                error_msg = result.stderr if result.stderr else "Unknown error"
                logger.error("pMultiQC failed with return code %s: %s", result.returncode, error_msg)
                st.error(f"Error generating pMultiQC report: {error_msg}")
        except subprocess.TimeoutExpired:
            logger.error("pMultiQC report generation timed out after %d seconds", timeout_seconds)
            st.error("pMultiQC report generation timed out. The analysis may be too complex for the current input.")
        except Exception as e:
            # Broad catch is deliberate: this covers e.g. a missing ``multiqc``
            # executable. logger.exception keeps the traceback in the log.
            logger.exception("Unexpected error during pMultiQC report generation")
            st.error(f"Unexpected error generating pMultiQC report: {str(e)}")

    return html_content