import json
import os
import tempfile
import streamlit as st
from streamlit_utils import display_error, get_error_suggestions
from proteobench.exceptions import ProteoBenchError
[docs]
def show_software_selector_and_alphadia_info(variables, parsesettingsbuilder, user_input) -> None:
    """
    Display the software selector outside the form and show AlphaDIA-specific information.

    Rendering the selector outside the form allows immediate feedback when
    AlphaDIA is selected.

    Parameters
    ----------
    variables
        Module variables; ``description_input_file_md`` (path of a markdown
        file) and ``texts.Help.input_format`` (selector help text) are read.
    parsesettingsbuilder
        Object whose ``INPUT_FORMATS`` attribute supplies the selectable options.
    user_input : dict
        Mapping that receives the selected tool under the ``"input_format"`` key.
    """
    st.subheader("Input files")

    # Read the description through a context manager so the file handle is
    # closed right away instead of being left to the garbage collector.
    with open(variables.description_input_file_md, "r", encoding="utf-8") as description_file:
        st.markdown(description_file.read())

    # Software tool selector - outside form to enable reactive updates
    selected_format = st.selectbox(
        "Software tool",
        parsesettingsbuilder.INPUT_FORMATS,
        help=variables.texts.Help.input_format,
        key="software_tool_selector",
    )

    # Store selection in user_input for use in form
    user_input["input_format"] = selected_format

    # Display AlphaDIA-specific information text only (file uploader will be shown after main uploader)
    if selected_format == "AlphaDIA":
        st.info(
            "ℹ️**If submitting AlphaDIA output from versions >= 2.0, please submit the precursors.parquet or precursors.tsv file only and ignore the secondary file uploader.**\n"
            "**If not, you have the following options:**\n\n"
            "**Two-file upload (recommended):** Upload both **precursor.matrix.tsv** and **precursors.tsv** files below for automatic merging. "
            "You can upload them in any order - the system will automatically detect which is which.\n\n"
            "**Single-file upload (legacy):** Alternatively, upload a single pre-merged file in the main uploader above."
        )
# TODO: change additional_params_json for other modules, to capture relevant parameters
[docs]
def generate_additional_parameters_fields(
    variables,
    user_input,
) -> None:
    """
    Initialize one ``user_input`` entry per additional parameter.

    The JSON file at ``variables.additional_params_json`` is loaded and every
    top-level key found in it is set to ``None`` in ``user_input``. No widgets
    are rendered here; only the parameter fields are initialized.

    Parameters
    ----------
    variables
        Object exposing ``additional_params_json``, the path of the JSON
        parameter configuration.
    user_input : dict
        Mapping updated in place; existing values stored under a configured
        key are overwritten with ``None``.
    """
    with open(variables.additional_params_json, encoding="utf-8") as file:
        config = json.load(file)

    # Only the parameter names are needed; the per-parameter settings stored
    # as values in the JSON are not used by this function.
    for key in config:
        user_input[key] = None
########################################################################################
# function used in process_submission_form
[docs]
def execute_proteobench(variables, ionmodule, user_input) -> bool:
    """
    Run the benchmarking workflow and publish its outputs to the session state.

    The main data points are initialized first when no submitted data points
    are stored yet. The benchmarking output is then written to the session
    state under ``variables.all_datapoints_submitted``, ``variables.result_perf``
    and ``variables.input_df``, and the submission keyword and highlight
    column are set up.

    Returns
    -------
    bool
        True when every step completed, False when an exception was caught
        and shown to the user through ``display_error``.
    """
    try:
        session = st.session_state

        if variables.all_datapoints_submitted not in session:
            initialize_main_data_points(variables=variables, ionmodule=ionmodule)

        benchmark_output = run_benchmarking_process(
            variables=variables,
            ionmodule=ionmodule,
            user_input=user_input,
        )
        performance, datapoints, parsed_input = benchmark_output
        session[variables.all_datapoints_submitted] = datapoints

        # The keyword is kept for this session only
        store_submission_keyword(variables, datapoints, user_input)

        set_highlight_column_in_submitted_data(variables=variables)

        session[variables.result_perf] = performance
        session[variables.input_df] = parsed_input
    except (ProteoBenchError, Exception) as err:
        # Any failure is reported in the UI rather than propagated to the caller
        message, hints = get_error_suggestions(err, user_input)
        display_error(message, exception=err, suggestions=hints)
        return False
    else:
        return True
# function with same name exists in tab1_results.py, but is different
[docs]
def initialize_main_data_points(variables, ionmodule) -> None:
    """
    Populate the session-state entry for all data points if it is missing.

    Nothing happens when ``variables.all_datapoints`` is already a key of the
    session state. Otherwise the entry is first set to ``None`` and then
    replaced by the result of ``ionmodule.obtain_all_data_points``.
    """
    session = st.session_state
    datapoints_key = variables.all_datapoints

    if datapoints_key in session.keys():
        return

    # The placeholder is stored first and handed on as the starting value
    session[datapoints_key] = None
    placeholder = session[datapoints_key]
    session[datapoints_key] = ionmodule.obtain_all_data_points(all_datapoints=placeholder)
[docs]
def run_benchmarking_process(variables, ionmodule, user_input):
    """
    Copy the uploaded file(s) to temporary files and call ``ionmodule.benchmarking``.

    The uploads in ``user_input["input_csv"]`` and, when present,
    ``user_input["input_csv_secondary"]`` are written to named temporary files
    that keep the original file extension. These files are created with
    ``delete=False`` and are not removed by this function. Both uploads are
    rewound afterwards so they can be read again.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
        The benchmarking results, all data points, and the input data frame,
        exactly as returned by ``ionmodule.benchmarking``.
    """
    session = st.session_state

    # Primary upload: keep its extension on the temporary copy
    primary_upload = user_input["input_csv"]
    primary_suffix = os.path.splitext(primary_upload.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=primary_suffix) as primary_tmp:
        primary_tmp.write(primary_upload.getbuffer())
        primary_tmp.flush()
        primary_path = primary_tmp.name

    # Secondary upload (used for AlphaDIA) is optional
    secondary_path = None
    secondary_upload = user_input.get("input_csv_secondary")
    if secondary_upload:
        secondary_suffix = os.path.splitext(secondary_upload.name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=secondary_suffix) as secondary_tmp:
            secondary_tmp.write(secondary_upload.getbuffer())
            secondary_tmp.flush()
            secondary_path = secondary_tmp.name
        secondary_upload.seek(0)

    # reload buffer: https://stackoverflow.com/a/64478151/9684872
    primary_upload.seek(0)

    # Cutoff from the slider, only for modules that define the slider attributes
    cutoff = None
    if hasattr(variables, "slider_id_submitted_uuid") and hasattr(variables, "default_val_slider"):
        slider_key = session[variables.slider_id_submitted_uuid]
        cutoff = session[slider_key] if slider_key in session.keys() else variables.default_val_slider

    # Prefer the data points that already include earlier submissions
    if variables.all_datapoints_submitted in session.keys():
        datapoints = session[variables.all_datapoints_submitted]
    else:
        datapoints = session[variables.all_datapoints]

    observed_limit = getattr(variables, "max_nr_observed", None)

    call_kwargs = {
        "input_format": user_input["input_format"],
        "user_input": user_input,
        "all_datapoints": datapoints,
    }
    # Optional arguments are passed only when they carry a value
    if cutoff is not None:
        call_kwargs["default_cutoff_min_prec"] = cutoff
    if secondary_path:
        call_kwargs["input_file_secondary"] = secondary_path
    if observed_limit is not None:
        call_kwargs["max_nr_observed"] = observed_limit

    return ionmodule.benchmarking(primary_path, **call_kwargs)
[docs]
def store_submission_keyword(variables, all_datapoints, user_input) -> None:
    """
    Store the keyword for the newly submitted datapoint.

    Keywords are stored in session state only and not persisted to public submissions.

    Parameters
    ----------
    variables
        Object exposing ``all_datapoints_submitted``; the keyword storage key
        is that value with a ``_keywords`` suffix.
    all_datapoints : pd.DataFrame
        Data points with an ``old_new`` column and an ``intermediate_hash``
        column; the last row whose ``old_new`` equals ``"new"`` is used.
    user_input : dict
        Mapping that may hold the keyword under ``"submission_keyword"``.
    """
    # Initialize keyword storage if it doesn't exist
    keyword_storage_key = f"{variables.all_datapoints_submitted}_keywords"
    if keyword_storage_key not in st.session_state:
        st.session_state[keyword_storage_key] = {}

    # Get the most recent (new) datapoint
    new_datapoints = all_datapoints[all_datapoints["old_new"] == "new"]
    if new_datapoints.empty:
        return

    intermediate_hash = new_datapoints.iloc[-1]["intermediate_hash"]

    # ``or ""`` covers a missing key as well as a key explicitly set to None,
    # which would otherwise fail on ``.strip()``.
    keyword = (user_input.get("submission_keyword") or "").strip()
    if keyword:  # Only store if keyword is not empty
        st.session_state[keyword_storage_key][intermediate_hash] = keyword
[docs]
def add_keywords_column_to_submitted_data(variables) -> None:
    """
    Attach a 'Keyword' column to the submitted datapoints held in the session state.

    Each row's keyword is looked up by its ``intermediate_hash`` in the
    session's keyword storage; rows without a stored keyword, or all rows when
    no storage exists, get an empty string. The column is placed at index 1
    when a 'Highlight' column is present and at index 0 otherwise, and the
    reordered frame is written back to the session state.
    """
    session = st.session_state
    submitted = session[variables.all_datapoints_submitted]
    storage_key = f"{variables.all_datapoints_submitted}_keywords"

    if storage_key not in session:
        submitted["Keyword"] = ""
    else:
        hash_to_keyword = session[storage_key]
        # Hashes without a stored keyword map to NaN and are blanked out
        submitted["Keyword"] = submitted["intermediate_hash"].map(hash_to_keyword).fillna("")

    if "Keyword" in submitted.columns:
        ordering = submitted.columns.tolist()
        ordering.remove("Keyword")
        if "Highlight" in ordering:
            ordering.insert(1, "Keyword")
        else:
            ordering.insert(0, "Keyword")
        submitted = submitted[ordering]

    session[variables.all_datapoints_submitted] = submitted
[docs]
def set_highlight_column_in_submitted_data(variables) -> None:
    """
    Initialize the highlight column in the submitted data points.

    When the frame has no 'Highlight' column, one is inserted at position 0:
    filled from ``st.session_state[variables.highlight_list_submitted]`` if
    that entry exists, otherwise filled with False. When the column already
    exists it is coerced to boolean, with missing values becoming False.
    The frame is written back to the session state and the 'Keyword' column
    is added afterwards.
    """
    df = st.session_state[variables.all_datapoints_submitted]

    highlight_list_stored = variables.highlight_list_submitted in st.session_state.keys()
    has_highlight_column = "Highlight" in df.columns

    if not has_highlight_column and not highlight_list_stored:
        df.insert(0, "Highlight", [False] * len(df.index))
    elif not has_highlight_column:
        df.insert(0, "Highlight", st.session_state[variables.highlight_list_submitted])
    else:
        highlight = df["Highlight"]
        # astype(bool) maps NaN to True, so a fillna applied afterwards never
        # fires. Mask missing entries explicitly so they end up False.
        df["Highlight"] = highlight.astype(bool) & highlight.notna()

    # only needed for the last branch, but to be sure apply always:
    st.session_state[variables.all_datapoints_submitted] = df

    # Also add keywords column
    add_keywords_column_to_submitted_data(variables)