Source code for webinterface.pages.base_pages.utils.resulttable

import pandas as pd

from proteobench.io.parsing.parse_settings import get_open_source_tools

# This file contains utility functions for rendering the result table in tab1_results and tab4_display_results_submitted.

# === Open Source Tools ===
# Collection of open source tool names, fetched once at import time via
# get_open_source_tools() (loaded from
# proteobench/io/parsing/io_parse_settings/tool_metadata.toml).
# add_open_source_column() lowercases each software name before testing
# membership here, so entries are presumably stored in lowercase -- TODO confirm.
OPEN_SOURCE_TOOLS = get_open_source_tools()



[docs]
def add_open_source_column(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flag open source software in a new 'open_source' column.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame that is expected to carry a 'software_name' column.

    Returns
    -------
    pd.DataFrame
        When 'software_name' is absent, the input object itself is returned
        unchanged. Otherwise a copy is returned in which 'open_source' sits
        directly after 'software_name'; rows whose lowercased software name
        is found in OPEN_SOURCE_TOOLS hold '✅', all other rows hold ''.
    """
    if "software_name" not in df.columns:
        # Nothing to annotate: hand back the caller's object as-is (no copy).
        return df

    def _marker(name) -> str:
        # Names are compared case-insensitively by lowercasing them first.
        is_open = str(name).lower() in OPEN_SOURCE_TOOLS
        return "✅" if is_open else ""

    annotated = df.copy()
    annotated["open_source"] = annotated["software_name"].apply(_marker)

    # Move the new column so that it directly follows 'software_name'.
    ordered = annotated.columns.tolist()
    ordered.remove("open_source")
    split_at = ordered.index("software_name") + 1
    ordered = ordered[:split_at] + ["open_source"] + ordered[split_at:]
    return annotated[ordered]



# === Table Color Constants ===
# Cell background colors, one per column category; configure_aggrid() picks
# the constant that matches the category a column belongs to.
# The two shades alternate from one category to the next, presumably so that
# neighbouring column groups are visually distinguishable -- TODO confirm.
COLOR_IDENTIFIER = "#F0F2F6"  # identifier columns ("selected", "id")
COLOR_PARAMETER = "#FFFFFF"  # software / search parameter columns
COLOR_RESULT = "#F0F2F6"  # result metric columns
COLOR_TECHNICAL = "#FFFFFF"  # technical / bookkeeping columns
COLOR_ADDITIONAL = "#F0F2F6"  # any column not listed in the categories above


def _get_style_js(bg_color: str):
    """
    Build the AgGrid cell-style callback for a given background color.

    Parameters
    ----------
    bg_color : str
        Hex color string that is interpolated into the JavaScript source as
        the cell's background color.

    Returns
    -------
    JsCode
        Wrapper around a JavaScript function returning a style object with
        the requested background, black text and normal font weight.
    """
    # st_aggrid is imported lazily, only when a style is actually requested.
    from st_aggrid import JsCode

    # Doubled braces are literal braces in the f-string; only bg_color is substituted.
    style_function = f"""
    function(params) {{
        return {{
            'backgroundColor': '{bg_color}',
            'color': 'black',
            'fontWeight': 'normal'
        }}
    }}
    """
    return JsCode(style_function)



[docs]
def render_aggrid(df: pd.DataFrame, grid_options, key):
    """
    Display a DataFrame as an AgGrid table whose height follows the row count.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to display in the grid.
    grid_options : dict
        Configuration options for AgGrid.
    key : Any
        Unique identifier for the grid instance. It is converted to a string
        and prefixed with ``aggrid::`` (AgGrid does not work with UUID keys).

    Returns
    -------
    None
        The grid is rendered in the Streamlit interface; nothing is returned.
    """
    # Pixel budget: 50 per data row, 40 for the header, 10 of padding.
    per_row_px, header_px, padding_px = 50, 40, 10
    # The grid never shrinks below 150px nor grows beyond 800px.
    lower_bound_px, upper_bound_px = 150, 800

    wanted_px = len(df) * per_row_px + header_px + padding_px
    grid_height = min(max(wanted_px, lower_bound_px), upper_bound_px)

    from st_aggrid import AgGrid

    # AgGrid does not work with UUID keys, hence the explicit string key.
    widget_key = f"aggrid::{key!s}"

    AgGrid(
        df,
        gridOptions=grid_options,
        theme="alpine",
        fit_columns_on_grid_load=False,
        height=grid_height,
        allow_unsafe_jscode=True,
        key=widget_key,
    )




[docs]
def configure_aggrid(df: pd.DataFrame):
    """
    Build AgGrid grid options that color each column by its category.

    Parameters
    ----------
    df : pd.DataFrame
        The display-ready DataFrame.

    Returns
    -------
    dict
        AgGrid gridOptions dictionary in which every column of ``df`` has a
        ``cellStyle`` matching its category (identifier, parameter, result,
        technical, or "additional" for any column not listed).
    """
    from st_aggrid import GridOptionsBuilder

    # (background color, column names) pairs, listed from highest to lowest precedence.
    categories = (
        (COLOR_IDENTIFIER, ("selected", "id")),
        (
            COLOR_PARAMETER,
            (
                "software_name",
                "software_version",
                "search_engine",
                "search_engine_version",
                "ident_fdr_psm",
                "ident_fdr_protein",
                "ident_fdr_peptide",
                "enable_match_between_runs",
                "precursor_mass_tolerance",
                "fragment_mass_tolerance",
                "enzyme",
                "allowed_miscleavages",
                "min_peptide_length",
                "max_peptide_length",
                "fixed_mods",
                "variable_mods",
                "max_mods",
                "min_precursor_charge",
                "max_precursor_charge",
                "quantification_method",
                "protein_inference",
                "abundance_normalization_ions",
                "submission_comments",
            ),
        ),
        (COLOR_RESULT, ("median_abs_epsilon", "mean_abs_epsilon", "nr_prec", "results")),
        (
            COLOR_TECHNICAL,
            (
                "proteobench_version",
                "intermediate_hash",
                "hover_text",
                "color",
                "old_new",
                "is_temporary",
                "comments",
                "scatter_size",
            ),
        ),
    )

    # Flatten into a name -> color lookup; setdefault keeps the first
    # (highest-precedence) category should a name ever be listed twice.
    color_by_column = {}
    for background, names in categories:
        for name in names:
            color_by_column.setdefault(name, background)

    builder = GridOptionsBuilder.from_dataframe(df)
    for column in df.columns:
        # Columns outside every known category fall back to the "additional" color.
        background = color_by_column.get(column, COLOR_ADDITIONAL)
        builder.configure_column(column, cellStyle=_get_style_js(background))

    return builder.build()




[docs]
def prepare_display_dataframe(df: pd.DataFrame, highlight_id: str | None) -> pd.DataFrame:
    """
    Prepares the DataFrame for display, including column filtering, ordering,
    row highlighting, and numeric formatting.

    Parameters
    ----------
    df : pd.DataFrame
        The filtered dataset for display.

    highlight_id : str or None
        The ProteoBench ID to highlight (adds a marker in the 'selected' column).

    Returns
    -------
    pd.DataFrame
        A formatted and sorted DataFrame ready for rendering.
    """
    df = df.copy()

    if len(df) == 0:
        return df
    df["selected"] = df["id"].apply(lambda x: "➡️" if x == highlight_id else "")

    try:
        identifier_cols = ["selected", "id"]
        parameter_cols = [
            "software_name",
            "software_version",
            "search_engine",
            "search_engine_version",
            "ident_fdr_psm",
            "ident_fdr_protein",
            "ident_fdr_peptide",
            "enable_match_between_runs",
            "precursor_mass_tolerance",
            "fragment_mass_tolerance",
            "enzyme",
            "allowed_miscleavages",
            "min_peptide_length",
            "max_peptide_length",
            "fixed_mods",
            "variable_mods",
            "max_mods",
            "min_precursor_charge",
            "max_precursor_charge",
            "quantification_method",
            "protein_inference",
            "abundance_normalization_ions",
            "submission_comments",
        ]
        result_cols = ["median_abs_epsilon", "mean_abs_epsilon", "nr_prec", "results"]
        technical_cols = [
            "proteobench_version",
            "intermediate_hash",
            "hover_text",
            "color",
            "old_new",
            "is_temporary",
            "comments",
            "scatter_size",
        ]

        # Define display column order
        cols = identifier_cols + parameter_cols + result_cols + technical_cols
        cols = [col for col in cols if col in df.columns]
        additional_cols = [col for col in df.columns if col not in cols]
        # remove boring columns
        cols = [
            col
            for col in cols
            if col not in ["comments", "scatter_size", "old_new", "is_temporary", "color", "hover_text"]
        ]
        df = df[cols + additional_cols]

        # Clean up values
        df["results"] = df["results"].apply(str)
        numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns
        df[numeric_cols] = df[numeric_cols].round(3)
        df.sort_values(by="id", inplace=True)

    except KeyError as e:
        print(f"KeyError during DataFrame preparation: {e}")

    return df