Source code for webinterface.UI_utils

import base64
import io
import json
import logging
import re
import tarfile
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, Optional

import requests
import streamlit as st
from pages.utils.module_registry import get_all_modules

# Module-level logger named after this module; the helpers below use it to report failed requests and malformed data.
logger = logging.getLogger(__name__)


def get_base64_image(path):
    """
    Read a file and return its content as a base64-encoded string.

    Parameters
    ----------
    path : str or path-like
        Location of the file to encode. The file is opened in binary mode.

    Returns
    -------
    str
        The base64 encoding of the file's bytes, decoded to text as UTF-8.
    """
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def stat_box(title, value, icon_path, url=None):
    """
    Build the HTML snippet for a statistics card with an icon, a title and a value.

    Parameters
    ----------
    title : str
        Caption shown below the icon. Inserted into the HTML as-is (not escaped).
    value : Any
        Value shown below the title. Inserted into the HTML as-is (not escaped).
    icon_path : str or path-like
        Path of the icon file; it is embedded as a base64 ``data:image/png`` URI.
    url : str, optional
        When truthy, the card is wrapped in a link to this URL that opens in a new tab.

    Returns
    -------
    str
        The HTML for the card, wrapped in an ``<a>`` element when ``url`` is given.
    """
    icon_b64 = get_base64_image(icon_path)
    # The pieces below are concatenated into a single string; keep the leading spaces intact.
    card_html = (
        ' <div style=" background: #fff; border-radius: 12px;'
        " box-shadow: 0 4px 16px rgba(0,0,0,0.06); padding: 12px; text-align: center;"
        " min-height: 120px; display: flex; flex-direction: column;"
        " justify-content: center; align-items: center;"
        ' transition: transform 0.2s ease; ">'
        ' <div style="margin-bottom: 8px;">'
        f' <img src="data:image/png;base64,{icon_b64}" alt="icon" style="width: 36px; height: 36px;" />'
        " </div>"
        f' <div style="color: #37475E; font-weight: 600; font-size: 0.95rem; text-align: center;">{title}</div>'
        f' <div style="font-size: 1.5rem; font-weight: 700; margin-top: 6px; color: #37475E;">{value}</div>'
        " </div> "
    )
    if not url:
        return card_html
    return f"""<a href="{url}" target="_blank" style="text-decoration: none;">{card_html}</a>"""
def get_n_modules():
    """
    Count the modules available in ProteoBench.

    A module is any ``.py`` file located directly in the ``pages`` directory next to
    this file, except ``__init__.py`` and files whose name starts with ``base``.

    Returns
    -------
    int
        The number of modules.
    """
    module_dir = Path(__file__).parent / "pages"
    count = 0
    for candidate in module_dir.glob("*.py"):
        # Package marker and base pages are infrastructure, not modules.
        if candidate.name == "__init__.py" or candidate.name.startswith("base"):
            continue
        count += 1
    return count
def get_n_submitted_points(url: str = "https://proteobench.cubimed.rub.de/datasets/", timeout: float = 10):
    """
    Get the number of submitted points in ProteoBench.

    The directory listing served at ``url`` is downloaded and every distinct link
    target ending in ``/`` is counted as one submitted point.

    Parameters
    ----------
    url : str
        URL of the HTML directory listing that contains one sub-directory per point.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    int or str
        The number of submitted points (excluding 'fasta/', 'raw_files/' and the
        parent link '../'), or the string ``"Error communicating with the server"``
        when the request fails.
    """
    exclude_dirs = {"fasta/", "raw_files/", "../"}
    try:
        # Without an explicit timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        logger.warning("Failed to retrieve the dataset listing from %s", url, exc_info=True)
        return "Error communicating with the server"

    # Find all hrefs that end in "/" (i.e. directories); the set removes duplicate links.
    dirs = re.findall(r'href="([^"]+/)"', response.text)
    return len(set(dirs) - exclude_dirs)
def get_n_supported_tools():
    """
    Count the tools supported by ProteoBench.

    A tool is any ``.py`` file located directly in ``proteobench/io/params`` (resolved
    relative to the parent of this file's directory), except ``__init__.py``.

    Returns
    -------
    int
        The number of supported tools.
    """
    tool_params_dir = Path(__file__).parent.parent / "proteobench" / "io" / "params"
    parser_files = (entry for entry in tool_params_dir.glob("*.py") if entry.name != "__init__.py")
    return sum(1 for _ in parser_files)
# TODO: perhaps proposed modules should be parsed from the GitHub discussions, but there doesn't seem to be an API endpoint for this
def parse_proteobench_index(rst_text: str) -> Dict[str, int]:
    """
    Parse the ProteoBench index.rst and count modules by status.

    Each module is assumed to start with '.. grid-item-card::'. Within a card, the
    first line that starts with ':bdg-' and contains an inline role (':`') is taken
    as the badge line; the text between ':`' and the closing backtick is the status.
    Cards without a badge line are ignored.

    Parameters
    ----------
    rst_text : str
        The text content of the index.rst file.

    Returns
    -------
    Dict[str, int]
        Dictionary mapping lower-cased statuses to counts.
    """
    status_counter = Counter()

    # The text before the first directive is preamble, not a card, hence the [1:].
    for section in rst_text.split(".. grid-item-card::")[1:]:
        for raw_line in section.strip().splitlines():
            line = raw_line.strip()
            if not (line.startswith(":bdg-") and ":`" in line):
                continue
            # Example: :bdg-success:`active`
            # Cut at the closing backtick so that anything following the badge
            # (e.g. a second badge on the same line) does not leak into the status.
            badge_text = line.split(":`")[1].split("`", 1)[0]
            status_counter[badge_text.strip().lower()] += 1
            break  # only the first badge of a card determines its status

    return dict(status_counter)
def get_n_modules_proposed(rst_text: str) -> int:
    """
    Count the proposed modules listed in the ProteoBench index.rst.

    Proposed modules are those whose status is 'in discussion' or 'in development'.

    Parameters
    ----------
    rst_text : str
        The text content of the index.rst file; it is handed to
        parse_proteobench_index() to obtain the per-status counts.

    Returns
    -------
    int
        The total number of proposed modules.
    """
    counts_by_status = parse_proteobench_index(rst_text)
    proposed_statuses = ("in discussion", "in development")
    return sum(counts_by_status.get(status, 0) for status in proposed_statuses)
def get_monthly_visits(api_endpoint: str, token: str, id_site: int) -> Optional[int]:
    """
    Get the monthly visits count from the Matomo API.

    The ``Actions.getPageTitles`` report is requested per day for the last 30 days
    and the ``nb_visits`` of every reported page title are summed.

    Parameters
    ----------
    api_endpoint : str
        The API endpoint URL of the Matomo installation
    token : str
        The authentication token (from Matomo)
    id_site : int
        The site ID (from Matomo)

    Returns
    -------
    Optional[int]
        The number of monthly visits (nb_visits of last 30 days), or ``None`` if
        retrieval/parsing failed or the response does not have the expected shape.
    """
    # data to be sent to api
    request_data = {
        "module": "API",
        "method": "Actions.getPageTitles",
        "idSite": id_site,
        "period": "day",
        "date": "last30",
        "format": "json",
        "token_auth": token,
    }

    try:
        r = requests.post(url=api_endpoint, data=request_data, timeout=10)
        r.raise_for_status()
        visits_by_day = json.loads(r.text)
    except (requests.RequestException, json.JSONDecodeError):
        logger.warning("Failed to retrieve or parse monthly visits from Matomo API", exc_info=True)
        return None

    # API-level failures (e.g. a rejected token) are expected to arrive as a JSON
    # object with "result": "error" rather than as an HTTP error status, so they
    # have to be detected explicitly.
    if not isinstance(visits_by_day, dict) or visits_by_day.get("result") == "error":
        logger.warning("Matomo API returned an error or an unexpected payload instead of per-day visit data")
        return None

    try:
        # Expected shape: {day: [ {"nb_visits": int, ...}, ... ]}; days without visits are empty.
        return sum(page.get("nb_visits", 0) for pages in visits_by_day.values() for page in pages)
    except (AttributeError, TypeError):
        logger.warning("Failed to retrieve or parse monthly visits from Matomo API", exc_info=True)
        return None
@st.cache_data(ttl=3600, show_spinner=False)
def get_module_submission_data() -> Dict[str, Dict[str, int]]:
    """
    Fetch submission data per module by downloading repo archives.

    For every module with a results repository, the ``main`` tarball of that
    repository is downloaded from GitHub and each ``.json`` file in it is counted
    once under its ``software_name`` (``"Unknown"`` when the key is missing).
    Requests are made concurrently to minimize latency. The result is cached by
    Streamlit for one hour.

    Returns
    -------
    Dict[str, Dict[str, int]]
        Mapping of results_repo name to {software_name: count} dict. A repository
        whose archive could not be downloaded or read maps to an empty dict.
    """
    modules_by_category = get_all_modules()

    headers = {}
    try:
        token = st.secrets["gh"]["token"]
        headers["Authorization"] = f"token {token}"
    except Exception:
        # Best effort: a missing secret must not break the page.
        logger.warning(
            "Could not obtain GitHub token, proceeding with unauthenticated requests which may be rate-limited."
        )

    # dict.fromkeys drops duplicates (keeping first-seen order) so that a repository
    # shared by several modules is only downloaded once.
    repo_names = list(
        dict.fromkeys(
            module.results_repo
            for modules in modules_by_category.values()
            for module in modules
            if module.results_repo
        )
    )

    def _fetch_tool_breakdown(repo_name: str) -> tuple:
        """Download one repository archive and return (repo_name, {software_name: count})."""
        try:
            url = f"https://api.github.com/repos/Proteobench/{repo_name}/tarball/main"
            resp = requests.get(url, headers=headers, timeout=30)
            resp.raise_for_status()
        except requests.RequestException:
            logger.warning("Failed to download archive for %s", repo_name, exc_info=True)
            return repo_name, {}

        tools = Counter()
        try:
            # The archive is read from memory; nothing is extracted to disk.
            with tarfile.open(fileobj=io.BytesIO(resp.content), mode="r:gz") as tar:
                for member in tar.getmembers():
                    if not (member.name.endswith(".json") and member.isfile()):
                        continue
                    try:
                        f = tar.extractfile(member)
                        data = json.loads(f.read())
                    except (ValueError, KeyError, OSError):
                        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                        logger.warning("Skipping malformed file %s in %s", member.name, repo_name, exc_info=True)
                        continue
                    if not isinstance(data, dict):
                        # Valid JSON, but not an object: there is no software_name to read.
                        logger.warning("Skipping malformed file %s in %s", member.name, repo_name)
                        continue
                    tools[data.get("software_name", "Unknown")] += 1
        except tarfile.TarError:
            logger.warning("Failed to read archive for %s", repo_name, exc_info=True)
            return repo_name, {}

        return repo_name, dict(tools)

    if not repo_names:
        return {}

    result: Dict[str, Dict[str, int]] = {}
    with ThreadPoolExecutor(max_workers=min(len(repo_names), 10)) as executor:
        futures = {executor.submit(_fetch_tool_breakdown, name): name for name in repo_names}
        for future in as_completed(futures):
            repo_name, tool_counts = future.result()
            result[repo_name] = tool_counts

    return result
def build_submissions_figure():
    """
    Build a Plotly figure with one vertical bar chart per category (DDA, DIA, etc.).

    Each subplot shows the number of submissions per module of that category, sorted
    from most to least submissions. Archived modules and modules without submission
    data are left out. Bars are labelled with the module title, which is also the key
    of the returned per-tool mapping.

    Returns
    -------
    tuple(plotly.graph_objects.Figure, Dict[str, Dict[str, int]]) or (None, None)
        The bar figure and a mapping of module title to per-tool counts, or
        ``(None, None)`` when no category has any module to show.
    """
    from pages.utils.module_registry import get_all_modules
    from plotly import graph_objects as go
    from plotly.subplots import make_subplots

    registry = get_all_modules()
    counts_by_repo = get_module_submission_data()

    # Collect the bars of every category, skipping archived modules
    bars_by_category: Dict[str, list] = {}
    tools_by_title: Dict[str, Dict[str, int]] = {}
    for category, modules in registry.items():
        bars = []
        for module in modules:
            if module.release_stage == "archived":
                continue
            repo = module.results_repo
            if not repo or repo not in counts_by_repo:
                continue
            per_tool = counts_by_repo[repo]
            bars.append({"label": module.title, "count": sum(per_tool.values())})
            tools_by_title[module.title] = per_tool
        if bars:
            bars_by_category[category] = sorted(bars, key=lambda bar: bar["count"], reverse=True)

    if not bars_by_category:
        return None, None

    categories = sorted(bars_by_category.keys())
    palette = ["#F4A582", "#92C5DE", "#B2DF8A", "#CAB2D6", "#FDBF6F", "#FB9A99"]

    fig = make_subplots(
        rows=1,
        cols=len(categories),
        subplot_titles=[f"{cat} Modules" for cat in categories],
        shared_yaxes=False,
        horizontal_spacing=0.08,
    )

    for position, cat in enumerate(categories):
        column = position + 1  # subplot columns are 1-based
        bars = bars_by_category[cat]
        trace = go.Bar(
            x=[bar["label"] for bar in bars],
            y=[bar["count"] for bar in bars],
            marker_color=palette[position % len(palette)],  # colors repeat when categories outnumber the palette
            name=cat,
            hovertemplate="<b>%{x}</b><br>Submissions: %{y}<br><i>Click for tool breakdown</i><extra></extra>",
            showlegend=False,
        )
        fig.add_trace(trace, row=1, col=column)
        fig.update_xaxes(tickangle=-45, row=1, col=column)
        # Only the left-most subplot carries the y-axis title
        y_title = "# public workflow results" if column == 1 else ""
        fig.update_yaxes(title_text=y_title, row=1, col=column)

    # The figure grows with the most populated category
    max_modules = max(len(bars) for bars in bars_by_category.values())
    fig.update_layout(
        height=max(350, 200 + max_modules * 25),
        margin=dict(l=40, r=20, t=40, b=120),
    )

    return fig, tools_by_title
def build_tool_pie_chart(module_title: str, tool_counts: Dict[str, int]):
    """
    Build a Plotly pie chart with the per-tool share of submissions of one module.

    Parameters
    ----------
    module_title : str
        The module title, shown in the chart heading.
    tool_counts : Dict[str, int]
        Mapping of software_name to submission count; one slice per entry.

    Returns
    -------
    plotly.graph_objects.Figure
        The pie chart, with a horizontal legend below the plot.
    """
    from plotly import graph_objects as go

    slices = go.Pie(
        labels=list(tool_counts.keys()),
        values=list(tool_counts.values()),
        hovertemplate="<b>%{label}</b><br>Submissions: %{value}<br>(%{percent})<extra></extra>",
        textinfo="label+value",
        textposition="auto",
    )
    chart = go.Figure(data=[slices])

    heading = dict(text=f"Tool breakdown: {module_title}", font=dict(size=14))
    legend_below = dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5)
    chart.update_layout(
        title=heading,
        height=350,
        margin=dict(l=20, r=20, t=50, b=20),
        showlegend=True,
        legend=legend_below,
    )
    return chart
if __name__ == "__main__":
    # This block is only for testing purposes: it prints the statistics computed by this module.
    print(f"Number of modules: {get_n_modules()}")
    print(f"Number of submitted points: {get_n_submitted_points()}")
    print(f"Number of supported tools: {get_n_supported_tools()}")

    # Read the documentation index once and reuse its text.
    index_path = Path(__file__).parent.parent / "docs" / "index.rst"
    index_text = index_path.read_text(encoding="utf-8")
    print(f"Number of proposed modules: {get_n_modules_proposed(index_text)}")