Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/md_to_html.py
#!/usr/bin/env python3
"""
Markdown to HTML converter for research reports.

Converts the markdown sections of a report to HTML while preserving structure
and formatting. The report body and the bibliography are converted separately
so callers can place them in different parts of a page template.
"""

import re
from typing import List, Optional, Tuple
from pathlib import Path


# HTML fragments shared between the header conversion and the section closer.
_SECTION_OPEN = '<div class="section">'
_EXEC_SUMMARY_OPEN = '<div class="executive-summary">'
_EXEC_SUMMARY_H2 = '<h2 class="section-title">Executive Summary</h2>'

# Anchored to the start of a line so "### Bibliography ..." is not a match.
_BIBLIOGRAPHY_HEADING_RE = re.compile(r'^## Bibliography', re.MULTILINE)

# Emphasis delimiters must hug the emphasised text; this keeps a "* " list
# marker (or a spaced "a * b") from being consumed as an opening delimiter.
_ITALIC_RE = re.compile(r'\*(?=\S)(.+?)(?<=\S)\*')

_ORDERED_ITEM_RE = re.compile(r'^\d+\.\s')
_SEPARATOR_CELL_RE = re.compile(r':?-+:?')

# Substrings that mark a line as block-level HTML which must not be wrapped
# in a paragraph.
_BLOCK_MARKERS = (
    '<h', '<div', '<ul', '<ol', '<li', '<table',
    '</div>', '</ul>', '</ol>',
)


def convert_markdown_to_html(markdown_text: str) -> Tuple[str, str]:
    """
    Convert markdown to HTML in two parts: content and bibliography.

    The text is split at the first line that starts with ``## Bibliography``.
    Everything before it is converted as report content; everything after it
    (including any trailing text on the heading line) is converted as the
    bibliography.

    Args:
        markdown_text: Full markdown report text

    Returns:
        Tuple of (content_html, bibliography_html). ``bibliography_html`` is
        an empty string when the report has no bibliography heading.
    """
    # maxsplit=1 keeps any later "## Bibliography" text inside the bibliography
    # instead of silently dropping it.
    parts = _BIBLIOGRAPHY_HEADING_RE.split(markdown_text, maxsplit=1)
    content_md = parts[0]
    bibliography_md = parts[1] if len(parts) > 1 else ""

    content_html = _convert_content_section(content_md)
    bibliography_html = _convert_bibliography_section(bibliography_md)

    return content_html, bibliography_html


def _convert_content_section(markdown: str) -> str:
    """
    Convert the main content sections to HTML.

    Everything before the first ``## `` heading (title, front matter) is
    dropped. Each ``## `` heading opens a ``<div class="section">`` that is
    closed before the next section or at the end of the text. The
    "Executive Summary" section is additionally wrapped in a
    ``<div class="executive-summary">``.

    Args:
        markdown: Markdown text without the bibliography

    Returns:
        HTML for the content sections; empty string if there is no ``## ``
        heading.
    """
    # Remove title and front matter: keep from the first "## " heading onward.
    lines = markdown.split('\n')
    first_section = next(
        (idx for idx, line in enumerate(lines) if line.startswith('## ')),
        None,
    )
    html = '\n'.join(lines[first_section:]) if first_section is not None else ''

    # ## Section Title -> opening section div plus h2 (closed later).
    html = re.sub(
        r'^## (.+)$',
        r'<div class="section"><h2 class="section-title">\1</h2>',
        html,
        flags=re.MULTILINE
    )

    # ### Subsection -> <h3 class="subsection-title">Subsection</h3>
    html = re.sub(
        r'^### (.+)$',
        r'<h3 class="subsection-title">\1</h3>',
        html,
        flags=re.MULTILINE
    )

    # #### Subsubsection -> <h4 class="subsubsection-title">Title</h4>
    html = re.sub(
        r'^#### (.+)$',
        r'<h4 class="subsubsection-title">\1</h4>',
        html,
        flags=re.MULTILINE
    )

    # Inline markup: bold first so "**" pairs are gone before italics run.
    html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)
    html = _ITALIC_RE.sub(r'<em>\1</em>', html)
    html = re.sub(r'`(.+?)`', r'<code>\1</code>', html)

    html = _convert_lists(html)
    html = _convert_tables(html)

    # Wrap non-HTML lines in <p> tags.
    html = _convert_paragraphs(html)

    # Open the executive-summary wrapper BEFORE closing sections so that
    # _close_sections can emit the matching extra </div> for that section.
    html = html.replace(_EXEC_SUMMARY_H2, _EXEC_SUMMARY_OPEN + _EXEC_SUMMARY_H2)

    return _close_sections(html)


def _convert_bibliography_section(markdown: str) -> str:
    """
    Convert the bibliography section to HTML.

    Entries of the form ``[N] Title - URL`` become ``bib-entry`` divs with the
    title linked to the URL; remaining ``**bold**`` markup is converted to
    ``<strong>``. All other text is passed through unchanged.

    Args:
        markdown: Markdown text that followed the bibliography heading

    Returns:
        The converted text wrapped in ``<div class="bibliography-content">``,
        or an empty string when the input is blank.
    """
    if not markdown.strip():
        return ""

    # [N] Title - URL  ->  numbered entry with the title as link text.
    html = re.sub(
        r'\[(\d+)\]\s*(.+?)\s*-\s*(https?://[^\s\)]+)',
        r'<div class="bib-entry"><span class="bib-number">[\1]</span> <a href="\3" target="_blank">\2</a></div>',
        markdown
    )

    html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)

    return f'<div class="bibliography-content">{html}</div>'


def _convert_lists(html: str) -> str:
    """
    Convert markdown list items to HTML lists.

    Lines starting with ``- `` or ``* `` open a ``<ul>``; lines starting with
    ``N. `` open an ``<ol>``. Once a list is open, every following list item
    is added to it regardless of marker type or indentation. A non-item line
    indented deeper than the first item is appended to the previous ``<li>``;
    any other line closes the list.

    Args:
        html: Text with one markdown/HTML line per ``\\n``-separated line

    Returns:
        The text with list items replaced by ``<ul>``/``<ol>`` markup.
    """
    result: List[str] = []
    open_tag: Optional[str] = None  # 'ul' or 'ol' while a list is open
    list_level = 0

    for line in html.split('\n'):
        stripped = line.strip()

        if stripped.startswith(('- ', '* ')):
            item_tag, content = 'ul', stripped[2:]
        elif _ORDERED_ITEM_RE.match(stripped):
            item_tag, content = 'ol', _ORDERED_ITEM_RE.sub('', stripped, count=1)
        else:
            item_tag, content = None, ''

        if item_tag is not None:
            if open_tag is None:
                result.append(f'<{item_tag}>')
                open_tag = item_tag
                list_level = len(line) - len(line.lstrip())
            result.append(f'<li>{content}</li>')
            continue

        if open_tag is not None:
            current_level = len(line) - len(line.lstrip())
            if current_level > list_level and stripped:
                # Indented continuation: fold it into the previous item.
                if result[-1].endswith('</li>'):
                    result[-1] = result[-1][:-len('</li>')] + ' ' + stripped + '</li>'
                    continue
            else:
                # Close with the tag that was actually opened, rather than
                # guessing from recent output.
                result.append(f'</{open_tag}>')
                open_tag = None
                list_level = 0

        result.append(line)

    if open_tag is not None:
        result.append(f'</{open_tag}>')

    return '\n'.join(result)


def _split_table_row(line: str) -> List[str]:
    """Split a markdown table row into stripped cell strings."""
    row = line.strip()
    # Drop the outer pipes explicitly so a row without a trailing "|" keeps
    # its last cell.
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]


def _is_table_separator(line: str) -> bool:
    """Return True for a header separator row such as ``|---|:---:|``."""
    if '---' not in line:
        return False
    return all(_SEPARATOR_CELL_RE.fullmatch(cell) for cell in _split_table_row(line))


def _convert_tables(html: str) -> str:
    """
    Convert markdown tables to HTML tables.

    A run of consecutive lines whose stripped form starts with ``|`` is one
    table. The first line of the run is the header row, separator rows made
    only of dashes/colons are skipped, and the remaining lines are data rows.

    Args:
        html: Text with one markdown/HTML line per ``\\n``-separated line

    Returns:
        The text with table rows replaced by ``<table>`` markup.
    """
    result: List[str] = []
    in_table = False

    for line in html.split('\n'):
        if not line.strip().startswith('|'):
            if in_table:
                result.append('</tbody></table>')
                in_table = False
            result.append(line)
            continue

        if not in_table:
            # First row of a run is the header.
            in_table = True
            result.append('<table>')
            result.append('<thead><tr>')
            result.extend(f'<th>{cell}</th>' for cell in _split_table_row(line))
            result.append('</tr></thead>')
            result.append('<tbody>')
        elif _is_table_separator(line):
            continue
        else:
            result.append('<tr>')
            result.extend(f'<td>{cell}</td>' for cell in _split_table_row(line))
            result.append('</tr>')

    if in_table:
        result.append('</tbody></table>')

    return '\n'.join(result)


def _convert_paragraphs(html: str) -> str:
    """
    Wrap non-HTML lines in paragraph tags.

    Consecutive plain-text lines form one paragraph. A blank line, or a line
    recognised as HTML, ends the current paragraph and is passed through
    unchanged.

    Args:
        html: Text with one line per ``\\n``-separated line

    Returns:
        The text with ``<p>``/``</p>`` inserted around plain-text runs.
    """
    result: List[str] = []
    in_paragraph = False

    for line in html.split('\n'):
        stripped = line.strip()

        is_html = bool(stripped) and (
            (stripped.startswith('<') and stripped.endswith('>'))
            or stripped.startswith('</')
            or any(marker in stripped for marker in _BLOCK_MARKERS)
        )

        if not stripped or is_html:
            if in_paragraph:
                result.append('</p>')
                in_paragraph = False
            result.append(line)
        elif in_paragraph:
            result.append(line)
        else:
            result.append('<p>' + line)
            in_paragraph = True

    if in_paragraph:
        result.append('</p>')

    return '\n'.join(result)


def _close_sections(html: str) -> str:
    """
    Close all open section divs.

    A closing tag is inserted before every line that opens a new
    ``<div class="section">`` (except the first) and once more at the end.
    When the section's opening line also opens the executive-summary wrapper,
    that wrapper is closed together with the section.

    Args:
        html: Text in which section divs have been opened but not closed

    Returns:
        The text with every section div closed.
    """
    result: List[str] = []
    pending_close: Optional[str] = None  # closer owed to the open section

    for line in html.split('\n'):
        if _SECTION_OPEN in line:
            if pending_close is not None:
                result.append(pending_close)
            pending_close = '</div></div>' if _EXEC_SUMMARY_OPEN in line else '</div>'
        result.append(line)

    if pending_close is not None:
        result.append(pending_close)

    return '\n'.join(result)


def main():
    """Test the converter with a sample markdown file given on the command line."""
    import sys

    if len(sys.argv) < 2:
        print("Usage: python md_to_html.py <markdown_file>")
        sys.exit(1)

    md_file = Path(sys.argv[1])
    if not md_file.exists():
        print(f"Error: File {md_file} not found")
        sys.exit(1)

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    markdown_text = md_file.read_text(encoding="utf-8")
    content_html, bib_html = convert_markdown_to_html(markdown_text)

    print("=== CONTENT HTML ===")
    print(content_html[:1000])
    print("\n=== BIBLIOGRAPHY HTML ===")
    print(bib_html[:500])


if __name__ == "__main__":
    main()