Why Markdown → HTML?
Markdown is the most popular markup language for technical writing, but browsers only understand HTML. The Markdown → HTML conversion is needed for:
- Static site generators: Hugo, Jekyll, MkDocs
- Blogs and CMS: displaying Markdown content in the browser
- Technical documentation: ReadTheDocs, Docusaurus
- HTML emails: newsletters written in Markdown
- WYSIWYG editors: real-time preview
Library 1: python-markdown (Most Extensible)
pip install markdown pymdown-extensions pygments
import markdown
from pathlib import Path
def md_to_html_basic(md_text, extensions=None):
    """Convert Markdown text to an HTML fragment using python-markdown.

    Args:
        md_text: Markdown source text.
        extensions: Optional list of python-markdown extension names to
            enable. ``None`` selects the default set below; an explicit
            empty list enables no extensions at all.

    Returns:
        The rendered HTML as a string (a fragment, without any
        ``<html>``/``<body>`` wrapper).
    """
    # Compare against None rather than relying on truthiness, so a caller
    # can pass [] to get plain Markdown without the default extensions.
    if extensions is None:
        extensions = [
            'tables',       # GFM-style tables
            'fenced_code',  # code blocks delimited by ```
            'codehilite',   # syntax highlighting
            'toc',          # table of contents
            'footnotes',    # footnotes
            'attr_list',    # attributes on elements
            'def_list',     # definition lists
            'abbr',         # abbreviations
            'meta',         # "key: value" metadata header at top of document
            'admonition',   # NOTE/WARNING/TIP blocks
            'nl2br',        # line breaks → <br>
        ]
    # Per-extension settings, keyed by extension name.
    extension_configs = {
        'codehilite': {
            'css_class': 'highlight',
            'linenums': False,
            'guess_lang': True,
        },
        'toc': {
            'permalink': True,
            'toc_depth': '2-4',
        },
    }
    converter = markdown.Markdown(
        extensions=extensions,
        extension_configs=extension_configs,
        output_format='html5',
    )
    return converter.convert(md_text)
def md_file_to_html(md_path, html_output=None, title=None, css=None):
    """Convert a .md file to a complete, standalone .html page with inline CSS.

    Args:
        md_path: Path to the Markdown source file (read as UTF-8).
        html_output: Destination path for the HTML file. Defaults to
            ``md_path`` with its suffix replaced by ``.html``.
        title: Page title. Defaults to the file stem with ``-``/``_``
            turned into spaces and title-cased.
        css: CSS text to embed in ``<style>``. A falsy value selects the
            GitHub-like default stylesheet defined below.

    Returns:
        The full HTML document as a string (the same text that is written
        to ``html_output``).
    """
    # Imported locally so the block does not need a new file-level import.
    from html import escape

    md_path = Path(md_path)
    md_text = md_path.read_text(encoding='utf-8')
    if html_output is None:
        html_output = md_path.with_suffix('.html')
    out_path = Path(html_output)
    # GitHub-like default CSS
    default_css = '''
* { box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica,
Arial, sans-serif;
font-size: 16px; line-height: 1.6; color: #24292e;
max-width: 800px; margin: 0 auto; padding: 2rem;
}
h1,h2,h3 { font-weight: 600; line-height: 1.25; margin-top: 1.5rem; }
h1 { font-size: 2em; padding-bottom: 0.3em; border-bottom: 1px solid #eaecef; }
h2 { font-size: 1.5em; padding-bottom: 0.3em; border-bottom: 1px solid #eaecef; }
a { color: #0366d6; text-decoration: none; }
a:hover { text-decoration: underline; }
pre { background: #f6f8fa; border-radius: 6px; padding: 1rem; overflow: auto; }
code { background: #f6f8fa; border-radius: 3px; padding: 0.2em 0.4em;
font-family: "SFMono-Regular", Consolas, monospace; font-size: 85%; }
pre code { background: none; padding: 0; }
table { border-collapse: collapse; width: 100%; }
th,td { border: 1px solid #dfe2e5; padding: 6px 13px; }
th { background: #f6f8fa; font-weight: 600; }
tr:nth-child(even) { background: #f6f8fa; }
blockquote { margin: 0; padding: 0 1em; color: #6a737d;
border-left: 4px solid #dfe2e5; }
img { max-width: 100%; }
'''
    html_body = md_to_html_basic(md_text)
    # The title is plain text (caller-supplied or derived from the file
    # name); escape it so characters like & and < cannot break <title>.
    page_title = escape(
        title or md_path.stem.replace('-', ' ').replace('_', ' ').title()
    )
    page = f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{page_title}</title>
<style>{css or default_css}</style>
</head>
<body>
{html_body}
</body>
</html>'''
    # Create the destination directory if needed instead of failing on write.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(page, encoding='utf-8')
    size_kb = out_path.stat().st_size / 1024
    print(f"HTML generated: {html_output} ({size_kb:.1f} KB)")
    return page
# Example usage: convert README.md (resolved against the current working
# directory) into README.html beside it, overriding the derived page title.
md_file_to_html('README.md', title='Project Documentation')
PyMdown Extensions (GFM and More)
def md_with_pymdown(md_text):
    """
    Render Markdown to HTML with the PyMdown Extensions suite enabled.

    Covers GitHub Flavored Markdown (GFM) style features and more.
    Requires: pip install pymdown-extensions
    """
    # Extensions shipped by the pymdown-extensions package.
    pymdown_exts = [
        'pymdownx.superfences',   # nestable fences (also used for Mermaid)
        'pymdownx.highlight',     # code highlighting, optional line numbers
        'pymdownx.inlinehilite',  # highlighting for inline code spans
        'pymdownx.tabbed',        # tabbed content
        'pymdownx.details',       # collapsible <details> sections
        'pymdownx.tasklist',      # [ ] / [x] task lists
        'pymdownx.emoji',         # :smile: :rocket: shortcodes
        'pymdownx.mark',          # ==marked text==
        'pymdownx.caret',         # ^superscript^
        'pymdownx.tilde',         # ~subscript~ and ~~strikethrough~~
        'pymdownx.smartsymbols',  # (c) → ©, (tm) → ™
    ]
    # Extensions bundled with python-markdown itself.
    builtin_exts = ['tables', 'footnotes', 'toc']
    converter = markdown.Markdown(extensions=pymdown_exts + builtin_exts)
    return converter.convert(md_text)
Library 2: mistune (Fastest)
def md_with_mistune(md_text, sanitize=True):
"""
mistune: fastest Markdown parser in pure Python.
pip install mistune
Ideal for high-volume sites or real-time conversion.
"""
try:
import mistune
except ImportError:
raise ImportError("pip install mistune")
renderer = mistune.HTMLRenderer(escape=sanitize)
md = mistune.create_markdown(
renderer=renderer,
plugins=['table', 'url', 'strikethrough', 'footnotes', 'task_lists'],
)
return md(md_text)
def md_with_custom_renderer(md_text):
    """Render Markdown with a customised mistune HTML renderer.

    Headings get a slug ``id`` derived from their text, and images are
    wrapped in ``<figure>`` with lazy loading and the alt text repeated as
    a ``<figcaption>``.

    Args:
        md_text: Markdown source text.

    Returns:
        The rendered HTML string.
    """
    import re
    from html import escape, unescape

    import mistune

    tag_pattern = re.compile(r'<[^>]+>')

    def plain_text(rendered):
        """Reduce already-rendered inline HTML to its plain text."""
        return unescape(tag_pattern.sub('', rendered))

    class CustomRenderer(mistune.HTMLRenderer):
        # HTMLRenderer calls each method with the already-rendered child
        # HTML as the first argument and the token's attrs as keyword
        # arguments, so overrides must use this signature and not the
        # (token, state) form used by mistune's non-HTML renderers.

        def heading(self, text, level, **attrs):
            # Build the slug from the visible text only, so inline markup
            # and entities in the heading do not leak into the id.
            slug = re.sub(r'[^\w\s-]', '', plain_text(text).lower())
            slug = re.sub(r'\s+', '-', slug.strip())
            # Presumably an ``id`` attr is only passed when a plugin has
            # assigned one; prefer it so links to that id keep working.
            heading_id = attrs.get('id') or slug
            id_attr = f' id="{heading_id}"' if heading_id else ''
            return f'<h{level}{id_attr}>{text}</h{level}>\n'

        def image(self, text, url, title=None):
            # Escape everything placed in attributes or element text; the
            # URL goes through the renderer's own URL filter.
            src = self.safe_url(url)
            alt = escape(plain_text(text))
            title_attr = f' title="{escape(unescape(title))}"' if title else ''
            return (f'<figure>'
                    f'<img src="{src}" alt="{alt}" loading="lazy"{title_attr}>'
                    f'<figcaption>{alt}</figcaption>'
                    f'</figure>\n')

    to_html = mistune.create_markdown(
        renderer=CustomRenderer(),
        plugins=['table', 'strikethrough', 'task_lists'],
    )
    return to_html(md_text)
Minimal Static Site Generator
def generate_static_site(md_folder, output_folder, css_path=None):
    """Generate a static website from a folder of Markdown files.

    Similar to a minimal MkDocs: every ``*.md`` file found recursively
    under ``md_folder`` becomes an ``.html`` page at the same relative path
    under ``output_folder``, and each page carries a navigation list
    linking to all pages.

    Args:
        md_folder: Directory searched recursively for ``*.md`` files.
        output_folder: Directory the HTML tree is written to (created if
            missing).
        css_path: Optional path to a CSS file. Its contents are embedded
            after the built-in layout rules, so its rules can override
            them. ``None`` keeps the built-in styles only.

    Returns:
        The number of pages generated.
    """
    # Imported locally so the block does not need a new file-level import.
    from html import escape

    md_dir = Path(md_folder)
    html_dir = Path(output_folder)
    html_dir.mkdir(parents=True, exist_ok=True)
    md_files = sorted(md_dir.rglob('*.md'))
    print(f"Generating site: {len(md_files)} pages...")

    extra_css = Path(css_path).read_text(encoding='utf-8') if css_path else ''

    # One record per page:
    # (source file, path relative to md_dir, escaped title, escaped href).
    # The href is relative to the site root.
    pages = []
    for md in md_files:
        rel = md.relative_to(md_dir)
        title = md.stem.replace('-', ' ').replace('_', ' ').title()
        href = rel.with_suffix('.html').as_posix()
        pages.append((md, rel, escape(title), escape(href)))

    template = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{title}</title>
<style>
body {{ font-family: sans-serif; max-width: 900px; margin: 0 auto;
padding: 1rem; display: flex; gap: 2rem; }}
nav {{ min-width: 200px; }}
nav a {{ display: block; padding: 0.3em 0; color: #0366d6; }}
main {{ flex: 1; }}
{extra_css}</style>
</head>
<body>
<nav>{navigation}</nav>
<main>{content}</main>
</body>
</html>'''

    for md, rel, title, _ in pages:
        # Root-relative hrefs only resolve from top-level pages. For a page
        # nested N directories deep, climb N levels before descending.
        to_root = '../' * (len(rel.parts) - 1)
        navigation = '\n'.join(
            f'<a href="{to_root}{href}">{nav_title}</a>'
            for _, _, nav_title, href in pages
        )
        html_body = md_to_html_basic(md.read_text(encoding='utf-8'))
        html_p = html_dir / rel.with_suffix('.html')
        html_p.parent.mkdir(parents=True, exist_ok=True)
        page = template.format(
            title=title,
            navigation=navigation,
            content=html_body,
            extra_css=extra_css,
        )
        html_p.write_text(page, encoding='utf-8')
        print(f" ✓ {rel} → {html_p.name}")
    print(f"\nSite generated in: {output_folder}")
    return len(md_files)
# Example usage: render every *.md found recursively under docs/ into website/.
generate_static_site('docs/', 'website/')
Sanitizing User-Submitted Markdown
def safe_md_to_html(md_text):
    """Convert untrusted Markdown to HTML and sanitize the result.

    The Markdown is rendered first, then the HTML is filtered through a
    bleach allowlist: tags and attributes not listed below are removed.
    Essential when Markdown comes from untrusted users.
    Requires: pip install bleach

    Args:
        md_text: Markdown source text, possibly hostile.

    Returns:
        The sanitized HTML string.

    Raises:
        ImportError: If bleach is not installed.
    """
    try:
        import bleach
    except ImportError as exc:
        # Chain explicitly so the traceback shows the missing module as the
        # direct cause rather than as an error raised inside the handler.
        raise ImportError("pip install bleach") from exc
    # Convert Markdown to HTML first; sanitizing must see the final markup.
    dirty_html = md_to_html_basic(md_text)
    # Allowed tags and attributes (allowlist)
    allowed_tags = [
        'p', 'br', 'strong', 'em', 'u', 's', 'del',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ul', 'ol', 'li', 'blockquote', 'pre', 'code', 'hr',
        'table', 'thead', 'tbody', 'tr', 'th', 'td',
        'a', 'img', 'figure', 'figcaption',
    ]
    allowed_attrs = {
        'a': ['href', 'title', 'rel'],
        'img': ['src', 'alt', 'title', 'width', 'height', 'loading'],
        'code': ['class'],
        'th': ['align'], 'td': ['align'],
    }
    clean_html = bleach.clean(
        dirty_html,
        tags=allowed_tags,
        attributes=allowed_attrs,
        strip=True,            # drop disallowed tags instead of escaping them
        strip_comments=True,
    )
    # Turn bare URLs into links and add rel="nofollow" to links (only the
    # nofollow callback is applied). Skip <pre>/<code> so URLs inside code
    # samples stay as literal text.
    return bleach.linkify(
        clean_html,
        callbacks=[bleach.callbacks.nofollow],
        skip_tags=['pre', 'code'],
    )
# Smoke test with hostile Markdown: a javascript: link, a raw <script> block,
# and an <img> carrying an inline onerror handler.
malicious_md = '''
# Normal title
**Bold text**
[Link](javascript:alert('XSS'))
<script>alert('XSS')</script>
<img src="x" onerror="alert('XSS')">
'''
# Runs at module level, so it executes whenever this file is run or imported.
safe_html = safe_md_to_html(malicious_md)
# 'script' is not in safe_md_to_html's tag allowlist and 'onerror' is not in
# its <img> attribute allowlist, so neither should survive in the output.
# NOTE(review): the javascript: href is presumably dropped by bleach's default
# protocol filtering — verify against the printed result.
print("Sanitized HTML:", safe_html)
Conclusion
For converting Markdown to HTML in Python, python-markdown is the most common choice for its extension ecosystem (PyMdown, codehilite, toc). mistune is the fastest (2-5× faster than python-markdown) and ideal for real-time conversion. For user-submitted content, always pair the conversion with bleach to sanitize the resulting HTML and prevent XSS. For full static sites, consider MkDocs (built on python-markdown) or Pelican.
Related conversions
Document conversions that follow this topic naturally: