Add 3DE to Nuke Track Converter v2.0

- Complete rewrite with beautiful Rich TUI interface
- Interactive and CLI modes for flexibility
- Robust error handling with clear, helpful messages
- Gap filling with linear interpolation support
- Coordinate system transforms (pixels/normalized)
- Auto-generated output filenames from input
- Configurable resolution and Nuke versions
- Batch processing support via CLI
- Comprehensive documentation in Scripts/README_CONVERTER.md
- Updated main README.md with Scripts section
This commit is contained in:
NicholaiVogel 2025-10-07 21:14:33 -06:00
parent c43a45ae1a
commit 3c83039a71
2113 changed files with 372008 additions and 12 deletions

View File

@ -32,22 +32,56 @@ A curated set of master templates for Nuke/NukeX. Each `.nk` ships with sensible
## Repository Tree
```text
/
├─ Biohazard Master Rev 01/
│ ├─ Biohazard Master Rev 01.nk
│ ├─ Biohazard Master Rev 01.nk~ # backup
│ └─ desktop.ini
├─ Biohazard Master Rev 02/
│ ├─ Biohazard Master Rev 02 LIGHT.nk # lightweight preset
│ ├─ Biohazard Master Rev 02.nk
│ └─ desktop.ini
├─ Covert Template/
│ ├─ Covert Template.nk
│ ├─ Covert Template.nk~ # backup
│ └─ desktop.ini
├─ Scripts/
│ ├─ 3de-to-nuke-converter.py # 3DE track converter v2.0
│ ├─ export-2d-tracks-from-3de-to-nuke.py # legacy converter
│ ├─ requirements.txt # Python dependencies
│ └─ README_CONVERTER.md # converter documentation
├─ Shot_Templates/
│ ├─ Biohazard Master Rev 01/
│ │ ├─ Biohazard Master Rev 01.nk
│ │ ├─ Biohazard Master Rev 01.nk~ # backup
│ │ └─ desktop.ini
│ ├─ Biohazard Master Rev 02/
│ │ ├─ Biohazard Master Rev 02 LIGHT.nk # lightweight preset
│ │ ├─ Biohazard Master Rev 02.nk
│ │ └─ desktop.ini
│ └─ Covert Template/
│ ├─ Covert Template.nk
│ ├─ Covert Template.nk~ # backup
│ └─ desktop.ini
└─ README.md
```
---
## Scripts
<details>
<summary><strong>3DE to Nuke Track Converter v2.0</strong> — production tool</summary>
* Professional converter for 3DEqualizer 2D tracks to Nuke Tracker4 nodes
* Beautiful terminal UI with progress bars and interactive prompts
* Features:
- CLI and interactive modes for flexibility
- Robust error handling with clear messages
- Gap filling with linear interpolation
- Coordinate system transforms (pixels/normalized)
- Auto-generated output filenames
- Configurable resolution and Nuke versions
- Batch processing support
* See `Scripts/README_CONVERTER.md` for full documentation
**Quick Start:**
```bash
cd Scripts
pip install -r requirements.txt
python 3de-to-nuke-converter.py # Interactive mode
```
</details>
---
## Templates
<details>

41
Scripts/.gitignore vendored Normal file
View File

@ -0,0 +1,41 @@
# Python virtual environment
Include/
Lib/
Scripts/
pyvenv.cfg
# Python cache
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
# Distribution / packaging
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
Thumbs.db
desktop.ini

View File

@ -0,0 +1,548 @@
#!/usr/bin/env python3
"""
3DE to Nuke Track Converter
A professional tool for converting 3DEqualizer 2D tracks to Nuke Tracker nodes
"""
import argparse
import sys
import json
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeElapsedColumn
from rich.prompt import Prompt, Confirm
from rich.layout import Layout
from rich.text import Text
from rich import box
from rich.style import Style
console = Console()
# ASCII Art Banner
BANNER = """
__________.___________ ___ ___ _____ __________ _____ __________________
\______ \ \_____ \ / | \ / _ \ \____ / / _ \\______ \______ \
| | _/ |/ | \/ ~ \/ /_\ \ / / / /_\ \| _/| | \
| | \ / | \ Y / | \/ /_/ | \ | \| ` \
|______ /___\_______ /\___|_ /\____|__ /_______ \____|__ /____|_ /_______ /
\/ \/ \/ \/ \/ \/ \/ \/
_________ .____ .___ ___________________ ________ .____ _________
\_ ___ \| | | | \__ ___/\_____ \ \_____ \ | | / _____/
/ \ \/| | | | | | / | \ / | \| | \_____ \
\ \___| |___| | | | / | \/ | \ |___ / \
\______ /_______ \___| |____| \_______ /\_______ /_______ \/_______ /
\/ \/ \/ \/ \/ \/
Convert 2d tracks from 3dequalizer to .nk 2d tracker nodes.
"""
@dataclass
class TrackPoint:
    """A single 2D tracking sample: position at one frame."""
    frame: int  # frame number this sample belongs to
    x: float    # horizontal position (pixels, or 0-1 when input_space='normalized')
    y: float    # vertical position (same units as x)
@dataclass
class Track:
    """A complete named 2D track: a start frame plus its list of samples."""
    name: str               # display name used for the Nuke tracker row
    frame_start: int        # first frame as declared in the 3DE export
    points: List[TrackPoint]  # samples; not guaranteed sorted until fill_gaps()

    @property
    def frame_end(self) -> int:
        # Highest sampled frame; falls back to frame_start for empty tracks.
        return max(p.frame for p in self.points) if self.points else self.frame_start

    @property
    def num_points(self) -> int:
        # Number of samples currently stored.
        return len(self.points)
class TrackConverter:
    """Parses 3DEqualizer 2D-track exports and writes a Nuke Tracker4 .nk file.

    All behaviour is driven by a plain ``config`` dict. Keys read here:
    ``input_width``, ``input_height``, ``input_space`` ('pixels'|'normalized'),
    ``flip_y``, ``fill_gaps``, ``fill_strategy`` ('last'|'linear'),
    ``enable_all``, ``name_prefix``, ``nuke_version``, ``format``.
    """

    def __init__(self, config: Dict):
        # Conversion settings (see class docstring for recognised keys).
        self.config = config
        # Populated by parse_3de_file().
        self.tracks: List[Track] = []

    def parse_3de_file(self, filepath: Path) -> bool:
        """Parse a 3DE track file with robust error handling.

        Expected layout: line 1 = track count, then per track:
        track number, start frame, point count, and one
        "<frame> <x> <y>" line per point.
        Returns True on success; on any failure prints a message and
        returns False (never raises).
        """
        try:
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
                TimeElapsedColumn(),
                console=console
            ) as progress:
                task = progress.add_task("[cyan]Reading 3DE file...", total=100)
                with open(filepath, 'r') as f:
                    lines = f.read().strip().split('\n')
                progress.update(task, advance=20)
                # NOTE(review): ''.split('\n') yields [''], so `lines` is never
                # empty and this branch is unreachable; an empty file is instead
                # reported by the int() check just below.
                if not lines:
                    console.print("[red]✗[/red] Error: File is empty", style="bold red")
                    return False
                try:
                    track_count = int(lines[0])
                except ValueError:
                    console.print(f"[red]✗[/red] Error: Invalid track count on line 1: '{lines[0]}'", style="bold red")
                    return False
                progress.update(task, advance=10, description=f"[cyan]Parsing {track_count} tracks...")
                current_line = 1
                # Spread the remaining 70% of the progress bar across all tracks.
                parse_increment = 70 / max(track_count, 1)
                for i in range(track_count):
                    try:
                        # Parse track number (kept as a string; used only for naming)
                        if current_line >= len(lines):
                            raise ValueError(f"Unexpected end of file at track {i+1}")
                        track_num = lines[current_line].strip()
                        current_line += 1
                        # Parse frame start
                        if current_line >= len(lines):
                            raise ValueError(f"Missing frame_start for track {i+1}")
                        frame_start = int(lines[current_line])
                        current_line += 1
                        # Parse number of points
                        if current_line >= len(lines):
                            raise ValueError(f"Missing num_points for track {i+1}")
                        num_points = int(lines[current_line])
                        current_line += 1
                        # Parse points: each line is "<frame> <x> <y>"
                        points = []
                        for j in range(num_points):
                            if current_line >= len(lines):
                                raise ValueError(f"Missing point data for track {i+1}, point {j+1}")
                            parts = lines[current_line].strip().split()
                            if len(parts) < 3:
                                raise ValueError(f"Invalid point data at line {current_line+1}: '{lines[current_line]}'")
                            points.append(TrackPoint(
                                frame=int(parts[0]),
                                x=float(parts[1]),
                                y=float(parts[2])
                            ))
                            current_line += 1
                        # Create track with custom name if available, otherwise use track number
                        track_name = self.config.get('name_prefix', '') + f"track_{track_num}"
                        self.tracks.append(Track(
                            name=track_name,
                            frame_start=frame_start,
                            points=points
                        ))
                        progress.update(task, advance=parse_increment)
                    except (ValueError, IndexError) as e:
                        console.print(f"[red]✗[/red] Error parsing track {i+1}: {str(e)}", style="bold red")
                        return False
                progress.update(task, completed=100)
                return True
        except FileNotFoundError:
            console.print(f"[red]✗[/red] File not found: {filepath}", style="bold red")
            return False
        except Exception as e:
            # Catch-all so a malformed file can never crash the UI.
            console.print(f"[red]✗[/red] Unexpected error: {str(e)}", style="bold red")
            return False

    def transform_coordinates(self, track: Track) -> Track:
        """Transform coordinates based on config settings.

        NOTE(review): mutates ``track.points`` in place and returns the
        same Track object.
        """
        if self.config.get('input_space') == 'normalized':
            # Convert normalized (0-1) to pixels
            width = self.config.get('input_width', 2048)
            height = self.config.get('input_height', 1556)
            for point in track.points:
                point.x *= width
                point.y *= height
        if self.config.get('flip_y', False):
            # Mirror vertically around the image height.
            height = self.config.get('input_height', 1556)
            for point in track.points:
                point.y = height - point.y
        return track

    def fill_gaps(self, track: Track) -> Track:
        """Fill missing frames in the track.

        Strategy 'last' repeats the previous sample; 'linear' interpolates
        between the surrounding samples. No-op unless config['fill_gaps'].
        Also leaves ``track.points`` sorted by frame as a side effect.
        """
        if not self.config.get('fill_gaps', False) or not track.points:
            return track
        strategy = self.config.get('fill_strategy', 'last')
        filled_points = []
        # Sort points by frame
        sorted_points = sorted(track.points, key=lambda p: p.frame)
        for i in range(len(sorted_points) - 1):
            current = sorted_points[i]
            next_point = sorted_points[i + 1]
            filled_points.append(current)
            # Check for gap
            gap = next_point.frame - current.frame
            if gap > 1:
                for frame in range(current.frame + 1, next_point.frame):
                    if strategy == 'last':
                        filled_points.append(TrackPoint(frame, current.x, current.y))
                    elif strategy == 'linear':
                        # Linear interpolation
                        t = (frame - current.frame) / gap
                        x = current.x + (next_point.x - current.x) * t
                        y = current.y + (next_point.y - current.y) * t
                        filled_points.append(TrackPoint(frame, x, y))
        filled_points.append(sorted_points[-1])
        track.points = filled_points
        return track

    def generate_curve_data(self, points: List[TrackPoint], axis: str) -> str:
        """Serialise one axis of a point list as a Nuke animation curve,
        e.g. ``{curve x10 512.0 x11 513.2}``."""
        curve_data = '{curve'
        for point in points:
            value = point.x if axis == 'x' else point.y
            curve_data += f' x{point.frame} {value}'
        curve_data += '}'
        return curve_data

    def generate_nuke_file(self, output_path: Path) -> bool:
        """Generate the Nuke .nk file containing a single Tracker4 node.

        Returns True on success, False (with a printed error) otherwise.
        """
        try:
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                BarColumn(),
                console=console
            ) as progress:
                task = progress.add_task("[cyan]Generating Nuke file...", total=len(self.tracks))
                # Transform and process tracks
                processed_tracks = []
                for track in self.tracks:
                    track = self.transform_coordinates(track)
                    track = self.fill_gaps(track)
                    processed_tracks.append(track)
                    progress.advance(task)
                # Generate track entries (one row of the Tracker4 table each)
                track_entries = []
                for index, track in enumerate(processed_tracks):
                    track_x = self.generate_curve_data(track.points, 'x')
                    track_y = self.generate_curve_data(track.points, 'y')
                    # Only the first track is enabled unless enable_all is set.
                    enabled = '1' if (self.config.get('enable_all', False) or index == 0) else '0'
                    entry = f' {{ {{curve K x1 1}} "{track.name}" {track_x} {track_y} {{curve K x1 0}} {{curve K x1 0}} {enabled} 0 0 {{curve x1 0}} 1 0 -32 -32 32 32 -22 -22 22 22 {{}} {{}} {{}} {{}} {{}} {{}} {{}} {{}} {{}} {{}} {{}} }}'
                    track_entries.append(entry)
                # Get config values
                nuke_version = self.config.get('nuke_version', '15.2 v3')
                format_name = self.config.get('format', '2K_Super_35(full-ap)')
                width = self.config.get('input_width', 2048)
                height = self.config.get('input_height', 1556)
                center_x = width // 2
                center_y = height // 2
                # Generate .nk file.
                # NOTE(review): the shebang path below is hardcoded to Nuke
                # 15.2v3 regardless of config['nuke_version'] — confirm intended.
                nuke_file = f"""#! C:/Program Files/Nuke15.2v3/nuke-15.2.3.dll -nx
version {nuke_version}
Root {{
inputs 0
name {output_path.name}
format "{width} {height} 0 0 {width} {height} 1 {format_name}"
proxy_type scale
colorManagement OCIO
OCIO_config fn-nuke_studio-config-v1.0.0_aces-v1.3_ocio-v2.1
defaultViewerLUT "OCIO LUTs"
workingSpaceLUT scene_linear
monitorLut "ACES 1.0 - SDR Video (sRGB - Display)"
monitorOutLUT "ACES 1.0 - SDR Video (sRGB - Display)"
}}
Tracker4 {{
inputs 0
tracks {{ {{ 1 31 {len(processed_tracks)} }}
{{ {{ 5 1 20 enable e 1 }}
{{ 3 1 75 name name 1 }}
{{ 2 1 58 track_x track_x 1 }}
{{ 2 1 58 track_y track_y 1 }}
{{ 2 1 63 offset_x offset_x 1 }}
{{ 2 1 63 offset_y offset_y 1 }}
{{ 4 1 27 T T 1 }}
{{ 4 1 27 R R 1 }}
{{ 4 1 27 S S 1 }}
{{ 2 0 45 error error 1 }}
{{ 1 1 0 error_min error_min 1 }}
{{ 1 1 0 error_max error_max 1 }}
{{ 1 1 0 pattern_x pattern_x 1 }}
{{ 1 1 0 pattern_y pattern_y 1 }}
{{ 1 1 0 pattern_r pattern_r 1 }}
{{ 1 1 0 pattern_t pattern_t 1 }}
{{ 1 1 0 search_x search_x 1 }}
{{ 1 1 0 search_y search_y 1 }}
{{ 1 1 0 search_r search_r 1 }}
{{ 1 1 0 search_t search_t 1 }}
{{ 2 1 0 key_track key_track 1 }}
{{ 2 1 0 key_search_x key_search_x 1 }}
{{ 2 1 0 key_search_y key_search_y 1 }}
{{ 2 1 0 key_search_r key_search_r 1 }}
{{ 2 1 0 key_search_t key_search_t 1 }}
{{ 2 1 0 key_track_x key_track_x 1 }}
{{ 2 1 0 key_track_y key_track_y 1 }}
{{ 2 1 0 key_track_r key_track_r 1 }}
{{ 2 1 0 key_track_t key_track_t 1 }}
{{ 2 1 0 key_centre_offset_x key_centre_offset_x 1 }}
{{ 2 1 0 key_centre_offset_y key_centre_offset_y 1 }}
}}
{{
{chr(10).join(track_entries)}
}}
}}
center {{{center_x} {center_y}}}
name Tracker1
xpos -75
ypos -94
}}
"""
                with open(output_path, 'w') as f:
                    f.write(nuke_file)
                return True
        except Exception as e:
            console.print(f"[red]✗[/red] Error generating Nuke file: {str(e)}", style="bold red")
            return False

    def display_summary(self):
        """Print a Rich table of the first 10 tracks plus aggregate stats."""
        if not self.tracks:
            return
        # Create summary table
        table = Table(
            title="Track Summary",
            box=box.ROUNDED,
            show_header=True,
            header_style="bold magenta",
            border_style="cyan"
        )
        table.add_column("Track", style="cyan", no_wrap=True)
        table.add_column("Points", justify="right", style="green")
        table.add_column("Frame Range", justify="center", style="yellow")
        table.add_column("Status", justify="center")
        for track in self.tracks[:10]:  # Show first 10
            status = "[green]✓[/green]" if track.num_points > 0 else "[red]✗[/red]"
            table.add_row(
                track.name,
                str(track.num_points),
                f"{track.frame_start}-{track.frame_end}",
                status
            )
        if len(self.tracks) > 10:
            table.add_row("...", "...", "...", "...")
        console.print(table)
        # Stats panel
        total_points = sum(t.num_points for t in self.tracks)
        stats = f"""
[cyan]Total Tracks:[/cyan] [bold]{len(self.tracks)}[/bold]
[cyan]Total Points:[/cyan] [bold]{total_points}[/bold]
[cyan]Avg Points/Track:[/cyan] [bold]{total_points // len(self.tracks) if self.tracks else 0}[/bold]
"""
        console.print(Panel(stats, title="Statistics", border_style="green", box=box.ROUNDED))
def interactive_mode():
    """Run the converter interactively, prompting the user for every option."""
    console.print(Panel(BANNER, style="bold cyan", box=box.DOUBLE))
    console.print("\n[bold cyan]Welcome to 3DE to Nuke Track Converter![/bold cyan]\n")
    # Get input file
    input_file = Prompt.ask(
        "[cyan]Enter 3DE track file path[/cyan] [dim](e.g., tracks.txt from 3DEqualizer)[/dim]",
        default="tracks.txt"
    )
    if not Path(input_file).exists():
        console.print(f"[red]✗[/red] File not found: {input_file}", style="bold red")
        return
    # Auto-generate output filename from input (same stem, .nk extension)
    input_path = Path(input_file)
    default_output = input_path.stem + ".nk"
    # Get output file
    output_file = Prompt.ask(
        "[cyan]Enter output .nk file path[/cyan]",
        default=default_output
    )
    # Resolution (NOTE(review): int() will raise on non-numeric input and be
    # caught only by the top-level handler in __main__)
    width = int(Prompt.ask("[cyan]Input resolution width[/cyan]", default="2048"))
    height = int(Prompt.ask("[cyan]Input resolution height[/cyan]", default="1556"))
    # Coordinate space - removed confusing option, using auto-detection
    console.print("\n[dim]Note: Coordinate space auto-detected as pixels (standard 3DE export)[/dim]")
    coord_space = "pixels"
    # Fill gaps
    fill_gaps = Confirm.ask("[cyan]Fill missing frames?[/cyan]", default=False)
    fill_strategy = "last"
    if fill_gaps:
        fill_strategy = Prompt.ask(
            "[cyan]Fill strategy[/cyan]",
            choices=["last", "linear"],
            default="last"
        )
    # Enable all tracks
    enable_all = Confirm.ask("[cyan]Enable all tracks?[/cyan]", default=False)
    # Build config understood by TrackConverter
    config = {
        'input_width': width,
        'input_height': height,
        'input_space': coord_space,
        'fill_gaps': fill_gaps,
        'fill_strategy': fill_strategy,
        'enable_all': enable_all,
        'flip_y': False,
        'name_prefix': '',
        'nuke_version': '15.2 v3',
        'format': '2K_Super_35(full-ap)'
    }
    # Process: parse, show summary, then write the .nk file
    converter = TrackConverter(config)
    console.print("\n")
    if converter.parse_3de_file(Path(input_file)):
        console.print("[green]✓[/green] Successfully parsed 3DE file\n", style="bold green")
        converter.display_summary()
        console.print("\n")
        if converter.generate_nuke_file(Path(output_file)):
            console.print(f"\n[green]✓[/green] Successfully created: [bold]{output_file}[/bold]", style="bold green")
            # Final summary
            summary = Panel(
                f"[green]Conversion Complete![/green]\n\n"
                f"[cyan]Input:[/cyan] {input_file}\n"
                f"[cyan]Output:[/cyan] {output_file}\n"
                f"[cyan]Tracks:[/cyan] {len(converter.tracks)}\n"
                f"[cyan]Total Points:[/cyan] {sum(t.num_points for t in converter.tracks)}",
                title="✓ Success",
                border_style="green",
                box=box.DOUBLE
            )
            console.print(summary)
def cli_mode(args):
    """Run a single conversion driven by parsed argparse arguments.

    Exits the process with status 1 on parse or write failure.
    """
    # Map CLI flags onto the config dict understood by TrackConverter.
    config = {
        'input_width': args.width,
        'input_height': args.height,
        'input_space': args.input_space,
        'fill_gaps': args.fill_gaps,
        'fill_strategy': args.fill_strategy,
        'enable_all': args.enable_all,
        'flip_y': args.flip_y,
        'name_prefix': args.name_prefix,
        'nuke_version': args.nuke_version,
        'format': args.format
    }
    if not args.quiet:
        console.print(Panel(BANNER, style="bold cyan", box=box.DOUBLE))
    converter = TrackConverter(config)
    if converter.parse_3de_file(Path(args.input)):
        if not args.quiet:
            console.print("[green]✓[/green] Successfully parsed 3DE file\n", style="bold green")
            if args.verbose:
                converter.display_summary()
        if args.dry_run:
            # Validate-only: parse succeeded, skip writing the output.
            console.print("[yellow]Dry run - no file written[/yellow]")
            return
        if converter.generate_nuke_file(Path(args.output)):
            if not args.quiet:
                console.print(f"\n[green]✓[/green] Successfully created: [bold]{args.output}[/bold]", style="bold green")
        else:
            sys.exit(1)
    else:
        sys.exit(1)
def main():
    """Entry point: dispatch to interactive mode (no args) or CLI mode."""
    parser = argparse.ArgumentParser(
        description='Convert 3DEqualizer 2D tracks to Nuke Tracker nodes',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('-i', '--input', help='Input 3DE track file')
    parser.add_argument('-o', '--output', help='Output Nuke .nk file')
    parser.add_argument('-w', '--width', type=int, default=2048, help='Input resolution width (default: 2048)')
    parser.add_argument('--height', type=int, default=1556, help='Input resolution height (default: 1556)')
    parser.add_argument('--input-space', choices=['pixels', 'normalized'], default='pixels',
                        help='Input coordinate space (default: pixels)')
    parser.add_argument('--fill-gaps', action='store_true', help='Fill missing frames')
    parser.add_argument('--fill-strategy', choices=['last', 'linear'], default='last',
                        help='Gap filling strategy (default: last)')
    parser.add_argument('--flip-y', action='store_true', help='Flip Y coordinates')
    parser.add_argument('--enable-all', action='store_true', help='Enable all tracks (default: only first)')
    parser.add_argument('--name-prefix', default='', help='Prefix for track names')
    parser.add_argument('--nuke-version', default='15.2 v3', help='Nuke version (default: 15.2 v3)')
    parser.add_argument('--format', default='2K_Super_35(full-ap)', help='Nuke format name')
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose output')
    parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
    parser.add_argument('--dry-run', action='store_true', help='Parse only, don\'t write file')
    # If no arguments, run interactive mode
    if len(sys.argv) == 1:
        interactive_mode()
    else:
        args = parser.parse_args()
        # Both endpoints are mandatory for a non-interactive run.
        if not args.input or not args.output:
            parser.error("--input and --output are required in CLI mode")
        cli_mode(args)
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the interactive UI — exit cleanly.
        console.print("\n[yellow]Cancelled by user[/yellow]")
        sys.exit(0)
    except Exception as e:
        # Last-resort handler so users see a message instead of a traceback.
        console.print(f"\n[red]Fatal error: {str(e)}[/red]", style="bold red")
        sys.exit(1)

View File

@ -0,0 +1,128 @@
import sys
import os
import re
import importlib
import warnings
# True when running under PyPy (which has its own distutils behaviour).
is_pypy = '__pypy__' in sys.builtin_module_names
# Silence the stdlib's own "distutils is deprecated" warning; this shim
# manages the distutils/setuptools hand-off itself.
warnings.filterwarnings('ignore',
                        r'.+ distutils\b.+ deprecated',
                        DeprecationWarning)
def warn_distutils_present():
    """Emit a warning when stdlib distutils was imported before setuptools."""
    distutils_loaded = 'distutils' in sys.modules
    # PyPy for 3.6 unconditionally imports distutils from site.py, so the
    # warning would be pure noise there — stay quiet.
    # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
    unavoidable = is_pypy and sys.version_info < (3, 7)
    if not distutils_loaded or unavoidable:
        return
    warnings.warn(
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils.")
def clear_distutils():
    """Remove stdlib distutils (and all its submodules) from sys.modules."""
    if 'distutils' not in sys.modules:
        return
    warnings.warn("Setuptools is replacing distutils.")
    # \b keeps e.g. 'distutils_extra' untouched while matching 'distutils.*'.
    doomed = [name for name in sys.modules if re.match(r'distutils\b', name)]
    for name in doomed:
        del sys.modules[name]
def enabled():
    """
    Allow selection of distutils by environment variable.

    Returns True only when SETUPTOOLS_USE_DISTUTILS is set to 'local'
    (the default is 'stdlib').
    """
    return os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib') == 'local'
def ensure_local_distutils():
    """Install setuptools' vendored distutils as the ``distutils`` module."""
    clear_distutils()
    distutils = importlib.import_module('setuptools._distutils')
    # Masquerade as the stdlib module so downstream imports resolve to it.
    distutils.__name__ = 'distutils'
    sys.modules['distutils'] = distutils
    # sanity check that submodules load as expected
    core = importlib.import_module('distutils.core')
    assert '_distutils' in core.__file__, core.__file__
def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.

    Only acts when enabled() says the env var opted in to the local copy.
    """
    if enabled():
        warn_distutils_present()
        ensure_local_distutils()
class DistutilsMetaFinder:
    """Meta-path finder that intercepts top-level imports of special modules.

    Dispatches by name: an import of ``X`` is routed to ``spec_for_X`` if
    such a method exists (currently ``distutils`` and ``pip``).
    """

    def find_spec(self, fullname, path, target=None):
        # path is None only for top-level imports; submodules pass through.
        if path is not None:
            return
        method_name = 'spec_for_{fullname}'.format(**locals())
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        """Serve setuptools' vendored distutils in place of the stdlib one."""
        import importlib.abc
        import importlib.util

        class DistutilsLoader(importlib.abc.Loader):
            def create_module(self, spec):
                # The vendored package becomes the 'distutils' module object.
                return importlib.import_module('setuptools._distutils')

            def exec_module(self, module):
                # Already fully initialised by create_module; nothing to run.
                pass

        return importlib.util.spec_from_loader('distutils', DistutilsLoader())

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        # Disable the distutils hook for the rest of this process.
        self.spec_for_distutils = lambda: None

    @staticmethod
    def pip_imported_during_build():
        """
        Detect if pip is being imported in a build script. Ref #2355.
        """
        import traceback
        return any(
            frame.f_globals['__file__'].endswith('setup.py')
            for frame, line in traceback.walk_stack(None)
        )
# Singleton finder shared by add_shim()/remove_shim().
DISTUTILS_FINDER = DistutilsMetaFinder()


def add_shim():
    # Insert at position 0 so it wins over the regular path-based finders.
    sys.meta_path.insert(0, DISTUTILS_FINDER)


def remove_shim():
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        # Shim was never installed (or already removed) — nothing to do.
        pass

View File

@ -0,0 +1 @@
# Run the setuptools distutils override as a side effect of importing this module.
__import__('_distutils_hack').do_override()

View File

@ -0,0 +1 @@
# .pth hook executed by site.py at startup: install the distutils shim only
# when SETUPTOOLS_USE_DISTUTILS opts in to setuptools' local copy.
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();

View File

@ -0,0 +1,6 @@
"""A Python port of Markdown-It"""
__all__ = ("MarkdownIt",)
__version__ = "4.0.0"
from .main import MarkdownIt

View File

@ -0,0 +1 @@
from __future__ import annotations

View File

@ -0,0 +1,67 @@
# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
# Copyright 2021 Taneli Hukkinen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import codecs
from collections.abc import Callable
import re
# Label separators: '.', plus the ideographic/fullwidth/halfwidth dots.
REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
# Any character outside printable ASCII (0x00-0x7E).
REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")
def encode(uni: str) -> str:
    """Punycode-encode *uni*, returning the ASCII result as text."""
    raw = codecs.encode(uni, encoding="punycode")
    return raw.decode()
def decode(ascii: str) -> str:
    """Decode a punycode label back to unicode text."""
    text = codecs.decode(ascii, encoding="punycode")  # type: ignore
    return text
def map_domain(string: str, fn: Callable[[str], str]) -> str:
    """Apply *fn* to each dot-separated label of the domain part of *string*.

    For email-like inputs only the part after the first '@' is transformed;
    the local part (everything before the '@') is passed through untouched.
    """
    local_prefix = ""
    at_split = string.split("@")
    if len(at_split) > 1:
        # In email addresses, only the domain name should be punycoded.
        local_prefix = at_split[0] + "@"
        string = at_split[1]
    labels = REGEX_SEPARATORS.split(string)
    return local_prefix + ".".join(fn(label) for label in labels)
def to_unicode(obj: str) -> str:
    """Convert any punycoded ('xn--') domain labels in *obj* to unicode."""
    def decode_label(label: str) -> str:
        if not label.startswith("xn--"):
            return label
        return decode(label[4:].lower())
    return map_domain(obj, decode_label)
def to_ascii(obj: str) -> str:
    """Punycode-encode any non-ASCII domain labels in *obj* ('xn--' prefix)."""
    def encode_label(label: str) -> str:
        if not REGEX_NON_ASCII.search(label):
            return label
        return "xn--" + encode(label)
    return map_domain(obj, encode_label)

View File

@ -0,0 +1,110 @@
#!/usr/bin/env python
"""
CLI interface to markdown-it-py
Parse one or more markdown files, convert each to HTML, and print to stdout.
"""
from __future__ import annotations
import argparse
from collections.abc import Iterable, Sequence
import sys
from markdown_it import __version__
from markdown_it.main import MarkdownIt
version_str = f"markdown-it-py [version {__version__}]"
def main(args: Sequence[str] | None = None) -> int:
    """CLI entry point: convert the given files, or drop into the REPL."""
    namespace = parse_args(args)
    if not namespace.filenames:
        interactive()
    else:
        convert(namespace.filenames)
    return 0
def convert(filenames: Iterable[str]) -> None:
    """Render each named Markdown file to stdout, in order."""
    for name in filenames:
        convert_file(name)
def convert_file(filename: str) -> None:
    """
    Parse a Markdown file and dump the rendered HTML to stdout.

    Exits the process with status 1 (after printing to stderr) when the
    file cannot be opened.
    """
    try:
        with open(filename, encoding="utf8", errors="ignore") as fin:
            rendered = MarkdownIt().render(fin.read())
            print(rendered, end="")
    except OSError:
        # BUG FIX: the f-string previously had no placeholder, so the error
        # message never named the file that failed to open.
        sys.stderr.write(f'Cannot open file "{filename}".\n')
        sys.exit(1)
def interactive() -> None:
    """
    Parse user input, dump to stdout, rinse and repeat.
    Python REPL style.
    """
    print_heading()
    contents = []
    more = False
    while True:
        try:
            # NOTE(review): both branches set more=True, so after the first
            # line the prompt stays "... " until EOF resets it — this matches
            # the code as written; confirm against upstream if it looks odd.
            prompt, more = ("... ", True) if more else (">>> ", True)
            contents.append(input(prompt) + "\n")
        except EOFError:
            # Ctrl-D: render everything buffered so far, then start over.
            print("\n" + MarkdownIt().render("\n".join(contents)), end="")
            more = False
            contents = []
        except KeyboardInterrupt:
            # Ctrl-C: leave the loop entirely.
            print("\nExiting.")
            break
def parse_args(args: Sequence[str] | None) -> argparse.Namespace:
    """Parse input CLI arguments (pass None to read sys.argv)."""
    parser = argparse.ArgumentParser(
        description="Parse one or more markdown files, "
        "convert each to HTML, and print to stdout",
        # NOTE: Remember to update README.md w/ the output of `markdown-it -h`
        epilog=(
            f"""
Interactive:
  $ markdown-it
  markdown-it-py [version {__version__}] (interactive)
  Type Ctrl-D to complete input, or Ctrl-C to exit.
  >>> # Example
  ... > markdown *input*
  ...
  <h1>Example</h1>
  <blockquote>
  <p>markdown <em>input</em></p>
  </blockquote>
Batch:
  $ markdown-it README.md README.footer.md > index.html
"""
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version=version_str)
    parser.add_argument(
        "filenames", nargs="*", help="specify an optional list of files to convert"
    )
    return parser.parse_args(args)
def print_heading() -> None:
    """Print the interactive-mode banner and usage hint."""
    heading = f"{version_str} (interactive)"
    print(heading)
    print("Type Ctrl-D to complete input, or Ctrl-C to exit.")
if __name__ == "__main__":
exit_code = main(sys.argv[1:])
sys.exit(exit_code)

View File

@ -0,0 +1,5 @@
"""HTML5 entities map: { name -> characters }."""
import html.entities
entities = {name.rstrip(";"): chars for name, chars in html.entities.html5.items()}

View File

@ -0,0 +1,69 @@
"""List of valid html blocks names, according to commonmark spec
http://jgm.github.io/CommonMark/spec.html#html-blocks
"""
# see https://spec.commonmark.org/0.31.2/#html-blocks
block_names = [
"address",
"article",
"aside",
"base",
"basefont",
"blockquote",
"body",
"caption",
"center",
"col",
"colgroup",
"dd",
"details",
"dialog",
"dir",
"div",
"dl",
"dt",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"frame",
"frameset",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hr",
"html",
"iframe",
"legend",
"li",
"link",
"main",
"menu",
"menuitem",
"nav",
"noframes",
"ol",
"optgroup",
"option",
"p",
"param",
"search",
"section",
"summary",
"table",
"tbody",
"td",
"tfoot",
"th",
"thead",
"title",
"tr",
"track",
"ul",
]

View File

@ -0,0 +1,39 @@
"""Regexps to match html elements"""
import re
attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
unquoted = "[^\"'=<>`\\x00-\\x20]+"
single_quoted = "'[^']*'"
double_quoted = '"[^"]*"'
attr_value = "(?:" + unquoted + "|" + single_quoted + "|" + double_quoted + ")"
attribute = "(?:\\s+" + attr_name + "(?:\\s*=\\s*" + attr_value + ")?)"
open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>"
close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"
comment = "<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->"
processing = "<[?][\\s\\S]*?[?]>"
declaration = "<![A-Za-z][^>]*>"
cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
HTML_TAG_RE = re.compile(
"^(?:"
+ open_tag
+ "|"
+ close_tag
+ "|"
+ comment
+ "|"
+ processing
+ "|"
+ declaration
+ "|"
+ cdata
+ ")"
)
HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)

View File

@ -0,0 +1,81 @@
from __future__ import annotations
from collections.abc import Callable
from contextlib import suppress
import re
from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401
import mdurl
from .. import _punycode
# Schemes whose hostnames get punycode (IDNA) re-encoding.
RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")


def normalizeLink(url: str) -> str:
    """Normalize destination URLs in links

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^

    Returns the percent-encoded form suitable for HTML output.
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)
    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        # Best-effort: an invalid hostname simply stays as-is.
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname))
    return mdurl.encode(mdurl.format(parsed))
def normalizeLinkText(url: str) -> str:
    """Normalize autolink content

    ::

        <destination>
         ~~~~~~~~~~~

    Inverse direction of normalizeLink: hostnames are converted back to
    unicode for display, and the URL is percent-decoded.
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)
    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        # Best-effort: an invalid hostname simply stays as-is.
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname))
    # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
    return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%")
# Protocols disallowed by default, and the data-URI image types that are
# exempted from that ban.
BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")


def validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool:
    """Decide whether a (normalized, entity-decoded) URL may appear in output.

    A caller-supplied *validator* wins outright and sees the raw url.
    Otherwise dangerous protocols are rejected, except inline raster-image
    data URIs. The default policy is deliberately stricter than strictly
    necessary — simple and secure by default.
    """
    if validator is not None:
        return validator(url)
    url = url.strip().lower()
    if BAD_PROTO_RE.search(url):
        return bool(GOOD_DATA_RE.search(url))
    return True

View File

@ -0,0 +1,313 @@
"""Utilities for parsing source text"""
from __future__ import annotations
import re
from re import Match
from typing import TypeVar
import unicodedata
from .entities import entities
def charCodeAt(src: str, pos: int) -> int | None:
    """Return the code point of ``src[pos]``, or ``None`` past the end.

    Python stand-in for JS ``String.charCodeAt`` (which returns NaN when
    there is no character at the index).
    """
    try:
        ch = src[pos]
    except IndexError:
        return None
    return ord(ch)
def charStrAt(src: str, pos: int) -> str | None:
    """
    Returns the character at the specified location.

    @param - index The zero-based index of the desired character.
    If there is no character at the specified index, None is returned.

    This was added for compatibility with python
    """
    try:
        return src[pos]
    except IndexError:
        return None
_ItemTV = TypeVar("_ItemTV")
def arrayReplaceAt(
    src: list[_ItemTV], pos: int, newElements: list[_ItemTV]
) -> list[_ItemTV]:
    """Return a copy of *src* with the single item at *pos* replaced by
    all of *newElements* — a token-stream splice helper.
    """
    head = src[:pos]
    tail = src[pos + 1 :]
    return [*head, *newElements, *tail]
def isValidEntityCode(c: int) -> bool:
    """Return ``True`` when code point *c* may be produced by a numeric
    character reference: rejects surrogates, noncharacters, most control
    codes, and anything beyond U+10FFFF.
    """
    if 0xD800 <= c <= 0xDFFF:  # broken surrogate sequence
        return False
    if 0xFDD0 <= c <= 0xFDEF:  # never-used noncharacter block
        return False
    if (c & 0xFFFF) in (0xFFFE, 0xFFFF):  # *FFFE / *FFFF noncharacters
        return False
    # control codes (NUL..BS, VT, SO..US, DEL..APC)
    if 0x00 <= c <= 0x08 or c == 0x0B or 0x0E <= c <= 0x1F or 0x7F <= c <= 0x9F:
        return False
    # out of unicode range
    return c <= 0x10FFFF
def fromCodePoint(c: int) -> str:
    """Convert an ordinal to a one-character string.

    The JavaScript original needed two UTF-16 code units for codepoints
    above 0xFFFF; a Python 3 string holds any codepoint in one character.
    """
    return chr(c)
# UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)
UNESCAPE_ALL_RE = re.compile(
    r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
    re.IGNORECASE,
)
DIGITAL_ENTITY_BASE10_RE = re.compile(r"#([0-9]{1,8})")
DIGITAL_ENTITY_BASE16_RE = re.compile(r"#x([a-f0-9]{1,8})", re.IGNORECASE)
def replaceEntityPattern(match: str, name: str) -> str:
    """Resolve one HTML entity reference (named, ``#NNN`` decimal or
    ``#xHHH`` hex); return *match* unchanged when it is not a valid
    entity.  See https://spec.commonmark.org/0.30/#entity-references
    """
    if name in entities:
        return entities[name]
    if numeric := DIGITAL_ENTITY_BASE10_RE.fullmatch(name):
        code = int(numeric.group(1), 10)
    elif numeric := DIGITAL_ENTITY_BASE16_RE.fullmatch(name):
        code = int(numeric.group(1), 16)
    else:
        code = None
    if code is not None and isValidEntityCode(code):
        return fromCodePoint(code)
    return match
def unescapeAll(string: str) -> str:
    """Undo backslash escapes and decode HTML entity references."""
    if "\\" not in string and "&" not in string:
        return string  # fast path: nothing to replace

    def _replace(match: Match[str]) -> str:
        backslashed = match.group(1)
        if backslashed:
            return backslashed
        return replaceEntityPattern(match.group(), match.group(2))

    return UNESCAPE_ALL_RE.sub(_replace, string)
ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")
def stripEscape(string: str) -> str:
    """Drop the backslash from ``\\<punct>`` escape sequences."""
    return ESCAPE_CHAR.sub(lambda m: m.group(1), string)
_HTML_ESCAPES = str.maketrans(
    {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;"}
)
def escapeHtml(raw: str) -> str:
    """Replace special characters "&", "<", ">" and '"' to HTML-safe sequences."""
    # Single-pass translation; like html.escape but single quotes are kept.
    return raw.translate(_HTML_ESCAPES)
# //////////////////////////////////////////////////////////////////////////////
REGEXP_ESCAPE_RE = re.compile(r"[.?*+^$[\]\\(){}|-]")
def escapeRE(string: str) -> str:
    """Backslash-escape regex metacharacters in *string*.

    Bug fix: the direct JS port used the replacement template ``"\\$&"``.
    In JavaScript ``$&`` means "the whole match", but Python's ``re.sub``
    has no such token, so metacharacters were replaced with the literal
    characters ``$&`` instead of being escaped.  ``\\g<0>`` is the Python
    spelling of "the whole match".
    """
    return REGEXP_ESCAPE_RE.sub(r"\\\g<0>", string)
# //////////////////////////////////////////////////////////////////////////////
def isSpace(code: int | None) -> bool:
    """Check if character code is a whitespace (tab or space)."""
    return code == 0x09 or code == 0x20
def isStrSpace(ch: str | None) -> bool:
    """Check if character is a whitespace (tab or space)."""
    return ch == " " or ch == "\t"
MD_WHITESPACE = {
    0x09,  # \t
    0x0A,  # \n
    0x0B,  # \v
    0x0C,  # \f
    0x0D,  # \r
    0x20,  # space
    0xA0,  # no-break space
    0x1680,  # ogham space mark
    0x202F,  # narrow no-break space
    0x205F,  # medium mathematical space
    0x3000,  # ideographic space
}
def isWhiteSpace(code: int) -> bool:
    r"""Zs (unicode class) || [\t\f\v\r\n]"""
    # U+2000..U+200A is the en/em/thin-space family (all category Zs)
    return 0x2000 <= code <= 0x200A or code in MD_WHITESPACE
# //////////////////////////////////////////////////////////////////////////////
def isPunctChar(ch: str) -> bool:
    """Check if character is a punctuation or symbol character
    (Unicode general category ``P*`` or ``S*``).
    """
    category = unicodedata.category(ch)
    return category[0] in ("P", "S")
# The 32 ASCII punctuation code points:
# ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
MD_ASCII_PUNCT = set(map(ord, "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"))
def isMdAsciiPunct(ch: int) -> bool:
    """Markdown ASCII punctuation characters.

    See http://spec.commonmark.org/0.15/#ascii-punctuation-character

    Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
    """
    return ch in MD_ASCII_PUNCT
def normalizeReference(string: str) -> str:
    """Helper to unify [reference labels]."""
    # Trim, then collapse internal whitespace runs to single spaces.
    collapsed = re.sub(r"\s+", " ", string.strip())
    # Case-fold by lowercasing then uppercasing.  A plain .lower() or
    # .upper() alone fails to normalize every letter variant (e.g. the
    # greek theta forms U+03F4 and U+03D1 map to different cases), but
    # the lower→upper combination normalizes all of them — effectively
    # unicode case folding.  The final result is uppercased because the
    # upstream JS stores labels as object keys and must avoid clashes
    # with Object.prototype members such as `__proto__`.
    return collapsed.lower().upper()
LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)
def isLinkOpen(string: str) -> bool:
    """``True`` if *string* starts with an opening ``<a ...>`` tag."""
    return LINK_OPEN_RE.search(string) is not None
def isLinkClose(string: str) -> bool:
    """``True`` if *string* starts with a closing ``</a>`` tag."""
    return LINK_CLOSE_RE.search(string) is not None

View File

@ -0,0 +1,6 @@
"""Functions for parsing Links"""
__all__ = ("parseLinkDestination", "parseLinkLabel", "parseLinkTitle")
from .parse_link_destination import parseLinkDestination
from .parse_link_label import parseLinkLabel
from .parse_link_title import parseLinkTitle

View File

@ -0,0 +1,83 @@
"""
Parse link destination
"""
from ..common.utils import charCodeAt, unescapeAll
class _Result:
    """Mutable result record for ``parseLinkDestination``."""

    __slots__ = ("ok", "pos", "str")

    def __init__(self) -> None:
        self.ok = False  # True once a destination was successfully parsed
        self.pos = 0  # position just after the parsed destination
        self.str = ""  # the unescaped destination text
def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
    """Parse a link destination from ``string[pos:maximum]``.

    Handles both the ``<...>`` bracketed form and the bare form with
    balanced parentheses; the result carries ``ok``/``pos``/``str``.
    """
    start = pos
    result = _Result()
    if charCodeAt(string, pos) == 0x3C:  # /* < */ -> bracketed destination
        pos += 1
        while pos < maximum:
            code = charCodeAt(string, pos)
            if code == 0x0A:  # /* \n */ newline is forbidden inside <...>
                return result
            if code == 0x3C:  # /* < */ unescaped '<' is forbidden
                return result
            if code == 0x3E:  # /* > */ closing bracket: success
                result.pos = pos + 1
                result.str = unescapeAll(string[start + 1 : pos])
                result.ok = True
                return result
            if code == 0x5C and pos + 1 < maximum:  # \ escape: skip next char
                pos += 2
                continue
            pos += 1
        # no closing '>'
        return result
    # non-bracketed destination (the JS `else` branch): scan until
    # whitespace/control character, tracking parenthesis balance
    level = 0
    while pos < maximum:
        code = charCodeAt(string, pos)
        if code is None or code == 0x20:
            break
        # ascii control characters
        if code < 0x20 or code == 0x7F:
            break
        if code == 0x5C and pos + 1 < maximum:
            if charCodeAt(string, pos + 1) == 0x20:
                break
            pos += 2
            continue
        if code == 0x28:  # /* ( */
            level += 1
            if level > 32:  # nesting limit, mirrors upstream markdown-it
                return result
        if code == 0x29:  # /* ) */
            if level == 0:
                break
            level -= 1
        pos += 1
    if start == pos:
        return result
    if level != 0:  # unbalanced parentheses
        return result
    result.str = unescapeAll(string[start:pos])
    result.pos = pos
    result.ok = True
    return result

View File

@ -0,0 +1,44 @@
"""
Parse link label
this function assumes that first character ("[") already matches
returns the end of the label
"""
from markdown_it.rules_inline import StateInline
def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
    """Scan for the ``]`` that closes a link label.

    Assumes ``state.src[start]`` is the opening ``[``.  Returns the index
    of the matching closing bracket, or -1 when there is none (or when a
    nested label is found and *disableNested* is set).  ``state.pos`` is
    restored before returning.
    """
    labelEnd = -1
    oldPos = state.pos
    found = False
    state.pos = start + 1
    level = 1  # bracket nesting depth; 0 means the label is closed
    while state.pos < state.posMax:
        marker = state.src[state.pos]
        if marker == "]":
            level -= 1
            if level == 0:
                found = True
                break
        prevPos = state.pos
        state.md.inline.skipToken(state)
        if marker == "[":
            if prevPos == state.pos - 1:
                # increase level if we find text `[`,
                # which is not a part of any token
                level += 1
            elif disableNested:
                state.pos = oldPos
                return -1
    if found:
        labelEnd = state.pos
    # restore old state
    state.pos = oldPos
    return labelEnd

View File

@ -0,0 +1,75 @@
"""Parse link title"""
from ..common.utils import charCodeAt, unescapeAll
class _State:
    """Mutable parse state/result for ``parseLinkTitle``."""

    __slots__ = ("can_continue", "marker", "ok", "pos", "str")

    def __init__(self) -> None:
        self.ok = False  # True once a complete, valid title was parsed
        self.can_continue = False  # title may continue on the next line
        self.pos = 0  # when ok: first index after the closing marker
        self.str = ""  # when ok: the unescaped title text
        self.marker = 0  # expected closing marker's character code

    def __str__(self) -> str:
        return self.str
def parseLinkTitle(
    string: str, start: int, maximum: int, prev_state: _State | None = None
) -> _State:
    """Parse link title within `str` in [start, max] range,
    or continue previous parsing if `prev_state` is defined (equal to result of last execution).
    """
    pos = start
    state = _State()
    if prev_state is not None:
        # this is a continuation of a previous parseLinkTitle call on the next line,
        # used in reference links only
        state.str = prev_state.str
        state.marker = prev_state.marker
    else:
        if pos >= maximum:
            return state
        marker = charCodeAt(string, pos)
        # /* " */ /* ' */ /* ( */
        if marker != 0x22 and marker != 0x27 and marker != 0x28:
            return state
        start += 1
        pos += 1
        # if opening marker is "(", switch it to closing marker ")"
        if marker == 0x28:
            marker = 0x29
        state.marker = marker
    while pos < maximum:
        code = charCodeAt(string, pos)
        if code == state.marker:
            # found the closing marker: success
            state.pos = pos + 1
            state.str += unescapeAll(string[start:pos])
            state.ok = True
            return state
        elif code == 0x28 and state.marker == 0x29:  # /* ( */ nested '(' in (...) title is invalid
            return state
        elif code == 0x5C and pos + 1 < maximum:  # /* \ */ escape: skip next char
            pos += 1
        pos += 1
    # no closing marker found, but this link title may continue on the next line (for references)
    state.can_continue = True
    state.str += unescapeAll(string[start:pos])
    return state

View File

@ -0,0 +1,350 @@
from __future__ import annotations
from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
from contextlib import contextmanager
from typing import Any, Literal, overload
from . import helpers, presets
from .common import normalize_url, utils
from .parser_block import ParserBlock
from .parser_core import ParserCore
from .parser_inline import ParserInline
from .renderer import RendererHTML, RendererProtocol
from .rules_core.state_core import StateCore
from .token import Token
from .utils import EnvType, OptionsDict, OptionsType, PresetType
try:
import linkify_it
except ModuleNotFoundError:
linkify_it = None
# Mapping of preset name -> ready-made configuration dict; used by
# `MarkdownIt.configure` when the config is given as a string.
_PRESETS: dict[str, PresetType] = {
    "default": presets.default.make(),
    "js-default": presets.js_default.make(),
    "zero": presets.zero.make(),
    "commonmark": presets.commonmark.make(),
    "gfm-like": presets.gfm_like.make(),
}
class MarkdownIt:
    """Main parser/renderer class: ties the core, block and inline rule
    chains together with a pluggable renderer."""
    def __init__(
        self,
        config: str | PresetType = "commonmark",
        options_update: Mapping[str, Any] | None = None,
        *,
        renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
    ):
        """Main parser class

        :param config: name of configuration to load or a pre-defined dictionary
        :param options_update: dictionary that will be merged into ``config["options"]``
        :param renderer_cls: the class to load as the renderer:
            ``self.renderer = renderer_cls(self)``
        """
        # add modules
        self.utils = utils
        self.helpers = helpers
        # initialise classes
        self.inline = ParserInline()
        self.block = ParserBlock()
        self.core = ParserCore()
        self.renderer = renderer_cls(self)
        # linkify-it-py is an optional dependency; None disables linkification
        self.linkify = linkify_it.LinkifyIt() if linkify_it else None
        # set the configuration
        if options_update and not isinstance(options_update, Mapping):
            # catch signature change where renderer_cls was not used as a key-word
            raise TypeError(
                f"options_update should be a mapping: {options_update}"
                "\n(Perhaps you intended this to be the renderer_cls?)"
            )
        self.configure(config, options_update=options_update)
    def __repr__(self) -> str:
        return f"{self.__class__.__module__}.{self.__class__.__name__}()"
    @overload
    def __getitem__(self, name: Literal["inline"]) -> ParserInline: ...
    @overload
    def __getitem__(self, name: Literal["block"]) -> ParserBlock: ...
    @overload
    def __getitem__(self, name: Literal["core"]) -> ParserCore: ...
    @overload
    def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: ...
    @overload
    def __getitem__(self, name: str) -> Any: ...
    def __getitem__(self, name: str) -> Any:
        # component lookup by name, e.g. ``md["block"]``
        return {
            "inline": self.inline,
            "block": self.block,
            "core": self.core,
            "renderer": self.renderer,
        }[name]
    def set(self, options: OptionsType) -> None:
        """Set parser options (in the same format as in constructor).
        Probably, you will never need it, but you can change options after constructor call.

        __Note:__ To achieve the best possible performance, don't modify a
        `markdown-it` instance options on the fly. If you need multiple configurations
        it's best to create multiple instances and initialize each with separate config.
        """
        self.options = OptionsDict(options)
    def configure(
        self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
    ) -> MarkdownIt:
        """Batch load of all options and component settings.
        This is an internal method, and you probably will not need it.
        But if you will - see available presets and data structure
        [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

        We strongly recommend to use presets instead of direct config loads.
        That will give better compatibility with next versions.
        """
        if isinstance(presets, str):
            if presets not in _PRESETS:
                raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
            config = _PRESETS[presets]
        else:
            config = presets
        if not config:
            raise ValueError("Wrong `markdown-it` config, can't be empty")
        options = config.get("options", {}) or {}
        if options_update:
            options = {**options, **options_update}  # type: ignore
        self.set(options)  # type: ignore
        # enable exactly the rule sets listed for each component chain
        if "components" in config:
            for name, component in config["components"].items():
                rules = component.get("rules", None)
                if rules:
                    self[name].ruler.enableOnly(rules)
                rules2 = component.get("rules2", None)
                if rules2:
                    self[name].ruler2.enableOnly(rules2)
        return self
    def get_all_rules(self) -> dict[str, list[str]]:
        """Return the names of all rules (active or not), keyed by chain."""
        rules = {
            chain: self[chain].ruler.get_all_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_all_rules()
        return rules
    def get_active_rules(self) -> dict[str, list[str]]:
        """Return the names of all active rules, keyed by chain."""
        rules = {
            chain: self[chain].ruler.get_active_rules()
            for chain in ["core", "block", "inline"]
        }
        rules["inline2"] = self.inline.ruler2.get_active_rules()
        return rules
    def enable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """Enable list of rules. (chainable)

        :param names: rule name or list of rule names to enable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.

        It will automatically find appropriate components,
        containing rules with given names. If rule not found, and `ignoreInvalid`
        not set - throws exception.

        Example::

            md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')
        """
        result = []
        if isinstance(names, str):
            names = [names]
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.enable(names, True))
        result.extend(self.inline.ruler2.enable(names, True))
        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")
        return self
    def disable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> MarkdownIt:
        """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)

        :param names: rule name or list of rule names to disable.
        :param ignoreInvalid: set `true` to ignore errors when rule not found.
        """
        result = []
        if isinstance(names, str):
            names = [names]
        for chain in ["core", "block", "inline"]:
            result.extend(self[chain].ruler.disable(names, True))
        result.extend(self.inline.ruler2.disable(names, True))
        missed = [name for name in names if name not in result]
        if missed and not ignoreInvalid:
            raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
        return self
    @contextmanager
    def reset_rules(self) -> Generator[None, None, None]:
        """A context manager, that will reset the current enabled rules on exit."""
        chain_rules = self.get_active_rules()
        yield
        for chain, rules in chain_rules.items():
            if chain != "inline2":
                self[chain].ruler.enableOnly(rules)
        self.inline.ruler2.enableOnly(chain_rules["inline2"])
    def add_render_rule(
        self, name: str, function: Callable[..., Any], fmt: str = "html"
    ) -> None:
        """Add a rule for rendering a particular Token type.

        Only applied when ``renderer.__output__ == fmt``
        """
        if self.renderer.__output__ == fmt:
            # bind the function to the renderer instance as a method
            self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore
    def use(
        self, plugin: Callable[..., None], *params: Any, **options: Any
    ) -> MarkdownIt:
        """Load specified plugin with given params into current parser instance. (chainable)

        It's just a sugar to call `plugin(md, params)` with currying.

        Example::

            def func(tokens, idx):
                tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
            md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)
        """
        plugin(self, *params, **options)
        return self
    def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
        """Parse the source string to a token stream

        :param src: source string
        :param env: environment sandbox

        Parse input string and return list of block tokens (special token type
        "inline" will contain list of inline tokens).

        `env` is used to pass data between "distributed" rules and return additional
        metadata like reference info, needed for the renderer. It also can be used to
        inject data in specific cases. Usually, you will be ok to pass `{}`,
        and then pass updated object to renderer.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        self.core.process(state)
        return state.tokens
    def render(self, src: str, env: EnvType | None = None) -> Any:
        """Render markdown string into html. It does all magic for you :).

        :param src: source string
        :param env: environment sandbox
        :returns: The output of the loaded renderer

        `env` can be used to inject additional metadata (`{}` by default).
        But you will not need it with high probability. See also comment
        in [[MarkdownIt.parse]].
        """
        env = {} if env is None else env
        return self.renderer.render(self.parse(src, env), self.options, env)
    def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
        """The same as [[MarkdownIt.parse]] but skip all block rules.

        :param src: source string
        :param env: environment sandbox

        It returns the
        block tokens list with the single `inline` element, containing parsed inline
        tokens in `children` property. Also updates `env` object.
        """
        env = {} if env is None else env
        if not isinstance(env, MutableMapping):
            raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
        if not isinstance(src, str):
            raise TypeError(f"Input data should be a string, not {type(src)}")
        state = StateCore(src, self, env)
        state.inlineMode = True
        self.core.process(state)
        return state.tokens
    def renderInline(self, src: str, env: EnvType | None = None) -> Any:
        """Similar to [[MarkdownIt.render]] but for single paragraph content.

        :param src: source string
        :param env: environment sandbox

        Similar to [[MarkdownIt.render]] but for single paragraph content. Result
        will NOT be wrapped into `<p>` tags.
        """
        env = {} if env is None else env
        return self.renderer.render(self.parseInline(src, env), self.options, env)
    # link methods
    def validateLink(self, url: str) -> bool:
        """Validate if the URL link is allowed in output.

        This validator can prohibit more than really needed to prevent XSS.
        It's a tradeoff to keep code simple and to be secure by default.

        Note: the url should be normalized at this point, and existing entities decoded.
        """
        return normalize_url.validateLink(url)
    def normalizeLink(self, url: str) -> str:
        """Normalize destination URLs in links

        ::

            [label]: destination 'title'
                     ^^^^^^^^^^^
        """
        return normalize_url.normalizeLink(url)
    def normalizeLinkText(self, link: str) -> str:
        """Normalize autolink content

        ::

            <destination>
            ~~~~~~~~~~~
        """
        return normalize_url.normalizeLinkText(link)

View File

@ -0,0 +1,113 @@
"""Block-level tokenizer."""
from __future__ import annotations
from collections.abc import Callable
import logging
from typing import TYPE_CHECKING
from . import rules_block
from .ruler import Ruler
from .rules_block.state_block import StateBlock
from .token import Token
from .utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
LOGGER = logging.getLogger(__name__)
RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)
`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
# First 2 params - rule name & source. Secondary array - list of rules,
# which can be terminated by this one.
("table", rules_block.table, ["paragraph", "reference"]),
("code", rules_block.code, []),
("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
(
"blockquote",
rules_block.blockquote,
["paragraph", "reference", "blockquote", "list"],
),
("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
("reference", rules_block.reference, []),
("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
("lheading", rules_block.lheading, []),
("paragraph", rules_block.paragraph, []),
]
class ParserBlock:
    """
    Block-level tokenizer.

    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncBlockType]()
        for name, rule, alt in _rules:
            self.ruler.push(name, rule, {"alt": alt})
    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        line = startLine
        maxNesting = state.md.options.maxNesting
        hasEmptyLines = False
        while line < endLine:
            state.line = line = state.skipEmptyLines(line)
            if line >= endLine:
                break
            if state.sCount[line] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break
            if state.level >= maxNesting:
                # If nesting level exceeded - skip tail to the end.
                # That's not ordinary situation and we should not care about content.
                state.line = endLine
                break
            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, line, endLine, False):
                    break
            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not hasEmptyLines
            line = state.line
            # paragraph might "eat" one newline after it in nested lists
            if (line - 1) < endLine and state.isEmpty(line - 1):
                hasEmptyLines = True
            if line < endLine and state.isEmpty(line):
                hasEmptyLines = True
                line += 1
                state.line = line
    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`."""
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens

View File

@ -0,0 +1,46 @@
"""
* class Core
*
* Top-level rules executor. Glues block/inline parsers and does intermediate
* transformations.
"""
from __future__ import annotations
from collections.abc import Callable
from .ruler import Ruler
from .rules_core import (
block,
inline,
linkify,
normalize,
replace,
smartquotes,
text_join,
)
from .rules_core.state_core import StateCore
RuleFuncCoreType = Callable[[StateCore], None]
# Core rule chain; executed in this exact order on every parse.
_rules: list[tuple[str, RuleFuncCoreType]] = [
    ("normalize", normalize),
    ("block", block),
    ("inline", inline),
    ("linkify", linkify),
    ("replacements", replace),
    ("smartquotes", smartquotes),
    ("text_join", text_join),
]
class ParserCore:
    """Executor for the top-level ("core") rule chain, which glues the
    block/inline parsers together with intermediate transformations."""

    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncCoreType]()
        for rule_name, rule_func in _rules:
            self.ruler.push(rule_name, rule_func)

    def process(self, state: StateCore) -> None:
        """Run every enabled core rule over *state*, in order."""
        for rule_func in self.ruler.getRules(""):
            rule_func(state)

View File

@ -0,0 +1,148 @@
"""Tokenizes paragraph content."""
from __future__ import annotations
from collections.abc import Callable
from typing import TYPE_CHECKING
from . import rules_inline
from .ruler import Ruler
from .rules_inline.state_inline import StateInline
from .token import Token
from .utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool)
`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
("text", rules_inline.text),
("linkify", rules_inline.linkify),
("newline", rules_inline.newline),
("escape", rules_inline.escape),
("backticks", rules_inline.backtick),
("strikethrough", rules_inline.strikethrough.tokenize),
("emphasis", rules_inline.emphasis.tokenize),
("link", rules_inline.link),
("image", rules_inline.image),
("autolink", rules_inline.autolink),
("html_inline", rules_inline.html_inline),
("entity", rules_inline.entity),
]
# Note `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
("balance_pairs", rules_inline.link_pairs),
("strikethrough", rules_inline.strikethrough.postProcess),
("emphasis", rules_inline.emphasis.postProcess),
# rules for pairs separate '**' into its own text tokens, which may be left unused,
# rule below merges unused segments back with the rest of the text
("fragments_join", rules_inline.fragments_join),
]
class ParserInline:
    """Inline-level tokenizer: owns the two inline rule chains
    (first pass and pair post-processing)."""
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)
    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode;
        advances ``state.pos`` past the token (results are cached per position).
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache
        if pos in cache:
            # already skipped from this position before
            state.pos = cache[pos]
            return
        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # But ideally, we'd need a separate private state variable for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting, just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly in the following case,
            # when an amount of `[` is exactly equal to `maxNesting + 1`:
            #
            # [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround when CM standard will allow nested links
            # (we can replace it by preventing links from being parsed in
            # validation mode)
            #
            state.pos = state.posMax
        if not ok:
            # no rule matched: advance past one character
            state.pos += 1
        cache[pos] = state.pos
    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]
        while state.pos < end:
            # Try all possible rules.
            # On success, rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return true
            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break
            if ok:
                if state.pos >= end:
                    break
                continue
            # no rule matched: accumulate the character as pending text
            state.pending += state.src[state.pos]
            state.pos += 1
        if state.pending:
            state.pushPending()
    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`"""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        # post-processing pass (pair balancing, fragment joining)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens

View File

@ -0,0 +1,48 @@
- package: markdown-it/markdown-it
version: 14.1.0
commit: 0fe7ccb4b7f30236fb05f623be6924961d296d3d
date: Mar 19, 2024
notes:
- Rename variables that use python built-in names, e.g.
- `max` -> `maximum`
- `len` -> `length`
- `str` -> `string`
- |
Convert JS `for` loops to `while` loops
this is generally the main difference between the code bases,
because in python you can't do e.g. `for {i=1;i<x;i++} {}`
- |
`env` is a common Python dictionary, and so does not have attribute access to keys,
as with JavaScript dictionaries.
`options` have attribute access only to core markdownit configuration options
- |
`Token.attrs` is a dictionary, instead of a list of lists.
Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
but in Python 3.7+ order of dictionaries is guaranteed.
One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
to manipulate `Token.attrs`, which have an identical signature to those upstream.
- Use python version of `charCodeAt`
- |
Use `str` units instead of `int`s to represent Unicode codepoints.
This provides a significant performance boost
- |
In markdown_it/rules_block/reference.py,
record line range in state.env["references"] and add state.env["duplicate_refs"]
This is to allow renderers to report on issues regarding references
- |
The `MarkdownIt.__init__` signature is slightly different for updating options,
since you must always specify the config first, e.g.
use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
- The default configuration preset for `MarkdownIt` is "commonmark" not "default"
- Allow custom renderer to be passed to `MarkdownIt`
- |
change render method signatures
`func(tokens, idx, options, env, slf)` to
`func(self, tokens, idx, options, env)`
- |
Extensions add render methods by format
`MarkdownIt.add_render_rule(name, function, fmt="html")`,
rather than `MarkdownIt.renderer.rules[name] = function`
and renderers should declare a class property `__output__ = "html"`.
This allows for extensibility to more than just HTML renderers
- inline tokens in tables are assigned a map (this is helpful for propagation to children)

View File

@ -0,0 +1,28 @@
__all__ = ("commonmark", "default", "gfm_like", "js_default", "zero")
from ..utils import PresetType
from . import commonmark, default, zero
js_default = default
class gfm_like:  # noqa: N801
    """GitHub Flavoured Markdown (GFM) like.

    This adds the linkify, table and strikethrough components to CommonMark.

    Note, it lacks task-list items and raw HTML filtering,
    to meet the full GFM specification
    (see https://github.github.com/gfm/#autolinks-extension-).
    """

    @staticmethod
    def make() -> PresetType:
        # Start from the CommonMark preset and layer the GFM extras on top.
        config = commonmark.make()
        config["components"]["core"]["rules"].append("linkify")
        config["components"]["block"]["rules"].append("table")
        config["components"]["inline"]["rules"].extend(["strikethrough", "linkify"])
        config["components"]["inline"]["rules2"].append("strikethrough")
        config["options"]["linkify"] = True
        config["options"]["html"] = True
        return config

View File

@ -0,0 +1,75 @@
"""Commonmark default options.
This differs from presets.default,
primarily in that it allows HTML and does not enable components:
- block: table
- inline: strikethrough
"""
from ..utils import PresetType
def make() -> PresetType:
    """Build and return the CommonMark preset configuration."""
    options = {
        "maxNesting": 20,  # internal protection, recursion limit
        # Enable HTML tags in source; this is just a shorthand for
        # .enable(["html_inline", "html_block"])
        "html": True,
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on.  Could be either a String or an Array.
        #
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '\xA0', '\xA0'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": True,  # use '/' to close single tags (<br />)
        "breaks": False,  # convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function: should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre... the internal wrapper is skipped.
        "highlight": None,
    }
    block_rules = [
        "blockquote",
        "code",
        "fence",
        "heading",
        "hr",
        "html_block",
        "lheading",
        "list",
        "reference",
        "paragraph",
    ]
    inline_rules = [
        "autolink",
        "backticks",
        "emphasis",
        "entity",
        "escape",
        "html_inline",
        "image",
        "link",
        "newline",
        "text",
    ]
    return {
        "options": options,
        "components": {
            "core": {"rules": ["normalize", "block", "inline", "text_join"]},
            "block": {"rules": block_rules},
            "inline": {
                "rules": inline_rules,
                "rules2": ["balance_pairs", "emphasis", "fragments_join"],
            },
        },
    }

View File

@ -0,0 +1,36 @@
"""markdown-it default options."""
from ..utils import PresetType
def make() -> PresetType:
    """Build and return the markdown-it "default" preset configuration."""
    options = {
        "maxNesting": 100,  # internal protection, recursion limit
        "html": False,  # enable HTML tags in source
        # (shorthand for .disable(["html_inline", "html_block"]))
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on.  Could be either a String or an Array.
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '\xA0', '\xA0'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # use '/' to close single tags (<br />)
        "breaks": False,  # convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function: should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre... the internal wrapper is skipped.
        "highlight": None,
    }
    # No explicit rule lists are given for the default preset.
    components = {"core": {}, "block": {}, "inline": {}}
    return {"options": options, "components": components}

View File

@ -0,0 +1,44 @@
"""
"Zero" preset, with nothing enabled. Useful for manual configuring of simple
modes. For example, to parse bold/italic only.
"""
from ..utils import PresetType
def make() -> PresetType:
    """Build and return the "zero" preset: only paragraph/text parsing enabled."""
    options = {
        "maxNesting": 20,  # internal protection, recursion limit
        "html": False,  # enable HTML tags in source
        # (shorthand for .disable(["html_inline", "html_block"]))
        # used by the linkify rule: autoconvert URL-like texts to links
        "linkify": False,
        # used by the replacements and smartquotes rules:
        # enable some language-neutral replacements + quotes beautification
        "typographer": False,
        # used by the smartquotes rule:
        # double + single quotes replacement pairs, when typographer enabled,
        # and smartquotes on.  Could be either a String or an Array.
        # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
        # and ['«\xA0', '\xA0»', '\xA0', '\xA0'] for French (including nbsp).
        "quotes": "\u201c\u201d\u2018\u2019",  # /* “”‘’ */
        # Renderer specific; these options are used directly in the HTML renderer
        "xhtmlOut": False,  # use '/' to close single tags (<br />)
        "breaks": False,  # convert '\n' in paragraphs into <br>
        "langPrefix": "language-",  # CSS language prefix for fenced blocks
        # Highlighter function: should return escaped HTML, or '' if the
        # source string is not changed and should be escaped externally.
        # If the result starts with <pre... the internal wrapper is skipped.
        "highlight": None,
    }
    components = {
        "core": {"rules": ["normalize", "block", "inline", "text_join"]},
        "block": {"rules": ["paragraph"]},
        "inline": {
            "rules": ["text"],
            "rules2": ["balance_pairs", "fragments_join"],
        },
    }
    return {"options": options, "components": components}

View File

@ -0,0 +1 @@
# Marker file for PEP 561

View File

@ -0,0 +1,336 @@
"""
class Renderer
Generates HTML from parsed token stream. Each instance has independent
copy of rules. Those can be rewritten with ease. Also, you can add new
rules if you create a plugin that adds new token types.
"""
from __future__ import annotations
from collections.abc import Sequence
import inspect
from typing import Any, ClassVar, Protocol
from .common.utils import escapeHtml, unescapeAll
from .token import Token
from .utils import EnvType, OptionsDict
class RendererProtocol(Protocol):
    """Structural interface every renderer must satisfy."""

    # Output format identifier declared by concrete renderers (e.g. "html").
    __output__: ClassVar[str]

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> Any: ...
class RendererHTML(RendererProtocol):
    """Contains render rules for tokens. Can be updated and extended.

    Example:

    Each rule is called as an independent static function with fixed signature:

    ::

        class Renderer:
            def token_type_name(self, tokens, idx, options, env):
                # ...
                return renderedHTML

    ::

        class CustomRenderer(RendererHTML):
            def strong_open(self, tokens, idx, options, env):
                return '<b>'

            def strong_close(self, tokens, idx, options, env):
                return '</b>'

        md = MarkdownIt(renderer_cls=CustomRenderer)

        result = md.render(...)

    See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
    for more details and examples.
    """

    __output__ = "html"

    def __init__(self, parser: Any = None):
        # `parser` is accepted for interface compatibility but not used here.
        # Collect every public method as a render rule keyed by token type;
        # the generic `render*` entry points and private helpers are excluded.
        self.rules = {
            k: v
            for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
            if not (k.startswith("render") or k.startswith("_"))
        }

    def render(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> str:
        """Takes token stream and generates HTML.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        :returns: the rendered HTML string
        """
        result = ""

        for i, token in enumerate(tokens):
            if token.type == "inline":
                # inline containers delegate to their child tokens
                if token.children:
                    result += self.renderInline(token.children, options, env)
            elif token.type in self.rules:
                result += self.rules[token.type](tokens, i, options, env)
            else:
                # no dedicated rule: fall back to the generic tag renderer
                result += self.renderToken(tokens, i, options, env)

        return result

    def renderInline(
        self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
    ) -> str:
        """The same as ``render``, but for single token of `inline` type.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input (references, for example)
        """
        result = ""

        for i, token in enumerate(tokens):
            if token.type in self.rules:
                result += self.rules[token.type](tokens, i, options, env)
            else:
                result += self.renderToken(tokens, i, options, env)

        return result

    def renderToken(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Default token renderer.

        Can be overridden by custom function

        :param tokens: the full token stream (needed to peek at neighbours)
        :param idx: token index to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        """
        result = ""
        needLf = False
        token = tokens[idx]

        # Tight list paragraphs
        if token.hidden:
            return ""

        # Insert a newline between hidden paragraph and subsequent opening
        # block-level tag.
        #
        # For example, here we should insert a newline before blockquote:
        #  - a
        #  >
        #
        if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
            result += "\n"

        # Add token name, e.g. `<img`
        result += ("</" if token.nesting == -1 else "<") + token.tag

        # Encode attributes, e.g. `<img src="foo"`
        result += self.renderAttrs(token)

        # Add a slash for self-closing tags, e.g. `<img src="foo" /`
        if token.nesting == 0 and options["xhtmlOut"]:
            result += " /"

        # Check if we need to add a newline after this tag
        if token.block:
            needLf = True

            if token.nesting == 1 and (idx + 1 < len(tokens)):
                nextToken = tokens[idx + 1]

                if nextToken.type == "inline" or nextToken.hidden:
                    # Block-level tag containing an inline tag.
                    #
                    needLf = False

                elif nextToken.nesting == -1 and nextToken.tag == token.tag:
                    # Opening tag + closing tag of the same type. E.g. `<li></li>`.
                    #
                    needLf = False

        result += ">\n" if needLf else ">"

        return result

    @staticmethod
    def renderAttrs(token: Token) -> str:
        """Render token attributes to string."""
        result = ""

        for key, value in token.attrItems():
            # both key and value are HTML-escaped before emission
            result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'

        return result

    def renderInlineAsText(
        self,
        tokens: Sequence[Token] | None,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Special kludge for image `alt` attributes to conform CommonMark spec.

        Don't try to use it! Spec requires to show `alt` content with stripped markup,
        instead of simple escaping.

        :param tokens: list on block tokens to render
        :param options: params of parser instance
        :param env: additional data from parsed input
        """
        result = ""

        for token in tokens or []:
            if token.type == "text":
                result += token.content
            elif token.type == "image":
                if token.children:
                    result += self.renderInlineAsText(token.children, options, env)
            elif token.type == "softbreak":
                result += "\n"

        return result

    ###################################################

    def code_inline(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render an inline code span as ``<code>...</code>``."""
        token = tokens[idx]
        return (
            "<code"
            + self.renderAttrs(token)
            + ">"
            + escapeHtml(tokens[idx].content)
            + "</code>"
        )

    def code_block(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render an indented code block as ``<pre><code>...</code></pre>``."""
        token = tokens[idx]

        return (
            "<pre"
            + self.renderAttrs(token)
            + "><code>"
            + escapeHtml(tokens[idx].content)
            + "</code></pre>\n"
        )

    def fence(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render a fenced code block, using ``options.highlight`` if provided."""
        token = tokens[idx]
        info = unescapeAll(token.info).strip() if token.info else ""
        langName = ""
        langAttrs = ""

        if info:
            # first word is the language name, the remainder are attributes
            arr = info.split(maxsplit=1)
            langName = arr[0]
            if len(arr) == 2:
                langAttrs = arr[1]

        if options.highlight:
            # highlighter may return "" to mean "escape externally"
            highlighted = options.highlight(
                token.content, langName, langAttrs
            ) or escapeHtml(token.content)
        else:
            highlighted = escapeHtml(token.content)

        if highlighted.startswith("<pre"):
            # highlighter supplied its own wrapper; emit as-is
            return highlighted + "\n"

        # If language exists, inject class gently, without modifying original token.
        # May be, one day we will add .deepClone() for token and simplify this part, but
        # now we prefer to keep things local.
        if info:
            # Fake token just to render attributes
            tmpToken = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
            tmpToken.attrJoin("class", options.langPrefix + langName)

            return (
                "<pre><code"
                + self.renderAttrs(tmpToken)
                + ">"
                + highlighted
                + "</code></pre>\n"
            )

        return (
            "<pre><code"
            + self.renderAttrs(token)
            + ">"
            + highlighted
            + "</code></pre>\n"
        )

    def image(
        self,
        tokens: Sequence[Token],
        idx: int,
        options: OptionsDict,
        env: EnvType,
    ) -> str:
        """Render an ``<img>`` tag, synthesising the mandatory ``alt`` attribute."""
        token = tokens[idx]

        # "alt" attr MUST be set, even if empty. Because it's mandatory and
        # should be placed on proper position for tests.
        if token.children:
            token.attrSet("alt", self.renderInlineAsText(token.children, options, env))
        else:
            token.attrSet("alt", "")

        return self.renderToken(tokens, idx, options, env)

    def hardbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render a hard line break (``<br>``/``<br />`` per ``xhtmlOut``)."""
        return "<br />\n" if options.xhtmlOut else "<br>\n"

    def softbreak(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render a soft break: ``<br>`` only when ``options.breaks`` is on."""
        return (
            ("<br />\n" if options.xhtmlOut else "<br>\n") if options.breaks else "\n"
        )

    def text(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Render plain text, HTML-escaped."""
        return escapeHtml(tokens[idx].content)

    def html_block(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Pass raw HTML block content through unescaped."""
        return tokens[idx].content

    def html_inline(
        self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
    ) -> str:
        """Pass raw inline HTML content through unescaped."""
        return tokens[idx].content

View File

@ -0,0 +1,275 @@
"""
class Ruler
Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
[[MarkdownIt#inline]] to manage sequences of functions (rules):
- keep rules in defined order
- assign the name to each rule
- enable/disable rules
- add/replace rules
- allow assigning rules to additional named chains (in the same ruler)
- caching lists of active rules
You will not need to use this class directly until you write plugins. For simple
rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
[[MarkdownIt.use]].
"""
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Generic, TypedDict, TypeVar
import warnings
from .utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
class StateBase:
    """Shared base for parser state objects; holds source text, env and parser."""

    def __init__(self, src: str, md: MarkdownIt, env: EnvType):
        self.src = src  # source text (assigned via the property setter below)
        self.env = env  # environment mapping shared between rules
        self.md = md  # back-reference to the MarkdownIt parser instance

    @property
    def src(self) -> str:
        """The source text being parsed."""
        return self._src

    @src.setter
    def src(self, value: str) -> None:
        self._src = value
        # Reset the lazily-built char-code cache whenever the source changes.
        self._srcCharCode: tuple[int, ...] | None = None

    @property
    def srcCharCode(self) -> tuple[int, ...]:
        """Deprecated: tuple of ``ord()`` codes of ``src``, computed lazily."""
        warnings.warn(
            "StateBase.srcCharCode is deprecated. Use StateBase.src instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        if self._srcCharCode is None:
            self._srcCharCode = tuple(ord(c) for c in self._src)
        return self._srcCharCode
class RuleOptionsType(TypedDict, total=False):
    """Options accepted when adding or replacing a rule in a :class:`Ruler`."""

    # Alternate chain names the rule should also be registered under.
    alt: list[str]


RuleFuncTv = TypeVar("RuleFuncTv")
"""A rule function, whose signature is dependent on the state type."""
@dataclass(slots=True)
class Rule(Generic[RuleFuncTv]):
    """A single named entry in a :class:`Ruler` chain."""

    name: str  # unique rule name
    enabled: bool  # whether the rule participates in compiled chains
    fn: RuleFuncTv = field(repr=False)  # the rule callable itself
    alt: list[str]  # alternate chain names this rule also belongs to
class Ruler(Generic[RuleFuncTv]):
    """Ordered registry of named rules, with enable/disable and cached chains."""

    def __init__(self) -> None:
        # List of added rules.
        self.__rules__: list[Rule[RuleFuncTv]] = []
        # Cached rule chains.
        # First level - chain name, '' for default.
        # Second level - digital anchor for fast filtering by charcodes.
        self.__cache__: dict[str, list[RuleFuncTv]] | None = None

    def __find__(self, name: str) -> int:
        """Find rule index by name (-1 if not present)."""
        for i, rule in enumerate(self.__rules__):
            if rule.name == name:
                return i
        return -1

    def __compile__(self) -> None:
        """Build rules lookup cache"""
        # every alt chain name used by an enabled rule, plus the default ''
        chains = {""}
        # collect unique names
        for rule in self.__rules__:
            if not rule.enabled:
                continue
            for name in rule.alt:
                chains.add(name)
        self.__cache__ = {}
        for chain in chains:
            self.__cache__[chain] = []
            for rule in self.__rules__:
                if not rule.enabled:
                    continue
                # a named chain only contains rules that opted into it via alt
                if chain and (chain not in rule.alt):
                    continue
                self.__cache__[chain].append(rule.fn)

    def at(
        self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
    ) -> None:
        """Replace rule by name with new function & options.

        :param ruleName: rule name to replace.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        :raises: KeyError if name not found
        """
        index = self.__find__(ruleName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {ruleName}")
        self.__rules__[index].fn = fn
        self.__rules__[index].alt = options.get("alt", [])
        # invalidate compiled chains
        self.__cache__ = None

    def before(
        self,
        beforeName: str,
        ruleName: str,
        fn: RuleFuncTv,
        options: RuleOptionsType | None = None,
    ) -> None:
        """Add new rule to chain before one with given name.

        :param beforeName: new rule will be added before this one.
        :param ruleName: name of the new rule.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        :raises: KeyError if name not found
        """
        index = self.__find__(beforeName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {beforeName}")
        self.__rules__.insert(
            index, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", []))
        )
        self.__cache__ = None

    def after(
        self,
        afterName: str,
        ruleName: str,
        fn: RuleFuncTv,
        options: RuleOptionsType | None = None,
    ) -> None:
        """Add new rule to chain after one with given name.

        :param afterName: new rule will be added after this one.
        :param ruleName: name of the new rule.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        :raises: KeyError if name not found
        """
        index = self.__find__(afterName)
        options = options or {}
        if index == -1:
            raise KeyError(f"Parser rule not found: {afterName}")
        self.__rules__.insert(
            index + 1, Rule[RuleFuncTv](ruleName, True, fn, options.get("alt", []))
        )
        self.__cache__ = None

    def push(
        self, ruleName: str, fn: RuleFuncTv, options: RuleOptionsType | None = None
    ) -> None:
        """Push new rule to the end of chain.

        :param ruleName: new rule will be added to the end of chain.
        :param fn: new rule function.
        :param options: new rule options (not mandatory).
        """
        self.__rules__.append(
            Rule[RuleFuncTv](ruleName, True, fn, (options or {}).get("alt", []))
        )
        self.__cache__ = None

    def enable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> list[str]:
        """Enable rules with given names.

        :param names: name or list of rule names to enable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        result: list[str] = []
        for name in names:
            idx = self.__find__(name)
            if (idx < 0) and ignoreInvalid:
                continue
            if (idx < 0) and not ignoreInvalid:
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = True
            result.append(name)
        self.__cache__ = None
        return result

    def enableOnly(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> list[str]:
        """Enable rules with given names, and disable everything else.

        :param names: name or list of rule names to enable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        # disable everything first, then re-enable the requested names
        for rule in self.__rules__:
            rule.enabled = False
        return self.enable(names, ignoreInvalid)

    def disable(
        self, names: str | Iterable[str], ignoreInvalid: bool = False
    ) -> list[str]:
        """Disable rules with given names.

        :param names: name or list of rule names to disable.
        :param ignoreInvalid: ignore errors when rule not found
        :raises: KeyError if name not found and not ignoreInvalid
        :return: list of found rule names
        """
        if isinstance(names, str):
            names = [names]
        result = []
        for name in names:
            idx = self.__find__(name)
            if (idx < 0) and ignoreInvalid:
                continue
            if (idx < 0) and not ignoreInvalid:
                raise KeyError(f"Rules manager: invalid rule name {name}")
            self.__rules__[idx].enabled = False
            result.append(name)
        self.__cache__ = None
        return result

    def getRules(self, chainName: str = "") -> list[RuleFuncTv]:
        """Return array of active functions (rules) for given chain name.

        It analyzes rules configuration, compiles caches if not exists and returns result.

        Default chain name is `''` (empty string). It can't be skipped.
        That's done intentionally, to keep signature monomorphic for high speed.
        """
        if self.__cache__ is None:
            self.__compile__()
            assert self.__cache__ is not None
        # Chain can be empty, if rules disabled. But we still have to return Array.
        return self.__cache__.get(chainName, []) or []

    def get_all_rules(self) -> list[str]:
        """Return all available rule names."""
        return [r.name for r in self.__rules__]

    def get_active_rules(self) -> list[str]:
        """Return the active rule names."""
        return [r.name for r in self.__rules__ if r.enabled]

View File

@ -0,0 +1,27 @@
__all__ = (
"StateBlock",
"blockquote",
"code",
"fence",
"heading",
"hr",
"html_block",
"lheading",
"list_block",
"paragraph",
"reference",
"table",
)
from .blockquote import blockquote
from .code import code
from .fence import fence
from .heading import heading
from .hr import hr
from .html_block import html_block
from .lheading import lheading
from .list import list_block
from .paragraph import paragraph
from .reference import reference
from .state_block import StateBlock
from .table import table

View File

@ -0,0 +1,299 @@
# Block quotes
from __future__ import annotations
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a blockquote ('>'-prefixed block) starting at *startLine*.

    Pushes ``blockquote_open``/``blockquote_close`` tokens around the
    re-tokenized inner content and returns True on success. In *silent*
    (validation) mode, returns True as soon as the opening marker is seen.
    """
    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]
    # renamed from `max` to avoid shadowing the builtin, consistent with the
    # `maximum` naming used by the sibling block rules (fence, heading, hr)
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # check the block quote marker
    try:
        if state.src[pos] != ">":
            return False
    except IndexError:
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    # set offset past spaces and ">"
    initial = offset = state.sCount[startLine] + 1

    try:
        second_char: str | None = state.src[pos]
    except IndexError:
        second_char = None

    # skip one optional space after '>'
    if second_char == " ":
        # ' > test '
        #     ^ -- position start of line here:
        pos += 1
        initial += 1
        offset += 1
        adjustTab = False
        spaceAfterMarker = True
    elif second_char == "\t":
        spaceAfterMarker = True

        if (state.bsCount[startLine] + offset) % 4 == 3:
            # '  >\t  test '
            #       ^ -- position start of line here (tab has width==1)
            pos += 1
            initial += 1
            offset += 1
            adjustTab = False
        else:
            # ' >\t  test '
            #    ^ -- position start of line here + shift bsCount slightly
            #         to make extra space appear
            adjustTab = True
    else:
        spaceAfterMarker = False

    oldBMarks = [state.bMarks[startLine]]
    state.bMarks[startLine] = pos

    while pos < maximum:
        ch = state.src[pos]

        if isStrSpace(ch):
            if ch == "\t":
                offset += (
                    4
                    - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
                )
            else:
                offset += 1
        else:
            break

        pos += 1

    oldBSCount = [state.bsCount[startLine]]
    state.bsCount[startLine] = (
        state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
    )

    lastLineEmpty = pos >= maximum

    oldSCount = [state.sCount[startLine]]
    state.sCount[startLine] = offset - initial

    oldTShift = [state.tShift[startLine]]
    state.tShift[startLine] = pos - state.bMarks[startLine]

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    #  1. an empty line outside:
    #     ```
    #     > test
    #
    #     ```
    #  2. an empty line inside:
    #     ```
    #     >
    #     test
    #     ```
    #  3. another tag:
    #     ```
    #     > test
    #      - - -
    #     ```
    # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
    nextLine = startLine + 1
    while nextLine < endLine:
        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        #    > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos >= maximum:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        evaluatesTrue = state.src[pos] == ">" and not isOutdented
        pos += 1
        if evaluatesTrue:
            # This line is inside the blockquote.

            # set offset past spaces and ">"
            initial = offset = state.sCount[nextLine] + 1

            try:
                next_char: str | None = state.src[pos]
            except IndexError:
                next_char = None

            # skip one optional space after '>'
            if next_char == " ":
                # ' > test '
                #     ^ -- position start of line here:
                pos += 1
                initial += 1
                offset += 1
                adjustTab = False
                spaceAfterMarker = True
            elif next_char == "\t":
                spaceAfterMarker = True

                if (state.bsCount[nextLine] + offset) % 4 == 3:
                    # '  >\t  test '
                    #       ^ -- position start of line here (tab has width==1)
                    pos += 1
                    initial += 1
                    offset += 1
                    adjustTab = False
                else:
                    # ' >\t  test '
                    #    ^ -- position start of line here + shift bsCount slightly
                    #         to make extra space appear
                    adjustTab = True
            else:
                spaceAfterMarker = False

            oldBMarks.append(state.bMarks[nextLine])
            state.bMarks[nextLine] = pos

            while pos < maximum:
                ch = state.src[pos]

                if isStrSpace(ch):
                    if ch == "\t":
                        offset += (
                            4
                            - (
                                offset
                                + state.bsCount[nextLine]
                                + (1 if adjustTab else 0)
                            )
                            % 4
                        )
                    else:
                        offset += 1
                else:
                    break

                pos += 1

            lastLineEmpty = pos >= maximum

            oldBSCount.append(state.bsCount[nextLine])
            state.bsCount[nextLine] = (
                state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
            )

            oldSCount.append(state.sCount[nextLine])
            state.sCount[nextLine] = offset - initial

            oldTShift.append(state.tShift[nextLine])
            state.tShift[nextLine] = pos - state.bMarks[nextLine]

            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                oldBMarks.append(state.bMarks[nextLine])
                oldBSCount.append(state.bsCount[nextLine])
                oldTShift.append(state.tShift[nextLine])
                oldSCount.append(state.sCount[nextLine])
                state.sCount[nextLine] -= state.blkIndent

            break

        oldBMarks.append(state.bMarks[nextLine])
        oldBSCount.append(state.bsCount[nextLine])
        oldTShift.append(state.tShift[nextLine])
        oldSCount.append(state.sCount[nextLine])

        # A negative indentation means that this is a paragraph continuation
        #
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    # keep a reference so the closing line can be patched in after tokenizing
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore original tShift; this might not be necessary since the parser
    # has already been here, but just to make sure we can do that.
    for i, item in enumerate(oldTShift):
        state.bMarks[i + startLine] = oldBMarks[i]
        state.tShift[i + startLine] = item
        state.sCount[i + startLine] = oldSCount[i]
        state.bsCount[i + startLine] = oldBSCount[i]

    state.blkIndent = oldIndent

    return True

View File

@ -0,0 +1,36 @@
"""Code block (4 spaces padded)."""
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def code(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse an indented (4-space) code block starting at *startLine*.

    Consumes the run of indented/empty lines, pushes a single
    ``code_block`` token, and returns True; returns False when
    *startLine* is not indented code.
    """
    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    if not state.is_code_block(startLine):
        return False

    # Scan forward: empty lines are tentatively skipped, indented lines
    # extend the block; the first ordinary line terminates the scan.
    # `last` tracks the end of the block excluding trailing empty lines.
    last = line = startLine + 1
    while line < endLine:
        if state.isEmpty(line):
            line += 1
        elif state.is_code_block(line):
            line += 1
            last = line
        else:
            break

    state.line = last

    token = state.push("code_block", "code", 0)
    token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n"
    token.map = [startLine, state.line]

    return True

View File

@ -0,0 +1,101 @@
# fences (``` lang, ~~~ lang)
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a fenced code block (``` lang or ~~~ lang) starting at *startLine*."""
    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    haveEndMarker = False
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    # a fence needs at least three marker characters on the opening line
    if pos + 3 > maximum:
        return False

    marker = state.src[pos]

    if marker not in ("~", "`"):
        return False

    # scan marker length
    mem = pos
    pos = state.skipCharsStr(pos, marker)

    length = pos - mem

    if length < 3:
        return False

    markup = state.src[mem:pos]
    params = state.src[pos:maximum]

    # the info string of a backtick fence may not contain backticks
    if marker == "`" and marker in params:
        return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        try:
            if state.src[pos] != marker:
                continue
        except IndexError:
            break

        if state.is_code_block(nextLine):
            continue

        pos = state.skipCharsStr(pos, marker)

        # closing code fence must be at least as long as the opening one
        if pos - mem < length:
            continue

        # make sure tail has spaces only
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        haveEndMarker = True
        # found!
        break

    # If a fence has heading spaces, they should be removed from its inner block
    length = state.sCount[startLine]

    state.line = nextLine + (1 if haveEndMarker else 0)

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, length, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True

View File

@ -0,0 +1,69 @@
"""Atex heading (#, ##, ...)"""
from __future__ import annotations
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse an ATX heading: one to six ``#`` characters followed by whitespace.

    Returns True when the line at *startLine* is a valid heading; unless
    *silent*, pushes ``heading_open`` / ``inline`` / ``heading_close`` tokens.
    """
    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    cursor = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    char: str | None = state.src[cursor]
    if char != "#" or cursor >= line_end:
        return False

    def _peek(at: int) -> str | None:
        # Safe single-character read: None once past the end of the source.
        return state.src[at] if at < len(state.src) else None

    # Count the run of '#' characters to determine the heading level.
    depth = 1
    cursor += 1
    char = _peek(cursor)
    while char == "#" and cursor < line_end and depth <= 6:
        depth += 1
        cursor += 1
        char = _peek(cursor)

    # More than six '#', or a marker run not followed by whitespace, is no heading.
    if depth > 6 or (cursor < line_end and not isStrSpace(char)):
        return False

    if silent:
        return True

    # Cut an optional closing run such as ' ### ' from the end of the line.
    line_end = state.skipSpacesBack(line_end, cursor)
    trimmed = state.skipCharsStrBack(line_end, "#", cursor)
    if trimmed > cursor and isStrSpace(state.src[trimmed - 1]):
        line_end = trimmed

    state.line = startLine + 1

    token = state.push("heading_open", "h" + str(depth), 1)
    token.markup = "#" * depth
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[cursor:line_end].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", "h" + str(depth), -1)
    token.markup = "#" * depth

    return True

View File

@ -0,0 +1,56 @@
"""Horizontal rule
At least 3 of these characters on a line * - _
"""
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a thematic break: at least three ``*``, ``-`` or ``_`` characters,
    optionally interleaved with spaces, alone on a line.
    """
    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    cursor = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False

    try:
        marker = state.src[cursor]
    except IndexError:
        return False
    cursor += 1

    # Only '*', '-' and '_' may form a thematic break.
    if marker not in ("*", "-", "_"):
        return False

    # Markers can be mixed with spaces, but at least three are required;
    # any other character disqualifies the line.
    count = 1
    for current in state.src[cursor:line_end]:
        if current == marker:
            count += 1
        elif not isStrSpace(current):
            return False

    if count < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    # NOTE: markup stores count + 1 copies of the marker character.
    token.markup = marker * (count + 1)

    return True

View File

@ -0,0 +1,90 @@
# HTML block
from __future__ import annotations
import logging
import re
from ..common.html_blocks import block_names
from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
HTML_SEQUENCES: list[tuple[re.Pattern[str], re.Pattern[str], bool]] = [
    (
        # <script>/<pre>/<style>/<textarea>: block runs until the matching end tag
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # HTML comment
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # processing instruction, e.g. <?php ... ?>
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # declaration, e.g. <!DOCTYPE ...>
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # CDATA section
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    (
        # known block-level tag names: closing pattern matches an empty line
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # a complete open/close tag alone on the line; may not interrupt a paragraph
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a raw HTML block (only when the ``html`` option is enabled).

    The opening line is matched against ``HTML_SEQUENCES``; for sequences
    whose closing pattern is not already satisfied, subsequent lines are
    consumed until the closing pattern matches or the block dedents.
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    begin = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    if state.is_code_block(startLine):
        return False
    if not state.md.options.get("html", None):
        return False
    if state.src[begin] != "<":
        return False

    line_text = state.src[begin:line_end]

    # Find the first sequence whose opening pattern matches this line.
    matched = next(
        (seq for seq in HTML_SEQUENCES if seq[0].search(line_text)), None
    )
    if matched is None:
        return False

    _open_re, close_re, can_terminate = matched

    if silent:
        # In validation mode, report whether this sequence may terminate a paragraph.
        return can_terminate

    nextLine = startLine + 1

    # HTML block detected - roll down until the closing pattern is found,
    # unless the opening line already satisfies it.
    if not close_re.search(line_text):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break
            begin = state.bMarks[nextLine] + state.tShift[nextLine]
            line_end = state.eMarks[nextLine]
            line_text = state.src[begin:line_end]
            if close_re.search(line_text):
                if line_text:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)
    return True

View File

@ -0,0 +1,86 @@
# lheading (---, ==)
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a setext heading: paragraph text underlined with ``=`` (h1) or ``-`` (h2)."""
    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    heading_level = None
    nextLine = startLine + 1
    terminatorRules = state.md.block.ruler.getRules("paragraph")

    if state.is_code_block(startLine):
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    # Scan line-by-line until an empty line, EOF, or a valid underline.
    while nextLine < endLine and not state.isEmpty(nextLine):
        # Over-indented lines are lazy continuations after a paragraph,
        # not code blocks.
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # Check for a setext underline candidate.
        if state.sCount[nextLine] >= state.blkIndent:
            cursor = state.bMarks[nextLine] + state.tShift[nextLine]
            line_end = state.eMarks[nextLine]
            if cursor < line_end:
                underline_char = state.src[cursor]
                if underline_char in ("-", "="):
                    cursor = state.skipCharsStr(cursor, underline_char)
                    cursor = state.skipSpaces(cursor)
                    # Only marker characters and trailing spaces: a valid underline.
                    if cursor >= line_end:
                        heading_level = 1 if underline_char == "=" else 2
                        break

        # Quirk for blockquotes: such lines are already checked by that rule.
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some blocks can terminate a paragraph without an empty line.
        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        nextLine += 1

    if heading_level is None:
        # Didn't find a valid underline.
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    state.line = nextLine + 1

    token = state.push("heading_open", "h" + str(heading_level), 1)
    token.markup = underline_char
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(heading_level), -1)
    token.markup = underline_char

    state.parentType = oldParentType
    return True

View File

@ -0,0 +1,345 @@
# Lists
import logging
from ..common.utils import isStrSpace
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def skipBulletListMarker(state: StateBlock, startLine: int) -> int:
    """Search for a bullet list marker (``*``, ``-`` or ``+`` followed by
    whitespace or end of line).

    Returns the position just after the marker on success, -1 on failure.
    """
    cursor = state.bMarks[startLine] + state.tShift[startLine]
    line_end = state.eMarks[startLine]

    try:
        bullet = state.src[cursor]
    except IndexError:
        return -1
    cursor += 1

    if bullet not in ("*", "-", "+"):
        return -1

    # The marker must be followed by whitespace: " -test " is not a list item.
    if cursor < line_end and not isStrSpace(state.src[cursor]):
        return -1

    return cursor
def skipOrderedListMarker(state: StateBlock, startLine: int) -> int:
    """Search for an ordered list marker (digits followed by ``.`` or ``)``
    and whitespace or end of line).

    Returns the position just after the marker on success, -1 on failure.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]
    cursor = start
    line_end = state.eMarks[startLine]

    # A marker needs at least two characters: one digit plus the terminator.
    if cursor + 1 >= line_end:
        return -1

    first = state.src[cursor]
    cursor += 1
    if not ("0" <= first <= "9"):
        return -1

    while True:
        # EOL before finding '.' or ')' -> fail
        if cursor >= line_end:
            return -1

        current = state.src[cursor]
        cursor += 1

        if "0" <= current <= "9":
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if cursor - start >= 10:
                return -1
            continue

        # found valid marker terminator
        if current in (")", "."):
            break

        return -1

    # The terminator must be followed by whitespace: " 1.test " is not a list item.
    if cursor < line_end and not isStrSpace(state.src[cursor]):
        return -1

    return cursor
def markTightParagraphs(state: StateBlock, idx: int) -> None:
    """Hide ``paragraph_open``/``paragraph_close`` token pairs that sit directly
    inside the list starting at token index *idx* (making the list "tight")."""
    target_level = state.level + 2
    tokens = state.tokens

    pos = idx + 2
    stop = len(tokens) - 2
    while pos < stop:
        tok = tokens[pos]
        if tok.level == target_level and tok.type == "paragraph_open":
            # Hide both the opening token and its matching close (two ahead).
            tokens[pos + 2].hidden = True
            tok.hidden = True
            pos += 2
        pos += 1
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse an ordered or bullet list.

    Detects the marker on *startLine*, then iterates list items, recursively
    tokenizing each item's content with adjusted indents, and finally emits the
    enclosing ``*_list_open`` / ``*_list_close`` tokens.  Paragraphs inside a
    tight list are hidden via :func:`markTightParagraphs`.
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)
    isTerminatingParagraph = False
    tight = True
    if state.is_code_block(startLine):
        return False
    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False
    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    #
    if (
        silent
        and state.parentType == "paragraph"
        and state.sCount[startLine] >= state.blkIndent
    ):
        isTerminatingParagraph = True
    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    if posAfterMarker >= 0:
        isOrdered = True
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])
        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker >= 0:
            isOrdered = False
        else:
            return False
    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        isTerminatingParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False
    # We should terminate list on style change. Remember first one to compare.
    markerChar = state.src[posAfterMarker - 1]
    # For validation mode we can terminate immediately
    if silent:
        return True
    # Start list
    listTokIdx = len(state.tokens)
    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}
    else:
        token = state.push("bullet_list_open", "ul", 1)
    token.map = listLines = [startLine, 0]
    token.markup = markerChar
    #
    # Iterate list items
    #
    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")
    oldParentType = state.parentType
    state.parentType = "list"
    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]
        # offset = column of the first content character (tabs expanded)
        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )
        while pos < maximum:
            ch = state.src[pos]
            if ch == "\t":
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == " ":
                offset += 1
            else:
                break
            pos += 1
        contentStart = pos
        # trimming space in "-    \n  3" case, indent is 1 here
        indentAfterMarker = 1 if contentStart >= maximum else offset - initial
        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1
        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker
        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]
        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]
        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent
        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset
        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)
        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False
        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)
        # restore the state saved before the subparser call
        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight
        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar
        nextLine = startLine = state.line
        itemLines[1] = nextLine
        if nextLine >= endLine:
            break
        contentStart = state.bMarks[startLine]
        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break
        if state.is_code_block(startLine):
            break
        # fail if terminating block found
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break
        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
        if markerChar != state.src[posAfterMarker - 1]:
            break
    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)
    token.markup = markerChar
    listLines[1] = nextLine
    state.line = nextLine
    state.parentType = oldParentType
    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)
    return True

View File

@ -0,0 +1,66 @@
"""Paragraph."""
import logging
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a plain paragraph: consume lines until an empty line, end of
    input, or a terminator rule, then emit the paragraph token triple.
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    nextLine = startLine + 1
    terminatorRules = state.md.block.ruler.getRules("paragraph")
    endLine = state.lineMax  # a paragraph may run to the end of the input

    oldParentType = state.parentType
    state.parentType = "paragraph"

    # Walk line-by-line until an empty line or EOF.
    while nextLine < endLine:
        if state.isEmpty(nextLine):
            break
        # Over-indented lines would normally be code blocks, but after a
        # paragraph they are lazy continuations regardless of content.
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue
        # Quirk for blockquotes: such lines are already checked by that rule.
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue
        # Some blocks can terminate a paragraph without an empty line.
        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break
        nextLine += 1

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    state.line = nextLine

    token = state.push("paragraph_open", "p", 1)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("paragraph_close", "p", -1)

    state.parentType = oldParentType
    return True

View File

@ -0,0 +1,235 @@
import logging
from ..common.utils import charCodeAt, isSpace, normalizeReference
from .state_block import StateBlock
LOGGER = logging.getLogger(__name__)
def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
    """Parse a link reference definition: ``[label]: destination 'title'``.

    The definition may span multiple lines (continuation lines are pulled in
    via :func:`getNextLine`).  On success the parsed reference is stored in
    ``state.env["references"]`` (duplicates go to ``env["duplicate_refs"]``)
    and True is returned.
    """
    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1
    if state.is_code_block(startLine):
        return False
    if state.src[pos] != "[":
        return False
    # max + 1 explicitly includes the newline of the first line
    string = state.src[pos : maximum + 1]
    # string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)
    labelEnd = None
    pos = 1
    # Scan for the closing ']' of the label, pulling in continuation lines
    # when a newline is reached before the label closes.
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif ch == 0x5C:  # /* \ */
            pos += 1
            if (
                pos < maximum
                and charCodeAt(string, pos) == 0x0A
                and (lineContent := getNextLine(state, nextLine)) is not None
            ):
                string += lineContent
                maximum = len(string)
                nextLine += 1
        pos += 1
    # The label must be followed immediately by ':'.
    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False
    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1
    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    destRes = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not destRes.ok:
        return False
    href = state.md.normalizeLink(destRes.str)
    if not state.md.validateLink(href):
        return False
    pos = destRes.pos
    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = nextLine
    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            if (lineContent := getNextLine(state, nextLine)) is not None:
                string += lineContent
                maximum = len(string)
                nextLine += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1
    # [label]:   destination   'title'
    #                          ^^^^^^^ parse this
    titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, None)
    # A title may span multiple lines; keep feeding continuation lines while
    # the title parser reports it can continue.
    while titleRes.can_continue:
        if (lineContent := getNextLine(state, nextLine)) is None:
            break
        string += lineContent
        pos = maximum
        maximum = len(string)
        nextLine += 1
        titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, titleRes)
    if pos < maximum and start != pos and titleRes.ok:
        title = titleRes.str
        pos = titleRes.pos
    else:
        # no valid title: roll back to just after the destination
        title = ""
        pos = destEndPos
        nextLine = destEndLineNo
    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1
    if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
        # garbage at the end of the line after title,
        # but it could still be a valid reference if we roll back
        title = ""
        pos = destEndPos
        nextLine = destEndLineNo
        while pos < maximum:
            ch = charCodeAt(string, pos)
            if not isSpace(ch):
                break
            pos += 1
    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False
    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False
    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True
    if "references" not in state.env:
        state.env["references"] = {}
    state.line = nextLine
    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]
    # first definition wins; later duplicates are recorded separately
    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )
    return True
def getNextLine(state: StateBlock, nextLine: int) -> None | str:
    """Return the raw text of *nextLine* (including its trailing newline) when it
    can continue a reference definition, or None when the definition ends here.
    """
    endLine = state.lineMax

    if nextLine >= endLine or state.isEmpty(nextLine):
        # empty line or end of input
        return None

    # Over-indented and negatively indented lines are lazy continuations
    # after a paragraph, regardless of what is on them.
    isContinuation = state.is_code_block(nextLine) or state.sCount[nextLine] < 0

    if not isContinuation:
        # Some blocks can terminate the definition without an empty line;
        # probe the "reference" terminator rules in validation mode.
        oldParentType = state.parentType
        state.parentType = "reference"
        terminated = any(
            rule(state, nextLine, endLine, True)
            for rule in state.md.block.ruler.getRules("reference")
        )
        state.parentType = oldParentType
        if terminated:
            # terminated by another block
            return None

    begin = state.bMarks[nextLine] + state.tShift[nextLine]
    line_end = state.eMarks[nextLine]
    # line_end + 1 explicitly includes the newline
    return state.src[begin : line_end + 1]

View File

@ -0,0 +1,261 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Literal
from ..common.utils import isStrSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it.main import MarkdownIt
class StateBlock(StateBase):
    """Block-level parser state.

    Splits ``src`` into lines, caching per-line begin/end offsets and indent
    measurements, and holds the token stream plus the bookkeeping variables
    (indents, tightness, parent type) that block rules read and mutate.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        self.src = src
        # link to parser instance
        self.md = md
        self.env = env
        #
        # Internal state variables
        #
        self.tokens = tokens
        self.bMarks: list[int] = []  # line begin offsets for fast jumps
        self.eMarks: list[int] = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indents for each line (tabs expanded)
        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount: list[int] = []
        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)
        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"
        self.level = 0
        # renderer
        self.result = ""
        # Create caches
        # Generate markers.
        indent_found = False
        start = pos = indent = offset = 0
        length = len(self.src)
        for pos, character in enumerate(self.src):
            if not indent_found:
                if isStrSpace(character):
                    indent += 1
                    if character == "\t":
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                else:
                    indent_found = True
            if character == "\n" or pos == length - 1:
                if character != "\n":
                    # last line without a trailing newline: include its last char
                    pos += 1
                self.bMarks.append(start)
                self.eMarks.append(pos)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)
                indent_found = False
                indent = 0
                offset = 0
                start = pos + 1
        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)
        self.lineMax = len(self.bMarks) - 1  # don't count last fake line
        # pre-check if code blocks are enabled, to speed up is_code_block method
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Push new token to "stream"."""
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True if `line` has no content after its leading whitespace."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first non-empty line at or after `from_pos`."""
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                pass
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if not isStrSpace(current):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if not isStrSpace(self.src[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip character code from given position."""
        # NOTE(review): relies on `srcCharCode` provided by StateBase — confirm
        # availability; block rules in this package use skipCharsStr instead.
        while True:
            try:
                current = self.srcCharCode[pos]
            except IndexError:
                break
            if current != code:
                break
            pos += 1
        return pos

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Skip character string from given position."""
        while True:
            try:
                current = self.src[pos]
            except IndexError:
                break
            if current != ch:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip character code reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Skip character string reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if ch != self.src[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source, removing up to `indent` columns of
        leading indentation from each line."""
        line = begin
        if begin >= end:
            return ""
        queue = [""] * (end - begin)
        i = 1
        while line < end:
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            last = (
                self.eMarks[line] + 1
                if line + 1 < end or keepLastLF
                else self.eMarks[line]
            )
            while (first < last) and (lineIndent < indent):
                ch = self.src[first]
                if isStrSpace(ch):
                    if ch == "\t":
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    lineIndent += 1
                else:
                    break
                first += 1
            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
            else:
                queue[i - 1] = self.src[first:last]
            line += 1
            i += 1
        return "".join(queue)

    def is_code_block(self, line: int) -> bool:
        """Check if line is a code block,
        i.e. the code block rule is enabled and text is indented by more than 3 spaces.
        """
        return self._code_enabled and (self.sCount[line] - self.blkIndent) >= 4

View File

@ -0,0 +1,250 @@
# GFM table, https://github.github.com/gfm/#tables-extension-
from __future__ import annotations
import re
from ..common.utils import charStrAt, isStrSpace
from .state_block import StateBlock
# matches a delimiter-row cell such as ``---``, ``:--``, ``--:`` or ``:-:``
headerLineRe = re.compile(r"^:?-+:?$")
# matches a leading or a trailing pipe character
enclosingPipesRe = re.compile(r"^\||\|$")
# Limit the amount of empty autocompleted cells in a table,
# see https://github.com/markdown-it/markdown-it/issues/1000,
# Both pulldown-cmark and commonmark-hs limit the number of cells this way to ~200k.
# We set it to 65k, which can expand user input by a factor of x370
# (256x256 square is 1.8kB expanded into 650kB).
MAX_AUTOCOMPLETED_CELLS = 0x10000
def getLine(state: StateBlock, line: int) -> str:
    """Return the text of *line* with its leading indentation stripped."""
    begin = state.bMarks[line] + state.tShift[line]
    end = state.eMarks[line]
    return state.src[begin:end]
def escapedSplit(string: str) -> list[str]:
    """Split *string* into cells on unescaped ``|`` characters.

    An escaped pipe (``\\|``) does not split; its backslash is dropped and
    the pipe is kept in the cell content.
    """
    cells: list[str] = []
    limit = len(string)
    escaped = False
    segment_start = 0
    pending = ""

    idx = 0
    ch = charStrAt(string, idx)
    while idx < limit:
        if ch == "|":
            if not escaped:
                # pipe separating cells, '|'
                cells.append(pending + string[segment_start:idx])
                pending = ""
                segment_start = idx + 1
            else:
                # escaped pipe, '\|': drop the backslash, keep the pipe
                pending += string[segment_start : idx - 1]
                segment_start = idx
        escaped = ch == "\\"
        idx += 1
        ch = charStrAt(string, idx)

    cells.append(pending + string[segment_start:])
    return cells
def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Parse a GFM table: a header row, a delimiter row of ``-``/``:``/``|``,
    and zero or more body rows.

    The header row fixes the column count; body rows with fewer cells are
    autocompleted with empty ``td`` tokens (bounded by
    ``MAX_AUTOCOMPLETED_CELLS``).
    """
    tbodyLines = None
    # should have at least two lines
    if startLine + 2 > endLine:
        return False
    nextLine = startLine + 1
    if state.sCount[nextLine] < state.blkIndent:
        return False
    if state.is_code_block(nextLine):
        return False
    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    if pos >= state.eMarks[nextLine]:
        return False
    first_ch = state.src[pos]
    pos += 1
    if first_ch not in ("|", "-", ":"):
        return False
    if pos >= state.eMarks[nextLine]:
        return False
    second_ch = state.src[pos]
    pos += 1
    if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch):
        return False
    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == "-" and isStrSpace(second_ch):
        return False
    while pos < state.eMarks[nextLine]:
        ch = state.src[pos]
        if ch not in ("|", "-", ":") and not isStrSpace(ch):
            return False
        pos += 1
    # Parse the delimiter row to determine per-column alignment.
    lineText = getLine(state, startLine + 1)
    columns = lineText.split("|")
    aligns = []
    for i in range(len(columns)):
        t = columns[i].strip()
        if not t:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if i == 0 or i == len(columns) - 1:
                continue
            else:
                return False
        if not headerLineRe.search(t):
            return False
        if charStrAt(t, len(t) - 1) == ":":
            # ':--:' is centered, '--:' is right-aligned
            aligns.append("center" if charStrAt(t, 0) == ":" else "right")
        elif charStrAt(t, 0) == ":":
            aligns.append("left")
        else:
            aligns.append("")
    lineText = getLine(state, startLine).strip()
    if "|" not in lineText:
        return False
    if state.is_code_block(startLine):
        return False
    columns = escapedSplit(lineText)
    # trim enclosing pipes from the header row
    if columns and columns[0] == "":
        columns.pop(0)
    if columns and columns[-1] == "":
        columns.pop()
    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(columns)
    if columnCount == 0 or columnCount != len(aligns):
        return False
    if silent:
        return True
    oldParentType = state.parentType
    state.parentType = "table"
    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")
    token = state.push("table_open", "table", 1)
    token.map = tableLines = [startLine, 0]
    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]
    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]
    for i in range(len(columns)):
        token = state.push("th_open", "th", 1)
        if aligns[i]:
            token.attrs = {"style": "text-align:" + aligns[i]}
        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = columns[i].strip()
        token.children = []
        token = state.push("th_close", "th", -1)
    token = state.push("tr_close", "tr", -1)
    token = state.push("thead_close", "thead", -1)
    # Consume body rows until a dedent, blank line, or terminator block.
    autocompleted_cells = 0
    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break
        terminate = False
        for i in range(len(terminatorRules)):
            if terminatorRules[i](state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break
        lineText = getLine(state, nextLine).strip()
        if not lineText:
            break
        if state.is_code_block(nextLine):
            break
        columns = escapedSplit(lineText)
        if columns and columns[0] == "":
            columns.pop(0)
        if columns and columns[-1] == "":
            columns.pop()
        # note: autocomplete count can be negative if user specifies more columns than header,
        # but that does not affect intended use (which is limiting expansion)
        autocompleted_cells += columnCount - len(columns)
        if autocompleted_cells > MAX_AUTOCOMPLETED_CELLS:
            break
        if nextLine == startLine + 2:
            # open tbody on the first body row only
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]
        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]
        for i in range(columnCount):
            token = state.push("td_open", "td", 1)
            if aligns[i]:
                token.attrs = {"style": "text-align:" + aligns[i]}
            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            try:
                token.content = columns[i].strip() if columns[i] else ""
            except IndexError:
                # missing cell: autocomplete with empty content
                token.content = ""
            token.children = []
            token = state.push("td_close", "td", -1)
        token = state.push("tr_close", "tr", -1)
        nextLine += 1
    if tbodyLines:
        token = state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine
    token = state.push("table_close", "table", -1)
    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True

View File

@ -0,0 +1,19 @@
__all__ = (
"StateCore",
"block",
"inline",
"linkify",
"normalize",
"replace",
"smartquotes",
"text_join",
)
from .block import block
from .inline import inline
from .linkify import linkify
from .normalize import normalize
from .replacements import replace
from .smartquotes import smartquotes
from .state_core import StateCore
from .text_join import text_join

View File

@ -0,0 +1,13 @@
from ..token import Token
from .state_core import StateCore
def block(state: StateCore) -> None:
    """Core rule: tokenize the source as block markup, or wrap it in a single
    inline token when the parser is in inline-only mode.
    """
    if not state.inlineMode:
        state.md.block.parse(state.src, state.md, state.env, state.tokens)
        return
    # Inline mode: the whole source becomes one inline token.
    token = Token("inline", "", 0)
    token.content = state.src
    token.map = [0, 1]
    token.children = []
    state.tokens.append(token)

Some files were not shown because too many files have changed in this diff Show More