NicholaiVogel c5ccd14048 feat: Initial Whisper Transcription TUI implementation
- Added Textual-based TUI with file selection and progress monitoring

- Implemented transcription service with OpenAI API and local Whisper backends

- Added markdown formatter for transcription output

- Configuration management for persistent API keys and output directory

- Comprehensive README with installation and usage instructions

- Support for multi-file batch processing

- Beautiful terminal UI with modal dialogs for user input
2025-10-20 16:43:35 -06:00

445 lines
15 KiB
Python

"""Main Textual TUI application for transcription."""
from pathlib import Path
from typing import Optional

from textual.app import ComposeResult, App
from textual.containers import Container, Vertical
from textual.message import Message
from textual.screen import Screen, ModalScreen
from textual.widgets import (
    Header,
    Footer,
    Static,
    Input,
    Button,
    Label,
    DirectoryTree,
    SelectionList,
    Select,
    RichLog,
)
from textual.widgets.selection_list import Selection

from .config import ConfigManager
from .file_handler import FileHandler
from .transcriber import TranscriptionService
from .formatter import MarkdownFormatter
class ApiKeyModal(ModalScreen[Optional[str]]):
    """Modal screen for entering the OpenAI API key.

    The screen closes itself with ``dismiss()`` so that whoever pushed it
    (via a ``push_screen`` callback or an awaited ``push_screen_wait``)
    is told the outcome: the saved key on success, ``None`` on cancel.
    """

    def __init__(self, config: ConfigManager) -> None:
        """Initialize the modal.

        Args:
            config: Configuration manager the entered key is saved to.
        """
        super().__init__()
        self.config = config
        # Last key saved through this modal; stays None until "Save" succeeds.
        self.api_key: Optional[str] = None

    def compose(self) -> ComposeResult:
        """Compose modal widgets."""
        yield Vertical(
            Label("OpenAI API Key Required"),
            Label("Enter your OpenAI API key (get it from https://platform.openai.com/api-keys):"),
            Input(id="api_key_input", password=True),
            Container(
                Button("Save", id="save_api", variant="primary"),
                Button("Cancel", id="cancel_api"),
            ),
            id="api_key_modal",
        )

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save the key or cancel, then dismiss the modal with the result."""
        button_id = event.button.id
        if button_id == "cancel_api":
            self.dismiss(None)
            return
        if button_id != "save_api":
            return

        api_key = self.query_one("#api_key_input", Input).value.strip()
        if not api_key:
            # Keep the modal open so the user can correct the input.
            self.app.notify("API key cannot be empty", timeout=2)
            return

        self.api_key = api_key
        self.config.set_api_key(api_key)
        # dismiss() pops the screen AND delivers the result to the caller;
        # a bare pop_screen() would drop the caller's callback silently.
        self.dismiss(api_key)
class MethodSelectModal(ModalScreen[Optional[str]]):
    """Modal screen for selecting the transcription method.

    Dismisses with ``"openai"`` or ``"local"`` when the user confirms a
    choice, and with ``None`` when the user cancels.
    """

    def __init__(self) -> None:
        """Initialize the modal with no method selected."""
        super().__init__()
        # Mirrors the value passed to dismiss(); None until a choice is made.
        self.selected_method: Optional[str] = None

    def compose(self) -> ComposeResult:
        """Compose modal widgets."""
        yield Vertical(
            Label("Select Transcription Method"),
            Select(
                options=[
                    ("OpenAI Whisper API (fast, costs money)", "openai"),
                    ("Local Whisper (free, slower)", "local"),
                ],
                id="method_select",
            ),
            Container(
                Button("Select", id="select_method", variant="primary"),
                Button("Cancel", id="cancel_method"),
            ),
            id="method_modal",
        )

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Confirm or cancel the selection, then dismiss with the result."""
        button_id = event.button.id
        if button_id == "cancel_method":
            self.dismiss(None)
            return
        if button_id != "select_method":
            return

        choice = self.query_one("#method_select", Select).value
        if choice == Select.BLANK:
            # Nothing chosen yet: keep the modal open.
            self.app.notify("Please select a method", timeout=2)
            return

        self.selected_method = choice
        # dismiss() (not pop_screen()) so the caller's callback receives
        # the chosen method.
        self.dismiss(choice)
class OutputDirModal(ModalScreen[Optional[Path]]):
    """Modal screen for selecting the output directory.

    Dismisses with the chosen directory once it has been validated and
    saved to the configuration, or with ``None`` when the user cancels.
    """

    def __init__(self, config: ConfigManager) -> None:
        """Initialize the modal.

        Args:
            config: Configuration manager the chosen directory is saved to.
        """
        super().__init__()
        self.config = config
        # Only ever holds a directory that passed validation.
        self.selected_dir: Optional[Path] = None

    def compose(self) -> ComposeResult:
        """Compose modal widgets."""
        yield Vertical(
            Label("Select Output Directory"),
            DirectoryTree("/", id="dir_tree"),
            Container(
                Button("Select", id="select_dir", variant="primary"),
                Button("Cancel", id="cancel_dir"),
            ),
            id="output_dir_modal",
        )

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Validate and save the highlighted directory, or cancel."""
        button_id = event.button.id
        if button_id == "cancel_dir":
            self.dismiss(None)
            return
        if button_id != "select_dir":
            return

        node = self.query_one("#dir_tree", DirectoryTree).cursor_node
        if node is None or node.data is None:
            self.app.notify("Please select a directory", timeout=2)
            return

        # DirectoryTree nodes carry a DirEntry; the filesystem path lives in
        # its ``path`` attribute (the DirEntry itself is not path-like).
        candidate = Path(node.data.path)
        is_valid, error = FileHandler.validate_directory(candidate)
        if not is_valid:
            # Leave selected_dir untouched so it never holds a rejected path.
            self.app.notify(f"Error: {error}", timeout=3)
            return

        self.selected_dir = candidate
        self.config.set_output_directory(candidate)
        # dismiss() pops the screen and hands the result to the caller.
        self.dismiss(candidate)
class FileSelectScreen(Screen):
    """Screen for selecting files to transcribe."""

    class FileSelected(Message):
        """Message posted when the user confirms the selected files.

        It must derive from ``Message`` so Textual can queue it and
        dispatch it to ``on_file_select_screen_file_selected`` handlers.
        """

        def __init__(self, files: list[Path]) -> None:
            """Initialize the message.

            Args:
                files: Files chosen for transcription.
            """
            super().__init__()
            self.files = files

    def __init__(self, config: ConfigManager) -> None:
        """Initialize the file selection screen.

        Args:
            config: Application configuration manager.
        """
        super().__init__()
        self.config = config
        # Files added so far, in the order the user added them.
        self.selected_files: list[Path] = []

    def compose(self) -> ComposeResult:
        """Compose screen widgets."""
        yield Header()
        yield Vertical(
            Label("Select files to transcribe (Supported: MP3, WAV, M4A, FLAC, MP4, AVI, MKV, MOV)"),
            DirectoryTree("/", id="file_tree"),
            Static(id="file_info"),
            Container(
                Button("Add File", id="add_file", variant="primary"),
                Button("Continue", id="continue_btn", variant="success"),
                Button("Cancel", id="cancel_btn", variant="error"),
            ),
            id="file_select_container",
        )
        yield Footer()

    def on_mount(self) -> None:
        """Focus the file tree so keyboard navigation works immediately."""
        self.query_one("#file_tree", DirectoryTree).focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dispatch the Add File / Continue / Cancel buttons."""
        button_id = event.button.id
        if button_id == "add_file":
            self._add_highlighted_file()
        elif button_id == "continue_btn":
            if self.selected_files:
                # Send a copy so later edits to the screen's list cannot
                # change what the receiver was told.
                self.app.post_message(self.FileSelected(list(self.selected_files)))
            else:
                self.app.notify("Please select at least one file", timeout=2)
        elif button_id == "cancel_btn":
            self.app.exit()

    def _add_highlighted_file(self) -> None:
        """Validate the highlighted tree entry and add it to the selection."""
        node = self.query_one("#file_tree", DirectoryTree).cursor_node
        if node is None or node.data is None:
            self.app.notify("Please select a file", timeout=2)
            return

        # DirectoryTree nodes carry a DirEntry; the filesystem path lives in
        # its ``path`` attribute (the DirEntry itself is not path-like).
        file_path = Path(node.data.path)
        is_valid, error = FileHandler.validate_file(file_path)
        if not is_valid:
            self.app.notify(f"Error: {error}", timeout=2)
            return
        if file_path in self.selected_files:
            self.app.notify("File already added", timeout=2)
            return

        self.selected_files.append(file_path)
        self._update_file_info()

    def _update_file_info(self) -> None:
        """Update the file information display."""
        info = self.query_one("#file_info", Static)
        file_list = "\n".join(f.name for f in self.selected_files)
        info.update(f"Selected files:\n{file_list}\n\nTotal: {len(self.selected_files)}")
class ProgressScreen(Screen):
    """Screen showing transcription progress.

    Transcription runs in a thread worker so the interface keeps
    repainting and the log fills in while files are being processed.
    """

    def __init__(self, files: list[Path], method: str, config: ConfigManager) -> None:
        """Initialize the progress screen.

        Args:
            files: Files to transcribe, processed in order.
            method: ``"openai"`` selects the OpenAI backend; any other
                value selects the local Whisper backend.
            config: Source of the API key, model size and output directory.
        """
        super().__init__()
        self.files = files
        self.method = method
        self.config = config
        self.service = TranscriptionService()
        # (source file, written output path) for each file that succeeded.
        self.results: list[tuple[Path, str]] = []

    def compose(self) -> ComposeResult:
        """Compose screen widgets."""
        yield Header()
        yield Vertical(
            Label("Transcription Progress"),
            RichLog(id="progress_log", markup=True),
            Container(
                Button("View Results", id="view_results", variant="primary"),
                Button("Exit", id="exit_btn", variant="error"),
                id="progress_controls",
            ),
        )
        yield Footer()

    def on_mount(self) -> None:
        """Start transcription in a background thread once mounted."""
        # The transcription calls block; running them on the event loop
        # would freeze the UI until every file had been processed.
        self.run_worker(self._run_transcription, thread=True, exclusive=True)

    def _emit(self, message: str) -> None:
        """Append a line to the progress log (runs on the UI thread)."""
        self.query_one("#progress_log", RichLog).write(message)

    def _write(self, message: str) -> None:
        """Log a line from the worker thread.

        Widgets must only be touched from the app's own thread, so the
        write is marshalled there with ``call_from_thread``.
        """
        self.app.call_from_thread(self._emit, message)

    def _configure_backend(self) -> bool:
        """Set up the transcription backend; return False on failure."""
        try:
            if self.method == "openai":
                api_key = self.config.get_api_key()
                if not api_key:
                    self._write("[red]Error: No API key configured[/red]")
                    return False
                self.service.set_openai_backend(api_key)
                self._write("[green]Using OpenAI Whisper API[/green]")
            else:
                model_size = self.config.get_whisper_model()
                self.service.set_local_backend(model_size)
                self._write(f"[green]Using Local Whisper ({model_size})[/green]")
        except Exception as e:
            self._write(f"[red]Error initializing backend: {str(e)}[/red]")
            return False
        return True

    def _transcribe_file(self, file_path: Path, output_dir) -> Path:
        """Transcribe one file, write the markdown and return its path."""
        result = self.service.transcribe(file_path)
        self._write("[green]✓ Transcribed[/green]")

        markdown = MarkdownFormatter.format_transcription(
            result["text"],
            file_path,
            result.get("duration", 0.0),
            result.get("language", "en"),
        )

        output_filename = MarkdownFormatter.get_output_filename(file_path)
        output_path = FileHandler.get_output_path(file_path, output_dir, output_filename)
        output_path.write_text(markdown, encoding="utf-8")
        self._write(f"[green]✓ Saved to {output_path.name}[/green]")
        return output_path

    def _run_transcription(self) -> None:
        """Run transcription on all files (executes in the worker thread)."""
        if not self._configure_backend():
            return

        output_dir = self.config.get_output_directory()
        if not output_dir:
            self._write("[red]Error: Output directory not configured[/red]")
            return

        total = len(self.files)
        for i, file_path in enumerate(self.files, 1):
            self._write(f"\n[yellow]Processing {i}/{total}: {file_path.name}[/yellow]")
            try:
                output_path = self._transcribe_file(file_path, output_dir)
            except Exception as e:
                # One bad file must not abort the rest of the batch.
                self._write(f"[red]✗ Error: {str(e)}[/red]")
            else:
                self.results.append((file_path, str(output_path)))

        self._write(f"\n[cyan]Completed {len(self.results)}/{total} files[/cyan]")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Handle the View Results / Exit buttons."""
        if event.button.id == "view_results":
            # Pass a snapshot: the worker may still be appending results.
            self.app.push_screen(ResultsScreen(list(self.results)))
        elif event.button.id == "exit_btn":
            self.app.exit()
class ResultsScreen(Screen):
    """Screen listing each transcribed source file and its output file."""

    def __init__(self, results: list[tuple[Path, str]]):
        """Store the (source file, output path) pairs to display."""
        super().__init__()
        self.results = results

    def compose(self) -> ComposeResult:
        """Build the header, results log, button row and footer."""
        yield Header()
        button_row = Container(
            Button("Back", id="back_btn"),
            Button("Exit", id="exit_btn", variant="error"),
        )
        yield Vertical(
            Label("Transcription Results"),
            RichLog(id="results_log", markup=True),
            button_row,
        )
        yield Footer()

    def on_mount(self) -> None:
        """Fill the log with one entry per result once the screen mounts."""
        results_log = self.query_one("#results_log", RichLog)
        results_log.write("[cyan]Transcription Results[/cyan]\n")
        for source_path, output_path in self.results:
            # First the source file name, then the name of the file written.
            results_log.write(f"[green]✓[/green] {source_path.name}")
            results_log.write(f"{Path(output_path).name}\n")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Go back to the previous screen or exit the application."""
        handlers = {
            "back_btn": self.app.pop_screen,
            "exit_btn": self.app.exit,
        }
        handler = handlers.get(event.button.id)
        if handler is not None:
            handler()
class TranscriptionApp(App):
    """Main transcription application.

    Flow: make sure an output directory is configured, let the user pick
    files, ask for the transcription method (and an API key if the OpenAI
    backend needs one), then show the progress screen.

    Modals are opened with ``push_screen(screen, callback)``; Textual calls
    the callback with the value the modal passes to ``dismiss()``.
    """

    CSS = """
Screen {
layout: vertical;
}
#api_key_modal {
width: 60;
height: 12;
border: solid green;
}
#method_modal {
width: 50;
height: 10;
border: solid blue;
}
#output_dir_modal {
width: 80;
height: 20;
border: solid purple;
}
#file_select_container {
width: 100%;
height: 100%;
}
DirectoryTree {
width: 1fr;
height: 1fr;
}
#file_info {
width: 100%;
height: auto;
border: solid $accent;
padding: 1;
}
#progress_log {
width: 100%;
height: 1fr;
border: solid $accent;
padding: 1;
}
Container {
height: auto;
margin: 1;
}
Button {
margin-right: 1;
}
Label {
margin-bottom: 1;
}
"""

    def __init__(self) -> None:
        """Initialize the application and load its configuration."""
        super().__init__()
        self.config = ConfigManager()

    def on_mount(self) -> None:
        """Set the window title and start the setup flow."""
        self.title = "Whisper Transcription TUI"
        self._check_setup()

    def _check_setup(self) -> None:
        """Ask for an output directory if none is configured yet."""
        if not self.config.output_directory_configured():
            # push_screen_wait() is a coroutine that accepts only a screen
            # and must be awaited from a worker; the callback form of
            # push_screen() is the right tool from a regular handler.
            self.push_screen(OutputDirModal(self.config), self._output_dir_set)
        else:
            self.push_screen(FileSelectScreen(self.config))

    def _output_dir_set(self, result: Optional[Path] = None) -> None:
        """Continue to file selection once the directory modal closes.

        Args:
            result: Value the modal was dismissed with. It is not needed
                here because the modal saves the directory to the config,
                but the callback must accept it.
        """
        self.push_screen(FileSelectScreen(self.config))

    def on_file_select_screen_file_selected(self, message: FileSelectScreen.FileSelected) -> None:
        """Ask for the transcription method once files have been chosen."""
        self.push_screen(MethodSelectModal(), self._method_selected(message.files))

    def _method_selected(self, files: list[Path]):
        """Return the callback that handles the method modal's result.

        Args:
            files: Files to transcribe once a method has been chosen.

        Returns:
            A callable taking the dismissed value: the method name, or
            ``None`` if the user cancelled.
        """

        def handler(method: Optional[str]) -> None:
            if not method:
                # Cancelled: stay on the file selection screen.
                return
            if method != "openai":
                self._start_transcription(files, "local")
                return
            if self.config.api_key_configured():
                self._start_transcription(files, "openai")
                return
            self.push_screen(
                ApiKeyModal(self.config),
                lambda _key: self._api_key_entered(files),
            )

        return handler

    def _api_key_entered(self, files: list[Path]) -> None:
        """Start OpenAI transcription if the API key modal stored a key."""
        # Check the config rather than the dismissed value so a cancelled
        # prompt never starts a run that is bound to fail.
        if self.config.api_key_configured():
            self._start_transcription(files, "openai")

    def _start_transcription(self, files: list[Path], method: str) -> None:
        """Show the progress screen, which runs the transcription.

        Args:
            files: Files to transcribe.
            method: ``"openai"`` or ``"local"``.
        """
        self.push_screen(ProgressScreen(files, method, self.config))