transcription-bot/Transcription_Bot.py

#!/usr/bin/env python3
"""
transcribe_bot.py - foolproof transcription & summarization helper
────────────────────────────────────────────────────────────────────
Updates v1.4 (2025-07-10)
• **UX** Replaced basic text prompts with interactive `questionary` prompts.
• **Project** Added `pyproject.toml` to manage dependencies like `rich` and `questionary`.
• **Output** Now using `rich` for pretty-printing.
• **Workflow** Ask for the output directory once, then process multiple files.
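
Typical use: run the script directly (e.g. `python Transcription_Bot.py`) and follow
the prompts; setup_deps() installs the remaining tooling on first launch.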
"""
from __future__ import annotations
import os
import sys
import shutil
import subprocess
import platform
from pathlib import Path
# ---------------------------------------------------------------------------
# Helper wrappers
# ---------------------------------------------------------------------------
def command_exists(cmd: str) -> bool:
    return shutil.which(cmd) is not None
def run_ps(cmd: str, *, check: bool = True):
    """Run *cmd* in a PowerShell session on Windows; fall back to the default shell elsewhere."""
    if os.name != "nt":
        subprocess.run(cmd, shell=True, check=check)
        return
    subprocess.run([
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", cmd
    ], check=check)
def pip_install(*args: str, allow_fail: bool = False) -> bool:
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *args])
        return True
    except subprocess.CalledProcessError:
        if allow_fail:
            return False
        raise
# ---------------------------------------------------------------------------
# Python version sanity check
# ---------------------------------------------------------------------------
MIN_PY = (3, 8)
MAX_PY = (3, 12)
if not (MIN_PY <= sys.version_info[:2] <= MAX_PY):
    sys.exit(
        f"[FATAL] Python {platform.python_version()} isn't supported. "
        f"Use 3.{MIN_PY[1]}-3.{MAX_PY[1]} (3.10-3.12 recommended)."
    )
# ---------------------------------------------------------------------------
# Dependency installers
# ---------------------------------------------------------------------------
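# Official Scoop bootstrap one-liner (https://scoop.sh); only ever run on Windows via ensure_scoop().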
SCOOP_INSTALL_CMD = (
    "Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser; "
    "Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression"
)
def ensure_scoop():
    if os.name != "nt":
        return
    if command_exists("scoop"):
        return
    rich.print("[yellow]Scoop not found, installing...[/yellow]")
    run_ps(SCOOP_INSTALL_CMD)
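# Bucket adds are best-effort (check=False below), so an already-added bucket doesn't abort setup.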
def ensure_scoop_bucket(bucket: str):
    if os.name != "nt":
        return
    run_ps(f"scoop bucket add {bucket}", check=False)
def ensure_ffmpeg():
    if command_exists("ffmpeg"):
        return
    rich.print("[yellow]ffmpeg not found, installing...[/yellow]")
    if os.name == "nt":
        run_ps("scoop install ffmpeg")
    else:
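        # Note: ffmpeg-python is only a Python wrapper and does not ship the ffmpeg
        # binary itself; on Linux/macOS you may still need the system package
        # (apt, brew, etc.) for whisper's audio decoding to work.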
pip_install("ffmpeg-python")
def ensure_rust():
    if command_exists("cargo"):
        return
    rich.print("[yellow]Rust tool-chain not found, installing...[/yellow]")
    if os.name == "nt":
        run_ps("scoop install rust")
    else:
        subprocess.run("curl https://sh.rustup.rs -sSf | sh -s -- -y", shell=True, check=True)
os.environ["PATH"] += os.pathsep + str(Path.home() / ".cargo" / "bin")
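# `pip show` exits with a non-zero status when the package isn't installed,
# which is what triggers the install in the except branch below.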
def ensure_python_pkg(pypi_name: str):
    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "show", pypi_name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        rich.print(f"[yellow]Python package [bold]{pypi_name}[/bold] not found, installing...[/yellow]")
        pip_install(pypi_name)
def ensure_ollama():
    if command_exists("ollama"):
        return
    rich.print("[bold red]Ollama CLI not in PATH.[/bold red]")
    rich.print("Please install from [blue]https://ollama.ai[/blue] and ensure it's in your PATH.")
    sys.exit(1)
# ---------------------------------------------------------------------------
# Whisper + Ollama API helpers
# ---------------------------------------------------------------------------
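# Gemma-style chat-turn wrapper applied to every prompt before it is piped to the
# Ollama model (note: ollama_run() below invokes deepseek-r1:1.5b, not a Gemma model).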
GEMMA_WRAP = "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
def ollama_run(prompt: str) -> str:
    """Pipe *prompt* (wrapped in the Gemma turn template) to the Ollama model and
    return stdout decoded as UTF-8."""
    wrapped = GEMMA_WRAP.format(prompt=prompt)
    proc = subprocess.run(
        ["ollama", "run", "deepseek-r1:1.5b"],
        input=wrapped.encode("utf-8"),
        capture_output=True,
        check=True,
    )
    return proc.stdout.decode("utf-8", "replace").strip()
# ---------------------------------------------------------------------------
# Transcription
# ---------------------------------------------------------------------------
def transcribe_audio(src: Path, job_dir: Path) -> str:
    import whisper
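    # Imported here rather than at module top so setup_deps() can install openai-whisper first.
    # load_model("base") downloads the model weights on first use (typically to ~/.cache/whisper).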
rich.print("[cyan]Transcribing... (hang tight, this may take a while)[/cyan]")
model = whisper.load_model("base")
result = model.transcribe(str(src))
text = result["text"].strip()
(job_dir / "transcription.txt").write_text(text, encoding="utf-8")
rich.print(f"[green]✓ Transcript saved to {job_dir / 'transcription.txt'}[/green]")
return text
# ---------------------------------------------------------------------------
# CLI helpers
# ---------------------------------------------------------------------------
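# questionary prompts return None when the user cancels (Esc / Ctrl+C); that is
# converted to KeyboardInterrupt here so the top-level handler can exit cleanly.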
def ask_path(msg: str, *, must_exist=False, file_ok=False) -> Path:
    path = questionary.path(
        message=msg,
        validate=lambda p: True if not must_exist or os.path.exists(p) else "Path does not exist.",
        # file_filter expects a bool; when file_ok is set, only offer files in completions.
        file_filter=lambda p: not file_ok or os.path.isfile(p),
    ).ask()
    if path is None:
        raise KeyboardInterrupt
    return Path(path)
def ask_menu(msg: str, choices: list[str]) -> str:
    choice = questionary.select(message=msg, choices=choices).ask()
    if choice is None:
        raise KeyboardInterrupt
    return choice
# ---------------------------------------------------------------------------
# Dependency orchestration
# ---------------------------------------------------------------------------
def setup_deps():
    rich.print("[bold]———— Checking dependencies ————[/bold]")
    ensure_scoop()
    ensure_scoop_bucket("extras")
    ensure_ffmpeg()
    ensure_rust()
    ensure_python_pkg("openai-whisper")
    ensure_python_pkg("setuptools-rust")
    ensure_python_pkg("rich")
    ensure_python_pkg("questionary")
    ensure_ollama()
    rich.print("[bold green]✓ All set![/bold green]\n")
# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
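# Per-file output layout: <out_dir>/<source stem>/transcription.txt, plus processed.txt
# holding the most recent summarize/custom-prompt response.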
def main():
    setup_deps()
    out_dir = ask_path("Select an output directory:", must_exist=False)
    out_dir.mkdir(parents=True, exist_ok=True)
    while True:
        src = ask_path("Select an audio/video file to process:", must_exist=True, file_ok=True)
        job_dir = out_dir / src.stem
        job_dir.mkdir(exist_ok=True)
        transcript = transcribe_audio(src, job_dir)
        while True:
            choice = ask_menu(
                "What next?",
                choices=["Exit", "Summarize transcript", "Custom prompt"],
            )
            if choice == "Exit":
                rich.print("[bold magenta]Farewell ✌️[/bold magenta]")
                sys.exit()
            elif choice in {"Summarize transcript", "Custom prompt"}:
                if choice == "Summarize transcript":
                    user_prompt = (
                        "You are a summarization bot, please observe the contents of this "
                        "transcription and provide a concise paragraph summary."
                    )
                else:
                    user_prompt = questionary.text("Enter your prompt for Gemma:").ask()
                    if user_prompt is None:
                        raise KeyboardInterrupt
                reply = ollama_run(f"{user_prompt}\n\n{transcript}")
                (job_dir / "processed.txt").write_text(reply, encoding="utf-8")
                rich.print(f"[green]✓ Response saved to {job_dir / 'processed.txt'}[/green]")
                break
            else:
                rich.print("[bold red]Invalid choice.[/bold red]")
        if not questionary.confirm("Transcribe another file?").ask():
            rich.print("[bold magenta]Catch you later ✌️[/bold magenta]")
            break
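# rich and questionary are bootstrap dependencies: the prompts and colored output
# above need them before setup_deps() ever runs, so they are checked by hand here.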
if __name__ == "__main__":
    try:
        import rich
        import questionary
    except ImportError:
        print("Missing core dependencies. Please run `pip install rich questionary`.")
        sys.exit(1)
    try:
        main()
    except KeyboardInterrupt:
        rich.print("\n[bold magenta]Interrupted. Bye![/bold magenta]")