#!/usr/bin/env python
"""Command-line entry point: transcribe audio with Granite Speech Plus MLX."""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

from granite_speech_plus_mlx import GraniteSpeechPlusPipeline
from granite_speech_plus_mlx.pipeline import DEFAULT_MODEL
from granite_speech_plus_mlx.prompts import GRANITE_SYSTEM_PROMPT, PROMPT_MODES


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser describing every supported CLI option."""
    cli = argparse.ArgumentParser(
        description="Transcribe audio with Granite Speech Plus MLX."
    )

    # Positional input followed by model / output selection.
    cli.add_argument("audio")
    cli.add_argument("--model", default=DEFAULT_MODEL)
    cli.add_argument("--output", default=None)

    # Chunking parameters forwarded to the pipeline constructor.
    cli.add_argument("--chunk-seconds", type=float, default=300.0)
    cli.add_argument("--overlap-seconds", type=float, default=2.0)

    # Prompting and generation parameters.
    cli.add_argument(
        "--prompt-mode",
        choices=sorted(PROMPT_MODES),
        default="asr",
    )
    cli.add_argument("--repetition-penalty", type=float, default=1.2)
    cli.add_argument("--max-tokens", type=int, default=4096)
    cli.add_argument("--system-prompt", default=GRANITE_SYSTEM_PROMPT)
    cli.add_argument("--verbose", action="store_true")
    return cli


def main() -> int:
    """Parse the command line, run the transcription, and emit the text.

    The transcript is written to the ``--output`` path (UTF-8, with a
    trailing newline) when one is given; otherwise it is printed to stdout.

    Returns:
        The process exit status, always ``0`` when this function returns.
    """
    options = _build_parser().parse_args()

    pipeline_settings = {
        "chunk_seconds": options.chunk_seconds,
        "overlap_seconds": options.overlap_seconds,
        "repetition_penalty": options.repetition_penalty,
        "max_tokens": options.max_tokens,
        # An empty --system-prompt is handed to the pipeline as None.
        "system_prompt": options.system_prompt or None,
        "verbose": options.verbose,
    }
    pipeline = GraniteSpeechPlusPipeline.from_pretrained(
        options.model, **pipeline_settings
    )

    transcript = pipeline.transcribe(
        options.audio, prompt_mode=options.prompt_mode
    )

    if not options.output:
        print(transcript)
        return 0

    Path(options.output).write_text(transcript + "\n", encoding="utf-8")
    return 0


if __name__ == "__main__":
    sys.exit(main())