Files
2026-05-09 20:00:57 +02:00

48 lines
1.6 KiB
Python
Executable File

#!/usr/bin/env python
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from granite_speech_plus_mlx import GraniteSpeechPlusPipeline
from granite_speech_plus_mlx.pipeline import DEFAULT_MODEL
from granite_speech_plus_mlx.prompts import GRANITE_SYSTEM_PROMPT, PROMPT_MODES
def main(argv: list[str] | None = None) -> int:
    """Transcribe one audio file from the command line.

    Parses the command-line options, builds a ``GraniteSpeechPlusPipeline``
    for the requested model, transcribes the audio, and then either writes
    the text to ``--output`` (UTF-8, with a trailing newline) or prints it
    to standard output.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        ``0`` on success, intended to be used as the process exit status.
    """
    parser = argparse.ArgumentParser(
        description="Transcribe audio with Granite Speech Plus MLX."
    )
    parser.add_argument("audio")
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument("--output", default=None)
    parser.add_argument("--chunk-seconds", type=float, default=300.0)
    parser.add_argument("--overlap-seconds", type=float, default=2.0)
    # sorted() keeps the choices listed in a stable order in --help output.
    parser.add_argument("--prompt-mode", choices=sorted(PROMPT_MODES), default="asr")
    parser.add_argument("--repetition-penalty", type=float, default=1.2)
    parser.add_argument("--max-tokens", type=int, default=4096)
    parser.add_argument("--system-prompt", default=GRANITE_SYSTEM_PROMPT)
    parser.add_argument("--verbose", action="store_true")
    args = parser.parse_args(argv)

    pipe = GraniteSpeechPlusPipeline.from_pretrained(
        args.model,
        chunk_seconds=args.chunk_seconds,
        overlap_seconds=args.overlap_seconds,
        repetition_penalty=args.repetition_penalty,
        max_tokens=args.max_tokens,
        # An empty --system-prompt is forwarded as None rather than "".
        # NOTE(review): presumably None means "no system prompt" to the
        # pipeline -- confirm against GraniteSpeechPlusPipeline.
        system_prompt=args.system_prompt or None,
        verbose=args.verbose,
    )
    text = pipe.transcribe(args.audio, prompt_mode=args.prompt_mode)

    if args.output:
        # The file gets a trailing newline; print() below adds its own.
        Path(args.output).write_text(text + "\n", encoding="utf-8")
    else:
        print(text)
    return 0
# Run the CLI only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())