From ea1f5f2f01a75fbdee88e9970a75ffcb0e28ae20 Mon Sep 17 00:00:00 2001 From: ambassadia Date: Wed, 20 May 2026 16:13:40 +0200 Subject: [PATCH] =?UTF-8?q?chore:=20add=20config.json=20=E2=80=94=20model?= =?UTF-8?q?=20metadata=20+=20enable=20HF=20download=20counter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without one of HF's default query files at the repo root (config.json, config.yaml, hyperparams.yaml, params.json, meta.yaml), the Hub doesn't register any downloads — HfApi reported 'downloads: 0' for this repo because Pipeline.from_pretrained() pulls weights/*.safetensors but never touches a recognised query file. Adding config.json gives the Hub a query file to count (a download is registered each time a client requests that file, so the loader must actually fetch it) AND provides a single discoverable metadata file: - model_type, library_name, base_model, pipeline_tag - the 4 sub-architectures (DP / TE / VE / vocoder) - 31 supported languages (ISO codes) - 13 voices (10 presets + 3 custom blends) - inference config (5 Euler steps, classifier-free guidance computed as 4.0 x cond - 3.0 x uncond, default seed 99) - measured latency and RTF (real-time factor) on M4, RTF on M3 Ultra - license trail (OpenRAIL-M weights + Apache-2.0 code) Ref: https://huggingface.co/docs/hub/models-download-stats --- config.json | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 config.json diff --git a/config.json b/config.json new file mode 100644 index 0000000..d054b60 --- /dev/null +++ b/config.json @@ -0,0 +1,58 @@ +{ + "model_type": "supertonic-3", + "library_name": "supertonic-3-mlx", + "base_model": "Supertone/supertonic-3", + "framework": "mlx", + "pipeline_tag": "text-to-speech", + + "architectures": [ + "DurationPredictor", + "TextEncoder", + "VectorEstimator", + "Vocoder" + ], + + "sample_rate": 44100, + "num_languages": 31, + "supported_languages": [ + "en", "fr", "de", "es", "it", "pt", "ja", "ko", "zh", "ru", + "pl", "nl", "tr", "ar", "hi", "vi", "th", "id", "cs", "ro", + "hu", "el", "da", "sv", "fi", "no", "he", "uk", "bg", "hr", "sk" + ], + + "voices": 
{ + "presets": ["F1", "F2", "F3", "F4", "F5", "M1", "M2", "M3", "M4", "M5"], + "custom": ["voix_sombre", "homme_moyen", "homme_clair"], + "total": 13 + }, + + "inference": { + "euler_steps": 5, + "cfg_cond_scale": 4.0, + "cfg_uncond_scale": 3.0, + "default_seed": 99, + "supports_streaming": true, + "supports_voice_mixing": true + }, + + "performance_m4": { + "short_utterance_ms": 30, + "long_utterance_ms": 38, + "rtf_short": 76, + "rtf_long": 138, + "vs_onnx_sdk": "17-25x", + "vs_coreml": "2-3x" + }, + + "performance_m3_ultra": { + "rtf_short": 147, + "rtf_long": 185 + }, + + "license": "openrail", + "license_link": "LICENSE", + "license_code": "Apache-2.0", + "license_code_link": "LICENSE-CODE", + + "upstream_attribution": "Copyright (c) 2026 Supertone Inc." +}