From ea1f5f2f01a75fbdee88e9970a75ffcb0e28ae20 Mon Sep 17 00:00:00 2001
From: ambassadia <ambassadia@users.noreply.github.com>
Date: Wed, 20 May 2026 16:13:40 +0200
Subject: [PATCH] =?UTF-8?q?chore:=20add=20config.json=20=E2=80=94=20model?=
 =?UTF-8?q?=20metadata=20+=20enable=20HF=20download=20counter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Hub only registers a download when a client requests one of HF's
default query files at the repo root (config.json, config.yaml,
hyperparams.yaml, params.json, meta.yaml). This repo had none of them:
Pipeline.from_pretrained() pulls weights/*.safetensors but never touches
a recognised query file, so HfApi reported 'downloads: 0' for this repo.

Adding config.json fixes the counter AND provides a single discoverable
metadata file:
- model_type, library_name, base_model, framework, pipeline_tag
- the 4 sub-architectures (DurationPredictor / TextEncoder /
  VectorEstimator / Vocoder)
- 31 supported languages (ISO codes)
- 13 voices (10 presets + 3 custom blends)
- inference config (5 Euler steps, CFG 4x cond - 3x uncond, default seed 99)
- measured performance (M4: latency in ms, RTF, speed-up vs ONNX SDK and
  CoreML; M3 Ultra: RTF)
- license trail (OpenRAIL-M weights + Apache-2.0 code)

Ref: https://huggingface.co/docs/hub/models-download-stats
---
 config.json | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 config.json

diff --git a/config.json b/config.json
new file mode 100644
index 0000000..d054b60
--- /dev/null
+++ b/config.json
@@ -0,0 +1,58 @@
+{
+  "model_type": "supertonic-3",
+  "library_name": "supertonic-3-mlx",
+  "base_model": "Supertone/supertonic-3",
+  "framework": "mlx",
+  "pipeline_tag": "text-to-speech",
+
+  "architectures": [
+    "DurationPredictor",
+    "TextEncoder",
+    "VectorEstimator",
+    "Vocoder"
+  ],
+
+  "sample_rate": 44100,
+  "num_languages": 31,
+  "supported_languages": [
+    "en", "fr", "de", "es", "it", "pt", "ja", "ko", "zh", "ru",
+    "pl", "nl", "tr", "ar", "hi", "vi", "th", "id", "cs", "ro",
+    "hu", "el", "da", "sv", "fi", "no", "he", "uk", "bg", "hr", "sk"
+  ],
+
+  "voices": {
+    "presets": ["F1", "F2", "F3", "F4", "F5", "M1", "M2", "M3", "M4", "M5"],
+    "custom": ["voix_sombre", "homme_moyen", "homme_clair"],
+    "total": 13
+  },
+
+  "inference": {
+    "euler_steps": 5,
+    "cfg_cond_scale": 4.0,
+    "cfg_uncond_scale": 3.0,
+    "default_seed": 99,
+    "supports_streaming": true,
+    "supports_voice_mixing": true
+  },
+
+  "performance_m4": {
+    "short_utterance_ms": 30,
+    "long_utterance_ms": 38,
+    "rtf_short": 76,
+    "rtf_long": 138,
+    "vs_onnx_sdk": "17-25x",
+    "vs_coreml": "2-3x"
+  },
+
+  "performance_m3_ultra": {
+    "rtf_short": 147,
+    "rtf_long": 185
+  },
+
+  "license": "openrail",
+  "license_link": "LICENSE",
+  "license_code": "Apache-2.0",
+  "license_code_link": "LICENSE-CODE",
+
+  "upstream_attribution": "Copyright (c) 2026 Supertone Inc."
+}