feat(orchestrator): add T=1 path with audit JSONL + tail extraction

2026-05-10 03:02:31 +02:00
parent c5584b6396
commit 86ccbe53e1
4 changed files with 344 additions and 1 deletions
--- a/src/markovian_rsa_mlx/init.py
+++ b/src/markovian_rsa_mlx/init.py
@@ -3,5 +3,6 @@ __version__ = "0.1.0"

 from markovian_rsa_mlx.config import RSAConfig
 from markovian_rsa_mlx.loader import load_zaya_model
+from markovian_rsa_mlx.orchestrator import MarkovianRSAOrchestrator

-__all__ = ["__version__", "RSAConfig", "load_zaya_model"]
+__all__ = ["__version__", "RSAConfig", "load_zaya_model", "MarkovianRSAOrchestrator"]
--- a/src/markovian_rsa_mlx/orchestrator.py
+++ b/src/markovian_rsa_mlx/orchestrator.py
@@ -0,0 +1,235 @@
+"""MarkovianRSAOrchestrator — drives N parallel traces + aggregation rounds."""
+from __future__ import annotations
+import datetime as _dt
+import hashlib
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+
+from markovian_rsa_mlx.audit import (
+    AuditWriter, RunStartEvent, GenerationStartEvent,
+    TraceCompleteEvent, TailExtractedEvent, AggregationPromptEvent,
+    RoundCompleteEvent, FinalEvent, RunEndEvent,
+)
+from markovian_rsa_mlx.batching import GenerationRequest, run_batch, GenerationResult
+from markovian_rsa_mlx.config import RSAConfig
+from markovian_rsa_mlx.prompts import (
+    build_round_0_messages,
+    build_aggregation_messages,
+)
+from markovian_rsa_mlx.results import RSAResult, RSARound, RSAStats, TraceRecord
+
+
+def _trace_seed(base_seed: int | None, run_id: str, round_index: int, trace_index: int) -> int:
+    """Deterministic seed if base_seed set, else stable from run_id."""
+    key = f"{base_seed}|{run_id}|{round_index}|{trace_index}"
+    h = hashlib.sha256(key.encode()).hexdigest()
+    return int(h[:8], 16)
+
+
+def _now_iso() -> str:
+    return _dt.datetime.now(tz=_dt.timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+class MarkovianRSAOrchestrator:
+    """Drives Markovian RSA rounds over a loaded mlx-lm model + tokenizer."""
+
+    def __init__(
+        self,
+        model: Any,
+        tokenizer: Any,
+        *,
+        model_id: str = "kyr0/zaya1-base-8b-MLX",
+        quantization: str = "q4_g64",
+        default_config: RSAConfig | None = None,
+        single_generate=None,
+        batch_generate=None,
+    ) -> None:
+        self.model = model
+        self.tokenizer = tokenizer
+        self.model_id = model_id
+        self.quantization = quantization
+        self.default_config = default_config or RSAConfig()
+        self._single_generate = single_generate
+        self._batch_generate = batch_generate
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_id: str = "kyr0/zaya1-base-8b-MLX",
+        *,
+        quantization: str = "q4_g64",
+        default_config: RSAConfig | None = None,
+    ) -> "MarkovianRSAOrchestrator":
+        from markovian_rsa_mlx.loader import load_zaya_model
+        model, tokenizer = load_zaya_model(model_id)
+        return cls(
+            model=model, tokenizer=tokenizer,
+            model_id=model_id, quantization=quantization,
+            default_config=default_config,
+        )
+
+    def solve(
+        self,
+        prompt: str,
+        *,
+        config: RSAConfig | None = None,
+        return_audit: bool = False,
+        audit_path: str | Path | None = None,
+    ):
+        cfg = config or self.default_config
+        run_id = uuid.uuid4().hex[:12]
+        t0 = time.time()
+        with AuditWriter(audit_path) as aud:
+            aud.write(RunStartEvent(
+                run_id=run_id, model_id=self.model_id, quantization=self.quantization,
+                config=cfg, prompt=prompt, created_at=_now_iso(),
+            ))
+            rounds_records: list[RSARound] = []
+            previous_traces: list[TraceRecord] = []
+            for round_idx in range(cfg.rounds):
+                round_traces, round_elapsed = self._run_round(
+                    run_id=run_id, round_idx=round_idx, original_prompt=prompt,
+                    previous_traces=previous_traces, cfg=cfg, audit=aud,
+                )
+                rounds_records.append(RSARound(
+                    round=round_idx, traces=round_traces, elapsed_s=round_elapsed,
+                    memory_estimate_bytes=0,
+                ))
+                aud.write(RoundCompleteEvent(
+                    run_id=run_id, round=round_idx,
+                    trace_ids=[t.trace_id for t in round_traces],
+                    memory_estimate_bytes=0, elapsed_s=round_elapsed,
+                ))
+                previous_traces = round_traces
+
+            final_trace = previous_traces[0]
+            aud.write(FinalEvent(
+                run_id=run_id, final_trace_id=final_trace.trace_id,
+                final_text=final_trace.text,
+                all_final_trace_ids=[t.trace_id for t in previous_traces],
+                answer_selection=cfg.answer_selection,
+            ))
+            elapsed = time.time() - t0
+            total_tokens = sum(
+                t.generated_tokens for r in rounds_records for t in r.traces
+            )
+            aud.write(RunEndEvent(
+                run_id=run_id, elapsed_s=elapsed,
+                total_generated_tokens=total_tokens, peak_memory_bytes=0,
+            ))
+
+        result = RSAResult(
+            run_id=run_id, prompt=prompt, final_text=final_trace.text,
+            final_trace_id=final_trace.trace_id, model_id=self.model_id,
+            quantization=self.quantization, config=cfg, rounds=rounds_records,
+            stats=RSAStats(
+                total_generated_tokens=total_tokens, elapsed_s=elapsed,
+                peak_memory_bytes=0,
+            ),
+            audit_path=Path(audit_path) if audit_path is not None else None,
+        )
+        if return_audit:
+            return result.final_text, result
+        return result.final_text
+
+    def _run_round(
+        self, *, run_id: str, round_idx: int, original_prompt: str,
+        previous_traces: list[TraceRecord], cfg: RSAConfig, audit: AuditWriter,
+    ) -> tuple[list[TraceRecord], float]:
+        round_t0 = time.time()
+        is_round_0 = round_idx == 0
+        max_tokens = cfg.chunk_tokens if round_idx < cfg.rounds - 1 else cfg.effective_final_tokens()
+
+        prompts_token_ids: list[list[int]] = []
+        parent_ids_per_trace: list[list[str]] = []
+        if is_round_0:
+            messages = build_round_0_messages(original_prompt)
+            prompt_ids = self._render_chat(messages)
+            prompts_token_ids = [prompt_ids for _ in range(cfg.parallel)]
+            parent_ids_per_trace = [[] for _ in range(cfg.parallel)]
+        else:
+            import random as _random
+            rng = _random.Random(_trace_seed(cfg.seed, run_id, round_idx, -1))
+            for trace_idx in range(cfg.parallel):
+                K = min(cfg.aggregation_subsample, len(previous_traces))
+                selected = rng.sample(previous_traces, K)
+                tails = [self._extract_tail_text(t.token_ids, cfg.tail_tokens) for t in selected]
+                tail_token_ids_list = [self._extract_tail_token_ids(t.token_ids, cfg.tail_tokens) for t in selected]
+                for sel, tail_ids, tail_text in zip(selected, tail_token_ids_list, tails):
+                    audit.write(TailExtractedEvent(
+                        run_id=run_id, round=round_idx, trace_id=sel.trace_id,
+                        tail_token_ids=tail_ids, tail_text=tail_text,
+                        tail_tokens=len(tail_ids),
+                    ))
+                messages = build_aggregation_messages(
+                    original_prompt=original_prompt, tails=tails,
+                    template=cfg.aggregation_template,
+                )
+                prompt_ids = self._render_chat(messages)
+                child_trace_id = f"r{round_idx}-t{trace_idx}-{run_id[:6]}"
+                audit.write(AggregationPromptEvent(
+                    run_id=run_id, round=round_idx, trace_id=child_trace_id,
+                    selected_tail_trace_ids=[s.trace_id for s in selected],
+                    prompt_text=messages[0]["content"], prompt_token_ids=prompt_ids,
+                ))
+                prompts_token_ids.append(prompt_ids)
+                parent_ids_per_trace.append([s.trace_id for s in selected])
+
+        seeds = [_trace_seed(cfg.seed, run_id, round_idx, i) for i in range(cfg.parallel)]
+        trace_ids = [f"r{round_idx}-t{i}-{run_id[:6]}" for i in range(cfg.parallel)]
+        for i, tid in enumerate(trace_ids):
+            audit.write(GenerationStartEvent(
+                run_id=run_id, round=round_idx, trace_id=tid,
+                seed=seeds[i], prompt_token_count=len(prompts_token_ids[i]),
+                max_tokens=max_tokens, parent_trace_ids=parent_ids_per_trace[i],
+            ))
+
+        requests = [
+            GenerationRequest(prompt_token_ids=prompts_token_ids[i], seed=seeds[i], max_tokens=max_tokens)
+            for i in range(cfg.parallel)
+        ]
+        results: list[GenerationResult] = run_batch(
+            model=self.model, tokenizer=self.tokenizer,
+            requests=requests, temperature=cfg.temperature, top_p=cfg.top_p, top_k=cfg.top_k,
+            serial=cfg.serial, single_generate=self._single_generate, batch_generate=self._batch_generate,
+        )
+
+        records: list[TraceRecord] = []
+        for i, (tid, gen) in enumerate(zip(trace_ids, results)):
+            audit.write(TraceCompleteEvent(
+                run_id=run_id, round=round_idx, trace_id=tid,
+                text=gen.text, token_ids=gen.token_ids,
+                generated_tokens=gen.generated_tokens, finish_reason=gen.finish_reason,
+                elapsed_s=gen.elapsed_s,
+            ))
+            records.append(TraceRecord(
+                trace_id=tid, text=gen.text, token_ids=gen.token_ids,
+                generated_tokens=gen.generated_tokens, finish_reason=gen.finish_reason,
+                elapsed_s=gen.elapsed_s, seed=seeds[i],
+                parent_trace_ids=parent_ids_per_trace[i],
+            ))
+        round_elapsed = time.time() - round_t0
+        return records, round_elapsed
+
+    def _render_chat(self, messages: list[dict[str, str]]) -> list[int]:
+        """Apply ZAYA chat template and return token ids."""
+        rendered = self.tokenizer.apply_chat_template(
+            messages, add_generation_prompt=True, enable_thinking=True,
+        )
+        if isinstance(rendered, str):
+            return self.tokenizer.encode(rendered)
+        return list(rendered)
+
+    @staticmethod
+    def _extract_tail_token_ids(ids: list[int], tail_tokens: int) -> list[int]:
+        if tail_tokens <= 0 or not ids:
+            return []
+        return ids[-tail_tokens:]
+
+    def _extract_tail_text(self, ids: list[int], tail_tokens: int) -> str:
+        tail_ids = self._extract_tail_token_ids(ids, tail_tokens)
+        if not tail_ids:
+            return ""
+        return self.tokenizer.decode(tail_ids)
--- a/src/markovian_rsa_mlx/results.py
+++ b/src/markovian_rsa_mlx/results.py
@@ -0,0 +1,47 @@
+"""Public result types returned by MarkovianRSAOrchestrator."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from markovian_rsa_mlx.config import RSAConfig
+
+
+@dataclass
+class TraceRecord:
+    trace_id: str
+    text: str
+    token_ids: list[int]
+    generated_tokens: int
+    finish_reason: str
+    elapsed_s: float
+    seed: int
+    parent_trace_ids: list[str] = field(default_factory=list)
+
+
+@dataclass
+class RSARound:
+    round: int
+    traces: list[TraceRecord]
+    elapsed_s: float
+    memory_estimate_bytes: int
+
+
+@dataclass
+class RSAStats:
+    total_generated_tokens: int
+    elapsed_s: float
+    peak_memory_bytes: int
+
+
+@dataclass
+class RSAResult:
+    run_id: str
+    prompt: str
+    final_text: str
+    final_trace_id: str
+    model_id: str
+    quantization: str
+    config: RSAConfig
+    rounds: list[RSARound]
+    stats: RSAStats
+    audit_path: Path | None
--- a/tests/test_orchestrator_t1.py
+++ b/tests/test_orchestrator_t1.py
@@ -0,0 +1,60 @@
+from unittest.mock import MagicMock
+
+from markovian_rsa_mlx.batching import GenerationRequest, GenerationResult
+from markovian_rsa_mlx.config import RSAConfig
+from markovian_rsa_mlx.orchestrator import MarkovianRSAOrchestrator
+
+
+def _fake_tokenizer(eos_id: int = 999):
+    tok = MagicMock()
+    tok.encode.side_effect = lambda s: [ord(c) for c in s][:32] or [1]
+    tok.decode.side_effect = lambda ids: "".join(chr(min(i, 122)) for i in ids if 32 <= i <= 122)
+    tok.eos_token_id = eos_id
+    tok.all_special_ids = [eos_id]
+    tok.apply_chat_template.side_effect = lambda messages, **kw: \
+        " ".join(m["content"] for m in messages).encode().hex()
+    return tok
+
+
+def _fake_single_gen(model, tokenizer, prompt_token_ids, *, max_tokens, seed, temperature, top_p, top_k):
+    text = f"trace-{seed}-final-answer"
+    ids = [ord(c) for c in text]
+    return GenerationResult(
+        token_ids=ids, text=text, generated_tokens=len(ids),
+        finish_reason="eos", elapsed_s=0.01,
+    )
+
+
+def test_t1_single_round_produces_final_text(tmp_path):
+    cfg = RSAConfig(rounds=1, parallel=2, aggregation_subsample=2,
+                    chunk_tokens=64, tail_tokens=8, serial=True, seed=123)
+    orch = MarkovianRSAOrchestrator(
+        model=MagicMock(),
+        tokenizer=_fake_tokenizer(),
+        model_id="test-model",
+        quantization="bf16",
+        single_generate=_fake_single_gen,
+        batch_generate=None,
+    )
+    audit_path = tmp_path / "audit.jsonl"
+    text, result = orch.solve("What is 2+2?", config=cfg, return_audit=True, audit_path=audit_path)
+    assert isinstance(text, str)
+    assert text == result.final_text
+    assert result.config.rounds == 1
+    assert len(result.rounds) == 1
+    assert len(result.rounds[0].traces) == 2
+    assert audit_path.exists()
+    lines = audit_path.read_text().strip().split("\n")
+    # at minimum: run_start, 2 trace_complete, final, run_end
+    assert len(lines) >= 5
+
+
+def test_t1_returns_string_when_return_audit_false(tmp_path):
+    cfg = RSAConfig(rounds=1, parallel=2, aggregation_subsample=2, serial=True)
+    orch = MarkovianRSAOrchestrator(
+        model=MagicMock(), tokenizer=_fake_tokenizer(),
+        model_id="m", quantization="bf16",
+        single_generate=_fake_single_gen,
+    )
+    out = orch.solve("X", config=cfg)
+    assert isinstance(out, str)