formalise optimiser auditing with relation ledgers and shrunk semantic counterexamples

2026-04-27 13:17:12 +00:00
parent 31596491f0
commit f46c9b652b
9 changed files with 559 additions and 38 deletions
@@ -5,10 +5,16 @@ type specimen = {
  term : Source.term;
 }

+type failure = {
+  original : specimen;
+  minimal : specimen;
+  comparison : Relation.comparison;
+}
+
 type result = {
  total : int;
  related : int;
-  failures : (specimen * Relation.comparison) list;
+  failures : failure list;
 }

 let rng = Random.State.make [| 0x51; 0x17; 0x2b |]
@@ -106,22 +112,105 @@ let sample_terms ~count ~max_depth () =
  in
  fill [] 0 0

-let shrink specimen =
-  match specimen.term, specimen.ty with
-  | Source.Pair (a, _), ty -> [{ term = a; ty }]
-  | Source.If (_, t, e), _ -> [{ specimen with term = t }; { specimen with term = e }]
+let rec shrink_term ty term =
+  let self = shrink_term in
+  let typ_or fallback term =
+    match Typecheck.type_of term with
+    | Ok ty -> ty
+    | Error _ -> fallback
+  in
+  let rebuild f sub_ty sub =
+    List.map (fun sub' -> { ty; term = f sub'.term }) (self sub_ty sub)
+  in
+  match ty, term with
+  | TInt, Source.Int n when n <> 0 -> [{ ty; term = Source.Int 0 }]
+  | TBool, Source.Bool true -> [{ ty; term = Source.Bool false }]
+  | _, Source.Tick (_, t) -> { ty; term = t } :: rebuild (fun t' -> Source.Tick ("shrunk", t')) ty t
+  | _, Source.Let (_, a, b) ->
+      { ty; term = b } ::
+      rebuild (fun a' -> Source.Let ("_", a', b)) (typ_or ty a) a @
+      rebuild (fun b' -> Source.Let ("_", a, b')) ty b
+  | _, Source.If (c, t, e) ->
+      { ty; term = t } :: { ty; term = e } ::
+      rebuild (fun c' -> Source.If (c', t, e)) TBool c @
+      rebuild (fun t' -> Source.If (c, t', e)) ty t @
+      rebuild (fun e' -> Source.If (c, t, e')) ty e
+  | TPair (a_ty, b_ty), Source.Pair (a, b) ->
+      rebuild (fun a' -> Source.Pair (a', b)) a_ty a @
+      rebuild (fun b' -> Source.Pair (a, b')) b_ty b
+  | TSum (a_ty, _), Source.Inl (sum_ty, t) ->
+      rebuild (fun t' -> Source.Inl (sum_ty, t')) a_ty t
+  | TSum (_, b_ty), Source.Inr (sum_ty, t) ->
+      rebuild (fun t' -> Source.Inr (sum_ty, t')) b_ty t
+  | _, Source.App (f, a) ->
+      let f_ty = typ_or (TArrow (ty, ty)) f in
+      let a_ty = typ_or ty a in
+      rebuild (fun f' -> Source.App (f', a)) f_ty f @
+      rebuild (fun a' -> Source.App (f, a')) a_ty a
+  | _, Source.TyApp (t, app_ty) ->
+      let t_ty = typ_or (TForall ("a", ty)) t in
+      rebuild (fun t' -> Source.TyApp (t', app_ty)) t_ty t
+  | _, Source.Eq (g, a, b) ->
+      let ground_ty = match g with GInt -> TInt | GBool -> TBool in
+      rebuild (fun a' -> Source.Eq (g, a', b)) ground_ty a @
+      rebuild (fun b' -> Source.Eq (g, a, b')) ground_ty b
+  | _, Source.LetPair (x, y, scrut, body) ->
+      let scrut_ty = typ_or (TPair (ty, ty)) scrut in
+      rebuild (fun scrut' -> Source.LetPair (x, y, scrut', body)) scrut_ty scrut @
+      rebuild (fun body' -> Source.LetPair (x, y, scrut, body')) ty body
+  | _, Source.Case (scrut, (x, l), (y, r)) ->
+      let scrut_ty = typ_or (TSum (ty, ty)) scrut in
+      rebuild (fun scrut' -> Source.Case (scrut', (x, l), (y, r))) scrut_ty scrut @
+      rebuild (fun l' -> Source.Case (scrut, (x, l'), (y, r))) ty l @
+      rebuild (fun r' -> Source.Case (scrut, (x, l), (y, r'))) ty r
+  | TMu (_, _), Source.Roll (roll_ty, t) ->
+      let payload_ty = typ_or ty t in
+      rebuild (fun t' -> Source.Roll (roll_ty, t')) payload_ty t
+  | _, Source.Unroll t ->
+      let t_ty = typ_or (TMu ("a", ty)) t in
+      rebuild (fun t' -> Source.Unroll t') t_ty t
  | _ -> []

+let shrink specimen =
+  shrink_term specimen.ty specimen.term
+  |> List.filter (fun candidate -> Typecheck.is_well_typed candidate.ty candidate.term)
+
+let comparison_for ?(fuel = 256) flags relation specimen =
+  let compiled = Pipeline.compile flags specimen.ty specimen.term in
+  Relation.compare_programs ~fuel relation specimen.ty specimen.term compiled.target_program
+
+let is_failure cmp =
+  match cmp.Relation.verdict with
+  | Relation.Related -> false
+  | Relation.Unrelated _ -> true
+
+let shrink_failure ?(fuel = 256) flags relation specimen =
+  let rec improve current =
+    let current_cmp = comparison_for ~fuel flags relation current in
+    let candidates =
+      shrink current
+      |> List.filter_map (fun candidate ->
+             let cmp = comparison_for ~fuel flags relation candidate in
+             if is_failure cmp then Some (candidate, cmp) else None)
+    in
+    match candidates with
+    | (candidate, _) :: _ when Source.string_of_term candidate.term <> Source.string_of_term current.term ->
+        improve candidate
+    | _ -> (current, current_cmp)
+  in
+  improve specimen
+
 let run_campaign ?(fuel = 256) flags relation ~count ~max_depth () =
  let specimens = sample_terms ~count ~max_depth () in
  let failures, related =
    List.fold_left
      (fun (fails, ok) specimen ->
-        let compiled = Pipeline.compile flags specimen.ty specimen.term in
-        let cmp = Relation.compare_programs ~fuel relation specimen.ty specimen.term compiled.target_program in
+        let cmp = comparison_for ~fuel flags relation specimen in
        match cmp.verdict with
        | Relation.Related -> (fails, ok + 1)
-        | Relation.Unrelated _ -> ((specimen, cmp) :: fails, ok))
+        | Relation.Unrelated _ ->
+            let minimal, minimal_cmp = shrink_failure ~fuel flags relation specimen in
+            ({ original = specimen; minimal; comparison = minimal_cmp } :: fails, ok))
      ([], 0)
      specimens
  in