{
  "service": "Verity",
  "url": "https://veritylayer.dev",
  "last_run": "2026-06-28",
  "summary": "Verity reliably tells true claims from false (95% on verifiable claims), never affirmed an actual falsehood across 100 claims, and never over-refuses a valid question (abstention precision 1.0). Numbers are real runs on public benchmarks; we also audit our own evaluation and publish the corrections.",
  "claim_verification": {
    "dataset": "FEVER (copenlu/fever_gold_evidence, validation split)",
    "model": "claude-sonnet-4-6 (grounded, live web search)",
    "verifiable_claim_accuracy": 0.95,
    "verifiable_n": 100,
    "genuine_false_affirmations": 0,
    "note_on_false_affirmation": "The one verdict FEVER marks as a false-affirmation ('Dan O'Bannon died', labeled REFUTES) is actually true; the benchmark label is wrong.",
    "raw_3class_accuracy_incl_NEI": 0.667,
    "caveat": "FEVER NOT-ENOUGH-INFO reflects insufficient 2018 Wikipedia evidence; a live-web verifier resolves many of those correctly, so 3-class accuracy understates real performance. FEVER has ~10-20% label noise."
  },
  "abstention": {
    "datasets": "FalseQA (false-premise questions) + ARC (answerable controls)",
    "n": 100,
    "precision": 1.0,
    "false_premise_recall_fair_judge": 0.76,
    "false_premise_recall_strict_judge": 0.40,
    "note": "Precision 1.0: never over-refused a valid question (0/50). The strict-judge 0.40 was a measurement flaw (it miscounted premise-corrections as plain answers); a fair judge that credits premise-rejection gives 0.76. Grounding did not change false-premise detection."
  },
  "injection_defense": {
    "service": "Sentinel",
    "dataset": "deepset/prompt-injections (203 injections + 203 legitimate)",
    "injection_recall": 0.916,
    "false_positive_rate": 0.0,
    "note": "Catches instruction-override, task-switching, persona override, grounding-override, jailbreaks, and multilingual injections; never flagged a legitimate input across 203 controls. (Strengthened from 0.75 recall this run, with false-positive rate held at 0.)"
  },
  "method": "Balanced subsets (~100 items per test), fixed random seed. Claim verification scored by verdict match to gold labels (headline on verifiable true/false classes). Abstention scored by a model judge; both strict and corrected judges published. The official AbstentionBench HuggingFace harness no longer loads under current tooling, so abstention uses its public source datasets directly.",
  "contact": "veritylayer@gmail.com"
}
