Source code for amorphgen.analysis.validate

"""Validate computed structural metrics against literature reference ranges.

A reference YAML lists expected ranges for density, mean bond distances, mean
coordination numbers, and mean bond angles. Each metric is compared to the
analyser's computed value and labelled "match" / "concern" / "fail" (or
"n/a" when no value was computed) so the user can defend an ensemble
against published data.
"""

from __future__ import annotations


def _verdict(value, low, high, tol_frac=0.05):
    """Classify a value against an expected [low, high] range.

    Parameters
    ----------
    value : float or None
        Computed metric. ``None`` means no value is available.
    low, high : float
        Inclusive bounds of the expected range. Bounds supplied in reverse
        order are swapped, so a reference written as ``[high, low]``
        describes the same interval instead of one nothing can match.
    tol_frac : float, optional
        Relative size of the "concern" band around the range.

    Returns
    -------
    str
        "n/a" if ``value`` is None; "match" if inside the range; "concern"
        if outside it by no more than ``tol_frac`` times the largest of
        ``|low|``, ``|high|`` and the range width; otherwise "fail".
    """
    if value is None:
        return "n/a"
    if low > high:
        # Tolerate a reversed range rather than failing every value.
        low, high = high, low
    if low <= value <= high:
        return "match"
    # Floor the width so the tolerance scale is never zero
    # (e.g. low == high == 0).
    width = max(high - low, 1e-9)
    # For a value outside the range only one term is positive: the distance
    # past the violated bound.
    margin = max(low - value, value - high)
    scale = max(abs(low), abs(high), width)
    if margin <= tol_frac * scale:
        return "concern"
    return "fail"


def validate_against_reference(analyser, reference):
    """Compare analyser output to a reference dict (loaded from YAML).

    Parameters
    ----------
    analyser : StructureAnalyser
        Object providing ``density()``, ``bond_distances()``,
        ``coordination()`` and ``bond_angles()``. Only the methods whose
        section is populated in ``reference`` are called.
    reference : dict
        Parsed YAML with optional keys: ``density``, ``bond_distances``,
        ``coordination``, ``bond_angles`` (see examples/reference_*.yaml).
        A section that is missing, null or empty is skipped.

    Returns
    -------
    dict
        ``{"system": str, "sources": list[str], "rows": list[tuple]}``
        where each row is
        (descriptor, computed, expected_lo, expected_hi, units, verdict).

    Notes
    -----
    Reference entries (pairs / triplets) that the analyser did not report
    are silently left out of ``rows``.
    """
    rows = []

    density_spec = reference.get("density")
    if density_spec is not None:
        density = analyser.density()
        low, high = density_spec["expected"]
        mean = density["mean"]
        rows.append((
            "Density",
            mean,
            low,
            high,
            density_spec.get("units", "g/cm³"),
            _verdict(mean, low, high),
        ))

    # (reference key, row label, key holding the range, default units).
    # The analyser method carries the same name as the reference key.
    # ``None`` units mark a dimensionless metric: its units column is always
    # empty, whatever the reference entry contains.
    sections = (
        ("bond_distances", "Bond", "expected", "Å"),
        ("coordination", "CN", "mean_expected", None),
        ("bond_angles", "Angle", "expected", "°"),
    )
    for key, label, range_key, default_units in sections:
        specs = reference.get(key)
        if not specs:
            # Nothing to compare (absent, null or empty section), so the
            # analyser is not asked to compute this metric at all.
            continue
        computed = getattr(analyser, key)()
        for name, spec in specs.items():
            if name not in computed:
                continue
            low, high = spec[range_key]
            value = computed[name]["mean"]
            if default_units is None:
                units = ""
            else:
                units = spec.get("units", default_units)
            rows.append((
                f"{label} {name}",
                value,
                low,
                high,
                units,
                _verdict(value, low, high),
            ))

    return {
        "system": reference.get("system", "(unspecified)"),
        "sources": reference.get("references", []),
        "rows": rows,
    }
def format_validation_report(result):
    """Turn a validate_against_reference() result into a fixed-width table.

    Parameters
    ----------
    result : dict
        Mapping with keys ``"system"``, ``"sources"`` and ``"rows"``; each
        row is (descriptor, computed, expected_lo, expected_hi, units,
        verdict).

    Returns
    -------
    str
        Multi-line report: a title block, the optional list of reference
        sources, one table line per row and a match / concern / fail tally.
        A short notice is returned instead when there are no rows.
    """
    table = result["rows"]
    if not table:
        return "No validation rows produced (check reference YAML)."

    rule = "=" * 78
    divider = " " + "-" * 72

    out = [f"\n{rule}", f" Validation: {result['system']}", rule]

    sources = result["sources"]
    if sources:
        out.append(" Reference sources:")
        out.extend(f" - {src}" for src in sources)
        # NOTE(review): the blank separator is assumed to belong to the
        # sources block (emitted only when sources are listed) - confirm.
        out.append("")

    out.append(f" {'Descriptor':<22}{'Computed':>10} "
               f"{'Expected':>14} {'Units':<6} Verdict")
    out.append(divider)

    verdicts = []
    for label, computed, low, high, units, verdict in table:
        if computed is None:
            shown = "n/a"
        else:
            # Large magnitudes get one decimal, everything else three.
            precision = 1 if abs(computed) >= 100 else 3
            shown = f"{computed:>10.{precision}f}"
        span = f"[{low:.2f}, {high:.2f}]"
        out.append(f" {label:<22}{shown:>10} "
                   f"{span:>14} {units:<6} {verdict}")
        verdicts.append(verdict)
    out.append(divider)

    # Verdicts other than these three (e.g. "n/a") are not tallied.
    matched = verdicts.count("match")
    concerning = verdicts.count("concern")
    failed = verdicts.count("fail")
    out.append(f" Summary: {matched} match, {concerning} concern, "
               f"{failed} fail (out of {len(table)} metrics)")
    out.append(rule)
    return "\n".join(out)