Source code for amorphgen.utils.radii

"""
amorphgen.utils.radii
----------------------
Atomic radii data, bonding classification, minsep calculation,
and density estimation for random structure generation.

This module contains:
- Shannon ionic radii (CN=4 and CN=6)
- Metallic radii (Goldschmidt CN=12)
- Pauling electronegativities
- Element classification (nonmetal, metalloid)
- Elemental solid densities
- Bond type classification
- Minsep calculation from radii
- Cell volume / density estimation
"""

from __future__ import annotations

import logging
import numpy as np
from ase.data import covalent_radii, atomic_numbers, atomic_masses

logger = logging.getLogger(__name__)


# ==============================================================================
# Radii data tables
# ==============================================================================

# Shannon effective ionic radii (A).
# Keyed as {symbol: {oxidation_state: {CN: radius}}}.
# CN=6 is the default; CN=4 used when target_cn is specified.
# Source: Shannon, R.D. Acta Cryst. A32, 751-767 (1976).
SHANNON_IONIC_RADII = {
    # Cations
    "Li": {1: {4: 0.59, 6: 0.76}},
    "Na": {1: {4: 0.99, 6: 1.02}},
    "K":  {1: {6: 1.38}},
    "Rb": {1: {6: 1.52}},
    "Cs": {1: {6: 1.67}},
    "Mg": {2: {4: 0.57, 6: 0.72}},
    "Ca": {2: {6: 1.00}},
    "Sr": {2: {6: 1.18}},
    "Ba": {2: {6: 1.35}},
    "B":  {3: {4: 0.11, 6: 0.27}},
    "Al": {3: {4: 0.39, 6: 0.535}},
    "Ga": {3: {4: 0.47, 6: 0.620}},
    "In": {3: {4: 0.62, 6: 0.800}},
    "Tl": {3: {6: 0.885}},
    "Si": {4: {4: 0.26, 6: 0.400}},
    "Ge": {4: {4: 0.39, 6: 0.530}},
    "Sn": {2: {6: 0.930}, 4: {4: 0.55, 6: 0.690}},
    "Pb": {2: {6: 1.190}, 4: {6: 0.775}},
    "Ti": {3: {6: 0.670}, 4: {4: 0.42, 6: 0.605}},
    "Zr": {4: {4: 0.59, 6: 0.720}},
    "Hf": {4: {4: 0.58, 6: 0.710}},
    "V":  {3: {6: 0.640}, 5: {4: 0.355, 6: 0.540}},
    "Nb": {5: {4: 0.48, 6: 0.640}},
    "Ta": {5: {6: 0.640}},
    "Cr": {3: {6: 0.615}, 6: {4: 0.26, 6: 0.440}},
    "Mo": {4: {6: 0.650}, 6: {4: 0.41, 6: 0.590}},
    "W":  {4: {6: 0.660}, 6: {4: 0.42, 6: 0.600}},
    "Mn": {2: {4: 0.66, 6: 0.830}, 3: {6: 0.645}, 4: {4: 0.39, 6: 0.530}},
    "Fe": {2: {4: 0.63, 6: 0.780}, 3: {4: 0.49, 6: 0.645}},
    "Co": {2: {4: 0.58, 6: 0.745}, 3: {6: 0.610}},
    "Ni": {2: {4: 0.55, 6: 0.690}},
    "Cu": {1: {4: 0.60, 6: 0.770}, 2: {4: 0.57, 6: 0.730}},
    "Zn": {2: {4: 0.60, 6: 0.740}},
    "Cd": {2: {4: 0.78, 6: 0.950}},
    "Y":  {3: {6: 0.900}},
    "La": {3: {6: 1.032}},
    "Ce": {3: {6: 1.010}, 4: {6: 0.870}},
    "Bi": {3: {6: 1.030}},
    "Sc": {3: {6: 0.745}},
    # Metalloid in high oxidation state oxides (e.g. As2O5).
    # As(3+) is not tabulated by Shannon and Sb(III/V) Shannon radii
    # are too small (lone-pair distortion), giving unphysical sphere
    # volumes; for Sb compounds AmorphGen falls back to the Cordero
    # covalent radius which matches experimental Sb2O3 density better.
    "As": {5: {4: 0.335, 6: 0.460}},
    # Anions (CN-independent)
    "O":  {-2: {6: 1.400}},
    "S":  {-2: {6: 1.840}},
    "Se": {-2: {6: 1.980}},
    "Te": {-2: {6: 2.210}},
    "F":  {-1: {6: 1.330}},
    "Cl": {-1: {6: 1.810}},
    "Br": {-1: {6: 1.960}},
    "I":  {-1: {6: 2.200}},
    "N":  {-3: {6: 1.460}},
    "H":  {-1: {6: 1.400}},
    "C":  {-4: {6: 1.400}},  # Approximate; Shannon does not list C4-
    "P":  {-3: {6: 2.120}, 5: {4: 0.17, 6: 0.380}},
}

# Metallic radii (A, CN=12 Goldschmidt radii).
# Source: Greenwood & Earnshaw, Chemistry of the Elements (1997).
METALLIC_RADII = {
    "Li": 1.52, "Na": 1.86, "K": 2.27, "Rb": 2.48, "Cs": 2.65,
    "Mg": 1.60, "Ca": 1.97, "Sr": 2.15, "Ba": 2.17,
    "Al": 1.43, "Ga": 1.35, "In": 1.67,
    "Tl": 1.70, "Sn": 1.58, "Pb": 1.75, "Ti": 1.47, "Zr": 1.60,
    "Hf": 1.59, "V": 1.34, "Nb": 1.46, "Ta": 1.46, "Cr": 1.28,
    "Mo": 1.39, "W": 1.39, "Mn": 1.27, "Fe": 1.26, "Co": 1.25,
    "Ni": 1.24, "Cu": 1.28, "Zn": 1.37, "Cd": 1.52, "Y": 1.80,
    "La": 1.87, "Ce": 1.83, "Sc": 1.64, "Bi": 1.56, "Ge": 1.37,
    "Si": 1.17,
    # Metalloids (rhombohedral / metallic phase radii)
    "As": 1.39, "Sb": 1.59,
}

# Pauling electronegativities for bonding classification.
# Values are the Allred (1961) revised scale, which retains Pauling's
# original methodology but uses updated thermochemical bond-dissociation
# data.  These are the values that appear in modern chemistry textbooks
# and the CRC Handbook.
#   Allred, A. L.  "Electronegativity values from thermochemical data."
#                  J. Inorg. Nucl. Chem. 17, 215-221 (1961).
#   Originally:  Pauling, L.  The Nature of the Chemical Bond, 3rd ed.,
#                Cornell Univ. Press (1960).
PAULING_EN = {
    "H": 2.20, "Li": 0.98, "Be": 1.57, "B": 2.04, "C": 2.55,
    "N": 3.04, "O": 3.44, "F": 3.98, "Na": 0.93, "Mg": 1.31,
    "Al": 1.61, "Si": 1.90, "P": 2.19, "S": 2.58, "Cl": 3.16,
    "K": 0.82, "Ca": 1.00, "Sc": 1.36, "Ti": 1.54, "V": 1.63,
    "Cr": 1.66, "Mn": 1.55, "Fe": 1.83, "Co": 1.88, "Ni": 1.91,
    "Cu": 1.90, "Zn": 1.65, "Ga": 1.81, "Ge": 2.01, "As": 2.18,
    "Se": 2.55, "Br": 2.96, "Rb": 0.82, "Sr": 0.95, "Y": 1.22,
    "Zr": 1.33, "Nb": 1.60, "Mo": 2.16, "Cd": 1.69, "In": 1.78,
    "Sn": 1.96, "Sb": 2.05, "Te": 2.10, "I": 2.66, "Cs": 0.79,
    "Ba": 0.89, "La": 1.10, "Ce": 1.12, "Hf": 1.30, "Ta": 1.50,
    "W": 2.36, "Pb": 2.33, "Bi": 2.02, "Tl": 1.62,
}

# Conventional anion charges, used to infer the cation oxidation state
# from a compound's stoichiometry by charge balance (e.g. Ti=+4 in
# TiO_2, Sb=+5 in Sb_2O_5). Limited to clear-cut anion-formers; complex
# / mixed-anion / non-stoichiometric systems fall through to the
# default "highest positive Shannon state" fallback.
ANION_CHARGES = {
    "O":  -2,
    "S":  -2,
    "Se": -2,
    "Te": -2,
    "F":  -1,
    "Cl": -1,
    "Br": -1,
    "I":  -1,
    "N":  -3,
    "H":  -1,  # treated as hydride; only valid for binary hydrides
}



[docs]
def infer_oxidation_state(sym: str, composition: dict) -> int | None:
    """Infer the oxidation state of *sym* in *composition* by charge balance.

    Sums anion charge from :data:`ANION_CHARGES` and assigns the
    remaining positive charge equally across cations of the queried
    element when only one cation type is present, or for any element
    that appears as the only non-anion type. Returns ``None`` when the
    inference is ambiguous (multiple cation species, non-integer
    oxidation state, or no anions at all), letting the caller fall back
    to a default Shannon entry.
    """
    if sym in ANION_CHARGES or sym in NONMETALS:
        return None  # not a cation we resolve here

    total_neg = 0
    for s, n in composition.items():
        q = ANION_CHARGES.get(s)
        if q is not None and s != sym:
            total_neg += n * q
    if total_neg == 0:
        return None  # no anions -> can't balance

    # Identify cations (everything that isn't a known anion-former).
    cations = {s: n for s, n in composition.items() if s not in ANION_CHARGES}
    if sym not in cations:
        return None

    # Single-cation case: balance directly.
    if len(cations) == 1:
        n_cat = cations[sym]
        ox = -total_neg / n_cat
        if abs(ox - round(ox)) < 1e-6 and ox > 0:
            return int(round(ox))
        return None

    # Multi-cation case: try to assign a *unique* state for *sym* by
    # subtracting fixed-state contributions from other cations whose
    # Shannon table has only a single positive oxidation state.
    fixed_pos = 0
    unresolved = []
    for s, n in cations.items():
        states = SHANNON_IONIC_RADII.get(s, {})
        positive = [k for k in states if k > 0]
        if len(positive) == 1 and s != sym:
            fixed_pos += n * positive[0]
        elif s != sym:
            unresolved.append(s)

    if unresolved:
        return None  # too ambiguous; let highest-positive fallback handle it

    n_cat = cations[sym]
    ox = (-total_neg - fixed_pos) / n_cat
    if abs(ox - round(ox)) < 1e-6 and ox > 0:
        return int(round(ox))
    return None



# ==============================================================================
# Element classification
# ==============================================================================

NONMETALS = frozenset({
    "H", "He", "C", "N", "O", "F", "Ne", "P", "S", "Cl", "Ar",
    "Br", "Kr", "I", "Xe", "At", "Rn",
})

# Source: IUPAC recognises B, Si, Ge, As, Sb, Te as metalloids.
# Sn is included as it adopts tetrahedral bonding (alpha-Sn, SnO2).
METALLOIDS = frozenset({"B", "Si", "Ge", "Sn", "As", "Sb", "Te", "Se"})

# Typical coordination numbers for coordination-aware placement.
# Metalloids are always CN=4. For metals, CN depends on the anion context:
# oxides → CN=4-6, nitrides → CN=4, halides/sulfides → CN=6.
_ALWAYS_TETRAHEDRAL = frozenset({
    "Si", "Ge", "B", "P", "As",      # metalloids — always CN=4
    "Be",                              # tetrahedral in BeF2, BeO (like Si in SiO2)
    # Group-12 post-transition metals form zincblende-structured II-VI
    # semiconductors with chalcogens (ZnS, ZnSe, ZnTe, CdS, CdSe, CdTe,
    # HgS, HgSe, HgTe — all CN=4 in their stable phases).  Without this,
    # the chalcogenide branch of auto_target_cn defaults them to octahedral
    # CN=6, which is wrong for these specific compounds.
    "Zn", "Cd", "Hg",
})

# d-block transition metals that form *interstitial* carbides (rocksalt or
# close-related dense structures: TiC, ZrC, HfC, VC, NbC, TaC, CrC, MoC,
# WC, ...).  These compounds want metallic + Cordero radii and a higher
# packing factor (≈ 0.60) than covalent carbides (SiC, B4C, P4C3, ...).
# Pauling Δχ does NOT cleanly separate these from covalent carbides
# because W–C (Δχ = 0.19) and Mo–C (Δχ = 0.39) are interstitial but have
# small Δχ — the discriminator has to be cation group, not electronegativity.
# Group-11 (Cu, Ag, Au) and Group-12 (Zn, Cd, Hg) do not form stable
# carbides and are excluded.
_TRANSITION_METAL_CARBIDE_CATIONS = frozenset({
    "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni",         # 3d
    "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd",         # 4d
    "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt",               # 5d
})



[docs]
def auto_target_cn(composition: dict) -> tuple[dict | None, int]:
    """
    Auto-detect target coordination numbers and tolerance from composition.

    Returns (target_cn, cn_tolerance) tuple.

    Rules:
    - Metalloids (Si, Ge, B): CN=4, tolerance=0 (strict tetrahedral)
    - Pure Si/Ge: CN=4, tolerance=0
    - Metals in nitrides: CN=4, tolerance=0 (wurtzite tetrahedral)
    - Metals in halides/sulfides: CN=6, tolerance=0 (strict octahedral)
    - Metals in oxides: CN=5, tolerance=1 (flexible 4-6 range)

    Parameters
    ----------
    composition : dict
        e.g. {"In": 32, "O": 48}

    Returns
    -------
    (target_cn, cn_tolerance) : tuple
        target_cn: dict or None
        cn_tolerance: int (0=strict, 1=flexible)
    """
    elems = set(composition.keys())
    anions = elems & NONMETALS
    cations = elems - anions

    # Check compound class first (handles semiconductors, chalcogenides)
    cls = _classify_compound(composition)

    if cls == "group_iv":
        # Pure group IV (Si, Ge, SiGe) — tetrahedral CN=4
        target_cn = {s: 4 for s in elems}
        return target_cn, 0

    if cls == "pnictide":
        # III-V compounds (GaAs, InP, InAs) — tetrahedral CN=4
        target_cn = {s: 4 for s in elems}
        return target_cn, 0

    if cls == "chalcogenide":
        # Chalcogenides (ZnS, Sb2Te3, CdTe, GeTe)
        chalcogens = elems & {"S", "Se", "Te"}
        target_cn = {}
        for s in elems:
            if s not in chalcogens:
                if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS:
                    target_cn[s] = 4
                else:
                    target_cn[s] = 6
        return (target_cn if target_cn else None), 0

    if cls in {"covalent_carbide", "transition_metal_carbide"}:
        # Covalent carbides (SiC, B4C) — metalloid CN=4
        # Transition-metal carbides (TiC, WC, ZrC) — transition-metal CN=6 (rocksalt)
        target_cn = {}
        for s in elems:
            if s != "C":
                if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS:
                    target_cn[s] = 4
                else:
                    target_cn[s] = 6
        return (target_cn if target_cn else None), 0

    if cls == "hydride":
        # Hydrides (LiH, MgH2) — metal CN=6
        target_cn = {}
        for s in elems:
            if s != "H":
                target_cn[s] = 6
        return (target_cn if target_cn else None), 0

    if cls == "boride":
        # Borides (TiB2, LaB6) — metal CN=6
        target_cn = {}
        for s in elems:
            if s != "B":
                target_cn[s] = 6
        return (target_cn if target_cn else None), 0

    if cls == "alloy":
        # Metal alloys / intermetallics — typical CN=8-12
        # Use CN=8 (BCC-like) with tolerance=2 for flexibility
        target_cn = {s: 8 for s in elems}
        return target_cn, 2

    if not anions:
        # Pure metals — typical CN=12 (FCC/HCP) or 8 (BCC)
        target_cn = {s: 8 for s in elems}
        return target_cn, 2

    # Determine anion context
    chalcogens = elems & {"S", "Se", "Te"}
    pnictogens = elems & {"P", "As", "Sb"}
    has_nitrogen = "N" in anions
    has_oxide = "O" in anions
    has_halide = bool(anions & {"Cl", "Br", "I", "F"})
    has_chalcogen = bool(chalcogens)
    has_halide_sulfide = has_halide or has_chalcogen

    target_cn = {}

    if has_oxide and not has_nitrogen and not has_halide_sulfide:
        # Pure oxides: metals CN=5 (flexible 4-6), metalloids CN=4 (strict)
        has_metal_cation = any(s not in _ALWAYS_TETRAHEDRAL and s not in METALLOIDS for s in cations)
        for s in cations:
            if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS:
                target_cn[s] = 4
            else:
                target_cn[s] = 5
        # tolerance=1 only if metal cations present (flexible 4-6)
        # pure metalloid oxides (SiO2, GeO2, B2O3): tolerance=0 (strict CN=4)
        return target_cn, 1 if has_metal_cation else 0

    elif has_nitrogen and not has_oxide:
        # Nitrides: all CN=4 strict
        for s in cations:
            target_cn[s] = 4
        return target_cn, 0

    elif has_halide and not has_oxide:
        # Halides: CN=6 strict, metalloids CN=4
        for s in cations:
            if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS:
                target_cn[s] = 4
            else:
                target_cn[s] = 6
        return target_cn, 0

    # Chalcogenides already handled above via cls == "chalcogenide"

    else:
        # Mixed or other: metals CN=5 flexible, metalloids CN=4
        for s in cations:
            if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS:
                target_cn[s] = 4
            else:
                target_cn[s] = 5
        return target_cn, 1


# Elemental solid densities (kg/m3).
# Source: CRC Handbook of Chemistry and Physics, 104th edition (2023).
# Gaseous elements (O, N, Cl, etc.) are excluded.
# (ASE's ase.data.covalent_radii — used elsewhere in this module — is
# sourced from Cordero et al. 2008 for elements 1-96.)
ELEMENTAL_DENSITIES = {
    # Solid nonmetals / metalloids (kg/m3)
    "B": 2340, "C": 2267, "P": 1823, "S": 2070,
    "Se": 4809, "Te": 6240, "I": 4930,
    # Metals
    "Li": 534, "Be": 1850, "Na": 971, "Mg": 1738, "Al": 2700,
    "Si": 2330, "K": 862, "Ca": 1550, "Sc": 2985, "Ti": 4507,
    "V": 6110, "Cr": 7190, "Mn": 7440, "Fe": 7874, "Co": 8900,
    "Ni": 8908, "Cu": 8960, "Zn": 7134, "Ga": 5904, "Ge": 5323,
    "As": 5727, "Sr": 2640, "Y": 4472, "Zr": 6506,
    "Nb": 8570, "Mo": 10220, "Cd": 8650, "In": 7310, "Sn": 7287,
    "Sb": 6685, "Ba": 3510, "La": 6145, "Ce": 6770,
    "Hf": 13310, "Ta": 16654, "W": 19250, "Pb": 11340, "Bi": 9807,
    "Tl": 11850,
}


# ==============================================================================
# Scale factors
# ==============================================================================

# Applied to sum of radii to give minimum interatomic distance.
# Scientific basis:
#   - Shannon (1976): ionic radii from ~900 oxide/fluoride crystal structures
#   - AIRSS buildcell: uses 80-90% of equilibrium distances
SCALE_FACTORS = {
    "ionic":              0.80,  # M-X bonds (shorter -> better coordination)
    "covalent":           0.80,  # Directional bonds
    "metallic":           0.85,  # M-M (keep higher to prevent M-M clustering)
    "anion_packing":      0.80,  # Small anion packing (O-O, F-F, N-N)
    "anion_packing_large": 0.70,  # Large anion packing (Cl-Cl, Br-Br, I-I, S-S)
}

# Shannon ionic radius threshold for small vs large anions.
_LARGE_ANION_RADIUS_THRESHOLD = 1.5

# Maximum minsep caps (A). Prevents overly large distances
# that make random placement impossible.
_MAX_SAME_ELEMENT_MINSEP = 2.80   # M-M, X-X same-element
_MAX_IONIC_MINSEP = 3.00          # M-X ionic bonds
_MAX_ANION_MINSEP = 3.00          # X-X anion packing


# ==============================================================================
# Bonding classification
# ==============================================================================


[docs]
def classify_bond(sym_a: str, sym_b: str) -> str:
    """
    Classify a pair of elements by bonding type.

    Uses an element-type classification rule (nonmetal / metalloid / metal
    membership) with a Pauling-electronegativity refinement: pairs that
    the type rules would call "ionic" but whose Pauling electronegativity
    difference is small (Δχ < 1.0) are reclassified as "covalent".
    This catches III–V semiconductors (GaAs Δχ=0.37, AlAs ~0.6) and related
    compounds where the chemistry is predominantly covalent and Shannon
    ionic radii give unrealistically small inter-atomic distances.

    Returns one of: "ionic", "covalent", "metallic".
    """
    a_is_nonmetal = sym_a in NONMETALS
    b_is_nonmetal = sym_b in NONMETALS
    a_is_metalloid = sym_a in METALLOIDS
    b_is_metalloid = sym_b in METALLOIDS

    # Helper: small Δχ ⇒ predominantly covalent regardless of type rules.
    def _is_covalent_by_en():
        en_a = PAULING_EN.get(sym_a)
        en_b = PAULING_EN.get(sym_b)
        if en_a is None or en_b is None:
            return False
        return abs(en_a - en_b) < 1.0

    if a_is_metalloid and b_is_metalloid:
        return "covalent"
    if (a_is_metalloid and b_is_nonmetal) or (b_is_metalloid and a_is_nonmetal):
        # Was unconditionally "ionic" — but BN, BC etc. are covalent.
        return "covalent" if _is_covalent_by_en() else "ionic"
    if a_is_metalloid or b_is_metalloid:
        # Metal + metalloid (e.g.\ Ga + As).  Was unconditionally "ionic" —
        # but III-V semiconductors are predominantly covalent.
        return "covalent" if _is_covalent_by_en() else "ionic"
    if a_is_nonmetal != b_is_nonmetal:
        # Metal + nonmetal (e.g.\ Ga + N).  Was unconditionally "ionic" —
        # check Δχ for borderline cases (GaN Δχ=1.23 stays ionic; carbides
        # of low-EN metals like Ti-C Δχ=1.0 may flip).
        return "covalent" if _is_covalent_by_en() else "ionic"
    if a_is_nonmetal and b_is_nonmetal:
        return "covalent"
    return "metallic"



# ==============================================================================
# Radius lookup functions
# ==============================================================================


[docs]
def get_ionic_radius(sym: str, cn: int | None = None,
                     oxidation_state: int | None = None) -> float | None:
    """
    Get Shannon ionic radius for an element.

    Parameters
    ----------
    sym : str
        Element symbol.
    cn : int, optional
        Coordination number. If provided, use CN-specific radius.
        Defaults to CN=6.
    oxidation_state : int, optional
        Specific oxidation state to look up (e.g.\\ ``5`` for Sb in
        Sb_2O_5). If supplied and present in the Shannon table the
        matching radius is returned. If supplied but absent, falls
        through to the default selection.  When ``None`` (default),
        anions return their most-negative state and cations return
        their **highest** positive state - usually the right choice for
        binary oxides / halides / nitrides where the higher state
        dominates (TiO_2, V_2O_5, WO_3 etc.).

    Returns
    -------
    float or None
    """
    if sym not in SHANNON_IONIC_RADII:
        return None
    states = SHANNON_IONIC_RADII[sym]

    if oxidation_state is not None and oxidation_state in states:
        ox = oxidation_state
    elif oxidation_state is not None and oxidation_state > 0:
        # Requested cation state is missing from Shannon; pick the
        # closest available positive state (preferring same-or-higher).
        # This handles cases like ZrN, where charge balance suggests
        # Zr(III) but Shannon only lists Zr(IV); using Zr(IV) is closer
        # than dropping all the way to a covalent fallback.
        positive = sorted(k for k in states.keys() if k > 0)
        if not positive:
            return None
        ox = min(positive, key=lambda k: (abs(k - oxidation_state), -k))
    elif sym in NONMETALS:
        ox = min(states.keys())  # most-negative anion state
    else:
        positive = {k: v for k, v in states.items() if k > 0}
        if not positive:
            return None
        ox = max(positive.keys())  # highest cation state when unspecified

    cn_dict = states[ox]
    target_cn = cn if cn is not None else 6

    if target_cn in cn_dict:
        return cn_dict[target_cn]
    elif 6 in cn_dict:
        return cn_dict[6]
    else:
        return next(iter(cn_dict.values()))




[docs]
def get_metallic_radius(sym: str) -> float | None:
    """Get metallic radius for an element. Returns None if unavailable."""
    return METALLIC_RADII.get(sym)




[docs]
def get_effective_radius(sym: str) -> float:
    """
    Get the most appropriate radius for volume estimation.

    - Metals -> metallic radius
    - Anion-formers (O, S, Cl, ...) -> Shannon ionic radius
    - Metalloids (Si, Ge, ...) -> metallic radius if available
    - Fallback -> covalent radius
    """
    if sym not in NONMETALS:
        r = get_metallic_radius(sym)
        if r is not None:
            return r
    if sym in NONMETALS and sym not in {"He", "Ne", "Ar", "Kr", "Xe", "Rn"}:
        r = get_ionic_radius(sym)
        if r is not None:
            return r
    return covalent_radii[atomic_numbers[sym]]



# ==============================================================================
# Minsep calculation
# ==============================================================================


[docs]
def default_minsep(symbols: list[str], scale: float = 0.85,
                   target_cn: dict | None = None) -> dict:
    """
    Build a minsep dict for all element pairs.

    Uses bonding-type-aware radii. When target_cn is provided, uses
    CN-specific Shannon radii (e.g. CN=4 for Si in SiO2).

    Parameters
    ----------
    symbols : list of str
    scale : float
        Fallback scale factor (default 0.85).
    target_cn : dict, optional
        e.g. {"Si": 4, "O": 2}. Uses CN-specific radii when available.

    Returns
    -------
    dict
        Minimum separations keyed as "A-B" with A <= B alphabetically.
    """
    unique = sorted(set(symbols))
    minsep = {}
    cn_map = target_cn or {}

    def _cn_radius(sym):
        cn = cn_map.get(sym)
        return get_ionic_radius(sym, cn=cn)

    for i, s1 in enumerate(unique):
        for s2 in unique[i:]:
            key = f"{s1}-{s2}" if s1 <= s2 else f"{s2}-{s1}"

            bond_type = classify_bond(s1, s2)
            sf = SCALE_FACTORS.get(bond_type, scale)

            if bond_type == "ionic":
                r1 = _cn_radius(s1)
                r2 = _cn_radius(s2)
                if r1 is not None and r2 is not None:
                    d_ionic = min((r1 + r2) * sf, _MAX_IONIC_MINSEP)
                    minsep[key] = d_ionic
                    logger.info(
                        "  minsep %s = %.2f A  (ionic: Shannon %.3f + %.3f, "
                        "scale=%.2f)", key, minsep[key], r1, r2, sf
                    )
                    continue

            elif bond_type == "metallic":
                r1 = get_metallic_radius(s1)
                r2 = get_metallic_radius(s2)
                if r1 is not None and r2 is not None:
                    d_metallic = (r1 + r2) * sf

                    has_anion = any(s in NONMETALS for s in unique)
                    if has_anion:
                        ri1 = _cn_radius(s1)
                        ri2 = _cn_radius(s2)
                        anion_syms = [s for s in unique if s in NONMETALS]
                        r_anion = max(
                            (_cn_radius(a) or 0.0) for a in anion_syms
                        ) if anion_syms else None

                        # Geometric estimate for edge-sharing polyhedra:
                        # d(M-M) = sqrt(2) * d(M-X) * sf
                        # Uses the same scale factor as ionic/metallic (0.85).
                        if ri1 is not None and r_anion and r_anion > 0:
                            d_geom_1 = 2**0.5 * (ri1 + r_anion) * sf
                        else:
                            d_geom_1 = 0.0

                        if ri2 is not None and r_anion and r_anion > 0:
                            d_geom_2 = 2**0.5 * (ri2 + r_anion) * sf
                        else:
                            d_geom_2 = 0.0

                        d_geometric = (d_geom_1 + d_geom_2) / 2 if (
                            d_geom_1 > 0 and d_geom_2 > 0
                        ) else max(d_geom_1, d_geom_2)

                        d_mm = min(max(d_metallic, d_geometric),
                                   _MAX_SAME_ELEMENT_MINSEP)
                        minsep[key] = d_mm
                        source = "metallic" if d_metallic >= d_geometric else "geometric"
                        capped = " (capped)" if d_mm < max(d_metallic, d_geometric) else ""
                        logger.info(
                            "  minsep %s = %.2f A  (M-M ionic context, %s: "
                            "met=%.2f, geom=%.2f, scale=%.2f)%s",
                            key, minsep[key], source,
                            d_metallic, d_geometric, sf, capped
                        )
                    else:
                        minsep[key] = min(d_metallic, _MAX_SAME_ELEMENT_MINSEP)
                        logger.info(
                            "  minsep %s = %.2f A  (metallic: %.3f + %.3f, "
                            "scale=%.2f)", key, minsep[key], r1, r2, sf
                        )
                    continue

            elif bond_type == "covalent":
                a_is_metalloid = s1 in METALLOIDS
                b_is_metalloid = s2 in METALLOIDS

                if a_is_metalloid or b_is_metalloid:
                    # Use metallic radii if available (better for pure elements),
                    # fall back to covalent radii
                    r1 = get_metallic_radius(s1) or covalent_radii[atomic_numbers[s1]]
                    r2 = get_metallic_radius(s2) or covalent_radii[atomic_numbers[s2]]
                    d_covalent = (r1 + r2) * sf

                    # In anion context (oxide, nitride, etc.), metalloid-metalloid
                    # distances are much larger due to bridging anions (Si-O-Si).
                    # Use geometric estimate: sqrt(2) * d(X-anion)
                    has_anion = any(s in NONMETALS for s in unique)
                    if has_anion and s1 == s2:
                        ri = _cn_radius(s1)
                        anion_syms = [s for s in unique if s in NONMETALS]
                        r_anion = max(
                            (_cn_radius(a) or 0.0) for a in anion_syms
                        ) if anion_syms else None

                        if ri is not None and r_anion and r_anion > 0:
                            d_geometric = 2**0.5 * (ri + r_anion) * sf
                            d_xx = min(max(d_covalent, d_geometric),
                                       _MAX_SAME_ELEMENT_MINSEP)
                            minsep[key] = d_xx
                            source = "covalent" if d_covalent >= d_geometric else "geometric"
                            capped = " (capped)" if d_xx < max(d_covalent, d_geometric) else ""
                            logger.info(
                                "  minsep %s = %.2f A  (metalloid anion context, %s: "
                                "cov=%.2f, geom=%.2f, scale=%.2f)%s",
                                key, minsep[key], source,
                                d_covalent, d_geometric, sf, capped
                            )
                            continue

                    minsep[key] = d_covalent
                    logger.info(
                        "  minsep %s = %.2f A  (covalent/metalloid: "
                        "%.3f + %.3f, scale=%.2f)",
                        key, minsep[key], r1, r2, sf
                    )
                    continue

                # Pure single-element nonmetal composition (e.g.\ pure C,
                # pure S, pure P). This is a genuine covalent homo-element
                # bond, not an anion-anion contact in a compound, so use
                # Cordero covalent radii rather than the much larger
                # Shannon anion radius.  Heteroelement nonmetal pairs and
                # same-element pairs in compounds (O-O in TiO_2, Cl-Cl in
                # LiCl, ...) still go through anion-packing below.
                if s1 == s2 and len(unique) == 1:
                    r = covalent_radii[atomic_numbers[s1]]
                    d_cov_homo = 2 * r * sf
                    minsep[key] = min(d_cov_homo, _MAX_SAME_ELEMENT_MINSEP)
                    logger.info(
                        "  minsep %s = %.2f A  (covalent homo-element: "
                        "Cordero %.3f, scale=%.2f)",
                        key, minsep[key], r, sf
                    )
                    continue

                r1 = _cn_radius(s1)
                r2 = _cn_radius(s2)
                if r1 is not None and r2 is not None:
                    avg_r = (r1 + r2) / 2
                    if avg_r >= _LARGE_ANION_RADIUS_THRESHOLD:
                        sf_anion = SCALE_FACTORS.get("anion_packing_large", 0.70)
                        label = "large anion"
                    else:
                        sf_anion = SCALE_FACTORS.get("anion_packing", 0.80)
                        label = "small anion"
                    minsep[key] = min((r1 + r2) * sf_anion, _MAX_ANION_MINSEP)
                    logger.info(
                        "  minsep %s = %.2f A  (%s packing: Shannon "
                        "%.3f + %.3f, scale=%.2f)",
                        key, minsep[key], label, r1, r2, sf_anion
                    )
                    continue

            # Fallback: covalent radii (with cap)
            r1 = covalent_radii[atomic_numbers[s1]]
            r2 = covalent_radii[atomic_numbers[s2]]
            d_fallback = (r1 + r2) * scale
            if s1 == s2:
                d_fallback = min(d_fallback, _MAX_SAME_ELEMENT_MINSEP)
            else:
                d_fallback = min(d_fallback, _MAX_IONIC_MINSEP)
            minsep[key] = d_fallback
            logger.info(
                "  minsep %s = %.2f A  (fallback: covalent %.3f + %.3f, "
                "scale=%.2f)", key, minsep[key], r1, r2, scale
            )

    return minsep



# ==============================================================================
# Density / cell volume estimation
# ==============================================================================


[docs]
def estimate_density(composition: dict, amorphous_factor: float = 0.80) -> float | None:
    """
    Estimate density from elemental solid densities.

    Returns density in g/cm3, or None if any element lacks density data.
    """
    total_mass = 0.0
    total_vol = 0.0
    for sym, count in composition.items():
        dens = ELEMENTAL_DENSITIES.get(sym)
        if dens is None or dens == 0:
            return None
        dens_gcm3 = dens / 1000.0
        mass = atomic_masses[atomic_numbers[sym]] * count
        total_mass += mass
        total_vol += mass / dens_gcm3

    if total_vol == 0:
        return None

    crystal_density = total_mass / total_vol
    return crystal_density * amorphous_factor



# Packing factors for density estimation by material class.
#
# The same formula V_cell = sum(sphere_vol) / f_pack is used for all
# classes, but the radius type underneath the spheres differs by
# bonding regime (see :func:`_radius_for_density`):
#   - Ionic (oxides, halides, nitrides, hydrides) -> Shannon ionic CN=6
#     - typical f_pack range 0.50-0.58
#   - Covalent (group-IV, pnictides, chalcogenides, borides, carbides)
#     -> Cordero covalent. Covalent radii reflect bond lengths, so
#     the sphere is much smaller than the actual atomic volume; f_pack
#     is correspondingly smaller (~0.20-0.40).
#   - Metallic (alloys) -> Goldschmidt metallic. Metallic radii are
#     close to atomic centres in close-packed metals; f_pack ~0.65-0.75.
#
# Values calibrated to give experimental amorphous densities for
# representative systems in each class.
PACKING_FACTORS = {
    # Ionic — Shannon ionic radii
    "covalent_oxide":  0.50,  # SiO2, GeO2, B2O3 (network formers)
    "metal_oxide":     0.52,  # In2O3, TiO2, Al2O3, Ga2O3
    "halide":          0.58,  # Li2ZrCl6, LiF
    "nitride":         0.52,  # AlN, GaN, Si3N4
    "hydride":         0.55,  # LiH, MgH2, NaAlH4
    # Covalent — Cordero covalent radii
    "group_iv":        0.30,  # Si, Ge, C (tetrahedral covalent network)
    "pnictide":        0.32,  # GaAs, InP, InAs (III-V compounds)
    "chalcogenide":    0.30,  # ZnS, CdTe, GeTe (II-VI / IV-VI)
    "boride":          0.50,  # TiB2, MgB2, ZrB2
    # Carbides split by cation chemistry (2026-05-11) — see
    # _TRANSITION_METAL_CARBIDE_CATIONS and _classify_compound for the routing.
    "covalent_carbide":     0.32,  # SiC, B4C — Cordero radii, open network
    "transition_metal_carbide": 0.60,  # TiC, WC, ZrC — rocksalt-like; uses
                                   # Goldschmidt metallic for cation,
                                   # Cordero for C (see _radius_for_density)
    # Metallic — Goldschmidt metallic radii.  Lowered from 0.70 to 0.60
    # (2026-05-10) so random sequential placement has enough headroom:
    # random close packing of equal hard spheres caps at ~0.64, and 0.70
    # left no room for the placement attempt budget — Cu/Au/Ti/Ni etc.
    # all failed single-call generate_random with auto density.  At 0.60
    # the cell is ~5% larger; predicted density for a-Cu is ~7.2 g/cm³
    # vs experimental amorphous ~7.7-8.5 (~10% low — within design-target
    # accuracy and easily overridden via target_density for production runs).
    "alloy":           0.60,  # NiTi, CuZr, brass, pure metals
    "default":         0.52,  # fallback (ionic)
}

# NOTE: ``AMORPHOUS_DENSITY_FACTORS`` and the associated elemental
# density-mixing branch were retired in favour of the unified
# class-aware sphere-packing model. ``estimate_density()`` (which
# implements the mixing rule) is kept as a public helper for users
# who want the mass-averaged crystal density directly.


def _classify_compound(composition: dict) -> str:
    """
    Classify a compound by material class for packing factor selection.

    Returns one of: "group_iv", "pnictide", "chalcogenide",
    "covalent_oxide", "metal_oxide", "halide", "nitride",
    "carbide", "hydride", "boride", "alloy", "default".
    """
    elems = set(composition.keys())
    has_metal = any(s not in NONMETALS and s not in METALLOIDS for s in elems)
    has_metalloid = any(s in METALLOIDS for s in elems)

    anions = elems & NONMETALS
    chalcogens = elems & {"S", "Se", "Te"}
    pnictogens = elems & {"P", "As", "Sb"}

    # Group IV: pure Si, Ge, SiGe (all metalloid, no chalcogen/pnictogen)
    all_metalloid = all(s in METALLOIDS for s in elems)
    if all_metalloid and not chalcogens and not pnictogens:
        return "group_iv"

    # Pnictides: III-V compounds (GaAs, InP, InAs, GaSb)
    # Must have at least one non-pnictogen element (the group III metal)
    # Pure pnictogens (Sb, As) are not III-V compounds
    # P is both NONMETAL and pnictogen — exclude from anion check here
    non_pnictogen = elems - pnictogens
    other_anions = anions - {"P", "As"}  # P/As can act as pnictogen anion
    if pnictogens and non_pnictogen and not other_anions and not chalcogens:
        return "pnictide"

    # Chalcogenides (S, Se, Te) — check before oxides
    if chalcogens and "O" not in anions:
        return "chalcogenide"

    # Oxides
    if "O" in anions:
        if has_metal:
            return "metal_oxide"
        elif has_metalloid:
            return "covalent_oxide"

    # Halides
    if anions & {"Cl", "Br", "I", "F"}:
        return "halide"

    # Nitrides
    if "N" in anions:
        return "nitride"

    # Carbides — split by cation chemistry
    #   Transition-metal carbides (TiC, WC, ZrC, ...) form rocksalt-like dense
    #   structures with d-block transition metals and need a higher packing
    #   factor + Goldschmidt metallic radii for the cation.
    #   Covalent carbides (SiC, B4C, ...) keep Cordero radii + low packing
    #   factor.  See _TRANSITION_METAL_CARBIDE_CATIONS for the cation set.
    if "C" in anions:
        cations = elems - anions
        if cations & _TRANSITION_METAL_CARBIDE_CATIONS:
            return "transition_metal_carbide"
        return "covalent_carbide"

    # Hydrides
    if "H" in anions:
        return "hydride"

    # Borides (B as anion-like element with metals)
    if "B" in elems and has_metal and not anions and not chalcogens:
        return "boride"

    # Metal alloys / intermetallics (no anions)
    if not anions and not chalcogens and has_metal:
        return "alloy"

    return "default"


def _radius_for_density(sym: str, cls: str,
                        composition: dict | None = None) -> float:
    """Pick the radius type appropriate for the composition's bonding character.

    The density estimator uses sphere-packing on per-element spheres;
    different bonding regimes need different radius tables so that the
    sphere volume is a faithful proxy for the volume the atom occupies
    in the corresponding amorphous structure:

    * Ionic compounds (oxides, halides, nitrides, hydrides) -> Shannon
      ionic radii at CN=6 (cation-anion contact length).
    * Covalent compounds (group-IV, pnictides, chalcogenides, borides,
      carbides) -> Cordero covalent radii (bond-length-based).
    * Metallic alloys -> Goldschmidt metallic radii (close-packed
      atomic centres).
    """
    ionic_classes = {"covalent_oxide", "metal_oxide", "halide",
                     "nitride", "hydride"}
    covalent_classes = {"group_iv", "pnictide", "chalcogenide",
                        "boride", "covalent_carbide"}
    # Infer oxidation state from charge balance when a composition is
    # supplied; falls back to "highest positive" inside get_ionic_radius
    # if inference returns None.
    ox = infer_oxidation_state(sym, composition) if composition else None
    if cls in ionic_classes:
        r = get_ionic_radius(sym, cn=6, oxidation_state=ox)
        if r is None:
            r = covalent_radii[atomic_numbers[sym]]
        return r
    if cls in covalent_classes:
        return covalent_radii[atomic_numbers[sym]]
    if cls == "transition_metal_carbide":
        # TiC, WC, ZrC etc. — cation uses Goldschmidt metallic, C uses
        # Cordero.  This pair of radii + pf=0.60 reproduces a-TiC density
        # ≈ 4.0 g/cm³ (vs crystal 4.93) and a-WC ≈ 14 g/cm³ (vs 15.6),
        # within the documented 5-20 % amorphous-vs-crystal range.
        if sym == "C":
            return covalent_radii[atomic_numbers[sym]]
        r = get_metallic_radius(sym)
        if r is None:
            r = covalent_radii[atomic_numbers[sym]]
        return r
    if cls == "alloy":
        r = get_metallic_radius(sym)
        if r is None:
            r = covalent_radii[atomic_numbers[sym]]
        return r
    # Default: ionic with covalent fallback
    r = get_ionic_radius(sym, cn=6, oxidation_state=ox)
    if r is None:
        r = covalent_radii[atomic_numbers[sym]]
    return r



[docs]
def estimate_cell_length(composition: dict, target_density: float | None = None,
                         packing_factor: float | None = None,
                         density_scale: float = 1.0) -> float:
    """Estimate cubic cell length from composition via sphere packing.

    A single class-aware sphere-packing model handles all amorphous
    compositions:

    .. math::
       V_{\\text{cell}} = \\frac{\\sum_i N_i \\cdot \\tfrac{4}{3}\\pi r_i^3}
                              {f_{\\text{pack}} \\cdot s},

    where :math:`r_i` is the radius for element :math:`i` chosen
    according to the compound's bonding regime (Shannon ionic for ionic
    compounds, Cordero covalent for covalent compounds, Goldschmidt
    metallic for alloys; see :func:`_radius_for_density`).
    :math:`f_{\\text{pack}}` is the class-aware packing fraction stored
    in :data:`PACKING_FACTORS`, and :math:`s` is the user-supplied
    ``density_scale`` (default 1.0).

    If ``target_density`` is supplied, the cell is sized directly from
    it and ``density_scale`` / ``packing_factor`` are ignored.
    """
    if density_scale <= 0:
        raise ValueError(f"density_scale must be > 0 (got {density_scale}).")

    total_mass = sum(atomic_masses[atomic_numbers[s]] * n
                     for s, n in composition.items())

    if target_density is not None:
        vol_cm3 = (total_mass / 6.022e23) / target_density
        vol_A3 = vol_cm3 * 1e24
    else:
        cls = _classify_compound(composition)
        if packing_factor is None:
            packing_factor = PACKING_FACTORS.get(cls, PACKING_FACTORS["default"])

        total_sphere_vol = 0.0
        for sym, count in composition.items():
            r = _radius_for_density(sym, cls, composition=composition)
            total_sphere_vol += count * (4.0 / 3.0) * np.pi * r ** 3
        vol_A3 = total_sphere_vol / (packing_factor * density_scale)

    return vol_A3 ** (1.0 / 3.0)



# ==============================================================================
# Auto-derivation log line
# ==============================================================================


[docs]
def format_auto_derive_summary(
    composition: dict,
    target_cn: dict | None,
    minsep: dict,
    est_density: float,
    cell_length: float,
) -> str:
    """Single-line summary of the auto-derivation chain for the random_gen log.

    Captures the chemistry-informed decisions AmorphGen makes from a bare
    composition: material class, oxidation states (where inferable), per-cation
    target coordination, per-pair bond classification + Pauling Δχ + minsep,
    and the final estimated density / cell length.  Designed to fit on one
    log line for typical 2–4 element systems (e.g. Ga₂O₃ ≈ 200 chars).

    Format:
        [auto-derive] <formula> → <class>, OS{...}, CN{...},
                       minsep{pair:val class [Δχ=val] | ...},
                       ρ=<val> g/cm³, L=<val> Å

    For ionic pairs (where the Pauling Δχ < 1.0 refinement fires) the Δχ
    value is shown so the user can see exactly why the pair was classified
    as ionic vs covalent.  For metallic and anion-packing pairs the Δχ is
    omitted (the type rule alone decides those).
    """
    # Composition string in the order the user provided
    formula = "".join(f"{s}{n}" for s, n in composition.items())

    # Material class
    cls = _classify_compound(composition)

    # Oxidation states — only for cations (positive OS, where inferable)
    os_parts = []
    for elem in composition:
        os_val = infer_oxidation_state(elem, composition)
        if os_val is not None and os_val > 0:
            os_parts.append(f"{elem}:+{os_val}")
    os_str = f"OS{{{', '.join(os_parts)}}}" if os_parts else ""

    # Target CN per element
    cn_str = ""
    if target_cn:
        cn_parts = [f"{e}:{cn}" for e, cn in target_cn.items()]
        cn_str = f"CN{{{', '.join(cn_parts)}}}"

    # Per-pair: minsep + bond class label (+ Δχ for ionic)
    pair_parts = []
    for pair in sorted(minsep):
        a, b = pair.split("-")
        d = minsep[pair]
        if a == b and a in NONMETALS:
            label = "anion-pack"
        else:
            pair_cls = classify_bond(a, b)
            if pair_cls == "ionic":
                dchi = abs(PAULING_EN.get(a, 0.0) - PAULING_EN.get(b, 0.0))
                label = f"ionic Δχ={dchi:.2f}"
            elif pair_cls == "metallic":
                label = "metallic"
            elif pair_cls == "covalent":
                label = "covalent"
            else:
                label = pair_cls
        pair_parts.append(f"{pair}:{d:.2f} {label}")
    minsep_str = f"minsep{{{' | '.join(pair_parts)}}}"

    # Assemble — comma-separated top-level fields
    parts = [f"[auto-derive] {formula} → {cls}"]
    if os_str:
        parts.append(os_str)
    if cn_str:
        parts.append(cn_str)
    parts.append(minsep_str)
    parts.append(f"ρ={est_density:.2f} g/cm³ L={cell_length:.2f} Å")

    return ", ".join(parts)