Source code for amorphgen.utils.radii

"""
amorphgen.utils.radii
----------------------
Atomic radii data, bonding classification, minsep calculation,
and density estimation for random structure generation.

This module contains:
- Shannon ionic radii (CN=4 and CN=6)
- Metallic radii (Goldschmidt CN=12)
- Pauling electronegativities
- Element classification (nonmetal, metalloid)
- Elemental solid densities
- Bond type classification
- Minsep calculation from radii
- Cell volume / density estimation
"""

from __future__ import annotations

import logging
import numpy as np
from ase.data import covalent_radii, atomic_numbers, atomic_masses

logger = logging.getLogger(__name__)


# ==============================================================================
# Radii data tables
# ==============================================================================

# Shannon effective ionic radii (A).
# Keyed as {symbol: {oxidation_state: {CN: radius}}}.
# CN=6 is the default; CN=4 used when target_cn is specified.
# Source: Shannon, R.D. Acta Cryst. A32, 751-767 (1976).
SHANNON_IONIC_RADII = {
    # Cations
    "Li": {1: {4: 0.59, 6: 0.76}},
    "Na": {1: {4: 0.99, 6: 1.02}},
    "K":  {1: {6: 1.38}},
    "Rb": {1: {6: 1.52}},
    "Cs": {1: {6: 1.67}},
    "Mg": {2: {4: 0.57, 6: 0.72}},
    "Ca": {2: {6: 1.00}},
    "Sr": {2: {6: 1.18}},
    "Ba": {2: {6: 1.35}},
    "B":  {3: {4: 0.11, 6: 0.27}},
    "Al": {3: {4: 0.39, 6: 0.535}},
    "Ga": {3: {4: 0.47, 6: 0.620}},
    "In": {3: {4: 0.62, 6: 0.800}},
    "Tl": {3: {6: 0.885}},
    "Si": {4: {4: 0.26, 6: 0.400}},
    "Ge": {4: {4: 0.39, 6: 0.530}},
    "Sn": {2: {6: 0.930}, 4: {4: 0.55, 6: 0.690}},
    "Pb": {2: {6: 1.190}, 4: {6: 0.775}},
    "Ti": {3: {6: 0.670}, 4: {4: 0.42, 6: 0.605}},
    "Zr": {4: {4: 0.59, 6: 0.720}},
    "Hf": {4: {4: 0.58, 6: 0.710}},
    "V":  {3: {6: 0.640}, 5: {4: 0.355, 6: 0.540}},
    "Nb": {5: {4: 0.48, 6: 0.640}},
    "Ta": {5: {6: 0.640}},
    "Cr": {3: {6: 0.615}, 6: {4: 0.26, 6: 0.440}},
    "Mo": {4: {6: 0.650}, 6: {4: 0.41, 6: 0.590}},
    "W":  {4: {6: 0.660}, 6: {4: 0.42, 6: 0.600}},
    "Mn": {2: {4: 0.66, 6: 0.830}, 3: {6: 0.645}, 4: {4: 0.39, 6: 0.530}},
    "Fe": {2: {4: 0.63, 6: 0.780}, 3: {4: 0.49, 6: 0.645}},
    "Co": {2: {4: 0.58, 6: 0.745}, 3: {6: 0.610}},
    "Ni": {2: {4: 0.55, 6: 0.690}},
    "Cu": {1: {4: 0.60, 6: 0.770}, 2: {4: 0.57, 6: 0.730}},
    "Zn": {2: {4: 0.60, 6: 0.740}},
    "Cd": {2: {4: 0.78, 6: 0.950}},
    "Y":  {3: {6: 0.900}},
    "La": {3: {6: 1.032}},
    "Ce": {3: {6: 1.010}, 4: {6: 0.870}},
    "Bi": {3: {6: 1.030}},
    "Sc": {3: {6: 0.745}},
    # Metalloid in high oxidation state oxides (e.g. As2O5).
    # As(3+) is not tabulated by Shannon and Sb(III/V) Shannon radii
    # are too small (lone-pair distortion), giving unphysical sphere
    # volumes; for Sb compounds AmorphGen falls back to the Cordero
    # covalent radius which matches experimental Sb2O3 density better.
    "As": {5: {4: 0.335, 6: 0.460}},
    # Anions (CN-independent)
    "O":  {-2: {6: 1.400}},
    "S":  {-2: {6: 1.840}},
    "Se": {-2: {6: 1.980}},
    "Te": {-2: {6: 2.210}},
    "F":  {-1: {6: 1.330}},
    "Cl": {-1: {6: 1.810}},
    "Br": {-1: {6: 1.960}},
    "I":  {-1: {6: 2.200}},
    "N":  {-3: {6: 1.460}},
    "H":  {-1: {6: 1.400}},
    "C":  {-4: {6: 1.400}},  # Approximate; Shannon does not list C4-
    "P":  {-3: {6: 2.120}, 5: {4: 0.17, 6: 0.380}},
}

# Metallic radii (A, CN=12 Goldschmidt radii).
# Source: Greenwood & Earnshaw, Chemistry of the Elements (1997).
METALLIC_RADII = {
    "Li": 1.52, "Na": 1.86, "K": 2.27, "Rb": 2.48, "Cs": 2.65,
    "Mg": 1.60, "Ca": 1.97, "Sr": 2.15, "Ba": 2.17,
    "Al": 1.43, "Ga": 1.35, "In": 1.67,
    "Tl": 1.70, "Sn": 1.58, "Pb": 1.75, "Ti": 1.47, "Zr": 1.60,
    "Hf": 1.59, "V": 1.34, "Nb": 1.46, "Ta": 1.46, "Cr": 1.28,
    "Mo": 1.39, "W": 1.39, "Mn": 1.27, "Fe": 1.26, "Co": 1.25,
    "Ni": 1.24, "Cu": 1.28, "Zn": 1.37, "Cd": 1.52, "Y": 1.80,
    "La": 1.87, "Ce": 1.83, "Sc": 1.64, "Bi": 1.56, "Ge": 1.37,
    "Si": 1.17,
    # Metalloids (rhombohedral / metallic phase radii)
    "As": 1.39, "Sb": 1.59,
}

# Pauling electronegativities for bonding classification.
# Values are the Allred (1961) revised scale, which retains Pauling's
# original methodology but uses updated thermochemical bond-dissociation
# data.  These are the values that appear in modern chemistry textbooks
# and the CRC Handbook.
#   Allred, A. L.  "Electronegativity values from thermochemical data."
#                  J. Inorg. Nucl. Chem. 17, 215-221 (1961).
#   Originally:  Pauling, L.  The Nature of the Chemical Bond, 3rd ed.,
#                Cornell Univ. Press (1960).
PAULING_EN = {
    "H": 2.20, "Li": 0.98, "Be": 1.57, "B": 2.04, "C": 2.55,
    "N": 3.04, "O": 3.44, "F": 3.98, "Na": 0.93, "Mg": 1.31,
    "Al": 1.61, "Si": 1.90, "P": 2.19, "S": 2.58, "Cl": 3.16,
    "K": 0.82, "Ca": 1.00, "Sc": 1.36, "Ti": 1.54, "V": 1.63,
    "Cr": 1.66, "Mn": 1.55, "Fe": 1.83, "Co": 1.88, "Ni": 1.91,
    "Cu": 1.90, "Zn": 1.65, "Ga": 1.81, "Ge": 2.01, "As": 2.18,
    "Se": 2.55, "Br": 2.96, "Rb": 0.82, "Sr": 0.95, "Y": 1.22,
    "Zr": 1.33, "Nb": 1.60, "Mo": 2.16, "Cd": 1.69, "In": 1.78,
    "Sn": 1.96, "Sb": 2.05, "Te": 2.10, "I": 2.66, "Cs": 0.79,
    "Ba": 0.89, "La": 1.10, "Ce": 1.12, "Hf": 1.30, "Ta": 1.50,
    "W": 2.36, "Pb": 2.33, "Bi": 2.02, "Tl": 1.62,
}

# Conventional anion charges, used to infer the cation oxidation state
# from a compound's stoichiometry by charge balance (e.g. Ti=+4 in
# TiO_2, Sb=+5 in Sb_2O_5). Limited to clear-cut anion-formers; complex
# / mixed-anion / non-stoichiometric systems fall through to the
# default "highest positive Shannon state" fallback.
ANION_CHARGES = {
    "O":  -2,
    "S":  -2,
    "Se": -2,
    "Te": -2,
    "F":  -1,
    "Cl": -1,
    "Br": -1,
    "I":  -1,
    "N":  -3,
    "H":  -1,  # treated as hydride; only valid for binary hydrides
}


[docs] def infer_oxidation_state(sym: str, composition: dict) -> int | None: """Infer the oxidation state of *sym* in *composition* by charge balance. Sums anion charge from :data:`ANION_CHARGES` and assigns the remaining positive charge equally across cations of the queried element when only one cation type is present, or for any element that appears as the only non-anion type. Returns ``None`` when the inference is ambiguous (multiple cation species, non-integer oxidation state, or no anions at all), letting the caller fall back to a default Shannon entry. """ if sym in ANION_CHARGES or sym in NONMETALS: return None # not a cation we resolve here total_neg = 0 for s, n in composition.items(): q = ANION_CHARGES.get(s) if q is not None and s != sym: total_neg += n * q if total_neg == 0: return None # no anions -> can't balance # Identify cations (everything that isn't a known anion-former). cations = {s: n for s, n in composition.items() if s not in ANION_CHARGES} if sym not in cations: return None # Single-cation case: balance directly. if len(cations) == 1: n_cat = cations[sym] ox = -total_neg / n_cat if abs(ox - round(ox)) < 1e-6 and ox > 0: return int(round(ox)) return None # Multi-cation case: try to assign a *unique* state for *sym* by # subtracting fixed-state contributions from other cations whose # Shannon table has only a single positive oxidation state. fixed_pos = 0 unresolved = [] for s, n in cations.items(): states = SHANNON_IONIC_RADII.get(s, {}) positive = [k for k in states if k > 0] if len(positive) == 1 and s != sym: fixed_pos += n * positive[0] elif s != sym: unresolved.append(s) if unresolved: return None # too ambiguous; let highest-positive fallback handle it n_cat = cations[sym] ox = (-total_neg - fixed_pos) / n_cat if abs(ox - round(ox)) < 1e-6 and ox > 0: return int(round(ox)) return None
# ============================================================================== # Element classification # ============================================================================== NONMETALS = frozenset({ "H", "He", "C", "N", "O", "F", "Ne", "P", "S", "Cl", "Ar", "Br", "Kr", "I", "Xe", "At", "Rn", }) # Source: IUPAC recognises B, Si, Ge, As, Sb, Te as metalloids. # Sn is included as it adopts tetrahedral bonding (alpha-Sn, SnO2). METALLOIDS = frozenset({"B", "Si", "Ge", "Sn", "As", "Sb", "Te", "Se"}) # Typical coordination numbers for coordination-aware placement. # Metalloids are always CN=4. For metals, CN depends on the anion context: # oxides → CN=4-6, nitrides → CN=4, halides/sulfides → CN=6. _ALWAYS_TETRAHEDRAL = frozenset({ "Si", "Ge", "B", "P", "As", # metalloids — always CN=4 "Be", # tetrahedral in BeF2, BeO (like Si in SiO2) # Group-12 post-transition metals form zincblende-structured II-VI # semiconductors with chalcogens (ZnS, ZnSe, ZnTe, CdS, CdSe, CdTe, # HgS, HgSe, HgTe — all CN=4 in their stable phases). Without this, # the chalcogenide branch of auto_target_cn defaults them to octahedral # CN=6, which is wrong for these specific compounds. "Zn", "Cd", "Hg", }) # d-block transition metals that form *interstitial* carbides (rocksalt or # close-related dense structures: TiC, ZrC, HfC, VC, NbC, TaC, CrC, MoC, # WC, ...). These compounds want metallic + Cordero radii and a higher # packing factor (≈ 0.60) than covalent carbides (SiC, B4C, P4C3, ...). # Pauling Δχ does NOT cleanly separate these from covalent carbides # because W–C (Δχ = 0.19) and Mo–C (Δχ = 0.39) are interstitial but have # small Δχ — the discriminator has to be cation group, not electronegativity. # Group-11 (Cu, Ag, Au) and Group-12 (Zn, Cd, Hg) do not form stable # carbides and are excluded. _TRANSITION_METAL_CARBIDE_CATIONS = frozenset({ "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", # 3d "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", # 4d "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", # 5d })
[docs] def auto_target_cn(composition: dict) -> tuple[dict | None, int]: """ Auto-detect target coordination numbers and tolerance from composition. Returns (target_cn, cn_tolerance) tuple. Rules: - Metalloids (Si, Ge, B): CN=4, tolerance=0 (strict tetrahedral) - Pure Si/Ge: CN=4, tolerance=0 - Metals in nitrides: CN=4, tolerance=0 (wurtzite tetrahedral) - Metals in halides/sulfides: CN=6, tolerance=0 (strict octahedral) - Metals in oxides: CN=5, tolerance=1 (flexible 4-6 range) Parameters ---------- composition : dict e.g. {"In": 32, "O": 48} Returns ------- (target_cn, cn_tolerance) : tuple target_cn: dict or None cn_tolerance: int (0=strict, 1=flexible) """ elems = set(composition.keys()) anions = elems & NONMETALS cations = elems - anions # Check compound class first (handles semiconductors, chalcogenides) cls = _classify_compound(composition) if cls == "group_iv": # Pure group IV (Si, Ge, SiGe) — tetrahedral CN=4 target_cn = {s: 4 for s in elems} return target_cn, 0 if cls == "pnictide": # III-V compounds (GaAs, InP, InAs) — tetrahedral CN=4 target_cn = {s: 4 for s in elems} return target_cn, 0 if cls == "chalcogenide": # Chalcogenides (ZnS, Sb2Te3, CdTe, GeTe) chalcogens = elems & {"S", "Se", "Te"} target_cn = {} for s in elems: if s not in chalcogens: if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS: target_cn[s] = 4 else: target_cn[s] = 6 return (target_cn if target_cn else None), 0 if cls in {"covalent_carbide", "transition_metal_carbide"}: # Covalent carbides (SiC, B4C) — metalloid CN=4 # Transition-metal carbides (TiC, WC, ZrC) — transition-metal CN=6 (rocksalt) target_cn = {} for s in elems: if s != "C": if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS: target_cn[s] = 4 else: target_cn[s] = 6 return (target_cn if target_cn else None), 0 if cls == "hydride": # Hydrides (LiH, MgH2) — metal CN=6 target_cn = {} for s in elems: if s != "H": target_cn[s] = 6 return (target_cn if target_cn else None), 0 if cls == "boride": # Borides (TiB2, LaB6) — metal CN=6 target_cn = {} for s in elems: if s != "B": target_cn[s] = 6 return (target_cn if target_cn else None), 0 if cls == "alloy": # Metal alloys / intermetallics — typical CN=8-12 # Use CN=8 (BCC-like) with tolerance=2 for flexibility target_cn = {s: 8 for s in elems} return target_cn, 2 if not anions: # Pure metals — typical CN=12 (FCC/HCP) or 8 (BCC) target_cn = {s: 8 for s in elems} return target_cn, 2 # Determine anion context chalcogens = elems & {"S", "Se", "Te"} pnictogens = elems & {"P", "As", "Sb"} has_nitrogen = "N" in anions has_oxide = "O" in anions has_halide = bool(anions & {"Cl", "Br", "I", "F"}) has_chalcogen = bool(chalcogens) has_halide_sulfide = has_halide or has_chalcogen target_cn = {} if has_oxide and not has_nitrogen and not has_halide_sulfide: # Pure oxides: metals CN=5 (flexible 4-6), metalloids CN=4 (strict) has_metal_cation = any(s not in _ALWAYS_TETRAHEDRAL and s not in METALLOIDS for s in cations) for s in cations: if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS: target_cn[s] = 4 else: target_cn[s] = 5 # tolerance=1 only if metal cations present (flexible 4-6) # pure metalloid oxides (SiO2, GeO2, B2O3): tolerance=0 (strict CN=4) return target_cn, 1 if has_metal_cation else 0 elif has_nitrogen and not has_oxide: # Nitrides: all CN=4 strict for s in cations: target_cn[s] = 4 return target_cn, 0 elif has_halide and not has_oxide: # Halides: CN=6 strict, metalloids CN=4 for s in cations: if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS: target_cn[s] = 4 else: target_cn[s] = 6 return target_cn, 0 # Chalcogenides already handled above via cls == "chalcogenide" else: # Mixed or other: metals CN=5 flexible, metalloids CN=4 for s in cations: if s in _ALWAYS_TETRAHEDRAL or s in METALLOIDS: target_cn[s] = 4 else: target_cn[s] = 5 return target_cn, 1
# Elemental solid densities (kg/m3). # Source: CRC Handbook of Chemistry and Physics, 104th edition (2023). # Gaseous elements (O, N, Cl, etc.) are excluded. # (ASE's ase.data.covalent_radii — used elsewhere in this module — is # sourced from Cordero et al. 2008 for elements 1-96.) ELEMENTAL_DENSITIES = { # Solid nonmetals / metalloids (kg/m3) "B": 2340, "C": 2267, "P": 1823, "S": 2070, "Se": 4809, "Te": 6240, "I": 4930, # Metals "Li": 534, "Be": 1850, "Na": 971, "Mg": 1738, "Al": 2700, "Si": 2330, "K": 862, "Ca": 1550, "Sc": 2985, "Ti": 4507, "V": 6110, "Cr": 7190, "Mn": 7440, "Fe": 7874, "Co": 8900, "Ni": 8908, "Cu": 8960, "Zn": 7134, "Ga": 5904, "Ge": 5323, "As": 5727, "Sr": 2640, "Y": 4472, "Zr": 6506, "Nb": 8570, "Mo": 10220, "Cd": 8650, "In": 7310, "Sn": 7287, "Sb": 6685, "Ba": 3510, "La": 6145, "Ce": 6770, "Hf": 13310, "Ta": 16654, "W": 19250, "Pb": 11340, "Bi": 9807, "Tl": 11850, } # ============================================================================== # Scale factors # ============================================================================== # Applied to sum of radii to give minimum interatomic distance. # Scientific basis: # - Shannon (1976): ionic radii from ~900 oxide/fluoride crystal structures # - AIRSS buildcell: uses 80-90% of equilibrium distances SCALE_FACTORS = { "ionic": 0.80, # M-X bonds (shorter -> better coordination) "covalent": 0.80, # Directional bonds "metallic": 0.85, # M-M (keep higher to prevent M-M clustering) "anion_packing": 0.80, # Small anion packing (O-O, F-F, N-N) "anion_packing_large": 0.70, # Large anion packing (Cl-Cl, Br-Br, I-I, S-S) } # Shannon ionic radius threshold for small vs large anions. _LARGE_ANION_RADIUS_THRESHOLD = 1.5 # Maximum minsep caps (A). Prevents overly large distances # that make random placement impossible. _MAX_SAME_ELEMENT_MINSEP = 2.80 # M-M, X-X same-element _MAX_IONIC_MINSEP = 3.00 # M-X ionic bonds _MAX_ANION_MINSEP = 3.00 # X-X anion packing # ============================================================================== # Bonding classification # ==============================================================================
[docs] def classify_bond(sym_a: str, sym_b: str) -> str: """ Classify a pair of elements by bonding type. Uses an element-type classification rule (nonmetal / metalloid / metal membership) with a Pauling-electronegativity refinement: pairs that the type rules would call "ionic" but whose Pauling electronegativity difference is small (Δχ < 1.0) are reclassified as "covalent". This catches III–V semiconductors (GaAs Δχ=0.37, AlAs ~0.6) and related compounds where the chemistry is predominantly covalent and Shannon ionic radii give unrealistically small inter-atomic distances. Returns one of: "ionic", "covalent", "metallic". """ a_is_nonmetal = sym_a in NONMETALS b_is_nonmetal = sym_b in NONMETALS a_is_metalloid = sym_a in METALLOIDS b_is_metalloid = sym_b in METALLOIDS # Helper: small Δχ ⇒ predominantly covalent regardless of type rules. def _is_covalent_by_en(): en_a = PAULING_EN.get(sym_a) en_b = PAULING_EN.get(sym_b) if en_a is None or en_b is None: return False return abs(en_a - en_b) < 1.0 if a_is_metalloid and b_is_metalloid: return "covalent" if (a_is_metalloid and b_is_nonmetal) or (b_is_metalloid and a_is_nonmetal): # Was unconditionally "ionic" — but BN, BC etc. are covalent. return "covalent" if _is_covalent_by_en() else "ionic" if a_is_metalloid or b_is_metalloid: # Metal + metalloid (e.g.\ Ga + As). Was unconditionally "ionic" — # but III-V semiconductors are predominantly covalent. return "covalent" if _is_covalent_by_en() else "ionic" if a_is_nonmetal != b_is_nonmetal: # Metal + nonmetal (e.g.\ Ga + N). Was unconditionally "ionic" — # check Δχ for borderline cases (GaN Δχ=1.23 stays ionic; carbides # of low-EN metals like Ti-C Δχ=1.0 may flip). return "covalent" if _is_covalent_by_en() else "ionic" if a_is_nonmetal and b_is_nonmetal: return "covalent" return "metallic"
# ============================================================================== # Radius lookup functions # ==============================================================================
[docs] def get_ionic_radius(sym: str, cn: int | None = None, oxidation_state: int | None = None) -> float | None: """ Get Shannon ionic radius for an element. Parameters ---------- sym : str Element symbol. cn : int, optional Coordination number. If provided, use CN-specific radius. Defaults to CN=6. oxidation_state : int, optional Specific oxidation state to look up (e.g.\\ ``5`` for Sb in Sb_2O_5). If supplied and present in the Shannon table the matching radius is returned. If supplied but absent, falls through to the default selection. When ``None`` (default), anions return their most-negative state and cations return their **highest** positive state - usually the right choice for binary oxides / halides / nitrides where the higher state dominates (TiO_2, V_2O_5, WO_3 etc.). Returns ------- float or None """ if sym not in SHANNON_IONIC_RADII: return None states = SHANNON_IONIC_RADII[sym] if oxidation_state is not None and oxidation_state in states: ox = oxidation_state elif oxidation_state is not None and oxidation_state > 0: # Requested cation state is missing from Shannon; pick the # closest available positive state (preferring same-or-higher). # This handles cases like ZrN, where charge balance suggests # Zr(III) but Shannon only lists Zr(IV); using Zr(IV) is closer # than dropping all the way to a covalent fallback. positive = sorted(k for k in states.keys() if k > 0) if not positive: return None ox = min(positive, key=lambda k: (abs(k - oxidation_state), -k)) elif sym in NONMETALS: ox = min(states.keys()) # most-negative anion state else: positive = {k: v for k, v in states.items() if k > 0} if not positive: return None ox = max(positive.keys()) # highest cation state when unspecified cn_dict = states[ox] target_cn = cn if cn is not None else 6 if target_cn in cn_dict: return cn_dict[target_cn] elif 6 in cn_dict: return cn_dict[6] else: return next(iter(cn_dict.values()))
[docs] def get_metallic_radius(sym: str) -> float | None: """Get metallic radius for an element. Returns None if unavailable.""" return METALLIC_RADII.get(sym)
[docs] def get_effective_radius(sym: str) -> float: """ Get the most appropriate radius for volume estimation. - Metals -> metallic radius - Anion-formers (O, S, Cl, ...) -> Shannon ionic radius - Metalloids (Si, Ge, ...) -> metallic radius if available - Fallback -> covalent radius """ if sym not in NONMETALS: r = get_metallic_radius(sym) if r is not None: return r if sym in NONMETALS and sym not in {"He", "Ne", "Ar", "Kr", "Xe", "Rn"}: r = get_ionic_radius(sym) if r is not None: return r return covalent_radii[atomic_numbers[sym]]
# ============================================================================== # Minsep calculation # ==============================================================================
[docs] def default_minsep(symbols: list[str], scale: float = 0.85, target_cn: dict | None = None) -> dict: """ Build a minsep dict for all element pairs. Uses bonding-type-aware radii. When target_cn is provided, uses CN-specific Shannon radii (e.g. CN=4 for Si in SiO2). Parameters ---------- symbols : list of str scale : float Fallback scale factor (default 0.85). target_cn : dict, optional e.g. {"Si": 4, "O": 2}. Uses CN-specific radii when available. Returns ------- dict Minimum separations keyed as "A-B" with A <= B alphabetically. """ unique = sorted(set(symbols)) minsep = {} cn_map = target_cn or {} def _cn_radius(sym): cn = cn_map.get(sym) return get_ionic_radius(sym, cn=cn) for i, s1 in enumerate(unique): for s2 in unique[i:]: key = f"{s1}-{s2}" if s1 <= s2 else f"{s2}-{s1}" bond_type = classify_bond(s1, s2) sf = SCALE_FACTORS.get(bond_type, scale) if bond_type == "ionic": r1 = _cn_radius(s1) r2 = _cn_radius(s2) if r1 is not None and r2 is not None: d_ionic = min((r1 + r2) * sf, _MAX_IONIC_MINSEP) minsep[key] = d_ionic logger.info( " minsep %s = %.2f A (ionic: Shannon %.3f + %.3f, " "scale=%.2f)", key, minsep[key], r1, r2, sf ) continue elif bond_type == "metallic": r1 = get_metallic_radius(s1) r2 = get_metallic_radius(s2) if r1 is not None and r2 is not None: d_metallic = (r1 + r2) * sf has_anion = any(s in NONMETALS for s in unique) if has_anion: ri1 = _cn_radius(s1) ri2 = _cn_radius(s2) anion_syms = [s for s in unique if s in NONMETALS] r_anion = max( (_cn_radius(a) or 0.0) for a in anion_syms ) if anion_syms else None # Geometric estimate for edge-sharing polyhedra: # d(M-M) = sqrt(2) * d(M-X) * sf # Uses the same scale factor as ionic/metallic (0.85). if ri1 is not None and r_anion and r_anion > 0: d_geom_1 = 2**0.5 * (ri1 + r_anion) * sf else: d_geom_1 = 0.0 if ri2 is not None and r_anion and r_anion > 0: d_geom_2 = 2**0.5 * (ri2 + r_anion) * sf else: d_geom_2 = 0.0 d_geometric = (d_geom_1 + d_geom_2) / 2 if ( d_geom_1 > 0 and d_geom_2 > 0 ) else max(d_geom_1, d_geom_2) d_mm = min(max(d_metallic, d_geometric), _MAX_SAME_ELEMENT_MINSEP) minsep[key] = d_mm source = "metallic" if d_metallic >= d_geometric else "geometric" capped = " (capped)" if d_mm < max(d_metallic, d_geometric) else "" logger.info( " minsep %s = %.2f A (M-M ionic context, %s: " "met=%.2f, geom=%.2f, scale=%.2f)%s", key, minsep[key], source, d_metallic, d_geometric, sf, capped ) else: minsep[key] = min(d_metallic, _MAX_SAME_ELEMENT_MINSEP) logger.info( " minsep %s = %.2f A (metallic: %.3f + %.3f, " "scale=%.2f)", key, minsep[key], r1, r2, sf ) continue elif bond_type == "covalent": a_is_metalloid = s1 in METALLOIDS b_is_metalloid = s2 in METALLOIDS if a_is_metalloid or b_is_metalloid: # Use metallic radii if available (better for pure elements), # fall back to covalent radii r1 = get_metallic_radius(s1) or covalent_radii[atomic_numbers[s1]] r2 = get_metallic_radius(s2) or covalent_radii[atomic_numbers[s2]] d_covalent = (r1 + r2) * sf # In anion context (oxide, nitride, etc.), metalloid-metalloid # distances are much larger due to bridging anions (Si-O-Si). # Use geometric estimate: sqrt(2) * d(X-anion) has_anion = any(s in NONMETALS for s in unique) if has_anion and s1 == s2: ri = _cn_radius(s1) anion_syms = [s for s in unique if s in NONMETALS] r_anion = max( (_cn_radius(a) or 0.0) for a in anion_syms ) if anion_syms else None if ri is not None and r_anion and r_anion > 0: d_geometric = 2**0.5 * (ri + r_anion) * sf d_xx = min(max(d_covalent, d_geometric), _MAX_SAME_ELEMENT_MINSEP) minsep[key] = d_xx source = "covalent" if d_covalent >= d_geometric else "geometric" capped = " (capped)" if d_xx < max(d_covalent, d_geometric) else "" logger.info( " minsep %s = %.2f A (metalloid anion context, %s: " "cov=%.2f, geom=%.2f, scale=%.2f)%s", key, minsep[key], source, d_covalent, d_geometric, sf, capped ) continue minsep[key] = d_covalent logger.info( " minsep %s = %.2f A (covalent/metalloid: " "%.3f + %.3f, scale=%.2f)", key, minsep[key], r1, r2, sf ) continue # Pure single-element nonmetal composition (e.g.\ pure C, # pure S, pure P). This is a genuine covalent homo-element # bond, not an anion-anion contact in a compound, so use # Cordero covalent radii rather than the much larger # Shannon anion radius. Heteroelement nonmetal pairs and # same-element pairs in compounds (O-O in TiO_2, Cl-Cl in # LiCl, ...) still go through anion-packing below. if s1 == s2 and len(unique) == 1: r = covalent_radii[atomic_numbers[s1]] d_cov_homo = 2 * r * sf minsep[key] = min(d_cov_homo, _MAX_SAME_ELEMENT_MINSEP) logger.info( " minsep %s = %.2f A (covalent homo-element: " "Cordero %.3f, scale=%.2f)", key, minsep[key], r, sf ) continue r1 = _cn_radius(s1) r2 = _cn_radius(s2) if r1 is not None and r2 is not None: avg_r = (r1 + r2) / 2 if avg_r >= _LARGE_ANION_RADIUS_THRESHOLD: sf_anion = SCALE_FACTORS.get("anion_packing_large", 0.70) label = "large anion" else: sf_anion = SCALE_FACTORS.get("anion_packing", 0.80) label = "small anion" minsep[key] = min((r1 + r2) * sf_anion, _MAX_ANION_MINSEP) logger.info( " minsep %s = %.2f A (%s packing: Shannon " "%.3f + %.3f, scale=%.2f)", key, minsep[key], label, r1, r2, sf_anion ) continue # Fallback: covalent radii (with cap) r1 = covalent_radii[atomic_numbers[s1]] r2 = covalent_radii[atomic_numbers[s2]] d_fallback = (r1 + r2) * scale if s1 == s2: d_fallback = min(d_fallback, _MAX_SAME_ELEMENT_MINSEP) else: d_fallback = min(d_fallback, _MAX_IONIC_MINSEP) minsep[key] = d_fallback logger.info( " minsep %s = %.2f A (fallback: covalent %.3f + %.3f, " "scale=%.2f)", key, minsep[key], r1, r2, scale ) return minsep
# ============================================================================== # Density / cell volume estimation # ==============================================================================
[docs] def estimate_density(composition: dict, amorphous_factor: float = 0.80) -> float | None: """ Estimate density from elemental solid densities. Returns density in g/cm3, or None if any element lacks density data. """ total_mass = 0.0 total_vol = 0.0 for sym, count in composition.items(): dens = ELEMENTAL_DENSITIES.get(sym) if dens is None or dens == 0: return None dens_gcm3 = dens / 1000.0 mass = atomic_masses[atomic_numbers[sym]] * count total_mass += mass total_vol += mass / dens_gcm3 if total_vol == 0: return None crystal_density = total_mass / total_vol return crystal_density * amorphous_factor
# Packing factors for density estimation by material class. # # The same formula V_cell = sum(sphere_vol) / f_pack is used for all # classes, but the radius type underneath the spheres differs by # bonding regime (see :func:`_radius_for_density`): # - Ionic (oxides, halides, nitrides, hydrides) -> Shannon ionic CN=6 # - typical f_pack range 0.50-0.58 # - Covalent (group-IV, pnictides, chalcogenides, borides, carbides) # -> Cordero covalent. Covalent radii reflect bond lengths, so # the sphere is much smaller than the actual atomic volume; f_pack # is correspondingly smaller (~0.20-0.40). # - Metallic (alloys) -> Goldschmidt metallic. Metallic radii are # close to atomic centres in close-packed metals; f_pack ~0.65-0.75. # # Values calibrated to give experimental amorphous densities for # representative systems in each class. PACKING_FACTORS = { # Ionic — Shannon ionic radii "covalent_oxide": 0.50, # SiO2, GeO2, B2O3 (network formers) "metal_oxide": 0.52, # In2O3, TiO2, Al2O3, Ga2O3 "halide": 0.58, # Li2ZrCl6, LiF "nitride": 0.52, # AlN, GaN, Si3N4 "hydride": 0.55, # LiH, MgH2, NaAlH4 # Covalent — Cordero covalent radii "group_iv": 0.30, # Si, Ge, C (tetrahedral covalent network) "pnictide": 0.32, # GaAs, InP, InAs (III-V compounds) "chalcogenide": 0.30, # ZnS, CdTe, GeTe (II-VI / IV-VI) "boride": 0.50, # TiB2, MgB2, ZrB2 # Carbides split by cation chemistry (2026-05-11) — see # _TRANSITION_METAL_CARBIDE_CATIONS and _classify_compound for the routing. "covalent_carbide": 0.32, # SiC, B4C — Cordero radii, open network "transition_metal_carbide": 0.60, # TiC, WC, ZrC — rocksalt-like; uses # Goldschmidt metallic for cation, # Cordero for C (see _radius_for_density) # Metallic — Goldschmidt metallic radii. Lowered from 0.70 to 0.60 # (2026-05-10) so random sequential placement has enough headroom: # random close packing of equal hard spheres caps at ~0.64, and 0.70 # left no room for the placement attempt budget — Cu/Au/Ti/Ni etc. # all failed single-call generate_random with auto density. At 0.60 # the cell is ~5% larger; predicted density for a-Cu is ~7.2 g/cm³ # vs experimental amorphous ~7.7-8.5 (~10% low — within design-target # accuracy and easily overridden via target_density for production runs). "alloy": 0.60, # NiTi, CuZr, brass, pure metals "default": 0.52, # fallback (ionic) } # NOTE: ``AMORPHOUS_DENSITY_FACTORS`` and the associated elemental # density-mixing branch were retired in favour of the unified # class-aware sphere-packing model. ``estimate_density()`` (which # implements the mixing rule) is kept as a public helper for users # who want the mass-averaged crystal density directly. def _classify_compound(composition: dict) -> str: """ Classify a compound by material class for packing factor selection. Returns one of: "group_iv", "pnictide", "chalcogenide", "covalent_oxide", "metal_oxide", "halide", "nitride", "carbide", "hydride", "boride", "alloy", "default". """ elems = set(composition.keys()) has_metal = any(s not in NONMETALS and s not in METALLOIDS for s in elems) has_metalloid = any(s in METALLOIDS for s in elems) anions = elems & NONMETALS chalcogens = elems & {"S", "Se", "Te"} pnictogens = elems & {"P", "As", "Sb"} # Group IV: pure Si, Ge, SiGe (all metalloid, no chalcogen/pnictogen) all_metalloid = all(s in METALLOIDS for s in elems) if all_metalloid and not chalcogens and not pnictogens: return "group_iv" # Pnictides: III-V compounds (GaAs, InP, InAs, GaSb) # Must have at least one non-pnictogen element (the group III metal) # Pure pnictogens (Sb, As) are not III-V compounds # P is both NONMETAL and pnictogen — exclude from anion check here non_pnictogen = elems - pnictogens other_anions = anions - {"P", "As"} # P/As can act as pnictogen anion if pnictogens and non_pnictogen and not other_anions and not chalcogens: return "pnictide" # Chalcogenides (S, Se, Te) — check before oxides if chalcogens and "O" not in anions: return "chalcogenide" # Oxides if "O" in anions: if has_metal: return "metal_oxide" elif has_metalloid: return "covalent_oxide" # Halides if anions & {"Cl", "Br", "I", "F"}: return "halide" # Nitrides if "N" in anions: return "nitride" # Carbides — split by cation chemistry # Transition-metal carbides (TiC, WC, ZrC, ...) form rocksalt-like dense # structures with d-block transition metals and need a higher packing # factor + Goldschmidt metallic radii for the cation. # Covalent carbides (SiC, B4C, ...) keep Cordero radii + low packing # factor. See _TRANSITION_METAL_CARBIDE_CATIONS for the cation set. if "C" in anions: cations = elems - anions if cations & _TRANSITION_METAL_CARBIDE_CATIONS: return "transition_metal_carbide" return "covalent_carbide" # Hydrides if "H" in anions: return "hydride" # Borides (B as anion-like element with metals) if "B" in elems and has_metal and not anions and not chalcogens: return "boride" # Metal alloys / intermetallics (no anions) if not anions and not chalcogens and has_metal: return "alloy" return "default" def _radius_for_density(sym: str, cls: str, composition: dict | None = None) -> float: """Pick the radius type appropriate for the composition's bonding character. The density estimator uses sphere-packing on per-element spheres; different bonding regimes need different radius tables so that the sphere volume is a faithful proxy for the volume the atom occupies in the corresponding amorphous structure: * Ionic compounds (oxides, halides, nitrides, hydrides) -> Shannon ionic radii at CN=6 (cation-anion contact length). * Covalent compounds (group-IV, pnictides, chalcogenides, borides, carbides) -> Cordero covalent radii (bond-length-based). * Metallic alloys -> Goldschmidt metallic radii (close-packed atomic centres). """ ionic_classes = {"covalent_oxide", "metal_oxide", "halide", "nitride", "hydride"} covalent_classes = {"group_iv", "pnictide", "chalcogenide", "boride", "covalent_carbide"} # Infer oxidation state from charge balance when a composition is # supplied; falls back to "highest positive" inside get_ionic_radius # if inference returns None. ox = infer_oxidation_state(sym, composition) if composition else None if cls in ionic_classes: r = get_ionic_radius(sym, cn=6, oxidation_state=ox) if r is None: r = covalent_radii[atomic_numbers[sym]] return r if cls in covalent_classes: return covalent_radii[atomic_numbers[sym]] if cls == "transition_metal_carbide": # TiC, WC, ZrC etc. — cation uses Goldschmidt metallic, C uses # Cordero. This pair of radii + pf=0.60 reproduces a-TiC density # ≈ 4.0 g/cm³ (vs crystal 4.93) and a-WC ≈ 14 g/cm³ (vs 15.6), # within the documented 5-20 % amorphous-vs-crystal range. if sym == "C": return covalent_radii[atomic_numbers[sym]] r = get_metallic_radius(sym) if r is None: r = covalent_radii[atomic_numbers[sym]] return r if cls == "alloy": r = get_metallic_radius(sym) if r is None: r = covalent_radii[atomic_numbers[sym]] return r # Default: ionic with covalent fallback r = get_ionic_radius(sym, cn=6, oxidation_state=ox) if r is None: r = covalent_radii[atomic_numbers[sym]] return r
[docs] def estimate_cell_length(composition: dict, target_density: float | None = None, packing_factor: float | None = None, density_scale: float = 1.0) -> float: """Estimate cubic cell length from composition via sphere packing. A single class-aware sphere-packing model handles all amorphous compositions: .. math:: V_{\\text{cell}} = \\frac{\\sum_i N_i \\cdot \\tfrac{4}{3}\\pi r_i^3} {f_{\\text{pack}} \\cdot s}, where :math:`r_i` is the radius for element :math:`i` chosen according to the compound's bonding regime (Shannon ionic for ionic compounds, Cordero covalent for covalent compounds, Goldschmidt metallic for alloys; see :func:`_radius_for_density`). :math:`f_{\\text{pack}}` is the class-aware packing fraction stored in :data:`PACKING_FACTORS`, and :math:`s` is the user-supplied ``density_scale`` (default 1.0). If ``target_density`` is supplied, the cell is sized directly from it and ``density_scale`` / ``packing_factor`` are ignored. """ if density_scale <= 0: raise ValueError(f"density_scale must be > 0 (got {density_scale}).") total_mass = sum(atomic_masses[atomic_numbers[s]] * n for s, n in composition.items()) if target_density is not None: vol_cm3 = (total_mass / 6.022e23) / target_density vol_A3 = vol_cm3 * 1e24 else: cls = _classify_compound(composition) if packing_factor is None: packing_factor = PACKING_FACTORS.get(cls, PACKING_FACTORS["default"]) total_sphere_vol = 0.0 for sym, count in composition.items(): r = _radius_for_density(sym, cls, composition=composition) total_sphere_vol += count * (4.0 / 3.0) * np.pi * r ** 3 vol_A3 = total_sphere_vol / (packing_factor * density_scale) return vol_A3 ** (1.0 / 3.0)
# ============================================================================== # Auto-derivation log line # ==============================================================================
[docs] def format_auto_derive_summary( composition: dict, target_cn: dict | None, minsep: dict, est_density: float, cell_length: float, ) -> str: """Single-line summary of the auto-derivation chain for the random_gen log. Captures the chemistry-informed decisions AmorphGen makes from a bare composition: material class, oxidation states (where inferable), per-cation target coordination, per-pair bond classification + Pauling Δχ + minsep, and the final estimated density / cell length. Designed to fit on one log line for typical 2–4 element systems (e.g. Ga₂O₃ ≈ 200 chars). Format: [auto-derive] <formula> → <class>, OS{...}, CN{...}, minsep{pair:val class [Δχ=val] | ...}, ρ=<val> g/cm³, L=<val> Å For ionic pairs (where the Pauling Δχ < 1.0 refinement fires) the Δχ value is shown so the user can see exactly why the pair was classified as ionic vs covalent. For metallic and anion-packing pairs the Δχ is omitted (the type rule alone decides those). """ # Composition string in the order the user provided formula = "".join(f"{s}{n}" for s, n in composition.items()) # Material class cls = _classify_compound(composition) # Oxidation states — only for cations (positive OS, where inferable) os_parts = [] for elem in composition: os_val = infer_oxidation_state(elem, composition) if os_val is not None and os_val > 0: os_parts.append(f"{elem}:+{os_val}") os_str = f"OS{{{', '.join(os_parts)}}}" if os_parts else "" # Target CN per element cn_str = "" if target_cn: cn_parts = [f"{e}:{cn}" for e, cn in target_cn.items()] cn_str = f"CN{{{', '.join(cn_parts)}}}" # Per-pair: minsep + bond class label (+ Δχ for ionic) pair_parts = [] for pair in sorted(minsep): a, b = pair.split("-") d = minsep[pair] if a == b and a in NONMETALS: label = "anion-pack" else: pair_cls = classify_bond(a, b) if pair_cls == "ionic": dchi = abs(PAULING_EN.get(a, 0.0) - PAULING_EN.get(b, 0.0)) label = f"ionic Δχ={dchi:.2f}" elif pair_cls == "metallic": label = "metallic" elif pair_cls == "covalent": label = "covalent" else: label = pair_cls pair_parts.append(f"{pair}:{d:.2f} {label}") minsep_str = f"minsep{{{' | '.join(pair_parts)}}}" # Assemble — comma-separated top-level fields parts = [f"[auto-derive] {formula}{cls}"] if os_str: parts.append(os_str) if cn_str: parts.append(cn_str) parts.append(minsep_str) parts.append(f"ρ={est_density:.2f} g/cm³ L={cell_length:.2f} Å") return ", ".join(parts)