Source code for amorphgen.analysis.analyser

"""
StructureAnalyser — main class that delegates to submodules.

Usage
-----
    from amorphgen.analysis import StructureAnalyser

    sa = StructureAnalyser("output_dir/", cutoff="auto")
    sa.summary()
    sa.plot(output_dir="plots/")
"""

from __future__ import annotations

import os
import glob
import numpy as np
from collections import defaultdict
from ase.io import read

from .cutoff import auto_cutoff_minsep, auto_cutoff_rdf
from .structure import (compute_density, compute_coordination,
                        compute_bond_distances, compute_all_angles,
                        compute_bond_angle_stats, build_neighbour_dict)
from .rdf import (compute_rdf, compute_structure_factor,
                  compute_averaged_rdf)
from .rings import compute_ring_statistics
from .voronoi import compute_voronoi
from .energy import compute_energy_ranking
from .plotting import plot_analysis


[docs] class StructureAnalyser: """ Analyse amorphous structures: density, coordination, distances, angles, RDF, S(q), rings, Voronoi, energy ranking. Parameters ---------- source : str or list of str Path to a structure file, a directory, or a list of file paths. cutoff : float, dict, or str - float: single cutoff (A) for all pairs - dict: pair-specific, e.g. {"Si-O": 2.2} - "auto": from bonding radii (fast) - "auto-rdf": from RDF first minimum (accurate) """
[docs] def __init__(self, source, cutoff="auto"): """ Parameters ---------- source : str, list of str, or list of Atoms Path to a structure file, a directory of structure files, a list of file paths, or a list of ASE Atoms objects. cutoff : float, dict, or str Neighbour cutoff for CN, bond distances, and angles. - float: single cutoff (A) for all pairs - dict: pair-specific, e.g. {"Si-O": 2.2, "O-O": 2.8} - "auto": from bonding radii (fast, default) - "auto-rdf": from RDF first minimum (more accurate) """ self.atoms_list = self._load(source) if not self.atoms_list: raise ValueError("No structures loaded. Check the source path.") # Warn if structures have different compositions formulas = set(a.get_chemical_formula(mode="hill") for a in self.atoms_list) if len(formulas) > 1: import warnings warnings.warn( f"Structures have different compositions: {formulas}. " f"Analysis assumes uniform composition; energy/atom and " f"CN statistics may mix incompatible systems." ) self._cutoff_mode = cutoff if cutoff == "auto": self.cutoff = auto_cutoff_minsep(self.atoms_list) self._cutoff_mode = "auto (minsep)" elif cutoff == "auto-rdf": self.cutoff = auto_cutoff_rdf(self.atoms_list) self._cutoff_mode = "auto (RDF)" else: self.cutoff = cutoff self._max_cutoff = ( max(self.cutoff.values()) if isinstance(self.cutoff, dict) else self.cutoff )
@staticmethod def _load(source): from ase import Atoms if isinstance(source, list): if source and isinstance(source[0], Atoms): return source return [read(f) for f in source] if os.path.isdir(source): files = sorted( glob.glob(os.path.join(source, "*.xyz")) + glob.glob(os.path.join(source, "*.extxyz")) + glob.glob(os.path.join(source, "*.cif")) + glob.glob(os.path.join(source, "*.vasp")) ) if not files: raise FileNotFoundError(f"No structure files in {source}/") return [read(f) for f in files] return [read(source)] def _get_cutoff(self, s1, s2): if isinstance(self.cutoff, (int, float)): return self.cutoff key1 = f"{s1}-{s2}" key2 = f"{s2}-{s1}" return self.cutoff.get(key1, self.cutoff.get(key2, self._max_cutoff)) def _build_neighbour_dict(self, atoms): return build_neighbour_dict(atoms, self._max_cutoff, self._get_cutoff) # ── Core analysis methods ────────────────────────────────────────────
[docs] def density(self): """Compute density (g/cm3) for each structure. Returns ------- dict {"values": list[float], "mean": float, "std": float} """ return compute_density(self.atoms_list)
[docs] def coordination(self, pair=None): """Compute coordination numbers across all structures. Parameters ---------- pair : str, optional Pair to analyse, e.g. "Si-O". If None, all pairs computed. Returns ------- dict Keyed by pair string. Each value is a dict with "mean", "std", "min", "max", "distribution", "total_atoms". """ return compute_coordination(self.atoms_list, self._max_cutoff, self._get_cutoff, pair)
[docs] def bond_distances(self, pair=None): """Compute bond distance statistics. Parameters ---------- pair : str, optional Pair to analyse, e.g. "Si-O". If None, all pairs computed. Returns ------- dict Keyed by pair string. Each value is a dict with "mean", "std", "min", "max", "count". """ return compute_bond_distances(self.atoms_list, self._max_cutoff, self._get_cutoff, pair)
def _compute_all_angles(self, triplet=None, bonding_only=True): return compute_all_angles(self.atoms_list, self._max_cutoff, self._get_cutoff, triplet, bonding_only)
[docs] def bond_angles(self, triplet=None): """Compute bond angle statistics. Parameters ---------- triplet : str, optional Triplet to analyse, e.g. "O-Si-O". If None, all triplets computed. Returns ------- dict Keyed by triplet string. Each value is a dict with "mean", "std", "min", "max", "count" (angles in degrees). """ raw = self._compute_all_angles(triplet) return compute_bond_angle_stats(raw)
# ── RDF and S(q) ────────────────────────────────────────────────────
[docs] def rdf(self, pair=None, rmax=None, nbins=200, sigma=0.0): """Compute the radial distribution function g(r). Parameters ---------- pair : str, optional Pair to analyse, e.g. "Si-O". If None, total RDF. rmax : float, optional Maximum radius in A. Auto-detected from cell if None. nbins : int Number of histogram bins (default 200). sigma : float Gaussian smearing width in A (default 0.0 = no smearing). Use 0.02-0.05 for comparison with experiment. Returns ------- dict {"r": list[float], "g_r": list[float]} """ return compute_rdf(self.atoms_list, pair, rmax, nbins, sigma=sigma)
[docs] def structure_factor(self, pair=None, qmax=15.0, nq=300, rmax=None): """Compute the structure factor S(q) from g(r) via Fourier transform. Parameters ---------- pair : str, optional Pair to analyse. If None, total S(q). qmax : float Maximum q in 1/A (default 15.0). nq : int Number of q points (default 300). rmax : float, optional Maximum radius for RDF used in the transform. Returns ------- dict {"q": list[float], "s_q": list[float]} """ return compute_structure_factor(self.atoms_list, pair, qmax, nq, rmax)
[docs] def averaged_rdf(self, pair=None, rmax=None, nbins=200): """Compute RDF per structure with mean and standard deviation. Parameters ---------- pair : str, optional Pair to analyse, e.g. "Si-O". If None, total RDF. rmax : float, optional Maximum radius in A. Auto-detected from cell if None. nbins : int Number of histogram bins (default 200). Returns ------- dict {"r": list, "g_r_mean": list, "g_r_std": list, "n_structures": int} """ return compute_averaged_rdf(self.atoms_list, pair, rmax, nbins)
# ── Advanced analysis ───────────────────────────────────────────────
[docs] def ring_statistics(self, bond_pair=None, cutoff=None, max_ring=12): """Compute ring size statistics for network-forming structures. Parameters ---------- bond_pair : tuple of str, optional Bond pair to trace, e.g. ("Si", "O"). Auto-detected if None. cutoff : float or dict, optional Bond cutoff. Uses analyser cutoff if None. max_ring : int Maximum ring size to search (default 12). Returns ------- dict {"ring_sizes": list, "counts": list, "fractions": list, "bond_pair": tuple, "total_rings": int} """ return compute_ring_statistics( self.atoms_list, bond_pair, cutoff, max_ring, self._get_cutoff)
[docs] def voronoi(self, element=None): """Compute Voronoi tessellation indices (n3, n4, n5, n6). Parameters ---------- element : str, optional Element to analyse. If None, all atoms included. Returns ------- dict {"indices": list, "distribution": dict, "top_10": list, "mean_faces": float, "total_atoms": int} """ return compute_voronoi(self.atoms_list, element)
[docs] def energy_ranking(self): """Rank structures by potential energy per atom. Reads energy from atoms.info or attached calculator. Returns ------- dict {"energies_per_atom": dict, "ranking": list, "best": int, "worst": int, "best_energy": float, "worst_energy": float, "spread": float}. Returns "warning" key if no energy data. """ return compute_energy_ranking(self.atoms_list)
# ── Multi-structure averaging ───────────────────────────────────────
[docs] def averaged_cn(self, pair=None): """Compute per-structure mean CN with overall mean and std. Parameters ---------- pair : str, optional Pair to analyse, e.g. "Si-O". If None, all pairs computed. Returns ------- dict Keyed by pair string. Each value is a dict with "mean_per_structure", "overall_mean", "overall_std", "n_structures". """ cn_per_structure = defaultdict(list) for atoms in self.atoms_list: nbr_dict, syms = self._build_neighbour_dict(atoms) unique = sorted(set(syms)) n = len(atoms) for s1 in unique: for s2 in unique: if pair is not None: p = pair.split("-") if not ((s1 == p[0] and s2 == p[1]) or (s1 == p[1] and s2 == p[0])): continue key = f"{s1}-{s2}" cns = [] for a in range(n): if syms[a] != s1: continue cn = sum(1 for _, sj, _, _ in nbr_dict[a] if sj == s2) cns.append(cn) if cns: cn_per_structure[key].append(np.mean(cns)) result = {} for key, means in cn_per_structure.items(): means = np.array(means) result[key] = { "mean_per_structure": means.tolist(), "overall_mean": float(np.mean(means)), "overall_std": float(np.std(means)), "n_structures": len(means), } return result
# ── Summary and reporting ───────────────────────────────────────────
[docs] def summary(self, show_angles=True): lines = [] bar = "=" * 65 n_structs = len(self.atoms_list) formula = self.atoms_list[0].get_chemical_formula(mode="hill") n_atoms = len(self.atoms_list[0]) lines.append(f"\n{bar}") lines.append(f" Structural Analysis: {formula} ({n_atoms} atoms)") lines.append(f" N structures: {n_structs}") lines.append(f" Cutoff mode: {self._cutoff_mode}") if isinstance(self.cutoff, dict): for pair in sorted(self.cutoff): lines.append(f" {pair}: {self.cutoff[pair]:.2f} A") else: lines.append(f" All pairs: {self.cutoff} A") lines.append(bar) d = self.density() lines.append(f"\n Density: {d['mean']:.2f} +/- {d['std']:.2f} g/cm3") bd = self.bond_distances() if bd: lines.append(f"\n Bond distances:") lines.append(f" {'Pair':<10} {'Mean (A)':>10} {'Std':>8} " f"{'Min':>8} {'Max':>8} {'Count':>8}") lines.append(f" {'-'*54}") for pair, data in bd.items(): lines.append( f" {pair:<10} {data['mean']:>10.3f} {data['std']:>7.3f} " f"{data['min']:>8.3f} {data['max']:>8.3f} " f"{data['count']:>8}") cn = self.coordination() if cn: try: from ..pipeline.random_gen import _classify_bond except ImportError: from amorphgen.pipeline.random_gen import _classify_bond bonding_cn = {} nonbonded_cn = {} for pair, data in cn.items(): s1, s2 = pair.split("-") bt = _classify_bond(s1, s2) if bt == "ionic" or (bt == "covalent" and s1 != s2): bonding_cn[pair] = data else: nonbonded_cn[pair] = data def _fmt(pair, data): l1 = (f" {pair}: mean={data['mean']:.1f} +/- " f"{data['std']:.1f} [{data['min']},{data['max']}]") parts = [f"CN={cn}: {pct:.1f}%" for cn, pct in sorted(data["distribution"].items()) if pct >= 0.5] l2 = " Distribution: " + ", ".join(parts) return [l1, l2] if bonding_cn: lines.append(f"\n Bonding coordination numbers:") for pair, data in bonding_cn.items(): lines.extend(_fmt(pair, data)) if nonbonded_cn: nb = {k: v for k, v in nonbonded_cn.items() if v["mean"] > 0.05} if nb: lines.append(f"\n Non-bonded contacts:") for pair, data in nb.items(): lines.extend(_fmt(pair, data)) if show_angles: ba = self.bond_angles() if ba: lines.append(f"\n Bond angles:") lines.append(f" {'Triplet':<12} {'Mean (deg)':>10} " f"{'Std':>8} {'Count':>8}") lines.append(f" {'-'*42}") for triplet, data in ba.items(): lines.append( f" {triplet:<12} {data['mean']:>10.1f} " f"{data['std']:>7.1f} {data['count']:>8}") lines.append(f"\n{bar}\n") text = "\n".join(lines) print(text) return text
[docs] def per_structure_summary(self) -> str: """ Analyse each structure individually and produce a comparison table. Returns ------- str — formatted table (also printed). """ lines = [] bar = "=" * 80 formula = self.atoms_list[0].get_chemical_formula(mode="hill") n_atoms = len(self.atoms_list[0]) lines.append(f"\n{bar}") lines.append(f" Per-Structure Analysis: {formula} ({n_atoms} atoms)") lines.append(f" N structures: {len(self.atoms_list)}") lines.append(bar) # Determine bonding pairs for CN try: from ..pipeline.random_gen import _classify_bond except ImportError: from amorphgen.pipeline.random_gen import _classify_bond unique = sorted(set(self.atoms_list[0].get_chemical_symbols())) bonding_pairs = [] for s1 in unique: for s2 in unique: bt = _classify_bond(s1, s2) if bt == "ionic" or (bt == "covalent" and s1 != s2): bonding_pairs.append(f"{s1}-{s2}") # If single element, use same-species if not bonding_pairs: bonding_pairs = [f"{unique[0]}-{unique[0]}"] # Header cn_headers = [f"CN({p})" for p in bonding_pairs[:3]] header = f" {'#':<5} {'Density':>8} {'E/atom':>10}" for h in cn_headers: header += f" {h:>10}" lines.append(f"\n{header}") lines.append(f" {'-'*(len(header)-2)}") # Per-structure data all_densities = [] all_energies = [] all_cns = {p: [] for p in bonding_pairs[:3]} for i, atoms in enumerate(self.atoms_list): sa_single = StructureAnalyser([atoms], cutoff=self.cutoff) # Density d = sa_single.density() dens = d["mean"] all_densities.append(dens) # Energy (per atom, using THIS structure's atom count) n_at = len(atoms) e_str = "" for key in ['energy', 'Energy', 'potential_energy']: if key in atoms.info: e = atoms.info[key] / n_at all_energies.append(e) e_str = f"{e:.4f}" break if not e_str: try: e = atoms.get_potential_energy() / n_at all_energies.append(e) e_str = f"{e:.4f}" except Exception: e_str = "N/A" # CN for bonding pairs cn = sa_single.coordination() row = f" {i:<5} {dens:>8.2f} {e_str:>10}" for p in bonding_pairs[:3]: cn_val = cn.get(p, {}).get("mean", 0) all_cns[p].append(cn_val) row += f" {cn_val:>10.1f}" lines.append(row) # Summary statistics lines.append(f" {'-'*(len(header)-2)}") mean_row = f" {'Mean':<5} {np.mean(all_densities):>8.2f}" std_row = f" {'Std':<5} {np.std(all_densities):>8.2f}" if all_energies: mean_row += f" {np.mean(all_energies):>10.4f}" std_row += f" {np.std(all_energies):>10.4f}" else: mean_row += f" {'N/A':>10}" std_row += f" {'N/A':>10}" for p in bonding_pairs[:3]: mean_row += f" {np.mean(all_cns[p]):>10.1f}" std_row += f" {np.std(all_cns[p]):>10.1f}" lines.append(mean_row) lines.append(std_row) lines.append(f"\n{bar}\n") text = "\n".join(lines) print(text) return text
[docs] def save_report(self, filepath, text=None, show_angles=True): """Save the summary report to a text file. Parameters ---------- filepath : str Output file path. text : str, optional Pre-computed report text. If None, calls summary(). show_angles : bool Include bond angles in the report (default True). """ if text is None: text = self.summary(show_angles=show_angles) with open(filepath, "w") as f: f.write(text) print(f" Report saved: {filepath}")
[docs] def plot(self, **kwargs): """Generate and save analysis plots (RDF, CN distribution, bond angles). Parameters ---------- output_dir : str Directory for output files (default "."). prefix : str Filename prefix (default "analysis"). rdf_pairs : list of str, optional Pairs to plot, e.g. ["Si-O", "O-O"]. If None, all pairs. rmax : float, optional Maximum radius for RDF. Auto-detected if None. save_csv : bool Also save raw data as CSV files (default True). """ return plot_analysis(self, **kwargs)