"""Utilities to manipulate data.
Reads variables and selects specific conditions. Requires
['meta']['dimensions'] subdictionaries provided by tp load modules.
"""
# Functions
# ---------
#
#     merge:
#         merge data from multiple files.
#     resolve:
#         select data based on dependent variables.
import numpy as np
import warnings
from copy import deepcopy
def merge(data, dependent):
    """Merges data dictionaries with shared dependent variables.

    Particularly with amset in mind, to relieve memory constraints.
    Currently works for one dependent variable, so looping will often
    be required. An output in the form [ab,cd] would require three
    merges: a to b, c to d and ab to cd (or a to c etc.).

    Values of the dependent variable which have already been merged
    are skipped, so overlapping inputs do not produce duplicates. The
    input dictionaries are not modified.

    Arguments
    ---------

        data : list of dicts
            data to merge. Requires tp metadata and dependent variable.
        dependent : string
            dependent variable.

    Returns
    -------

        dict
            merged data. Array axes follow the first dictionary's
            ['meta']['dimensions'], and 'meta' itself is the first
            dictionary's (shared, not copied). Quantities which do not
            depend on the dependent variable, or which are missing from
            any later dictionary, are taken from the first dictionary.
    """

    # Work on a copy so the first input's dependent list is not extended.
    merged = list(data[0][dependent])
    # indices[i] selects, along the dependent axis, the entries of
    # data[i] that end up in the merged output.
    indices = [np.arange(len(merged))]
    for d in data[1:]:
        values = list(d[dependent])
        new = [i for i, value in enumerate(values) if value not in merged]
        # Explicit integer dtype: an empty selection must stay a valid index.
        indices.append(np.array(new, dtype=int))
        merged.extend(values[i] for i in new)

    data2 = {dependent: merged,
             'meta': data[0]['meta']}
    dimensions = data[0]['meta']['dimensions']

    for key in data[0]:
        if key in (dependent, 'meta'):
            continue
        shared = all(key in d for d in data[1:])
        dims = list(dimensions.get(key, ()))
        if shared and dependent in dims:
            # Select and join along the dependent axis in place, so the
            # result keeps the axis order recorded in the metadata.
            axis = dims.index(dependent)
            data2[key] = np.concatenate(
                [np.take(np.asarray(d[key]), index, axis=axis)
                 for d, index in zip(data, indices)],
                axis=axis)
        else:
            data2[key] = data[0][key]

    return data2
def resolve(data, quantities, **kwargs):
    """Selects particular values of arbitrary quantities.

    Requires the meta/dimensions dictionaries found in later versions
    of tp. Currently cannot accept dictionary keys (e.g. dtype='n') if
    they are not in the 0th index.

    Arguments
    ---------

        data : dict
            data with meta/dimensions dictionaries and quantities.
            A deep copy is resolved; the input is left untouched.
        quantities : array-like or str
            quantities to resolve. A string is split on whitespace.

        kwargs
            dimensions to resolve. Numeric values round to the nearest
            available value; None is ignored. Common options include:

                direction
                    direction to resolve, accepts x-z, a-c,
                    average/ avg/ mean/ arithmetic/ arith,
                    norm/ normal or harmonic/ harm. Anything else
                    is skipped with a warning.
                dtype
                    n or p.
                stype
                    codes from amset, e.g. IMP, or overall.
                doping
                    concentration, not to be confused with dtype.
                temperature
                    temperature.

    Returns
    -------

        dict
            resolved data. Resolved dimensions are removed from
            ['meta']['dimensions'] and the selected values are
            recorded in ['meta'].

    Raises
    ------

        Exception
            if data has no ['meta']['dimensions'] subdictionary.
    """

    data = deepcopy(data)  # sever the link so the original can be reused
    if 'meta' not in data or 'dimensions' not in data['meta']:
        raise Exception('data must have a meta subdictionary with a '
                        'dimensions subdictionary.')
    if isinstance(quantities, str):
        quantities = quantities.split()
    dimensions = data['meta']['dimensions']

    # String selectors index dictionaries / the leading axis, so they
    # have to be applied before anything that reshapes the quantity.
    keys, vals = [], []
    for key, val in kwargs.items():
        if isinstance(val, str) and key != 'direction':
            keys.insert(0, key)
            vals.insert(0, val)
        else:
            keys.append(key)
            vals.append(val)

    for q in quantities:
        if q not in dimensions:
            warnings.warn('{} not in dimensions. Skipping.'.format(q))
            continue
        for key, val in zip(keys, vals):
            if val is None:
                continue
            if key == 'direction':
                _resolve_direction(data, q, val)
            elif key not in data and key not in ('dtype', 'stype'):
                warnings.warn('{} not in data. Skipping.'.format(key))
            elif key not in dimensions[q]:
                continue
            elif isinstance(val, str):
                _resolve_label(data, q, key, val)
            elif isinstance(val, _RESOLVE_NUMERIC_TYPES):
                _resolve_nearest(data, q, key, val)

    return data


# Selector types resolved by nearest value. NumPy scalars are listed
# explicitly: np.int64 and np.float32 are not int/float subclasses.
_RESOLVE_NUMERIC_TYPES = (int, float, np.integer, np.floating,
                          list, tuple, np.ndarray)

# Cartesian / lattice direction names and the tensor index they select.
_RESOLVE_DIRECTION_INDEX = {'a': 0, 'b': 1, 'c': 2,
                            'x': 0, 'y': 1, 'z': 2}

# Accepted averaging keywords and the label recorded in the metadata.
_RESOLVE_DIRECTION_AVERAGES = {'mean':       'arithmetic mean',
                               'arithmetic': 'arithmetic mean',
                               'arith':      'arithmetic mean',
                               'average':    'arithmetic mean',
                               'avg':        'arithmetic mean',
                               'norm':       'norm',
                               'normal':     'norm',
                               'harmonic':   'harmonic mean',
                               'harm':       'harmonic mean'}


def _resolve_label(data, q, key, val):
    """Select entry val of the leading, string-labelled dimension key."""

    dims = data['meta']['dimensions'][q]
    if dims.index(key) != 0:
        # Nothing is resolved, so the metadata is left alone as well.
        warnings.warn('Does not currently work '
                      'unless strings are in '
                      'the 0th index.')
        return

    labels = list(data[key]) if key in data else []
    if val in labels:
        data[q] = data[q][labels.index(val)]
    else:
        # No label list (e.g. dtype): the quantity is keyed by val itself.
        data[q] = data[q][val]
    del dims[0]
    data['meta'][key] = val


def _resolve_nearest(data, q, key, val):
    """Slice data[q] at the entry of data[key] closest to val."""

    offsets = np.subtract(data[key], val)
    if np.ndim(val) == 0:
        distance = np.abs(offsets)
    else:
        # vector-valued variable: Euclidean distance to each row
        distance = np.sqrt(np.sum(np.square(offsets), axis=1))
    index = distance.argmin()

    dims = data['meta']['dimensions'][q]
    axis = dims.index(key)
    del dims[axis]
    data[q] = np.moveaxis(data[q], axis, 0)[index]
    data['meta'][key] = data[key][index]


def _resolve_direction(data, q, val):
    """Reduce every length 3 or 6 (tensor) dimension of data[q] by val."""

    known = isinstance(val, str) and (val in _RESOLVE_DIRECTION_INDEX
                                      or val in _RESOLVE_DIRECTION_AVERAGES)
    if not known:
        # Bail out before the metadata is touched, otherwise data and
        # dimensions would no longer agree.
        warnings.warn('direction {} not recognised. Skipping.'.format(val))
        return

    dims = data['meta']['dimensions'][q]
    while True:
        # One tensor dimension is consumed per pass; stop when none remain.
        axis = next((i for i, d in enumerate(dims) if d in (3, 6)), None)
        if axis is None:
            return
        del dims[axis]
        array = np.moveaxis(data[q], axis, 0)

        if val in _RESOLVE_DIRECTION_INDEX:
            data[q] = array[_RESOLVE_DIRECTION_INDEX[val]]
            continue

        if len(dims) > axis and dims[axis] == 3:
            # 3x3 array: bring the second tensor axis forward and use
            # the diagonal components.
            del dims[axis]
            array = np.moveaxis(array, axis + 1, 1)
            parts = [array[0][0], array[1][1], array[2][2]]
        else:
            # 3x1 or 6x1 array: the first three components.
            parts = [array[0], array[1], array[2]]

        label = _RESOLVE_DIRECTION_AVERAGES[val]
        if label == 'arithmetic mean':
            data[q] = np.average(parts, axis=0)
        elif label == 'norm':
            data[q] = np.sqrt(np.square(parts[0])
                              + np.square(parts[1])
                              + np.square(parts[2]))
        else:  # harmonic mean
            data[q] = 1 / np.average([1 / part for part in parts], axis=0)
        data['meta']['direction'] = label