diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9b9f6420..54ce4ad4 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -33,7 +33,7 @@ jobs: fail-fast: false max-parallel: 100 matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: @@ -52,7 +52,7 @@ jobs: fail-fast: false max-parallel: 100 matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py index 0db807b8..a2d5e239 100644 --- a/espei/error_functions/activity_error.py +++ b/espei/error_functions/activity_error.py @@ -14,10 +14,11 @@ import numpy as np import numpy.typing as npt import tinydb -from pycalphad import Database, equilibrium, variables as v +from pycalphad import Database, variables as v from pycalphad.plot.eqplot import _map_coord_to_variable -from pycalphad.core.utils import filter_phases, unpack_components +from pycalphad.core.utils import filter_phases, unpack_species from scipy.stats import norm +from pycalphad import Workspace from espei.core_utils import ravel_conditions from espei.error_functions.residual_base import ResidualFunction, residual_function_registry @@ -28,7 +29,7 @@ _log = logging.getLogger(__name__) -def target_chempots_from_activity(component, target_activity, temperatures, reference_result): +def target_chempots_from_activity(component, target_activity, temperatures, wks_ref): """ Return an array of experimental chemical potentials for the component @@ -50,8 +51,9 @@ def target_chempots_from_activity(component, target_activity, temperatures, refe """ # acr_i = exp((mu_i - mu_i^{ref})/(RT)) # so mu_i = R*T*ln(acr_i) + mu_i^{ref} - ref_chempot = reference_result["MU"].sel(component=component).values.flatten() - return v.R * temperatures * np.log(target_activity) + ref_chempot + ref_chempot = wks_ref.get(v.MU(component)) + exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot + return exp_chem_pots # TODO: roll this function into ActivityResidual @@ -86,11 +88,9 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, # data_comps and data_phases ensures that we only do calculations on # the subsystem of the system defining the data. data_comps = ds['components'] - data_phases = filter_phases(dbf, unpack_components(dbf, data_comps), candidate_phases=phases) + data_phases = filter_phases(dbf, unpack_species(dbf, data_comps), candidate_phases=phases) ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()} - ref_result = equilibrium(dbf, data_comps, ref['phases'], ref_conditions, - model=phase_models, parameters=parameters, - callables=callables) + wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters) # calculate current chemical potentials # get the conditions @@ -113,15 +113,13 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, # assume now that the ravelled conditions all have the same size conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))] for conds in conditions_list: - sample_eq_res = equilibrium(dbf, data_comps, data_phases, conds, - model=phase_models, parameters=parameters, - callables=callables) - dataset_computed_chempots.append(float(sample_eq_res.MU.sel(component=acr_component).values.flatten()[0])) + wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds, parameters=parameters) + dataset_computed_chempots.append(wks_sample.get(v.MU(acr_component))) dataset_weights.append(std_dev / data_weight / ds.get("weight", 1.0)) # calculate target chempots dataset_activities = np.array(ds['values']).flatten() - dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], ref_result) + dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], wks_ref) dataset_residuals = (np.asarray(dataset_computed_chempots) - np.asarray(dataset_target_chempots, dtype=float)).tolist() _log.debug('Data: %s, chemical potential difference: %s, reference: %s', dataset_activities, dataset_residuals, ds["reference"]) residuals.extend(dataset_residuals) @@ -203,7 +201,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) self._symbols_to_fit = symbols_to_fit diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index dc2b988d..01d31e71 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -11,16 +11,14 @@ import tinydb from tinydb import where from scipy.stats import norm -from pycalphad.plot.eqplot import _map_coord_to_variable from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.core.equilibrium import _eqcalculate -from pycalphad.codegen.callables import build_phase_records -from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_components, unpack_condition -from pycalphad.core.phase_rec import PhaseRecord +from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory +from pycalphad import Workspace, as_property from espei.error_functions.residual_base import ResidualFunction, residual_function_registry from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters +from espei.shadow_functions import update_phase_record_parameters from espei.typing import SymbolName from espei.utils import PickleableTinyDB, database_symbols_to_fit @@ -34,7 +32,7 @@ ('composition_conds', Sequence[Dict[v.X, float]]), ('models', Dict[str, Model]), ('params_keys', Dict[str, float]), - ('phase_records', Sequence[Dict[str, PhaseRecord]]), + ('phase_record_factory', PhaseRecordFactory), ('output', str), ('samples', np.ndarray), ('weight', np.ndarray), @@ -79,7 +77,7 @@ def build_eqpropdata(data: tinydb.database.Document, params_keys, _ = extract_parameters(parameters) data_comps = list(set(data['components']).union({'VA'})) - species = sorted(unpack_components(dbf, data_comps), key=str) + species = sorted(unpack_species(dbf, data_comps), key=str) data_phases = filter_phases(dbf, species, candidate_phases=data['phases']) models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters) output = data['output'] @@ -88,6 +86,7 @@ def build_eqpropdata(data: tinydb.database.Document, reference = data.get('reference', '') # Models are now modified in response to the data from this data + # TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below if 'reference_states' in data: property_output = output[:-1] if output.endswith('R') else output # unreferenced model property so we can tell shift_reference_state what to build. reference_states = [] @@ -100,7 +99,7 @@ def build_eqpropdata(data: tinydb.database.Document, pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')]) comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')]) - phase_records = build_phase_records(dbf, species, data_phases, {**pot_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True) + phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters) # Now we need to unravel the composition conditions # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the @@ -115,7 +114,7 @@ def build_eqpropdata(data: tinydb.database.Document, dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations) weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten() - return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_records, output, samples, weights, reference) + return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference) def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str], @@ -197,38 +196,28 @@ def calc_prop_differences(eqpropdata: EqPropData, * weights for this dataset """ - if approximate_equilibrium: - _equilibrium = no_op_equilibrium_ - else: - _equilibrium = equilibrium_ - dbf = eqpropdata.dbf species = eqpropdata.species phases = eqpropdata.phases pot_conds = eqpropdata.potential_conds models = eqpropdata.models - phase_records = eqpropdata.phase_records - update_phase_record_parameters(phase_records, parameters) + phase_record_factory = eqpropdata.phase_record_factory + update_phase_record_parameters(phase_record_factory, parameters) params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters)) - output = eqpropdata.output + output = as_property(eqpropdata.output) weights = np.array(eqpropdata.weight, dtype=np.float64) samples = np.array(eqpropdata.samples, dtype=np.float64) + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory, parameters=params_dict) calculated_data = [] for comp_conds in eqpropdata.composition_conds: cond_dict = OrderedDict(**pot_conds, **comp_conds) - # str_statevar_dict must be sorted, assumes that pot_conds are. - str_statevar_dict = OrderedDict([(str(key), vals) for key, vals in pot_conds.items()]) - grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True) - multi_eqdata = _equilibrium(phase_records, cond_dict, grid) - # TODO: could be kind of slow. Callables (which are cachable) must be built. - propdata = _eqcalculate(dbf, species, phases, cond_dict, output, data=multi_eqdata, per_phase=False, callables=None, parameters=params_dict, model=models) - - if 'vertex' in propdata.data_vars[output][0]: - raise ValueError(f"Property {output} cannot be used to calculate equilibrium thermochemical error because each phase has a unique value for this property.") - - vals = getattr(propdata, output).flatten().tolist() - calculated_data.extend(vals) + wks.conditions = cond_dict + wks.parameters = params_dict # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc. + wks.models = models + wks.phase_record_factory = phase_record_factory + vals = wks.get(output) + calculated_data.extend(np.atleast_1d(vals).tolist()) calculated_data = np.array(calculated_data, dtype=np.float64) @@ -304,7 +293,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) # okay if parameters are initialized to zero, we only need the symbol names diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index c9c05db9..60dd806c 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -12,16 +12,19 @@ import numpy.typing as npt from symengine import Symbol from tinydb import where - +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.codegen.callables import build_phase_records -from pycalphad.core.utils import unpack_components, get_pure_elements, filter_phases +from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases +from pycalphad import Workspace +from pycalphad.property_framework import IsolatedPhase +from pycalphad.property_framework.metaproperties import find_first_compset +from pycalphad.core.solver import Solver, SolverResult + from espei.datasets import Dataset from espei.core_utils import ravel_conditions, get_prop_data, filter_temperatures from espei.parameter_selection.redlich_kister import calc_interaction_product from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import calculate_, update_phase_record_parameters from espei.sublattice_tools import canonicalize, recursive_tuplify, tuplify from espei.typing import SymbolName from espei.utils import database_symbols_to_fit, PickleableTinyDB @@ -29,6 +32,45 @@ _log = logging.getLogger(__name__) +class NoSolveSolver(Solver): + def solve(self, composition_sets, conditions): + """ + Initialize a solver, but don't actually do anything - i.e. return the SolverResult as if the calculation converged. + + """ + spec = self.get_system_spec(composition_sets, conditions) + self._fix_state_variables_in_compsets(composition_sets, conditions) + state = spec.get_new_state(composition_sets) + # DO NOT: converged = spec.run_loop(state, 1000) + + if self.remove_metastable: + phase_idx = 0 + compsets_to_remove = [] + for compset in composition_sets: + # Mark unstable phases for removal + if compset.NP <= 0.0 and not compset.fixed: + compsets_to_remove.append(int(phase_idx)) + phase_idx += 1 + # Watch removal order here, as the indices of composition_sets are changing! + for idx in reversed(compsets_to_remove): + del composition_sets[idx] + + phase_amt = [compset.NP for compset in composition_sets] + + x = composition_sets[0].dof + state_variables = composition_sets[0].phase_record.state_variables + num_statevars = len(state_variables) + for compset in composition_sets[1:]: + x = np.r_[x, compset.dof[num_statevars:]] + x = np.r_[x, phase_amt] + chemical_potentials = np.array(state.chemical_potentials) + + if self.verbose: + print('Chemical Potentials', chemical_potentials) + print(np.asarray(x)) + return SolverResult(converged=True, x=x, chemical_potentials=chemical_potentials) + + # TODO: make into an object similar to how ZPF data works? FixedConfigurationCalculationData = NewType("FixedConfigurationCalculationData", Dict[str, Any]) @@ -117,10 +159,11 @@ def get_prop_samples(desired_data, constituents): Dictionary of condition kwargs for pycalphad's calculate and the expected values """ - # TODO: assumes T, P as conditions + # TODO: assumes T, P, N as conditions # calculate needs points, state variable lists, and values to compare to num_dof = sum(map(len, constituents)) calculate_dict = { + 'N': np.array([]), 'P': np.array([]), 'T': np.array([]), 'points': np.atleast_2d([[]]).reshape(-1, num_dof), @@ -133,6 +176,8 @@ def get_prop_samples(desired_data, constituents): # extract the data we care about datum_T = datum['conditions']['T'] datum_P = datum['conditions']['P'] + # TODO: fix this when N different from 1 allowed in pycalphad + datum_N = np.full_like(datum['values'], 1.0) configurations = datum['solver']['sublattice_configurations'] occupancies = datum['solver'].get('sublattice_occupancies') values = np.array(datum['values']) @@ -145,7 +190,7 @@ def get_prop_samples(desired_data, constituents): weights = np.broadcast_to(np.asarray(datum.get('weight', 1.0)), values.shape) # broadcast and flatten the conditions arrays - P, T = ravel_conditions(values, datum_P, datum_T) + P, T, N = ravel_conditions(values, datum_P, datum_T, datum_N) if occupancies is None: occupancies = [None] * len(configurations) @@ -156,6 +201,7 @@ def get_prop_samples(desired_data, constituents): # add everything to the calculate_dict calculate_dict['P'] = np.concatenate([calculate_dict['P'], P]) calculate_dict['T'] = np.concatenate([calculate_dict['T'], T]) + calculate_dict['N'] = np.concatenate([calculate_dict['N'], N]) calculate_dict['points'] = np.concatenate([calculate_dict['points'], np.tile(points, (values.shape[0]*values.shape[1], 1))], axis=0) calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()]) calculate_dict['weights'].extend(weights.flatten()) @@ -240,7 +286,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic else: symbols_to_fit = database_symbols_to_fit(dbf) - species_comps = set(unpack_components(dbf, comps)) + species_comps = set(unpack_species(dbf, comps)) # estimated from NIST TRC uncertainties property_std_deviation = { @@ -272,7 +318,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic 'phase_name': phase_name, 'prop': prop, # needs the following keys to be added: - # species, calculate_dict, phase_records, model, output, weights + # species, calculate_dict, phase_record_factory, model, output, weights } # get all the data with these model exclusions if exclusion == tuple([]): @@ -286,7 +332,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic model_cls = model.get(phase_name, Model) mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit) if prop.endswith('_FORM'): - output = ''.join(prop.split('_')[:-1])+'R' + output = ''.join(prop.split('_')[:-1])+"R" mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion}) else: output = prop @@ -298,42 +344,85 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic mod.endmember_reference_model.models[contrib] = symengine.S.Zero except AttributeError: mod.reference_model.models[contrib] = symengine.S.Zero - species = sorted(unpack_components(dbf, comps), key=str) - data_dict['species'] = species model_dict = {phase_name: mod} + species = sorted(unpack_species(dbf, comps), key=str) + data_dict['species'] = species statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)} statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0]))) + phase_record_factory = PhaseRecordFactory(dbf, species, statevar_dict, model_dict, + parameters={s: 0 for s in symbols_to_fit}) str_statevar_dict = OrderedDict((str(k), vals) for k, vals in statevar_dict.items()) - phase_records = build_phase_records(dbf, species, [phase_name], statevar_dict, model_dict, - output=output, parameters={s: 0 for s in symbols_to_fit}, - build_gradients=False, build_hessians=False) data_dict['str_statevar_dict'] = str_statevar_dict - data_dict['phase_records'] = phase_records + data_dict['phase_record_factory'] = phase_record_factory data_dict['calculate_dict'] = calculate_dict data_dict['model'] = model_dict data_dict['output'] = output data_dict['weights'] = np.array(property_std_deviation[prop.split('_')[0]])/np.array(calculate_dict.pop('weights')) + data_dict['constituents'] = constituents all_data_dicts.append(data_dict) return all_data_dicts - -def compute_fixed_configuration_property_differences(calc_data: FixedConfigurationCalculationData, parameters): +def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters): + species = calc_data['species'] phase_name = calc_data['phase_name'] + models = calc_data['model'] # Dict[PhaseName: Model] output = calc_data['output'] - phase_records = calc_data['phase_records'] + phase_record_factory = calc_data['phase_record_factory'] sample_values = calc_data['calculate_dict']['values'] - - update_phase_record_parameters(phase_records, parameters) - results = calculate_(calc_data['species'], [phase_name], - calc_data['str_statevar_dict'], calc_data['model'], - phase_records, output=output, broadcast=False, - points=calc_data['calculate_dict']['points'])[output] - differences = results - sample_values + str_statevar_dict = calc_data['str_statevar_dict'] + + constituent_list = [] + sublattice_list = [] + counter = 0 + for sublattice in calc_data['constituents']: + for const in sublattice: + sublattice_list.append(counter) + constituent_list.append(const) + counter = counter + 1 + + differences = [] + for index in range(len(sample_values)): + cond_dict = {} + for sv_key, sv_val in str_statevar_dict.items(): + cond_dict.update({sv_key: sv_val[index]}) + + # Build internal DOF as if they were used in conditions + dof = {} + for site_frac in range(len(constituent_list)): + comp = constituent_list[site_frac] + occupancy = calc_data['calculate_dict']['points'][index,site_frac] + sublattice = sublattice_list[site_frac] + dof.update({v.Y(phase_name,sublattice,comp): occupancy}) + + # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species + # Build composition conditions, probably not necessary given that we don't actually solve anything, but still useful in terms of derivatives probably. + active_pure_elements = [list(x.constituents.keys()) for x in species] + active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) + ind_comps = len(active_pure_elements) - 1 + for comp in active_pure_elements: + if v.Species(comp) != v.Species('VA') and ind_comps > 0: + cond_dict[v.X(comp)] = float(models[phase_name].moles(comp).xreplace(dof)) + ind_comps = ind_comps - 1 + # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why. + # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered. + # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. + wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, parameters=parameters, solver=NoSolveSolver()) + # We then get a composition set and we use a special "NoSolveSolver" to + # ensure that we don't change from the data-specified DOF. + compset = find_first_compset(phase_name, wks) + new_sitefracs = np.array([sf for _, sf in sorted(dof.items(), key=lambda y: (y[0].phase_name, y[0].sublattice_index, y[0].species.name))]) + new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)]) # no updates expected + compset.update(new_sitefracs, 1.0, new_statevars) + iso_phase = IsolatedPhase(compset, wks=wks) + iso_phase.solver = NoSolveSolver() + results = wks.get(iso_phase(output)) + sample_differences = results - sample_values[index] + differences.append(sample_differences) return differences -def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], parameters=None): +def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], dbf, parameters=None): """ Calculate the weighted single phase error in the Database @@ -351,13 +440,14 @@ def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: Li """ if parameters is None: - parameters = np.array([]) + parameters = {} prob_error = 0.0 for data in thermochemical_data: phase_name = data['phase_name'] sample_values = data['calculate_dict']['values'] - differences = compute_fixed_configuration_property_differences(data, parameters) + differences = compute_fixed_configuration_property_differences(dbf, data, parameters) + differences = np.array(differences) probabilities = norm.logpdf(differences, loc=0, scale=data['weights']) prob_sum = np.sum(probabilities) _log.debug("%s(%s) - probability sum: %0.2f, data: %s, differences: %s, probabilities: %s, references: %s", data['prop'], phase_name, prob_sum, sample_values, differences, probabilities, data['calculate_dict']['references']) @@ -387,16 +477,18 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit) + self._symbols_to_fit = symbols_to_fit + self.dbf = database def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[float]]: residuals = [] weights = [] for data in self.thermochemical_data: - dataset_residuals = compute_fixed_configuration_property_differences(data, parameters).tolist() + dataset_residuals = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters))) residuals.extend(dataset_residuals) dataset_weights = np.asarray(data["weights"], dtype=float).flatten().tolist() if len(dataset_weights) != len(dataset_residuals): @@ -407,7 +499,8 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl return residuals, weights def get_likelihood(self, parameters) -> float: - likelihood = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, parameters) + parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())} + likelihood = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters) return likelihood diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index c1b0f9f3..f6d961e2 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -21,17 +21,15 @@ import numpy as np from numpy.typing import ArrayLike -from pycalphad import Database, Model, variables as v -from pycalphad.codegen.callables import build_phase_records -from pycalphad.core.utils import instantiate_models, filter_phases, unpack_components -from pycalphad.core.phase_rec import PhaseRecord -from pycalphad.core.calculate import _sample_phase_constitution -from pycalphad.core.utils import point_sample +from pycalphad import Database, Model, Workspace, variables as v +from pycalphad.property_framework import IsolatedPhase +from pycalphad.core.utils import filter_phases, unpack_species +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from scipy.stats import norm import tinydb from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters, constrained_equilibrium +from espei.shadow_functions import calculate_, update_phase_record_parameters from espei.typing import SymbolName from espei.utils import PickleableTinyDB, database_symbols_to_fit from .residual_base import ResidualFunction, residual_function_registry @@ -44,8 +42,7 @@ class RegionVertex: phase_name: str composition: ArrayLike # 1D of size (number nonvacant pure elements) comp_conds: Dict[v.X, float] - points: ArrayLike - phase_records: Dict[str, PhaseRecord] + phase_record_factory: PhaseRecordFactory is_disordered: bool has_missing_comp_cond: bool @@ -119,24 +116,6 @@ def _compute_vertex_composition(comps: Sequence[str], comp_conds: Dict[str, floa return vertex_composition -def _subsample_phase_points(phase_record, phase_points, target_composition, additional_distance_radius=0.02): - # Compute the mole fractions of each point - phase_compositions = np.zeros((phase_points.shape[0], target_composition.size), order='F') - # TODO: potential bug here if the composition has dependence (even piecewise - # dependence) in the state variables. The compositions may be nan in this case. - statevar_placeholder = np.ones((phase_points.shape[0], phase_record.num_statevars)) - dof = np.hstack((statevar_placeholder, phase_points)) - for el_idx in range(target_composition.size): - phase_record.mass_obj_2d(phase_compositions[:, el_idx], dof, el_idx) - - # Find the points indicdes where the mass is within the radius of minimum distance + additional_distance_radius - distances = np.mean(np.abs(phase_compositions - target_composition), axis=1) - idxs = np.nonzero(distances < (distances.min() + additional_distance_radius))[0] - - # Return the sub-space of points where this condition holds valid - return phase_points[idxs] - - def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], datasets: PickleableTinyDB, parameters: Dict[str, float], model: Optional[Dict[str, Type[Model]]] = None): """ Return the ZPF data used in the calculation of ZPF error @@ -162,18 +141,22 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat desired_data = datasets.search((tinydb.where('output') == 'ZPF') & (tinydb.where('components').test(lambda x: set(x).issubset(comps))) & (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0))) + wks = Workspace(dbf, comps, phases, parameters=parameters) + if model is None: + model = {} zpf_data = [] # 1:1 correspondence with each dataset for data in desired_data: + current_wks = wks.copy() data_comps = list(set(data['components']).union({'VA'})) - species = sorted(unpack_components(dbf, data_comps), key=str) - data_phases = filter_phases(dbf, species, candidate_phases=phases) - models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters) - # assumed N, P, T state variables - phase_recs = build_phase_records(dbf, species, data_phases, {v.N, v.P, v.T}, models, parameters=parameters, build_gradients=True, build_hessians=True) - all_phase_points = {phase_name: _sample_phase_constitution(models[phase_name], point_sample, True, 50) for phase_name in data_phases} all_regions = data['values'] conditions = data['conditions'] + current_wks.components = data_comps + current_wks.conditions = conditions + current_wks.phases = phases + # only init models that are defined for the phases; fallback to default pycalphad behavior if no custom model defined + current_wks.models = {phase_name: model.get(phase_name, current_wks.models[phase_name]) + for phase_name in current_wks.phases} phase_regions = [] # Each phase_region is one set of phases in equilibrium (on a tie-line), # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]] @@ -187,45 +170,40 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat for vertex in phase_region: phase_name, comp_conds, disordered_flag = _extract_phases_comps(vertex) composition = _compute_vertex_composition(data_comps, comp_conds) + # TODO: should maybe have different types of RegionVertex that are semantic for __HYPERPLANE__, known phase composition, unknown phase composition if phase_name.upper() == '__HYPERPLANE__': if np.any(np.isnan(composition)): # TODO: make this a part of the dataset checker raise ValueError(f"__HYPERPLANE__ vertex ({vertex}) must have all independent compositions defined to make a well-defined hyperplane (from dataset: {data})") - vtx = RegionVertex(phase_name, composition, comp_conds, None, phase_recs, disordered_flag, False) + vtx = RegionVertex(phase_name, composition, comp_conds, current_wks.phase_record_factory, disordered_flag, False) hyperplane_vertices.append(vtx) continue - # Construct single-phase points satisfying the conditions for each phase in the region - mod = models[phase_name] + mod = current_wks.models[phase_name] if np.any(np.isnan(composition)): - # We can't construct points because we don't have a known composition has_missing_comp_cond = True - phase_points = None elif _phase_is_stoichiometric(mod): has_missing_comp_cond = False - phase_points = None else: has_missing_comp_cond = False - # Only sample points that have an average mass residual within tol - tol = 0.02 - phase_points = _subsample_phase_points(phase_recs[phase_name], all_phase_points[phase_name], composition, tol) - assert phase_points.shape[0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}" - vtx = RegionVertex(phase_name, composition, comp_conds, phase_points, phase_recs, disordered_flag, has_missing_comp_cond) + vtx = RegionVertex(phase_name, composition, comp_conds, current_wks.phase_record_factory, disordered_flag, has_missing_comp_cond) vertices.append(vtx) if len(hyperplane_vertices) == 0: # Define the hyperplane at the vertices of the ZPF points hyperplane_vertices = vertices - region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, species, data_phases, models) + region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, current_wks.models.unwrap()) phase_regions.append(region) data_dict = { 'weight': data.get('weight', 1.0), 'phase_regions': phase_regions, - 'dataset_reference': data['reference'] + 'dataset_reference': data['reference'], + 'dbf': dbf, # TODO: not ideal to ship databases across the wire, but we can accept it for now. + 'parameter_dict': parameters } zpf_data.append(data_dict) return zpf_data -def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, approximate_equilibrium: bool = False) -> np.ndarray: +def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np.ndarray, approximate_equilibrium: bool = False) -> np.ndarray: """ Calculate the chemical potentials for the target hyperplane, one vertex at a time @@ -238,18 +216,14 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro are taken as the target hyperplane for the given equilibria. """ - if approximate_equilibrium: - _equilibrium = no_op_equilibrium_ - else: - _equilibrium = equilibrium_ target_hyperplane_chempots = [] - target_hyperplane_phases = [] species = phase_region.species phases = phase_region.phases models = phase_region.models + param_keys = list(models.values())[0]._parameters_arg + parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters)) for vertex in phase_region.hyperplane_vertices: - phase_records = vertex.phase_records - update_phase_record_parameters(phase_records, parameters) + update_phase_record_parameters(vertex.phase_record_factory, parameters) cond_dict = {**vertex.comp_conds, **phase_region.potential_conds} if vertex.has_missing_comp_cond: # This composition is unknown -- it doesn't contribute to hyperplane estimation @@ -257,16 +231,14 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro else: # Extract chemical potential hyperplane from multi-phase calculation # Note that we consider all phases in the system, not just ones in this tie region - str_statevar_dict = OrderedDict([(str(key), cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) - grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True) - multi_eqdata = _equilibrium(phase_records, cond_dict, grid) - target_hyperplane_phases.append(multi_eqdata.Phase.squeeze()) - # Does there exist only a single phase in the result with zero internal degrees of freedom? - # We should exclude those chemical potentials from the average because they are meaningless. - num_phases = np.sum(multi_eqdata.Phase.squeeze() != '') - Y_values = multi_eqdata.Y.squeeze() + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=vertex.phase_record_factory, conditions=cond_dict, parameters=parameters_dict) + # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species + active_pure_elements = [list(x.constituents.keys()) for x in species] + active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) + MU_values = [wks.get(v.MU(comp)) for comp in active_pure_elements] + num_phases = np.sum(wks.eq.Phase.squeeze() != '') + Y_values = wks.eq.Y.squeeze() no_internal_dof = np.all((np.isclose(Y_values, 1.)) | np.isnan(Y_values)) - MU_values = multi_eqdata.MU.squeeze() if (num_phases == 1) and no_internal_dof: target_hyperplane_chempots.append(np.full_like(MU_values, np.nan)) else: @@ -276,23 +248,24 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, - phase_region: PhaseRegion, vertex: RegionVertex, - parameters: np.ndarray, approximate_equilibrium: bool = False) -> float: + phase_region: PhaseRegion, dbf: Database, parameter_dict, vertex: RegionVertex, + parameters: np.ndarray, approximate_equilibrium: bool = False) -> Tuple[float,List[float]]: """Calculate the integrated driving force between the current hyperplane and target hyperplane. """ species = phase_region.species models = phase_region.models + param_keys = list(models.values())[0]._parameters_arg + parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters)) current_phase = vertex.phase_name cond_dict = {**phase_region.potential_conds, **vertex.comp_conds} str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) - phase_points = vertex.points - phase_records = vertex.phase_records - update_phase_record_parameters(phase_records, parameters) - if phase_points is None: + phase_record_factory = vertex.phase_record_factory + update_phase_record_parameters(phase_record_factory, parameters) + if vertex.has_missing_comp_cond: # We don't have the phase composition here, so we estimate the driving force. # Can happen if one of the composition conditions is unknown or if the phase is # stoichiometric and the user did not specify a valid phase composition. - single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, pdens=50) + single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50) df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM driving_force = float(df.max()) elif vertex.is_disordered: @@ -318,18 +291,13 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, sitefracs_to_add[np.isnan(sitefracs_to_add)] = 1 - np.nansum(sitefracs_to_add) desired_sitefracs[dof_idx:dof_idx + num_subl_dof] = sitefracs_to_add dof_idx += num_subl_dof - single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, points=np.asarray([desired_sitefracs])) + single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, points=np.asarray([desired_sitefracs])) driving_force = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM driving_force = float(np.squeeze(driving_force)) else: - # Extract energies from single-phase calculations - grid = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, points=phase_points, pdens=50, fake_points=True) - # TODO: consider enabling approximate for this? - converged, energy = constrained_equilibrium(phase_records, cond_dict, grid) - if not converged: - _log.debug('Calculation failure: constrained equilibrium not converged for %s, conditions: %s, parameters %s', current_phase, cond_dict, parameters) - return np.inf - driving_force = float(np.dot(target_hyperplane_chempots, vertex.composition) - float(energy)) + wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict, parameters=parameters_dict) + constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM')) + driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy return driving_force @@ -380,7 +348,7 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]], for phase_region in data['phase_regions']: # 1. Calculate the average multiphase hyperplane eq_str = phase_region.eq_str() - target_hyperplane = estimate_hyperplane(phase_region, parameters, approximate_equilibrium=approximate_equilibrium) + target_hyperplane = estimate_hyperplane(phase_region, data['dbf'], parameters, approximate_equilibrium=approximate_equilibrium) if np.any(np.isnan(target_hyperplane)): _log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref) data_driving_forces.extend([0]*len(phase_region.vertices)) @@ -388,7 +356,7 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]], continue # 2. Calculate the driving force to that hyperplane for each vertex for vertex in phase_region.vertices: - driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, vertex, parameters, + driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium, ) if np.isinf(driving_force) and short_circuit: @@ -421,7 +389,7 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]], return 0.0 driving_forces, weights = calculate_zpf_driving_forces(zpf_data, parameters, approximate_equilibrium, short_circuit=True) # Driving forces and weights are 2D ragged arrays with the shape (len(zpf_data), len(zpf_data['values'])) - driving_forces = np.concatenate(driving_forces) + driving_forces = np.concatenate(driving_forces).T weights = np.concatenate(weights) if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))): return -np.inf @@ -450,7 +418,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) # okay if parameters are initialized to zero, we only need the symbol names diff --git a/espei/optimizers/opt_mcmc.py b/espei/optimizers/opt_mcmc.py index f4820218..5d0cc865 100644 --- a/espei/optimizers/opt_mcmc.py +++ b/espei/optimizers/opt_mcmc.py @@ -289,12 +289,14 @@ def predict(params, **ctx): lnlike = 0.0 likelihoods = {} for residual_obj in ctx.get("residual_objs", []): + residual_starttime = time.time() likelihood = residual_obj.get_likelihood(params) - likelihoods[type(residual_obj).__name__] = likelihood + residual_time = time.time() - residual_starttime + likelihoods[type(residual_obj).__name__] = (likelihood, residual_time) lnlike += likelihood liketime = time.time() - starttime - like_str = ". ".join([f"{ky}: {vl:0.3f}" for ky, vl in likelihoods.items()]) + like_str = ". ".join([f"{ky}: {vl[0]:0.3f} ({vl[1]:0.2f} s)" for ky, vl in likelihoods.items()]) lnlike = np.array(lnlike, dtype=np.float64) _log.trace('Likelihood - %0.2fs - %s. Total: %0.3f.', liketime, like_str, lnlike) diff --git a/espei/optimizers/opt_scipy.py b/espei/optimizers/opt_scipy.py index a9629a95..93a5c014 100644 --- a/espei/optimizers/opt_scipy.py +++ b/espei/optimizers/opt_scipy.py @@ -4,7 +4,6 @@ from scipy.optimize import minimize from espei.utils import unpack_piecewise from espei.error_functions.context import setup_context -from espei.error_functions import calculate_activity_error from .opt_base import OptimizerBase _log = logging.getLogger(__name__) diff --git a/espei/paramselect.py b/espei/paramselect.py index 27a85f3e..758fa98c 100644 --- a/espei/paramselect.py +++ b/espei/paramselect.py @@ -108,7 +108,7 @@ def _build_feature_matrix(sample_condition_dicts: List[Dict[Symbol, float]], sym M = len(sample_condition_dicts) N = len(symbolic_coefficients) feature_matrix = np.empty((M, N)) - coeffs = ImmutableDenseMatrix(symbolic_coefficients) + coeffs = ImmutableDenseMatrix([symbolic_coefficients]) # see https://github.com/symengine/symengine.py/issues/485 for i in range(M): # Profiling-guided optimization # At the time, we got a 3x performance speedup compared to calling subs diff --git a/espei/plot.py b/espei/plot.py index ca329ba8..1fa99b0c 100644 --- a/espei/plot.py +++ b/espei/plot.py @@ -10,7 +10,7 @@ import tinydb from symengine import Symbol from pycalphad import Model, calculate, equilibrium, variables as v -from pycalphad.core.utils import unpack_components +from pycalphad.core.utils import unpack_species from pycalphad.plot.utils import phase_legend from pycalphad.plot.eqplot import eqplot, _map_coord_to_variable, unpack_condition @@ -504,7 +504,6 @@ def _get_interaction_predicted_values(dbf, comps, phase_name, configuration, out grid = np.linspace(0, 1, num=100) point_matrix = grid[None].T * second_endpoint + (1 - grid)[None].T * first_endpoint # TODO: Real temperature support - point_matrix = point_matrix[None, None] predicted_values = calculate( dbf, comps, [phase_name], output=output, T=298.15, P=101325, points=point_matrix, model=mod)[output].values.flatten() @@ -576,7 +575,7 @@ def plot_interaction(dbf, comps, phase_name, configuration, output, datasets=Non else: desired_data = [] - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active @@ -627,7 +626,6 @@ def plot_interaction(dbf, comps, phase_name, configuration, output, datasets=Non points[point_idx] = list(OrderedDict(sorted(points[point_idx].items(), key=str)).values()) points = np.array(points, dtype=np.float64) # TODO: Real temperature support - points = points[None, None] stability = calculate(dbf, comps, [phase_name], output=data['output'][:-5], T=temps, P=pressures, points=points, model=mod_srf) @@ -715,9 +713,9 @@ def plot_endmember(dbf, comps, phase_name, configuration, output, datasets=None, else: mod = Model(dbf, comps, phase_name) prop = output - endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None] + endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction)) # Set up the domain of the calculation - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active @@ -784,7 +782,7 @@ def _compare_data_to_parameters(dbf, comps, phase_name, desired_data, mod, confi matplotlib.Axes """ - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active: diff --git a/espei/shadow_functions.py b/espei/shadow_functions.py index 75179d9e..14ea739a 100644 --- a/espei/shadow_functions.py +++ b/espei/shadow_functions.py @@ -3,67 +3,24 @@ pycalphad functions for very fast performance. """ -from collections import OrderedDict from typing import Sequence, Dict, Optional from numpy.typing import ArrayLike import numpy as np from pycalphad import Model, variables as v -from pycalphad.core.phase_rec import PhaseRecord -from pycalphad.core.composition_set import CompositionSet -from pycalphad.core.starting_point import starting_point -from pycalphad.core.eqsolver import _solve_eq_at_conditions -from pycalphad.core.equilibrium import _adjust_conditions -from pycalphad.core.utils import get_state_variables, unpack_kwarg, point_sample +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory +from pycalphad.core.utils import unpack_kwarg, point_sample from pycalphad.core.light_dataset import LightDataset from pycalphad.core.calculate import _sample_phase_constitution, _compute_phase_values -from pycalphad.core.solver import Solver -def update_phase_record_parameters(phase_records: Dict[str, PhaseRecord], parameters: ArrayLike) -> None: +def update_phase_record_parameters(phase_record_factory: PhaseRecordFactory, parameters: ArrayLike) -> None: if parameters.size > 0: - for phase_name, phase_record in phase_records.items(): - # very important that these are floats, otherwise parameters can end up - # with garbage data. `np.asarray` does not create a copy if the type is - # correct - phase_record.parameters[:] = np.asarray(parameters, dtype=np.float64) - -def _single_phase_start_point(conditions, state_variables, phase_records, grid): - """Return a single CompositionSet object to use in a point calculation - - Assumes the grid has includes only candidate phases. The starting point will be - generated by taking the minimum energy, regardless of whether mass balance is - satisfied. - - Parameters - ---------- - conditions : Dict[v.StateVariable, ArrayLike] - Conditions in this region. Assumes state variable conditions have size == 1. - state_variables : List[v.StateVariable] - Active state variables in the calculation - phase_records : Dict[str, PhaseRecord] - Phase records, can have more than just the phase of interest - grid : LightDataset - Sampled grid of points - - """ - # assumes state variables in the conditions have size == 1 - idx_min = grid.GM.argmin() - # Assumes ordering of dimensions is [state variables, points, data_variable...] - # Get phase record - phase_name = str(grid.Phase[..., idx_min].squeeze()) - prx = phase_records[phase_name] - Y = grid.Y[..., idx_min, :].squeeze()[:prx.phase_dof] - # Get current state variables - # TODO: can we assume sorting - state_vars = np.array([conditions[sv][0] for sv in sorted(state_variables, key=str)]) - compset = CompositionSet(prx) - compset.update(Y, 1.0, state_vars) - return compset + phase_record_factory.param_values[:] = np.asarray(parameters, dtype=np.float64) def calculate_(species: Sequence[v.Species], phases: Sequence[str], str_statevar_dict: Dict[str, np.ndarray], models: Dict[str, Model], - phase_records: Dict[str, PhaseRecord], output: Optional[str] = 'GM', + phase_record_factory: PhaseRecordFactory, output: Optional[str] = 'GM', points: Optional[Dict[str, np.ndarray]] = None, pdens: Optional[int] = 50, broadcast: Optional[bool] = True, fake_points: Optional[bool] = False, @@ -74,27 +31,28 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], points_dict = unpack_kwarg(points, default_arg=None) pdens_dict = unpack_kwarg(pdens, default_arg=50) nonvacant_components = [x for x in sorted(species) if x.number_of_atoms > 0] - maximum_internal_dof = max(prx.phase_dof for prx in phase_records.values()) + cur_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility + str_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility + maximum_internal_dof = max(prx.phase_dof for prx in phase_record_factory.values()) all_phase_data = [] for phase_name in sorted(phases): mod = models[phase_name] - phase_record = phase_records[phase_name] + phase_record = phase_record_factory[phase_name] points = points_dict[phase_name] if points is None: - points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name]) + points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name], cur_phase_local_conditions) points = np.atleast_2d(points) - fp = fake_points and (phase_name == sorted(phases)[0]) - phase_ds = _compute_phase_values(nonvacant_components, str_statevar_dict, + phase_ds = _compute_phase_values(nonvacant_components, str_statevar_dict, str_phase_local_conditions, points, phase_record, output, maximum_internal_dof, broadcast=broadcast, largest_energy=float(1e10), fake_points=fp, parameters={}) all_phase_data.append(phase_ds) - # assumes phase_records all have the same nonvacant pure elements, + # assumes phase_record_factory all have the same nonvacant pure elements, # even if those elements are not present in this phase record - fp_offset = len(tuple(phase_records.values())[0].nonvacant_elements) if fake_points else 0 + fp_offset = len(tuple(phase_record_factory.values())[0].nonvacant_elements) if fake_points else 0 running_total = [fp_offset] + list(np.cumsum([phase_ds['X'].shape[-2] for phase_ds in all_phase_data])) islice_by_phase = {phase_name: slice(running_total[phase_idx], running_total[phase_idx+1], None) for phase_idx, phase_name in enumerate(sorted(phases))} @@ -117,51 +75,3 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], final_ds = all_phase_data[0] final_ds.attrs['phase_indices'] = islice_by_phase return final_ds - - -def constrained_equilibrium(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], grid: LightDataset): - """Perform an equilibrium calculation with just a single composition set that is constrained to the global composition condition""" - statevars = get_state_variables(conds=conditions) - conditions = _adjust_conditions(conditions) - # Assume that all conditions keys are lists with exactly one element (point calculation) - str_conds = OrderedDict([(str(ky), conditions[ky][0]) for ky in sorted(conditions.keys(), key=str)]) - compset = _single_phase_start_point(conditions, statevars, phase_records, grid) - solution_compsets = [compset] - solver = Solver() - # modifies `solution_compsets` and `compset` in place - solver_result = solver.solve(solution_compsets, str_conds) - energy = compset.NP * compset.energy - return solver_result.converged, energy - -def equilibrium_(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], grid: LightDataset - ) -> LightDataset: - """ - Perform a fast equilibrium calculation with virtually no overhead. - """ - statevars = sorted(get_state_variables(conds=conditions), key=str) - conditions = _adjust_conditions(conditions) - str_conds = OrderedDict([(str(ky), conditions[ky]) for ky in sorted(conditions.keys(), key=str)]) - start_point = starting_point(conditions, statevars, phase_records, grid) - return _solve_eq_at_conditions(start_point, phase_records, grid, str_conds, statevars, False) - - -def no_op_equilibrium_(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], - grid: LightDataset, - ) -> LightDataset: - """ - Perform a fast "equilibrium" calculation with virtually no overhead that - doesn't refine the solution or do global minimization, but just returns - the starting point. - - Notes - ----- - Uses a placeholder first argument for the same signature as - ``_equilibrium``, but ``species`` are not needed. - - """ - statevars = get_state_variables(conds=conditions) - conditions = _adjust_conditions(conditions) - return starting_point(conditions, statevars, phase_records, grid) diff --git a/setup.py b/setup.py index c4a31e48..123df475 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,6 @@ def readme(fname): 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 6fa2bc78..7af9e5c4 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -12,7 +12,7 @@ import scipy.stats from tinydb import where -from pycalphad import Database, Model, variables as v +from pycalphad import Database from espei.paramselect import generate_parameters from espei.error_functions import * @@ -78,11 +78,11 @@ def test_get_thermochemical_data_filters_invalid_sublattice_configurations(datas dbf = Database(CU_MG_TDB) comps = ["CU", "MG", "VA"] phases = ["CUMG2"] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) print('thermochemical data:', thermochemical_data) assert thermochemical_data[0]["calculate_dict"]["values"].shape == (2,) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, -14.28729) @@ -100,6 +100,19 @@ def test_fixed_configuration_residual_function(datasets_db): assert np.isclose(likelihood, -14.28729, rtol=1e-6) +def test_fixed_configuration_residual_with_internal_degrees_of_freedom(datasets_db): + """Unstable endmembers in phases that have internal degrees of freedom should retain fixed internal DOF""" + dbf = Database(CU_MG_TDB) + datasets_db.insert(CU_MG_HM_FORM_LAVES_C15_ANTISITE) + + residual_func = FixedConfigurationPropertyResidual(dbf, datasets_db, phase_models=None, symbols_to_fit=[]) + + # Expect that the database has the same energy as the dataset (the former was fit to the latter) + residuals, weights = residual_func.get_residuals(np.asarray([])) + assert len(residuals) == len(weights) + assert np.allclose(residuals, [0.0]) + + def test_get_thermochemical_data_filters_configurations_when_all_configurations_are_invalid(datasets_db): datasets_db.insert(CU_MG_HM_MIX_CUMG2_ALL_INVALID) # No valid configurations @@ -110,7 +123,7 @@ def test_get_thermochemical_data_filters_configurations_when_all_configurations_ print('thermochemical data:', thermochemical_data) assert thermochemical_data[0]["calculate_dict"]["values"].shape == (0,) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, 0) @@ -121,8 +134,8 @@ def test_non_equilibrium_thermochemical_error_with_multiple_X_points(datasets_db dbf = Database(CU_MG_TDB) phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, -4061.119001241541, rtol=1e-6) @@ -134,8 +147,8 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_points(datasets_db dbf = Database(CU_MG_TDB) phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error,-14.287293263253728, rtol=1e-6) @@ -147,7 +160,10 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_X_points(datasets_ phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + symbols_to_fit = database_symbols_to_fit(dbf) + initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit]) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf, parameters=dict(zip(symbols_to_fit, initial_guess))) + assert np.isclose(float(error), -3282497.2380024833, rtol=1e-6) def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess_only(datasets_db): @@ -198,7 +214,7 @@ def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess zero_error_prob = scipy.stats.norm(loc=0, scale=0.2).logpdf(0.0) # SM weight = 0.2 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) @@ -249,7 +265,7 @@ def test_non_equilibrium_thermochemical_error_for_of_enthalpy_mixing(datasets_db zero_error_prob = scipy.stats.norm(loc=0, scale=500.0).logpdf(0.0) # HM weight = 500 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) @@ -264,16 +280,16 @@ def test_subsystem_non_equilibrium_thermochemcial_probability(datasets_db): # Truth thermochemical_data = get_thermochemical_data(dbf_bin, ['CR', 'NI', 'VA'], phases, datasets_db) - bin_prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + bin_prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_bin) # Getting binary subsystem data explictly (from binary input) thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'NI', 'VA'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern) assert np.isclose(prob, bin_prob) # Getting binary subsystem from ternary input thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern) assert np.isclose(prob, bin_prob) @@ -351,11 +367,8 @@ def test_zpf_error_species(datasets_db): zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0) zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) - exact_likelihood = calculate_zpf_error(zpf_data, approximate_equilibrium=False) - assert np.isclose(exact_likelihood, zero_error_probability) - approx_likelihood = calculate_zpf_error(zpf_data, approximate_equilibrium=True) - # accept higher tolerance for approximate - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-4) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability) def test_zpf_error_equilibrium_failure(datasets_db): @@ -371,10 +384,8 @@ def test_zpf_error_equilibrium_failure(datasets_db): zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) with mock.patch('espei.error_functions.zpf_error.estimate_hyperplane', return_value=np.array([np.nan, np.nan])): - exact_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(exact_likelihood, zero_error_probability, rtol=1e-6) - approx_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-6) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability, rtol=1e-6) def test_zpf_error_works_for_stoichiometric_cmpd_tielines(datasets_db): @@ -389,10 +400,8 @@ def test_zpf_error_works_for_stoichiometric_cmpd_tielines(datasets_db): zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0) zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) - exact_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(exact_likelihood, zero_error_probability, rtol=1e-6) - approx_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-6) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability, rtol=1e-6) def test_non_equilibrium_thermochemcial_species(datasets_db): @@ -404,7 +413,7 @@ def test_non_equilibrium_thermochemcial_species(datasets_db): phases = ['LIQUID'] thermochemical_data = get_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) # Near zero error and non-zero error assert np.isclose(prob, (-7.13354663 + -22.43585011)) @@ -420,13 +429,8 @@ def test_equilibrium_thermochemcial_error_species(datasets_db): eqdata = get_equilibrium_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db) # Thermo-Calc truth_values = np.array([0.0, -28133.588, -40049.995, 0.0]) - # Approximate - errors_approximate, weights = calc_prop_differences(eqdata[0], np.array([]), True) - # Looser tolerances because the equilibrium is approximate, note that this is pdens dependent - assert np.all(np.isclose(errors_approximate, truth_values, atol=1e-5, rtol=1e-3)) - # Exact - errors_exact, weights = calc_prop_differences(eqdata[0], np.array([]), False) - assert np.all(np.isclose(errors_exact, truth_values, atol=1e-5)) + residuals, weights = calc_prop_differences(eqdata[0], np.array([])) + assert np.all(np.isclose(residuals, truth_values, atol=3e-5)) def test_equilibrium_thermochemical_error_unsupported_property(datasets_db): @@ -439,8 +443,8 @@ def test_equilibrium_thermochemical_error_unsupported_property(datasets_db): phases = list(dbf.phases.keys()) eqdata = get_equilibrium_thermochemical_data(dbf, ['CR', 'NI'], phases, datasets_db) - errors_exact, weights = calc_prop_differences(eqdata[0], np.array([])) - assert np.all(np.isclose(errors_exact, EXPECTED_VALUES, atol=1e-3)) + residuals, weights = calc_prop_differences(eqdata[0], np.array([])) + assert np.all(np.isclose(residuals, EXPECTED_VALUES, atol=1e-3)) def test_equilibrium_property_residual_function(datasets_db): @@ -483,7 +487,7 @@ def test_equilibrium_thermochemical_error_computes_correct_probability(datasets_ def test_driving_force_miscibility_gap(datasets_db): datasets_db.insert(A_B_DATASET_ALPHA) dbf = Database(A_B_REGULAR_SOLUTION_TDB) - parameters = {"L_ALPHA": None} + parameters = {"L_ALPHA": 0} zpf_data = get_zpf_data(dbf, ["A", "B"], ["ALPHA"], datasets_db, parameters) # probability for zero error error with ZPF weight = 1000.0 @@ -491,25 +495,19 @@ def test_driving_force_miscibility_gap(datasets_db): # Ideal solution case params = np.array([0.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) - assert np.isclose(prob, zero_error_prob) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) + prob = calculate_zpf_error(zpf_data, parameters=params) assert np.isclose(prob, zero_error_prob) # Negative interaction case params = np.array([-10000.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) - assert np.isclose(prob, zero_error_prob) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) + prob = calculate_zpf_error(zpf_data, parameters=params) assert np.isclose(prob, zero_error_prob) # Miscibility gap case params = np.array([10000.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) + prob = calculate_zpf_error(zpf_data, parameters=params) # Remember these are log probabilities, so more negative means smaller probability and larger error assert prob < zero_error_prob - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) - assert prob < zero_error_prob def test_setting_up_context_with_custom_models(datasets_db): diff --git a/tests/test_parameter_generation.py b/tests/test_parameter_generation.py index 224dcf32..3bf803fe 100644 --- a/tests/test_parameter_generation.py +++ b/tests/test_parameter_generation.py @@ -143,7 +143,7 @@ def test_mixing_energies_are_fit(datasets_db): zero_error_prob = scipy.stats.norm(loc=0, scale=500.0).logpdf(0.0) # HM weight = 500 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, sorted(read_dbf.elements), list(read_dbf.phases.keys()), datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) def test_duplicate_parameters_are_not_added_with_input_database(datasets_db): diff --git a/tests/test_plotting.py b/tests/test_plotting.py index db57262d..33f64479 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -4,7 +4,8 @@ Mostly integration tests that don't validate the plots themselves, but rather that the internal usage and matplotlib usage is correct. """ - +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt from pycalphad import Database, variables as v diff --git a/tests/testing_data.py b/tests/testing_data.py index 657e7bf1..a58365ea 100644 --- a/tests/testing_data.py +++ b/tests/testing_data.py @@ -33,9 +33,9 @@ $ Generated by brandon (pycalphad 0.5.2.post1) $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ -ELEMENT CU BLANK 0 0 0 ! -ELEMENT MG BLANK 0 0 0 ! -ELEMENT VA BLANK 0 0 0 ! +ELEMENT CU FCC_A1 0 0 0 ! +ELEMENT MG HCP_A3 0 0 0 ! +ELEMENT VA VACUUM 0 0 0 ! FUNCTION GFCCCU 298.15 GHSERCU#; 3200.0 N ! FUNCTION GFCCMG 298.15 -0.9*T + GHSERMG# + 2600; 3000.0 N ! @@ -342,6 +342,28 @@ } +CU_MG_HM_FORM_LAVES_C15_ANTISITE = yaml.load("""{ + "components": ["CU", "MG", "VA"], + "phases": ["LAVES_C15"], + "solver": { + "sublattice_site_ratios": [2, 1], + "sublattice_configurations": [["MG", "CU"]], + "mode": "manual" + }, + "conditions": { + "P": 101325, + "T": [298.15], + }, + + "output": "HM_FORM", + "values": [[[34720.0]]], + "reference": "FAKE DATA", + "comment": "Cu2:Mg is the stable endmember, Mg2:Cu is the antisite endmember that should have high energy." +} +""", Loader=YAML_LOADER) + + + CU_MG_HM_MIX_CUMG2_ANTISITE = yaml.load("""{ "components": ["CU", "MG", "VA"], "phases": ["CUMG2"],