Source code for chemtools.basisopt

# -*- coding: utf-8 -*-

#The MIT License (MIT)
#
#Copyright (c) 2014 Lukasz Mentel
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.

"""\
``basisopt`` is a module containing flexible methods for optimizing primitive
exponents of basis sets"""

from __future__ import division, print_function

import atexit
import datetime
import os
import pprint
import random
import string
import sys
import time
from collections import OrderedDict
import numpy as np
from scipy.optimize import minimize

from chemtools.basisparse import ORBITALS
from chemtools.basisset import BasisSet, get_num_params, sliceinto


[docs]class BSOptimizer(object): ''' Basis Set Optimizer class is a convenient wrapper for optimizing primitive exponents of Gaussian basis sets using different code interfaces for performing the actual electronic structure calculations. Args: objective : str or callable Name of the objective using which the optimization will be evaluated, if it is a callable/function it should take the output name as argument code : :py:class:`Calculator <chemtools.calculators.calculator.Calculator>` Subclass of the :py:class:`Calculator <chemtools.calculators.calculator.Calculator>` specifying the electronic structure program to use mol : :py:class:` Molecule <chemtools.molecule.Molecule>` Molecule specifying the system staticbs : dict or :py:class:`BasisSet <chemtools.basisset.BasisSet>` Basis set or ``dict`` of basis set with basis sets whose exponents are not going to be optimized fsopt : dict A dictionary specifying the basis set to be optimized, the keys should be element/atom symbol and the values should contain lists of 4-tuples composed of: shell, type, number of functions and parameters/exponents, for example, >>> fs = {'H' : [('s', 'et', 10, (0.5, 2.0))]} describes 10 ``s``-type exponents for H generated from even tempered formula with parameters 0.5 and 2.0 optalg : dict A dictionary specifying the optimization algorithm and its options fname : str Name of the job/input file for the single point calculator uselogs : bool Use natural logarithms of exponents in optimization rather than their values, this option is only relevant if functions asre given as ``exp`` or ``exponents`` regexp : str Regular expression to use in search for the objective if ``objective`` is regexp runcore : bool Flag to mark wheather to run separate single point jobs with different numbers of frozen core orbitals to calculate core energy penalize : bool Flag enabling the use of penalty function, when two exponent in any shell are too close the objective is multiplied by a penalty factor calculated in :py:func:`get_penalty <chemtools.basisopt.get_penalty>` penaltykwargs : dict Keyword arguments for the penalty function, default ``{'alpha' : 25.0, 'smallestonly' : True}`` ''' def __init__(self, objective=None, core=None, template=None, regexp=None, verbose=False, code=None, optalg=None, mol=None, fsopt=None, staticbs=None, fname=None, uselogs=True, runcore=False, penalize=None, penaltykwargs=None, logfile=None): self.fsopt = fsopt self.staticbs = staticbs self.objective = objective self.regexp = regexp self.template = template self.verbose = verbose self.core = core self.code = code self.optalg = optalg self.mol = mol self.result = None self.fname = fname self.uselogs = uselogs self.penalize = penalize self.penaltykwargs = penaltykwargs self.logfile = logfile if runcore: self.function = run_core_energy else: self.function = run_total_energy @property def fname(self): return self._fname @fname.setter def fname(self, value): ''' Set the name of the job/input file for the code used for single point calculations. ''' if value is None: self._fname = 'bso_' + ''.join(random.choice(string.ascii_letters) for _ in range(10)) + '.inp' else: self._fname = value @property def optalg(self): return self._optalg @optalg.setter def optalg(self, value): ''' set default optimization options if opts not given ''' # defaults tol = 1.0e-4 jac = False mxi = 100 if value is None: self._optalg = {"method": "Nelder-Mead", "tol": tol, "jacob": None, "options": {"maxiter": 100, "disp": True, } } elif isinstance(value, dict): if value.get('method').lower() == "nelder-mead": value['jacob'] = None else: value['jacob'] = jac if 'tol' not in value.keys(): value['tol'] = tol if 'maxiter' not in value['options'].keys(): value['options']['maxiter'] = mxi self._optalg = value else: raise ValueError("optalg should be a <dict>, got: {}".format(type(value))) @property def fsopt(self): return self._fsopt @fsopt.setter def fsopt(self, value): seqs = ["et", "even", "eventemp", "even tempered", "wt", "well", "welltemp", "well tempered", "le", "legendre", "exp", "exponents"] if value is None: raise ValueError("<fsopt> cannot be None") else: if isinstance(value, dict): for symbol, funlist in value.items(): for funtuple in funlist: if funtuple[0].lower() not in ORBITALS: msg = '<{}>: <shell> '.format(symbol) +\ ' should be one of {}'.format(', '.join(ORBITALS)) +\ ', got: {}'.format(funtuple[0]) raise ValueError(msg) if funtuple[1] not in seqs: msg = '<{}>: <seq> '.format(symbol) +\ ' should be one of {}'.format(', '.join(seqs)) +\ ', got: {}'.format(funtuple[1]) raise ValueError(msg) else: ValueError('<fsopt> should be a dict, got: {}'.format(type(value))) self._fsopt = OrderedDict(sorted(value.items(), key=lambda t: t[0])) @property def penaltykwargs(self): return self._penaltykwargs @penaltykwargs.setter def penaltykwargs(self, value): if value is None: self._penaltykwargs = {'alpha': 25.0, 'smallestonly': True} else: self._penaltykwargs = value @property def logfile(self): return self._logfile @logfile.setter def logfile(self, value): ''' if a filename is specified open a text file for writing with line buffering ''' if value is None: self._logfile = None self.log = sys.stdout else: self._logfile = value self.log = open(value, 'wt', buffering=1) atexit.register(self.finalize) def finalize(self): if self.log is not sys.stdout: self.log.close()
[docs] def header(self): ''' Return the basic information about the execution environment and optimization settings. ''' script = sys.argv[0] path = os.path.abspath(os.path.dirname(script)) out = "\n".join(["Script name : {0}".format(script), "Workdir : {0}".format(path), "Start time : {0}".format(datetime.datetime.today()), ]) return out
[docs] def jobinfo(self): '''Return the information on the optimization objects and parameters''' # for name, obj in [("code", code), ("job", job), ("mol", mol), # ("bsnoopt", bsnoopt), ("bsopt", bsopt), ("opt", opt)]: # TODO add printing of static basis set attrs = ['code', 'mol', 'optalg'] out = '' for attr in attrs: out += attr.upper().center(80, "=") + '\n' obj = getattr(self, attr) if isinstance(obj, dict): out += pprint.pformat(obj) else: out += str(obj) return out
[docs] def get_x0(self): ''' Collect all the parameters in an array of consecutive elements. ''' x0 = np.empty(0) for fperatom in self.fsopt.values(): for fs in fperatom: if fs[1].lower() in ['exp', 'exponents'] and self.uselogs: x0 = np.concatenate((x0, np.log(fs[-1])), axis=0) else: x0 = np.concatenate((x0, np.array(fs[-1])), axis=0) return x0
[docs] def run(self): ''' Start the basis set optimization Returns: res : OptimizeResult An instance of the ``scipy.optimize.OptimizeResult`` class ''' self.log.write(self.header()) self.log.write("\n" + "\n".join(["=" * 80, "STARTING OPTIMIZATION".center(80, "="), "=" * 80]) + "\n\n") self.log.write(self.jobinfo()) self.log.flush() starttime = time.time() x0 = self.get_x0() self.result = minimize(self.function, x0, args=(self,), method=self.optalg["method"], jac=self.optalg['jacob'], tol=self.optalg["tol"], options=self.optalg["options"]) self.log.write(str(self.result)) self.log.write("Elapsed time : {0:>20.3f} sec".format(time.time() - starttime))
[docs] def get_basis(self, name=None, element=None): ''' Construct the BasisSet object from the result of the optimized exponents and function definition. Args: name : str Name to be assigned to the basis set element : str Element symbol for the basis set Returns: basis : chemtools.basisset.BasisSet :py:class:`BasisSet <chemtools.basisset.BasisSet>` object with the optimized functions ''' bsdict = {} # get number of parameters per atom npars = [sum(get_num_params(t) for t in funs) for funs in self.fsopt.values()] x0peratom = sliceinto(self.result.x, npars) for (atom, funs), xpars in zip(self.fsopt.items(), x0peratom): bsdict[atom] = BasisSet.from_optpars(xpars, funs, name=name, element=element, explogs=self.uselogs) if self.staticbs is not None: if isinstance(self.staticbs, dict): common_atoms = set(self.staticbs.keys()) & set(bsdict.keys()) if common_atoms: for atom in common_atoms: bsdict[atom].append(self.staticbs[atom]) diff_atoms = set(self.staticbs.keys()) - set(bsdict.keys()) if diff_atoms: for atom in diff_atoms: bsdict[atom] = self.staticbs[atom] elif isinstance(self.staticbs, BasisSet): if self.staticbs.element in set(bsdict.keys()): bsdict[atom].append(self.staticbs) else: bsdict[self.staticbs.element] = self.staticbs return bsdict
[docs]def get_basis_dict(bso, x0): ''' Return a dictionary with :py:class:`BasisSet` objects as values and element symbols as keys. The dictionary is composed based on the current parameters ``x0`` and attributes of the :py:class:`BSOptimizer` including the ``staticbs`` ''' bsdict = dict() for atom, functs in bso.fsopt.items(): bsdict[atom] = BasisSet.from_optpars(x0, funs=functs, name='opt', element=atom, explogs=bso.uselogs) if bso.staticbs is not None: if isinstance(bso.staticbs, dict): common_atoms = set(bso.staticbs.keys()) & set(bsdict.keys()) if common_atoms: for atom in common_atoms: bsdict[atom].append(bso.staticbs[atom]) diff_atoms = set(bso.staticbs.keys()) - set(bsdict.keys()) if diff_atoms: for atom in diff_atoms: bsdict[atom] = bso.staticbs[atom] elif isinstance(bso.staticbs, BasisSet): if bso.staticbs.element in set(bsdict.keys()): bsdict[atom].append(bso.staticbs) else: bsdict[bso.staticbs.element] = bso.staticbs return bsdict
[docs]def get_penalty(bsdict, alpha=25.0, smallestonly=True): ''' For a given dict of basis sets calculate the penalty for pairs of exponents being too close together. Args: bsdict : dict Dictionary of :py:class:`BasisSet` objects alpha : float Parameter controlling the magnitude and range of the penalty smallestonly : bool A flag to mark whether to use only the smallest ratio to calculate the penalty or all smallest ratios from each shell and calculate the penalty as a product of individual penalties For each basis and shell within the basis ratios between pairs of sorted exponents are calculated. The minimal ratio (closest to 1.0) is taken to calculate the penalty according to the formula .. math:: P = 1 + \\exp(-\\alpha (r - 1)) where :math:`r` is the ratio between the two closest exponents (>1). ''' minratios = [] for basis in bsdict.values(): for functions in basis.functions.values(): exps = np.sort(functions['e']) ratios = exps[1:] / exps[:-1] minratios.append(np.min(ratios)) if smallestonly: return 1 + np.exp(-alpha * (np.min(np.array(minratios)) - 1)) else: return np.prod(1 + np.exp(-alpha * (np.array(minratios) - 1)))
[docs]def run_total_energy(x0, *args): ''' Funtion for running a single point calculation and parsing the resulting energy (or property) as specified by the objective function. Args: x0 : list or numpy.array contains a list of parameters to be optimized, may be explicit exponents or parametrized exponents in terms of some polynomial args : tuple of dicts bsopt, bsnoopt, code, job, mol, opt, needed for writing input and parsing output Returns: parsed result of the single point calculation as speficied by the objective function in the "job" dictionary ''' # unpack the args tuple for code readability bso = args[0] for atom, functs in bso.fsopt.items(): ni = 0 nt = 0 for shell, seq, nf, params in functs: nt += nf if seq not in ['le', 'legendre'] and not bso.uselogs: x0[ni:nt] = np.abs(x0[ni:nt]) ni += nf bsdict = get_basis_dict(bso, x0) # set the penalty value if bso.penalize: penalty = get_penalty(bsdict, **bso.penaltykwargs) else: penalty = 1.0 if bso.verbose: bso.log.write("Current exponents being optimized:\n") for atom, functs in bso.fsopt.items(): basis = BasisSet.from_optpars(x0, funs=functs, name='opt', element=atom, explogs=bso.uselogs) bso.log.write(atom + basis.print_functions()) bso.log.flush() bso.code.write_input(fname=bso.fname, template=bso.template, basis=bsdict, mol=bso.mol, core=bso.core) output = bso.code.run(bso.fname) if bso.code.accomplished(output): if callable(bso.objective): objective = bso.objective(output) else: objective = bso.code.parse(output, bso.objective, bso.regexp) if objective is None: raise ValueError("Unable to parse the objective, check output") if bso.verbose: bso.log.write("{0:<s}".format("Job Terminated without errors\n")) bso.log.write("x0 : " + ", ".join([str(x) for x in x0]) + "\n") bso.log.write("\n{0:<20s} : {1:>30s}\n".format("Output", output)) bso.log.write("{0:<20s} : {1:>30.10f}\n".format(str(bso.objective), objective)) bso.log.write("{0:<20s} : {1:>30.10f}\n".format("Objective", objective * penalty)) bso.log.write("=" * 80 + "\n") bso.log.flush() return objective * penalty else: raise ValueError("something went wrong, check output {0:s}".format(output))
[docs]def run_core_energy(x0, *args): ''' Funtion for running two single point calculations and parsing the resulting energy (or property) as specified by the objective function, primarily designed to extract core energy. Args: x0: list or numpy.array contains a list of parameters to be optimized, may be explicit exponents or parametrized exponents in terms of some polynomial args: tuple of dicts bsopt, bsnoopt, code, job, mol, opt, needed for writing input and parsing output Returns: parsed result of the single point calculation as speficied by the objective function in the "job" dictionary ''' # unpack the args tuple for code readability bso = args[0] for atom, functs in bso.fsopt.items(): ni = 0 nt = 0 for shell, seq, nf, params in functs: nt += nf if seq not in ['le', 'legendre'] and not bso.uselogs: x0[ni:nt] = np.abs(x0[ni:nt]) ni += nf bsdict = get_basis_dict(bso, x0) # set the penalty value if bso.penalize: penalty = get_penalty(bsdict, bso.penaltykwargs) else: penalty = 1.0 if bso.verbose: bso.log.write("Current exponents being optimized:\n") for atom, functs in bso.fsopt.items(): basis = BasisSet.from_optpars(x0, funs=functs, name='opt', element=atom, explogs=bso.uselogs) bso.log.write(atom + "\n" + basis.print_functions()) bso.log.flush() citote = [] stats = [] base = os.path.splitext(bso.fname)[0] inputs = [base + "_core" + str(sum(x)) + ".inp" for x in bso.core] for inpname, core in zip(inputs, bso.core): bso.code.write_input(fname=inpname, core=core, basis=bsdict, mol=bso.mol, template=bso.template) outputs = bso.code.run_multiple(inputs) for output in outputs: citote.append(bso.code.parse(output, bso.objective, bso.regexp)) stats.append(bso.code.accomplished(output)) if stats[0] and stats[1]: if bso.verbose: bso.log.write("x0 : " + ", ".join([str(x) for x in x0]) + "\n") bso.log.write("{0:<20s} : {1:>30s} {2:>30s}\n".format("Terminated OK", str(stats[0]), str(stats[1]))) bso.log.write("{0:<20s} : {1:>30.10f} {2:>30.10f}\n".format(str(bso.objective), citote[0], citote[1])) bso.log.write("-" * 84) coreenergy = citote[0] - citote[1] if coreenergy > 0.0: coreenergy = -1.0 * coreenergy if bso.verbose: bso.log.write("{0:<20s} : {1:>30.10f}\n".format("Core energy", coreenergy)) bso.log.write("{0:<20s} : {1:>30.10f}\n".format("Objective", coreenergy * penalty)) bso.log.write("=" * 84) bso.log.flush() return coreenergy * penalty else: raise ValueError("something went wrong, check outputs {0:s}".format(", ".join(outputs)))
[docs]def opt_shell_by_nf(shell=None, nfs=None, max_params=5, opt_tol=1.0e-4, save=False, bsopt=None, **kwargs): ''' For a given shell optimize the functions until the convergence criterion is reached the energy difference for two consecutive function numbers is less than the threshold Kwargs: shell : string string label for the shell to be optimized nfs : list of ints list of integers representing the number of basis functions to be inceremented in the optimization, max_params : int maximal number of parameters to be used in the legendre expansion, (length of the expansion) opt_tol : float threshold controlling the termination of the shell optimization, if energy difference between two basis sets with subsequent number of functionsis larger than this threshold, another function is added to this shell and parameters are reoptimized, save : bool a flag to trigger saving all the optimized basis functions for each shell, kwargs : dict options for the basis set optimization driver, see driver function from the basisopt module Returns: BasisSet object instance with optimized functions for the specified shell Raises: ValueError: when `shell` is different from `s`, `p`, `d`, `f`, `g`, `h`, `i` when number of parameters equals 1 and there are more functions when there are more parameters than functions to optimize ''' _shells = ['s', 'p', 'd', 'f', 'g', 'h', 'i'] if shell not in _shells: raise ValueError('shell must be one of the following: {}'.format(", ".join(_shells))) if len(bsopt['params'][0]) == 1 and min(nfs) != 1: raise ValueError("1 parameter and {0} functions doesn't make sense".format(min(nfs))) if min(nfs) < len(bsopt['params'][0]): raise ValueError("more parameters ({0}) than functions to optimize ({1})".format(len(bsopt['params'][0]), min(nfs))) bsopt["typ"] = "legendre" e_last = 0.0 x_last = bsopt['params'][0] for nf in nfs: bsopt['nfpshell'] = [0]*_shells.index(shell) + [nf] res = driver(bsopt=bsopt, **kwargs) print("Completed optimization for {0:d} {1}-type functions".format(nf, shell)) print("{s:<25s} : {v:>20.10f}".format(s="Current Function value", v=res.fun)) print("{s:<25s} : {v:>20.10f}".format(s="Previous Function value", v=e_last)) print("{s:<25s} : {v:>20.10f}".format(s="Difference", v=abs(res.fun-e_last))) if abs(res.fun - e_last) < opt_tol: print("Basis saturated with respect to threshold") bsopt['nfpshell'] = nfps_last if save: save_basis(x_last, bsopt) return BasisSet.from_optdict(x_last, bsopt) else: print("Threshold exceeded, continuing optimization.") x_last = res.x.tolist() nfps_last = bsopt['nfpshell'] if len(bsopt['params'][0]) < max_params: print("adding more parameters") restup = tuple(res.x) # this assumption should be revised, improved or justified # suggestion: maybe change to average of existing parameters restup += tuple([abs(min(restup)*0.25)]) bsopt['params'] = [restup,] else: print("not adding more parameters") bsopt['params'] = [tuple(res.x),] e_last = res.fun else: print("Supplied number of functions exhausted but the required accuracy was not reached") if save: save_basis(res.x.tolist(), bsopt) return BasisSet.from_optdict(res.x.tolist(), bsopt)
[docs]def opt_multishell(shells=None, nfps=None, guesses=None, max_params=5, opt_tol=1.0e-4, save=False, bsopt=None, **kwargs): ''' Optimize a basis set by saturating the function space shell by shell Kwargs: shells (list of strings): list of shells to be optimized, in the order the optimization should be performed, nfps (list of lists of integers): list specifying a set of function numbers to be scanned per each shell, guesses (list of lists of floats): list specifying a set of starting parameters per each shell, max_params (int) maximal number of parameters to be used in the legendre expansion, (length of the expansion) opt_tol (float): threshold controlling the termination of the shell optimization, if energy difference between two basis sets with subsequent number of functionsis larger than this threshold, another function is added to this shell and parameters are reoptimized kwargs: options for the basis set optimization driver, see driver function from the basisopt module ''' # bsnoopt needs to exists since it will be appended with optimized # functions after each shell is optimized if kwargs['bsnoopt'] is None: kwargs['bsnoopt'] = BasisSet.from_dict({"element" : "Be", "functions" : {}}) # begin the main loop over shells, nr f per shell and guesses for shell, nfs, guess in zip(shells, nfps, guesses): header = " Beginning optimization for {s:s} shell ".format(s=shell) print("="*100) print(format(header, '-^100')) print("="*100) bsopt['params'] = [tuple(guess)] # begin the optimization for a given shell and store the optimized # functions under optimized_shell optimized_shell = opt_shell_by_nf(shell, nfs, max_params=max_params, opt_tol=opt_tol, save=save, bsopt=bsopt, **kwargs) # add the optimized shell to the total basis set kwargs['bsnoopt'].add(optimized_shell) # save the final complete basis set basis_dict = vars(kwargs['bsnoopt']) with open("final.bas", 'wb') as ff: ff.write(str(basis_dict))