Source code for chemtools.calculators.gamessus

# -*- coding: utf-8 -*-

#The MIT License (MIT)
#
#Copyright (c) 2014 Lukasz Mentel
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in all
#copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#SOFTWARE.

'''
Module for handling Gamess-US related jobs:
- Gamess          : running and submitting jobs, writing inputs,
- GamessInpParser : parsing the input file,
- GamessLogParser : parsing the output file,
- GamessDatParser : parsing data from the gamess PUNCH (.dat) file
'''

from __future__ import print_function

import argparse
import itertools
import math
import os
import re
import sys
from copy import copy
from subprocess import call
import numpy as np

from .calculator import Calculator, InputTemplate
from ..parsetools import slicebetween, sliceafter, parsepairs, getlines


[docs]class GamessUS(Calculator): '''Container object for Gamess-us jobs.''' def __init__(self, name="GamessUS", version="00", runopts=None, **kwargs): super(GamessUS, self).__init__(**kwargs) self.name = name self.inpext = '.inp' self.rungms = self.executable self.gmspath = os.path.dirname(self.executable) self.version = version self.runopts = runopts if os.path.isfile(os.path.join(self.gmspath, 'ddikick.x')): self.ddikick = os.path.join(self.gmspath, 'ddikick.x') @property def version(self): 'Return the version.' return self._version @version.setter def version(self, value): 'Check if the version exist and set it if it does.' versions = [] for item in os.listdir(self.gmspath): match = re.match(r'[a-z]+\.([0-9]+)\.x', item) if match: versions.append(match.group(1)) if value in versions: self._version = value else: raise IOError('GamessUS version {0:s} not found in {1:s}'.format(value, self.gmspath)) @property def runopts(self): 'Return the runopts' return self._runopts @runopts.setter def runopts(self, value): 'Set the runopts dict' if value is None: self._runopts = ['1'] else: self._runopts = value
[docs] def get_command(self, inpfile): 'Return the command to execute' return [self.executable, os.path.splitext(inpfile)[0], self.version] +\ self.runopts
[docs] def remove_dat(self, inpfile): ''' Remove the gamess dat file if it exists in the scratch directory. ''' datfile = os.path.splitext(inpfile)[0] + ".dat" if os.path.exists(os.path.join(self.scratch, datfile)): os.remove(os.path.join(self.scratch, datfile))
[docs] def run(self, inpfile, logfile=None, remove_dat=True): ''' Run a single gamess job interactively - without submitting to the queue. ''' if remove_dat: self.remove_dat(inpfile) if logfile is None: logfile = os.path.splitext(inpfile)[0] + ".log" with open(logfile, 'w') as fobj: call(self.get_command(inpfile), stdout=fobj, stderr=fobj) return logfile
[docs] def run_multiple(self, inputs): pass
[docs] def accomplished(self, outfile): ''' Return True if Gamess(US) job finished without errors. ''' parser = GamessLogParser(outfile) return parser.accomplished()
[docs] def parse(self, output, objective, regexp=None): ''' Parser GAMESS(US) output file to get the objective. ''' parser = GamessLogParser(output) if objective == "hf total energy": return parser.get_hf_total_energy() elif objective == "cisd total energy": energies = parser.get_energy_components('ci') return energies["TOTAL ENERGY"] elif objective == "correlation energy": energies = parser.get_energy_components('ci') return energies["TOTAL ENERGY"] - parser.get_hf_total_energy() elif objective == "regexp": return parser.get_variable(regexp) else: raise ValueError("unknown objective in prase {0:s}".format(objective))
[docs] def write_input(self, fname, template=None, mol=None, basis=None, core=None): ''' Write the molpro input to "fname" file based on the information from the keyword arguments. Args: mol : :py:class:`chemtools.molecule.Molecule` Molecule object instance basis : dict or :py:class:`BasisSet <chemtools.basisset.BasisSet>` An instance of :py:class:`BasisSet <chemtools.basisset.BasisSet>` class or a dictionary of :py:class:`BasisSet <chemtools.basisset.BasisSet>` objects with element symbols as keys core : list of ints Molpro core specification template : :py:class:`str` Template of the input file fname : :py:class:`str` Name of the input file to be used ''' out = ' $data\nbasis optimization\n{0:s}'.format(mol.symmetry) if mol.symmetry.lower() == 'c1': out += '\n' else: out += '\n\n' for i, atom in enumerate(mol.atoms): if i in mol.unique_labels: atombasis = basis[atom.symbol] out += atom.gamess_rep() out += atombasis.to_gamessus() out += ' $end' subs = {'basis': out, 'core': core} temp = InputTemplate(template) with open(fname, 'w') as finp: finp.write(temp.substitute(subs))
def __repr__(self): return "\n".join([ "<Gamess(", "\tname={},".format(self.name), "\tgmspath={},".format(self.gmspath), "\trungms={},".format(self.rungms), "\tversion={},".format(self.version), "\tscratch={},".format(self.scratch), "\trunopts={},".format(str(self.runopts)), ")>\n"])
class GamessInput(object): ''' A class for parsing and writing gamess-us input files. ''' def __init__(self, fname=None, parsed=None): ''' Initialize the class. ''' self.fname = fname self.end = " $end\n" self.parsed = parsed # not nested groups of input blocks (not parsed into a dict of dicts) self._notnested = ["$data", "$vec", "$ecp"] @property def parsed(self): 'Return the `parsed` attribute' return self._parsed @parsed.setter def parsed(self, value): if isinstance(value, dict): self._parsed = value elif value is None: self._parsed = {} else: raise TypeError("expected a dictionary but got {0:s}".format(type(value))) def parse(self): ''' Parse gamess input file into a dictionary of dictionaries, where the highest level entries are gamess namelist fileds and that contain dictionaries of options. All key are converted to lowercase. ''' with open(self.fname, 'r') as finp: contents = finp.read() return self.parse_from_string(contents) def parse_from_string(self, inpstr): ''' Parse gamess input file into a dictionary of dictionaries, where the highest level entries are gamess namelist fileds and that contain dictionaries of options. All key are converted to lowercase. For example if the following input was parsed: ''' pat = re.compile(r'(?P<block>\$[a-zA-Z]{3,6})\s+(?P<entries>.*?)\$END', flags=re.DOTALL | re.IGNORECASE) iterator = pat.finditer(inpstr) for match in iterator: if match.group("block").lower() not in self._notnested: self.parsed[match.group("block").lower()] = {} fields = [s.strip() for s in match.group("entries").split("\n")] for field in fields: if not field.startswith("!"): for line in field.split(): key, value = line.split("=") self.parsed[match.group("block").lower()][key.lower()] = value elif match.group("block").lower() == "$data": self.parsed["$data"] = self.parse_data(match.group("entries")) elif match.group("block").lower() in ["$vec", "$ecp"]: self.parsed[match.group("block").lower()] = match.group("entries") return True def parse_data(self, datastr, parse_basis=False): ''' Parse $DATA block specified in the gamess input file. The parser assumes that in $DATA input the atom positions are specified using cartesian coordiantes and returns a list of dictionaries with parsed information about each specified atom. Args: datastr (str) string with the contents of the $data block Returns: datadict (dict) ''' block = re.compile(r'(?P<label>[a-zA-Z]{1,2}[0-9]{0,2})\s*' + r'(?P<atomic>\d+\.\d+)' + r'(?P<xyz>(\s+\-?\d+\.\d+){3})\s*' + r'(?P<basis>.*?)\n\s*\n', flags=re.S) datadict = dict() datadict["title"] = datastr.split('\n')[0].strip() datadict["group"] = datastr.split('\n')[1].strip() datadict["atoms"] = list() itfound = block.finditer(datastr) for match in itfound: datadict["atoms"].append({ 'label': match.group('label'), 'atomic': float(match.group('atomic')), 'xyz': tuple(float(x) for x in match.group('xyz').split()), 'basis': match.group('basis'), }) return datadict def parsed2str(self): ''' Return a string with formatted string containing the GAMESS(US) input based on the previously parsed data. ''' inpstr = "" # write nested namelist groups for key, value in sorted(self.parsed.items()): if key not in self._notnested: inpstr += " {0:<s}\n".format(key) for kkey, vvalue in sorted(value.items()): inpstr += " {k:s}={v:s}\n".format(k=kkey, v=str(vvalue)) inpstr += self.end # write $data card inpstr += self.data2str() return inpstr def data2str(self, header=True): ''' Return the $DATA card of the input as a formatted string based on the previously parsed data.a ''' data = " {0:s}\n".format("$data") if header: data += "{0:s}\n".format(self.parsed["$data"]["title"]) data += "{0:s}\n".format(self.parsed["$data"]["group"]) if self.parsed['$data']['group'].lower() != 'c1': data += '\n' for atom in self.parsed['$data']['atoms']: data += '{0:5s}{1:5.1f}{2:12.5f}{3:12.5f}{4:12.5f}\n'.format( atom['label'], atom['atomic'], atom['xyz'][0], atom['xyz'][1], atom['xyz'][2]) data += atom['basis'] + '\n\n' data += self.end return data def write_input(self, filename): ''' Write a gamess input file under the name <inpfile> based on the information fstored in the dictionary <inpdict>. Args: filename : str Name of the input file ''' with open(filename, "w") as finp: finp.write(self.parsed2str()) def write_with_vec(self, filename, vecstr, keepscftyp=False): ''' Write new gamess input based on exisiting input and natural orbitals from $VEC section in PUNCH file. Write a new input file for gamess based on previously prased and/or constructed dictionary containing input specification and append orbitals from a previous run as starting orbitals. The starting orbitals should be stored in an ASCII PUNCH file whose name is given under "datfile" variable. Args: filename : str Name of the input file to be created vecstr : str String with the vectors to be written ''' parsed = copy(self.parsed) if not keepscftyp: self.parsed['$contrl']['scftyp'] = 'none' self.parsed['$guess'] = {} self.parsed['$guess']['guess'] = 'moread' self.parsed['$guess']['norb'] = str(get_naos_nmos(vecstr)[1]) with open(filename, "w") as finp: finp.write(self.parsed2str()) finp.write("\n $vec\n") finp.write(vecstr) finp.write("\n $end\n") finp.close() self.parsed = parsed def set_gamess_input(self, dinp, mol, bs, code, core): if "$contrl" in dinp.keys(): dinp["$contrl"]["icharg"] = mol.charge dinp["$contrl"]["mult"] = mol.multiplicity if mol.multiplicity == 1: dinp["$contrl"]["scftyp"] = "rhf" elif mol.multiplicity > 1: dinp["$contrl"]["scftyp"] = "rohf" if code["method"].lower() == "hf": dinp["$contrl"] = {key: value for key, value in dinp["$contrl"].items() if key != "cityp"} else: sys.exit("no $contrl group in the gamess input string") if "$cidet" in dinp.keys(): dinp["$cidet"]["nact"] = bs.get_no_functions(bs) - core dinp["$cidet"]["ncore"] = core dinp["$cidet"]["nels"] = mol.electrons - core*2 if "$ormas" not in dinp.keys(): dinp["$ormas"] = {} if code["method"] == "cisd": # sum of singly and doubly occupied orbitals noccupied = mol.multiplicity - 1 + (mol.electrons-(mol.multiplicity - 1))/2 dinp["$ormas"]["nspace"] = 2 dinp["$ormas"]["mstart(1)"] = ','.join([str(core+1),str(noccupied+1)]) dinp["$ormas"]["mine(1)"] = ",".join([str((mol.electrons-core*2)-2), str(0)]) dinp["$ormas"]["maxe(1)"] = ",".join([str(mol.electrons-core*2), str(2)]) elif code["method"] == "mrcisd": dinp["$ormas"]["nspace"] = 3 dinp["$ormas"]["mstart(1)"] = ','.join([str(core+1),str(mol.electrons-core+1)]) dinp["$ormas"]["mine(1)"] = ",".join([str(mol.electrons-2), str(0)]) dinp["$ormas"]["maxe(1)"] = ",".join([str(mol.electrons), str(2)]) return dinp def print_parsed(self): ''' Neat print of the dictionary with parsed input ''' for key, value in sorted(self.parsed.items()): if isinstance(value, dict): print(key) for kkey, vvalue in sorted(value.items()): print("\t{0:<10s} : {1:}".format(kkey, vvalue)) else: print(key, '\n', value) class GamessLogParser(object): ''' Methods for parsing gamess-us log file. ''' def __init__(self, log): self.logfile = log @property def logfile(self): 'Return the value of the `logfile` attribute' return self._logfile @logfile.setter def logfile(self, value): 'Check if the logfile exists first.' if os.path.exists(value): self._logfile = value else: raise ValueError("File: {} does not exist".format(value)) def logexists(self): ''' Check if the log file exists. ''' if os.path.exists(self.logfile): return True else: sys.exit("Gamess log file: {0:s} doesn't exist in {1:s}".format( self.logfile, os.getcwd())) def parse(self, regex): ''' Parsing function based on querries in the form of regular expressions that return the match object. ''' cpatt = re.compile(regex) with open(self.logfile, 'r') as log: content = log.read() return cpatt.search(content) def accomplished(self): '''Check if a job teminated normally.''' regex = r'TERMINATED NORMALLY' match = self.parse(regex) return match is not None def get_version(self): ''' Get the version of the GAMESS(US) package. ''' regex = r'.*\s+GAMESS VERSION =\s*(.*?)\s*\*' match = self.parse(regex) if match: return match.group(1) else: return None def get_charge(self): '''Get total charge.''' regex = r'CHARGE OF MOLECULE\s+=\s*(?P<charge>\d+)' match = self.parse(regex) if match: return int(match.group("charge")) def get_electrons(self): '''Get number of electrons.''' regex = r'NUMBER OF ELECTRONS\s+=\s*(?P<nele>\d+)' match = self.parse(regex) if match: return int(match.group("nele")) def get_homo(self): ''' Get the orbital index of homo orbital (indexing starts from zero). ''' if int(self.get_electrons()) % 2 == 0: return int(self.get_electrons()) / 2 - 1 else: sys.exit("open shell handling not implemented") def get_number_of_atoms(self): '''Get total number of atoms from gamess log file.''' regex = r'TOTAL NUMBER OF ATOMS\s+=\s*(?P<nat>\d+)' match = self.parse(regex) if match: return int(match.group("nat")) def get_number_of_aos(self): ''' Get the number of primitive cartesian gaussian basis functions from Gamess log file ''' regex = r'NUMBER OF CARTESIAN GAUSSIAN BASIS FUNCTIONS =\s*(?P<nao>\d+)' match = self.parse(regex) if match: return int(match.group("nao")) def get_number_of_core_mos(self): ''' Get the number of core molecular orbitals from the Gamess log file ''' regex = r'NUMBER OF CORE MOLECULAR ORBITALS\s*=\s*(?P<ncore>\d+)' match = self.parse(regex) if match: return int(match.group("ncore")) def get_number_of_mos(self): ''' Get the number of molecular orbitals from Gammess log file. ''' ispher_patt = r'ISPHER=\s*(?P<ispher>\-?\d{1}).*' var_space_patt = r'.*VARIATION SPACE IS\s*(?P<nmo>\d+).*' c_ispher = re.compile(ispher_patt) c_var_space = re.compile(var_space_patt) with open(self.logfile, 'r') as log: lines = log.read() match = c_ispher.search(lines) if match: ispher = int(match.group("ispher")) if ispher == -1: return self.get_number_of_aos() elif ispher == 1: match = c_var_space.search(lines) if match: n_mo = int(match.group("nmo")) return n_mo else: sys.exit("wrong ispher found: {0:d}".format(ispher)) def get_linear_deps(self): ''' Get number of linearly dependent combinations dropped. ''' regex = r'NUMBER OF LINEARLY DEPENDENT MOS DROPPED=\s*(?P<lindep>\d+)' match = self.parse(regex) if match: return int(match.group('lindep')) def get_scf_type(self): ''' Get the information on SCFTYP used in the gamess job. ''' regex = r'.*SCFTYP=(?P<scftyp>[A-Z]+)' match = self.parse(regex) if match: return match.group("scftyp") def get_cc_type(self): ''' Get the information on CCTYP used in the gamess job. ''' regex = r'.*CCTYP =(?P<cctyp>[A-Z\(\)\-]+)' match = self.parse(regex) if match: return match.group("cctyp") else: return None def get_ci_type(self): ''' Get the information on CITYP used in the gamess job. ''' regex = r'.*CITYP =(?P<cityp>[A-Z]+)' match = self.parse(regex) if match: return match.group("cityp") def get_mplevel(self): ''' Get the information on MPLEVL used in the gamess job. ''' regex = r'.*MPLEVL=\s*(?P<mplevl>\d+)' match = self.parse(regex) if match: return int(match.group("mplevl")) def get_hf_total_energy(self): ''' Return the total HF energy. ''' regex = r'FINAL R[O]*HF ENERGY IS\s+(?P<energy>\-?\d+\.\d+)' match = self.parse(regex) if match: return float(match.group("energy")) def get_cc_total_energy(self): ''' Independently of CCTYP value return the "HIGHEST LEVEL RESULT" total energy. ''' regex = r'COUPLED-CLUSTER ENERGY E(.*?)\s*=\s*(?P<energy>\-?\d+\.\d+)' match = self.parse(regex) if match: return float(match.group("energy")) def get_energy_components(self, method): ''' Read the summary of the energies printed in the gamess log file at the property section corresponding to a particular "method". ''' if method.lower() in ["hf", "scf", "hfscf"]: if self.get_scf_type() != "NONE": header = 'PROPERTY VALUES FOR THE {0:<5s} SELF-CONSISTENT FIELD WAVEFUNCTION'.format(self.get_scf_type()) else: sys.exit("No HF calculation was performed, check the log file once again.") elif method.lower() in ["ci"]: if self.get_ci_type().lower() in ["guga", "ormas", "fsoci"]: header = '{0:<5s} CI PROPERTIES'.format(self.get_ci_type()) else: sys.exit("Wrong method in <get_energy_components>: {0:s}".format(method)) with open(self.logfile, 'r') as log: data = log.readlines() return parsepairs(sliceafter(data, header, 22)) def get_lz_values(self): ''' Get values of Lz operator for every orbital and translate them into labels of orbitals. ''' mos = r'\s*MO\s*(?P<index>\d*)\s*\(\s*(?P<shell>\d+)\s*\)' weights = r'\s*HAS LZ\(WEIGHT\)=\s*(?P<lz>-?\d*\.\d+)' lz_patt = re.compile(mos + weights) locstr = self.get_loc_strings('lz values') lines = getlines(self.logfile, locstr) res = list() for line in lines: match = lz_patt.search(line) if match: index = int(match.group('index')) - 1 shell = int(match.group('shell')) lz = int(abs(float(match.group('lz')))) res.append({"index": index, "shell": shell, "lz": lz}) return res def get_ao_labels(self, orbs='hf orbs'): ''' Retrieve the information about atomic basis set ''' indsym = r'\s*(?P<index>\d+)\s{2}(?P<symbol>[a-zA-Z]{1,2})' cencom = r'\s*(?P<center>\d+)\s*(?P<component>[A-Z]{1,})' ao_patt = re.compile(indsym + cencom) istart = 1 if orbs == 'local orbs' else 3 locstr = self.get_loc_strings(orbs) lines = getlines(self.logfile, locstr) lines = lines[istart:istart + self.get_number_of_aos()] res = [] for line in lines: if match := ao_patt.search(line): index = int(match.group('index')) - 1 symbol = match.group('symbol') center = int(match.group('center')) component = match.group('component') res.append({ "index": index, "symbol": symbol, "center": center, "component": component}) return res def get_variable(self, rawstring): # sourcery skip: use-named-expression 'wrapper around a regex search method' with open(self.logfile, 'r') as out: data = out.read() genre = re.compile(rawstring, flags=re.M) match = genre.search(data) if match: return float(match.group(1)) def get_loc_strings(self, name): locators = { 'csfs' : [('DETERMINANT CONTRIBUTION TO CSF', 2), (' ...... END OF -DRT- GENERATION ......', -5)], 'ci coeffs' : [(' CSF COEF OCCUPANCY (IGNORING CORE)', 2), (' ...... END OF CI-MATRIX DIAGONALIZATION ......', 0)], 'initial orbs' : [('INITIAL GUESS ORBITALS', 3), (' ...... END OF INITIAL ORBITAL SELECTION ......', 0)], 'hf orbs' : [('EIGENVECTORS', 3), (' ...... END OF {} CALCULATION ......'.format(self.get_scf_type()), 0)], 'ci orbs' : [('NATURAL ORBITALS IN ATOMIC ORBITAL', 3), (' ...... END OF DENSITY MATRIX CALCULATION ......', 0)], 'local orbs': [('LOCALIZED ORBITALS', 3), (' LOCALIZATION ...', 0)], 'lz values' : [('LZ VALUE ANALYSIS FOR', 2), ('EIGENVECTORS', -2)], } if name in locators: return locators[name] else: raise ValueError('wrong "name", should be one of: {}'.format(', '.join(locators.keys()))) def get_orbital_labels(self, orbs, minlength=5): '''Return the labels of the specified orbitals Args: orbs : str String key for extracting locators minlength : int Minimal length of a line to be kept Returns: out : list ''' locstr = self.get_loc_strings(orbs) lines = getlines(self.logfile, locstr) lines = [l for l in lines if len(l) > minlength] nao = self.get_number_of_aos() inds = list(itertools.chain(*[l.split() for l in lines[::nao + 3]])) evs = list(itertools.chain(*[l.split() for l in lines[1::nao + 3]])) syms = list(itertools.chain(*[l.split() for l in lines[2::nao + 3]])) return zip(inds, evs, syms) def get_ci_coeffs(self): ''' Parse CI coefficients from a list of lines containing the output of GAMESS(US) calculation Returns: coeffs : dict Dictionary with parsed coeficients and their indexes in the internal GAMESS(US) ordering. ''' locstr = self.get_loc_strings('ci coeffs') lines = getlines(self.logfile, locstr) patt = re.compile(r'\s*(\d+)\s*(-?\d*\.\d+)') index = [] coeff = [] for line in lines: match = patt.search(line) if match: index.append(match.group(1)) coeff.append(match.group(2)) return dict(zip([int(x) for x in index], [float(x) for x in coeff])) def get_csfs(self, withcoeffs=True): ''' Parse CSF information from a list of lines from GAMESS(US) output. ''' locstr = self.get_loc_strings('csfs') lines = getlines(self.logfile, locstr) ne = self.get_electrons() - 2 * self.get_number_of_core_mos() csfre = r'^\s*(CSF\s*(?P<csfno>\d+)\:)?' coeffre = r'\s*C\(\s*(?P<detno>\d+)\)=\s*(?P<coeff>-?\d*\.\d+)' orbsre = r'\s+\:\s{2}(?P<orbs>.*)' patt = re.compile(csfre + coeffre + orbsre) csfstr = "".join(lines).lstrip() csfs = [s for s in re.split(r'CASE VECTOR =\s*\d+\s*', csfstr) if len(s) > 0] out = [] for csfstr in csfs: endvec = csfstr.find('FOR MS=') vector = csfstr[:endvec].strip() csf = {'vector': vector, 'dets': []} csfstart = csfstr.find('CSF') for i, line in enumerate(csfstr[csfstart:].split('\n')): if match := patt.search(line): gd = match.groupdict() # in the first line of the csf specification there is the CSF number # (index) that should be stored if i == 0: csf['no'] = int(gd['csfno']) spatial = [abs(int(x)) for x in detsplit(gd['orbs'], ne)] isort = np.argsort(np.array(spatial)) csf['spatial'] = [spatial[i] for i in isort] spin = det_to_spin_coupling(gd['orbs'], ne) csf['dets'].append((int(gd['detno']), float(gd['coeff']), "".join([spin[i] for i in isort]))) out.append(csf) if withcoeffs: coeffs = self.get_ci_coeffs() return [ dict(csf.items() + [('coeff', coeffs.get(csf['no'], None))]) for csf in out ] else: return out def detsplit(det, norb): ''' Split a string representation of the spatial part of the determinant `det` into `n` parts. Args: det : str String representation of the determinant norb : int Number of orbitals in the determinant Returns: out : list List of determinant components ''' out = det.split() if len(out) != norb: nchar = int(math.floor(len(det) / float(norb))) out = [det[nchar * i: nchar * i + nchar] for i in range(norb)] return out def det_to_spin_coupling(det, norb): ''' Convert a string representaiton of a determinant to a string with spin coupling. It is assumed here that a minus sign means beta spin. `a` will denote :math:`\alpha` and `b` will denote :math:`\beta` Args: det : str String representation of a determinant, e.g. `-3 4 2 -1` norb : int Number of orbitals that should be present in `det` equal to the number of active electrons Returns: sc : str String representation of a spin coupling ''' sc = '' detspl = detsplit(det, norb) for oi in detspl: if '-' in oi: sc += 'b' else: try: sc += 'a' except: sc += '*' return sc class GamessDatParser(object): 'Parser for the GAMESS(US) dat (.F10) file' def __init__(self, datfile): self.datfile = datfile @property def datfile(self): 'Return the value of the `datfile` attribute' return self._datfile @datfile.setter def datfile(self, value): if os.path.exists(value): self._datfile = value else: raise ValueError("File: {} does not exist".format(value)) def get_occupations(self): ''' Parse the occupation numbers from the ascii PUNCH file (.dat). ''' with open(self.datfile, 'r') as dat: data = dat.read() no_patt = re.compile(r'\$OCC(NO)?(?P<occ>.*?)\$END', flags=re.DOTALL) match = no_patt.search(data) nooc = list() if match: for line in match.group("occ").split('\n'): nooc.extend([float(x) for x in line.split()]) return np.asarray(nooc) else: raise ValueError('No section with occupation numbers found.') def parse_data(self): ''' Parse $DATA block from the dat file. ''' with open(self.datfile, 'r') as dat: datastr = slicebetween(dat.read(), '$DATA', '$END') gip = GamessInput() return gip.parse_data(datastr.lstrip(' \n')) def get_vec_string(self, method): ''' Parse the natural orbitals from the ascii PUNCH file (.dat). Args: method : str acceptable values are: - for scf orbitals: "scf", "hf", "rhf", "rohf", "uhf", "gvb" - for mcscf orbitals: "mcscfmos", "mcscfnos" - for ci orbitals: "ci", "aldet", "fsoci", "guga", "genci", "ormas"" - for localized orbitals: `local` Returns: orbs : numpy.array 2D numpy array of shape (naos, nmos) containing ortbials expansion coefficients ''' with open(self.datfile, 'r') as dat: data = dat.read() if method.lower() in ['scf', 'hf', 'rhf', 'rohf', 'uhf', 'gvb']: try: orbi = data.index('ORBITALS ---') except: raise ValueError('SCF orbitals header not found, check dat file') elif method.lower() in ['ci', 'aldet', 'fsoci', 'guga', 'genci', 'ormas']: try: orbi = data.index('NO-S') except: raise ValueError('CI orbitals header not found, check dat file') elif method.lower() == 'mcscfmos': try: orbi = data.index('OPTIMIZED MCSCF MO-S') except: raise ValueError('MCSCF orbitals header not found, check dat file') elif method.lower() == 'mcscfnos': try: orbi = data.index('NATURAL ORBITALS OF MCSCF') except: raise ValueError('MCSCF orbitals header not found, check dat file') elif method.lower() == 'local': try: orbi = data.index('LOCALIZED ORBITALS') except: raise ValueError('LOCALIZED orbitals header not found, check dat file') else: raise ValueError("Don't know what to do with: '{0:s}'".format(method)) vecstr = slicebetween(data[orbi:], '$VEC', '$END').strip(" \n\t\r") if vecstr == "": raise ValueError("No $VEC section found for method: '{0:s}'".format(method)) else: # add a single whitespace to account for the removed space in strip return ' ' + vecstr def get_orbitals(self, method): ''' Parse the natural orbitals from the ascii PUNCH file (.dat). Args: method : str acceptable values are: - for scf orbitals: "scf", "hf", "rhf", "rohf", "uhf", "gvb" - for mcscf orbitals: "mcscfmos", "mcscfnos" - for ci orbitals: "ci", "aldet", "fsoci", "guga", "genci", "ormas"" - for localized orbitals: `local` Returns: orbs : numpy.array 2D numpy array of shape (naos, nmos) containing ortbials expansion coefficients ''' vecstr = self.get_vec_string(method) return self.parse_orbitals(vecstr) @staticmethod def parse_orbitals(vecstr, clength=15): ''' Parse dat orbitals into numpy array of the shape (naos, nmos) Parse orbitals given a string obtained from the $VEC section of the PUNCH file (.dat) into a numpy array of the shape (naos, nmos) where "naos" and "nmos" are the number of atomic orbitals and number of molecular orbitals respectively. The shape is deduced from the last line of the string. Args: vecstr : str string with the contents fo the gamess $VEC block clength : int total length of the coefficient string as stored by the gamess format, by default gamess stores orbital coefficients in `e15.8` fortran format so the total length is 15. Returns: orbs : numpy.array 2D numpy array of shape (naos, nmos) containing ortbials expansion coefficients ''' naos, nmos, nlines = get_naos_nmos(vecstr) orblines = vecstr.split('\n') orbs = np.zeros((naos, nmos), dtype=float) counter = -1 for i in range(nmos): for j in range(nlines): counter += 1 nitems = int(len(orblines[counter][5:])) // clength orbs[5 * j: 5 * (j + 1), i] = \ [float(orblines[counter][5 + 15 * n: 5 + 15 * (n + 1)]) for n in range(nitems)] return orbs def get_naos_nmos(vecstr, clength=15): ''' Get the number of AO's and MO's from a string in $VEC block. Args: vecstr (str) string with the contents fo the gamess $VEC block clength (int) total length of the coefficient string as stored by the gamess format, by default gamess stores orbital coefficients in 'e15.8' fortran format so the total length is 15. Returns: naos (int) number of atomic orbitals naos (int) number of moelcular orbitals nlines (int) number of lines per molecular orbital ''' veclines = vecstr.split('\n') noveclines = len(veclines) lineit = iter(veclines) nlines = 0 while next(lineit)[:2].strip() == '1': nlines += 1 if nlines == 0: raise ValueError("'nlines' cannot be zero, check vecstr in 'get_naos_nmos'") naos = 5 * (nlines - 1) + len(vecstr.split('\n')[nlines - 1][5:]) // clength nmos = noveclines // nlines return naos, nmos, nlines def to_gamess_vec(coeffs, cfmt='15.8e'): # sourcery skip: assign-if-exp, remove-zero-from-range, simplify-numeric-comparison ''' Given a 2D array return a string with the elements converted to the GAMESS(US) $VEC format Args: coeffs : numpy.array A two-dimensional array of type `float` cmft : str Default format for the elements of `coeffs` Returns: vec : str A string representation of the `coeffs` using GAMESS(US) $VEC format ''' if coeffs.dtype != np.float64: raise ValueError('array should be of type np.flaot64, got: {}'.format(coeffs.dtype)) nao, nmo = coeffs.shape if nao % 5 == 0: nlines = nao / 5 else: nlines = 1 + nao / 5 vec = '' for i in range(nmo): if i + 1 >= 100: ilab = i + 1 % 100 else: ilab = i + 1 for l in range(nlines): if l + 1 > 1000: llab = l + 1 % 1000 else: llab = l + 1 if l < nlines - 1: coeffrow = "".join(["{0:{1:}}".format(coeffs[j + l * 5, i], cfmt) for j in range(5)]) else: coeffrow = "".join(["{0:{1:}}".format(coeffs[j + l * 5, i], cfmt) for j in range(0, 5 - ((l + 1) * 5 - nao))]) vec += "{0:2d}{1:3d}{2:s}\n".format(ilab, llab, coeffrow) return vec def writeorbinp(): ''' A script for writing the GAMESS(US) input with guess orbitals from a previous run. ''' parser = argparse.ArgumentParser() parser.add_argument('inpfile') parser.add_argument('datfile') parser.add_argument('orbitals', default='ci') parser.add_argument('-o', '--output') args = parser.parse_args() gip = GamessInput(args.inpfile) gip.parse() gdp = GamessDatParser(args.datfile) vectr = gdp.get_vec_string(args.orbitals) if args.output is None: args.output = os.path.splitext(args.inpfile)[0] + '_{}.inp'.format(args.orbitals) gip.write_with_vec(args.output, vectr) print('Input written to: {}'.format(args.output))