# Source code for BigDFT.Calculators

"""
This module defines classes to perform calculations with BigDFT, either
through the GObject Introspection bindings (GIBinding) or through system calls (SystemCalculator).

"""

##In our case, for the class SystemCalculator which uses system calls:
##* We define posinp (equivalent of Atoms)
##* We have a python dictionary for the parameters
##* We define a calculator (equivalent of BFGS, which is an Optimizer, i.e. a method to optimize)
##Then we call the method run.
##
##For the class GIBinding, which uses the Gobject Introspection bindings, two methods, set and update, are added.
##
##The goal is to have a light Calculator almost compatible with ASE (Atomic Simulation Environment, see https://gitlab.com/ase/ase)
##.. todo::
##    In the future we will add our method to ASE, which is at a higher level (workflow of simulations).
##:Example:
##   >>> from ase import Atoms
##   >>> from ase.optimize import BFGS
##   >>> from ase.calculators.nwchem import NWChem
##   >>> from ase.io import write
##   >>> h2 = Atoms('H2',
##   ...            positions=[[0, 0, 0],
##   ...                       [0, 0, 0.7]])
##   >>> h2.calc = NWChem(xc='PBE')
##   >>> opt = BFGS(h2, trajectory='h2.traj')
##   >>> opt.run(fmax=0.02)
##   BFGS:   0  19:10:49    -31.435229     2.2691
##   BFGS:   1  19:10:50    -31.490773     0.3740
##   BFGS:   2  19:10:50    -31.492791     0.0630
##   BFGS:   3  19:10:51    -31.492848     0.0023
##   >>> write('H2.xyz', h2)
##   >>> h2.get_potential_energy()  # ASE's units are eV and Ang
##   -31.492847800329216
##

import os
import shutil
from futile.Utils import write as safe_print
import BigDFT.Logfiles as Lf


class GIBinding():
    """
    Calculator for BigDFT from Gobject Introspection bindings.

    The typical sequence is: create the calculator (this initialises the
    BigDFT library), call :meth:`set` with an input dictionary, call
    :meth:`run`, then optionally :meth:`update` and :meth:`run` again.

    Attributes:
        iproc, nproc: values returned by ``BigDFT.lib_init``.
        runObj: the run object created by :meth:`set`. It is ``-1`` while the
            library has not been initialised and ``None`` when no run is set.
        out: result of the last call to :meth:`run` (``None`` before any run).
    """

    def __init__(self):
        # Sentinel meaning "library not initialised". It is assigned before
        # anything that may fail (import, lib_init) so that __del__ can always
        # tell whether the library has to be finalised.
        self.runObj = -1
        self.out = None
        #Import bindings about BigDFT (if the bindings are not generated, do not work at all)
        from gi.repository import BigDFT
        # MPI initialisation
        (ierr, self.iproc, self.nproc, igroup, ngroup) = BigDFT.lib_init(0)
        self.runObj = None

    def update(self, inputfile):
        """
        Update the current run object with a new input dictionary.

        If ``inputpsiid`` is not present in the ``dft`` field of ``inputfile``,
        it is assumed that the user wants to do a restart and
        ``{'dft': {'inputpsiid': 1}}`` is merged into a copy of the input.

        Args:
            inputfile (dict): the input dictionary. It is not modified.
        """
        import copy
        from futile.Utils import dict_merge
        from gi.repository import BigDFT
        if "dft" in inputfile and "inputpsiid" in inputfile["dft"]:
            var = inputfile
        else:
            # Deep copy: with a shallow copy the nested 'dft' dictionary would
            # be shared with the caller, and dict_merge (presumably merging
            # nested dictionaries in place) would then alter the caller's input.
            var = copy.deepcopy(inputfile)
            dict_merge(var, {'dft': {'inputpsiid': 1}})
        self.runObj.update(BigDFT.Dict(var))

    def run(self):
        """
        Perform the calculation on the current run object.

        Returns:
            The object returned by ``runObj.calculate``; it is also stored in ``self.out``.
        """
        self.out = self.runObj.calculate(self.iproc, self.nproc)
        return self.out

    def set(self, inputfile=None):
        """
        Create a new run object from an input dictionary.

        Args:
            inputfile (dict): the input dictionary. An empty dictionary is used if ``None``.
        """
        from gi.repository import BigDFT
        var = {} if inputfile is None else inputfile
        # Free memory first
        self.out = None
        self.runObj = None
        self.runObj = BigDFT.Run.new_from_dict(BigDFT.Dict(var))

    def __del__(self):
        # getattr: the attribute may be missing if the instance was never
        # initialised; this is treated as "library not initialised".
        if getattr(self, 'runObj', -1) == -1:
            return
        # MPI finalisation.
        self.out = None
        self.runObj = None
        from gi.repository import BigDFT
        BigDFT.lib_finalize()


class Runner():
    """Run of something.

    Object dealing with global and local options of a run method.
    All arguments in the __init__ call are stored as global options.
    For each run, these global options may be updated by the arguments of the run call.
    """
    def __init__(self,**kwargs):
        """
        All arguments for the runs are saved in a private dictionary of global options
        """
        import copy
        # Deep copy so that later changes to the caller's objects do not leak into the options
        self._global_options=copy.deepcopy(kwargs)

    def global_options(self):
        """

        Get all global options

        Returns:
            The dictionary of the global options in its current status
        """
        return self._global_options

    def get_global_option(self,key):
        """

        Get one key in global options

        Args:
           key (string): the global option key

        Returns:
            The value of the global options labelled by ``key``

        """
        return self._global_options[key]

    def update_global_options(self,**kwargs):
        """
        Update the global options by providing keyword arguments.
        """
        self._global_options.update(kwargs)

    def pop_global_option(self,key):
        """
        Remove a given global option from the global option dictionary

        Args:
           key (string): the global option key

        Returns:
           The value of the global option

        Raises:
           KeyError: if ``key`` is not a global option
        """
        return self._global_options.pop(key)

    def _run_options(self,**kwargs):
        """
        Create a local dictionary for a specific run.
        It combines the present status of global option with the local dictionary of the run
        """
        import copy
        #First deepcopy from global_options and update from kwargs
        #(warning: the update is shallow, a nested dictionary is replaced and not merged)
        self.run_options=copy.deepcopy(self._global_options)
        """Local options of process_run.

        dict: This dictionary can be accessed during the definition of the process_run method.
        It contains all the relevant keys for the definition of the runner.
        """

        self.run_options.update(kwargs)

    def run(self,**kwargs):
        """
        Run method of the class. It performs the following actions:

         * Constructs the local dictionary ``run_options`` from the global options and ``**kwargs``
         * Calls the ``pre_processing`` method (intended to prepare some actions associated to the ``process_run`` method)
         * Calls ``process_run`` with the dictionary returned by ``pre_processing`` as keyword arguments
         * Merges the results of ``process_run`` into these keyword arguments
         * Returns the object returned by the call to the ``post_processing`` method

        """
        from futile.Utils import dict_merge
        self._run_options(**kwargs)
        run_args=self.pre_processing()
        run_results=self.process_run(**run_args)
        #post_processing receives both the arguments and the results of process_run
        dict_merge(dest=run_args,src=run_results)
        return self.post_processing(**run_args)

    def pre_processing(self):
        """
        Pre-treat the keyword arguments and the options, if needed.

        Returns:
           dictionary of the pre-treated keyword arguments that have to be actually considered by process_run.
        """
        return {}

    def process_run(self,**kwargs):
        """
        Main item of the runner, defines the information that have to be post_processed by post_processing.

        Args:
          **kwargs** (dict): keyword arguments as returned from the :meth:`pre_processing` method.

        Returns:
          objects to be passed to post_processing
        """
        return kwargs

    def post_processing(self,**kwargs):
        """
        Post-processing, take the arguments as they are provided by the process_run.

        Returns:
           The final object that each call to the ``run`` method is supposed to provide.
        """
        return None

class SystemCalculator(Runner):
    """Define a BigDFT calculator.

    Main calculator of BigDFT code. It performs :py:meth:`os.system` calls to the main ``bigdft`` executable
    in the ``$BIGDFT_ROOT`` directory. It is designed for two purposes:

      * Run the code in a workstation-based environment, for example within notebooks or scripts.
      * Run the code from a python script that is submitted to a batch scheduler in a potentially large-scale supercomputer.

    For triggering the execution, this code gets two variables from the environment:

        * The value of ``OMP_NUM_THREADS`` to set the number of OMP_NUM_THREADS. If this variable is not present in the environment,
          :class:`SystemCalculator` sets it to the value provided by the ``omp`` keyword at initialization.

        * The value of ``BIGDFT_MPIRUN`` to define the MPI execution command. If absent, the run is executed simply by ``$BIGDFT_ROOT/bigdft``,
          followed by the command given by post-processing.

    Arguments:
         omp (int): number of OpenMP threads.
            It defaults to the $OMP_NUM_THREADS variable in the environment, if present, otherwise it fixes the run to 1 thread.
         mpi_run (str): define the MPI command to be used.
            It defaults to the value $BIGDFT_MPIRUN of the environment, if present.
            When using this calculator into a job submission script, the value of
            $BIGDFT_MPIRUN variable may be set appropriately to launch the bigdft executable.
         skip (bool): if ``True``, do not run the calculation if the corresponding logfile exists.
         verbose (bool): if ``True`` the class prints out information about the operations that are being performed by the calculator
         dry_run (bool): check the input, estimate the memory but do not perform the calculation.
         dry_mpi (int): Number of MPI processes for the estimation of the memory when ``dry_run`` is ``True`` (not yet implemented)
         taskgroup_size (int): number of MPI processes of each of the taskgroup in the case of a runs_file.
            It is not an argument of the constructor: it is read from the run options when the command is built.

    Warning:
       At the initialization, `SystemCalculator` checks if the environment variable $BIGDFT_ROOT is defined.
       This would mean (although not guarantee) that the environment has been properly set prior to the evaluation of the python command.
       Also, it checks that the executable file ``bigdft`` might be found in the ``$BIGDFT_ROOT/bigdft`` path.

    Example:
        >>> inpdict = { 'dft': { 'ixc': 'LDA' }} #a simple input file
        >>> study = SystemCalculator(omp=1)
        >>> logf = study.run(name="test",input=inpdict)
        Executing command:  $BIGDFT_MPIRUN <path_to_$BIGDFT_ROOT>/bigdft test

    """

    import os
    import shutil
    def __init__(self,
                 omp=os.environ.get('OMP_NUM_THREADS','1'),
                 mpi_run=os.environ.get('BIGDFT_MPIRUN',''),
                 dry_run=False,skip=False,verbose=True):
        #Use the initialization from the Runner class (so all options inside __global_options)
        Runner.__init__(self,omp=str(omp),mpi_run=mpi_run,dry_run=dry_run,skip=skip,verbose=verbose)
        # Verify if $BIGDFT_ROOT is in the environment
        assert 'BIGDFT_ROOT' in os.environ, "the environment variable BIGDFT_ROOT is not defined"
        executable=os.path.join(os.environ['BIGDFT_ROOT'],'bigdft')
        #the bigdft file should be present in the BIGDFT_ROOT directory
        assert os.path.isfile(executable), "the executable '%s' does not exist" % executable
        #Build the command prefix (MPI launcher, if any, followed by the executable)
        self.command = (self._global_options['mpi_run'] + ' ' + executable).strip()
        safe_print('Initialize a Calculator with OMP_NUM_THREADS=%s and command %s' % (self._global_options['omp'],self.command) )

    def pre_processing(self):
        """
        Prepare a calculation building the input file from a dictionary.

        The following keys are read from the run options (global options updated by the arguments of ``run``):

        :param str name: naming scheme of the run i.e. <name>.yaml is the input file and log-<name>.yaml the output one.
           Data will then be written in the directory `data-<name>.yaml`, unless the "radical" keyword is specified in the input dictionary.
        :param str run_dir: specify the directory where bigdft will be executed (the input and log file will be created in it)
                            it must be a simple sub-directory name (it cannot contain "/" nor be "..")
        :param str outdir: specify the output directory for all data coming from bigdft (parameter of bigdft)
        :param str run_name: File containing the list of the run ids which have to be launched independently
                             (list in yaml format). The option runs-file is not compatible with the name option.
        :param input: give the input parameters (a dictionary or a list of dictionary)
        :type input: dict
        :param posinp: indicate the posinp file (atomic position file).
        :type posinp: filename
        :return: the keyword arguments of ``process_run``, i.e. a dictionary with the key ``command``
        :rtype: dict

        .. todo::

           Set the return value of run in the case of a run_file. It should be a list of Logfile classes

        """
        from futile.Utils import make_dict
        from futile import YamlIO as Y
        self._ensure_run_directory()
        inp = self.run_options.get('input',{})
        # from here onwards the local input is a dict and not anymore another class
        local_input = make_dict(inp)
        #Add into the dictionary a posinp key
        posinp = self.run_options.get('posinp',None)
        if posinp is not None: local_input['posinp'] = self._posinp_dictionary_value(posinp)
        #Creating the yaml input file
        input_file=self._get_inputfilename()
        Y.dump(local_input,filename=input_file)
        if self.run_options['verbose']: safe_print('Creating the yaml input file "%s"' % input_file)
        return {'command':self._get_command()}

    def process_run(self,command):
        """Finally launch the code.

        Routine associated to the running of the ``bigdft`` executable.

        Arguments:
           command (str): the command as it is set by the ``pre_processing`` method.

        Returns:
           dict: the date of the debug file before the run (``timedbg``) and the name of the logfile (``logname``).
        """
        #check if the debug file will be updated (case of erroneous run)
        timedbg=self._get_debugfile_date()
        verbose = self.run_options['verbose']
        # Set the number of omp threads (environment values have to be strings)
        os.environ['OMP_NUM_THREADS'] = str(self.run_options['omp'])
        if verbose:
            if self.run_dir != '.': safe_print('Run directory', self.run_dir)
            safe_print('Executing command: ', command)
        #Run the command from inside the run directory
        os.system("cd "+self.run_dir+"; "+command)
        return {'timedbg':timedbg,'logname': self._get_logname()}

    def post_processing(self,timedbg,logname,command):
        """
        Check the existence of the log file and return an instance logfile.

        Arguments:
           timedbg: date of the debug file as it was before the run.
           logname (str): path of the logfile of the run.
           command (str): the command which has been executed (not used here).

        Returns:
           A `BigDFT.Logfile` class instance associated to the run which has been just performed.
           If the run failed for some reasons, the logfile seem not existing or it cannot be parsed it returns `None`.

        """
        #verify that no debug file has been created
        if self._get_debugfile_date() > timedbg :
            if self.run_options['verbose']:
                safe_print("ERROR: some problem occured during the execution of the command, check the 'debug/' directory and the logfile")
                #the debug file is sane, we may print out the error message
                self._dump_debugfile_info()
            #the run failed: the logfile might be missing or not parsable
            try:
                return Lf.Logfile(logname)
            except Exception:
                return None
        if not os.path.exists(logname):
            safe_print("ERROR: The logfile (",logname,") does not exist.")
            return None
        from futile.Utils import file_time
        inputname=self._get_inputfilename()
        #with the skip option an existing (older) logfile is a legitimate result
        if file_time(logname) < file_time(inputname) and not self.run_options['skip']:
            safe_print("ERROR: The logfile (",logname,") is older than the inputfile (",inputname,").")
            return None
        return Lf.Logfile(logname)

    def _get_command(self):
        """
        Build the command line from the run options.

        Returns:
           str: the command to be executed.
        """
        name = self.run_options.get('name','')
        dry_run = self.run_options['dry_run']
        run_name = self.run_options.get('run_name','')
        outdir = self.run_options.get('outdir','')
        taskgroup_size = self.run_options.get('taskgroup_size','')
        #Check if it is a dry run
        if dry_run:
            #Use bigdft-tool (do not use BIGDFT_MPIRUN because it is a python script)
            command = os.path.join(os.environ['BIGDFT_ROOT'],'bigdft-tool')+' -a memory-estimation -l'
            if name: command += ' --name='+name
        else:
            # Adjust the command line with options
            command = self.command
            if name: command += ' -n ' + name
            if run_name: command += ' -r ' + run_name
            if outdir: command += ' -d ' + outdir
            #taskgroup_size may be given as an integer
            if taskgroup_size: command += ' -t '+str(taskgroup_size)
            if self.run_options['skip']: command += ' -s Yes'
        return command

    def _get_logname(self):
        """
        Path of the logfile of the run: ``<run_dir>/[<outdir>/]log[-<name>].yaml``
        """
        outdir = self.run_options.get('outdir','')
        name = self.run_options.get('name','')
        logname = 'log-'+name+'.yaml' if name else 'log.yaml'
        if outdir: logname=os.path.join(outdir,logname)
        return os.path.join(self.run_dir,logname)

    def _get_inputfilename(self):
        """
        Path of the input file of the run: ``<run_dir>/<name>.yaml`` (``input.yaml`` if no name is given)
        """
        name = self.run_options.get('name','')
        input_file = name+'.yaml' if name else 'input.yaml'
        return os.path.join(self.run_dir,input_file)

    def _ensure_run_directory(self):
        """
        Set ``self.run_dir`` from the ``run_dir`` run option, creating the directory if needed.

        Raises:
           ValueError: if ``run_dir`` contains "/" or is "..".
        """
        from futile.Utils import ensure_dir
        run_dir = self.run_options.get('run_dir','.')
        #Restrict run_dir to a sub-directory
        if  ("/" in run_dir or run_dir == ".."):
            raise ValueError("run_dir '%s' where bigdft is executed must be a sub-directory" % run_dir)
        #Create the run_dir if not exist (ensure_dir presumably returns True when it creates the directory)
        if ensure_dir(run_dir) and self.run_options['verbose']: safe_print("Create the sub-directory '%s'" % run_dir)
        self.run_dir=run_dir
        """Run directory.
        str: the directory where the inputfile has been copied to.
              Might be useful to associate to each of the calculation of a given run a different directory.
              Note that this is different than setting the ``outdir`` or the ``name`` arguments at it refers
              to the directory of the inputfile.
        Note:
            This is not a global property of the calculator, as the same calculator instance might be used for various workflows.
        """

    def _posinp_dictionary_value(self,posinp):
        """
        Create the dictionary value associated to posinp field

        Args:
          posinp (str, dict): path of the posinp file. Might be relative or absolute. Copied into `run_dir` if not existing. If it is a dictionary, it is a representation
          of the atomic position.

        Returns:
          str,dict: the value of the key ``posinp`` of the input file, if posinp is a string, otherwise the posinp dictionary

        Raises:
          ValueError: if posinp is a path which does not correspond to an existing file
        """
        from futile.Utils import ensure_copy,make_dict
        if isinstance(posinp,dict): return make_dict(posinp)
        #Check if the file does exist
        if not os.path.isfile(posinp):
            raise ValueError("posinp: The atomic position file '%s' does not exist" % posinp)
        posinpdict=posinp
        posinpfile=os.path.basename(posinp)
        #Copy the posinp if not identical
        cp_posinp = os.path.join(self.run_dir,posinpfile)
        copied=ensure_copy(src=posinp,dest=cp_posinp)
        if copied:
            #the file is now in run_dir: refer to it by its basename
            posinpdict=posinpfile
            if self.run_options['verbose']: safe_print("Copy the posinp file '%s' into '%s'" % (posinp,self.run_dir))
        return posinpdict

    def _get_debugfile_path(self):
        """
        Path of the debug file. The command is executed from inside the run
        directory, therefore the ``debug`` directory is looked for in there.
        """
        return os.path.join(getattr(self,'run_dir','.'),'debug','bigdft-err-0.yaml')

    def _get_debugfile_date(self):
        """
        Get the information about the debug time of the last file in the run directory
        """
        from futile.Utils import file_time
        return file_time(self._get_debugfile_path())

    def _dump_debugfile_info(self):
        """
        Print the error key and the additional information found in the debug file, if the file exists
        """
        from futile import YamlIO as Y
        debugfile=self._get_debugfile_path()
        if os.path.isfile(debugfile):
            debugdict=Y.load(debugfile,doc_lists=False)
            safe_print('The error occured is',self._get_error_key(debugdict))
            #do not fail while reporting an error if the field is absent
            if 'Additional Info' in debugdict:
                safe_print('Additional Info: ',debugdict['Additional Info'])

    def _get_error_key(self,debugdict):
        """
        Return the first key of the debug dictionary which is not one of the
        known informative fields (``None`` if there is no such key)
        """
        for key in debugdict:
            if 'Calling sequence' in key: continue
            if 'Global dictionary' in key: continue
            if 'Additional Info' in key: continue
            return key


# Test the calculators: dissociation curve of Be2 computed with the GIBinding calculator
if __name__ == '__main__':

    import yaml
    import matplotlib.pyplot as plt

    basicinput = """
#mode: {method: lj}
logfile: No
dft: { ixc: HF, nspin: 2}
posinp:
   positions:
   - {Be : [0.0, 0.0, 0.0]}#, IGSpin: -1}
   - {Be : [0.0, 0.0, 1.0]}#, IGSpin: 1}
#   properties: {format: yaml}
ig_occupation:
   Atom 1: {2s: {up: 1.0, down: 0.9}, 2p: {up: 0.0, down: 0.2} }
   Atom 2: {2s: {up: 0.9, down: 1.0}, 2p: {up: 0.2, down: 0.0} }

psppar.Be: {Pseudopotential XC: 11}
"""

    # Initialize the calculator
    study = GIBinding()
    # safe_load: yaml.load without an explicit Loader is rejected by recent
    # PyYAML versions, and only plain data have to be parsed here
    inp = yaml.safe_load(basicinput)
    study.set(inp)

    # Perform the first calculation
    out = study.run()
    safe_print('starting energy', out.eKS)
    energy = [out.eKS]
    pos = [1.0]

    # Perform a dissociation curve: the last atom is moved along z by an
    # increasing shift at each step
    for i in range(10):
        sh = float(i+1) * 0.02
        inp['posinp']['positions'][-1]['Be'][2] += sh
        study.update(inp)
        out = study.run()
        energy.append(out.eKS)
        pos.append(pos[-1]+sh)
        if study.iproc == 0:
            safe_print('iter', i, 'shift', sh, 'energy', out.eKS)
    # Release the last result
    out = None
    safe_print('End of the calculations')

    # Plot the dissociation curve (only on the process with iproc == 0)
    if study.iproc == 0:
        plt.plot(pos, energy)
        plt.show()