pax_global_header00006660000000000000000000000064145455001300014507gustar00rootroot0000000000000052 comment=1b04c48511ed16b15a3c369bf023846248349434 gpaw-24.1.0/000077500000000000000000000000001454550013000125315ustar00rootroot00000000000000gpaw-24.1.0/.flake8000066400000000000000000000000461454550013000137040ustar00rootroot00000000000000[flake8] ignore = E129,E741,W503,W504 gpaw-24.1.0/.gitignore000066400000000000000000000014711454550013000145240ustar00rootroot00000000000000*.pyc build/ /dist MANIFEST _gpaw*.so .DS_Store */.DS_Store gpaw.egg-info/ .eggs/ /siteconfig.py /siteconfig-*.py /venv*/ .coverage* htmlcov/ .pytest_cache/ # If you build the documentation in the source tree, you will create a # lot of files that we don't want to see with "git status": *.png *.gif *.pov *.jpg *.odg *.dat *.ini *.traj *.pdf *.csv *.txt *.log *.xyz *.db *.pckl *.gz N.LDA *.gpw *.npy theme.css *.svg *.json *.out *.cube doc/setups/[A-Z]*.rst *.ipynb !*.master.ipynb !doc/devel/gpaw-logo.svg # If you run the tests, these may write out files *.gpw # Stuff from LaTeX: *.aux *.bbl *.blg *.toc # Editor backup files: *~ \#* # Vim swap files: .*.sw? 
# Emacs lock files: .\#* # Files produced by AGTS (myqueue) *.py.state *.py.*.err # These are OK: !requirements.txt !mypy.ini !pytest.ini !doc/words.txt gpaw-24.1.0/.gitlab-ci.yml000066400000000000000000000304151454550013000151700ustar00rootroot00000000000000variables: PYTHONUSERBASE: ".local" IMAGE_PATH: "registry.gitlab.com/gpaw/gpaw-ci-containers" IMAGE_MAIN_OLDEST: "$IMAGE_PATH/main:oldest" IMAGE_MAIN_LATEST: "$IMAGE_PATH/main:latest" IMAGE_CUDA: "$IMAGE_PATH/cuda:11" OMP_NUM_THREADS: "1" default: before_script: - export PYTHONUSERBASE=`realpath $PYTHONUSERBASE` - mkdir -p `python -m site --user-site` - export PATH=$PYTHONUSERBASE/bin:$PATH - export PYTEST_ADDOPTS="--color=yes" interruptible: true stages: - build - precalculate - test - deploy .build: stage: build before_script: - !reference [default, before_script] - pip install --user --no-deps git+https://gitlab.com/ase/ase.git - | cat << EOF > siteconfig.py parallel_python_interpreter = True mpi = True compiler = 'mpicc' libraries = [] library_dirs = [] include_dirs = [] extra_compile_args = ['-fPIC', '-O3', '-g', '-fopenmp', '-Wall', '-Werror', ] extra_link_args = ['-fopenmp'] libraries += ['blas'] fftw = True libraries += ['fftw3'] scalapack = True libraries += ['scalapack-openmpi'] libraries += ['xc'] EOF artifacts: when: always paths: - $PYTHONUSERBASE - gpaw.egg-info - _gpaw.*.so - "*.log" expire_in: 30 mins .test: stage: test before_script: - !reference [default, before_script] .gpu-job: rules: - if: $CI_COMMIT_BRANCH == "master" - if: $CI_COMMIT_BRANCH =~ "/^gpu-.*/" build: image: $IMAGE_MAIN_OLDEST extends: .build tags: - linux script: - cat siteconfig.py - pip install --user --log build.log --editable . 
- gpaw -P 1 info - gpaw-python -m gpaw info build-latest: image: $IMAGE_MAIN_LATEST extends: .build tags: - linux script: - | cat << EOF >> siteconfig.py define_macros += [('NDEBUG', None)] undef_macros = [] # TODO: PySys_SetArgv() etc deprecated in Python 3.11 extra_compile_args += ['-Wno-deprecated-declarations'] from pathlib import Path libvdwxc = True libraries += ['vdwxc'] libvdwxc_dir = Path('/install/libvdwxc') libvdwxc_libdir = libvdwxc_dir / 'lib' library_dirs += [libvdwxc_libdir] extra_link_args += [f'-Wl,-rpath={libvdwxc_libdir}'] include_dirs += [libvdwxc_dir / 'include'] elpa = True elpa_dir = Path('/install/elpa') elpa_libdir = elpa_dir / 'lib' # Maybe we should do a glob to avoid version dependence: elpa_includedir = elpa_dir / 'include/elpa-2022.11.001' libraries += ['elpa'] library_dirs += [f'{elpa_libdir}'] extra_link_args += [f'-Wl,-rpath={elpa_libdir}'] include_dirs += [f'{elpa_includedir}'] EOF - cat siteconfig.py - pip install --user --upgrade numpy scipy - pip install --user --editable . - gpaw -P 1 info - gpaw-python -m gpaw info build-cuda: image: $IMAGE_CUDA extends: - .build - .gpu-job tags: - linux script: - | cat << EOF >> siteconfig.py gpu = True gpu_target = 'cuda' gpu_compiler = 'nvcc' gpu_compile_args = ['-O3', '-g'] libraries += ['cudart', 'cublas'] undef_macros += ['GPAW_GPU_AWARE_MPI'] EOF - cat siteconfig.py - pip install --user --log build.log --editable . build-hip-cuda: image: $IMAGE_CUDA extends: - .build - .gpu-job tags: - linux script: - | cat << EOF >> siteconfig.py gpu = True gpu_target = 'hip-cuda' gpu_compiler = 'hipcc' gpu_compile_args = ['-O3', '-g'] libraries += ['cudart', 'hipblas'] # Suppress warnings from hip-cuda headers extra_compile_args += ['-Wno-deprecated-declarations', '-Wno-implicit-int', ] undef_macros += ['GPAW_GPU_AWARE_MPI'] EOF - cat siteconfig.py - pip install --user --log build.log --editable . 
build-minimal: image: $IMAGE_MAIN_OLDEST extends: .build tags: - linux script: - | cat << EOF > siteconfig.py mpi = False noblas = True nolibxc = True EOF - cat siteconfig.py - pip install --user --log build.log --editable . - gpaw info prepare-libxc: image: $IMAGE_MAIN_OLDEST stage: build tags: - linux variables: PREFIX: $PYTHONUSERBASE before_script: - export PREFIX=`realpath $PREFIX` script: - git clone -b 6.2.2 https://gitlab.com/libxc/libxc.git - cd libxc - autoreconf -i - > ./configure --enable-shared --disable-static --disable-fortran --prefix=$PREFIX --disable-kxc --disable-lxc # Enable kxc, lxc, ... derivatives only if needed (affects compilation time) - make | tee ../libxc-build.log - make install when: manual artifacts: paths: - $PREFIX - "*.log" expire_in: 30 mins build-libxc: image: $IMAGE_MAIN_OLDEST extends: - .build needs: - prepare-libxc tags: - linux script: - | cat << EOF >> siteconfig.py local = Path('.local').resolve() include_dirs += [local / 'include'] library_dirs += [local / 'lib'] runtime_library_dirs += [local / 'lib'] EOF - cat siteconfig.py - pip install --user --log build.log --editable . - gpaw -P 1 info test-oldest: image: $IMAGE_MAIN_OLDEST extends: - .test needs: - build tags: - linux script: - gpaw info - > OMP_NUM_THREADS=2 pytest -v -m ci -We # This is the only test which uses OpenMP. # We may want to identify a limited set of tests that must work # with OpenMP, but so far we just have it in this job. 
- gpaw-python -m gpaw info - > OMP_NUM_THREADS=2 gpaw-python -m pytest -v -We -k 0-pw gpaw/test/test_fuzz.py test-latest: image: $IMAGE_MAIN_LATEST extends: - .test needs: - build-latest tags: - linux script: - gpaw info # Create a .coveragerc file so we can get nicely printed coverage: - | cat << EOF >> .coveragerc [report] precision = 2 EOF - > pytest -v -m ci -m gpu --cov=gpaw --cov-report=html --cov-report=term artifacts: paths: - htmlcov/ expire_in: 1 week coverage: '/TOTAL.+ ([0-9]+\.[0-9]+%)/' test-latest-new: image: $IMAGE_MAIN_LATEST extends: - .test needs: - build-latest tags: - linux script: - gpaw info - GPAW_NEW=1 pytest -v -m "ci and not later" test-minimal: image: $IMAGE_MAIN_OLDEST extends: - .test needs: - build-minimal tags: - linux script: - gpaw info - > pytest -v -m ci -We test-cuda: image: $IMAGE_CUDA extends: - .test - .gpu-job needs: - build-cuda tags: - cuda script: - nvidia-smi - gpaw info - pytest -v -m gpu - gpaw-python -m gpaw info - gpaw-python -m pytest -v -m gpu test-cuda-mpi: image: $IMAGE_CUDA extends: - .test - .gpu-job needs: - build-cuda tags: - cuda-mpi script: - nvidia-smi - gpaw info - mpirun -np 2 gpaw-python -m pytest -v -m gpu - mpirun -np 4 gpaw-python -m pytest -v -m gpu test-hip-cuda: image: $IMAGE_CUDA extends: - .test - .gpu-job needs: - build-hip-cuda tags: - cuda script: - nvidia-smi - gpaw info - pytest -v -m gpu test-libxc: image: $IMAGE_MAIN_OLDEST extends: - .test needs: - build-libxc tags: - linux script: - gpaw info - pytest -v -m libxc check-agts: image: $IMAGE_MAIN_OLDEST stage: test needs: - build tags: - linux script: # - pip install --user git+https://gitlab.com/myqueue/myqueue.git@master - pip install --user myqueue - mq config --in-place - mq init - mq workflow -p agts.py -zT | tail -1 | tee task_count - > [[ `cut -d ' ' -f1 < task_count` -ge 501 ]] || (echo "Too few agts tasks"; exit 1) lint: image: $IMAGE_MAIN_LATEST stage: build tags: - linux script: - echo "png check" - > [[ `find . 
-name '*.png' | wc -l` -le 5 ]] || (echo "Too many png files in git"; exit 1) - echo "compileall" - python --version - python -We:invalid -m compileall -f -q gpaw/ - echo "flake8" - pip install flake8 - flake8 --version - flake8 --doctests gpaw - > flake8 --doctests --exclude "doc/platforms/*,doc/*/summerschool22/*/" --extend-ignore E402 doc - > flake8 --doctests --extend-ignore E402,E501 doc/summerschools/summerschool22/catalysis doc/platforms/Linux/Niflheim - echo "interrogate" - pip install interrogate - interrogate --version - > interrogate -m -i -f 33.1 -e gpaw/test -e gpaw/point_groups/groups.py gpaw typecheck: image: $IMAGE_MAIN_LATEST stage: build tags: - linux script: - pip install mypy types-PyYAML - python --version - mypy --version - mypy --pretty docs: image: $IMAGE_MAIN_OLDEST stage: test needs: - build tags: - linux script: - cd doc - make - make doctest - cd .. - python -c "from gpaw.utilities.urlcheck import test; test()" when: manual artifacts: paths: - doc/build/html expire_in: 1 week gpwfiles: image: $IMAGE_MAIN_LATEST extends: - .test needs: - build-latest stage: precalculate tags: - xeon24 before_script: - !reference [default, before_script] script: - pytest -n 16 -m generate_gpw_files --durations 30 -v artifacts: paths: - .pytest_cache/ expire_in: 30 mins rules: - if: $CI_PIPELINE_SOURCE == "push" when: manual allow_failure: true - if: $CI_PIPELINE_SOURCE == "schedule" .nightly: image: $IMAGE_MAIN_LATEST variables: GPAW_MPI_NPROCS: "1" extends: - .test stage: test needs: - build-latest - gpwfiles tags: - xeon24 before_script: - !reference [default, before_script] - pip install pytest-instafail pytest-xdist script: - > gpaw -P $GPAW_MPI_NPROCS python -m pytest --instafail -r s --durations 30 rules: - if: $CI_PIPELINE_SOURCE == "push" when: manual allow_failure: true - if: $CI_PIPELINE_SOURCE == "schedule" # - if: $CI_PIPELINE_SOURCE == "merge_request_event" nightly-mpi-1: extends: - .nightly script: - gpaw info - pytest --cache-show - > pytest 
-n 8 --instafail -r s --durations 30 --cov=gpaw --cov-report=html --cov-report=term artifacts: paths: - htmlcov/ expire_in: 1 day nightly-mpi-1-new: variables: GPAW_NEW: "1" extends: - .nightly script: - gpaw info - pytest -n 4 --cache-clear -m generate_gpw_files - > pytest -n 4 --instafail -r s --durations 30 -m "not (later or dscf or gllb or elph or wannier or legacy or ofdft or mom or do or lrtddft or rttddft or hybrids or pipekmezey)" nightly-mpi-2-new: variables: GPAW_NEW: "1" GPAW_MPI_NPROCS: "2" extends: - .nightly script: - gpaw info - pytest -n 4 --cache-clear -m generate_gpw_files - > gpaw -P $GPAW_MPI_NPROCS python -m pytest --instafail -r s --durations 30 -m "not (later or dscf or gllb or elph or wannier or legacy or ofdft or mom or do or lrtddft or rttddft or hybrids or pipekmezey)" nightly-mpi-4-new: variables: GPAW_NEW: "1" GPAW_MPI_NPROCS: "4" extends: - .nightly script: - gpaw info - pytest -n 4 --cache-clear -m generate_gpw_files - > gpaw -P $GPAW_MPI_NPROCS python -m pytest --instafail -r s --durations 30 -m "ci and not later" nightly-mpi-2: variables: GPAW_MPI_NPROCS: "2" extends: - .nightly nightly-mpi-4: variables: GPAW_MPI_NPROCS: "4" extends: - .nightly nightly-mpi-8: variables: GPAW_MPI_NPROCS: "8" extends: - .nightly pages: image: $IMAGE_MAIN_LATEST # (Any image present on the runner will do) stage: deploy dependencies: - nightly-mpi-1 tags: - xeon24 script: - mv htmlcov/ public artifacts: paths: - public expire_in: 1 week rules: - if: $CI_PIPELINE_SOURCE == "schedule" gpaw-24.1.0/.mailmap000066400000000000000000000261271454550013000141620ustar00rootroot00000000000000Alexander Held Aleksei V. Ivanov Aleksei V. 
Ivanov Aleksei Andre Kelkkanen Anubhab Haldar Anubhab Haldar Anubhab Haldar Ari Ojanperä Arto Sakko Ask Hjorth Larsen Asmus Ougaard Dohn Asmus Ougaard Dohn Asmus Ougaard Dohn Carsten Rostgaard Carsten Rostgaard Chengdun Jin Christian Glinsvad Christopher Patrick Christian Schaefer schaferc Daniele dtorel David Landis Duncan Mowbray Esben Leonhard Kolsbjerg Esko Makkonen Esko Makkonen Esko Makkonen Fabian Glatzel Falco Hüser Filip Anselm Rasmussen Gaël Donval George Tritsaris Georg Kastlunger Georg Kastlunger Georg Kastlunger Georg Kastlunger Georg Kastlunger Gianluca Levi Guido Gandus Haiping Lin Heine Anton Hansen Heine Anton Hansen Henrik Kristoffersen Henrik Levämäki Ivano Eligio Castelli Jakob Schiøtz Jakob Schiøtz Janosch Michael Rauba Jens Jørgen Mortensen Jens Jørgen Mortensen Jens Jørgen Mortensen Jens Jørgen Mortensen Jens Jørgen Mortensen Jeppe Gavnholt Jess Stausholm-Møller Jess Wellendorff Pedersen Jiajie Chen Jingzhe Chen Joachim Sødequist Jonathan Stenlund Jouko Lehtomäki Juan María García-Lastra Julian Heske Julian Heske Jun Yan Jussi Enkovaara Jussi Enkovaara Jussi Kamal Saha Karsten Wedel Jacobsen Keld Troen Lundgaard Kirsten Winther Kirsten Winther Kristen Kaasbjerg Kristian Baruël Ørnsø Lara Ferrighi Lars Bruno Hansen Lauri Lehtovaara Lucas S.R. 
Cavalcante Mads Burgdorff Kristensen Mads Kruse Marcin Dulak Marco Vanin Marc Torrent Marko Melander Mathias Ljungberg Michael Walter Michael Walter Mikael Kuisma Mikael Kuisma Mikael Kuisma Mikkel Strange Mikkel Strange Mikkel Strange Mohnish Pandey Morten Gjerding Morten Gjerding Nichols Romero Olga Lopez-Acevedo Olga Lopez-Acevedo Oliver Stauffert Oliver Stauffert Ondrej Marsalek Paul Christopher O'Grady Paweł Zawadzki Per Simmendefeldt Schmidt Per Simmendefeldt Schmidt Per Simmendefeldt Schmidt Per Simmendefeldt Schmidt Peter Kluepfel Poul Georg Moses Poul Georg Moses Rasmus Karlsson Roberto De Renzi Robert Warmbier Robert Warmbier Robert Warmbier Rolf Würdemann Rolf Würdemann Sami Juhani Kaappa Sami Juhani Kaappa Samuli Hakala Simone Latini Simone Latini Simone Latini Stefan Othmar Poulsen Stefano Americo aesteric_fooman Sten Haastrup Sten Haastrup Sten Haastrup Tao Jiang Tara Maria Boland Tara Maria Boland Thomas Olsen Thomas Olsen Thorbjørn Skovhus Thorbjørn Skovhus Thorsten Deilmann Toma Susi TomaSusi Torsten Hahn Tristan Maxson Tristan Maxson Troels Kofoed Jacobsen Tuomas Rossi Tuomas Rossi Tuomas Rossi Tuomas Rossi William Bro-Jørgensen Fredrik Andreas Nilsson Fredrik Andreas Nilsson Fredrik Andreas Nilsson Fredrik Andreas Nilsson Fredrik Andreas Nilsson Kyle Bystrom Yorick Schmerwitz Varun Rajeev Pavizhakumari Fabrice Roncoroni Dario A. Leon Valido Dario A. Leon Valido Amalie Helena Søndersted Amalie Helena Søndersted Elvar Örn Jónsson Jakob Kjærulff Svaneborg Stefan Mattsson Stefan Mattsson gpaw-24.1.0/CHANGELOG.rst000066400000000000000000000001541454550013000145520ustar00rootroot00000000000000Changelog ========= See what's new in GPAW here: https://wiki.fysik.dtu.dk/gpaw/releasenotes.html gpaw-24.1.0/CONTRIBUTING.rst000066400000000000000000000003341454550013000151720ustar00rootroot00000000000000Contributing ============ The source code for GPAW is handled the same way as the source code for the ASE project. 
Read here about how to get started: https://wiki.fysik.dtu.dk/ase/development/contribute.html gpaw-24.1.0/LICENSE000066400000000000000000001045131454550013000135420ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. 
And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. 
"The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. 
A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. 
All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . gpaw-24.1.0/MANIFEST.in000066400000000000000000000004431454550013000142700ustar00rootroot00000000000000include MANIFEST.in include LICENSE CONTRIBUTING.rst CHANGELOG.rst include config.py siteconfig_example.py include c/*.c include c/*.h include c/xc/*.h include c/xc/*.c include c/gpu/*.h include c/gpu/*.c include c/gpu/kernels/*.cpp include c/bmgs/*.h include c/bmgs/*.c include pytest.ini gpaw-24.1.0/README.rst000066400000000000000000000057621454550013000142320ustar00rootroot00000000000000.. image:: https://badge.fury.io/py/gpaw.svg :target: https://pypi.org/project/gpaw/ Coverage_ GPAW ==== GPAW is a density-functional theory (DFT) Python_ code based on the projector-augmented wave (PAW) method and the atomic simulation environment (ASE_). It uses plane-waves, atom-centered basis-functions or real-space uniform grids combined with multigrid methods. 
Webpage: https://wiki.fysik.dtu.dk/gpaw Requirements ------------ * Python_ 3.8 or later * ASE_ * NumPy_ * SciPy_ * LibXC_ * A C compiler * A BLAS_ library Optional (highly recommended for increased performance): * An MPI_ library (required for parallel calculations) * BLACS_ and ScaLAPACK_ libraries * FFTW_ See `Release notes <releasenotes_>`_ for version requirements. Installation ------------ Do this:: $ python3 -m pip install gpaw and make sure you have ``~/.local/bin`` in your $PATH. For more details, please see: https://wiki.fysik.dtu.dk/gpaw/install.html Test your installation ---------------------- You can do a test calculation with:: $ gpaw test Contact ------- * Mailing list: gpaw-users_ * Chat: #gpaw on Matrix_. * Bug reports and development: gitlab-issues_ Please send us bug-reports, patches, code, ideas and questions. Example ------- Geometry optimization of hydrogen molecule: >>> from ase import Atoms >>> from ase.optimize import BFGS >>> from ase.io import write >>> from gpaw import GPAW, PW >>> h2 = Atoms('H2', ... positions=[[0, 0, 0], ... [0, 0, 0.7]]) >>> h2.center(vacuum=2.5) >>> h2.calc = GPAW(xc='PBE', ... mode=PW(300), ... txt='h2.txt') >>> opt = BFGS(h2, trajectory='h2.traj') >>> opt.run(fmax=0.02) BFGS: 0 09:08:09 -6.566505 2.2970 BFGS: 1 09:08:11 -6.629859 0.1871 BFGS: 2 09:08:12 -6.630410 0.0350 BFGS: 3 09:08:13 -6.630429 0.0003 >>> write('H2.xyz', h2) >>> h2.get_potential_energy() # ASE's units are eV and Å -6.6304292169392784 Getting started --------------- Once you have familiarized yourself with ASE_ and NumPy_, you should take a look at the GPAW exercises_ and tutorials_. .. _Python: https://www.python.org/ .. _ASE: https://wiki.fysik.dtu.dk/ase .. _NumPy: http://docs.scipy.org/doc/numpy/reference/ .. _SciPy: http://docs.scipy.org/doc/scipy/reference/ .. _LibXC: http://www.tddft.org/programs/libxc/ .. _MPI: http://www.mpi-forum.org/ .. _BLAS: http://www.netlib.org/blas/ .. _FFTW: http://www.fftw.org/ .. _BLACS: http://www.netlib.org/blacs/ .. 
_ScaLAPACK: http://www.netlib.org/scalapack/ .. _gpaw-users: https://listserv.fysik.dtu.dk/mailman/listinfo/gpaw-users .. _Matrix: https://app.element.io/#/room/#gpaw:matrix.org .. _gitlab-issues: https://gitlab.com/gpaw/gpaw/issues .. _exercises: https://wiki.fysik.dtu.dk/gpaw/exercises/exercises.html .. _tutorials: https://wiki.fysik.dtu.dk/gpaw/tutorials/tutorials.html .. _Coverage: https://wiki.fysik.dtu.dk/gpaw/htmlcov/index.html .. _releasenotes: https://wiki.fysik.dtu.dk/gpaw/releasenotes.html gpaw-24.1.0/_gpaw.pyi000066400000000000000000000041461454550013000143560ustar00rootroot00000000000000import numpy as np from typing import TypeVar, Any def adjust_positions() -> None: ... def adjust_momenta() -> None: ... def calculate_forces_H2O() -> None: ... def localize(Z_nnc: Any, U_nn: Any) -> float: ... def spherical_harmonics() -> None: ... def hartree(l: int, nrdr: np.ndarray, r: np.ndarray, vr: np.ndarray) -> None: ... def get_num_threads() -> int: ... def pack(A: np.ndarray) -> np.ndarray: ... T = TypeVar('T', float, complex) def mmm(alpha: T, a: np.ndarray, opa: str, b: np.ndarray, opb: str, beta: T, c: np.ndarray) -> None: ... def gemm(alpha, a, b, beta, c, transa='n') -> None: ... def rk(alpha, a, beta, c, trans='c') -> None: ... def r2k(alpha, a, b, beta, c, trans='c') -> None: ... def gemmdot() -> None: ... class Communicator: rank: int size: int def add_to_density(f: float, psit: np.ndarray, density: np.ndarray) -> None: ... def pw_insert(coef_G: np.ndarray, Q_G: np.ndarray, s: float, array_Q: np.ndarray) -> None: ... def pblas_tran(N: int, M: int, alpha: float, a_MN: np.ndarray, beta:float, c_NM: np.ndarray, desca: np.ndarray, descc: np.ndarray, conj: bool) -> None: ... def scalapack_set(a: np.ndarray, desc: np.ndarray, alpha: float, beta: float, uplo: str, n: int, m: int, ja: int, ia: int) -> None: ... def scalapack_diagonalize_dc(a: np.ndarray, desc: np.ndarray, uplo: str, data: np.ndarray, eps: np.ndarray) -> int: ... 
def scalapack_inverse() -> None: ... def new_blacs_context() -> None: ... def get_blacs_local_shape() -> None: ... def pblas_rk() -> None: ... def pblas_r2k() -> None: ... def pblas_gemm() -> None: ... def scalapack_redist() -> None: ... def GG_shuffle(G_G: np.ndarray, int, A_GG: np.ndarray, tmp_GG: np.ndarray) -> None: ... def pw_precond(G2_G: np.ndarray, r_G: np.ndarray, ekin: float, o_G: np.ndarray) -> None: ... gpaw-24.1.0/c/000077500000000000000000000000001454550013000127535ustar00rootroot00000000000000gpaw-24.1.0/c/_gpaw.h000066400000000000000000000453671454550013000142400ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Copyright (C) 2007-2010 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #include #ifdef PARALLEL #include #endif #ifndef GPAW_WITHOUT_LIBXC #include #endif #ifdef GPAW_HPM PyObject* ibm_hpm_start(PyObject *self, PyObject *args); PyObject* ibm_hpm_stop(PyObject *self, PyObject *args); PyObject* ibm_mpi_start(PyObject *self); PyObject* ibm_mpi_stop(PyObject *self); #endif #ifdef CRAYPAT #include PyObject* craypat_region_begin(PyObject *self, PyObject *args); PyObject* craypat_region_end(PyObject *self, PyObject *args); #endif PyObject* symmetrize(PyObject *self, PyObject *args); PyObject* symmetrize_ft(PyObject *self, PyObject *args); PyObject* symmetrize_wavefunction(PyObject *self, PyObject *args); PyObject* symmetrize_return_index(PyObject *self, PyObject *args); PyObject* symmetrize_with_index(PyObject *self, PyObject *args); PyObject* map_k_points(PyObject *self, PyObject *args); PyObject* GG_shuffle(PyObject *self, PyObject *args); PyObject* tetrahedron_weight(PyObject *self, PyObject *args); #ifndef GPAW_WITHOUT_BLAS PyObject* mmm(PyObject *self, PyObject *args); PyObject* rk(PyObject *self, PyObject *args); PyObject* r2k(PyObject *self, PyObject *args); #endif PyObject* 
NewOperatorObject(PyObject *self, PyObject *args); PyObject* NewWOperatorObject(PyObject *self, PyObject *args); PyObject* NewSplineObject(PyObject *self, PyObject *args); PyObject* NewTransformerObject(PyObject *self, PyObject *args); PyObject* pc_potential(PyObject *self, PyObject *args); PyObject* add_to_density(PyObject *self, PyObject *args); PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args); PyObject* pack(PyObject *self, PyObject *args); PyObject* unpack(PyObject *self, PyObject *args); PyObject* unpack_complex(PyObject *self, PyObject *args); PyObject* hartree(PyObject *self, PyObject *args); PyObject* integrate_outwards(PyObject *self, PyObject *args); PyObject* integrate_inwards(PyObject *self, PyObject *args); PyObject* localize(PyObject *self, PyObject *args); PyObject* NewXCFunctionalObject(PyObject *self, PyObject *args); #ifndef GPAW_WITHOUT_LIBXC PyObject* NewlxcXCFunctionalObject(PyObject *self, PyObject *args); PyObject* lxcXCFuncNum(PyObject *self, PyObject *args); #endif PyObject* exterior_electron_density_region(PyObject *self, PyObject *args); PyObject* plane_wave_grid(PyObject *self, PyObject *args); PyObject* tci_overlap(PyObject *self, PyObject *args); PyObject *pwlfc_expand(PyObject *self, PyObject *args); PyObject *pw_insert(PyObject *self, PyObject *args); PyObject *pw_precond(PyObject *self, PyObject *args); PyObject *fd_precond(PyObject *self, PyObject *args); PyObject* vdw(PyObject *self, PyObject *args); PyObject* vdw2(PyObject *self, PyObject *args); PyObject* spherical_harmonics(PyObject *self, PyObject *args); PyObject* spline_to_grid(PyObject *self, PyObject *args); PyObject* NewLFCObject(PyObject *self, PyObject *args); #ifdef PARALLEL PyObject* globally_broadcast_bytes(PyObject *self, PyObject *args); #endif #if defined(GPAW_WITH_SL) && defined(PARALLEL) PyObject* new_blacs_context(PyObject *self, PyObject *args); PyObject* get_blacs_gridinfo(PyObject* self, PyObject *args); PyObject* 
get_blacs_local_shape(PyObject* self, PyObject *args); PyObject* blacs_destroy(PyObject *self, PyObject *args); PyObject* scalapack_set(PyObject *self, PyObject *args); PyObject* scalapack_redist(PyObject *self, PyObject *args); PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args); PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args); #ifdef GPAW_MR3 PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args); #endif PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args); PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args); #ifdef GPAW_MR3 PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args); #endif PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args); PyObject* scalapack_inverse(PyObject *self, PyObject *args); PyObject* scalapack_solve(PyObject *self, PyObject *args); PyObject* pblas_tran(PyObject *self, PyObject *args); PyObject* pblas_gemm(PyObject *self, PyObject *args); PyObject* pblas_hemm_symm(PyObject *self, PyObject *args); PyObject* pblas_gemv(PyObject *self, PyObject *args); PyObject* pblas_r2k(PyObject *self, PyObject *args); PyObject* pblas_rk(PyObject *self, PyObject *args); #if defined(GPAW_WITH_ELPA) #include PyObject* pyelpa_init(PyObject *self, PyObject *args); PyObject* pyelpa_uninit(PyObject *self, PyObject *args); PyObject* pyelpa_version(PyObject *self, PyObject *args); PyObject* pyelpa_allocate(PyObject *self, PyObject *args); PyObject* pyelpa_set(PyObject *self, PyObject *args); PyObject* pyelpa_set_comm(PyObject *self, PyObject *args); PyObject* pyelpa_setup(PyObject *self, PyObject *args); PyObject* pyelpa_diagonalize(PyObject *self, PyObject *args); PyObject* pyelpa_general_diagonalize(PyObject *self, PyObject *args); PyObject* pyelpa_constants(PyObject *self, PyObject *args); PyObject* pyelpa_deallocate(PyObject *self, PyObject *args); #endif // GPAW_WITH_ELPA #endif // GPAW_WITH_SL and PARALLEL #ifdef GPAW_WITH_FFTW 
PyObject * FFTWPlan(PyObject *self, PyObject *args); PyObject * FFTWExecute(PyObject *self, PyObject *args); PyObject * FFTWDestroy(PyObject *self, PyObject *args); #endif // Threading PyObject* get_num_threads(PyObject *self, PyObject *args); #ifdef GPAW_PAPI PyObject* papi_mem_info(PyObject *self, PyObject *args); #endif #ifdef GPAW_WITH_LIBVDWXC PyObject* libvdwxc_create(PyObject *self, PyObject *args); PyObject* libvdwxc_has(PyObject* self, PyObject *args); PyObject* libvdwxc_init_serial(PyObject *self, PyObject *args); PyObject* libvdwxc_calculate(PyObject *self, PyObject *args); PyObject* libvdwxc_tostring(PyObject *self, PyObject *args); PyObject* libvdwxc_free(PyObject* self, PyObject* args); PyObject* libvdwxc_init_mpi(PyObject* self, PyObject* args); PyObject* libvdwxc_init_pfft(PyObject* self, PyObject* args); #endif // GPAW_WITH_LIBVDWXC #ifdef GPAW_GITHASH // For converting contents of a macro to a string, see // https://en.wikipedia.org/wiki/C_preprocessor#Token_stringification #define STR(s) #s #define XSTR(s) STR(s) PyObject* githash(PyObject* self, PyObject* args) { return Py_BuildValue("s", XSTR(GPAW_GITHASH)); } #undef XSTR #undef STR #endif // GPAW_GITHASH // Holonomic constraints PyObject* adjust_positions(PyObject *self, PyObject *args); PyObject* adjust_momenta(PyObject *self, PyObject *args); // TIP3P forces PyObject* calculate_forces_H2O(PyObject *self, PyObject *args); #ifdef GPAW_GPU PyObject* gpaw_gpu_init(PyObject *self, PyObject *args); PyObject* gpaw_gpu_delete(PyObject *self, PyObject *args); PyObject* csign_gpu(PyObject *self, PyObject *args); PyObject* scal_gpu(PyObject *self, PyObject *args); PyObject* multi_scal_gpu(PyObject *self, PyObject *args); PyObject* mmm_gpu(PyObject *self, PyObject *args); PyObject* gemm_gpu(PyObject *self, PyObject *args); PyObject* gemv_gpu(PyObject *self, PyObject *args); PyObject* rk_gpu(PyObject *self, PyObject *args); PyObject* axpy_gpu(PyObject *self, PyObject *args); PyObject* 
multi_axpy_gpu(PyObject *self, PyObject *args); PyObject* r2k_gpu(PyObject *self, PyObject *args); PyObject* dotc_gpu(PyObject *self, PyObject *args); PyObject* dotu_gpu(PyObject *self, PyObject *args); PyObject* multi_dotu_gpu(PyObject *self, PyObject *args); PyObject* multi_dotc_gpu(PyObject *self, PyObject *args); PyObject* add_linear_field_gpu(PyObject *self, PyObject *args); PyObject* elementwise_multiply_add_gpu(PyObject *self, PyObject *args); PyObject* multi_elementwise_multiply_add_gpu(PyObject *self, PyObject *args); PyObject* ax2py_gpu(PyObject *self, PyObject *args); PyObject* multi_ax2py_gpu(PyObject *self, PyObject *args); PyObject* axpbyz_gpu(PyObject *self, PyObject *args); PyObject* axpbz_gpu(PyObject *self, PyObject *args); PyObject* fill_gpu(PyObject *self, PyObject *args); PyObject* pwlfc_expand_gpu(PyObject *self, PyObject *args); PyObject* pw_insert_gpu(PyObject *self, PyObject *args); PyObject* add_to_density_gpu(PyObject* self, PyObject* args); PyObject* dH_aii_times_P_ani_gpu(PyObject* self, PyObject* args); PyObject* evaluate_lda_gpu(PyObject* self, PyObject* args); PyObject* evaluate_pbe_gpu(PyObject* self, PyObject* args); PyObject* calculate_residual_gpu(PyObject* self, PyObject* args); #endif static PyMethodDef functions[] = { {"symmetrize", symmetrize, METH_VARARGS, 0}, {"symmetrize_ft", symmetrize_ft, METH_VARARGS, 0}, {"symmetrize_wavefunction", symmetrize_wavefunction, METH_VARARGS, 0}, {"symmetrize_return_index", symmetrize_return_index, METH_VARARGS, 0}, {"symmetrize_with_index", symmetrize_with_index, METH_VARARGS, 0}, {"map_k_points", map_k_points, METH_VARARGS, 0}, {"GG_shuffle", GG_shuffle, METH_VARARGS, 0}, {"tetrahedron_weight", tetrahedron_weight, METH_VARARGS, 0}, #ifndef GPAW_WITHOUT_BLAS {"mmm", mmm, METH_VARARGS, 0}, {"rk", rk, METH_VARARGS, 0}, {"r2k", r2k, METH_VARARGS, 0}, #endif {"Operator", NewOperatorObject, METH_VARARGS, 0}, {"WOperator", NewWOperatorObject, METH_VARARGS, 0}, {"Spline", NewSplineObject, 
METH_VARARGS, 0}, {"Transformer", NewTransformerObject, METH_VARARGS, 0}, {"add_to_density", add_to_density, METH_VARARGS, 0}, {"utilities_gaussian_wave", utilities_gaussian_wave, METH_VARARGS, 0}, {"eed_region", exterior_electron_density_region, METH_VARARGS, 0}, {"plane_wave_grid", plane_wave_grid, METH_VARARGS, 0}, {"pwlfc_expand", pwlfc_expand, METH_VARARGS, 0}, {"pw_insert", pw_insert, METH_VARARGS, 0}, {"pw_precond", pw_precond, METH_VARARGS, 0}, {"fd_precond", fd_precond, METH_VARARGS, 0}, {"pack", pack, METH_VARARGS, 0}, {"unpack", unpack, METH_VARARGS, 0}, {"unpack_complex", unpack_complex, METH_VARARGS, 0}, {"hartree", hartree, METH_VARARGS, 0}, {"integrate_outwards", integrate_outwards, METH_VARARGS, 0}, {"integrate_inwards", integrate_inwards, METH_VARARGS, 0}, {"localize", localize, METH_VARARGS, 0}, {"XCFunctional", NewXCFunctionalObject, METH_VARARGS, 0}, #ifndef GPAW_WITHOUT_LIBXC {"lxcXCFunctional", NewlxcXCFunctionalObject, METH_VARARGS, 0}, {"lxcXCFuncNum", lxcXCFuncNum, METH_VARARGS, 0}, #endif {"tci_overlap", tci_overlap, METH_VARARGS, 0}, {"vdw", vdw, METH_VARARGS, 0}, {"vdw2", vdw2, METH_VARARGS, 0}, {"spherical_harmonics", spherical_harmonics, METH_VARARGS, 0}, {"pc_potential", pc_potential, METH_VARARGS, 0}, {"spline_to_grid", spline_to_grid, METH_VARARGS, 0}, {"LFC", NewLFCObject, METH_VARARGS, 0}, #ifdef PARALLEL {"globally_broadcast_bytes", globally_broadcast_bytes, METH_VARARGS, 0}, #endif {"get_num_threads", get_num_threads, METH_VARARGS, 0}, #if defined(GPAW_WITH_SL) && defined(PARALLEL) {"new_blacs_context", new_blacs_context, METH_VARARGS, NULL}, {"get_blacs_gridinfo", get_blacs_gridinfo, METH_VARARGS, NULL}, {"get_blacs_local_shape", get_blacs_local_shape, METH_VARARGS, NULL}, {"blacs_destroy", blacs_destroy, METH_VARARGS, 0}, {"scalapack_set", scalapack_set, METH_VARARGS, 0}, {"scalapack_redist", scalapack_redist, METH_VARARGS, 0}, {"scalapack_diagonalize_dc", scalapack_diagonalize_dc, METH_VARARGS, 0}, 
{"scalapack_diagonalize_ex", scalapack_diagonalize_ex, METH_VARARGS, 0}, #ifdef GPAW_MR3 {"scalapack_diagonalize_mr3", scalapack_diagonalize_mr3, METH_VARARGS, 0}, #endif // GPAW_MR3 {"scalapack_general_diagonalize_dc", scalapack_general_diagonalize_dc, METH_VARARGS, 0}, {"scalapack_general_diagonalize_ex", scalapack_general_diagonalize_ex, METH_VARARGS, 0}, #ifdef GPAW_MR3 {"scalapack_general_diagonalize_mr3", scalapack_general_diagonalize_mr3, METH_VARARGS, 0}, #endif // GPAW_MR3 {"scalapack_inverse_cholesky", scalapack_inverse_cholesky, METH_VARARGS, 0}, {"scalapack_inverse", scalapack_inverse, METH_VARARGS, 0}, {"scalapack_solve", scalapack_solve, METH_VARARGS, 0}, {"pblas_tran", pblas_tran, METH_VARARGS, 0}, {"pblas_gemm", pblas_gemm, METH_VARARGS, 0}, {"pblas_hemm_symm", pblas_hemm_symm, METH_VARARGS, 0}, {"pblas_gemv", pblas_gemv, METH_VARARGS, 0}, {"pblas_r2k", pblas_r2k, METH_VARARGS, 0}, {"pblas_rk", pblas_rk, METH_VARARGS, 0}, #if defined(GPAW_WITH_ELPA) {"pyelpa_init", pyelpa_init, METH_VARARGS, 0}, {"pyelpa_uninit", pyelpa_uninit, METH_VARARGS, 0}, {"pyelpa_version", pyelpa_version, METH_VARARGS, 0}, {"pyelpa_allocate", pyelpa_allocate, METH_VARARGS, 0}, {"pyelpa_set", pyelpa_set, METH_VARARGS, 0}, {"pyelpa_setup", pyelpa_setup, METH_VARARGS, 0}, {"pyelpa_set_comm", pyelpa_set_comm, METH_VARARGS, 0}, {"pyelpa_diagonalize", pyelpa_diagonalize, METH_VARARGS, 0}, {"pyelpa_general_diagonalize", pyelpa_general_diagonalize, METH_VARARGS, 0}, {"pyelpa_constants", pyelpa_constants, METH_VARARGS, 0}, {"pyelpa_deallocate", pyelpa_deallocate, METH_VARARGS, 0}, #endif // GPAW_WITH_ELPA #endif // GPAW_WITH_SL && PARALLEL #ifdef GPAW_WITH_FFTW {"FFTWPlan", FFTWPlan, METH_VARARGS, 0}, {"FFTWExecute", FFTWExecute, METH_VARARGS, 0}, {"FFTWDestroy", FFTWDestroy, METH_VARARGS, 0}, #endif #ifdef GPAW_HPM {"hpm_start", ibm_hpm_start, METH_VARARGS, 0}, {"hpm_stop", ibm_hpm_stop, METH_VARARGS, 0}, {"mpi_start", (PyCFunction) ibm_mpi_start, METH_NOARGS, 0}, {"mpi_stop", 
(PyCFunction) ibm_mpi_stop, METH_NOARGS, 0}, #endif // GPAW_HPM #ifdef CRAYPAT {"craypat_region_begin", craypat_region_begin, METH_VARARGS, 0}, {"craypat_region_end", craypat_region_end, METH_VARARGS, 0}, #endif // CRAYPAT #ifdef GPAW_PAPI {"papi_mem_info", papi_mem_info, METH_VARARGS, 0}, #endif // GPAW_PAPI #ifdef GPAW_WITH_LIBVDWXC {"libvdwxc_create", libvdwxc_create, METH_VARARGS, 0}, {"libvdwxc_has", libvdwxc_has, METH_VARARGS, 0}, {"libvdwxc_init_serial", libvdwxc_init_serial, METH_VARARGS, 0}, {"libvdwxc_calculate", libvdwxc_calculate, METH_VARARGS, 0}, {"libvdwxc_tostring", libvdwxc_tostring, METH_VARARGS, 0}, {"libvdwxc_free", libvdwxc_free, METH_VARARGS, 0}, {"libvdwxc_init_mpi", libvdwxc_init_mpi, METH_VARARGS, 0}, {"libvdwxc_init_pfft", libvdwxc_init_pfft, METH_VARARGS, 0}, #endif // GPAW_WITH_LIBVDWXC {"adjust_positions", adjust_positions, METH_VARARGS, 0}, {"adjust_momenta", adjust_momenta, METH_VARARGS, 0}, {"calculate_forces_H2O", calculate_forces_H2O, METH_VARARGS, 0}, #ifdef GPAW_GITHASH {"githash", githash, METH_VARARGS, 0}, #endif // GPAW_GITHASH #ifdef GPAW_GPU {"gpaw_gpu_init", gpaw_gpu_init, METH_VARARGS, 0}, {"gpaw_gpu_delete", gpaw_gpu_delete, METH_VARARGS, 0}, {"csign_gpu", csign_gpu, METH_VARARGS, 0}, {"scal_gpu", scal_gpu, METH_VARARGS, 0}, {"multi_scal_gpu", multi_scal_gpu, METH_VARARGS, 0}, {"mmm_gpu", mmm_gpu, METH_VARARGS, 0}, {"gemm_gpu", gemm_gpu, METH_VARARGS, 0}, {"gemv_gpu", gemv_gpu, METH_VARARGS, 0}, {"axpy_gpu", axpy_gpu, METH_VARARGS, 0}, {"multi_axpy_gpu", multi_axpy_gpu, METH_VARARGS, 0}, {"rk_gpu", rk_gpu, METH_VARARGS, 0}, {"r2k_gpu", r2k_gpu, METH_VARARGS, 0}, {"dotc_gpu", dotc_gpu, METH_VARARGS, 0}, {"dotu_gpu", dotu_gpu, METH_VARARGS, 0}, {"multi_dotu_gpu", multi_dotu_gpu, METH_VARARGS, 0}, {"multi_dotc_gpu", multi_dotc_gpu, METH_VARARGS, 0}, {"add_linear_field_gpu", add_linear_field_gpu, METH_VARARGS, 0}, {"elementwise_multiply_add_gpu", elementwise_multiply_add_gpu, METH_VARARGS, 0}, 
{"multi_elementwise_multiply_add_gpu", multi_elementwise_multiply_add_gpu, METH_VARARGS, 0}, {"ax2py_gpu", ax2py_gpu, METH_VARARGS, 0}, {"multi_ax2py_gpu", multi_ax2py_gpu, METH_VARARGS, 0}, {"axpbyz_gpu", axpbyz_gpu, METH_VARARGS, 0}, {"axpbz_gpu", axpbz_gpu, METH_VARARGS, 0}, {"fill_gpu", fill_gpu, METH_VARARGS, 0}, {"pwlfc_expand_gpu", pwlfc_expand_gpu, METH_VARARGS, 0}, {"pw_insert_gpu", pw_insert_gpu, METH_VARARGS, 0}, {"add_to_density_gpu", add_to_density_gpu, METH_VARARGS, 0}, {"dH_aii_times_P_ani_gpu", dH_aii_times_P_ani_gpu, METH_VARARGS, 0}, {"evaluate_lda_gpu", evaluate_lda_gpu, METH_VARARGS, 0}, {"evaluate_pbe_gpu", evaluate_pbe_gpu, METH_VARARGS, 0}, {"calculate_residuals_gpu", calculate_residual_gpu, METH_VARARGS, 0}, #endif // GPAW_GPU {0, 0, 0, 0} }; #ifdef PARALLEL extern PyTypeObject MPIType; extern PyTypeObject GPAW_MPI_Request_type; #endif extern PyTypeObject LFCType; extern PyTypeObject OperatorType; extern PyTypeObject WOperatorType; extern PyTypeObject SplineType; extern PyTypeObject TransformerType; extern PyTypeObject XCFunctionalType; #ifndef GPAW_WITHOUT_LIBXC extern PyTypeObject lxcXCFunctionalType; #endif static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_gpaw", "C-extension for GPAW", -1, functions, NULL, NULL, NULL, NULL }; static PyObject* moduleinit(void) { #ifdef PARALLEL if (PyType_Ready(&MPIType) < 0) return NULL; if (PyType_Ready(&GPAW_MPI_Request_type) < 0) return NULL; #endif if (PyType_Ready(&LFCType) < 0) return NULL; if (PyType_Ready(&OperatorType) < 0) return NULL; if (PyType_Ready(&WOperatorType) < 0) return NULL; if (PyType_Ready(&SplineType) < 0) return NULL; if (PyType_Ready(&TransformerType) < 0) return NULL; if (PyType_Ready(&XCFunctionalType) < 0) return NULL; #ifndef GPAW_WITHOUT_LIBXC if (PyType_Ready(&lxcXCFunctionalType) < 0) return NULL; #endif PyObject* m = PyModule_Create(&moduledef); if (m == NULL) return NULL; #ifdef PARALLEL Py_INCREF(&MPIType); Py_INCREF(&GPAW_MPI_Request_type); 
PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType); #endif #ifndef GPAW_WITHOUT_LIBXC # if XC_MAJOR_VERSION >= 3 PyObject_SetAttrString(m, "libxc_version", PyUnicode_FromString(xc_version_string())); # endif #endif #ifdef GPAW_GPU PyObject_SetAttrString(m, "GPU_ENABLED", Py_True); #else PyObject_SetAttrString(m, "GPU_ENABLED", Py_False); #endif #ifdef GPAW_GPU_AWARE_MPI PyObject_SetAttrString(m, "gpu_aware_mpi", Py_True); #else PyObject_SetAttrString(m, "gpu_aware_mpi", Py_False); #endif #ifdef _OPENMP PyObject_SetAttrString(m, "have_openmp", Py_True); #else PyObject_SetAttrString(m, "have_openmp", Py_False); #endif // Version number of C-code. Keep in sync with gpaw/__init__.py PyObject_SetAttrString(m, "version", PyLong_FromLong(4)); Py_INCREF(&LFCType); Py_INCREF(&OperatorType); Py_INCREF(&WOperatorType); Py_INCREF(&SplineType); Py_INCREF(&TransformerType); Py_INCREF(&XCFunctionalType); #ifndef GPAW_WITHOUT_LIBXC Py_INCREF(&lxcXCFunctionalType); #endif return m; } gpaw-24.1.0/c/_gpaw_so.c000066400000000000000000000003521454550013000147150ustar00rootroot00000000000000#include "_gpaw.h" PyMODINIT_FUNC PyInit__gpaw(void) { // gpaw-python needs to import arrays at the right time, so this is // done in gpaw_main(). For _gpaw.so, we do it here: import_array1(0); return moduleinit(); } gpaw-24.1.0/c/array.h000066400000000000000000000140371454550013000142470ustar00rootroot00000000000000// In the code, one utilizes calls equvalent to PyArray API, // except instead of PyArray_BYTES one uses Array_BYTES. // Then, if GPAW is built with GPAW_GPU_AWARE_MPI define, these macros are rewritten with wrappers. #ifndef GPAW_ARRAY_ALLOW_CUPY #ifdef GPAW_ARRAY_DISABLE_NUMPY #error "No CPAW_ARRAY_ALLOW_CUPY and GPAW_ARRAY_DISABLE_NUMPY is set. No array interfaces remain." #endif // Check that array is well-behaved and contains data that can be sent. 
#define CHK_ARRAY(a) if ((a) == NULL || !PyArray_Check(a) \ || !PyArray_ISCARRAY((PyArrayObject*)a) \ || !PyArray_ISNUMBER((PyArrayObject*)a)) { \ PyErr_SetString(PyExc_TypeError, \ "Not a proper NumPy array for MPI communication."); \ return NULL; } else // Check that array is well-behaved, read-only and contains data that // can be sent. #define CHK_ARRAY_RO(a) if ((a) == NULL || !PyArray_Check(a) \ || !PyArray_ISCARRAY_RO((PyArrayObject*)a) \ || !PyArray_ISNUMBER((PyArrayObject*)a)) { \ PyErr_SetString(PyExc_TypeError, \ "Not a proper NumPy array for MPI communication."); \ return NULL; } else // Check that two arrays have the same type, and the size of the // second is a given multiple of the size of the first #define CHK_ARRAYS(a,b,n) \ if ((PyArray_TYPE((PyArrayObject*)a) != PyArray_TYPE((PyArrayObject*)b)) \ || (PyArray_SIZE((PyArrayObject*)b) != PyArray_SIZE((PyArrayObject*)a) * n)) { \ PyErr_SetString(PyExc_ValueError, \ "Incompatible array types or sizes."); \ return NULL; } else #define Array_NDIM(a) PyArray_NDIM((PyArrayObject*)a) #define Array_DIM(a,d) PyArray_DIM((PyArrayObject*)a,d) #define Array_ITEMSIZE(a) PyArray_ITEMSIZE((PyArrayObject*)a) #define Array_BYTES(a) PyArray_BYTES((PyArrayObject*)a) #define Array_DATA(a) PyArray_DATA((PyArrayObject*)a) #define Array_SIZE(a) PyArray_SIZE((PyArrayObject*)a) #define Array_TYPE(a) PyArray_TYPE((PyArrayObject*)a) #define Array_NBYTES(a) PyArray_NBYTES((PyArrayObject*)a) #define Array_ISCOMPLEX(a) PyArray_ISCOMPLEX((PyArrayObject*)a) #else // GPAW_ARRAY_ALLOW_CUPY #define CHK_ARRAY(a) // TODO #define CHK_ARRAY_RO(a) // TODO #define CHK_ARRAYS(a,b,n) // TODO #include static inline int Array_NDIM(PyObject* obj) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_NDIM((PyArrayObject*)obj); } #endif // return len(obj.shape) PyObject* shape = PyObject_GetAttrString(obj, "shape"); if (shape == NULL) return -1; Py_DECREF(shape); return PyTuple_Size(shape); } static inline int 
Array_DIM(PyObject* obj, int dim) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_DIM((PyArrayObject*)obj, dim); } #endif PyObject* shape = PyObject_GetAttrString(obj, "shape"); if (shape == NULL) return -1; PyObject* pydim = PyTuple_GetItem(shape, dim); Py_DECREF(shape); if (pydim == NULL) return -1; int value = (int) PyLong_AS_LONG(pydim); return value; } static inline char* Array_BYTES(PyObject* obj) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_BYTES((PyArrayObject*)obj); } #endif // Equivalent to obj.data.ptr PyObject* ndarray_data = PyObject_GetAttrString(obj, "data"); if (ndarray_data == NULL) return NULL; PyObject* ptr_data = PyObject_GetAttrString(ndarray_data, "ptr"); if (ptr_data == NULL) return NULL; char* ptr = (char*) PyLong_AS_LONG(ptr_data); Py_DECREF(ptr_data); Py_DECREF(ndarray_data); return ptr; } #define Array_DATA(a) ((void*) Array_BYTES(a)) static inline int Array_SIZE(PyObject* obj) { PyObject* size = PyObject_GetAttrString(obj, "size"); int arraysize = (int) PyLong_AS_LONG(size); Py_DECREF(size); return arraysize; } static inline int Array_TYPE(PyObject* obj) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_TYPE((PyArrayObject*)obj); } #endif PyObject* dtype = PyObject_GetAttrString(obj, "dtype"); if (dtype == NULL) return -1; PyObject* num = PyObject_GetAttrString(dtype, "num"); Py_DECREF(dtype); if (num == NULL) return -1; int value = (int) PyLong_AS_LONG(num); Py_DECREF(num); return value; } static inline int Array_ITEMSIZE(PyObject* obj) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_ITEMSIZE((PyArrayObject*)obj); } #endif PyObject* dtype = PyObject_GetAttrString(obj, "dtype"); if (dtype == NULL) return -1; PyObject* itemsize_obj = PyObject_GetAttrString(dtype, "itemsize"); if (itemsize_obj == NULL) return -1; int itemsize = (int) PyLong_AS_LONG(itemsize_obj); Py_DECREF(itemsize_obj); Py_DECREF(dtype); return itemsize; } 
static inline long Array_NBYTES(PyObject* obj) { #ifndef GPAW_ARRAY_DISABLE_NUMPY if (PyArray_Check(obj)) { return PyArray_NBYTES((PyArrayObject*)obj); } #endif PyObject* nbytes = PyObject_GetAttrString(obj, "nbytes"); long nbytesvalue = PyLong_AS_LONG(nbytes); Py_DECREF(nbytes); return nbytesvalue; } static inline int Array_ISCOMPLEX(PyObject* obj) { int result = PyTypeNum_ISCOMPLEX(Array_TYPE(obj)); return result; } static inline void print_array_info(PyObject* obj) { if (PyArray_Check(obj)) { printf("numpy "); } if (Array_ISCOMPLEX(obj)) { printf("complex "); } printf("itemsize: %d", Array_ITEMSIZE(obj)); printf("typenum %d", Array_TYPE(obj)); printf("shape: ["); for (int i=0; i #include #include #include boundary_conditions* bc_init(const long size1[3], const long padding[3][2], const long npadding[3][2], const long neighbors[3][2], MPI_Comm comm, bool real, bool cfd) { boundary_conditions* bc = GPAW_MALLOC(boundary_conditions, 1); for (int i = 0; i < 3; i++) { bc->size1[i] = size1[i]; bc->size2[i] = size1[i] + padding[i][0] + padding[i][1]; bc->padding[i] = padding[i][0]; } bc->comm = comm; bc->ndouble = (real ? 
1 : 2); bc->cfd = cfd; int rank = 0; if (comm != MPI_COMM_NULL) MPI_Comm_rank(comm, &rank); int start[3]; int size[3]; for (int i = 0; i < 3; i++) { start[i] = padding[i][0]; size[i] = size1[i]; } for (int i = 0; i < 3; i++) { int n = bc->ndouble; for (int j = 0; j < 3; j++) if (j != i) n *= size[j]; for (int d = 0; d < 2; d++) { int ds = npadding[i][d]; int dr = padding[i][d]; for (int j = 0; j < 3; j++) { bc->sendstart[i][d][j] = start[j]; bc->sendsize[i][d][j] = size[j]; bc->recvstart[i][d][j] = start[j]; bc->recvsize[i][d][j] = size[j]; } if (d == 0) { bc->sendstart[i][d][i] = dr; bc->recvstart[i][d][i] = 0; } else { bc->sendstart[i][d][i] = padding[i][0] + size1[i] - ds; bc->recvstart[i][d][i] = padding[i][0] + size1[i]; } bc->sendsize[i][d][i] = ds; bc->recvsize[i][d][i] = dr; bc->sendproc[i][d] = DO_NOTHING; bc->recvproc[i][d] = DO_NOTHING; bc->nsend[i][d] = 0; bc->nrecv[i][d] = 0; int p = neighbors[i][d]; if (p == rank) { if (ds > 0) bc->sendproc[i][d] = COPY_DATA; if (dr > 0) bc->recvproc[i][d] = COPY_DATA; } else if (p >= 0) { // Communication required: if (ds > 0) { bc->sendproc[i][d] = p; bc->nsend[i][d] = n * ds; } if (dr > 0) { bc->recvproc[i][d] = p; bc->nrecv[i][d] = n * dr; } } } if (cfd == 0) { start[i] = 0; size[i] = bc->size2[i]; } // If the two neighboring processors along the // i'th axis are the same, then we join the two communications // into one: #ifndef GPAW_GPU bc->rjoin[i] = ((bc->recvproc[i][0] == bc->recvproc[i][1]) && bc->recvproc[i][0] >= 0); bc->sjoin[i] = ((bc->sendproc[i][0] == bc->sendproc[i][1]) && bc->sendproc[i][0] >= 0); #else bc->rjoin[i] = 0; bc->sjoin[i] = 0; #endif } bc->maxsend = 0; bc->maxrecv = 0; for (int i = 0; i < 3; i++) { int n = bc->nsend[i][0] + bc->nsend[i][1]; if (n > bc->maxsend) bc->maxsend = n; n = bc->nrecv[i][0] + bc->nrecv[i][1]; if (n > bc->maxrecv) bc->maxrecv = n; } #ifdef GPAW_GPU bc_init_gpu(bc); #endif return bc; } void bc_unpack1(const boundary_conditions* bc, const double* aa1, double* aa2, int 
i, MPI_Request recvreq[2], MPI_Request sendreq[2], double* rbuff, double* sbuff, const double_complex phases[2], int thd, int nin) { int ng = bc->ndouble * bc->size1[0] * bc->size1[1] * bc->size1[2]; int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2]; bool real = (bc->ndouble == 1); for (int m = 0; m < nin; m++) // Copy data: if (i == 0) { // Zero all of a2 array. We should only zero the bounaries // that are not periodic, but it's simpler to zero everything! // XXX memset(aa2 + m * ng2, 0, ng2 * sizeof(double)); // Copy data from a1 to central part of a2: if (real) bmgs_paste(aa1 + m * ng, bc->size1, aa2 + m * ng2, bc->size2, bc->sendstart[0][0]); else bmgs_pastez((const double_complex*)(aa1 + m * ng), bc->size1, (double_complex*)(aa2 + m * ng2), bc->size2, bc->sendstart[0][0]); } #ifdef PARALLEL // Start receiving. for (int d = 0; d < 2; d++) { int p = bc->recvproc[i][d]; if (p >= 0) { if (bc->rjoin[i]) { if (d == 0) MPI_Irecv(rbuff, (bc->nrecv[i][0] + bc->nrecv[i][1]) * nin, MPI_DOUBLE, p, 10 * thd + 1000 * i + 100000, bc->comm, &recvreq[0]); } else { MPI_Irecv(rbuff, bc->nrecv[i][d] * nin, MPI_DOUBLE, p, d + 10 * thd + 1000 * i, bc->comm, &recvreq[d]); rbuff += bc->nrecv[i][d] * nin; } } } // Prepare send-buffers and start sending: double* sbuf = sbuff; double* sbuf0 = sbuff; for (int d = 0; d < 2; d++) { sendreq[d] = 0; int p = bc->sendproc[i][d]; if (p >= 0) { const int* start = bc->sendstart[i][d]; const int* size = bc->sendsize[i][d]; for (int m = 0; m < nin; m++) if (real) bmgs_cut(aa2 + m * ng2, bc->size2, start, sbuf + m * bc->nsend[i][d], size); else bmgs_cutmz((const double_complex*)(aa2 + m * ng2), bc->size2, start, (double_complex*)(sbuf + m * bc->nsend[i][d]), size, phases[d]); if (bc->sjoin[i]) { if (d == 1) { MPI_Isend(sbuf0, (bc->nsend[i][0] + bc->nsend[i][1]) * nin, MPI_DOUBLE, p, 10 * thd + 1000 * i + 100000, bc->comm, &sendreq[0]); } } else { MPI_Isend(sbuf, bc->nsend[i][d] * nin, MPI_DOUBLE, p, 1 - d + 10 * thd + 1000 * i, 
bc->comm, &sendreq[d]); } sbuf += bc->nsend[i][d] * nin; } } #endif // Parallel for (int m = 0; m < nin; m++) { // Copy data for periodic boundary conditions: for (int d = 0; d < 2; d++) if (bc->sendproc[i][d] == COPY_DATA) { if (real) bmgs_translate(aa2 + m * ng2, bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d]); else bmgs_translatemz((double_complex*)(aa2 + m * ng2), bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d], phases[d]); } } } void bc_unpack2(const boundary_conditions* bc, double* a2, int i, MPI_Request recvreq[2], MPI_Request sendreq[2], double* rbuf, int nin) { #ifdef PARALLEL int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2]; // Store data from receive-buffer: bool real = (bc->ndouble == 1); double* rbuf0 = rbuf; for (int d = 0; d < 2; d++) if (bc->recvproc[i][d] >= 0) { if (bc->rjoin[i]) { if (d == 0) { MPI_Wait(&recvreq[0], MPI_STATUS_IGNORE); rbuf += bc->nrecv[i][1] * nin; } else rbuf = rbuf0; } else MPI_Wait(&recvreq[d], MPI_STATUS_IGNORE); for (int m = 0; m < nin; m++) if (real) bmgs_paste(rbuf + m * bc->nrecv[i][d], bc->recvsize[i][d], a2 + m * ng2, bc->size2, bc->recvstart[i][d]); else bmgs_pastez((const double_complex*)(rbuf + m * bc->nrecv[i][d]), bc->recvsize[i][d], (double_complex*)(a2 + m * ng2), bc->size2, bc->recvstart[i][d]); rbuf += bc->nrecv[i][d] * nin; } // This does not work on the ibm with gcc! We do a blocking send instead. for (int d = 0; d < 2; d++) if (sendreq[d] != 0) MPI_Wait(&sendreq[d], MPI_STATUS_IGNORE); #endif // PARALLEL } gpaw-24.1.0/c/bc.h000066400000000000000000000050021454550013000135050ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #ifndef BC_H #define BC_H #include "bmgs/bmgs.h" #ifdef PARALLEL #include #else typedef int* MPI_Request; // !!!!!!!??????????? 
typedef int* MPI_Comm;
// Serial stand-ins so that code using communicators compiles without MPI:
// the null communicator is 0 and the rank is always reported as 0.
#define MPI_COMM_NULL 0
#define MPI_Comm_rank(comm, rank) *(rank) = 0
#endif

// Description of the boundary exchange for one grid; filled in by bc_init().
// Index convention in the arrays below: [axis][direction] and, for the
// start/size arrays, a trailing [3] index over the three grid axes.
typedef struct
{
  int size1[3];            // grid size without padding
  int size2[3];            // grid size including padding on both sides
  int sendstart[3][2][3];  // start of the slab that is sent
  int sendsize[3][2][3];   // size of the slab that is sent
  int recvstart[3][2][3];  // start of the slab that is received
  int recvsize[3][2][3];   // size of the slab that is received
  int sendproc[3][2];      // destination rank, COPY_DATA or DO_NOTHING
  int recvproc[3][2];      // source rank, COPY_DATA or DO_NOTHING
  int nsend[3][2];         // number of doubles sent (0 if nothing is sent)
  int nrecv[3][2];         // number of doubles received (0 if nothing)
  int maxsend;             // max over axes of nsend[i][0] + nsend[i][1]
  int maxrecv;             // max over axes of nrecv[i][0] + nrecv[i][1]
  int padding[3];          // padding on the low side of each axis
  bool sjoin[3];           // both sends along the axis go out as one message
  bool rjoin[3];           // both receives along the axis come as one message
  int ndouble;             // doubles per grid point: 1 (real) or 2 (complex)
  bool cfd;
  MPI_Comm comm;
#ifdef GPAW_GPU
  bool gpu_sjoin[3];
  bool gpu_rjoin[3];
  bool gpu_async[3];
#endif
} boundary_conditions;

// Special values stored in sendproc/recvproc instead of a rank (ranks are
// >= 0): COPY_DATA marks a local copy, DO_NOTHING marks no action.
static const int COPY_DATA = -2;
static const int DO_NOTHING = -3; // ??????????

boundary_conditions* bc_init(const long size1[3],
                             const long padding[3][2],
                             const long npadding[3][2],
                             const long neighbors[3][2],
                             MPI_Comm comm, bool real, bool cfd);
void bc_unpack1(const boundary_conditions* bc,
                const double* input, double* output, int i,
                MPI_Request recvreq[2],
                MPI_Request sendreq[2],
                double* rbuf, double* sbuf,
                const double_complex phases[2], int thd, int nin);
void bc_unpack2(const boundary_conditions* bc,
                double* a2, int i,
                MPI_Request recvreq[2],
                MPI_Request sendreq[2],
                double* rbuf, int nin);

#ifdef GPAW_GPU
#include "gpu/gpu-runtime.h"
void bc_init_gpu(boundary_conditions* bc);
void bc_dealloc_gpu(int force);
void bc_unpack_gpu(const boundary_conditions* bc,
                   double* aa2, int i,
                   MPI_Request recvreq[3][2],
                   MPI_Request sendreq[2],
                   const double_complex phases[2],
                   gpuStream_t kernel_stream,
                   int nin);
void bc_unpack_gpu_async(const boundary_conditions* bc,
                         double* aa2, int i,
                         MPI_Request recvreq[3][2],
                         MPI_Request sendreq[2],
                         const double_complex phases[2],
                         gpuStream_t kernel_stream,
                         int nin);
void bc_unpack_paste_gpu(boundary_conditions* bc,
                         const double* aa1, double* aa2,
                         MPI_Request recvreq[3][2],
                         gpuStream_t thd, int nin);
#endif
#endif
gpaw-24.1.0/c/blacs.c000066400000000000000000001565231454550013000142170ustar00rootroot00000000000000
/* Copyright (C) 2003-2007 CAMP
 * Copyright (C) 2007-2009 CAMd
 * Copyright (C) 2010 Argonne National Laboratory
 * Please see the accompanying LICENSE file for further information. */
// NOTE(review): the header names of the bare #include lines below are
// missing in this copy of the file -- restore them from the upstream source.
#ifdef PARALLEL
#include
#ifdef GPAW_WITH_SL
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include
#include
#include
#include "extensions.h"
#include "mympi.h"

// BLACS
#define BLOCK_CYCLIC_2D 1

// The code below always calls the underscore-suffixed names; these macros
// map them to the plain names for libraries built without the trailing
// underscore.
#ifdef GPAW_NO_UNDERSCORE_CBLACS
#define Cblacs_barrier_ Cblacs_barrier
#define Cblacs_gridexit_ Cblacs_gridexit
#define Cblacs_gridinfo_ Cblacs_gridinfo
#define Cblacs_gridinit_ Cblacs_gridinit
#define Cblacs_pinfo_ Cblacs_pinfo
#define Csys2blacs_handle_ Csys2blacs_handle
#endif

void Cblacs_barrier_(int ConTxt, char *scope);
void Cblacs_gridexit_(int ConTxt);
void Cblacs_gridinfo_(int ConTxt, int* nprow, int* npcol,
                      int* myrow, int* mycol);
void Cblacs_gridinit_(int* ConTxt, char* order, int nprow, int npcol);
void Cblacs_pinfo_(int* mypnum, int* nprocs);
int Csys2blacs_handle_(MPI_Comm SysCtxt);
// End of BLACS

// ScaLAPACK
#ifdef GPAW_NO_UNDERSCORE_SCALAPACK
#define numroc_ numroc
#define pdlamch_ pdlamch
#define pdlaset_ pdlaset
#define pzlaset_ pzlaset
#define pdpotrf_ pdpotrf
#define pzpotrf_ pzpotrf
#define pzpotri_ pzpotri
#define pdtrtri_ pdtrtri
#define pztrtri_ pztrtri
#define pzgesv_ pzgesv
#define pdgesv_ pdgesv
#define pdsyevd_ pdsyevd
#define pzheevd_ pzheevd
#define pdsyevx_ pdsyevx
#define pzheevx_ pzheevx
#define pdsygvx_ pdsygvx
#define pzhegvx_ pzhegvx
#define pdsyngst_ pdsyngst
#define pzhengst_ pzhengst
#ifdef GPAW_MR3
#define pdsyevr_ pdsyevr
#define pzheevr_ pzheevr
#endif // GPAW_MR3
#define pdtran_ pdtran
#define pztranc_ pztranc
#define pztranu_ pztranu
#define pdgemm_ pdgemm
#define pzgemm_ pzgemm
#define pdgemv_ pdgemv
#define pzgemv_ pzgemv
#define pdsyr2k_ pdsyr2k
#define pzher2k_ pzher2k
#define pdsyrk_ pdsyrk
#define pzherk_ pzherk
#define pdtrsm_ pdtrsm
#define pztrsm_ pztrsm
#define pzhemm_ pzhemm
#define pzsymm_ pzsymm
#define pdsymm_ pdsymm
#endif

#ifdef GPAW_NO_UNDERSCORE_CSCALAPACK
#define Cpdgemr2d_ Cpdgemr2d
#define Cpzgemr2d_ Cpzgemr2d
#define Cpdtrmr2d_ Cpdtrmr2d
#define Cpztrmr2d_ Cpztrmr2d
#endif

// tools
int numroc_(int* n, int* nb, int* iproc, int* isrcproc, int* nprocs);

void Cpdgemr2d_(int m, int n,
                double* a, int ia, int ja, int* desca,
                double* b, int ib, int jb, int* descb,
                int gcontext);
void Cpzgemr2d_(int m, int n,
                void* a, int ia, int ja, int* desca,
                void* b, int ib, int jb, int* descb,
                int gcontext);
void Cpdtrmr2d_(char* uplo, char* diag, int m, int n,
                double* a, int ia, int ja, int* desca,
                double* b, int ib, int jb, int* descb,
                int gcontext);
void Cpztrmr2d_(char* uplo, char* diag, int m, int n,
                void* a, int ia, int ja, int* desca,
                void* b, int ib, int jb, int* descb,
                int gcontext);

double pdlamch_(int* ictxt, char* cmach);

void pzpotri_(char* uplo, int* n, void* a, int *ia, int* ja, int* desca, int* info);
void pzgetri_(int* n, void* a, int *ia, int* ja, int* desca, int* info);

void pdlaset_(char* uplo, int* m, int* n, double* alpha, double* beta,
              double* a, int* ia, int* ja, int* desca);
void pzlaset_(char* uplo, int* m, int* n, void* alpha, void* beta,
              void* a, int* ia, int* ja, int* desca);

// cholesky
void pdpotrf_(char* uplo, int* n, double* a,
              int* ia, int* ja, int* desca, int* info);
void pzpotrf_(char* uplo, int* n, void* a,
              int* ia, int* ja, int* desca, int* info);
void pzgesv_(int* n, int* nrhs, void* a, int* ia, int* ja, int* desca,
             int* ipiv, void* b, int* ib, int* jb, int* descb, int* info);
void pdgesv_(int *n, int *nrhs, void *a, int *ia, int *ja, int* desca,
             int *ipiv, void* b, int* ib, int* jb, int* descb, int* info);
void pdtrtri_(char* uplo, char* diag, int* n, double* a,
              int *ia, int* ja, int* desca, int* info);
void pztrtri_(char* uplo, char* diag, int* n, void* a,
              int *ia, int* ja, int* desca, int* info);

// diagonalization
void pdsyevd_(char* jobz, char* uplo, int* n,
              double* a, int* ia, int* ja, int* desca,
              double* w,
              double* z, int* iz, int* jz, int* descz,
              double* work, int* lwork,
              int* iwork, int* liwork, int* info);
void pzheevd_(char* jobz, char* uplo, int* n,
              void* a, int* ia, int* ja, int* desca,
              double* w,
              void* z, int* iz, int* jz, int* descz,
              void* work, int* lwork,
              double* rwork, int* lrwork,
              int* iwork, int* liwork, int* info);
void pdsyevx_(char* jobz, char* range, char* uplo, int* n,
              double* a, int* ia, int* ja, int* desca,
              double* vl, double* vu, int* il, int* iu,
              double* abstol, int* m, int* nz,
              double* w, double* orfac,
              double* z, int* iz, int* jz, int* descz,
              double* work, int* lwork,
              int* iwork, int* liwork,
              int* ifail, int* iclustr, double* gap, int* info);
void pzheevx_(char* jobz, char* range, char* uplo, int* n,
              void* a, int* ia, int* ja, int* desca,
              double* vl, double* vu, int* il, int* iu,
              double* abstol, int* m, int* nz,
              double* w, double* orfac,
              void* z, int* iz, int* jz, int* descz,
              void* work, int* lwork,
              double* rwork, int* lrwork,
              int* iwork, int* liwork,
              int* ifail, int* iclustr, double* gap, int* info);
void pdsygvx_(int* ibtype, char* jobz, char* range, char* uplo, int* n,
              double* a, int* ia, int* ja, int* desca,
              double* b, int *ib, int* jb, int* descb,
              double* vl, double* vu, int* il, int* iu,
              double* abstol, int* m, int* nz,
              double* w, double* orfac,
              double* z, int* iz, int* jz, int* descz,
              double* work, int* lwork,
              int* iwork, int* liwork,
              int* ifail, int* iclustr, double* gap, int* info);
void pzhegvx_(int* ibtype, char* jobz, char* range, char* uplo, int* n,
              void* a, int* ia, int* ja, int* desca,
              void* b, int *ib, int* jb, int* descb,
              double* vl, double* vu, int* il, int* iu,
              double* abstol, int* m, int* nz,
              double* w, double* orfac,
              void* z, int* iz, int* jz, int* descz,
              void* work, int* lwork,
              double* rwork, int* lrwork,
              int* iwork, int* liwork,
              int* ifail, int* iclustr, double* gap, int* info);
void pdsyngst_(int* ibtype, char* uplo, int* n,
               double* a, int* ia, int* ja, int* desca,
               double* b, int* ib, int* jb, int* descb,
               double* scale, double* work, int* lwork, int* info);
void pzhengst_(int* ibtype, char* uplo, int* n,
               void* a, int* ia, int* ja, int* desca,
               void* b, int* ib, int* jb, int* descb,
               double* scale, void* work, int* lwork, int* info);
#ifdef GPAW_MR3
void pdsyevr_(char* jobz, char* range, char* uplo, int* n,
              double* a, int* ia, int* ja, int* desca,
              double* vl, double* vu, int* il, int* iu,
              int* m, int* nz,
              double* w,
              double* z, int* iz, int* jz, int* descz,
              double* work, int* lwork,
              int* iwork, int* liwork, int* info);
void pzheevr_(char* jobz, char* range, char* uplo, int* n,
              void* a, int* ia, int* ja, int* desca,
              double* vl, double* vu, int* il, int* iu,
              int* m, int* nz,
              double* w,
              void* z, int* iz, int* jz, int* descz,
              void* work, int* lwork,
              double* rwork, int* lrwork,
              int* iwork, int* liwork, int* info);
#endif // GPAW_MR3

// pblas
void pdtran_(int* m, int* n,
             double* alpha,
             double* a, int* ia, int* ja, int* desca,
             double* beta,
             double* c, int* ic, int* jc, int* descc);
void pztranc_(int* m, int* n,
              void* alpha,
              void* a, int* ia, int* ja, int* desca,
              void* beta,
              void* c, int* ic, int* jc, int* descc);
void pztranu_(int* m, int* n,
              void* alpha,
              void* a, int* ia, int* ja, int* desca,
              void* beta,
              void* c, int* ic, int* jc, int* descc);
void pdgemm_(char* transa, char* transb, int* m, int* n, int* k,
             double* alpha,
             double* a, int* ia, int* ja, int* desca,
             double* b, int* ib, int* jb, int* descb,
             double* beta,
             double* c, int* ic, int* jc, int* descc);
void pzgemm_(char* transa, char* transb, int* m, int* n, int* k,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* b, int* ib, int* jb, int* descb,
             void* beta,
             void* c, int* ic, int* jc, int* descc);
void pzhemm_(char* side, char* uplo, int* m, int* n,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* b, int* ib, int* jb, int* descb,
             void* beta,
             void* c, int* ic, int* jc, int* descc);
void pzsymm_(char* side, char* uplo, int* m, int* n,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* b, int* ib, int* jb, int* descb,
             void* beta,
             void* c, int* ic, int* jc, int* descc);
void pdsymm_(char* side, char* uplo, int* m, int* n,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* b, int* ib, int* jb, int* descb,
             void* beta,
             void* c, int* ic, int* jc, int* descc);
void pdgemv_(char* transa, int* m, int* n,
             double* alpha,
             double* a, int* ia, int* ja, int* desca,
             double* x, int* ix, int* jx, int* descx, int* incx,
             double* beta,
             double* y, int* iy, int* jy, int* descy, int* incy);
void pzgemv_(char* transa, int* m, int* n,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* x, int* ix, int* jx, int* descx, int* incx,
             void* beta,
             void* y, int* iy, int* jy, int* descy, int* incy);
void pdsyr2k_(char* uplo, char* trans, int* n, int* k,
              double* alpha,
              double* a, int* ia, int* ja, int* desca,
              double* b, int* ib, int* jb, int* descb,
              double* beta,
              double* c, int* ic, int *jc, int* descc);
void pzher2k_(char* uplo, char* trans, int* n, int* k,
              void* alpha,
              void* a, int* ia, int* ja, int* desca,
              void* b, int* ib, int* jb, int* descb,
              void* beta,
              void* c, int* ic, int* jc, int* descc);
void pdsyrk_(char* uplo, char* trans, int* n, int* k,
             double* alpha,
             double* a, int* ia, int* ja, int* desca,
             double* beta,
             double* c, int* ic, int* jc, int* descc);
void pzherk_(char* uplo, char* trans, int* n, int* k,
             void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* beta,
             void* c, int* ic, int* jc, int* descc);
void pdtrsm_(char* side, char* uplo, char* trans, char* diag,
             int* m, int *n, double* alpha,
             double* a, int* ia, int* ja, int* desca,
             double* b, int* ib, int* jb, int* descb);
void pztrsm_(char* side, char* uplo, char* trans, char* diag,
             int* m, int *n, void* alpha,
             void* a, int* ia, int* ja, int* desca,
             void* b, int* ib, int* jb, int* descb);

// Python wrapper around the PBLAS transpose routines.
// Arguments (format "iiDODOOOi"): m, n, alpha, a, beta, c, desca, descc,
// conj.  Calls pdtran_ when c holds doubles (using only the real parts of
// alpha and beta); otherwise pztranc_ if conj is nonzero, else pztranu_.
// Returns None.
PyObject* pblas_tran(PyObject *self, PyObject *args)
{
  int m, n;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *c;
  PyArrayObject *desca, *descc;
  int conj;

  if (!PyArg_ParseTuple(args, "iiDODOOOi", &m, &n, &alpha,
                        &a, &beta, &c,
                        &desca, &descc,
                        &conj))
    return NULL;

  // Row/column offset passed for ia, ja, ic, jc.
  int one = 1;
  if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
    pdtran_(&m, &n,
            &(alpha.real),
            DOUBLEP(a), &one, &one, INTP(desca),
            &(beta.real),
            DOUBLEP(c), &one, &one, INTP(descc));
  else if (conj)
    pztranc_(&m, &n,
             &alpha,
             (void*)PyArray_DATA(a), &one, &one, INTP(desca),
             &beta,
             (void*)PyArray_DATA(c), &one, &one, INTP(descc));
  else
    pztranu_(&m, &n,
             &alpha,
             (void*)PyArray_DATA(a), &one, &one, INTP(desca),
             &beta,
             (void*)PyArray_DATA(c), &one, &one, INTP(descc));
  Py_RETURN_NONE;
}

// Python wrapper around pdgemm_ / pzgemm_.
// Arguments (format "iiiDOODOOOOss"): m, n, k, alpha, a, b, beta, c, desca,
// descb, descc, transa, transb.  The double routine is chosen when c holds
// doubles and then only the real parts of alpha and beta are used.
// Returns None.
PyObject* pblas_gemm(PyObject *self, PyObject *args)
{
  char* transa;
  char* transb;
  int m, n, k;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *b, *c;
  PyArrayObject *desca, *descb, *descc;
  int one = 1;

  if (!PyArg_ParseTuple(args, "iiiDOODOOOOss", &m, &n, &k, &alpha,
                        &a, &b, &beta, &c,
                        &desca, &descb, &descc,
                        &transa, &transb)) {
    return NULL;
  }

  // cdesc
  // int c_ConTxt = INTP(descc)[1];

  // If process not on BLACS grid, then return.
  // if (c_ConTxt == -1) Py_RETURN_NONE;

  if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
    pdgemm_(transa, transb, &m, &n, &k,
            &(alpha.real),
            DOUBLEP(a), &one, &one, INTP(desca),
            DOUBLEP(b), &one, &one, INTP(descb),
            &(beta.real),
            DOUBLEP(c), &one, &one, INTP(descc));
  else
    pzgemm_(transa, transb, &m, &n, &k,
            &alpha,
            (void*)COMPLEXP(a), &one, &one, INTP(desca),
            (void*)COMPLEXP(b), &one, &one, INTP(descb),
            &beta,
            (void*)COMPLEXP(c), &one, &one, INTP(descc));

  Py_RETURN_NONE;
}

// Python wrapper around pdsymm_ / pzhemm_ / pzsymm_.
// Arguments (format "ssiiDOODOOOOi"): side, uplo, n, m, alpha, a, b, beta,
// c, desca, descb, descc, hemm.  Calls pdsymm_ when c holds doubles;
// otherwise pzhemm_ if hemm is nonzero, else pzsymm_.
// Note that the two integers are parsed as (n, m) and passed on in that
// order.  Returns None.
PyObject* pblas_hemm_symm(PyObject *self, PyObject *args)
{
  char* side;
  char* uplo;
  int m, n;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *b, *c;
  PyArrayObject *desca, *descb, *descc;
  int hemm;
  int one = 1;

  if (!PyArg_ParseTuple(args, "ssiiDOODOOOOi",
                        &side, &uplo, &n, &m,
                        &alpha, &a, &b, &beta, &c,
                        &desca, &descb, &descc, &hemm)) {
    return NULL;
  }

  if (PyArray_DESCR(c)->type_num == NPY_DOUBLE) {
    pdsymm_(side, uplo, &n, &m, &(alpha.real),
            (void*)DOUBLEP(a), &one, &one, INTP(desca),
            (void*)DOUBLEP(b), &one, &one, INTP(descb),
            &(beta.real),
            (void*)DOUBLEP(c), &one, &one, INTP(descc));
  } else if (hemm) {
    pzhemm_(side, uplo, &n, &m, &alpha,
            (void*)COMPLEXP(a), &one, &one, INTP(desca),
            (void*)COMPLEXP(b), &one, &one, INTP(descb),
            &beta,
            (void*)COMPLEXP(c), &one, &one, INTP(descc));
  } else {
    pzsymm_(side, uplo, &n, &m, &alpha,
            (void*)COMPLEXP(a), &one, &one, INTP(desca),
            (void*)COMPLEXP(b), &one, &one, INTP(descb),
            &beta,
            (void*)COMPLEXP(c), &one, &one, INTP(descc));
  }

  Py_RETURN_NONE;
}

// Python wrapper around pdgemv_ / pzgemv_.
// Arguments (format "iiDOODOOOOs"): m, n, alpha, a, x, beta, y, desca,
// descx, descy, transa.  incx and incy are fixed to 1.  The double routine
// is chosen when y holds doubles.  Returns None.
PyObject* pblas_gemv(PyObject *self, PyObject *args)
{
  char* transa;
  int m, n;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *x, *y;
  int incx = 1, incy = 1; // what should these be?
  PyArrayObject *desca, *descx, *descy;
  int one = 1;
  if (!PyArg_ParseTuple(args, "iiDOODOOOOs",
                        &m, &n, &alpha,
                        &a, &x, &beta, &y,
                        &desca, &descx, &descy,
                        &transa)) {
    return NULL;
  }

  // ydesc
  // int y_ConTxt = INTP(descy)[1];

  // If process not on BLACS grid, then return.
  // if (y_ConTxt == -1) Py_RETURN_NONE;

  if (PyArray_DESCR(y)->type_num == NPY_DOUBLE)
    pdgemv_(transa, &m, &n,
            &(alpha.real),
            DOUBLEP(a), &one, &one, INTP(desca),
            DOUBLEP(x), &one, &one, INTP(descx), &incx,
            &(beta.real),
            DOUBLEP(y), &one, &one, INTP(descy), &incy);
  else
    pzgemv_(transa, &m, &n,
            &alpha,
            (void*)COMPLEXP(a), &one, &one, INTP(desca),
            (void*)COMPLEXP(x), &one, &one, INTP(descx), &incx,
            &beta,
            (void*)COMPLEXP(y), &one, &one, INTP(descy), &incy);

  Py_RETURN_NONE;
}

// Python wrapper around pdsyr2k_ / pzher2k_.
// Arguments (format "iiDOODOOOOs"): n, k, alpha, a, b, beta, c, desca,
// descb, descc, uplo.  The trans argument is fixed to "T" for doubles and
// "C" for complex data.  Returns None.
PyObject* pblas_r2k(PyObject *self, PyObject *args)
{
  char* uplo;
  int n, k;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *b, *c;
  PyArrayObject *desca, *descb, *descc;
  int one = 1;

  if (!PyArg_ParseTuple(args, "iiDOODOOOOs", &n, &k, &alpha,
                        &a, &b, &beta, &c,
                        &desca, &descb, &descc,
                        &uplo)) {
    return NULL;
  }

  // cdesc
  // int c_ConTxt = INTP(descc)[1];

  // If process not on BLACS grid, then return.
  // if (c_ConTxt == -1) Py_RETURN_NONE;

  if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
    pdsyr2k_(uplo, "T", &n, &k,
             &(alpha.real),
             DOUBLEP(a), &one, &one, INTP(desca),
             DOUBLEP(b), &one, &one, INTP(descb),
             &(beta.real),
             DOUBLEP(c), &one, &one, INTP(descc));
  else
    pzher2k_(uplo, "C", &n, &k,
             &alpha,
             (void*)COMPLEXP(a), &one, &one, INTP(desca),
             (void*)COMPLEXP(b), &one, &one, INTP(descb),
             &beta,
             (void*)COMPLEXP(c), &one, &one, INTP(descc));

  Py_RETURN_NONE;
}

// Python wrapper around pdsyrk_ / pzherk_.
// Arguments (format "iiDODOOOs"): n, k, alpha, a, beta, c, desca, descc,
// uplo.  The trans argument is fixed to "T" for doubles and "C" for complex
// data.  Returns None.
PyObject* pblas_rk(PyObject *self, PyObject *args)
{
  char* uplo;
  int n, k;
  Py_complex alpha;
  Py_complex beta;
  PyArrayObject *a, *c;
  PyArrayObject *desca, *descc;
  int one = 1;

  if (!PyArg_ParseTuple(args, "iiDODOOOs", &n, &k, &alpha,
                        &a, &beta, &c,
                        &desca, &descc,
                        &uplo)) {
    return NULL;
  }

  // cdesc
  // int c_ConTxt = INTP(descc)[1];

  // If process not on BLACS grid, then return.
  // if (c_ConTxt == -1) Py_RETURN_NONE;

  if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
    pdsyrk_(uplo, "T", &n, &k,
            &(alpha.real),
            DOUBLEP(a), &one, &one, INTP(desca),
            &(beta.real),
            DOUBLEP(c), &one, &one, INTP(descc));
  else
    pzherk_(uplo, "C", &n, &k,
            &alpha,
            (void*)COMPLEXP(a), &one, &one, INTP(desca),
            &beta,
            (void*)COMPLEXP(c), &one, &one, INTP(descc));

  Py_RETURN_NONE;
}

// Create a BLACS process grid on the communicator of an MPI object.
// Arguments (format "Oiis"): communicator object, nprow, npcol, order.
// Returns the BLACS context handle as a Python int.
// NOTE(review): comm_obj is cast to MPIObject* without a type check --
// callers presumably always pass a Communicator; confirm.
PyObject* new_blacs_context(PyObject *self, PyObject *args)
{
  PyObject* comm_obj;
  int nprow, npcol;

  int iam, nprocs;
  int ConTxt;
  char* order;

  if (!PyArg_ParseTuple(args, "Oiis", &comm_obj, &nprow, &npcol, &order)){
    return NULL;
  }

  // Create blacs grid on this communicator
  MPI_Comm comm = ((MPIObject*)comm_obj)->comm;

  // Get my id and nprocs. This is for debugging purposes only
  Cblacs_pinfo_(&iam, &nprocs);
  MPI_Comm_size(comm, &nprocs);

  // Create blacs grid on this communicator continued
  ConTxt = Csys2blacs_handle_(comm);
  Cblacs_gridinit_(&ConTxt, order, nprow, npcol);
  PyObject* returnvalue = Py_BuildValue("i", ConTxt);
  return returnvalue;
}

// Return (myrow, mycol), this process' coordinates in a BLACS grid.
// Arguments (format "iii"): context handle, nprow, npcol; the latter two
// are passed to Cblacs_gridinfo_ by address together with myrow/mycol.
PyObject* get_blacs_gridinfo(PyObject *self, PyObject *args)
{
  int ConTxt, nprow, npcol;
  int myrow, mycol;

  if (!PyArg_ParseTuple(args, "iii", &ConTxt, &nprow, &npcol)) {
    return NULL;
  }

  Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
  return Py_BuildValue("(ii)", myrow, mycol);
}

// Return (locM, locN), the local shape computed with numroc_ for an m x n
// matrix with block sizes mb x nb and source process (rsrc, csrc).
// Arguments (format "iiiiiii"): context handle, m, n, mb, nb, rsrc, csrc.
PyObject* get_blacs_local_shape(PyObject *self, PyObject *args)
{
  int ConTxt;
  int m, n, mb, nb, rsrc, csrc;
  int nprow, npcol, myrow, mycol;
  int locM, locN;

  if (!PyArg_ParseTuple(args, "iiiiiii", &ConTxt, &m, &n, &mb,
                        &nb, &rsrc, &csrc)){
    return NULL;
  }

  Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
  locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
  locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
  return Py_BuildValue("(ii)", locM, locN);
}

// Release a BLACS grid.  Argument (format "i"): context handle.
// Returns None.
PyObject* blacs_destroy(PyObject *self, PyObject *args)
{
  int ConTxt;
  if (!PyArg_ParseTuple(args, "i", &ConTxt))
    return NULL;

  Cblacs_gridexit_(ConTxt);

  Py_RETURN_NONE;
}

// Python wrapper around pdlaset_ / pzlaset_.
// Arguments (format "OODDsiiii"): a, desca, alpha, beta, uplo, m, n, ia, ja.
// The double routine is chosen when a holds doubles and then only the real
// parts of alpha and beta are used.  Returns None.
PyObject* scalapack_set(PyObject *self, PyObject *args)
{
  PyArrayObject* a; // matrix;
  PyArrayObject* desca; // descriptor
  Py_complex alpha;
  Py_complex beta;
  int m, n;
  int ia, ja;
  char* uplo;

  if (!PyArg_ParseTuple(args, "OODDsiiii", &a, &desca,
                        &alpha, &beta, &uplo,
                        &m, &n, &ia, &ja))
    return NULL;

  if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
    pdlaset_(uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a),
             &ia, &ja, INTP(desca));
  else
    pzlaset_(uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a),
             &ia, &ja, INTP(desca));

  Py_RETURN_NONE;
}

PyObject* scalapack_redist(PyObject *self, PyObject *args)
{
  PyArrayObject* a; // source matrix
  PyArrayObject* b; // destination matrix
  PyArrayObject* desca; // source descriptor
  PyArrayObject* descb; // destination descriptor

  char* uplo;
  char
diag='N'; // copy the diagonal int c_ConTxt; int m; int n; int ia, ja, ib, jb; if (!PyArg_ParseTuple(args, "OOOOiiiiiiis", &desca, &descb, &a, &b, &m, &n, &ia, &ja, &ib, &jb, &c_ConTxt, &uplo)) return NULL; if (*uplo == 'G') // General matrix { if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) Cpdgemr2d_(m, n, DOUBLEP(a), ia, ja, INTP(desca), DOUBLEP(b), ib, jb, INTP(descb), c_ConTxt); else Cpzgemr2d_(m, n, (void*)COMPLEXP(a), ia, ja, INTP(desca), (void*)COMPLEXP(b), ib, jb, INTP(descb), c_ConTxt); } else // Trapezoidal matrix { if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) Cpdtrmr2d_(uplo, &diag, m, n, DOUBLEP(a), ia, ja, INTP(desca), DOUBLEP(b), ib, jb, INTP(descb), c_ConTxt); else Cpztrmr2d_(uplo, &diag, m, n, (void*)COMPLEXP(a), ia, ja, INTP(desca), (void*)COMPLEXP(b), ib, jb, INTP(descb), c_ConTxt); } Py_RETURN_NONE; } PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args) { // Standard driver for divide and conquer algorithm // Computes all eigenvalues and eigenvectors PyArrayObject* a; // symmetric matrix PyArrayObject* desca; // symmetric matrix description vector PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int one = 1; char jobz = 'V'; // eigenvectors also char* uplo; if (!PyArg_ParseTuple(args, "OOsOO", &a, &desca, &uplo, &z, &w)) return NULL; // adesc // int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc; this can be relaxed a bit according to pdsyevd.f // If process not on BLACS grid, then return. 
// if (a_ConTxt == -1) Py_RETURN_NONE; // Query part, need to find the optimal size of a number of work arrays int info; int querywork = -1; int* iwork; int liwork; int lwork; int lrwork; int i_work; double d_work; double_complex c_work; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyevd_(&jobz, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), &d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(d_work); // Sometimes lwork is not large enough. Found this formula on // the internet: lwork = MAX(131072, 2 * (int) lwork + 1); } else { pzheevd_(&jobz, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, &d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(c_work); lrwork = (int)(d_work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_diagonalize_dc error in query."); return NULL; } // Computation part liwork = MAX(8 * n, i_work + 1); iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyevd_(&jobz, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, &info); free(work); } else { double_complex *work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzheevd_(&jobz, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, &info); free(rwork); free(work); } free(iwork); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args) { // Standard driver for bisection and inverse iteration algorithm // Computes 'iu' eigenvalues and eigenvectors PyArrayObject* a; // Hamiltonian matrix PyArrayObject* desca; // Hamintonian matrix descriptor 
PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int a_mycol = -1; int a_myrow = -1; int a_nprow, a_npcol; int il = 1; // not used when range = 'A' or 'V' int iu; int eigvalm, nz; int one = 1; double vl, vu; // not used when range = 'A' or 'I' char jobz = 'V'; // eigenvectors also char range = 'I'; // eigenvalues il-th through iu-th char* uplo; if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu, &z, &w)) return NULL; // a desc int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc = bdesc; required by pdsyevx.f // If process not on BLACS grid, then return. // if (a_ConTxt == -1) Py_RETURN_NONE; Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol); // Convergence tolerance double abstol = 1.0e-8; // char cmach = 'U'; // most orthogonal eigenvectors // char cmach = 'S'; // most acccurate eigenvalues // double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues double orfac = -1.0; // Query part, need to find the optimal size of a number of work arrays int info; int *ifail; ifail = GPAW_MALLOC(int, n); int *iclustr; iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol); double *gap; gap = GPAW_MALLOC(double, a_nprow*a_npcol); int querywork = -1; int* iwork; int liwork; int lwork; // workspace size must be at least 3 int lrwork; // workspace size must be at least 3 int i_work; double d_work[3]; double_complex c_work; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyevx_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, DOUBLEP(z), &one, &one, INTP(desca), d_work, &querywork, &i_work, &querywork, ifail, iclustr, gap, &info); lwork = MAX(3, (int)(d_work[0])); } else { pzheevx_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, 
&nz, DOUBLEP(w), &orfac, (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, d_work, &querywork, &i_work, &querywork, ifail, iclustr, gap, &info); lwork = MAX(3, (int)(c_work)); lrwork = MAX(3, (int)(d_work[0])); } if (info != 0) { printf ("info = %d", info); PyErr_SetString(PyExc_RuntimeError, "scalapack_diagonalize_ex error in query."); return NULL; } // Computation part // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace liwork = i_work; iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyevx_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, ifail, iclustr, gap, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzheevx_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, ifail, iclustr, gap, &info); free(rwork); free(work); } free(iwork); free(gap); free(iclustr); free(ifail); // If this fails, fewer eigenvalues than requested were computed. 
assert (eigvalm == iu); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } #ifdef GPAW_MR3 PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args) { // Standard driver for MRRR algorithm // Computes 'iu' eigenvalues and eigenvectors // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html PyArrayObject* a; // Hamiltonian matrix PyArrayObject* desca; // Hamintonian matrix descriptor PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int il = 1; // not used when range = 'A' or 'V' int iu; int eigvalm, nz; int one = 1; double vl, vu; // not used when range = 'A' or 'I' char jobz = 'V'; // eigenvectors also char range = 'I'; // eigenvalues il-th through iu-th char* uplo; if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu, &z, &w)) return NULL; // a desc // int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc = bdesc; required by pdsyevx.f // If process not on BLACS grid, then return. 
// if (a_ConTxt == -1) Py_RETURN_NONE; // Query part, need to find the optimal size of a number of work arrays int info; int querywork = -1; int* iwork; int liwork; int lwork; int lrwork; int i_work; double d_work[3]; double_complex c_work; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyevr_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(d_work[0]); } else { pzheevr_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(c_work); lrwork = (int)(d_work[0]); } if (info != 0) { printf ("info = %d", info); PyErr_SetString(PyExc_RuntimeError, "scalapack_diagonalize_evr error in query."); return NULL; } // Computation part liwork = i_work; iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyevr_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzheevr_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, &info); free(rwork); free(work); } free(iwork); // If this fails, fewer eigenvalues than requested were computed. 
assert (eigvalm == iu); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } #endif PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args) { // General driver for divide and conquer algorithm // Computes *all* eigenvalues and eigenvectors PyArrayObject* a; // Hamiltonian matrix PyArrayObject* b; // overlap matrix PyArrayObject* desca; // Hamintonian matrix descriptor PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int ibtype = 1; // Solve H*psi = lambda*S*psi int one = 1; char jobz = 'V'; // eigenvectors also char* uplo; double scale; if (!PyArg_ParseTuple(args, "OOsOOO", &a, &desca, &uplo, &b, &z, &w)) return NULL; // a desc // int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f // If process not on BLACS grid, then return. // if (a_ConTxt == -1) Py_RETURN_NONE; // Cholesky Decomposition int info; if (PyArray_DESCR(b)->type_num == NPY_DOUBLE) pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info); else pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info); if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_dc error in Cholesky."); return NULL; } // Query variables int querywork = -1; int* iwork; int liwork; int lwork; int lrwork; int i_work; double d_work; double_complex c_work; // NGST Query if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyngst_(&ibtype, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &scale, &d_work, &querywork, &info); lwork = (int)(d_work); } else { pzhengst_(&ibtype, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &scale, (void*)&c_work, &querywork, &info); lwork = (int)(c_work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_dc error in NGST query."); return NULL; } 
// NGST Compute if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyngst_(&ibtype, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &scale, work, &lwork, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); pzhengst_(&ibtype, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &scale, (void*)work, &lwork, &info); free(work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_dc error in NGST compute."); return NULL; } // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we // may need to rescale eigenvalues by scale. This can be accomplised by using // the BLAS1 d/zscal. See pdsygvx.f // EVD Query if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyevd_(&jobz, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), &d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(d_work); } else { pzheevd_(&jobz, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, &d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(c_work); lrwork = (int)(d_work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_dc error in EVD query."); return NULL; } // EVD Computation liwork = i_work; iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyevd_(&jobz, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, &info); free(work); } else { double_complex *work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzheevd_(&jobz, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, 
INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, &info); free(rwork); free(work); } free(iwork); // Backtransformation to the original problem char trans; double d_one = 1.0; double_complex c_one = 1.0; if (*uplo == 'U') trans = 'N'; else trans = 'T'; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one, DOUBLEP(b), &one, &one, INTP(desca), DOUBLEP(z), &one, &one, INTP(desca)); else pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one, (void*)COMPLEXP(b), &one, &one, INTP(desca), (void*)COMPLEXP(z), &one, &one, INTP(desca)); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args) { // General driver for bisection and inverse iteration algorithm // Computes 'iu' eigenvalues and eigenvectors PyArrayObject* a; // Hamiltonian matrix PyArrayObject* b; // overlap matrix PyArrayObject* desca; // Hamintonian matrix descriptor PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int ibtype = 1; // Solve H*psi = lambda*S*psi int a_mycol = -1; int a_myrow = -1; int a_nprow, a_npcol; int il = 1; // not used when range = 'A' or 'V' int iu; // int eigvalm, nz; int one = 1; double vl, vu; // not used when range = 'A' or 'I' char jobz = 'V'; // eigenvectors also char range = 'I'; // eigenvalues il-th through iu-th char* uplo; if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu, &b, &z, &w)) return NULL; // a desc int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc = bdesc; required by pdsygvx.f // If process not on BLACS grid, then return. 
// if (a_ConTxt == -1) Py_RETURN_NONE; Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol); // Convergence tolerance double abstol = 1.0e-8; // char cmach = 'U'; // most orthogonal eigenvectors // char cmach = 'S'; // most acccurate eigenvalues // double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors // double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues double orfac = -1.0; // Query part, need to find the optimal size of a number of work arrays int info; int *ifail; ifail = GPAW_MALLOC(int, n); int *iclustr; iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol); double *gap; gap = GPAW_MALLOC(double, a_nprow*a_npcol); int querywork = -1; int* iwork; int liwork; int lwork; // workspace size must be at least 3 int lrwork; // workspace size must be at least 3 int i_work; double d_work[3]; double_complex c_work; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsygvx_(&ibtype, &jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, DOUBLEP(z), &one, &one, INTP(desca), d_work, &querywork, &i_work, &querywork, ifail, iclustr, gap, &info); lwork = MAX(3, (int)(d_work[0])); } else { pzhegvx_(&ibtype, &jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, d_work, &querywork, &i_work, &querywork, ifail, iclustr, gap, &info); lwork = MAX(3, (int)(c_work)); lrwork = MAX(3, (int)(d_work[0])); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_ex error in query."); return NULL; } // Computation part // lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace liwork = i_work; iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = 
GPAW_MALLOC(double, lwork); pdsygvx_(&ibtype, &jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, ifail, iclustr, gap, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzhegvx_(&ibtype, &jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &abstol, &eigvalm, &nz, DOUBLEP(w), &orfac, (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, ifail, iclustr, gap, &info); free(rwork); free(work); } free(iwork); free(gap); free(iclustr); free(ifail); // If this fails, fewer eigenvalues than requested were computed. assert (eigvalm == iu); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } #ifdef GPAW_MR3 PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args) { // General driver for MRRR algorithm // Computes 'iu' eigenvalues and eigenvectors // http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html PyArrayObject* a; // Hamiltonian matrix PyArrayObject* b; // overlap matrix PyArrayObject* desca; // Hamintonian matrix descriptor PyArrayObject* z; // eigenvector matrix PyArrayObject* w; // eigenvalue array int ibtype = 1; // Solve H*psi = lambda*S*psi int il = 1; // not used when range = 'A' or 'V' int iu; int eigvalm, nz; int one = 1; double vl, vu; // not used when range = 'A' or 'I' char jobz = 'V'; // eigenvectors also char range = 'I'; // eigenvalues il-th through iu-th char* uplo; double scale; if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu, &b, &z, &w)) return NULL; // a desc // int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices // zdesc = adesc = bdesc can be 
relaxed a bit according to pdsyevd.f // If process not on BLACS grid, then return. // if (a_ConTxt == -1) Py_RETURN_NONE; // Cholesky Decomposition int info; if (PyArray_DESCR(b)->type_num == NPY_DOUBLE) pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info); else pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info); if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_mr3 error in Cholesky."); return NULL; } // Query variables int querywork = -1; int* iwork; int liwork; int lwork; int lrwork; int i_work; double d_work[3]; double_complex c_work; // NGST Query if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyngst_(&ibtype, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &scale, d_work, &querywork, &info); lwork = (int)(d_work[0]); } else { pzhengst_(&ibtype, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &scale, (void*)&c_work, &querywork, &info); lwork = (int)(c_work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_mr3 error in NGST query."); return NULL; } // NGST Compute if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyngst_(&ibtype, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), DOUBLEP(b), &one, &one, INTP(desca), &scale, work, &lwork, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); pzhengst_(&ibtype, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), (void*)COMPLEXP(b), &one, &one, INTP(desca), &scale, (void*)work, &lwork, &info); free(work); } if (info != 0) { PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_mr3 error in NGST compute."); return NULL; } // NOTE: Scale is always equal to 1.0 above. In future version of ScaLAPACK, we // may need to rescale eigenvalues by scale. This can be accomplised by using // the BLAS1 d/zscal. 
See pdsygvx.f // EVR Query if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdsyevr_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(d_work[0]); } else { pzheevr_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)&c_work, &querywork, d_work, &querywork, &i_work, &querywork, &info); lwork = (int)(c_work); lrwork = (int)(d_work[0]); } if (info != 0) { printf ("info = %d", info); PyErr_SetString(PyExc_RuntimeError, "scalapack_general_diagonalize_evr error in query."); return NULL; } // EVR Computation liwork = i_work; iwork = GPAW_MALLOC(int, liwork); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { double* work = GPAW_MALLOC(double, lwork); pdsyevr_(&jobz, &range, uplo, &n, DOUBLEP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), DOUBLEP(z), &one, &one, INTP(desca), work, &lwork, iwork, &liwork, &info); free(work); } else { double_complex* work = GPAW_MALLOC(double_complex, lwork); double* rwork = GPAW_MALLOC(double, lrwork); pzheevr_(&jobz, &range, uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &vl, &vu, &il, &iu, &eigvalm, &nz, DOUBLEP(w), (void*)COMPLEXP(z), &one, &one, INTP(desca), (void*)work, &lwork, rwork, &lrwork, iwork, &liwork, &info); free(rwork); free(work); } free(iwork); // Backtransformation to the original problem char trans; double d_one = 1.0; double_complex c_one = 1.0; if (*uplo == 'U') trans = 'N'; else trans = 'T'; if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one, DOUBLEP(b), &one, &one, INTP(desca), DOUBLEP(z), &one, &one, INTP(desca)); else pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one, (void*)COMPLEXP(b), &one, &one, INTP(desca), (void*)COMPLEXP(z), &one, &one, INTP(desca)); // If 
PyObject* scalapack_inverse(PyObject *self, PyObject *args)
{
  // Inverse of a Hermitian matrix, computed in place by a Cholesky
  // factorization (pzpotrf_) followed by pzpotri_.
  //
  // Python arguments, in order: a, desca, uplo.
  // Returns the ScaLAPACK 'info' value as a Python int: the result of
  // pzpotrf_ if it is nonzero, otherwise the result of pzpotri_.
  // Raises NotImplementedError for real (double) matrices, for which no
  // implementation exists.
  PyArrayObject* a;     // Matrix
  PyArrayObject* desca; // Matrix description vector
  char* uplo;
  int info;
  int one = 1;

  if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
    return NULL;

  int n = INTP(desca)[2];
  assert(n == INTP(desca)[3]); // Only square matrices

  if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
    {
      // The original guarded this branch with assert(1==-1) only; with
      // NDEBUG defined it fell through and returned an uninitialized
      // 'info'.  Report the missing implementation explicitly instead.
      PyErr_SetString(PyExc_NotImplementedError,
                      "scalapack_inverse: no double version implemented.");
      return NULL;
    }

  pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
  if (info == 0)
    {
      // Only invert when the factorization succeeded.
      pzpotri_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca),
               &info);
    }

  PyObject* returnvalue = Py_BuildValue("i", info);
  return returnvalue;
}
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { assert(1==-1); // No double version implemented } else { pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot, (void*)COMPLEXP(b), &one, &one, INTP(descb), &info); } free(pivot); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } */ PyObject* scalapack_solve(PyObject *self, PyObject *args) { // Solves equation Ax = B, where A is a general matrix PyArrayObject* a; // Matrix PyArrayObject* desca; // Matrix description vector PyArrayObject* b; // Matrix PyArrayObject* descb; // Matrix description vector int info; int one = 1; if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb)) return NULL; int a_ConTxt = INTP(desca)[1]; int n = INTP(desca)[2]; assert(n == INTP(desca)[3]); // Only square matrices int a_mb = INTP(desca)[4]; assert(n == INTP(descb)[2]); // Equation valid int nrhs = INTP(descb)[3]; int nprow, npcol, myrow, mycol, locM; Cblacs_gridinfo_(a_ConTxt, &nprow, &npcol, &myrow, &mycol); // LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A locM = (((n/a_mb) + 1)/nprow + 1) * a_mb; /* * IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A ) * This array contains the pivoting information. * IPIV(i) -> The global row local row i was swapped with. * This array is tied to the distributed matrix A. * An upper bound for these quantities may be computed by: * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A * M_A (global) DESCA( M_ ) The number of rows in the global * array A. * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute * the rows of the array. * NPROW (global input) INTEGER * NPROW specifies the number of process rows in the grid * to be created. 
*/ int* pivot = GPAW_MALLOC(int, locM + a_mb); //if (a->descr->type_num == PyArray_DOUBLE) if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) { pdgesv_(&n, &nrhs,(double*)DOUBLEP(a), &one, &one, INTP(desca), pivot, (double*)DOUBLEP(b), &one, &one, INTP(descb), &info); } else { pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot, (void*)COMPLEXP(b), &one, &one, INTP(descb), &info); } free(pivot); PyObject* returnvalue = Py_BuildValue("i", info); return returnvalue; } #endif #endif // PARALLEL gpaw-24.1.0/c/blas.c000066400000000000000000000115171454550013000140450ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Copyright (C) 2007 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #ifndef GPAW_WITHOUT_BLAS #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" #ifdef GPAW_NO_UNDERSCORE_BLAS # define dsyrk_ dsyrk # define zherk_ zherk # define dsyr2k_ dsyr2k # define zher2k_ zher2k # define dgemm_ dgemm # define zgemm_ zgemm #endif void dsyrk_(char *uplo, char *trans, int *n, int *k, double *alpha, double *a, int *lda, double *beta, double *c, int *ldc); void zherk_(char *uplo, char *trans, int *n, int *k, double *alpha, void *a, int *lda, double *beta, void *c, int *ldc); void dsyr2k_(char *uplo, char *trans, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc); void zher2k_(char *uplo, char *trans, int *n, int *k, void *alpha, void *a, int *lda, void *b, int *ldb, double *beta, void *c, int *ldc); void dgemm_(char *transa, char *transb, int *m, int * n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc); void zgemm_(char *transa, char *transb, int *m, int * n, int *k, void *alpha, void *a, int *lda, void *b, int *ldb, void *beta, void *c, int *ldc); PyObject* mmm(PyObject *self, PyObject *args) { 
Py_complex alpha; PyArrayObject* M1; char* trans1; PyArrayObject* M2; char* trans2; Py_complex beta; PyArrayObject* M3; if (!PyArg_ParseTuple(args, "DOsOsDO", &alpha, &M1, &trans1, &M2, &trans2, &beta, &M3)) return NULL; void* a = PyArray_DATA(M2); void* b = PyArray_DATA(M1); void* c = PyArray_DATA(M3); int bytes = PyArray_ITEMSIZE(M3); int m = PyArray_DIM(M3, 1); int n = PyArray_DIM(M3, 0); int lda = PyArray_STRIDE(M2, 0) / bytes; int ldb = PyArray_STRIDE(M1, 0) / bytes; int ldc = MAX(MAX(1, m), PyArray_STRIDE(M3, 0) / bytes); int k; if (*trans2 == 'N' || *trans2 == 'n') { k = PyArray_DIM(M2, 0); lda = MAX(MAX(1, m), lda); } else { k = PyArray_DIM(M2, 1); lda = MAX(MAX(1, k), lda); } if (*trans1 == 'N' || *trans1 == 'n') ldb = MAX(MAX(1, k), ldb); else ldb = MAX(MAX(1, n), ldb); if (bytes == 8) dgemm_(trans2, trans1, &m, &n, &k, &(alpha.real), a, &lda, b, &ldb, &(beta.real), c, &ldc); else zgemm_(trans2, trans1, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); Py_RETURN_NONE; } PyObject* rk(PyObject *self, PyObject *args) { double alpha; PyArrayObject* a; double beta; PyArrayObject* c; char t = 'c'; char* trans = &t; if (!PyArg_ParseTuple(args, "dOdO|s", &alpha, &a, &beta, &c, &trans)) return NULL; int n = PyArray_DIMS(c)[0]; int k, lda; if (*trans == 'c') { k = PyArray_DIMS(a)[1]; for (int d = 2; d < PyArray_NDIM(a); d++) k *= PyArray_DIMS(a)[d]; lda = MAX(k, 1); } else { k = PyArray_DIMS(a)[0]; lda = MAX(n, 1); } int ldc = MAX(MAX(1, n), PyArray_STRIDES(c)[0] / PyArray_ITEMSIZE(c)); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) dsyrk_("u", trans, &n, &k, &alpha, DOUBLEP(a), &lda, &beta, DOUBLEP(c), &ldc); else zherk_("u", trans, &n, &k, &alpha, (void*)COMPLEXP(a), &lda, &beta, (void*)COMPLEXP(c), &ldc); Py_RETURN_NONE; } PyObject* r2k(PyObject *self, PyObject *args) { Py_complex alpha; PyArrayObject* a; PyArrayObject* b; double beta; PyArrayObject* c; char t = 'c'; char* trans = &t; if (!PyArg_ParseTuple(args, "DOOdO|s", &alpha, &a, &b, &beta, &c, &trans)) 
return NULL; int n = PyArray_DIMS(c)[0]; int k, lda; if (*trans == 'c') { k = PyArray_DIMS(a)[1]; for (int d = 2; d < PyArray_NDIM(a); d++) k *= PyArray_DIMS(a)[d]; lda = MAX(k, 1); } else { k = PyArray_DIMS(a)[0]; lda = MAX(n, 1); } int ldc = MAX(MAX(1, n), PyArray_STRIDES(c)[0] / PyArray_ITEMSIZE(c)); if (PyArray_DESCR(a)->type_num == NPY_DOUBLE) dsyr2k_("u", trans, &n, &k, (double*)(&alpha), DOUBLEP(a), &lda, DOUBLEP(b), &lda, &beta, DOUBLEP(c), &ldc); else zher2k_("u", trans, &n, &k, (void*)(&alpha), (void*)COMPLEXP(a), &lda, (void*)COMPLEXP(b), &lda, &beta, (void*)COMPLEXP(c), &ldc); Py_RETURN_NONE; } #endif gpaw-24.1.0/c/bmgs/000077500000000000000000000000001454550013000137035ustar00rootroot00000000000000gpaw-24.1.0/c/bmgs/bmgs.c000066400000000000000000000010631454550013000147770ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include "fd.c" #include "wfd.c" #include "relax.c" #include "wrelax.c" #include "cut.c" #include "zero.c" #include "paste.c" #include "spline.c" #include "stencils.c" #include "restrict.c" #include "translate.c" #include "interpolate.c" #define BMGSCOMPLEX #include "fd.c" #include "wfd.c" #include "cut.c" #include "zero.c" #include "paste.c" #include "restrict.c" #include "interpolate.c" gpaw-24.1.0/c/bmgs/bmgs.h000066400000000000000000000102441454550013000150050ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #ifndef DOUBLECOMPLEXDEFINED # define DOUBLECOMPLEXDEFINED 1 # include typedef double complex double_complex; #endif #undef T #undef Z #ifndef BMGSCOMPLEX # define T double # define Z(f) f #else # define T double_complex # define Z(f) f ## z #endif #ifndef BMGS_H #define BMGS_H #include typedef struct { int ncoefs; double* coefs; long* offsets; long n[3]; long j[3]; } bmgsstencil; typedef struct { int l; double dr; int nbins; double* data; } bmgsspline; bmgsstencil bmgs_stencil(int ncoefs, const double* coefs, const long* offsets, int range, const long size[3]); bmgsstencil bmgs_laplace(int k, double scale, const double h[3], const long n[3]); bmgsstencil bmgs_mslaplaceA(double scale, const double h[3], const long n[3]); bmgsstencil bmgs_mslaplaceB(const long n[3]); bmgsstencil bmgs_gradient(int k, int i, double h, const long n[3]); void bmgs_deletestencil(bmgsstencil* spline); bmgsspline bmgs_spline(int l, double dr, int nbins, double* f); double bmgs_splinevalue(const bmgsspline* spline, double r); void bmgs_get_value_and_derivative(const bmgsspline* spline, double r, double *f, double *dfdr); void bmgs_deletespline(bmgsspline* spline); void bmgs_fd(const bmgsstencil* s, const double* a, double* b); void bmgs_wfd(int nweights, const bmgsstencil* stencils, const double** weights, const double* a, double* b); void bmgs_relax(const int relax_method, const bmgsstencil* s, double* a, double* b, const double* src, const double w); void bmgs_wrelax(const int relax_method, const int nweights, const bmgsstencil* stencils, const double** weights, double* a, double* b, const double* src, const double w); void bmgs_cut(const double* a, const int n[3], const int c[3], double* b, const int m[3]); void bmgs_zero(double* a, const int n[3], const int c[3], const int s[3]); void bmgs_paste(const double* a, const int n[3], double* b, const int m[3], const int c[3]); void bmgs_pastep(const double* a, const int n[3], double* b, const int m[3], const int c[3]); void 
bmgs_rotate(const double* a, const int size[3], double* b, double angle, int d, long c, double*, long*, long*, double*, long*, long*, int exact); void bmgs_translate(double* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3]); void bmgs_restrict(int k, double* a, const int n[3], double* b, double* w); void bmgs_interpolate(int k, int skip[3][2], const double* a, const int n[3], double* b, double* w); // complex routines: void bmgs_fdz(const bmgsstencil* s, const double_complex* a, double_complex* b); void bmgs_wfdz(int nweights, const bmgsstencil* stencils, const double** weights, const double_complex* a, double_complex* b); void bmgs_cutz(const double_complex* a, const int n[3], const int c[3], double_complex* b, const int m[3]); void bmgs_cutmz(const double_complex* a, const int n[3], const int c[3], double_complex* b, const int m[3], double_complex phase); void bmgs_zeroz(double_complex* a, const int n[3], const int c[3], const int s[3]); void bmgs_pastez(const double_complex* a, const int n[3], double_complex* b, const int m[3], const int c[3]); void bmgs_pastepz(const double_complex* a, const int n[3], double_complex* b, const int m[3], const int c[3]); void bmgs_rotatez(const double_complex* a, const int size[3], double_complex* b, double angle, int d, long c, double*, long*, long*, double*, long*, long*, int exact); void bmgs_translatemz(double_complex* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3], double_complex phase); void bmgs_restrictz(int k, double_complex* a, const int n[3], double_complex* b, double_complex* w); void bmgs_interpolatez(int k, int skip[3][2], const double_complex* a, const int n[3], double_complex* b, double_complex* w); #endif gpaw-24.1.0/c/bmgs/cut.c000066400000000000000000000017531454550013000146500ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include #include "bmgs.h" void Z(bmgs_cut)(const T* a, const int n[3], const int c[3], T* b, const int m[3]) { a += c[2] + (c[1] + c[0] * n[1]) * n[2]; for (int i0 = 0; i0 < m[0]; i0++) { for (int i1 = 0; i1 < m[1]; i1++) { memcpy(b, a, m[2] * sizeof(T)); a += n[2]; b += m[2]; } a += n[2] * (n[1] - m[1]); } } #ifdef BMGSCOMPLEX void bmgs_cutmz(const double_complex* a, const int sizea[3], const int start[3], double_complex* b, const int sizeb[3], double_complex p) { a += start[2] + (start[1] + start[0] * sizea[1]) * sizea[2]; for (int i0 = 0; i0 < sizeb[0]; i0++) { for (int i1 = 0; i1 < sizeb[1]; i1++) { for (int i2 = 0; i2 < sizeb[2]; i2++) b[i2] = p * a[i2]; a += sizea[2]; b += sizeb[2]; } a += sizea[2] * (sizea[1] - sizeb[1]); } } #endif gpaw-24.1.0/c/bmgs/fd.c000066400000000000000000000017041454550013000144420ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP Copyright (C) 2010 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include "../extensions.h" #include "bmgs.h" void Z(bmgs_fd)(const bmgsstencil* s, const T* a, T* b) { /* Skip the leading halo area. */ a += (s->j[0] + s->j[1] + s->j[2]) / 2; for (int i0 = 0; i0 < s->n[0]; i0++) { for (int i1 = 0; i1 < s->n[1]; i1++) { #ifdef _OPENMP #pragma omp simd #endif for (int i2 = 0; i2 < s->n[2]; i2++) { int i = i2 + i1 * (s->j[2] + s->n[2]) + i0 * (s->j[1] + s->n[1] * (s->j[2] + s->n[2])); int j = i2 + i1 * s->n[2] + i0 * s->n[1] * s->n[2]; T x = 0.0; for (int c = 0; c < s->ncoefs; c++) x += a[i + s->offsets[c]] * s->coefs[c]; b[j] = x; } } } } gpaw-24.1.0/c/bmgs/interpolate.c000066400000000000000000000051771454550013000164070ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include "../extensions.h" #include "bmgs.h" #ifdef K void IP1D(const T* a, const int n, const int m, T* restrict b, int skip[2]) { a += K / 2 - 1; for (int j = 0; j < m; j++) { const T* aa = a + j * (K - 1 - skip[1] + n); T* restrict bb = b + j; for (int i = 0; i < n; i++) { if (i == 0 && skip[0]) bb -= m; else bb[0] = aa[0]; if (i == n - 1 && skip[1]) bb -= m; else { if (K == 2) bb[m] = 0.5 * (aa[0] + aa[1]); else if (K == 4) bb[m] = ( 0.5625 * (aa[ 0] + aa[1]) + -0.0625 * (aa[-1] + aa[2])); else if (K == 6) bb[m] = ( 0.58593750 * (aa[ 0] + aa[1]) + -0.09765625 * (aa[-1] + aa[2]) + 0.01171875 * (aa[-2] + aa[3])); else bb[m] = ( 0.59814453125 * (aa[ 0] + aa[1]) + -0.11962890625 * (aa[-1] + aa[2]) + 0.02392578125 * (aa[-2] + aa[3]) + -0.00244140625 * (aa[-3] + aa[4])); } aa++; bb += 2 * m; } } } #else # define K 2 # define IP1D Z(bmgs_interpolate1D2) # include "interpolate.c" # undef IP1D # undef K # define K 4 # define IP1D Z(bmgs_interpolate1D4) # include "interpolate.c" # undef IP1D # undef K # define K 6 # define IP1D Z(bmgs_interpolate1D6) # include "interpolate.c" # undef IP1D # undef K # define K 8 # define IP1D Z(bmgs_interpolate1D8) # include "interpolate.c" # undef IP1D # undef K void Z(bmgs_interpolate)(int k, int skip[3][2], const T* a, const int size[3], T* restrict b, T* restrict w) { void (*ip)(const T*, int, int, T*, int[2]); int e; if (k == 2) ip = Z(bmgs_interpolate1D2); else if (k == 4) ip = Z(bmgs_interpolate1D4); else if (k == 6) ip = Z(bmgs_interpolate1D6); else ip = Z(bmgs_interpolate1D8); e = k - 1; ip(a, size[2] - e + skip[2][1], size[0] * size[1], b, skip[2]); ip(b, size[1] - e + skip[1][1], size[0] * ((size[2] - e) * 2 - skip[2][0] + skip[2][1]), w, skip[1]); ip(w, size[0] - e + skip[0][1], ((size[1] - e) * 2 - skip[1][0] + skip[1][1]) * ((size[2] - e) * 2 - skip[2][0] + skip[2][1]), b, skip[0]); } #endif gpaw-24.1.0/c/bmgs/paste.c000066400000000000000000000016341454550013000151670ustar00rootroot00000000000000/* Copyright (C) 2003-2007 
CAMP * Please see the accompanying LICENSE file for further information. */ #include "bmgs.h" void Z(bmgs_paste)(const T* a, const int sizea[3], T* b, const int sizeb[3], const int startb[3]) { b += startb[2] + (startb[1] + startb[0] * sizeb[1]) * sizeb[2]; for (int i0 = 0; i0 < sizea[0]; i0++) { for (int i1 = 0; i1 < sizea[1]; i1++) { memcpy(b, a, sizea[2] * sizeof(T)); a += sizea[2]; b += sizeb[2]; } b += sizeb[2] * (sizeb[1] - sizea[1]); } } void Z(bmgs_pastep)(const T* a, const int sizea[3], T* b, const int sizeb[3], const int startb[3]) { b += startb[2] + (startb[1] + startb[0] * sizeb[1]) * sizeb[2]; for (int i0 = 0; i0 < sizea[0]; i0++) { for (int i1 = 0; i1 < sizea[1]; i1++) { for (int i2 = 0; i2 < sizea[2]; i2++) b[i2] += *a++; b += sizeb[2]; } b += sizeb[2] * (sizeb[1] - sizea[1]); } } gpaw-24.1.0/c/bmgs/relax.c000066400000000000000000000040571454550013000151700ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2005,2010 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include "bmgs.h" void bmgs_relax(const int relax_method, const bmgsstencil* s, double* restrict a, double* restrict b, const double* src, const double w) { if (relax_method == 1) { // Coefficient needed multiple times later const double coef = 1.0/s->coefs[0]; a += (s->j[0] + s->j[1] + s->j[2]) / 2; /* Weighted Gauss-Seidel relaxation for the equation "operator" b = src a contains the temporary array holding also the boundary values. 
*/ for (int i0 = 0; i0 < s->n[0]; i0++) { for (int i1 = 0; i1 < s->n[1]; i1++) { #ifdef _OPENMP #pragma omp simd #endif for (int i2 = 0; i2 < s->n[2]; i2++) { int i = i2 + i1 * s->n[2] + i0 * s->n[1] * s->n[2]; int j = i2 + i1 * (s->n[2] + s->j[2]) + i0 * (s->n[1] * (s->n[2] + s->j[2]) + s->j[1]); double x = 0.0; for (int c = 1; c < s->ncoefs; c++) x += a[j + s->offsets[c]] * s->coefs[c]; x = (src[i] - x) * coef; b[i] = x; a[j] = x; } } } } else { a += (s->j[0] + s->j[1] + s->j[2]) / 2; /* Weighted Jacobi relaxation for the equation "operator" b = src a contains the temporariry array holding also the boundary values. */ #pragma omp parallel for schedule(static) for (int i0 = 0; i0 < s->n[0]; i0++) { for (int i1 = 0; i1 < s->n[1]; i1++) { #ifdef _OPENMP #pragma omp simd #endif for (int i2 = 0; i2 < s->n[2]; i2++) { int i = i2 + i1 * s->n[2] + i0 * s->n[1] * s->n[2]; int j = i2 + i1 * (s->n[2] + s->j[2]) + i0 * (s->n[1] * (s->n[2] + s->j[2]) + s->j[1]); double x = 0.0; for (int c = 1; c < s->ncoefs; c++) x += a[j + s->offsets[c]] * s->coefs[c]; b[i] = (1.0 - w) * b[i] + w * (src[i] - x) / s->coefs[0]; } } } } } gpaw-24.1.0/c/bmgs/restrict.c000066400000000000000000000070541454550013000157140ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include "../extensions.h" #include "bmgs.h" #ifdef K struct RST1DA{ int thread_id; int nthds; const T* a; int n; int m; T* b; }; void *RST1DW(void *threadarg) { struct RST1DA *args = (struct RST1DA *) threadarg; int m = args->m; int chunksize = m / args->nthds + 1; int nstart = args->thread_id * chunksize; if (nstart >= m) return NULL; int nend = nstart + chunksize; if (nend > m) nend = m; for (int j = 0; j < m; j++) { const T* aa = args->a + j * args->n; T* bb = args->b + j; for (int i = 0; i < (args->n - K * 2 + 3) / 2; i++) { if (K == 2) bb[0] = 0.5 * (aa[0] + 0.5 * (aa[1] + aa[-1])); else if (K == 4) bb[0] = 0.5 * (aa[0] + 0.5625 * (aa[1] + aa[-1]) + -0.0625 * (aa[3] + aa[-3])); else if (K == 6) bb[0] = 0.5 * (aa[0] + 0.58593750 * (aa[1] + aa[-1]) + -0.09765625 * (aa[3] + aa[-3]) + 0.01171875 * (aa[5] + aa[-5])); else bb[0] = 0.5 * (aa[0] + 0.59814453125 * (aa[1] + aa[-1]) + -0.11962890625 * (aa[3] + aa[-3]) + 0.02392578125 * (aa[5] + aa[-5]) + -0.00244140625 * (aa[7] + aa[-7])); aa += 2; bb += m; } } return NULL; } void RST1D(const T* a, int n, int m, T* b) { a += K - 1; int nthds = 1; #ifdef GPAW_OMP_MONLY if (getenv("OMP_NUM_THREADS") != NULL) nthds = atoi(getenv("OMP_NUM_THREADS")); #endif struct RST1DA *wargs = GPAW_MALLOC(struct RST1DA, nthds); pthread_t *thds = GPAW_MALLOC(pthread_t, nthds); for(int i=0; i < nthds; i++) { (wargs+i)->thread_id = i; (wargs+i)->nthds = nthds; (wargs+i)->a = a; (wargs+i)->n = n; (wargs+i)->m = m; (wargs+i)->b = b; } #ifdef GPAW_OMP_MONLY for(int i=1; i < nthds; i++) pthread_create(thds + i, NULL, RST1DW, (void*) (wargs+i)); #endif RST1DW(wargs); #ifdef GPAW_OMP_MONLY for(int i=1; i < nthds; i++) pthread_join(*(thds+i), NULL); #endif free(wargs); free(thds); } #else # define K 2 # define RST1D Z(bmgs_restrict1D2) # define RST1DA Z(bmgs_restrict1D2_args) # define RST1DW Z(bmgs_restrict1D2_worker) # include "restrict.c" # undef RST1D # undef RST1DA # undef RST1DW # undef K # define K 4 # define RST1D Z(bmgs_restrict1D4) # 
define RST1DA Z(bmgs_restrict1D4_args) # define RST1DW Z(bmgs_restrict1D4_worker) # include "restrict.c" # undef RST1D # undef RST1DA # undef RST1DW # undef K # define K 6 # define RST1D Z(bmgs_restrict1D6) # define RST1DA Z(bmgs_restrict1D6_args) # define RST1DW Z(bmgs_restrict1D6_worker) # include "restrict.c" # undef RST1D # undef RST1DA # undef RST1DW # undef K # define K 8 # define RST1D Z(bmgs_restrict1D8) # define RST1DA Z(bmgs_restrict1D8_args) # define RST1DW Z(bmgs_restrict1D8_worker) # include "restrict.c" # undef RST1D # undef RST1DA # undef RST1DW # undef K void Z(bmgs_restrict)(int k, T* a, const int n[3], T* b, T* w) { void (*plg)(const T*, int, int, T*); if (k == 2) plg = Z(bmgs_restrict1D2); else if (k == 4) plg = Z(bmgs_restrict1D4); else if (k == 6) plg = Z(bmgs_restrict1D6); else plg = Z(bmgs_restrict1D8); int e = k * 2 - 3; int nb[3] = {(n[0] - e) / 2, (n[1] - e) / 2, (n[2] - e) / 2}; plg(a, n[2], n[0] * n[1], w); plg(w, n[1], n[0] * nb[2], a); plg(a, n[0], nb[1] * nb[2], b); } #endif gpaw-24.1.0/c/bmgs/sharmonic.py000066400000000000000000000622541454550013000162510ustar00rootroot00000000000000import numpy as np from Numeric import pi, sqrt from tools import factorial from tools import Rational as Q """ This is a script designed for construction of the real solid spherical harmonics (RSSH) in cartesian form. 
These can be written as:: m |m| l |m| Y = Y = C r P (cos theta) Phi (phi) L l l l m where C_l^|m| is a normalization constant P_l^|m| is the associatied legendre polynomial and: / cos(m phi) , m > 0 Phi (phi) = | 1 , m = 0 m \ sin(-m phi), m < 0 The first few harmonics are listed below:: +----+---------------------+-__---------------------------------------+ | L | l | m | r^l * Y | \/ (r^l * Y) | +----+---s----+------------+------------------------------------------+ | 0 | 0 | 0 | 1 | (0, 0, 0) | +----+---p----+------------+------------------------------------------+ | 1 | 1 | -1 | y | (0, 1, 0) | | 2 | 1 | 0 | z | (0, 0, 1) | | 3 | 1 | 1 | x | (1, 0, 0) | +----+---d----+------------+------------------------------------------+ | 4 | 2 | -2 | xy | ( y, x, 0) | | 5 | 2 | -1 | yz | ( 0, z, y) | | 6 | 2 | 0 | 3z^2-r^2 | (-x, -y, 2z) | | 7 | 2 | 1 | xz | ( z, 0, x) | | 8 | 2 | 2 | x^2-y^2 | ( x, -y, 0) | +----+---f----+------------+------------------------------------------+ | 9 | 3 | -3 | 3x^2y-y^3 | ( 2xy, x^2-y^2, 0) | | 10 | 3 | -2 | xyz | ( yz, xz, xy) | | 11 | 3 | -1 | 5yz^2-yr^2 | ( -2xy, 4z^2-x^2-3y^2, 8yz) | | 12 | 3 | 0 | 5z^3-3zr^2 | ( -2xz, -2yz, 3z^2-r^2) | | 13 | 3 | 1 | 5xz^2-xr^2 | (4z^2-3x^2-y^2, -2xy, 8xz) | | 14 | 3 | 2 | x^2z-y^2z | ( 2xz, -2yz, x^2-y^2) | | 15 | 3 | 3 | x^3-3xy^2 | ( x^2-y^2, -2xy, 0) | +----+--------+----------+--------------------------------------------+ Y_lm is represented as a polynomial in x, y, and z The function consists of three parts: a normalization constant accessed by class 'Normalization(l, m)', a polynomial in z accessed with method 'legendre(l, m)', and a polynomial in x and y accessed with method 'Phi(l, m)' The normalization and the z-polynomial are both invariant of the sign of m The z-polynomial has powers l-|m|, l-|m|-2, l-|m|-4, l-..., i.e. 
it is strictly odd (even) if l-|m| is odd (even) The combined power of x and y is |m| in all terms of Phi """ Y_lp = [{}, {}] # Global list of dictionaries for storing calculated # Legendre polynomials, and Phi functions #--------------------------- RELEVANT USER METHODS --------------------------- def L_to_lm(L): """convert L index to (l, m) index""" l = int(sqrt(L)) m = L - l**2 - l return l, m def lm_to_L(l,m): """convert (l, m) index to L index""" return l**2 + l + m def Y_to_string(l, m, deriv=None, multiply=None, numeric=False): # for L in range(40): print L, Y_to_string(*L_to_lm(L)) """ l m If deriv is None, return string representation of r * Y (x, y, z) l If deriv == q, return string is the derivative of above with respect to x, y or z if q is 0, 1 or 2 respectively. multiply=q indicates that the entire expression should be multiplied by x, y or z if q is 0, 1 or 2 respectively. numeric=True/False indicates whether the normalization constant should be written as a numeric or an algebraic expression. """ assert deriv is None or deriv in range(3) assert multiply is None or multiply in range(3) if deriv is None: norm, xyzs = Y_collect(l, m) else: norm, xyzs = dYdq(l, m, deriv) if multiply is not None: xyzs = q_times_xyzs(xyzs, multiply) string = to_string(l, xyzs, deriv is not None, multiply is not None) if string == '0': return '0' else: return norm.tostring(numeric) + (' * ' + string) * (string != '1') def gauss_to_string(l, m, numeric=False): """Return string representation of the generalized gaussian:: _____ 2 m / 1 l! l+3/2 -a r l m g (x,y,z) = / ----- --------- (4 a) e r Y (x,y,z) l \/ 4 pi (2l + 1)! l numeric=True/False indicates whether the normalization constant should be written as a number or an algebraic expression. 
""" norm, xyzs = Y_collect(l, m) ng = Q(2**(2*l+3) * factorial(l), 2 * factorial(2 * l + 1)) norm.multiply(ng) string = to_string(l, xyzs) string = (' * ' + string) * (string != '1') if numeric: snorm = repr(eval(repr(norm.norm))) else: snorm = repr(norm.norm) string = 'sqrt(a**%s*%s)/pi'%(2*l+3, snorm) + string string += ' * exp(-a*r2)' return string def gauss_potential_to_string(l, m, numeric=False): """Return string representation of the potential of a generalized gaussian. The potential is determined by:: m m ^ _ m ^ v [g (r) Y (r) ](r) = v (r) Y (r) l l l l l l where:: 4 pi / -l-1 /r l+2 l /oo 1-l \ v (r) = ---- | r | dx x g (r) + r | dx x g (r) | l 2l+1 \ /0 l /r l / """ v_l = [[Q(4,1), 1], [Q(4,3), 1, 2], [Q(4,15), 3, 6, 4], [Q(4,105), 15, 30, 20, 8], [Q(4,945), 105, 210, 140, 56, 16], [Q(4,10395), 945, 1890, 1260, 504, 144, 32], ] norm, xyzs = Y_collect(l, m) norm.multiply(v_l[l][0]) string = txt_sqrt(norm.norm, numeric) + '*' + (l!=0)*'(' if numeric: string += repr(v_l[l][1] * sqrt(pi)) else: string += str(v_l[l][1]) + '*sqrt(pi)' string += '*erf(sqrt(a)*r)' if len(v_l[l]) > 2: string += '-(' for n, coeff in enumerate(v_l[l][2:]): if n == 0: string += str(coeff) else: string += '+' + str(coeff) + '*(sqrt(a)*r)**%d'%(2*n) string += ')*sqrt(a)*r*exp(-a*r2)' if l == 0: string += '/r' elif l == 1: string += ')/r/r2*' + to_string(l, xyzs) else: string += ')/r/r2**%d*'%l + to_string(l, xyzs) return string #----------------------------- TECHNICAL METHODS ----------------------------- def to_string(l, xyzs, deriv=False, multiply=False): """Return string representation of an xyz dictionary""" if xyzs == {}: return '0' out = '' for xyz, coef in xyzs.items(): x, y, z = xyz r = l - x - y - z - deriv + multiply one = abs(coef) != 1 or (x == 0 and y == 0 and z == 0 and r == 0) out += sign(coef) + str(abs(coef)) * one out += ('*x'*x + '*y'*y + '*z'*z + '*r2'*(r/2))[1 - one:] if out[0] == '+': out = out[1:] if len(xyzs) > 1: out = '(' + out + ')' return out def sign(x): 
"""Return string representation of the sign of x""" if x >= 0: return '+' else: return '-' def txt_sqrt(norm, numeric=False): if numeric: return repr(sqrt(norm)) else: if sqrt(norm) % 1 == 0: return str(sqrt(norm)) else: return 'sqrt(' + str(norm.nom) + \ ('./' + str(norm.denom)) * (norm.denom != 1) + ')' class Normalization: """Determine normalization factor of spherical harmonic ______________ / / 2l+1 (l-m)! | / ---- * ------ , m != 0 | \/ 2 pi (l+m)! C = < _____ L | / 2l+1 | / ---- , m = 0 \ \/ 4 pi """ def __init__(self, l, m): m = abs(m) if m == 0: self.norm = Q(2 * l + 1, 4) else: self.norm = Q((2 * l + 1) * factorial(l - m), 2 * factorial(l + m)) def __str__(self): n = self.norm sn = sqrt(n) if int(sn) == sn: string = repr(sn) + '/sqrt(pi)' else: string = 'sqrt(' + repr(n.nom) + \ ('./' + repr(n.denom)) * (n.denom != 1) + '/pi)' return string def __repr__(self): return repr(self.__float__()) def __float__(self): return sqrt(self.norm / pi) def multiply(self, x): self.norm *= x**2 def tostring(self, numeric=False): if numeric: return self.__repr__() else: return self.__str__() def legendre(l, m): """Determine z dependence of spherical harmonic. Returns vector, where the p'th element is the coefficient of z^p r^(l-|m|-p). 
""" # Check if requested has already been calculated if (l, m) in Y_lp[0]: return Y_lp[0][(l, m)] m = abs(m) assert l >= 0 and 0 <= m <=l result = np.zeros(l - m + 1, 'O') if l == m == 0: """Use that 0 P (z) = 1 0 """ result[0] = Q(1) elif l == m: """Use the recursion relation m m-1 P (z) = (2m-1) P (z) m m-1 """ result[:] += (2 * m - 1) * legendre(l - 1, m - 1) elif l == m + 1: """Use the recursion relation l-1 l-1 P (z) = (2l-1)z P (z) l l-1 """ result[1:] += (2 * l - 1) * legendre(l-1, l-1) else: """Use the recursion relation m 2l-1 m l+m-1 2 m P (z)= ---- z P (z) - ----- r P (z) l l-m l-1 l-m l-2 """ result[1:] += np.multiply(legendre(l - 1, m), Q(2 * l - 1, l - m)) result[:(l - 2) - m + 1] -= np.multiply(legendre(l - 2, m), Q(l + m - 1, l - m)) # Store result in global dictionary Y_lp[0][(l, m)] = result return result def Phi(m): """Determine the x and y dependence of the spherical harmonics from |m| |m| / r sin (theta) cos(|m| phi), m >= 0 Phi (phi) = | m | |m| |m| \ r sin (theta) sin(|m| phi), m < 0 Returns dictionary of format {(i, j): c} where c is the coefficient of x^i y^j """ # Check if requested has already been calculated if m in Y_lp[1]: return Y_lp[1][m] if m == 0: xys = {(0, 0): 1} # use that Phi_0 = 1 elif m == 1: xys = {(1, 0): 1} # use that Phi_1 = x elif m == -1: xys = {(0, 1): 1} # use that Phi_-1 = y else: """Use the recurrence formula m > 0: Phi (x,y) = x Phi (x,y) - y Phi (x,y) |m| |m|-1 1-|m| m < 0: Phi (x,y) = y Phi (x,y) + x Phi (x,y) |m| |m|-1 1-|m| """ xys = {} phi1 = Phi(abs(m) - 1) phi2 = Phi(1 - abs(m)) for x, y in phi1: new = (x + (m > 0), y + (m < 0)) xys[new] = xys.get(new, 0) + phi1[(x, y)] for x,y in phi2: new = (x + (m < 0), y + (m > 0)) sign = 2 * (m < 0) - 1 xys[new] = xys.get(new, 0) + sign * phi2[(x, y)] # Store result in global dictionary Y_lp[1][m] = xys return xys def Y_collect(l, m): """Collect all necessary parts of spherical harmonic and return in simplified format. 
Return dictionary xyzs has format {(i, j, k): c} where c is the coefficient of x^i y^j z^k r^(l-|m|-k), or (since i+j = |m|) the coefficient of x^i y^j z^k r^(l-i-j-k), from which it is clear that all terms are of power l in x, y and z collectively. """ zs = legendre(l, m) xys = Phi(m) xyzs = {} for xy in xys: if xys[xy] != 0: for p in range(len(zs)): if zs[p] != 0: xyzs[xy + (p,)] = xys[xy] * zs[p] # get normalization constant and simplify norm = Normalization(l, m) norm.multiply(simplify(xyzs)) return norm, xyzs def Y_collect2(l, m): """Same as Y_collect, but collective power of x, y, and z are adjusted, such the it is always equal to l (thus avoiding multiplication by r) """ norm, p = Y_collect(l, m) done = False while not done: p2 = {} done = True for (nx, ny, nz), c in p.items(): n = nx + ny + nz if n < l: p2[(nx + 2, ny, nz)] = p2.get((nx + 2, ny, nz), 0) + c p2[(nx, ny + 2, nz)] = p2.get((nx, ny + 2, nz), 0) + c p2[(nx, ny, nz + 2)] = p2.get((nx, ny, nz + 2), 0) + c if n + 2 < l: done = False else: assert n == l p2[(nx, ny, nz)] = p2.get((nx, ny, nz), 0) + c p = p2 p2 = p.copy() for n, c in p.items(): if c == 0: del p2[n] return norm, p2 def dYdq(l, m, q): """Returns a normalization constant, and a dictionary describing the functional form of the derivative of r^l Y_l^m(x,y,z) with respect to x, y or z if q is either 0, 1 or 2 respectively. The format of the output dictionary is {(i, j, k): c}, where c is the coefficient of x^i y^j z^k r^(l-i-j-k-1). 
""" norm, xyzs = Y_collect(l, m) dxyzs = {} for xyz, coef in xyzs.items(): x, y, z = xyz r = l - x - y - z # chain rule: diff coordinate q only if xyz[q] != 0: dxyz = list(xyz) dxyz[q] -= 1 dxyz = tuple(dxyz) dxyzs[dxyz] = dxyzs.get(dxyz, 0) + xyz[q] * coef # chain rule: diff coordinate r only if r != 0: dxyz = list(xyz) dxyz[q] += 1 dxyz = tuple(dxyz) dxyzs[dxyz] = dxyzs.get(dxyz, 0) + r * coef # remove zeros from list for dxyz in dxyzs.keys(): if dxyzs[dxyz] == 0: dxyzs.pop(dxyz) # simplify if dxyzs != {}: norm.multiply(simplify(dxyzs)) return norm, dxyzs def simplify(xyzs): """Rescale coefficients to smallest integer value""" norm = Q(1) numxyz = np.array(xyzs.values()) # up-scale all 'xyz' coefficients to integers for xyz in numxyz: numxyz *= xyz.denom norm /= xyz.denom # determine least common divisor for 'xyz' coefficients dmax = 1 num_max = max(abs(np.floor(numxyz))) for d in range(2, num_max + 1): test = numxyz / d if np.all(test == np.floor(test)): dmax = d # Update simplified dictionary norm *= dmax for i, xyz in enumerate(xyzs): xyzs[xyz] = numxyz[i] / dmax return norm def q_times_xyzs(xyzs, q): """multiply xyz dictionary by x, y, or z according to q = 0, 1, or 2""" qxyzs = {} for xyz, c in xyzs.items(): qxyz = list(xyz) qxyz[q] += 1 qxyz = tuple(qxyz) qxyzs[qxyz] = c return qxyzs #--------------------- TEST AND CODE CONSTRUCTING METHODS --------------------- def orthogonal(L1, L2): """Perform the integral 2pi pi / / I = | |sin(theta) d(theta) d(phi) Y (theta, phi) * Y (theta, phi) / / L1 L2 0 0 which should be a kronecker delta in L1 and L2 """ I = 0.0 N = 40 for theta in np.arange(0, pi, pi / N): for phi in np.arange(0, 2 * pi, 2 * pi / N): x = np.cos(phi) * np.sin(theta) y = np.sin(phi) * np.sin(theta) z = np.cos(theta) r2 = x*x + y*y + z*z Y1 = eval(Y_to_string(*L_to_lm(L1))) Y2 = eval(Y_to_string(*L_to_lm(L2))) I += np.sin(theta) * Y1 * Y2 I *= 2 * (pi / N)**2 return I def check_orthogonality(Lmax=10): """Check orthogonality for all combinations of 
the first few harmonics""" all_passed = True for L1 in range(Lmax+1): for L2 in range(L1, Lmax+1): I = orthogonal(L1, L2) passed = abs(I - (L1 == L2)) < 3e-3 all_passed *= passed print('L1 = %s, L2 = %s, passed = %s, I = %s' %(L1, L2, passed, I)) if all_passed: print('All tests passed') else: print('Some tests failed') def symmetry1(lmax, display=True): """Make dictionary of format diff = {(l1, m1, q1): (nrel, l2, m2, q2)} indicating that m1 m2 d Y d Y l1 l2 ------ = nrel * ------ d q1 d q2 """ diff = {} # diff[(l1, m1, q1)] = (nrel, l2, m2, q2) unique_L = [] # unique_L[L] = (l, m, q, norm, dxyzs) for L in range((lmax + 1)**2): l, m = L_to_lm(L) for q in range(3): identical = False name = (l, m, 'xyz'[q]) norm, dxyzs = dYdq(l, m, q) for unique in unique_L: if dxyzs == unique[4]: diff[name] = (norm.eval() / unique[3],) + unique[0:3] identical = True break if identical == False: unique_L.append(name + (norm.eval(), dxyzs)) if display: for key, value in diff.items(): print(str(key) + ' = ' + str(value[0]) + ' * ' + str(value[1:])) else: return diff def symmetry2(l, display=True): """Make dictionary of format diff = {(l1, m1, q1): (nrel, l2, m2, q2)} indicating that m1 m2 d Y d Y l1 l2 ------ = nrel * ------ d q1 d q2 and m1 m2 q1 * Y = nrel * q2 * Y l1 l2 """ diff = {} # diff[(l1, m1, q1)] = (nrel, l2, m2, q2) unique_L = [] # unique_L[L] = (l, m, q, dnorm, dxyzs, qnorm, qxyzs) for m in range(-l, l+1): for q in range(3): identical = False name = (l, m, q) qnorm, xyzs = Y_collect(l, m) qxyzs = q_times_xyzs(xyzs, q) dnorm, dxyzs = dYdq(l, m, q) for unique in unique_L: if dxyzs == unique[4] and qxyzs == unique[6]: dnrel = dnorm.eval() / unique[3] qnrel = qnorm.eval() / unique[5] print(dnrel == qnrel) if dnrel == qnrel: diff[name] = (dnrel,) + unique[0:3] identical = True break if identical == False: unique_L.append(name + (dnorm.eval(), dxyzs, qnorm.eval(), qxyzs)) if display: for key, value in diff.items(): print(str(key) + ' = ' + str(value[0]) + ' * ' + str(value[1:])) 
else: return diff def construct_spherical_harmonic_c_function(file, lmax, funcname, multiply=None, deriv=None): """Construct a macro for evaluating values of spherical harmonics, or the derivative of any spherical harmonic with respect to some axis. The deriv keyword corresponds to that of the Y_to_string function.""" w = file.write indent = 0 def wn(string=''): w(2 * indent * ' ') w(string) w('\\\n') wn('#define %s(l, f, x, y, z, r2, p) (' % funcname) indent = 2 wn('{') wn(' switch(l)') wn(' {') switchindent = 3 indent += switchindent for l in range(lmax + 1): wn('case %d:' % l) indent += 1 for M, m in enumerate(range(-l, l + 1)): Ystr = Y_to_string(l, m, numeric=True, deriv=deriv) wn('p[%d] = f * %s;' % (M, Ystr)) wn('break;') indent -= 1 wn('default:') wn(' assert(0 == 1);') indent -= switchindent wn(' }') wn('}') indent = 0 wn(')') w('\n') def construct_spherical_harmonic_c_code(filename='spherical_harmonics.h', lmax=4): """Construct macros for evaluating spherical harmonics as well as their derivatives.""" file = open(filename, 'w') construct = construct_spherical_harmonic_c_function construct(file, lmax, 'spherical_harmonics') for c in range(3): construct(file, lmax, 'spherical_harmonics_derivative_%s' % 'xyz'[c], multiply=c, deriv=c) file.close() def construct_c_code(file='temp.c', lmax=3): """Method for generating the code in c/spline.c""" txt = '//Computer generated code! Hands off!' 
start_func = """ // inserts values of f(r) r^l Y_lm(theta, phi) in elements of input array 'a' void bmgs_radial3(const bmgsspline* spline, int m, const int n[3], const double C[3], const double h[3], const double* f, double* a) { int l = spline->l; if (l == 0) for (int q = 0; q < n[0] * n[1] * n[2]; q++) a[q] = 0.28209479177387814 * f[q]; """ start_deriv = """ // insert values of // d( f(r) * r^l Y_l^m ) d( r^l Y_l^m ) // --------------------- = g(r) q r^l Y_l^m + f(r) -------------- // dq dq // where q={x, y, z} and g(r) = 1/r*(df/dr) void bmgs_radiald3(const bmgsspline* spline, int m, int c, const int n[3], const double C[3], const double h[3], const double* f, const double* g, double* a) { int l = spline->l; """ start_case = """ { int q = 0; double x = C[0]; for (int i0 = 0; i0 < n[0]; i0++) { double y = C[1]; for (int i1 = 0; i1 < n[1]; i1++) { double z = C[2]; for (int i2 = 0; i2 < n[2]; i2++, q++) { """ end_case = """ z += h[2]; } y += h[1]; } x += h[0]; } } """ # insert code for evaluating the function txt += start_func for l in range(1, lmax + 1): txt += ' else if (l == %s)' %l txt += start_case case = '' for m in range(-l, l+1): if m == -l: case += ' ' * 18 + 'if (m == %s)\n' %m elif m == l: case += '\n' + ' ' * 18 +'else\n' else: case += '\n' + ' ' * 18 + 'else if (m == %s)\n' %m case += ' ' * 20 + 'a[q] = f[q] * ' case += Y_to_string(l,m, numeric=True) + ';' if 'r2' in case: txt += ' ' * 18 + 'double r2 = x*x+y*y+z*z;\n' txt += case txt += end_case txt += """ else assert(0 == 1); } """ # insert code for evaluating the derivative txt += start_deriv for q in range(3): txt += ' // ' + 'xyz'[q] + '\n' for l in range(0, lmax + 1): if l == 0 and q == 0: txt += ' if (c == 0 && l == 0)' else: txt += ' else if (c == %s && l == %s)' %(q, l) txt += start_case case = '' for m in range(-l, l+1): if m == -l: case += ' ' * 18 + 'if (m == %s)\n' %m elif m == l: case += '\n' + ' ' * 18 + 'else\n' else: case += '\n' + ' ' * 18 + 'else if (m == %s)\n' %m case += ' ' * 20 + 
'a[q] = g[q] * ' case += Y_to_string(l, m, multiply=q, numeric=True) diff = Y_to_string(l, m, deriv=q, numeric=True) if diff != '0': case += ' + f[q] * ' + diff case += ';' if 'r2' in case: txt += ' ' * 18 + 'double r2 = x*x+y*y+z*z;\n' txt += case txt += end_case txt += """ else assert(0 == 1); } """ f = open(file, 'w') print(txt, file=f) f.close() def construct_gauss_code(lmax=2): """Method for generating the code in gpaw/utilities/gauss.py""" Lmax = (lmax + 1)**2 out= 'Y_L = [\n' for L in range(Lmax): l, m = L_to_lm(L) out+= ' \'' + Y_to_string(l, m, numeric=True) + '\',\n' out += ']' out += '\ngauss_L = [\n' for L in range(Lmax): l, m = L_to_lm(L) out += ' \'' + gauss_to_string(l, m, numeric=True) + '\',\n' out += ']' out += '\ngausspot_L = [\n' for L in range(Lmax): l, m = L_to_lm(L) out += ' \'' + gauss_potential_to_string(l, m, numeric=True) + '\',\n' out += ']' print(out) def construct_spherical_code(lmax=3): """Method for generating the code in gpaw/spherical_harmonics.py""" YL = [] norms = [] for L in range((lmax+1)**2): #norm, xyzs = Y_collect(*L_to_lm(L)) norm, xyzs = Y_collect2(*L_to_lm(L)) norms.append(str(norm)) YL.append(zip(xyzs.values(), xyzs.keys())) print('Y_L = [') for L, Y in enumerate(YL): l = sqrt(L) if l % 1 == 0: print(' #' + 'spdfghijklmn'[int(l)] + ':') print(' %s,' % Y) print(']') print('norms =', norms) if __name__ == '__main__': construct_spherical_harmonic_c_code() gpaw-24.1.0/c/bmgs/spherical_harmonics.h000066400000000000000000000152611454550013000200760ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #define spherical_harmonics(l, f, x, y, z, r2, p) (\ {\ switch(l)\ {\ case 0:\ p[0] = f * 0.28209479177387814;\ break;\ case 1:\ p[0] = f * 0.48860251190291992 * y;\ p[1] = f * 0.48860251190291992 * z;\ p[2] = f * 0.48860251190291992 * x;\ break;\ case 2:\ p[0] = f * 1.0925484305920792 * x*y;\ p[1] = f * 1.0925484305920792 * y*z;\ p[2] = f * 0.31539156525252005 * (-r2+3*z*z);\ p[3] = f * 1.0925484305920792 * x*z;\ p[4] = f * 0.54627421529603959 * (-y*y+x*x);\ break;\ case 3:\ p[0] = f * 0.59004358992664352 * (-y*y*y+3*x*x*y);\ p[1] = f * 2.8906114426405538 * x*y*z;\ p[2] = f * 0.45704579946446577 * (5*y*z*z-y*r2);\ p[3] = f * 0.3731763325901154 * (-3*z*r2+5*z*z*z);\ p[4] = f * 0.45704579946446577 * (-x*r2+5*x*z*z);\ p[5] = f * 1.4453057213202769 * (-y*y*z+x*x*z);\ p[6] = f * 0.59004358992664352 * (x*x*x-3*x*y*y);\ break;\ case 4:\ p[0] = f * 2.5033429417967046 * (x*x*x*y-x*y*y*y);\ p[1] = f * 1.7701307697799307 * (3*x*x*y*z-y*y*y*z);\ p[2] = f * 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);\ p[3] = f * 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);\ p[4] = f * 0.10578554691520431 * (3*r2*r2-30*z*z*r2+35*z*z*z*z);\ p[5] = f * 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);\ p[6] = f * 0.47308734787878004 * (y*y*r2+7*x*x*z*z-x*x*r2-7*y*y*z*z);\ p[7] = f * 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);\ p[8] = f * 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);\ break;\ default:\ assert(0 == 1);\ }\ }\ )\ #define spherical_harmonics_derivative_x(l, f, x, y, z, r2, p) (\ {\ switch(l)\ {\ case 0:\ p[0] = f * 0;\ break;\ case 1:\ p[0] = f * 0;\ p[1] = f * 0;\ p[2] = f * 0.48860251190291992;\ break;\ case 2:\ p[0] = f * 1.0925484305920792 * y;\ p[1] = f * 0;\ p[2] = f * 0.63078313050504009 * -x;\ p[3] = f * 1.0925484305920792 * z;\ p[4] = f * 1.0925484305920792 * x;\ break;\ case 3:\ p[0] = f * 3.5402615395598613 * x*y;\ p[1] = f * 2.8906114426405538 * y*z;\ p[2] = f * 0.91409159892893155 * -x*y;\ p[3] = f * 2.2390579955406924 * -x*z;\ p[4] = f * 0.45704579946446577 * 
(-r2-2*x*x+5*z*z);\ p[5] = f * 2.8906114426405538 * x*z;\ p[6] = f * 1.7701307697799307 * (-y*y+x*x);\ break;\ case 4:\ p[0] = f * 2.5033429417967046 * (-y*y*y+3*x*x*y);\ p[1] = f * 10.620784618679583 * x*y*z;\ p[2] = f * 0.94617469575756008 * (7*y*z*z-y*r2-2*x*x*y);\ p[3] = f * 4.0142792613437353 * -x*y*z;\ p[4] = f * 1.2694265629824517 * (x*r2-5*x*z*z);\ p[5] = f * 0.66904654355728921 * (-3*z*r2-6*x*x*z+7*z*z*z);\ p[6] = f * 0.94617469575756008 * (-x*r2-x*x*x+x*y*y+7*x*z*z);\ p[7] = f * 5.3103923093397913 * (-y*y*z+x*x*z);\ p[8] = f * 2.5033429417967046 * (-3*x*y*y+x*x*x);\ break;\ default:\ assert(0 == 1);\ }\ }\ )\ #define spherical_harmonics_derivative_y(l, f, x, y, z, r2, p) (\ {\ switch(l)\ {\ case 0:\ p[0] = f * 0;\ break;\ case 1:\ p[0] = f * 0.48860251190291992;\ p[1] = f * 0;\ p[2] = f * 0;\ break;\ case 2:\ p[0] = f * 1.0925484305920792 * x;\ p[1] = f * 1.0925484305920792 * z;\ p[2] = f * 0.63078313050504009 * -y;\ p[3] = f * 0;\ p[4] = f * 1.0925484305920792 * -y;\ break;\ case 3:\ p[0] = f * 1.7701307697799307 * (-y*y+x*x);\ p[1] = f * 2.8906114426405538 * x*z;\ p[2] = f * 0.45704579946446577 * (-2*y*y-r2+5*z*z);\ p[3] = f * 2.2390579955406924 * -y*z;\ p[4] = f * 0.91409159892893155 * -x*y;\ p[5] = f * 2.8906114426405538 * -y*z;\ p[6] = f * 3.5402615395598613 * -x*y;\ break;\ case 4:\ p[0] = f * 2.5033429417967046 * (x*x*x-3*x*y*y);\ p[1] = f * 5.3103923093397913 * (-y*y*z+x*x*z);\ p[2] = f * 0.94617469575756008 * (-x*r2-2*x*y*y+7*x*z*z);\ p[3] = f * 0.66904654355728921 * (-6*y*y*z-3*z*r2+7*z*z*z);\ p[4] = f * 1.2694265629824517 * (-5*y*z*z+y*r2);\ p[5] = f * 4.0142792613437353 * -x*y*z;\ p[6] = f * 0.94617469575756008 * (y*y*y-7*y*z*z+y*r2-x*x*y);\ p[7] = f * 10.620784618679583 * -x*y*z;\ p[8] = f * 2.5033429417967046 * (y*y*y-3*x*x*y);\ break;\ default:\ assert(0 == 1);\ }\ }\ )\ #define spherical_harmonics_derivative_z(l, f, x, y, z, r2, p) (\ {\ switch(l)\ {\ case 0:\ p[0] = f * 0;\ break;\ case 1:\ p[0] = f * 0;\ p[1] = f * 0.48860251190291992;\ 
p[2] = f * 0;\ break;\ case 2:\ p[0] = f * 0;\ p[1] = f * 1.0925484305920792 * y;\ p[2] = f * 1.2615662610100802 * z;\ p[3] = f * 1.0925484305920792 * x;\ p[4] = f * 0;\ break;\ case 3:\ p[0] = f * 0;\ p[1] = f * 2.8906114426405538 * x*y;\ p[2] = f * 3.6563663957157262 * y*z;\ p[3] = f * 1.1195289977703462 * (-r2+3*z*z);\ p[4] = f * 3.6563663957157262 * x*z;\ p[5] = f * 1.4453057213202769 * (-y*y+x*x);\ p[6] = f * 0;\ break;\ case 4:\ p[0] = f * 0;\ p[1] = f * 1.7701307697799307 * (-y*y*y+3*x*x*y);\ p[2] = f * 11.354096349090721 * x*y*z;\ p[3] = f * 2.0071396306718676 * (5*y*z*z-y*r2);\ p[4] = f * 1.6925687506432689 * (-3*z*r2+5*z*z*z);\ p[5] = f * 2.0071396306718676 * (-x*r2+5*x*z*z);\ p[6] = f * 5.6770481745453605 * (-y*y*z+x*x*z);\ p[7] = f * 1.7701307697799307 * (x*x*x-3*x*y*y);\ p[8] = f * 0;\ break;\ default:\ assert(0 == 1);\ }\ }\ )\ gpaw-24.1.0/c/bmgs/spline.c000066400000000000000000000040621454550013000153430ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #include #include #include #include "bmgs.h" bmgsspline bmgs_spline(int l, double dr, int nbins, double* f) { double c = 3.0 / (dr * dr); double* f2 = (double*)malloc((nbins + 1) * sizeof(double)); assert(f2 != NULL); double* u = (double*)malloc(nbins * sizeof(double)); assert(u != NULL); f2[0] = -0.5; u[0] = (f[1] - f[0]) * c; for (int b = 1; b < nbins; b++) { double p = 0.5 * f2[b - 1] + 2.0; f2[b] = -0.5 / p; u[b] = ((f[b + 1] - 2.0 * f[b] + f[b - 1]) * c - 0.5 * u[b - 1]) / p; } f2[nbins] = ((f[nbins - 1] * c - 0.5 * u[nbins - 1]) / (0.5 * f2[nbins - 1] + 1.0)); for (int b = nbins - 1; b >= 0; b--) f2[b] = f2[b] * f2[b + 1] + u[b]; double* data = (double*)malloc(4 * (nbins + 1) * sizeof(double)); assert(data != NULL); bmgsspline spline = {l, dr, nbins, data}; for (int b = 0; b < nbins; b++) { *data++ = f[b]; *data++ = (f[b + 1] - f[b]) / dr - (f2[b] / 3 + f2[b + 1] / 6) * dr; *data++ = 0.5 * f2[b]; *data++ = (f2[b + 1] - f2[b]) / (6 * dr); } data[0] = 0.0; data[1] = 0.0; data[2] = 0.0; data[3] = 0.0; free(u); free(f2); return spline; } double bmgs_splinevalue(const bmgsspline* spline, double r) { int b = r / spline->dr; if (b >= spline->nbins) return 0.0; double u = r - b * spline->dr; double* s = spline->data + 4 * b; return s[0] + u * (s[1] + u * (s[2] + u * s[3])); } void bmgs_get_value_and_derivative(const bmgsspline* spline, double r, double *f, double *dfdr) { int b = r / spline->dr; if (b >= spline->nbins) { *f = 0.0; *dfdr = 0.0; return; } double u = r - b * spline->dr; double* s = spline->data + 4 * b; *f = s[0] + u * (s[1] + u * (s[2] + u * s[3])); *dfdr = s[1] + u * (2.0 * s[2] + u * 3.0 * s[3]); } void bmgs_deletespline(bmgsspline* spline) { free(spline->data); } gpaw-24.1.0/c/bmgs/stencils.c000066400000000000000000000115551454550013000157020ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #include #include "bmgs.h" // Expansion coefficients for finite difference Laplacian. The numbers are // from J. R. Chelikowsky et al., Phys. Rev. B 50, 11355 (1994): bmgsstencil bmgs_stencil(int ncoefs, const double* coefs, const long* offsets, int r, const long n[3]) { bmgsstencil stencil = {ncoefs, (double*)malloc(ncoefs * sizeof(double)), (long*)malloc(ncoefs * sizeof(long)), {n[0], n[1], n[2]}, {2 * r * (n[2] + 2 * r) * (n[1] + 2 * r), 2 * r * (n[2] + 2 * r), 2 * r}}; assert((stencil.coefs != NULL) && (stencil.offsets != NULL)); memcpy(stencil.coefs, coefs, ncoefs * sizeof(double)); memcpy(stencil.offsets, offsets, ncoefs * sizeof(long)); return stencil; } static const double laplace[4][5] = {{-2.0, 1.0, 0.0, 0.0, 0.0}, {-5.0/2.0, 4.0/3.0, -1.0/12.0, 0.0, 0.0}, {-49.0/18.0, 3.0/2.0, -3.0/20.0, 1.0/90.0, 0.0}, {-205.0/72.0, 8.0/5.0, -1.0/5.0, 8.0/315.0, -1.0/560.0}}; bmgsstencil bmgs_laplace(int k, double scale, const double h[3], const long n[3]) { int ncoefs = 3 * k - 2; double* coefs = (double*)malloc(ncoefs * sizeof(double)); long* offsets = (long*)malloc(ncoefs * sizeof(long)); assert((coefs != NULL) && (offsets != NULL)); double f1 = 1.0 / (h[0] * h[0]); double f2 = 1.0 / (h[1] * h[1]); double f3 = 1.0 / (h[2] * h[2]); int r = (k - 1) / 2; // range double s[3] = {(n[2] + 2 * r) * (n[1] + 2 * r), n[2] + 2 * r, 1}; int m = 0; for (int j = 1; j <= r; j++) { double c = scale * laplace[r - 1][j]; coefs[m] = c * f1; offsets[m++] = -j * s[0]; coefs[m] = c * f1; offsets[m++] = +j * s[0]; coefs[m] = c * f2; offsets[m++] = -j * s[1]; coefs[m] = c * f2; offsets[m++] = +j * s[1]; coefs[m] = c * f3; offsets[m++] = -j; coefs[m] = c * f3; offsets[m++] = +j; } double c = scale * laplace[r - 1][0]; coefs[m] = c * (f1 + f2 + f3); offsets[m] = 0; bmgsstencil stencil = {ncoefs, coefs, offsets, {n[0], n[1], n[2]}, {2 * r * (n[2] + 2 * r) * (n[1] + 2 * r), 2 * r * (n[2] + 2 * r), 2 * r}}; return stencil; } bmgsstencil bmgs_mslaplaceA(double scale, const double h[3], const 
long n[3]) { int ncoefs = 19; double* coefs = (double*)malloc(ncoefs * sizeof(double)); long* offsets = (long*)malloc(ncoefs * sizeof(long)); assert((coefs != NULL) && (offsets != NULL)); double e[3] = {-scale / (12.0 * h[0] * h[0]), -scale / (12.0 * h[1] * h[1]), -scale / (12.0 * h[2] * h[2])}; double f = -16.0 * (e[0] + e[1] + e[2]); double g[3] = {10.0 * e[0] + 0.125 * f, 10.0 * e[1] + 0.125 * f, 10.0 * e[2] + 0.125 * f}; double s[3] = {(n[2] + 2) * (n[1] + 2), n[2] + 2, 1}; int m = 0; coefs[m] = f; offsets[m++] = 0; for (int j = -1; j <= 1; j += 2) { coefs[m] = g[0]; offsets[m++] = j * s[0]; coefs[m] = g[1]; offsets[m++] = j * s[1]; coefs[m] = g[2]; offsets[m++] = j * s[2]; } for (int j = -1; j <= 1; j += 2) for (int k = -1; j <= 1; j += 2) { coefs[m] = e[1] + e[2]; offsets[m++] = -j * s[1] - k * s[2]; coefs[m] = e[0] + e[2]; offsets[m++] = -j * s[0] - k * s[2]; coefs[m] = e[0] + e[1]; offsets[m++] = -j * s[0] - k * s[1]; } bmgsstencil stencil = {ncoefs, coefs, offsets, {n[0], n[1], n[2]}, {2 * s[0], 2 * s[1], 2}}; return stencil; } bmgsstencil bmgs_mslaplaceB(const long n[3]) { int ncoefs = 7; double* coefs = (double*)malloc(ncoefs * sizeof(double)); long* offsets = (long*)malloc(ncoefs * sizeof(long)); assert((coefs != NULL) && (offsets != NULL)); double s[3] = {(n[2] + 2) * (n[1] + 2), n[2] + 2, 1}; int k = 0; coefs[k] = 0.5; offsets[k++] = 0; for (int j = -1; j <= 1; j += 2) { coefs[k] = 1.0 / 12.0; offsets[k++] = j * s[0]; coefs[k] = 1.0 / 12.0; offsets[k++] = j * s[1]; coefs[k] = 1.0 / 12.0; offsets[k++] = j * s[2]; } bmgsstencil stencil = {ncoefs, coefs, offsets, {n[0], n[1], n[2]}, {2 * s[0], 2 * s[1], 2}}; return stencil; } bmgsstencil bmgs_gradient(int k, int i, double h, const long n[3]) { int ncoefs = k - 1; double* coefs = (double*)malloc(ncoefs * sizeof(double)); long* offsets = (long*)malloc(ncoefs * sizeof(long)); assert((coefs != NULL) && (offsets != NULL)); int r = 1; double s[3] = {(n[2] + 2 * r) * (n[1] + 2 * r), n[2] + 2 * r, 1}; double c = 
0.5 / h; coefs[0] = +c; offsets[0] = +s[i]; coefs[1] = -c; offsets[1] = -s[i]; bmgsstencil stencil = {ncoefs, coefs, offsets, {n[0], n[1], n[2]}, {2 * r * s[0], 2 * r * s[1], 2 * r}}; return stencil; } void bmgs_deletestencil(bmgsstencil* stencil) { free(stencil->coefs); free(stencil->offsets); } gpaw-24.1.0/c/bmgs/tools.py000066400000000000000000000075141454550013000154240ustar00rootroot00000000000000def factorial(x): """Return x!, where x is a non-negative integer""" if x < 2: return 1 else: return x * factorial(x - 1) def gcd(a, b): """Return greatest common divisor of a and b. Uses the euclidian algorithm """ if b == 0: return a else: return gcd(b, a % b) class Rational: """Class used to represent rational numbers as fractions, such that no precision is lost during calculation operations. Example usage with Numeric: import numpy as np from tools import Rational as Q n = np.zeros(4, 'O') array([0 , 0 , 0 , 0 ],'O') n[2:4] = [Q(35,12), Q(36,12)] array([0 , 0 , 35./12 , 3 ],'O') 24 * n array([0 , 0 , 70 , 72 ],'O') np.multiply(n, Q(3,9)) array([0 , 0 , 35./36 , 1 ],'O') """ def __init__(self, nom=0, denom=1): ## assert type(nom) == type(denom) == int # ensure that sign is in the nominator nom = cmp(denom, 0) * nom denom = abs(denom) # reduce fraction q = gcd(nom, denom) self.nom = nom / q self.denom = denom / q def __add__(self, x): if type(x) == float: return float(self) + x elif type(x) == int: x = Rational(x) nom = self.nom * x.denom + x.nom * self.denom denom = self.denom * x.denom return Rational(nom, denom) def __radd__(self, x): return self.__add__(x) def __mul__(self, x): if type(x) == float: return float(self) * x elif type(x) == int: x = Rational(x) return Rational(self.nom * x.nom, self.denom * x.denom) def __rmul__(self, x): return self.__mul__(x) def __neg__(self): return Rational(-self.nom, self.denom) def __pos__(self): return self.copy() def __sub__(self, x): return self.__add__(-x) def __rsub__(self, x): return -self.__sub__(x) def __div__(self, 
x): if type(x) == float: return float(self) / x elif type(x) == int: x = Rational(x) return self.__mul__(Rational(x.denom, x.nom)) def __rdiv__(self, x): if type(x) == float: return x / float(self) elif type(x) == int: x = Rational(x) return x.__mul__(Rational(self.denom, self.nom)) def __pow__(self, p): if p == 0: return Rational(1) if p >= 0 and type(p) == int: return Rational(self.nom**p, self.denom**p) else: return float(self)**p def __mod__(self, x): if type(x) == float: return float(self) % x return Rational(self.nom % (x * self.denom), self.denom) def __rmod__(self, x): if type(x) == int: x = Rational(x) i = self.__int__() return x.__mod__(i) def __abs__(self): return Rational(abs(self.nom), self.denom) def __nonzero__(self): return self.nom.__nonzero__() def __cmp__(self, x): return cmp(float(self), float(x)) def __str__(self): out = str(self.nom) if self.denom != 1: out += './' + str(self.denom) return out def __int__(self): assert self.denom == 1 return self.nom def __float__(self): return float(self.nom) / self.denom def __repr__(self): out = repr(self.nom) if self.denom != 1: out += './' + repr(self.denom) return out def __copy__(self): return Rational(self.nom, self.denom) def floor(self): return int(float(self)) def sqrt(self): return self**.5 def abs(self): return Rational(abs(self.nom), self.denom) def copy(self): return Rational(self.nom, self.denom) gpaw-24.1.0/c/bmgs/translate.c000066400000000000000000000026501454550013000160470ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include #include "bmgs.h" void bmgs_translate(double* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3]) { const double* restrict s = a + start1[2] + (start1[1] + start1[0] * sizea[1]) * sizea[2]; double* restrict d = a + start2[2] + (start2[1] + start2[0] * sizea[1]) * sizea[2]; for (int i0 = 0; i0 < size[0]; i0++) { for (int i1 = 0; i1 < size[1]; i1++) { memcpy(d, s, size[2] * sizeof(double)); s += sizea[2]; d += sizea[2]; } s += sizea[2] * (sizea[1] - size[1]); d += sizea[2] * (sizea[1] - size[1]); } } void bmgs_translatemz(double_complex* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3], double_complex phase) { const double_complex* restrict s = a + start1[2] + (start1[1] + start1[0] * sizea[1]) * sizea[2]; double_complex* restrict d = a + start2[2] + (start2[1] + start2[0] * sizea[1]) * sizea[2]; for (int i0 = 0; i0 < size[0]; i0++) { for (int i1 = 0; i1 < size[1]; i1++) { for (int i2 = 0; i2 < size[2]; i2++) d[i2] = phase * s[i2]; s += sizea[2]; d += sizea[2]; } s += sizea[2] * (sizea[1] - size[1]); d += sizea[2] * (sizea[1] - size[1]); } } gpaw-24.1.0/c/bmgs/wfd.c000066400000000000000000000025671454550013000146410ustar00rootroot00000000000000/* This file (wfd.c) is a modified copy of fd.c * with added support for nonlocal operator weights. * The original copyright note of fd.c follows: * Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include "../extensions.h" #include "bmgs.h" void Z(bmgs_wfd)(int nweights, const bmgsstencil* stencils, const double** weights, const T* a, T* b) { a += (stencils[0].j[0] + stencils[0].j[1] + stencils[0].j[2]) / 2; const int n0 = stencils[0].n[0]; const int n1 = stencils[0].n[1]; const int n2 = stencils[0].n[2]; const int j1 = stencils[0].j[1]; const int j2 = stencils[0].j[2]; for (int i0 = 0; i0 < n0; i0++) { const T* aa = a + i0 * (j1 + n1 * (j2 + n2)); T* bb = b + i0 * n1 * n2; for (int i1 = 0; i1 < n1; i1++) { for (int i2 = 0; i2 < n2; i2++) { T x = 0.0; for (int iw = 0; iw < nweights; iw++) { const bmgsstencil* s = &(stencils[iw]); T tmp = 0.0; for (int c = 0; c < s->ncoefs; c++) tmp += aa[s->offsets[c]] * s->coefs[c]; tmp *= weights[iw][0]; x += tmp; weights[iw]++; } *bb++ = x; aa++; } aa += j2; } } } gpaw-24.1.0/c/bmgs/wrelax.c000066400000000000000000000055351454550013000153610ustar00rootroot00000000000000/* This file (wrelax.c) is a modified copy of relax.c * with added support for nonlocal operator weights. * The original copyright note of relax.c follows: * Copyright (C) 2003-2007 CAMP * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include "bmgs.h" void bmgs_wrelax(const int relax_method, const int nweights, const bmgsstencil* stencils, const double** weights, double* a, double* b, const double* src, const double w) { const int n0 = stencils[0].n[0]; const int n1 = stencils[0].n[1]; const int n2 = stencils[0].n[2]; const int j0 = stencils[0].j[0]; const int j1 = stencils[0].j[1]; const int j2 = stencils[0].j[2]; a += (j0 + j1 + j2) / 2; if (relax_method == 1) { /* Weighted Gauss-Seidel relaxation for the equation "operator" b = src a contains the temporary array holding also the boundary values. 
*/ for (int i0 = 0; i0 < n0; i0++) { for (int i1 = 0; i1 < n1; i1++) { for (int i2 = 0; i2 < n2; i2++) { double x = 0.0; double coef = 0.0; for (int iw = 0; iw < nweights; iw++) { double weight = weights[iw][0]; double tmp = 0.0; const bmgsstencil* s = &(stencils[iw]); for (int c = 1; c < s->ncoefs; c++) tmp += a[s->offsets[c]] * s->coefs[c]; tmp *= weight; x += tmp; coef += weight * s->coefs[0]; weights[iw]++; } x = (*src - x) / coef; *b++ = x; *a++ = x; src++; } a += j2; } a += j1; } } else { /* Weighted Jacobi relaxation for the equation "operator" b = src a contains the temporariry array holding also the boundary values. */ double temp; for (int i0 = 0; i0 < n0; i0++) { for (int i1 = 0; i1 < n1; i1++) { for (int i2 = 0; i2 < n2; i2++) { double x = 0.0; double coef = 0.0; for (int iw = 0; iw < nweights; iw++) { double weight = weights[iw][0]; double tmp = 0.0; const bmgsstencil* s = &(stencils[iw]); for (int c = 1; c < s->ncoefs; c++) tmp += a[s->offsets[c]] * s->coefs[c]; tmp *= weight; x += tmp; coef += weight * s->coefs[0]; weights[iw]++; } temp = (1.0 - w) * *b + w * (*src - x) / coef; *b++ = temp; a++; src++; } a += j2; } a += j1; } } } gpaw-24.1.0/c/bmgs/zero.c000066400000000000000000000006761454550013000150370ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include #include "bmgs.h" void Z(bmgs_zero)(T* a, const int n[3], const int c[3], const int s[3]) { a += c[2] + (c[1] + c[0] * n[1]) * n[2]; for (int i0 = 0; i0 < s[0]; i0++) { for (int i1 = 0; i1 < s[1]; i1++) { memset(a, 0, s[2] * sizeof(T)); a += n[2]; } a += n[2] * (n[1] - s[1]); } } gpaw-24.1.0/c/constraints.c000066400000000000000000000430741454550013000154760ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" #define VELOCITY_VERLET_ADJUST_POSITION_TOL 1e-13 #define VELOCITY_VERLET_ADJUST_VELOCITY_TOL 1e-13 #define COULOMB_CONSTANT 14.399645351950548 // #define FF_DEBUG_M 1 #ifdef __clang__ #define INLINE static #else #define INLINE static inline #endif // Index notation // v is coordinate (x,y or z) // x is distance index (1-2, 2-3, or 3-1) // n is water index // i is atom index (O, H1 or H2) INLINE void vec3_sub(double* target_v, double* a_v, double* b_v) { target_v[0] = a_v[0] - b_v[0]; target_v[1] = a_v[1] - b_v[1]; target_v[2] = a_v[2] - b_v[2]; } INLINE void vec3_submin(double* target_v, double* a_v, double* b_v, unsigned char* pbc, double* celldiag) { for (unsigned int v=0; v<3; v++) if (pbc[v]) { double s = a_v[v] - b_v[v]; s -= round(s / celldiag[v])*celldiag[v]; target_v[v] = s; } else { target_v[v] = a_v[v] - b_v[v]; } } INLINE void vec9_diffs(double* target_xv, double* R_iv) { // R1-R2 target_xv[0] = R_iv[0*3 + 0] - R_iv[1*3 + 0]; // dOH1_x target_xv[1] = R_iv[0*3 + 1] - R_iv[1*3 + 1]; // dOH1_y target_xv[2] = R_iv[0*3 + 2] - R_iv[1*3 + 2]; // dOH1_z // R2-R3 target_xv[3] = R_iv[1*3 + 0] - R_iv[2*3 + 0]; // dOH2_x target_xv[4] = R_iv[1*3 + 1] - R_iv[2*3 + 1]; // dOH2_y target_xv[5] = R_iv[1*3 + 2] - R_iv[2*3 + 2]; // dOH2_z // R3-R1 target_xv[6] = R_iv[2*3 + 0] - R_iv[0*3 + 0]; // dH1H2_x target_xv[7] = R_iv[2*3 + 1] - R_iv[0*3 + 1]; // dH1H2_y target_xv[8] = R_iv[2*3 + 2] - R_iv[0*3 + 2]; // dH1H2_z } INLINE void vec9_massdiffs(double* target_xv, double* 
im_i, double* P_iv) { // R1-R2 target_xv[0] = im_i[0] * P_iv[0*3 + 0] - im_i[1] * P_iv[1*3 + 0]; // dOH1_x target_xv[1] = im_i[0] * P_iv[0*3 + 1] - im_i[1] * P_iv[1*3 + 1]; // dOH1_y target_xv[2] = im_i[0] * P_iv[0*3 + 2] - im_i[1] * P_iv[1*3 + 2]; // dOH1_z // R2-R3 target_xv[3] = im_i[1] * P_iv[1*3 + 0] - im_i[2] * P_iv[2*3 + 0]; // dOH2_x target_xv[4] = im_i[1] * P_iv[1*3 + 1] - im_i[2] * P_iv[2*3 + 1]; // dOH2_y target_xv[5] = im_i[1] * P_iv[1*3 + 2] - im_i[2] * P_iv[2*3 + 2]; // dOH2_z // R3-R1 target_xv[6] = im_i[2] * P_iv[2*3 + 0] - im_i[0] * P_iv[0*3 + 0]; // dH1H2_x target_xv[7] = im_i[2] * P_iv[2*3 + 1] - im_i[0] * P_iv[0*3 + 1]; // dH1H2_y target_xv[8] = im_i[2] * P_iv[2*3 + 2] - im_i[0] * P_iv[0*3 + 2]; // dH1H2_z } INLINE void vec9_dot(double* lambda_x, double* newd_xv, double* d_xv) { lambda_x[0] = newd_xv[0*3 + 0] * d_xv[0*3 + 0] + newd_xv[0*3 + 1] * d_xv[0*3 + 1] + newd_xv[0*3 + 2] * d_xv[0*3 + 2]; lambda_x[1] = newd_xv[1*3 + 0] * d_xv[1*3 + 0] + newd_xv[1*3 + 1] * d_xv[1*3 + 1] + newd_xv[1*3 + 2] * d_xv[1*3 + 2]; lambda_x[2] = newd_xv[2*3 + 0] * d_xv[2*3 + 0] + newd_xv[2*3 + 1] * d_xv[2*3 + 1] + newd_xv[2*3 + 2] * d_xv[2*3 + 2]; } INLINE void vec3_imul(double* target_x, double* a_x) { target_x[0]*= a_x[0]; target_x[1]*= a_x[1]; target_x[2]*= a_x[2]; } INLINE void vec3_div(double* target_x, double* a_x, double* b_x) { target_x[0]= a_x[0] / b_x[0]; target_x[1]= a_x[1] / b_x[1]; target_x[2]= a_x[2] / b_x[2]; } INLINE void vec3_axpy(double* target_v, double coeff, double* a_v) { target_v[0] += coeff * a_v[0]; target_v[1] += coeff * a_v[1]; target_v[2] += coeff * a_v[2]; } INLINE double sqr(double a) { return a*a; } INLINE void vec9_sqrsum(double* target_x, double* R_xv) { target_x[0] = sqr(R_xv[0*3 + 0]) + sqr(R_xv[0*3 + 1]) + sqr(R_xv[0*3 + 2]); target_x[1] = sqr(R_xv[1*3 + 0]) + sqr(R_xv[1*3 + 1]) + sqr(R_xv[1*3 + 2]); target_x[2] = sqr(R_xv[2*3 + 0]) + sqr(R_xv[2*3 + 1]) + sqr(R_xv[2*3 + 2]); } INLINE double vec3_sqrsum(double* R_v) { return 
sqr(R_v[0]) + sqr(R_v[1]) + sqr(R_v[2]); } void vec9_print(char* title, double* target_iv) { printf("%s:\n", title); printf("%20.15f %20.15f %20.15f\n", target_iv[0], target_iv[1], target_iv[2]); printf("%20.15f %20.15f %20.15f\n", target_iv[3], target_iv[4], target_iv[5]); printf("%20.15f %20.15f %20.15f\n", target_iv[6], target_iv[7], target_iv[8]); } void vec3_print(char* title, double* target_v) { printf("%s:\n", title); printf("%20.15f %20.15f %20.15f\n", target_v[0], target_v[1], target_v[2]); } INLINE double coulomb(double Za, double Zb, double* Ra_v, double* Rb_v, double* Fa_v, double* Fb_v, unsigned char* pbc, double* celldiag) { double d_v[3]; vec3_submin(d_v, Ra_v, Rb_v, pbc, celldiag); double r2 = vec3_sqrsum(d_v); double r = sqrt(r2); double E = COULOMB_CONSTANT * Za * Zb / r; double str = E / r2; vec3_axpy(Fa_v, str, d_v); vec3_axpy(Fb_v, -str, d_v); return E; } INLINE double coulomb_cutoff(double Za, double Zb, double t, double* Ra_v, double* Rb_v, double* Fa_v, double* Fb_v, unsigned char* pbc, double* celldiag) { double d_v[3]; vec3_submin(d_v, Ra_v, Rb_v, pbc, celldiag); double r2 = vec3_sqrsum(d_v); double r = sqrt(r2); double E = COULOMB_CONSTANT * Za * Zb / r; double str = E / r2 * t; vec3_axpy(Fa_v, str, d_v); vec3_axpy(Fb_v, -str, d_v); return E; // The energy is not correct energy, it is multiplied later with t } INLINE double LJ(double A, double B, double* Ra_v, double* Rb_v, double* Fa_v, double* Fb_v, unsigned char* pbc, double* celldiag) { double d_v[3]; vec3_submin(d_v, Ra_v, Rb_v, pbc, celldiag); double r2 = vec3_sqrsum(d_v); double r4 = r2*r2; double r6 = r4*r2; double r8 = r4*r4; double r12 = r8*r4; double r14 = r12*r2; double str = 12*A/r14+6*B/r8; vec3_axpy(Fa_v, str, d_v); vec3_axpy(Fb_v, -str, d_v); return A / r12 + B/r6; } INLINE double pair_interaction_cutoff(double A, double B, double cutoff, double width, double* Z_i, double* Ra_iv, double* Rb_iv, double* Fa_iv, double* Fb_iv, unsigned char* pbc, double* celldiag) { double 
d_v[3]; vec3_submin(d_v, Ra_iv + 0*3, Rb_iv + 0*3, pbc, celldiag); double r2 = vec3_sqrsum(d_v); double t; double dtdr; double r; if (r2 > (cutoff*cutoff)) { return 0.0; } if (r2 < (cutoff-width)*(cutoff-width)) { t = 1; dtdr = 0; r = 1; // XXX Dummy value, this is not used because dtdr = 0 } else { r = sqrt(r2); double y = (r - cutoff + width) / width; t = 1.0-y*y * (3.0 - 2.0 * y); dtdr = -6.0 / width * y * (1.0 - y); } double r4 = r2*r2; double r6 = r4*r2; double r8 = r4*r4; double r12 = r8*r4; double r14 = r12*r2; double E = (A / r12 + B/r6); double str = (12*A/r14+6*B/r8)* t - E*dtdr / r; E *= t; vec3_axpy(Fa_iv + 0*3, str, d_v); vec3_axpy(Fb_iv + 0*3, -str, d_v); for (unsigned int i1 = 0; i1 < 3; i1++) for (unsigned int i2 = 0; i2 < 3; i2++) { double Ept = coulomb_cutoff(Z_i[i1], Z_i[i2], t, Ra_iv+i1*3, Rb_iv+i2*3, Fa_iv+i1*3, Fb_iv+i2*3, pbc, celldiag); E+= Ept * t; double str = Ept / r * dtdr; vec3_axpy(Fa_iv + 0*3, -str, d_v); // Note that these are added to O, even for H-H interaction. vec3_axpy(Fb_iv + 0*3, str, d_v); } return E; } PyObject* adjust_positions(PyObject *self, PyObject *args) { PyArrayObject* arraylen_x = 0; // Input: the 3 constraint lengths PyArrayObject* arraymass_i = 0; // Input: the 3 masses PyArrayObject* arrayR_niv = 0; // Output: Adjusted positions will be written here. PyArrayObject* arraynewR_niv = 0; // Input: gives the positions to be adjusted. 
if (!PyArg_ParseTuple(args, "OOOO", &arraylen_x, &arraymass_i, &arrayR_niv, &arraynewR_niv)) { return NULL; } unsigned int NA = PyArray_DIM(arrayR_niv, 0); if (NA % 3 != 0) { PyErr_SetString(PyExc_TypeError, "Number of atoms not divisible with 3."); return NULL; } unsigned int NW = NA / 3; if (!(PyArray_NDIM(arraymass_i) == 1 && PyArray_DIM(arraymass_i,0) == 3)) { PyErr_SetString(PyExc_TypeError, "mass_i should be array with length 3."); return NULL; } if (!(PyArray_NDIM(arraylen_x) == 1 && PyArray_DIM(arraylen_x,0) == 3)) { PyErr_SetString(PyExc_TypeError, "len_x should be array with length 3."); return NULL; } double* len_x = DOUBLEP(arraylen_x); double* mass_i = DOUBLEP(arraymass_i); double len2_x[3]; len2_x[0] = sqr(len_x[0]); len2_x[1] = sqr(len_x[1]); len2_x[2] = sqr(len_x[2]); double mu_x[3]; // Reduced masses of pairs mu_x[0] = 1.0 / ( (1.0/mass_i[0]) + (1.0/mass_i[1]) ); mu_x[1] = 1.0 / ( (1.0/mass_i[1]) + (1.0/mass_i[2]) ); mu_x[2] = 1.0 / ( (1.0/mass_i[2]) + (1.0/mass_i[0]) ); double invm_i[3]; // Inverse masses of atoms invm_i[0] = 0.5 / mass_i[0]; // Note: /2 is embedded here invm_i[1] = 0.5 / mass_i[1]; invm_i[2] = 0.5 / mass_i[2]; double* R_niv = DOUBLEP(arrayR_niv); double* newR_niv = DOUBLEP(arraynewR_niv); for (unsigned int n=0; n 1000) { printf("Warning: Adjust positions did not converge.\n"); break; } if ((fabs(f_x[0]) < VELOCITY_VERLET_ADJUST_POSITION_TOL) && (fabs(f_x[1]) < VELOCITY_VERLET_ADJUST_POSITION_TOL) && (fabs(f_x[2]) < VELOCITY_VERLET_ADJUST_POSITION_TOL)) { break; } double lambda_x[3]; double denom_x[3]; // Calculate lambdas vec9_dot(denom_x, newd_xv, d_xv); // (R1-R2) . (R1'-R2') etc. vec3_div(lambda_x, f_x, denom_x); // f_12 / ( (R1-R2) . (R1'-R2') etc. vec3_imul(lambda_x, mu_x); // lambda /= (m_1^-1 + m_2^-1) etc. 
#ifdef FF_DEBUG vec3_print("lambda_x", lambda_x); #endif // Update newR's // newR_1 += -lambda12 * (R1-R2) vec3_axpy(newR_niv + n*9 + 0*3, -lambda_x[0] * invm_i[0], d_xv + 0*3); // newR_1 += lambda31 * (R3-R1) vec3_axpy(newR_niv + n*9 + 0*3, lambda_x[2] * invm_i[0], d_xv + 2*3); // newR_2 += +lambda12 * (R1-R2) vec3_axpy(newR_niv + n*9 + 1*3, +lambda_x[0] * invm_i[1], d_xv + 0*3); // newR_2 += -lambda23 * (R2-R3) vec3_axpy(newR_niv + n*9 + 1*3, -lambda_x[1] * invm_i[1], d_xv + 1*3); // newR_3 += +lambda23 * (R2-R3) vec3_axpy(newR_niv + n*9 + 2*3, +lambda_x[1] * invm_i[2], d_xv + 1*3); // newR_3 += -lambda31 * (R3-R1) vec3_axpy(newR_niv + n*9 + 2*3, -lambda_x[2] * invm_i[2], d_xv + 2*3); } } Py_RETURN_NONE; } PyObject* adjust_momenta(PyObject *self, PyObject *args) { PyArrayObject* arraymass_i = 0; // Input: the 3 masses PyArrayObject* arrayR_niv = 0; // PyArrayObject* arraynewP_niv = 0; // if (!PyArg_ParseTuple(args, "OOO", &arraymass_i, &arrayR_niv, &arraynewP_niv)) { return NULL; } unsigned int NA = PyArray_DIM(arrayR_niv, 0); if (NA % 3 != 0) { PyErr_SetString(PyExc_TypeError, "Number of atoms not divisible with 3."); return NULL; } unsigned int NW = NA / 3; if (!(PyArray_NDIM(arraymass_i) == 1 && PyArray_DIM(arraymass_i,0) == 3)) { PyErr_SetString(PyExc_TypeError, "mass_i should be array with length 3."); return NULL; } double* mass_i = DOUBLEP(arraymass_i); double invm_i[3]; // Inverse masses of atoms invm_i[0] = 1.0 / mass_i[0]; invm_i[1] = 1.0 / mass_i[1]; invm_i[2] = 1.0 / mass_i[2]; double mu_x[3]; // Reduced masses of pairs mu_x[0] = 1.0 / ( (1.0/mass_i[0]) + (1.0/mass_i[1]) ); mu_x[1] = 1.0 / ( (1.0/mass_i[1]) + (1.0/mass_i[2]) ); mu_x[2] = 1.0 / ( (1.0/mass_i[2]) + (1.0/mass_i[0]) ); double* R_niv = DOUBLEP(arrayR_niv); double* newP_niv = DOUBLEP(arraynewP_niv); for (unsigned int n=0; n 1000) { printf("Warning: Adjust velocities did not converge.\n"); break; } if ((fabs(g_x[0]) < VELOCITY_VERLET_ADJUST_VELOCITY_TOL) && (fabs(g_x[1]) < 
VELOCITY_VERLET_ADJUST_VELOCITY_TOL) && (fabs(g_x[2]) < VELOCITY_VERLET_ADJUST_VELOCITY_TOL)) { break; } double lambda_x[3]; double denom_x[3]; vec9_dot(denom_x, d_xv, d_xv); // (R1-R2)^2 etc. vec3_div(lambda_x, g_x, denom_x); // g_12 / (R1-R2)^2 vec3_imul(lambda_x, mu_x); // lambda /= (m_1^-1 + m_2^-1) etc. #ifdef FF_DEBUG_M vec3_print("lambda_x", lambda_x); #endif // Update newR's // newR_1 += -lambda12 * (R1-R2) vec3_axpy(newP_niv + n*9 + 0*3, -lambda_x[0], d_xv + 0*3); // newR_1 += lambda31 * (R3-R1) vec3_axpy(newP_niv + n*9 + 0*3, lambda_x[2], d_xv + 2*3); // newR_2 += +lambda12 * (R1-R2) vec3_axpy(newP_niv + n*9 + 1*3, +lambda_x[0], d_xv + 0*3); // newR_2 += -lambda23 * (R2-R3) vec3_axpy(newP_niv + n*9 + 1*3, -lambda_x[1], d_xv + 1*3); // newR_3 += +lambda23 * (R2-R3) vec3_axpy(newP_niv + n*9 + 2*3, +lambda_x[1], d_xv + 1*3); // newR_3 += -lambda31 * (R3-R1) vec3_axpy(newP_niv + n*9 + 2*3, -lambda_x[2], d_xv + 2*3); } } Py_RETURN_NONE; } PyObject* calculate_forces_H2O(PyObject *self, PyObject *args) { double A; double B; PyArrayObject* arraypbc = 0; PyArrayObject* arrayZ_i = 0; PyArrayObject* arrayR_niv = 0; PyArrayObject* arrayF_niv = 0; PyArrayObject* arraycell = 0; double cutoff=0; double width=0; if (!PyArg_ParseTuple(args, "OOddddOOO", &arraypbc, &arraycell, &A, &B, &cutoff, &width, &arrayZ_i, &arrayR_niv, &arrayF_niv)) { return NULL; } if (!(PyArray_NDIM(arraypbc) == 1 && PyArray_DIM(arraypbc,0) == 3)) { PyErr_SetString(PyExc_TypeError, "pbc should be array with length 3."); return NULL; } if (!(PyArray_NDIM(arrayZ_i) == 1 && PyArray_DIM(arrayZ_i,0) == 3)) { PyErr_SetString(PyExc_TypeError, "Z_i should be array with length 3."); return NULL; } if (!(PyArray_NDIM(arraycell) == 2 && (PyArray_DIM(arraycell,0) == 3) && (PyArray_DIM(arraycell,1) == 3))) { PyErr_SetString(PyExc_TypeError, "Cell should be array with size 3x3."); return NULL; } double* cell_vc = DOUBLEP(arraycell); //printf("Cell\n"); for (unsigned int v1=0; v1<3; v1++) for (unsigned int v2=0; 
v2<3; v2++) { //printf("%20.15f \n", cell_vc[v1+v2*3]); if (v1 != v2) if (fabs(cell_vc[v1+v2*3]) > 1e-10) { PyErr_SetString(PyExc_TypeError, "Cell array should be diagonal."); return NULL; } } double celldiag[3]; celldiag[0] = cell_vc[0]; celldiag[1] = cell_vc[1+1*3]; celldiag[2] = cell_vc[2+2*3]; unsigned int NA = PyArray_DIM(arrayR_niv, 0); if (NA % 3 != 0) { PyErr_SetString(PyExc_TypeError, "Number of atoms not divisible with 3."); return NULL; } unsigned char *pbc = (unsigned char*) PyArray_DATA(arraypbc); //printf("Boundary conditions %u %u %u \n", pbc[0], pbc[1], pbc[2]); unsigned int NW = NA / 3; double* Z_i = DOUBLEP(arrayZ_i); double* R_niv = DOUBLEP(arrayR_niv); double* F_niv = DOUBLEP(arrayF_niv); double E = 0.0; if (cutoff <= 0.0) { // No cutoff // Double loop of atoms for (unsigned int n1 = 0; n1 < NW; n1++) for (unsigned int n2 = n1+1; n2 < NW; n2++) { E += LJ(A, B, R_niv+n1*9, R_niv+n2*9, F_niv+n1*9, F_niv+n2*9, pbc, celldiag); for (unsigned int i1 = 0; i1 < 3; i1++) for (unsigned int i2 = 0; i2 < 3; i2++) E+= coulomb(Z_i[i1], Z_i[i2], R_niv+n1*9+i1*3, R_niv+n2*9+i2*3, F_niv+n1*9+i1*3, F_niv+n2*9+i2*3, pbc, celldiag); } } else { // With cutoff // Double loop of atoms for (unsigned int n1 = 0; n1 < NW; n1++) for (unsigned int n2 = n1+1; n2 < NW; n2++) { E += pair_interaction_cutoff(A, B, cutoff, width, Z_i, R_niv+n1*9, R_niv+n2*9, F_niv+n1*9, F_niv+n2*9, pbc, celldiag); } } return Py_BuildValue("d", E); } gpaw-24.1.0/c/elpa.c000066400000000000000000000134761454550013000140530ustar00rootroot00000000000000#if defined(GPAW_WITH_SL) && defined(PARALLEL) && defined(GPAW_WITH_ELPA) #include "extensions.h" #include #include #include "mympi.h" elpa_t* unpack_handleptr(PyObject* handle_obj) { elpa_t* elpa = (elpa_t *)PyArray_DATA((PyArrayObject *)handle_obj); return elpa; } elpa_t unpack_handle(PyObject* handle_obj) { elpa_t* elpa = unpack_handleptr(handle_obj); return *elpa; } PyObject* checkerr(int err) { if(err != ELPA_OK) { const char * errmsg = 
elpa_strerr(err); PyErr_SetString(PyExc_RuntimeError, errmsg); return NULL; } Py_RETURN_NONE; } PyObject* pyelpa_version(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) { return NULL; } #ifdef ELPA_API_VERSION int version = ELPA_API_VERSION; return Py_BuildValue("i", version); #else Py_RETURN_NONE; // This means 'old', e.g. 2018.05.001 #endif } PyObject* pyelpa_set(PyObject *self, PyObject *args) { PyObject *handle_obj; char* varname; int value; if (!PyArg_ParseTuple(args, "Osi", &handle_obj, &varname, &value)) { return NULL; } elpa_t handle = unpack_handle(handle_obj); int err; elpa_set(handle, varname, value, &err); return checkerr(err); } PyObject* pyelpa_init(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; // Globally initialize Elpa library if present: if (elpa_init(20171201) != ELPA_OK) { // What API versions do we support? PyErr_SetString(PyExc_RuntimeError, "Elpa >= 20171201 required"); PyErr_Print(); return NULL; } Py_RETURN_NONE; } PyObject* pyelpa_uninit(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; #ifdef ELPA_API_VERSION // Newer Elpas define their version but older ones don't. 
int elpa_err; elpa_uninit(&elpa_err); if (elpa_err != ELPA_OK) { PyErr_SetString(PyExc_RuntimeError, "elpa_uninit() failed"); return NULL; } #else elpa_uninit(); // 2018.05.001: no errcode #endif Py_RETURN_NONE; } PyObject* pyelpa_allocate(PyObject *self, PyObject *args) { PyObject *handle_obj; if (!PyArg_ParseTuple(args, "O", &handle_obj)) return NULL; elpa_t *handle = unpack_handleptr(handle_obj); int err = 0; handle[0] = elpa_allocate(&err); return checkerr(err); } PyObject* pyelpa_setup(PyObject *self, PyObject *args) { PyObject *handle_obj; if (!PyArg_ParseTuple(args, "O", &handle_obj)) return NULL; elpa_t handle = unpack_handle(handle_obj); int err = elpa_setup(handle); return checkerr(err); } PyObject* pyelpa_set_comm(PyObject *self, PyObject *args) { PyObject *handle_obj; PyObject *gpaw_comm_obj; if(!PyArg_ParseTuple(args, "OO", &handle_obj, &gpaw_comm_obj)) return NULL; elpa_t handle = unpack_handle(handle_obj); MPIObject *gpaw_comm = (MPIObject *)gpaw_comm_obj; MPI_Comm comm = gpaw_comm->comm; int fcomm = MPI_Comm_c2f(comm); int err; elpa_set(handle, "mpi_comm_parent", fcomm, &err); return checkerr(err); } PyObject* pyelpa_constants(PyObject *self, PyObject *args) { if(!PyArg_ParseTuple(args, "")) return NULL; return Py_BuildValue("iii", ELPA_OK, ELPA_SOLVER_1STAGE, ELPA_SOLVER_2STAGE); } PyObject* pyelpa_diagonalize(PyObject *self, PyObject *args) { PyObject *handle_obj; PyArrayObject *A_obj, *C_obj, *eps_obj; if (!PyArg_ParseTuple(args, "OOOO", &handle_obj, &A_obj, &C_obj, &eps_obj)) return NULL; elpa_t handle = unpack_handle(handle_obj); double *a = (double*)PyArray_DATA(A_obj); double *ev = (double*)PyArray_DATA(eps_obj); double *q = (double*)PyArray_DATA(C_obj); int err; elpa_eigenvectors(handle, a, ev, q, &err); return checkerr(err); } PyObject* pyelpa_general_diagonalize(PyObject *self, PyObject *args) { PyObject *handle_obj; PyArrayObject *A_obj, *S_obj, *C_obj, *eps_obj; int is_already_decomposed; if (!PyArg_ParseTuple(args, "OOOOOi", 
&handle_obj, &A_obj, &S_obj, &C_obj, &eps_obj, &is_already_decomposed)) return NULL; elpa_t handle = unpack_handle(handle_obj); int err; double *ev = (double *)PyArray_DATA(eps_obj); double *a = (double *)PyArray_DATA(A_obj); double *b = (double *)PyArray_DATA(S_obj); double *q = (double *)PyArray_DATA(C_obj); if(PyArray_DESCR(A_obj)->type_num == NPY_DOUBLE) { elpa_generalized_eigenvectors(handle, a, b, ev, q, is_already_decomposed, &err); } else { elpa_generalized_eigenvectors(handle, (double complex *)a, (double complex *)b, ev, (double complex *)q, is_already_decomposed, &err); } return checkerr(err); } PyObject *pyelpa_deallocate(PyObject *self, PyObject *args) { PyObject *handle_obj; if(!PyArg_ParseTuple(args, "O", &handle_obj)) { return NULL; } elpa_t handle = unpack_handle(handle_obj); #ifdef ELPA_API_VERSION int err; elpa_deallocate(handle, &err); return checkerr(err); #else // This function provides no error checking in older Elpas Py_RETURN_NONE; #endif } #endif gpaw-24.1.0/c/extensions.h000066400000000000000000000025041454550013000153240ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2005 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #ifndef H_EXTENSIONS #define H_EXTENSIONS #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include /* If strict ANSI, then some useful macros are not defined */ #if defined(__STRICT_ANSI__) && !defined(__DARWIN_UNIX03) # define M_PI 3.14159265358979323846 /* pi */ #endif #ifndef DOUBLECOMPLEXDEFINED # define DOUBLECOMPLEXDEFINED 1 # include typedef double complex double_complex; #endif static inline void* gpaw_malloc(size_t n) { void* p = malloc(n); assert(p != NULL); return p; } #ifdef GPAW_BGP #define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T))) #else #ifdef GPAW_AIX #define GPAW_MALLOC(T, n) (malloc((n) * sizeof(T))) #else #define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T))) #endif #endif #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define INTP(a) ((int*)PyArray_DATA(a)) #define LONGP(a) ((long*)PyArray_DATA(a)) #define DOUBLEP(a) ((double*)PyArray_DATA(a)) #define COMPLEXP(a) ((double_complex*)PyArray_DATA(a)) #endif //H_EXTENSIONS gpaw-24.1.0/c/f2c.h000066400000000000000000000004421454550013000135760ustar00rootroot00000000000000/* Definitions needed by code transferred with f2c */ #include #include typedef int integer; typedef double doublereal; typedef struct { doublereal r, i; } doublecomplex; #ifndef STATIC_NUMERIC inline double pow_dd(double *x, double *y) { return pow(*x,*y); } #endif gpaw-24.1.0/c/fd_preconditioner.c000066400000000000000000000316121454550013000166170ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2005-2020 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
Pure C implementation of preconditioner */ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #ifdef _OPENMP #include #endif #include "extensions.h" #include "operators.h" #include "transformers.h" #include "threading.h" #define DIMS_SAME(a, b) (PyArray_NDIM(a) == PyArray_NDIM(b) && \ memcmp(PyArray_DIMS(a), \ PyArray_DIMS(b), \ PyArray_NDIM(a) * sizeof(PyArray_DIMS(a)[0])) == 0) /* * Array operations for the preconditioner. BLAS routines are not sensible * here since those might be multithreaded. Nested multithreading is not * a smart move performance-wise in general unless you really know what * you are doing. * * With a decent compiler there should be no need for optimization by hand. */ #define ARRAY_NEGATE(name, type) \ static void \ name(type *x, int start, int end, int size) \ { \ int i; \ \ x += start * size; \ for (i = 0; i < (end - start) * size; i++) \ x[i] = -x[i]; \ } ARRAY_NEGATE(array_negate, double) ARRAY_NEGATE(array_negatez, double complex) #define ARRAY_SUB(name, type) \ static void \ name(type *x, const type *y, int start, int end, int size) \ { \ int i; \ \ x += start * size; \ y += start * size; \ for (i = 0; i < (end - start) * size; i++) \ x[i] -= y[i]; \ } ARRAY_SUB(array_sub, double) ARRAY_SUB(array_subz, double complex) #define ARRAY_SUB_MULT(name, type) \ static void \ name(type *x, const type *y, double a, int start, int end, int size) \ { \ int i; \ \ x += start * size; \ y += start * size; \ for (i = 0; i < (end - start) * size; i++) \ x[i] -= a * y[i]; \ } ARRAY_SUB_MULT(array_sub_mult, double) ARRAY_SUB_MULT(array_sub_multz, double complex) #define ARRAY_MULTO(name, type) \ static void \ name(type *x, const type *y, double a, int start, int end, int size) \ { \ int i; \ \ x += start * size; \ y += start * size; \ for (i = 0; i < (end - start) * size; i++) \ x[i] = a * y[i]; \ } ARRAY_MULTO(array_multo, double) ARRAY_MULTO(array_multoz, double complex) /* * Implements the computational 
part of the preconditioner in C. In overall, * it is at least somewhat faster than the original Python code. There is less * overhead. The performance is even better with multiple threads since * synchronizations are largely eliminated and threads can be kept running * throughout the computation. * * This code is a drop-in replacement for the computational Python code. * However, this code does not do administrative tasks, such as buffer * allocation. Those things are done reliably a lot easier in Python. */ PyObject *fd_precond(PyObject *self, PyObject *args) { PyArrayObject *d0, *q0, *r1, *d1, *q1, *r2, *d2, *q2; PyArrayObject *residuals, *nresiduals, *phases; TransformerObject *rest0, *rest1, *intp1, *intp2; OperatorObject *kin0, *kin1, *kin2; double step; int size0, size1, size2; /* Grid sizes on different levels */ int nin; if (PyArg_ParseTuple(args, "OOOOOOOOOOOOOOOOOd|O", &rest0, &rest1, &intp1, &intp2, &kin0, &kin1, &kin2, &d0, &q0, &r1, &d1, &q1, &r2, &d2, &q2, &residuals, &nresiduals, &step, &phases) == 0) return NULL; /* Input and output buffers */ const double* in; double* out; /* * Check parameters. Sometimes it tends to save time... It is anything * but foolproof, however. */ /* * Resolve the number of input grids. Array residuals can contain * either a single or multiple grids. */ nin = 1; if (PyArray_NDIM(residuals) == 4) nin = PyArray_DIMS(residuals)[0]; else if (PyArray_NDIM(residuals) != 3) { PyErr_SetString(PyExc_TypeError, "Bad array dimension."); return NULL; } assert(nin >= 0); /* Paranoia. */ /* Calculate the size of a single grid on every level of coarseness. */ size0 = PyArray_DIMS(d0)[1] * PyArray_DIMS(d0)[2] * PyArray_DIMS(d0)[3]; size1 = PyArray_DIMS(d1)[1] * PyArray_DIMS(d1)[2] * PyArray_DIMS(d1)[3]; size2 = PyArray_DIMS(d2)[1] * PyArray_DIMS(d2)[2] * PyArray_DIMS(d2)[3]; /* * Scrutinize the array shapes since the correctness of upcoming * computatons depends heavily on them. 
*/ if (PyArray_NDIM(d0) != 4 || PyArray_NDIM(q0) != 4 || PyArray_NDIM(r1) != 4 || PyArray_NDIM(d1) != 4 || PyArray_NDIM(q1) != 4 || PyArray_NDIM(r2) != 4 || PyArray_NDIM(d2) != 4 || PyArray_NDIM(q2) != 4) { PyErr_SetString(PyExc_TypeError, "Work arrays do not have 4 dimensions."); return NULL; } if (!DIMS_SAME(residuals, nresiduals)) { PyErr_SetString(PyExc_TypeError, "Arrays residuals and nresiduals do not have the same shape."); return NULL; } if (!DIMS_SAME(d0, q0)) { PyErr_SetString(PyExc_TypeError, "Arrays d0 and q0 do not have the same shape."); return NULL; } if (!DIMS_SAME(d1, r1) || !DIMS_SAME(d1, q1)) { PyErr_SetString(PyExc_TypeError, "Arrays d1, q1, r1 do not have the same shape."); return NULL; } if (!DIMS_SAME(d2, r2) || !DIMS_SAME(d2, q2)) { PyErr_SetString(PyExc_TypeError, "Arrays d2, q2, r2 do not have the same shape."); return NULL; } if ((PyArray_NDIM(residuals) == 3 && memcmp(PyArray_DIMS(residuals), PyArray_DIMS(d0) + 1, 3 * sizeof(PyArray_DIMS(d0)[0])) != 0) && !DIMS_SAME(residuals, d0)) { PyErr_SetString(PyExc_TypeError, "Input grid shape does not match with arrays d0 and q0."); return NULL; } if (nin != PyArray_DIMS(d0)[0] || nin != PyArray_DIMS(d1)[0] || nin != PyArray_DIMS(d2)[0]) { PyErr_SetString(PyExc_TypeError, "Number of input grids does not match with work arrays."); return NULL; } /* Checks for non-strided arrays and contiguous data would be nice. */ bool real = (PyArray_DESCR(residuals)->type_num == NPY_DOUBLE); const double_complex* ph; ph = (real != 0) ? NULL : COMPLEXP(phases); int chunksize = 1; // Use a single chunk for a while #pragma omp parallel { int nthreads, thread_id; int start, end; #ifdef _OPENMP nthreads = omp_get_num_threads(); thread_id = omp_get_thread_num(); #else nthreads = 1; thread_id = 0; #endif /* Partition the grids among threads. */ SHARE_WORK(nin, nthreads, thread_id, &start, &end); /* Restrict (-residuals) -> r1. 
*/ in = DOUBLEP(nresiduals); out = DOUBLEP(r1); transapply_worker(rest0, chunksize, start, end, thread_id, nthreads, in, out, real, ph ); /* d1 <- 4 * step * r1 */ if (real != 0) array_multo(DOUBLEP(d1), DOUBLEP(r1), 4 * step, start, end, size1); else array_multoz(COMPLEXP(d1), COMPLEXP(r1), 4 * step, start, end, size1); /* Apply d1 --kin1--> q1. */ in = DOUBLEP(d1); out = DOUBLEP(q1); apply_worker(kin1, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* q1 -= r1 */ if (real != 0) array_sub(DOUBLEP(q1), DOUBLEP(r1), start, end, size1); else array_subz(COMPLEXP(q1), COMPLEXP(r1), start, end, size1); /* Restrict q1 -> r2. */ in = DOUBLEP(q1); out = DOUBLEP(r2); transapply_worker(rest1, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* d2 <- 16 * step * r2 */ if (real != 0) array_multo(DOUBLEP(d2), DOUBLEP(r2), 16 * step, start, end, size2); else array_multoz(COMPLEXP(d2), COMPLEXP(r2), 16 * step, start, end, size2); /* Apply d2 --kin2--> q2. */ in = DOUBLEP(d2); out = DOUBLEP(q2); apply_worker(kin2, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* * q2 -= r2 * d2 -= 16 * step * q2 */ if (real != 0) { array_sub(DOUBLEP(q2), DOUBLEP(r2), start, end, size2); array_sub_mult(DOUBLEP(d2), DOUBLEP(q2), 16 * step, start, end, size2); } else { array_subz(COMPLEXP(q2), COMPLEXP(r2), start, end, size2); array_sub_multz(COMPLEXP(d2), COMPLEXP(q2), 16 * step, start, end, size2); } /* Interpolate d2 -> q1. */ in = DOUBLEP(d2); out = DOUBLEP(q1); transapply_worker(intp2, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* d1 -= q1 */ if (real != 0) array_sub(DOUBLEP(d1), DOUBLEP(q1), start, end, size1); else array_subz(COMPLEXP(d1), COMPLEXP(q1), start, end, size1); /* Apply d1 --kin1--> q1. 
*/ in = DOUBLEP(d1); out = DOUBLEP(q1); apply_worker(kin1, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* * q1 -= r1 * d1 -= 4 * step * q1 */ if (real != 0) { array_sub(DOUBLEP(q1), DOUBLEP(r1), start, end, size1); array_sub_mult(DOUBLEP(d1), DOUBLEP(q1), 4 * step, start, end, size1); } else { array_subz(COMPLEXP(q1), COMPLEXP(r1), start, end, size1); array_sub_multz(COMPLEXP(d1), COMPLEXP(q1), 4 * step, start, end, size1); } /* Interpolate (-d1) -> d0. Do negation in place. */ if (real != 0) array_negate(DOUBLEP(d1), start, end, size1); else array_negatez(COMPLEXP(d1), start, end, size1); in = DOUBLEP(d1); out = DOUBLEP(d0); transapply_worker(intp1, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* Apply d0 --kin0--> q0. */ in = DOUBLEP(d0); out = DOUBLEP(q0); apply_worker(kin0, chunksize, start, end, thread_id, nthreads, in, out, real, ph); /* * q0 -= residuals * d0 -= step * q0 * d0 *= -1 */ if (real != 0) { array_sub(DOUBLEP(q0), DOUBLEP(residuals), start, end, size0); array_sub_mult(DOUBLEP(d0), DOUBLEP(q0), step, start, end, size0); array_negate(DOUBLEP(d0), start, end, size0); } else { array_subz(COMPLEXP(q0), COMPLEXP(residuals), start, end, size0); array_sub_multz(COMPLEXP(d0), COMPLEXP(q0), step, start, end, size0); array_negatez(COMPLEXP(d0), start, end, size0); } } /* The return value is in d0. The calling code handles it. 
*/ Py_RETURN_NONE; }gpaw-24.1.0/c/fftw.c000066400000000000000000000041301454550013000140630ustar00rootroot00000000000000#ifdef GPAW_WITH_FFTW #define PY_SSIZE_T_CLEAN #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include /* Create plan and return pointer to plan as a string */ PyObject * FFTWPlan(PyObject *self, PyObject *args) { PyArrayObject* in; PyArrayObject* out; int sign; unsigned int flags; if (!PyArg_ParseTuple(args, "OOiI", &in, &out, &sign, &flags)) return NULL; fftw_plan* plan = (fftw_plan*)malloc(sizeof(fftw_plan)); int ndim = PyArray_NDIM(in); int dims_in[ndim]; int dims_out[ndim]; int i; void *indata = PyArray_DATA(in); void *outdata = PyArray_DATA(out); for(i=0; i < ndim; i++) { dims_in[i] = (int)PyArray_DIMS(in)[i]; dims_out[i] = (int)PyArray_DIMS(out)[i]; } if (PyArray_DESCR(in)->type_num == NPY_DOUBLE) { *plan = fftw_plan_dft_r2c(ndim, dims_in, (double *)indata, (fftw_complex *)outdata, flags); } else if (PyArray_DESCR(out)->type_num == NPY_DOUBLE) { *plan = fftw_plan_dft_c2r(ndim, dims_out, (fftw_complex *)indata, (double *)outdata, flags); } else { *plan = fftw_plan_dft(ndim, dims_out, (fftw_complex *)indata, (fftw_complex *)outdata, sign, flags); } return Py_BuildValue("y#", plan, (Py_ssize_t)sizeof(fftw_plan*)); } PyObject * FFTWExecute(PyObject *self, PyObject *args) { fftw_plan* plan; Py_ssize_t n; if (!PyArg_ParseTuple(args, "y#", &plan, &n)) return NULL; fftw_execute(*plan); Py_RETURN_NONE; } PyObject * FFTWDestroy(PyObject *self, PyObject *args) { fftw_plan* plan; Py_ssize_t n; if (!PyArg_ParseTuple(args, "y#", &plan, &n)) return NULL; fftw_destroy_plan(*plan); Py_RETURN_NONE; } #endif // GPAW_WITH_FFTW gpaw-24.1.0/c/gpu/000077500000000000000000000000001454550013000135465ustar00rootroot00000000000000gpaw-24.1.0/c/gpu/bc.c000066400000000000000000000522171454550013000143050ustar00rootroot00000000000000#include "../extensions.h" #include "../bc.h" #include "bmgs.h" #include "gpu.h" #include #include 
#include #include #include static int bc_init_count = 0; static gpuStream_t bc_recv_stream; static int bc_streams = 0; static gpuEvent_t bc_sendcpy_event[3][2]; static gpuEvent_t bc_recv_event[3][2]; static int bc_recv_done[3][2]; static double *bc_rbuff[3][2]; static double *bc_sbuff[3][2]; static double *bc_rbuffs=NULL; static double *bc_sbuffs=NULL; static double *bc_rbuff_gpu[3][2]; static double *bc_sbuff_gpu[3][2]; static double *bc_rbuffs_gpu=NULL; static double *bc_sbuffs_gpu=NULL; static int bc_rbuffs_size=0; static int bc_sbuffs_size=0; static int bc_rbuffs_max=0; static int bc_sbuffs_max=0; #ifdef NDEBUG # define check_mpi(s) (s) #else # define check_mpi(s) (assert((s) == MPI_SUCCESS)) #endif void bc_init_gpu(boundary_conditions* bc) { int nsends=0; int nrecvs=0; for (int i=0; i<3; i++) { for (int d=0; d<2; d++) { nsends += NEXTPITCHDIV(bc->nsend[i][d]); nrecvs += NEXTPITCHDIV(bc->nrecv[i][d]); } } bc_sbuffs_max = MAX(nsends, bc_sbuffs_max); bc_rbuffs_max = MAX(nrecvs, bc_rbuffs_max); bc_init_count++; } void bc_init_buffers_gpu() { #ifndef GPAW_GPU_AWARE_MPI bc_rbuffs = NULL; bc_sbuffs = NULL; bc_streams = 0; #endif bc_rbuffs_gpu = NULL; bc_sbuffs_gpu = NULL; bc_rbuffs_size = 0; bc_sbuffs_size = 0; bc_init_count = 0; } static void _reallocate_buffer_host(double **buffer, const int size) { gpuFreeHost(*buffer); gpuCheckLastError(); gpuHostAlloc(buffer, sizeof(double) * size); } static void _reallocate_buffer_device(double **buffer, const int size) { gpuFree(*buffer); gpuCheckLastError(); gpuMalloc(buffer, sizeof(double) * size); } static void _create_stream_events() { if (!bc_streams) { gpuStreamCreate(&bc_recv_stream); bc_streams = 1; for (int d=0; d<3; d++) { for (int i=0; i<2; i++) { gpuEventCreateWithFlags(&bc_sendcpy_event[d][i], gpuEventDefault|gpuEventDisableTiming); gpuEventCreateWithFlags(&bc_recv_event[d][i], gpuEventDefault|gpuEventDisableTiming); } } } } static void _allocate_buffers(const boundary_conditions* bc, int blocks) { int nsends=0; 
int nrecvs=0; for (int i=0; i<3; i++) { for (int d=0; d<2; d++) { nsends += NEXTPITCHDIV(bc->nsend[i][d] * blocks); nrecvs += NEXTPITCHDIV(bc->nrecv[i][d] * blocks); } } bc_sbuffs_max=MAX(nsends, bc_sbuffs_max); if (bc_sbuffs_max > bc_sbuffs_size) { #ifndef GPAW_GPU_AWARE_MPI _reallocate_buffer_host(&bc_sbuffs, bc_sbuffs_max); #endif _reallocate_buffer_device(&bc_sbuffs_gpu, bc_sbuffs_max); bc_sbuffs_size = bc_sbuffs_max; } bc_rbuffs_max=MAX(nrecvs, bc_rbuffs_max); if (bc_rbuffs_max > bc_rbuffs_size) { #ifndef GPAW_GPU_AWARE_MPI _reallocate_buffer_host(&bc_rbuffs, bc_rbuffs_max); #endif _reallocate_buffer_device(&bc_rbuffs_gpu, bc_rbuffs_max); bc_rbuffs_size = bc_rbuffs_max; } #ifndef GPAW_GPU_AWARE_MPI _create_stream_events(); #endif } void bc_dealloc_gpu(int force) { if (force) bc_init_count = 1; if (bc_init_count == 1) { #ifndef GPAW_GPU_AWARE_MPI gpuFreeHost(bc_sbuffs); gpuFreeHost(bc_rbuffs); if (bc_streams) { gpuStreamDestroy(bc_recv_stream); for (int d=0; d<3; d++) { for (int i=0; i<2; i++) { gpuEventDestroy(bc_sendcpy_event[d][i]); gpuEventDestroy(bc_recv_event[d][i]); } } } #endif gpuFree(bc_sbuffs_gpu); gpuFree(bc_rbuffs_gpu); bc_init_buffers_gpu(); return; } if (bc_init_count > 0) bc_init_count--; } static void _check_msg_size(boundary_conditions* bc, int nin) { int maxrecv, maxsend; for (int i=0; i<3; i++) { maxrecv = MAX(bc->nrecv[i][0], bc->nrecv[i][1]) * nin * sizeof(double); maxsend = MAX(bc->nsend[i][0], bc->nsend[i][1]) * nin * sizeof(double); bc->gpu_rjoin[i] = 0; if (bc->recvproc[i][0] >= 0 && bc->recvproc[i][1] >= 0) { if (maxrecv < GPU_RJOIN_SIZE) bc->gpu_rjoin[i] = 1; else if ((maxrecv < GPU_RJOIN_SAME_SIZE) && (bc->recvproc[i][0] == bc->recvproc[i][1])) bc->gpu_rjoin[i] = 1; } bc->gpu_sjoin[i] = 0; if (bc->sendproc[i][0] >= 0 && bc->sendproc[i][1] >= 0) { if (maxsend < GPU_SJOIN_SIZE) bc->gpu_sjoin[i] = 1; else if ((maxsend < GPU_SJOIN_SAME_SIZE) && (bc->sendproc[i][0] == bc->sendproc[i][1])) bc->gpu_sjoin[i] = 1; } if (MAX(maxsend, maxrecv) 
< GPU_ASYNC_SIZE) bc->gpu_async[i] = 0; else bc->gpu_async[i] = 1; } } static void _prepare_buffers_host(boundary_conditions* bc, int nin) { int recvp=0; int sendp=0; for (int i=0; i<3; i++) { bc_sbuff[i][0] = bc_sbuffs + sendp; if (!bc->gpu_async[i] || bc->gpu_sjoin[i]) { bc_sbuff[i][1] = bc_sbuffs + sendp + bc->nsend[i][0] * nin; sendp += NEXTPITCHDIV((bc->nsend[i][0] + bc->nsend[i][1]) * nin); } else { sendp += NEXTPITCHDIV(bc->nsend[i][0] * nin); bc_sbuff[i][1] = bc_sbuffs + sendp; sendp += NEXTPITCHDIV(bc->nsend[i][1] * nin); } bc_rbuff[i][0] = bc_rbuffs + recvp; if (!bc->gpu_async[i] || bc->gpu_rjoin[i]) { bc_rbuff[i][1] = bc_rbuffs + recvp + bc->nrecv[i][0] * nin; recvp += NEXTPITCHDIV((bc->nrecv[i][0] + bc->nrecv[i][1]) * nin); } else { recvp += NEXTPITCHDIV(bc->nrecv[i][0] * nin); bc_rbuff[i][1] = bc_rbuffs + recvp; recvp += NEXTPITCHDIV(bc->nrecv[i][1] * nin); } } } static void _prepare_buffers_gpu(boundary_conditions* bc, int nin) { int recvp=0; int sendp=0; for (int i=0; i<3; i++) { bc_sbuff_gpu[i][0] = bc_sbuffs_gpu + sendp; if (!bc->gpu_async[i] || bc->gpu_sjoin[i]) { bc_sbuff_gpu[i][1] = bc_sbuffs_gpu + sendp + bc->nsend[i][0] * nin; sendp += NEXTPITCHDIV((bc->nsend[i][0] + bc->nsend[i][1]) * nin); } else { sendp += NEXTPITCHDIV(bc->nsend[i][0] * nin); bc_sbuff_gpu[i][1] = bc_sbuffs_gpu + sendp; sendp += NEXTPITCHDIV(bc->nsend[i][1] * nin); } bc_rbuff_gpu[i][0] = bc_rbuffs_gpu + recvp; if (!bc->gpu_async[i] || bc->gpu_rjoin[i]) { bc_rbuff_gpu[i][1] = bc_rbuffs_gpu + recvp + bc->nrecv[i][0] * nin; recvp += NEXTPITCHDIV((bc->nrecv[i][0] + bc->nrecv[i][1]) * nin); } else { recvp += NEXTPITCHDIV(bc->nrecv[i][0] * nin); bc_rbuff_gpu[i][1] = bc_rbuffs_gpu + recvp; recvp += NEXTPITCHDIV(bc->nrecv[i][1] * nin); } } } void bc_unpack_paste_gpu(boundary_conditions* bc, const double* aa1, double* aa2, MPI_Request recvreq[3][2], gpuStream_t kernel_stream, int nin) { bool real = (bc->ndouble == 1); _allocate_buffers(bc, nin); // Copy data: // Zero all of a2 array. 
We should only zero the bounaries // that are not periodic, but it's simpler to zero everything! // Copy data from a1 to central part of a2 and zero boundaries: if (real) bmgs_paste_zero_gpu(aa1, bc->size1, aa2, bc->size2, bc->sendstart[0][0], nin, kernel_stream); else bmgs_paste_zero_gpuz((const gpuDoubleComplex*)(aa1), bc->size1, (gpuDoubleComplex*) aa2, bc->size2, bc->sendstart[0][0], nin, kernel_stream); #ifndef GPAW_GPU_AWARE_MPI _check_msg_size(bc, nin); _prepare_buffers_host(bc, nin); #endif _prepare_buffers_gpu(bc, nin); for (int i=0; i<3; i++) { for (int d=0; d<2; d++) { int p = bc->recvproc[i][d]; if (p >= 0) { #ifndef GPAW_GPU_AWARE_MPI MPI_Irecv(bc_rbuff[i][d], bc->nrecv[i][d] * nin, MPI_DOUBLE, p, d + 1000 * i, bc->comm, &recvreq[i][d]); #else MPI_Irecv(bc_rbuff_gpu[i][d], bc->nrecv[i][d] * nin, MPI_DOUBLE, p, d + 1000 * i, bc->comm, &recvreq[i][d]); #endif bc_recv_done[i][d] = 0; } else { bc_recv_done[i][d] = 1; } } } } void bc_unpack_gpu_sync(const boundary_conditions* bc, double* aa2, int i, MPI_Request recvreq[3][2], MPI_Request sendreq[2], const double_complex phases[2], gpuStream_t kernel_stream, int nin) { bool real = (bc->ndouble == 1); _allocate_buffers(bc, nin); #ifdef PARALLEL for (int d=0; d<2; d++) { if (bc->sendproc[i][d] >= 0) { const int* start = bc->sendstart[i][d]; const int* size = bc->sendsize[i][d]; if (real) bmgs_cut_gpu(aa2, bc->size2, start, bc_sbuff_gpu[i][d], size, nin, kernel_stream); else { gpuDoubleComplex phase = {creal(phases[d]), cimag(phases[d])}; bmgs_cut_gpuz((gpuDoubleComplex*)(aa2), bc->size2, start, (gpuDoubleComplex*)(bc_sbuff_gpu[i][d]), size, phase, nin, kernel_stream); } } } #ifndef GPAW_GPU_AWARE_MPI if (bc->sendproc[i][0] >= 0 || bc->sendproc[i][1] >= 0) gpuMemcpy(bc_sbuff[i][0], bc_sbuff_gpu[i][0], sizeof(double) * (bc->nsend[i][0] + bc->nsend[i][1]) * nin, gpuMemcpyDeviceToHost); #endif // Start sending: for (int d=0; d<2; d++) { sendreq[d] = 0; int p = bc->sendproc[i][d]; if (p >= 0) { #ifndef 
GPAW_GPU_AWARE_MPI check_mpi(MPI_Isend(bc_sbuff[i][d], bc->nsend[i][d] * nin, MPI_DOUBLE, p, 1 - d + 1000 * i, bc->comm, &sendreq[d])); #else gpuStreamSynchronize(kernel_stream); check_mpi(MPI_Isend(bc_sbuff_gpu[i][d], bc->nsend[i][d] * nin, MPI_DOUBLE, p, 1 - d + 1000 * i, bc->comm, &sendreq[d])); #endif } } #endif // Copy data for periodic boundary conditions: for (int d=0; d<2; d++) { if (bc->sendproc[i][d] == COPY_DATA) { if (real) { bmgs_translate_gpu(aa2, bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d], nin, kernel_stream); } else { gpuDoubleComplex phase = {creal(phases[d]), cimag(phases[d])}; bmgs_translate_gpuz((gpuDoubleComplex*)(aa2), bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d], phase, nin, kernel_stream); } } } #ifdef PARALLEL for (int d=0; d<2; d++) { if (!bc_recv_done[i][d]) { check_mpi(MPI_Wait(&recvreq[i][d], MPI_STATUS_IGNORE)); } } if (!bc_recv_done[i][0] || !bc_recv_done[i][1]) { #ifndef GPAW_GPU_AWARE_MPI gpuMemcpy(bc_rbuff_gpu[i][0], bc_rbuff[i][0], sizeof(double) * (bc->nrecv[i][0] + bc->nrecv[i][1]) * nin, gpuMemcpyHostToDevice); #endif bc_recv_done[i][0] = 1; bc_recv_done[i][1] = 1; } for (int d=0; d<2; d++) { if (bc->recvproc[i][d] >= 0) { if (real) bmgs_paste_gpu(bc_rbuff_gpu[i][d], bc->recvsize[i][d], aa2, bc->size2, bc->recvstart[i][d], nin, kernel_stream); else bmgs_paste_gpuz( (const gpuDoubleComplex*)(bc_rbuff_gpu[i][d]), bc->recvsize[i][d], (gpuDoubleComplex*)(aa2), bc->size2, bc->recvstart[i][d], nin, kernel_stream); } } // This does not work on the ibm with gcc! We do a blocking send instead. 
for (int d=0; d<2; d++) if (bc->sendproc[i][d] >= 0) check_mpi(MPI_Wait(&sendreq[d], MPI_STATUS_IGNORE)); #endif } static void _bc_unpack_gpu_async(const boundary_conditions* bc, double* aa2, int i, MPI_Request recvreq[3][2], MPI_Request sendreq[2], const double_complex phases[2], gpuStream_t kernel_stream, int nin) { bool real = (bc->ndouble == 1); _allocate_buffers(bc, nin); #ifdef PARALLEL // Prepare send-buffers int send_done[2] = {0,0}; if (bc->sendproc[i][0] >= 0 || bc->sendproc[i][1] >= 0) { for (int d=0; d<2; d++) { sendreq[d] = 0; if (bc->sendproc[i][d] >= 0) { const int* start = bc->sendstart[i][d]; const int* size = bc->sendsize[i][d]; if (real) bmgs_cut_gpu(aa2, bc->size2, start, bc_sbuff_gpu[i][d], size, nin, kernel_stream); else { gpuDoubleComplex phase = {creal(phases[d]), cimag(phases[d])}; bmgs_cut_gpuz((gpuDoubleComplex*)(aa2), bc->size2, start, (gpuDoubleComplex*)(bc_sbuff_gpu[i][d]), size, phase, nin, kernel_stream); } if (!bc->gpu_sjoin[i]) { gpuMemcpyAsync(bc_sbuff[i][d], bc_sbuff_gpu[i][d], sizeof(double) * bc->nsend[i][d] * nin, gpuMemcpyDeviceToHost, kernel_stream); gpuEventRecord(bc_sendcpy_event[i][d], kernel_stream); } } } if (bc->gpu_sjoin[i]) { gpuMemcpyAsync(bc_sbuff[i][0], bc_sbuff_gpu[i][0], sizeof(double) * (bc->nsend[i][0] + bc->nsend[i][1]) * nin, gpuMemcpyDeviceToHost, kernel_stream); gpuEventRecord(bc_sendcpy_event[i][0], kernel_stream); } } for (int d=0; d<2; d++) if (!(bc->sendproc[i][d] >= 0)) send_done[d] = 1; int dd=0; if (send_done[dd]) dd=1; int loopc=MIN(2, 3-i); int ddd[loopc]; for (int ii=0; ii < loopc; ii++) { ddd[ii] = 1; if (bc_recv_done[ii+i][ddd[ii]]) ddd[ii] = 1 - ddd[ii]; } do { if (!send_done[dd] && gpuEventQuery(bc_sendcpy_event[i][dd]) == gpuSuccess) { MPI_Isend(bc_sbuff[i][dd], bc->nsend[i][dd] * nin, MPI_DOUBLE, bc->sendproc[i][dd], 1 - dd + 1000 * i, bc->comm, &sendreq[dd]); send_done[dd] = 1; dd = 1; if (bc->gpu_sjoin[i]) { MPI_Isend(bc_sbuff[i][dd], bc->nsend[i][dd] * nin, MPI_DOUBLE, 
bc->sendproc[i][dd], 1 - dd + 1000 * i, bc->comm, &sendreq[dd]); send_done[dd] = 1; } loopc = 1; } for (int i2=0; i2 < loopc; i2++) { int i3 = i2+i; if (i==0 && i2==1 && bc_recv_done[i3][0] && bc_recv_done[i3][1]) { i3 = 2; } if (!bc->gpu_async[i3]) continue; if (i2==0 && bc->gpu_rjoin[i3] && !bc_recv_done[i3][0] && !bc_recv_done[i3][1]) { int status; MPI_Testall(2, recvreq[i3], &status, MPI_STATUSES_IGNORE); if (status) { gpuMemcpyAsync(bc_rbuff_gpu[i3][0], bc_rbuff[i3][0], sizeof(double) * (bc->nrecv[i3][0] + bc->nrecv[i3][1]) * nin, gpuMemcpyHostToDevice, bc_recv_stream); for (int d=0; d<2; d++) { if (!bc_recv_done[i3][d]) { if (real) bmgs_paste_gpu(bc_rbuff_gpu[i3][d], bc->recvsize[i3][d], aa2, bc->size2, bc->recvstart[i3][d], nin, bc_recv_stream); else bmgs_paste_gpuz( (const gpuDoubleComplex*)(bc_rbuff_gpu[i3][d]), bc->recvsize[i3][d], (gpuDoubleComplex*)(aa2), bc->size2, bc->recvstart[i3][d], nin, bc_recv_stream); gpuEventRecord(bc_recv_event[i3][d], bc_recv_stream); bc_recv_done[i3][d] = 1; } } } } else if (!bc_recv_done[i3][ddd[i2]]) { int status; MPI_Test(&recvreq[i3][ddd[i2]], &status, MPI_STATUS_IGNORE); if (status) { gpuMemcpyAsync(bc_rbuff_gpu[i3][ddd[i2]], bc_rbuff[i3][ddd[i2]], sizeof(double) * (bc->nrecv[i3][ddd[i2]]) * nin, gpuMemcpyHostToDevice, bc_recv_stream); if (real) bmgs_paste_gpu(bc_rbuff_gpu[i3][ddd[i2]], bc->recvsize[i3][ddd[i2]], aa2, bc->size2, bc->recvstart[i3][ddd[i2]], nin, bc_recv_stream); else bmgs_paste_gpuz( (const gpuDoubleComplex*)(bc_rbuff_gpu[i3][ddd[i2]]), bc->recvsize[i3][ddd[i2]], (gpuDoubleComplex*)(aa2), bc->size2, bc->recvstart[i3][ddd[i2]], nin, bc_recv_stream); gpuEventRecord(bc_recv_event[i3][ddd[i2]], bc_recv_stream); bc_recv_done[i3][ddd[i2]] = 1; } } if (!bc_recv_done[i3][1-ddd[i2]]) ddd[i2] = 1 - ddd[i2]; } } while (!bc_recv_done[i][0] || !bc_recv_done[i][1] || !send_done[0] || !send_done[1]); #endif // Copy data for periodic boundary conditions: for (int d=0; d<2; d++) { if (bc->sendproc[i][d] == COPY_DATA) { 
if (real) { bmgs_translate_gpu(aa2, bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d], nin, kernel_stream); } else { gpuDoubleComplex phase = {creal(phases[d]), cimag(phases[d])}; bmgs_translate_gpuz((gpuDoubleComplex*)(aa2), bc->size2, bc->sendsize[i][d], bc->sendstart[i][d], bc->recvstart[i][1 - d], phase, nin, kernel_stream); } } } #ifdef PARALLEL // This does not work on the ibm with gcc! We do a blocking send instead. for (int d=0; d<2; d++) { if (bc->sendproc[i][d] >= 0) check_mpi(MPI_Wait(&sendreq[d], MPI_STATUS_IGNORE)); } for (int d=0; d<2; d++) { if (bc->recvproc[i][d] >= 0) gpuStreamWaitEvent(kernel_stream, bc_recv_event[i][d], 0); } #endif } void bc_unpack_gpu_async(const boundary_conditions* bc, double* aa2, int i, MPI_Request recvreq[3][2], MPI_Request sendreq[2], const double_complex phases[2], gpuStream_t kernel_stream, int nin) { #ifndef GPAW_GPU_AWARE_MPI _bc_unpack_gpu_async(bc, aa2, i, recvreq, sendreq, phases, kernel_stream, nin); #else bc_unpack_gpu_sync(bc, aa2, i, recvreq, sendreq, phases, kernel_stream, nin); #endif } void bc_unpack_gpu(const boundary_conditions* bc, double* aa2, int i, MPI_Request recvreq[3][2], MPI_Request sendreq[2], const double_complex phases[2], gpuStream_t kernel_stream, int nin) { #ifndef GPAW_GPU_AWARE_MPI if (!bc->gpu_async[i]) { bc_unpack_gpu_sync(bc, aa2, i, recvreq, sendreq, phases, kernel_stream, nin); } else { bc_unpack_gpu_async(bc, aa2, i, recvreq, sendreq, phases, kernel_stream, nin); } #else bc_unpack_gpu_sync(bc, aa2, i, recvreq, sendreq, phases, kernel_stream, nin); #endif } gpaw-24.1.0/c/gpu/blas.c000066400000000000000000000336461454550013000146470ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "gpu.h" #include "gpu-complex.h" gpublasHandle_t _gpaw_gpublas_handle; void blas_init_gpu() { gpublasSafeCall(gpublasCreate(&_gpaw_gpublas_handle)); } static gpublasOperation_t gpublas_operation(int op) { 
gpublasOperation_t gpu_op; if (op == 'N' || op == 'n') gpu_op = GPUBLAS_OP_N; else if (op == 'T' || op == 't') gpu_op = GPUBLAS_OP_T; else if (op == 'C' || op == 'c') gpu_op = GPUBLAS_OP_C; else assert(0); return gpu_op; } PyObject* scal_gpu(PyObject *self, PyObject *args) { Py_complex alpha; void *x_gpu; PyObject *x_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "DnOO", &alpha, &x_gpu, &x_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); Py_ssize_t nd = PyTuple_Size(x_shape); for (int d=1; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); int incx = 1; if (type->type_num == NPY_DOUBLE) { gpublasSafeCall( gpublasDscal(_gpaw_gpublas_handle, n, &alpha.real, (double*) x_gpu, incx)); } else { gpublasDoubleComplex alpha_gpu = {alpha.real, alpha.imag}; gpublasSafeCall( gpublasZscal(_gpaw_gpublas_handle, n, &alpha_gpu, (gpublasDoubleComplex*) x_gpu, incx)); } if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } static void _mmm_gpu(gpublasOperation_t gpu_opa, gpublasOperation_t gpu_opb, int m, int n, int k, Py_complex alpha, void *a, int lda, void *b, int ldb, Py_complex beta, void *c, int ldc, int real) { if (real) { gpublasSafeCall( gpublasDgemm(_gpaw_gpublas_handle, gpu_opa, gpu_opb, m, n, k, &(alpha.real), (double*) a, lda, (double*) b, ldb, &(beta.real), (double*) c, ldc)); } else { gpublasDoubleComplex alpha_gpu = {alpha.real, alpha.imag}; gpublasDoubleComplex beta_gpu = {beta.real, beta.imag}; gpublasSafeCall( gpublasZgemm(_gpaw_gpublas_handle, gpu_opa, gpu_opb, m, n, k, &alpha_gpu, (gpublasDoubleComplex*) a, lda, (gpublasDoubleComplex*) b, ldb, &beta_gpu, (gpublasDoubleComplex*) c, ldc)); } } PyObject* mmm_gpu(PyObject *self, PyObject *args) { Py_complex alpha; void *b; int ldb; int opb; void *a; int lda; int opa; Py_complex beta; void *c; int ldc; int bytes; int m, n, k; if (!PyArg_ParseTuple(args, "DniCniCDniiiii", &alpha, &b, &ldb, &opb, &a, &lda, &opa, &beta, &c, &ldc, &bytes, &m, &n, &k)) 
return NULL; gpublasOperation_t gpu_opa = gpublas_operation(opa); gpublasOperation_t gpu_opb = gpublas_operation(opb); int real = (bytes == NPY_SIZEOF_DOUBLE); _mmm_gpu(gpu_opa, gpu_opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, real); Py_RETURN_NONE; } static void _gemm_gpu(gpublasOperation_t transa_c, int m, int n, int k, Py_complex alpha, void *a_gpu, int lda, void *b_gpu, int ldb, Py_complex beta, void *c_gpu, int ldc, int real) { _mmm_gpu(transa_c, GPUBLAS_OP_N, m, n, k, alpha, a_gpu, lda, b_gpu, ldb, beta, c_gpu, ldc, real); } PyObject* gemm_gpu(PyObject *self, PyObject *args) { Py_complex alpha; Py_complex beta; void *a_gpu; void *b_gpu; void *c_gpu; PyObject *a_shape, *b_shape, *c_shape; PyArray_Descr *type; int transa = 'n'; if (!PyArg_ParseTuple(args, "DnOnODnOO|Ci", &alpha, &a_gpu, &a_shape, &b_gpu, &b_shape, &beta, &c_gpu, &c_shape, &type, &transa)) return NULL; int real = 0; if (type->type_num == NPY_DOUBLE) { real = 1; } gpublasOperation_t transa_c = gpublas_operation(transa); int m, k, lda, ldb, ldc; int n = (int) PyLong_AsLong(PyTuple_GetItem(b_shape, 0)); if (transa == 'n') { m = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int i=2; i < PyTuple_Size(a_shape); i++) m *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); k = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); lda = m; ldb = k; ldc = m; } else { k = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int i=2; i < PyTuple_Size(a_shape); i++) k *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); m = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); lda = k; ldb = k; ldc = m; } _gemm_gpu(transa_c, m, n, k, alpha, a_gpu, lda, b_gpu, ldb, beta, c_gpu, ldc, real); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } PyObject* gemv_gpu(PyObject *self, PyObject *args) { Py_complex alpha; void *a_gpu; void *x_gpu; void *y_gpu; Py_complex beta; PyObject *a_shape, *x_shape; PyArray_Descr *type; int trans = 't'; if (!PyArg_ParseTuple(args, "DnOnODnO|C", &alpha, &a_gpu, 
&a_shape, &x_gpu, &x_shape, &beta, &y_gpu, &type, &trans)) return NULL; gpublasOperation_t trans_c = gpublas_operation(trans); int m, n, lda, incx, incy; if (trans == 'n') { m = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int i=2; i < PyTuple_Size(a_shape); i++) m *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); lda = m; } else { n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); for (int i=1; i < PyTuple_Size(a_shape) - 1; i++) n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); m = (int) PyLong_AsLong( PyTuple_GetItem(a_shape, PyTuple_Size(a_shape) - 1)); lda = m; } incx = 1; incy = 1; if (type->type_num == NPY_DOUBLE) { gpublasSafeCall( gpublasDgemv(_gpaw_gpublas_handle, trans_c, m, n, &alpha.real, (double*) a_gpu, lda, (double*) x_gpu, incx, &beta.real, (double*) y_gpu, incy)); } else { gpublasDoubleComplex alpha_gpu = {alpha.real, alpha.imag}; gpublasDoubleComplex beta_gpu = {beta.real, beta.imag}; gpublasSafeCall( gpublasZgemv(_gpaw_gpublas_handle, trans_c, m, n, &alpha_gpu, (gpublasDoubleComplex*) a_gpu, lda, (gpublasDoubleComplex*) x_gpu, incx, &beta_gpu, (gpublasDoubleComplex*) y_gpu, incy)); } if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } PyObject* axpy_gpu(PyObject *self, PyObject *args) { Py_complex alpha; void *x_gpu; void *y_gpu; PyObject *x_shape,*y_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "DnOnOO", &alpha, &x_gpu, &x_shape, &y_gpu, &y_shape, &type)) return NULL; Py_ssize_t nd = PyTuple_Size(x_shape); int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); for (int d=1; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); int incx = 1; int incy = 1; if (type->type_num == NPY_DOUBLE) { gpublasSafeCall( gpublasDaxpy(_gpaw_gpublas_handle, n, &alpha.real, (double*) x_gpu, incx, (double*) y_gpu, incy)); } else { gpublasDoubleComplex alpha_gpu = {alpha.real, alpha.imag}; gpublasSafeCall( gpublasZaxpy(_gpaw_gpublas_handle, n, &alpha_gpu, 
(gpublasDoubleComplex*) x_gpu, incx, (gpublasDoubleComplex*) y_gpu, incy)); } if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } static void _rk_gpu(int n, int k, double alpha, void *a_gpu, int lda, double beta, void *c_gpu, int ldc, int real) { if (real) { gpublasSafeCall( gpublasDsyrk(_gpaw_gpublas_handle, GPUBLAS_FILL_MODE_UPPER, GPUBLAS_OP_T, n, k, &alpha, (double*) a_gpu, lda, &beta, (double*) c_gpu, ldc)); } else { gpublasSafeCall( gpublasZherk(_gpaw_gpublas_handle, GPUBLAS_FILL_MODE_UPPER, GPUBLAS_OP_C, n, k, &alpha, (gpublasDoubleComplex*) a_gpu, lda, &beta, (gpublasDoubleComplex*) c_gpu, ldc)); } } PyObject* rk_gpu(PyObject *self, PyObject *args) { double alpha; double beta; void *a_gpu; void *c_gpu; PyObject *a_shape, *c_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "dnOdnOO|i", &alpha, &a_gpu, &a_shape, &beta, &c_gpu, &c_shape, &type)) return NULL; int real = 0; if (type->type_num == NPY_DOUBLE) { real = 1; } int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); int k = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int d=2; d < PyTuple_Size(a_shape); d++) k *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, d)); int ldc = n; int lda = k; _rk_gpu(n, k, alpha, a_gpu, lda, beta, c_gpu, ldc, real); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } static void _r2k_gpu(int n, int k, Py_complex alpha, void *a_gpu, int lda, void *b_gpu, double beta, void *c_gpu, int ldc, int real) { if (real) { gpublasSafeCall( gpublasDsyr2k(_gpaw_gpublas_handle, GPUBLAS_FILL_MODE_UPPER, GPUBLAS_OP_T, n, k, &alpha.real, (double*) a_gpu, lda, (double*) b_gpu, lda, &beta, (double*) c_gpu, ldc)); } else { gpublasDoubleComplex alpha_gpu = {alpha.real, alpha.imag}; gpublasSafeCall( gpublasZher2k(_gpaw_gpublas_handle, GPUBLAS_FILL_MODE_UPPER, GPUBLAS_OP_C, n, k, &alpha_gpu, (gpublasDoubleComplex*) a_gpu, lda, (gpublasDoubleComplex*) b_gpu, lda, &beta, (gpublasDoubleComplex*) c_gpu, ldc)); } } PyObject* r2k_gpu(PyObject *self, PyObject *args) { 
Py_complex alpha; double beta; void *a_gpu; void *b_gpu; void *c_gpu; PyObject *a_shape, *b_shape, *c_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "DnOnOdnOO|i", &alpha, &a_gpu, &a_shape, &b_gpu, &b_shape, &beta, &c_gpu, &c_shape, &type)) return NULL; int real = 0; if (type->type_num == NPY_DOUBLE) { real = 1; } int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); int k = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int d=2; d < PyTuple_Size(a_shape); d++) k *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, d)); int ldc = n; int lda = k; _r2k_gpu(n, k, alpha, a_gpu, lda, b_gpu, beta, c_gpu, ldc, real); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } PyObject* dotc_gpu(PyObject *self, PyObject *args) { void *a_gpu; void *b_gpu; PyObject *a_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nOnO", &a_gpu, &a_shape, &b_gpu, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); for (int i=1; i < PyTuple_Size(a_shape); i++) n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); int incx = 1; int incy = 1; if (type->type_num == NPY_DOUBLE) { double result; gpublasSafeCall( gpublasDdot(_gpaw_gpublas_handle, n, (double*) a_gpu, incx, (double*) b_gpu, incy, &result)); if (PyErr_Occurred()) return NULL; else return PyFloat_FromDouble(result); } else { gpublasDoubleComplex result; gpublasSafeCall( gpublasZdotc(_gpaw_gpublas_handle, n, (gpublasDoubleComplex*) a_gpu, incx, (gpublasDoubleComplex*) b_gpu, incy, &result)); if (PyErr_Occurred()) return NULL; else return PyComplex_FromDoubles(result.x,result.y); } } PyObject* dotu_gpu(PyObject *self, PyObject *args) { void *a_gpu; void *b_gpu; PyObject *a_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nOnO", &a_gpu, &a_shape, &b_gpu, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); for (int i=1; i < PyTuple_Size(a_shape); i++) n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); int incx = 1; int incy = 1; if (type->type_num == 
NPY_DOUBLE) { double result; gpublasSafeCall( gpublasDdot(_gpaw_gpublas_handle, n, (double*) a_gpu, incx, (double*) b_gpu, incy, &result)); if (PyErr_Occurred()) return NULL; else return PyFloat_FromDouble(result); } else { gpublasDoubleComplex result; gpublasSafeCall( gpublasZdotu(_gpaw_gpublas_handle, n, (gpublasDoubleComplex*) a_gpu, incx, (gpublasDoubleComplex*) b_gpu, incy, &result)); if (PyErr_Occurred()) return NULL; else return PyComplex_FromDoubles(result.x,result.y); } } gpaw-24.1.0/c/gpu/bmgs.h000066400000000000000000000102051454550013000146450ustar00rootroot00000000000000#ifndef GPU_BMGS_H #define GPU_BMGS_H #include "gpu.h" #include "gpu-complex.h" int bmgs_fd_boundary_test(const bmgsstencil_gpu* s, int boundary, int ndouble); bmgsstencil_gpu bmgs_stencil_to_gpu(bmgsstencil *s); void bmgs_fd_gpu(const bmgsstencil_gpu* s, const double* adev, double* bdev, int boundary, int blocks, gpuStream_t stream); void bmgs_relax_gpu(const int relax_method, const bmgsstencil_gpu* s, double* adev, double* bdev, const double* src, const double w, int boundary, gpuStream_t stream); void bmgs_cut_gpu(const double* a, const int n[3], const int c[3], double* b, const int m[3],int blocks, gpuStream_t stream); void bmgs_paste_gpu(const double* a, const int n[3], double* b, const int m[3], const int c[3], int blocks, gpuStream_t stream); void bmgs_paste_zero_gpu(const double* a, const int n[3], double* b, const int m[3], const int c[3], int blocks, gpuStream_t stream); void bmgs_translate_gpu(double* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3], int blocks, gpuStream_t stream); void bmgs_restrict_gpu(int k, double* a, const int n[3], double* b, const int nb[3], int blocks); void bmgs_restrict_stencil_gpu(int k, double* a, const int na[3], double* b, const int nb[3], double* w, int blocks); void bmgs_interpolate_gpu(int k, int skip[3][2], const double* a, const int n[3], double* b, const int sizeb[3], int blocks); void 
bmgs_interpolate_stencil_gpu(int k, int skip[3][2], const double* a, const int sizea[3], double* b, const int sizeb[3], double* w, int blocks); // complex routines: void bmgs_fd_gpuz(const bmgsstencil_gpu* s, const gpuDoubleComplex* adev, gpuDoubleComplex* bdev, int boundary, int blocks, gpuStream_t stream); void bmgs_cut_gpuz(const gpuDoubleComplex* a, const int n[3], const int c[3], gpuDoubleComplex* b, const int m[3], gpuDoubleComplex, int blocks, gpuStream_t stream); void bmgs_paste_gpuz(const gpuDoubleComplex* a, const int n[3], gpuDoubleComplex* b, const int m[3], const int c[3], int blocks, gpuStream_t stream); void bmgs_paste_zero_gpuz(const gpuDoubleComplex* a, const int n[3], gpuDoubleComplex* b, const int m[3], const int c[3], int blocks, gpuStream_t stream); void bmgs_translate_gpuz(gpuDoubleComplex* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3], gpuDoubleComplex, int blocks, gpuStream_t stream); void bmgs_restrict_gpuz(int k, gpuDoubleComplex* a, const int n[3], gpuDoubleComplex* b, const int nb[3], int blocks); void bmgs_restrict_stencil_gpuz(int k, gpuDoubleComplex* a, const int na[3], gpuDoubleComplex* b, const int nb[3], gpuDoubleComplex* w, int blocks); void bmgs_interpolate_gpuz(int k, int skip[3][2], const gpuDoubleComplex* a, const int n[3], gpuDoubleComplex* b, const int sizeb[3], int blocks); void bmgs_interpolate_stencil_gpuz(int k, int skip[3][2], const gpuDoubleComplex* a, const int sizea[3], gpuDoubleComplex* b, const int sizeb[3], gpuDoubleComplex* w, int blocks); void reducemap_dotuz(const gpuDoubleComplex* a_gpu, const gpuDoubleComplex* b_gpu, gpuDoubleComplex* result, int n, int nvec); #endif gpaw-24.1.0/c/gpu/cuda.h000066400000000000000000000115741454550013000146430ustar00rootroot00000000000000#ifndef GPU_CUDA_H #define GPU_CUDA_H #include #include #include #define gpuMemcpyKind cudaMemcpyKind #define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost #define gpuMemcpyHostToDevice cudaMemcpyHostToDevice 
#define gpuSuccess cudaSuccess #define gpuEventDefault cudaEventDefault #define gpuEventBlockingSync cudaEventBlockingSync #define gpuEventDisableTiming cudaEventDisableTiming #define gpuStream_t cudaStream_t #define gpuEvent_t cudaEvent_t #define gpuError_t cudaError_t #define gpuDeviceProp cudaDeviceProp #define gpuDoubleComplex cuDoubleComplex #define gpublasDoubleComplex cuDoubleComplex #define make_gpuDoubleComplex make_cuDoubleComplex #define gpuCreal cuCreal #define gpuCimag cuCimag #define gpuCadd cuCadd #define gpuCsub cuCsub #define gpuCmul cuCmul #define gpuConj cuConj #define gpuCheckLastError() gpuSafeCall(cudaGetLastError()) #define gpuGetErrorString(err) cudaGetErrorString(err) #define gpuSetDevice(id) gpuSafeCall(cudaSetDevice(id)) #define gpuGetDevice(dev) gpuSafeCall(cudaGetDevice(dev)) #define gpuGetDeviceProperties(prop, dev) \ gpuSafeCall(cudaGetDeviceProperties(prop, dev)) #define gpuDeviceSynchronize() gpuSafeCall(cudaDeviceSynchronize()) #define gpuFree(p) if ((p) != NULL) gpuSafeCall(cudaFree(p)) #define gpuFreeHost(p) if ((p) != NULL) gpuSafeCall(cudaFreeHost(p)) #define gpuMalloc(pp, size) gpuSafeCall(cudaMalloc((void**) (pp), size)) #define gpuHostAlloc(pp, size) \ gpuSafeCall(cudaHostAlloc((void**) (pp), size, cudaHostAllocPortable)) #define gpuMemcpy(dst, src, count, kind) \ gpuSafeCall(cudaMemcpy(dst, src, count, kind)) #define gpuMemcpyAsync(dst, src, count, kind, stream) \ gpuSafeCall(cudaMemcpyAsync(dst, src, count, kind, stream)) #define gpuStreamCreate(stream) gpuSafeCall(cudaStreamCreate(stream)) #define gpuStreamDestroy(stream) gpuSafeCall(cudaStreamDestroy(stream)) #define gpuStreamWaitEvent(stream, event, flags) \ gpuSafeCall(cudaStreamWaitEvent(stream, event, flags)) #define gpuStreamSynchronize(stream) \ gpuSafeCall(cudaStreamSynchronize(stream)) #define gpuEventCreate(event) gpuSafeCall(cudaEventCreate(event)) #define gpuEventCreateWithFlags(event, flags) \ gpuSafeCall(cudaEventCreateWithFlags(event, flags)) #define 
gpuEventDestroy(event) gpuSafeCall(cudaEventDestroy(event)) #define gpuEventQuery(event) cudaEventQuery(event) #define gpuEventRecord(event, stream) \ gpuSafeCall(cudaEventRecord(event, stream)) #define gpuEventSynchronize(event) \ gpuSafeCall(cudaEventSynchronize(event)) #define gpuEventElapsedTime(ms, start, end) \ gpuSafeCall(cudaEventElapsedTime(ms, start, end)) #define gpuLaunchKernel(kernel, dimGrid, dimBlock, shared, stream, ...) \ kernel<<>>(__VA_ARGS__) #define gpublasStatus_t cublasStatus_t #define gpublasHandle_t cublasHandle_t #define gpublasOperation_t cublasOperation_t #define gpublasCreate cublasCreate #define gpublasSetStream cublasSetStream #define gpublasGetMatrixAsync cublasGetMatrixAsync #define gpublasSetMatrixAsync cublasSetMatrixAsync #define gpublasDsyrk cublasDsyrk #define gpublasDsyr2k cublasDsyr2k #define gpublasDscal cublasDscal #define gpublasZscal cublasZscal #define gpublasDgemm cublasDgemm #define gpublasZgemm cublasZgemm #define gpublasDgemv cublasDgemv #define gpublasZgemv cublasZgemv #define gpublasDaxpy cublasDaxpy #define gpublasZaxpy cublasZaxpy #define gpublasZherk cublasZherk #define gpublasZher2k cublasZher2k #define gpublasDdot cublasDdot #define gpublasZdotc cublasZdotc #define gpublasZdotu cublasZdotu #define GPUBLAS_OP_N CUBLAS_OP_N #define GPUBLAS_OP_T CUBLAS_OP_T #define GPUBLAS_OP_C CUBLAS_OP_C #define GPUBLAS_FILL_MODE_UPPER CUBLAS_FILL_MODE_UPPER #define GPUBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS #define GPUBLAS_STATUS_NOT_INITIALIZED CUBLAS_STATUS_NOT_INITIALIZED #define GPUBLAS_STATUS_ALLOC_FAILED CUBLAS_STATUS_ALLOC_FAILED #define GPUBLAS_STATUS_INVALID_VALUE CUBLAS_STATUS_INVALID_VALUE #define GPUBLAS_STATUS_ARCH_MISMATCH CUBLAS_STATUS_ARCH_MISMATCH #define GPUBLAS_STATUS_MAPPING_ERROR CUBLAS_STATUS_MAPPING_ERROR #define GPUBLAS_STATUS_EXECUTION_FAILED CUBLAS_STATUS_EXECUTION_FAILED #define GPUBLAS_STATUS_INTERNAL_ERROR CUBLAS_STATUS_INTERNAL_ERROR #endif 
gpaw-24.1.0/c/gpu/debug.c000066400000000000000000000025021454550013000147770ustar00rootroot00000000000000#include "../bmgs/bmgs.h" void bmgs_paste_cpu(const double *a_cpu, const int sizea[3], double *b_cpu, const int sizeb[3], const int startb[3]) { bmgs_paste(a_cpu, sizea, b_cpu, sizeb, startb); } void bmgs_pastez_cpu(const double *a_cpu, const int sizea[3], double *b_cpu, const int sizeb[3], const int startb[3]) { bmgs_pastez((const double_complex*) a_cpu, sizea, (double_complex*) b_cpu, sizeb, startb); } void bmgs_cut_cpu(const double *a_cpu, const int sizea[3], const int starta[3], double *b_cpu, const int sizeb[3]) { bmgs_cut(a_cpu, sizea, starta, b_cpu, sizeb); } void bmgs_cutz_cpu(const double *a_cpu, const int sizea[3], const int starta[3], double *b_cpu, const int sizeb[3]) { bmgs_cutz((const double_complex*) a_cpu, sizea, starta, (double_complex*) b_cpu, sizeb); } void bmgs_cutmz_cpu(const void *a_cpu, const int sizea[3], const int starta[3], void *b_cpu, const int sizeb[3], void *phase) { double_complex *ph = (double_complex *) phase; bmgs_cutmz((const double_complex *) a_cpu, sizea, starta, (double_complex *) b_cpu, sizeb, *ph); } gpaw-24.1.0/c/gpu/debug.h000066400000000000000000000021061454550013000150040ustar00rootroot00000000000000#ifndef GPU_DEBUG_H #define GPU_DEBUG_H #define GPAW_MALLOC(T, n) (T*)(malloc((n) * sizeof(T))) extern "C" void bmgs_paste_cpu(const double *a_cpu, const int sizea[3], double *b_cpu, const int sizeb[3], const int startb[3]); extern "C" void bmgs_pastez_cpu(const double *a_cpu, const int sizea[3], double *b_cpu, const int sizeb[3], const int startb[3]); extern "C" void bmgs_cut_cpu(const double *a_cpu, const int sizea[3], const int starta[3], double *b_cpu, const int sizeb[3]); extern "C" void bmgs_cutz_cpu(const double *a_cpu, const int sizea[3], const int starta[3], double *b_cpu, const int sizeb[3]); extern "C" void bmgs_cutmz_cpu(const void *a_cpu, const int sizea[3], const int starta[3], void *b_cpu, const int sizeb[3], 
void *phase); #endif gpaw-24.1.0/c/gpu/gpu-align.h000066400000000000000000000003311454550013000155770ustar00rootroot00000000000000#ifndef GPU_ALIGN_H #define GPU_ALIGN_H #if defined(__CUDACC__) #define ALIGN(x) __align__(x) #else #if defined(__GNUC__) #define ALIGN(x) __attribute__ ((aligned (x))) #else #define ALIGN(x) #endif #endif #endif gpaw-24.1.0/c/gpu/gpu-complex.h000066400000000000000000000046161454550013000161660ustar00rootroot00000000000000#ifdef GPAW_CUDA #include #endif #ifdef GPAW_HIP #include #endif #include "gpu-runtime.h" #undef Tgpu #undef Zgpu #undef MULTD #undef MULDT #undef ADD #undef ADD3 #undef ADD4 #undef IADD #undef MAKED #undef MULTT #undef CONJ #undef REAL #undef IMAG #undef NEG #ifndef GPU_USE_COMPLEX # define Tgpu double # define Zgpu(f) f # define MULTT(a,b) ((a) * (b)) # define MULTD(a,b) ((a) * (b)) # define MULDT(a,b) ((a) * (b)) # define ADD(a,b) ((a) + (b)) # define ADD3(a,b,c) ((a) + (b) + (c)) # define ADD4(a,b,c,d) ((a) + (b) + (c) + (d)) # define IADD(a,b) ((a) += (b)) # define MAKED(a) (a) # define CONJ(a) (a) # define REAL(a) (a) # define IMAG(a) (0) # define NEG(a) (-(a)) #else # define Tgpu gpuDoubleComplex # define Zgpu(f) f ## z # define MULTT(a,b) gpuCmul((a), (b)) # define MULTD(a,b) gpuCmulD((a), (b)) # define MULDT(b,a) MULTD((a), (b)) # define ADD(a,b) gpuCadd((a), (b)) # define ADD3(a,b,c) gpuCadd3((a), (b), (c)) # define ADD4(a,b,c,d) gpuCadd4((a), (b), (c), (d)) # define IADD(a,b) {(a).x += gpuCreal(b); (a).y += gpuCimag(b);} # define MAKED(a) make_gpuDoubleComplex(a, 0) # define CONJ(a) gpuConj(a) # define REAL(a) gpuCreal(a) # define IMAG(a) gpuCimag(a) # define NEG(a) gpuCneg(a) #endif #ifndef GPU_COMPLEX_H #define GPU_COMPLEX_H __host__ __device__ static __inline__ gpuDoubleComplex gpuCmulD( gpuDoubleComplex x, double y) { return make_gpuDoubleComplex(gpuCreal(x) * y, gpuCimag(x) * y); } __host__ __device__ static __inline__ gpuDoubleComplex gpuCneg( gpuDoubleComplex x) { return 
make_gpuDoubleComplex(-gpuCreal(x), -gpuCimag(x)); } __host__ __device__ static __inline__ gpuDoubleComplex gpuCadd3( gpuDoubleComplex x, gpuDoubleComplex y, gpuDoubleComplex z) { return make_gpuDoubleComplex(gpuCreal(x) + gpuCreal(y) + gpuCreal(z), gpuCimag(x) + gpuCimag(y) + gpuCimag(z)); } __host__ __device__ static __inline__ gpuDoubleComplex gpuCadd4( gpuDoubleComplex x, gpuDoubleComplex y, gpuDoubleComplex z, gpuDoubleComplex w) { return make_gpuDoubleComplex( gpuCreal(x) + gpuCreal(y) + gpuCreal(z) + gpuCreal(w), gpuCimag(x) + gpuCimag(y) + gpuCimag(z) + gpuCimag(w)); } #endif gpaw-24.1.0/c/gpu/gpu-runtime.h000066400000000000000000000001221454550013000161660ustar00rootroot00000000000000#ifdef GPAW_CUDA #include "cuda.h" #endif #ifdef GPAW_HIP #include "hip.h" #endif gpaw-24.1.0/c/gpu/gpu.h000066400000000000000000000123331454550013000145140ustar00rootroot00000000000000#ifndef GPU_GPU_H #define GPU_GPU_H #include #include #include #include "gpu-runtime.h" #define GPU_BLOCKS_MIN (16) #define GPU_BLOCKS_MAX (96) #define GPU_DEFAULT_BLOCK_X (32) #define GPU_DEFAULT_BLOCK_Y (16) #define GPU_ASYNC_SIZE (8*1024) #define GPU_RJOIN_SIZE (16*1024) #define GPU_SJOIN_SIZE (16*1024) #define GPU_RJOIN_SAME_SIZE (96*1024) #define GPU_SJOIN_SAME_SIZE (96*1024) #define GPU_OVERLAP_SIZE (GPU_ASYNC_SIZE) #define GPU_ERROR_ABS_TOL (1e-13) #define GPU_ERROR_ABS_TOL_EXCT (DBL_EPSILON) #define GPAW_BOUNDARY_NORMAL (1<<(0)) #define GPAW_BOUNDARY_SKIP (1<<(1)) #define GPAW_BOUNDARY_ONLY (1<<(2)) #define GPAW_BOUNDARY_X0 (1<<(3)) #define GPAW_BOUNDARY_X1 (1<<(4)) #define GPAW_BOUNDARY_Y0 (1<<(5)) #define GPAW_BOUNDARY_Y1 (1<<(6)) #define GPAW_BOUNDARY_Z0 (1<<(7)) #define GPAW_BOUNDARY_Z1 (1<<(8)) #define gpuSafeCall(err) __gpuSafeCall(err, __FILE__, __LINE__) #define gpublasSafeCall(err) __gpublasSafeCall(err, __FILE__, __LINE__) #define GPU_PITCH (16) /* in doubles */ #define NEXTPITCHDIV(n) \ (((n) > 0) ? 
((n) + GPU_PITCH - 1 - ((n) - 1) % GPU_PITCH) : 0) #ifndef MAX # define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif #ifndef MIN # define MIN(a,b) (((a) < (b)) ? (a) : (b)) #endif typedef struct { int ncoefs; double* coefs_gpu; long* offsets_gpu; int ncoefs0; double* coefs0_gpu; int ncoefs1; double* coefs1_gpu; int ncoefs2; double* coefs2_gpu; double coef_relax; long n[3]; long j[3]; } bmgsstencil_gpu; #ifndef BMGS_H typedef struct { int ncoefs; double* coefs; long* offsets; long n[3]; long j[3]; } bmgsstencil; #endif static inline gpuError_t __gpuSafeCall(gpuError_t err, const char *file, int line) { if (gpuSuccess != err) { char str[100]; snprintf(str, 100, "%s(%i): GPU error: %s.\n", file, line, gpuGetErrorString(err)); PyErr_SetString(PyExc_RuntimeError, str); fprintf(stderr, "%s", str); } return err; } static inline gpublasStatus_t __gpublasSafeCall(gpublasStatus_t err, const char *file, int line) { if (GPUBLAS_STATUS_SUCCESS != err) { char str[100]; switch (err) { case GPUBLAS_STATUS_NOT_INITIALIZED: snprintf(str, 100, "%s(%i): GPU BLAS error: NOT INITIALIZED.\n", file, line); break; case GPUBLAS_STATUS_ALLOC_FAILED: snprintf(str, 100, "%s(%i): GPU BLAS error: ALLOC FAILED.\n", file, line); break; case GPUBLAS_STATUS_INVALID_VALUE: snprintf(str, 100, "%s(%i): GPU BLAS error: INVALID VALUE.\n", file, line); break; case GPUBLAS_STATUS_ARCH_MISMATCH: snprintf(str, 100, "%s(%i): GPU BLAS error: ARCH MISMATCH.\n", file, line); break; case GPUBLAS_STATUS_MAPPING_ERROR: snprintf(str, 100, "%s(%i): GPU BLAS error: MAPPING ERROR.\n", file, line); break; case GPUBLAS_STATUS_EXECUTION_FAILED: snprintf(str, 100, "%s(%i): GPU BLAS error: EXECUTION FAILED.\n", file, line); break; case GPUBLAS_STATUS_INTERNAL_ERROR: snprintf(str, 100, "%s(%i): GPU BLAS error: INTERNAL ERROR.\n", file, line); break; default: snprintf(str, 100, "%s(%i): GPU BLAS error: UNKNOWN ERROR '%X'.\n", file, line, err); } PyErr_SetString(PyExc_RuntimeError, str); fprintf(stderr, "%s", str); } return err; 
} static inline unsigned int nextPow2(unsigned int x) { --x; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; return ++x; } #define BLOCK_GRID(hc_size) \ int blockx = MIN((int)nextPow2(hc_size.z), \ BLOCK_MAX); \ int blocky = MIN(MIN((int)nextPow2(hc_size.y), \ BLOCK_TOTALMAX / blockx), \ BLOCK_MAX); \ dim3 dimBlock(blockx, blocky); \ int gridx = ((hc_size.z + dimBlock.x - 1) / dimBlock.x); \ int xdiv = MAX(1, MIN(hc_size.x, GRID_MAX / gridx)); \ gridx = xdiv * gridx; \ int gridy = blocks * ((hc_size.y + dimBlock.y - 1) / dimBlock.y); \ dim3 dimGrid(gridx, gridy); \ #endif gpaw-24.1.0/c/gpu/hip.h000066400000000000000000000115751454550013000145100ustar00rootroot00000000000000#ifndef GPU_HIP_H #define GPU_HIP_H #include #include #define gpuMemcpyKind hipMemcpyKind #define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost #define gpuMemcpyHostToDevice hipMemcpyHostToDevice #define gpuSuccess hipSuccess #define gpuEventDefault hipEventDefault #define gpuEventBlockingSync hipEventBlockingSync #define gpuEventDisableTiming hipEventDisableTiming #define gpuStream_t hipStream_t #define gpuEvent_t hipEvent_t #define gpuError_t hipError_t #define gpuDeviceProp hipDeviceProp_t #define gpuDoubleComplex hipDoubleComplex #define gpublasDoubleComplex hipblasDoubleComplex #define make_gpuDoubleComplex make_hipDoubleComplex #define gpuCreal hipCreal #define gpuCimag hipCimag #define gpuCadd hipCadd #define gpuCsub hipCsub #define gpuCmul hipCmul #define gpuConj hipConj #define gpuCheckLastError() gpuSafeCall(hipGetLastError()) #define gpuGetErrorString(err) hipGetErrorString(err) #define gpuSetDevice(id) gpuSafeCall(hipSetDevice(id)) #define gpuGetDevice(dev) gpuSafeCall(hipGetDevice(dev)) #define gpuGetDeviceProperties(prop, dev) \ gpuSafeCall(hipGetDeviceProperties(prop, dev)) #define gpuDeviceSynchronize() gpuSafeCall(hipDeviceSynchronize()) #define gpuFree(p) if ((p) != NULL) gpuSafeCall(hipFree(p)) #define gpuFreeHost(p) if ((p) != NULL) gpuSafeCall(hipHostFree(p)) 
#define gpuMalloc(pp, size) gpuSafeCall(hipMalloc((void**) (pp), size)) #define gpuHostAlloc(pp, size) \ gpuSafeCall(hipHostMalloc((void**) (pp), size, hipHostMallocPortable)) #define gpuMemcpy(dst, src, count, kind) \ gpuSafeCall(hipMemcpy(dst, src, count, kind)) #define gpuMemcpyAsync(dst, src, count, kind, stream) \ gpuSafeCall(hipMemcpyAsync(dst, src, count, kind, stream)) #define gpuStreamCreate(stream) gpuSafeCall(hipStreamCreate(stream)) #define gpuStreamDestroy(stream) gpuSafeCall(hipStreamDestroy(stream)) #define gpuStreamWaitEvent(stream, event, flags) \ gpuSafeCall(hipStreamWaitEvent(stream, event, flags)) #define gpuStreamSynchronize(stream) \ gpuSafeCall(hipStreamSynchronize(stream)) #define gpuEventCreate(event) gpuSafeCall(hipEventCreate(event)) #define gpuEventCreateWithFlags(event, flags) \ gpuSafeCall(hipEventCreateWithFlags(event, flags)) #define gpuEventDestroy(event) gpuSafeCall(hipEventDestroy(event)) #define gpuEventQuery(event) hipEventQuery(event) #define gpuEventRecord(event, stream) \ gpuSafeCall(hipEventRecord(event, stream)) #define gpuEventSynchronize(event) \ gpuSafeCall(hipEventSynchronize(event)) #define gpuEventElapsedTime(ms, start, end) \ gpuSafeCall(hipEventElapsedTime(ms, start, end)) #define gpuLaunchKernel(kernel, dimGrid, dimBlock, shared, stream, ...) 
\ kernel<<>>(__VA_ARGS__) #define gpublasStatus_t hipblasStatus_t #define gpublasHandle_t hipblasHandle_t #define gpublasOperation_t hipblasOperation_t #define gpublasCreate hipblasCreate #define gpublasSetStream hipblasSetStream #define gpublasGetMatrixAsync hipblasGetMatrixAsync #define gpublasSetMatrixAsync hipblasSetMatrixAsync #define gpublasDsyrk hipblasDsyrk #define gpublasDsyr2k hipblasDsyr2k #define gpublasDscal hipblasDscal #define gpublasZscal hipblasZscal #define gpublasDgemm hipblasDgemm #define gpublasZgemm hipblasZgemm #define gpublasDgemv hipblasDgemv #define gpublasZgemv hipblasZgemv #define gpublasDaxpy hipblasDaxpy #define gpublasZaxpy hipblasZaxpy #define gpublasZherk hipblasZherk #define gpublasZher2k hipblasZher2k #define gpublasDdot hipblasDdot #define gpublasZdotc hipblasZdotc #define gpublasZdotu hipblasZdotu #define GPUBLAS_OP_N HIPBLAS_OP_N #define GPUBLAS_OP_T HIPBLAS_OP_T #define GPUBLAS_OP_C HIPBLAS_OP_C #define GPUBLAS_FILL_MODE_UPPER HIPBLAS_FILL_MODE_UPPER #define GPUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS #define GPUBLAS_STATUS_NOT_INITIALIZED HIPBLAS_STATUS_NOT_INITIALIZED #define GPUBLAS_STATUS_ALLOC_FAILED HIPBLAS_STATUS_ALLOC_FAILED #define GPUBLAS_STATUS_INVALID_VALUE HIPBLAS_STATUS_INVALID_VALUE #define GPUBLAS_STATUS_ARCH_MISMATCH HIPBLAS_STATUS_ARCH_MISMATCH #define GPUBLAS_STATUS_MAPPING_ERROR HIPBLAS_STATUS_MAPPING_ERROR #define GPUBLAS_STATUS_EXECUTION_FAILED HIPBLAS_STATUS_EXECUTION_FAILED #define GPUBLAS_STATUS_INTERNAL_ERROR HIPBLAS_STATUS_INTERNAL_ERROR #endif gpaw-24.1.0/c/gpu/kernels/000077500000000000000000000000001454550013000152115ustar00rootroot00000000000000gpaw-24.1.0/c/gpu/kernels/cut.cpp000066400000000000000000000052521454550013000165140ustar00rootroot00000000000000#include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX #define BLOCK_MAX 32 #define GRID_MAX 65535 #define BLOCK_TOTALMAX 256 #endif /* * GPU kernel to copy a slice of an array. 
*/ __global__ void Zgpu(bmgs_cut_kernel)( const Tgpu* a, const int3 c_sizea, Tgpu* b, const int3 c_sizeb, #ifdef GPU_USE_COMPLEX gpuDoubleComplex phase, #endif int blocks, int xdiv) { int xx = gridDim.x / xdiv; int yy = gridDim.y / blocks; int blocksi = blockIdx.y / yy; int i1 = (blockIdx.y - blocksi * yy) * blockDim.y + threadIdx.y; int xind = blockIdx.x / xx; int i2 = (blockIdx.x - xind * xx) * blockDim.x + threadIdx.x; b += i2 + (i1 + (xind + blocksi * c_sizeb.x) * c_sizeb.y) * c_sizeb.z; a += i2 + (i1 + (xind + blocksi * c_sizea.x) * c_sizea.y) * c_sizea.z; while (xind < c_sizeb.x) { if ((i2 < c_sizeb.z) && (i1 < c_sizeb.y)) { #ifndef GPU_USE_COMPLEX b[0] = a[0]; #else b[0] = MULTT(phase, a[0]); #endif } b += xdiv * c_sizeb.y * c_sizeb.z; a += xdiv * c_sizea.y * c_sizea.z; xind += xdiv; } } /* * Copy a slice of an array on the GPU. If the array contains complex * numbers, then multiply each element with the given phase. * * For example: * . . . . (OR for complex numbers) * a = . 1 2 . -> b = 1 2 -> b = phase*1 phase*2 * . 3 4 . 3 4 phase*3 phase*4 * . . . . 
* * arguments: * a -- input array * sizea -- dimensions of the array a * starta -- offset to the start of the slice * b -- output array * sizeb -- dimensions of the array b * phase -- phase (only for complex) * blocks -- number of blocks * stream -- GPU stream to use */ extern "C" void Zgpu(bmgs_cut_gpu)( const Tgpu* a, const int sizea[3], const int starta[3], Tgpu* b, const int sizeb[3], #ifdef GPU_USE_COMPLEX gpuDoubleComplex phase, #endif int blocks, gpuStream_t stream) { if (!(sizea[0] && sizea[1] && sizea[2])) return; int3 hc_sizea, hc_sizeb; hc_sizea.x = sizea[0]; hc_sizea.y = sizea[1]; hc_sizea.z = sizea[2]; hc_sizeb.x = sizeb[0]; hc_sizeb.y = sizeb[1]; hc_sizeb.z = sizeb[2]; BLOCK_GRID(hc_sizeb); a += starta[2] + (starta[1] + starta[0] * hc_sizea.y) * hc_sizea.z; gpuLaunchKernel(Zgpu(bmgs_cut_kernel), dimGrid, dimBlock, 0, stream, (Tgpu*) a, hc_sizea, (Tgpu*) b, hc_sizeb, #ifdef GPU_USE_COMPLEX phase, #endif blocks, xdiv); gpuCheckLastError(); } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "cut.cpp" #endif gpaw-24.1.0/c/gpu/kernels/elementwise.cpp000066400000000000000000000145321454550013000202430ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define BLOCK_X 128 # define MAX_BLOCKS (65535) #endif /* * GPU kernel for axpbyz, i.e. z[i] = a * x[i] + b * y[i] */ __global__ void axpbyz_kernel(double a, double *x, double b, double *y, double *z, int n) { int tid = threadIdx.x + blockIdx.x * blockDim.x; int stride = gridDim.x * blockDim.x; for (; tid < n; tid += stride) { z[tid] = a * x[tid] + b * y[tid]; } } /* * GPU kernel for axpbz, i.e. 
z[i] = a * x[i] + b
 *
 * Each thread starts at its global index and advances by the total
 * number of launched threads until n is reached.
 */
__global__ void axpbz_kernel(double a, double *x, double b, double *z, int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = gridDim.x * blockDim.x;

    for (; tid < n; tid += stride) {
        z[tid] = a * x[tid] + b;
    }
}

/*
 * GPU kernel for axpbyz, i.e. z[i] = a * x[i] + b * y[i],
 * on complex numbers.
 *
 * a and b are real, so the real and imaginary parts are combined
 * independently.
 */
__global__ void axpbyz_kernelz(double a, gpuDoubleComplex *x,
                               double b, gpuDoubleComplex *y,
                               gpuDoubleComplex *z, int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = gridDim.x * blockDim.x;

    for (; tid < n; tid += stride) {
        (z[tid]).x = a * gpuCreal(x[tid]) + b * gpuCreal(y[tid]);
        (z[tid]).y = a * gpuCimag(x[tid]) + b * gpuCimag(y[tid]);
    }
}

/*
 * GPU kernel for axpbz, i.e. z[i] = a * x[i] + b, on complex numbers.
 *
 * b is a real scalar, so it contributes to the real part only.
 * NOTE(review): earlier revisions also added b to the imaginary part,
 * which does not match the formula above; check callers that pass a
 * non-zero b together with complex data.
 */
__global__ void axpbz_kernelz(double a, gpuDoubleComplex *x,
                              double b, gpuDoubleComplex *z, int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = gridDim.x * blockDim.x;

    for (; tid < n; tid += stride) {
        (z[tid]).x = a * gpuCreal(x[tid]) + b;
        (z[tid]).y = a * gpuCimag(x[tid]);
    }
}

/*
 * GPU kernel to fill an array of doubles with a given value.
 */
__global__ void fill_kernel(double a, double *z, int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = gridDim.x * blockDim.x;

    for (; tid < n; tid += stride) {
        z[tid] = a;
    }
}

/*
 * GPU kernel to fill an array of complex numbers with a given
 * complex value.
 */
__global__ void fill_kernelz(double real, double imag, gpuDoubleComplex *z,
                             int n)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int stride = gridDim.x * blockDim.x;

    for (; tid < n; tid += stride) {
        (z[tid]).x = real;
        (z[tid]).y = imag;
    }
}

/*
 * GPU version of axpbyz, i.e.
z[i] = a * x[i] + b * y[i] * * Arguments: * a, x, b, y, z -- (as above) * shape -- shape of the arrays * type -- datatype of elements in the arrays */ extern "C" PyObject* axpbyz_gpu(PyObject *self, PyObject *args) { double a, b; void *x; void *y; void *z; PyObject *shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "dndnnOO", &a, &x, &b, &y, &z, &shape, &type)) return NULL; int n = 1; Py_ssize_t nd = PyTuple_Size(shape); for (int d=0; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel(axpbyz_kernel, dimGrid, dimBlock, 0, 0, a, (double*) x, b, (double*) y, (double *) z, n); } else { gpuLaunchKernel(axpbyz_kernelz, dimGrid, dimBlock, 0, 0, a, (gpuDoubleComplex*) x, b, (gpuDoubleComplex*) y, (gpuDoubleComplex*) z, n); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } /* * GPU version of axpbz, i.e. 
z[i] = a * x[i] + b * * Arguments: * a, x, b, z -- (as above) * shape -- shape of the arrays * type -- datatype of elements in the arrays */ extern "C" PyObject* axpbz_gpu(PyObject *self, PyObject *args) { double a, b; void *x; void *z; PyObject *shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "dndnOO", &a, &x, &b, &z, &shape, &type)) return NULL; int n = 1; Py_ssize_t nd = PyTuple_Size(shape); for (int d=0; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel(axpbz_kernel, dimGrid, dimBlock, 0, 0, a, (double*) x, b, (double *) z, n); } else { gpuLaunchKernel(axpbz_kernelz, dimGrid, dimBlock, 0, 0, a, (gpuDoubleComplex*) x, b, (gpuDoubleComplex*) z, n); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } /* * Fill a GPU array with a given value, i.e. x[i] = value * * Arguments: * x, value -- (as above) * shape -- shape of the arrays * type -- datatype of elements in the arrays */ extern "C" PyObject* fill_gpu(PyObject *self, PyObject *args) { PyObject *value; void *x; PyObject *shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "OnOO", &value, &x, &shape, &type)) return NULL; double real; double imag; if (PyComplex_Check(value)) { Py_complex c; c = PyComplex_AsCComplex(value); real = c.real; imag = c.imag; } else { real = PyFloat_AsDouble(value); imag = 0.0; } int n = 1; Py_ssize_t nd = PyTuple_Size(shape); for (int d=0; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel(fill_kernel, dimGrid, dimBlock, 0, 0, real, (double*) x, n); } else { gpuLaunchKernel(fill_kernelz, dimGrid, dimBlock, 0, 0, real, imag, (gpuDoubleComplex*) x, n); } gpuCheckLastError(); if 
(PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } gpaw-24.1.0/c/gpu/kernels/ext-potential.cpp000066400000000000000000000075341454550013000205230ustar00rootroot00000000000000#include #include #include #include #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX #define BLOCK_SIZEX 32 #define BLOCK_SIZEY 8 #define XDIV 4 #endif __global__ void Zgpu(add_linear_field_kernel)( const Tgpu *a, const int3 c_sizea, Tgpu *b, const int3 c_n, const int3 c_beg, const double3 strength, int blocks) { int xx = gridDim.x / XDIV; int yy = gridDim.y / blocks; int blocksi = blockIdx.y / yy; int i1bl = blockIdx.y - yy * blocksi; int i1tid = threadIdx.y; int i1 = i1bl * BLOCK_SIZEY + i1tid; int xind = blockIdx.x / xx; int i2bl = blockIdx.x - xind * xx; int i2 = i2bl * BLOCK_SIZEX + threadIdx.x; int xlen = (c_n.x + XDIV - 1) / XDIV; int xstart = xind * xlen; int xend = MIN(xstart + xlen, c_n.x); b += c_sizea.x * c_sizea.y * c_sizea.z * blocksi; a += c_sizea.x * c_sizea.y * c_sizea.z * blocksi; b += i2 + i1 * c_sizea.z + xstart * c_sizea.y * c_sizea.z; a += i2 + i1 * c_sizea.z + xstart * c_sizea.y * c_sizea.z; double yz = (i1 + c_beg.y) * strength.y + (i2 + c_beg.z) * strength.z; for (int i0=xstart; i0 < xend; i0++) { if ((i2 < c_n.z) && (i1 < c_n.y)) { IADD(b[0], MULDT(((i0 + c_beg.x) * strength.x + yz), a[0])); } b += c_sizea.y * c_sizea.z; a += c_sizea.y * c_sizea.z; } } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "ext-potential.cpp" extern "C" PyObject* add_linear_field_gpu(PyObject *self, PyObject *args) { void *a_gpu; void *b_gpu; PyObject *shape; PyArrayObject *c_ni, *c_begi, *c_vi, *strengthi; PyArray_Descr *type; int blocks=1; int3 hc_sizea, hc_n, hc_beg; double3 h_strength; if (!PyArg_ParseTuple(args, "nOOnOOOO", &a_gpu, &shape, &type, &b_gpu, &c_ni, &c_begi, &c_vi, &strengthi)) return NULL; int nd = PyTuple_Size(shape); if (nd == 4) blocks = (int) 
PyLong_AsLong(PyTuple_GetItem(shape, 0)); hc_sizea.x = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+0)); hc_sizea.y = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+1)); hc_sizea.z = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+2)); hc_n.x = ((long*) PyArray_DATA(c_ni))[0]; hc_n.y = ((long*) PyArray_DATA(c_ni))[1]; hc_n.z = ((long*) PyArray_DATA(c_ni))[2]; hc_beg.x = ((long*) PyArray_DATA(c_begi))[0]; hc_beg.y = ((long*) PyArray_DATA(c_begi))[1]; hc_beg.z = ((long*) PyArray_DATA(c_begi))[2]; h_strength.x = ((double*) PyArray_DATA(strengthi))[0] * ((double*) PyArray_DATA(c_vi))[0+0*3]; h_strength.y = ((double*) PyArray_DATA(strengthi))[1] * ((double*) PyArray_DATA(c_vi))[1+1*3]; h_strength.z = ((double*) PyArray_DATA(strengthi))[2] * ((double*) PyArray_DATA(c_vi))[2+2*3]; int gridy = blocks * ((hc_n.y + BLOCK_SIZEY - 1) / BLOCK_SIZEY); int gridx = XDIV * ((hc_n.z + BLOCK_SIZEX - 1) / BLOCK_SIZEX); dim3 dimBlock(BLOCK_SIZEX, BLOCK_SIZEY); dim3 dimGrid(gridx, gridy); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel(add_linear_field_kernel, dimGrid, dimBlock, 0, 0, (double*) a_gpu, hc_sizea, (double*) b_gpu, hc_n, hc_beg, h_strength, blocks); } else { gpuLaunchKernel(add_linear_field_kernelz, dimGrid, dimBlock, 0, 0, (gpuDoubleComplex*) a_gpu, hc_sizea, (gpuDoubleComplex*) b_gpu, hc_n, hc_beg, h_strength, blocks); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } #endif gpaw-24.1.0/c/gpu/kernels/fd.cpp000066400000000000000000000606501454550013000163150ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #undef MYJ_X #undef NDOUBLE #undef BLOCK_X #undef BLOCK_Y #define BLOCK_X GPU_DEFAULT_BLOCK_X #define BLOCK_Y GPU_DEFAULT_BLOCK_Y #ifndef GPU_USE_COMPLEX #define MYJ_X (MYJ) #define NDOUBLE 1 #else #define MYJ_X (MYJ * NDOUBLE) #define NDOUBLE 2 #endif #ifdef MYJ #define ACACHE_X (BLOCK_X + 2 * MYJ_X) #define ACACHE_Y (BLOCK_Y + 2 * 
MYJ) __global__ void FD_kernel( const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double *a, double *b, const int3 c_n, const int3 a_size, const int3 b_size, const int xdiv, const int blocks) { int i2tid = threadIdx.x; int i1tid = threadIdx.y; int i1, i2; int xlen; double acache0[MYJ] = {0.0}; double acache0t[MYJ + 1] = {0.0}; double *acache12p; __shared__ double s_coefs0[MYJ * 2 + 1]; __shared__ double s_coefs1[MYJ * 2]; __shared__ double s_coefs2[MYJ * 2]; __shared__ double acache12[ACACHE_X * ACACHE_Y]; { int xx = gridDim.x / xdiv; int yy = gridDim.y / blocks; int xind = blockIdx.x / xx; i2 = (blockIdx.x - xind * xx) * BLOCK_X + i2tid; int blocksi = blockIdx.y / yy; i1 = (blockIdx.y - blocksi * yy) * BLOCK_Y + i1tid; xlen = (c_n.x + xdiv-1) / xdiv; int xstart = xind * xlen; if ((c_n.x - xstart) < xlen) xlen = c_n.x - xstart; a += a_size.x * blocksi + xstart * a_size.y + i1 * a_size.z + i2; b += b_size.x * blocksi + xstart * b_size.y + i1 * b_size.z + i2; } acache12p = acache12 + ACACHE_X * (i1tid + MYJ) + i2tid + MYJ_X; if (i2tid <= MYJ * 2) s_coefs0[i2tid] = c_coefs0[i2tid]; if (i2tid < MYJ * 2) { s_coefs1[i2tid] = c_coefs1[i2tid]; s_coefs2[i2tid] = c_coefs2[i2tid]; } __syncthreads(); for (int c=0; c < MYJ; c++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[c] = a[(c - MYJ) * (a_size.y)]; } for (int i0=0; i0 < xlen; i0++) { if (i1 < c_n.y + MYJ) { acache12p[-MYJ_X] = a[-MYJ_X]; if ((i2tid < MYJ_X * 2) && (i2 < c_n.z + MYJ_X - BLOCK_X + MYJ_X)) { acache12p[BLOCK_X - MYJ_X] = a[BLOCK_X - MYJ_X]; } } if (i1tid < MYJ) { acache12p[-ACACHE_X * MYJ] = a[-a_size.z * MYJ]; if (i1 < c_n.y + MYJ - BLOCK_Y) { acache12p[ACACHE_X * BLOCK_Y] = a[a_size.z * BLOCK_Y]; } } __syncthreads(); acache0t[0] = 0.0; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c - MYJ)] * s_coefs1[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[NDOUBLE * c - MYJ_X] * s_coefs2[c]; for 
(int c=0; c < MYJ; c++) acache0t[0] += acache12p[NDOUBLE * (c+1)] * s_coefs2[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c+1)] * s_coefs1[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache0[c] * s_coefs0[c]; acache0t[0] += acache12p[0] * s_coefs0[MYJ]; for (int c=0; c < MYJ; c++) acache0t[c+1] += acache12p[0] * s_coefs0[c + 1 + MYJ]; for (int c=0; c < ncoefs; c++) acache0t[0] += a[NDOUBLE * c_offsets[c]] * c_coefs[c]; if (i0 >= MYJ) { if ((i1 < c_n.y) && (i2 < c_n.z)) { b[0] = acache0t[MYJ]; } b += b_size.y; } for (int c=0; c < MYJ-1; c++) { acache0[c] = acache0[c+1]; } acache0[MYJ-1] = acache12p[0]; for (int c=MYJ; c > 0; c--) { acache0t[c] = acache0t[c-1]; } a += a_size.y; __syncthreads(); } #pragma unroll for (int i0=0; i0 < MYJ; i0++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[0] = a[0]; if (i0 < 1) acache0t[1 - i0] += acache0[0] * s_coefs0[1 + MYJ]; #if MYJ >= 2 if (i0 < 2) acache0t[2 - i0] += acache0[0] * s_coefs0[2 + MYJ]; #endif #if MYJ >= 3 if (i0 < 3) acache0t[3 - i0] += acache0[0] * s_coefs0[3 + MYJ]; #endif #if MYJ >= 4 if (i0 < 4) acache0t[4 - i0] += acache0[0] * s_coefs0[4 + MYJ]; #endif #if MYJ >= 5 if (i0 < 5) acache0t[5 - i0] += acache0[0] * s_coefs0[5 + MYJ]; #endif if (i0 + xlen >= MYJ) { if ((i1 < c_n.y) && (i2 < c_n.z)) { b[0] = acache0t[MYJ - i0]; } b += b_size.y; } a += a_size.y; } } __global__ void FD_kernel_onlyb( const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double *a, double *b, const int3 c_n, const int3 c_jb, const int boundary, const int xdiv, const int blocks) { int xx = MAX((c_n.z + BLOCK_X - 1) / BLOCK_X, 1); int yy = MAX((c_n.y + BLOCK_Y - 1) / BLOCK_Y, 1); int ysiz = c_n.y; if ((boundary & GPAW_BOUNDARY_Y0) != 0) ysiz -= BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ysiz -= BLOCK_Y; int yy2 = MAX((ysiz + BLOCK_Y - 1) / BLOCK_Y, 0); int i2bl, i1bl; int xlen = c_n.x; int xind = 0; int xstart = 
0; int i2pitch = 0; int i1pitch = 0; int ymax = c_n.y; int zmax = c_n.z; int xmax = c_n.x; int blockix; blockix = blockIdx.x; if ((boundary & GPAW_BOUNDARY_X0) != 0) { if ((blockix >= 0) && (blockix < xx * yy)) { i1bl = blockix / xx; i2bl = blockix - i1bl * xx; xlen = c_jb.x / 2; xstart = 0; } blockix -= xx * yy; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { if ((blockix >= 0) && (blockix < xx * yy)) { i1bl = blockix / xx; i2bl = blockix - i1bl * xx; xlen = c_jb.x / 2; xstart += c_n.x - c_jb.x / 2; } blockix -= xx * yy; } if (blockix >= 0) { if ((boundary & GPAW_BOUNDARY_Y0) != 0) { if ((blockix >= 0) && (blockix < xdiv * xx)) { xind = blockix / xx; i2bl = blockix - xind * xx; i1bl = 0; ymax = MIN(BLOCK_Y, ymax); } blockix -= xdiv * xx; } if ((boundary & GPAW_BOUNDARY_Y1) != 0) { if ((blockix >= 0) && (blockix < xdiv * xx)) { xind = blockix / xx; i2bl = blockix - xind * xx; i1bl = 0; i1pitch = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * xx; } if ((boundary & GPAW_BOUNDARY_Z0) != 0) { if ((blockix >= 0) && (blockix < xdiv * yy2)) { xind = blockix / yy2; i2bl = 0; zmax = MIN(BLOCK_X, zmax); i1bl = blockix - xind * yy2; if ((boundary & GPAW_BOUNDARY_Y0) != 0) i1pitch = BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ymax = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * yy2; } if ((boundary & GPAW_BOUNDARY_Z1) != 0) { if ((blockix >= 0) && (blockix < xdiv * yy2)) { xind = blockix / yy2; i2bl = 0; i2pitch = MAX(c_n.z - BLOCK_X, 0); i1bl = blockix - xind * yy2; if ((boundary & GPAW_BOUNDARY_Y0) != 0) i1pitch = BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ymax = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * yy2; } if ((boundary & GPAW_BOUNDARY_X0) != 0) { xstart += c_jb.x / 2; xlen -= c_jb.x / 2; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { xlen -= c_jb.x / 2; xmax -= c_jb.x / 2; } xlen = (xlen + xdiv - 1) / xdiv; xstart += xind * xlen; } int i2tid = threadIdx.x; int i2 = i2pitch + i2bl * BLOCK_X + i2tid; int blocksi = blockIdx.y; int i1tid = threadIdx.y; int i1 = i1pitch 
+ i1bl * BLOCK_Y + i1tid; __shared__ double s_coefs0[MYJ * 2 + 1]; __shared__ double s_coefs1[MYJ * 2]; __shared__ double s_coefs2[MYJ * 2]; __shared__ double acache12[ACACHE_X * ACACHE_Y]; double acache0[MYJ]; double acache0t[MYJ + 1]; double *acache12p; int sizez = c_jb.z + c_n.z; int sizeyz = (c_jb.y + c_n.y) * sizez; if ((xmax-xstart) < xlen) xlen = xmax - xstart; a += ((c_jb.x + c_n.x) * sizeyz) * blocksi; b += (c_n.x * c_n.y * c_n.z) * blocksi; acache12p = acache12 + ACACHE_X * (i1tid + MYJ * 2 / 2) + i2tid + MYJ_X * 2 / 2; if (i2tid <= MYJ * 2) s_coefs0[i2tid] = c_coefs0[i2tid]; if (i2tid < MYJ * 2) { s_coefs1[i2tid] = c_coefs1[i2tid]; s_coefs2[i2tid] = c_coefs2[i2tid]; } __syncthreads(); a += xstart * sizeyz + i1 * sizez + i2; b += xstart * c_n.y * c_n.z + i1 * c_n.z + i2; for (int c=0; c < MYJ; c++) { if ((i1 < ymax) && (i2 < zmax)) acache0[c] = a[(c - MYJ) * sizeyz]; } for (int i0=0; i0 < xlen; i0++) { if (i1 < ymax + MYJ) { acache12p[-MYJ_X] = a[-MYJ_X]; if ((i2tid < MYJ_X * 2) && (i2 < zmax + MYJ_X - BLOCK_X + MYJ_X)) acache12p[BLOCK_X - MYJ_X] = a[BLOCK_X - MYJ_X]; } if (i1tid < MYJ) { acache12p[-ACACHE_X * MYJ] = a[-sizez * MYJ]; if (i1 < ymax + MYJ - BLOCK_Y) acache12p[ACACHE_X * BLOCK_Y] = a[sizez * BLOCK_Y]; } __syncthreads(); acache0t[0] = 0.0; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c - MYJ)] * s_coefs1[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[NDOUBLE * c - MYJ_X] * s_coefs2[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[NDOUBLE * (c+1)] * s_coefs2[c+MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c+1)] * s_coefs1[c+MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache0[c] * s_coefs0[c]; acache0t[0] += acache12p[0] * s_coefs0[MYJ]; for (int c=0; c < MYJ; c++) acache0t[c+1] += acache12p[0] * s_coefs0[c + 1 + MYJ]; for (int c=0; c < ncoefs; c++) acache0t[0] += a[NDOUBLE * c_offsets[c]] * c_coefs[c]; if (i0 >= MYJ) { if ((i1 < ymax) && (i2 < zmax)) { b[0] = acache0t[MYJ]; } 
b += c_n.y * c_n.z; } for (int c=0; c < MYJ - 1; c++) { acache0[c] = acache0[c+1]; } acache0[MYJ - 1] = acache12p[0]; for (int c=MYJ; c > 0;c--) { acache0t[c] = acache0t[c-1]; } a += sizeyz; __syncthreads(); } #pragma unroll for (int i0=0; i0 < MYJ; i0++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[0] = a[0]; if (i0 < 1) acache0t[1 - i0] += acache0[0] * s_coefs0[1 + MYJ]; #if MYJ >= 2 if (i0 < 2) acache0t[2 - i0] += acache0[0] * s_coefs0[2 + MYJ]; #endif #if MYJ >= 3 if (i0 < 3) acache0t[3 - i0] += acache0[0] * s_coefs0[3 + MYJ]; #endif #if MYJ >= 4 if (i0 < 4) acache0t[4 - i0] += acache0[0] * s_coefs0[4 + MYJ]; #endif #if MYJ >= 5 if (i0 < 5) acache0t[5 - i0] += acache0[0] * s_coefs0[5 + MYJ]; #endif if (i0 + xlen >= MYJ) { if ((i1 < ymax) && (i2 < zmax)) { b[0] = acache0t[MYJ - i0]; } b += c_n.y * c_n.z; } a += sizeyz; } } #else #define MYJ (2/2) # define FD_kernel Zgpu(fd_kernel2) # define FD_kernel_onlyb Zgpu(fd_kernel2_onlyb) # include "fd.cpp" # undef FD_kernel # undef FD_kernel_onlyb # undef MYJ #define MYJ (4/2) # define FD_kernel Zgpu(fd_kernel4) # define FD_kernel_onlyb Zgpu(fd_kernel4_onlyb) # include "fd.cpp" # undef FD_kernel # undef FD_kernel_onlyb # undef MYJ #define MYJ (6/2) # define FD_kernel Zgpu(fd_kernel6) # define FD_kernel_onlyb Zgpu(fd_kernel6_onlyb) # include "fd.cpp" # undef FD_kernel # undef FD_kernel_onlyb # undef MYJ #define MYJ (8/2) # define FD_kernel Zgpu(fd_kernel8) # define FD_kernel_onlyb Zgpu(fd_kernel8_onlyb) # include "fd.cpp" # undef FD_kernel # undef FD_kernel_onlyb # undef MYJ #define MYJ (10/2) # define FD_kernel Zgpu(fd_kernel10) # define FD_kernel_onlyb Zgpu(fd_kernel10_onlyb) # include "fd.cpp" # undef FD_kernel # undef FD_kernel_onlyb # undef MYJ extern "C" bmgsstencil_gpu bmgs_stencil_to_gpu(const bmgsstencil* s); extern "C" int bmgs_fd_boundary_test( const bmgsstencil_gpu* s, int boundary, int ndouble); extern "C" void Zgpu(bmgs_fd_gpu)( const bmgsstencil_gpu* s_gpu, const Tgpu* adev, Tgpu* bdev, int boundary, int 
blocks, gpuStream_t stream) { int3 bjb; int3 jb; int3 hc_bj; int3 hc_n; int3 hc_j; long *offsets_gpu; dim3 dimBlock(BLOCK_X, BLOCK_Y); if ((boundary & GPAW_BOUNDARY_SKIP) != 0) { if (!bmgs_fd_boundary_test(s_gpu, boundary, NDOUBLE)) return; } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { if (!bmgs_fd_boundary_test(s_gpu, boundary, NDOUBLE)) { boundary &= ~GPAW_BOUNDARY_ONLY; boundary |= GPAW_BOUNDARY_NORMAL; } } hc_n.x=s_gpu->n[0]; hc_n.y=s_gpu->n[1]; hc_n.z=s_gpu->n[2]; hc_j.x=s_gpu->j[0]; hc_j.y=s_gpu->j[1]; hc_j.z=s_gpu->j[2]; bjb.x=0; bjb.y=0; bjb.z=0; hc_bj.x=0; hc_bj.y=0; hc_bj.z=0; hc_n.z *= NDOUBLE; hc_j.x *= NDOUBLE; hc_j.y *= NDOUBLE; hc_j.z *= NDOUBLE; offsets_gpu = s_gpu->offsets_gpu; jb.z = hc_j.z; jb.y = hc_j.y / (hc_j.z + hc_n.z); jb.x = hc_j.x / ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y); if ((boundary & GPAW_BOUNDARY_SKIP) != 0) { int3 jb1; int3 bjb1, bjb2; bjb1.x = 0; bjb1.y = 0; bjb1.z = 0; bjb2.x = 0; bjb2.y = 0; bjb2.z = 0; jb1.z = jb.z / 2; jb1.x = jb.x / 2; jb1.y = jb.y / 2; if ((boundary & GPAW_BOUNDARY_X0) != 0) { bjb1.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { bjb2.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_Y0) != 0) { bjb1.y += dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Y1) != 0) { bjb2.y += dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Z0) != 0) { bjb1.z += dimBlock.x; } if ((boundary & GPAW_BOUNDARY_Z1) != 0) { bjb2.z += dimBlock.x; } bjb.x = bjb1.x + bjb2.x; bjb.y = bjb1.y + bjb2.y; bjb.z = bjb1.z + bjb2.z; hc_n.x -= bjb.x; hc_n.y -= bjb.y; hc_n.z -= bjb.z; jb.x += bjb.x; jb.y += bjb.y; jb.z += bjb.z; jb1.x += bjb1.x; jb1.y += bjb1.y; jb1.z += bjb1.z; hc_bj.z = bjb.z; hc_bj.y = bjb.y * (hc_bj.z + hc_n.z); hc_bj.x = bjb.x * ((hc_bj.z + hc_n.z) * hc_n.y + hc_bj.y); hc_j.z = jb.z; hc_j.y = jb.y * (hc_j.z + hc_n.z); hc_j.x = jb.x * ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y); bdev += bjb1.z + bjb1.y * (hc_bj.z + hc_n.z) + bjb1.x * ((hc_bj.z + hc_n.z) * hc_n.y + hc_bj.y); adev = (Tgpu*) ((double*) adev + jb1.z + jb1.y * (hc_j.z + 
hc_n.z) + jb1.x * ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y)); } else { adev = (Tgpu*) ((double*) adev + (hc_j.x + hc_j.y + hc_j.z) / 2); } if ((hc_n.x <= 0) || (hc_n.y <= 0) || (hc_n.z <= 0)) return; dim3 dimGrid(1,1,1); int xdiv = MIN(hc_n.x, MAX((4 + blocks - 1) / blocks, 1)); if (((boundary & GPAW_BOUNDARY_NORMAL) != 0) || ((boundary & GPAW_BOUNDARY_SKIP) != 0)) { dimGrid.x = MAX((hc_n.z + dimBlock.x - 1) / dimBlock.x, 1); dimGrid.y = MAX((hc_n.y + dimBlock.y - 1) / dimBlock.y, 1); dimGrid.y *= blocks; dimGrid.x *= xdiv; } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { int xx = MAX((hc_n.z + dimBlock.x - 1) / dimBlock.x, 1); int yy = MAX((hc_n.y + dimBlock.y - 1) / dimBlock.y, 1); int ysiz = hc_n.y; if ((boundary & GPAW_BOUNDARY_Y0) != 0) ysiz -= dimBlock.y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ysiz -= dimBlock.y; int yy2 = MAX((ysiz + dimBlock.y - 1) / dimBlock.y, 0); dimGrid.x = 0; if ((boundary & GPAW_BOUNDARY_X0) != 0) dimGrid.x += xx * yy; if ((boundary & GPAW_BOUNDARY_X1) != 0) dimGrid.x += xx * yy; if ((boundary & GPAW_BOUNDARY_Y0) != 0) dimGrid.x += xdiv * xx; if ((boundary & GPAW_BOUNDARY_Y1) != 0) dimGrid.x += xdiv * xx; if ((boundary & GPAW_BOUNDARY_Z0) != 0) dimGrid.x += xdiv * yy2; if ((boundary & GPAW_BOUNDARY_Z1) != 0) dimGrid.x += xdiv * yy2; dimGrid.y = blocks; } int3 sizea; sizea.z = hc_j.z + hc_n.z; sizea.y = sizea.z * hc_n.y + hc_j.y; sizea.x = sizea.y * hc_n.x + hc_j.x; int3 sizeb; sizeb.z = hc_bj.z + hc_n.z; sizeb.y = sizeb.z * hc_n.y + hc_bj.y; sizeb.x = sizeb.y * hc_n.x + hc_bj.x; if (((boundary & GPAW_BOUNDARY_NORMAL) != 0) || ((boundary & GPAW_BOUNDARY_SKIP) != 0)) { void (*fd_kernel)(const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double* a, double* b, const int3 c_n, const int3 a_size, const int3 b_size, const int xdiv, const int blocks); switch (s_gpu->ncoefs0) { case 3: fd_kernel = Zgpu(fd_kernel2); break; case 5: fd_kernel = 
Zgpu(fd_kernel4); break; case 7: fd_kernel = Zgpu(fd_kernel6); break; case 9: fd_kernel = Zgpu(fd_kernel8); break; case 11: fd_kernel = Zgpu(fd_kernel10); break; default: assert(0); } gpuLaunchKernel( (*fd_kernel), dimGrid, dimBlock, 0, stream, s_gpu->ncoefs, s_gpu->coefs_gpu, offsets_gpu, s_gpu->coefs0_gpu, s_gpu->coefs1_gpu, s_gpu->coefs2_gpu, (double*) adev, (double*) bdev, hc_n, sizea, sizeb, xdiv, blocks); } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { void (*fd_kernel)(const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double *a, double *b, const int3 c_n, const int3 c_jb, const int boundary, const int xdiv, const int blocks); switch (s_gpu->ncoefs0) { case 3: fd_kernel = Zgpu(fd_kernel2_onlyb); break; case 5: fd_kernel = Zgpu(fd_kernel4_onlyb); break; case 7: fd_kernel = Zgpu(fd_kernel6_onlyb); break; case 9: fd_kernel = Zgpu(fd_kernel8_onlyb); break; case 11: fd_kernel = Zgpu(fd_kernel10_onlyb); break; default: assert(0); } gpuLaunchKernel( (*fd_kernel), dimGrid, dimBlock, 0, stream, s_gpu->ncoefs, s_gpu->coefs_gpu, offsets_gpu, s_gpu->coefs0_gpu, s_gpu->coefs1_gpu, s_gpu->coefs2_gpu, (double*) adev, (double*) bdev, hc_n, jb, boundary, xdiv, blocks); } gpuCheckLastError(); } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "fd.cpp" extern "C" int bmgs_fd_boundary_test(const bmgsstencil_gpu* s, int boundary, int ndouble) { int3 jb; int3 bjb; long3 hc_n; long3 hc_j; dim3 dimBlock(BLOCK_X, BLOCK_Y); hc_n.x = s->n[0]; hc_n.y = s->n[1]; hc_n.z = s->n[2]; hc_j.x = s->j[0]; hc_j.y = s->j[1]; hc_j.z = s->j[2]; jb.z = hc_j.z; jb.y = hc_j.y / (hc_j.z + hc_n.z); jb.x = hc_j.x / ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y); int3 bjb1, bjb2; bjb1.x=0; bjb1.y=0; bjb1.z=0; bjb2.x=0; bjb2.y=0; bjb2.z=0; if ((boundary & GPAW_BOUNDARY_X0) != 0) { bjb1.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { bjb2.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_Y0) != 0) { bjb1.y 
+= dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Y1) != 0) { bjb2.y += dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Z0) != 0) { bjb1.z += dimBlock.x; } if ((boundary & GPAW_BOUNDARY_Z1) != 0) { bjb2.z += dimBlock.x; } bjb.x = bjb1.x + bjb2.x; bjb.y = bjb1.y + bjb2.y; bjb.z = bjb1.z + bjb2.z; hc_n.x -= bjb.x; hc_n.y -= bjb.y; hc_n.z -= bjb.z; if (hc_n.x < 4 || hc_n.y < 1 || hc_n.z < 1) return 0; if ((hc_n.y / (dimBlock.y)) * (hc_n.z / (dimBlock.x)) < 20) return 0; return 1; } extern "C" bmgsstencil_gpu bmgs_stencil_to_gpu(const bmgsstencil* s) { bmgsstencil_gpu s_gpu; long offsets[s->ncoefs]; double coefs[s->ncoefs]; int ncoefs=0, ncoefs0=0, ncoefs1=0, ncoefs2=0; int n2 = (s->n[2] + s->j[2]); int n1 = s->j[1] + s->n[1] * n2; int jb[3]; jb[2] = s->j[2]; jb[1] = s->j[1] / n2; jb[0] = s->j[0] / n1; s_gpu.n[0] = s->n[0]; s_gpu.n[1] = s->n[1]; s_gpu.n[2] = s->n[2]; s_gpu.j[0] = s->j[0]; s_gpu.j[1] = s->j[1]; s_gpu.j[2] = s->j[2]; ncoefs0 = jb[0] + 1; ncoefs1 = jb[1]; ncoefs2 = jb[2]; double coefs0[ncoefs0], coefs1[ncoefs1], coefs2[ncoefs2]; memset(coefs0, 0, sizeof(double) * ncoefs0); memset(coefs1, 0, sizeof(double) * ncoefs1); memset(coefs2, 0, sizeof(double) * ncoefs2); for (int i=0; i < s->ncoefs; i++) { int offpoint = s->offsets[i] + (s->j[0] + s->j[1] + s->j[2]) / 2; int i0 = offpoint / n1; int i1 = (offpoint - i0 * n1) / n2; int i2 = (offpoint - i0 * n1 - i1 * n2); i0 -= jb[0] / 2; i1 -= jb[1] / 2; i2 -= jb[2] / 2; if (i1 == 0 && i2 == 0 && abs(i0) <= jb[0] / 2) { int offset = ncoefs0 / 2 + i0; coefs0[offset] = s->coefs[i]; } else if (i0 == 0 && i1 == 0 && abs(i2) <= jb[2] / 2) { int offset = i2 > 0 ? ncoefs2 / 2 + i2 - 1 : ncoefs2 / 2 + i2; coefs2[offset] = s->coefs[i]; } else if (i0 == 0 && i2 == 0 && abs(i1) <= jb[1] / 2) { int offset = i1 > 0 ? 
ncoefs1 / 2 + i1 - 1 : ncoefs1 / 2 + i1; coefs1[offset] = s->coefs[i]; } else { offsets[ncoefs] = s->offsets[i]; coefs[ncoefs] = s->coefs[i]; ncoefs++; } } s_gpu.ncoefs = ncoefs; s_gpu.ncoefs0 = ncoefs0; s_gpu.ncoefs1 = ncoefs1; s_gpu.ncoefs2 = ncoefs2; s_gpu.coef_relax = s->coefs[0]; if (ncoefs > 0) { gpuMalloc(&(s_gpu.coefs_gpu), sizeof(double) * ncoefs); gpuMemcpy(s_gpu.coefs_gpu, coefs, sizeof(double) * ncoefs, gpuMemcpyHostToDevice); gpuMalloc(&(s_gpu.offsets_gpu), sizeof(long) * ncoefs); gpuMemcpy(s_gpu.offsets_gpu, offsets, sizeof(long) * ncoefs, gpuMemcpyHostToDevice); } gpuMalloc(&(s_gpu.coefs0_gpu), sizeof(double) * ncoefs0); gpuMemcpy(s_gpu.coefs0_gpu, coefs0, sizeof(double) * ncoefs0, gpuMemcpyHostToDevice); gpuMalloc(&(s_gpu.coefs1_gpu), sizeof(double) * ncoefs1); gpuMemcpy(s_gpu.coefs1_gpu, coefs1, sizeof(double) * ncoefs1, gpuMemcpyHostToDevice); gpuMalloc(&(s_gpu.coefs2_gpu), sizeof(double) * ncoefs2); gpuMemcpy(s_gpu.coefs2_gpu, coefs2, sizeof(double) * ncoefs2, gpuMemcpyHostToDevice); return s_gpu; } #endif #endif gpaw-24.1.0/c/gpu/kernels/interpolate-stencil.cpp000066400000000000000000000031001454550013000216740ustar00rootroot00000000000000__global__ void IP1D_kernel( const Tgpu* a, int n, int m, Tgpu* b, int skip0, int skip1) { a += K / 2 - 1; int j = blockIdx.x * BLOCK_X + threadIdx.x; int i = blockIdx.y * BLOCK_Y + threadIdx.y; if (j >= m || i >= n) { return; } a += j * (K - 1 - skip1 + n) + i; b += j + (2 * m * i); if (skip0) { b -= m; } if (i > 0 || !skip0) { b[0] = a[0]; } if (i == n - 1 && skip1) { b -= m; } else { if (K == 2) { b[m] = MULDT(0.5, ADD(a[0], a[1])); } else if (K == 4) { b[m] = ADD(MULDT( 0.5625, ADD(a[ 0], a[1])), MULDT(-0.0625, ADD(a[-1], a[2]))); } else if (K == 6) { b[m] = ADD(ADD(MULDT( 0.58593750, ADD(a[ 0], a[1])), MULDT(-0.09765625, ADD(a[-1], a[2]))), MULDT(0.01171875, ADD(a[-2], a[3]))); } else { b[m] = ADD(ADD(MULDT( 0.59814453125, ADD(a[ 0], a[1])), MULDT(-0.11962890625, ADD(a[-1], a[2]))), ADD(MULDT( 
0.02392578125, ADD(a[-2], a[3])), MULDT(-0.00244140625, ADD(a[-3], a[4])))); } } } void IP1D(const Tgpu* a, int n, int m, Tgpu* b, int skip[2]) { int gridx = (m + BLOCK_X - 1) / BLOCK_X; int gridy = (n + BLOCK_Y - 1) / BLOCK_Y; dim3 dimBlock(BLOCK_X, BLOCK_Y); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( IP1D_kernel, dimGrid, dimBlock, 0, 0, a, n, m, b, skip[0], skip[1]); gpuCheckLastError(); } gpaw-24.1.0/c/gpu/kernels/interpolate.cpp000066400000000000000000000173441454550013000202540ustar00rootroot00000000000000#include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define BLOCK_X (32) # define BLOCK_Y (16) # define BCACHE_X (BLOCK_X + 1) # define BCACHE_Y (BLOCK_Y + 1) # define ACACHE_X (BLOCK_X / 2 + 1) # define ACACHE_Y (BLOCK_Y / 2 + 1) #endif __global__ void Zgpu(interpolate_kernel)( const Tgpu* a, const int3 n, Tgpu* b, const int3 b_n, const int3 skip0, const int3 skip1, int xdiv, int blocks) { int xx = gridDim.x / xdiv; int yy = gridDim.y / blocks; int xind = blockIdx.x / xx; int i2tid = threadIdx.x; int i2base = (blockIdx.x - xind * xx) * BLOCK_X; int i2 = i2base + i2tid; int blocksi = blockIdx.y / yy; int i1tid = threadIdx.y; int i1base = (blockIdx.y - blocksi * yy) * BLOCK_Y; int i1 = i1base + i1tid; __shared__ Tgpu bcache12[BCACHE_Y * BCACHE_X]; Tgpu *bcache12p_2x; int xlen = (n.x + xdiv - 1) / xdiv; int xstart = xind * xlen; int xend = MIN(xstart + xlen, n.x); if (xind < xdiv - 1) xend++; xlen = xend - xstart; a += n.x * n.y * n.z * blocksi + xstart * n.y * n.z + ((i1base / 2) + i1tid) * n.z + (i2base / 2) + i2tid; if (skip0.y) i1--; if (skip0.z) i2--; b += b_n.x * b_n.y * b_n.z * blocksi + 2 * xstart * b_n.y * b_n.z + i1 * b_n.z + i2; if ((xind > 0) && (skip0.x)) b -= b_n.y * b_n.z; bcache12p_2x = bcache12 + BCACHE_X * (2 * i1tid) + 2 * i2tid; if (i1tid < ACACHE_Y && i2tid < ACACHE_X) bcache12p_2x[0] = a[0]; __syncthreads(); for (int i0=xstart+1; i0 < xend; i0++) { Tgpu a_c; a += n.y*n.z; if 
(i1tid < ACACHE_Y && i2tid < BLOCK_X / 2) { bcache12p_2x[1] = MULTD(ADD(bcache12p_2x[0], bcache12p_2x[2]), 0.5); } __syncthreads(); if (i1tid 1 || !skip0.x) { if ((i1 < b_n.y) && (i2 < b_n.z) && (i1 >= 0) && (i2 >= 0)) b[0] = bcache12[BCACHE_X * i1tid + i2tid]; b += b_n.y * b_n.z; } __syncthreads(); if (i1tid < ACACHE_Y && i2tid < ACACHE_X) { a_c = a[0]; bcache12p_2x[0] = MULTD(ADD(bcache12p_2x[0], a_c), 0.5); } __syncthreads(); if (i1tid < ACACHE_Y && i2tid < BLOCK_X / 2) { bcache12p_2x[1] = MULTD(ADD(bcache12p_2x[0], bcache12p_2x[2]), 0.5); } __syncthreads(); if (i1tid < BLOCK_Y / 2) { bcache12p_2x[BCACHE_X * 1 - i2tid] = MULTD(ADD(bcache12p_2x[BCACHE_X * 0 - i2tid], bcache12p_2x[BCACHE_X * 2 - i2tid]), 0.5); if ((skip1.z) && (i2tid < 1)) bcache12p_2x[BCACHE_X * 1 - i2tid + BLOCK_X] = MULTD(ADD(bcache12p_2x[BCACHE_X * 0 - i2tid + BLOCK_X], bcache12p_2x[BCACHE_X * 2 - i2tid + BLOCK_X]), 0.5); } __syncthreads(); if ((i1 < b_n.y) && (i2 < b_n.z) && (i1 >= 0) && (i2 >= 0)) { b[0] = bcache12[BCACHE_X * i1tid + i2tid]; } __syncthreads(); if (i1tid < ACACHE_Y && i2tid < ACACHE_X) { bcache12p_2x[0] = a_c; } b += b_n.y * b_n.z; __syncthreads(); } if (xend == n.x && skip1.x) { if (i1tid < ACACHE_Y && i2tid < BLOCK_X / 2) bcache12p_2x[1] = MULTD(ADD(bcache12p_2x[0], bcache12p_2x[2]), 0.5); __syncthreads(); if (i1tid < BLOCK_Y / 2) { bcache12p_2x[BCACHE_X * 1 - i2tid] = MULTD(ADD(bcache12p_2x[BCACHE_X * 0 - i2tid], bcache12p_2x[BCACHE_X * 2 - i2tid]), 0.5); if ((skip1.z) && (i2tid < 1)) bcache12p_2x[BCACHE_X * 1 - i2tid + BLOCK_X] = MULTD(ADD(bcache12p_2x[BCACHE_X * 0 - i2tid + BLOCK_X], bcache12p_2x[BCACHE_X * 2 - i2tid + BLOCK_X]), 0.5); } __syncthreads(); if (xend > 1 || !skip0.x) { if ((i1 < b_n.y) && (i2 < b_n.z) && (i1 >= 0) && (i2 >= 0)) b[0] = bcache12[BCACHE_X * i1tid + i2tid]; b += b_n.y * b_n.z; } } } extern "C" void Zgpu(bmgs_interpolate_gpu)(int k, int skip[3][2], const Tgpu* a, const int size[3], Tgpu* b, const int sizeb[3], int blocks) { if (k != 2) assert(0); 
int xdiv=1; int gridy = blocks * ((sizeb[1] + skip[1][0] + BLOCK_Y - 1) / BLOCK_Y); int gridx = xdiv * ((sizeb[2] + skip[2][0] + BLOCK_X - 1) / BLOCK_X); dim3 dimBlock(BLOCK_X, BLOCK_Y); dim3 dimGrid(gridx, gridy); int3 n = {size[0], size[1], size[2]}; int3 skip0 = {skip[0][0], skip[1][0], skip[2][0]}; int3 skip1 = {skip[0][1], skip[1][1], skip[2][1]}; int3 b_n = {2 * n.x - 2 - skip0.x + skip1.x, 2 * n.y - 2 - skip0.y + skip1.y, 2 * n.z - 2 - skip0.z + skip1.z}; gpuLaunchKernel(Zgpu(interpolate_kernel), dimGrid, dimBlock, 0, 0, a, n, b, b_n, skip0, skip1, xdiv, blocks); gpuCheckLastError(); } #define K 2 #define IP1D Zgpu(interpolate1D2) #define IP1D_kernel Zgpu(interpolate1D2_kernel) #include "interpolate-stencil.cpp" #undef IP1D #undef IP1D_kernel #undef K #define K 4 #define IP1D Zgpu(interpolate1D4) #define IP1D_kernel Zgpu(interpolate1D4_kernel) #include "interpolate-stencil.cpp" #undef IP1D #undef IP1D_kernel #undef K #define K 6 #define IP1D Zgpu(interpolate1D6) #define IP1D_kernel Zgpu(interpolate1D6_kernel) #include "interpolate-stencil.cpp" #undef IP1D #undef IP1D_kernel #undef K #define K 8 #define IP1D Zgpu(interpolate1D8) #define IP1D_kernel Zgpu(interpolate1D8_kernel) #include "interpolate-stencil.cpp" #undef IP1D #undef IP1D_kernel #undef K extern "C" void Zgpu(bmgs_interpolate_stencil_gpu)(int k, int skip[3][2], const Tgpu* a, const int sizea[3], Tgpu* b, const int sizeb[3], Tgpu* w, int blocks) { void (*func)(const Tgpu*, int, int, Tgpu*, int[2]); if (k == 2) func = Zgpu(interpolate1D2); else if (k == 4) func = Zgpu(interpolate1D4); else if (k == 6) func = Zgpu(interpolate1D6); else func = Zgpu(interpolate1D8); int e = k - 1; for (int i=0; i < blocks; i++) { func(a, sizea[2] - e + skip[2][1], sizea[0] * sizea[1], b, skip[2]); func(b, sizea[1] - e + skip[1][1], sizea[0] * ((sizea[2] - e) * 2 - skip[2][0] + skip[2][1]), w, skip[1]); func(w, sizea[0] - e + skip[0][1], ((sizea[1] - e) * 2 - skip[1][0] + skip[1][1]) * ((sizea[2] - e) * 2 - skip[2][0] + 
skip[2][1]), b, skip[0]); a += sizea[0] * sizea[1] * sizea[2]; b += sizeb[0] * sizeb[1] * sizeb[2]; } } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "interpolate.cpp" #endif gpaw-24.1.0/c/gpu/kernels/lfc-reduce-kernel.cpp000066400000000000000000000175151454550013000212150ustar00rootroot00000000000000__device__ unsigned int INNAME(lfc_retirementCount) = {0}; __global__ void INNAME(integrate_mul_kernel)( const Tgpu *a_G, int nG, const LFVolume_gpu *volume_W, const int *volume_WMi_gpu, const int *WMi_gpu, int WMimax, int q, Tgpu *out, int block_out, Tgpu *results, int Mcount, int nM, int nvec) { int yy = gridDim.y / Mcount; int bloy = blockIdx.y / yy; int block = blockIdx.y - bloy * yy; unsigned int tid = threadIdx.x; unsigned int gridSize = REDUCE_LFC_THREADS * gridDim.x; unsigned int i_b = blockIdx.x * (REDUCE_LFC_THREADS) + tid; extern __shared__ Tgpu Zgpu(sdata)[]; // perform first level of reduction, // reading from global memory, writing to shared memory a_G += nG * block; for (int vv=0; vv < WMi_gpu[bloy]; vv++) { const LFVolume_gpu *v = &volume_W[volume_WMi_gpu[bloy * WMimax + vv]]; int *nGBcum = v->nGBcum; #ifdef GPU_USE_COMPLEX Tgpu phase = v->phase_k[q]; #endif int len_A_gm = v->len_A_gm; Tgpu *out_t = out + v->M * block_out + block * nM * block_out; int a_ind, ai=0, acum=0; if (i_b < len_A_gm) { int bi = v->nB; int ci; int bcum = nGBcum[bi]; int ccum; while (bi - ai > 1) { ci = ai + 1 + (bi - ai - 2) * (i_b - acum) / (bcum - acum); ccum = nGBcum[ci]; if (ccum <= i_b) { ai = ci; acum = ccum; } else { bi = ci; bcum = ccum; } } a_ind = v->GB1[ai] + i_b - acum; } for (int i=0; i < nvec; i++) { Tgpu a_Gv; double *A_gm2 = v->A_gm; Tgpu *out_t2 = out_t; if (i_b < len_A_gm) { #ifdef GPU_USE_COMPLEX a_Gv = MULTT(a_G[i * nG + a_ind], phase); #else a_Gv = a_G[i * nG + a_ind]; #endif } for (int m=0; m < v->nm; m++) { Tgpu mySum = MAKED(0); if (i_b < len_A_gm) { mySum = MULTD(a_Gv, A_gm2[i_b]); } if (len_A_gm > gridSize) { unsigned int i_bb = i_b + 
gridSize; int aai = ai; int aacum = acum; while (i_bb < len_A_gm) { int bi = v->nB; int ci; int bcum = nGBcum[bi]; int ccum; while (bi - aai > 1) { ci = aai + 1 + (bi - aai - 2) * (i_bb - aacum) / (bcum - aacum); ccum = nGBcum[ci]; if (ccum <= i_bb) { aai = ci; aacum = ccum; } else { bi = ci; bcum = ccum; } } #ifdef GPU_USE_COMPLEX IADD(mySum, MULTD(MULTT(a_G[i * nG + v->GB1[aai] + i_bb - aacum], phase), A_gm2[i_bb])); #else IADD(mySum, MULTD(a_G[i * nG + v->GB1[aai] + i_bb - aacum], A_gm2[i_bb])); #endif i_bb += gridSize; } } Zgpu(sdata)[tid] = mySum; __syncthreads(); if (REDUCE_LFC_THREADS >= 512) { if (tid < 256) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 256]); } __syncthreads(); } if (REDUCE_LFC_THREADS >= 256) { if (tid < 128) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 128]); } __syncthreads(); } if (REDUCE_LFC_THREADS >= 128) { if (tid < 64) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 64]); } __syncthreads(); } if (tid < 32) { volatile Tgpu *smem = Zgpu(sdata); #ifdef GPU_USE_COMPLEX if (REDUCE_LFC_THREADS >= 64) { smem[tid].x = mySum.x = mySum.x + smem[tid + 32].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 32].y; } if (REDUCE_LFC_THREADS >= 32) { smem[tid].x = mySum.x = mySum.x + smem[tid + 16].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 16].y; } if (REDUCE_LFC_THREADS >= 16) { smem[tid].x = mySum.x = mySum.x + smem[tid + 8].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 8].y; } if (REDUCE_LFC_THREADS >= 8) { smem[tid].x = mySum.x = mySum.x + smem[tid + 4].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 4].y; } if (REDUCE_LFC_THREADS >= 4) { smem[tid].x = mySum.x = mySum.x + smem[tid + 2].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 2].y; } if (REDUCE_LFC_THREADS >= 2) { smem[tid].x = mySum.x = mySum.x + smem[tid + 1].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 1].y; } #else if (REDUCE_LFC_THREADS >= 64) smem[tid] = mySum = ADD(mySum, smem[tid + 32]); if (REDUCE_LFC_THREADS >= 32) smem[tid] = mySum 
= ADD(mySum, smem[tid + 16]); if (REDUCE_LFC_THREADS >= 16) smem[tid] = mySum = ADD(mySum, smem[tid + 8]); if (REDUCE_LFC_THREADS >= 8) smem[tid] = mySum = ADD(mySum, smem[tid + 4]); if (REDUCE_LFC_THREADS >= 4) smem[tid] = mySum = ADD(mySum, smem[tid + 2]); if (REDUCE_LFC_THREADS >= 2) smem[tid] = mySum = ADD(mySum, smem[tid + 1]); #endif } // write result for this block to global mem if (tid==0) { if (vv==0) out_t2[blockIdx.x] = Zgpu(sdata)[0]; else IADD(out_t2[blockIdx.x], Zgpu(sdata)[0]); } A_gm2 += len_A_gm; out_t2 += block_out; __syncthreads(); } out_t += nM * block_out; } } if (gridDim.x==1) { __shared__ bool amLast; __threadfence(); if (tid == 0) { unsigned int ticket = atomicInc(&INNAME(lfc_retirementCount), gridDim.y); amLast = (ticket == gridDim.y - 1); } __syncthreads(); if ((amLast)) { for (int i=tid; i < nM * yy * nvec; i += blockDim.x) { results[i] = out[i * block_out]; } INNAME(lfc_retirementCount) = 0; } } } gpaw-24.1.0/c/gpu/kernels/lfc-reduce.cpp000066400000000000000000000346051454550013000177360ustar00rootroot00000000000000#ifndef REDUCE_LFC #define REDUCE_LFC_MAX_THREADS (64) #define REDUCE_LFC_MAX_THREADS2 (64) #define REDUCE_LFC_MAX_BLOCKS (32) #define REDUCE_LFC_MAX_BLOCKS2 (32) #define REDUCE_LFC_MAX_YBLOCKS (65535) #define REDUCE_LFC_BUFFER_SIZE ((2 * GPU_BLOCKS_MAX \ * MAX(REDUCE_LFC_MAX_BLOCKS, \ REDUCE_LFC_MAX_BLOCKS2)) * 16) static void *lfc_reduce_buffer = NULL; static int lfc_reduce_buffer_size = 0; extern "C" void lfc_reduce_init_buffers_gpu() { lfc_reduce_buffer = NULL; lfc_reduce_buffer_size = 0; } extern "C" void lfc_reduce_dealloc_gpu() { gpuFree(lfc_reduce_buffer); gpuCheckLastError(); lfc_reduce_init_buffers_gpu(); } static void lfc_reduceNumBlocksAndThreads(int n, int *blocks, int *threads) { *threads = (n < REDUCE_LFC_MAX_THREADS) ? 
nextPow2(n) : REDUCE_LFC_MAX_THREADS; *blocks = MIN((n + (*threads - 1)) / (*threads), REDUCE_LFC_MAX_BLOCKS); } static void lfc_reduceNumBlocksAndThreads2(int n,int *blocks, int *threads) { *threads = (n < REDUCE_LFC_MAX_THREADS2 * 2) ? nextPow2((n + 1) / 2) : REDUCE_LFC_MAX_THREADS2; *blocks = MIN((n + (*threads * 2 - 1)) / (*threads * 2), REDUCE_LFC_MAX_BLOCKS2); } #endif #define REDUCE_LFC #define INNAME(f) Zgpu(f ## _map512) #define REDUCE_LFC_THREADS 512 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map256) #define REDUCE_LFC_THREADS 256 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map128) #define REDUCE_LFC_THREADS 128 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map64) #define REDUCE_LFC_THREADS 64 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map32) #define REDUCE_LFC_THREADS 32 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map16) #define REDUCE_LFC_THREADS 16 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map8) #define REDUCE_LFC_THREADS 8 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map4) #define REDUCE_LFC_THREADS 4 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map2) #define REDUCE_LFC_THREADS 2 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## _map1) #define REDUCE_LFC_THREADS 1 #include "lfc-reduce-kernel.cpp" #undef REDUCE_LFC_THREADS #undef INNAME #undef INFUNC #undef REDUCE_THREADS #define INFUNC(a,b) (a) #define INNAME(f) Zgpu(f ## 512) #define REDUCE_THREADS 512 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 
256) #define REDUCE_THREADS 256 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 128) #define REDUCE_THREADS 128 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 64) #define REDUCE_THREADS 64 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 32) #define REDUCE_THREADS 32 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 16) #define REDUCE_THREADS 16 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 8) #define REDUCE_THREADS 8 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 4) #define REDUCE_THREADS 4 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 2) #define REDUCE_THREADS 2 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) Zgpu(f ## 1) #define REDUCE_THREADS 1 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #undef INFUNC void Zgpu(lfc_reducemap)(LFCObject *lfc, const Tgpu *a_G, int nG, Tgpu *c_xM, int nM, int nvec, int q) { int blocks, threads; if (lfc_reduce_buffer_size < nM * REDUCE_LFC_BUFFER_SIZE) { lfc_reduce_dealloc_gpu(); gpuMalloc(&lfc_reduce_buffer, nM * REDUCE_LFC_BUFFER_SIZE); lfc_reduce_buffer_size = nM * REDUCE_LFC_BUFFER_SIZE; } lfc_reduceNumBlocksAndThreads(lfc->max_len_A_gm, &blocks, &threads); int min_wsize = blocks * nM; int work_buffer_size = (lfc_reduce_buffer_size / sizeof(Tgpu)) / 2; assert(min_wsize < work_buffer_size); int mynvec = MAX(MIN(work_buffer_size / min_wsize, nvec), 1); mynvec = MIN(mynvec, (REDUCE_LFC_MAX_YBLOCKS) / nM); Tgpu *work_buffer1 = (Tgpu*) lfc_reduce_buffer; Tgpu *work_buffer2 = work_buffer1 + work_buffer_size; Tgpu *result_gpu = c_xM; int smemSize = (threads <= 32) ? 
2 * threads * sizeof(Tgpu) : threads * sizeof(Tgpu); for (int i=0; i < nvec; i += mynvec) { int cunvec = MIN(mynvec, nvec - i); int innvec = 1; dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, lfc->Mcount, 1); int block_out = blocks; innvec = cunvec; switch (threads) { case 512: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map512), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 256: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map256), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 128: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map128), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 64: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map64), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 32: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map32), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 16: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map16), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 8: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map8), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, 
lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 4: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map4), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 2: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map2), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG,lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; case 1: gpuLaunchKernel( Zgpu(integrate_mul_kernel_map1), dimGrid, dimBlock, smemSize, 0, a_G + i * nG, nG, lfc->volume_W_gpu, lfc->volume_WMi_gpu, lfc->WMi_gpu, lfc->WMimax, q, (Tgpu*) work_buffer1, block_out, result_gpu + i * nM, lfc->Mcount, nM, innvec); break; default: assert(0); } assert(!gpuCheckLastError()); int s = blocks; int count = 0; while (s > 1) { int blocks2, threads2; int block_in = block_out; lfc_reduceNumBlocksAndThreads2(s, &blocks2, &threads2); block_out = blocks2; dim3 dimBlock(threads2, 1, 1); dim3 dimGrid(blocks2, cunvec * nM, 1); int smemSize = (threads2 <= 32) ? 2 * threads2 * sizeof(Tgpu) : threads2 * sizeof(Tgpu); Tgpu *work1 = (count % 2) ? work_buffer2 : work_buffer1; Tgpu *work2 = (count % 2) ? 
work_buffer1 : work_buffer2; count++; switch (threads2) { case 512: gpuLaunchKernel( Zgpu(reduce_kernel512), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 256: gpuLaunchKernel( Zgpu(reduce_kernel256), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 128: gpuLaunchKernel( Zgpu(reduce_kernel128), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 64: gpuLaunchKernel( Zgpu(reduce_kernel64), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 32: gpuLaunchKernel( Zgpu(reduce_kernel32), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 16: gpuLaunchKernel( Zgpu(reduce_kernel16), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 8: gpuLaunchKernel( Zgpu(reduce_kernel8), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 4: gpuLaunchKernel( Zgpu(reduce_kernel4), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 2: gpuLaunchKernel( Zgpu(reduce_kernel2), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; case 1: gpuLaunchKernel( Zgpu(reduce_kernel1), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i * nM, s, block_in, block_out, cunvec * nM); break; default: assert(0); } assert(!gpuCheckLastError()); s = (s + (threads2 * 2 - 1)) / (threads2 * 2); } } } 
gpaw-24.1.0/c/gpu/kernels/lfc.cpp000066400000000000000000000426501454550013000164700ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "../../lfc.h" #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX #define INLINE inline static INLINE void* gpaw_malloc(int n) { void *p = malloc(n); assert(p != NULL); return p; } #define GPAW_MALLOC(T, n) (T*)(gpaw_malloc((n) * sizeof(T))) #define BLOCK_Y 16 #endif #include "lfc-reduce.cpp" __global__ void Zgpu(add_kernel)(Tgpu *a_G, const Tgpu *c_M, int *G_B1, int *G_B2, LFVolume_gpu **volume_i, int *A_gm_i, int *ni, int nimax, int na_G, int nM, gpuDoubleComplex *phase_i, int max_k, int q, int nB_gpu) { int G = threadIdx.x; int B = blockIdx.x * blockDim.y + threadIdx.y; if (B >= nB_gpu) return; int x = blockIdx.y; int nii, Gb, Ga, nG; LFVolume_gpu* v; double* A_gm; const Tgpu* c_Mt; int nm; int len; nii = ni[B]; Ga = G_B1[B]; Gb = G_B2[B]; nG = Gb - Ga; a_G += Ga + na_G * x; c_M += nM * x; Tgpu av = MAKED(0); if (G < nG) { for (int i=0; i < nii; i++) { Tgpu avv; v = volume_i[B + i * nB_gpu]; A_gm = v->A_gm + A_gm_i[B + i * nB_gpu] + G; nm = v->nm; len = v->len_A_gm; c_Mt = c_M + v->M; avv = MULTD(c_Mt[0], A_gm[0]); for (int m=1; m < nm; m += 2) { A_gm += len; IADD(avv, MULTD(c_Mt[m], A_gm[0])); A_gm += len; IADD(avv, MULTD(c_Mt[m+1], A_gm[0])); } #ifdef GPU_USE_COMPLEX avv = MULTT(avv, gpuConj(phase_i[max_k * nimax * B + q * nimax + i])); #endif IADD(av, avv); } IADD(a_G[G] , av); } } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "lfc.cpp" extern "C" void lfc_dealloc_gpu(LFCObject *self) { if (self->use_gpu) { for (int W=0; W < self->nW; W++) { LFVolume_gpu* volume_gpu = &self->volume_W_gpu_host[W]; gpuFree(volume_gpu->A_gm); gpuFree(volume_gpu->GB1); gpuFree(volume_gpu->nGBcum); gpuFree(volume_gpu->phase_k); } free(self->volume_W_gpu_host); 
gpuFree(self->volume_W_gpu); gpuFree(self->G_B1_gpu); gpuFree(self->G_B2_gpu); gpuFree(self->volume_i_gpu); gpuFree(self->A_gm_i_gpu); gpuFree(self->volume_WMi_gpu); gpuFree(self->WMi_gpu); gpuFree(self->ni_gpu); gpuCheckLastError(); } } extern "C" void *transp(void *matrix, int rows, int cols, size_t item_size) { #define ALIGNMENT 16 /* power of 2 >= minimum array boundary alignment; maybe unnecessary but machine dependent */ char *cursor; char carry[ALIGNMENT]; size_t block_size, remaining_size; int nadir, lag, orbit, ents; if (rows == 1 || cols == 1) return matrix; ents = rows * cols; cursor = (char *) matrix; remaining_size = item_size; while ((block_size = ALIGNMENT < remaining_size ? ALIGNMENT : remaining_size)) { nadir = 1; /* first and last entries are always fixed points so aren't visited */ while (nadir + 1 < ents) { memcpy(carry, &cursor[(lag = nadir) * item_size], block_size); /* follow a complete cycle */ while ((orbit = lag / rows + cols * (lag % rows)) > nadir) { memcpy(&cursor[lag * item_size], &cursor[orbit * item_size], block_size); lag = orbit; } memcpy(&cursor[lag * item_size], carry, block_size); orbit = nadir++; /* find the next unvisited index by an exhaustive search */ while (orbit < nadir && nadir + 1 < ents) { orbit = nadir; while ((orbit = orbit / rows + cols * (orbit % rows)) > nadir); if (orbit < nadir) nadir++; } } cursor += block_size; remaining_size -= block_size; } return matrix; } extern "C" PyObject * NewLFCObject_gpu(LFCObject *self, PyObject *args) { PyObject* A_Wgm_obj; const PyArrayObject* M_W_obj; const PyArrayObject* G_B_obj; const PyArrayObject* W_B_obj; double dv; PyArrayObject* phase_kW_obj; int use_gpu = 1; if (!PyArg_ParseTuple(args, "OOOOdO|iO", &A_Wgm_obj, &M_W_obj, &G_B_obj, &W_B_obj, &dv, &phase_kW_obj, &use_gpu)) return NULL; if (!use_gpu) return (PyObject*) self; int nimax = self->nimax; int max_k = 0; int *GB2s[self->nW]; LFVolume_gpu* volume_W_gpu; volume_W_gpu = GPAW_MALLOC(LFVolume_gpu, self->nW); if 
(self->bloch_boundary_conditions) { max_k = PyArray_DIMS(phase_kW_obj)[0]; } self->max_k = max_k; self->max_len_A_gm = 0; self->max_nG = 0; for (int W=0; W < self->nW; W++) { LFVolume_gpu* v_gpu = &volume_W_gpu[W]; LFVolume* v = &self->volume_W[W]; PyArrayObject* A_gm_obj = (PyArrayObject*) PyList_GetItem(A_Wgm_obj, W); double *work_A_gm = GPAW_MALLOC(double, self->ngm_W[W]); gpuMalloc(&(v_gpu->A_gm), sizeof(double) * self->ngm_W[W]); memcpy(work_A_gm, v->A_gm, sizeof(double) * self->ngm_W[W]); transp(work_A_gm, PyArray_DIMS(A_gm_obj)[0], PyArray_DIMS(A_gm_obj)[1], sizeof(double)); gpuMemcpy(v_gpu->A_gm, work_A_gm, sizeof(double) * self->ngm_W[W], gpuMemcpyHostToDevice); free(work_A_gm); v_gpu->nm = v->nm; v_gpu->M = v->M; v_gpu->W = v->W; v_gpu->len_A_gm = 0; v_gpu->GB1 = GPAW_MALLOC(int, self->ngm_W[W]); GB2s[W] = GPAW_MALLOC(int, self->ngm_W[W]); v_gpu->nGBcum = GPAW_MALLOC(int, self->ngm_W[W] + 1); v_gpu->nB = 0; v_gpu->phase_k = NULL; } gpuMalloc(&(self->volume_W_gpu), sizeof(LFVolume_gpu) * self->nW); int* i_W = self->i_W; LFVolume_gpu** volume_i = GPAW_MALLOC(LFVolume_gpu*, nimax); int Ga = 0; int ni = 0; LFVolume_gpu **volume_i_gpu = GPAW_MALLOC(LFVolume_gpu*, self->nB*nimax); int *A_gm_i_gpu = GPAW_MALLOC(int, self->nB*nimax); int *ni_gpu = GPAW_MALLOC(int, self->nB); int *G_B1_gpu = GPAW_MALLOC(int, self->nB); int *G_B2_gpu = GPAW_MALLOC(int, self->nB); gpuDoubleComplex *phase_i_gpu = NULL; gpuDoubleComplex *phase_i = NULL; if (self->bloch_boundary_conditions) { phase_i_gpu = GPAW_MALLOC(gpuDoubleComplex, max_k * self->nB * nimax); phase_i = GPAW_MALLOC(gpuDoubleComplex, max_k * nimax); } int nB_gpu=0; for (int B=0; B < self->nB; B++) { int Gb = self->G_B[B]; int nG = Gb - Ga; if ((nG > 0) && (ni > 0)) { for (int i=0; i < ni; i++) { LFVolume_gpu* v = volume_i[i]; volume_i_gpu[nB_gpu * nimax + i] = self->volume_W_gpu + (v - volume_W_gpu); A_gm_i_gpu[nB_gpu * nimax + i] = v->len_A_gm; if (self->bloch_boundary_conditions) { for (int kk=0; kk < max_k; kk++){ 
phase_i_gpu[i + nB_gpu * nimax * max_k + kk * nimax] = phase_i[i + kk * nimax]; } } v->len_A_gm += nG; int *GB2 = GB2s[v - volume_W_gpu]; if ((v->nB > 0) && (GB2[v->nB - 1] == Ga)) { GB2[v->nB - 1] = Gb; v->nGBcum[v->nB] += nG; } else { v->GB1[v->nB] = Ga; GB2[v->nB] = Gb; if (v->nB == 0) v->nGBcum[v->nB] = 0; v->nGBcum[v->nB + 1] = nG + v->nGBcum[v->nB]; v->nB++; } } self->max_nG = MAX(self->max_nG, nG); G_B1_gpu[nB_gpu] = Ga; G_B2_gpu[nB_gpu] = Gb; ni_gpu[nB_gpu] = ni; nB_gpu++; } int Wnew = self->W_B[B]; if (Wnew >= 0) { /* Entering new sphere: */ volume_i[ni] = &volume_W_gpu[Wnew]; if (self->bloch_boundary_conditions) { for (int i=0; i < max_k; i++) { phase_i[ni + i * nimax].x = creal( self->phase_kW[Wnew + i * self->nW]); phase_i[ni + i * nimax].y = cimag( self->phase_kW[Wnew + i * self->nW]); } } i_W[Wnew] = ni; ni++; } else { /* Leaving sphere: */ int Wold = -1 - Wnew; int iold = i_W[Wold]; volume_W_gpu[Wold].len_A_gm = volume_i[iold]->len_A_gm; ni--; volume_i[iold] = volume_i[ni]; if (self->bloch_boundary_conditions) { for (int i=0; i < max_k; i++) { phase_i[iold + i * nimax] = phase_i[ni + i * nimax]; } } int Wlast = volume_i[iold]->W; i_W[Wlast] = iold; } Ga = Gb; } for (int W=0; W < self->nW; W++) { LFVolume_gpu* v = &volume_W_gpu[W]; self->max_len_A_gm = MAX(self->max_len_A_gm, v->len_A_gm); int *GB_gpu; gpuMalloc(&(GB_gpu), sizeof(int) * v->nB); gpuMemcpy(GB_gpu, v->GB1, sizeof(int) * v->nB, gpuMemcpyHostToDevice); free(v->GB1); v->GB1 = GB_gpu; free(GB2s[W]); gpuMalloc(&(GB_gpu), sizeof(int) * (v->nB + 1)); gpuMemcpy(GB_gpu, v->nGBcum, sizeof(int) * (v->nB + 1), gpuMemcpyHostToDevice); free(v->nGBcum); v->nGBcum = GB_gpu; if (self->bloch_boundary_conditions) { gpuDoubleComplex phase_k[max_k]; for (int q=0; q < max_k; q++) { phase_k[q].x = creal(self->phase_kW[self->nW*q+W]); phase_k[q].y = cimag(self->phase_kW[self->nW*q+W]); } gpuMalloc(&(v->phase_k), sizeof(gpuDoubleComplex) * max_k); gpuMemcpy(v->phase_k, phase_k, sizeof(gpuDoubleComplex) * max_k, 
gpuMemcpyHostToDevice); } } int WMimax = 0; int *WMi_gpu = GPAW_MALLOC(int, self->nW); int *volume_WMi_gpu = GPAW_MALLOC(int, self->nW * self->nW); self->Mcount = 0; for (int W=0; W < self->nW; W++) { WMi_gpu[W] = 0; } for (int W=0; W < self->nW; W++) { LFVolume_gpu* v = &volume_W_gpu[W]; int M = v->M; for (int W2=0; W2 <= W; W2++) { if (WMi_gpu[W2] > 0) { LFVolume_gpu* v2 = &volume_W_gpu[volume_WMi_gpu[W2 * self->nW]]; if (v2->M == M) { volume_WMi_gpu[W2 * self->nW + WMi_gpu[W2]] = W; WMi_gpu[W2]++; WMimax = MAX(WMi_gpu[W2], WMimax); break; } } else { volume_WMi_gpu[W2*self->nW] = W; WMi_gpu[W2]++; self->Mcount++; WMimax = MAX(WMi_gpu[W2], WMimax); break; } } } int *volume_WMi_gpu2 = GPAW_MALLOC(int, WMimax * self->nW); for (int W=0; W < self->Mcount; W++) { for (int W2=0; W2 < WMi_gpu[W]; W2++) { volume_WMi_gpu2[W * WMimax + W2] = volume_WMi_gpu[W * self->nW + W2]; } } self->WMimax = WMimax; gpuMalloc(&(self->WMi_gpu), sizeof(int) * self->Mcount); gpuMemcpy(self->WMi_gpu, WMi_gpu, sizeof(int) * self->Mcount, gpuMemcpyHostToDevice); gpuMalloc(&(self->volume_WMi_gpu), sizeof(int) * self->Mcount * WMimax); gpuMemcpy(self->volume_WMi_gpu, volume_WMi_gpu2, sizeof(int) * self->Mcount * WMimax, gpuMemcpyHostToDevice); self->nB_gpu = nB_gpu; gpuMalloc(&(self->G_B1_gpu), sizeof(int) * nB_gpu); gpuMemcpy(self->G_B1_gpu, G_B1_gpu, sizeof(int) * nB_gpu, gpuMemcpyHostToDevice); gpuMalloc(&(self->G_B2_gpu), sizeof(int) * nB_gpu); gpuMemcpy(self->G_B2_gpu, G_B2_gpu, sizeof(int) * nB_gpu, gpuMemcpyHostToDevice); gpuMalloc(&(self->ni_gpu), sizeof(int) * nB_gpu); gpuMemcpy(self->ni_gpu, ni_gpu, sizeof(int) * nB_gpu, gpuMemcpyHostToDevice); transp(volume_i_gpu, nB_gpu, nimax, sizeof(LFVolume_gpu*)); gpuMalloc(&(self->volume_i_gpu), sizeof(LFVolume_gpu*) * nB_gpu * nimax); gpuMemcpy(self->volume_i_gpu, volume_i_gpu, sizeof(LFVolume_gpu*) * nB_gpu * nimax, gpuMemcpyHostToDevice); transp(A_gm_i_gpu, nB_gpu, nimax, sizeof(int)); gpuMalloc(&(self->A_gm_i_gpu), sizeof(int) * nB_gpu * 
nimax); gpuMemcpy(self->A_gm_i_gpu, A_gm_i_gpu, sizeof(int) * nB_gpu * nimax, gpuMemcpyHostToDevice); if (self->bloch_boundary_conditions) { gpuMalloc(&(self->phase_i_gpu), sizeof(gpuDoubleComplex) * max_k * nB_gpu * nimax); gpuMemcpy(self->phase_i_gpu, phase_i_gpu, sizeof(gpuDoubleComplex) * max_k * nB_gpu * nimax, gpuMemcpyHostToDevice); } self->volume_W_gpu_host = volume_W_gpu; gpuMemcpy(self->volume_W_gpu, volume_W_gpu, sizeof(LFVolume_gpu) * self->nW, gpuMemcpyHostToDevice); free(volume_i); free(volume_i_gpu); free(A_gm_i_gpu); free(volume_WMi_gpu); free(volume_WMi_gpu2); free(WMi_gpu); free(ni_gpu); free(G_B1_gpu); free(G_B2_gpu); if (self->bloch_boundary_conditions) { free(phase_i_gpu); } if (PyErr_Occurred()) return NULL; else return (PyObject*) self; } extern "C" void parse_shape_xG(PyObject* shape, int* nx, int* nG) { int nd = PyTuple_Size(shape); *nx = 1; for (int i = 0; i < nd-3; i++) { *nx *= (int) PyLong_AsLong(PyTuple_GetItem(shape, i)); } *nG = 1; for (int i = nd-3; i < nd; i++) *nG *= (int) PyLong_AsLong(PyTuple_GetItem(shape, i)); } extern "C" PyObject* integrate_gpu(LFCObject *lfc, PyObject *args) { void *a_xG_gpu; void *c_xM_gpu; PyObject *shape, *c_shape; int q; assert(lfc->use_gpu); if (!PyArg_ParseTuple(args, "nOnOi", &a_xG_gpu, &shape, &c_xM_gpu, &c_shape, &q)) return NULL; int nx, nG; parse_shape_xG(shape, &nx, &nG); int c_nd = PyTuple_Size(c_shape); int nM = (int) PyLong_AsLong(PyTuple_GetItem(c_shape, c_nd - 1)); if (nM > 0) { if (!lfc->bloch_boundary_conditions) { const double* a_G = (const double*) a_xG_gpu; double* c_M = (double*) c_xM_gpu; lfc_reducemap(lfc, a_G, nG, c_M, nM, nx, q); gpuCheckLastError(); } else { const gpuDoubleComplex* a_G = (const gpuDoubleComplex*) a_xG_gpu; gpuDoubleComplex* c_M = (gpuDoubleComplex*) c_xM_gpu; lfc_reducemapz(lfc, a_G, nG, c_M, nM, nx, q); gpuCheckLastError(); } } if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* add_gpu(LFCObject *lfc, PyObject *args) { void *a_xG_gpu; 
void *c_xM_gpu; PyObject *shape, *c_shape; int q; assert(lfc->use_gpu); if (!PyArg_ParseTuple(args, "nOnOi", &c_xM_gpu, &c_shape, &a_xG_gpu, &shape, &q)) return NULL; int nx, nG; parse_shape_xG(shape, &nx, &nG); int c_nd = PyTuple_Size(c_shape); int nM = (int) PyLong_AsLong(PyTuple_GetItem(c_shape, c_nd - 1)); if (nM > 0) { if (!lfc->bloch_boundary_conditions) { double* a_G = (double*) a_xG_gpu; const double* c_M = (const double*) c_xM_gpu; int blockx = lfc->max_nG; int gridx = (lfc->nB_gpu + BLOCK_Y - 1) / BLOCK_Y; dim3 dimBlock(blockx, BLOCK_Y); dim3 dimGrid(gridx, nx); gpuLaunchKernel( add_kernel, dimGrid, dimBlock, 0, 0, a_G, c_M, lfc->G_B1_gpu, lfc->G_B2_gpu, lfc->volume_i_gpu, lfc->A_gm_i_gpu, lfc->ni_gpu, lfc->nimax, nG, nM, lfc->phase_i_gpu, lfc->max_k, q, lfc->nB_gpu); gpuCheckLastError(); } else { gpuDoubleComplex* a_G = (gpuDoubleComplex*) a_xG_gpu; const gpuDoubleComplex* c_M = (const gpuDoubleComplex*) c_xM_gpu; int blockx = lfc->max_nG; int gridx = (lfc->nB_gpu + BLOCK_Y - 1) / BLOCK_Y; dim3 dimBlock(blockx, BLOCK_Y); dim3 dimGrid(gridx, nx); gpuLaunchKernel( add_kernelz, dimGrid, dimBlock, 0, 0, a_G, c_M, lfc->G_B1_gpu, lfc->G_B2_gpu, lfc->volume_i_gpu, lfc->A_gm_i_gpu, lfc->ni_gpu, lfc->nimax, nG, nM, lfc->phase_i_gpu, lfc->max_k, q, lfc->nB_gpu); gpuCheckLastError(); } } if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } #endif gpaw-24.1.0/c/gpu/kernels/linalg.cpp000066400000000000000000000275761454550013000172040ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define BLOCK_X 128 # define MAX_BLOCKS (65535) #endif __global__ void Zgpu(elmenwise_mul_add_kernelx)( int n, const double* a, const Tgpu* b, Tgpu *c) { int i = blockIdx.x * BLOCK_X + threadIdx.x; while (i < n) { IADD(c[i], MULDT(a[i], b[i])); i += gridDim.x * BLOCK_X; } } __global__ void Zgpu(multi_elmenwise_mul_add_kernel1x)( int n, const 
double* a, const Tgpu* b, Tgpu *c) { int i = blockIdx.x * BLOCK_X + threadIdx.x; int k = blockIdx.y; a += k * n; c += k * n; while (i < n) { IADD(c[i], MULDT(a[i], b[i])); i += gridDim.x * BLOCK_X; } } __global__ void Zgpu(multi_elmenwise_mul_add_kernel2x)( int n, const double* a, const Tgpu* b, Tgpu *c) { int i = blockIdx.x * BLOCK_X + threadIdx.x; int k = blockIdx.y; b += k * n; c += k * n; while (i < n) { IADD(c[i], MULDT(a[i], b[i])); i += gridDim.x * BLOCK_X; } } __global__ void Zgpu(ax2py_kernel)(int n, double a, const Tgpu* x, double* y) { int i = blockIdx.x * BLOCK_X + threadIdx.x; while (i < n) { y[i] += a * (REAL(x[i]) * REAL(x[i]) + IMAG(x[i]) * IMAG(x[i])); i += gridDim.x * BLOCK_X; } } __global__ void Zgpu(csign_kernel)(int n, Tgpu* x) { int i = blockIdx.x * BLOCK_X + threadIdx.x; while (i < n) { x[i] = NEG(x[i]); i += gridDim.x * BLOCK_X; } } __global__ void Zgpu(multi_ax2py_kernel)(int n, int nvec, double *a, const Tgpu* x, double* y) { int i = blockIdx.x * BLOCK_X + threadIdx.x; for (int k=0; k < nvec; k++) { int ii = i; while (ii < n) { y[ii] += a[k] * (REAL(x[ii]) * REAL(x[ii]) + IMAG(x[ii]) * IMAG(x[ii])); ii += gridDim.x * BLOCK_X; } x += n; } } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "linalg.cpp" __global__ void elmenwise_mul_add_kernelzz( int n, const gpuDoubleComplex* a, const gpuDoubleComplex* b, gpuDoubleComplex* c) { int i = blockIdx.x * BLOCK_X + threadIdx.x; while (i < n) { c[i] = gpuCadd(c[i], gpuCmul(a[i], b[i])); i += gridDim.x * BLOCK_X; } } __global__ void multi_elmenwise_mul_add_kernel1zz( int n, const gpuDoubleComplex* a, const gpuDoubleComplex* b, gpuDoubleComplex* c) { int i = blockIdx.x * BLOCK_X + threadIdx.x; int k = blockIdx.y; a += k * n; c += k * n; while (i < n) { c[i] = gpuCadd(c[i], gpuCmul(a[i], b[i])); i += gridDim.x * BLOCK_X; } } __global__ void multi_elmenwise_mul_add_kernel2zz( int n, const gpuDoubleComplex* a, const gpuDoubleComplex* b, gpuDoubleComplex* c) { int i = blockIdx.x * BLOCK_X + 
threadIdx.x; int k = blockIdx.y; b += k * n; c += k * n; while (i < n) { c[i] = gpuCadd(c[i], gpuCmul(a[i], b[i])); i += gridDim.x * BLOCK_X; } } extern "C" PyObject* elementwise_multiply_add_gpu(PyObject *self, PyObject *args) { void *x_gpu; void *y_gpu; void *c_gpu; PyObject *a_shape; PyArray_Descr *a_type, *y_type; if (!PyArg_ParseTuple(args, "nOOnOn", &x_gpu, &a_shape, &a_type, &y_gpu, &y_type, &c_gpu)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); Py_ssize_t nd = PyTuple_Size(a_shape); for (int d=1; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (a_type->type_num == NPY_DOUBLE) { if (y_type->type_num == NPY_DOUBLE) { gpuLaunchKernel( elmenwise_mul_add_kernelx, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (double*) y_gpu, (double*) c_gpu); } else { gpuLaunchKernel( elmenwise_mul_add_kernelxz, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } } else { if (y_type->type_num == NPY_DOUBLE) { gpuLaunchKernel( elmenwise_mul_add_kernelxz, dimGrid, dimBlock, 0, 0, n, (double*) y_gpu, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) c_gpu); } else { gpuLaunchKernel( elmenwise_mul_add_kernelzz, dimGrid, dimBlock, 0, 0, n, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* multi_elementwise_multiply_add_gpu(PyObject *self, PyObject *args) { void *x_gpu; void *y_gpu; void *c_gpu; PyObject *x_shape, *y_shape, *shape; PyArray_Descr *x_type, *y_type; if (!PyArg_ParseTuple(args, "nOOnOOn", &x_gpu, &x_shape, &x_type, &y_gpu, &y_shape, &y_type, &c_gpu)) return NULL; Py_ssize_t x_nd = PyTuple_Size(x_shape); Py_ssize_t y_nd = PyTuple_Size(y_shape); shape = (x_nd > y_nd) ? 
x_shape : y_shape; int n = (int) PyLong_AsLong(PyTuple_GetItem(shape, 1)); Py_ssize_t nd=PyTuple_Size(shape); for (int d=2; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(shape, d)); int nvec = (int) PyLong_AsLong(PyTuple_GetItem(shape, 0)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, nvec); if (x_type->type_num == NPY_DOUBLE) { if (y_type->type_num == NPY_DOUBLE) { if (x_nd > y_nd) { gpuLaunchKernel( multi_elmenwise_mul_add_kernel1x, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (double*) y_gpu, (double*) c_gpu); } else { gpuLaunchKernel( multi_elmenwise_mul_add_kernel2x, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (double*) y_gpu, (double*) c_gpu); } } else { if (x_nd > y_nd) { gpuLaunchKernel( multi_elmenwise_mul_add_kernel1xz, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } else { gpuLaunchKernel( multi_elmenwise_mul_add_kernel2xz, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } } } else { if (y_type->type_num == NPY_DOUBLE) { if (y_nd > x_nd) { gpuLaunchKernel( multi_elmenwise_mul_add_kernel1xz, dimGrid, dimBlock, 0, 0, n, (double*) y_gpu, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) c_gpu); } else { gpuLaunchKernel( multi_elmenwise_mul_add_kernel2xz, dimGrid, dimBlock, 0, 0, n, (double*) y_gpu, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) c_gpu); } } else { if (x_nd > y_nd) { gpuLaunchKernel( multi_elmenwise_mul_add_kernel1zz, dimGrid, dimBlock, 0, 0, n, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } else { gpuLaunchKernel( multi_elmenwise_mul_add_kernel2zz, dimGrid, dimBlock, 0, 0, n, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) y_gpu, (gpuDoubleComplex*) c_gpu); } } } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* ax2py_gpu(PyObject *self, PyObject *args) { double alpha; void 
*x_gpu; void *y_gpu; PyObject *x_shape, y_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "dnOnOO", &alpha, &x_gpu, &x_shape, &y_gpu, &y_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); Py_ssize_t nd = PyTuple_Size(x_shape); for (int d=1; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel( ax2py_kernel, dimGrid, dimBlock, 0, 0, n, alpha, (double*) x_gpu, (double*) y_gpu); } else { gpuLaunchKernel( ax2py_kernelz, dimGrid, dimBlock, 0, 0, n, alpha, (Tgpu*) x_gpu, (double*) y_gpu); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* csign_gpu(PyObject *self, PyObject *args) { void *x_gpu; PyObject *x_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nOO", &x_gpu, &x_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); Py_ssize_t nd = PyTuple_Size(x_shape); for (int d=1; d < nd; d++) n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, 1); if (type->type_num == NPY_DOUBLE) { gpuLaunchKernel( csign_kernel, dimGrid, dimBlock, 0, 0, n, (double*) x_gpu); } else { gpuLaunchKernel( csign_kernelz, dimGrid, dimBlock, 0, 0, n, (Tgpu*) x_gpu); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* multi_ax2py_gpu(PyObject *self, PyObject *args) { void *alpha_gpu; void *x_gpu; void *y_gpu; PyObject *x_shape, *y_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nnOnOO", &alpha_gpu, &x_gpu, &x_shape, &y_gpu, &y_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 1)); Py_ssize_t nd = PyTuple_Size(x_shape); for (int d=2; d < nd; d++) n *= (int) 
PyLong_AsLong(PyTuple_GetItem(x_shape, d)); int nvec = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); if (type->type_num == NPY_DOUBLE) { double *alpha = (double*) alpha_gpu; int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); int gridy = 1; dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_ax2py_kernel, dimGrid, dimBlock, 0, 0, n, nvec, alpha, (double*) x_gpu, (double*) y_gpu); } else { double *alpha = (double*) alpha_gpu; int gridx = MIN(MAX((n + BLOCK_X - 1) / BLOCK_X, 1), MAX_BLOCKS); int gridy = 1; dim3 dimBlock(BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_ax2py_kernelz, dimGrid, dimBlock, 0, 0, n, nvec, alpha, (gpuDoubleComplex*) x_gpu, (double*) y_gpu); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } #endif gpaw-24.1.0/c/gpu/kernels/mblas.cpp000066400000000000000000000171141454550013000170170ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include #include #include #include #include #include #include #include #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define MBLAS_BLOCK_X (128) # define MAX_BLOCKS (65535) # define MIN_BLOCKS (MAX_BLOCKS) #endif #define MAPNAME(f) Zgpu(f ## _dotu) #define MAPFUNC(a,b) MULTT((a), (b)) #include "reduce.cpp" #undef MAPNAME #undef MAPFUNC #define MAPNAME(f) Zgpu(f ## _dotc) #define MAPFUNC(a,b) MULTT(CONJ(a), (b)) #include "reduce.cpp" #undef MAPNAME #undef MAPFUNC __global__ void Zgpu(multi_scal_kernel)(int n, const Tgpu *alpha, Tgpu *a) { int i = blockIdx.x * MBLAS_BLOCK_X + threadIdx.x; int k = blockIdx.y; a += n * k; while (i < n) { a[i] = MULTT(a[i], alpha[k]); i += gridDim.x * MBLAS_BLOCK_X; } } __global__ void Zgpu(multi_axpy_kernel)(int n, const Tgpu *alpha, const Tgpu *a, Tgpu *b) { int k = blockIdx.y; int i = blockIdx.x * MBLAS_BLOCK_X + threadIdx.x; a += n * k; b += n * k; while (i < n) { IADD(b[i], MULTT(a[i], alpha[k])); i += 
gridDim.x * MBLAS_BLOCK_X; } } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "mblas.cpp" extern "C" PyObject* multi_scal_gpu(PyObject *self, PyObject *args) { void *alpha_gpu; void *x_gpu; PyObject *x_shape; PyArray_Descr *type, *a_type; if (!PyArg_ParseTuple(args, "nOnOO", &alpha_gpu, &a_type, &x_gpu, &x_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 1)); Py_ssize_t nd = PyTuple_Size(x_shape); for (int d=2; d < nd; d++) { n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); } int nvec = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); if (type->type_num == NPY_DOUBLE) { int gridx = MIN(MAX((n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_scal_kernel, dimGrid, dimBlock, 0, 0, n, (double *) alpha_gpu, (double*) x_gpu); } else if (a_type->type_num == NPY_DOUBLE) { double *alpha = (double*) (alpha_gpu); int gridx = MIN(MAX((2 * n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_scal_kernel, dimGrid, dimBlock, 0, 0, 2 * n, alpha, (double *) x_gpu); } else { gpuDoubleComplex *alpha = (gpuDoubleComplex*) (alpha_gpu); int gridx = MIN(MAX((n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_scal_kernelz, dimGrid, dimBlock, 0, 0, n, alpha, (gpuDoubleComplex*) x_gpu); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* multi_axpy_gpu(PyObject *self, PyObject *args) { void *alpha_gpu; void *x_gpu; void *y_gpu; PyObject *x_shape, *y_shape; PyArray_Descr *type, *a_type; if (!PyArg_ParseTuple(args, "nOnOnOO", &alpha_gpu, &a_type, &x_gpu, &x_shape, &y_gpu, &y_shape, &type)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 1)); Py_ssize_t nd = 
PyTuple_Size(x_shape); for (int d=2; d < nd; d++) { n *= (int) PyLong_AsLong(PyTuple_GetItem(x_shape, d)); } int nvec = (int) PyLong_AsLong(PyTuple_GetItem(x_shape, 0)); if (type->type_num == NPY_DOUBLE) { double *alpha = (double*) alpha_gpu; int gridx = MIN(MAX((n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_axpy_kernel, dimGrid, dimBlock, 0, 0, n, alpha, (double*) x_gpu, (double*) y_gpu); } else if (a_type->type_num == NPY_DOUBLE) { double *alpha = (double*) alpha_gpu; int gridx = MIN(MAX((2 * n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_axpy_kernel, dimGrid, dimBlock, 0, 0, 2 * n, alpha, (double*) x_gpu, (double*) y_gpu); } else { gpuDoubleComplex *alpha = (gpuDoubleComplex*) alpha_gpu; int gridx = MIN(MAX((n + MBLAS_BLOCK_X - 1) / MBLAS_BLOCK_X, 1), MAX_BLOCKS); int gridy = nvec; dim3 dimBlock(MBLAS_BLOCK_X, 1); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( multi_axpy_kernelz, dimGrid, dimBlock, 0, 0, n, alpha, (gpuDoubleComplex*) x_gpu, (gpuDoubleComplex*) y_gpu); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* multi_dotu_gpu(PyObject *self, PyObject *args) { void *a_gpu; void *b_gpu; void *res_gpu; PyObject *a_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nOnOn", &a_gpu, &a_shape, &b_gpu, &type, &res_gpu)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); for (int i=2; i < PyTuple_Size(a_shape); i++) { n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); } int nvec = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); if (type->type_num == NPY_DOUBLE) { double *result = (double *) res_gpu; reducemap_dotu((double*) a_gpu, (double*) b_gpu, result, n, nvec); } else { gpuDoubleComplex *result = (gpuDoubleComplex *) res_gpu; reducemap_dotuz((gpuDoubleComplex 
*) a_gpu, (gpuDoubleComplex *) b_gpu, result, n, nvec); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } extern "C" PyObject* multi_dotc_gpu(PyObject *self, PyObject *args) { void *a_gpu; void *b_gpu; void *res_gpu; PyObject *a_shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nOnOn", &a_gpu, &a_shape, &b_gpu, &type, &res_gpu)) return NULL; int n = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 1)); Py_ssize_t nd=PyTuple_Size(a_shape); for (int i = 2; i < nd; i++) { n *= (int) PyLong_AsLong(PyTuple_GetItem(a_shape, i)); } int nvec = (int) PyLong_AsLong(PyTuple_GetItem(a_shape, 0)); if (type->type_num == NPY_DOUBLE) { double *result = (double *) res_gpu; reducemap_dotc((double*) a_gpu, (double*) b_gpu, result, n, nvec); } else { gpuDoubleComplex *result = (gpuDoubleComplex *) res_gpu; reducemap_dotcz((gpuDoubleComplex*) a_gpu, (gpuDoubleComplex*) b_gpu, result, n, nvec); } gpuCheckLastError(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } #endif gpaw-24.1.0/c/gpu/kernels/paste.cpp000066400000000000000000000170641454550013000170410ustar00rootroot00000000000000#include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define BLOCK_SIZEX 32 # define BLOCK_SIZEY 16 # define BLOCK_MAX 32 # define GRID_MAX 65535 # define BLOCK_TOTALMAX 512 # define XDIV 4 # define Tfunc launch_func typedef void (*launch_func)(const double *, const int *, double *, const int *, const int *, int, gpuStream_t); typedef void (*launch_funcz)(const gpuDoubleComplex *, const int *, gpuDoubleComplex *, const int *, const int *, int, gpuStream_t); #else # undef Tfunc # define Tfunc launch_funcz #endif /* * GPU kernel to copy a smaller array into a given position in a * larger one. 
/*
 * GPU kernel to copy a smaller array into a given position in a
 * larger one.
 *
 * The caller has already offset b to the paste position.  The grid encodes
 * several things at once:
 *   blockIdx.y -> (batch index, tile along the y extent)
 *   blockIdx.x -> (starting x plane, tile along the z extent)
 * Each thread owns one (y, z) position and steps through the x planes in
 * increments of xdiv.
 */
__global__ void Zgpu(bmgs_paste_kernel)(
        const double* a, const int3 c_sizea, double* b, const int3 c_sizeb,
        int blocks, int xdiv)
{
    const int tiles_x = gridDim.x / xdiv;
    const int tiles_y = gridDim.y / blocks;

    const int batch = blockIdx.y / tiles_y;
    const int iy = (blockIdx.y - batch * tiles_y) * blockDim.y + threadIdx.y;

    int ix = blockIdx.x / tiles_x;
    const int iz = (blockIdx.x - ix * tiles_x) * blockDim.x + threadIdx.x;

    // Threads outside the (y, z) extent of the source never copy anything.
    if ((iz >= c_sizea.z) || (iy >= c_sizea.y))
        return;

    const double* src =
        a + (iz + (iy + (ix + batch * c_sizea.x) * c_sizea.y) * c_sizea.z);
    double* dst =
        b + (iz + (iy + (ix + batch * c_sizeb.x) * c_sizeb.y) * c_sizeb.z);

    // Distance between the x planes handled by this thread.
    const int src_step = xdiv * c_sizea.y * c_sizea.z;
    const int dst_step = xdiv * c_sizeb.y * c_sizeb.z;

    for (; ix < c_sizea.x; ix += xdiv, src += src_step, dst += dst_step) {
        *dst = *src;
    }
}
if (xind == XDIV - 1) { Tgpu *bb = b + (c_startb.x + c_sizea.x) * c_sizeb.y * c_sizeb.z + i2 + i1 * c_sizeb.z; #pragma unroll 3 for (int i0 = c_startb.x + c_sizea.x; i0 < c_sizeb.x; i0++) { if ((i2 < c_sizeb.z) && (i1 < c_sizeb.y)) { bb[0] = MAKED(0); } bb += c_sizeb.y * c_sizeb.z; } } int i1blbc = gridDim.y / blocks - i1bl - 1; int i2blbc = gridDim.x / XDIV - i2bl - 1; if (i1blbc if ((i1bc + c_sizea.y + c_startb.y < c_sizeb.y) && (i2 < c_sizeb.z)) { b[i2 + i1bc * c_sizeb.z + (c_sizea.y + c_startb.y) * c_sizeb.z] = MAKED(0); } // zero z = 0 .. startb.z if ((i2bc < c_startb.z) && (i1 < c_sizeb.y)) { b[i2bc + i1 * c_sizeb.z] = MAKED(0); } // zero z = startb.z+sizea.z .. if ((i2bc + c_sizea.z + c_startb.z < c_sizeb.z) && (i1 < c_sizeb.y)) { b[i2bc + i1 * c_sizeb.z + c_sizea.z + c_startb.z] = MAKED(0); } b += c_sizeb.y * c_sizeb.z; } } else { b += c_startb.z + (c_startb.y + c_startb.x * c_sizeb.y) * c_sizeb.z; b += i2 + i1 * c_sizeb.z + xstart * c_sizeb.y * c_sizeb.z; a += i2 + i1 * c_sizea.z + xstart * c_sizea.y * c_sizea.z; for (int i0=xstart; i0 < xend; i0++) { if ((i2 < c_sizea.z) && (i1 < c_sizea.y)) { b[0] = a[0]; } b += c_sizeb.y * c_sizeb.z; a += c_sizea.y * c_sizea.z; } } } /* * Copy a smaller array into a given position in a larger one. * * For example: * . . . . * a = 1 2 -> b = . 1 2 . * 3 4 . 3 4 . * . . . . 
*/ extern "C" void Zgpu(bmgs_paste_gpu)(const Tgpu* a, const int sizea[3], Tgpu* b, const int sizeb[3], const int startb[3], int blocks, gpuStream_t stream) { if (!(sizea[0] && sizea[1] && sizea[2])) return; int3 hc_sizea, hc_sizeb; hc_sizea.x = sizea[0]; hc_sizea.y = sizea[1]; hc_sizea.z = sizea[2] * sizeof(Tgpu) / sizeof(double); hc_sizeb.x = sizeb[0]; hc_sizeb.y = sizeb[1]; hc_sizeb.z = sizeb[2] * sizeof(Tgpu) / sizeof(double); BLOCK_GRID(hc_sizea); b += startb[2] + (startb[1] + startb[0] * sizeb[1]) * sizeb[2]; gpuLaunchKernel( Zgpu(bmgs_paste_kernel), dimGrid, dimBlock, 0, stream, (double*) a, hc_sizea, (double*) b, hc_sizeb, blocks, xdiv); gpuCheckLastError(); } /* * Copy a smaller array into a given position in a larger one and * set all other elements to 0. */ extern "C" void Zgpu(bmgs_paste_zero_gpu)(const Tgpu* a, const int sizea[3], Tgpu* b, const int sizeb[3], const int startb[3], int blocks, gpuStream_t stream) { if (!(sizea[0] && sizea[1] && sizea[2])) return; int3 bc_blocks; int3 hc_sizea, hc_sizeb, hc_startb; hc_sizea.x = sizea[0]; hc_sizea.y = sizea[1]; hc_sizea.z = sizea[2]; hc_sizeb.x = sizeb[0]; hc_sizeb.y = sizeb[1]; hc_sizeb.z = sizeb[2]; hc_startb.x = startb[0]; hc_startb.y = startb[1]; hc_startb.z = startb[2]; bc_blocks.y = hc_sizeb.y - hc_sizea.y > 0 ? MAX((hc_sizeb.y - hc_sizea.y + BLOCK_SIZEY - 1) / BLOCK_SIZEY, 1) : 0; bc_blocks.z = hc_sizeb.z - hc_sizea.z > 0 ? 
MAX((hc_sizeb.z - hc_sizea.z + BLOCK_SIZEX - 1) / BLOCK_SIZEX, 1) : 0; int gridy = blocks * ((sizeb[1] + BLOCK_SIZEY - 1) / BLOCK_SIZEY + bc_blocks.y); int gridx = XDIV * ((sizeb[2] + BLOCK_SIZEX - 1) / BLOCK_SIZEX + bc_blocks.z); dim3 dimBlock(BLOCK_SIZEX, BLOCK_SIZEY); dim3 dimGrid(gridx, gridy); gpuLaunchKernel( Zgpu(bmgs_paste_zero_kernel), dimGrid, dimBlock, 0, stream, (Tgpu*) a, hc_sizea, (Tgpu*) b, hc_sizeb, hc_startb, bc_blocks, blocks); gpuCheckLastError(); } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "paste.cpp" #endif gpaw-24.1.0/c/gpu/kernels/pwlfc-expand.cpp000066400000000000000000000441551454550013000203160ustar00rootroot00000000000000#include "../gpu.h" #include "../gpu-complex.h" #include "numpy/arrayobject.h" #include "assert.h" #define BETA 0.066725 #define GAMMA 0.031091 #define MU 0.2195164512208958 // PBE mod in libxc //#define MU 0.2195149727645171 from libxc #define C2 0.26053088059892404 #define C0I 0.238732414637843 #define C1 -0.45816529328314287 #define CC1 1.9236610509315362 #define CC2 2.5648814012420482 #define IF2 0.58482236226346462 #define C3 0.10231023756535741 #define C0 4.1887902047863905 #define THIRD 0.33333333333333333 #define NMIN 1.0E-10 __global__ void calculate_residual_kernel_complex(int nG, int nn, gpuDoubleComplex* residual_nG, double* eps_n, gpuDoubleComplex* wf_nG) { int n = threadIdx.x + blockIdx.x * blockDim.x; int g = threadIdx.y + blockIdx.y * blockDim.y; if ((g < nG) && (n < nn)) { residual_nG[n*nG + g] = gpuCsub(residual_nG[n*nG + g], gpuCmulD(wf_nG[n*nG + g], eps_n[n])); } } __global__ void calculate_residual_kernel_real(int nG, int nn, double* residual_nG, double* eps_n, double* wf_nG) { int n = threadIdx.x + blockIdx.x * blockDim.x; int g = threadIdx.y + blockIdx.y * blockDim.y; if ((g < nG) && (n < nn)) { residual_nG[n*nG + g] -= eps_n[n] * wf_nG[n*nG + g]; } } extern "C" void calculate_residual_launch_kernel(int nG, int nn, double* residual_nG, double* eps_n, double* wf_nG, int is_complex) { 
if (is_complex) { gpuLaunchKernel(calculate_residual_kernel_complex, dim3((nn+15)/16, (nG+15)/16), dim3(16, 16), 0, 0, nG, nn, (gpuDoubleComplex*) residual_nG, eps_n, (gpuDoubleComplex*) wf_nG); } else { gpuLaunchKernel(calculate_residual_kernel_real, dim3((nn+15)/16, (nG+15)/16), dim3(16, 16), 0, 0, nG, nn, residual_nG, eps_n, wf_nG); } } template __device__ double pbe_exchange(double n, double rs, double a2, double* dedrs, double* deda2) { double e = C1 / rs; *dedrs = -e / rs; if (gga) { double kappa = 0.804; double c = C2 * rs / n; c *= c; double s2 = a2 * c; double x = 1.0 + MU * s2 / kappa; double Fx = 1.0 + kappa - kappa / x; double dFxds2 = MU / (x * x); double ds2drs = 8.0 * c * a2 / rs; *dedrs = *dedrs * Fx + e * dFxds2 * ds2drs; *deda2 = e * dFxds2 * c; e *= Fx; } return e; } __device__ double G(double rtrs, double A, double alpha1, double beta1, double beta2, double beta3, double beta4, double* dGdrs) { double Q0 = -2.0 * A * (1.0 + alpha1 * rtrs * rtrs); double Q1 = 2.0 * A * rtrs * (beta1 + rtrs * (beta2 + rtrs * (beta3 + rtrs * beta4))); double G1 = Q0 * log(1.0 + 1.0 / Q1); double dQ1drs = A * (beta1 / rtrs + 2.0 * beta2 + rtrs * (3.0 * beta3 + 4.0 * beta4 * rtrs)); *dGdrs = -2.0 * A * alpha1 * G1 / Q0 - Q0 * dQ1drs / (Q1 * (Q1 + 1.0)); return G1; } template __device__ double pbe_correlation(double n, double rs, double zeta, double a2, double* dedrs, double* dedzeta, double* deda2) { bool spinpol = nspin == 2; double rtrs = sqrt(rs); double de0drs; double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294, &de0drs); double e; double xp = 117.0; double xm = 117.0; if (spinpol) { double de1drs; double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662, 0.62517, &de1drs); double dalphadrs; double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026, 0.49671, &dalphadrs); dalphadrs = -dalphadrs; double zp = 1.0 + zeta; double zm = 1.0 - zeta; xp = pow(zp, THIRD); xm = pow(zm, THIRD); double f = CC1 * (zp * xp + zm * xm - 2.0); 
double f1 = CC2 * (xp - xm); double zeta2 = zeta * zeta; double zeta3 = zeta2 * zeta; double zeta4 = zeta2 * zeta2; double x = 1.0 - zeta4; *dedrs = (de0drs * (1.0 - f * zeta4) + de1drs * f * zeta4 + dalphadrs * f * x * IF2); *dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) + f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2)); e = e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4; } else { *dedrs = de0drs; e = e0; } if (gga) { double n2 = n * n; double t2; double y; double phi = 117.0; double phi2 = 117.0; double phi3 = 117.0; if (spinpol) { phi = 0.5 * (xp * xp + xm * xm); phi2 = phi * phi; phi3 = phi * phi2; t2 = C3 * a2 * rs / (n2 * phi2); y = -e / (GAMMA * phi3); } else { t2 = C3 * a2 * rs / n2; y = -e / GAMMA; } double x = exp(y); double A; if (x != 1.0) A = BETA / (GAMMA * (x - 1.0)); else A = BETA / (GAMMA * y); double At2 = A * t2; double nom = 1.0 + At2; double denom = nom + At2 * At2; double H = GAMMA * log( 1.0 + BETA * t2 * nom / (denom * GAMMA)); double tmp = (GAMMA * BETA / (denom * (BETA * t2 * nom + GAMMA * denom))); double tmp2 = A * A * x / BETA; double dAdrs = tmp2 * *dedrs; if (spinpol) { H *= phi3; tmp *= phi3; dAdrs /= phi3; } double dHdt2 = (1.0 + 2.0 * At2) * tmp; double dHdA = -At2 * t2 * t2 * (2.0 + At2) * tmp; *dedrs += dHdt2 * 7 * t2 / rs + dHdA * dAdrs; *deda2 = dHdt2 * C3 * rs / n2; if (spinpol) { double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0; double dAdzeta = tmp2 * (*dedzeta - 3.0 * e * dphidzeta / phi) / phi3; *dedzeta += ((3.0 * H / phi - dHdt2 * 2.0 * t2 / phi ) * dphidzeta + dHdA * dAdzeta); *deda2 /= phi2; } e += H; } return e; } template __global__ void evaluate_ldaorgga_kernel(int ng, double* n_sg, double* v_sg, double* e_g, double* sigma_xg, double* dedsigma_xg) { int g = threadIdx.x + blockIdx.x * blockDim.x; if (g >= ng) { return; } if (nspin == 1) { double n = n_sg[g]; if (n < NMIN) n = NMIN; double rs = pow(C0I / n, THIRD); double dexdrs; double dexda2; double ex; double decdrs; double decda2; double ec; if (gga) { 
double a2 = sigma_xg[g]; ex = pbe_exchange(n, rs, a2, &dexdrs, &dexda2); ec = pbe_correlation(n, rs, 0.0, a2, &decdrs, 0, &decda2); dedsigma_xg[g] = n * (dexda2 + decda2); } else { ex = pbe_exchange(n, rs, 0.0, &dexdrs, 0); ec = pbe_correlation(n, rs, 0.0, 0.0, &decdrs, 0, 0); } e_g[g] = n * (ex + ec); v_sg[g] += ex + ec - rs * (dexdrs + decdrs) / 3.0; } else { const double* na_g = n_sg; double* va_g = v_sg; const double* nb_g = na_g + ng; double* vb_g = va_g + ng; const double* sigma0_g = 0; const double* sigma1_g = 0; const double* sigma2_g = 0; double* dedsigma0_g = 0; double* dedsigma1_g = 0; double* dedsigma2_g = 0; if (gga) { sigma0_g = sigma_xg; sigma1_g = sigma0_g + ng; sigma2_g = sigma1_g + ng; dedsigma0_g = dedsigma_xg; dedsigma1_g = dedsigma0_g + ng; dedsigma2_g = dedsigma1_g + ng; } double na = 2.0 * na_g[g]; if (na < NMIN) na = NMIN; double rsa = pow(C0I / na, THIRD); double nb = 2.0 * nb_g[g]; if (nb < NMIN) nb = NMIN; double rsb = pow(C0I / nb, THIRD); double n = 0.5 * (na + nb); double rs = pow(C0I / n, THIRD); double zeta = 0.5 * (na - nb) / n; double dexadrs; double dexada2; double exa; double dexbdrs; double dexbda2; double exb; double decdrs; double decdzeta; double decda2; double ec; if (gga) { exa = pbe_exchange(na, rsa, 4.0 * sigma0_g[g], &dexadrs, &dexada2); exb = pbe_exchange(nb, rsb, 4.0 * sigma2_g[g], &dexbdrs, &dexbda2); double a2 = sigma0_g[g] + 2 * sigma1_g[g] + sigma2_g[g]; ec = pbe_correlation(n, rs, zeta, a2, &decdrs, &decdzeta, &decda2); dedsigma0_g[g] = 2 * na * dexada2 + n * decda2; dedsigma1_g[g] = 2 * n * decda2; dedsigma2_g[g] = 2 * nb * dexbda2 + n * decda2; } else { exa = pbe_exchange(na, rsa, 0.0, &dexadrs, 0); exb = pbe_exchange(nb, rsb, 0.0, &dexbdrs, 0); ec = pbe_correlation(n, rs, zeta, 0.0, &decdrs, &decdzeta, 0); } e_g[g] = 0.5 * (na * exa + nb * exb) + n * ec; va_g[g] += (exa + ec - (rsa * dexadrs + rs * decdrs) / 3.0 - (zeta - 1.0) * decdzeta); vb_g[g] += (exb + ec - (rsb * dexbdrs + rs * decdrs) / 3.0 - (zeta + 
1.0) * decdzeta); } } // The define wrappers do not allow special characters for the first argument // Hence, here defining an expression in such way, that the first argument can be // a well defined identifier, and the preprocessor macro parses it correctly. constexpr void(*LDA_SPINPAIRED)(int, double*, double*, double*, double*, double*) = &evaluate_ldaorgga_kernel<1, false>; constexpr void(*LDA_SPINPOLARIZED)(int, double*, double*, double*, double*, double*) = &evaluate_ldaorgga_kernel<2, false>; constexpr void(*PBE_SPINPAIRED)(int, double*, double*, double*, double*, double*) = &evaluate_ldaorgga_kernel<1, true>; constexpr void(*PBE_SPINPOLARIZED)(int, double*, double*, double*, double*, double*) = &evaluate_ldaorgga_kernel<2, true>; extern "C" void evaluate_pbe_launch_kernel(int nspin, int ng, double* n, double* v, double* e, double* sigma, double* dedsigma) { if (nspin == 1) { gpuLaunchKernel(PBE_SPINPAIRED, dim3((ng+255)/256), dim3(256), 0, 0, ng, n, v, e, sigma, dedsigma); } else if (nspin == 2) { gpuLaunchKernel(PBE_SPINPOLARIZED, dim3((ng+255)/256), dim3(256), 0, 0, ng, n, v, e, sigma, dedsigma); } } extern "C" void evaluate_lda_launch_kernel(int nspin, int ng, double* n, double* v, double* e) { if (nspin == 1) { gpuLaunchKernel(LDA_SPINPAIRED, dim3((ng+255)/256), dim3(256), 0, 0, ng, n, v, e, NULL, NULL); } else if (nspin == 2) { gpuLaunchKernel(LDA_SPINPOLARIZED, dim3((ng+255)/256), dim3(256), 0, 0, ng, n, v, e, NULL, NULL); } } __global__ void pw_insert_many_16(int nb, int nG, int nQ, gpuDoubleComplex* c_nG, npy_int32* Q_G, double scale, gpuDoubleComplex* tmp_nQ) { int G = threadIdx.x + blockIdx.x * blockDim.x; int b = threadIdx.y + blockIdx.y * blockDim.y; __shared__ npy_int32 locQ_G[16]; if (threadIdx.y == 0) locQ_G[threadIdx.x] = Q_G[G]; __syncthreads(); if ((G < nG) && (b < nb)) { npy_int32 Q = locQ_G[threadIdx.x]; tmp_nQ[Q + b * nQ] = gpuCmulD(c_nG[G + b * nG], scale); } } __global__ void add_to_density_8(int nb, int nR, double* f_n, double* 
psit_nR, double* rho_R) { //int b = threadIdx.x + blockIdx.x * blockDim.x; int R = threadIdx.x + blockIdx.x * blockDim.x; if (R < nR) { double rho = 0.0; for (int b=0; b< nb; b++) { int idx = b * nR + R; rho += f_n[b] * (psit_nR[idx] * psit_nR[idx]); } rho_R[R] += rho; } } __global__ void add_to_density_16(int nb, int nR, double* f_n, gpuDoubleComplex* psit_nR, double* rho_R) { //int b = threadIdx.x + blockIdx.x * blockDim.x; int R = threadIdx.x + blockIdx.x * blockDim.x; if (R < nR) { double rho = 0.0; for (int b=0; b< nb; b++) { int idx = b * nR + R; rho += f_n[b] * (psit_nR[idx].x * psit_nR[idx].x + psit_nR[idx].y * psit_nR[idx].y); } rho_R[R] += rho; } } __global__ void pw_insert_16(int nG, int nQ, gpuDoubleComplex* c_G, npy_int32* Q_G, double scale, gpuDoubleComplex* tmp_Q) { int G = threadIdx.x + blockIdx.x * blockDim.x; if (G < nG) tmp_Q[Q_G[G]] = gpuCmulD(c_G[G], scale); } extern "C" void gpawDeviceSynchronize() { gpuDeviceSynchronize(); } extern "C" void add_to_density_gpu_launch_kernel(int nb, int nR, double* f_n, gpuDoubleComplex* psit_nR, double* rho_R, int wfs_is_complex) { if (wfs_is_complex) gpuLaunchKernel(add_to_density_16, dim3((nR+255)/256), dim3(256), 0, 0, nb, nR, f_n, psit_nR, rho_R); else gpuLaunchKernel(add_to_density_8, dim3((nR+255)/256), dim3(256), 0, 0, nb, nR, f_n, (double*) psit_nR, rho_R); } extern "C" void pw_insert_gpu_launch_kernel( int nb, int nG, int nQ, double* c_nG, npy_int32* Q_G, double scale, double* tmp_nQ) { if (nb == 1) { gpuLaunchKernel(pw_insert_16, dim3((nG+15)/16, (nb+15)/16), dim3(16, 16), 0, 0, nG, nQ, (gpuDoubleComplex*) c_nG, Q_G, scale, (gpuDoubleComplex*) tmp_nQ); } else { gpuLaunchKernel(pw_insert_many_16, dim3((nG+15)/16, (nb+15)/16), dim3(16, 16), 0, 0, nb, nG, nQ, (gpuDoubleComplex*) c_nG, Q_G, scale, (gpuDoubleComplex*) tmp_nQ); } } __global__ void pwlfc_expand_kernel_8(double* f_Gs, gpuDoubleComplex *emiGR_Ga, double *Y_GL, int* l_s, int* a_J, int* s_J, int* I_J, double* f_GI, int nG, int nJ, int nL, int 
nI, int natoms, int nsplines, bool cc) { int G = threadIdx.x + blockIdx.x * blockDim.x; int J = threadIdx.y + blockIdx.y * blockDim.y; gpuDoubleComplex imag_powers[4] = {make_gpuDoubleComplex(1.0,0), make_gpuDoubleComplex(0.0,-1.0), make_gpuDoubleComplex(-1.0,0), make_gpuDoubleComplex(0, 1.0)}; if ((G < nG) && (J < nJ)) { f_Gs += G*nsplines; emiGR_Ga += G*natoms; Y_GL += G*nL; f_GI += G*nI*2 + I_J[J]; int s = s_J[J]; int l = l_s[s]; gpuDoubleComplex f1 = gpuCmulD(gpuCmul(emiGR_Ga[a_J[J]], imag_powers[l % 4]), f_Gs[s]); for (int m = 0; m < 2 * l + 1; m++) { gpuDoubleComplex f = gpuCmulD(f1, Y_GL[l * l + m]); f_GI[0] = f.x; f_GI[nI] = cc ? -f.y : f.y; f_GI++; } } } __global__ void pwlfc_expand_kernel_16(double* f_Gs, gpuDoubleComplex *emiGR_Ga, double *Y_GL, int* l_s, int* a_J, int* s_J, int* I_J, double* f_GI, int nG, int nJ, int nL, int nI, int natoms, int nsplines, bool cc) { int G = threadIdx.x + blockIdx.x * blockDim.x; int J = threadIdx.y + blockIdx.y * blockDim.y; gpuDoubleComplex imag_powers[4] = {make_gpuDoubleComplex(1.0,0), make_gpuDoubleComplex(0.0,-1.0), make_gpuDoubleComplex(-1.0,0), make_gpuDoubleComplex(0, 1.0)}; if ((G < nG) && (J < nJ)) { f_Gs += G*nsplines; emiGR_Ga += G*natoms; Y_GL += G*nL; f_GI += (G*nI + I_J[J])*2; int s = s_J[J]; int l = l_s[s]; gpuDoubleComplex f1 = gpuCmulD(gpuCmul(emiGR_Ga[a_J[J]], imag_powers[l % 4]), f_Gs[s]); for (int m = 0; m < 2 * l + 1; m++) { gpuDoubleComplex f = gpuCmulD(f1, Y_GL[l * l + m]); *f_GI++ = f.x; *f_GI++ = cc ? 
-f.y : f.y; } } } // outP_ani[a] = \sum_A H_aii[a] P_ani[a] __global__ void dH_aii_times_P_ani_16(int nA, int nn, int nI, npy_int32* ni_a, double* dH_aii_dev, gpuDoubleComplex* P_ani_dev, gpuDoubleComplex* outP_ani_dev) { int n1 = threadIdx.x + blockIdx.x * blockDim.x; if (n1 < nn) { double* dH_ii = dH_aii_dev; int I = 0; for (int a=0; a< nA; a++) { int ni = ni_a[a]; int Istart = I; for (int i=0; i< ni; i++) { gpuDoubleComplex* outP_ni = outP_ani_dev + n1 * nI + I; gpuDoubleComplex result = make_gpuDoubleComplex(0.0, 0.0); gpuDoubleComplex* P_ni = P_ani_dev + n1 * nI + Istart; for (int i2=0; i2 < ni; i2++) { gpuDoubleComplex item = gpuCmulD(*P_ni, dH_ii[i2 * ni + i]); result.x += item.x; result.y += item.y; P_ni++; } outP_ni->x = result.x; outP_ni->y = result.y; I++; } dH_ii += ni * ni; } } } __global__ void dH_aii_times_P_ani_8(int nA, int nn, int nI, npy_int32* ni_a, double* dH_aii_dev, double* P_ani_dev, double* outP_ani_dev) { int n1 = threadIdx.x + blockIdx.x * blockDim.x; if (n1 < nn) { double* dH_ii = dH_aii_dev; int I = 0; for (int a=0; a< nA; a++) { int ni = ni_a[a]; int Istart = I; for (int i=0; i< ni; i++) { double* outP_ni = outP_ani_dev + n1 * nI + I; double result = 0; double* P_ni = P_ani_dev + n1 * nI + Istart; for (int i2=0; i2 < ni; i2++) { double item = *P_ni * dH_ii[i2 * ni + i]; result += item; P_ni++; } *outP_ni = result; I++; } dH_ii += ni * ni; } } } extern "C" void dH_aii_times_P_ani_launch_kernel(int nA, int nn, int nI, npy_int32* ni_a, double* dH_aii_dev, gpuDoubleComplex* P_ani_dev, gpuDoubleComplex* outP_ani_dev, int is_complex) { if (is_complex) gpuLaunchKernel(dH_aii_times_P_ani_16, dim3((nn+255)/256), dim3(256), 0, 0, nA, nn, nI, ni_a, dH_aii_dev, P_ani_dev, outP_ani_dev); else gpuLaunchKernel(dH_aii_times_P_ani_8, dim3((nn+255)/256), dim3(256), 0, 0, nA, nn, nI, ni_a, dH_aii_dev, (double*) P_ani_dev, (double*) outP_ani_dev); } extern "C" void pwlfc_expand_gpu_launch_kernel(int itemsize, double* f_Gs, gpuDoubleComplex *emiGR_Ga, 
double *Y_GL, int* l_s, int* a_J, int* s_J, double* f_GI, int* I_J, int nG, int nJ, int nL, int nI, int natoms, int nsplines, bool cc) { if (itemsize == 16) { gpuLaunchKernel(pwlfc_expand_kernel_16, dim3((nG+15)/16, (nJ+15)/16), dim3(16, 16), 0, 0, f_Gs, emiGR_Ga, Y_GL, l_s, a_J, s_J, I_J, f_GI, nG, nJ, nL, nI, natoms, nsplines, cc); } else { gpuLaunchKernel(pwlfc_expand_kernel_8, dim3((nG+15)/16, (nJ+15)/16), dim3(16, 16), 0, 0, f_Gs, emiGR_Ga, Y_GL, l_s, a_J, s_J, I_J, f_GI, nG, nJ, nL, nI, natoms, nsplines, cc); } //gpuDeviceSynchronize(); } gpaw-24.1.0/c/gpu/kernels/reduce-kernel.cpp000066400000000000000000000101551454550013000204440ustar00rootroot00000000000000__device__ unsigned int INNAME(retirementCount) = {0}; __global__ void INNAME(reduce_kernel)( const Tgpu *g_idata1, const Tgpu *g_idata2, Tgpu *g_odata, Tgpu *results, unsigned int n, unsigned int block_in, int block_out, int nvec) { extern __shared__ Tgpu Zgpu(sdata)[]; // perform first level of reduction, // reading from global memory, writing to shared memory unsigned int tid = threadIdx.x; unsigned int gridSize = REDUCE_THREADS * 2 * gridDim.x; unsigned int i_vec = blockIdx.y; unsigned int i = blockIdx.x * (REDUCE_THREADS * 2) + threadIdx.x; Tgpu mySum = MAKED(0); // we reduce multiple elements per thread. The number is determined by the // number of active thread blocks (via gridDim). 
More blocks will result // in a larger gridSize and therefore fewer elements per thread while (i < n) { IADD(mySum, INFUNC(g_idata1[i + block_in * i_vec], g_idata2[i + block_in * i_vec])); // ensure we don't read out of bounds if (i + REDUCE_THREADS < n) { IADD(mySum, INFUNC(g_idata1[i + block_in * i_vec + REDUCE_THREADS], g_idata2[i + block_in * i_vec + REDUCE_THREADS])); } i += gridSize; } Zgpu(sdata)[tid] = mySum; __syncthreads(); if (REDUCE_THREADS >= 512) { if (tid < 256) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 256]); } __syncthreads(); } if (REDUCE_THREADS >= 256) { if (tid < 128) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 128]); } __syncthreads(); } if (REDUCE_THREADS >= 128) { if (tid < 64) { Zgpu(sdata)[tid] = mySum = ADD(mySum, Zgpu(sdata)[tid + 64]); } __syncthreads(); } if (tid < 32) { volatile Tgpu *smem = Zgpu(sdata); #ifdef GPU_USE_COMPLEX if (REDUCE_THREADS >= 64) { smem[tid].x = mySum.x = mySum.x + smem[tid + 32].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 32].y; } if (REDUCE_THREADS >= 32) { smem[tid].x = mySum.x = mySum.x + smem[tid + 16].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 16].y; } if (REDUCE_THREADS >= 16) { smem[tid].x = mySum.x = mySum.x + smem[tid + 8].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 8].y; } if (REDUCE_THREADS >= 8) { smem[tid].x = mySum.x = mySum.x + smem[tid + 4].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 4].y; } if (REDUCE_THREADS >= 4) { smem[tid].x = mySum.x = mySum.x + smem[tid + 2].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 2].y; } if (REDUCE_THREADS >= 2) { smem[tid].x = mySum.x = mySum.x + smem[tid + 1].x; smem[tid].y = mySum.y = mySum.y + smem[tid + 1].y; } #else if (REDUCE_THREADS >= 64) smem[tid] = mySum = ADD(mySum, smem[tid + 32]); if (REDUCE_THREADS >= 32) smem[tid] = mySum = ADD(mySum, smem[tid + 16]); if (REDUCE_THREADS >= 16) smem[tid] = mySum = ADD(mySum, smem[tid + 8]); if (REDUCE_THREADS >= 8) smem[tid] = mySum = ADD(mySum, smem[tid + 4]); if 
(REDUCE_THREADS >= 4) smem[tid] = mySum = ADD(mySum, smem[tid + 2]); if (REDUCE_THREADS >= 2) smem[tid] = mySum = ADD(mySum, smem[tid + 1]); #endif } // write result for this block to global mem if (tid == 0) g_odata[blockIdx.x + block_out * i_vec] = Zgpu(sdata)[0]; if (gridDim.x == 1) { __shared__ bool amLast; __threadfence(); if (tid == 0) { unsigned int ticket = atomicInc(&INNAME(retirementCount), gridDim.y); amLast = (ticket == gridDim.y - 1); } __syncthreads(); if (amLast) { for (int i=tid; i < nvec; i += blockDim.x) results[i] = g_odata[i * block_out]; INNAME(retirementCount) = 0; } } } gpaw-24.1.0/c/gpu/kernels/reduce.cpp000066400000000000000000000306121454550013000171660ustar00rootroot00000000000000#ifndef REDUCE #include "../gpu.h" #define REDUCE_MAX_THREADS (256) #define REDUCE_MAX_BLOCKS (64) #define REDUCE_MAX_NVEC (128*1024) #define REDUCE_BUFFER_SIZE ((REDUCE_MAX_NVEC + 2 * GPU_BLOCKS_MAX \ * REDUCE_MAX_BLOCKS) * 16) static void *reduce_buffer = NULL; extern "C" void reduce_init_buffers_gpu() { reduce_buffer = NULL; } extern "C" void reduce_dealloc_gpu() { gpuFree(reduce_buffer); gpuCheckLastError(); reduce_init_buffers_gpu(); } static void reduceNumBlocksAndThreads(int n, int *blocks, int *threads) { *threads = (n < REDUCE_MAX_THREADS * 2) ? 
nextPow2((n + 1) / 2) : REDUCE_MAX_THREADS; *blocks = MIN((n + (*threads * 2 - 1)) / (*threads * 2), REDUCE_MAX_BLOCKS); } #endif #define REDUCE #define INFUNC(a,b) MAPFUNC(a,b) #define INNAME(f) MAPNAME(f ## _map512) #define REDUCE_THREADS 512 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map256) #define REDUCE_THREADS 256 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map128) #define REDUCE_THREADS 128 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map64) #define REDUCE_THREADS 64 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map32) #define REDUCE_THREADS 32 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map16) #define REDUCE_THREADS 16 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map8) #define REDUCE_THREADS 8 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map4) #define REDUCE_THREADS 4 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map2) #define REDUCE_THREADS 2 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## _map1) #define REDUCE_THREADS 1 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #undef INFUNC #define INFUNC(a,b) (a) #define INNAME(f) MAPNAME(f ## 512) #define REDUCE_THREADS 512 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 256) #define REDUCE_THREADS 256 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 128) #define REDUCE_THREADS 128 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 64) #define REDUCE_THREADS 64 #include "reduce-kernel.cpp" #undef 
REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 32) #define REDUCE_THREADS 32 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 16) #define REDUCE_THREADS 16 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 8) #define REDUCE_THREADS 8 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 4) #define REDUCE_THREADS 4 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 2) #define REDUCE_THREADS 2 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #define INNAME(f) MAPNAME(f ## 1) #define REDUCE_THREADS 1 #include "reduce-kernel.cpp" #undef REDUCE_THREADS #undef INNAME #undef INFUNC void MAPNAME(reducemap)(const Tgpu *d_idata1, const Tgpu *d_idata2, Tgpu *d_odata, int size, int nvec) { int blocks, threads; if (reduce_buffer == NULL) { gpuMalloc(&reduce_buffer, REDUCE_BUFFER_SIZE); } reduceNumBlocksAndThreads(size, &blocks, &threads); int min_wsize = blocks; int work_buffer_size = ((REDUCE_BUFFER_SIZE) / sizeof(Tgpu) - nvec) / 2; assert(min_wsize < work_buffer_size); int mynvec = MAX(MIN(work_buffer_size / min_wsize, nvec), 1); Tgpu *result_gpu = (Tgpu*) d_odata; Tgpu *work_buffer1 = (Tgpu*) reduce_buffer; Tgpu *work_buffer2 = work_buffer1 + work_buffer_size; int smemSize = (threads <= 32) ? 
2 * threads * sizeof(Tgpu) : threads * sizeof(Tgpu); for (int i=0; i < nvec; i += mynvec) { int cunvec = MIN(mynvec, nvec - i); dim3 dimBlock(threads, 1, 1); dim3 dimGrid(blocks, cunvec, 1); int block_out = blocks; int s = size; switch (threads) { case 512: gpuLaunchKernel( MAPNAME(reduce_kernel_map512), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 256: gpuLaunchKernel( MAPNAME(reduce_kernel_map256), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 128: gpuLaunchKernel( MAPNAME(reduce_kernel_map128), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 64: gpuLaunchKernel( MAPNAME(reduce_kernel_map64), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 32: gpuLaunchKernel( MAPNAME(reduce_kernel_map32), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 16: gpuLaunchKernel( MAPNAME(reduce_kernel_map16), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 8: gpuLaunchKernel( MAPNAME(reduce_kernel_map8), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 4: gpuLaunchKernel( MAPNAME(reduce_kernel_map4), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 2: gpuLaunchKernel( MAPNAME(reduce_kernel_map2), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + 
i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; case 1: gpuLaunchKernel( MAPNAME(reduce_kernel_map1), dimGrid, dimBlock, smemSize, 0, d_idata1 + i * size, d_idata2 + i * size, (Tgpu*) work_buffer1, result_gpu + i, s, size, block_out, cunvec); break; default: assert(0); } gpuCheckLastError(); s = blocks; int count = 0; while (s > 1) { int blocks2, threads2; int block_in = block_out; reduceNumBlocksAndThreads(s, &blocks2, &threads2); block_out = blocks2; dim3 dimBlock(threads2, 1, 1); dim3 dimGrid(blocks2, cunvec, 1); int smemSize = (threads2 <= 32) ? 2 * threads2 * sizeof(Tgpu) : threads2 * sizeof(Tgpu); Tgpu *work1 = (count % 2) ? work_buffer2 : work_buffer1; Tgpu *work2 = (count % 2) ? work_buffer1 : work_buffer2; count++; switch (threads2) { case 512: gpuLaunchKernel( MAPNAME(reduce_kernel512), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 256: gpuLaunchKernel( MAPNAME(reduce_kernel256), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 128: gpuLaunchKernel( MAPNAME(reduce_kernel128), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 64: gpuLaunchKernel( MAPNAME(reduce_kernel64), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 32: gpuLaunchKernel( MAPNAME(reduce_kernel32), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 16: gpuLaunchKernel( MAPNAME(reduce_kernel16), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 8: gpuLaunchKernel( MAPNAME(reduce_kernel8), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, 
block_out, cunvec); break; case 4: gpuLaunchKernel( MAPNAME(reduce_kernel4), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 2: gpuLaunchKernel( MAPNAME(reduce_kernel2), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; case 1: gpuLaunchKernel( MAPNAME(reduce_kernel1), dimGrid, dimBlock, smemSize, 0, (Tgpu*) work1, NULL, (Tgpu*) work2, result_gpu + i, s, block_in, block_out, cunvec); break; default: assert(0); } gpuCheckLastError(); s = (s + (threads2 * 2 - 1)) / (threads2 * 2); } } } gpaw-24.1.0/c/gpu/kernels/relax.cpp000066400000000000000000000541461454550013000170420ustar00rootroot00000000000000#include #include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #undef BLOCK_X #undef BLOCK_Y #define BLOCK_X GPU_DEFAULT_BLOCK_X #define BLOCK_Y GPU_DEFAULT_BLOCK_Y #ifdef MYJ #define ACACHE_X (BLOCK_X + 2 * MYJ) #define ACACHE_Y (BLOCK_Y + 2 * MYJ) __global__ void RELAX_kernel( const int relax_method, const double coef_relax, const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double* a, double* b, const double* src,const long3 c_n, const int3 a_size, const int3 b_size, const double w, const int xdiv) { int i1tid = threadIdx.y; int i2tid = threadIdx.x; int i1, i2; int xlen; double acache0[MYJ]; double acache0t[MYJ + 1]; double *acache12p; __shared__ double s_coefs0[MYJ * 2 + 1]; __shared__ double s_coefs1[MYJ * 2]; __shared__ double s_coefs2[MYJ * 2]; __shared__ double acache12[ACACHE_X * ACACHE_Y]; { int xx = gridDim.x / xdiv; int xind = blockIdx.x / xx; i2 = (blockIdx.x - xind * xx) * BLOCK_X + i2tid; i1 = blockIdx.y * BLOCK_Y + i1tid; xlen = (c_n.x + xdiv - 1) / xdiv; int xstart = xind * xlen; if ((c_n.x - xstart) < xlen) xlen = c_n.x - xstart; a += xstart * a_size.y + i1 * a_size.z + i2; b += 
xstart * b_size.y + i1 * b_size.z + i2; src += xstart * b_size.y + i1 * b_size.z + i2; } acache12p = acache12 + ACACHE_X * (i1tid + MYJ) + i2tid + MYJ; if (i2tid <= MYJ * 2) s_coefs0[i2tid] = c_coefs0[i2tid]; if (i2tid < MYJ * 2) { s_coefs1[i2tid] = c_coefs1[i2tid]; s_coefs2[i2tid] = c_coefs2[i2tid]; } __syncthreads(); if (relax_method == 1) { /* Weighted Gauss-Seidel relaxation for the equation "operator" b = src a contains the temporary array holding also the boundary values. */ // Coefficient needed multiple times later // const double coef = 1.0/c_coefs[0]; /* FIXME: NOT WORKIN ATM */ return; } else { /* Weighted Jacobi relaxation for the equation "operator" b = src a contains the temporary array holding also the boundary values. */ for (int c=0; c < MYJ; c++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[c] = a[(c - MYJ) * (a_size.y)]; } for (int i0=0; i0 < xlen; i0++) { if (i1 < c_n.y + MYJ) { acache12p[-MYJ] = a[-MYJ]; if ((i2tid < MYJ * 2) && (i2 < c_n.z + MYJ - BLOCK_X + MYJ)) acache12p[BLOCK_X - MYJ] = a[BLOCK_X - MYJ]; } if (i1tid < MYJ) { acache12p[-ACACHE_X * MYJ] = a[-a_size.z * MYJ]; if (i1 < c_n.y + MYJ - BLOCK_Y) acache12p[ACACHE_X * BLOCK_Y] = a[a_size.z * BLOCK_Y]; } __syncthreads(); acache0t[0] = 0.0; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c - MYJ)] * s_coefs1[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[c - MYJ] * s_coefs2[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[c + 1] * s_coefs2[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c + 1)] * s_coefs1[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache0[c] * s_coefs0[c]; for (int c=0; c < MYJ; c++) acache0t[c + 1] += acache12p[0] * s_coefs0[c + 1 + MYJ]; for (int c=0; c < ncoefs; c++) acache0t[0] += a[c_offsets[c]] * c_coefs[c]; if (i0 >= MYJ) { if ((i1 < c_n.y) && (i2 < c_n.z)) { b[0] = (1.0 - w) * b[0] + w * (src[0] - acache0t[MYJ]) / coef_relax; } b += b_size.y; src += b_size.y; } for (int c=0; c < MYJ - 1; 
c++) { acache0[c] = acache0[c + 1]; } acache0[MYJ - 1] = acache12p[0]; for (int c=MYJ;c > 0; c--) { acache0t[c] = acache0t[c - 1]; } a += a_size.y; __syncthreads(); } #pragma unroll for (int i0=0; i0 < MYJ; i0++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[0] = a[0]; if (i0 < 1) acache0t[1 - i0] += acache0[0] * s_coefs0[1 + MYJ]; #if MYJ >= 2 if (i0 < 2) acache0t[2 - i0] += acache0[0] * s_coefs0[2 + MYJ]; #endif #if MYJ >= 3 if (i0 < 3) acache0t[3 - i0] += acache0[0] * s_coefs0[3 + MYJ]; #endif #if MYJ >= 4 if (i0 < 4) acache0t[4 - i0] += acache0[0] * s_coefs0[4 + MYJ]; #endif #if MYJ >= 5 if (i0 < 5) acache0t[5 - i0] += acache0[0] * s_coefs0[5 + MYJ]; #endif if (i0 + xlen >= MYJ) { if ((i1 < c_n.y) && (i2 < c_n.z)) { b[0] = (1.0 - w) * b[0] + w * (src[0] - acache0t[MYJ - i0]) / coef_relax; } b += b_size.y; src += b_size.y; } a += a_size.y; } } } __global__ void RELAX_kernel_onlyb( const int relax_method, const double coef_relax, const int ncoefs, const double *c_coefs, const long *c_offsets, const double *c_coefs0, const double *c_coefs1, const double *c_coefs2, const double* a, double* b, const double* src, const long3 c_n, const int3 c_jb, const int boundary, const double w, const int xdiv) { int xx = MAX((c_n.z + BLOCK_X - 1) / BLOCK_X, 1); int yy = MAX((c_n.y + BLOCK_Y - 1) / BLOCK_Y, 1); int ysiz = c_n.y; if ((boundary & GPAW_BOUNDARY_Y0) != 0) ysiz -= BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ysiz -= BLOCK_Y; int yy2 = MAX((ysiz + BLOCK_Y - 1) / BLOCK_Y, 0); int i2bl, i1bl; int xlen = c_n.x; int xind = 0; int xstart = 0; int i1pitch = 0; int i2pitch = 0; int xmax = c_n.x; int ymax = c_n.y; int zmax = c_n.z; int blockix; blockix = blockIdx.x; if ((boundary & GPAW_BOUNDARY_X0) != 0) { if ((blockix >= 0) && (blockix < xx * yy)) { i1bl = blockix / xx; i2bl = blockix-i1bl * xx; xlen = c_jb.x / 2; xstart = 0; } blockix -= xx * yy; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { if ((blockix >= 0) && (blockix < xx * yy)) { i1bl = blockix / xx; i2bl = 
blockix-i1bl * xx; xlen = c_jb.x / 2; xstart += c_n.x - c_jb.x / 2; } blockix -= xx * yy; } if (blockix >= 0) { if ((boundary & GPAW_BOUNDARY_Y0) != 0) { if ((blockix >= 0) && (blockix < xdiv * xx)) { xind = blockix / xx; i2bl = blockix - xind * xx; i1bl = 0; ymax = MIN(BLOCK_Y, ymax); } blockix -= xdiv * xx; } if ((boundary & GPAW_BOUNDARY_Y1) != 0) { if ((blockix >= 0) && (blockix < xdiv * xx)) { xind = blockix / xx; i2bl = blockix - xind * xx; i1bl = 0; i1pitch = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * xx; } if ((boundary & GPAW_BOUNDARY_Z0) != 0) { if ((blockix >= 0) && (blockix < xdiv * yy2)) { xind = blockix / yy2; i2bl = 0; zmax = MIN(BLOCK_X, zmax); i1bl = blockix - xind * yy2; if ((boundary & GPAW_BOUNDARY_Y0) != 0) i1pitch = BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ymax = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * yy2; } if ((boundary & GPAW_BOUNDARY_Z1) != 0) { if ((blockix >= 0) && (blockix < xdiv * yy2)) { xind = blockix / yy2; i2bl = 0; i2pitch = MAX(c_n.z - BLOCK_X, 0); i1bl = blockix - xind * yy2; if ((boundary & GPAW_BOUNDARY_Y0) != 0) i1pitch = BLOCK_Y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ymax = MAX(c_n.y - BLOCK_Y, 0); } blockix -= xdiv * yy2; } if ((boundary & GPAW_BOUNDARY_X0) != 0) { xstart += c_jb.x / 2; xlen -= c_jb.x / 2; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { xlen -= c_jb.x / 2; xmax -= c_jb.x / 2; } xlen = (xlen + xdiv - 1) / xdiv; xstart += xind * xlen; } int i2tid = threadIdx.x; int i2 = i2pitch + i2bl * BLOCK_X + i2tid; int i1tid = threadIdx.y; int i1 = i1pitch + i1bl * BLOCK_Y + i1tid; __shared__ double s_coefs0[MYJ * 2 + 1]; __shared__ double s_coefs1[MYJ * 2]; __shared__ double s_coefs2[MYJ * 2]; __shared__ double acache12[ACACHE_X * ACACHE_Y]; double acache0[MYJ]; double acache0t[MYJ + 1]; double *acache12p; int sizez = c_jb.z + c_n.z; int sizeyz = (c_jb.y + c_n.y) * sizez; if ((xmax-xstart) < xlen) xlen = xmax - xstart; acache12p = acache12 + ACACHE_X * (i1tid + MYJ) + i2tid + MYJ; if (i2tid <= MYJ * 2) 
s_coefs0[i2tid] = c_coefs0[i2tid]; if (i2tid < MYJ * 2) { s_coefs1[i2tid] = c_coefs1[i2tid]; s_coefs2[i2tid] = c_coefs2[i2tid]; } __syncthreads(); a += xstart * sizeyz + i1 * sizez + i2; b += xstart * c_n.y * c_n.z + i1 * c_n.z + i2; src += xstart * c_n.y * c_n.z + i1 * c_n.z + i2; if (relax_method == 1) { /* Weighted Gauss-Seidel relaxation for the equation "operator" b = src a contains the temporary array holding also the boundary values. */ // Coefficient needed multiple times later // const double coef = 1.0/c_coefs[0]; /* FIXME: NOT WORKIN ATM */ return; } else { /* Weighted Jacobi relaxation for the equation "operator" b = src a contains the temporariry array holding also the boundary values. */ for (int c=0; c < MYJ; c++) { if ((i1 < ymax) && (i2 < zmax)) acache0[c] = a[(c - MYJ) * (sizeyz)]; } for (int i0=0; i0 < xlen; i0++) { if (i1 < ymax + MYJ) { acache12p[-MYJ] = a[-MYJ]; if ((i2tid < MYJ * 2) && (i2 < zmax + MYJ - BLOCK_X + MYJ)) acache12p[BLOCK_X - MYJ] = a[BLOCK_X - MYJ]; } if (i1tid < MYJ) { acache12p[-ACACHE_X * MYJ] = a[-sizez * MYJ]; if (i1 < ymax + MYJ - BLOCK_Y) acache12p[ACACHE_X * BLOCK_Y] = a[sizez * BLOCK_Y]; } __syncthreads(); acache0t[0] = 0.0; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c - MYJ)] * s_coefs1[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[c - MYJ] * s_coefs2[c]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[c+1] * s_coefs2[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache12p[ACACHE_X * (c + 1)] * s_coefs1[c + MYJ]; for (int c=0; c < MYJ; c++) acache0t[0] += acache0[c] * s_coefs0[c]; for (int c=0; c < MYJ; c++) acache0t[c + 1] += acache12p[0] * s_coefs0[c + 1 + MYJ]; for (int c=0; c < ncoefs; c++) acache0t[0] += a[c_offsets[c]] * c_coefs[c]; if (i0 >= MYJ) { if ((i1 < ymax) && (i2 < zmax)) { b[0] = (1.0 - w) * b[0] + w * (src[0] - acache0t[MYJ]) / coef_relax; } b += c_n.y * c_n.z; src += c_n.y * c_n.z; } for (int c=0; c < MYJ - 1; c++) { acache0[c] = acache0[c + 1]; } 
acache0[MYJ - 1] = acache12p[0]; for (int c=MYJ; c > 0; c--) { acache0t[c] = acache0t[c - 1]; } a += sizeyz; __syncthreads(); } #pragma unroll for (int i0=0; i0 < MYJ; i0++) { if ((i1 < c_n.y) && (i2 < c_n.z)) acache0[0] = a[0]; if (i0 < 1) acache0t[1 - i0] += acache0[0] * s_coefs0[1 + MYJ]; #if MYJ >= 2 if (i0 < 2) acache0t[2 - i0] += acache0[0] * s_coefs0[2 + MYJ]; #endif #if MYJ >= 3 if (i0 < 3) acache0t[3 - i0] += acache0[0] * s_coefs0[3 + MYJ]; #endif #if MYJ >= 4 if (i0 < 4) acache0t[4 - i0] += acache0[0] * s_coefs0[4 + MYJ]; #endif #if MYJ >= 5 if (i0 < 5) acache0t[5 - i0] += acache0[0] * s_coefs0[5 + MYJ]; #endif if (i0 + xlen >= MYJ) { if ((i1 < ymax) && (i2 < zmax)) { b[0] = (1.0 - w) * b[0] + w * (src[0] - acache0t[MYJ - i0]) / coef_relax; } b += c_n.y * c_n.z; src += c_n.y * c_n.z; } a += sizeyz; } } } #else #define MYJ (2/2) # define RELAX_kernel relax_kernel2 # define RELAX_kernel_onlyb relax_kernel2_onlyb # include "relax.cpp" # undef RELAX_kernel # undef RELAX_kernel_onlyb # undef MYJ #define MYJ (4/2) # define RELAX_kernel relax_kernel4 # define RELAX_kernel_onlyb relax_kernel4_onlyb # include "relax.cpp" # undef RELAX_kernel # undef RELAX_kernel_onlyb # undef MYJ #define MYJ (6/2) # define RELAX_kernel relax_kernel6 # define RELAX_kernel_onlyb relax_kernel6_onlyb # include "relax.cpp" # undef RELAX_kernel # undef RELAX_kernel_onlyb # undef MYJ #define MYJ (8/2) # define RELAX_kernel relax_kernel8 # define RELAX_kernel_onlyb relax_kernel8_onlyb # include "relax.cpp" # undef RELAX_kernel # undef RELAX_kernel_onlyb # undef MYJ #define MYJ (10/2) # define RELAX_kernel relax_kernel10 # define RELAX_kernel_onlyb relax_kernel10_onlyb # include "relax.cpp" # undef RELAX_kernel # undef RELAX_kernel_onlyb # undef MYJ extern "C" bmgsstencil_gpu bmgs_stencil_to_gpu(const bmgsstencil* s); extern "C" int bmgs_fd_boundary_test(const bmgsstencil_gpu* s, int boundary, int ndouble); extern "C" void bmgs_relax_gpu(const int relax_method, const bmgsstencil_gpu* 
s_gpu, double* adev, double* bdev, const double* src, const double w, int boundary, gpuStream_t stream) { int3 jb; int3 bjb; int3 hc_bj; long3 hc_n; long3 hc_j; dim3 dimBlock(BLOCK_X, BLOCK_Y); if ((boundary & GPAW_BOUNDARY_SKIP) != 0) { if (!bmgs_fd_boundary_test(s_gpu, boundary, 1)) return; } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { if (!bmgs_fd_boundary_test(s_gpu, boundary, 1)) { boundary &= ~GPAW_BOUNDARY_ONLY; boundary |= GPAW_BOUNDARY_NORMAL; } } hc_n.x = s_gpu->n[0]; hc_n.y = s_gpu->n[1]; hc_n.z = s_gpu->n[2]; hc_j.x = s_gpu->j[0]; hc_j.y = s_gpu->j[1]; hc_j.z = s_gpu->j[2]; bjb.x = 0; bjb.y = 0; bjb.z = 0; hc_bj.x = 0; hc_bj.y = 0; hc_bj.z = 0; jb.z = hc_j.z; jb.y = hc_j.y / (hc_j.z + hc_n.z); jb.x = hc_j.x / ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y); if ((boundary & GPAW_BOUNDARY_SKIP) != 0) { int3 jb1; int3 bjb1, bjb2; bjb1.x = 0; bjb1.y = 0; bjb1.z = 0; bjb2.x = 0; bjb2.y = 0; bjb2.z = 0; jb1.z = jb.z / 2; jb1.x = jb.x / 2; jb1.y = jb.y / 2; if ((boundary & GPAW_BOUNDARY_X0) != 0) { bjb1.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_X1) != 0) { bjb2.x += jb.x / 2; } if ((boundary & GPAW_BOUNDARY_Y0) != 0) { bjb1.y += dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Y1) != 0) { bjb2.y += dimBlock.y; } if ((boundary & GPAW_BOUNDARY_Z0) != 0) { bjb1.z += dimBlock.x; } if ((boundary & GPAW_BOUNDARY_Z1) != 0) { bjb2.z += dimBlock.x; } bjb.x = bjb1.x + bjb2.x; bjb.y = bjb1.y + bjb2.y; bjb.z = bjb1.z + bjb2.z; hc_n.x -= bjb.x; hc_n.y -= bjb.y; hc_n.z -= bjb.z; jb.x += bjb.x; jb.y += bjb.y; jb.z += bjb.z; jb1.x += bjb1.x; jb1.y += bjb1.y; jb1.z += bjb1.z; hc_bj.z = bjb.z; hc_bj.y = bjb.y * (hc_bj.z + hc_n.z); hc_bj.x = bjb.x * ((hc_bj.z + hc_n.z) * hc_n.y + hc_bj.y); hc_j.z = jb.z; hc_j.y = jb.y * (hc_j.z + hc_n.z); hc_j.x = jb.x * ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y); bdev += bjb1.z + bjb1.y * (hc_bj.z + hc_n.z) + bjb1.x * ((hc_bj.z + hc_n.z) * hc_n.y + hc_bj.y); src += bjb1.z + bjb1.y * (hc_bj.z + hc_n.z) + bjb1.x * ((hc_bj.z + hc_n.z) * hc_n.y + hc_bj.y); 
adev = (Tgpu*) ((double*) adev + jb1.z + jb1.y * (hc_j.z + hc_n.z) + jb1.x * ((hc_j.z + hc_n.z) * hc_n.y + hc_j.y)); } else { adev = (Tgpu*) ((double*) adev + (hc_j.x + hc_j.y + hc_j.z) / 2); } if ((hc_n.x <= 0) || (hc_n.y <= 0) || (hc_n.z <= 0)) return; dim3 dimGrid(1,1,1); int xdiv = MIN(hc_n.x, 4); if (((boundary & GPAW_BOUNDARY_NORMAL) != 0) || ((boundary & GPAW_BOUNDARY_SKIP) != 0)) { dimGrid.x = xdiv * MAX((hc_n.z + dimBlock.x - 1) / dimBlock.x, 1); dimGrid.y = MAX((hc_n.y + dimBlock.y - 1) / dimBlock.y, 1); } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { int xx = MAX((hc_n.z + dimBlock.x - 1) / dimBlock.x, 1); int yy = MAX((hc_n.y + dimBlock.y - 1) / dimBlock.y, 1); int ysiz = hc_n.y; if ((boundary & GPAW_BOUNDARY_Y0) != 0) ysiz -= dimBlock.y; if ((boundary & GPAW_BOUNDARY_Y1) != 0) ysiz -= dimBlock.y; int yy2 = MAX((ysiz + dimBlock.y - 1) / dimBlock.y, 0); dimGrid.x = 0; if ((boundary & GPAW_BOUNDARY_X0) != 0) dimGrid.x += xx * yy; if ((boundary & GPAW_BOUNDARY_X1) != 0) dimGrid.x += xx * yy; if ((boundary & GPAW_BOUNDARY_Y0) != 0) dimGrid.x += xdiv * xx; if ((boundary & GPAW_BOUNDARY_Y1) != 0) dimGrid.x += xdiv * xx; if ((boundary & GPAW_BOUNDARY_Z0) != 0) dimGrid.x += xdiv * yy2; if ((boundary & GPAW_BOUNDARY_Z1) != 0) dimGrid.x += xdiv * yy2; dimGrid.y = 1; } int3 sizea; sizea.z = hc_j.z + hc_n.z; sizea.y = sizea.z * hc_n.y + hc_j.y; sizea.x = sizea.y * hc_n.x + hc_j.x; int3 sizeb; sizeb.z = hc_bj.z + hc_n.z; sizeb.y = sizeb.z * hc_n.y + hc_bj.y; sizeb.x = sizeb.y * hc_n.x + hc_bj.x; if (((boundary & GPAW_BOUNDARY_NORMAL) != 0) || ((boundary & GPAW_BOUNDARY_SKIP) != 0)) { void (*relax_kernel)( const int relax_method, const double coef_relax, const int ncoefs, const double* c_coefs, const long* c_offsets, const double* c_coefs0, const double* c_coefs1, const double* c_coefs2, const double* a, double* b, const double* src, const long3 c_n, const int3 a_size, const int3 b_size, const double w, const int xdiv); switch (s_gpu->ncoefs0) { case 3: 
relax_kernel = relax_kernel2; break; case 5: relax_kernel = relax_kernel4; break; case 7: relax_kernel = relax_kernel6; break; case 9: relax_kernel = relax_kernel8; break; case 11: relax_kernel = relax_kernel10; break; default: assert(0); } gpuLaunchKernel( (*relax_kernel), dimGrid, dimBlock, 0, stream, relax_method, s_gpu->coef_relax, s_gpu->ncoefs, s_gpu->coefs_gpu, s_gpu->offsets_gpu, s_gpu->coefs0_gpu, s_gpu->coefs1_gpu, s_gpu->coefs2_gpu, adev, bdev, src, hc_n, sizea, sizeb, w, xdiv); } else if ((boundary & GPAW_BOUNDARY_ONLY) != 0) { void (*relax_kernel)( const int relax_method, const double coef_relax, const int ncoefs, const double* c_coefs, const long* c_offsets, const double* c_coefs0, const double* c_coefs1, const double* c_coefs2, const double* a, double* b, const double* src, const long3 c_n, const int3 c_jb, const int boundary, const double w, const int xdiv); switch (s_gpu->ncoefs0) { case 3: relax_kernel = relax_kernel2_onlyb; break; case 5: relax_kernel = relax_kernel4_onlyb; break; case 7: relax_kernel = relax_kernel6_onlyb; break; case 9: relax_kernel = relax_kernel8_onlyb; break; case 11: relax_kernel = relax_kernel10_onlyb; break; default: assert(0); } gpuLaunchKernel( (*relax_kernel), dimGrid, dimBlock, 0, stream, relax_method, s_gpu->coef_relax, s_gpu->ncoefs, s_gpu->coefs_gpu, s_gpu->offsets_gpu, s_gpu->coefs0_gpu, s_gpu->coefs1_gpu, s_gpu->coefs2_gpu, adev, bdev, src, hc_n, jb, boundary, w, xdiv); } gpuCheckLastError(); } #endif gpaw-24.1.0/c/gpu/kernels/restrict-stencil.cpp000066400000000000000000000051131454550013000212130ustar00rootroot00000000000000#define ACACHE_K (2 * (K - 1)) __global__ void RST1D_kernel( const Tgpu* a, int n, int m, Tgpu* b, int ang, int bng, int blocks) { __shared__ Tgpu ac[ACACHE_Y * ACACHE_X]; Tgpu *acp; int jtid = threadIdx.x; int j = blockIdx.x * BLOCK; int itid = threadIdx.y; int yy = gridDim.y / blocks; int blocksi = blockIdx.y / yy; int ibl = blockIdx.y - yy * blocksi; int i = ibl * BLOCK; int sizex = n * 2 
+ K * 2 - 3; int aind = (j + itid) * sizex + i * 2 + jtid + K - 1; a += blocksi * ang + aind; b += blocksi * bng + (j + jtid) + (i + itid) * m; acp = ac + ACACHE_X * itid + jtid + ACACHE_K / 2; if (aind < ang) acp[0] = a[0]; if ((aind + BLOCK) < ang) acp[BLOCK] = a[BLOCK]; if (jtid < ACACHE_K / 2) { if (aind - ACACHE_K / 2 < ang) acp[-ACACHE_K / 2] = a[-ACACHE_K / 2]; if (aind + 2 * BLOCK < ang) acp[2 * BLOCK] = a[2 * BLOCK]; } acp = ac + ACACHE_X * (jtid) + 2 * itid + ACACHE_K / 2; __syncthreads(); if (((i + itid) < n) && ((j + jtid) < m)) { if (K == 2) b[0] = MULDT(0.5, ADD(acp[0], MULDT(0.5, ADD(acp[1], acp[-1])))); else if (K == 4) b[0] = MULDT(0.5, ADD(acp[0], ADD(MULDT( 0.5625, ADD(acp[1], acp[-1])), MULDT(-0.0625, ADD(acp[3], acp[-3]))))); else if (K == 6) b[0] = MULDT(0.5, ADD(ADD(acp[0], MULDT( 0.58593750, ADD(acp[1], acp[-1]))), ADD(MULDT(-0.09765625, ADD(acp[3], acp[-3])), MULDT( 0.01171875, ADD(acp[5], acp[-5]))))); else b[0] = MULDT( 0.5, ADD(acp[0], ADD(ADD(MULDT( 0.59814453125, ADD(acp[1], acp[-1])), MULDT(-0.11962890625, ADD(acp[3], acp[-3]))), ADD(MULDT( 0.02392578125, ADD(acp[5], acp[-5])), MULDT(-0.00244140625, ADD(acp[7], acp[-7])))))); } } void RST1D(const Tgpu* a, int n, int m, Tgpu* b, int ang, int bng, int blocks) { int gridx = (m + BLOCK - 1) / BLOCK; int gridy = (n + BLOCK - 1) / BLOCK; dim3 dimBlock(BLOCK, BLOCK); dim3 dimGrid(gridx, gridy * blocks); gpuLaunchKernel( RST1D_kernel, dimGrid, dimBlock, 0, 0, a, n, m, b, ang, bng, blocks); gpuCheckLastError(); } #undef ACACHE_K gpaw-24.1.0/c/gpu/kernels/restrict.cpp000066400000000000000000000246121454550013000175610ustar00rootroot00000000000000#include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX # define BLOCK (16) # define BLOCK_X (32) # define BLOCK_Y (8) # define ACACHE_X (2 * BLOCK_X + 1) # define ACACHE_Y (2 * BLOCK_Y + 1) #endif __global__ void Zgpu(restrict_kernel)(const Tgpu* a, const int3 n, Tgpu* b, const int3 b_n, int xdiv, int 
blocks) { int i2, i1; int i2_x2, i1_x2; int xlen; Tgpu *acache12p; Tgpu *acache12p_2x; Tgpu b_old; __shared__ Tgpu Zgpu(acache12)[ACACHE_X * ACACHE_Y]; { int xx = gridDim.x / xdiv; int xind = blockIdx.x / xx; int base = (blockIdx.x - xind * xx) * BLOCK_X; i2 = base + threadIdx.x; i2_x2 = 2 * base + threadIdx.x; int yy = gridDim.y / blocks; int blocksi = blockIdx.y / yy; base = (blockIdx.y - blocksi * yy) * BLOCK_Y; i1 = base + threadIdx.y; i1_x2 = 2 * base + threadIdx.y; xlen = (b_n.x + xdiv - 1) / xdiv; int xstart = xind * xlen; if ((b_n.x - xstart) < xlen) xlen = b_n.x - xstart; a += n.x * n.y * n.z * blocksi + 2 * xstart * n.y * n.z + i1_x2 * n.z + i2_x2; b += b_n.x * b_n.y * b_n.z * blocksi + xstart * b_n.y * b_n.z + i1 * b_n.z + i2; } acache12p = Zgpu(acache12) + ACACHE_X * threadIdx.y + threadIdx.x; acache12p_2x = Zgpu(acache12) + ACACHE_X * (2 * threadIdx.y) + 2 * threadIdx.x; acache12p[0] = a[0]; acache12p[BLOCK_X] = a[BLOCK_X]; if (threadIdx.x < 1) { acache12p[2 * BLOCK_X] = a[2 * BLOCK_X]; acache12p[BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[BLOCK_Y * n.z + 2 * BLOCK_X]; } acache12p[BLOCK_Y * ACACHE_X + 0] = a[BLOCK_Y * n.z]; acache12p[BLOCK_Y * ACACHE_X + BLOCK_X] = a[BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.y < 1) { acache12p[2 * BLOCK_Y * ACACHE_X] = a[2 * BLOCK_Y * n.z]; acache12p[2 * BLOCK_Y * ACACHE_X + BLOCK_X] = a[2 * BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.x < 1) acache12p[2 * BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[2 * BLOCK_Y * n.z + 2 * BLOCK_X]; } __syncthreads(); b_old = ADD3(MULTD(acache12p_2x[ACACHE_X * 1 + 1], 0.0625), MULTD(ADD4(acache12p_2x[ACACHE_X * 1 + 0], acache12p_2x[ACACHE_X * 1 + 2], acache12p_2x[ACACHE_X * 0 + 1], acache12p_2x[ACACHE_X * 2 + 1]), 0.03125), MULTD(ADD4(acache12p_2x[ACACHE_X * 0 + 0], acache12p_2x[ACACHE_X * 0 + 2], acache12p_2x[ACACHE_X * 2 + 0], acache12p_2x[ACACHE_X * 2 + 2]), 0.015625)); __syncthreads(); for (int i0=0; i0 < xlen; i0++) { a += n.y * n.z; acache12p[0] = a[0]; acache12p[BLOCK_X] = a[BLOCK_X]; if 
(threadIdx.x < 1) { acache12p[2 * BLOCK_X] = a[2 * BLOCK_X]; acache12p[BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[BLOCK_Y * n.z + 2 * BLOCK_X]; } acache12p[BLOCK_Y * ACACHE_X + 0] = a[BLOCK_Y * n.z]; acache12p[BLOCK_Y * ACACHE_X + BLOCK_X] = a[BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.y < 1) { acache12p[2 * BLOCK_Y * ACACHE_X] = a[2 * BLOCK_Y * n.z]; acache12p[2 * BLOCK_Y * ACACHE_X + BLOCK_X] = a[2 * BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.x < 1) acache12p[2 * BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[2 * BLOCK_Y * n.z + 2 * BLOCK_X]; } __syncthreads(); IADD(b_old, ADD3(MULTD(acache12p_2x[ACACHE_X * 1 + 1], 0.125), MULTD(ADD4(acache12p_2x[ACACHE_X * 1 + 0], acache12p_2x[ACACHE_X * 1 + 2], acache12p_2x[ACACHE_X * 0 + 1], acache12p_2x[ACACHE_X * 2 + 1]), 0.0625), MULTD(ADD4(acache12p_2x[ACACHE_X * 0 + 0], acache12p_2x[ACACHE_X * 0 + 2], acache12p_2x[ACACHE_X * 2 + 0], acache12p_2x[ACACHE_X * 2 + 2]), 0.03125))); __syncthreads(); a += n.y * n.z; if (i0 == b_n.x - 1) { if (i1_x2 < n.y) { if (i2_x2 < n.z) { acache12p[0] = a[0]; if (i2_x2 + BLOCK_X < n.z) { acache12p[BLOCK_X] = a[BLOCK_X]; if (threadIdx.x < 1) { if (i2_x2 + 2 * BLOCK_X < n.z) acache12p[2 * BLOCK_X] = a[2 * BLOCK_X]; } } } } if (i1_x2 + BLOCK_Y < n.y) { if (i2_x2 < n.z) { acache12p[BLOCK_Y * ACACHE_X + 0] = a[BLOCK_Y * n.z]; if (i2_x2 + BLOCK_X < n.z) { acache12p[BLOCK_Y * ACACHE_X + BLOCK_X] = a[BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.x < 1) { if (i2_x2 + 2 * BLOCK_X < n.z) acache12p[BLOCK_Y * ACACHE_X + 2 * BLOCK_X] =a[BLOCK_Y * n.z + 2 * BLOCK_X]; } } } } if (threadIdx.y < 1) { if (i1_x2 + 2 * BLOCK_Y < n.y) { if (i2_x2 < n.z) { acache12p[2 * BLOCK_Y * ACACHE_X] = a[2 * BLOCK_Y * n.z]; if (i2_x2 + BLOCK_X < n.z) { acache12p[2 * BLOCK_Y * ACACHE_X + BLOCK_X] = a[2 * BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.x < 1) if (i2_x2 + 2 * BLOCK_X < n.z) acache12p[2 * BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[2 * BLOCK_Y * n.z + 2 * BLOCK_X]; } } } } } else { acache12p[0] = a[0]; acache12p[BLOCK_X] = a[BLOCK_X]; if (threadIdx.x 
< 1) { acache12p[2 * BLOCK_X] = a[2 * BLOCK_X]; acache12p[BLOCK_Y * ACACHE_X + 2 * BLOCK_X] = a[BLOCK_Y * n.z + 2 * BLOCK_X]; } acache12p[BLOCK_Y * ACACHE_X + 0] = a[BLOCK_Y * n.z]; acache12p[BLOCK_Y * ACACHE_X + BLOCK_X] = a[BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.y < 1) { acache12p[2 * BLOCK_Y * ACACHE_X] = a[2 * BLOCK_Y * n.z]; acache12p[2 * BLOCK_Y * ACACHE_X + BLOCK_X] = a[2 * BLOCK_Y * n.z + BLOCK_X]; if (threadIdx.x < 1) acache12p[2 * BLOCK_Y * ACACHE_X + 2 * BLOCK_X] =a[2 * BLOCK_Y * n.z + 2 * BLOCK_X]; } } __syncthreads(); Tgpu b_new=ADD3(MULTD(acache12p_2x[ACACHE_X * 1 + 1], 0.0625), MULTD(ADD4(acache12p_2x[ACACHE_X * 1 + 0], acache12p_2x[ACACHE_X * 1 + 2], acache12p_2x[ACACHE_X * 0 + 1], acache12p_2x[ACACHE_X * 2 + 1]), 0.03125), MULTD(ADD4(acache12p_2x[ACACHE_X * 0 + 0], acache12p_2x[ACACHE_X * 0 + 2], acache12p_2x[ACACHE_X * 2 + 0], acache12p_2x[ACACHE_X * 2 + 2]), 0.015625)); if (i1 < b_n.y && i2 < b_n.z) b[0] = ADD(b_old, b_new); b_old = b_new; __syncthreads(); b += b_n.y * b_n.z; } } extern "C" void Zgpu(bmgs_restrict_gpu)(int k, const Tgpu* a, const int size[3], Tgpu* b, const int sizeb[3], int blocks) { if (k != 2) assert(0); dim3 dimBlock(BLOCK_X, BLOCK_Y); int xdiv = MIN(MAX(sizeb[0] / 2, 1), MAX((4 + blocks - 1) / blocks, 1)); int gridy = blocks * ((sizeb[1] + dimBlock.y - 1) / dimBlock.y); int gridx = xdiv * ((sizeb[2] + dimBlock.x - 1) / dimBlock.x); dim3 dimGrid(gridx, gridy); int3 n = {size[0], size[1], size[2]}; int3 b_n = {sizeb[0], sizeb[1], sizeb[2]}; gpuLaunchKernel( Zgpu(restrict_kernel), dimGrid, dimBlock, 0, 0, a, n ,b, b_n, xdiv, blocks); gpuCheckLastError(); } #define K 2 #define RST1D Zgpu(restrict1D2) #define RST1D_kernel Zgpu(restrict1D2_kernel) #include "restrict-stencil.cpp" #undef RST1D #undef RST1D_kernel #undef K #define K 4 #define RST1D Zgpu(restrict1D4) #define RST1D_kernel Zgpu(restrict1D4_kernel) #include "restrict-stencil.cpp" #undef RST1D #undef RST1D_kernel #undef K #define K 6 #define RST1D Zgpu(restrict1D6) 
#define RST1D_kernel Zgpu(restrict1D6_kernel) #include "restrict-stencil.cpp" #undef RST1D #undef RST1D_kernel #undef K #define K 8 #define RST1D Zgpu(restrict1D8) #define RST1D_kernel Zgpu(restrict1D8_kernel) #include "restrict-stencil.cpp" #undef RST1D #undef RST1D_kernel #undef K extern "C" void Zgpu(bmgs_restrict_stencil_gpu)(int k, Tgpu* a, const int na[3], Tgpu* b, const int nb[3], Tgpu* w, int blocks) { void (*func)(const Tgpu*, int, int, Tgpu*, int, int, int); int ang = na[0] * na[1] * na[2]; int bng = nb[0] * nb[1] * nb[2]; if (k == 2) func = Zgpu(restrict1D2); else if (k == 4) func = Zgpu(restrict1D4); else if (k == 6) func = Zgpu(restrict1D6); else func = Zgpu(restrict1D8); int e = k * 2 - 3; func(a, (na[2] - e) / 2, na[0] * na[1], w, ang, ang, blocks); func(w, (na[1] - e) / 2, na[0] * (na[2] - e) / 2, a, ang, ang, blocks); func(a, (na[0] - e) / 2, (na[1] - e) * (na[2] - e) / 4, b, ang, bng, blocks); } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "restrict.cpp" #endif gpaw-24.1.0/c/gpu/kernels/translate.cpp000066400000000000000000000042341454550013000177150ustar00rootroot00000000000000#include #include #include #include #include "../gpu.h" #include "../gpu-complex.h" #ifndef GPU_USE_COMPLEX #define BLOCK_MAX 32 #define GRID_MAX 65535 #define BLOCK_TOTALMAX 256 #endif __global__ void Zgpu(bmgs_translate_kernel)( const Tgpu* a, const int3 c_sizea, Tgpu* b, const int3 c_sizeb, #ifdef GPU_USE_COMPLEX gpuDoubleComplex phase, #endif int blocks, int xdiv) { int xx = gridDim.x / xdiv; int yy = gridDim.y / blocks; int blocksi = blockIdx.y / yy; int i1 = (blockIdx.y - blocksi * yy) * blockDim.y + threadIdx.y; int xind = blockIdx.x / xx; int i2 = (blockIdx.x - xind * xx) * blockDim.x + threadIdx.x; b += i2 + (i1 + (xind + blocksi * c_sizea.x) * c_sizea.y) * c_sizea.z; a += i2 + (i1 + (xind + blocksi * c_sizea.x) * c_sizea.y) * c_sizea.z; while (xind < c_sizeb.x) { if ((i2 < c_sizeb.z) && (i1 < c_sizeb.y)) { #ifndef GPU_USE_COMPLEX b[0] = a[0]; #else 
b[0] = MULTT(phase, a[0]); #endif } b += xdiv * c_sizea.y * c_sizea.z; a += xdiv * c_sizea.y * c_sizea.z; xind += xdiv; } } extern "C" void Zgpu(bmgs_translate_gpu)( Tgpu* a, const int sizea[3], const int size[3], const int start1[3], const int start2[3], #ifdef GPU_USE_COMPLEX gpuDoubleComplex phase, #endif int blocks, gpuStream_t stream) { if (!(size[0] && size[1] && size[2])) return; int3 hc_sizea, hc_size; hc_sizea.x = sizea[0]; hc_sizea.y = sizea[1]; hc_sizea.z = sizea[2]; hc_size.x = size[0]; hc_size.y = size[1]; hc_size.z = size[2]; BLOCK_GRID(hc_size); Tgpu *b = a + start2[2] + (start2[1] + start2[0] * hc_sizea.y) * hc_sizea.z; a += start1[2] + (start1[1] + start1[0] * hc_sizea.y) * hc_sizea.z; gpuLaunchKernel( Zgpu(bmgs_translate_kernel), dimGrid, dimBlock, 0, stream, (Tgpu*) a, hc_sizea, (Tgpu*) b, hc_size, #ifdef GPU_USE_COMPLEX phase, #endif blocks, xdiv); gpuCheckLastError(); } #ifndef GPU_USE_COMPLEX #define GPU_USE_COMPLEX #include "translate.cpp" #endif gpaw-24.1.0/c/gpu/operators.c000066400000000000000000000312131454550013000157300ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include #include "../extensions.h" #define __OPERATORS_C #include "../operators.h" #undef __OPERATORS_C #include "bmgs.h" #include "gpu.h" #define OPERATOR_NSTREAMS (2) static gpuStream_t operator_stream[OPERATOR_NSTREAMS]; static gpuEvent_t operator_event[2]; static int operator_streams = 0; static double *operator_buf_gpu = NULL; static int operator_buf_size = 0; static int operator_init_count = 0; /* * Increment reference count to register a new operator object * and copy the stencil to the GPU. */ void operator_init_gpu(OperatorObject *self) { self->stencil_gpu = bmgs_stencil_to_gpu(&(self->stencil)); operator_init_count++; } /* * Ensure buffer is allocated and is big enough. Reallocate only if * size has increased. * * Create also GPU streams and events if not already created. 
*/ void operator_alloc_buffers(OperatorObject *self, int blocks) { const boundary_conditions* bc = self->bc; const int* size2 = bc->size2; int ng2 = (bc->ndouble * size2[0] * size2[1] * size2[2]) * blocks; if (ng2 > operator_buf_size) { gpuFree(operator_buf_gpu); gpuCheckLastError(); gpuMalloc(&operator_buf_gpu, sizeof(double) * ng2); operator_buf_size = ng2; } if (!operator_streams) { for (int i=0; i < OPERATOR_NSTREAMS; i++) { gpuStreamCreate(&(operator_stream[i])); } for (int i=0; i < 2; i++) { gpuEventCreateWithFlags( &operator_event[i], gpuEventDefault|gpuEventDisableTiming); } operator_streams = OPERATOR_NSTREAMS; } } /* * Reset reference count and unset buffer. */ void operator_init_buffers_gpu() { operator_buf_gpu = NULL; operator_buf_size = 0; operator_init_count = 0; operator_streams = 0; } /* * Deallocate buffer and destroy GPU streams and events, * or decrease reference count * * arguments: * (int) force -- if true, force deallocation etc. */ void operator_dealloc_gpu(int force) { if (force) { operator_init_count = 1; } if (operator_init_count == 1) { gpuFree(operator_buf_gpu); if (operator_streams) { for (int i=0; i < OPERATOR_NSTREAMS; i++) { gpuStreamSynchronize(operator_stream[i]); gpuStreamDestroy(operator_stream[i]); } for (int i=0; i < 2; i++) { gpuEventDestroy(operator_event[i]); } } operator_init_buffers_gpu(); return; } if (operator_init_count > 0) { operator_init_count--; } } /* * Run the relax algorithm (see Operator_relax() in ../operators.c) * on the GPU. 
*/
static void _operator_relax_gpu(OperatorObject* self, int relax_method,
                                double *fun, const double *src,
                                int nrelax, double w)
{
    boundary_conditions* bc = self->bc;

    MPI_Request recvreq[3][2];
    MPI_Request sendreq[3][2];

    /* No phase factors are used by relax: ph stays a null pointer and is
       only forwarded (offset by 2 * i) to the unpack routines. */
    const double_complex *ph;
    ph = 0;

    /* Relaxation always works on a single block. */
    int blocks = 1;
    operator_alloc_buffers(self, blocks);

    /* One flag per face (axis 0..2, side 0/1) whose sendproc entry is not
       DO_NOTHING. */
    int boundary = 0;
    if (bc->sendproc[0][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_X0;
    if (bc->sendproc[0][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_X1;
    if (bc->sendproc[1][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Y0;
    if (bc->sendproc[1][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Y1;
    if (bc->sendproc[2][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Z0;
    if (bc->sendproc[2][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Z1;

    /* The two-stream path is taken only if the stencil passes the boundary
       test AND the summed per-face max(nsend, nrecv) byte count exceeds
       GPU_OVERLAP_SIZE. */
    int gpu_overlap = bmgs_fd_boundary_test(&self->stencil_gpu, boundary,
                                            bc->ndouble);
    int nsendrecvs = 0;
    for (int i=0; i < 3; i++) {
        for (int j=0; j < 2; j++) {
            nsendrecvs += MAX(bc->nsend[i][j], bc->nrecv[i][j])
                        * blocks * sizeof(double);
        }
    }
    gpu_overlap &= (nsendrecvs > GPU_OVERLAP_SIZE);
    /* Seed event[1] on the default stream so that the first iteration's
       wait on it (below) has something to wait for. */
    if (gpu_overlap)
        gpuEventRecord(operator_event[1], 0);

    for (int n=0; n < nrelax; n++ ) {
        if (gpu_overlap) {
            /* stream[0]: wait for the previous iteration's stream[1] work
               (event[1]), paste into the work buffer, then relax with the
               SKIP flag.  NOTE(review): presumably SKIP = interior points
               only -- confirm against bmgs_relax_gpu. */
            gpuStreamWaitEvent(operator_stream[0], operator_event[1], 0);
            bc_unpack_paste_gpu(bc, fun, operator_buf_gpu, recvreq,
                                operator_stream[0], 1);
            /* event[0] marks "paste finished"; it is recorded before the
               SKIP relax is queued, so stream[1] does not wait for that
               kernel. */
            gpuEventRecord(operator_event[0], operator_stream[0]);

            bmgs_relax_gpu(relax_method, &self->stencil_gpu,
                           operator_buf_gpu, fun, src, w,
                           boundary|GPAW_BOUNDARY_SKIP,
                           operator_stream[0]);

            /* stream[1]: after the paste, unpack the three axes and relax
               with the ONLY flag; event[1] marks the end of this work. */
            gpuStreamWaitEvent(operator_stream[1], operator_event[0], 0);
            for (int i=0; i < 3; i++) {
                bc_unpack_gpu_async(bc, operator_buf_gpu, i, recvreq,
                                    sendreq[i], ph + 2 * i,
                                    operator_stream[1], 1);
            }
            bmgs_relax_gpu(relax_method, &self->stencil_gpu,
                           operator_buf_gpu, fun, src, w,
                           boundary|GPAW_BOUNDARY_ONLY,
                           operator_stream[1]);
            gpuEventRecord(operator_event[1], operator_stream[1]);
        } else {
            /* Single-stream path: everything is issued on stream 0 in
               order, followed by one relax call over the whole grid
               (GPAW_BOUNDARY_NORMAL). */
            bc_unpack_paste_gpu(bc, fun, operator_buf_gpu, recvreq,
                                0, 1);
            for (int i=0; i < 3; i++) {
                bc_unpack_gpu(bc, operator_buf_gpu, i, recvreq,
                              sendreq[i], ph + 2 * i, 0, 1);
            }
            bmgs_relax_gpu(relax_method, &self->stencil_gpu,
                           operator_buf_gpu, fun, src, w,
                           GPAW_BOUNDARY_NORMAL, 0);
        }
    }

    /* Join both helper streams: stream 0 waits on the last event[1]
       (stream[1] work), and the host blocks until stream[0] is done. */
    if (gpu_overlap) {
        gpuStreamWaitEvent(0, operator_event[1], 0);
        gpuStreamSynchronize(operator_stream[0]);
    }
}

/*
 * Python interface for the GPU version of the relax algorithm
 * (similar to Operator_relax() for CPUs).
 *
 * arguments:
 *   relax_method -- relaxation method (int)
 *   func_gpu     -- pointer to device memory (GPUArray.gpudata)
 *   source_gpu   -- pointer to device memory (GPUArray.gpudata)
 *   nrelax       -- number of iterations (int)
 *   w            -- weight (float), optional, defaults to 1.0
 *
 * returns None, or NULL if argument parsing fails or a Python error is
 * pending after the relaxation has been issued.
 */
PyObject* Operator_relax_gpu(OperatorObject* self, PyObject* args)
{
    int relax_method;
    void *func_gpu;
    void *source_gpu;
    double w = 1.0;
    int nrelax;

    /* The two device addresses arrive as Python ints ("n" format unit)
       and are written straight into the void* variables.
       NOTE(review): relies on Py_ssize_t and void* having the same
       size. */
    if (!PyArg_ParseTuple(args, "inni|d", &relax_method, &func_gpu,
                          &source_gpu, &nrelax, &w))
        return NULL;

    double *fun = (double*) func_gpu;
    const double *src = (double*) source_gpu;

    _operator_relax_gpu(self, relax_method, fun, src, nrelax, w);

    if (PyErr_Occurred())
        return NULL;
    else
        Py_RETURN_NONE;
}

/*
 * Run the FD algorithm (see apply_worker() in ../operators.c)
 * on the GPU.
*/
static void _operator_apply_gpu(OperatorObject *self,
                                const double *in, double *out,
                                int nin, int blocks, bool real,
                                const double_complex *ph)
{
    boundary_conditions* bc = self->bc;
    const int *size1 = bc->size1;
    /* Doubles per input/output array: ndouble * size1[0..2]. */
    int ng = bc->ndouble * size1[0] * size1[1] * size1[2];

    MPI_Request recvreq[3][2];
    MPI_Request sendreq[3][2];

    operator_alloc_buffers(self, blocks);

    /* One flag per face (axis 0..2, side 0/1) whose sendproc entry is not
       DO_NOTHING. */
    int boundary = 0;
    if (bc->sendproc[0][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_X0;
    if (bc->sendproc[0][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_X1;
    if (bc->sendproc[1][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Y0;
    if (bc->sendproc[1][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Y1;
    if (bc->sendproc[2][0] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Z0;
    if (bc->sendproc[2][1] != DO_NOTHING)
        boundary |= GPAW_BOUNDARY_Z1;

    /* The two-stream path is taken only if the stencil passes the boundary
       test AND the summed per-face max(nsend, nrecv) byte count (scaled by
       blocks) exceeds GPU_OVERLAP_SIZE. */
    int gpu_overlap = bmgs_fd_boundary_test(&self->stencil_gpu, boundary,
                                            bc->ndouble);
    int nsendrecvs = 0;
    for (int i=0; i < 3; i++) {
        for (int j=0; j < 2; j++) {
            nsendrecvs += MAX(bc->nsend[i][j], bc->nrecv[i][j])
                        * blocks * sizeof(double);
        }
    }
    gpu_overlap &= (nsendrecvs > GPU_OVERLAP_SIZE);
    /* Seed event[1] on the default stream so that the first batch's wait
       on it (below) has something to wait for. */
    if (gpu_overlap)
        gpuEventRecord(operator_event[1], 0);

    /* Process the nin arrays in batches of at most `blocks`. */
    for (int n=0; n < nin; n += blocks) {
        const double *in2 = in + n * ng;
        double *out2 = out + n * ng;
        /* The last batch may be shorter. */
        int myblocks = MIN(blocks, nin - n);
        if (gpu_overlap) {
            /* stream[0]: wait for the previous batch's stream[1] work
               (event[1]), paste into the work buffer, then apply the
               stencil with the SKIP flag.  NOTE(review): presumably
               SKIP = interior points only -- confirm against
               bmgs_fd_gpu. */
            gpuStreamWaitEvent(operator_stream[0], operator_event[1], 0);
            bc_unpack_paste_gpu(bc, in2, operator_buf_gpu, recvreq,
                                operator_stream[0], myblocks);
            /* event[0] marks "paste finished"; it is recorded before the
               SKIP kernel is queued. */
            gpuEventRecord(operator_event[0], operator_stream[0]);

            if (real) {
                bmgs_fd_gpu(&self->stencil_gpu, operator_buf_gpu, out2,
                            boundary|GPAW_BOUNDARY_SKIP, myblocks,
                            operator_stream[0]);
            } else {
                bmgs_fd_gpuz(&self->stencil_gpu,
                             (const gpuDoubleComplex*) operator_buf_gpu,
                             (gpuDoubleComplex*) out2,
                             boundary|GPAW_BOUNDARY_SKIP, myblocks,
                             operator_stream[0]);
            }
            /* stream[1]: after the paste, unpack the three axes and apply
               the stencil with the ONLY flag; event[1] marks the end of
               this work. */
            gpuStreamWaitEvent(operator_stream[1], operator_event[0], 0);
            for (int i=0; i < 3; i++) {
                bc_unpack_gpu_async(bc, operator_buf_gpu, i, recvreq,
                                    sendreq[i], ph + 2 * i,
                                    operator_stream[1], myblocks);
            }
            if (real) {
                bmgs_fd_gpu(&self->stencil_gpu, operator_buf_gpu, out2,
                            boundary|GPAW_BOUNDARY_ONLY, myblocks,
                            operator_stream[1]);
            } else {
                bmgs_fd_gpuz(&self->stencil_gpu,
                             (const gpuDoubleComplex*) operator_buf_gpu,
                             (gpuDoubleComplex*) out2,
                             boundary|GPAW_BOUNDARY_ONLY, myblocks,
                             operator_stream[1]);
            }
            gpuEventRecord(operator_event[1], operator_stream[1]);
        } else {
            /* Single-stream path: paste, unpack all three axes, then one
               stencil call over the whole grid (GPAW_BOUNDARY_NORMAL),
               all on stream 0. */
            bc_unpack_paste_gpu(bc, in2, operator_buf_gpu, recvreq,
                                0, myblocks);
            for (int i=0; i < 3; i++) {
                bc_unpack_gpu(bc, operator_buf_gpu, i, recvreq,
                              sendreq[i], ph + 2 * i, 0, myblocks);
            }
            if (real) {
                bmgs_fd_gpu(&self->stencil_gpu, operator_buf_gpu, out2,
                            GPAW_BOUNDARY_NORMAL, myblocks, 0);
            } else {
                bmgs_fd_gpuz(&self->stencil_gpu,
                             (const gpuDoubleComplex*) (operator_buf_gpu),
                             (gpuDoubleComplex*) out2,
                             GPAW_BOUNDARY_NORMAL, myblocks, 0);
            }
        }
    }

    /* Join both helper streams: stream 0 waits on the last event[1]
       (stream[1] work), and the host blocks until stream[0] is done. */
    if (gpu_overlap) {
        gpuStreamWaitEvent(0, operator_event[1], 0);
        gpuStreamSynchronize(operator_stream[0]);
    }
}

/*
 * Python interface for the GPU version of the FD algorithm
 * (similar to Operator_apply() for CPUs).
* * arguments: * input_gpu -- pointer to device memory (GPUArray.gpudata) * output_gpu -- pointer to device memory (GPUArray.gpudata) * shape -- shape of the array (tuple) * type -- datatype of array elements * phases -- phase (complex) (ignored if type is NPY_DOUBLE) */ PyObject * Operator_apply_gpu(OperatorObject* self, PyObject* args) { PyArrayObject* phases = 0; void *input_gpu; void *output_gpu; PyObject *shape; PyArray_Descr *type; if (!PyArg_ParseTuple(args, "nnOO|O", &input_gpu, &output_gpu, &shape, &type, &phases)) return NULL; int nin = 1; if (PyTuple_Size(shape) == 4) nin = (int) PyLong_AsLong(PyTuple_GetItem(shape, 0)); const double *in = (double*) input_gpu; double *out = (double*) output_gpu; bool real = (type->type_num == NPY_DOUBLE); const double_complex *ph; if (real) ph = 0; else ph = COMPLEXP(phases); boundary_conditions* bc = self->bc; int mpi_size = 1; if ((bc->maxsend || bc->maxrecv) && bc->comm != MPI_COMM_NULL) { MPI_Comm_size(bc->comm, &mpi_size); } int blocks = MAX(1, MIN(nin, MIN((GPU_BLOCKS_MIN) * mpi_size, (GPU_BLOCKS_MAX) / bc->ndouble))); _operator_apply_gpu(self, in, out, nin, blocks, real, ph); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } gpaw-24.1.0/c/gpu/pwlfc-expand.c000066400000000000000000000244531454550013000163120ustar00rootroot00000000000000#include "../extensions.h" #define GPAW_ARRAY_DISABLE_NUMPY #define GPAW_ARRAY_ALLOW_CUPY #include "../array.h" #undef GPAW_ARRAY_DISABLE_NUMPY #include "gpu.h" #include "gpu-complex.h" #include void calculate_residual_launch_kernel(int nG, int nn, double* residual_ng, double* eps_n, double* wf_nG, int is_complex); void pwlfc_expand_gpu_launch_kernel(int itemsize, double* f_Gs, gpuDoubleComplex *emiGR_Ga, double *Y_GL, int* l_s, int* a_J, int* s_J, double* f_GI, int* I_J, int nG, int nJ, int nL, int nI, int natoms, int nsplines, bool cc); void pw_insert_gpu_launch_kernel( int nb, int nG, int nQ, double* c_nG, npy_int32* Q_G, double scale, double* tmp_nQ); void 
add_to_density_gpu_launch_kernel(int nb, int nR, double* f_n, double complex* psit_nR, double* rho_R, int wfs_is_complex); void dH_aii_times_P_ani_launch_kernel(int nA, int nn, int nI, npy_int32* ni_a, double* dH_aii_dev, gpuDoubleComplex* P_ani_dev, gpuDoubleComplex* outP_ani_dev, int is_complex); void evaluate_pbe_launch_kernel(int nspin, int ng, double* n, double* v, double* e, double* sigma, double* dedsigma); void evaluate_lda_launch_kernel(int nspin, int ng, double* n, double* v, double* e); PyObject* evaluate_lda_gpu(PyObject* self, PyObject* args) { PyObject* n_obj; PyObject* v_obj; PyObject* e_obj; if (!PyArg_ParseTuple(args, "OOO", &n_obj, &v_obj, &e_obj)) return NULL; int nspin = Array_DIM(n_obj, 0); if ((nspin != 1) && (nspin != 2)) { PyErr_Format(PyExc_RuntimeError, "Expected 1 or 2 spins. Got %d.", nspin); return NULL; } int ng = 1; for (int d=1; d #include #include void bc_init_buffers_gpu(); void blas_init_gpu(); void transformer_init_buffers_gpu(); void operator_init_buffers_gpu(); void reduce_init_buffers_gpu(); void lfc_reduce_init_buffers_gpu(); void bc_dealloc_gpu(int force); void transformer_dealloc_gpu(int force); void operator_dealloc_gpu(int force); void reduce_dealloc_gpu(); void lfc_reduce_dealloc_gpu(); PyObject* gpaw_gpu_init(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; bc_init_buffers_gpu(); transformer_init_buffers_gpu(); operator_init_buffers_gpu(); reduce_init_buffers_gpu(); lfc_reduce_init_buffers_gpu(); blas_init_gpu(); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } PyObject* gpaw_gpu_delete(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; reduce_dealloc_gpu(); lfc_reduce_dealloc_gpu(); bc_dealloc_gpu(1); transformer_dealloc_gpu(1); operator_dealloc_gpu(1); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } gpaw-24.1.0/c/gpu/transformers.c000066400000000000000000000174021454550013000164430ustar00rootroot00000000000000#include #define 
PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include #include "../extensions.h" #define __TRANSFORMERS_C #include "../transformers.h" #undef __TRANSFORMERS_C #include "bmgs.h" #include "gpu.h" static double *transformer_buf_gpu = NULL; static double *transformer_buf16_gpu = NULL; static int transformer_buf_size = 0; static int transformer_init_count = 0; /* * Increment reference count to register a new tranformer object. */ void transformer_init_gpu(TransformerObject *self) { transformer_init_count++; } /* * Ensure buffer is allocated and is big enough. Reallocate only if * size has increased. */ void transformer_init_buffers(TransformerObject *self, int blocks) { const boundary_conditions* bc = self->bc; const int* size2 = bc->size2; int ng2 = (bc->ndouble * size2[0] * size2[1] * size2[2]) * blocks; if (ng2 > transformer_buf_size) { gpuFree(transformer_buf_gpu); gpuCheckLastError(); gpuMalloc(&transformer_buf_gpu, sizeof(double) * ng2); gpuFree(transformer_buf16_gpu); gpuCheckLastError(); gpuMalloc(&transformer_buf16_gpu, sizeof(double) * ng2 * 16); transformer_buf_size = ng2; } } /* * Reset reference count and unset buffer. */ void transformer_init_buffers_gpu() { transformer_buf_gpu = NULL; transformer_buf16_gpu = NULL; transformer_buf_size = 0; transformer_init_count = 0; } /* * Deallocate buffer or decrease reference count. * * arguments: * (int) force -- if true, force deallocation */ void transformer_dealloc_gpu(int force) { if (force) transformer_init_count = 1; if (transformer_init_count == 1) { gpuFree(transformer_buf_gpu); gpuCheckLastError(); transformer_init_buffers_gpu(); return; } if (transformer_init_count > 0) transformer_init_count--; } /* * Run the interpolate and restrict algorithm (see transapply_worker() * in ../transformers.c) on the GPU. 
*/ static void _transformer_apply_gpu(TransformerObject* self, const double *in, double *out, int nin, int blocks, bool real, const double_complex *ph, bool stencil) { boundary_conditions* bc = self->bc; const int* size1 = bc->size1; int ng = bc->ndouble * size1[0] * size1[1] * size1[2]; int out_ng = bc->ndouble * self->size_out[0] * self->size_out[1] * self->size_out[2]; int mpi_size = 1; if ((bc->maxsend || bc->maxrecv) && bc->comm != MPI_COMM_NULL) MPI_Comm_size(bc->comm, &mpi_size); MPI_Request recvreq[3][2]; MPI_Request sendreq[3][2]; transformer_init_buffers(self, blocks); double* buf = transformer_buf_gpu; double* buf16 = transformer_buf16_gpu; /* use stencil version, if no optimised kernel available */ if (self->k != 2) { stencil = 1; } for (int n = 0; n < nin; n += blocks) { const double* in2 = in + n * ng; double* out2 = out + n * out_ng; int myblocks = MIN(blocks, nin - n); bc_unpack_paste_gpu(bc, in2, buf, recvreq, 0, myblocks); for (int i=0; i < 3; i++) { bc_unpack_gpu(bc, buf, i, recvreq, sendreq[i], ph + 2 * i, 0, myblocks); } if (self->interpolate) { if (stencil) { if (real) { bmgs_interpolate_stencil_gpu(self->k, self->skip, buf, bc->size2, out2, self->size_out, buf16, myblocks); } else { bmgs_interpolate_stencil_gpuz(self->k, self->skip, (gpuDoubleComplex*) (buf), bc->size2, (gpuDoubleComplex*) (out2), self->size_out, (gpuDoubleComplex*) (buf16), myblocks); } } else { if (real) { bmgs_interpolate_gpu(self->k, self->skip, buf, bc->size2, out2, self->size_out, myblocks); } else { bmgs_interpolate_gpuz(self->k, self->skip, (gpuDoubleComplex*) (buf), bc->size2, (gpuDoubleComplex*) (out2), self->size_out, myblocks); } } } else { if (stencil) { if (real) { bmgs_restrict_stencil_gpu(self->k, buf, bc->size2, out2, self->size_out, buf16, myblocks); } else { bmgs_restrict_stencil_gpuz(self->k, (gpuDoubleComplex*) (buf), bc->size2, (gpuDoubleComplex*) (out2), self->size_out, (gpuDoubleComplex*) (buf16), myblocks); } } else { if (real) { 
bmgs_restrict_gpu(self->k, buf, bc->size2, out2, self->size_out, myblocks); } else { bmgs_restrict_gpuz(self->k, (gpuDoubleComplex*) (buf), bc->size2, (gpuDoubleComplex*) (out2), self->size_out, myblocks); } } } } } /* * Python interface for the GPU version of the interpolate and restrict * algorithm (similar to Transformer_apply() for CPUs). * * arguments: * input_gpu -- pointer to device memory (GPUArray.gpudata) * output_gpu -- pointer to device memory (GPUArray.gpudata) * shape -- shape of the array (tuple) * type -- datatype of array elements * phases -- phase (complex) (ignored if type is NPY_DOUBLE) * stencil -- use stencil version of interpolate functions */ PyObject* Transformer_apply_gpu(TransformerObject *self, PyObject *args) { PyArrayObject* phases = 0; void *input_gpu; void *output_gpu; PyObject *shape; PyArray_Descr *type; int stencil = 0; if (!PyArg_ParseTuple(args, "nnOO|Oi", &input_gpu, &output_gpu, &shape, &type, &phases, &stencil)) return NULL; int nin = 1; if (PyTuple_Size(shape) == 4) nin = (int) PyLong_AsLong(PyTuple_GetItem(shape, 0)); const double* in = (double*) input_gpu; double* out = (double*) output_gpu; bool real = (type->type_num == NPY_DOUBLE); const double_complex* ph = (real ? 0 : COMPLEXP(phases)); boundary_conditions* bc = self->bc; int mpi_size = 1; if ((bc->maxsend || bc->maxrecv) && bc->comm != MPI_COMM_NULL) MPI_Comm_size(bc->comm, &mpi_size); int blocks = MAX(1, MIN(nin, MIN((GPU_BLOCKS_MIN) * mpi_size, (GPU_BLOCKS_MAX) / bc->ndouble))); _transformer_apply_gpu(self, in, out, nin, blocks, real, ph, stencil); if (PyErr_Occurred()) return NULL; else Py_RETURN_NONE; } gpaw-24.1.0/c/lcao.c000066400000000000000000000136051454550013000140420ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #include "extensions.h" #include "bmgs/bmgs.h" #include "spline.h" #include // +-----------n // +----m +----m | +----c+m | // | | | | | | | | // | b | = | v | * | | a | | // | | | | | | | | // 0----+ 0----+ | c----+ | // | | // 0-----------+ void cut(const double* a, const int n[3], const int c[3], const double* v, double* b, const int m[3]) { a += c[2] + (c[1] + c[0] * n[1]) * n[2]; for (int i0 = 0; i0 < m[0]; i0++) { for (int i1 = 0; i1 < m[1]; i1++) { for (int i2 = 0; i2 < m[2]; i2++) b[i2] = v[i2] * a[i2]; a += n[2]; b += m[2]; v += m[2]; } a += n[2] * (n[1] - m[1]); } } PyObject *tci_overlap(PyObject *self, PyObject *args) { /* Calculate two-center integral overlaps: -- -- l _ X = > s (r) > G r Y (r) LL' -- l -- LL'L'' L'' l L'' or derivatives / dX \ ^ -- -- l _ | -- | = R > s'(r) > G r Y (r) \ dR /LL' -- l -- LL'L'' L'' l L'' l _ -- -- / d r Y(r) \ + > s (r) > G | ----- | , -- l -- LL'L'' \ dR /L'' l L'' ^ where dR denotes movement of one of the centers and R is a unit vector parallel to the displacement vector r. Without derivatives, Rhat_c_obj, drLYdR_Lc_obj, and dxdR_cmi_obj must still be numpy arrays but are otherwise ignored (may have size 0). With derivatives, x_mi_obj can be likewise ignored. 
*/ int la, lb; PyArrayObject *G_LLL_obj; PyObject *spline_l; double r; PyArrayObject *rlY_L_obj, *x_mi_obj; int is_derivative; PyArrayObject *Rhat_c_obj, *drlYdR_Lc_obj, *dxdR_cmi_obj; if (!PyArg_ParseTuple(args, "iiOOdOOiOOO", &la, &lb, &G_LLL_obj, &spline_l, &r, &rlY_L_obj, &x_mi_obj, &is_derivative, &Rhat_c_obj, &drlYdR_Lc_obj, &dxdR_cmi_obj)) return NULL; SplineObject *spline_obj; bmgsspline *spline; double *x_mi = (double *) PyArray_DATA(x_mi_obj); double *G_LLL = (double *) PyArray_DATA(G_LLL_obj); double *rlY_L = (double *) PyArray_DATA(rlY_L_obj); double *Rhat_c = (double *) PyArray_DATA(Rhat_c_obj); double *drlYdR_Lc = (double *) PyArray_DATA(drlYdR_Lc_obj); double *dxdR_cmi = (double *) PyArray_DATA(dxdR_cmi_obj); int Lastart = la * la; int Lbstart = lb * lb; int l = (la + lb) % 2; int nsplines = PyList_Size(spline_l); int ispline; int itemsize = PyArray_ITEMSIZE(G_LLL_obj); npy_intp *strides = PyArray_STRIDES(G_LLL_obj); npy_intp *xstrides = PyArray_STRIDES(x_mi_obj); int stride0 = strides[0] / itemsize; int stride1 = strides[1] / itemsize; int xstride = xstrides[0] / itemsize; G_LLL += Lastart * stride0 + Lbstart * stride1; for(ispline=0; ispline < nsplines; ispline++, l+=2) { int Lstart = l * l; spline_obj = (SplineObject*)PyList_GET_ITEM(spline_l, ispline); spline = &spline_obj->spline; double s, dsdr; if(is_derivative) { bmgs_get_value_and_derivative(spline, r, &s, &dsdr); } else { s = bmgs_splinevalue(spline, r); } if(fabs(s) < 1e-10) { continue; } int nm1 = 2 * la + 1; int nm2 = 2 * lb + 1; int m1, m2, L; int nL = 2 * l + 1; double srlY_L[2 * l + 1]; // Variable but very small alloc on stack for(L=0; L < nL; L++) { srlY_L[L] = s * rlY_L[Lstart + L]; } if(!is_derivative) { for(m1=0; m1 < nm1; m1++) { for(m2=0; m2 < nm2; m2++) { double x = 0.0; for(L=0; L < nL; L++) { x += G_LLL[stride0 * m1 + stride1 * m2 + Lstart + L] * srlY_L[L]; } x_mi[m1 * xstride + m2] += x; } } continue; } // Derivative only int c; npy_intp *dxdRstrides = 
PyArray_STRIDES(dxdR_cmi_obj); int dxdRstride0 = dxdRstrides[0] / itemsize; int dxdRstride1 = dxdRstrides[1] / itemsize; double dsdr_Rhat_c[3]; for(c=0; c < 3; c++) { dsdr_Rhat_c[c] = dsdr * Rhat_c[c]; } double s_drlYdR_Lc[nL * 3]; for(L=0; L < nL; L++) { for(c=0; c < 3; c++) { s_drlYdR_Lc[L * 3 + c] = s * drlYdR_Lc[(Lstart + L) * 3 + c]; } } // This loop can probably be written a lot better, but it turns out // it is so fast that we need not worry for a long time. for(m1=0; m1 < nm1; m1++) { for(m2=0; m2 < nm2; m2++) { double GrlY_mi = 0.0; for(L=0; L < nL; L++) { GrlY_mi += G_LLL[stride0 * m1 + stride1 * m2 + Lstart + L] * rlY_L[Lstart + L]; } for(c=0; c < 3; c++) { double derivative = 0.0; derivative += dsdr_Rhat_c[c] * GrlY_mi; for(L=0; L < nL; L++) { derivative += G_LLL[stride0 * m1 + stride1 * m2 + Lstart + L] * s_drlYdR_Lc[L * 3 + c]; } dxdR_cmi[dxdRstride0 * c + dxdRstride1 * m1 + m2] += derivative; } } } } Py_RETURN_NONE; } gpaw-24.1.0/c/lfc.c000066400000000000000000001636761454550013000137060ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Copyright (C) 2010,2020 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "spline.h" #include "lfc.h" #include "bmgs/spherical_harmonics.h" #include "bmgs/bmgs.h" #ifdef _OPENMP #include #endif #ifdef GPAW_NO_UNDERSCORE_BLAS # define zgemm_ zgemm #endif #ifndef GPAW_WITHOUT_BLAS void zgemm_(char *transa, char *transb, int *m, int * n, int *k, void *alpha, void *a, int *lda, const void *b, int *ldb, void *beta, void *c, int *ldc); #define myzgemm zgemm_ #else void myzgemm(char *transa, char *transb, int *m, int * n, int *k, void *alpha, void *a, int *lda, const void *b, int *ldb, void *beta, void *c, int *ldc) { const double complex *a_GM = a; const double complex *b_xM = b; double complex *c_xG = c; for (int x = 0; x < *n; x++) for (int G = 0; G < *m; G++) for (int M = 0; M < *k; M++) c_xG[*ldc * x + G] += (conj(a_GM[*lda * G + M]) * b_xM[*ldb * x + M]); } #endif #ifdef GPAW_GPU void lfc_dealloc_gpu(LFCObject *self); PyObject* NewLFCObject_gpu(LFCObject *self, PyObject *args); PyObject* add_gpu(LFCObject *self, PyObject *args); PyObject* integrate_gpu(LFCObject *self, PyObject *args); #endif static void lfc_dealloc(LFCObject *self) { if (self->bloch_boundary_conditions) free(self->phase_i); free(self->volume_i); free(self->work_gm); free(self->ngm_W); free(self->i_W); free(self->volume_W); #ifdef GPAW_GPU if (self->use_gpu) { lfc_dealloc_gpu(self); } #endif PyObject_DEL(self); } PyObject* calculate_potential_matrix(LFCObject *self, PyObject *args); PyObject* calculate_potential_matrices(LFCObject *self, PyObject *args); PyObject* lfcintegrate(LFCObject *self, PyObject *args); PyObject* derivative(LFCObject *self, PyObject *args); PyObject* normalized_derivative(LFCObject *self, PyObject *args); PyObject* construct_density(LFCObject *self, PyObject *args); PyObject* construct_density1(LFCObject *self, PyObject *args); PyObject* ae_valence_density_correction(LFCObject *self, PyObject *args); PyObject* ae_core_density_correction(LFCObject 
*self, PyObject *args); PyObject* lcao_to_grid(LFCObject *self, PyObject *args); PyObject* lcao_to_grid_k(LFCObject *self, PyObject *args); PyObject* add(LFCObject *self, PyObject *args); PyObject* calculate_potential_matrix_derivative(LFCObject *self, PyObject *args); PyObject* calculate_potential_matrix_force_contribution(LFCObject *self, PyObject *args); PyObject* second_derivative(LFCObject *self, PyObject *args); PyObject* add_derivative(LFCObject *self, PyObject *args); static PyMethodDef lfc_methods[] = { {"calculate_potential_matrix", (PyCFunction)calculate_potential_matrix, METH_VARARGS, 0}, {"calculate_potential_matrices", (PyCFunction)calculate_potential_matrices, METH_VARARGS, 0}, {"integrate", (PyCFunction)lfcintegrate, METH_VARARGS, 0}, {"derivative", (PyCFunction)derivative, METH_VARARGS, 0}, {"normalized_derivative", (PyCFunction)normalized_derivative, METH_VARARGS, 0}, {"construct_density", (PyCFunction)construct_density, METH_VARARGS, 0}, {"construct_density1", (PyCFunction)construct_density1, METH_VARARGS, 0}, {"ae_valence_density_correction", (PyCFunction)ae_valence_density_correction, METH_VARARGS, 0}, {"ae_core_density_correction", (PyCFunction)ae_core_density_correction, METH_VARARGS, 0}, {"lcao_to_grid", (PyCFunction)lcao_to_grid, METH_VARARGS, 0}, {"lcao_to_grid_k", (PyCFunction)lcao_to_grid_k, METH_VARARGS, 0}, {"add", (PyCFunction)add, METH_VARARGS, 0}, {"calculate_potential_matrix_derivative", (PyCFunction)calculate_potential_matrix_derivative, METH_VARARGS, 0}, {"calculate_potential_matrix_force_contribution", (PyCFunction)calculate_potential_matrix_force_contribution, METH_VARARGS, 0}, {"second_derivative", (PyCFunction)second_derivative, METH_VARARGS, 0}, {"add_derivative", (PyCFunction)add_derivative, METH_VARARGS, 0}, #ifdef GPAW_GPU {"integrate_gpu", (PyCFunction)integrate_gpu, METH_VARARGS, 0}, {"add_gpu", (PyCFunction)add_gpu, METH_VARARGS, 0}, #endif {NULL, NULL, 0, NULL} }; PyTypeObject LFCType = { PyVarObject_HEAD_INIT(NULL, 
0) "LocalizedFunctionsCollection", sizeof(LFCObject), 0, (destructor)lfc_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "LFC object", 0, 0, 0, 0, 0, 0, lfc_methods }; PyObject * NewLFCObject(PyObject *obj, PyObject *args) { PyObject* A_Wgm_obj; PyArrayObject* M_W_obj; PyArrayObject* G_B_obj; PyArrayObject* W_B_obj; double dv; PyArrayObject* phase_kW_obj; int use_gpu = 0; if (!PyArg_ParseTuple(args, "OOOOdO|i", &A_Wgm_obj, &M_W_obj, &G_B_obj, &W_B_obj, &dv, &phase_kW_obj, &use_gpu)) return NULL; LFCObject *self = PyObject_NEW(LFCObject, &LFCType); if (self == NULL) return NULL; #ifdef GPAW_GPU self->use_gpu = use_gpu; #endif self->dv = dv; const int* M_W = (const int*)PyArray_DATA(M_W_obj); self->G_B = (int*)PyArray_DATA(G_B_obj); self->W_B = (int*)PyArray_DATA(W_B_obj); if (PyArray_DIMS(phase_kW_obj)[0] > 0) { self->bloch_boundary_conditions = true; self->phase_kW = (double complex*)PyArray_DATA(phase_kW_obj); } else { self->bloch_boundary_conditions = false; } int nB = PyArray_DIMS(G_B_obj)[0]; int nW = PyList_Size(A_Wgm_obj); self->nW = nW; self->nB = nB; int nimax = 0; int ngmax = 0; int ni = 0; int Ga = 0; for (int B = 0; B < nB; B++) { int Gb = self->G_B[B]; int nG = Gb - Ga; if (ni > 0 && nG > ngmax) ngmax = nG; if (self->W_B[B] >= 0) ni += 1; else { if (ni > nimax) nimax = ni; ni--; } Ga = Gb; } self->nimax = nimax; assert(ni == 0); #ifdef _OPENMP int nthreads; #pragma omp parallel { nthreads = omp_get_num_threads(); } #else int nthreads = 1; #endif // we need private volume for each thread. 
// (Constant) data is same for all threads self->volume_W = GPAW_MALLOC(LFVolume, nthreads * nW); self->i_W = GPAW_MALLOC(int, nthreads * nW); self->ngm_W = GPAW_MALLOC(int, nW); self->nimax = nimax; int nmmax = 0; for (int W = 0; W < nW; W++) { PyArrayObject* A_gm_obj = (PyArrayObject*)PyList_GetItem(A_Wgm_obj, W); self->ngm_W[W] = PyArray_DIMS(A_gm_obj)[0] * PyArray_DIMS(A_gm_obj)[1]; for (int i = 0; i < nthreads; i++) { LFVolume* volume = &self->volume_W[W + i * nW]; volume->A_gm = (const double*)PyArray_DATA(A_gm_obj); volume->nm = PyArray_DIMS(A_gm_obj)[1]; volume->M = M_W[W]; volume->W = W; if (volume->nm > nmmax) nmmax = volume->nm; } } self->work_gm = GPAW_MALLOC(double, ngmax * nmmax); self->volume_i = GPAW_MALLOC(LFVolume *, nthreads * nimax); self->phase_i = NULL; if (self->bloch_boundary_conditions) { self->phase_i = GPAW_MALLOC(complex double, nthreads * nimax); } /* Zero volume_i just in case since it will contain pointers to volume_W. */ memset(self->volume_i, 0, sizeof(LFVolume *) * nthreads * nimax); #ifdef GPAW_GPU if (use_gpu) { NewLFCObject_gpu(self, args); } #endif return (PyObject*)self; } PyObject* calculate_potential_matrix(LFCObject *lfc, PyObject *args) { PyArrayObject* vt_G_obj; PyArrayObject* Vt_MM_obj; int k; int Mstart; int Mstop; if (!PyArg_ParseTuple(args, "OOiii", &vt_G_obj, &Vt_MM_obj, &k, &Mstart, &Mstop)) return NULL; const double* vt_G = (const double*)PyArray_DATA(vt_G_obj); int nM = PyArray_DIMS(Vt_MM_obj)[1]; double dv = lfc->dv; double* work_gm = lfc->work_gm; if (!lfc->bloch_boundary_conditions) { double* Vt_MM = (double*)PyArray_DATA(Vt_MM_obj); GRID_LOOP_START(lfc, -1, 0) { // ORDINARY/GAMMA-POINT for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; int M1 = v1->M; int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; int gm = M1p - M1; int gm1 = 0; const double* A1_gm = v1->A_gm; for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) { double vtdv = vt_G[G] * dv; 
for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++) work_gm[gm1] = vtdv * A1_gm[gm]; } for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; if (M1 >= M2) { int nm2 = v2->nm; const double* A2_gm = v2->A_gm; double* Vt_mm = Vt_MM + (M1p - Mstart) * nM + M2; for (int g = 0; g < nG; g++){ int gnm1 = g * nm1p; int gnm2 = g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { int m1nM = m1 * nM; for (int m2 = 0; m2 < nm2; m2++) Vt_mm[m2 + m1nM] += A2_gm[gnm2 + m2] * work_gm[gnm1 + m1]; } } } } } } GRID_LOOP_STOP(lfc, -1, 0); } else { complex double* Vt_MM = (complex double*)PyArray_DATA(Vt_MM_obj); GRID_LOOP_START(lfc, k, 0) { // KPOINT CALC POT MATRIX for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; double complex conjphase1 = conj(phase_i[i1]); int M1 = v1->M; int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; int gm = M1p - M1; int gm1 = 0; const double* A1_gm = v1->A_gm; for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) { double vtdv = vt_G[G] * dv; for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++) work_gm[gm1] = vtdv * A1_gm[gm]; } for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; const double* A2_gm = v2->A_gm; int M2 = v2->M; if (M1 >= M2) { int nm2 = v2->nm; double complex phase = conjphase1 * phase_i[i2]; double complex* Vt_mm = Vt_MM + (M1p - Mstart) * nM + M2; for (int g = 0; g < nG; g++) { int gnm1 = g * nm1p; int gnm2 = g * nm2; int m1nM = 0; for (int m1 = 0; m1 < nm1p; m1++, m1nM += nM) { complex double wphase = work_gm[gnm1 + m1] * phase; for (int m2 = 0; m2 < nm2; m2++) { Vt_mm[m1nM + m2] += A2_gm[gnm2 + m2] * wphase; } } } } } } } GRID_LOOP_STOP(lfc, k, 0); } Py_RETURN_NONE; } PyObject* calculate_potential_matrices(LFCObject *lfc, PyObject *args) { PyArrayObject* vt_G_obj; PyArrayObject* Vt_xMM_obj; PyArrayObject* x_W_obj; int Mstart; int Mstop; if (!PyArg_ParseTuple(args, "OOOii", &vt_G_obj, &Vt_xMM_obj, &x_W_obj, &Mstart, &Mstop)) return NULL; const double* vt_G 
= (const double*)PyArray_DATA(vt_G_obj); int nM = PyArray_DIMS(Vt_xMM_obj)[2]; double dv = lfc->dv; double* work_gm = lfc->work_gm; double* Vt_xMM = (double*)PyArray_DATA(Vt_xMM_obj); int* x_W = (int*)PyArray_DATA(x_W_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; int M1 = v1->M; int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; int x1 = x_W[v1->W]; int gm = M1p - M1; int gm1 = 0; const double* A1_gm = v1->A_gm; for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) { double vtdv = vt_G[G] * dv; for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++) work_gm[gm1] = vtdv * A1_gm[gm]; } for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int x = x_W[v2->W] - x1; if (x >= 0) { int M2 = v2->M; int nm2 = v2->nm; const double* A2_gm = v2->A_gm; double* Vt_mm = (Vt_xMM + (M1p - Mstart) * nM + M2 + x * (Mstop - Mstart) * nM); for (int g = 0; g < nG; g++) { int gnm1 = g * nm1p; int gnm2 = g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { int m1nM = m1 * nM; for (int m2 = 0; m2 < nm2; m2++) Vt_mm[m2 + m1nM] += (A2_gm[gnm2 + m2] * work_gm[gnm1 + m1]); } } } } } } GRID_LOOP_STOP(lfc, -1, 0); Py_RETURN_NONE; } PyObject* lfcintegrate(LFCObject *lfc, PyObject *args) { PyArrayObject* a_xG_obj; PyArrayObject* c_xM_obj; int q; if (!PyArg_ParseTuple(args, "OOi", &a_xG_obj, &c_xM_obj, &q)) return NULL; int nd = PyArray_NDIM(a_xG_obj); npy_intp* dims = PyArray_DIMS(a_xG_obj); int nx = PyArray_MultiplyList(dims, nd - 3); int nG = PyArray_MultiplyList(dims + nd - 3, 3); int nM = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1]; double dv = lfc->dv; #pragma omp parallel { int thread_id; #ifdef _OPENMP thread_id = omp_get_thread_num(); #else thread_id = 0; #endif if (!lfc->bloch_boundary_conditions) { #pragma omp for schedule(static) for (int x = 0; x < nx; x++) { const double* a_G = (const double*)PyArray_DATA(a_xG_obj) + x * nG; double* c_M = (double*)PyArray_DATA(c_xM_obj) + x * nM; 
GRID_LOOP_START(lfc, -1, thread_id) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; const double* A_gm = v->A_gm; int nm = v->nm; double* c_M1 = c_M + v->M; for (int gm = 0, G = Ga; G < Gb; G++){ double av = a_G[G] * dv; for (int m = 0; m < nm; m++, gm++){ c_M1[m] += av * A_gm[gm]; } } } } GRID_LOOP_STOP(lfc, -1, thread_id); } } else { #pragma omp for schedule(static) for (int x = 0; x < nx; x++) { const complex double* a_G = (const complex double*)PyArray_DATA(a_xG_obj) + x * nG; complex double* c_M = (complex double*)PyArray_DATA(c_xM_obj) + x * nM; GRID_LOOP_START(lfc, q, thread_id) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; int nm = v->nm; complex double* c_M1 = c_M + v->M; const double* A_gm = v->A_gm; double complex vphase = phase_i[i] * dv; for (int gm = 0, G = Ga; G < Gb; G++){ double complex avphase = a_G[G] * vphase; for (int m = 0; m < nm; m++, gm++){ c_M1[m] += avphase * A_gm[gm]; } } } } GRID_LOOP_STOP(lfc, q, thread_id); } } } Py_RETURN_NONE; } PyObject* construct_density(LFCObject *lfc, PyObject *args) { PyArrayObject* rho_MM_obj; PyArrayObject* nt_G_obj; int k; int Mstart, Mstop; if (!PyArg_ParseTuple(args, "OOiii", &rho_MM_obj, &nt_G_obj, &k, &Mstart, &Mstop)) return NULL; double* nt_G = (double*)PyArray_DATA(nt_G_obj); int nM = PyArray_DIMS(rho_MM_obj)[1]; double* work_gm = lfc->work_gm; if (!lfc->bloch_boundary_conditions) { if (!(PyArray_DESCR(rho_MM_obj)->kind == 'f' && PyArray_DESCR(rho_MM_obj)->elsize == 8)) { PyErr_SetString(PyExc_ValueError ,"Expected float64 dtype for rho_MM array."); return NULL; } const double* rho_MM = (const double*)PyArray_DATA(rho_MM_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; int M1 = v1->M; int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; memset(work_gm, 0, nG * nm1 * sizeof(double)); double factor = 1.0; int m1end = MIN(nm1, Mstop - M1); int m1start = MAX(0, Mstart - M1); 
for (int i2 = i1; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; int nm2 = v2->nm; const double* rho_mm = rho_MM + (M1p - Mstart) * nM + M2; //assert(M1 - Mstart + m1start >= 0); for (int g = 0; g < nG; g++) { for (int m1 = m1start, m1p = 0; m1 < m1end; m1++, m1p++) { for (int m2 = 0; m2 < nm2; m2++) { work_gm[g * nm1 + m1] += (v2->A_gm[g * nm2 + m2] * rho_mm[m1p * nM + m2] * factor); } } } factor = 2.0; } int gm1 = 0; for (int G = Ga; G < Gb; G++) { double nt = 0.0; for (int m1 = 0; m1 < nm1; m1++, gm1++) { nt += v1->A_gm[gm1] * work_gm[gm1]; } nt_G[G] += nt; } } } GRID_LOOP_STOP(lfc, -1, 0); } else { if (!(PyArray_DESCR(rho_MM_obj)->kind == 'c' && PyArray_DESCR(rho_MM_obj)->elsize == 16)) { PyErr_SetString(PyExc_ValueError, "Expected complex128 dtype for rho_MM array."); return NULL; } const double complex* rho_MM = (const double complex*)PyArray_DATA(rho_MM_obj); GRID_LOOP_START(lfc, k, 0) { for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; int M1 = v1->M; int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; memset(work_gm, 0, nG * nm1 * sizeof(double)); double complex factor = 1.0; int m1end = MIN(nm1, Mstop - M1); int m1start = MAX(0, Mstart - M1); for (int i2 = i1; i2 < ni; i2++) { if (i2 > i1) factor = 2.0 * phase_i[i1] * conj(phase_i[i2]); double rfactor = creal(factor); double ifactor = cimag(factor); LFVolume* v2 = volume_i[i2]; const double* A2_gm = v2->A_gm; int M2 = v2->M; int nm2 = v2->nm; const double complex* rho_mm = rho_MM + (M1p - Mstart) * nM + M2; double rrho, irho, rwork, iwork; complex double rho; for (int g = 0; g < nG; g++) { int gnm1 = g * nm1; int gnm2 = g * nm2; int m1pnM = 0; for (int m1 = m1start, m1p=0; m1 < m1end; m1++, m1p++) { m1pnM = m1p * nM; iwork = 0; rwork = 0; for (int m2 = 0; m2 < nm2; m2++) { rho = rho_mm[m1pnM + m2]; rrho = creal(rho); irho = cimag(rho); rwork += A2_gm[gnm2 + m2] * rrho; iwork += A2_gm[gnm2 + m2] * irho; // We could save 
one of those multiplications if the buffer // were twice as large //work += A2_gm[gnm2 + m2] * (rfactor * rrho - ifactor * irho); } //work_gm[m1 + gnm1] += work; work_gm[m1 + gnm1] += rwork * rfactor - iwork * ifactor; } } } int gm1 = 0; const double* A1_gm = v1->A_gm; for (int G = Ga; G < Gb; G++) { double nt = 0.0; for (int m1 = 0; m1 < nm1; m1++, gm1++) { nt += A1_gm[gm1] * work_gm[gm1]; } nt_G[G] += nt; } } } GRID_LOOP_STOP(lfc, k, 0); } Py_RETURN_NONE; } PyObject* construct_density1(LFCObject *lfc, PyObject *args) { PyArrayObject* f_M_obj; PyArrayObject* nt_G_obj; if (!PyArg_ParseTuple(args, "OO", &f_M_obj, &nt_G_obj)) return NULL; const double* f_M = (const double*)PyArray_DATA(f_M_obj); double* nt_G = (double*)PyArray_DATA(nt_G_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; for (int gm = 0, G = Ga; G < Gb; G++) { for (int m = 0; m < v->nm; m++, gm++) { nt_G[G] += v->A_gm[gm] * v->A_gm[gm] * f_M[v->M + m]; } } } } GRID_LOOP_STOP(lfc, -1, 0); Py_RETURN_NONE; } PyObject* lcao_to_grid(LFCObject *lfc, PyObject *args) { PyArrayObject* c_M_obj; PyArrayObject* psit_G_obj; int k; if (!PyArg_ParseTuple(args, "OOi", &c_M_obj, &psit_G_obj, &k)) return NULL; if (!lfc->bloch_boundary_conditions) { if (PyArray_DESCR(c_M_obj)->type_num == NPY_DOUBLE) { const double* c_M = (const double*)PyArray_DATA(c_M_obj); double* psit_G = (double*)PyArray_DATA(psit_G_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; for (int gm = 0, G = Ga; G < Gb; G++) { for (int m = 0; m < v->nm; m++, gm++) { psit_G[G] += v->A_gm[gm] * c_M[v->M + m]; } } } } GRID_LOOP_STOP(lfc, -1, 0); } else { const double complex* c_M = (const double complex*)PyArray_DATA(c_M_obj); double complex* psit_G = (double complex*)PyArray_DATA(psit_G_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; for (int gm = 0, G = Ga; G < Gb; G++) { for (int m = 0; m < v->nm; m++, gm++) { psit_G[G] += 
v->A_gm[gm] * c_M[v->M + m]; } } } } GRID_LOOP_STOP(lfc, -1, 0); } } else { const double complex* c_M = (const double complex*)PyArray_DATA(c_M_obj); double complex* psit_G = (double complex*)PyArray_DATA(psit_G_obj); GRID_LOOP_START(lfc, k, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; double complex conjphase = conj(phase_i[i]); const double* A_gm = v->A_gm; const double complex* c_M1 = c_M + v->M; for (int gm = 0, G = Ga; G < Gb; G++) { double complex psit = 0.0; for (int m = 0; m < v->nm; m++, gm++) { psit += A_gm[gm] * c_M1[m]; } psit_G[G] += psit * conjphase; } } } GRID_LOOP_STOP(lfc, k, 0); } Py_RETURN_NONE; } // Faster implementation of lcao_to_grid() function specialized // for k-points PyObject* lcao_to_grid_k(LFCObject *lfc, PyObject *args) { PyArrayObject* c_xM_obj; PyArrayObject* psit_xG_obj; int k; int Mblock; if (!PyArg_ParseTuple(args, "OOii", &c_xM_obj, &psit_xG_obj, &k, &Mblock)) return NULL; const double complex* c_xM = (const double complex*)PyArray_DATA(c_xM_obj); double complex* psit_xG = (double complex*)PyArray_DATA(psit_xG_obj); int nd = PyArray_NDIM(psit_xG_obj); npy_intp* dims = PyArray_DIMS(psit_xG_obj); int nx = PyArray_MultiplyList(dims, nd - 3); int Gmax = PyArray_MultiplyList(dims + nd - 3, 3); int Mmax = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1]; double complex* tmp_GM = 0; for (int Mstart = 0; Mstart < Mmax; Mstart += Mblock) { int Mstop = Mstart + Mblock; if (Mstop > Mmax) { Mstop = Mmax; Mblock = Mstop - Mstart; } if (tmp_GM == 0) tmp_GM = GPAW_MALLOC(double complex, Mblock * Gmax); for (int GM = 0; GM < Gmax * Mblock; GM++) tmp_GM[GM] = 0.0; GRID_LOOP_START(lfc, k, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; int M1 = v->M; if (M1 >= Mstop) continue; int nm = v->nm; int M2 = M1 + nm; if (M2 <= Mstart) continue; int M1p = MAX(M1, Mstart); int M2p = MIN(M2, Mstop); if (M1p == M2p) continue; double complex phase = phase_i[i]; const double* A_gm = v->A_gm; for (int G = Ga; G < Gb; G++) for 
(int M = M1p; M < M2p; M++) tmp_GM[G * Mblock + M - Mstart] += \ A_gm[(G - Ga) * nm + M - M1] * phase; } } GRID_LOOP_STOP(lfc, k, 0); double complex one = 1.0; myzgemm("C", "N", &Gmax, &nx, &Mblock, &one, tmp_GM, &Mblock, c_xM + Mstart, &Mmax, &one, psit_xG, &Gmax); } free(tmp_GM); Py_RETURN_NONE; } PyObject* add(LFCObject *lfc, PyObject *args) { PyArrayObject* c_xM_obj; PyArrayObject* a_xG_obj; int q; if (!PyArg_ParseTuple(args, "OOi", &c_xM_obj, &a_xG_obj, &q)) return NULL; int nd = PyArray_NDIM(a_xG_obj); npy_intp* dims = PyArray_DIMS(a_xG_obj); int nx = PyArray_MultiplyList(dims, nd - 3); int nG = PyArray_MultiplyList(dims + nd - 3, 3); int nM = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1]; #pragma omp parallel { int thread_id; #ifdef _OPENMP thread_id = omp_get_thread_num(); #else thread_id = 0; #endif if (!lfc->bloch_boundary_conditions) { #pragma omp for schedule(static) for (int x = 0; x < nx; x++) { const double* c_M = (const double*)PyArray_DATA(c_xM_obj) + x * nM; double* a_G = (double*)PyArray_DATA(a_xG_obj) + x * nG; GRID_LOOP_START(lfc, -1, thread_id) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; for (int gm = 0, G = Ga; G < Gb; G++) { for (int m = 0; m < v->nm; m++, gm++) { a_G[G] += v->A_gm[gm] * c_M[v->M + m]; } } } } GRID_LOOP_STOP(lfc, -1, thread_id); } } else { #pragma omp for schedule(static) for (int x = 0; x < nx; x++) { const double complex* c_M = (const double complex*)PyArray_DATA(c_xM_obj) + x * nM; double complex* a_G = (double complex*)PyArray_DATA(a_xG_obj) + x * nG; GRID_LOOP_START(lfc, q, thread_id) { for (int i = 0; i < ni; i++) { double complex conjphase = conj(phase_i[i]); LFVolume* v = volume_i[i]; const double complex* c_M1 = c_M + v->M; const double* A_gm = v->A_gm; for (int gm = 0, G = Ga; G < Gb; G++) { double complex a = 0.0; for (int m = 0; m < v->nm; m++, gm++) { a += A_gm[gm] * c_M1[m]; } a_G[G] += a * conjphase; } } } GRID_LOOP_STOP(lfc, q, thread_id); } } } Py_RETURN_NONE; } PyObject* 
spline_to_grid(PyObject *self, PyObject *args) { SplineObject* spline_obj; PyArrayObject* beg_c_obj; PyArrayObject* end_c_obj; PyArrayObject* pos_v_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; PyArrayObject* gdcorner_c_obj; if (!PyArg_ParseTuple(args, "OOOOOOO", &spline_obj, &beg_c_obj, &end_c_obj, &pos_v_obj, &h_cv_obj, &n_c_obj, &gdcorner_c_obj)) return NULL; const bmgsspline* spline = (const bmgsspline*)(&(spline_obj->spline)); long* beg_c = LONGP(beg_c_obj); long* end_c = LONGP(end_c_obj); double* pos_v = DOUBLEP(pos_v_obj); double* h_cv = DOUBLEP(h_cv_obj); long* n_c = LONGP(n_c_obj); long* gdcorner_c = LONGP(gdcorner_c_obj); int l = spline_obj->spline.l; int nm = 2 * l + 1; double rcut = spline->dr * spline->nbins; int ngmax = ((end_c[0] - beg_c[0]) * (end_c[1] - beg_c[1]) * (end_c[2] - beg_c[2])); double* A_gm = GPAW_MALLOC(double, ngmax * nm); int nBmax = ((end_c[0] - beg_c[0]) * (end_c[1] - beg_c[1])); int* G_B = GPAW_MALLOC(int, 2 * nBmax); int nB = 0; int ngm = 0; int G = -gdcorner_c[2] + n_c[2] * (beg_c[1] - gdcorner_c[1] + n_c[1] * (beg_c[0] - gdcorner_c[0])); for (int g0 = beg_c[0]; g0 < end_c[0]; g0++) { for (int g1 = beg_c[1]; g1 < end_c[1]; g1++) { int g2_beg = -1; // function boundary coordinates int g2_end = -1; for (int g2 = beg_c[2]; g2 < end_c[2]; g2++) { double x = h_cv[0] * g0 + h_cv[3] * g1 + h_cv[6] * g2 - pos_v[0]; double y = h_cv[1] * g0 + h_cv[4] * g1 + h_cv[7] * g2 - pos_v[1]; double z = h_cv[2] * g0 + h_cv[5] * g1 + h_cv[8] * g2 - pos_v[2]; double r2 = x * x + y * y + z * z; double r = sqrt(r2); if (r < rcut) { if (g2_beg < 0) g2_beg = g2; // found boundary g2_end = g2; double A = bmgs_splinevalue(spline, r); double* p = A_gm + ngm; spherical_harmonics(l, A, x, y, z, r2, p); ngm += nm; } } if (g2_end >= 0) { g2_end++; G_B[nB++] = G + g2_beg; G_B[nB++] = G + g2_end; } G += n_c[2]; } G += n_c[2] * (n_c[1] - end_c[1] + beg_c[1]); } npy_intp gm_dims[2] = {ngm / (2 * l + 1), 2 * l + 1}; PyArrayObject* A_gm_obj = 
(PyArrayObject*)PyArray_SimpleNew(2, gm_dims, NPY_DOUBLE); memcpy(PyArray_DATA(A_gm_obj), A_gm, ngm * sizeof(double)); free(A_gm); npy_intp B_dims[1] = {nB}; PyArrayObject* G_B_obj = (PyArrayObject*)PyArray_SimpleNew(1, B_dims, NPY_INT); memcpy(PyArray_DATA(G_B_obj), G_B, nB * sizeof(int)); free(G_B); // PyObjects created in the C code will be initialized with a refcount // of 1, for which reason we'll have to decref them when done here PyObject* values = Py_BuildValue("(OO)", A_gm_obj, G_B_obj); Py_DECREF(A_gm_obj); Py_DECREF(G_B_obj); return values; } // Horrible copy-paste of calculate_potential_matrix // Surely it must be possible to find a way to actually reuse code // Maybe some kind of preprocessor thing PyObject* calculate_potential_matrix_derivative(LFCObject *lfc, PyObject *args) { PyArrayObject* vt_G_obj; PyArrayObject* DVt_MM_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; int k, c; PyArrayObject* spline_obj_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; int Mstart, Mstop; if (!PyArg_ParseTuple(args, "OOOOiiOOOii", &vt_G_obj, &DVt_MM_obj, &h_cv_obj, &n_c_obj, &k, &c, &spline_obj_M_obj, &beg_c_obj, &pos_Wc_obj, &Mstart, &Mstop)) return NULL; const double* vt_G = (const double*)PyArray_DATA(vt_G_obj); const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const SplineObject** spline_obj_M = \ (const SplineObject**)PyArray_DATA(spline_obj_M_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); long* beg_c = LONGP(beg_c_obj); int nM = PyArray_DIMS(DVt_MM_obj)[1]; double* work_gm = lfc->work_gm; double dv = lfc->dv; if (!lfc->bloch_boundary_conditions) { double* DVt_MM = (double*)PyArray_DATA(DVt_MM_obj); { GRID_LOOP_START(lfc, -1, 0) { // In one grid loop iteration, only z changes. 
int iza = Ga % n_c[2] + beg_c[2]; int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0]; int iz = iza; //assert(Ga == ((ix - beg_c[0]) * n_c[1] + (iy - beg_c[1])) // * n_c[2] + iza - beg_c[2]); for (int i1 = 0; i1 < ni; i1++) { iz = iza; LFVolume* v1 = volume_i[i1]; int M1 = v1->M; const SplineObject* spline_obj = spline_obj_M[M1]; const bmgsspline* spline = \ (const bmgsspline*)(&(spline_obj->spline)); int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; double fdYdc_m[nm1]; double rlYdfdr_m[nm1]; double f, dfdr; int l = (nm1 - 1) / 2; const double* pos_c = pos_Wc[v1->W]; //assert(2 * l + 1 == nm1); //assert(spline_obj->spline.l == l); int gm1 = 0; for (int G = Ga; G < Gb; G++, iz++) { double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0]; double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1]; double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2]; double vtdv = vt_G[G] * dv; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double Rcinvr = r > 1e-15 ? R_c[c] / r : 0.0; //assert(G == ((ix - beg_c[0]) * n_c[1] + // (iy - beg_c[1])) * n_c[2] + iz - beg_c[2]); bmgs_get_value_and_derivative(spline, r, &f, &dfdr); //assert (r <= spline->dr * spline->nbins); // important switch(c) { case 0: spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m); break; case 1: spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m); break; case 2: spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m); break; } spherical_harmonics(l, dfdr * Rcinvr, x, y, z, r2, rlYdfdr_m); int m1start = M1 < Mstart ? 
nm1 - nm1p : 0; for (int m1 = 0; m1 < nm1p; m1++, gm1++) { work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start] + rlYdfdr_m[m1 + m1start]); } } // end loop over G for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; const double* A2_start_gm = v2->A_gm; const double* A2_gm; int nm2 = v2->nm; double* DVt_start_mm = DVt_MM + (M1p - Mstart) * nM + M2; double* DVt_mm; double work; for (int g = 0; g < nG; g++) { A2_gm = A2_start_gm + g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { work = work_gm[g * nm1p + m1]; DVt_mm = DVt_start_mm + m1 * nM; for (int m2 = 0; m2 < nm2; m2++) { DVt_mm[m2] += A2_gm[m2] * work; } } } } // i2 loop } // G loop } // i1 loop GRID_LOOP_STOP(lfc, -1, 0); } // c loop } else { complex double* DVt_MM = (complex double*)PyArray_DATA(DVt_MM_obj); { GRID_LOOP_START(lfc, k, 0) { // In one grid loop iteration, only z changes. int iza = Ga % n_c[2] + beg_c[2]; int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0]; int iz = iza; for (int i1 = 0; i1 < ni; i1++) { iz = iza; LFVolume* v1 = volume_i[i1]; int M1 = v1->M; const SplineObject* spline_obj = spline_obj_M[M1]; const bmgsspline* spline = \ (const bmgsspline*)(&(spline_obj->spline)); int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; double fdYdc_m[nm1]; double rlYdfdr_m[nm1]; double f, dfdr; int l = (nm1 - 1) / 2; //assert(2 * l + 1 == nm1); //assert(spline_obj->spline.l == l); const double* pos_c = pos_Wc[v1->W]; int gm1 = 0; for (int G = Ga; G < Gb; G++, iz++) { double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0]; double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1]; double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2]; double vtdv = vt_G[G] * dv; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double Rc_over_r = r > 1e-15 ? 
R_c[c] / r : 0.0; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); //assert (r <= spline->dr * spline->nbins); switch(c) { case 0: spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m); break; case 1: spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m); break; case 2: spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m); break; } spherical_harmonics(l, dfdr * Rc_over_r, x, y, z, r2, rlYdfdr_m); int m1start = M1 < Mstart ? nm1 - nm1p : 0; for (int m1 = 0; m1 < nm1p; m1++, gm1++) { work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start] + rlYdfdr_m[m1 + m1start]); } } // end loop over G for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; const double* A2_start_gm = v2->A_gm; const double* A2_gm; double complex* DVt_start_mm = DVt_MM + (M1p - Mstart) * nM + M2; double complex* DVt_mm; double complex work; int nm2 = v2->nm; double complex phase = conj(phase_i[i1]) * phase_i[i2]; for (int g = 0; g < nG; g++) { A2_gm = A2_start_gm + g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { work = work_gm[g * nm1p + m1] * phase; DVt_mm = DVt_start_mm + m1 * nM; for (int m2 = 0; m2 < nm2; m2++) { DVt_mm[m2] += A2_gm[m2] * work; } } } } // i2 loop } // G loop } // i1 loop GRID_LOOP_STOP(lfc, k, 0); } // c loop } Py_RETURN_NONE; } // Horrible copy-paste of calculate_potential_matrix // Surely it must be possible to find a way to actually reuse code // Maybe some kind of preprocessor thing PyObject* calculate_potential_matrix_force_contribution(LFCObject *lfc, PyObject *args) { PyArrayObject* vt_G_obj; PyArrayObject* rho_MM_obj; PyArrayObject* F_M_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; int k, c; PyArrayObject* spline_obj_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; int Mstart, Mstop; if (!PyArg_ParseTuple(args, "OOOOOiiOOOii", &vt_G_obj, &rho_MM_obj, &F_M_obj, &h_cv_obj, &n_c_obj, &k, &c, &spline_obj_M_obj, &beg_c_obj, &pos_Wc_obj, &Mstart, &Mstop)) return NULL; const double* vt_G = (const 
double*)PyArray_DATA(vt_G_obj); const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const SplineObject** spline_obj_M = \ (const SplineObject**)PyArray_DATA(spline_obj_M_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); double* F_M = (double*)PyArray_DATA(F_M_obj); long* beg_c = LONGP(beg_c_obj); int nM = PyArray_DIMS(rho_MM_obj)[1]; double* work_gm = lfc->work_gm; double dv = lfc->dv; if (!lfc->bloch_boundary_conditions) { double* rho_MM = (double*)PyArray_DATA(rho_MM_obj); { GRID_LOOP_START(lfc, -1, 0) { // In one grid loop iteration, only z changes. int iza = Ga % n_c[2] + beg_c[2]; int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0]; int iz = iza; //assert(Ga == ((ix - beg_c[0]) * n_c[1] + (iy - beg_c[1])) // * n_c[2] + iza - beg_c[2]); for (int i1 = 0; i1 < ni; i1++) { iz = iza; LFVolume* v1 = volume_i[i1]; int M1 = v1->M; const SplineObject* spline_obj = spline_obj_M[M1]; const bmgsspline* spline = \ (const bmgsspline*)(&(spline_obj->spline)); int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; int m1start = M1 < Mstart ? nm1 - nm1p : 0; double fdYdc_m[nm1]; double rlYdfdr_m[nm1]; double f, dfdr; int l = (nm1 - 1) / 2; const double* pos_c = pos_Wc[v1->W]; //assert(2 * l + 1 == nm1); //assert(spline_obj->spline.l == l); int gm1 = 0; for (int G = Ga; G < Gb; G++, iz++) { double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0]; double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1]; double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2]; double vtdv = vt_G[G] * dv; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double Rcinvr = r > 1e-15 ? 
R_c[c] / r : 0.0; //assert(G == ((ix - beg_c[0]) * n_c[1] + // (iy - beg_c[1])) * n_c[2] + iz - beg_c[2]); bmgs_get_value_and_derivative(spline, r, &f, &dfdr); //assert (r <= spline->dr * spline->nbins); // important switch(c) { case 0: spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m); break; case 1: spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m); break; case 2: spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m); break; } spherical_harmonics(l, dfdr * Rcinvr, x, y, z, r2, rlYdfdr_m); for (int m1 = 0; m1 < nm1p; m1++, gm1++) { work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start] + rlYdfdr_m[m1 + m1start]); } } // end loop over G for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; const double* A2_start_gm = v2->A_gm; const double* A2_gm; int nm2 = v2->nm; double* rho_start_mm = rho_MM + (M1p - Mstart) * nM + M2; double* rho_mm; double work; for (int g = 0; g < nG; g++) { A2_gm = A2_start_gm + g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { rho_mm = rho_start_mm + m1 * nM; work = 0.0; for (int m2 = 0; m2 < nm2; m2++) { work += A2_gm[m2] * rho_mm[m2]; } F_M[M1p - Mstart + m1] += work * work_gm[g * nm1p + m1]; } } } // i2 loop } // G loop } // i1 loop GRID_LOOP_STOP(lfc, -1, 0); } // c loop } else { complex double* rho_MM = (complex double*)PyArray_DATA(rho_MM_obj); { GRID_LOOP_START(lfc, k, 0) { // In one grid loop iteration, only z changes. int iza = Ga % n_c[2] + beg_c[2]; int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0]; int iz = iza; for (int i1 = 0; i1 < ni; i1++) { iz = iza; LFVolume* v1 = volume_i[i1]; int M1 = v1->M; const SplineObject* spline_obj = spline_obj_M[M1]; const bmgsspline* spline = \ (const bmgsspline*)(&(spline_obj->spline)); int nm1 = v1->nm; int M1p = MAX(M1, Mstart); int nm1p = MIN(M1 + nm1, Mstop) - M1p; if (nm1p <= 0) continue; int m1start = M1 < Mstart ? 
nm1 - nm1p : 0; double fdYdc_m[nm1]; double rlYdfdr_m[nm1]; double f, dfdr; int l = (nm1 - 1) / 2; //assert(2 * l + 1 == nm1); //assert(spline_obj->spline.l == l); const double* pos_c = pos_Wc[v1->W]; int gm1 = 0; for (int G = Ga; G < Gb; G++, iz++) { double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0]; double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1]; double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2]; double vtdv = vt_G[G] * dv; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double Rc_over_r = r > 1e-15 ? R_c[c] / r : 0.0; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); //assert (r <= spline->dr * spline->nbins); switch(c) { case 0: spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m); break; case 1: spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m); break; case 2: spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m); break; } spherical_harmonics(l, dfdr * Rc_over_r, x, y, z, r2, rlYdfdr_m); for (int m1 = 0; m1 < nm1p; m1++, gm1++) { work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start] + rlYdfdr_m[m1 + m1start]); } } // end loop over G for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int M2 = v2->M; const double* A2_start_gm = v2->A_gm; const double* A2_gm; int nm2 = v2->nm; double complex* rho_start_mm = rho_MM + (M1p - Mstart) * nM + M2; double complex* rho_mm; double complex phase = conj(phase_i[i1]) * phase_i[i2]; double complex work; for (int g = 0; g < nG; g++) { A2_gm = A2_start_gm + g * nm2; for (int m1 = 0; m1 < nm1p; m1++) { rho_mm = rho_start_mm + m1 * nM; work = 0.0; for (int m2 = 0; m2 < nm2; m2++) { work += A2_gm[m2] * rho_mm[m2]; } F_M[M1p - Mstart + m1] += creal(work * work_gm[g * nm1p + m1] * phase); } } } // i2 loop } // G loop } // i1 loop GRID_LOOP_STOP(lfc, k, 0); } // c loop } Py_RETURN_NONE; } PyObject* derivative(LFCObject *lfc, PyObject *args) { PyArrayObject* a_xG_obj; PyArrayObject* c_xMv_obj; PyArrayObject* h_cv_obj; 
PyArrayObject* n_c_obj; PyObject* spline_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; int q; if (!PyArg_ParseTuple(args, "OOOOOOOi", &a_xG_obj, &c_xMv_obj, &h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj, &pos_Wc_obj, &q)) return NULL; int nd = PyArray_NDIM(a_xG_obj); npy_intp* dims = PyArray_DIMS(a_xG_obj); int nx = PyArray_MultiplyList(dims, nd - 3); int nG = PyArray_MultiplyList(dims + nd - 3, 3); int nM = PyArray_DIMS(c_xMv_obj)[PyArray_NDIM(c_xMv_obj) - 2]; const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); long* beg_c = LONGP(beg_c_obj); if (!lfc->bloch_boundary_conditions) { const double* a_G = (const double*)PyArray_DATA(a_xG_obj); double* c_Mv = (double*)PyArray_DATA(c_xMv_obj); for (int x = 0; x < nx; x++) { GRID_LOOP_START(lfc, -1, 0) { // In one grid loop iteration, only i2 changes. int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; for (int G = Ga; G < Gb; G++) { for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; double* c_mv = c_Mv + 3 * M; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; int nm = vol->nm; int l = (nm - 1) / 2; double x = xG - pos_Wc[vol->W][0]; double y = yG - pos_Wc[vol->W][1]; double z = zG - pos_Wc[vol->W][2]; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double af; double dfdr; bmgs_get_value_and_derivative(spline, r, &af, &dfdr); af *= a_G[G] * lfc->dv; double afdrlYdx_m[nm]; // a * f * d(r^l * Y)/dx spherical_harmonics_derivative_x(l, af, x, y, z, r2, afdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m] += afdrlYdx_m[m]; 
spherical_harmonics_derivative_y(l, af, x, y, z, r2, afdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m + 1] += afdrlYdx_m[m]; spherical_harmonics_derivative_z(l, af, x, y, z, r2, afdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m + 2] += afdrlYdx_m[m]; if (r > 1e-15) { double arlm1Ydfdr_m[nm]; // a * r^(l-1) * Y * df/dr double arm1dfdr = a_G[G] / r * dfdr * lfc->dv; spherical_harmonics(l, arm1dfdr, x, y, z, r2, arlm1Ydfdr_m); for (int m = 0; m < nm; m++) for (int v = 0; v < 3; v++) c_mv[m * 3 + v] += arlm1Ydfdr_m[m] * R_c[v]; } } xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, -1, 0); c_Mv += 3 * nM; a_G += nG; } } else { const complex double* a_G = (const complex double*)PyArray_DATA(a_xG_obj); complex double* c_Mv = (complex double*)PyArray_DATA(c_xMv_obj); for (int x = 0; x < nx; x++) { GRID_LOOP_START(lfc, q, 0) { // In one grid loop iteration, only i2 changes. int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; for (int G = Ga; G < Gb; G++) { for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; complex double* c_mv = c_Mv + 3 * M; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; int nm = vol->nm; int l = (nm - 1) / 2; double x = xG - pos_Wc[vol->W][0]; double y = yG - pos_Wc[vol->W][1]; double z = zG - pos_Wc[vol->W][2]; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double f; double dfdr; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); double fdrlYdx_m[nm]; // a * f * d(r^l * Y)/dx complex double ap = a_G[G] * phase_i[i] * lfc->dv; spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m ] += ap * fdrlYdx_m[m]; 
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m + 1] += ap * fdrlYdx_m[m]; spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) c_mv[3 * m + 2] += ap * fdrlYdx_m[m]; if (r > 1e-15) { double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr double rm1dfdr = dfdr / r; spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m); for (int m = 0; m < nm; m++) for (int v = 0; v < 3; v++) c_mv[m * 3 + v] += ap * rlm1Ydfdr_m[m] * R_c[v]; } } xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, q, 0); c_Mv += 3 * nM; a_G += nG; } } Py_RETURN_NONE; } PyObject* normalized_derivative(LFCObject *lfc, PyObject *args) { PyArrayObject* a_G_obj; PyArrayObject* c_Mv_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; PyObject* spline_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; if (!PyArg_ParseTuple(args, "OOOOOOO", &a_G_obj, &c_Mv_obj, &h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj, &pos_Wc_obj)) return NULL; const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); long* beg_c = LONGP(beg_c_obj); const double* a_G = (const double*)PyArray_DATA(a_G_obj); double* c_Mv = (double*)PyArray_DATA(c_Mv_obj); GRID_LOOP_START(lfc, -1, 0) { int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; for (int G = Ga; G < Gb; G++) { for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; double* c_mv = c_Mv + 7 * M; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; int nm = vol->nm; int l = (nm - 1) / 2; double x = xG - pos_Wc[vol->W][0]; 
double y = yG - pos_Wc[vol->W][1]; double z = zG - pos_Wc[vol->W][2]; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double f; double dfdr; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); f *= lfc->dv; double a = a_G[G]; if (l == 0) c_mv[6] += 0.28209479177387814 * a * f; double fdrlYdx_m[nm]; // f * d(r^l * Y)/dx spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) { c_mv[7 * m ] += a * fdrlYdx_m[m]; c_mv[7 * m + 3] += fdrlYdx_m[m]; } spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) { c_mv[7 * m + 1] += a * fdrlYdx_m[m]; c_mv[7 * m + 4] += fdrlYdx_m[m]; } spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) { c_mv[7 * m + 2] += a * fdrlYdx_m[m]; c_mv[7 * m + 5] += fdrlYdx_m[m]; } if (r > 1e-15) { double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr double rm1dfdr = dfdr * lfc->dv / r; spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m); for (int m = 0; m < nm; m++) for (int v = 0; v < 3; v++) { c_mv[m * 7 + v] += a * rlm1Ydfdr_m[m] * R_c[v]; c_mv[m * 7 + v + 3] += rlm1Ydfdr_m[m] * R_c[v]; } } } xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, -1, 0); Py_RETURN_NONE; } PyObject* ae_valence_density_correction(LFCObject *lfc, PyObject *args) { PyArrayObject* rho_MM_obj; PyArrayObject* n_G_obj; PyArrayObject* a_W_obj; PyArrayObject* I_a_obj; PyArrayObject* x_W_obj; if (!PyArg_ParseTuple(args, "OOOOO", &rho_MM_obj, &n_G_obj, &a_W_obj, &I_a_obj, &x_W_obj)) return NULL; double* n_G = (double*)PyArray_DATA(n_G_obj); int* a_W = (int*)PyArray_DATA(a_W_obj); double* I_a = (double*)PyArray_DATA(I_a_obj); const double* rho_MM = (const double*)PyArray_DATA(rho_MM_obj); int* x_W = (int*)PyArray_DATA(x_W_obj); int nM = PyArray_DIMS(rho_MM_obj)[0]; GRID_LOOP_START(lfc, -1, 0) { for (int i1 = 0; i1 < ni; i1++) { LFVolume* v1 = volume_i[i1]; int x1 = x_W[v1->W]; int a1 = a_W[v1->W]; int M1 = v1->M; int 
nm1 = v1->nm; double Ia = 0.0; for (int i2 = 0; i2 < ni; i2++) { LFVolume* v2 = volume_i[i2]; int x2 = x_W[v2->W]; if (x1 != x2) continue; int a2 = a_W[v2->W]; if (a1 != a2) continue; int M2 = v2->M; int nm2 = v2->nm; const double* rho_mm = rho_MM + M1 * nM + M2; for (int g = 0; g < nG; g++) { double density = 0.0; for (int m2 = 0; m2 < nm2; m2++) for (int m1 = 0; m1 < nm1; m1++) density += (rho_mm[m2 + m1 * nM] * v1->A_gm[g * nm1 + m1] * v2->A_gm[g * nm2 + m2]); n_G[Ga + g] += density; Ia += density; } } I_a[a1] += Ia * lfc->dv; } } GRID_LOOP_STOP(lfc, -1, 0); Py_RETURN_NONE; } PyObject* ae_core_density_correction(LFCObject *lfc, PyObject *args) { double scale; PyArrayObject* n_G_obj; PyArrayObject* a_W_obj; PyArrayObject* I_a_obj; if (!PyArg_ParseTuple(args, "dOOO", &scale, &n_G_obj, &a_W_obj, &I_a_obj)) return NULL; double* n_G = (double*)PyArray_DATA(n_G_obj); int* a_W = (int*)PyArray_DATA(a_W_obj); double* I_a = (double*)PyArray_DATA(I_a_obj); GRID_LOOP_START(lfc, -1, 0) { for (int i = 0; i < ni; i++) { LFVolume* v = volume_i[i]; double Ia = 0.0; for (int g = 0; g < nG; g++) { double density = scale * v->A_gm[g]; n_G[Ga + g] += density; Ia += density; } I_a[a_W[v->W]] += Ia * lfc->dv; } } GRID_LOOP_STOP(lfc, -1, 0); Py_RETURN_NONE; } gpaw-24.1.0/c/lfc.h000066400000000000000000000127421454550013000136760ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Copyright (C) 2010,2020 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
// Python-visible collection of localized functions ("volumes") on a grid.
// The GRID_LOOP_START / GRID_LOOP_STOP macros walk the boundary list
// (G_B, W_B) and maintain the per-thread "current volume" arrays
// (volume_i, phase_i, i_W), which are offset by thread_id * nimax or
// thread_id * nW respectively.
typedef struct
{
  PyObject_HEAD
  double dv;                 // volume per grid point
  int nW;                    // number of volumes
  int nB;                    // number of boundary points
  int nimax;                 // maximum number of current volumes
  double* work_gm;           // work space
  LFVolume* volume_W;        // pointers to volumes
  LFVolume** volume_i;        // pointers to volumes at current grid point
  int* G_B;                  // boundary grid points: grid index at which
                             // boundary B is crossed
  int* W_B;                  // volume numbers: W >= 0 means volume W is
                             // entered at boundary B, a negative value
                             // means volume -1 - W_B[B] is left
  int* i_W;                  // mapping from all volumes to current volumes
                             // (position of volume W inside volume_i)
  int* ngm_W;                // number of A_gm elements per volume (grid
                             // points times nm); subtracted from A_gm to
                             // rewind it at the end of a grid loop
  bool bloch_boundary_conditions;  // true: complex data with k-point phase
                                   // factors; false: real data, grid loops
                                   // run with k = -1 and ignore the phases
  complex double* phase_kW;  // phase factors: exp(ik.R), indexed as
                             // phase_kW[k * nW + W]
  complex double* phase_i;   // phase factors for current volumes
#ifdef GPAW_GPU
  int use_gpu;
  LFVolume_gpu *volume_W_gpu;
  LFVolume_gpu *volume_W_gpu_host;
  int nB_gpu;                // number of boundary points
  int* G_B1_gpu;             // boundary grid points
  int* G_B2_gpu;             // boundary grid points
  int max_len_A_gm;
  int max_nG;
  gpuDoubleComplex *phase_i_gpu;
  int max_k;
  LFVolume_gpu **volume_i_gpu;
  int *A_gm_i_gpu;
  int *ni_gpu;
  int Mcount;
  int *volume_WMi_gpu;
  int *WMi_gpu;
  int WMimax;
#endif
} LFCObject;
*/ \ for (int W = 0; W < lfc->nW; W++) \ volume_W[W].A_gm -= lfc->ngm_W[W]; \ } #endif gpaw-24.1.0/c/lfc2.c000066400000000000000000000314611454550013000137520ustar00rootroot00000000000000/* Copyright (C) 2010 CAMd * Please see the accompanying LICENSE file for further information. */ #include "extensions.h" #include "spline.h" #include "lfc.h" #include "bmgs/spherical_harmonics.h" PyObject* second_derivative(LFCObject *lfc, PyObject *args) { PyArrayObject* a_G_obj; PyArrayObject* c_Mvv_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; PyObject* spline_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; int q; if (!PyArg_ParseTuple(args, "OOOOOOOi", &a_G_obj, &c_Mvv_obj, &h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj, &pos_Wc_obj, &q)) return NULL; // Copied from derivative member function int nd = PyArray_NDIM(a_G_obj); npy_intp* dims = PyArray_DIMS(a_G_obj); int nx = PyArray_MultiplyList(dims, nd - 3); int nG = PyArray_MultiplyList(dims + nd - 3, 3); int nM = PyArray_DIM(c_Mvv_obj, PyArray_NDIM(c_Mvv_obj) - 2); // These were already present const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); long* beg_c = LONGP(beg_c_obj); /////////////////////////////////////////////// const double Y00dv = lfc->dv / sqrt(4.0 * M_PI); if (!lfc->bloch_boundary_conditions) { const double* a_G = (const double*)PyArray_DATA(a_G_obj); double* c_Mvv = (double*)PyArray_DATA(c_Mvv_obj); // Loop over number of x-dimension in a_xG (not relevant yet) for (int x = 0; x < nx; x++) { // JJs old stuff GRID_LOOP_START(lfc, -1, 0) { // In one grid loop iteration, only i2 changes. 
int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; for (int G = Ga; G < Gb; G++) { for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; double* c_mvv = c_Mvv + 9 * M; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; double x = xG - pos_Wc[vol->W][0]; double y = yG - pos_Wc[vol->W][1]; double z = zG - pos_Wc[vol->W][2]; double r2 = x * x + y * y + z * z; double r = sqrt(r2); int bin = r / spline->dr; assert(bin <= spline->nbins); double* s = spline->data + 4 * bin; double u = r - bin * spline->dr; double dfdror; if (bin == 0) dfdror = 2.0 * s[2] + 3.0 * s[3] * r; else dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r; double a = a_G[G] * Y00dv; dfdror *= a; c_mvv[0] += dfdror; c_mvv[4] += dfdror; c_mvv[8] += dfdror; if (r > 1e-15) { double b = ((2.0 * s[2] + 6.0 * s[3] * u) * a - dfdror) / r2; c_mvv[0] += b * x * x; c_mvv[1] += b * x * y; c_mvv[2] += b * x * z; c_mvv[3] += b * y * x; c_mvv[4] += b * y * y; c_mvv[5] += b * y * z; c_mvv[6] += b * z * x; c_mvv[7] += b * z * y; c_mvv[8] += b * z * z; } } xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, -1, 0); c_Mvv += 9 * nM; a_G += nG; } } else { const complex double* a_G = (const complex double*)PyArray_DATA(a_G_obj); complex double* c_Mvv = (complex double*)PyArray_DATA(c_Mvv_obj); for (int x = 0; x < nx; x++) { GRID_LOOP_START(lfc, q, 0) { // In one grid loop iteration, only i2 changes. 
int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; for (int G = Ga; G < Gb; G++) { for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; complex double* c_mvv = c_Mvv + 9 * M; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; double x = xG - pos_Wc[vol->W][0]; double y = yG - pos_Wc[vol->W][1]; double z = zG - pos_Wc[vol->W][2]; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double dfdror; // use bmgs_get_value_and_derivative instead ??!! int bin = r / spline->dr; assert(bin <= spline->nbins); double u = r - bin * spline->dr; double* s = spline->data + 4 * bin; if (bin == 0) dfdror = 2.0 * s[2] + 3.0 * s[3] * r; else dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r; // phase added here complex double a = a_G[G] * phase_i[i] * Y00dv; // dfdror *= a; c_mvv[0] += a * dfdror; c_mvv[4] += a * dfdror; c_mvv[8] += a * dfdror; if (r > 1e-15) { double b = (2.0 * s[2] + 6.0 * s[3] * u - dfdror) / r2; c_mvv[0] += a * b * x * x; c_mvv[1] += a * b * x * y; c_mvv[2] += a * b * x * z; c_mvv[3] += a * b * y * x; c_mvv[4] += a * b * y * y; c_mvv[5] += a * b * y * z; c_mvv[6] += a * b * z * x; c_mvv[7] += a * b * z * y; c_mvv[8] += a * b * z * z; } } xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, q, 0); c_Mvv += 9 * nM; a_G += nG; } } Py_RETURN_NONE; } PyObject* add_derivative(LFCObject *lfc, PyObject *args) { // Coefficients for the lfc's PyArrayObject* c_xM_obj; // Array PyArrayObject* a_xG_obj; PyArrayObject* h_cv_obj; PyArrayObject* n_c_obj; PyObject* spline_M_obj; PyArrayObject* beg_c_obj; PyArrayObject* pos_Wc_obj; // Atom index int a; // Cartesian coordinate int v; // k-point index int q; if (!PyArg_ParseTuple(args, 
"OOOOOOOiii", &c_xM_obj, &a_xG_obj, &h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj, &pos_Wc_obj, &a, &v, &q)) return NULL; // Number of dimensions int nd = PyArray_NDIM(a_xG_obj); // Array with lengths of array dimensions npy_intp* dims = PyArray_DIMS(a_xG_obj); // Number of extra dimensions int nx = PyArray_MultiplyList(dims, nd - 3); // Number of grid points int nG = PyArray_MultiplyList(dims + nd - 3, 3); // Number of lfc's int nM = PyArray_DIM(c_xM_obj, PyArray_NDIM(c_xM_obj) - 1); const double* h_cv = (const double*)PyArray_DATA(h_cv_obj); const long* n_c = (const long*)PyArray_DATA(n_c_obj); const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj); long* beg_c = LONGP(beg_c_obj); if (!lfc->bloch_boundary_conditions) { const double* c_M = (const double*)PyArray_DATA(c_xM_obj); double* a_G = (double*)PyArray_DATA(a_xG_obj); for (int x = 0; x < nx; x++) { GRID_LOOP_START(lfc, -1, 0) { // In one grid loop iteration, only i2 changes. int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; // Grid point position double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; // Loop over grid points in current stride for (int G = Ga; G < Gb; G++) { // Loop over volumes at current grid point for (int i = 0; i < ni; i++) { LFVolume* vol = volume_i[i]; int M = vol->M; // Check that the volume belongs to the atom in consideration later int W = vol->W; int nm = vol->nm; int l = (nm - 1) / 2; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; double x = xG - pos_Wc[W][0]; double y = yG - pos_Wc[W][1]; double z = zG - pos_Wc[W][2]; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double f; double dfdr; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); // First contribution: f * d(r^l * Y)/dv 
double fdrlYdx_m[nm]; if (v == 0) spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m); else if (v == 1) spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m); else spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) a_G[G] += fdrlYdx_m[m] * c_M[M + m]; // Second contribution: r^(l-1) * Y * df/dr * R_v if (r > 1e-15) { double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr double rm1dfdr = 1. / r * dfdr; spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m); for (int m = 0; m < nm; m++) a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m]; } } // Update coordinates of current grid point xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, -1, 0); c_M += nM; a_G += nG; } } else { const double complex* c_M = (const double complex*)PyArray_DATA(c_xM_obj); double complex* a_G = (double complex*)PyArray_DATA(a_xG_obj); for (int x = 0; x < nx; x++) { GRID_LOOP_START(lfc, q, 0) { // In one grid loop iteration, only i2 changes. int i2 = Ga % n_c[2] + beg_c[2]; int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1]; int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0]; // Grid point position double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2; double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2; double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2; // Loop over grid points in current stride for (int G = Ga; G < Gb; G++) { // Loop over volumes at current grid point for (int i = 0; i < ni; i++) { // Phase of volume double complex conjphase = conj(phase_i[i]); LFVolume* vol = volume_i[i]; int M = vol->M; // Check that the volume belongs to the atom in consideration later int W = vol->W; int nm = vol->nm; int l = (nm - 1) / 2; const bmgsspline* spline = (const bmgsspline*) \ &((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline; double x = xG - pos_Wc[W][0]; double y = yG - pos_Wc[W][1]; double z = zG - pos_Wc[W][2]; double R_c[] = {x, y, z}; double r2 = x * x + y * y + z * z; double r = sqrt(r2); double f; double 
dfdr; bmgs_get_value_and_derivative(spline, r, &f, &dfdr); // First contribution: f * d(r^l * Y)/dv double fdrlYdx_m[nm]; if (v == 0) spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m); else if (v == 1) spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m); else spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m); for (int m = 0; m < nm; m++) a_G[G] += fdrlYdx_m[m] * c_M[M + m] * conjphase; // Second contribution: r^(l-1) * Y * df/dr * R_v if (r > 1e-15) { double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr double rm1dfdr = 1. / r * dfdr; spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m); for (int m = 0; m < nm; m++) a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m] * conjphase; } } // Update coordinates of current grid point xG += h_cv[6]; yG += h_cv[7]; zG += h_cv[8]; } } GRID_LOOP_STOP(lfc, q, 0); c_M += nM; a_G += nG; } } Py_RETURN_NONE; } gpaw-24.1.0/c/main.c000066400000000000000000000050341454550013000140450ustar00rootroot00000000000000#include "_gpaw.h" int gpaw_main() { int status = -1; PyObject *gpaw_mod = NULL, *pymain = NULL; gpaw_mod = PyImport_ImportModule("gpaw"); if(gpaw_mod == NULL) { status = 3; // Basic import failure } else { pymain = PyObject_GetAttrString(gpaw_mod, "main"); } if(pymain == NULL) { status = 4; // gpaw.main does not exist for some reason //PyErr_Print(); } else { // Returns Py_None or NULL (error after calling user script) // We already imported the Python parts of numpy. If we want, we can // later attempt to broadcast the numpy C API imports, too. // However I don't know how many files they are, and we need to // figure out how to broadcast extension modules (shared objects). 
import_array1(0); PyObject *pyreturn = PyObject_CallFunction(pymain, ""); status = (pyreturn == NULL); Py_XDECREF(pyreturn); } Py_XDECREF(pymain); Py_XDECREF(gpaw_mod); return status; } int main(int argc, char **argv) { #ifdef GPAW_GPU int granted; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted); if (granted < MPI_THREAD_MULTIPLE) exit(1); #else #ifndef _OPENMP MPI_Init(&argc, &argv); #else int granted; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted); if (granted != MPI_THREAD_MULTIPLE) exit(1); #endif #endif #define PyChar wchar_t wchar_t* wargv[argc]; wchar_t* wargv2[argc]; for (int i = 0; i < argc; i++) { int n = 1 + mbstowcs(NULL, argv[i], 0); wargv[i] = (wchar_t*)malloc(n * sizeof(wchar_t)); wargv2[i] = wargv[i]; mbstowcs(wargv[i], argv[i], n); } Py_SetProgramName(wargv[0]); PyImport_AppendInittab("_gpaw", &moduleinit); Py_Initialize(); PySys_SetArgvEx(argc, wargv, 0); #ifdef GPAW_WITH_ELPA // Globally initialize Elpa library if present: if (elpa_init(20171201) != ELPA_OK) { // What API versions do we support? PyErr_SetString(PyExc_RuntimeError, "Elpa >= 20171201 required"); PyErr_Print(); return 1; } #endif int status = gpaw_main(); if(status != 0) { PyErr_Print(); } #ifdef GPAW_WITH_ELPA #ifdef ELPA_API_VERSION // Newer Elpas define their version but older ones don't. int elpa_err; elpa_uninit(&elpa_err); #else elpa_uninit(); // 2018.05.001: no errcode #endif #endif Py_Finalize(); MPI_Finalize(); for (int i = 0; i < argc; i++) free(wargv2[i]); return status; } gpaw-24.1.0/c/mpi.c000066400000000000000000001213101454550013000137020ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Copyright (C) 2005-2009 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #define PY_SSIZE_T_CLEAN #include #ifdef PARALLEL #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include "extensions.h" #include #include "mympi.h" #ifdef __bgp__ #include #endif // Wrappers to support GPU_AWARE_MPI #ifdef GPAW_GPU_AWARE_MPI #define GPAW_ARRAY_ALLOW_CUPY #endif #include "array.h" #ifdef GPAW_MPI2 #ifndef GPAW_MPI_INPLACE #error "Deprecated: Define or undefine GPAW_MPI_INPLACE, instead of using GPAW_MPI2." #endif #endif void gpawDeviceSynchronize(); // Check that a processor number is valid #define CHK_PROC(n) if (n < 0 || n >= self->size) {\ PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \ return NULL; } else // Check that a processor number is valid or is -1 #define CHK_PROC_DEF(n) if (n < -1 || n >= self->size) {\ PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \ return NULL; } else // Check that a processor number is valid and is not this processor #define CHK_OTHER_PROC(n) if (n < 0 || n >= self->size || n == self->rank) { \ PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \ return NULL; } else // MPI request object, so we can store a reference to the buffer, // preventing its early deallocation. 
typedef struct { PyObject_HEAD MPI_Request rq; PyObject *buffer; int status; } GPAW_MPI_Request; static void maybeSynchronize(PyObject* a) { #ifdef GPAW_GPU_AWARE_MPI if (!PyArray_Check(a)) { gpawDeviceSynchronize(); } #endif } static PyObject *mpi_request_wait(GPAW_MPI_Request *self, PyObject *noargs) { if (self->status == 0) { // Calling wait multiple times is allowed but meaningless (as in the MPI standard) Py_RETURN_NONE; } int ret = MPI_Wait(&(self->rq), MPI_STATUS_IGNORE); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Wait error occurred."); return NULL; } Py_DECREF(self->buffer); self->status = 0; Py_RETURN_NONE; } static PyObject *mpi_request_test(GPAW_MPI_Request *self, PyObject *noargs) { if (self->status == 0) { Py_RETURN_TRUE; // Already completed } int flag; int ret = MPI_Test(&(self->rq), &flag, MPI_STATUS_IGNORE); // Can this change the Python string? if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Test error occurred."); return NULL; } if (flag) { Py_DECREF(self->buffer); self->status = 0; Py_RETURN_TRUE; } else { Py_RETURN_FALSE; } } static void mpi_request_dealloc(GPAW_MPI_Request *self) { if (self->status) { PyObject *none = mpi_request_wait(self, NULL); Py_DECREF(none); } PyObject_Del(self); } static PyMemberDef mpi_request_members[] = { {"status", T_INT, offsetof(GPAW_MPI_Request, status), READONLY, "status of the request, non-zero if communication is pending."}, {NULL} }; static PyMethodDef mpi_request_methods[] = { {"wait", (PyCFunction) mpi_request_wait, METH_NOARGS, "Wait for the communication to complete." }, {"test", (PyCFunction) mpi_request_test, METH_NOARGS, "Test if the communication has completed." 
}, {NULL} }; PyTypeObject GPAW_MPI_Request_type = { PyVarObject_HEAD_INIT(NULL, 0) "MPI_Request", /*tp_name*/ sizeof(GPAW_MPI_Request), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)mpi_request_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "MPI request object", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ mpi_request_methods, /* tp_methods */ mpi_request_members, 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ 0, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ }; static GPAW_MPI_Request *NewMPIRequest(void) { GPAW_MPI_Request *self; self = PyObject_NEW(GPAW_MPI_Request, &GPAW_MPI_Request_type); if (self == NULL) return NULL; memset(&(self->rq), 0, sizeof(MPI_Request)); self->buffer = NULL; self->status = 1; // Active return self; } static void mpi_ensure_finalized(void) { int already_finalized = 1; int ierr = MPI_SUCCESS; MPI_Finalized(&already_finalized); if (!already_finalized) { ierr = MPI_Finalize(); } if (ierr != MPI_SUCCESS) PyErr_SetString(PyExc_RuntimeError, "MPI_Finalize error occurred"); } // MPI initialization static void mpi_ensure_initialized(void) { int already_initialized = 1; int ierr = MPI_SUCCESS; // Check whether MPI is already initialized MPI_Initialized(&already_initialized); if (!already_initialized) { // if not, let's initialize it int use_threads = 0; #ifdef GPAW_GPU use_threads = 1; #endif #ifdef _OPENMP use_threads = 1; #endif if (!use_threads) { ierr = MPI_Init(NULL, NULL); if (ierr == MPI_SUCCESS) { // No problem: register finalization when at Python exit Py_AtExit(*mpi_ensure_finalized); } 
else { // We have a problem: raise an exception char err[MPI_MAX_ERROR_STRING]; int resultlen; MPI_Error_string(ierr, err, &resultlen); PyErr_SetString(PyExc_RuntimeError, err); } } else { int granted; ierr = MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &granted); if (ierr == MPI_SUCCESS && granted == MPI_THREAD_MULTIPLE) { // No problem: register finalization when at Python exit Py_AtExit(*mpi_ensure_finalized); } else if (granted != MPI_THREAD_MULTIPLE) { // We have a problem: raise an exception char err[MPI_MAX_ERROR_STRING] = "MPI_THREAD_MULTIPLE is not supported"; PyErr_SetString(PyExc_RuntimeError, err); } else { // We have a problem: raise an exception char err[MPI_MAX_ERROR_STRING]; int resultlen; MPI_Error_string(ierr, err, &resultlen); PyErr_SetString(PyExc_RuntimeError, err); } } } } static void mpi_dealloc(MPIObject *obj) { if (obj->comm != MPI_COMM_WORLD) MPI_Comm_free(&(obj->comm)); Py_XDECREF(obj->parent); free(obj->members); PyObject_DEL(obj); } static PyObject * mpi_sendreceive(MPIObject *self, PyObject *args, PyObject *kwargs) { PyObject* a; PyObject* b; int dest, src; int sendtag = 123; int recvtag = 123; static char *kwlist[] = {"a", "dest", "b", "src", "sendtag", "recvtag", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OiOi|ii:sendreceive", kwlist, &a, &dest, &b, &src, &sendtag, &recvtag)) return NULL; CHK_ARRAY(a); CHK_OTHER_PROC(dest); CHK_ARRAY(b); CHK_OTHER_PROC(src); int nsend = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) nsend *= Array_DIM(a,d); int nrecv = Array_ITEMSIZE(b); for (int d = 0; d < Array_NDIM(b); d++) nrecv *= Array_DIM(b,d); maybeSynchronize(a); int ret = MPI_Sendrecv(Array_BYTES(a), nsend, MPI_BYTE, dest, sendtag, Array_BYTES(b), nrecv, MPI_BYTE, src, recvtag, self->comm, MPI_STATUS_IGNORE); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Sendrecv error occurred."); return NULL; } Py_RETURN_NONE; } static PyObject * mpi_receive(MPIObject *self, PyObject *args, PyObject 
*kwargs) { PyObject* a; int src; int tag = 123; int block = 1; static char *kwlist[] = {"a", "src", "tag", "block", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii:receive", kwlist, &a, &src, &tag, &block)) return NULL; CHK_ARRAY(a); CHK_OTHER_PROC(src); int n = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) n *= Array_DIM(a, d); if (block) { maybeSynchronize(a); int ret = MPI_Recv(Array_BYTES(a), n, MPI_BYTE, src, tag, self->comm, MPI_STATUS_IGNORE); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Recv error occurred."); return NULL; } Py_RETURN_NONE; } else { GPAW_MPI_Request *req = NewMPIRequest(); if (req == NULL) return NULL; req->buffer = (PyObject*)a; Py_INCREF(req->buffer); maybeSynchronize(a); int ret = MPI_Irecv(Array_BYTES(a), n, MPI_BYTE, src, tag, self->comm, &(req->rq)); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Irecv error occurred."); return NULL; } return (PyObject *) req; } } static PyObject * mpi_send(MPIObject *self, PyObject *args, PyObject *kwargs) { PyObject* a; int dest; int tag = 123; int block = 1; static char *kwlist[] = {"a", "dest", "tag", "block", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii:send", kwlist, &a, &dest, &tag, &block)) return NULL; CHK_ARRAY(a); CHK_OTHER_PROC(dest); int n = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) n *= Array_DIM(a,d); if (block) { maybeSynchronize(a); int ret = MPI_Send(Array_BYTES(a), n, MPI_BYTE, dest, tag, self->comm); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Send error occurred."); return NULL; } Py_RETURN_NONE; } else { GPAW_MPI_Request *req = NewMPIRequest(); req->buffer = (PyObject*)a; Py_INCREF(a); maybeSynchronize(a); int ret = MPI_Isend(Array_BYTES(a), n, MPI_BYTE, dest, tag, self->comm, &(req->rq)); if (ret != MPI_SUCCESS) { PyErr_SetString(PyExc_RuntimeError, "MPI_Isend error occurred."); return NULL; } return (PyObject *)req; } } static PyObject * 
mpi_ssend(MPIObject *self, PyObject *args, PyObject *kwargs) { PyObject* a; int dest; int tag = 123; static char *kwlist[] = {"a", "dest", "tag", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|i:send", kwlist, &a, &dest, &tag)) return NULL; CHK_ARRAY_RO(a); CHK_OTHER_PROC(dest); int n = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) n *= Array_DIM(a,d); maybeSynchronize(a); MPI_Ssend(Array_BYTES(a), n, MPI_BYTE, dest, tag, self->comm); Py_RETURN_NONE; } static PyObject * mpi_name(MPIObject *self, PyObject *noargs) { char name[MPI_MAX_PROCESSOR_NAME]; int resultlen; MPI_Get_processor_name(name, &resultlen); return Py_BuildValue("s#", name, (Py_ssize_t)resultlen); } static PyObject * mpi_abort(MPIObject *self, PyObject *args) { int errcode; if (!PyArg_ParseTuple(args, "i:abort", &errcode)) return NULL; MPI_Abort(self->comm, errcode); Py_RETURN_NONE; } static PyObject * mpi_barrier(MPIObject *self) { MPI_Barrier(self->comm); Py_RETURN_NONE; } static PyObject * mpi_test(MPIObject *self, PyObject *args) { GPAW_MPI_Request* s; if (!PyArg_ParseTuple(args, "O!:wait", &GPAW_MPI_Request_type, &s)) return NULL; return mpi_request_test(s, NULL); } static PyObject * mpi_testall(MPIObject *self, PyObject *requests) { int n; // Number of requests MPI_Request *rqs = NULL; int flag = 0; if (!PySequence_Check(requests)) { PyErr_SetString(PyExc_TypeError, "mpi.testall: argument must be a sequence"); return NULL; } // Extract the request objects n = PySequence_Size(requests); assert(n >= 0); // This cannot fail. rqs = GPAW_MALLOC(MPI_Request, n); assert(rqs != NULL); for (int i = 0; i < n; i++) { PyObject *o = PySequence_GetItem(requests, i); if (o == NULL) return NULL; if (Py_TYPE(o) != &GPAW_MPI_Request_type) { Py_DECREF(o); free(rqs); PyErr_SetString(PyExc_TypeError, "mpi.testall: argument must be a sequence of MPI requests"); return NULL; } GPAW_MPI_Request *s = (GPAW_MPI_Request *)o; rqs[i] = s->rq; Py_DECREF(o); } // Do the actual test. 
int ret = MPI_Testall(n, rqs, &flag, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { // We do not dare to release the buffers now! PyErr_SetString(PyExc_RuntimeError, "MPI_Testall error occurred."); return NULL; } // Unlike MPI_Test, if flag outcome is non-zero, MPI_Testall will deallocate // all requests which were allocated by nonblocking communication calls, so // we must free these buffers. Otherwise, none of the requests are modified. if (flag != 0) { // Release the buffers used by the MPI communication for (int i = 0; i < n; i++) { GPAW_MPI_Request *o = (GPAW_MPI_Request *) PySequence_GetItem(requests, i); if (o->status) { assert(o->buffer != NULL); Py_DECREF(o->buffer); } o->status = 0; Py_DECREF(o); } } // Release internal data and return. free(rqs); return Py_BuildValue("i", flag); } static PyObject * mpi_wait(MPIObject *self, PyObject *args) { GPAW_MPI_Request* s; if (!PyArg_ParseTuple(args, "O!:wait", &GPAW_MPI_Request_type, &s)) return NULL; return mpi_request_wait(s, NULL); } static PyObject * mpi_waitall(MPIObject *self, PyObject *requests) { int n; // Number of requests MPI_Request *rqs = NULL; if (!PySequence_Check(requests)) { PyErr_SetString(PyExc_TypeError, "mpi.waitall: argument must be a sequence"); return NULL; } // Extract the request objects n = PySequence_Size(requests); assert(n >= 0); // This cannot fail. rqs = GPAW_MALLOC(MPI_Request, n); for (int i = 0; i < n; i++) { PyObject *o = PySequence_GetItem(requests, i); if (o == NULL) return NULL; if (Py_TYPE(o) != &GPAW_MPI_Request_type) { Py_DECREF(o); free(rqs); PyErr_SetString(PyExc_TypeError, "mpi.waitall: argument must be a sequence of MPI requests"); return NULL; } GPAW_MPI_Request *s = (GPAW_MPI_Request *)o; rqs[i] = s->rq; Py_DECREF(o); } int ret = MPI_Waitall(n, rqs, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { // We do not dare to release the buffers now! 
PyErr_SetString(PyExc_RuntimeError, "MPI_Waitall error occurred."); return NULL; } // Release the buffers used by the MPI communication for (int i = 0; i < n; i++) { GPAW_MPI_Request *o = (GPAW_MPI_Request *) PySequence_GetItem(requests, i); if (o->status) { assert(o->buffer != NULL); Py_DECREF(o->buffer); } o->status = 0; Py_DECREF(o); } // Release internal data and return. free(rqs); Py_RETURN_NONE; } static MPI_Datatype get_mpi_datatype(PyObject *a) { int n = Array_ITEMSIZE(a); if (Array_ISCOMPLEX(a)) n = n / 2; int array_type = Array_TYPE(a); switch(array_type) { // Floating point numbers including complex numbers case NPY_DOUBLE: case NPY_CDOUBLE: assert(sizeof(double) == n); return MPI_DOUBLE; case NPY_FLOAT: case NPY_CFLOAT: assert(sizeof(float) == n); return MPI_FLOAT; case NPY_LONGDOUBLE: case NPY_CLONGDOUBLE: assert(sizeof(long double) == n); return MPI_LONG_DOUBLE; // Signed integer types case NPY_BYTE: assert(sizeof(char) == n); return MPI_CHAR; case NPY_SHORT: assert(sizeof(short) == n); return MPI_SHORT; case NPY_INT: assert(sizeof(int) == n); return MPI_INT; case NPY_LONG: assert(sizeof(long) == n); return MPI_LONG; // Unsigned integer types case NPY_BOOL: case NPY_UBYTE: assert(sizeof(unsigned char) == n); return MPI_UNSIGNED_CHAR; case NPY_USHORT: assert(sizeof(unsigned short) == n); return MPI_UNSIGNED_SHORT; case NPY_UINT: assert(sizeof(unsigned) == n); return MPI_UNSIGNED; case NPY_ULONG: assert(sizeof(unsigned long) == n); return MPI_UNSIGNED_LONG; } // If we reach this point none of the cases worked out. 
PyErr_SetString(PyExc_ValueError, "Cannot communicate data of this type."); return 0; } static PyObject * mpi_reduce(MPIObject *self, PyObject *args, PyObject *kwargs, MPI_Op operation, int allowcomplex) { #ifdef GPAW_MPI_DEBUG MPI_Barrier(self->comm); #endif PyObject* obj; int root = -1; static char *kwlist[] = {"a", "root", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:reduce", kwlist, &obj, &root)) return NULL; CHK_PROC_DEF(root); if (PyFloat_Check(obj)) { double din = PyFloat_AS_DOUBLE(obj); double dout; if (root == -1) MPI_Allreduce(&din, &dout, 1, MPI_DOUBLE, operation, self->comm); else MPI_Reduce(&din, &dout, 1, MPI_DOUBLE, operation, root, self->comm); return PyFloat_FromDouble(dout); } if (PyLong_Check(obj)) { long din = PyLong_AS_LONG(obj); long dout; if (root == -1) MPI_Allreduce(&din, &dout, 1, MPI_LONG, operation, self->comm); else MPI_Reduce(&din, &dout, 1, MPI_LONG, operation, root, self->comm); return PyLong_FromLong(dout); } else if (PyComplex_Check(obj) && allowcomplex) { double din[2]; double dout[2]; din[0] = PyComplex_RealAsDouble(obj); din[1] = PyComplex_ImagAsDouble(obj); if (root == -1) MPI_Allreduce(&din, &dout, 2, MPI_DOUBLE, MPI_SUM, self->comm); else MPI_Reduce(&din, &dout, 2, MPI_DOUBLE, MPI_SUM, root, self->comm); return PyComplex_FromDoubles(dout[0], dout[1]); } else if (PyComplex_Check(obj)) { PyErr_SetString(PyExc_ValueError, "Operation not allowed on complex numbers"); return NULL; } else // It should be an array { int n; int elemsize; MPI_Datatype datatype; PyObject* aobj = obj; CHK_ARRAY(aobj); datatype = get_mpi_datatype(aobj); if (datatype == 0) return NULL; n = Array_SIZE(aobj); elemsize = Array_ITEMSIZE(aobj); if (Array_ISCOMPLEX(aobj)) { if (allowcomplex) { n *= 2; elemsize /= 2; } else { PyErr_SetString(PyExc_ValueError, "Operation not allowed on complex numbers"); return NULL; } } if (root == -1) { maybeSynchronize(aobj); #ifdef GPAW_MPI_INPLACE MPI_Allreduce(MPI_IN_PLACE, Array_BYTES(aobj), n, datatype, 
operation, self->comm); #else char* b = GPAW_MALLOC(char, n * elemsize); MPI_Allreduce(Array_BYTES(aobj), b, n, datatype, operation, self->comm); assert(Array_NBYTES(aobj) == n * elemsize); memcpy(Array_BYTES(aobj), b, n * elemsize); free(b); #endif } else { int rank; MPI_Comm_rank(self->comm, &rank); char* b = 0; if (rank == root) { maybeSynchronize(aobj); #ifdef GPAW_MPI_INPLACE MPI_Reduce(MPI_IN_PLACE, Array_BYTES(aobj), n, datatype, operation, root, self->comm); #else b = GPAW_MALLOC(char, n * elemsize); MPI_Reduce(Array_BYTES(aobj), b, n, datatype, operation, root, self->comm); assert(Array_NBYTES(aobj) == n * elemsize); memcpy(Array_BYTES(aobj), b, n * elemsize); free(b); #endif } else { maybeSynchronize(aobj); MPI_Reduce(Array_BYTES(aobj), b, n, datatype, operation, root, self->comm); } } Py_RETURN_NONE; } } static PyObject * mpi_reduce_scalar(MPIObject *self, PyObject *args, PyObject *kwargs, MPI_Op operation, int allowcomplex) { #ifdef GPAW_MPI_DEBUG MPI_Barrier(self->comm); #endif PyObject* obj; int root = -1; static char *kwlist[] = {"a", "root", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:reduce", kwlist, &obj, &root)) return NULL; CHK_PROC_DEF(root); if (PyFloat_Check(obj)) { double din = PyFloat_AS_DOUBLE(obj); double dout; if (root == -1) MPI_Allreduce(&din, &dout, 1, MPI_DOUBLE, operation, self->comm); else MPI_Reduce(&din, &dout, 1, MPI_DOUBLE, operation, root, self->comm); return PyFloat_FromDouble(dout); } if (PyLong_Check(obj)) { long din = PyLong_AS_LONG(obj); long dout; if (root == -1) MPI_Allreduce(&din, &dout, 1, MPI_LONG, operation, self->comm); else MPI_Reduce(&din, &dout, 1, MPI_LONG, operation, root, self->comm); return PyLong_FromLong(dout); } else if (PyComplex_Check(obj) && allowcomplex) { double din[2]; double dout[2]; din[0] = PyComplex_RealAsDouble(obj); din[1] = PyComplex_ImagAsDouble(obj); if (root == -1) MPI_Allreduce(&din, &dout, 2, MPI_DOUBLE, MPI_SUM, self->comm); else MPI_Reduce(&din, &dout, 2, MPI_DOUBLE, 
MPI_SUM, root, self->comm); return PyComplex_FromDoubles(dout[0], dout[1]); } else if (PyComplex_Check(obj)) { PyErr_SetString(PyExc_ValueError, "Operation not allowed on complex numbers"); return NULL; } else // It should be an array { PyErr_SetString(PyExc_ValueError, "Operation not allowed for this datatype for mpi_sum_scalar."); return NULL; } } static PyObject * mpi_sum(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce(self, args, kwargs, MPI_SUM, 1); } static PyObject * mpi_sum_scalar(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce_scalar(self, args, kwargs, MPI_SUM, 1); } static PyObject * mpi_product(MPIObject *self, PyObject *args, PyObject *kwargs) { // No complex numbers as that would give separate products of // real and imaginary parts. return mpi_reduce(self, args, kwargs, MPI_PROD, 0); } static PyObject * mpi_max(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce(self, args, kwargs, MPI_MAX, 0); } static PyObject * mpi_max_scalar(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce_scalar(self, args, kwargs, MPI_MAX, 0); } static PyObject * mpi_min(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce(self, args, kwargs, MPI_MIN, 0); } static PyObject * mpi_min_scalar(MPIObject *self, PyObject *args, PyObject *kwargs) { return mpi_reduce_scalar(self, args, kwargs, MPI_MIN, 0); } static PyObject * mpi_scatter(MPIObject *self, PyObject *args) { PyObject* sendobj; PyObject* recvobj; int root; if (!PyArg_ParseTuple(args, "OOi:scatter", &sendobj, &recvobj, &root)) return NULL; CHK_ARRAY(recvobj); CHK_PROC(root); char* source = 0; if (self->rank == root) { CHK_ARRAY(sendobj); CHK_ARRAYS(recvobj, sendobj, self->size); // size(send) = size(recv)*Ncpu source = Array_BYTES(sendobj); } int n = Array_ITEMSIZE(recvobj); for (int d = 0; d < Array_NDIM(recvobj); d++) n *= Array_DIM(recvobj,d); maybeSynchronize(recvobj); MPI_Scatter(source, n, MPI_BYTE, 
Array_BYTES(recvobj), n, MPI_BYTE, root, self->comm); Py_RETURN_NONE; } static PyObject * mpi_allgather(MPIObject *self, PyObject *args) { PyObject* a; PyObject* b; if (!PyArg_ParseTuple(args, "OO:allgather", &a, &b)) return NULL; CHK_ARRAY(a); CHK_ARRAY(b); CHK_ARRAYS(a, b, self->size); int n = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) n *= Array_DIM(a,d); // What about endianness???? maybeSynchronize(a); MPI_Allgather(Array_BYTES(a), n, MPI_BYTE, Array_BYTES(b), n, MPI_BYTE, self->comm); Py_RETURN_NONE; } static PyObject * mpi_gather(MPIObject *self, PyObject *args) { PyObject* a; int root; PyObject* b = 0; if (!PyArg_ParseTuple(args, "Oi|O", &a, &root, &b)) return NULL; CHK_ARRAY(a); CHK_PROC(root); if (root == self->rank) { CHK_ARRAY(b); CHK_ARRAYS(a, b, self->size); } else if ((PyObject*)b != Py_None && b != NULL) { fprintf(stderr, "******** Root=%d\n", root); PyErr_SetString(PyExc_ValueError, "mpi_gather: b array should not be given on non-root processors."); return NULL; } int n = Array_ITEMSIZE(a); for (int d = 0; d < Array_NDIM(a); d++) n *= Array_DIM(a,d); maybeSynchronize(a); if (root != self->rank) MPI_Gather(Array_BYTES(a), n, MPI_BYTE, 0, n, MPI_BYTE, root, self->comm); else MPI_Gather(Array_BYTES(a), n, MPI_BYTE, Array_BYTES(b), n, MPI_BYTE, root, self->comm); Py_RETURN_NONE; } static PyObject * mpi_broadcast(MPIObject *self, PyObject *args) { #ifdef GPAW_MPI_DEBUG MPI_Barrier(self->comm); #endif PyObject* buf; int root; if (!PyArg_ParseTuple(args, "Oi:broadcast", &buf, &root)) return NULL; if (root == self->rank) CHK_ARRAY_RO(buf); else CHK_ARRAY(buf); CHK_PROC(root); int n = Array_ITEMSIZE(buf); for (int d = 0; d < Array_NDIM(buf); d++) n *= Array_DIM(buf,d); maybeSynchronize(buf); MPI_Bcast(Array_BYTES(buf), n, MPI_BYTE, root, self->comm); Py_RETURN_NONE; } static PyObject *mpi_compare(MPIObject *self, PyObject *args) { MPIObject* other; int result; char* pyresult; if (!PyArg_ParseTuple(args, "O", &other)) return NULL; 
MPI_Comm_compare(self->comm, other->comm, &result); if(result == MPI_IDENT) pyresult = "ident"; else if (result == MPI_CONGRUENT) pyresult = "congruent"; else if (result == MPI_SIMILAR) pyresult = "similar"; else if (result == MPI_UNEQUAL) pyresult = "unequal"; else return NULL; return Py_BuildValue("s", pyresult); } static PyObject *mpi_translate_ranks(MPIObject *self, PyObject *args) { PyObject* myranks_anytype; // Conversion to numpy array below MPIObject* other; if (!PyArg_ParseTuple(args, "OO", &other, &myranks_anytype)) return NULL; // XXXXXX This uses NPY_LONG and NPY_INT. On some computers the // returned array is int32 while np.array(..., dtype=int) returns // int64. This should very probably be changed so it always // corresponds to the default int of numpy. // This handling of arrays of ranks is taken from the MPICommunicator // creation method. See that method for explanation of casting, datatypes // etc. PyArrayObject *myranks_long = (PyArrayObject*)PyArray_ContiguousFromAny( myranks_anytype, NPY_LONG, 1, 1); if(myranks_long == NULL) return NULL; int nranks = PyArray_DIM(myranks_long, 0); PyArrayObject *myranks; myranks = (PyArrayObject*)PyArray_Cast(myranks_long, NPY_INT); npy_intp rankshape[1]; rankshape[0] = PyArray_SIZE(myranks); PyArrayObject* other_ranks = (PyArrayObject*)PyArray_SimpleNew(1, rankshape, NPY_INT); MPI_Group mygroup, othergroup; MPI_Comm_group(self->comm, &mygroup); MPI_Comm_group(other->comm, &othergroup); int* rankdata = (int*)PyArray_BYTES(myranks); int* otherrankdata = (int*)PyArray_BYTES(other_ranks); MPI_Group_translate_ranks(mygroup, nranks, rankdata, othergroup, otherrankdata); // Return something with a definite value to Python. 
for(int i=0; i < nranks; i++) { if(otherrankdata[i] == MPI_UNDEFINED) { otherrankdata[i] = -1; } } PyObject* other_ranks_anytype = PyArray_Cast(other_ranks, PyArray_TYPE((PyArrayObject*)myranks_anytype)); Py_DECREF(myranks_long); Py_DECREF(myranks); Py_DECREF(other_ranks); return (PyObject*)other_ranks_anytype; } static PyObject * mpi_alltoallv(MPIObject *self, PyObject *args) { PyObject* send_obj; PyObject* send_cnts; PyObject* send_displs; PyObject* recv_obj; PyObject* recv_cnts; PyObject* recv_displs; if (!PyArg_ParseTuple(args, "OOOOOO:alltoallv", &send_obj, &send_cnts, &send_displs, &recv_obj, &recv_cnts, &recv_displs)) return NULL; CHK_ARRAY(send_obj); CHK_ARRAY(send_cnts); CHK_ARRAY(send_displs); CHK_ARRAY(recv_obj); CHK_ARRAY(recv_cnts); CHK_ARRAY(recv_displs); int *s_cnts = GPAW_MALLOC(int, self->size); int *s_displs = GPAW_MALLOC(int, self->size); int *r_cnts = GPAW_MALLOC(int, self->size); int *r_displs = GPAW_MALLOC(int, self->size); /* Create count and displacement arrays in units of bytes */ int elem_size = Array_ITEMSIZE(send_obj); long* tmp1 = Array_DATA(send_cnts); long* tmp2 = Array_DATA(send_displs); long* tmp3 = Array_DATA(recv_cnts); long* tmp4 = Array_DATA(recv_displs); for (int i=0; i < self->size; i++) { s_cnts[i] = tmp1[i] * elem_size; s_displs[i] = tmp2[i] * elem_size; r_cnts[i] = tmp3[i] * elem_size; r_displs[i] = tmp4[i] * elem_size; } maybeSynchronize(send_obj); MPI_Alltoallv(Array_BYTES(send_obj), s_cnts, s_displs, MPI_BYTE, Array_BYTES(recv_obj), r_cnts, r_displs, MPI_BYTE, self->comm); free(s_cnts); free(s_displs); free(r_cnts); free(r_displs); Py_RETURN_NONE; } static PyObject * get_members(MPIObject *self, PyObject *args) { PyArrayObject *ranks; npy_intp ranks_dims[1] = {self->size}; ranks = (PyArrayObject *) PyArray_SimpleNew(1, ranks_dims, NPY_INT); if (ranks == NULL) return NULL; memcpy(INTP(ranks), self->members, self->size*sizeof(int)); PyObject* values = Py_BuildValue("O", ranks); Py_DECREF(ranks); return values; } // See the 
documentation for corresponding function in debug wrapper // for the purpose of this function (gpaw/mpi/__init__.py) static PyObject * get_c_object(MPIObject *self, PyObject *args) { return Py_BuildValue("O", self); } // Forward declaration of MPI_Communicator because it needs MPIType // that needs MPI_getattr that needs MPI_Methods that need // MPI_Communicator that need ... static PyObject * MPICommunicator(MPIObject *self, PyObject *args); static PyMethodDef mpi_methods[] = { {"sendreceive", (PyCFunction)mpi_sendreceive, METH_VARARGS|METH_KEYWORDS, "sendreceive(a, dest, b, src, desttag=123, srctag=123) sends an array a to dest and receives an array b from src."}, {"receive", (PyCFunction)mpi_receive, METH_VARARGS|METH_KEYWORDS, "receive(a, src, tag=123, block=1) receives array a from src."}, {"send", (PyCFunction)mpi_send, METH_VARARGS|METH_KEYWORDS, "send(a, dest, tag=123, block=1) sends array a to dest."}, {"ssend", (PyCFunction)mpi_ssend, METH_VARARGS|METH_KEYWORDS, "ssend(a, dest, tag=123) synchronously sends array a to dest."}, {"abort", (PyCFunction)mpi_abort, METH_VARARGS, "abort(errcode) aborts all MPI tasks."}, {"name", (PyCFunction)mpi_name, METH_NOARGS, "name() returns the name of the processor node."}, {"barrier", (PyCFunction)mpi_barrier, METH_VARARGS, "barrier() synchronizes all MPI tasks"}, {"test", (PyCFunction)mpi_test, METH_VARARGS, "test(request) tests if a nonblocking communication is complete."}, {"testall", (PyCFunction)mpi_testall, METH_O, "testall(list_of_rqs) tests if multiple nonblocking communications are complete."}, {"wait", (PyCFunction)mpi_wait, METH_VARARGS, "wait(request) waits for a nonblocking communication to complete."}, {"waitall", (PyCFunction)mpi_waitall, METH_O, "waitall(list_of_rqs) waits for multiple nonblocking communications to complete."}, {"sum", (PyCFunction)mpi_sum, METH_VARARGS|METH_KEYWORDS, "sum(a, root=-1) sums arrays, result on all tasks unless root is given."}, {"sum_scalar", (PyCFunction)mpi_sum_scalar, 
METH_VARARGS|METH_KEYWORDS, "sum_scalar(a, root=-1) sums numbers, result on all tasks unless root is given. Returns the sum."}, {"product", (PyCFunction)mpi_product, METH_VARARGS|METH_KEYWORDS, "product(a, root=-1) multiplies arrays, result on all tasks unless root is given."}, {"max", (PyCFunction)mpi_max, METH_VARARGS|METH_KEYWORDS, "max(a, root=-1) maximum of arrays, result on all tasks unless root is given."}, {"max_scalar", (PyCFunction)mpi_max_scalar, METH_VARARGS|METH_KEYWORDS, "max_sclar(a, root=-1) maximum of scalars, result on all tasks unless root is given. Returns the value."}, {"min", (PyCFunction)mpi_min, METH_VARARGS|METH_KEYWORDS, "min(a, root=-1) minimum of arrays, result on all tasks unless root is given."}, {"min_scalar", (PyCFunction)mpi_min_scalar, METH_VARARGS|METH_KEYWORDS, "min_scalar(a, root=-1) minimum of scalars, result on all tasks unless root is given. Returns the value."}, {"scatter", (PyCFunction)mpi_scatter, METH_VARARGS, "scatter(src, target, root) distributes array from root task."}, {"gather", (PyCFunction)mpi_gather, METH_VARARGS, "gather(src, root, target=None) gathers data from all tasks on root task."}, {"all_gather", (PyCFunction)mpi_allgather, METH_VARARGS, "all_gather(src, target) gathers data from all tasks on all tasks."}, {"alltoallv", (PyCFunction)mpi_alltoallv, METH_VARARGS, "alltoallv(sbuf, scnt, sdispl, rbuf, ...) 
send data from all tasks to all tasks."}, {"broadcast", (PyCFunction)mpi_broadcast, METH_VARARGS, "broadcast(buffer, root) Broadcast data in-place from root task."}, {"compare", (PyCFunction)mpi_compare, METH_VARARGS, "compare two communicators for identity using MPI_Comm_compare."}, {"translate_ranks", (PyCFunction)mpi_translate_ranks, METH_VARARGS, "figure out correspondence between ranks on two communicators."}, {"get_members", (PyCFunction)get_members, METH_VARARGS, 0}, {"get_c_object", (PyCFunction)get_c_object, METH_VARARGS, 0}, {"new_communicator", (PyCFunction)MPICommunicator, METH_VARARGS, "new_communicator(ranks) creates a new communicator."}, {0, 0, 0, 0} }; static PyMemberDef mpi_members[] = { {"size", T_INT, offsetof(MPIObject, size), 0, "Number of processors"}, {"rank", T_INT, offsetof(MPIObject, rank), 0, "Number of this processor"}, {"parent", T_OBJECT_EX, offsetof(MPIObject, parent), 0, "Parent communicator"}, {0, 0, 0, 0, 0} /* Sentinel */ }; // __new__ static PyObject *NewMPIObject(PyTypeObject* type, PyObject *args, PyObject *kwds) { static char *kwlist[] = {NULL}; MPIObject* self; if (! PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) return NULL; self = (MPIObject *) type->tp_alloc(type, 0); if (self == NULL) return NULL; mpi_ensure_initialized(); MPI_Comm_size(MPI_COMM_WORLD, &(self->size)); MPI_Comm_rank(MPI_COMM_WORLD, &(self->rank)); self->comm = MPI_COMM_WORLD; Py_INCREF(Py_None); self->parent = Py_None; self->members = (int*) malloc(self->size*sizeof(int)); if (self->members == NULL) return NULL; for (int i=0; isize; i++) self->members[i] = i; return (PyObject *) self; } // __init__ does nothing. static int InitMPIObject(MPIObject* self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {NULL}; if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) return -1; return 0; } PyTypeObject MPIType = { PyVarObject_HEAD_INIT(NULL, 0) "MPI", /*tp_name*/ sizeof(MPIObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)mpi_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "MPI object", /*tp_doc*/ 0, /*tp_traverse*/ 0, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ 0, /*tp_iternext*/ mpi_methods, /*tp_methods*/ mpi_members, /*tp_members*/ 0, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ (initproc)InitMPIObject, /*tp_init*/ 0, /*tp_alloc*/ NewMPIObject, /*tp_new*/ }; static PyObject * MPICommunicator(MPIObject *self, PyObject *args) { PyObject* orig_ranks; if (!PyArg_ParseTuple(args, "O", &orig_ranks)) return NULL; // NB: int32 is NPY_LONG on 32-bit Linux and NPY_INT on 64-bit Linux! // First convert to NumPy array of NPY_LONG, then cast to NPY_INT, to // allow both 32 and 64 bit integers in the argument (except 64 on 32). 
PyArrayObject *ranks = (PyArrayObject*)PyArray_ContiguousFromAny( orig_ranks, NPY_LONG, 1, 1); if (ranks == NULL) return NULL; PyArrayObject *iranks; int n = PyArray_DIM(ranks, 0); iranks = (PyArrayObject*)PyArray_Cast((PyArrayObject*) ranks, NPY_INT); Py_DECREF(ranks); if (iranks == NULL) return NULL; // Check that all ranks make sense for (int i = 0; i < n; i++) { int *x = PyArray_GETPTR1(iranks, i); if (*x < 0 || *x >= self->size) { Py_DECREF(iranks); PyErr_SetString(PyExc_ValueError, "invalid rank"); return NULL; } for (int j = 0; j < i; j++) { int *y = PyArray_GETPTR1(iranks, j); if (*y == *x) { Py_DECREF(iranks); PyErr_SetString(PyExc_ValueError, "duplicate rank"); return NULL; } } } MPI_Group group; MPI_Comm_group(self->comm, &group); MPI_Group newgroup; MPI_Group_incl(group, n, (int *) PyArray_BYTES(iranks), &newgroup); MPI_Comm comm; MPI_Comm_create(self->comm, newgroup, &comm); // has a memory leak! #ifdef GPAW_MPI_DEBUG if (comm != MPI_COMM_NULL) { // Default Errhandler is MPI_ERRORS_ARE_FATAL MPI_Errhandler_set(comm, MPI_ERRORS_RETURN); #ifdef __bgp__ int result; int rank; MPI_Comm_rank(comm, &rank); MPIX_Get_property(comm, MPIDO_RECT_COMM, &result); if (rank == 0) { if(result) fprintf(stderr, "Get_property: comm is rectangular. 
\n"); } #endif } #endif // GPAW_MPI_DEBUG MPI_Group_free(&newgroup); MPI_Group_free(&group); if (comm == MPI_COMM_NULL) { Py_DECREF(iranks); Py_RETURN_NONE; } else { MPIObject *obj = PyObject_NEW(MPIObject, &MPIType); if (obj == NULL) return NULL; MPI_Comm_size(comm, &(obj->size)); MPI_Comm_rank(comm, &(obj->rank)); obj->comm = comm; if (obj->parent == Py_None) Py_DECREF(obj->parent); obj->members = (int*) malloc(obj->size*sizeof(int)); if (obj->members == NULL) return NULL; memcpy(obj->members, (int *) PyArray_BYTES(iranks), obj->size*sizeof(int)); Py_DECREF(iranks); // Make sure that MPI_COMM_WORLD is kept alive till the end (we // don't want MPI_Finalize to be called before MPI_Comm_free): Py_INCREF(self); obj->parent = (PyObject*)self; return (PyObject*)obj; } } PyObject* globally_broadcast_bytes(PyObject *self, PyObject *args) { PyObject *pybytes; if(!PyArg_ParseTuple(args, "O", &pybytes)){ return NULL; } MPI_Comm comm = MPI_COMM_WORLD; int rank; MPI_Comm_rank(comm, &rank); long size; if(rank == 0) { size = PyBytes_Size(pybytes); // Py_ssize_t --> long } MPI_Bcast(&size, 1, MPI_LONG, 0, comm); char *dst = (char *)malloc(size); if(rank == 0) { char *src = PyBytes_AsString(pybytes); // Read-only memcpy(dst, src, size); } maybeSynchronize(pybytes); MPI_Bcast(dst, size, MPI_BYTE, 0, comm); PyObject *value = PyBytes_FromStringAndSize(dst, size); free(dst); return value; } #endif // PARALLEL gpaw-24.1.0/c/mympi.h000066400000000000000000000003501454550013000142550ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. */ typedef struct { PyObject_HEAD int size; int rank; MPI_Comm comm; PyObject* parent; int* members; } MPIObject; gpaw-24.1.0/c/operators.c000066400000000000000000000303041454550013000151350ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2005-2020 CSC - IT Center for Science Ltd. 
* Please see the accompanying LICENSE file for further information. */ //*** Double buffering code based on original code by ***// //*** Mads R. B. Kristensen - madsbk@diku.dk ***// #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include "extensions.h" #include "bc.h" #include "mympi.h" #ifdef _OPENMP #include #endif #include "threading.h" #define __OPERATORS_C #include "operators.h" #undef __OPERATORS_C #ifdef GPAW_ASYNC #define GPAW_ASYNC3 3 #define GPAW_ASYNC2 2 #else #define GPAW_ASYNC3 1 #define GPAW_ASYNC2 1 #endif #ifdef GPAW_GPU #include "gpu/gpu.h" PyObject* Operator_relax_gpu(OperatorObject *self, PyObject *args); PyObject* Operator_apply_gpu(OperatorObject *self, PyObject *args); #endif static void Operator_dealloc(OperatorObject *self) { #ifdef GPAW_GPU if (self->use_gpu) { operator_dealloc_gpu(0); bc_dealloc_gpu(0); } #endif free(self->bc); PyObject_DEL(self); } static PyObject * Operator_relax(OperatorObject *self, PyObject *args) { int relax_method; PyArrayObject* func; PyArrayObject* source; int nrelax; double w = 1.0; if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source, &nrelax, &w)) return NULL; const boundary_conditions* bc = self->bc; double* fun = DOUBLEP(func); const double* src = DOUBLEP(source); const double_complex* ph; const int* size2 = bc->size2; double* buf = GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] * bc->ndouble); double* sendbuf = GPAW_MALLOC(double, bc->maxsend); double* recvbuf = GPAW_MALLOC(double, bc->maxrecv); ph = 0; for (int n = 0; n < nrelax; n++ ) { for (int i = 0; i < 3; i++) { bc_unpack1(bc, fun, buf, i, self->recvreq, self->sendreq, recvbuf, sendbuf, ph + 2 * i, 0, 1); bc_unpack2(bc, buf, i, self->recvreq, self->sendreq, recvbuf, 1); } bmgs_relax(relax_method, &self->stencil, buf, fun, src, w); } free(recvbuf); free(sendbuf); free(buf); Py_RETURN_NONE; } // The actual computation routine for simple finite difference operation // Separating this 
routine helps using the same code in // C-preconditioner void apply_worker(OperatorObject *self, int chunksize, int start, int end, int thread_id, int nthreads, const double* in, double* out, bool real, const double_complex* ph) { boundary_conditions* bc = self->bc; const int* size1 = bc->size1; const int* size2 = bc->size2; int ng = bc->ndouble * size1[0] * size1[1] * size1[2]; int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2]; MPI_Request recvreq[2]; MPI_Request sendreq[2]; double* sendbuf = GPAW_MALLOC(double, bc->maxsend * chunksize); double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * chunksize); double* buf = GPAW_MALLOC(double, ng2 * chunksize); const double* my_in; double* my_out; for (int n = start; n < end; n += chunksize) { if (n + chunksize >= end && chunksize > 1) chunksize = end - n; my_in = in + n * ng; my_out = out + n * ng; for (int i = 0; i < 3; i++) { bc_unpack1(bc, my_in, buf, i, recvreq, sendreq, recvbuf, sendbuf, ph + 2 * i, thread_id, chunksize); bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, chunksize); } for (int m = 0; m < chunksize; m++) if (real) bmgs_fd(&self->stencil, buf + m * ng2, my_out + m * ng); else bmgs_fdz(&self->stencil, (const double_complex*) (buf + m * ng2), (double_complex*) (my_out + m * ng)); } free(buf); free(recvbuf); free(sendbuf); } // Double buffering async worker for central difference stencils // based on original code by Mads R. B. 
Kristensen - madsbk@diku.dk void apply_worker_cfd(OperatorObject *self, int chunksize, int chunkinc, int start, int end, int thread_id, int nthreads, const double* in, double* out, bool real, const double_complex* ph) { if (start >= end) return; boundary_conditions* bc = self->bc; const int* size1 = bc->size1; const int* size2 = bc->size2; int ng = bc->ndouble * size1[0] * size1[1] * size1[2]; int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2]; MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2]; MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2]; double* sendbuf = GPAW_MALLOC(double, bc->maxsend * chunksize * GPAW_ASYNC3 * GPAW_ASYNC2); double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * chunksize * GPAW_ASYNC3 * GPAW_ASYNC2); double* buf = GPAW_MALLOC(double, ng2 * chunksize * GPAW_ASYNC2); if ((end - start) < chunksize) chunksize = end - start; int chunk = chunkinc; if (chunk > chunksize) chunk = chunksize; int odd = 0; const double* my_in = in + start * ng; double* my_out; for (int i = 0; i < 3; i++) bc_unpack1(bc, my_in, buf + odd * ng2 * chunksize, i, recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4, recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, ph + 2 * i, thread_id, chunk); odd = odd ^ 1; int last_chunk = chunk; for (int n = start+chunk; n < end; n += chunk) { last_chunk += chunkinc; if (last_chunk > chunksize) last_chunk = chunksize; if (n + last_chunk >= end && last_chunk > 1) last_chunk = end - n; my_in = in + n * ng; my_out = out + (n-chunk) * ng; for (int i = 0; i < 3; i++) { bc_unpack1(bc, my_in, buf + odd * ng2 * chunksize, i, recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4, recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, ph + 2 * i, thread_id, last_chunk); } odd = odd ^ 1; for (int i = 0; i < 3; 
i++) { bc_unpack2(bc, buf + odd * ng2 * chunksize, i, recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4, recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, chunk); } for (int m = 0; m < chunk; m++) if (real) bmgs_fd(&self->stencil, buf + m * ng2 + odd * ng2 * chunksize, my_out + m * ng); else bmgs_fdz(&self->stencil, (const double_complex*) (buf + m * ng2 + odd * ng2 * chunksize), (double_complex*) (my_out + m * ng)); chunk = last_chunk; } odd = odd ^ 1; my_out = out + (end-last_chunk) * ng; for (int i = 0; i < 3; i++) { bc_unpack2(bc, buf + odd * ng2 * chunksize, i, recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4, recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, last_chunk); } for (int m = 0; m < last_chunk; m++) if (real) bmgs_fd(&self->stencil, buf + m * ng2 + odd * ng2 * chunksize, my_out + m * ng); else bmgs_fdz(&self->stencil, (const double_complex*) (buf + m * ng2 + odd * ng2 * chunksize), (double_complex*) (my_out + m * ng)); free(buf); free(recvbuf); free(sendbuf); } static PyObject * Operator_apply(OperatorObject *self, PyObject *args) { PyArrayObject* input; PyArrayObject* output; PyArrayObject* phases = 0; if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases)) return NULL; int nin = 1; if (PyArray_NDIM(input) == 4) nin = PyArray_DIMS(input)[0]; boundary_conditions* bc = self->bc; const double* in = DOUBLEP(input); double* out = DOUBLEP(output); const double_complex* ph; bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE); if (real) ph = 0; else ph = COMPLEXP(phases); int chunksize = 1; if (getenv("GPAW_MPI_OPTIMAL_MSG_SIZE") != NULL) { int opt_msg_size = atoi(getenv("GPAW_MPI_OPTIMAL_MSG_SIZE")); if (bc->maxsend > 0 ) chunksize = opt_msg_size * 1024 / (bc->maxsend / 2 * (2 - (int)real) * sizeof(double)); chunksize = (chunksize > 0) ? chunksize : 1; chunksize = (chunksize < nin) ? 
chunksize : nin; } int chunkinc = chunksize; if (getenv("GPAW_CHUNK_INC") != NULL) chunkinc = atoi(getenv("GPAW_CHUNK_INC")); #ifdef _OPENMP #pragma omp parallel #endif { int thread_id = 0; int nthreads = 1; int start, end; #ifdef _OPENMP thread_id = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif SHARE_WORK(nin, nthreads, thread_id, &start, &end); #ifndef GPAW_ASYNC if (1) #else if (bc->cfd == 0) #endif { apply_worker(self, chunksize, start, end, thread_id, nthreads, in, out, real, ph); } else { apply_worker_cfd(self, chunksize, chunkinc, start, end, thread_id, nthreads, in, out, real, ph); } } Py_RETURN_NONE; } static PyObject * Operator_get_diagonal_element(OperatorObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; const bmgsstencil* s = &self->stencil; double d = 0.0; for (int n = 0; n < s->ncoefs; n++) if (s->offsets[n] == 0) d = s->coefs[n]; return Py_BuildValue("d", d); } static PyObject * Operator_get_async_sizes(OperatorObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; #ifdef GPAW_ASYNC return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3); #else return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3); #endif } static PyMethodDef Operator_Methods[] = { {"apply", (PyCFunction)Operator_apply, METH_VARARGS, NULL}, {"relax", (PyCFunction)Operator_relax, METH_VARARGS, NULL}, #ifdef GPAW_GPU {"apply_gpu", (PyCFunction)Operator_apply_gpu, METH_VARARGS, NULL}, {"relax_gpu", (PyCFunction)Operator_relax_gpu, METH_VARARGS, NULL}, #endif {"get_diagonal_element", (PyCFunction)Operator_get_diagonal_element, METH_VARARGS, NULL}, {"get_async_sizes", (PyCFunction)Operator_get_async_sizes, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} }; PyTypeObject OperatorType = { PyVarObject_HEAD_INIT(NULL, 0) "Operator", sizeof(OperatorObject), 0, (destructor)Operator_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "FD-operator object", 0, 0, 0, 0, 0, 0, 
/*
 * Operator(coefs, offsets, size, range, neighbors, real, comm, cfd,
 *          use_gpu=0)
 *
 * Build an Operator object: a stencil made from coefs/offsets/range/size
 * by bmgs_stencil, plus boundary conditions created by bc_init with a
 * padding of `range` points on both sides of each axis.  `comm` is
 * either None (MPI_COMM_NULL is used) or an MPI communicator object.
 *
 * Returns a new reference, or NULL with an exception set.
 */
PyObject * NewOperatorObject(PyObject *obj, PyObject *args)
{
  PyArrayObject* coefs;
  PyArrayObject* offsets;
  PyArrayObject* size;
  int range;
  PyArrayObject* neighbors;
  int real;
  PyObject* comm_obj;
  int cfd;
  int use_gpu = 0;

  if (!PyArg_ParseTuple(args, "OOOiOiOi|i",
                        &coefs, &offsets, &size, &range, &neighbors,
                        &real, &comm_obj, &cfd, &use_gpu))
    return NULL;

  OperatorObject *op = PyObject_NEW(OperatorObject, &OperatorType);
  if (op == NULL)
    return NULL;

  op->stencil = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs),
                             LONGP(offsets), range, LONGP(size));

  /* The neighbor array is accessed as rows of two long values. */
  const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
  const long padding[3][2] = {{range, range},
                              {range, range},
                              {range, range}};

  MPI_Comm comm = (comm_obj == Py_None)
                  ? MPI_COMM_NULL
                  : ((MPIObject*)comm_obj)->comm;

  op->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd);

#ifdef GPAW_GPU
  op->use_gpu = use_gpu;
  if (op->use_gpu)
    operator_init_gpu(op);
#endif

  return (PyObject*)op;
}
*/ #include "bc.h" #ifdef GPAW_GPU #include "gpu/bmgs.h" #endif #ifdef __OPERATORS_C typedef struct { PyObject_HEAD bmgsstencil stencil; boundary_conditions* bc; MPI_Request recvreq[2]; MPI_Request sendreq[2]; int nthreads; #ifdef GPAW_GPU int use_gpu; bmgsstencil_gpu stencil_gpu; #endif } OperatorObject; #else // Provide opaque type for routines outside operators.c struct _OperatorObject; typedef struct _OperatorObject OperatorObject; #endif #ifdef GPAW_GPU void operator_init_gpu(OperatorObject *self); void operator_dealloc_gpu(int force); #endif void apply_worker(OperatorObject *self, int chunksize, int start, int end, int thread_id, int nthreads, const double* in, double* out, bool real, const double_complex* ph); #endif gpaw-24.1.0/c/plane_wave.c000066400000000000000000000126211454550013000152420ustar00rootroot00000000000000#include "extensions.h" #include #include void _pw_insert(int nG, int nQ, double complex* c_G, npy_int32* Q_G, double scale, double complex* tmp_Q) // Does the same as these two lines of Python: // // tmp_Q[:] = 0.0 // tmp_Q.ravel()[Q_G] = c_G * scale { int Q1 = 0; for (int G = 0; G < nG; G++) { int Q2 = Q_G[G]; for (; Q1 < Q2; Q1++) tmp_Q[Q1] = 0.0; tmp_Q[Q1++] = c_G[G] * scale; } for (; Q1 < nQ; Q1++) tmp_Q[Q1] = 0.0; } PyObject *pw_insert(PyObject *self, PyObject *args) // Python wrapper { PyArrayObject *c_G_obj, *Q_G_obj, *tmp_Q_obj; double scale; if (!PyArg_ParseTuple(args, "OOdO", &c_G_obj, &Q_G_obj, &scale, &tmp_Q_obj)) return NULL; double complex *c_G = PyArray_DATA(c_G_obj); npy_int32 *Q_G = PyArray_DATA(Q_G_obj); double complex *tmp_Q = PyArray_DATA(tmp_Q_obj); int nG = PyArray_SIZE(c_G_obj); int nQ = PyArray_SIZE(tmp_Q_obj); _pw_insert(nG, nQ, c_G, Q_G, scale, tmp_Q); Py_RETURN_NONE; } PyObject *pw_precond(PyObject *self, PyObject *args) { PyArrayObject *G2_G_obj; PyArrayObject *R_G_obj; double ekin; PyArrayObject *out_G_obj; if (!PyArg_ParseTuple(args, "OOdO", &G2_G_obj, &R_G_obj, &ekin, &out_G_obj)) return NULL; double *G2_G = 
PyArray_DATA(G2_G_obj); double complex *R_G = PyArray_DATA(R_G_obj); double complex *out_G = PyArray_DATA(out_G_obj); int nG = PyArray_SIZE(G2_G_obj); for (int G = 0; G < nG; G++) { double x = 1 / ekin / 3 * G2_G[G]; double a = 27.0 + x * (18.0 + x * (12.0 + x * 8.0)); double xx = x * x; out_G[G] = -4.0 / 3 / ekin * a / (a + 16.0 * xx * xx) * R_G[G]; } Py_RETURN_NONE; } PyObject *pwlfc_expand(PyObject *self, PyObject *args) { PyArrayObject *f_Gs_obj; PyArrayObject *emiGR_Ga_obj; PyArrayObject *Y_GL_obj; PyArrayObject *l_s_obj; PyArrayObject *a_J_obj; PyArrayObject *s_J_obj; int cc; PyArrayObject *f_GI_obj; if (!PyArg_ParseTuple(args, "OOOOOOiO", &f_Gs_obj, &emiGR_Ga_obj, &Y_GL_obj, &l_s_obj, &a_J_obj, &s_J_obj, &cc, &f_GI_obj)) return NULL; double *f_Gs = PyArray_DATA(f_Gs_obj); double complex *emiGR_Ga = PyArray_DATA(emiGR_Ga_obj); double *Y_GL = PyArray_DATA(Y_GL_obj); npy_int32 *l_s = PyArray_DATA(l_s_obj); npy_int32 *a_J = PyArray_DATA(a_J_obj); npy_int32 *s_J = PyArray_DATA(s_J_obj); double *f_GI = PyArray_DATA(f_GI_obj); int nG = PyArray_DIM(emiGR_Ga_obj, 0); int nJ = PyArray_DIM(a_J_obj, 0); int nL = PyArray_DIM(Y_GL_obj, 1); int natoms = PyArray_DIM(emiGR_Ga_obj, 1); int nsplines = PyArray_DIM(f_Gs_obj, 1); double complex imag_powers[4] = {1.0, -I, -1.0, I}; if (PyArray_ITEMSIZE(f_GI_obj) == 16) for(int G = 0; G < nG; G++) { for (int J = 0; J < nJ; J++) { int s = s_J[J]; int l = l_s[s]; double complex f1 = (emiGR_Ga[a_J[J]] * f_Gs[s] * imag_powers[l % 4]); for (int m = 0; m < 2 * l + 1; m++) { double complex f = f1 * Y_GL[l * l + m]; *f_GI++ = creal(f); *f_GI++ = cc ? 
-cimag(f) : cimag(f); } } f_Gs += nsplines; emiGR_Ga += natoms; Y_GL += nL; } else { int nI = PyArray_DIM(f_GI_obj, 1); for(int G = 0; G < nG; G++) { for (int J = 0; J < nJ; J++) { int s = s_J[J]; int l = l_s[s]; double complex f1 = (emiGR_Ga[a_J[J]] * f_Gs[s] * imag_powers[l % 4]); for (int m = 0; m < 2 * l + 1; m++) { double complex f = f1 * Y_GL[l * l + m]; f_GI[0] = creal(f); f_GI[nI] = cc ? -cimag(f) : cimag(f); f_GI++; } } f_Gs += nsplines; emiGR_Ga += natoms; Y_GL += nL; f_GI += nI; } } Py_RETURN_NONE; } PyObject *plane_wave_grid(PyObject *self, PyObject *args) { PyArrayObject* beg_c; PyArrayObject* end_c; PyArrayObject* h_c; PyArrayObject* k_c; PyArrayObject* r0_c; PyArrayObject* pw_g; if (!PyArg_ParseTuple(args, "OOOOOO", &beg_c, &end_c, &h_c, &k_c, &r0_c, &pw_g)) return NULL; long *beg = LONGP(beg_c); long *end = LONGP(end_c); double *h = DOUBLEP(h_c); double *vk = DOUBLEP(k_c); double *vr0 = DOUBLEP(r0_c); double_complex *pw = COMPLEXP(pw_g); double kr[3], kr0[3]; int n[3], ij; for (int c = 0; c < 3; c++) { n[c] = end[c] - beg[c]; kr0[c] = vk[c] * vr0[c]; } for (int i = 0; i < n[0]; i++) { kr[0] = vk[0] * h[0] * (beg[0] + i) - kr0[0]; for (int j = 0; j < n[1]; j++) { kr[1] = kr[0] + vk[1] * h[1] * (beg[1] + j) - kr0[1]; ij = (i*n[1] + j)*n[2]; for (int k = 0; k < n[2]; k++) { kr[2] = kr[1] + vk[2] * h[2] * (beg[2] + k) - kr0[2]; pw[ij + k] = cos(kr[2]) + I * sin(kr[2]); } } } Py_RETURN_NONE; } gpaw-24.1.0/c/plt.c000066400000000000000000000060141454550013000137170ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" int write_plt_file(char *fname, int nx, int ny, int nz, double x0, double y0, double z0, double dx, double dy, double dz, double *grid); /* write grid to binary plt (gOpenMol) plot file */ PyObject* WritePLT(PyObject *self, PyObject *args) { char* fname; /* file name */ PyArrayObject* ho; /* grid spacings */ PyArrayObject* go; /* grid to write */ if (!PyArg_ParseTuple(args, "sOO", &fname, &ho, &go)) return NULL; /* must be 3D */ if(PyArray_NDIM(go) != 3) return NULL; double* g = DOUBLEP(go); double* h = DOUBLEP(ho); write_plt_file(fname, PyArray_DIM(go, 0), PyArray_DIM(go, 1), PyArray_DIM(go, 2), 0.,0.,0., h[0],h[1],h[2], g); Py_RETURN_NONE; } /* ----------------------------------------------------------------- * write grid to binary plt (gOpenMol) plot file * * x0, dx etc are assumed to be atomic units * the grid is assumed to be in the format: * grid(ix,iy,iz) = grid[ ix + ( iy + iz*ny )*nx ]; * where ix=0..nx-1 etc */ /* stolen from pltfile.c */ #define FWRITE(value , size) { \ Items = fwrite(&value, size , 1L , Output_p);\ if(Items < 1) {\ printf("?ERROR - in writing contour file (*)\n");\ return(1);}} int write_plt_file(char *fname, int nx, int ny, int nz, double x0, double y0, double z0, double dx, double dy, double dz, double *grid) { FILE *Output_p; static int Items; float scale,zmin,zmax,ymin,ymax,xmin,xmax,val; int rank,TypeOfSurface; int ix,iy,iz,indx; double norm,sum,dV; Output_p = fopen(fname,"wb"); /* see http://www.csc.fi/gopenmol/developers/plt_format.phtml */ #define au_A 0.52917725 scale = au_A; /* atomic length in Angstroem */ rank=3; /* always 3 */ FWRITE(rank , sizeof(int)); TypeOfSurface=4; /* arbitrary */ FWRITE(TypeOfSurface , sizeof(int)); FWRITE(nz , sizeof(int)); FWRITE(ny , sizeof(int)); FWRITE(nx , sizeof(int)); zmin= scale * ((float) z0); zmax= scale * ((float) z0+(nz-1)*dz); /* float zmax=(float) z0+nz*dz; */ FWRITE(zmin 
, sizeof(float)); FWRITE(zmax , sizeof(float)); ymin= scale * ((float) y0); ymax= scale * ((float) y0+(ny-1)*dy); /* float ymax=(float) y0+ny*dy; */ FWRITE(ymin , sizeof(float)); FWRITE(ymax , sizeof(float)); xmin= scale * ((float) x0); xmax= scale * ((float) x0+(nx-1)*dx); /* float xmax=(float) x0+nx*dx; */ FWRITE(xmin , sizeof(float)); FWRITE(xmax , sizeof(float)); indx=0; norm = 0; sum=0; dV=dx*dy*dz; for(iz=0;iz %s written (sum=%g,norm=%g)\n", fname,sum*dV,norm*dV); return 0; } gpaw-24.1.0/c/point_charges.c000066400000000000000000000130401454550013000157420ustar00rootroot00000000000000#include "extensions.h" //#include PyObject *pc_potential(PyObject *self, PyObject *args) { PyArrayObject* beg_v_obj; PyArrayObject* h_v_obj; PyArrayObject* q_p_obj; PyArrayObject* R_pv_obj; double rc; double rc2; double width; PyArrayObject* vext_G_obj; PyArrayObject* dcom_pv_obj; PyArrayObject* rhot_G_obj = 0; PyArrayObject* F_pv_obj = 0; if (!PyArg_ParseTuple(args, "OOOOdddOO|OO", &beg_v_obj, &h_v_obj, &q_p_obj, &R_pv_obj, &rc, &rc2, &width, &vext_G_obj, &dcom_pv_obj, &rhot_G_obj, &F_pv_obj)) return NULL; const long *beg_v = PyArray_DATA(beg_v_obj); const double *h_v = PyArray_DATA(h_v_obj); const double *q_p = PyArray_DATA(q_p_obj); const double *R_pv = PyArray_DATA(R_pv_obj); const double *dcom_pv = 0; if ((PyObject*)dcom_pv_obj != Py_None) dcom_pv = PyArray_DATA(dcom_pv_obj); double *vext_G = PyArray_DATA(vext_G_obj); int np = PyArray_DIM(R_pv_obj, 0); npy_intp* n = PyArray_DIMS(vext_G_obj); const double* rhot_G = 0; double* F_pv = 0; double dV = 0.0; if (F_pv_obj != 0) { // Handle the two extra arguments for the force calculation: rhot_G = PyArray_DATA(rhot_G_obj); F_pv = PyArray_DATA(F_pv_obj); dV = h_v[0] * h_v[1] * h_v[2]; } double rc12 = rc2 - width; for (int i = 0; i < n[0]; i++) { double x = (beg_v[0] + i) * h_v[0]; for (int j = 0; j < n[1]; j++) { double y = (beg_v[1] + j) * h_v[1]; int ij = (i * n[1] + j) * n[2]; for (int k = 0; k < n[2]; k++) { double z = (beg_v[2] 
+ k) * h_v[2]; for (int p = 0; p < np; p++) { const double* R_v = R_pv + 3 * p; double dx = R_v[0] - x; double dy = R_v[1] - y; double dz = R_v[2] - z; double d = sqrt(dx * dx + dy * dy + dz * dz); double dc, dxc, dyc, dzc; if (dcom_pv == 0) { dc = d; dxc = dx; dyc = dy; dzc = dz; } else { const double* dcom_v = dcom_pv + 3 * p; dxc = dcom_v[0]; dyc = dcom_v[1]; dzc = dcom_v[2]; dc = sqrt(dxc * dxc + dyc * dyc + dzc * dzc); } int G = ij + k; if (F_pv == 0) { // Calculate potential: double v; if (rc < 0.0) v = (q_p[p] * (d * d * d * d - rc * rc * rc * rc) / (d * d * d * d * d + rc * rc * rc * rc * rc)); else if (dc > rc2) v = 0.0; else if (dc > rc12) { double x = (dc - rc12) / width; v = q_p[p] * (1 - x * x * (3 - 2 * x)) / d; } else if (d > rc) v = q_p[p] / d; else { double s = d / rc; double s2 = s * s; v = q_p[p] * (3.28125 + s2 * (-5.46875 + s2 * (4.59375 + s2 * -1.40625))) / rc; } vext_G[G] -= v; } else { // Calculate forces: double w; // -(dv/dr)/r double o = 0.0; if (rc < 0.0) { double x = (d * d * d * d * d + rc * rc * rc * rc * rc); w = ((d * d * d * d - rc * rc * rc * rc) / (x * x) * 5 * d * d * d - 4 * d * d / x); } else if (dc > rc2) w = 0.0; else if (dc > rc12) { double x = (dc - rc12) / width; w = (1 - x * x * (3 - 2 * x)) / (d * d * d); o = 6 * x * (1 - x) / (width * dc * d); } else if (d > rc) w = 1 / (d * d * d); else { double s = d / rc; double s2 = s * s; w = (-2 * (-5.46875 + s2 * (2 * 4.59375 + s2 * 3 * -1.40625)) / (rc * rc * rc)); } w *= q_p[p] * rhot_G[G] * dV; o *= q_p[p] * rhot_G[G] * dV; double* F_v = F_pv + 3 * p; F_v[0] -= w * dx + o * dxc; F_v[1] -= w * dy + o * dyc; F_v[2] -= w * dz + o * dzc; } } } } } Py_RETURN_NONE; } gpaw-24.1.0/c/spline.c000066400000000000000000000075111454550013000144150ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Please see the accompanying LICENSE file for further information. 
// Convert boundary point z-ranges to grid indices for the 2*l+1 boxes
//
// get_indices_from_zranges(beg_c, end_c, G_b)
//   beg_c, end_c: box corners, read as three long values each
//   G_b:          int array of consecutive (begin, end) index pairs
//
// Returns the tuple (indices_gm, ng, nm), where nm = 2*l+1, ng is the
// total length of all ranges and indices_gm[g, m] = m * ngmax + G, with
// ngmax the number of points in the box and G running through the
// ranges in order.  Returns NULL with an exception set on failure.
static PyObject * spline_get_indices_from_zranges(SplineObject *self,
                                                  PyObject *args)
{
  PyArrayObject* beg_c_obj;
  PyArrayObject* end_c_obj;
  PyArrayObject* G_b_obj;
  int nm = 2 * self->spline.l + 1;

  if (!PyArg_ParseTuple(args, "OOO",
                        &beg_c_obj, &end_c_obj, &G_b_obj))
    return NULL;

  long* beg_c = LONGP(beg_c_obj);
  long* end_c = LONGP(end_c_obj);

  int ngmax = ((end_c[0] - beg_c[0]) *
               (end_c[1] - beg_c[1]) *
               (end_c[2] - beg_c[2]));

  int* G_B = INTP(G_b_obj);
  int nB = PyArray_DIMS(G_b_obj)[0];

  // Only complete (begin, end) pairs are used, so an odd-length array
  // cannot cause a read past its end.
  int ng = 0;
  for (int b = 0; b + 1 < nB; b += 2)
    ng += G_B[b+1] - G_B[b];

  npy_intp gm_dims[2] = {ng, nm};
  PyArrayObject* indices_gm_obj =
    (PyArrayObject*)PyArray_SimpleNew(2, gm_dims, NPY_INT);
  // Allocation can fail (out of memory, or a negative ng from
  // inconsistent ranges); NumPy has set the exception in that case.
  if (indices_gm_obj == NULL)
    return NULL;

  int* p = INTP(indices_gm_obj);
  for (int b = 0; b + 1 < nB; b += 2) {
    int Ga = G_B[b], Gb = G_B[b+1];
    for (int G = Ga; G < Gb; G++)
      for (int m = 0; m < nm; m++)
        *p++ = m * ngmax + G;
  }

  // PyObjects created in the C code will be initialized with a refcount
  // of 1, for which reason we'll have to decref them when done here
  PyObject* values = Py_BuildValue("(Oii)", indices_gm_obj, ng, nm);
  Py_DECREF(indices_gm_obj);
  return values;
}
(PyCFunction)spline_get_angular_momentum_number, METH_VARARGS, 0}, {"get_value_and_derivative", (PyCFunction)spline_get_value_and_derivative, METH_VARARGS, 0}, {"get_indices_from_zranges", (PyCFunction)spline_get_indices_from_zranges, METH_VARARGS, 0}, {NULL, NULL, 0, NULL} }; static PyObject * spline_call(SplineObject *obj, PyObject *args, PyObject *kwargs) { double r; if (!PyArg_ParseTuple(args, "d", &r)) return NULL; return Py_BuildValue("d", bmgs_splinevalue(&obj->spline, r)); } PyTypeObject SplineType = { PyVarObject_HEAD_INIT(NULL, 0) "Spline", sizeof(SplineObject), 0, (destructor)spline_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, (ternaryfunc)spline_call, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "Spline object", 0, 0, 0, 0, 0, 0, spline_methods }; PyObject * NewSplineObject(PyObject *self, PyObject *args) { int l; double rcut; PyArrayObject* farray; if (!PyArg_ParseTuple(args, "idO", &l, &rcut, &farray)) return NULL; SplineObject *spline = PyObject_NEW(SplineObject, &SplineType); if (spline == NULL) return NULL; int nbins = PyArray_DIMS(farray)[0] - 1; double dr = rcut / nbins; spline->spline = bmgs_spline(l, dr, nbins, DOUBLEP(farray)); return (PyObject*)spline; } gpaw-24.1.0/c/spline.h000066400000000000000000000003431454550013000144160ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. */ #include "extensions.h" #include "bmgs/bmgs.h" typedef struct { PyObject_HEAD bmgsspline spline; } SplineObject; gpaw-24.1.0/c/symmetry.c000066400000000000000000000267511454550013000150230ustar00rootroot00000000000000/* Copyright (C) 2010-2011 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #include "extensions.h" PyObject* GG_shuffle(PyObject *self, PyObject *args) { PyArrayObject* G_G_obj; int sign; PyArrayObject* A_GG_obj; PyArrayObject* B_GG_obj; // def GG_shuffle(G_G:int32 array, sign:int, A_GG:complex128 array, B_GG:complex128 array) if (!PyArg_ParseTuple(args, "OiOO", &G_G_obj, &sign, &A_GG_obj, &B_GG_obj)) return NULL; int nG = PyArray_DIMS(G_G_obj)[0]; // Check dimensions if ((nG != PyArray_DIMS(B_GG_obj)[0]) || (nG != PyArray_DIMS(B_GG_obj)[1]) || (nG != PyArray_DIMS(A_GG_obj)[0]) || (nG != PyArray_DIMS(A_GG_obj)[1])) { PyErr_SetString(PyExc_TypeError, "Unmatched dimensions at GG_shuffle."); return NULL; } // Check input types if ((PyArray_TYPE(B_GG_obj) != NPY_COMPLEX128) || (PyArray_TYPE(A_GG_obj) != NPY_COMPLEX128)) { PyErr_SetString(PyExc_TypeError, "Expected complex arrays."); return NULL; } if (PyArray_TYPE(G_G_obj) != NPY_INT) { PyErr_SetString(PyExc_TypeError, "G_G expected to be an integer array."); return NULL; } if (!PyArray_IS_C_CONTIGUOUS(B_GG_obj)) { PyErr_SetString(PyExc_TypeError, "B_GG need to be c-contiguous."); return NULL; } if (!((sign == 1) || (sign == -1))) { PyErr_SetString(PyExc_TypeError, "Sign must be 1 or -1."); return NULL; } int* G0_G = (int*)malloc(nG * sizeof(int)); int* G1_G = (int*)malloc(nG * sizeof(int)); npy_int32* G_G = (npy_int32*)PyArray_DATA(G_G_obj); int stride0 = PyArray_STRIDES(A_GG_obj)[0]; int stride1 = PyArray_STRIDES(A_GG_obj)[1]; for (int G=0; G < nG; G++) { if (sign==1) { G0_G[G] = G_G[G] * stride0; G1_G[G] = G_G[G] * stride1; } else // Transpose { G0_G[G] = G_G[G] * stride1; G1_G[G] = G_G[G] * stride0; } } double complex* A_GG = (double complex*)PyArray_DATA(A_GG_obj); double complex* B_GG = (double complex*)PyArray_DATA(B_GG_obj); for (int G0=0; G0 tol) continue; double p1 = q1 - bzk_kc[k2 * 3 + 1]; if (fabs(p1 - round(p1)) > tol) continue; double p2 = q2 - bzk_kc[k2 * 3 + 2]; if (fabs(p2 - round(p2)) > tol) continue; bz2bz_ks[k1 * nsym + s] = k2; break; } } } Py_RETURN_NONE; } 
gpaw-24.1.0/c/tetra.c000066400000000000000000000074121454550013000142420ustar00rootroot00000000000000#include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" int compare_doubles (const void *a, const void *b) { const double *da = (const double *) a; const double *db = (const double *) b; return (*da > *db) - (*da < *db); } PyObject* tetrahedron_weight(PyObject *self, PyObject *args) { PyArrayObject* epsilon_k; int K; PyArrayObject* allsimplices_sk; PyArrayObject* simplices_s; PyArrayObject* Win_w; PyArrayObject* omega_w; PyArrayObject* vol_s; double f10, f20, f21, f30, f31, f32; double f01, f02, f12, f03, f13, f23; double omega; if (!PyArg_ParseTuple(args, "OOiOOOO", &epsilon_k, &allsimplices_sk, &K, &simplices_s, &Win_w, &omega_w, &vol_s)) return NULL; int nsimplex = PyArray_DIMS(simplices_s)[0]; int nw = PyArray_DIMS(omega_w)[0]; double* e_k = (double*)PyArray_DATA(epsilon_k); double* o_w = (double*)PyArray_DATA(omega_w); double* W_w = (double*)PyArray_DATA(Win_w); long* s_s = (long*)PyArray_DATA(simplices_s); int* alls_sk = (int*)PyArray_DATA(allsimplices_sk); double* v_s = (double*)PyArray_DATA(vol_s); double* et_k = GPAW_MALLOC(double, 4); double gw = 0; double Iw = 0; double delta = 0; int relk = 0; double ek = 0; for (int s = 0; s < nsimplex; s++) { relk = 0; for (int k = 0; k < 4; k++) { et_k[k] = e_k[alls_sk[s_s[s] * 4 + k]]; } ek = e_k[K]; for (int k = 0; k < 4; k++) { if (et_k[k] < ek) { relk += 1; } } qsort(et_k, 4, sizeof (double), compare_doubles); delta = et_k[3] - et_k[0]; for (int w = 0; w < nw; w++) { Iw = 0; gw = 0; omega = o_w[w]; f10 = (omega - et_k[0]) / (et_k[1] - et_k[0]); f20 = (omega - et_k[0]) / (et_k[2] - et_k[0]); f21 = (omega - et_k[1]) / (et_k[2] - et_k[1]); f30 = (omega - et_k[0]) / (et_k[3] - et_k[0]); f31 = (omega - et_k[1]) / (et_k[3] - et_k[1]); f32 = (omega - et_k[2]) / (et_k[3] - et_k[2]); f01 = 1 - f10; f02 = 1 - f20; f03 = 1 - f30; f12 = 1 - f21; f13 = 1 - f31; f23 = 1 - 
f32; if (et_k[1] != et_k[0] && et_k[0] <= omega && omega <= et_k[1]) { gw = 3 * f20 * f30 / (et_k[1] - et_k[0]); switch (relk) { case 0: Iw = (f01 + f02 + f03) / 3; break; case 1: Iw = f10 / 3; break; case 2: Iw = f20 / 3; break; case 3: Iw = f30 / 3; break; } } else if (et_k[1] != et_k[2] && et_k[1] < omega && omega < et_k[2]) { gw = 3 / delta * (f12 * f20 + f21 * f13); switch (relk) { case 0: Iw = f03 / 3 + f02 * f20 * f12 / (gw * delta); break; case 1: Iw = f12 / 3 + f13 * f13 * f21 / (gw * delta); break; case 2: Iw = f21 / 3 + f20 * f20 * f12 / (gw * delta); break; case 3: Iw = f30 / 3 + f31 * f13 * f21 / (gw * delta); break; } } else if (et_k[2] != et_k[3] && et_k[2] <= omega && omega <= et_k[3]) { gw = 3 * f03 * f13 / (et_k[3] - et_k[2]); switch (relk) { case 0: Iw = f03 / 3; break; case 1: Iw = f13 / 3; break; case 2: Iw = f23 / 3; break; case 3: Iw = (f30 + f31 + f32) / 3; break; } } else { continue; } W_w[w] += v_s[s] * Iw * gw; } } free(et_k); Py_RETURN_NONE; } gpaw-24.1.0/c/threading.h000066400000000000000000000023731454550013000150760ustar00rootroot00000000000000/* Copyright (C) 2009-2012 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. * Helper macro for threading */ #ifndef __THREADED_H #define __THREADED_H /* * Partitions a range of indices among threads. * * n the number of elements in the range * tn the number of threads * tid thread id * s start index * e end index */ #define SHARE_WORK(n, tn, tid, s, e) do { \ int q = (n) / (tn); \ int r = (n) % (tn); /* 0 <= r < q */ \ *(s) = q * (tid); \ *(e) = q * ((tid) + 1); \ if (r > 0) { \ if (r > (tid)) { \ /* Assing this thread one element more. */ \ *(s) += (tid); \ *(e) += (tid) + 1; \ } else { \ *(s) += r; \ *(e) += r; \ } \ } \ } while (0) #endif /* ! 
__THREADED_H */ gpaw-24.1.0/c/transformers.c000066400000000000000000000162311454550013000156470ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2005-2020 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. */ #include #ifdef _OPENMP #include #endif #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" #include "bc.h" #include "mympi.h" #include "bmgs/bmgs.h" #include "threading.h" #define __TRANSFORMERS_C #include "transformers.h" #undef __TRANSFORMERS_C #ifdef GPAW_GPU #include "gpu/gpu.h" #include "gpu/bmgs.h" PyObject* Transformer_apply_gpu(TransformerObject *self, PyObject *args); #endif static void Transformer_dealloc(TransformerObject *self) { #ifdef GPAW_GPU if (self->use_gpu) { transformer_dealloc_gpu(0); bc_dealloc_gpu(0); } #endif free(self->bc); PyObject_DEL(self); } // The actual computation routine for interpolation and restriction // operations. 
The routine is used also in C-preconditioner void transapply_worker(TransformerObject *self, int chunksize, int start, int end, int thread_id, int nthreads, const double* in, double* out, bool real, const double_complex* ph) { boundary_conditions* bc = self->bc; const int* size1 = bc->size1; const int* size2 = bc->size2; int ng = bc->ndouble * size1[0] * size1[1] * size1[2]; int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2]; double* sendbuf = GPAW_MALLOC(double, bc->maxsend * chunksize); double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * chunksize); double* buf = GPAW_MALLOC(double, ng2 * chunksize); int buf2size = ng2; if (self->interpolate) buf2size *= 16; else buf2size /= 2; double* buf2 = GPAW_MALLOC(double, buf2size * chunksize); MPI_Request recvreq[2]; MPI_Request sendreq[2]; const double* my_in; double* my_out; int out_ng = bc->ndouble * self->size_out[0] * self->size_out[1] * self->size_out[2]; for (int n = start; n < end; n += chunksize) { if (n + chunksize >= end && chunksize > 1) chunksize = end - n; my_in = in + n * ng; my_out = out + n * out_ng; for (int i = 0; i < 3; i++) { bc_unpack1(bc, my_in, buf, i, recvreq, sendreq, recvbuf, sendbuf, ph + 2 * i, thread_id, 1); bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, 1); } for (int m = 0; m < chunksize; m++) { if (real) { if (self->interpolate) bmgs_interpolate(self->k, self->skip, buf + m * ng2, bc->size2, my_out + m * out_ng, buf2 + m * buf2size); else bmgs_restrict(self->k, buf + m * ng2, bc->size2, my_out + m * out_ng, buf2 + m * buf2size); } else { if (self->interpolate) bmgs_interpolatez(self->k, self->skip, (double_complex*)(buf + m * ng2), bc->size2, (double_complex*)(my_out + m * out_ng), (double_complex*)(buf2 + m * buf2size)); else bmgs_restrictz(self->k, (double_complex*)(buf + m * ng2), bc->size2, (double_complex*)(my_out + m * out_ng), (double_complex*)(buf2 + m * buf2size)); } } } free(buf2); free(buf); free(recvbuf); free(sendbuf); } static PyObject* 
Transformer_apply(TransformerObject *self, PyObject *args) { PyArrayObject* input; PyArrayObject* output; PyArrayObject* phases = 0; if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases)) return NULL; int nin = 1; if (PyArray_NDIM(input) == 4) nin = PyArray_DIMS(input)[0]; boundary_conditions* bc = self->bc; const double* in = DOUBLEP(input); double* out = DOUBLEP(output); bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE); const double_complex* ph = (real ? 0 : COMPLEXP(phases)); int chunksize = 1; if (getenv("GPAW_MPI_OPTIMAL_MSG_SIZE") != NULL) { int opt_msg_size = atoi(getenv("GPAW_MPI_OPTIMAL_MSG_SIZE")); if (bc->maxsend > 0 ) chunksize = opt_msg_size * 1024 / (bc->maxsend / 2 * (2 - (int)real) * sizeof(double)); chunksize = (chunksize > 0) ? chunksize : 1; chunksize = (chunksize < nin) ? chunksize : nin; } #ifdef _OPENMP #pragma omp parallel #endif { int thread_id = 0; int nthreads = 1; int start, end; #ifdef _OPENMP thread_id = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif SHARE_WORK(nin, nthreads, thread_id, &start, &end); transapply_worker(self, chunksize, start, end, thread_id, nthreads, in, out, real, ph); } // omp parallel for Py_RETURN_NONE; } static PyObject * Transformer_get_async_sizes(TransformerObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; #ifdef GPAW_ASYNC return Py_BuildValue("(ii)", 1, GPAW_ASYNC_D); #else return Py_BuildValue("(ii)", 0, GPAW_ASYNC_D); #endif } static PyMethodDef Transformer_Methods[] = { {"apply", (PyCFunction)Transformer_apply, METH_VARARGS, NULL}, #ifdef GPAW_GPU {"apply_gpu", (PyCFunction)Transformer_apply_gpu, METH_VARARGS, NULL}, #endif {"get_async_sizes", (PyCFunction)Transformer_get_async_sizes, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} }; PyTypeObject TransformerType = { PyVarObject_HEAD_INIT(NULL, 0) "Transformer", sizeof(TransformerObject), 0, (destructor)Transformer_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | 
Py_TPFLAGS_BASETYPE, "Transformer object", 0, 0, 0, 0, 0, 0, Transformer_Methods }; PyObject * NewTransformerObject(PyObject *obj, PyObject *args) { PyArrayObject* size_in; PyArrayObject* size_out; int k; PyArrayObject* paddings; PyArrayObject* npaddings; PyArrayObject* skip; PyArrayObject* neighbors; int real; PyObject* comm_obj; int interpolate; int use_gpu = 0; if (!PyArg_ParseTuple(args, "OOiOOOOiOi|i", &size_in, &size_out, &k, &paddings, &npaddings, &skip, &neighbors, &real, &comm_obj, &interpolate, &use_gpu)) return NULL; TransformerObject* self = PyObject_NEW(TransformerObject, &TransformerType); if (self == NULL) return NULL; self->k = k; self->interpolate = interpolate; MPI_Comm comm = MPI_COMM_NULL; if (comm_obj != Py_None) comm = ((MPIObject*)comm_obj)->comm; const long (*nb)[2] = (const long (*)[2])LONGP(neighbors); const long (*pad)[2] = (const long (*)[2])LONGP(paddings); const long (*npad)[2] = (const long (*)[2])LONGP(npaddings); const long (*skp)[2] = (const long (*)[2])LONGP(skip); self->bc = bc_init(LONGP(size_in), pad, npad, nb, comm, real, 0); for (int c = 0; c < 3; c++) self->size_out[c] = LONGP(size_out)[c]; for (int c = 0; c < 3; c++) for (int d = 0; d < 2; d++) self->skip[c][d] = (int)skp[c][d]; #ifdef GPAW_GPU self->use_gpu = use_gpu; if (self->use_gpu) { transformer_init_gpu(self); } #endif return (PyObject*)self; } gpaw-24.1.0/c/transformers.h000066400000000000000000000020561454550013000156540ustar00rootroot00000000000000#ifndef __TRANSFORMERS_H #define __TRANSFORMERS_H /* Copyright (C) 2009-2012 CSC - IT Center for Science Ltd. * Please see the accompanying LICENSE file for further information. 
*/ #include "bc.h" #ifdef GPAW_ASYNC #define GPAW_ASYNC_D 3 #else #define GPAW_ASYNC_D 1 #endif #ifdef __TRANSFORMERS_C typedef struct { PyObject_HEAD boundary_conditions* bc; int p; int k; bool interpolate; MPI_Request recvreq[2]; MPI_Request sendreq[2]; int skip[3][2]; int size_out[3]; /* Size of the output grid */ #ifdef GPAW_GPU int use_gpu; #endif } TransformerObject; #else // Provide an opaque type for routines outside transformers.c struct _TransformerObject; typedef struct _TransformerObject TransformerObject; #endif #ifdef GPAW_GPU void transformer_init_gpu(TransformerObject *self); void transformer_dealloc_gpu(int force); #endif void transapply_worker(TransformerObject *self, int chunksize, int start, int end, int thread_id, int nthreads, const double* in, double* out, bool real, const double_complex* ph); #endif gpaw-24.1.0/c/utilities.c000066400000000000000000000654261454550013000151470ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2008 CAMd * Copyright (C) 2008-2010 CSC - IT Center for Science Ltd. * Copyright (C) 2011 Argonne National Laboratory * Please see the accompanying LICENSE file for further information. 
*/ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "extensions.h" #include #include #ifdef __DARWIN_UNIX03 /* Allows for special MaxOS magic */ #include #endif #ifdef _OPENMP #include #endif #ifdef GPAW_HPM void HPM_Start(char *); void HPM_Stop(char *); void summary_start(void); void summary_stop(void); PyObject* ibm_hpm_start(PyObject *self, PyObject *args) { char* s; if (!PyArg_ParseTuple(args, "s", &s)) return NULL; HPM_Start(s); Py_RETURN_NONE; } PyObject* ibm_hpm_stop(PyObject *self, PyObject *args) { char* s; if (!PyArg_ParseTuple(args, "s", &s)) return NULL; HPM_Stop(s); Py_RETURN_NONE; } PyObject* ibm_mpi_start(PyObject *self) { summary_start(); Py_RETURN_NONE; } PyObject* ibm_mpi_stop(PyObject *self) { summary_stop(); Py_RETURN_NONE; } #endif #ifdef CRAYPAT #include PyObject* craypat_region_begin(PyObject *self, PyObject *args) { int n; char* s; if (!PyArg_ParseTuple(args, "is", &n, &s)) return NULL; PAT_region_begin(n, s); Py_RETURN_NONE; } PyObject* craypat_region_end(PyObject *self, PyObject *args) { int n; if (!PyArg_ParseTuple(args, "i", &n)) return NULL; PAT_region_end(n); Py_RETURN_NONE; } #endif PyObject* get_num_threads(PyObject *self, PyObject *args) { int nthreads = 1; #ifdef _OPENMP #pragma omp parallel { nthreads = omp_get_num_threads(); } #endif return Py_BuildValue("i", nthreads); } #ifdef PARALLEL #include struct eval { double val; int rank; }; static void coll_print(FILE *fp, const char *label, double val, int print_aggregate, MPI_Comm Comm){ double sum; struct eval in; struct eval out; int rank, numranks; MPI_Comm_size(Comm, &numranks); MPI_Comm_rank(Comm, &rank); in.val=val; in.rank=rank; MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm); if(rank==0) { if(print_aggregate) fprintf(fp,"#%19s %14.3f %10.3f ",label,sum,sum/numranks); else fprintf(fp,"#%19s %10.3f ",label,sum/numranks); } MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm); if(rank==0){ fprintf(fp,"%4d %10.3f ", 
out.rank, out.val); } MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm); if(rank==0){ fprintf(fp,"%4d %10.3f\n",out.rank, out.val); } } // Utilities for performance measurement with PAPI #ifdef GPAW_PAPI #include #define NUM_PAPI_EV 1 static long_long papi_start_usec_p; static long_long papi_start_usec_r; // Returns PAPI_dmem_info structure in Python dictionary // Units used by PAPI are kB PyObject* papi_mem_info(PyObject *self, PyObject *args) { PAPI_dmem_info_t dmem; PyObject* py_dmem; PAPI_get_dmem_info(&dmem); py_dmem = PyDict_New(); PyDict_SetItemString(py_dmem, "peak", PyLong_FromLongLong(dmem.peak)); PyDict_SetItemString(py_dmem, "size", PyLong_FromLongLong(dmem.size)); PyDict_SetItemString(py_dmem, "resident", PyLong_FromLongLong(dmem.resident)); PyDict_SetItemString(py_dmem, "high_water_mark", PyLong_FromLongLong(dmem.high_water_mark)); PyDict_SetItemString(py_dmem, "shared", PyLong_FromLongLong(dmem.shared)); PyDict_SetItemString(py_dmem, "text", PyLong_FromLongLong(dmem.text)); PyDict_SetItemString(py_dmem, "library", PyLong_FromLongLong(dmem.library)); PyDict_SetItemString(py_dmem, "heap", PyLong_FromLongLong(dmem.heap)); PyDict_SetItemString(py_dmem, "stack", PyLong_FromLongLong(dmem.stack)); PyDict_SetItemString(py_dmem, "pagesize", PyLong_FromLongLong(dmem.pagesize)); PyDict_SetItemString(py_dmem, "pte", PyLong_FromLongLong(dmem.pte)); return py_dmem; } int gpaw_perf_init() { int events[NUM_PAPI_EV]; events[0] = PAPI_FP_OPS; // events[1] = PAPI_L1_DCM; // events[2] = PAPI_L1_DCH; // events[3] = PAPI_TOT_INS; PAPI_start_counters(events, NUM_PAPI_EV); papi_start_usec_r = PAPI_get_real_usec(); papi_start_usec_p = PAPI_get_virt_usec(); return 0; } void gpaw_perf_finalize() { long long papi_values[NUM_PAPI_EV]; double rtime,ptime; double avegflops; double gflop_opers; PAPI_dmem_info_t dmem; int error = 0; double l1hitratio; long_long papi_end_usec_p; long_long papi_end_usec_r; int rank, numranks; MPI_Comm Comm = MPI_COMM_WORLD; //get papi 
info, first time it intializes PAPI counters papi_end_usec_r = PAPI_get_real_usec(); papi_end_usec_p = PAPI_get_virt_usec(); MPI_Comm_size(Comm, &numranks); MPI_Comm_rank(Comm, &rank); FILE *fp; if (rank == 0) fp = fopen("gpaw_perf.log", "w"); else fp = NULL; if(PAPI_read_counters(papi_values, NUM_PAPI_EV) != PAPI_OK) error++; if(PAPI_get_dmem_info(&dmem) != PAPI_OK) error++; rtime=(double)(papi_end_usec_r - papi_start_usec_r)/1e6; ptime=(double)(papi_end_usec_p - papi_start_usec_p)/1e6; avegflops=(double)papi_values[0]/rtime/1e9; gflop_opers = (double)papi_values[0]/1e9; // l1hitratio=100.0*(double)papi_values[1]/(papi_values[0] + papi_values[1]); if (rank==0 ) { fprintf(fp,"######## GPAW PERFORMANCE REPORT (PAPI) ########\n"); fprintf(fp,"# MPI tasks %d\n", numranks); fprintf(fp,"# aggregated average min(rank/val) max(rank/val) \n"); } coll_print(fp, "Real time (s)", rtime, 1, Comm); coll_print(fp, "Process time (s)", ptime, 1, Comm); coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm); coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm); // coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm); coll_print(fp, "Peak mem size (MB)", (double)dmem.peak/1.0e3, 0, Comm ); coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark/1.0e3 , 0, Comm); if(rank==0) { fflush(fp); fclose(fp); } } #elif GPAW_HPM void HPM_Start(char *); int gpaw_perf_init() { HPM_Start("GPAW"); return 0; } void gpaw_perf_finalize() { HPM_Stop("GPAW"); } #else // Use just MPI_Wtime static double t0; int gpaw_perf_init(void) { t0 = MPI_Wtime(); return 0; } void gpaw_perf_finalize(void) { double rtime; int rank, numranks; MPI_Comm Comm = MPI_COMM_WORLD; MPI_Comm_size(Comm, &numranks); MPI_Comm_rank(Comm, &rank); double t1 = MPI_Wtime(); rtime = t1 - t0; FILE *fp; if (rank == 0) fp = fopen("gpaw_perf.log", "w"); else fp = NULL; if (rank==0 ) { fprintf(fp,"######## GPAW PERFORMANCE REPORT (MPI_Wtime) ########\n"); fprintf(fp,"# MPI tasks %d\n", numranks); fprintf(fp,"# aggregated 
average min(rank/val) max(rank/val) \n"); } coll_print(fp, "Real time (s)", rtime, 1, Comm); if(rank==0) { fflush(fp); fclose(fp); } } #endif #endif // returns the distance between two 3d double vectors double distance(double *a, double *b) { double sum = 0; double diff; for (int c = 0; c < 3; c++) { diff = a[c] - b[c]; sum += diff*diff; } return sqrt(sum); } PyObject* add_to_density_gpu(PyObject* self, PyObject* args); // Equivalent to: // // nt_R += f * abs(psit_R)**2 // PyObject* add_to_density(PyObject *self, PyObject *args) { double f; PyArrayObject* psit_R_obj; PyArrayObject* nt_R_obj; if (!PyArg_ParseTuple(args, "dOO", &f, &psit_R_obj, &nt_R_obj)) return NULL; if (PyArray_Check(psit_R_obj)) { const double* psit_R = PyArray_DATA(psit_R_obj); double* nt_R = PyArray_DATA(nt_R_obj); int n = PyArray_SIZE(nt_R_obj); if (PyArray_ITEMSIZE(psit_R_obj) == 8) { // Real wave functions // psit_R can be a view of a larger array (psit_R = tmp[:, :, :dim2]) int stride2 = PyArray_STRIDE(psit_R_obj, 1) / 8; int dim2 = PyArray_DIM(psit_R_obj, 2); int j = 0; for (int i = 0; i < n;) { for (int k = 0; k < dim2; i++, j++, k++) nt_R[i] += f * psit_R[j] * psit_R[j]; j += stride2 - dim2; } } else // Complex wave functions for (int i = 0; i < n; i++) nt_R[i] += f * (psit_R[2 * i] * psit_R[2 * i] + psit_R[2 * i + 1] * psit_R[2 * i + 1]); } else { // Must be cupy #ifdef GPAW_GPU add_to_density_gpu(self, args); #else PyErr_SetString(PyExc_RuntimeError, "Unknown array type to add_to_density."); return NULL; #endif } Py_RETURN_NONE; } PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args) { Py_complex A_obj; PyArrayObject* r_cG_obj; PyArrayObject* r0_c_obj; Py_complex sigma_obj; // imaginary part ignored PyArrayObject* k_c_obj; PyArrayObject* gs_G_obj; if (!PyArg_ParseTuple(args, "DOODOO", &A_obj, &r_cG_obj, &r0_c_obj, &sigma_obj, &k_c_obj, &gs_G_obj)) return NULL; int C, G; C = PyArray_DIMS(r_cG_obj)[0]; G = PyArray_DIMS(r_cG_obj)[1]; for (int i = 2; i < PyArray_NDIM(r_cG_obj); 
i++) G *= PyArray_DIMS(r_cG_obj)[i]; double* r_cG = DOUBLEP(r_cG_obj); // XXX not ideally strided double* r0_c = DOUBLEP(r0_c_obj); double dr2, kr, alpha = -0.5/pow(sigma_obj.real, 2); int gammapoint = 1; double* k_c = DOUBLEP(k_c_obj); for (int c=0; ctype_num == NPY_DOUBLE) { double* gs_G = DOUBLEP(gs_G_obj); if(gammapoint) for(int g=0; g0) for(int g=0; g 1 ? } } else { double_complex* gs_G = COMPLEXP(gs_G_obj); double_complex A = A_obj.real+I*A_obj.imag; if(gammapoint) for(int g=0; g0) for(int g=0; g 1 ? } } Py_RETURN_NONE; } PyObject* pack(PyObject *self, PyObject *args) { PyArrayObject* a_obj; if (!PyArg_ParseTuple(args, "O", &a_obj)) return NULL; a_obj = PyArray_GETCONTIGUOUS(a_obj); int n = PyArray_DIMS(a_obj)[0]; npy_intp dims[1] = {n * (n + 1) / 2}; int typenum = PyArray_DESCR(a_obj)->type_num; PyArrayObject* b_obj = (PyArrayObject*) PyArray_SimpleNew(1, dims, typenum); if (b_obj == NULL) return NULL; if (typenum == NPY_DOUBLE) { double* a = (double*)PyArray_DATA(a_obj); double* b = (double*)PyArray_DATA(b_obj); for (int r = 0; r < n; r++) { *b++ = a[r + n * r]; for (int c = r + 1; c < n; c++) *b++ = a[r + n * c] + a[c + n * r]; } } else { double complex* a = (double complex*)PyArray_DATA(a_obj); double complex* b = (double complex*)PyArray_DATA(b_obj); for (int r = 0; r < n; r++) { *b++ = a[r + n * r]; for (int c = r + 1; c < n; c++) *b++ = a[r + n * c] + a[c + n * r]; } } Py_DECREF(a_obj); PyObject* value = Py_BuildValue("O", b_obj); Py_DECREF(b_obj); return value; } PyObject* unpack(PyObject *self, PyObject *args) { PyArrayObject* ap; PyArrayObject* a; if (!PyArg_ParseTuple(args, "OO", &ap, &a)) return NULL; int n = PyArray_DIMS(a)[0]; double* datap = DOUBLEP(ap); double* data = DOUBLEP(a); for (int r = 0; r < n; r++) for (int c = r; c < n; c++) { double d = *datap++; data[c + r * n] = d; data[r + c * n] = d; } Py_RETURN_NONE; } PyObject* unpack_complex(PyObject *self, PyObject *args) { PyArrayObject* ap; PyArrayObject* a; if (!PyArg_ParseTuple(args, 
"OO", &ap, &a)) return NULL; int n = PyArray_DIMS(a)[0]; double_complex* datap = COMPLEXP(ap); double_complex* data = COMPLEXP(a); for (int r = 0; r < n; r++) for (int c = r; c < n; c++) { double_complex d = *datap++; data[c + r * n] = d; data[r + c * n] = conj(d); } Py_RETURN_NONE; } PyObject* hartree(PyObject *self, PyObject *args) { int l; PyArrayObject* nrdr_obj; PyArrayObject* r_obj; PyArrayObject* vr_obj; if (!PyArg_ParseTuple(args, "iOOO", &l, &nrdr_obj, &r_obj, &vr_obj)) return NULL; const int M = PyArray_DIM(nrdr_obj, 0); const double* nrdr = DOUBLEP(nrdr_obj); const double* r = DOUBLEP(r_obj); double* vr = DOUBLEP(vr_obj); double p = 0.0; double q = 0.0; for (int g = M - 1; g > 0; g--) { double R = r[g]; double rl = pow(R, l); double dp = nrdr[g] / rl; double rlp1 = rl * R; double dq = nrdr[g] * rlp1; vr[g] = (p + 0.5 * dp) * rlp1 - (q + 0.5 * dq) / rl; p += dp; q += dq; } vr[0] = 0.0; double f = 4.0 * M_PI / (2 * l + 1); for (int g = 1; g < M; g++) { double R = r[g]; vr[g] = f * (vr[g] + q / pow(R, l)); } Py_RETURN_NONE; } PyObject* localize(PyObject *self, PyObject *args) { PyArrayObject* Z_nnc; PyArrayObject* U_nn; if (!PyArg_ParseTuple(args, "OO", &Z_nnc, &U_nn)) return NULL; int n = PyArray_DIMS(U_nn)[0]; double complex (*Z)[n][3] = (double complex (*)[n][3])COMPLEXP(Z_nnc); double (*U)[n] = (double (*)[n])DOUBLEP(U_nn); double value = 0.0; for (int a = 0; a < n; a++) { for (int b = a + 1; b < n; b++) { double complex* Zaa = Z[a][a]; double complex* Zab = Z[a][b]; double complex* Zbb = Z[b][b]; double x = 0.0; double y = 0.0; for (int c = 0; c < 3; c++) { x += (0.25 * creal(Zbb[c] * conj(Zbb[c])) + 0.25 * creal(Zaa[c] * conj(Zaa[c])) - 0.5 * creal(Zaa[c] * conj(Zbb[c])) - creal(Zab[c] * conj(Zab[c]))); y += creal((Zaa[c] - Zbb[c]) * conj(Zab[c])); } double t = 0.25 * atan2(y, x); double C = cos(t); double S = sin(t); for (int i = 0; i < a; i++) for (int c = 0; c < 3; c++) { double complex Ziac = Z[i][a][c]; Z[i][a][c] = C * Ziac + S * Z[i][b][c]; 
Z[i][b][c] = C * Z[i][b][c] - S * Ziac; } for (int c = 0; c < 3; c++) { double complex Zaac = Zaa[c]; double complex Zabc = Zab[c]; double complex Zbbc = Zbb[c]; Zaa[c] = C * C * Zaac + 2 * C * S * Zabc + S * S * Zbbc; Zbb[c] = C * C * Zbbc - 2 * C * S * Zabc + S * S * Zaac; Zab[c] = S * C * (Zbbc - Zaac) + (C * C - S * S) * Zabc; } for (int i = a + 1; i < b; i++) for (int c = 0; c < 3; c++) { double complex Zaic = Z[a][i][c]; Z[a][i][c] = C * Zaic + S * Z[i][b][c]; Z[i][b][c] = C * Z[i][b][c] - S * Zaic; } for (int i = b + 1; i < n; i++) for (int c = 0; c < 3; c++) { double complex Zaic = Z[a][i][c]; Z[a][i][c] = C * Zaic + S * Z[b][i][c]; Z[b][i][c] = C * Z[b][i][c] - S * Zaic; } for (int i = 0; i < n; i++) { double Uia = U[i][a]; U[i][a] = C * Uia + S * U[i][b]; U[i][b] = C * U[i][b] - S * Uia; } } double complex* Zaa = Z[a][a]; for (int c = 0; c < 3; c++) value += creal(Zaa[c] * conj(Zaa[c])); } return Py_BuildValue("d", value); } PyObject* spherical_harmonics(PyObject *self, PyObject *args) { int l; PyArrayObject* R_obj_c; PyArrayObject* Y_obj_m; if (!PyArg_ParseTuple(args, "iOO", &l, &R_obj_c, &Y_obj_m)) return NULL; double* R_c = DOUBLEP(R_obj_c); double* Y_m = DOUBLEP(Y_obj_m); if (l == 0) Y_m[0] = 0.28209479177387814; else { double x = R_c[0]; double y = R_c[1]; double z = R_c[2]; if (l == 1) { Y_m[0] = 0.48860251190291992 * y; Y_m[1] = 0.48860251190291992 * z; Y_m[2] = 0.48860251190291992 * x; } else { double r2 = x*x+y*y+z*z; if (l == 2) { Y_m[0] = 1.0925484305920792 * x*y; Y_m[1] = 1.0925484305920792 * y*z; Y_m[2] = 0.31539156525252005 * (3*z*z-r2); Y_m[3] = 1.0925484305920792 * x*z; Y_m[4] = 0.54627421529603959 * (x*x-y*y); } else if (l == 3) { Y_m[0] = 0.59004358992664352 * (-y*y*y+3*x*x*y); Y_m[1] = 2.8906114426405538 * x*y*z; Y_m[2] = 0.45704579946446577 * (-y*r2+5*y*z*z); Y_m[3] = 0.3731763325901154 * (5*z*z*z-3*z*r2); Y_m[4] = 0.45704579946446577 * (5*x*z*z-x*r2); Y_m[5] = 1.4453057213202769 * (x*x*z-y*y*z); Y_m[6] = 0.59004358992664352 * 
(x*x*x-3*x*y*y); } else if (l == 4) { Y_m[0] = 2.5033429417967046 * (x*x*x*y-x*y*y*y); Y_m[1] = 1.7701307697799307 * (-y*y*y*z+3*x*x*y*z); Y_m[2] = 0.94617469575756008 * (-x*y*r2+7*x*y*z*z); Y_m[3] = 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z); Y_m[4] = 0.10578554691520431 * (-30*z*z*r2+3*r2*r2+35*z*z*z*z); Y_m[5] = 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2); Y_m[6] = 0.47308734787878004 * (-x*x*r2+7*x*x*z*z+y*y*r2-7*y*y*z*z); Y_m[7] = 1.7701307697799307 * (x*x*x*z-3*x*y*y*z); Y_m[8] = 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y); } else if (l == 5) { Y_m[0] = 0.65638205684017015 * (y*y*y*y*y+5*x*x*x*x*y-10*x*x*y*y*y); Y_m[1] = 8.3026492595241645 * (x*x*x*y*z-x*y*y*y*z); Y_m[2] = 0.48923829943525038 * (y*y*y*r2-9*y*y*y*z*z-3*x*x*y*r2+27*x*x*y*z*z); Y_m[3] = 4.7935367849733241 * (3*x*y*z*z*z-x*y*z*r2); Y_m[4] = 0.45294665119569694 * (-14*y*z*z*r2+y*r2*r2+21*y*z*z*z*z); Y_m[5] = 0.1169503224534236 * (63*z*z*z*z*z+15*z*r2*r2-70*z*z*z*r2); Y_m[6] = 0.45294665119569694 * (x*r2*r2-14*x*z*z*r2+21*x*z*z*z*z); Y_m[7] = 2.3967683924866621 * (-3*y*y*z*z*z+y*y*z*r2+3*x*x*z*z*z-x*x*z*r2); Y_m[8] = 0.48923829943525038 * (9*x*x*x*z*z-27*x*y*y*z*z-x*x*x*r2+3*x*y*y*r2); Y_m[9] = 2.0756623148810411 * (y*y*y*y*z-6*x*x*y*y*z+x*x*x*x*z); Y_m[10] = 0.65638205684017015 * (-10*x*x*x*y*y+5*x*y*y*y*y+x*x*x*x*x); } else if (l == 6) { Y_m[0] = 1.3663682103838286 * (-10*x*x*x*y*y*y+3*x*x*x*x*x*y+3*x*y*y*y*y*y); Y_m[1] = 2.3666191622317521 * (y*y*y*y*y*z-10*x*x*y*y*y*z+5*x*x*x*x*y*z); Y_m[2] = 2.0182596029148967 * (-x*x*x*y*r2+x*y*y*y*r2-11*x*y*y*y*z*z+11*x*x*x*y*z*z); Y_m[3] = 0.92120525951492349 * (-11*y*y*y*z*z*z-9*x*x*y*z*r2+33*x*x*y*z*z*z+3*y*y*y*z*r2); Y_m[4] =0.92120525951492349 * (x*y*r2*r2+33*x*y*z*z*z*z-18*x*y*z*z*r2); Y_m[5] = 0.58262136251873142 * (5*y*z*r2*r2+33*y*z*z*z*z*z-30*y*z*z*z*r2); Y_m[6] = 0.063569202267628425 * (231*z*z*z*z*z*z-5*r2*r2*r2+105*z*z*r2*r2-315*z*z*z*z*r2); Y_m[7] = 0.58262136251873142 * (-30*x*z*z*z*r2+33*x*z*z*z*z*z+5*x*z*r2*r2); Y_m[8] = 
0.46060262975746175 * (33*x*x*z*z*z*z+x*x*r2*r2-y*y*r2*r2-18*x*x*z*z*r2+18*y*y*z*z*r2-33*y*y*z*z*z*z); Y_m[9] = 0.92120525951492349 * (-3*x*x*x*z*r2-33*x*y*y*z*z*z+9*x*y*y*z*r2+11*x*x*x*z*z*z); Y_m[10] = 0.50456490072872417 * (11*y*y*y*y*z*z-66*x*x*y*y*z*z-x*x*x*x*r2+6*x*x*y*y*r2+11*x*x*x*x*z*z-y*y*y*y*r2); Y_m[11] = 2.3666191622317521 * (5*x*y*y*y*y*z+x*x*x*x*x*z-10*x*x*x*y*y*z); Y_m[12] = 0.6831841051919143 * (x*x*x*x*x*x+15*x*x*y*y*y*y-15*x*x*x*x*y*y-y*y*y*y*y*y); } else if (l == 7) { Y_m[0] = -0.707162732524596 * y*y*y*y*y*y*y + 14.8504173830165 * x*x*y*y*y*y*y + -24.7506956383609 * x*x*x*x*y*y*y + 4.95013912767217 * x*x*x*x*x*x*y; Y_m[1] = 15.8757639708114 * x*y*y*y*y*y*z + -52.919213236038 * x*x*x*y*y*y*z + 15.8757639708114 * x*x*x*x*x*y*z; Y_m[2] = 4.67024020848234 * x*x*y*y*y*y*y + -0.51891557872026 * y*y*y*y*y*y*y + 6.22698694464312 * y*y*y*y*y*z*z + 2.5945778936013 * x*x*x*x*y*y*y + -62.2698694464312 * x*x*y*y*y*z*z + -2.5945778936013 * x*x*x*x*x*x*y + 31.1349347232156 * x*x*x*x*y*z*z; Y_m[3] = 12.4539738892862 * x*y*y*y*y*y*z + -41.5132462976208 * x*y*y*y*z*z*z + -12.4539738892862 * x*x*x*x*x*y*z + 41.5132462976208 * x*x*x*y*z*z*z; Y_m[4] = 2.34688400793441 * x*x*x*x*y*y*y + 0.469376801586882 * x*x*y*y*y*y*y + -18.7750720634753 * x*x*y*y*y*z*z + -0.469376801586882 * y*y*y*y*y*y*y + 9.38753603173764 * y*y*y*y*y*z*z + -12.5167147089835 * y*y*y*z*z*z*z + 1.40813040476065 * x*x*x*x*x*x*y + -28.1626080952129 * x*x*x*x*y*z*z + 37.5501441269506 * x*x*y*z*z*z*z; Y_m[5] = 6.63799038667474 * x*x*x*x*x*y*z + 13.2759807733495 * x*x*x*y*y*y*z + -35.4026153955986 * x*x*x*y*z*z*z + 6.63799038667474 * x*y*y*y*y*y*z + -35.4026153955986 * x*y*y*y*z*z*z + 21.2415692373592 * x*y*z*z*z*z*z; Y_m[6] = -0.451658037912587 * x*x*x*x*x*x*y + -1.35497411373776 * x*x*x*x*y*y*y + 10.8397929099021 * x*x*x*x*y*z*z + -1.35497411373776 * x*x*y*y*y*y*y + 21.6795858198042 * x*x*y*y*y*z*z + -21.6795858198042 * x*x*y*z*z*z*z + -0.451658037912587 * y*y*y*y*y*y*y + 10.8397929099021 * 
y*y*y*y*y*z*z + -21.6795858198042 * y*y*y*z*z*z*z + 5.78122288528111 * y*z*z*z*z*z*z; Y_m[7] = -2.38994969192017 * x*x*x*x*x*x*z + -7.16984907576052 * x*x*x*x*y*y*z + 14.339698151521 * x*x*x*x*z*z*z + -7.16984907576052 * x*x*y*y*y*y*z + 28.6793963030421 * x*x*y*y*z*z*z + -11.4717585212168 * x*x*z*z*z*z*z + -2.38994969192017 * y*y*y*y*y*y*z + 14.339698151521 * y*y*y*y*z*z*z + -11.4717585212168 * y*y*z*z*z*z*z + 1.09254843059208 * z*z*z*z*z*z*z; Y_m[8] = -0.451658037912587 * x*x*x*x*x*x*x + -1.35497411373776 * x*x*x*x*x*y*y + 10.8397929099021 * x*x*x*x*x*z*z + -1.35497411373776 * x*x*x*y*y*y*y + 21.6795858198042 * x*x*x*y*y*z*z + -21.6795858198042 * x*x*x*z*z*z*z + -0.451658037912587 * x*y*y*y*y*y*y + 10.8397929099021 * x*y*y*y*y*z*z + -21.6795858198042 * x*y*y*z*z*z*z + 5.78122288528111 * x*z*z*z*z*z*z; Y_m[9] = -3.31899519333737 * x*x*y*y*y*y*z + -3.31899519333737 * y*y*y*y*y*y*z + 17.7013076977993 * y*y*y*y*z*z*z + -10.6207846186796 * y*y*z*z*z*z*z + 3.31899519333737 * x*x*x*x*x*x*z + 3.31899519333737 * x*x*x*x*y*y*z + -17.7013076977993 * x*x*x*x*z*z*z + 10.6207846186796 * x*x*z*z*z*z*z; Y_m[10] = -0.469376801586882 * x*x*x*x*x*y*y + -2.34688400793441 * x*x*x*y*y*y*y + 18.7750720634753 * x*x*x*y*y*z*z + -1.40813040476065 * x*y*y*y*y*y*y + 28.1626080952129 * x*y*y*y*y*z*z + -37.5501441269506 * x*y*y*z*z*z*z + 0.469376801586882 * x*x*x*x*x*x*x + -9.38753603173764 * x*x*x*x*x*z*z + 12.5167147089835 * x*x*x*z*z*z*z; Y_m[11] = 15.5674673616078 * x*x*y*y*y*y*z + -3.11349347232156 * y*y*y*y*y*y*z + 10.3783115744052 * y*y*y*y*z*z*z + 15.5674673616078 * x*x*x*x*y*y*z + -62.2698694464312 * x*x*y*y*z*z*z + -3.11349347232156 * x*x*x*x*x*x*z + 10.3783115744052 * x*x*x*x*z*z*z; Y_m[12] = 2.5945778936013 * x*x*x*y*y*y*y + -2.5945778936013 * x*y*y*y*y*y*y + 31.1349347232156 * x*y*y*y*y*z*z + 4.67024020848234 * x*x*x*x*x*y*y + -62.2698694464312 * x*x*x*y*y*z*z + -0.51891557872026 * x*x*x*x*x*x*x + 6.22698694464312 * x*x*x*x*x*z*z; Y_m[13] = -2.6459606618019 * y*y*y*y*y*y*z + 
39.6894099270285 * x*x*y*y*y*y*z + -39.6894099270285 * x*x*x*x*y*y*z + 2.6459606618019 * x*x*x*x*x*x*z; Y_m[14] = -4.95013912767217 * x*y*y*y*y*y*y + 24.7506956383609 * x*x*x*y*y*y*y + -14.8504173830165 * x*x*x*x*x*y*y + 0.707162732524596 * x*x*x*x*x*x*x; } else { PyErr_SetString(PyExc_RuntimeError, "l>7 not implemented"); return NULL; } } } Py_RETURN_NONE; } PyObject* integrate_outwards(PyObject *self, PyObject *args) { int g0; PyArrayObject* cm1_g_obj; PyArrayObject* c0_g_obj; PyArrayObject* cp1_g_obj; PyArrayObject* b_g_obj; PyArrayObject* a_g_obj; if (!PyArg_ParseTuple(args, "iOOOOO", &g0, &cm1_g_obj, &c0_g_obj, &cp1_g_obj, &b_g_obj, &a_g_obj)) return NULL; const double* cm1_g = DOUBLEP(cm1_g_obj); const double* c0_g = DOUBLEP(c0_g_obj); const double* cp1_g = DOUBLEP(cp1_g_obj); const double* b_g = DOUBLEP(b_g_obj); double* a_g = DOUBLEP(a_g_obj); for (int g = 1; g <= g0; g++) a_g[g + 1] = -(a_g[g - 1] * cm1_g[g] + a_g[g] * c0_g[g] + b_g[g]) / cp1_g[g]; Py_RETURN_NONE; } PyObject* integrate_inwards(PyObject *self, PyObject *args) { int g1, g0; PyArrayObject* c0_g_obj; PyArrayObject* cp1_g_obj; PyArrayObject* a_g_obj; if (!PyArg_ParseTuple(args, "iiOOO", &g1, &g0, &c0_g_obj, &cp1_g_obj, &a_g_obj)) return NULL; const double* c0_g = DOUBLEP(c0_g_obj); const double* cp1_g = DOUBLEP(cp1_g_obj); double* a_g = DOUBLEP(a_g_obj); const int ng = PyArray_DIM(a_g_obj, 0); for (int g = g1; g >= g0; g--) { double ag = a_g[g]; if (ag > 1e50) { for (int gg = g; gg < ng; gg++) a_g[gg] = a_g[gg] / 1e50; ag = ag / 1e50; } a_g[g - 1] = a_g[g + 1] * cp1_g[g] + ag * c0_g[g]; } Py_RETURN_NONE; } gpaw-24.1.0/c/wigner_seitz.c000066400000000000000000000030631454550013000156320ustar00rootroot00000000000000#include "extensions.h" #include double distance(double *a, double *b); // returns the squared distance between a 3d double vector // and a 3d int vector double distance3d2_di(double *a, int *b) { double sum = 0; double diff; for (int c = 0; c < 3; c++) { diff = a[c] - (double)b[c]; sum 
/* Python-callable:
 *   exterior_electron_density_region(ai, atom_c, beg_c, end_c, h_c, vdWrad)
 *
 * Fills the integer grid `ai` with 1 for points lying outside every atom
 * and 0 for points inside at least one atom.  A point counts as inside
 * atom `a` when distance(atom position, point) < vdWrad[a]; distance()
 * is declared earlier in this file and defined elsewhere.
 *
 * The grid covers indices beg_c[c] .. end_c[c]-1 along each axis c, and
 * the Cartesian coordinate of a point is (beg_c[c] + index) * h_c[c].
 * `ai` is written in C order with the last axis running fastest.
 *
 * Returns None.
 */
PyObject *exterior_electron_density_region(PyObject *self, PyObject *args)
{
    PyArrayObject* index_arr;
    PyArrayObject* atoms_arr;
    PyArrayObject* begin_arr;
    PyArrayObject* end_arr;
    PyArrayObject* spacing_arr;
    PyArrayObject* radius_arr;

    if (!PyArg_ParseTuple(args, "OOOOOO", &index_arr, &atoms_arr,
                          &begin_arr, &end_arr, &spacing_arr, &radius_arr))
        return NULL;

    long *aindex = LONGP(index_arr);
    int natoms = PyArray_DIM(atoms_arr, 0);
    double *atom_c = DOUBLEP(atoms_arr);
    long *beg = LONGP(begin_arr);
    long *end = LONGP(end_arr);
    double *h_c = DOUBLEP(spacing_arr);
    double *vdWradius = DOUBLEP(radius_arr);

    /* Number of grid points along each axis. */
    int n[3];
    for (int c = 0; c < 3; c++)
        n[c] = end[c] - beg[c];

    double pos[3];
    for (int i = 0; i < n[0]; i++) {
        pos[0] = (beg[0] + i) * h_c[0];
        for (int j = 0; j < n[1]; j++) {
            pos[1] = (beg[1] + j) * h_c[1];
            int ij = (i*n[1] + j)*n[2];
            for (int k = 0; k < n[2]; k++) {
                pos[2] = (beg[2] + k) * h_c[2];

                /* Assume the point is outside until an atom claims it. */
                long outside = 1;
                for (int a = 0; a < natoms; a++) {
                    if (distance(atom_c + a*3, pos) < vdWradius[a]) {
                        outside = 0;
                        break;  /* one enclosing atom is enough */
                    }
                }
                aindex[ij + k] = outside;
            }
        }
    }

    Py_RETURN_NONE;
}
/* Python method: relax(relax_method, func, source, nrelax[, w]).
 *
 * Performs `nrelax` relaxation sweeps on `func` (updated in place) with
 * right-hand side `source`, using the weighted stencils stored on the
 * operator.  `w` is forwarded unchanged to bmgs_wrelax() and defaults to
 * 1.0 when omitted.
 *
 * Returns None.
 */
static PyObject * WOperator_relax(WOperatorObject *self, PyObject *args)
{
  int relax_method;
  PyArrayObject* func;
  PyArrayObject* source;
  int nrelax;
  double w = 1.0;
  if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source,
                        &nrelax, &w))
    return NULL;

  const boundary_conditions* bc = self->bc;

  double* fun = DOUBLEP(func);
  const double* src = DOUBLEP(source);
  const double_complex* ph;

  /* Scratch space: one padded grid (size2) plus MPI send/receive buffers. */
  const int* size2 = bc->size2;
  double* buf = (double*) GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] *
                                      bc->ndouble);
  double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend);
  double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv);
  /* Working copy of the weight pointers, refreshed before every sweep. */
  const double** weights = (const double**) GPAW_MALLOC(double*,
                                                        self->nweights);

  /* No phase factors are supplied on this path: ph stays a null pointer. */
  ph = 0;

  for (int n = 0; n < nrelax; n++ ) {
    /* Boundary handling along each of the three axes before the sweep. */
    for (int i = 0; i < 3; i++) {
      bc_unpack1(bc, fun, buf, i,
                 self->recvreq, self->sendreq,
                 recvbuf, sendbuf, ph + 2 * i, 0, 1);
      bc_unpack2(bc, buf, i,
                 self->recvreq, self->sendreq, recvbuf, 1);
    }
    /* NOTE(review): the pointers are reset on every sweep, presumably
     * because bmgs_wrelax() advances them while walking the grid --
     * confirm before hoisting this loop out. */
    for (int iw = 0; iw < self->nweights; iw++)
      weights[iw] = self->weights[iw];
    bmgs_wrelax(relax_method, self->nweights, self->stencils, weights,
                buf, fun, src, w);
  }
  free(weights);
  free(recvbuf);
  free(sendbuf);
  free(buf);
  Py_RETURN_NONE;
}
/* Double-buffering asynchronous worker.
 *
 * Applies the weighted finite-difference operator to the arrays
 * in[start] .. in[end-1] and writes the results to the matching slots of
 * `out`.  Work is pipelined over two scratch buffers (selected by `odd`):
 * bc_unpack1() is issued for the next chunk before bc_unpack2() and the
 * stencil application are run on the previous one.
 *
 *   chunksize  maximum number of arrays per chunk (clamped to end - start)
 *   chunkinc   size of the first chunk and the increment by which later
 *              chunks grow, up to chunksize
 *   thread_id  forwarded to bc_unpack1()
 *   nthreads   unused here; kept for a signature parallel to wapply_worker()
 *   real       true for real-valued data (bmgs_wfd), false for complex
 *              data (bmgs_wfdz)
 *   ph         phase factors forwarded to bc_unpack1()
 *
 * Does nothing when start >= end.
 */
void wapply_worker_cfd(WOperatorObject *self, int chunksize, int chunkinc,
                       int start, int end, int thread_id, int nthreads,
                       const double* in, double* out,
                       bool real, const double_complex* ph)
{
  if (start >= end)
    return;
  boundary_conditions* bc = self->bc;
  const int* size1 = bc->size1;
  const int* size2 = bc->size2;
  /* Doubles per array without (ng) and with (ng2) boundary padding. */
  int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
  int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];

  MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
  MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];

  double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * chunksize
                                          * GPAW_ASYNC3 * GPAW_ASYNC2);
  double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * chunksize
                                          * GPAW_ASYNC3 * GPAW_ASYNC2);
  double* buf = (double*) GPAW_MALLOC(double, ng2 * chunksize * GPAW_ASYNC2);
  const double** weights = (const double**) GPAW_MALLOC(double*,
                                                        self->nweights);

  if ((end - start) < chunksize)
    chunksize = end - start;

  int chunk = chunkinc;
  if (chunk > chunksize)
    chunk = chunksize;

  int odd = 0;
  const double* my_in = in + start * ng;
  double* my_out;

  /* Prime the pipeline with the first chunk. */
  for (int i = 0; i < 3; i++)
    bc_unpack1(bc, my_in, buf + odd * ng2 * chunksize, i,
               recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
               recvbuf + odd * bc->maxrecv * chunksize
               + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
               sendbuf + odd * bc->maxsend * chunksize
               + i * bc->maxsend * chunksize * GPAW_ASYNC2,
               ph + 2 * i, thread_id, chunk);
  odd = odd ^ 1;

  int last_chunk = chunk;
  for (int n = start+chunk; n < end; n += chunk) {
    /* Grow the next chunk, bounded by chunksize and the remaining work. */
    last_chunk += chunkinc;
    if (last_chunk > chunksize)
      last_chunk = chunksize;

    if (n + last_chunk >= end && last_chunk > 1)
      last_chunk = end - n;
    my_in = in + n * ng;
    my_out = out + (n-chunk) * ng;

    /* Issue bc_unpack1 for the next chunk into the idle buffer ... */
    for (int i = 0; i < 3; i++) {
      bc_unpack1(bc, my_in, buf + odd * ng2 * chunksize, i,
                 recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
                 recvbuf + odd * bc->maxrecv * chunksize
                 + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
                 sendbuf + odd * bc->maxsend * chunksize
                 + i * bc->maxsend * chunksize * GPAW_ASYNC2,
                 ph + 2 * i, thread_id, last_chunk);
    }
    /* ... then switch back and finish the previous one. */
    odd = odd ^ 1;
    for (int i = 0; i < 3; i++) {
      bc_unpack2(bc, buf + odd * ng2 * chunksize, i,
                 recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
                 recvbuf + odd * bc->maxrecv * chunksize
                 + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
                 chunk);
    }
    for (int m = 0; m < chunk; m++) {
      /* NOTE(review): the weight pointers receive the same offset as the
       * scratch buffer (m * ng2 + odd * ng2 * chunksize); verify that
       * this is the intended layout of self->weights. */
      for (int iw = 0; iw < self->nweights; iw++)
        weights[iw] = self->weights[iw] + m * ng2 + odd * ng2 * chunksize;
      if (real)
        bmgs_wfd(self->nweights, self->stencils, weights,
                 buf + m * ng2 + odd * ng2 * chunksize,
                 my_out + m * ng);
      else
        bmgs_wfdz(self->nweights, self->stencils, weights,
                  (const double_complex*) (buf + m * ng2
                                           + odd * ng2 * chunksize),
                  (double_complex*) (my_out + m * ng));
    }
    chunk = last_chunk;
  }

  /* Drain the pipeline: finish and apply the final chunk. */
  odd = odd ^ 1;
  my_out = out + (end-last_chunk) * ng;
  for (int i = 0; i < 3; i++) {
    bc_unpack2(bc, buf + odd * ng2 * chunksize, i,
               recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
               recvbuf + odd * bc->maxrecv * chunksize
               + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
               last_chunk);
  }
  for (int m = 0; m < last_chunk; m++) {
    for (int iw = 0; iw < self->nweights; iw++)
      weights[iw] = self->weights[iw] + m * ng2 + odd * ng2 * chunksize;
    if (real)
      bmgs_wfd(self->nweights, self->stencils, weights,
               buf + m * ng2 + odd * ng2 * chunksize,
               my_out + m * ng);
    else
      /* Write to my_out, exactly like the real branch.  Using the base
       * pointer `out` here would drop the last chunk's complex results
       * at the beginning of the output array. */
      bmgs_wfdz(self->nweights, self->stencils, weights,
                (const double_complex*) (buf + m * ng2
                                         + odd * ng2 * chunksize),
                (double_complex*) (my_out + m * ng));
  }

  free(weights);
  free(buf);
  free(recvbuf);
  free(sendbuf);
}
chunksize : nin; } int chunkinc = chunksize; if (getenv("GPAW_CHUNK_INC") != NULL) chunkinc = atoi(getenv("GPAW_CHUNK_INC")); #ifdef _OPENMP #pragma omp parallel #endif { int thread_id = 0; int nthreads = 1; int start, end; #ifdef _OPENMP thread_id = omp_get_thread_num(); nthreads = omp_get_num_threads(); #endif SHARE_WORK(nin, nthreads, thread_id, &start, &end); #ifndef GPAW_ASYNC if (1) #else if (bc->cfd == 0) #endif { wapply_worker(self, chunksize, start, end, thread_id, nthreads, in, out, real, ph); } else { wapply_worker_cfd(self, chunksize, chunkinc, start, end, thread_id, nthreads, in, out, real, ph); } } Py_RETURN_NONE; } static PyObject * WOperator_get_diagonal_element(WOperatorObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; const double** weights = (const double**) GPAW_MALLOC(double*, self->nweights); for (int iw = 0; iw < self->nweights; iw++) weights[iw] = self->weights[iw]; const int n0 = self->stencils[0].n[0]; const int n1 = self->stencils[0].n[1]; const int n2 = self->stencils[0].n[2]; double d = 0.0; for (int i0 = 0; i0 < n0; i0++) { for (int i1 = 0; i1 < n1; i1++) { for (int i2 = 0; i2 < n2; i2++) { double coef = 0.0; for (int iw = 0; iw < self->nweights; iw++) { coef += weights[iw][0] * self->stencils[iw].coefs[0]; weights[iw]++; } if (coef < 0) coef = -coef; if (coef > d) d = coef; } } } free(weights); return Py_BuildValue("d", d); } static PyObject * WOperator_get_async_sizes(WOperatorObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; #ifdef GPAW_ASYNC return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3); #else return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3); #endif } static PyMethodDef WOperator_Methods[] = { {"apply", (PyCFunction)WOperator_apply, METH_VARARGS, NULL}, {"relax", (PyCFunction)WOperator_relax, METH_VARARGS, NULL}, {"get_diagonal_element", (PyCFunction)WOperator_get_diagonal_element, METH_VARARGS, NULL}, {"get_async_sizes", 
(PyCFunction)WOperator_get_async_sizes, METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} }; PyTypeObject WOperatorType = { PyVarObject_HEAD_INIT(NULL, 0) "WOperator", sizeof(WOperatorObject), 0, (destructor)WOperator_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "FDW-operator object", 0, 0, 0, 0, 0, 0, WOperator_Methods }; PyObject* NewWOperatorObject(PyObject *obj, PyObject *args) { PyObject* coefs_list; PyArrayObject* coefs; PyObject* offsets_list; PyArrayObject* offsets; PyObject* weights_list; PyArrayObject* weights; PyArrayObject* size; PyArrayObject* neighbors; int real; PyObject* comm_obj; int cfd; int range; int nweights; if (!PyArg_ParseTuple(args, "iO!O!O!OiOiOi", &nweights, &PyList_Type, &weights_list, &PyList_Type, &coefs_list, &PyList_Type, &offsets_list, &size, &range, &neighbors, &real, &comm_obj, &cfd)) return NULL; WOperatorObject *self = PyObject_NEW(WOperatorObject, &WOperatorType); if (self == NULL) return NULL; self->stencils = (bmgsstencil*) GPAW_MALLOC(bmgsstencil, nweights); self->weights = (const double**) GPAW_MALLOC(double*, nweights); self->nweights = nweights; for (int iw = 0; iw < nweights; iw++) { coefs = (PyArrayObject*) PyList_GetItem(coefs_list, iw); offsets = (PyArrayObject*) PyList_GetItem(offsets_list, iw); weights = (PyArrayObject*) PyList_GetItem(weights_list, iw); self->stencils[iw] = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs), LONGP(offsets), range, LONGP(size)); self->weights[iw] = DOUBLEP(weights); } const long (*nb)[2] = (const long (*)[2])LONGP(neighbors); const long padding[3][2] = {{range, range}, {range, range}, {range, range}}; MPI_Comm comm = MPI_COMM_NULL; if (comm_obj != Py_None) comm = ((MPIObject*)comm_obj)->comm; self->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd); return (PyObject*) self; } 
gpaw-24.1.0/c/xc/000077500000000000000000000000001454550013000133655ustar00rootroot00000000000000gpaw-24.1.0/c/xc/ensemble_gga.c000066400000000000000000000030411454550013000161370ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. */ #include #include "xc_gpaw.h" double beefvdw_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2) { double e = C1 / rs; *dedrs = -e / rs; double c = C2 * rs / n; c *= c; double s2 = a2 * c; /* Legendre polynomial basis expansion */ int parlen = par->nparameters-1; double p = par->parameters[0]; double tmp = p + s2; double x = 2.0 * s2 / tmp - 1.0; double dxds2 = 2.0 * p / pow(tmp,2); double Fx = 0.0; double dFxds2 = 0.0; int max_order = par->parameters[parlen+1]; double L[max_order+1]; double dL[max_order+1]; double coef; int m; int order; /* initializing */ L[0] = 1.0; L[1] = x; dL[0] = 0.0; dL[1] = 1.0; /* recursively building polynomia and their derivatives */ for(int i = 2; i < max_order+1; i++) { L[i] = 2.0 * x * L[i-1] - L[i-2] - (x * L[i-1] - L[i-2])/i; dL[i] = i * L[i-1] + x * dL[i-1]; } /* building enhancement factor Fx and derivative dFxds2 */ m = 0; for(int i = 0; i < max_order+1; i++) { order = par->parameters[2+m]; if(order == i) { coef = par->parameters[2+parlen+m]; Fx += coef * L[i]; dFxds2 += coef * dL[i] * dxds2; m += 1; } } double ds2drs = 8.0 * c * a2 / rs; *dedrs = *dedrs * Fx + e * dFxds2 * ds2drs; *deda2 = e * dFxds2 * c; e *= Fx; return e; } gpaw-24.1.0/c/xc/libvdwxc.c000066400000000000000000000155311454550013000153600ustar00rootroot00000000000000#ifdef GPAW_WITH_LIBVDWXC #include "../extensions.h" #ifdef PARALLEL #include #include "../mympi.h" #include #else #include #endif // Our heinous plan is to abuse a numpy array so that it will contain a pointer to the vdwxc_data. // This is because PyCapsules are not there until Python 3.1/2.7. 
// This function takes an array and returns the pointer it so outrageously contains. vdwxc_data* unpack_vdwxc_pointer(PyObject* vdwxc_obj) { vdwxc_data* vdw = (vdwxc_data *)PyArray_DATA((PyArrayObject *)vdwxc_obj); return vdw; } PyObject* libvdwxc_has(PyObject* self, PyObject* args) { char* name; if(!PyArg_ParseTuple(args, "s", &name)) { return NULL; } int val; if(strcmp("mpi", name) == 0) { val = vdwxc_has_mpi(); } else if(strcmp("pfft", name) == 0) { val = vdwxc_has_pfft(); } else { return NULL; } PyObject* pyval = val ? Py_True : Py_False; Py_INCREF(pyval); return pyval; } PyObject* libvdwxc_create(PyObject* self, PyObject* args, PyObject* kwargs) { PyObject* vdwxc_obj; int vdwxc_code; int nspins; int Nx, Ny, Nz; double C00, C10, C20, C01, C11, C21, C02, C12, C22; if(!PyArg_ParseTuple(args, "Oii(iii)(ddddddddd)", &vdwxc_obj, &vdwxc_code, // functional identifier &nspins, &Nx, &Ny, &Nz, // number of grid points &C00, &C10, &C20, // 3x3 cell &C01, &C11,&C21, &C02, &C12, &C22)) { return NULL; } vdwxc_data vdw; if(nspins == 1) { vdw = vdwxc_new(vdwxc_code); } else if(nspins == 2) { #ifdef VDWXC_HAS_SPIN vdw = vdwxc_new_spin(vdwxc_code); #else PyErr_SetString(PyExc_ImportError, "this version of libvdwxc has no spin support"); return NULL; #endif } else { PyErr_SetString(PyExc_ValueError, "nspins must be 1 or 2"); return NULL; } vdwxc_data* vdwxc_ptr = unpack_vdwxc_pointer(vdwxc_obj); vdwxc_ptr[0] = vdw; vdwxc_set_unit_cell(vdw, Nx, Ny, Nz, C00, C10, C20, C01, C11, C21, C02, C12, C22); Py_RETURN_NONE; } PyObject* libvdwxc_init_serial(PyObject* self, PyObject* args) { PyObject* vdwxc_obj; if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) { return NULL; } vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj); vdwxc_init_serial(*vdw); Py_RETURN_NONE; } PyObject* libvdwxc_calculate(PyObject* self, PyObject* args) { PyObject *vdwxc_obj; PyArrayObject *rho_obj, *sigma_obj, *dedn_obj, *dedsigma_obj; if(!PyArg_ParseTuple(args, "OOOOO", &vdwxc_obj, &rho_obj, &sigma_obj, &dedn_obj, 
&dedsigma_obj)) { return NULL; } vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj); int nspins = PyArray_DIM(rho_obj, 0); double energy; if (nspins == 1) { double* rho_g = (double*)PyArray_DATA(rho_obj); double* sigma_g = (double*)PyArray_DATA(sigma_obj); double* dedn_g = (double*)PyArray_DATA(dedn_obj); double* dedsigma_g = (double*)PyArray_DATA(dedsigma_obj); energy = vdwxc_calculate(*vdw, rho_g, sigma_g, dedn_g, dedsigma_g); } else if (nspins == 2) { // We actually only need two sigmas/dedsigmas. // The third one came along because that's what usually happens, // but we could save it entirely. assert(PyArray_DIM(sigma_obj, 0) == 3); assert(PyArray_DIM(dedn_obj, 0) == 2); assert(PyArray_DIM(dedsigma_obj, 0) == 3); #ifdef VDWXC_HAS_SPIN energy = vdwxc_calculate_spin(*vdw, (double*)PyArray_GETPTR1(rho_obj, 0), (double*)PyArray_GETPTR1(rho_obj, 1), (double*)PyArray_GETPTR1(sigma_obj, 0), (double*)PyArray_GETPTR1(sigma_obj, 2), (double*)PyArray_GETPTR1(dedn_obj, 0), (double*)PyArray_GETPTR1(dedn_obj, 1), (double*)PyArray_GETPTR1(dedsigma_obj, 0), (double*)PyArray_GETPTR1(dedsigma_obj, 2)); #else return NULL; #endif } else { PyErr_SetString(PyExc_ValueError, "Expected 1 or 2 spins"); return NULL; } return Py_BuildValue("d", energy); } PyObject* libvdwxc_tostring(PyObject* self, PyObject* args) { PyObject *vdwxc_obj; if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) { return NULL; } vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj); int maxlen = 80 * 200; // up to a few hundred lines char str[maxlen]; vdwxc_tostring(*vdw, maxlen, str); return Py_BuildValue("s", str); } PyObject* libvdwxc_free(PyObject* self, PyObject* args) { PyObject* vdwxc_obj; if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) { return NULL; } vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj); vdwxc_finalize(vdw); Py_RETURN_NONE; } #ifdef PARALLEL MPI_Comm unpack_gpaw_comm(PyObject* gpaw_mpi_obj) { MPIObject* gpaw_comm = (MPIObject *)gpaw_mpi_obj; return gpaw_comm->comm; } #endif PyObject* 
/* Python-callable: libvdwxc_init_mpi(vdwxc_obj, gpaw_comm_obj).
 *
 * Initialises the libvdwxc object held by `vdwxc_obj` for MPI operation
 * on the communicator wrapped by `gpaw_comm_obj`.
 *
 * Raises ImportError when vdwxc_has_mpi() reports no MPI support at run
 * time.  When this file was built without PARALLEL or without
 * VDWXC_HAS_MPI, the call is delegated to error_parallel_support()
 * instead, which sets an ImportError and returns NULL.
 *
 * Returns None on success.
 */
PyObject* libvdwxc_init_mpi(PyObject* self, PyObject* args)
{
    PyObject* vdwxc_obj;
    PyObject* gpaw_comm_obj;
    if(!PyArg_ParseTuple(args, "OO", &vdwxc_obj, &gpaw_comm_obj)) {
        return NULL;
    }

    /* Run-time capability check of the linked library. */
    if(!vdwxc_has_mpi()) {
        PyErr_SetString(PyExc_ImportError, "libvdwxc not compiled with MPI.");
        return NULL;
    }

    /* Compile-time check: the MPI entry point is only referenced when both
     * GPAW and libvdwxc were built with MPI. */
#if defined(PARALLEL) && defined(VDWXC_HAS_MPI)
    vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
    MPI_Comm comm = unpack_gpaw_comm(gpaw_comm_obj);
    vdwxc_init_mpi(*vdw, comm);
    Py_RETURN_NONE;
#else
    return error_parallel_support();
#endif
}
*/ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include #include #include "xc_gpaw.h" #include "../extensions.h" typedef struct { PyObject_HEAD /* exchange-correlation energy second derivatives */ void (*get_fxc)(XC(func_type) *func, double point[7], double der[5][5]); XC(func_type) xc_functional; XC(func_type) x_functional; XC(func_type) c_functional; XC(func_type) *functional[2]; /* store either x&c, or just xc */ int nspin; /* must be common to x and c, so declared redundantly here */ } lxcXCFunctionalObject; static void lxcXCFunctional_dealloc(lxcXCFunctionalObject *self) { for (int i=0; i<2; i++) if (self->functional[i] != NULL) xc_func_end(self->functional[i]); PyObject_DEL(self); } static PyObject* lxcXCFunctional_is_gga(lxcXCFunctionalObject *self, PyObject *args) { int success = 0; /* assume functional is not GGA */ // check family of most-complex functional if (self->functional[0]->info->family == XC_FAMILY_GGA || self->functional[0]->info->family == XC_FAMILY_HYB_GGA) success = XC_FAMILY_GGA; return Py_BuildValue("i", success); } static PyObject* lxcXCFunctional_is_mgga(lxcXCFunctionalObject *self, PyObject *args) { int success = 0; /* assume functional is not MGGA */ // check family of most-complex functional if (self->functional[0]->info->family == XC_FAMILY_MGGA) success = XC_FAMILY_MGGA; return Py_BuildValue("i", success); } static PyObject* lxcXCFunctional_set_omega(lxcXCFunctionalObject *self, PyObject *args) { int success = 0; /* Assume we don't use sfat */ int i = 0; #if XC_MAJOR_VERSION >= 4 double omega = 0.0; #else float omega = 0.0; #endif XC(func_type) *test_functional; #if XC_MAJOR_VERSION >= 4 if (!PyArg_ParseTuple(args, "d", &omega)) { PyErr_SetString(PyExc_TypeError, "Gamma has to be double"); #else if (!PyArg_ParseTuple(args, "f", &omega)) { PyErr_SetString(PyExc_TypeError, "Gamma has to be float"); #endif return NULL; } if (self->functional[0]->info->family == XC_FAMILY_HYB_GGA) { for (i=0; 
ifunctional[0]->n_func_aux; i++) { test_functional = self->functional[0]->func_aux[i]; #if XC_MAJOR_VERSION >= 5 if ((test_functional->info->number == XC_GGA_X_SFAT) || (test_functional->info->number == XC_GGA_X_SFAT_PBE)) { XC(func_set_ext_params)(test_functional, &omega); #else if (test_functional->info->number == XC_GGA_X_SFAT) { XC(gga_x_sfat_set_params)(test_functional, -1, omega); #endif /* XC_MAJOR_VERSION >= 5 */ success = 1; } } } if (!(success)) { PyErr_SetString(PyExc_TypeError, "Gamma can only set for range separated functionals"); return NULL; } return Py_BuildValue("i", success); } // Below are changes made by cpo@slac.stanford.edu for libxc 1.2.0 // which allows passing of arrays of points to libxc routines. // The fundamental design idea (to try to minimize code-duplication) is that // all libxc routines have input/output arrays that get processed in // common ways with three special exceptions: n_sg, e_g, dedn_sg. The // struct "xcptrlist" is used to keep track of these pointers. // Two libxc features prevent us from using a straightforward // interface: // 1) libxc calls memset(0) on output arrays, preventing us // from adding x/c contributions "in place" without scratch arrays // 2) for spin-polarized calculations libxc wants spin indices to be // dense in memory, whereas GPAW probably loops over grid indices // more often, so we want to keep those dense in memory. // I asked Miguel Marques to remove the memset, and to add a "stride" // argument to libxc routines to address the above. He says he will // consider it in the future. In the meantime we have to "block" // over gridpoints using some scratch memory. // What is supported: // - combined xc-functional mode // - separate x,c functionals. // - separate x,c can have differing complexities (e.g. 
one GGA, one LDA) // - "exc_vxc" style routines for LDA/GGA/MGGA both unpolarized/polarized // - "fxc" style routines for LDA/GGA both unpolarized/polarized // To support a libxc routine other than exc_vxc/fxc one needs to // copy a "Calculate" routine and change the pointer list setup, and // associated libxc function calls. // number of gridpoints we will "block" over when doing xc calculation #define BLOCKSIZE 1024 // this is the maximum number of BLOCKSIZE arrays that will be put // into scratch (depends on the "spinsize" values for the various // arrays. currently determined by fxc, which has input spinsizes // of 2+3 and output spinsizes of 3+6+6 (totalling 20). #define MAXARRAYS 20 #define LIBXCSCRATCHSIZE (BLOCKSIZE*MAXARRAYS) static double *scratch=NULL; // we don't use lapl, but libxc needs space for them. static double *scratch_lapl=NULL; static double *scratch_vlapl=NULL; // special cases for array behaviors: // flag to indicate we need to add to existing values for dedn_sg #define DEDN_SG 1 // flag to indicate we need to apply NMIN cutoff to n_sg #define N_SG 2 // flag to indicate we need to multiply by density for e_g #define E_G 4 typedef struct xcptr { double *p; int special; int spinsize; } xcptr; #define MAXPTR 10 typedef struct xcptrlist { int num; xcptr p[MAXPTR]; } xcptrlist; typedef struct xcinfo { int nspin; bool spinpolarized; int ng; } xcinfo; // these 3 functions make the spin index closest in memory ("gather") or the // farthest apart in memory ("scatter"). "scatteradd" adds to previous results. 
static void gather(const double* src, double* dst, int np, int stride, int nspins) { const double *dstend = dst+np*nspins; const double *srcend = src+nspins*stride; do { const double *s = src; do { *dst++ = *s; s+=stride; } while (snum; i++) { inblocklist[i] = next; next+=blocksize*inlist->p[i].spinsize; } for (int i=0; inum; i++) { outblocklist[i] = next; next+=blocksize*outlist->p[i].spinsize; } // check that we fit in the scratch space // if we don't, then we need to increase MAXARRAY assert((next - scratch) <= LIBXCSCRATCHSIZE); } // copy a piece of the full data into the block for processing by libxc static void data2block(const xcinfo *info, const xcptrlist *inlist, double *inblocklist[], int blocksize) { // copy data into the block, taking into account special cases for (int i=0; inum; i++) { double *ptr = inlist->p[i].p; double* block = inblocklist[i]; if (info->spinpolarized) { gather(ptr,block,blocksize,info->ng,inlist->p[i].spinsize); if (inlist->p[i].special&N_SG) for (int i=0; ip[i].special&N_SG) for (int i=0; inum; i++) { double *ptr = outlist->p[i].p; double* block = outblocklist[i]; if (outlist->p[i].special&E_G) { if (info->spinpolarized) { for (int i=0; ip[i].special&DEDN_SG) { if (info->spinpolarized) { scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize); // need to add to pre-existing values } else { for (int i=0; ispinpolarized) { scatter(block,ptr,blocksize,info->ng,outlist->p[i].spinsize); } else { memcpy(ptr,block,blocksize*sizeof(double)); } } } } // copy the data from the block back into its final resting place, but add to previous results static void block2dataadd(const xcinfo *info, double *outblocklist[], const xcptrlist *outlist, const double *n_sg, int blocksize, int noutcopy) { for (int i=0; ip[i].p; double* block = outblocklist[i]; if (outlist->p[i].special&E_G) { if (info->spinpolarized) { for (int i=0; ispinpolarized) { scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize); } else { for (int i=0; inspin; 
info.spinpolarized = (info.nspin==2); info.ng = PyArray_DIMS(py_e_g)[0]; xcptrlist inlist,outlist; inlist.num=0; outlist.num=0; int blocksize = BLOCKSIZE; int remaining = info.ng; // setup pointers using most complex functional switch(self->functional[0]->info->family) { case XC_FAMILY_MGGA: inlist.p[2].p = DOUBLEP(py_tau_sg); inlist.p[2].special = 0; inlist.p[2].spinsize = 2; inlist.num++; outlist.p[3].p = DOUBLEP(py_dedtau_sg); outlist.p[3].special = 0; outlist.p[3].spinsize = 2; outlist.num++; // don't break here since MGGA also needs GGA ptrs case XC_FAMILY_HYB_GGA: case XC_FAMILY_GGA: inlist.p[1].p = DOUBLEP(py_sigma_xg); inlist.p[1].special = 0; inlist.p[1].spinsize = 3; inlist.num++; outlist.p[2].p = DOUBLEP(py_dedsigma_xg); outlist.p[2].special = 0; outlist.p[2].spinsize = 3; outlist.num++; // don't break here since GGA also needs LDA ptrs case XC_FAMILY_LDA: inlist.p[0].p = DOUBLEP(py_n_sg); inlist.p[0].special = N_SG; inlist.p[0].spinsize = 2; inlist.num += 1; outlist.p[0].p = DOUBLEP(py_e_g); outlist.p[0].special = E_G; outlist.p[0].spinsize = 1; outlist.p[1].p = DOUBLEP(py_dedn_sg); outlist.p[1].special = DEDN_SG; outlist.p[1].spinsize = 2; outlist.num += 2; } assert(inlist.num < MAXPTR); assert(outlist.num < MAXPTR); double *inblock[MAXPTR]; double *outblock[MAXPTR]; setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize); do { blocksize = blocksizefunctional[i] == NULL) continue; XC(func_type) *func = self->functional[i]; int noutcopy=0; switch(func->info->family) { case XC_FAMILY_LDA: xc_lda_exc_vxc(func, blocksize, n_sg, e_g, dedn_sg); noutcopy = 2; // potentially decrease the size for block2dataadd if second functional less complex. break; case XC_FAMILY_HYB_GGA: case XC_FAMILY_GGA: xc_gga_exc_vxc(func, blocksize, n_sg, sigma_xg, e_g, dedn_sg, dedsigma_xg); noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex. 
break; case XC_FAMILY_MGGA: xc_mgga_exc_vxc(func, blocksize, n_sg, sigma_xg, scratch_lapl, tau_sg, e_g, dedn_sg, dedsigma_xg, scratch_vlapl, dedtau_sg); noutcopy = 4; // potentially decrease the size for block2dataadd if second functional less complex. break; } // if we have more than 1 functional, add results // canonical example: adding "x" results to "c" if (i==0) block2data(&info, &outblock[0], &outlist, n_sg, blocksize); else block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy); } for (int i=0; i0); Py_RETURN_NONE; } static PyObject* lxcXCFunctional_CalculateFXC(lxcXCFunctionalObject *self, PyObject *args) { PyArrayObject* py_n_sg=NULL; PyArrayObject* py_v2rho2_xg=NULL; PyArrayObject* py_sigma_xg=NULL; PyArrayObject* py_v2rhosigma_yg=NULL; PyArrayObject* py_v2sigma2_yg=NULL; if (!PyArg_ParseTuple(args, "OO|OOO", &py_n_sg, &py_v2rho2_xg, &py_sigma_xg, &py_v2rhosigma_yg, &py_v2sigma2_yg)) return NULL; xcinfo info; info.nspin = self->nspin; info.spinpolarized = (info.nspin==2); info.ng = (info.spinpolarized) ? 
PyArray_DIMS(py_n_sg)[0]/2 : PyArray_DIMS(py_n_sg)[0]; xcptrlist inlist,outlist; inlist.num=0; outlist.num=0; int blocksize = BLOCKSIZE; int remaining = info.ng; // setup pointers using most complex functional switch(self->functional[0]->info->family) { case XC_FAMILY_MGGA: // not supported assert(self->functional[0]->info->family != XC_FAMILY_MGGA); // don't break here since MGGA also needs GGA ptrs case XC_FAMILY_HYB_GGA: case XC_FAMILY_GGA: inlist.p[1].p = DOUBLEP(py_sigma_xg); inlist.p[1].special = 0; inlist.p[1].spinsize = 3; inlist.num++; outlist.p[1].p = DOUBLEP(py_v2rhosigma_yg); outlist.p[1].special = 0; outlist.p[1].spinsize = 6; outlist.p[2].p = DOUBLEP(py_v2sigma2_yg); outlist.p[2].special = 0; outlist.p[2].spinsize = 6; outlist.num+=2; // don't break here since GGA also needs LDA ptrs case XC_FAMILY_LDA: inlist.p[0].p = DOUBLEP(py_n_sg); inlist.p[0].special = N_SG; inlist.p[0].spinsize = 2; inlist.num += 1; outlist.p[0].p = DOUBLEP(py_v2rho2_xg); outlist.p[0].special = 0; outlist.p[0].spinsize = 3; outlist.num++; } assert(inlist.num < MAXPTR); assert(outlist.num < MAXPTR); double *inblock[MAXPTR]; double *outblock[MAXPTR]; setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize); do { blocksize = blocksizefunctional[i] == NULL) continue; XC(func_type) *func = self->functional[i]; int noutcopy=0; switch(func->info->family) { case XC_FAMILY_LDA: xc_lda_fxc(func, blocksize, n_sg, v2rho2); noutcopy = 1; // potentially decrease the size for block2dataadd if second functional less complex. break; case XC_FAMILY_HYB_GGA: case XC_FAMILY_GGA: xc_gga_fxc(func, blocksize, n_sg, sigma_xg, v2rho2, v2rhosigma, v2sigma2); noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex. 
break; case XC_FAMILY_MGGA: // not supported by GPAW yet, so crash assert (func->info->family!=XC_FAMILY_MGGA); break; } // if we have more than 1 functional, add results // canonical example: adding "x" results to "c" if (i==0) block2data(&info, &outblock[0], &outlist, n_sg, blocksize); else block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy); } for (int i=0; i0); Py_RETURN_NONE; } static PyObject* lxcXCFunctional_tb09(lxcXCFunctionalObject *self, PyObject *args) { double c; PyArrayObject* n_g; PyArrayObject* sigma_g; PyArrayObject* lapl_g; PyArrayObject* tau_g; PyArrayObject* v_g; PyArrayObject* vx_g; // for vsigma, vtau, vlapl if (!PyArg_ParseTuple(args, "dOOOOOO", &c, &n_g, &sigma_g, &lapl_g, &tau_g, &v_g, &vx_g)) return NULL; #if XC_MAJOR_VERSION >= 4 xc_func_set_ext_params(self->functional[0], &c); #else xc_mgga_x_tb09_set_params(self->functional[0], c); #endif xc_mgga_vxc(self->functional[0], PyArray_DIM(n_g, 0), PyArray_DATA(n_g), PyArray_DATA(sigma_g), PyArray_DATA(lapl_g), PyArray_DATA(tau_g), PyArray_DATA(v_g), PyArray_DATA(vx_g), PyArray_DATA(vx_g), PyArray_DATA(vx_g)); Py_RETURN_NONE; } static PyMethodDef lxcXCFunctional_Methods[] = { {"is_gga", (PyCFunction)lxcXCFunctional_is_gga, METH_VARARGS, 0}, {"is_mgga", (PyCFunction)lxcXCFunctional_is_mgga, METH_VARARGS, 0}, {"set_omega", (PyCFunction)lxcXCFunctional_set_omega, METH_VARARGS, 0}, {"calculate", (PyCFunction)lxcXCFunctional_Calculate, METH_VARARGS, 0}, {"calculate_fxc_spinpaired", (PyCFunction)lxcXCFunctional_CalculateFXC, METH_VARARGS, 0}, {"tb09", (PyCFunction)lxcXCFunctional_tb09, METH_VARARGS, 0}, {NULL, NULL, 0, NULL} }; PyTypeObject lxcXCFunctionalType = { PyVarObject_HEAD_INIT(NULL, 0) "lxcXCFunctional", sizeof(lxcXCFunctionalObject), 0, (destructor)lxcXCFunctional_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "LibXCFunctional object", 0, 0, 0, 0, 0, 0, lxcXCFunctional_Methods }; PyObject * NewlxcXCFunctionalObject(PyObject 
*obj, PyObject *args) {
    int xc, x, c; /* functionals identifier number */
    int nspin; /* XC_UNPOLARIZED or XC_POLARIZED */

    /* The scratch buffers are file-level statics that are allocated once, on
       the first construction of a functional object.  All three buffers are
       allocated into temporaries and only published when every allocation
       succeeded: a failure then raises MemoryError instead of crashing in
       memset(), and it leaves `scratch` NULL so that a later call retries
       the whole allocation instead of running with missing buffers. */
    if (!scratch) {
        const size_t laplsize = BLOCKSIZE*sizeof(double)*2;
        double *new_scratch = (double*)malloc(LIBXCSCRATCHSIZE*sizeof(double));
        double *new_lapl = (double*)malloc(laplsize);
        double *new_vlapl = (double*)malloc(laplsize);
        if (new_scratch == NULL || new_lapl == NULL || new_vlapl == NULL) {
            /* free(NULL) is a no-op, so no per-pointer checks are needed */
            free(new_scratch);
            free(new_lapl);
            free(new_vlapl);
            return PyErr_NoMemory();
        }
        /* only the lapl input buffer is zero-filled; vlapl is an output */
        memset(new_lapl, 0, laplsize);
        scratch = new_scratch;
        scratch_lapl = new_lapl;
        scratch_vlapl = new_vlapl;
    }

    if (!PyArg_ParseTuple(args, "iiii", &xc, &x, &c, &nspin)) {
        return NULL;
    }

    /* checking if the numbers xc x c are valid is done at python level */
    lxcXCFunctionalObject *self = PyObject_NEW(lxcXCFunctionalObject,
                                               &lxcXCFunctionalType);
    if (self == NULL){
        return NULL;
    }

    assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED);
    self->nspin = nspin; /* must be common to x and c, so declared redundantly */

    int number,family,familyx,familyc;

    if (xc != -1) {
        /* a single combined xc functional: only slot 0 is used */
        xc_family_from_id(xc,&family,&number);
        assert (family != XC_FAMILY_UNKNOWN);
        XC(func_init)(&self->xc_functional, xc, nspin);
        self->functional[0]=&self->xc_functional;
        self->functional[1]=NULL;
    } else {
        /* separate exchange and/or correlation functionals */
        assert (x!=-1 || c!=-1);

        if (x!=-1) {
            xc_family_from_id(x,&familyx,&number);
            assert (familyx != XC_FAMILY_UNKNOWN);
            XC(func_init)(&self->x_functional, x, nspin);
        }
        if (c!=-1) {
            xc_family_from_id(c,&familyc,&number);
            assert (familyc != XC_FAMILY_UNKNOWN);
            XC(func_init)(&self->c_functional, c, nspin);
        }

        if (x!=-1 && c!=-1) {
            /* put most complex functional first */
            /* important for later loops over functionals */
            if (familyx == XC_FAMILY_MGGA) {
                self->functional[0]=&self->x_functional;
                self->functional[1]=&self->c_functional;
            } else if (familyc == XC_FAMILY_MGGA) {
                self->functional[0]=&self->c_functional;
                self->functional[1]=&self->x_functional;
            } else if (familyx == XC_FAMILY_GGA || familyx == XC_FAMILY_HYB_GGA) {
                self->functional[0]=&self->x_functional;
                self->functional[1]=&self->c_functional;
            } else {
                // either c is GGA, or both are LDA (so don't care)
                self->functional[0]=&self->c_functional;
                self->functional[1]=&self->x_functional;
            }
        } else if (x!=-1) {
self->functional[0]=&self->x_functional; self->functional[1]=NULL; } else if (c!=-1) { self->functional[0]=&self->c_functional; self->functional[1]=NULL; } } return (PyObject*)self; } PyObject * lxcXCFuncNum(PyObject *obj, PyObject *args) { char *funcname; if (!PyArg_ParseTuple(args, "s", &funcname)) { return NULL; } int num = XC(functional_get_number)(funcname); if (num != -1) return Py_BuildValue("i",num); else Py_RETURN_NONE; } gpaw-24.1.0/c/xc/m06l.c000066400000000000000000000562111454550013000143140ustar00rootroot00000000000000/************************************************************************ Implements Zhao, Truhlar Meta-gga M06-Local Correlation part ************************************************************************/ #include #include #include #include "xc_mgga.h" typedef struct m06l_params { common_params common; // needs to be at the beginning of every functional_params XC(func_type) *c_aux; XC(func_type) *x_aux; } m06l_params; /* derivatives of x and z with respect to rho, grho and tau*/ static void c_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau) { *dxdd = -8./3. * x * 1/rho; *dxdgd = 1./pow(rho,8./3.); *dzdd = -5./3. * 2 * tau/pow(rho, 8./3.); *dzdtau = 2./pow(rho, 5./3.); } /* Get g for Eq. 
(13)*/ static void c_m06_13(double *x, double *rho, double *g_ab, double *dg_abdd, double *dg_abdgd) { /*define the C_ab,i */ static double c_ab0= 0.6042374, c_ab1= 177.6783, c_ab2= -251.3252, c_ab3=76.35173, c_ab4=-12.55699; double gammaCab = 0.0031 ; double x_ab, a; double dg_abdx, dxdd_a, dxdgd_a, dzdd_a, dzdtau_a; double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b; /*x = x_ba^2 = x_a^2+x_b^2*/ x_ab = x[0] + x[1]; a= (gammaCab*x_ab/(1+gammaCab*x_ab)); *g_ab = c_ab0*pow(a,0)+ c_ab1*pow(a,1)+ c_ab2*pow(a,2)+c_ab3*pow(a,3)+c_ab4*pow(a,4); double dadx = gammaCab/pow(1+gammaCab*x_ab, 2.); dg_abdx = (0.0*c_ab0*pow(a,-1)+ 1.*c_ab1*pow(a,0)+ 2.*c_ab2*pow(a,1)+3.*c_ab3*pow(a,2)+4.*c_ab4*pow(a,3))*dadx; c_m06l_zx(x[0], 0.0, rho[0], 0.0, &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a); c_m06l_zx(x[1], 0.0, rho[1], 0.0, &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b); dg_abdd[0] = dg_abdx*dxdd_a; dg_abdd[1] = dg_abdx*dxdd_b; dg_abdgd[0] = dg_abdx*dxdgd_a; dg_abdgd[1] = 0.0; dg_abdgd[2] = dg_abdx*dxdgd_b; } /* Get g for Eq. (15)*/ static void c_m06_15(double x, double rho, double *g_ss, double *dg_ssdd, double *dg_ssdgd) { /*define the C_ss,i */ static double c_ss0=0.5349466, c_ss1=0.5396620, c_ss2=-31.61217, c_ss3= 51.49592, c_ss4=-29.19613; double gammaCss = 0.06 ; double a; double dg_ssdx, dxdd, dxdgd, dzdd, dzdtau; /*x = x_a^2 */ a= (gammaCss*x/(1+gammaCss*x)); *g_ss = c_ss0*pow(a,0)+ c_ss1*pow(a,1)+ c_ss2*pow(a,2)+c_ss3*pow(a,3)+c_ss4*pow(a,4); double dadx = gammaCss/pow(1+gammaCss*x, 2.); dg_ssdx = (0.0*c_ss0*pow(a,-1)+ 1.*c_ss1*pow(a,0)+ 2.*c_ss2*pow(a,1)+3.*c_ss3*pow(a,2)+4.*c_ss4*pow(a,3))*dadx; c_m06l_zx(x, 0.0, rho, 0.0, &dxdd, &dxdgd, &dzdd, &dzdtau); *dg_ssdd = dg_ssdx*dxdd; *dg_ssdgd = dg_ssdx*dxdgd; /*printf("g_ss %19.12f\n", *g_ss);*/ } /* Get h_ab for Eq. (12)*/ static void c_m06l_hab(double *x, double *z, double *rho, double *tau, double *h_ab, double *dh_abdd, double *dh_abdgd, double *dh_abdtau) { /* define the d_ab,i for Eq. 
(12)*/ static double d_ab0= 0.3957626, d_ab1= -0.5614546, d_ab2= 0.01403963, d_ab3= 0.0009831442, d_ab4= -0.003577176; double alpha_ab = 0.00304966; double hab1, dhabdd1[2], dhabdgd1[3], dhabdtau1[2]; double x_ab, z_ab, gamma, xgamma, zgamma; double dgammadx, dgammadz; double dgammadd_a, dgammadgd_a, dgammadtau_a; double dgammadd_b, dgammadgd_b, dgammadtau_b; double dxdd_a, dxdgd_a, dzdd_a, dzdtau_a; double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b; x_ab = x[0] + x[1]; z_ab = z[0] + z[1]; gamma = 1 + alpha_ab*(x_ab + z_ab); { /* derivatives of gamma with respect to x and z*/ dgammadx = alpha_ab; dgammadz = alpha_ab; } c_m06l_zx(x[0], z[0], rho[0], tau[0], &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a); c_m06l_zx(x[1], z[1], rho[1], tau[1], &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b); { /*derivatives of gamma with respect to density, gradient and kietic energy*/ dgammadd_a = dgammadx * dxdd_a + dgammadz * dzdd_a; dgammadd_b = dgammadx * dxdd_b + dgammadz * dzdd_b; dgammadgd_a = dgammadx * dxdgd_a; dgammadgd_b = dgammadx * dxdgd_b; dgammadtau_a = dgammadz * dzdtau_a; dgammadtau_b = dgammadz * dzdtau_b; } xgamma = x_ab/gamma; zgamma = z_ab/gamma; /* we initialize h and collect the terms*/ hab1 = 0.0; dhabdd1[0] = dhabdd1[1] = 0.0; dhabdgd1[0] = dhabdgd1[1] = dhabdgd1[2] = 0.0; dhabdtau1[0] = dhabdtau1[1] = 0.0; { /* first term */ double g2=pow(gamma,2.); hab1 += d_ab0/gamma; dhabdd1[0] += -d_ab0*dgammadd_a/g2; dhabdd1[1] += -d_ab0*dgammadd_b/g2; dhabdgd1[0] += -d_ab0*dgammadgd_a/g2; dhabdgd1[1] += 0.0; dhabdgd1[2] += -d_ab0*dgammadgd_b/g2; dhabdtau1[0] += -d_ab0*dgammadtau_a/g2 ; dhabdtau1[1] += -d_ab0*dgammadtau_b/g2 ; } { /* second term */ double g3=pow(gamma,3.); hab1 += (d_ab1*xgamma + d_ab2*zgamma)/gamma; dhabdd1[0] += (gamma*(d_ab1*dxdd_a+d_ab2*dzdd_a)-2*dgammadd_a*(d_ab1*x_ab+d_ab2*z_ab))/g3; dhabdd1[1] += (gamma*(d_ab1*dxdd_b+d_ab2*dzdd_b)-2*dgammadd_b*(d_ab1*x_ab+d_ab2*z_ab))/g3; dhabdgd1[0] += (d_ab1*dxdgd_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_a)/g3; dhabdgd1[1] += 0.0; 
dhabdgd1[2] += (d_ab1*dxdgd_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_b)/g3; dhabdtau1[0] += (d_ab2*dzdtau_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_a)/g3; dhabdtau1[1] += (d_ab2*dzdtau_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_b)/g3; } { /* third term */ double g4= pow(gamma,4); hab1 += (d_ab3*xgamma*xgamma+d_ab4*xgamma*zgamma)/gamma; dhabdd1[0] += (-3*dgammadd_a*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_a*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_a*gamma)/g4; dhabdd1[1] += (-3*dgammadd_b*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_b*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_b*gamma)/g4; dhabdgd1[0] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_a+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_a)/g4; dhabdgd1[1] += 0.0; dhabdgd1[2] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_b+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_b)/g4; dhabdtau1[0] += (d_ab4*x_ab*dzdtau_a*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_a)/g4; dhabdtau1[1] += (d_ab4*x_ab*dzdtau_b*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_b)/g4; } *h_ab = hab1; //derivatives dh_abdd[0] = dhabdd1[0]; dh_abdd[1] = dhabdd1[1]; dh_abdgd[0] = dhabdgd1[0]; dh_abdgd[1] = dhabdgd1[1]; dh_abdgd[2] = dhabdgd1[2]; dh_abdtau[0] = dhabdtau1[0]; dh_abdtau[1] = dhabdtau1[1]; } /* Get h_ss for Eq. (14)*/ static void c_m06l_hss(double x, double z, double rho, double tau, double *h_ss, double *dh_ssdd, double *dh_ssdgd, double *dh_ssdtau) { /* define the d_ab,i for Eq. 
(12)*/ static double d_ss0= 0.4650534, d_ss1= 0.1617589, d_ss2= 0.1833657, d_ss3= 0.0004692100, d_ss4= -0.004990573; double alpha_ss = 0.00515088; double hss1, dhssdd1, dhssdgd1, dhssdtau1; double gamma, xgamma, zgamma; double dgammadx, dgammadz; double dgammadd, dgammadgd, dgammadtau; double dxdd, dxdgd, dzdd, dzdtau; gamma = 1 + alpha_ss*(x + z); { /* derivatives of gamma with respect to x and z*/ dgammadx = alpha_ss; dgammadz = alpha_ss; } c_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau); { /* derivatives of gamma with respect to density, gradient and kinetic energy */ dgammadd = dgammadx * dxdd + dgammadz * dzdd; dgammadgd = dgammadx * dxdgd; dgammadtau = dgammadz * dzdtau; } xgamma = x/gamma; zgamma = z/gamma; /* we initialize h and collect the terms*/ hss1 = 0.0; dhssdd1 = 0.0; dhssdgd1 = 0.0; dhssdtau1 = 0.0; { /* first term */ double g2=pow(gamma,2.); hss1 += d_ss0/gamma; dhssdd1 += -d_ss0*dgammadd/g2; dhssdgd1 += -d_ss0*dgammadgd/g2; dhssdtau1 += -d_ss0*dgammadtau/g2 ; } { /* second term */ double g3=pow(gamma,3.); hss1 += (d_ss1*xgamma + d_ss2*zgamma)/gamma; dhssdd1 += (gamma*(d_ss1*dxdd+d_ss2*dzdd)-2*dgammadd*(d_ss1*x+d_ss2*z))/g3; dhssdgd1 += (d_ss1*dxdgd*gamma -2*(d_ss1*x+d_ss2*z)*dgammadgd)/g3; dhssdtau1 += (d_ss2*dzdtau*gamma -2*(d_ss1*x+d_ss2*z)*dgammadtau)/g3; } { /* third term */ double g4= pow(gamma,4); hss1 += (d_ss3*xgamma*xgamma+d_ss4*xgamma*zgamma)/gamma; dhssdd1 += (-3*dgammadd*(d_ss3*pow(x,2.)+d_ss4*x*z)+dxdd*gamma*(2*d_ss3*x+d_ss4*z)+d_ss4*x*dzdd*gamma)/g4; dhssdgd1 += (-3*x*(d_ss3*x+d_ss4*z)*dgammadgd+gamma*(2*d_ss3*x+d_ss4*z)*dxdgd)/g4; dhssdtau1 += (d_ss4*x*dzdtau*gamma-3*x*(d_ss3*x+d_ss4*z)*dgammadtau)/g4; } *h_ss = hss1; //derivatives *dh_ssdd = dhssdd1; *dh_ssdgd = dhssdgd1; *dh_ssdtau = dhssdtau1; } static void c_m06l_para(m06l_params *p, const double *rho, const double *sigmatmp, const double *tautmp, double *energy, double *dedd, double *vsigma, double *dedtau) { double rho2[2], rho2s[2], x[2], z[2], zc_ss[2]; double 
tau2[2], tauw[2], dens, dens1, sigma[3]; double g_ss[2], h_ss[2], Ec_ss[2], D_ss[2]; double g_ab=0.0, h_ab=0.0, Ec_ab=0.0; double exunif_ss[2], vxunif_up[2], vxunif_dn[2], vxunif_ss[2]; double exunif =0.0, exunif_ab=0.0, vxunif[2]; //derivatives double dh_ssdd[2], dh_ssdgd[3], dh_ssdtau[2]; double dg_ssdd[2], dg_ssdgd[3] ; double dh_abdd[2], dh_abdgd[3], dh_abdtau[2]; double dg_abdd[2], dg_abdgd[3]; double dEc_ssdd[2], dEc_ssdgd[3], dEc_ssdtau[2]; double dEc_abdd[2], dEc_abdgd[3], dEc_abdtau[2]; double dD_ssdd[2], dD_ssdgd[3], dD_ssdtau[2], dD_ssdx[2], dD_ssdz[2]; double dxdd[2], dxdgd[2], dzdd[2], dzdtau[2]; const double Cfermi= (3./5.)*pow(6*M_PI*M_PI,2./3.); /* put in by cpo for const reasons */ double sigma_[3],tau[2]; sigma_[0] = sigmatmp[0]; tau[0] = tautmp[0]; /*calculate |nabla rho|^2 */ sigma_[0] = max(MIN_GRAD*MIN_GRAD, sigma_[0]); tauw[0] = max(sigma_[0]/(8.0*rho[0]), 1.0e-12); tau[0] = max(tauw[0], tau[0]); if(p->common.nspin== XC_UNPOLARIZED) { tau[1] = 0.0; rho2[0] = rho[0]/2.; rho2[1] = rho[0]/2.; sigma[0] = sigma_[0]/4.; sigma[1] = sigma_[0]/4.; sigma[2] = sigma_[0]/4.; dens = rho[0]; dens1 = dens; tau2[0] = tau[0]/2.; tau2[1] = tau[0]/2.; }else{ dens1 = rho[0]+rho[1]; tau[1] = tautmp[1]; sigma_[1] = sigmatmp[1]; sigma_[2] = sigmatmp[2]; sigma_[2] = max(MIN_GRAD*MIN_GRAD, sigma_[2]); tauw[1] = max(sigma_[2]/(8.0*rho[1]), 1.0e-12); tau[1] = max(tauw[1], tau[1]); rho2[0]=rho[0]; rho2[1]=rho[1]; sigma[0] = sigma_[0]; sigma[1] = sigma_[1]; sigma[2] = sigma_[2]; dens = rho[0]+rho[1]; tau2[0] =tau[0]; tau2[1] =tau[1]; } //get the e_LDA(rho_a,b) const int np = 1; XC(lda_exc_vxc)(p->c_aux, np, rho2, &exunif, vxunif); exunif = exunif*dens; /*==============get the E_sigma part================*/ /*============ spin up =============*/ rho2s[0]=rho2[0]; rho2s[1]=0.; //get the e_LDA(rho_up,0) XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[0]), vxunif_up); exunif_ss[0] = exunif_ss[0] * rho2s[0]; vxunif_ss[0] = vxunif_up[0]; /*define variables for rho_up and zc 
in order to avoid x/0 -> D_ss = -inf */ x[0] = sigma[0]/(pow(rho2s[0], 8./3.)); z[0] = 2*tau2[0]/pow(rho2s[0],5./3.) - Cfermi; zc_ss[0] = 2*tau2[0]/pow(rho2s[0],5./3.); /*D_ss = 1 -x/4*(z + Cf), z+Cf = 2*tau2/pow(rho2s[0],5./3.) = zc */ D_ss[0] = 1 - x[0]/(4. * zc_ss[0]); //derivatives for D_up dD_ssdx[0] = -1/(4 * zc_ss[0]); dD_ssdz[0] = 4 * x[0]/pow(4.*zc_ss[0],2.); c_m06l_zx(x[0], z[0], rho2s[0], tau2[0], &(dxdd[0]), &(dxdgd[0]), &(dzdd[0]), &(dzdtau[0])); dD_ssdd[0] = dD_ssdx[0] * dxdd[0] + dD_ssdz[0] * dzdd[0]; dD_ssdgd[0] = dD_ssdx[0] * dxdgd[0]; dD_ssdtau[0] = dD_ssdz[0] * dzdtau[0]; /*build up Eq. (14): Ec_sigmasigma*/ c_m06_15(x[0], rho2s[0], &(g_ss[0]), &(dg_ssdd[0]), &(dg_ssdgd[0])); c_m06l_hss(x[0], z[0], rho2s[0], tau2[0], &(h_ss[0]), &(dh_ssdd[0]), &(dh_ssdgd[0]), &(dh_ssdtau[0])); Ec_ss[0] = (exunif_ss[0] * (g_ss[0]+h_ss[0]) * D_ss[0]); //printf("Ec_up %.9e\n", Ec_ss[0]); /*============== spin down =============*/ rho2s[0]=rho2[1]; rho2s[1]=0.; //get the e_LDA(0,rho_dn) XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[1]), vxunif_dn); exunif_ss[1] = exunif_ss[1] * rho2s[0]; vxunif_ss[1] = vxunif_dn[0]; /*define variables for rho_beta*/ x[1] = sigma[2]/(pow(rho2s[0], 8./3.)); z[1] = 2*tau2[1]/pow(rho2s[0],5./3.) 
- Cfermi; zc_ss[1] = 2*tau2[1]/pow(rho2s[0],5./3.); //printf("x1 %.9e, zc_ss%.9e\n", x[1], zc_ss[1]); D_ss[1] = 1 - x[1]/(4.*zc_ss[1]); //derivatives for D_dn dD_ssdx[1] = - 1/(4*zc_ss[1]); dD_ssdz[1] = 4*x[1]/pow(4.*zc_ss[1],2.); c_m06l_zx(x[1], z[1], rho2s[0], tau2[1], &(dxdd[1]), &(dxdgd[1]), &(dzdd[1]), &(dzdtau[1])); dD_ssdd[1] = dD_ssdx[1] * dxdd[1] + dD_ssdz[1] * dzdd[1]; dD_ssdgd[2] = dD_ssdx[1] * dxdgd[1]; dD_ssdtau[1] = dD_ssdz[1] * dzdtau[1]; c_m06_15(x[1], rho2s[0], &(g_ss[1]), &(dg_ssdd[1]), &(dg_ssdgd[2])); c_m06l_hss(x[1], z[1], rho2s[0], tau2[1], &(h_ss[1]), &(dh_ssdd[1]), &(dh_ssdgd[2]), &(dh_ssdtau[1])); //printf("exunif_ss %.9e, (g_ss[1]+h_ss[1])%.9e, D_ss %.9e\n", exunif_ss[1],(g_ss[1]+h_ss[1]),D_ss[1]); Ec_ss[1] = (exunif_ss[1] * (g_ss[1]+h_ss[1]) * D_ss[1]); //printf("Ec_dn %.9e\n", Ec_ss[1]); // Derivatives for Ec_up and Ec_dn with respect to density and kinetic energy int i; for(i=0; i<2; i++){ dEc_ssdd[i] = exunif_ss[i] * dh_ssdd[i] * D_ss[i] + vxunif_ss[i] * h_ss[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdd[i] + exunif_ss[i] * dg_ssdd[i] * D_ss[i] + vxunif_ss[i] * g_ss[i] * D_ss[i] + exunif_ss[i] * g_ss[i] * dD_ssdd[i]; dEc_ssdtau[i] = exunif_ss[i] * dh_ssdtau[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdtau[i] + exunif_ss[i] * g_ss[i] * dD_ssdtau[i]; } // Derivatives for Ec_up and Ec_dn with respect to gradient dEc_ssdgd[0] = exunif_ss[0] * dh_ssdgd[0] * D_ss[0] + exunif_ss[0] * h_ss[0] * dD_ssdgd[0] + exunif_ss[0] * dg_ssdgd[0] * D_ss[0] + exunif_ss[0] * g_ss[0] * dD_ssdgd[0]; dEc_ssdgd[2] = exunif_ss[1] * dh_ssdgd[2] * D_ss[1] + exunif_ss[1] * h_ss[1] * dD_ssdgd[2] + exunif_ss[1] * dg_ssdgd[2] * D_ss[1] + exunif_ss[1] * g_ss[1] * dD_ssdgd[2]; /*==============get the E_ab part========================*/ exunif_ab = exunif - exunif_ss[0] - exunif_ss[1]; //x_ab = sigmatot[0] /(pow(rho2[0], 8./3.)) + sigmatot[2] /(pow(rho2[1], 8./3.)); //z_ab = 2*tau2[0]/pow(rho2[0],5./3.) + 2*tau2[1]/pow(rho2[1],5./3.) - 2*Cfermi; /*build up Eq. 
(12): Ec_alphabeta*/ c_m06_13(x, rho2, &g_ab, dg_abdd, dg_abdgd); c_m06l_hab(x, z, rho2, tau2, &h_ab, dh_abdd, dh_abdgd, dh_abdtau); Ec_ab = exunif_ab * (g_ab+h_ab); // Derivatives for Ec_ab with respect to density and kinetic energy for(i=0; i<2; i++){ dEc_abdd[i] = exunif_ab * (dh_abdd[i]+ dg_abdd[i]) + (vxunif[i]- vxunif_ss[i]) * (g_ab+h_ab); dEc_abdtau[i] = exunif_ab * dh_abdtau[i]; } // Derivatives for Ec_ab with respect to gradient for(i=0; i<3; i++){ dEc_abdgd[i] = exunif_ab * (dh_abdgd[i] + dg_abdgd[i]); } /*==============get the total energy E_c= E_up + E_dn + E_ab========================*/ /*==============================and derivatives=====================================*/ *energy = (Ec_ss[0] + Ec_ss[1] + Ec_ab)/dens1; //printf("Ec_ss %.9e, Ec_ss %.9e, Ec_ab %.9e\n", Ec_ss[0], Ec_ss[1], Ec_ab); //derivative for the total correlation energy if(p->common.nspin== XC_UNPOLARIZED) { dedd[0]=dEc_ssdd[0] + dEc_abdd[0]; dedd[1]=0.0; vsigma[0]= (dEc_ssdgd[0] + dEc_abdgd[0])/2.; vsigma[1]= 0.0; vsigma[2]= 0.0; dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0]; dedtau[1]= 0.0; }else{ dedd[0]=dEc_ssdd[0] + dEc_abdd[0]; dedd[1]=dEc_ssdd[1] + dEc_abdd[1]; vsigma[0]= dEc_ssdgd[0] + dEc_abdgd[0]; vsigma[1]= 0.0; vsigma[2]= dEc_ssdgd[2] + dEc_abdgd[2]; dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0]; dedtau[1]= dEc_ssdtau[1] + dEc_abdtau[1]; } } static void XC(mgga_c_m06l)(void *p, const double *rho, const double *sigma, const double *tau, double *e, double *dedd, double *vsigma, double *dedtau) { c_m06l_para(p, rho, sigma, tau, e, dedd, vsigma, dedtau); } /* derivatives of x and z with respect to rho, grho and tau: Eq.(1) and Eq.(3)*/ static void x_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau) { *dxdd = -8./3. * x * 1/rho; *dxdgd = 1./pow(rho,8./3.); *dzdd = -5./3. * 2* tau/pow(rho, 8./3.); *dzdtau = 2./pow(rho, 5./3.); } /* Build gamma and its derivatives with respect to rho, grho and tau: Eq. 
(4)*/ static void x_m06l_gamma(double x, double z, double rho, double tau, double *gamma, double *dgammadd, double *dgammadgd, double *dgammadtau) { static double alpha = 0.00186726; /*set alpha of Eq. (4)*/ double dgammadx, dgammadz; double dxdd, dxdgd, dzdd, dzdtau; *gamma = 1 + alpha*(x + z); /*printf("gamma %19.12f\n", *gamma);*/ { /* derivatives */ dgammadx = alpha; dgammadz = alpha; } x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau); { *dgammadd = dgammadx*dxdd + dgammadz*dzdd; *dgammadgd = dgammadx*dxdgd; *dgammadtau = dgammadz*dzdtau; } } /************************************************************************ Implements Zhao, Truhlar Meta-gga M06-Local Correlation part ************************************************************************/ /* calculate h and h derivatives with respect to rho, grho and tau: Equation (5) */ static void x_m06l_h(double x, double z, double rho, double tau, double *h, double *dhdd, double *dhdgd, double *dhdtau) { /* parameters for h(x_sigma,z_sigma) of Eq. 
(5)*/ static double d0=0.6012244, d1=0.004748822, d2=-0.008635108, d3=-0.000009308062, d4=0.00004482811; double h1, dhdd1, dhdgd1, dhdtau1; double gamma, dgammadd, dgammadgd, dgammadtau; double xgamma, zgamma; double dxdd, dxdgd, dzdd, dzdtau; x_m06l_gamma(x, z, rho, tau, &gamma, &dgammadd, &dgammadgd, &dgammadtau); xgamma = x/gamma; zgamma = z/gamma; /* we initialize h and its derivatives and collect the terms*/ h1 = 0.0; dhdd1 = 0.0; dhdgd1 = 0.0; dhdtau1 = 0.0; { /* first term */ double g2=pow(gamma,2.); h1 += d0/gamma; dhdd1 += -d0*dgammadd/g2; dhdgd1 += -d0*dgammadgd/g2; dhdtau1 += -d0*dgammadtau/g2 ; } x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau); { /* second term */ double g3=pow(gamma,3.); h1 += (d1*xgamma + d2*zgamma)/gamma; dhdd1 += (gamma*(d1*dxdd+d2*dzdd)-2*dgammadd*(d1*x+d2*z))/g3; dhdgd1 += (d1*dxdgd*gamma -2*(d1*x+d2*z)*dgammadgd)/g3; dhdtau1 += (d2*dzdtau*gamma -2*(d1*x+d2*z)*dgammadtau)/g3; } { /* third term */ double g4= pow(gamma,4); h1 += (d3*xgamma*xgamma+d4*xgamma*zgamma)/gamma; dhdd1 += (-3*dgammadd*(d3*pow(x,2.)+d4*x*z)+dxdd*gamma*(2*d3*x+d4*z)+d4*x*dzdd*gamma)/g4; dhdgd1 += (-3*x*(d3*x+d4*z)*dgammadgd+gamma*(2*d3*x+d4*z)*dxdgd)/g4; dhdtau1 += (d4*x*dzdtau*gamma-3*x*(d3*x+d4*z)*dgammadtau)/g4; } *h = h1; /*printf(" h %19.12f\n", *h);*/ *dhdd = dhdd1; *dhdgd =dhdgd1; *dhdtau = dhdtau1; } /* f(w) and its derivatives with respect to rho and tau*/ static void x_m06l_fw(double rho, double tau, double *fw, double *dfwdd, double *dfwdtau) { /*define the parameters for fw of Eq. (8) as in the reference paper*/ static double a0= 0.3987756, a1= 0.2548219, a2= 0.3923994, a3= -2.103655, a4= -6.302147, a5= 10.97615, a6= 30.97273, a7=-23.18489, a8=-56.73480, a9=21.60364, a10= 34.21814, a11= -9.049762; double tau_lsda, t, w; double dtdd, dtdtau; double dfwdw, dwdt, dtau_lsdadd; double aux = (3./10.) 
* pow((6*M_PI*M_PI),2./3.); /*3->6 for nspin=2 */ tau_lsda = aux * pow(rho,5./3.); t = tau_lsda/tau; dtdtau = -t/tau; w = (t - 1)/(t + 1); *fw = a0*pow(w,0.)+a1*pow(w,1.)+a2*pow(w,2.)+a3*pow(w,3.)+a4*pow(w,4.)+ + a5*pow(w,5.)+a6*pow(w,6.)+a7*pow(w,7.)+a8*pow(w,8.)+a9*pow(w,9.)+a10*pow(w,10.)+a11*pow(w,11.); dfwdw = 0.0*a0*pow(w,-1)+1.0*a1*pow(w,0.)+2.0*a2*pow(w,1.)+3.0*a3*pow(w,2.)+4.0*a4*pow(w,3.)+ + 5.0*a5*pow(w,4.)+6.0*a6*pow(w,5.)+7.0*a7*pow(w,6.)+8.0*a8*pow(w,7.)+9.0*a9*pow(w,8.)+ + 10*a10*pow(w,9.)+11*a11*pow(w,10.); dwdt = 2/pow((t + 1),2.); dtau_lsdadd = aux * 5./3.* pow(rho,2./3.); dtdd = dtau_lsdadd/tau; *dfwdd = dfwdw * dwdt * dtdd; *dfwdtau = dfwdw * dwdt * dtdtau; } static void x_m06l_para(m06l_params *pt, double rho, double sigma, double tau, double *energy, double *dedd, double *vsigma, double *dedtau) { /*Build Eq. (6) collecting the terms Fx_PBE, fw, e_lsda and h*/ double grad, tauw, tau2, x, z; double rho2[2],sigmatot[3]; double F_PBE, de_PBEdd[2], de_PBEdgd[3]; double h, dhdd, dhdgd, dhdtau; double fw, dfwdd, dfwdtau; double epsx_lsda, depsx_lsdadd; const double Cfermi = (3./5.) * pow(6*M_PI*M_PI,2./3.); /* calculate |nabla rho|^2 */ grad = sigma; grad = max(MIN_GRAD*MIN_GRAD, grad); tauw = max(grad/(8.0*rho),1.0e-12); /* tau^W = |nabla rho|^2/ 8rho */ tau = max(tau, tauw); rho2[0]=rho/2.; rho2[1]=0.0; sigmatot[0] = grad/4.; sigmatot[1] = 0.0; sigmatot[2] = 0.0; tau2 =tau/2.; /* get the uniform gas energy and potential a MINUS was missing in the paper*/ epsx_lsda = -(3./2.)*pow(3./(4*M_PI),1./3.)*pow(rho2[0],4./3.); depsx_lsdadd = -2*pow(3./(4*M_PI),1./3.)*pow(rho2[0],1./3.); /*get Fx for PBE*/ const int np = 1; XC(gga_exc_vxc)(pt->x_aux, np, rho2, sigmatot, &F_PBE, de_PBEdd, de_PBEdgd); /* define x and z from Eq. (1) and Eq. (3) NOTE: we build directly x^2 */ x = grad/(4*pow(rho2[0], 8./3.)); z = 2*tau2/pow(rho2[0],5./3.) - Cfermi; /*THERE IS A 2 IN FRONT AS IN THEOR. CHEM. 
ACCOUNT 120 215 (2008)*/ /*get h and fw*/ x_m06l_h(x, z, rho2[0], tau2, &h, &dhdd, &dhdgd, &dhdtau); x_m06l_fw(rho2[0], tau2, &fw, &dfwdd, &dfwdtau); { /* Eq. (6) E_x = Int F_PBE*fw + exunif*h, the factor 2 accounts for spin. */ *energy = 2*(F_PBE*rho2[0] *fw + epsx_lsda *h); *dedd = (de_PBEdd[0] *fw + F_PBE*rho2[0] * dfwdd+ depsx_lsdadd *h + epsx_lsda * dhdd); *dedtau = (F_PBE * dfwdtau *rho2[0] + epsx_lsda * dhdtau); *vsigma = (de_PBEdgd[0] *fw + epsx_lsda*dhdgd)/2.; } } void XC(mgga_x_m06l)(void *p, const double *rho, const double *sigma, const double *tau, double *e, double *dedd, double *vsigma, double *dedtau) { m06l_params *par = (m06l_params*)p; if(par->common.nspin == XC_UNPOLARIZED){ double en; x_m06l_para(p, rho[0], sigma[0], tau[0], &en, dedd, vsigma, dedtau); *e = en/(rho[0]+rho[1]); }else{ *e = 0.0; double e2na, e2nb, rhoa[2], rhob[2]; double vsigmapart[3]; rhoa[0]=2*rho[0]; rhoa[1]=0.0; rhob[0]=2*rho[1]; rhob[1]=0.0; x_m06l_para(p, rhoa[0], 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0])); x_m06l_para(p, rhob[0], 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1])); *e = (e2na + e2nb )/(2.*(rho[0]+rho[1])); vsigma[0] = 2*vsigmapart[0]; vsigma[2] = 2*vsigmapart[2]; } } static void m06l_init(void *p) { m06l_params *par = (m06l_params*)p; par->c_aux = (XC(func_type) *) malloc(sizeof(XC(func_type))); XC(func_init)(par->c_aux, XC_LDA_C_PW, XC_POLARIZED); par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type))); XC(func_init)(par->x_aux, XC_GGA_X_PBE, XC_POLARIZED); } static void m06l_end(void *p) { m06l_params *par = (m06l_params*)p; XC(func_end)(par->c_aux); free(par->c_aux); XC(func_end)(par->x_aux); free(par->x_aux); } const mgga_func_info m06l_info = { sizeof(m06l_params), &m06l_init, &m06l_end, &XC(mgga_x_m06l), &XC(mgga_c_m06l), }; gpaw-24.1.0/c/xc/pbe.c000066400000000000000000000116421454550013000143030ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying 
LICENSE file for further information. */ #include #include "xc_gpaw.h" double pbe_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2) { double e = C1 / rs; *dedrs = -e / rs; if (par->gga) { double c = C2 * rs / n; c *= c; double s2 = a2 * c; double x = 1.0 + MU * s2 / par->kappa; double Fx = 1.0 + par->kappa - par->kappa / x; double dFxds2 = MU / (x * x); double ds2drs = 8.0 * c * a2 / rs; //double ds2drs = 8.0 * s2 / rs; *dedrs = *dedrs * Fx + e * dFxds2 * ds2drs; *deda2 = e * dFxds2 * c; e *= Fx; } return e; } /* inline */ double G(double rtrs, double A, double alpha1, double beta1, double beta2, double beta3, double beta4, double* dGdrs) { double Q0 = -2.0 * A * (1.0 + alpha1 * rtrs * rtrs); double Q1 = 2.0 * A * rtrs * (beta1 + rtrs * (beta2 + rtrs * (beta3 + rtrs * beta4))); double G1 = Q0 * log(1.0 + 1.0 / Q1); double dQ1drs = A * (beta1 / rtrs + 2.0 * beta2 + rtrs * (3.0 * beta3 + 4.0 * beta4 * rtrs)); *dGdrs = -2.0 * A * alpha1 * G1 / Q0 - Q0 * dQ1drs / (Q1 * (Q1 + 1.0)); return G1; } /* * In[1]:= H=g Log[1+b/g t^2(1+a t^2)/(1+a t^2 + a^2 t^4)] * * 2 2 * b t (1 + a t ) * Out[1]= g Log[1 + --------------------] * 2 2 4 * g (1 + a t + a t ) * * In[4]:= Simplify[D[H,t]] * * 2 * 2 b g t (1 + 2 a t ) * Out[4]= --------------------------------------------------------- * 2 2 4 2 2 4 2 4 * (1 + a t + a t ) (g + b t + a g t + a b t + a g t ) * */ double pbe_correlation(double n, double rs, double zeta, double a2, bool gga, bool spinpol, double* dedrs, double* dedzeta, double* deda2) { double rtrs = sqrt(rs); double de0drs; double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294, &de0drs); double e; double xp = 117.0; double xm = 117.0; if (spinpol) { double de1drs; double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662, 0.62517, &de1drs); double dalphadrs; double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026, 0.49671, &dalphadrs); dalphadrs = -dalphadrs; double zp = 1.0 + zeta; double zm 
= 1.0 - zeta; xp = pow(zp, THIRD); xm = pow(zm, THIRD); double f = CC1 * (zp * xp + zm * xm - 2.0); double f1 = CC2 * (xp - xm); double zeta2 = zeta * zeta; double zeta3 = zeta2 * zeta; double zeta4 = zeta2 * zeta2; double x = 1.0 - zeta4; *dedrs = (de0drs * (1.0 - f * zeta4) + de1drs * f * zeta4 + dalphadrs * f * x * IF2); *dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) + f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2)); e = e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4; } else { *dedrs = de0drs; e = e0; } if (gga) { double n2 = n * n; double t2; double y; double phi = 117.0; double phi2 = 117.0; double phi3 = 117.0; if (spinpol) { phi = 0.5 * (xp * xp + xm * xm); phi2 = phi * phi; phi3 = phi * phi2; t2 = C3 * a2 * rs / (n2 * phi2); y = -e / (GAMMA * phi3); } else { t2 = C3 * a2 * rs / n2; y = -e / GAMMA; } double x = exp(y); double A; if (x != 1.0) A = BETA / (GAMMA * (x - 1.0)); else A = BETA / (GAMMA * y); double At2 = A * t2; double nom = 1.0 + At2; double denom = nom + At2 * At2; double H = GAMMA * log( 1.0 + BETA * t2 * nom / (denom * GAMMA)); double tmp = (GAMMA * BETA / (denom * (BETA * t2 * nom + GAMMA * denom))); double tmp2 = A * A * x / BETA; double dAdrs = tmp2 * *dedrs; if (spinpol) { H *= phi3; tmp *= phi3; dAdrs /= phi3; } double dHdt2 = (1.0 + 2.0 * At2) * tmp; double dHdA = -At2 * t2 * t2 * (2.0 + At2) * tmp; *dedrs += dHdt2 * 7 * t2 / rs + dHdA * dAdrs; *deda2 = dHdt2 * C3 * rs / n2; if (spinpol) { double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0; double dAdzeta = tmp2 * (*dedzeta - 3.0 * e * dphidzeta / phi) / phi3; *dedzeta += ((3.0 * H / phi - dHdt2 * 2.0 * t2 / phi ) * dphidzeta + dHdA * dAdzeta); *deda2 /= phi2; } e += H; } return e; } gpaw-24.1.0/c/xc/pw91.c000066400000000000000000000122451454550013000143350ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. 
*/ #include #include "xc_gpaw.h" double G(double rtrs, double A, double alpha1, double beta1, double beta2, double beta3, double beta4, double* dGdrs); double pw91_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2) { double e = C1 / rs; *dedrs = -e / rs; if (par->gga) { double c = C2 * rs / n; c *= c; double s2 = a2 * c; double s = sqrt(s2); double f1 = 7.7956 * s; double f2 = 0.19645 * asinh(f1); double f3 = 0.1508 * exp(-100.0 * s2); double f4 = 0.004 * s2 * s2; double f5 = 1.0 + s * f2; double f6 = f5 + f4; double f7 = 0.2743 - f3; double f8 = f5 + f7 * s2; double Fx = f8 / f6; double f9 = 0.5 * 7.7956 * 0.19645 / sqrt(1.0 + f1 * f1); if (s < 0.00001) f9 += 0.5 * 7.7956 * 0.19645; else f9 += 0.5 * f2 / s; double dFxds2 = ((f9 + f7 + 100.0 * f3 * s2) * f6 - f8 * (f9 + 0.008 * s2)) / (f6 * f6); double ds2drs = 8.0 * s2 / rs; *dedrs = *dedrs * Fx + e * dFxds2 * ds2drs; *deda2 = e * dFxds2 * c; e *= Fx; } return e; } double pw91_correlation(double n, double rs, double zeta, double a2, bool gga, bool spinpol, double* dedrs, double* dedzeta, double* deda2) { double rtrs = sqrt(rs); double de0drs; double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294, &de0drs); double e; double xp = 117.0; double xm = 117.0; if (spinpol) { double de1drs; double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662, 0.62517, &de1drs); double dalphadrs; double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026, 0.49671, &dalphadrs); dalphadrs = -dalphadrs; double zp = 1.0 + zeta; double zm = 1.0 - zeta; xp = pow(zp, THIRD); xm = pow(zm, THIRD); double f = CC1 * (zp * xp + zm * xm - 2.0); double f1 = CC2 * (xp - xm); double zeta2 = zeta * zeta; double zeta3 = zeta2 * zeta; double zeta4 = zeta2 * zeta2; double x = 1.0 - zeta4; *dedrs = (de0drs * (1.0 - f * zeta4) + de1drs * f * zeta4 + dalphadrs * f * x * IF2); *dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) + f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2)); e = 
e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4; } else { *dedrs = de0drs; e = e0; } if (gga) { double n2 = n * n; double t2; double y; double phi; double phi2; double phi3; double phi4; double GAMMAPW91 = BETA * BETA / 0.18; if (spinpol) { phi = 0.5 * (xp * xp + xm * xm); phi2 = phi * phi; phi3 = phi * phi2; phi4 = phi * phi3; } else { phi = 1.0; phi2 = 1.0; phi3 = 1.0; phi4 = 1.0; } t2 = C3 * a2 * rs / (n2 * phi2); y = -e / (GAMMAPW91 * phi3); double x = exp(y); double A = BETA / (GAMMAPW91 * (x - 1.0)); double At2 = A * t2; double nom = 1.0 + At2; double denom = nom + At2 * At2; double H0 = (phi3 * GAMMAPW91 * log(1.0 + BETA * t2 * nom / (denom * GAMMAPW91))); double tmp = (phi3 * GAMMAPW91 * BETA / (denom * (BETA * t2 * nom + GAMMAPW91 * denom))); double tmp2 = A * A * x / BETA; double dAdrs = tmp2 * *dedrs / phi3; const double KK = 66.343643960645011; // 100*4/pi*(4/pi/9)**(1/3.) const double XNU = 15.75592; const double Cc0 = 0.004235; const double Cx = -0.001667212; const double K1 = 0.002568; const double K2 = 0.023266; const double K3 = 7.389e-6; const double K4 = 8.723; const double K5 = 0.472; const double K6 = 7.389e-2; double f0 = XNU * exp(-KK * rs * phi4 * t2); double rs2 = rs * rs; double f1 = K1 + K2 * rs + K3 * rs2; double f2 = 1.0 + K4 * rs + K5 * rs2 + K6 * rs2 * rs; double f3 = -10.0 * Cx / 7.0 - Cc0 + f1 / f2; double H1 = f0 * phi3 * f3 * t2; double dH1drs = (-KK * phi4 * t2 * H1 + f0 * phi3 * t2 * ((K2 + 2.0 * K3 * rs) * f2 - (K4 + 2.0 * K5 * rs + 3.0 * K6 * rs2) * f1) / (f2 * f2)); double dH1dt2 = -KK * rs * phi4 * H1 + f0 * phi3 * f3; double dH1dphi = (-4.0 * KK * rs * phi3 * H1 + 3.0 * f0 * phi2 * f3) * t2; double dH0dt2 = (1.0 + 2.0 * At2) * tmp; double dH0dA = -At2 * t2 * t2 * (2.0 + At2) * tmp; *dedrs += (dH0dt2 + dH1dt2) * 7 * t2 / rs + dH0dA * dAdrs + dH1drs; *deda2 = (dH0dt2 + dH1dt2) * C3 * rs / n2; if (spinpol) { double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0; double dAdzeta = tmp2 * (*dedzeta - 3.0 * e * dphidzeta / phi) / phi3; 
*dedzeta += ((3.0 * H0 / phi - dH0dt2 * 2.0 * t2 / phi ) * dphidzeta + dH0dA * dAdzeta); *dedzeta += (dH1dphi - dH1dt2 * 2.0 * t2 / phi ) * dphidzeta; *deda2 /= phi2; } e += H0 + H1; } return e; } gpaw-24.1.0/c/xc/revtpss.c000066400000000000000000000401721454550013000152430ustar00rootroot00000000000000 #include #include #include #include #include "xc_mgga.h" typedef struct revtpss_params { common_params common; // needs to be at the beginning of every functional_params XC(func_type) *x_aux; XC(func_type) c_aux1; XC(func_type) c_aux2; } revtpss_params; void gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma, double *e, double *vrho, double *vsigma, double *v2rho2, double *v2rhosigma, double *v2sigma2); /************************************************************************ Implements John P. Perdew, Adrienn Ruzsinszky, Gabor I. Csonka, Lucian A. Constantin, and Jianwei Sun meta-Generalized Gradient Approximation. Correlation part ************************************************************************/ /* some parameters */ static double d = 2.8; /* Equation (14) */ static void c_revtpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta) { double fz, C0, dC0dz, dfzdz; double z2 = zeta*zeta; /* Equation (13) */ C0 = 0.59 + z2*(0.9269 + z2*(0.6225 + z2*2.1540)); dC0dz = zeta*(2.0*0.9269 + z2*(4.0*0.6225 + z2*6.0*2.1540)); /*OK*/ fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0)); dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/ { /* Equation (14) */ double csi2 = csi*csi; double a = 1.0 + csi2*fz, a4 = pow(a, 4); *C = C0 / a4; *dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/ *dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/ } } /* Equation (12) */ static void c_revtpss_12(revtpss_params *p, const double *rho, const double *sigma, double dens, double zeta, double z, double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz) { /*some incoming 
variables: dens = rho[0] + rho[1] z = tau_w/tau zeta = (rho[0] - rho[1])/dens*/ double e_PBE, e_PBEup, e_PBEdn; double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ; double aux, zsq; double dzetadd[2], dcsidd[2], dcsidsigma[3]; double C, dCdcsi, dCdzeta; double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3]; int i; /*initialize dCdcsi and dCdzeta and the energy*/ dCdcsi = dCdzeta = 0.0; e_PBE = 0.0; e_PBEup = 0.0; e_PBEdn = 0.0; /* get the PBE stuff */ if(p->common.nspin== XC_UNPOLARIZED) { densp[0]=rho[0]/2.; densp[1]=rho[0]/2.; sigmatot[0] = sigma[0]/4.; sigmatot[1] = sigma[0]/4.; sigmatot[2] = sigma[0]/4.; }else{ densp[0] = rho[0]; densp[1] = rho[1]; sigmatot[0] = sigma[0]; sigmatot[1] = sigma[1]; sigmatot[2] = sigma[2]; } /* e_PBE */ XC(func_type) *aux2 = (p->common.nspin == XC_UNPOLARIZED) ? &p->c_aux2 : &p->c_aux1; gga_c_pbe_revtpss(aux2, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma, NULL, NULL, NULL); densp2[0]=densp[0]; densp2[1]=0.0; if(p->common.nspin== XC_UNPOLARIZED) { sigmaup[0] = sigma[0]/4.; sigmaup[1] = 0.; sigmaup[2] = 0.; }else{ sigmaup[0] = sigma[0]; sigmaup[1] = 0.; sigmaup[2] = 0.; } /* e_PBE spin up */ gga_c_pbe_revtpss(aux2, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup, NULL, NULL, NULL); densp2[0]=densp[1]; densp2[1]=0.0; if(p->common.nspin== XC_UNPOLARIZED) { sigmadn[0] = sigma[0]/4.; sigmadn[1] = 0.; sigmadn[2] = 0.; }else{ sigmadn[0] = sigma[2]; sigmadn[1] = 0.; sigmadn[2] = 0.; } /* e_PBE spin down */ gga_c_pbe_revtpss(aux2, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn, NULL, NULL, NULL); /*get Eq. 
(13) and (14) for the polarized case*/ if(p->common.nspin == XC_UNPOLARIZED){ C = 0.59; dzetadd[0] = 0.0; dcsidd [0] = 0.0; dzetadd[1] = 0.0; dcsidd [1] = 0.0; for(i=0; i<3; i++) dcsidsigma[i] = 0.0; }else{ // initialize derivatives for(i=0; i<2; i++){ dzetadd[i] = 0.0; dcsidd [i] = 0.0;} for(i=0; i<3; i++) dcsidsigma[i] = 0.0; double num, gzeta, csi, a; /*numerator of csi: derive as grho all components and then square the 3 parts [2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2 -> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_b^2)/(n_a+n_b)^2 */ num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2); num = max(num, 1e-20); gzeta = sqrt(4*(num))/(dens*dens); gzeta = max(gzeta, MIN_GRAD); /*denominator of csi*/ a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0); csi = gzeta/a; c_revtpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta); dzetadd[0] = (1.0 - zeta)/dens; /*OK*/ dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/ dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/ dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/ dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/ dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/ dcsidsigma[2]= csi*pow(rho[0],2)/(2*num); /*OK*/ } aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens; double dauxdd[2], dauxdsigma[3]; if(e_PBEup > e_PBE) { //case densp[0] * e_PBEup dauxdd[0] = de_PBEddup[0]; dauxdd[1] = 0.0; dauxdsigma[0] = de_PBEdsigmaup[0]; dauxdsigma[1] = 0.0; dauxdsigma[2] = 0.0; }else{ //case densp[0] * e_PBE dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE; dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE); dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0]; dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1]; dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2]; } if(e_PBEdn > e_PBE) {//case densp[1] * e_PBEdn dauxdd[0] += 0.0; dauxdd[1] += de_PBEdddn[0]; 
dauxdsigma[0] += 0.0; dauxdsigma[1] += 0.0; dauxdsigma[2] += de_PBEdsigmadn[0]; }else{//case densp[1] * e_PBE dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE); dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE; dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0]; dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1]; dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2]; } zsq=z*z; *e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux); *de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/ double dCdd[2]; dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/ dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/ /* partial derivatives*/ de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq - zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]); de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq - zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]); int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3; for(i=0; icommon.nspin==XC_UNPOLARIZED) dauxdsigma[i] /= 2.; double dCdsigma = dCdcsi*dcsidsigma[i]; /* partial derivatives*/ de_PKZBdsigma[i] = de_PBEdsigma[i] * (1.0 + C * zsq) + dens * e_PBE * dCdsigma * zsq - zsq * (dens * dCdsigma * aux + (1.0 + C) * dauxdsigma[i]); } } static void XC(mgga_c_revtpss)(void *par, const double *rho, const double *sigmatmp, const double *tau, double *energy, double *dedd, double *vsigma, double *dedtau) { double sigma[3]; revtpss_params *p = (revtpss_params*)par; double dens, zeta, grad; double tautr, taut, tauw, z; double e_PKZB, de_PKZBdd[2], de_PKZBdsigma[3], de_PKZBdz; int i, is; sigma[0] = sigmatmp[0]; sigma[1] = 0.0; sigma[2] = 0.0; zeta = (rho[0]-rho[1])/(rho[0]+rho[1]); dens = rho[0]; tautr = tau[0]; grad = sigma[0]; if(p->common.nspin == XC_POLARIZED) { dens += rho[1]; tautr += tau[1]; sigma[1] = sigmatmp[1]; sigma[2] = sigmatmp[2]; grad += (2*sigma[1] + sigma[2]); } grad = max(MIN_GRAD*MIN_GRAD, grad); tauw = max(grad/(8.0*dens), 1.0e-12); 
taut = max(tautr, tauw); z = tauw/taut; sigma[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]); if(p->common.nspin == XC_POLARIZED) { //sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]); sigma[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]); } /* Equation (12) */ c_revtpss_12(p, rho, sigma, dens, zeta, z, &e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz); /* Equation (11) */ { double z2 = z*z, z3 = z2*z; double dedz; double dzdd[2], dzdsigma[3], dzdtau; if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){ dzdtau = 0.0; dzdd[0] = 0.0; dzdd[1] = 0.0; dzdsigma[0] = 0.0; dzdsigma[1] = 0.0; dzdsigma[2] = 0.0; }else{ dzdtau = -z/taut; dzdd[0] = - z/dens; dzdd[1] = 0.0; if (p->common.nspin == XC_POLARIZED) dzdd[1] = - z/dens; dzdsigma[0] = 1.0/(8*dens*taut); dzdsigma[1] = 0.0; dzdsigma[2] = 0.0; if (p->common.nspin == XC_POLARIZED) { dzdsigma[1] = 2.0/(8*dens*taut); dzdsigma[2] = 1.0/(8*dens*taut); } } *energy = e_PKZB * (1.0 + d*e_PKZB*z3); /* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the * same energy for polarized and unpolarized calculation with the same total density */ if(p->common.nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]); dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2; for(is=0; iscommon.nspin; is++){ dedd[is] = de_PKZBdd[is] * (1.0 + 2.0*d*e_PKZB*z3) + dedz*dzdd[is] - e_PKZB*e_PKZB * d * z3; /*OK*/ dedtau[is] = dedz * dzdtau; /*OK*/ } int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3; for(i=0; ix_aux, np, rho, &exunif, &vxunif); /* calculate |nabla rho|^2 */ gdms = max(MIN_GRAD*MIN_GRAD, sigma); /* Eq. 
(4) */ p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0)); dpdd = -(8.0/3.0)*p/rho[0]; dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0)); /* von Weisaecker kinetic energy density */ tauw = max(gdms/(8.0*rho[0]), 1.0e-12); tau = max(tau_, tauw); tau_lsda = aux * pow(rho[0],5./3.); dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.); alpha = (tau - tauw)/tau_lsda; if(fabs(tauw-tau_)< 1.0e-10){ dalphadsigma = 0.0; dalphadtau = 0.0; dalphadd = 0.0; }else{ dalphadtau = 1./tau_lsda; dalphadsigma = -1./(tau_lsda*8.0*rho[0]); dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.); } /* get Eq. (10) */ x_revtpss_10(p, alpha, &x, &dxdp, &dxdalpha); { /* Eq. (5) */ double a = kappa/(kappa + x); Fx = 1.0 + kappa*(1.0 - a); dFxdx = a*a; } { /* Eq. (3) */ *energy = exunif*Fx*rho[0]; //printf("Ex %.9e\n", *energy); /* exunif is en per particle already so we multiply by n the terms with exunif*/ *dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd); *vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma); *dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau); } } void XC(mgga_x_revtpss)(void *par, const double *rho, const double *sigma, const double *tau, double *e, double *dedd, double *vsigma, double *dedtau) { revtpss_params *p = (revtpss_params*)par; if(p->common.nspin == XC_UNPOLARIZED){ double en; x_revtpss_para(p, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau); *e = en/(rho[0]+rho[1]); }else{ /* The spin polarized version is handle using the exact spin scaling Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2 */ *e = 0.0; double e2na, e2nb, rhoa[2], rhob[2]; double vsigmapart[3]; rhoa[0]=2*rho[0]; rhoa[1]=0.0; rhob[0]=2*rho[1]; rhob[1]=0.0; x_revtpss_para(p, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0])); x_revtpss_para(p, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1])); *e = (e2na + e2nb )/(2.*(rho[0]+rho[1])); vsigma[0] = 2*vsigmapart[0]; 
vsigma[2] = 2*vsigmapart[2]; } } static void revtpss_init(void *p) { revtpss_params *par = (revtpss_params*)p; par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type))); XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED); XC(func_init)(&par->c_aux1, XC_LDA_C_PW_MOD, par->common.nspin); XC(func_init)(&par->c_aux2, XC_LDA_C_PW_MOD, XC_POLARIZED); } static void revtpss_end(void *p) { revtpss_params *par = (revtpss_params*)p; XC(func_end)(par->x_aux); free(par->x_aux); XC(func_end)(&par->c_aux1); XC(func_end)(&par->c_aux2); } const mgga_func_info revtpss_info = { sizeof(revtpss_params), &revtpss_init, &revtpss_end, &XC(mgga_x_revtpss), &XC(mgga_c_revtpss) }; gpaw-24.1.0/c/xc/revtpss_c_pbe.c000066400000000000000000000342521454550013000163750ustar00rootroot00000000000000 #include #include #include #include #include #include "xc_mgga.h" /************************************************************************ Implements Perdew, Burke & Ernzerhof Generalized Gradient Approximation correlation functional. I based this implementation on a routine from L.C. Balbas and J.M. 
Soler ************************************************************************/ // from old libxc util.h #define RS(x) (pow((3.0/(4*M_PI*x)), 1.0/3.0)) typedef struct XC(perdew_t) { int nspin; double dens, zeta, gdmt; double ecunif, vcunif[2], fcunif[3]; double rs, kf, ks, phi, t; double drs, dkf, dks, dphi, dt, decunif; double d2rs2, d2rskf, d2rsks, d2rsphi, d2rst, d2rsecunif; double d2kf2, d2kfks, d2kfphi, d2kft, d2kfecunif; double d2ks2, d2ksphi, d2kst, d2ksecunif; double d2phi2, d2phit, d2phiecunif; double d2t2, d2tecunif; double d2ecunif2; } XC(perdew_t); // from old libxc util.c /* this function converts the spin-density into total density and relative magnetization */ inline void XC(rho2dzeta)(int nspin, const double *rho, double *d, double *zeta) { assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED); if(nspin==XC_UNPOLARIZED){ *d = max(MIN_DENS, rho[0]); *zeta = 0.0; }else{ *d = max(MIN_DENS, rho[0]+rho[1]); *zeta = (*d > MIN_DENS) ? (rho[0]-rho[1])/(*d) : 0.0; } } // from old libxc gga_perdew.c static void XC(perdew_params)(const XC(func_type) *gga_p, const double *rho, const double *sigma, int order, XC(perdew_t) *pt) { pt->nspin = gga_p->nspin; XC(rho2dzeta)(pt->nspin, rho, &(pt->dens), &(pt->zeta)); const int np = 1; switch (order){ case 0: XC(lda_exc) (gga_p, np, rho, &(pt->ecunif)); break; case 1: XC(lda_exc_vxc)(gga_p, np, rho, &(pt->ecunif), pt->vcunif); break; case 2: #if XC_MAJOR_VERSION >= 5 XC(lda)(gga_p, np, rho, &(pt->ecunif), pt->vcunif, pt->fcunif, NULL, NULL); #else XC(lda)(gga_p, np, rho, &(pt->ecunif), pt->vcunif, pt->fcunif, NULL); #endif break; } pt->rs = RS(pt->dens); pt->kf = pow(3.0*M_PI*M_PI*pt->dens, 1.0/3.0); pt->ks = sqrt(4.0*pt->kf/M_PI); /* phi is bounded between 2^(-1/3) and 1 */ pt->phi = 0.5*(pow(1.0 + pt->zeta, 2.0/3.0) + pow(1.0 - pt->zeta, 2.0/3.0)); /* get gdmt = |nabla n| */ pt->gdmt = sigma[0]; if(pt->nspin == XC_POLARIZED) pt->gdmt += 2.0*sigma[1] + sigma[2]; if(pt->gdmt < MIN_GRAD*MIN_GRAD) pt->gdmt = 
MIN_GRAD*MIN_GRAD; pt->gdmt = sqrt(pt->gdmt); pt->t = pt->gdmt/(2.0 * pt->phi * pt->ks * pt->dens); if(order > 0) pt->drs = pt->dkf = pt->dks = pt->dphi = pt->dt = pt->decunif = 0.0; if(order > 1){ pt->d2rs2 = pt->d2rskf = pt->d2rsks = pt->d2rsphi = pt->d2rst = pt->d2rsecunif = 0.0; pt->d2kf2 = pt->d2kfks = pt->d2kfphi = pt->d2kft = pt->d2kfecunif = 0.0; pt->d2ks2 = pt->d2ksphi = pt->d2kst = pt->d2ksecunif = 0.0; pt->d2phi2 = pt->d2phit = pt->d2phiecunif = 0.0; pt->d2t2 = pt->d2tecunif = 0.0; pt->d2ecunif2 = 0.0; } } static void XC(perdew_potentials)(XC(perdew_t) *pt, const double *rho, double e_gga, int order, double *vrho, double *vsigma, double *v2rho2, double *v2rhosigma, double *v2sigma2) { /* alpha = {0->rs, 1->kf, 2->ks, 3->phi, 4->t, 5->ec */ double dalphadd[6][2], dFdalpha[6]; double d2alphadd2[6][3], d2Fdalpha2[6][6]; double dzdd[2], dpdz, d2zdd2[3], d2pdz2; double dtdsig, d2tdsig2; int is, js, ks, ns; if(order < 1) return; if(pt->nspin == XC_POLARIZED){ dpdz = 0.0; if(fabs(1.0 + pt->zeta) >= MIN_DENS) dpdz += 1.0/(3.0*pow(1.0 + pt->zeta, 1.0/3.0)); if(fabs(1.0 - pt->zeta) >= MIN_DENS) dpdz -= 1.0/(3.0*pow(1.0 - pt->zeta, 1.0/3.0)); dzdd[0] = (1.0 - pt->zeta)/pt->dens; dzdd[1] = -(1.0 + pt->zeta)/pt->dens; }else{ dpdz = 0.0; dzdd[0] = 0.0; } dFdalpha[0] = pt->drs; dFdalpha[1] = pt->dkf; dFdalpha[2] = pt->dks; dFdalpha[3] = pt->dphi; dFdalpha[4] = pt->dt; dFdalpha[5] = pt->decunif; for(is=0; isnspin; is++){ dalphadd[0][is] = -pt->rs/(3.0*pt->dens); dalphadd[1][is] = pt->kf/(3.0*pt->dens); dalphadd[2][is] = pt->ks*dalphadd[1][is]/(2.0*pt->kf); dalphadd[3][is] = dpdz*dzdd[is]; dalphadd[4][is] = -pt->t*(1.0/pt->dens + dalphadd[2][is]/pt->ks + dalphadd[3][is]/pt->phi);; dalphadd[5][is] = (pt->vcunif[is] - pt->ecunif)/pt->dens; } /* calculate vrho */ if(vrho != NULL) for(is=0; isnspin; is++){ if(rho[is] > MIN_DENS){ int k; vrho[is] = e_gga; for(k=0; k<6; k++) vrho[is] += pt->dens * dFdalpha[k]*dalphadd[k][is]; }else{ vrho[is] = 0.0; } } dtdsig = 
pt->t/(2.0*pt->gdmt*pt->gdmt); if(vrho != NULL){ /* calculate now vsigma */ vsigma[0] = pt->dens*pt->dt*dtdsig; if(pt->nspin == XC_POLARIZED){ vsigma[1] = 2.0*vsigma[0]; vsigma[2] = vsigma[0]; } } if(order < 2) return; /* first let us sort d2Fdalpha2 in a matrix format */ d2Fdalpha2[0][0] = pt->d2rs2; d2Fdalpha2[0][1] = pt->d2rskf; d2Fdalpha2[0][2] = pt->d2rsks; d2Fdalpha2[0][3] = pt->d2rst; d2Fdalpha2[0][4] = pt->d2rsphi; d2Fdalpha2[0][5] = pt->d2rsecunif; d2Fdalpha2[1][0] = d2Fdalpha2[0][1]; d2Fdalpha2[1][1] = pt->d2kf2; d2Fdalpha2[1][2] = pt->d2kfks; d2Fdalpha2[1][3] = pt->d2kft; d2Fdalpha2[1][4] = pt->d2kfphi; d2Fdalpha2[1][5] = pt->d2kfecunif; d2Fdalpha2[2][0] = d2Fdalpha2[0][2]; d2Fdalpha2[2][1] = d2Fdalpha2[1][2]; d2Fdalpha2[2][2] = pt->d2ks2; d2Fdalpha2[2][3] = pt->d2kst; d2Fdalpha2[2][4] = pt->d2ksphi; d2Fdalpha2[2][5] = pt->d2ksecunif; d2Fdalpha2[3][0] = d2Fdalpha2[0][3]; d2Fdalpha2[3][1] = d2Fdalpha2[1][3]; d2Fdalpha2[3][2] = d2Fdalpha2[2][3]; d2Fdalpha2[3][3] = pt->d2phi2; d2Fdalpha2[3][4] = pt->d2phit; d2Fdalpha2[3][5] = pt->d2phiecunif; d2Fdalpha2[4][0] = d2Fdalpha2[0][4]; d2Fdalpha2[4][1] = d2Fdalpha2[1][4]; d2Fdalpha2[4][2] = d2Fdalpha2[2][4]; d2Fdalpha2[4][3] = d2Fdalpha2[3][4]; d2Fdalpha2[4][4] = pt->d2t2; d2Fdalpha2[4][5] = pt->d2tecunif; d2Fdalpha2[5][0] = d2Fdalpha2[0][5]; d2Fdalpha2[5][1] = d2Fdalpha2[1][5]; d2Fdalpha2[5][2] = d2Fdalpha2[2][5]; d2Fdalpha2[5][3] = d2Fdalpha2[3][5]; d2Fdalpha2[5][4] = d2Fdalpha2[4][5]; d2Fdalpha2[5][5] = pt->d2ecunif2; /* now we sort d2alphadd2 */ if(pt->nspin == XC_POLARIZED){ d2pdz2 = 0.0; if(fabs(1.0 + pt->zeta) >= MIN_DENS) d2pdz2 += -(1.0/9.0)*pow(1.0 + pt->zeta, -4.0/3.0); if(fabs(1.0 - pt->zeta) >= MIN_DENS) d2pdz2 += -(1.0/9.0)*pow(1.0 - pt->zeta, -4.0/3.0); d2zdd2[0] = -2.0*dzdd[0]/pt->dens; d2zdd2[1] = 2.0*pt->zeta/(pt->dens*pt->dens); d2zdd2[2] = -2.0*dzdd[1]/pt->dens; }else{ d2pdz2 = 0.0; d2zdd2[0] = 0.0; } ns = (pt->nspin == XC_UNPOLARIZED) ? 
0 : 2; for(ks=0; ks<=ns; ks++){ is = (ks == 0 || ks == 1) ? 0 : 1; js = (ks == 0 ) ? 0 : 1; d2alphadd2[0][ks] = 4.0/9.0*pt->rs/(pt->dens*pt->dens); d2alphadd2[1][ks] = -2.0/9.0*pt->kf/(pt->dens*pt->dens); d2alphadd2[2][ks] = pt->ks/(2.0*pt->kf)* (d2alphadd2[1][ks] - dalphadd[1][is]*dalphadd[1][js]/(2.0*pt->kf)); d2alphadd2[3][ks] = d2pdz2*dzdd[is]*dzdd[js] + dpdz*d2zdd2[ks]; d2alphadd2[4][ks] = pt->t * (+2.0/(pt->dens*pt->dens) +2.0/(pt->ks*pt->ks) *(dalphadd[2][is] * dalphadd[2][js]) +2.0/(pt->phi*pt->phi) *(dalphadd[3][is] * dalphadd[3][js]) +1.0/(pt->dens*pt->ks) *(dalphadd[2][is] + dalphadd[2][js]) +1.0/(pt->dens*pt->phi)*(dalphadd[3][is] + dalphadd[3][js]) +1.0/(pt->ks*pt->phi) *(dalphadd[2][is]*dalphadd[3][js] + dalphadd[2][js]*dalphadd[3][is]) -1.0/(pt->ks)*d2alphadd2[2][ks] -1.0/(pt->phi)*d2alphadd2[3][ks]); d2alphadd2[5][ks] = pt->fcunif[ks]/pt->dens - (pt->vcunif[is] + pt->vcunif[js] - 2.0*pt->ecunif)/(pt->dens*pt->dens); } for(ks=0; ks<=ns; ks++){ int j, k; is = (ks == 0 || ks == 1) ? 0 : 1; js = (ks == 0 ) ? 0 : 1; v2rho2[ks] = 0.0; for(j=0; j<6; j++){ v2rho2[ks] += dFdalpha[j]*(dalphadd[j][is] + dalphadd[j][js]); v2rho2[ks] += pt->dens * dFdalpha[j]*d2alphadd2[j][ks]; for(k=0; k<6; k++) v2rho2[ks] += pt->dens * d2Fdalpha2[j][k]*dalphadd[j][is]*dalphadd[k][js]; } } /* now we handle v2rhosigma */ for(is=0; isnspin; is++){ int j; ks = (is == 0) ? 
0 : 5; v2rhosigma[ks] = dFdalpha[4]*dtdsig; for(j=0; j<6; j++) v2rhosigma[ks] += pt->dens * d2Fdalpha2[4][j]*dalphadd[j][is]*dtdsig; v2rhosigma[ks] += pt->dens * dFdalpha[4]*dalphadd[4][is]/(2.0*pt->gdmt*pt->gdmt); } if(pt->nspin == XC_POLARIZED){ v2rhosigma[1] = 2.0*v2rhosigma[0]; v2rhosigma[2] = v2rhosigma[0]; v2rhosigma[3] = v2rhosigma[5]; v2rhosigma[4] = 2.0*v2rhosigma[5]; } /* now wwe take care of v2sigma2 */ d2tdsig2 = -dtdsig/(2.0*pt->gdmt*pt->gdmt); v2sigma2[0] = pt->dens*(pt->d2t2*dtdsig*dtdsig + pt->dt*d2tdsig2); if(pt->nspin == XC_POLARIZED){ v2sigma2[1] = 2.0*v2sigma2[0]; /* aa_ab */ v2sigma2[2] = v2sigma2[0]; /* aa_bb */ v2sigma2[3] = 4.0*v2sigma2[0]; /* ab_ab */ v2sigma2[4] = 2.0*v2sigma2[0]; /* ab_bb */ v2sigma2[5] = v2sigma2[0]; /* bb_bb */ } } // from old libxc gga_c_pbe.c static const double beta[4] = { 0.06672455060314922, /* original PBE */ 0.046, /* PBE sol */ 0.089809, 0.06672455060314922 /* PBE for revTPSS */ }; static double gamm[4]; static inline void pbe_eq8(int func, int order, double rs, double ecunif, double phi, double *A, double *dec, double *dphi, double *drs, double *dec2, double *decphi, double *dphi2) { double phi3, f1, df1dphi, d2f1dphi2, f2, f3, dx, d2x; phi3 = pow(phi, 3); f1 = ecunif/(gamm[func]*phi3); f2 = exp(-f1); f3 = f2 - 1.0; *A = beta[func]/(gamm[func]*f3); if(func == 3) *A *= (1. + 0.1*rs)/(1. 
+ 0.1778*rs); if(order < 1) return; df1dphi = -3.0*f1/phi; dx = (*A)*f2/f3; *dec = dx/(gamm[func]*phi3); *dphi = dx*df1dphi; *drs = 0.0; if(func == 3) *drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))/(gamm[func]*f3); if(func ==3) return; if(order < 2) return; d2f1dphi2 = -4.0*df1dphi/phi; d2x = dx*(2.0*f2 - f3)/f3; *dphi2 = d2x*df1dphi*df1dphi + dx*d2f1dphi2; *decphi = (d2x*df1dphi*f1 + dx*df1dphi)/ecunif; *dec2 = d2x/(gamm[func]*gamm[func]*phi3*phi3); } static void pbe_eq7(int func, int order, double rs, double phi, double t, double A, double *H, double *dphi, double *drs, double *dt, double *dA, double *d2phi, double *d2phit, double *d2phiA, double *d2t2, double *d2tA, double *d2A2) { double t2, phi3, f1, f2, f3; double df1dt, df2drs, df2dt, df1dA, df2dA; double d2f1dt2, d2f2dt2, d2f2dA2, d2f1dtA, d2f2dtA; t2 = t*t; phi3 = pow(phi, 3); f1 = t2 + A*t2*t2; f3 = 1.0 + A*f1; f2 = beta[func]*f1/(gamm[func]*f3); if(func == 3) f2 *= (1. + 0.1*rs)/(1. + 0.1778*rs); *H = gamm[func]*phi3*log(1.0 + f2); if(order < 1) return; *dphi = 3.0*(*H)/phi; df1dt = t*(2.0 + 4.0*A*t2); df2dt = beta[func]/(gamm[func]*f3*f3) * df1dt; if(func == 3) df2dt*=(1. + 0.1*rs)/(1. + 0.1778*rs); *dt = gamm[func]*phi3*df2dt/(1.0 + f2); df1dA = t2*t2; df2dA = beta[func]/(gamm[func]*f3*f3) * (df1dA - f1*f1); if(func == 3) df2dA *= (1. + 0.1*rs)/(1. 
+ 0.1778*rs); *dA = gamm[func]*phi3*df2dA/(1.0 + f2); df2drs = 0.0; *drs = 0.0; if(func == 3){ df2drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))*f1/(gamm[func]*f3); *drs = gamm[func]*phi3*df2drs/(1.0 + f2); } if(func ==3) return; if(order < 2) return; *d2phi = 2.0*(*dphi)/phi; *d2phit = 3.0*(*dt)/phi; *d2phiA = 3.0*(*dA)/phi; d2f1dt2 = 2.0 + 4.0*3.0*A*t2; d2f2dt2 = beta[func]/(gamm[func]*f3*f3) * (d2f1dt2 - 2.0*A/f3*df1dt*df1dt); *d2t2 = gamm[func]*phi3*(d2f2dt2*(1.0 + f2) - df2dt*df2dt)/((1.0 + f2)*(1.0 + f2)); d2f1dtA = 4.0*t*t2; d2f2dtA = beta[func]/(gamm[func]*f3*f3) * (d2f1dtA - 2.0*df1dt*(f1 + A*df1dA)/f3); *d2tA = gamm[func]*phi3*(d2f2dtA*(1.0 + f2) - df2dt*df2dA)/((1.0 + f2)*(1.0 + f2)); d2f2dA2 = beta[func]/(gamm[func]*f3*f3*f3) *(-2.0)*(2.0*f1*df1dA - f1*f1*f1 + A*df1dA*df1dA); *d2A2 = gamm[func]*phi3*(d2f2dA2*(1.0 + f2) - df2dA*df2dA)/((1.0 + f2)*(1.0 + f2)); } void gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma, double *e, double *vrho, double *vsigma, double *v2rho2, double *v2rhosigma, double *v2sigma2) { gamm[0] = gamm[1] = gamm[3] = (1.0 - log(2.0))/(M_PI*M_PI); XC(perdew_t) pt; int func, order; double me; double A, dAdec, dAdphi, dAdrs, d2Adec2, d2Adecphi, d2Adphi2; double H, dHdphi, dHdrs, dHdt, dHdA, d2Hdphi2, d2Hdphit, d2HdphiA, d2Hdt2, d2HdtA, d2HdA2; d2HdphiA = 0.0; d2Hdphi2 = 0.0; d2Adphi2 = 0.0; d2HdA2 = 0.0; d2HdtA = 0.0; d2Hdphit = 0.0; d2Adecphi = 0.0; d2Hdt2 = 0.0; d2Adec2 = 0.0; dAdrs = 0.0; dAdphi = 0.0; dAdec = 0.0; dHdA = 0.0; dHdt = 0.0; dHdrs = 0.0; dHdphi = 0.0; func = 3; // for revTPSS order = 0; if(vrho != NULL) order = 1; if(v2rho2 != NULL) order = 2; XC(perdew_params)(p, rho, sigma, order, &pt); pbe_eq8(func, order, pt.rs, pt.ecunif, pt.phi, &A, &dAdec, &dAdphi, &dAdrs, &d2Adec2, &d2Adecphi, &d2Adphi2); pbe_eq7(func, order, pt.rs, pt.phi, pt.t, A, &H, &dHdphi, &dHdrs, &dHdt, &dHdA, &d2Hdphi2, &d2Hdphit, &d2HdphiA, &d2Hdt2, &d2HdtA, &d2HdA2); me = pt.ecunif + H; if(e != NULL) *e = me; if(order >= 
1){ pt.dphi = dHdphi + dHdA*dAdphi; pt.drs = dHdrs + dHdA*dAdrs; pt.dt = dHdt; pt.decunif = 1.0 + dHdA*dAdec; } if(order >= 2){ pt.d2phi2 = d2Hdphi2 + 2.0*d2HdphiA*dAdphi + dHdA*d2Adphi2 + d2HdA2*dAdphi*dAdphi; pt.d2phit = d2Hdphit + d2HdtA*dAdphi; pt.d2phiecunif = d2HdphiA*dAdec + d2HdA2*dAdphi*dAdec + dHdA*d2Adecphi; pt.d2t2 = d2Hdt2; pt.d2tecunif = d2HdtA*dAdec; pt.d2ecunif2 = d2HdA2*dAdec*dAdec + dHdA*d2Adec2; } XC(perdew_potentials)(&pt, rho, me, order, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2); } gpaw-24.1.0/c/xc/rpbe.c000066400000000000000000000013141454550013000144600ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Please see the accompanying LICENSE file for further information. */ #include #include "xc_gpaw.h" double rpbe_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2) { double e = C1 / rs; *dedrs = -e / rs; if (par->gga) // not really needed? XXX { double c = C2 * rs / n; c *= c; double s2 = a2 * c; double x = exp(-MU * s2 / 0.804); double Fx = 1.0 + 0.804 * (1 - x); double dFxds2 = MU * x; double ds2drs = 8.0 * c * a2 / rs; *dedrs = *dedrs * Fx + e * dFxds2 * ds2drs; *deda2 = e * dFxds2 * c; e *= Fx; } return e; } gpaw-24.1.0/c/xc/tpss.c000066400000000000000000000372041454550013000145300ustar00rootroot00000000000000/************************************************************************ Implements Perdew, Tao, Staroverov & Scuseria meta-Generalized Gradient Approximation. 
Exchange part ************************************************************************/ #include #include #include #include "xc_mgga.h" typedef struct tpss_params { common_params common; // needs to be at the beginning of every functional_params XC(func_type) *x_aux; XC(func_type) *c_aux1; XC(func_type) *c_aux2; } tpss_params; /* some parameters */ static double b=0.40, c=1.59096, e=1.537, kappa=0.804, mu=0.21951; /* This is Equation (7) from the paper and its derivatives */ static void x_tpss_7(double p, double alpha, double *qb, double *dqbdp, double *dqbdalpha) { /* Eq. (7) */ double a = sqrt(1.0 + b*alpha*(alpha-1.0)), h = 9.0/20.0; *qb = h*(alpha - 1.0)/a + 2.0*p/3.0; *dqbdp = 2.0/3.0; *dqbdalpha = h*(1.0 + 0.5*b*(alpha-1.0))/pow(a, 3); } /* Equation (10) in all it's glory */ static void x_tpss_10(double p, double alpha, double *x, double *dxdp, double *dxdalpha) { double x1, dxdp1, dxdalpha1; double aux1, ap, apsr, p2; double qb, dqbdp, dqbdalpha; /* Equation 7 */ x_tpss_7(p, alpha, &qb, &dqbdp, &dqbdalpha); p2 = p*p; aux1 = 10.0/81.0; ap = (3*alpha + 5*p)*(3*alpha + 5*p); apsr = (3*alpha + 5*p); /* first we handle the numerator */ x1 = 0.0; dxdp1 = 0.0; dxdalpha1 = 0.0; { /* first term */ double a = (9*alpha*alpha+30*alpha*p+50*p2), a2 = a*a; x1 += aux1*p + 25*c*p2*p*ap/a2; dxdp1 += aux1 + ((3*225*c*p2*alpha*alpha+ 4*750*c*p*p2*alpha + 5*625*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(30*alpha+50*2*p))/(a2*a2); dxdalpha1 += ((225*c*p*p2*2*alpha + 750*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(9*2*alpha+30*p))/(a2*a2); } { /* second term */ double a = 146.0/2025.0*qb; x1 += a*qb; dxdp1 += 2.0*a*dqbdp; dxdalpha1 += 2.0*a*dqbdalpha; } { /* third term */ double h = 73.0/(405*sqrt(2.0)); x1 += -h*qb*p/apsr * sqrt(ap+9); dxdp1 += -h * qb *((3*alpha)/ap * sqrt(ap+9) + p/apsr * 1./2. * pow(ap+9,-1./2.)* 2*apsr*5) - h*p/apsr*sqrt(ap+9)*dqbdp; dxdalpha1 += -h*qb*( (-1)*p*3/ap * sqrt(ap+9) + p/apsr * 1./2. 
* pow(ap+9,-1./2.)* 2*apsr*3) - h*p/apsr*sqrt(ap+9)*dqbdalpha; } { /* forth term */ double a = aux1*aux1/kappa; x1 += a*p2; dxdp1 += a*2.0*p; dxdalpha1 += 0.0; } { /* fifth term */ x1 += 20*sqrt(e)*p2/(9*ap); dxdp1 += 20*sqrt(e)/9*(2*p*ap-p2*2*(3*alpha + 5*p)*5)/(ap*ap); dxdalpha1 +=-20*2*sqrt(e)/3*p2/(ap*(3*alpha + 5*p)); } { /* sixth term */ double a = e*mu; x1 += a*p*p2; dxdp1 += a*3.0*p2; dxdalpha1 += 0.0; } /* and now the denominator */ { double a = 1.0+sqrt(e)*p, a2 = a*a; *x = x1/a2; *dxdp = (dxdp1*a - 2.0*sqrt(e)*x1)/(a2*a); *dxdalpha = dxdalpha1/a2; } } static void x_tpss_para(XC(func_type) *lda_aux, const double *rho, const double sigma, const double tau_, double *energy, double *dedd, double *vsigma, double *dedtau) { double gdms, p, tau, tauw; double x, dxdp, dxdalpha, Fx, dFxdx; double tau_lsda, exunif, vxunif, dtau_lsdadd; double dpdd, dpdsigma; double alpha, dalphadd, dalphadsigma, dalphadtau; double aux = (3./10.) * pow((3*M_PI*M_PI),2./3.); /* get the uniform gas energy and potential */ const int np = 1; XC(lda_exc_vxc)(lda_aux, np, rho, &exunif, &vxunif); /* calculate |nabla rho|^2 */ gdms = max(MIN_GRAD*MIN_GRAD, sigma); /* Eq. (4) */ p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0)); dpdd = -(8.0/3.0)*p/rho[0]; dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0)); /* von Weisaecker kinetic energy density */ tauw = max(gdms/(8.0*rho[0]), 1.0e-12); tau = max(tau_, tauw); tau_lsda = aux * pow(rho[0],5./3.); dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.); alpha = (tau - tauw)/tau_lsda; if(fabs(tauw-tau_)< 1.0e-10){ dalphadsigma = 0.0; dalphadtau = 0.0; dalphadd = 0.0; }else{ dalphadtau = 1./tau_lsda; dalphadsigma = -1./(tau_lsda*8.0*rho[0]); dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.); } /* get Eq. (10) */ x_tpss_10(p, alpha, &x, &dxdp, &dxdalpha); { /* Eq. (5) */ double a = kappa/(kappa + x); Fx = 1.0 + kappa*(1.0 - a); dFxdx = a*a; } { /* Eq. 
(3) */ *energy = exunif*Fx*rho[0]; /* exunif is en per particle already so we multiply by n the terms with exunif*/ *dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd); *vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma); *dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau); } } static void XC(mgga_x_tpss)(void *p, const double *rho, const double *sigma, const double *tau, double *e, double *dedd, double *vsigma, double *dedtau) { tpss_params *par = (tpss_params*)p; if(par->common.nspin == XC_UNPOLARIZED){ double en; x_tpss_para(par->x_aux, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau); *e = en/(rho[0]+rho[1]); }else{ /* The spin polarized version is handle using the exact spin scaling Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2 */ *e = 0.0; double e2na, e2nb, rhoa[2], rhob[2]; double vsigmapart[3]; rhoa[0]=2*rho[0]; rhoa[1]=0.0; rhob[0]=2*rho[1]; rhob[1]=0.0; x_tpss_para(par->x_aux, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0])); x_tpss_para(par->x_aux, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1])); *e = (e2na + e2nb )/(2.*(rho[0]+rho[1])); vsigma[0] = 2*vsigmapart[0]; vsigma[2] = 2*vsigmapart[2]; } } /************************************************************************ Implements Perdew, Tao, Staroverov & Scuseria meta-Generalized Gradient Approximation. J. Chem. Phys. 
120, 6898 (2004) http://dx.doi.org/10.1063/1.1665298 Correlation part ************************************************************************/ /* some parameters */ static double d = 2.8; /* Equation (14) */ static void c_tpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta) { double fz, C0, dC0dz, dfzdz; double z2 = zeta*zeta; /* Equation (13) */ C0 = 0.53 + z2*(0.87 + z2*(0.50 + z2*2.26)); dC0dz = zeta*(2.0*0.87 + z2*(4.0*0.5 + z2*6.0*2.26)); /*OK*/ fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0)); dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/ { /* Equation (14) */ double csi2 = csi*csi; double a = 1.0 + csi2*fz, a4 = pow(a, 4); *C = C0 / a4; *dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/ *dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/ } } /* Equation (12) */ static void c_tpss_12(XC(func_type) *aux1, XC(func_type) *aux2, int nspin, const double *rho, const double *sigma, double dens, double zeta, double z, double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz) { /*some incoming variables: dens = rho[0] + rho[1] z = tau_w/tau zeta = (rho[0] - rho[1])/dens*/ double e_PBE, e_PBEup, e_PBEdn; double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ; double aux, zsq; double dzetadd[2], dcsidd[2], dcsidsigma[3]; double C, dCdcsi, dCdzeta; double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3]; int i; /*initialize dCdcsi and dCdzeta and the energy*/ dCdcsi = dCdzeta = 0.0; e_PBE = 0.0; e_PBEup = 0.0; e_PBEdn = 0.0; /* get the PBE stuff */ if(nspin== XC_UNPOLARIZED) { densp[0]=rho[0]/2.; densp[1]=rho[0]/2.; sigmatot[0] = sigma[0]/4.; sigmatot[1] = sigma[0]/4.; sigmatot[2] = sigma[0]/4.; }else{ densp[0] = rho[0]; densp[1] = rho[1]; sigmatot[0] = sigma[0]; sigmatot[1] = sigma[1]; sigmatot[2] = sigma[2]; } /* e_PBE */ XC(func_type) *auxfunc = (nspin == XC_UNPOLARIZED) ? 
aux2 : aux1; const int np = 1; XC(gga_exc_vxc)(auxfunc, np, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma); densp2[0]=densp[0]; densp2[1]=0.0; if(nspin== XC_UNPOLARIZED) { sigmaup[0] = sigma[0]/4.; sigmaup[1] = 0.; sigmaup[2] = 0.; }else{ sigmaup[0] = sigma[0]; sigmaup[1] = 0.; sigmaup[2] = 0.; } /* e_PBE spin up */ XC(gga_exc_vxc)(auxfunc, np, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup); densp2[0]=densp[1]; densp2[1]=0.0; if(nspin== XC_UNPOLARIZED) { sigmadn[0] = sigma[0]/4.; sigmadn[1] = 0.; sigmadn[2] = 0.; }else{ sigmadn[0] = sigma[2]; sigmadn[1] = 0.; sigmadn[2] = 0.; } /* e_PBE spin down */ XC(gga_exc_vxc)(auxfunc, np, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn); /*get Eq. (13) and (14) for the polarized case*/ if(nspin == XC_UNPOLARIZED){ C = 0.53; dzetadd[0] = 0.0; dcsidd [0] = 0.0; dzetadd[1] = 0.0; dcsidd [1] = 0.0; for(i=0; i<3; i++) dcsidsigma[i] = 0.0; }else{ // initialize derivatives for(i=0; i<2; i++){ dzetadd[i] = 0.0; dcsidd [i] = 0.0;} for(i=0; i<3; i++) dcsidsigma[i] = 0.0; double num, gzeta, csi, a; /*numerator of csi: derive as grho all components and then square the 3 parts [2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2 -> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_b^2)/(n_a+n_b)^2 */ num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2); num = max(num, 1e-20); gzeta = sqrt(4*(num))/(dens*dens); gzeta = max(gzeta, MIN_GRAD); /*denominator of csi*/ a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0); csi = gzeta/a; c_tpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta); dzetadd[0] = (1.0 - zeta)/dens; /*OK*/ dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/ dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/ dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/ dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/ dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/ dcsidsigma[2]= 
csi*pow(rho[0],2)/(2*num); /*OK*/ } aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens; double dauxdd[2], dauxdsigma[3]; if(e_PBEup > e_PBE) { //case densp[0] * e_PBEup dauxdd[0] = de_PBEddup[0]; dauxdd[1] = 0.0; dauxdsigma[0] = de_PBEdsigmaup[0]; dauxdsigma[1] = 0.0; dauxdsigma[2] = 0.0; }else{ //case densp[0] * e_PBE dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE; dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE); dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0]; dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1]; dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2]; } if(e_PBEdn > e_PBE) {//case densp[1] * e_PBEdn dauxdd[0] += 0.0; dauxdd[1] += de_PBEdddn[0]; dauxdsigma[0] += 0.0; dauxdsigma[1] += 0.0; dauxdsigma[2] += de_PBEdsigmadn[0]; }else{//case densp[1] * e_PBE dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE); dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE; dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0]; dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1]; dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2]; } zsq=z*z; *e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux); *de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/ double dCdd[2]; dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/ dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/ /* partial derivatives*/ de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq - zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]); de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq - zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]); int nder = (nspin==XC_UNPOLARIZED) ? 
1 : 3; for(i=0; icommon.nspin; dens = rho[0]; tautr = tau[0]; grad = sigma[0]; if(nspin == XC_POLARIZED) { zeta = (rho[0]-rho[1])/(rho[0]+rho[1]); dens += rho[1]; tautr += tau[1]; grad += (2*sigma[1] + sigma[2]); } else zeta = 0.0; grad = max(MIN_GRAD*MIN_GRAD, grad); tauw = max(grad/(8.0*dens), 1.0e-12); taut = max(tautr, tauw); z = tauw/taut; double sigmatmp[3]; sigmatmp[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]); sigmatmp[1] = 0.0; sigmatmp[2] = 0.0; if(nspin == XC_POLARIZED) { //sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]); sigmatmp[1] = sigma[1]; sigmatmp[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]); } /* Equation (12) */ c_tpss_12(par->c_aux1, par->c_aux2, nspin, rho, sigmatmp, dens, zeta, z, &e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz); /* Equation (11) */ { double z2 = z*z, z3 = z2*z; double dedz; double dzdd[2], dzdsigma[3], dzdtau; if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){ dzdtau = 0.0; dzdd[0] = 0.0; dzdd[1] = 0.0; dzdsigma[0] = 0.0; dzdsigma[1] = 0.0; dzdsigma[2] = 0.0; }else{ dzdtau = -z/taut; dzdd[0] = - z/dens; dzdd[1] = 0.0; if (nspin == XC_POLARIZED) dzdd[1] = - z/dens; dzdsigma[0] = 1.0/(8*dens*taut); dzdsigma[1] = 0.0; dzdsigma[2] = 0.0; if (nspin == XC_POLARIZED) { dzdsigma[1] = 2.0/(8*dens*taut); dzdsigma[2] = 1.0/(8*dens*taut); } } *energy = e_PKZB * (1.0 + d*e_PKZB*z3); /* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the * same energy for polarized and unpolarized calculation with the same total density */ if(nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]); dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2; for(is=0; isx_aux = (XC(func_type) *) malloc(sizeof(XC(func_type))); XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED); par->c_aux1 = (XC(func_type) *) malloc(sizeof(XC(func_type))); par->c_aux2 = (XC(func_type) *) malloc(sizeof(XC(func_type))); XC(func_init)(par->c_aux1, XC_GGA_C_PBE, par->common.nspin); XC(func_init)(par->c_aux2, XC_GGA_C_PBE, XC_POLARIZED); } 
static void tpss_end(void *p) { tpss_params *par = (tpss_params*)p; XC(func_end)(par->x_aux); free(par->x_aux); XC(func_end)(par->c_aux1); XC(func_end)(par->c_aux2); free(par->c_aux1); free(par->c_aux2); } const mgga_func_info tpss_info = { sizeof(tpss_params), &tpss_init, &tpss_end, &XC(mgga_x_tpss), &XC(mgga_c_tpss) }; gpaw-24.1.0/c/xc/vdw.c000066400000000000000000000143161454550013000143360ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. */ #include "../extensions.h" double vdwkernel(double D, double d1, double d2, int nD, int ndelta, double dD, double ddelta, const double (*phi)[nD]) { if (D < 1e-10) return phi[0][0]; double y = D / dD; int j = (int)y; double e12; if (j >= nD - 1) { double d12 = d1 * d1; double d22 = d2 * d2; const double C = -1024.0 / 243.0 * M_PI * M_PI * M_PI * M_PI; e12 = C / (d12 * d22 * (d12 + d22)); } else { double x = fabs(0.5 * (d1 - d2) / D) / ddelta; int i = (int)x; if (i >= ndelta - 1) { i = ndelta - 2; x = 1.0; } else x -= i; y -= j; e12 = ((x * y * phi[i + 1][j + 1] + x * (1.0 - y) * phi[i + 1][j ] + (1.0 - x) * y * phi[i ][j + 1] + (1.0 - x) * (1.0 - y) * phi[i ][j ])); } return e12; } PyObject * vdw(PyObject* self, PyObject *args) { PyArrayObject* n_obj; PyArrayObject* q0_obj; PyArrayObject* R_obj; PyArrayObject* cell_obj; PyArrayObject* pbc_obj; PyArrayObject* repeat_obj; PyArrayObject* phi_obj; double ddelta; double dD; int iA; int iB; PyArrayObject* rhistogram_obj; double drhist; PyArrayObject* Dhistogram_obj; double dDhist; if (!PyArg_ParseTuple(args, "OOOOOOOddiiOdOd", &n_obj, &q0_obj, &R_obj, &cell_obj, &pbc_obj, &repeat_obj, &phi_obj, &ddelta, &dD, &iA, &iB, &rhistogram_obj, &drhist, &Dhistogram_obj, &dDhist)) return NULL; int ndelta = PyArray_DIMS(phi_obj)[0]; int nD = PyArray_DIMS(phi_obj)[1]; const double* n = (const double*)DOUBLEP(n_obj); const int ni = PyArray_SIZE(n_obj); const double* q0 = (const 
double*)DOUBLEP(q0_obj); const double (*R)[3] = (const double (*)[3])DOUBLEP(R_obj); const double* cell = (const double*)DOUBLEP(cell_obj); const char* pbc = (const char*)(PyArray_DATA(pbc_obj)); const long* repeat = (const long*)(PyArray_DATA(repeat_obj)); const double (*phi)[nD] = (const double (*)[nD])DOUBLEP(phi_obj); double* rhistogram = (double*)DOUBLEP(rhistogram_obj); double* Dhistogram = (double*)DOUBLEP(Dhistogram_obj); int nbinsr = PyArray_DIMS(rhistogram_obj)[0]; int nbinsD = PyArray_DIMS(Dhistogram_obj)[0]; double energy = 0.0; if (repeat[0] == 0 && repeat[1] == 0 && repeat[2] == 0) for (int i1 = iA; i1 < iB; i1++) { const double* R1 = R[i1]; double q01 = q0[i1]; for (int i2 = 0; i2 <= i1; i2++) { double rr = 0.0; for (int c = 0; c < 3; c++) { double f = R[i2][c] - R1[c]; if (pbc[c]) f = fmod(f + 1.5 * cell[c], cell[c]) - 0.5 * cell[c]; rr += f * f; } double r = sqrt(rr); double d1 = r * q01; double d2 = r * q0[i2]; double D = 0.5 * (d1 + d2); double e12 = (vdwkernel(D, d1, d2, nD, ndelta, dD, ddelta, phi) * n[i1] * n[i2]); if (i1 == i2) e12 /= 2.0; int bin = (int)(r / drhist); if (bin < nbinsr) rhistogram[bin] += e12; bin = (int)(D / dDhist); if (bin < nbinsD) Dhistogram[bin] += e12; energy += e12; } } else for (int i1 = iA; i1 < iB; i1++) { const double* R1 = R[i1]; double q01 = q0[i1]; for (int a1 = -repeat[0]; a1 <= repeat[0]; a1++) for (int a2 = -repeat[1]; a2 <= repeat[1]; a2++) for (int a3 = -repeat[2]; a3 <= repeat[2]; a3++) { double x = 0.5; int i2max = ni-1; if (a1 == 0 && a2 == 0 && a3 == 0) { i2max = i1; x = 1.0; } double R1a[3] = {R1[0] + a1 * cell[0], R1[1] + a2 * cell[1], R1[2] + a3 * cell[2]}; for (int i2 = 0; i2 <= i2max; i2++) { double rr = 0.0; for (int c = 0; c < 3; c++) { double f = R[i2][c] - R1a[c]; rr += f * f; } double r = sqrt(rr); double d1 = r * q01; double d2 = r * q0[i2]; double D = 0.5 * (d1 + d2); double e12 = (vdwkernel(D, d1, d2, nD, ndelta, dD, ddelta, phi) * n[i1] * n[i2] * x); int bin = (int)(r / drhist); if (bin < 
nbinsr) rhistogram[bin] += e12; bin = (int)(D / dDhist); if (bin < nbinsD) Dhistogram[bin] += e12; energy += e12; } } } return PyFloat_FromDouble(energy); } PyObject * vdw2(PyObject* self, PyObject *args) { PyArrayObject* phi_jp_obj; PyArrayObject* j_k_obj; PyArrayObject* dk_k_obj; PyArrayObject* theta_k_obj; PyArrayObject* F_k_obj; if (!PyArg_ParseTuple(args, "OOOOO", &phi_jp_obj, &j_k_obj, &dk_k_obj, &theta_k_obj, &F_k_obj)) return NULL; const double* phi_jp = (const double*)PyArray_DATA(phi_jp_obj); const long* j_k = (const long*)PyArray_DATA(j_k_obj); const double* dk_k = (const double*)PyArray_DATA(dk_k_obj); const complex double* theta_k = (const complex double*)PyArray_DATA(theta_k_obj); complex double* F_k = (complex double*)PyArray_DATA(F_k_obj); int nk = PyArray_SIZE(j_k_obj); for (int k = 0; k < nk; k++) { const double* phi_p = phi_jp + 4 * j_k[k]; double a = phi_p[0]; double b = phi_p[1]; double c = phi_p[2]; double d = phi_p[3]; double x = dk_k[k]; F_k[k] += theta_k[k] * (a + x * (b + x * (c + x * d))); } Py_RETURN_NONE; } gpaw-24.1.0/c/xc/xc.c000066400000000000000000000215261454550013000141510ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. 
*/ #include #define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API #define NO_IMPORT_ARRAY #include #include "xc_gpaw.h" #include "../extensions.h" // // __ 2 // a2 = |\/n| // // dE // dedrs = --- // dr // s // // dE // deda2 = --------- // __ 2 // d(|\/n| ) // #ifndef GPAW_WITHOUT_LIBXC void init_mgga(void** params, int code, int nspin); void calc_mgga(void** params, int nspin, int ng, const double* n_g, const double* sigma_g, const double* tau_g, double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g); #endif double pbe_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2); double pbe_correlation(double n, double rs, double zeta, double a2, bool gga, bool spinpol, double* dedrs, double* dedzeta, double* deda2); double pw91_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2); double pw91_correlation(double n, double rs, double zeta, double a2, bool gga, bool spinpol, double* dedrs, double* dedzeta, double* deda2); double rpbe_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2); double beefvdw_exchange(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2); // typedef struct { PyObject_HEAD double (*exchange)(const xc_parameters* par, double n, double rs, double a2, double* dedrs, double* deda2); double (*correlation)(double n, double rs, double zeta, double a2, bool gga, bool spinpol, double* dedrs, double* dedzeta, double* deda2); xc_parameters par; // below added by cpo for mgga functionals outside of libxc (TPSS, M06L, etc.) 
void* mgga; } XCFunctionalObject; static void XCFunctional_dealloc(XCFunctionalObject *self) { PyObject_DEL(self); } static PyObject* XCFunctional_calculate(XCFunctionalObject *self, PyObject *args) { PyArrayObject* e_array; PyArrayObject* n_array; PyArrayObject* v_array; PyArrayObject* sigma_array = 0; PyArrayObject* dedsigma_array = 0; PyArrayObject* tau_array = 0; PyArrayObject* dedtau_array = 0; if (!PyArg_ParseTuple(args, "OOO|OOOO", &e_array, &n_array, &v_array, &sigma_array, &dedsigma_array, &tau_array, &dedtau_array)) return NULL; int ng = 1; for (int d = 0; d < PyArray_NDIM(e_array); d++) ng *= PyArray_DIM(e_array, d); xc_parameters* par = &self->par; double* e_g = DOUBLEP(e_array); const double* n_g = DOUBLEP(n_array); double* v_g = DOUBLEP(v_array); const double* sigma_g = 0; double* dedsigma_g = 0; if (par->gga) { sigma_g = DOUBLEP(sigma_array); dedsigma_g = DOUBLEP(dedsigma_array); } #ifndef GPAW_WITHOUT_LIBXC const double* tau_g = 0; double* dedtau_g = 0; if (self->mgga) { tau_g = DOUBLEP(tau_array); dedtau_g = DOUBLEP(dedtau_array); int nspin = PyArray_DIM(n_array, 0) == 1 ? 
1 : 2; calc_mgga(&self->mgga, nspin, ng, n_g, sigma_g, tau_g, e_g, v_g, dedsigma_g, dedtau_g); Py_RETURN_NONE; } #endif if (PyArray_DIM(n_array, 0) == 1) { for (int g = 0; g < ng; g++) { double n = n_g[g]; if (n < NMIN) n = NMIN; double rs = pow(C0I / n, THIRD); double dexdrs; double dexda2; double ex; double decdrs; double decda2; double ec; if (par->gga) { double a2 = sigma_g[g]; ex = self->exchange(par, n, rs, a2, &dexdrs, &dexda2); ec = self->correlation(n, rs, 0.0, a2, 1, 0, &decdrs, 0, &decda2); dedsigma_g[g] = n * (dexda2 + decda2); } else { ex = self->exchange(par, n, rs, 0.0, &dexdrs, 0); ec = self->correlation(n, rs, 0.0, 0.0, 0, 0, &decdrs, 0, 0); } e_g[g] = n * (ex + ec); v_g[g] += ex + ec - rs * (dexdrs + decdrs) / 3.0; } } else { const double* na_g = n_g; double* va_g = v_g; const double* nb_g = na_g + ng; double* vb_g = va_g + ng; const double* sigma0_g = 0; const double* sigma1_g = 0; const double* sigma2_g = 0; double* dedsigma0_g = 0; double* dedsigma1_g = 0; double* dedsigma2_g = 0; const xc_parameters* par = &self->par; if (par->gga) { sigma0_g = sigma_g; sigma1_g = sigma0_g + ng; sigma2_g = sigma1_g + ng; dedsigma0_g = dedsigma_g; dedsigma1_g = dedsigma0_g + ng; dedsigma2_g = dedsigma1_g + ng; } for (int g = 0; g < ng; g++) { double na = 2.0 * na_g[g]; if (na < NMIN) na = NMIN; double rsa = pow(C0I / na, THIRD); double nb = 2.0 * nb_g[g]; if (nb < NMIN) nb = NMIN; double rsb = pow(C0I / nb, THIRD); double n = 0.5 * (na + nb); double rs = pow(C0I / n, THIRD); double zeta = 0.5 * (na - nb) / n; double dexadrs; double dexada2; double exa; double dexbdrs; double dexbda2; double exb; double decdrs; double decdzeta; double decda2; double ec; if (par->gga) { exa = self->exchange(par, na, rsa, 4.0 * sigma0_g[g], &dexadrs, &dexada2); exb = self->exchange(par, nb, rsb, 4.0 * sigma2_g[g], &dexbdrs, &dexbda2); double a2 = sigma0_g[g] + 2 * sigma1_g[g] + sigma2_g[g]; ec = self->correlation(n, rs, zeta, a2, 1, 1, &decdrs, &decdzeta, &decda2); dedsigma0_g[g] 
= 2 * na * dexada2 + n * decda2; dedsigma1_g[g] = 2 * n * decda2; dedsigma2_g[g] = 2 * nb * dexbda2 + n * decda2; } else { exa = self->exchange(par, na, rsa, 0.0, &dexadrs, 0); exb = self->exchange(par, nb, rsb, 0.0, &dexbdrs, 0); ec = self->correlation(n, rs, zeta, 0.0, 0, 1, &decdrs, &decdzeta, 0); } e_g[g] = 0.5 * (na * exa + nb * exb) + n * ec; va_g[g] += (exa + ec - (rsa * dexadrs + rs * decdrs) / 3.0 - (zeta - 1.0) * decdzeta); vb_g[g] += (exb + ec - (rsb * dexbdrs + rs * decdrs) / 3.0 - (zeta + 1.0) * decdzeta); } } Py_RETURN_NONE; } static PyMethodDef XCFunctional_Methods[] = { {"calculate", (PyCFunction)XCFunctional_calculate, METH_VARARGS, 0}, {NULL, NULL, 0, NULL} }; PyTypeObject XCFunctionalType = { PyVarObject_HEAD_INIT(NULL, 0) "XCFunctional", sizeof(XCFunctionalObject), 0, (destructor)XCFunctional_dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, "XC object", 0, 0, 0, 0, 0, 0, XCFunctional_Methods }; PyObject * NewXCFunctionalObject(PyObject *obj, PyObject *args) { int code; PyArrayObject* parameters = 0; if (!PyArg_ParseTuple(args, "i|O", &code, ¶meters)) return NULL; XCFunctionalObject *self = PyObject_NEW(XCFunctionalObject, &XCFunctionalType); if (self == NULL) return NULL; self->mgga = NULL; self->par.gga = 1; self->correlation = pbe_correlation; self->exchange = pbe_exchange; if (code == -1) { // LDA self->par.gga = 0; } else if (code == 0) { // PBE self->par.kappa = 0.804; } else if (code == 1) { // revPBE self->par.kappa = 1.245; } else if (code == 2) { // RPBE self->exchange = rpbe_exchange; } else if (code == 14) { // PW91 self->exchange = pw91_exchange; } #ifndef GPAW_WITHOUT_LIBXC else if (code == 20 || code == 21 || code == 22) { // MGGA const int nspin = 1; // a guess, perhaps corrected later in calc_mgga init_mgga(&self->mgga,code,nspin); } #endif else { assert (code == 17); // BEEF-vdW self->exchange = beefvdw_exchange; int n = PyArray_DIM(parameters, 0); assert(n <= 110); double* p = 
(double*)PyArray_BYTES(parameters); for (int i = 0; i < n; i++) self->par.parameters[i] = p[i]; self->par.nparameters = n / 2; } return (PyObject*)self; } gpaw-24.1.0/c/xc/xc_gpaw.h000066400000000000000000000017601454550013000151720ustar00rootroot00000000000000/* Copyright (C) 2003-2007 CAMP * Copyright (C) 2007-2009 CAMd * Please see the accompanying LICENSE file for further information. */ #ifndef _XC_GPAW_H #define _XC_GPAW_H /* BETA = 0.066725 MU = BETA * pi * pi / 3 C2 = (1 / (18 * pi)**(1 / 3)) C0I = 3 / (4 * pi) C1 = -9 / (8 * pi) * (2 * pi / 3)**(1 / 3) CC1 = 1 / (2**(4 / 3) - 2) CC2 = 4 * CC1 / 3 IF2 = 3 / (2 * CC2); C3 = pi * (4 / (9 * pi))**(1 / 3) / 16 C0 = 4 * pi / 3 */ #define BETA 0.066725 #define GAMMA 0.031091 #define MU 0.2195164512208958 #define C2 0.26053088059892404 #define C0I 0.238732414637843 #define C1 -0.45816529328314287 #define CC1 1.9236610509315362 #define CC2 2.5648814012420482 #define IF2 0.58482236226346462 #define C3 0.10231023756535741 #define C0 4.1887902047863905 #define THIRD 0.33333333333333333 #define NMIN 1.0E-10 typedef int bool; typedef struct { bool gga; double kappa; int nparameters; double parameters[110]; } xc_parameters; #endif /* _XC_GPAW_H */ gpaw-24.1.0/c/xc/xc_mgga.c000066400000000000000000000074241454550013000151450ustar00rootroot00000000000000 #include #include #include #include "xc_mgga.h" #include "xc_gpaw.h" extern const mgga_func_info m06l_info; extern const mgga_func_info tpss_info; extern const mgga_func_info revtpss_info; static void init_common(common_params* params, int code, int nspin, const mgga_func_info *finfo) { params->code = code; params->nspin = nspin; params->funcinfo = finfo; } void init_mgga(void** params, int code, int nspin) { const mgga_func_info *finfo; if (code==20) { finfo = &tpss_info; } else if (code==21) { finfo = &m06l_info; } else if (code==22) { finfo = &revtpss_info; } else { // this should never happen. forces a crash. 
assert(code>=20 && code <=22); finfo = NULL; } *params = malloc(finfo->size); init_common(*params, code, nspin, finfo); finfo->init(*params); } void end_mgga(common_params *common) { common->funcinfo->end(common); free(common); } void calc_mgga(void** params, int nspin, int ng, const double* n_g, const double* sigma_g, const double* tau_g, double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g) { common_params *common = (common_params*)*params; // check for a changed spin (similar to a line in gpaw/libxc.py) if (nspin!=common->nspin) { int code = common->code; // save this, since we're about to destroy common end_mgga(common); init_mgga(params, code, nspin); common = (common_params*)*params; // init_mgga changes this } if (nspin == 1) { for (int g = 0; g < ng; g++) { // kludge n[1] because of the way TPSS was written (requires n[1]=0.0 even for unpolarized) double n[2]; n[0] = n_g[g]; n[1] = 0.0; if (n[0] < NMIN) n[0] = NMIN; // m06l is assuming that there is space for spinpolarized calculation output // even for non-spin-polarized. double etmp, vtmp[2], dedsigmatmp[3], dedtautmp[2]; common->funcinfo->exch(*params, n, sigma_g+g, tau_g+g, &etmp, vtmp, dedsigmatmp, dedtautmp); e_g[g] = etmp; v_g[g] += vtmp[0]; dedsigma_g[g] = dedsigmatmp[0]; dedtau_g[g] = dedtautmp[0]; common->funcinfo->corr(*params, n, sigma_g+g, tau_g+g, &etmp, vtmp, dedsigmatmp, dedtautmp); e_g[g] += etmp; e_g[g] *= n[0]; v_g[g] += vtmp[0]; dedsigma_g[g] += dedsigmatmp[0]; dedtau_g[g] += dedtautmp[0]; } } else { double etmp, ntmp[2], vtmp[2], sigmatmp[3], dedsigmatmp[3], tautmp[2], dedtautmp[2]; for (int g = 0; g < ng; g++) { ntmp[0] = n_g[g]; if (ntmp[0] < NMIN) ntmp[0] = NMIN; ntmp[1] = n_g[g+ng]; if (ntmp[1] < NMIN) ntmp[1] = NMIN; sigmatmp[0] = sigma_g[g]; sigmatmp[1] = sigma_g[g+ng]; sigmatmp[2] = sigma_g[g+ng+ng]; tautmp[0] = tau_g[g]; tautmp[1] = tau_g[g+ng]; // kludge: mgga_x_tpss requires dedsigma[1] set to 0, since it doesn't calculate it. 
dedsigmatmp[1]=0.0; common->funcinfo->exch(*params, ntmp, sigmatmp, tautmp, &etmp, vtmp, dedsigmatmp, dedtautmp); e_g[g] = etmp; v_g[g] += vtmp[0]; v_g[g+ng] += vtmp[1]; dedsigma_g[g] = dedsigmatmp[0]; dedsigma_g[g+ng] = dedsigmatmp[1]; dedsigma_g[g+ng+ng] = dedsigmatmp[2]; dedtau_g[g] = dedtautmp[0]; dedtau_g[g+ng] = dedtautmp[1]; common->funcinfo->corr(*params, ntmp, sigmatmp, tautmp, &etmp, vtmp, dedsigmatmp, dedtautmp); e_g[g] += etmp; e_g[g] *= ntmp[0]+ntmp[1]; v_g[g] += vtmp[0]; v_g[g+ng] += vtmp[1]; dedsigma_g[g] += dedsigmatmp[0]; dedsigma_g[g+ng] += dedsigmatmp[1]; dedsigma_g[g+ng+ng] += dedsigmatmp[2]; dedtau_g[g] += dedtautmp[0]; dedtau_g[g+ng] += dedtautmp[1]; } } } gpaw-24.1.0/c/xc/xc_mgga.h000066400000000000000000000021311454550013000151400ustar00rootroot00000000000000 #ifndef GPAW_XC_MGGA_H #define GPAW_XC_MGGA_H #define M_PI 3.14159265358979323846 #define MIN_DENS 1.0e-20 #define MIN_GRAD 1.0e-20 #define max(x,y) ((x os.stat(o)[ST_MTIME]: print('removing', o) os.remove(o) remove = True so = 'build/lib.{}/_gpaw.so'.format(plat) if os.path.exists(so) and remove: # Remove shared object C-extension: # print 'removing', so os.remove(so) def write_configuration(define_macros, include_dirs, libraries, library_dirs, extra_link_args, extra_compile_args, runtime_library_dirs, extra_objects, compiler): # Write the compilation configuration into a file try: out = open('configuration.log', 'w') except IOError as x: print(x) return print("Current configuration", file=out) print("compiler", compiler, file=out) print("libraries", libraries, file=out) print("library_dirs", library_dirs, file=out) print("include_dirs", include_dirs, file=out) print("define_macros", define_macros, file=out) print("extra_link_args", extra_link_args, file=out) print("extra_compile_args", extra_compile_args, file=out) print("runtime_library_dirs", runtime_library_dirs, file=out) print("extra_objects", extra_objects, file=out) out.close() def build_interpreter( compiler, extension, 
extension_objects, *, build_temp, build_bin, debug): exename = compiler.executable_filename('gpaw-python') print(f'building {repr(exename)} executable', flush=True) macros = extension.define_macros.copy() for undef in extension.undef_macros: macros.append((undef,)) # Compile the sources that define GPAW_INTERPRETER sources = ['c/main.c'] objects = compiler.compile( sources, output_dir=str(build_temp), macros=macros, include_dirs=extension.include_dirs, debug=debug, extra_postargs=extension.extra_compile_args) objects += extension_objects # Note: LDFLAGS and LIBS go together, but depending on the platform, # it might be unnecessary to include them extra_preargs = config_args('LDFLAGS') extra_postargs = (config_args('BLDLIBRARY') + config_args('LIBS') + config_args('LIBM') + extension.extra_link_args + config_args('LINKFORSHARED')) # Link the custom interpreter compiler.link_executable( objects, exename, output_dir=str(build_bin), extra_preargs=extra_preargs, libraries=extension.libraries, library_dirs=extension.library_dirs, runtime_library_dirs=extension.runtime_library_dirs, extra_postargs=extra_postargs, debug=debug, target_lang=extension.language) return build_bin / exename def build_gpu(gpu_compiler, gpu_compile_args, gpu_include_dirs, define_macros, undef_macros, build_temp): print('building gpu kernels', flush=True) kernels_dpath = Path('c/gpu/kernels') # Create temp build directory build_temp_kernels_dpath = build_temp / kernels_dpath if not build_temp_kernels_dpath.exists(): print(f'creating {build_temp_kernels_dpath}', flush=True) build_temp_kernels_dpath.mkdir(parents=True) # Glob all kernel files, but remove those included by other kernels kernels = sorted(kernels_dpath.glob('*.cpp')) for name in ['interpolate-stencil.cpp', 'lfc-reduce.cpp', 'lfc-reduce-kernel.cpp', 'reduce.cpp', 'reduce-kernel.cpp', 'restrict-stencil.cpp']: kernels.remove(kernels_dpath / name) # Compile GPU kernels objects = [] for src in kernels: obj = build_temp / 
src.with_suffix('.o') objects.append(str(obj)) run_args = [gpu_compiler] run_args += gpu_compile_args for (name, value) in define_macros: arg = f'-D{name}' if value is not None: arg += f'={value}' run_args += [arg] run_args += [f'-U{name}' for name in undef_macros] run_args += [f'-I{dpath}' for dpath in gpu_include_dirs] run_args += ['-c', str(src)] run_args += ['-o', str(obj)] print(shlex.join(run_args), flush=True) p = run(run_args, check=False, shell=False) if p.returncode != 0: print(f'error: command {repr(gpu_compiler)} failed ' f'with exit code {p.returncode}', file=sys.stderr, flush=True) sys.exit(1) return objects gpaw-24.1.0/doc/000077500000000000000000000000001454550013000132765ustar00rootroot00000000000000gpaw-24.1.0/doc/GPAW.bib000066400000000000000000000135751454550013000145250ustar00rootroot00000000000000@article{Mortensen2005, author = {Mortensen, J. J. and Hansen, L. B. and Jacobsen, K. W.}, title = {Real-space grid implementation of the projector augmented wave method}, journal = {Phys. Rev. B}, volume = {71}, number = {3}, pages = {035109}, year = {2005}, doi = {10.1103/PhysRevB.71.035109} } @article{Enkovaara2010, author = {Enkovaara, J. and Rostgaard, C. and Mortensen, J. J. and Chen, J. and Du{\l}ak, M. and Ferrighi, L. and Gavnholt, J. and Glinsvad, C. and Haikola, V. and Hansen, H. A. and Kristoffersen, H. H. and Kuisma, M. and Larsen, A. H. and Lehtovaara, L. and Ljungberg, M. and Lopez-Acevedo, O. and Moses, P. G. and Ojanen, J. and Olsen, T. and Petzold, V. and Romero, N. A. and Stausholm-M{\o}ller, J. and Strange, M. and Tritsaris, G. A. and Vanin, M. and Walter, M. and Hammer, B. and H{\"a}kkinen, H. and Madsen, G. K. H. and Nieminen, R. M. and N{\o}rskov, J. K. and Puska, M. and Rantala, T. T. and Schi{\o}tz, J. and Thygesen, K. S. and Jacobsen, K. W.}, title = {Electronic structure calculations with {GPAW}: a real-space implementation of the projector augmented-wave method}, journal = {J. Phys.: Condens. 
Matter}, volume = {22}, number = {25}, pages = {253202}, year = {2010}, doi = {10.1088/0953-8984/22/25/253202} } @article{Lehtola2018, author = {Susi Lehtola and Conrad Steigemann and Micael J. T. Oliveira and Miguel A. L. Marques}, title = {Recent developments in libxc -- A comprehensive library of functionals for density functional theory}, journal = {SoftwareX}, volume = {7}, pages = {1-5}, year = {2018}, issn = {2352-7110}, url = {https://www.sciencedirect.com/science/article/pii/S2352711017300602}, keywords = {Density functional theory, Exchange–correlation, Local density approximations, Generalized gradient approximations, meta-GGA approximations}, abstract = {libxc is a library of exchange–correlation functionals for density-functional theory. We are concerned with semi-local functionals (or the semi-local part of hybrid functionals), namely local-density approximations, generalized-gradient approximations, and meta-generalized-gradient approximations. Currently we include around 400 functionals for the exchange, correlation, and the kinetic energy, spanning more than 50 years of research. Moreover, libxc is by now used by more than 20 codes, not only from the atomic, molecular, and solid-state physics, but also from the quantum chemistry communities.}, doi = {10.1016/j.softx.2017.11.002} } @article{Walter2008, author = {Walter, Michael and H{\"a}kkinen, Hannu and Lehtovaara, Lauri and Puska, Martti and Enkovaara, Jussi and Rostgaard, Carsten and Mortensen, Jens J{\o}rgen}, title = {Time-dependent density-functional theory in the projector augmented-wave method}, journal = {J. Chem. Phys.}, volume = {128}, number = {24}, pages = {244101}, year = {2008}, doi = {10.1063/1.2943138} } @article{Larsen2009, author = {Larsen, A. H. and Vanin, M. and Mortensen, J. J. and Thygesen, K. S. and Jacobsen, K. W.}, title = {Localized atomic basis set in the projector augmented wave method}, journal = {Phys. Rev. 
B}, volume = {80}, number = {19}, pages = {195112}, year = {2009}, doi = {10.1103/PhysRevB.80.195112} } @article{Yan2011, author = {Yan, Jun and Mortensen, Jens J. and Jacobsen, Karsten W. and Thygesen, Kristian S.}, title = {Linear density response function in the projector augmented wave method: Applications to solids, surfaces, and interfaces}, journal = {Phys. Rev. B}, volume = {83}, number = {24}, pages = {245122}, year = {2011}, doi = {10.1103/PhysRevB.83.245122} } @article{Huser2013, author = {H\"user, Falco and Olsen, Thomas and Thygesen, Kristian S.}, title = {Quasiparticle GW calculations for solids, molecules, and two-dimensional materials}, journal = {Phys. Rev. B}, volume = {87}, number = {23}, pages = {235132}, year = {2013}, doi = {10.1103/PhysRevB.87.235132} } @article{Held2014, author = {Held, Alexander and Walter, Michael}, title = {Simplified continuum solvent model with a smooth cavity based on volumetric data}, journal = {J. Chem. Phys.}, volume = {141}, number = {17}, pages = {174108}, year = {2014}, doi = {10.1063/1.4900838} } @article{Kuisma2015, author = {Kuisma, M. and Sakko, A. and Rossi, T. P. and Larsen, A. H. and Enkovaara, J. and Lehtovaara, L. and Rantala, T. T.}, title = {Localized surface plasmon resonance in silver nanoparticles: Atomistic first-principles time-dependent density-functional theory calculations}, journal = {Phys. Rev. B}, volume = {91}, number = {11}, pages = {115431}, year = {2015}, doi = {10.1103/PhysRevB.91.115431} } @article{Rossi2017, author = {Rossi, Tuomas P. and Kuisma, Mikael and Puska, Martti J. and Nieminen, Risto M. and Erhart, Paul}, title = {Kohn--Sham Decomposition in Real-Time Time-Dependent Density-Functional Theory: An Efficient Tool for Analyzing Plasmonic Excitations}, journal = {J. Chem. 
Theory Comput.}, volume = {13}, number = {10}, pages = {4779-4790}, year = {2017}, doi = {10.1021/acs.jctc.7b00589} } @article{Kastlunger2018, author = {Kastlunger, Georg and Lindgren, Per and Peterson, Andrew A.}, title = {Controlled-Potential Simulation of Elementary Electrochemical Reactions: Proton Discharge on Metal Surfaces}, journal = {The Journal of Physical Chemistry C}, volume = {122}, number = {24}, pages = {12771-12781}, year = 2018, doi = {10.1021/acs.jpcc.8b02465}, } gpaw-24.1.0/doc/Makefile000066400000000000000000000037471454550013000147510ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = -n SPHINXBUILD = python3 -d -m sphinx PAPER = BUILDDIR = build # User-friendly check for sphinx-build # ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) # $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) # endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html linkcheck browse html: $(SPHINXBUILD) -b html -W --keep-going $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
help: @echo "Use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " linkcheck to check all external links for integrity" @echo " clean to clean up" @echo " browse to open browser" @echo " doctest to run doctest" clean: rm -rf $(BUILDDIR)/* python3 -m ase.utils.sphinx clean linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." browse: firefox build/html/index.html doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest tutorialsexercises/structureoptimization/gettingstarted/gettingstarted.rst documentation/core.rst index.rst spelling: $(SPHINXBUILD) -b spelling $(ALLSPHINXOPTS) $(BUILDDIR)/spelling gpaw-24.1.0/doc/algorithms.rst000066400000000000000000000137071454550013000162110ustar00rootroot00000000000000.. _features and algorithms: ======================= Features and algorithms ======================= Quick links to all features: .. list-table:: * - :ref:`Plane-waves ` - :ref:`Finite-difference ` - :ref:`LCAO ` * - :ref:`XC-functionals ` - :ref:`DFT+U ` - :ref:`GLLB-SC ` * - :ref:`DOS ` - :ref:`STM ` - :ref:`Wannier functions ` * - :ref:`delta-SCF ` - :ref:`XAS ` - :ref:`Jellium ` * - :ref:`TDDFT ` - :ref:`LRTDDFT (molecules) ` - :ref:`LRTDDFT (extended systems) ` * - :ref:`RPA-correlation ` - :ref:`GW ` - :ref:`BSE ` * - :ref:`Parallelization ` - :ref:`Continuum Solvent Model ` - :ref:`point groups` This Page gives a quick overview of the algorithms used. We have written some :ref:`papers ` about the implementation, where *all* the details can be found. **Introduction** Using the projector-augmented wave (PAW) method [Blo94]_, [Blo03]_ allows us to get rid of the core electrons and work with soft pseudo valence wave functions. The pseudo wave functions don't need to be normalized - this is important for the efficiency of calculations involving 2. 
row elements (such as oxygen) and transition metals. A further advantage of the PAW method is that it is an all-electron method (frozen core approximation) and there is a one to one transformation between the pseudo and all-electron quantities. **Description of the wave functions** Pseudo wave functions can be described in three ways: Finite-difference (FD): Uniform real-space orthorhombic grids. Two kinds of grids are involved in the calculations: A coarse grid used for the wave functions and a fine grid (`2^3=8` times higher grid point density) used for densities and potentials. The pseudo electron density is first calculated on the coarse grid from the wave functions, and then interpolated to the fine grid, where compensation charges are added for achieving normalization. The effective potential is evaluated on the fine grid (solve the Poisson equation and calculate the exchange-correlation potential) and then restricted to the coarse grid where it needs to act on the wave functions (also on the coarse grid). Plane-waves (PW): Expansion in plane-waves. There is one cutoff used for the wave-functions and a higher cutoff for electron densities and potentials. Linear combination of atomic orbitals (LCAO): Expansion in atom-centered basis functions. **Grid-based techniques for FD-mode** Solving the Kohn-Sham equation is done via iterative multi-grid eigensolvers starting from a good guess for the wave functions obtained by diagonalizing a Hamiltonian for a subspace of atomic orbitals. We use the multi-grid preconditioner described by Briggs *et al.* [Bri96]_ for the residuals, and standard Pulay mixing is used to update the density. **Compensation charges** Compensation charges are expanded to give correct multipole moments up to angular momentum number `\ell=2`. **Boundary conditions** In each of the three directions, the boundary conditions can be either periodic or open. 
**Mask function technique** Due to the discreticed nature of space in finite difference methods, the energy of an atom will depend on its position relative to the grid points. The problem comes from the calculation of the integral of a wave function times an atom centered localized function (radial functions times a spherical harmonic). To reduce this dependence, we use the technique of [Taf06]_, where the radial functions (projector functions) are smoothened as follows: * Divide function by a mask function that goes smoothly to zero at approximately twice the cutoff radius. * Fourier transform. * Cut off short wavelength components. * Inverse Fourier transform. * Multiply by mask function. **Exchange-correlation functionals** All the functionals from the :ref:`libxc ` library can be used. Calculating the XC-energy and potential for the extended pseudo density is simple. For GGA functionals, a nearest neighbor finite difference stencil is used for the gradient operator. In the PAW method, there is a correction to the XC-energy inside the augmentation spheres. The integration is done on a non-linear radial grid - dense close to the nuclei and less dense away from the nuclei. **Parallelization** Parallelization is done by distributing **k**-points, spins, and bands over all processors and on top of that domain-decomposition is used. **ASE interface** The code has been designed to work together with the atomic simulation environment (`ASE `_). ASE provides: * Structure optimization. * Molecular dynamics. * Nudged elastic band calculations. * Maximally localized Wannier functions. * Scanning tunneling microscopy images. **Open Software** GPAW is released under the :xkcd:`GNU Public License <225>` version 3 or any later version. See the file :git:`LICENSE` which accompanies the downloaded files, or see the license at GNU's web server at http://www.gnu.org/licenses/. Everybody is invited to participate in using and :ref:`developing the code `. .. 
figure:: carlsberg.png :width: 12cm September 2003 - August 2005: Sponsored by The `Carlsberg Foundation`_ (artwork by P. Erhart) .. _Carlsberg Foundation: http://www.carlsbergfondet.dk .. [Blo94] P. E. Blöchl, Phys. Rev. B 50, 17953 (1994) .. [Blo03] P. E. Blöchl, C. J. Först and J. Schimpl, Bull. Mater. Sci, 26, 33 (2003) .. [Bri96] E. L. Briggs, D. J. Sullivan and J. Bernholc, Phys. Rev. B 54, 14362 (1996) .. [Taf06] *A general and efficient pseudopotential Fourier filtering scheme for real space methods using mask functions*, Maxim Tafipolsky, Rochus Schmid, J Chem Phys. 2006 May 7;124:174102 gpaw-24.1.0/doc/bugs.rst000066400000000000000000000021651454550013000147740ustar00rootroot00000000000000.. _bugs: Bugs! ===== If you find a bug in the GPAW software, please report it to the developers so it can be fixed. We need that feedback from the community to maintain the quality of the code. Bug report ---------- * If you are unsure if it is a real bug, or a usage problem, it is probably best to report the problem on the ``gpaw-users`` mailing list (see :ref:`contact`). Please provide the failing script as well as the information about your environment (processor architecture, versions of Python and NumPy). Then we (or other users) can help you to find out if it is a bug. Another advantage of reporting bugs on the mailing list: often other users will tell you how to work around the bug (until it is solved). * If you think it is a bug, you can also report it directly on our `issue tracker`_. The advantage of reporting bugs here is that it is not forgotten (which may be a risk on the mailing list). We do not guarantee to fix all bugs, but we will do our best. Known bugs ---------- * `A list of known bugs`_. .. _A list of known bugs: .. 
_issue tracker: https://gitlab.com/gpaw/gpaw/issues gpaw-24.1.0/doc/conf.py000066400000000000000000000072661454550013000146100ustar00rootroot00000000000000import datetime import sys import sphinx_rtd_theme from gpaw import __version__ from gpaw.doctools.aamath import autodoc_process_docstring try: import sphinxcontrib.spelling except ImportError: sphinxcontrib = None assert sys.version_info >= (3, 8) sys.path.append('.') extensions = ['images', 'ext', 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.extlinks', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', 'sphinx.ext.mathjax', 'sphinx.ext.intersphinx'] if sphinxcontrib: extensions.append('sphinxcontrib.spelling') extlinks = { 'doi': ('https://doi.org/%s', 'doi: %s'), 'arxiv': ('https://arxiv.org/abs/%s', 'arXiv: %s'), 'mr': ('https://gitlab.com/gpaw/gpaw/-/merge_requests/%s', 'MR: !%s'), 'issue': ('https://gitlab.com/gpaw/gpaw/-/issues/%s', 'issue: #%s'), 'xkcd': ('https://xkcd.com/%s', 'XKCD: %s')} spelling_word_list_filename = 'words.txt' spelling_show_suggestions = True templates_path = ['templates'] source_suffix = '.rst' master_doc = 'index' project = 'GPAW' copyright = f'{datetime.date.today().year}, GPAW developers' release = __version__ exclude_patterns = ['build'] default_role = 'math' pygments_style = 'sphinx' autoclass_content = 'both' modindex_common_prefix = ['gpaw.'] intersphinx_mapping = { 'python': ('https://docs.python.org/3.10', None), 'ase': ('https://wiki.fysik.dtu.dk/ase', None), 'numpy': ('https://numpy.org/doc/stable', None), 'cupy': ('https://docs.cupy.dev/en/stable', None), 'scipy': ('https://docs.scipy.org/doc/scipy', None), 'pytest': ('https://docs.pytest.org/en/stable', None), 'mayavi': ('http://docs.enthought.com/mayavi/mayavi', None)} nitpick_ignore = [('py:class', 'gpaw.calculator.GPAW'), ('py:class', 'gpaw.spinorbit.BZWaveFunctions'), ('py:class', 'GPAW'), ('py:class', 'Atoms'), ('py:class', 'np.ndarray'), ('py:class', 'ase.spectrum.dosdata.GridDOSData'), ('py:class', 
'ase.atoms.Atoms'), ('py:class', 'gpaw.point_groups.group.PointGroup'), ('py:class', 'UGArray'), ('py:class', 'gpaw.core.arrays.DomainType'), ('py:class', 'DomainType'), ('py:class', 'Path'), ('py:class', 'Vector'), ('py:class', 'ArrayLike1D'), ('py:class', 'ArrayLike2D'), ('py:class', 'Array1D'), ('py:class', 'Array2D'), ('py:class', 'Array3D'), ('py:class', 'MPIComm'), ('py:class', 'NLOData'), ('py:class', 'numpy._typing._dtype_like._SupportsDType'), ('py:class', 'numpy._typing._dtype_like._DTypeDict'), ('py:class', 'DTypeLike'), ('py:class', 'ModuleType'), ('py:class', 'IO'), ('py:class', 'gpaw.gpu.cpupy.ndarray')] html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_style = 'gpaw.css' html_title = 'GPAW' html_favicon = 'static/gpaw_favicon.ico' html_static_path = ['static'] html_last_updated_fmt = '%a, %d %b %Y %H:%M:%S' mathjax3_config = { 'tex': { 'macros': { 'br': '{\\mathbf r}', 'bk': '{\\mathbf k}', 'bG': '{\\mathbf G}'}}} autodoc_typehints = 'description' autodoc_typehints_description_target = 'documented' def setup(app): app.connect('autodoc-process-docstring', lambda app, what, name, obj, options, lines: autodoc_process_docstring(lines)) gpaw-24.1.0/doc/contact.rst000066400000000000000000000026121454550013000154640ustar00rootroot00000000000000.. _contact: ======= Contact ======= .. _mail list: Mail List ========= There is a mailing list for getting help and for discussing GPAW: * gpaw-users_ You should consider also subscribing to the ASE :ref:`ase:mail list`. Note that you can search the mailing list archives: click on the mailing list above, then choose the archive, and search from that page. .. _chat: Chat ==== Please join the ``#gpaw`` channel on Matrix_ (also accessible via the Element_ webclient) if you have any questions. There is also a ``#ase`` channel for ASE questions. .. _Matrix: https://matrix.org .. 
_Element: https://app.element.io/#/room/#gpaw:matrix.org GitLab ====== Feel free to create Merge Requests and Issues on our GitLab page: https://gitlab.com/gpaw/gpaw Old mail lists ============== These lists are inactive and serve as an archives only: * gpaw-developers_ (deactivated Jun 20 2016) * gridpaw-developer_ (deactivated Oct 20 2009) .. _webclient: http://webchat.freenode.net/?randomnick=0&channels=gpaw .. _gpaw-developers: https://listserv.fysik.dtu.dk/mailman/listinfo/ gpaw-developers .. _gpaw-svncheckins: https://listserv.fysik.dtu.dk/mailman/listinfo/ gpaw-svncheckins .. _gpaw-users: https://listserv.fysik.dtu.dk/mailman/listinfo/gpaw-users .. _gridpaw-developer: http://listserv.fysik.dtu.dk/mailman/listinfo/ gridpaw-developer gpaw-24.1.0/doc/devel/000077500000000000000000000000001454550013000143755ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/256H2O/000077500000000000000000000000001454550013000152225ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/256H2O/akka.sh000077500000000000000000000007771454550013000165030ustar00rootroot00000000000000#!/bin/bash ### SNAC project number, enter if applicable. ### NOTE! No spaces or slashes allowed #PBS -A HPC2N-2008-005 ### Requesting 4 nodes with 8 VP:s on each node #PBS -l nodes=4:ppn=8 ### Requesting time - 20 minutes #PBS -l walltime=00:20:00 # Change to Working Directory cd $PBS_O_WORKDIR module add openmpi/1.2.6/gcc gpawhome=${HOME}/gpaw export PYTHONPATH=${gpawhome}:${PYTHONPATH} mpiexec ${gpawhome}/build/bin.linux-x86_64-2.4/gpaw-python ../b256H2O.py --sl_diagonalize=4,4,64 --gpaw=usenewlfc=1 gpaw-24.1.0/doc/devel/256H2O/b256H2O.py000066400000000000000000000066611454550013000165740ustar00rootroot00000000000000from ase import Atoms from gpaw import GPAW, ConvergenceError, RMMDIIS, setup_paths # Use setups from the $PWD and $PWD/.. 
first setup_paths.insert(0, '.') setup_paths.insert(0, '../') positions = [ (-0.069, 0.824, -1.295), (0.786, 0.943, -0.752), (-0.414, -0.001, -0.865), (-0.282, -0.674, -3.822), (0.018, -0.147, -4.624), (-0.113, -0.080, -3.034), (2.253, 1.261, 0.151), (2.606, 0.638, -0.539), (2.455, 0.790, 1.019), (3.106, -0.276, -1.795), (2.914, 0.459, -2.386), (2.447, -1.053, -1.919), (6.257, -0.625, -0.626), (7.107, -1.002, -0.317), (5.526, -1.129, -0.131), (5.451, -1.261, -2.937), (4.585, -0.957, -2.503), (6.079, -0.919, -2.200), (-0.515, 3.689, 0.482), (-0.218, 3.020, -0.189), (0.046, 3.568, 1.382), (-0.205, 2.640, -3.337), (-1.083, 2.576, -3.771), (-0.213, 1.885, -2.680), (0.132, 6.301, -0.278), (1.104, 6.366, -0.068), (-0.148, 5.363, -0.112), (-0.505, 6.680, -3.285), (-0.674, 7.677, -3.447), (-0.965, 6.278, -2.517), (4.063, 3.342, -0.474), (4.950, 2.912, -0.663), (3.484, 2.619, -0.125), (2.575, 2.404, -3.170), (1.694, 2.841, -3.296), (3.049, 2.956, -2.503), (6.666, 2.030, -0.815), (7.476, 2.277, -0.316), (6.473, 1.064, -0.651), (6.860, 2.591, -3.584), (6.928, 3.530, -3.176), (6.978, 2.097, -2.754), (2.931, 6.022, -0.243), (3.732, 6.562, -0.004), (3.226, 5.115, -0.404), (2.291, 7.140, -2.455), (1.317, 6.937, -2.532), (2.586, 6.574, -1.669), (6.843, 5.460, 1.065), (7.803, 5.290, 0.852), (6.727, 5.424, 2.062), (6.896, 4.784, -2.130), (6.191, 5.238, -2.702), (6.463, 4.665, -1.259), (0.398, 0.691, 4.098), (0.047, 1.567, 3.807), (1.268, 0.490, 3.632), (2.687, 0.272, 2.641), (3.078, 1.126, 3.027), (3.376, -0.501, 2.793), (6.002, -0.525, 4.002), (6.152, 0.405, 3.660), (5.987, -0.447, 4.980), (0.649, 3.541, 2.897), (0.245, 4.301, 3.459), (1.638, 3.457, 3.084), (-0.075, 5.662, 4.233), (-0.182, 6.512, 3.776), (-0.241, 5.961, 5.212), (3.243, 2.585, 3.878), (3.110, 2.343, 4.817), (4.262, 2.718, 3.780), (5.942, 2.582, 3.712), (6.250, 3.500, 3.566), (6.379, 2.564, 4.636), (2.686, 5.638, 5.164), (1.781, 5.472, 4.698), (2.454, 6.286, 5.887), (6.744, 5.276, 3.826), (6.238, 5.608, 4.632), 
(7.707, 5.258, 4.110), (8.573, 8.472, 0.407), (9.069, 7.656, 0.067), (8.472, 8.425, 1.397), (8.758, 8.245, 2.989), (9.294, 9.091, 3.172), (7.906, 8.527, 3.373), (4.006, 7.734, 3.021), (4.685, 8.238, 3.547), (3.468, 7.158, 3.624), (5.281, 6.089, 6.035), (5.131, 7.033, 6.378), (4.428, 5.704, 5.720), (5.067, 7.323, 0.662), (5.785, 6.667, 0.703), (4.718, 7.252, 1.585)] prefix = 'b256H2O' L = 9.8553729 atoms = Atoms('32(OH2)', positions=positions) atoms.set_cell((L, L, L), scale_atoms=False) atoms.set_pbc(1) r = [2, 2, 2] atoms = atoms.repeat(r) n = [56 * ri for ri in r] # nbands (>=128) is the number of bands per 32 water molecules nbands = 2 * 6 * 11 # 132 for ri in r: nbands = nbands * ri # the next line decreases memory usage es = RMMDIIS(keep_htpsit=False) lcao = False # toggle this to use lcao/sz if lcao: mode_kwargs = dict(mode='lcao', basis='sz') else: mode_kwargs = dict(mode='fd') calc = GPAW(**mode_kwargs, nbands=nbands, gpts=tuple(n), maxiter=5, occupations={'name': 'fermi-dirac', 'width': 0.01}, eigensolver=es, txt=prefix + '.txt') atoms.calc = calc try: pot = atoms.get_potential_energy() except ConvergenceError: pass gpaw-24.1.0/doc/devel/256H2O/prepare.sh000077500000000000000000000014471454550013000172250ustar00rootroot00000000000000#!/bin/bash FORMAT=1 setFORMAT () { # function setFORMAT takes integer as the argument $1 # and returns integer in the format %05d or %d (printf' like format) # depending on the FORMAT variable (1 or 0) if [ ${FORMAT} -eq "1" ]; then integer_formatted=`echo $1 | awk '{if ($1<10) printf("0000%.0f", $1); else if ($1<100) printf("000%.0f", $1); else if ($1<1000) printf("00%.0f", $1); else if ($1<10000) printf("0%.0f", $1); else printf("%.0f", $1)}'` else integer_formatted=$1 fi echo $integer_formatted } if test -z $PATTERN; then echo "Error: no directory pattern provided" exit fi for p in 16 32 64 128 do proc=`setFORMAT $p` dir="${PATTERN}_${proc}_" if [ ! 
-d "${dir}" ]; then mkdir ${dir} echo "${dir} created" fi done gpaw-24.1.0/doc/devel/256H2O/scaling.py000066400000000000000000000205211454550013000172140ustar00rootroot00000000000000#!/usr/bin/env python # flake8: noqa from optparse import OptionParser parser = OptionParser(usage='%prog [options] output_prefix.\nExample of call:\n'+ 'python %prog --dir=. --pattern="b256H2O_120_04x04m64.grid_*_" b256H2O\n', version='%prog 0.1') parser.add_option('--dir', dest="dir", default='.', help='results directory.') parser.add_option('--iter', dest="iter", default=5, help='number of SCF steps.') parser.add_option('--pattern', dest="pattern", default='', help='pattern for directories to search.') parser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose mode.') opt, args = parser.parse_args() import os def T(s): """Return time in seconds from hh:mm:ss""" t = 0 for x in s.split(':'): t = t * 60 + int(x) return t def analyse_benchmark(dir, pattern, output_prefix, iter, verbose=False): from os.path import abspath, exists, join output = output_prefix+'.txt' root_abspath = abspath(dir) # length of directory name rootlen = len(root_abspath) + 1 assert exists(root_abspath) from glob import glob f_list = sorted(glob(join(root_abspath, pattern, output))) assert not f_list == [], 'Error: list of output files is empty' import re time = {} processes = [] for f in f_list: # extract the number of processes p d = f.split('/')[-2] p = d.split('_')[-2] p = int(p) processes.append(p) # time[p] = { 'start': 0.0, 'fixdensity_start_estimate': 0.0, 'fixdensity_end': 0.0, 'SCF_start': 0.0, 'SCF_end': 0.0, 'forces_start': 0.0, 'forces_end': 0.0, 'total': 0.0, } # lines = open(f).readlines() # extract gpaw version for n, l in enumerate(lines): if l.startswith(' |__ | _|___|_____|'): gpaw_version = lines[n + 0].strip().split()[3].split('.')[-1] break if gpaw_version[-1] == 'M': gpaw_version = gpaw_version[:-1] if gpaw_version.rfind(':') != -1: gpaw_version = 
gpaw_version[:gpaw_version.rfind(':')] gpaw_version = int(gpaw_version) # assume old version (< 3172) start_iter = 0 # old style # # Atomic orbitals used for initialization: 1536 # log10-error: Total Iterations: # Time WFS Density Energy Fermi Poisson #iter: 0 12:10:09 -3568.07860 0 11 # log10-error: Total Iterations: # Time WFS Density Energy Fermi Poisson #iter: 0 12:13:29 +0.2 -3663.00195 0 1 #iter: 1 12:15:18 -0.8 -4417.57264 0 1 #iter: 2 12:17:07 -1.3 -4469.68829 0 1 #iter: 3 12:18:57 -0.9 -0.8 -4091.42827 0 7 #iter: 4 12:20:48 -1.1 -1.0 -4055.26110 0 7 #iter: 5 12:22:40 -1.4 -1.3 -4106.38102 0 7 # # new style # log10-error: Total Iterations: # Time WFS Density Energy Fermi Poisson #iter: 1 16:16:59 +0.4 -3663.00195 0 1 #iter: 2 16:19:11 -0.8 -4417.57264 0 1 #iter: 3 16:21:21 -1.3 -4469.68829 0 1 #iter: 4 16:23:34 -0.9 -0.8 -4091.42827 0 7 #iter: 5 16:25:49 -1.1 -1.0 -4055.26110 1 7 # if len(str(gpaw_version)) == 1: # stable release found # assume new version - this is wrong but nothing else can be done start_iter = 1 if len(str(gpaw_version)) > 4: # more then 4 digits in svnversion found # assume new version (can't compare strings here) start_iter = 1 else: if gpaw_version >= 3172: # new version start_iter = 1 # extract start time for n, l in enumerate(lines): if l.startswith('Date: '): #print l, n, f t = T(lines[n + 0].split()[4]) break time[p]['start'] = t # extract SCF beginning time estimate and end time (constant potential steps (fixdensity)) for n, l in enumerate(lines): if l.startswith('iter: 1'): #print l, n, f if start_iter == 0: fixdensity_start = n-1 else: fixdensity_start = n+0 t1 = T(lines[fixdensity_start + 0].split()[2]) t2 = T(lines[fixdensity_start + 1].split()[2]) t3 = T(lines[fixdensity_start + 2].split()[2]) break # estimate the beginning of fixdensity based on 3 fixdensity steps time[p]['fixdensity_start_estimate'] = t1-(t3-t1)/2.0 time[p]['fixdensity_end'] = t3 # extract SCF beginning and end time time[p]['SCF_start'] = 
time[p]['fixdensity_end'] for n, l in enumerate(lines): if l.startswith('iter: '+"%3s" % iter): #print l, n, f t = T(lines[n + 0].split()[2]) break time[p]['SCF_end'] = t for n, l in enumerate(lines): if l.startswith('Total:') and (l.find('MB') == -1) and (l.find('GB') == -1): #print l, n, f t = l.split()[1] break time[p]['total'] = t # # results # speedup = {} efficiency = {} # if verbose: print("# p - processes, p0 - reference processes, t - time [sec], s - speedup, e - efficiency") print("# GPAW version "+str(gpaw_version)+": stages: 1 - initialization, 2 - fixdensity, 3 - SCF, 4 - forces, 5 - total") print("# p "+" p/p0 "+" t1 "+" s1 "+" e1 "+" t2 "+" s2 "+" e2 "+" t3 "+" s3 "+" e3 "+" t4 "+" s4 "+" e4 "+" t5 "+" s5 "+" e5") for p in processes: time[p]['init'] = time[p]['fixdensity_start_estimate'] - time[p]['start'] time[p]['fixdensity'] = time[p]['fixdensity_end'] - time[p]['fixdensity_start_estimate'] time[p]['SCF'] = time[p]['SCF_end'] - time[p]['SCF_start'] time[p]['forces'] = time[p]['forces_end'] - time[p]['forces_start'] tot = max(time[p]['fixdensity_end'], time[p]['SCF_end'], time[p]['forces_end'])-time[p]['start'] sum_of_entries = time[p]['init'] + time[p]['fixdensity']+ time[p]['SCF'] + time[p]['forces'] #print time[p]['init'], time[p]['fixdensity'], time[p]['SCF'], time[p]['forces'] if verbose: if abs(float(tot)-float(time[p]['total'])) > 5.0: print('Warning: Sum of time entries: '+str(tot)+' does not match total time in the output: '+str(time[p]['total'])) time[p]['total'] = sum_of_entries # calculate speedup[p] = {} efficiency[p] = {} for stage in ['init', 'fixdensity', 'SCF', 'forces', 'total']: if time[p][stage] > 0.0: speedup[p][stage] = float(time[processes[0]][stage])/float(time[p][stage])*processes[0] else: speedup[p][stage] = 0.0 efficiency[p][stage] = speedup[p][stage]/p # print results print(' %5d %6.2f %7.1f %7.1f %5.2f %7.1f %7.1f %5.2f %7.1f %7.1f %5.2f %7.1f %7.1f %5.2f %7.1f %7.1f %5.2f' %( p, float(p)/processes[0], 
float(time[p]['init']), speedup[p]['init'], efficiency[p]['init'], float(time[p]['fixdensity']), speedup[p]['fixdensity'], efficiency[p]['fixdensity'], float(time[p]['SCF']), speedup[p]['SCF'], efficiency[p]['SCF'], float(time[p]['forces']), speedup[p]['forces'], efficiency[p]['forces'], float(time[p]['total']), speedup[p]['total'], efficiency[p]['total'], )) if __name__ == '__main__': from os import environ assert len(args) == 1, 'Error: Only one argument allowed: output prefix' output_prefix = args[0] analyse_benchmark(opt.dir, opt.pattern, output_prefix, opt.iter, opt.verbose) gpaw-24.1.0/doc/devel/256H2O/surveyor.sh000077500000000000000000000014571454550013000174660ustar00rootroot00000000000000type=b256H2O cwd=`pwd` acct=Gpaw queue=default time=60 nodes=64 mode=smp mapping=ZYXT # mapfile=BGMAP_128_4x4x4x8 # mapping=$mapfile job=${type}_${nodes}_${mode}_${mapping} input=${type}.py # pos=Au102_revised.xyz scratch=/pvfs-surveyor/${USER} install=/soft/apps rm -rf $scratch/$job mkdir $scratch/$job cp $input $scratch/$job # cp $mapfile $scratch/$job # cp $pos $scratch/$job cd $scratch/$job qsub -A $acct -n $nodes -t $time -q $queue --mode $mode --env BG_MAPPING=$mapping:MPIRUN_ENABLE_TTY_REPORTING=0:OMP_NUM_THREADS=1:GPAW_SETUP_PATH=$GPAW_SETUP_PATH:PYTHONPATH=${install}/gpaw-r6000:${install}/ase-r1428:$PYTHONPATH:LD_LIBRARY_PATH=$CN_LD_LIBRARY_PATH ${install}/gpaw-r6000/build/bin.linux-ppc64-2.6/gpaw-python ${type}.py --domain-decomposition=4,4,4 --state-parallelization=1 --sl_diagonalize=4,4,64 gpaw-24.1.0/doc/devel/Au_cluster/000077500000000000000000000000001454550013000165035ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/Au_cluster/Au102_revised.xyz000066400000000000000000001135331454550013000215760ustar00rootroot00000000000000 762 co O 4.643068 55.35863 1.914528 C 5.530358 54.68889 1.117290 O 6.008219 55.21918 8.3859563E-02 C 5.952099 53.31332 1.484933 C 5.489089 52.76123 2.641151 C 5.882139 51.47557 3.007024 C 6.783984 52.64256 0.6299170 C 7.210886 51.35863 
0.9500570 C 6.720107 50.79259 2.134859 S 7.281971 49.15729 2.568976 O 23.82684 45.53658 14.03025 C 23.14090 46.14394 14.88940 O 23.70600 47.10936 15.67683 C 21.68042 45.89135 14.98117 C 21.12938 44.84304 14.29801 C 19.76040 44.58648 14.39198 C 20.93243 46.67555 15.81903 C 19.54736 46.51208 15.87013 C 18.99044 45.43933 15.18126 S 17.20737 45.24619 15.20520 O 14.00951 48.13890 21.24887 C 14.41894 48.22198 20.06439 O 14.95781 49.38449 19.58521 C 14.21943 47.08743 19.12726 C 13.73244 45.90531 19.58925 C 13.53139 44.83315 18.72923 C 14.58544 47.25032 17.80919 C 14.40450 46.20317 16.90701 C 13.83438 45.02512 17.37114 S 13.53933 43.67895 16.23707 O 6.634805 43.12247 20.88744 C 6.364436 44.25287 20.16598 O 5.194173 44.70269 20.09063 C 7.455374 44.92797 19.41810 C 8.740571 44.49224 19.58603 C 9.786382 45.12344 18.91717 C 7.148305 45.95941 18.57416 C 8.167450 46.61039 17.87421 C 9.466511 46.13103 18.00820 S 10.79936 46.98213 17.16426 O 10.04447 54.06574 20.86317 C 10.15460 52.81568 20.81059 O 9.592354 51.99842 21.75259 C 10.91649 52.19519 19.69715 C 11.34933 52.98172 18.67313 C 12.02130 52.40578 17.59624 C 11.12052 50.84320 19.72609 C 11.83240 50.23237 18.70208 C 12.31142 51.04565 17.67127 S 13.26335 50.29810 16.37928 O 14.16082 59.70705 14.48564 C 13.78335 58.86912 13.47232 O 13.10441 59.30626 12.51030 C 14.20429 57.44548 13.50803 C 14.94624 57.01441 14.57421 C 15.33352 55.68627 14.66496 C 13.85516 56.61068 12.48615 C 14.23940 55.26741 12.52295 C 14.94042 54.82528 13.63594 S 15.56634 53.16148 13.67095 O -6.5146446E-02 51.59646 8.508490 C 0.2945593 51.48835 7.193063 O -0.1726409 52.26707 6.325387 C 1.273334 50.44762 6.787957 C 1.736292 49.56859 7.728009 C 2.668320 48.59532 7.373927 C 1.679030 50.40224 5.483103 C 2.578065 49.42257 5.072924 C 3.083218 48.55518 6.037987 S 4.253213 47.30558 5.539199 O 13.36074 37.85122 18.10799 C 13.70326 37.44828 16.96875 O 13.33343 36.20832 16.52474 C 14.41043 38.37921 16.05306 C 14.83851 39.58693 16.51898 C 15.50635 40.47409 15.67826 C 
14.68518 37.95802 14.77715 C 15.35148 38.80738 13.90391 C 15.72939 40.06745 14.36304 S 16.38135 41.24491 13.19003 O 6.287794 37.49395 14.85789 C 6.221934 38.31190 15.95244 O 5.575912 37.97471 16.97543 C 6.901430 39.63230 15.94516 C 7.596233 40.01393 14.83218 C 8.270264 41.23211 14.81896 C 6.821406 40.41824 17.06029 C 7.470425 41.65039 17.09102 C 8.168453 42.04016 15.94409 S 8.884541 43.67139 15.89800 O 2.993605 50.10533 18.38090 C 3.362277 50.93982 17.51762 O 2.463053 51.79006 16.93460 C 4.789406 50.96944 17.10817 C 5.683452 50.19223 17.79096 C 7.030296 50.20619 17.44403 C 5.169454 51.78331 16.07700 C 6.504401 51.83043 15.69505 C 7.398354 50.98341 16.34212 S 9.112476 51.02820 15.86298 O -0.7125636 41.38853 4.938404 C 0.5016739 40.81726 4.672509 O 0.5938559 39.80952 3.928522 C 1.726058 41.36243 5.311957 C 1.610145 42.40201 6.188053 C 2.747090 42.96806 6.755086 C 2.939030 40.84001 4.961090 C 4.110150 41.36243 5.514902 C 3.977959 42.38340 6.453168 S 5.411166 42.88195 7.382144 O 5.181141 57.59570 5.192425 C 6.043667 57.00060 5.884848 O 7.145545 57.67281 6.338097 C 5.853166 55.55480 6.165186 C 4.731126 54.94861 5.664968 C 4.500156 53.59954 5.897926 C 6.752260 54.87473 6.928018 C 6.570124 53.51169 7.180985 C 5.440186 52.89737 6.655042 S 5.193209 51.14455 6.880855 O 8.619753 36.96050 18.18099 C 8.896887 36.76692 16.97133 O 8.472108 35.63259 16.33549 C 9.757255 37.75208 16.26816 C 10.14839 38.87603 16.94024 C 10.95255 39.82021 16.32104 C 10.15226 37.50484 14.98403 C 10.92341 38.44786 14.30623 C 11.30838 39.59216 14.98796 S 12.41032 40.75740 14.21226 O 3.950984 42.53683 17.42722 C 3.593360 43.73258 17.56802 O 2.648002 44.07249 18.49655 C 4.262546 44.80813 16.79303 C 5.179490 44.48700 15.83618 C 5.789275 45.49751 15.08800 C 3.867588 46.10311 17.01098 C 4.442561 47.14037 16.29496 C 5.425463 46.81110 15.35097 S 6.394397 48.13923 14.67604 O 10.90868 57.20771 18.95900 C 9.856514 56.68837 18.25569 O 8.690457 56.78666 18.71196 C 10.09432 55.97482 16.97525 C 11.36536 55.92363 
16.46324 C 11.61937 55.23135 15.28523 C 9.045316 55.38842 16.33426 C 9.249225 54.68624 15.14410 C 10.55137 54.58560 14.65924 S 10.81635 53.85376 13.04497 O 14.46961 58.72963 3.168493 C 15.48247 58.07812 3.817401 O 16.63726 58.05564 3.323952 C 15.20906 57.34775 5.081142 C 13.93824 57.34659 5.589935 C 13.66512 56.71481 6.802964 C 16.23412 56.70666 5.721064 C 16.00425 56.02834 6.920157 C 14.71023 56.04347 7.436096 S 14.34314 55.02773 8.854929 O 2.909500 57.09191 9.359017 C 3.161310 56.88253 10.57157 O 2.635657 57.69863 11.53547 C 4.021297 55.73863 10.96799 C 4.511193 54.91545 9.992210 C 5.335227 53.85260 10.33593 C 4.296136 55.54142 12.28928 C 5.143184 54.50532 12.68124 C 5.603067 53.64433 11.68723 S 6.580122 52.23243 12.15315 O 1.032707 44.42430 15.61067 C 0.5422949 44.43387 14.33363 O -0.6313003 44.05661 14.09304 C 1.410306 44.91750 13.23005 C 2.691346 45.28924 13.52196 C 3.543456 45.72148 12.50795 C 0.9062620 44.98847 11.96021 C 1.733203 45.38057 10.90404 C 3.021432 45.79943 11.21596 S 4.057307 46.44110 9.916105 O 22.75822 39.18289 9.874498 C 22.89954 40.29198 9.086229 O 24.01403 40.86176 8.982292 C 21.74029 40.84001 8.337203 C 20.49323 40.31702 8.539077 C 19.39811 40.83303 7.854136 C 21.94326 41.89996 7.495051 C 20.89111 42.40899 6.742937 C 19.61829 41.87902 6.960889 S 18.25931 42.50324 5.994040 O 9.011614 33.61625 9.421977 C 8.129450 34.37983 9.887013 O 6.836926 33.95716 10.03587 C 8.492226 35.77122 10.25804 C 9.813372 36.11678 10.29806 C 10.17720 37.41583 10.63320 C 7.495189 36.66014 10.54602 C 7.813386 37.96326 10.91297 C 9.160071 38.31929 10.92726 S 9.604399 39.98542 11.36709 O 0.9573877 54.59730 14.58288 C 0.9672970 54.34949 13.23755 O 0.2956332 55.06332 12.45215 C 1.747404 53.20453 12.70303 C 2.412290 52.38950 13.56913 C 3.181167 51.33537 13.08713 C 1.806357 53.02477 11.34994 C 2.516148 51.94969 10.82114 C 3.167299 51.10266 11.71224 S 4.059295 49.70123 11.06911 O 20.01571 38.10331 11.14981 C 19.02769 37.34936 11.33123 O 19.17522 36.16042 11.99159 C 17.66317 
37.80910 10.96799 C 17.47810 39.09127 10.53423 C 16.20024 39.52700 10.18194 C 16.63242 36.91204 11.02194 C 15.33730 37.30879 10.70716 C 15.14433 38.62296 10.28627 S 13.47739 39.19424 10.01793 Au 13.36135 47.11128 13.18503 Au 11.59717 47.14328 7.552933 Au 11.04726 49.57150 6.145892 Au 8.888288 47.78029 6.795103 Au 9.831093 45.10308 6.503190 Au 12.56709 45.23106 5.678902 Au 10.75526 48.00834 12.45793 Au 12.03021 49.58139 8.932105 Au 9.839216 47.88617 9.609901 Au 10.68738 45.16416 9.337996 Au 8.927027 49.25561 4.342963 Au 16.05950 46.27938 13.24899 Au 10.36721 44.46024 3.893125 Au 11.12430 46.39515 14.76822 Au 11.87624 49.30913 3.400053 Au 9.318151 50.37664 8.226083 Au 7.964193 45.91229 8.665918 Au 12.47246 43.04543 10.38988 Au 8.855413 45.98035 11.49394 Au 10.26713 50.40456 11.04124 Au 14.17730 51.86127 14.98010 Au 5.831200 48.34226 4.208976 Au 7.378880 43.28511 3.695181 Au 11.21425 43.60914 15.96803 Au 9.943442 48.94263 16.35427 Au 12.48026 52.04626 10.25304 Au 7.200296 48.65815 8.971051 Au 8.879115 43.23915 8.456898 Au 9.713131 43.26242 11.09912 Au 8.079171 48.71342 11.62221 Au 9.379185 51.75248 5.867913 Au 7.168023 49.91240 6.456027 Au 6.151152 47.02809 6.960175 Au 7.154222 44.30841 6.434946 Au 9.482951 42.41190 5.822179 Au 11.57444 42.65391 13.09213 Au 9.285130 44.40323 13.66917 Au 8.568218 47.30442 13.99610 Au 9.401384 50.22131 13.62594 Au 12.16019 51.73677 4.843896 Au 14.29926 52.74146 8.191068 Au 7.562135 51.11256 10.23303 Au 6.284110 46.77154 10.81614 Au 7.049255 44.05127 10.47921 Au 10.58616 41.18441 9.325134 Au 12.03021 54.76245 8.932105 Au 4.966249 50.45867 9.058946 Au 7.715278 41.25422 11.46893 Au 15.29596 52.95030 5.429151 Au 4.866278 44.71214 8.685212 Au 11.15109 40.06105 5.812889 Au 5.425331 48.82046 12.74019 O 19.34446 55.30380 16.01567 C 18.52981 54.68889 16.74763 O 18.00934 55.26647 17.87322 C 18.10807 53.31332 16.37999 C 18.57108 52.76123 15.22377 C 18.17803 51.47557 14.85790 C 17.27618 52.64256 17.23501 C 16.84928 51.35863 16.91487 C 17.34006 
50.79259 15.73007 S 16.77820 49.15729 15.29595 O 0.1721660 45.48243 3.911289 C 0.9192697 46.14394 2.975526 O 0.4004371 47.03033 2.252563 C 2.379749 45.89135 2.883756 C 2.930790 44.84304 3.566911 C 4.299766 44.58648 3.472941 C 3.127734 46.67555 2.045891 C 4.512810 46.51208 1.994798 C 5.069727 45.43933 2.683669 S 6.852795 45.24619 2.659730 O 10.05066 48.13890 -3.383943 C 9.641226 48.22198 -2.199461 O 9.102354 49.38449 -1.720275 C 9.840737 47.08743 -1.262336 C 10.32773 45.90531 -1.724323 C 10.52878 44.83315 -0.8643050 C 9.474732 47.25032 5.5739000E-02 C 9.655665 46.20317 0.9579170 C 10.22579 45.02512 0.4937870 S 10.52084 43.67895 1.627852 O 17.42536 43.12247 -3.022517 C 17.69574 44.25287 -2.301048 O 18.86600 44.70270 -2.225700 C 16.60480 44.92797 -1.553177 C 15.31960 44.49224 -1.721107 C 14.27379 45.12344 -1.052244 C 16.91186 45.95941 -0.7092380 C 15.89272 46.61039 -9.2900004E-03 C 14.59366 46.13103 -0.1432770 S 13.26081 46.98213 0.7006620 O 14.02552 54.17721 -3.002937 C 13.90557 52.81568 -2.945663 O 14.42178 52.06533 -3.810549 C 13.14368 52.19519 -1.832227 C 12.71084 52.98172 -0.8082090 C 12.03887 52.40578 0.2686880 C 12.93965 50.84320 -1.861168 C 12.22777 50.23237 -0.8371500 C 11.74875 51.04565 0.1936560 S 10.79682 50.29810 1.485647 O 9.930249 59.63845 3.462245 C 10.27682 58.86912 4.392601 O 11.01631 59.34524 5.440407 C 9.855880 57.44548 4.356898 C 9.113933 57.01441 3.290719 C 8.726646 55.68627 3.199965 C 10.20501 56.61068 5.378772 C 9.820773 55.26741 5.341970 C 9.119745 54.82528 4.228985 S 8.493828 53.16148 4.193970 O 24.12531 51.59646 9.356437 C 23.76561 51.48835 10.67186 O 24.23281 52.26707 11.53954 C 22.78683 50.44762 11.07697 C 22.32388 49.56859 10.13692 C 21.39185 48.59532 10.49100 C 22.38114 50.40224 12.38182 C 21.48210 49.42257 12.79200 C 20.97695 48.55518 11.82694 S 19.80696 47.30558 12.32573 O 10.69943 37.85122 -0.2430668 C 10.35692 37.44828 0.8961694 O 10.72673 36.20831 1.340191 C 9.649743 38.37921 1.811861 C 9.221658 39.58693 1.345943 C 8.553819 40.47409 
2.186667 C 9.374989 37.95802 3.087774 C 8.708688 38.80738 3.961011 C 8.330782 40.06745 3.501883 S 7.678825 41.24491 4.674894 O 17.77238 37.49394 3.007031 C 17.83823 38.31190 1.912485 O 18.48426 37.97472 0.8895011 C 17.15874 39.63230 1.919765 C 16.46394 40.01393 3.032750 C 15.78990 41.23211 3.045970 C 17.23876 40.41824 0.8046360 C 16.58974 41.65039 0.7739090 C 15.89172 42.04016 1.920837 S 15.17563 43.67139 1.966928 O 21.09944 50.03093 -0.5929538 C 20.69789 50.93982 0.3473062 O 21.52350 51.72045 0.8825867 C 19.27076 50.96944 0.7567580 C 18.37672 50.19223 7.3960997E-02 C 17.02987 50.20619 0.4208980 C 18.89071 51.78331 1.787922 C 17.55577 51.83043 2.169874 C 16.66182 50.98341 1.522806 S 14.94769 51.02820 2.001944 O 24.67332 41.34176 12.94829 C 23.55849 40.81726 13.19242 O 23.45810 39.71967 14.00275 C 22.33411 41.36243 12.55297 C 22.45002 42.40201 11.67687 C 21.31308 42.96806 11.10984 C 21.12114 40.84001 12.90384 C 19.95002 41.36243 12.35002 C 20.08221 42.38340 11.41176 S 18.64900 42.88195 10.48278 O 18.87903 57.59570 12.67250 C 18.01650 57.00060 11.98008 O 16.91463 57.67281 11.52683 C 18.20700 55.55480 11.69974 C 19.32904 54.94861 12.19996 C 19.56001 53.59954 11.96700 C 17.30791 54.87473 10.93691 C 17.49005 53.51169 10.68394 C 18.61998 52.89737 11.20988 S 18.86696 51.14455 10.98407 O 15.46512 36.97776 -0.4239281 C 15.16328 36.76692 0.8935930 O 15.55328 35.72546 1.477371 C 14.30291 37.75208 1.596767 C 13.91178 38.87603 0.9246890 C 13.10762 39.82021 1.543887 C 13.90791 37.50484 2.880898 C 13.13676 38.44786 3.558693 C 12.75179 39.59216 2.876968 S 11.64985 40.75740 3.652663 O 20.07730 42.43021 0.4502607 C 20.46681 43.73258 0.2969047 O 21.33477 44.04466 -0.5556093 C 19.79762 44.80813 1.071896 C 18.88068 44.48700 2.028741 C 18.27090 45.49751 2.776924 C 20.19258 46.10311 0.8539430 C 19.61761 47.14037 1.569970 C 18.63471 46.81110 2.513952 S 17.66577 48.13923 3.188889 O 13.23763 57.16519 -1.036496 C 14.20365 56.68837 -0.3907657 O 15.47369 56.79543 -0.8877281 C 13.96585 55.97482 
0.8896730 C 12.69481 55.92363 1.401682 C 12.44079 55.23135 2.579695 C 15.01485 55.38842 1.530667 C 14.81094 54.68624 2.720828 C 13.50880 54.58560 3.205682 S 13.24382 53.85376 4.819957 O 9.507632 58.67628 14.64330 C 8.577701 58.07812 14.04752 O 7.319938 58.05363 14.58498 C 8.851114 57.34775 12.78378 C 10.12193 57.34659 12.27499 C 10.39505 56.71481 11.06196 C 7.826046 56.70666 12.14386 C 8.055922 56.02834 10.94477 C 9.349940 56.04347 10.42883 S 9.717032 55.02773 9.009996 O 21.17312 57.11058 8.614030 C 20.89886 56.88253 7.293349 O 21.38148 57.63181 6.408372 C 20.03887 55.73863 6.896933 C 19.54898 54.91545 7.872715 C 18.72494 53.85260 7.528994 C 19.76403 55.54142 5.575643 C 18.91698 54.50532 5.183687 C 18.45710 53.64433 6.177691 S 17.48005 52.23243 5.711774 O 23.02747 44.42430 2.254256 C 23.51787 44.43387 3.531297 O 24.69147 44.05661 3.771881 C 22.64986 44.91750 4.634876 C 21.36882 45.28924 4.342963 C 20.51671 45.72148 5.356976 C 23.15391 44.98847 5.904715 C 22.32697 45.38057 6.960889 C 21.03874 45.79943 6.648968 S 20.00286 46.44110 7.948820 O 1.301953 39.18289 7.990426 C 1.160635 40.29198 8.778694 O 4.6143651E-02 40.86176 8.882634 C 2.319879 40.84001 9.527722 C 3.566937 40.31702 9.325848 C 4.662059 40.83303 10.01079 C 2.116911 41.89996 10.36987 C 3.169056 42.40899 11.12199 C 4.441880 41.87902 10.90404 S 5.800859 42.50324 11.87089 O 15.04855 33.61625 8.442947 C 15.93072 34.37983 7.977912 O 17.22324 33.95717 7.829062 C 15.56794 35.77122 7.606885 C 14.24680 36.11678 7.566868 C 13.88297 37.41583 7.231722 C 16.56498 36.66014 7.318903 C 16.24678 37.96326 6.951957 C 14.90010 38.31929 6.937665 S 14.45577 39.98542 6.497831 O 23.10197 54.57701 3.392183 C 23.09287 54.34950 4.627372 O 23.82443 55.12697 5.482812 C 22.31277 53.20453 5.161892 C 21.64788 52.38950 4.295800 C 20.87900 51.33537 4.777796 C 22.25381 53.02477 6.514981 C 21.54402 51.94969 7.043783 C 20.89287 51.10266 6.152680 S 20.00087 49.70123 6.795818 O 3.956360 38.17054 6.731294 C 5.032481 37.34936 6.533691 O 4.897030 
36.25776 5.927394 C 6.397002 37.80910 6.896933 C 6.582071 39.09127 7.330693 C 7.859930 39.52700 7.682990 C 7.427752 36.91204 6.842981 C 8.722870 37.30879 7.157761 C 8.915844 38.62296 7.578659 S 10.58278 39.19424 7.846990 Au 10.69882 47.11128 4.679896 Au 12.46300 47.14328 10.31199 Au 13.01291 49.57150 11.71903 Au 15.17188 47.78029 11.06982 Au 14.22908 45.10308 11.36174 Au 11.49308 45.23106 12.18602 Au 13.30491 48.00834 5.406998 Au 14.22095 47.88617 8.255025 Au 13.37279 45.16416 8.526929 Au 15.13314 49.25561 13.52196 Au 8.000666 46.27938 4.615939 Au 13.69296 44.46024 13.97180 Au 12.93587 46.39515 3.096706 Au 12.18393 49.30913 14.46487 Au 14.74202 50.37664 9.638842 Au 16.09598 45.91229 9.199007 Au 11.58771 43.04543 7.475042 Au 15.20476 45.98035 6.370990 Au 13.79304 50.40456 6.823687 Au 9.882868 51.86127 2.884828 Au 18.22897 48.34226 13.65595 Au 16.68129 43.28511 14.16974 Au 12.84592 43.60914 1.896898 Au 14.11673 48.94263 1.510658 Au 11.57991 52.04626 7.611887 Au 16.85987 48.65815 8.893874 Au 15.18106 43.23915 9.408027 Au 14.34704 43.26242 6.765805 Au 15.98100 48.71342 6.242719 Au 14.68098 51.75248 11.99701 Au 16.89215 49.91240 11.40890 Au 17.90902 47.02809 10.90475 Au 16.90595 44.30841 11.42998 Au 14.57722 42.41190 12.04275 Au 12.48573 42.65391 4.772793 Au 14.77504 44.40323 4.195756 Au 15.49195 47.30442 3.868828 Au 14.65879 50.22131 4.238989 Au 11.89998 51.73677 13.02103 Au 9.760904 52.74146 9.673857 Au 16.49803 51.11256 7.631896 Au 17.77606 46.77154 7.048785 Au 17.01091 44.05127 7.385717 Au 13.47401 41.18441 8.539792 Au 19.09392 50.45867 8.805979 Au 16.34489 41.25422 6.396001 Au 8.764208 52.95030 12.43577 Au 19.19389 44.71214 9.179713 Au 12.90908 40.06105 12.05204 Au 18.63484 48.82046 5.124732 H 4.807415 53.32843 3.283519 H 5.544020 51.02180 3.944466 H 7.111856 53.11485 -0.3019980 H 7.903290 50.81113 0.3021078 H 21.76348 44.20308 13.67560 H 19.30601 43.74244 13.86271 H 21.41991 47.42860 16.44698 H 18.91790 47.20725 16.43541 H 13.49565 45.79691 20.65283 H 13.15094 
43.87420 19.09626 H 15.01673 48.19849 17.47157 H 14.70433 46.30693 15.85898 H 8.947954 43.64500 20.24800 H 10.82748 44.83728 19.09949 H 6.107174 46.27406 18.44740 H 7.949561 47.47427 17.23761 H 11.17027 54.06174 18.69563 H 12.31038 53.00069 16.72357 H 10.72426 50.24492 20.55317 H 12.01201 49.15220 18.70222 H 15.23519 57.72142 15.35885 H 15.92588 55.32470 15.51196 H 13.27532 56.99253 11.63940 H 13.99537 54.58651 11.70086 H 1.374198 49.62978 8.759595 H 3.061117 47.88890 8.112648 H 1.297843 51.13419 4.763409 H 2.878999 49.33641 4.023620 H 14.65571 39.86174 17.56305 H 15.84313 41.45267 16.03601 H 14.38110 36.95850 14.44924 H 15.57469 38.49433 12.87863 H 7.622866 39.36280 13.95220 H 8.860794 41.54272 13.95073 H 6.247428 40.07960 17.92913 H 7.435246 42.28933 17.97958 H 5.338098 49.55697 18.61328 H 7.771235 49.63237 18.01039 H 4.425679 52.39452 15.55522 H 6.842557 52.51338 14.90876 H 0.6196718 42.79161 6.445351 H 2.679022 43.84003 7.413918 H 2.992915 40.01233 4.246199 H 5.095037 40.98361 5.222463 H 4.012844 55.53096 5.078485 H 3.609277 53.10203 5.500629 H 7.620262 55.39634 7.344577 H 7.295382 52.94595 7.775090 H 9.824409 39.03170 17.97456 H 11.29646 40.71163 16.85593 H 9.864709 36.57100 14.48978 H 11.21722 38.28912 13.26340 H 5.437677 43.43853 15.65431 H 6.531059 45.26052 14.31818 H 3.093570 46.31846 17.75499 H 4.140304 48.17976 16.46029 H 12.18390 56.43048 16.98489 H 12.62786 55.19342 14.86033 H 8.037093 55.46783 16.75403 H 8.410931 54.22874 14.60840 H 13.13171 57.84440 5.041563 H 12.66309 56.74504 7.243469 H 17.24083 56.72539 5.290689 H 16.81367 55.50248 7.437193 H 4.254252 55.09414 8.942888 H 5.761630 53.19800 9.568656 H 3.850711 56.19806 13.04390 H 5.435155 54.37375 13.72836 H 3.049904 45.24752 14.55575 H 4.584222 45.99009 12.71695 H -0.1430433 44.73855 11.77174 H 1.379107 45.35863 9.868101 H 20.35615 39.48905 9.242426 H 18.39274 40.42902 8.012383 H 22.93876 42.34871 7.413601 H 21.05530 43.19987 6.003627 H 10.58303 35.37299 10.06692 H 11.22911 37.71835 10.66483 H 
6.447533 36.34698 10.48798 H 7.033407 38.68354 11.18102 H 2.341136 52.56484 14.64765 H 3.775532 50.71114 13.76248 H 1.295664 53.72580 10.68154 H 2.560265 51.77689 9.740764 H 18.33030 39.77518 10.46323 H 16.03148 40.55112 9.833043 H 16.82802 35.87520 11.31474 H 14.49689 36.61145 10.78742 H 19.25275 53.32843 14.58141 H 18.51615 51.02180 13.92046 H 16.94831 53.11485 18.16692 H 16.15688 50.81113 17.56282 H 2.296693 44.20308 4.189324 H 4.754158 43.74244 4.002218 H 2.640261 47.42860 1.417947 H 5.142265 47.20725 1.429515 H 10.56452 45.79691 -2.787904 H 10.90923 43.87420 -1.231333 H 9.043443 48.19849 0.3933618 H 9.355838 46.30693 2.005945 H 15.11222 43.64500 -2.383070 H 13.23269 44.83727 -1.234568 H 17.95299 46.27406 -0.5824742 H 16.11061 47.47427 0.6273158 H 12.88990 54.06174 -0.8307063 H 11.74979 53.00069 1.141355 H 13.33591 50.24492 -2.688242 H 12.04816 49.15220 -0.8372912 H 8.824980 57.72142 2.506080 H 8.134295 55.32470 2.352964 H 10.78485 56.99253 6.225528 H 10.06480 54.58651 6.164064 H 22.68597 49.62978 9.105330 H 20.99905 47.88890 9.752278 H 22.76233 51.13419 13.10152 H 21.18117 49.33641 13.84130 H 9.404456 39.86174 0.3018713 H 8.217037 41.45267 1.828918 H 9.679070 36.95850 3.415682 H 8.485483 38.49433 4.986292 H 16.43731 39.36281 3.912726 H 15.19937 41.54272 3.914199 H 17.81274 40.07960 -6.4209402E-02 H 16.62492 42.28933 -0.1146565 H 18.72207 49.55698 -0.7483597 H 16.28893 49.63236 -0.1454588 H 19.63449 52.39452 2.309703 H 17.21761 52.51338 2.956168 H 23.44050 42.79161 11.41957 H 21.38115 43.84003 10.45101 H 21.06726 40.01233 13.61873 H 18.96513 40.98361 12.64246 H 20.04733 55.53096 12.78644 H 20.45089 53.10203 12.36430 H 16.43991 55.39634 10.52035 H 16.76479 52.94595 10.08984 H 14.23576 39.03170 -0.1096368 H 12.76371 40.71163 1.008989 H 14.19546 36.57100 3.375142 H 12.84295 38.28912 4.601525 H 18.62249 43.43853 2.210609 H 17.52911 45.26052 3.546743 H 20.96660 46.31846 0.1099375 H 19.91986 48.17976 1.404641 H 11.87626 56.43048 0.8800398 H 11.43231 55.19342 3.004598 
H 16.02308 55.46783 1.110897 H 15.64924 54.22874 3.256528 H 10.92846 57.84440 12.82336 H 11.39708 56.74504 10.62146 H 6.819343 56.72539 12.57424 H 7.246497 55.50248 10.42773 H 19.80592 55.09414 8.922037 H 18.29854 53.19800 8.296268 H 20.20946 56.19806 4.821026 H 18.62501 54.37375 4.136564 H 21.01026 45.24752 3.309174 H 19.47595 45.99009 5.147980 H 24.20321 44.73855 6.093186 H 22.68106 45.35863 7.996823 H 3.704020 39.48905 8.622498 H 5.667424 40.42902 9.852545 H 1.121410 42.34871 10.45133 H 3.004869 43.19987 11.86130 H 13.47714 35.37299 7.798008 H 12.83106 37.71835 7.200102 H 17.61264 36.34698 7.376952 H 17.02676 38.68354 6.683907 H 21.71903 52.56484 3.217275 H 20.28464 50.71114 4.102443 H 22.76451 53.72580 7.183387 H 21.49990 51.77689 8.124163 H 5.729867 39.77518 7.401697 H 8.028692 40.55112 8.031885 H 7.232148 35.87520 6.550187 H 9.563281 36.61145 7.077501 H 4.499304 56.23379 1.504695 H 24.65897 47.12344 15.46193 H 14.99654 49.99681 20.34553 H 5.794437 42.83582 21.29505 H 9.144341 52.59243 22.38581 H 13.78954 60.58594 14.27533 H -0.7067649 52.33116 8.563978 H 12.87919 35.76204 17.26571 H 5.780760 36.68412 15.06190 H 1.590175 51.61205 17.33576 H -1.379933 40.88758 4.430267 H 7.083802 58.60372 6.048079 H 7.928782 35.11981 16.96508 H 2.340572 43.23471 18.89421 H 10.54478 57.62843 19.76219 H 14.85138 59.13754 2.367003 H 2.107297 58.37844 11.07371 H 0.3163746 44.07811 16.17772 H 23.62733 38.98650 10.27527 H 6.807898 33.02697 9.738495 H 0.3912354 55.38048 14.72651 H 20.13347 36.04847 12.14573 H 18.40709 56.15669 17.93504 H -0.7494243 45.79179 3.813842 H 9.063624 49.99681 -2.480599 H 18.26573 42.83582 -3.430121 H 14.55518 54.38012 -3.798434 H 11.18572 60.29335 5.276391 H 24.76693 52.33115 9.300949 H 11.18097 35.76204 0.5992205 H 18.27941 36.68411 2.803026 H 22.05827 50.17164 -0.7169890 H 24.36220 39.52133 14.31545 H 16.97637 58.60372 11.81685 H 16.02653 36.22573 -0.6955962 H 20.62875 41.88702 -0.1458736 H 15.41490 57.29078 -1.727784 H 7.351008 58.58350 15.40522 H 
21.74837 57.89884 8.661409 H 23.74380 44.07811 1.687206 H 0.4328496 38.98650 7.589652 H 17.25227 33.02698 8.126433 H 24.25922 55.81340 4.940309 H 3.162973 37.69257 6.420479 gpaw-24.1.0/doc/devel/Au_cluster/Au_cluster.py000066400000000000000000000021561454550013000211670ustar00rootroot00000000000000from ase.io.xyz import read_xyz from gpaw import GPAW, Mixer, ConvergenceError, RMMDIIS, setup_paths # Use setups from the $PWD and $PWD/.. first setup_paths.insert(0, '.') setup_paths.insert(0, '../') atoms = read_xyz('../Au102_revised.xyz') prefix = 'Au_cluster' L = 32.0 atoms.set_cell((L, L, L), scale_atoms=False) atoms.center() atoms.set_pbc(1) r = [1, 1, 1] atoms = atoms.repeat(r) n = [240 * ri for ri in r] # nbands (>=1683) is the number of bands per cluster nbands = 3 * 6 * 6 * 16 # 1728 for ri in r: nbands = nbands * ri mixer = Mixer(beta=0.1, nmaxold=5, weight=100.0) # the next line decrease memory usage es = RMMDIIS(keep_htpsit=False) lcao = False # toggle this to use lcao/sz if lcao: mode_kwargs = dict(mode='lcao', basis='sz') else: mode_kwargs = dict(mode='fd') calc = GPAW(**mode_kwargs, nbands=nbands, gpts=tuple(n), maxiter=5, width=0.1, xc='LDA', mixer=mixer, eigensolver=es, txt=prefix + '.txt') atoms.calc = calc try: pot = atoms.get_potential_energy() except ConvergenceError: pass gpaw-24.1.0/doc/devel/Au_cluster/Au_cluster.txt_band_4x8x16x8000066400000000000000000001270511454550013000236660ustar00rootroot00000000000000 ___ ___ ___ _ _ _ | | |_ | | | | | | | | | . | | | | |__ | _|___|_____| 0.7 |___|_| User: ???@ion-R33-9 Date: Tue Apr 6 17:48:27 2010 Arch: BGP Pid: 100 Dir: /gpaw/lib/python2.6/site-packages/gpaw ase: /gpaw/lib/python2.6/site-packages/ase version: 3.3.1 numpy: /gpaw/lib/python2.6/site-packages/numpy units: Angstrom and eV Extra parameters: {'blacs': 1} **NOTE**: please start using occupations=FermiDirac(width). 
Memory estimate --------------- Calculator 485.91 MiB Initial overhead 379.02 MiB Density 19.48 MiB Arrays 5.36 MiB Localized functions 6.17 MiB Mixer 2.06 MiB Interpolator 5.90 MiB Hamiltonian 24.84 MiB Arrays 3.50 MiB Restrictor 3.67 MiB XC 3D grid 1.65 MiB Poisson 15.55 MiB vbar 0.47 MiB Wavefunctions 62.57 MiB Arrays psit_nG 44.49 MiB Eigensolver 1.04 MiB Projectors 0.90 MiB Overlap op 15.75 MiB Kinetic operator 0.38 MiB Positions: 0 O 8.9496 24.5522 8.6269 1 C 9.8369 23.8824 7.8297 2 O 10.3147 24.4127 6.7962 3 C 10.2586 22.5069 8.1973 4 C 9.7956 21.9548 9.3535 5 C 10.1886 20.6691 9.7194 6 C 11.0905 21.8361 7.3423 7 C 11.5174 20.5522 7.6624 8 C 11.0266 19.9861 8.8472 9 S 11.5885 18.3508 9.2813 10 O 28.1333 14.7301 20.7426 11 C 27.4474 15.3375 21.6018 12 O 28.0125 16.3029 22.3892 13 C 25.9869 15.0849 21.6935 14 C 25.4359 14.0366 21.0104 15 C 24.0669 13.7800 21.1043 16 C 25.2389 15.8691 22.5314 17 C 23.8539 15.7056 22.5825 18 C 23.2969 14.6329 21.8936 19 S 21.5139 14.4397 21.9176 20 O 18.3160 17.3324 27.9612 21 C 18.7254 17.4155 26.7768 22 O 19.2643 18.5780 26.2976 23 C 18.5259 16.2810 25.8396 24 C 18.0389 15.0989 26.3016 25 C 17.8379 14.0267 25.4416 26 C 18.8919 16.4439 24.5216 27 C 18.7110 15.3967 23.6194 28 C 18.1409 14.2187 24.0835 29 S 17.8458 12.8725 22.9494 30 O 10.9413 12.3160 27.5998 31 C 10.6709 13.4464 26.8783 32 O 9.5007 13.8962 26.8030 33 C 11.7619 14.1215 26.1305 34 C 13.0471 13.6858 26.2984 35 C 14.0929 14.3170 25.6295 36 C 11.4548 15.1530 25.2865 37 C 12.4740 15.8039 24.5866 38 C 13.7730 15.3246 24.7206 39 S 15.1059 16.1757 23.8766 40 O 14.3510 23.2593 27.5755 41 C 14.4611 22.0092 27.5230 42 O 13.8989 21.1920 28.4650 43 C 15.2230 21.3887 26.4095 44 C 15.6558 22.1753 25.3855 45 C 16.3278 21.5993 24.3086 46 C 15.4270 20.0367 26.4385 47 C 16.1389 19.4259 25.4144 48 C 16.6179 20.2392 24.3836 49 S 17.5699 19.4916 23.0916 50 O 18.4673 28.9006 21.1980 51 C 18.0899 28.0627 20.1847 52 O 17.4109 28.4998 19.2227 53 C 18.5108 26.6390 20.2204 54 C 19.2527 
26.2080 21.2866 55 C 19.6400 24.8798 21.3773 56 C 18.1617 25.8042 19.1985 57 C 18.5459 24.4610 19.2353 58 C 19.2469 24.0188 20.3483 59 S 19.8728 22.3550 20.3833 60 O 4.2414 20.7900 15.2209 61 C 4.6011 20.6819 13.9054 62 O 4.1339 21.4606 13.0378 63 C 5.5798 19.6412 13.5003 64 C 6.0428 18.7621 14.4404 65 C 6.9748 17.7889 14.0863 66 C 5.9855 19.5958 12.1955 67 C 6.8846 18.6161 11.7853 68 C 7.3897 17.7487 12.7504 69 S 8.5597 16.4991 12.2516 70 O 17.6672 7.0448 24.8204 71 C 18.0098 6.6418 23.6811 72 O 17.6399 5.4019 23.2371 73 C 18.7169 7.5728 22.7654 74 C 19.1450 8.7805 23.2313 75 C 19.8129 9.6676 22.3906 76 C 18.9917 7.1516 21.4895 77 C 19.6580 8.0009 20.6163 78 C 20.0359 9.2610 21.0754 79 S 20.6879 10.4385 19.9024 80 O 10.5943 6.6875 21.5703 81 C 10.5284 7.5054 22.6648 82 O 9.8824 7.1683 23.6878 83 C 11.2079 8.8258 22.6575 84 C 11.9027 9.2075 21.5445 85 C 12.5768 10.4257 21.5313 86 C 11.1279 9.6118 23.7727 87 C 11.7769 10.8439 23.8034 88 C 12.4750 11.2337 22.6565 89 S 13.1910 12.8649 22.6104 90 O 7.3001 19.2989 25.0933 91 C 7.6688 20.1334 24.2300 92 O 6.7696 20.9836 23.6470 93 C 9.0959 20.1630 23.8205 94 C 9.9900 19.3858 24.5033 95 C 11.3368 19.3997 24.1564 96 C 9.4760 20.9769 22.7894 97 C 10.8109 21.0240 22.4074 98 C 11.7049 20.1770 23.0545 99 S 13.4190 20.2217 22.5753 100 O 3.5939 10.5821 11.6508 101 C 4.8082 10.0108 11.3849 102 O 4.9004 9.0031 10.6409 103 C 6.0326 10.5560 12.0243 104 C 5.9166 11.5956 12.9004 105 C 7.0536 12.1616 13.4675 106 C 7.2455 10.0336 11.6735 107 C 8.4167 10.5560 12.2273 108 C 8.2845 11.5769 13.1655 109 S 9.7177 12.0755 14.0945 110 O 9.4876 26.7892 11.9048 111 C 10.3502 26.1941 12.5972 112 O 11.4520 26.8664 13.0505 113 C 10.1597 24.7483 12.8776 114 C 9.0376 24.1422 12.3773 115 C 8.8067 22.7931 12.6103 116 C 11.0588 24.0683 13.6404 117 C 10.8766 22.7052 13.8934 118 C 9.7467 22.0909 13.3674 119 S 9.4997 20.3381 13.5932 120 O 12.9263 6.1540 24.8934 121 C 13.2034 5.9605 23.6837 122 O 12.7786 4.8261 23.0479 123 C 14.0638 6.9456 22.9805 124 C 
14.4549 8.0696 23.6526 125 C 15.2591 9.0138 23.0334 126 C 14.4588 6.6984 21.6964 127 C 15.2299 7.6414 21.0186 128 C 15.6149 8.7857 21.7003 129 S 16.7168 9.9509 20.9246 130 O 8.2575 11.7304 24.1396 131 C 7.8999 12.9261 24.2804 132 O 6.9545 13.2660 25.2089 133 C 8.5690 14.0017 23.5054 134 C 9.4860 13.6805 22.5485 135 C 10.0958 14.6911 21.8004 136 C 8.1741 15.2967 23.7233 137 C 8.7491 16.3339 23.0073 138 C 9.7320 16.0046 22.0633 139 S 10.7009 17.3328 21.3884 140 O 15.2152 26.4013 25.6714 141 C 14.1630 25.8819 24.9681 142 O 12.9970 25.9802 25.4243 143 C 14.4008 25.1684 23.6876 144 C 15.6719 25.1172 23.1756 145 C 15.9259 24.4249 21.9976 146 C 13.3518 24.5820 23.0466 147 C 13.5557 23.8798 21.8565 148 C 14.8579 23.7791 21.3716 149 S 15.1229 23.0473 19.7573 150 O 18.7761 27.9232 9.8809 151 C 19.7890 27.2717 10.5298 152 O 20.9438 27.2492 10.0363 153 C 19.5156 26.5413 11.7935 154 C 18.2447 26.5401 12.3023 155 C 17.9716 25.9084 13.5153 156 C 20.5406 25.9002 12.4334 157 C 20.3108 25.2219 13.6325 158 C 19.0167 25.2370 14.1485 159 S 18.6496 24.2213 15.5673 160 O 7.2160 26.2855 16.0714 161 C 7.4678 26.0761 17.2839 162 O 6.9422 26.8922 18.2478 163 C 8.3278 24.9322 17.6804 164 C 8.8177 24.1090 16.7046 165 C 9.6417 23.0461 17.0483 166 C 8.6026 24.7350 19.0016 167 C 9.4497 23.6989 19.3936 168 C 9.9096 22.8379 18.3996 169 S 10.8866 21.4260 18.8655 170 O 5.3392 13.6178 22.3230 171 C 4.8488 13.6274 21.0460 172 O 3.6752 13.2502 20.8054 173 C 5.7168 14.1110 19.9424 174 C 6.9978 14.4828 20.2343 175 C 7.8500 14.9150 19.2203 176 C 5.2128 14.1820 18.6726 177 C 6.0397 14.5741 17.6164 178 C 7.3279 14.9930 17.9283 179 S 8.3638 15.6346 16.6285 180 O 27.0647 8.3764 16.5869 181 C 27.2060 9.4855 15.7986 182 O 28.3205 10.0553 15.6947 183 C 26.0468 10.0336 15.0496 184 C 24.7997 9.5106 15.2514 185 C 23.7046 10.0266 14.5665 186 C 26.2498 11.0935 14.2074 187 C 25.1976 11.6025 13.4553 188 C 23.9248 11.0726 13.6733 189 S 22.5658 11.6968 12.7064 190 O 13.3181 2.8098 16.1343 191 C 12.4360 3.5734 16.5994 192 
O 11.1434 3.1507 16.7482 193 C 12.7987 4.9648 16.9704 194 C 14.1199 5.3103 17.0104 195 C 14.4837 6.6094 17.3456 196 C 11.8017 5.8537 17.2584 197 C 12.1199 7.1568 17.6253 198 C 13.4666 7.5128 17.6396 199 S 13.9109 9.1790 18.0795 200 O 5.2639 23.7908 21.2952 201 C 5.2738 23.5430 19.9499 202 O 4.6021 24.2569 19.1645 203 C 6.0539 22.3981 19.4154 204 C 6.7188 21.5830 20.2815 205 C 7.4877 20.5289 19.7995 206 C 6.1129 22.2183 18.0623 207 C 6.8226 21.1432 17.5335 208 C 7.4738 20.2962 18.4246 209 S 8.3658 18.8948 17.7815 210 O 24.3222 7.2969 17.8622 211 C 23.3342 6.5429 18.0436 212 O 23.4817 5.3540 18.7040 213 C 21.9697 7.0026 17.6804 214 C 21.7846 8.2848 17.2466 215 C 20.5067 8.7205 16.8943 216 C 20.9389 6.1056 17.7343 217 C 19.6438 6.5023 17.4195 218 C 19.4508 7.8165 16.9986 219 S 17.7839 8.3878 16.7303 220 Au 17.6679 16.3048 19.8974 221 Au 15.9037 16.3368 14.2653 222 Au 15.3538 18.7650 12.8583 223 Au 13.1948 16.9738 13.5075 224 Au 14.1376 14.2966 13.2156 225 Au 16.8736 14.4246 12.3913 226 Au 15.0618 17.2019 19.1703 227 Au 16.3367 18.7749 15.6445 228 Au 14.1457 17.0797 16.3223 229 Au 14.9939 14.3577 16.0504 230 Au 13.2335 18.4492 11.0553 231 Au 20.3660 15.4729 19.9614 232 Au 14.6737 13.6538 10.6055 233 Au 15.4308 15.5887 21.4806 234 Au 16.1827 18.5027 10.1124 235 Au 13.6247 19.5702 14.9385 236 Au 12.2707 15.1058 15.3783 237 Au 16.7790 12.2390 17.1022 238 Au 13.1619 15.1739 18.2063 239 Au 14.5736 19.5981 17.7536 240 Au 18.4838 21.0548 21.6925 241 Au 10.1377 17.5358 10.9213 242 Au 11.6854 12.4787 10.4076 243 Au 15.5208 12.8027 22.6804 244 Au 14.2499 18.1362 23.0666 245 Au 16.7868 21.2398 16.9654 246 Au 11.5068 17.8517 15.6834 247 Au 13.1856 12.4327 15.1693 248 Au 14.0196 12.4560 17.8115 249 Au 12.3857 17.9070 18.3346 250 Au 13.6857 20.9460 12.5803 251 Au 11.4745 19.1059 13.1684 252 Au 10.4577 16.2216 13.6725 253 Au 11.4607 13.5020 13.1473 254 Au 13.7895 11.6054 12.5345 255 Au 15.8809 11.8475 19.8045 256 Au 13.5916 13.5968 20.3815 257 Au 12.8747 16.4980 20.7085 258 Au 
13.7079 19.4149 20.3383 259 Au 16.4667 20.9303 11.5563 260 Au 18.6058 21.9350 14.9034 261 Au 11.8686 20.3061 16.9454 262 Au 10.5906 15.9651 17.5285 263 Au 11.3558 13.2448 17.1916 264 Au 14.8927 10.3780 16.0375 265 Au 16.3367 23.9560 15.6445 266 Au 9.2728 19.6522 15.7713 267 Au 12.0218 10.4478 18.1813 268 Au 19.6025 22.1438 12.1415 269 Au 9.1728 13.9057 15.3976 270 Au 15.4576 9.2546 12.5253 271 Au 9.7318 18.0140 19.4526 272 O 23.6510 24.4973 22.7280 273 C 22.8363 23.8824 23.4600 274 O 22.3158 24.4600 24.5856 275 C 22.4146 22.5069 23.0924 276 C 22.8776 21.9548 21.9361 277 C 22.4845 20.6691 21.5703 278 C 21.5827 21.8361 23.9474 279 C 21.1558 20.5522 23.6272 280 C 21.6466 19.9861 22.4424 281 S 21.0847 18.3508 22.0083 282 O 4.4787 14.6760 10.6237 283 C 5.2258 15.3375 9.6879 284 O 4.7069 16.2239 8.9649 285 C 6.6863 15.0849 9.5961 286 C 7.2373 14.0366 10.2793 287 C 8.6063 13.7800 10.1853 288 C 7.4342 15.8691 8.7583 289 C 8.8193 15.7056 8.7072 290 C 9.3762 14.6329 9.3960 291 S 11.1593 14.4397 9.3721 292 O 14.3572 17.3324 3.3284 293 C 13.9477 17.4155 4.5129 294 O 13.4089 18.5780 4.9921 295 C 14.1472 16.2810 5.4500 296 C 14.6342 15.0989 4.9880 297 C 14.8353 14.0267 5.8481 298 C 13.7812 16.4439 6.7681 299 C 13.9622 15.3967 7.6703 300 C 14.5323 14.2187 7.2062 301 S 14.8273 12.8725 8.3402 302 O 21.7319 12.3160 3.6899 303 C 22.0022 13.4464 4.4113 304 O 23.1725 13.8962 4.4867 305 C 20.9113 14.1215 5.1592 306 C 19.6261 13.6858 4.9913 307 C 18.5803 14.3170 5.6601 308 C 21.2184 15.1530 6.0031 309 C 20.1992 15.8039 6.7031 310 C 18.9002 15.3246 6.5691 311 S 17.5673 16.1757 7.4130 312 O 18.3320 23.3708 3.7094 313 C 18.2121 22.0092 3.7667 314 O 18.7283 21.2589 2.9018 315 C 17.4502 21.3887 4.8801 316 C 17.0173 22.1753 5.9042 317 C 16.3454 21.5993 6.9811 318 C 17.2462 20.0367 4.8512 319 C 16.5343 19.4259 5.8752 320 C 16.0553 20.2392 6.9060 321 S 15.1033 19.4916 8.1980 322 O 14.2368 28.8320 10.1746 323 C 14.5833 28.0627 11.1050 324 O 15.3228 28.5388 12.1528 325 C 14.1624 26.6390 11.0693 
326 C 13.4204 26.2080 10.0031 327 C 13.0331 24.8798 9.9123 328 C 14.5115 25.8042 12.0911 329 C 14.1273 24.4610 12.0543 330 C 13.4262 24.0188 10.9414 331 S 12.8003 22.3550 10.9063 332 O 28.4318 20.7900 16.0688 333 C 28.0721 20.6819 17.3842 334 O 28.5393 21.4606 18.2519 335 C 27.0933 19.6412 17.7893 336 C 26.6304 18.7621 16.8493 337 C 25.6984 17.7889 17.2034 338 C 26.6876 19.5958 19.0942 339 C 25.7886 18.6161 19.5044 340 C 25.2835 17.7487 18.5393 341 S 24.1135 16.4991 19.0381 342 O 15.0059 7.0448 6.4693 343 C 14.6634 6.6418 7.6085 344 O 15.0332 5.4019 8.0526 345 C 13.9562 7.5728 8.5242 346 C 13.5282 8.7805 8.0583 347 C 12.8603 9.6676 8.8990 348 C 13.6815 7.1516 9.8001 349 C 13.0152 8.0009 10.6734 350 C 12.6373 9.2610 10.2143 351 S 11.9853 10.4385 11.3873 352 O 22.0789 6.6875 9.7194 353 C 22.1447 7.5054 8.6249 354 O 22.7908 7.1683 7.6019 355 C 21.4652 8.8258 8.6321 356 C 20.7704 9.2075 9.7451 357 C 20.0964 10.4257 9.7583 358 C 21.5453 9.6118 7.5170 359 C 20.8962 10.8439 7.4863 360 C 20.1982 11.2337 8.6332 361 S 19.4821 12.8649 8.6793 362 O 25.4059 19.2245 6.1194 363 C 25.0044 20.1334 7.0597 364 O 25.8300 20.9140 7.5950 365 C 23.5773 20.1630 7.4691 366 C 22.6832 19.3858 6.7863 367 C 21.3364 19.3997 7.1333 368 C 23.1972 20.9769 8.5003 369 C 21.8623 21.0240 8.8822 370 C 20.9683 20.1770 8.2352 371 S 19.2542 20.2217 8.7143 372 O 28.9798 10.5353 19.6607 373 C 27.8650 10.0108 19.9048 374 O 27.7646 8.9132 20.7151 375 C 26.6406 10.5560 19.2653 376 C 26.7565 11.5956 18.3892 377 C 25.6196 12.1616 17.8222 378 C 25.4276 10.0336 19.6162 379 C 24.2565 10.5560 19.0624 380 C 24.3887 11.5769 18.1241 381 S 22.9555 12.0755 17.1951 382 O 23.1855 26.7892 19.3849 383 C 22.3230 26.1941 18.6924 384 O 21.2211 26.8664 18.2392 385 C 22.5135 24.7483 18.4121 386 C 23.6355 24.1422 18.9123 387 C 23.8665 22.7931 18.6794 388 C 21.6144 24.0683 17.6493 389 C 21.7966 22.7052 17.3963 390 C 22.9265 22.0909 17.9222 391 S 23.1735 20.3381 17.6964 392 O 19.7716 6.1713 6.2884 393 C 19.4698 5.9605 7.6060 394 O 
19.8598 4.9190 8.1897 395 C 18.6094 6.9456 8.3091 396 C 18.2183 8.0696 7.6371 397 C 17.4141 9.0138 8.2563 398 C 18.2144 6.6984 9.5933 399 C 17.4433 7.6414 10.2711 400 C 17.0583 8.7857 9.5893 401 S 15.9564 9.9509 10.3650 402 O 24.3838 11.6238 7.1626 403 C 24.7733 12.9261 7.0093 404 O 25.6413 13.2382 6.1568 405 C 24.1041 14.0017 7.7843 406 C 23.1872 13.6805 8.7411 407 C 22.5774 14.6911 9.4893 408 C 24.4991 15.2967 7.5663 409 C 23.9241 16.3339 8.2823 410 C 22.9412 16.0046 9.2263 411 S 21.9723 17.3328 9.9013 412 O 17.5441 26.3587 5.6759 413 C 18.5102 25.8819 6.3216 414 O 19.7802 25.9890 5.8246 415 C 18.2724 25.1684 7.6020 416 C 17.0013 25.1172 8.1141 417 C 16.7473 24.4249 9.2921 418 C 19.3214 24.5820 8.2430 419 C 19.1174 23.8798 9.4332 420 C 17.8153 23.7791 9.9181 421 S 17.5503 23.0473 11.5323 422 O 13.8141 27.8698 21.3557 423 C 12.8842 27.2717 20.7599 424 O 11.6264 27.2472 21.2973 425 C 13.1576 26.5413 19.4961 426 C 14.4284 26.5401 18.9874 427 C 14.7016 25.9084 17.7743 428 C 12.1325 25.9002 18.8562 429 C 12.3624 25.2219 17.6571 430 C 13.6564 25.2370 17.1412 431 S 14.0235 24.2213 15.7224 432 O 25.4796 26.3041 15.3264 433 C 25.2054 26.0761 14.0057 434 O 25.6880 26.8254 13.1207 435 C 24.3454 24.9322 13.6093 436 C 23.8555 24.1090 14.5851 437 C 23.0314 23.0461 14.2414 438 C 24.0705 24.7350 12.2880 439 C 23.2235 23.6989 11.8961 440 C 22.7636 22.8379 12.8901 441 S 21.7866 21.4260 12.4241 442 O 27.3340 13.6178 8.9666 443 C 27.8244 13.6274 10.2437 444 O 28.9980 13.2502 10.4843 445 C 26.9564 14.1110 11.3472 446 C 25.6753 14.4828 11.0553 447 C 24.8232 14.9150 12.0693 448 C 27.4604 14.1820 12.6171 449 C 26.6335 14.5741 13.6733 450 C 25.3452 14.9930 13.3613 451 S 24.3094 15.6346 14.6612 452 O 5.6085 8.3764 14.7028 453 C 5.4671 9.4855 15.4911 454 O 4.3526 10.0553 15.5950 455 C 6.6264 10.0336 16.2401 456 C 7.8734 9.5106 16.0382 457 C 8.9686 10.0266 16.7232 458 C 6.4234 11.0935 17.0822 459 C 7.4756 11.6025 17.8344 460 C 8.7484 11.0726 17.6164 461 S 10.1074 11.6968 18.5833 462 O 
19.3551 2.8098 15.1553 463 C 20.2372 3.5734 14.6903 464 O 21.5297 3.1507 14.5414 465 C 19.8744 4.9648 14.3193 466 C 18.5533 5.3103 14.2792 467 C 18.1895 6.6094 13.9441 468 C 20.8715 5.8537 14.0313 469 C 20.5533 7.1568 13.6643 470 C 19.2066 7.5128 13.6500 471 S 18.7623 9.1790 13.2102 472 O 27.4085 23.7706 10.1046 473 C 27.3994 23.5430 11.3397 474 O 28.1309 24.3205 12.1952 475 C 26.6193 22.3981 11.8743 476 C 25.9544 21.5830 11.0082 477 C 25.1855 20.5289 11.4902 478 C 26.5603 22.2183 13.2274 479 C 25.8505 21.1432 13.7562 480 C 25.1994 20.2962 12.8650 481 S 24.3074 18.8948 13.5082 482 O 8.2629 7.3641 13.4437 483 C 9.3390 6.5429 13.2461 484 O 9.2035 5.4513 12.6398 485 C 10.7035 7.0026 13.6093 486 C 10.8886 8.2848 14.0431 487 C 12.1664 8.7205 14.3954 488 C 11.7343 6.1056 13.5554 489 C 13.0294 6.5023 13.8701 490 C 13.2223 7.8165 14.2910 491 S 14.8893 8.3878 14.5594 492 Au 15.0053 16.3048 11.3923 493 Au 16.7695 16.3368 17.0244 494 Au 17.3194 18.7650 18.4314 495 Au 19.4784 16.9738 17.7822 496 Au 18.5356 14.2966 18.0741 497 Au 15.7996 14.4246 18.8984 498 Au 17.6114 17.2019 12.1194 499 Au 18.5275 17.0797 14.9674 500 Au 17.6793 14.3577 15.2393 501 Au 19.4396 18.4492 20.2343 502 Au 12.3072 15.4729 11.3283 503 Au 17.9995 13.6538 20.6842 504 Au 17.2424 15.5887 9.8091 505 Au 16.4904 18.5027 21.1772 506 Au 19.0485 19.5702 16.3512 507 Au 20.4025 15.1058 15.9114 508 Au 15.8942 12.2390 14.1874 509 Au 19.5113 15.1739 13.0834 510 Au 18.0995 19.5981 13.5361 511 Au 14.1894 21.0548 9.5972 512 Au 22.5355 17.5358 20.3683 513 Au 20.9878 12.4787 20.8821 514 Au 17.1524 12.8027 8.6093 515 Au 18.4232 18.1362 8.2230 516 Au 15.8864 21.2398 14.3243 517 Au 21.1664 17.8517 15.6062 518 Au 19.4876 12.4327 16.1204 519 Au 18.6535 12.4560 13.4782 520 Au 20.2875 17.9070 12.9551 521 Au 18.9875 20.9460 18.7094 522 Au 21.1987 19.1059 18.1213 523 Au 22.2155 16.2216 17.6171 524 Au 21.2125 13.5020 18.1423 525 Au 18.8837 11.6054 18.7551 526 Au 16.7922 11.8475 11.4852 527 Au 19.0815 13.5968 10.9081 528 Au 19.7985 
16.4980 10.5812 529 Au 18.9653 19.4149 10.9514 530 Au 16.2065 20.9303 19.7334 531 Au 14.0674 21.9350 16.3862 532 Au 20.8045 20.3061 14.3443 533 Au 22.0826 15.9651 13.7612 534 Au 21.3174 13.2448 14.0981 535 Au 17.7805 10.3780 15.2522 536 Au 23.4004 19.6522 15.5183 537 Au 20.6514 10.4478 13.1084 538 Au 13.0707 22.1438 19.1481 539 Au 23.5004 13.9057 15.8921 540 Au 17.2156 9.2546 18.7644 541 Au 22.9413 18.0140 11.8371 542 H 9.1139 22.5220 9.9959 543 H 9.8505 20.2153 10.6568 544 H 11.4184 22.3084 6.4104 545 H 12.2098 20.0047 7.0145 546 H 26.0700 13.3966 20.3880 547 H 23.6125 12.9360 20.5751 548 H 25.7264 16.6221 23.1593 549 H 23.2244 16.4008 23.1478 550 H 17.8022 14.9905 27.3652 551 H 17.4574 13.0677 25.8086 552 H 19.3232 17.3920 24.1839 553 H 19.0108 15.5005 22.5713 554 H 13.2545 12.8385 26.9604 555 H 15.1340 14.0308 25.8119 556 H 10.4137 15.4676 25.1598 557 H 12.2561 16.6678 23.9500 558 H 15.4768 23.2553 25.4080 559 H 16.6169 22.1942 23.4359 560 H 15.0308 19.4385 27.2655 561 H 16.3185 18.3457 25.4146 562 H 19.5417 26.9150 22.0712 563 H 20.2324 24.5182 22.2243 564 H 17.5818 26.1861 18.3518 565 H 18.3019 23.7801 18.4132 566 H 5.6807 18.8233 15.4720 567 H 7.3676 17.0824 14.8250 568 H 5.6043 20.3277 11.4758 569 H 7.1855 18.5300 10.7360 570 H 18.9622 9.0553 24.2754 571 H 20.1496 10.6462 22.7484 572 H 18.6876 6.1520 21.1616 573 H 19.8812 7.6879 19.5910 574 H 11.9294 8.5563 20.6646 575 H 13.1673 10.7363 20.6631 576 H 10.5539 9.2731 24.6415 577 H 11.7417 11.4829 24.6919 578 H 9.6446 18.7505 25.3256 579 H 12.0777 18.8259 24.7228 580 H 8.7322 21.5881 22.2676 581 H 11.1491 21.7069 21.6211 582 H 4.9262 11.9852 13.1577 583 H 6.9855 13.0336 14.1263 584 H 7.2994 9.2059 10.9586 585 H 9.4015 10.1772 11.9348 586 H 8.3193 24.7245 11.7909 587 H 7.9158 22.2956 12.2130 588 H 11.9268 24.5899 14.0569 589 H 11.6019 22.1395 14.4875 590 H 14.1309 8.2252 24.6869 591 H 15.6030 9.9052 23.5683 592 H 14.1712 5.7645 21.2021 593 H 15.5237 7.4827 19.9758 594 H 9.7442 12.6321 22.3667 595 H 10.8376 
14.4541 21.0305 596 H 7.4001 15.5120 24.4674 597 H 8.4468 17.3733 23.1727 598 H 16.4904 25.6240 23.6973 599 H 16.9344 24.3870 21.5727 600 H 12.3436 24.6614 23.4664 601 H 12.7174 23.4223 21.3208 602 H 17.4382 27.0379 11.7539 603 H 16.9696 25.9386 13.9558 604 H 21.5473 25.9189 12.0031 605 H 21.1202 24.6960 14.1496 606 H 8.5608 24.2877 15.6553 607 H 10.0681 22.3915 16.2810 608 H 8.1572 25.3916 19.7563 609 H 9.7417 23.5673 20.4407 610 H 7.3564 14.4411 21.2681 611 H 8.8907 15.1836 19.4293 612 H 4.1635 13.9321 18.4841 613 H 5.6856 14.5522 16.5805 614 H 24.6627 8.6826 15.9548 615 H 22.6992 9.6226 14.7248 616 H 27.2453 11.5423 14.1260 617 H 25.3618 12.3934 12.7160 618 H 14.8895 4.5665 16.7793 619 H 15.5356 6.9119 17.3772 620 H 10.7540 5.5405 17.2003 621 H 11.3399 7.8771 17.8934 622 H 6.6476 21.7584 21.3600 623 H 8.0820 19.9047 20.4748 624 H 5.6022 22.9193 17.3939 625 H 6.8668 20.9704 16.4531 626 H 22.6368 8.9687 17.1756 627 H 20.3380 9.7447 16.5454 628 H 21.1345 5.0687 18.0271 629 H 18.8034 5.8050 17.4998 630 H 23.5593 22.5220 21.2938 631 H 22.8227 20.2153 20.6328 632 H 21.2548 22.3084 24.8793 633 H 20.4634 20.0047 24.2752 634 H 6.6032 13.3966 10.9017 635 H 9.0607 12.9360 10.7146 636 H 6.9468 16.6221 8.1303 637 H 9.4488 16.4008 8.1419 638 H 14.8710 14.9905 3.9245 639 H 15.2157 13.0677 5.4810 640 H 13.3499 17.3920 7.1057 641 H 13.6623 15.5005 8.7183 642 H 19.4187 12.8385 4.3293 643 H 17.5392 14.0308 5.4778 644 H 22.2595 15.4676 6.1299 645 H 20.4171 16.6678 7.3397 646 H 17.1964 23.2553 5.8817 647 H 16.0563 22.1942 7.8537 648 H 17.6424 19.4385 4.0241 649 H 16.3547 18.3457 5.8751 650 H 13.1315 26.9150 9.2184 651 H 12.4408 24.5182 9.0653 652 H 15.0914 26.1861 12.9379 653 H 14.3713 23.7801 12.8764 654 H 26.9925 18.8233 15.8177 655 H 25.3056 17.0824 16.4646 656 H 27.0688 20.3277 19.8139 657 H 25.4877 18.5300 20.5537 658 H 13.7110 9.0553 7.0142 659 H 12.5235 10.6462 8.5413 660 H 13.9856 6.1520 10.1281 661 H 12.7920 7.6879 11.6987 662 H 20.7438 8.5564 10.6251 663 H 19.5059 10.7363 
10.6266 664 H 22.1192 9.2731 6.6482 665 H 20.9314 11.4829 6.5977 666 H 23.0286 18.7505 5.9640 667 H 20.5954 18.8259 6.5669 668 H 23.9410 21.5881 9.0221 669 H 21.5241 21.7069 9.6685 670 H 27.7470 11.9852 18.1319 671 H 25.6877 13.0336 17.1634 672 H 25.3738 9.2059 20.3311 673 H 23.2716 10.1772 19.3548 674 H 24.3538 24.7245 19.4988 675 H 24.7574 22.2956 19.0767 676 H 20.7464 24.5899 17.2327 677 H 21.0713 22.1395 16.8022 678 H 18.5423 8.2252 6.6027 679 H 17.0702 9.9052 7.7214 680 H 18.5020 5.7645 10.0875 681 H 17.1495 7.4827 11.3139 682 H 22.9290 12.6321 8.9230 683 H 21.8356 14.4541 10.2591 684 H 25.2731 15.5120 6.8223 685 H 24.2264 17.3733 8.1170 686 H 16.1828 25.6240 7.5924 687 H 15.7388 24.3870 9.7170 688 H 20.3296 24.6614 7.8233 689 H 19.9557 23.4223 9.9689 690 H 15.2350 27.0379 19.5357 691 H 15.7036 25.9386 17.3338 692 H 11.1258 25.9189 19.2866 693 H 11.5530 24.6960 17.1401 694 H 24.1124 24.2877 15.6344 695 H 22.6050 22.3915 15.0086 696 H 24.5160 25.3916 11.5334 697 H 22.9315 23.5673 10.8489 698 H 25.3168 14.4411 10.0215 699 H 23.7825 15.1836 11.8603 700 H 28.5097 13.9321 12.8056 701 H 26.9876 14.5522 14.7092 702 H 8.0105 8.6826 15.3349 703 H 9.9739 9.6226 16.5649 704 H 5.4279 11.5423 17.1637 705 H 7.3114 12.3934 18.5737 706 H 17.7836 4.5665 14.5104 707 H 17.1376 6.9119 13.9125 708 H 21.9191 5.5405 14.0893 709 H 21.3333 7.8771 13.3963 710 H 26.0255 21.7584 9.9296 711 H 24.5911 19.9047 10.8148 712 H 27.0710 22.9193 13.8958 713 H 25.8064 20.9704 14.8365 714 H 10.0364 8.9687 14.1141 715 H 12.3352 9.7447 14.7443 716 H 11.5386 5.0687 13.2626 717 H 13.8698 5.8050 13.7899 718 H 8.8058 25.4273 8.2171 719 H 28.9655 16.3170 22.1743 720 H 19.3030 19.1904 27.0579 721 H 10.1009 12.0294 28.0074 722 H 13.4508 21.7860 29.0982 723 H 18.0960 29.7795 20.9877 724 H 3.5997 21.5247 15.2763 725 H 17.1857 4.9556 23.9781 726 H 10.0873 5.8777 21.7743 727 H 5.8967 20.8056 24.0481 728 H 2.9266 10.0811 11.1426 729 H 11.3903 27.7973 12.7604 730 H 12.2353 4.3134 23.6774 731 H 6.6471 12.4283 
25.6066 732 H 14.8513 26.8220 26.4746 733 H 19.1579 28.3311 9.0794 734 H 6.4138 27.5720 17.7861 735 H 4.6229 13.2717 22.8901 736 H 27.9338 8.1800 16.9876 737 H 11.1144 2.2205 16.4509 738 H 4.6977 24.5740 21.4389 739 H 24.4400 5.2420 18.8581 740 H 22.7136 25.3502 24.6474 741 H 3.5571 14.9853 10.5262 742 H 13.3701 19.1904 4.2318 743 H 22.5722 12.0294 3.2822 744 H 18.8617 23.5737 2.9139 745 H 15.4922 29.4869 11.9888 746 H 29.0734 21.5247 16.0133 747 H 15.4875 4.9556 7.3116 748 H 22.5859 5.8777 9.5154 749 H 26.3648 19.3652 5.9954 750 H 28.6687 8.7149 21.0278 751 H 21.2829 27.7973 18.5292 752 H 20.3330 5.4193 6.0168 753 H 24.9353 11.0806 6.5665 754 H 19.7214 26.4843 4.9846 755 H 11.6575 27.7770 22.1176 756 H 26.0549 27.0924 15.3738 757 H 28.0503 13.2717 8.3996 758 H 4.7394 8.1800 14.3020 759 H 21.5588 2.2205 14.8388 760 H 28.5657 25.0069 11.6527 761 H 7.4695 6.8861 13.1328 .------------------------------------------------------------------------------. /| | / | | / | | / | | / | | / | | / | | / | | / | | / | H H | / | O O O | / | OC C | / | C H H | / | H C C OH H O | / | H C C O HOHH C | / | O H O C CHC C O | / | H O H C C C H CC H CHCH CHCC C | / | HO O O CC HC H C CH OC | / | H H HH CHCC C H C HCC SC HuC C C C H O | * | HO OC HHO C CSu HC S CO C H H | | | CCC C C S C C S HH C C | | | O CCH CCHS C Au C AuuH H H CC CC H OH H | | H O HC H C C SSAuH AuAu S AuS Cu C C CC H O | | | H CCHC AHCC H H O H C C O | | O C C O H CAC AuS CuuH Au AuH C S C CH O C | | | C OHHSC C C Au H C COHH Au C H H HC CO O H | | | CO C H Cu C AuC HOuu C H AuuC Au HC CCC | | H| C HC OAu CCH Au HCAuCCu S HAu C CHHAu HOCHHO H | | | C H HO S CCC AuC Au H COCu AHuC CC CC CHC C OH | | | HO CC H H Au H AH AuAuC H Auu AuAuSH C O CH C | | |H C CC Su AuAu Au S Au SHS C C | | | CCC AuH Au Au S Au AuAu C Hu Au CH C O | | | C C H C HC Au Au H S CAuC CC OC HO Au CHHC H | | O C| C H SSC C C HAu AuuHu C HC AuuAuC HC COC O | | H H H CAHCC O Au AuuAu H CCCHC CCHH C H | | H O| C C H C HAS H Auu SCS H C C | | H O H O 
CC C CH S SuC Au H C O | | O |CC OCCCHHCC OC HAuuH Cu C CCAH C C HH C C O | | H C .--HC--CH---CCH-SAH----HH-----O-C--O-------H----CC-----------------------------. | O/ H O H H C S HS HS OH HC HC OOHH / | / C S H Au CCH CCH C H / | / C C C H O C H C / | / H CC HH C CO O O / | / C H H C HC H / | / H H C HO C O / | / O HO H / | / H O O H / | / H / | / / | / / | / / | / / | / / | / / | / / | / / | / / |/ / *------------------------------------------------------------------------------* Unit Cell: Periodic X Y Z Points Spacing -------------------------------------------------------------------- 1. axis: yes 32.000000 0.000000 0.000000 240 0.1333 2. axis: yes 0.000000 32.000000 0.000000 240 0.1333 3. axis: yes 0.000000 0.000000 32.000000 240 0.1333 Grid-points per volume: 421.87 Effective grid-spacing: 0.1333 O-setup: name : Oxygen id : 5f3f27ba17355653aa2069308cb75aea Z : 8 valence: 6 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/O.LDA.gz cutoffs: 0.74(comp), 1.30(filt), 0.83(core), lmax=2 valence states: energy radius 2s(2) -23.752 0.741 2p(4) -9.195 0.741 *s 3.459 0.741 *p 18.016 0.741 *d 0.000 0.741 Using partial waves for O as LCAO basis S-setup: name : Sulfur id : 16df0b8f883bfd770ab5c435bc804428 Z : 16 valence: 6 core : 10 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/S.LDA.gz cutoffs: 0.85(comp), 1.49(filt), 1.66(core), lmax=2 valence states: energy radius 3s(2) -17.278 0.847 3p(4) -7.106 0.847 *s 9.933 0.847 *p 20.105 0.847 *d 0.000 0.847 Using partial waves for S as LCAO basis C-setup: name : Carbon id : d60576a1f549371a163e72552ca58787 Z : 6 valence: 4 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/C.LDA.gz cutoffs: 0.64(comp), 1.14(filt), 1.14(core), lmax=2 valence states: energy radius 2s(2) -13.639 0.635 2p(2) -5.414 0.635 *s 13.573 0.635 *p 21.797 0.635 *d 0.000 0.635 Using partial waves for C as LCAO basis H-setup: name : Hydrogen id : 4766778ce56282eaa64abeb28b7c1de3 Z : 1 valence: 1 core : 0 charge : 0.0 file : 
/soft/apps/gpaw-setups-0.5.3574/H.LDA.gz cutoffs: 0.48(comp), 0.85(filt), 0.53(core), lmax=2 valence states: energy radius 1s(1) -6.353 0.476 *s 20.858 0.476 *p 0.000 0.476 Using partial waves for H as LCAO basis Au-setup: name : Gold id : a44207148b704df7bec07bf25e8feca8 Z : 79 valence: 11 core : 68 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/Au.LDA.gz cutoffs: 1.32(comp), 2.33(filt), 2.81(core), lmax=2 valence states: energy radius 6s(1) -6.048 1.323 6p(0) -0.888 1.323 5d(10) -7.129 1.323 *s 21.163 1.323 *p 26.323 1.323 *d 20.082 1.323 Using partial waves for Au as LCAO basis Using the LDA Exchange-Correlation Functional. Spin-Paired Calculation Total Charge: 0.000000 Fermi Temperature: 0.100000 Mode: fd Eigen Solver: (3 nearest neighbors central finite-difference stencil) Diagonalizer: ScaLapack - grid: [nprow, npcol, nb] = [5, 5, 64] Inverse Cholesky: Lapack Poisson Solver: Jacobi (Mehrstellen finite-difference stencil) Interpolation: 6th Order Reference Energy: -53635200.580024 Gamma Point Calculation Total number of cores used: 4096 Using Domain Decomposition: 4 x 8 x 16 Parallelization Over bands on 8 Processors 1 k-point in the Irreducible Part of the Brillouin Zone (total: 1) Linear Mixing Parameter: 0.1 Pulay Mixing with 5 Old Densities Damping of Long Wave Oscillations: 100 Convergence Criteria: Total Energy Change per Atom: 0.001 eV / atom Integral of Absolute Density Change: 0.0001 electrons Integral of Absolute Eigenstate Change: 1e-09 Number of Bands in Calculation: 1728 Bands to Converge: Occupied States Only Number of Valence Electrons: 3366 log10-error: Total Iterations: Time WFS Density Energy Fermi Poisson iter: 1 17:54:54 +0.3 -4415.56673 3 105 iter: 2 17:56:44 -0.4 -5514.20755 5 iter: 3 17:58:34 -0.7 -5682.62208 4 iter: 4 18:01:07 +0.0 -0.9 -3933.90570 7 101 iter: 5 18:03:37 +0.0 -0.9 -3593.83801 15 93 iter: 6 18:06:07 +0.3 -1.0 -3028.82668 24 95 iter: 7 18:08:38 +0.4 -1.1 -2921.02831 26 94 iter: 8 18:11:05 +0.5 -1.1 -2940.70606 9 87 
iter: 9 18:13:30 +0.4 -1.2 -3094.01547 12 80 iter: 10 18:15:54 +0.4 -1.3 -3432.21825 6 78 iter: 11 18:18:17 +0.3 -1.3 -3637.38850 3 75 iter: 12 18:20:38 +0.2 -1.3 -4192.08640 7 70 iter: 13 18:22:57 +0.2 -1.2 -4307.12409 5 64 iter: 14 18:25:14 +0.1 -1.2 -4761.82590 3 60 iter: 15 18:27:39 +0.1 -1.2 -4388.02667 7 82 iter: 16 18:30:04 -0.1 -1.1 -4787.18240 7 79 iter: 17 18:32:30 -0.1 -1.1 -4644.09637 8 82 iter: 18 18:34:58 -0.0 -1.1 -5683.63381 15 90 iter: 19 18:37:23 -0.0 -1.1 -5192.79717 4 79 iter: 20 18:39:53 -0.2 -1.0 -4643.90825 17 95 Memory usage: 1005.61 MB ============================================================ Timing: incl. excl. ============================================================ Initialization: 213.561 25.624 0.8% | Hamiltonian: 44.217 0.001 0.0% | Atomic: 0.000 0.000 0.0% | Communicate energies: 2.396 2.396 0.1% | Hartree integrate/restrict: 0.038 0.038 0.0% | Initialize Hamiltonian: 0.037 0.037 0.0% | Poisson: 40.831 40.831 1.3% || XC 3D grid: 0.907 0.907 0.0% | vbar: 0.006 0.006 0.0% | LCAO initialization: 143.720 8.160 0.3% | LCAO eigensolver: 19.933 0.013 0.0% | Atomic Hamiltonian: 0.000 0.000 0.0% | Blacs Orbital Layouts: 13.038 0.001 0.0% | General diagonalize: 12.918 12.918 0.4% | Redistribute coefs: 0.109 0.109 0.0% | Send coefs to domains: 0.010 0.010 0.0% | Calculate projections: 0.000 0.000 0.0% | Distribute overlap matrix: 6.873 1.005 0.0% | Distribute overlap matrix: 5.869 5.869 0.2% | Potential matrix: 0.008 0.008 0.0% | LCAO to grid: 0.038 0.038 0.0% | Set positions (LCAO WFS): 115.589 6.997 0.2% | Basic WFS set positions: 0.002 0.002 0.0% | Basis functions set positions: 0.134 0.134 0.0% | Distribute overlap matrix: 1.271 1.271 0.0% | TCI: Calculate S, T, P: 107.186 107.186 3.4% || SCF-cycle: 2911.908 5.082 0.2% | Density: 6.732 0.004 0.0% | Atomic density matrices: 0.002 0.002 0.0% | Mix: 1.219 1.219 0.0% | Multipole moments: 2.325 2.325 0.1% | Pseudo density: 3.183 3.183 0.1% | Hamiltonian: 643.258 0.010 0.0% | Atomic: 0.003 
0.003 0.0% | Communicate energies: 43.163 43.163 1.4% || Hartree integrate/restrict: 0.688 0.688 0.0% | Poisson: 582.932 582.932 18.6% |------| XC 3D grid: 16.350 16.350 0.5% | vbar: 0.111 0.111 0.0% | Orthonormalize: 635.708 0.010 0.0% | Blacs Band Layouts: 7.387 0.002 0.0% | Inverse Cholesky: 7.385 7.385 0.2% | calc_matrix: 430.032 430.032 13.8% |-----| rotate_psi: 198.279 198.279 6.3% |--| RMM-DIIS: 1162.852 555.482 17.8% |------| precondition: 607.370 607.370 19.4% |-------| Subspace diag: 458.276 0.008 0.0% | Blacs Band Layouts: 73.776 0.004 0.0% | Diagonalize: 73.766 73.766 2.4% || Distribute results: 0.005 0.005 0.0% | calc_matrix: 196.672 196.672 6.3% |--| rotate_psi: 187.821 187.821 6.0% |-| Other: 0.301 0.301 0.0% | ============================================================ Total: 3125.770 100.0% ============================================================ date: Tue Apr 6 18:40:33 2010 gpaw-24.1.0/doc/devel/Au_cluster/Au_cluster.txt_band_8x8x8x4000066400000000000000000001270451454550013000236120ustar00rootroot00000000000000 ___ ___ ___ _ _ _ | | |_ | | | | | | | | | . | | | | |__ | _|___|_____| 0.7 |___|_| User: ???@ion-R46-1 Date: Wed Mar 31 10:30:34 2010 Arch: BGP Pid: 100 Dir: /gpaw/lib/python2.6/site-packages/gpaw ase: /gpaw/lib/python2.6/site-packages/ase version: 3.3.1 numpy: /gpaw/lib/python2.6/site-packages/numpy units: Angstrom and eV Extra parameters: {'blacs': 1} **NOTE**: please start using occupations=FermiDirac(width). 
Memory estimate --------------- Calculator 531.69 MiB Initial overhead 375.15 MiB Density 19.14 MiB Arrays 5.36 MiB Localized functions 6.17 MiB Mixer 2.06 MiB Interpolator 5.56 MiB Hamiltonian 24.61 MiB Arrays 3.50 MiB Restrictor 3.60 MiB XC 3D grid 1.65 MiB Poisson 15.40 MiB vbar 0.47 MiB Wavefunctions 112.79 MiB Arrays psit_nG 88.99 MiB Eigensolver 1.04 MiB Projectors 0.90 MiB Overlap op 21.50 MiB Kinetic operator 0.36 MiB Positions: 0 O 8.9496 24.5522 8.6269 1 C 9.8369 23.8824 7.8297 2 O 10.3147 24.4127 6.7962 3 C 10.2586 22.5069 8.1973 4 C 9.7956 21.9548 9.3535 5 C 10.1886 20.6691 9.7194 6 C 11.0905 21.8361 7.3423 7 C 11.5174 20.5522 7.6624 8 C 11.0266 19.9861 8.8472 9 S 11.5885 18.3508 9.2813 10 O 28.1333 14.7301 20.7426 11 C 27.4474 15.3375 21.6018 12 O 28.0125 16.3029 22.3892 13 C 25.9869 15.0849 21.6935 14 C 25.4359 14.0366 21.0104 15 C 24.0669 13.7800 21.1043 16 C 25.2389 15.8691 22.5314 17 C 23.8539 15.7056 22.5825 18 C 23.2969 14.6329 21.8936 19 S 21.5139 14.4397 21.9176 20 O 18.3160 17.3324 27.9612 21 C 18.7254 17.4155 26.7768 22 O 19.2643 18.5780 26.2976 23 C 18.5259 16.2810 25.8396 24 C 18.0389 15.0989 26.3016 25 C 17.8379 14.0267 25.4416 26 C 18.8919 16.4439 24.5216 27 C 18.7110 15.3967 23.6194 28 C 18.1409 14.2187 24.0835 29 S 17.8458 12.8725 22.9494 30 O 10.9413 12.3160 27.5998 31 C 10.6709 13.4464 26.8783 32 O 9.5007 13.8962 26.8030 33 C 11.7619 14.1215 26.1305 34 C 13.0471 13.6858 26.2984 35 C 14.0929 14.3170 25.6295 36 C 11.4548 15.1530 25.2865 37 C 12.4740 15.8039 24.5866 38 C 13.7730 15.3246 24.7206 39 S 15.1059 16.1757 23.8766 40 O 14.3510 23.2593 27.5755 41 C 14.4611 22.0092 27.5230 42 O 13.8989 21.1920 28.4650 43 C 15.2230 21.3887 26.4095 44 C 15.6558 22.1753 25.3855 45 C 16.3278 21.5993 24.3086 46 C 15.4270 20.0367 26.4385 47 C 16.1389 19.4259 25.4144 48 C 16.6179 20.2392 24.3836 49 S 17.5699 19.4916 23.0916 50 O 18.4673 28.9006 21.1980 51 C 18.0899 28.0627 20.1847 52 O 17.4109 28.4998 19.2227 53 C 18.5108 26.6390 20.2204 54 C 19.2527 
26.2080 21.2866 55 C 19.6400 24.8798 21.3773 56 C 18.1617 25.8042 19.1985 57 C 18.5459 24.4610 19.2353 58 C 19.2469 24.0188 20.3483 59 S 19.8728 22.3550 20.3833 60 O 4.2414 20.7900 15.2209 61 C 4.6011 20.6819 13.9054 62 O 4.1339 21.4606 13.0378 63 C 5.5798 19.6412 13.5003 64 C 6.0428 18.7621 14.4404 65 C 6.9748 17.7889 14.0863 66 C 5.9855 19.5958 12.1955 67 C 6.8846 18.6161 11.7853 68 C 7.3897 17.7487 12.7504 69 S 8.5597 16.4991 12.2516 70 O 17.6672 7.0448 24.8204 71 C 18.0098 6.6418 23.6811 72 O 17.6399 5.4019 23.2371 73 C 18.7169 7.5728 22.7654 74 C 19.1450 8.7805 23.2313 75 C 19.8129 9.6676 22.3906 76 C 18.9917 7.1516 21.4895 77 C 19.6580 8.0009 20.6163 78 C 20.0359 9.2610 21.0754 79 S 20.6879 10.4385 19.9024 80 O 10.5943 6.6875 21.5703 81 C 10.5284 7.5054 22.6648 82 O 9.8824 7.1683 23.6878 83 C 11.2079 8.8258 22.6575 84 C 11.9027 9.2075 21.5445 85 C 12.5768 10.4257 21.5313 86 C 11.1279 9.6118 23.7727 87 C 11.7769 10.8439 23.8034 88 C 12.4750 11.2337 22.6565 89 S 13.1910 12.8649 22.6104 90 O 7.3001 19.2989 25.0933 91 C 7.6688 20.1334 24.2300 92 O 6.7696 20.9836 23.6470 93 C 9.0959 20.1630 23.8205 94 C 9.9900 19.3858 24.5033 95 C 11.3368 19.3997 24.1564 96 C 9.4760 20.9769 22.7894 97 C 10.8109 21.0240 22.4074 98 C 11.7049 20.1770 23.0545 99 S 13.4190 20.2217 22.5753 100 O 3.5939 10.5821 11.6508 101 C 4.8082 10.0108 11.3849 102 O 4.9004 9.0031 10.6409 103 C 6.0326 10.5560 12.0243 104 C 5.9166 11.5956 12.9004 105 C 7.0536 12.1616 13.4675 106 C 7.2455 10.0336 11.6735 107 C 8.4167 10.5560 12.2273 108 C 8.2845 11.5769 13.1655 109 S 9.7177 12.0755 14.0945 110 O 9.4876 26.7892 11.9048 111 C 10.3502 26.1941 12.5972 112 O 11.4520 26.8664 13.0505 113 C 10.1597 24.7483 12.8776 114 C 9.0376 24.1422 12.3773 115 C 8.8067 22.7931 12.6103 116 C 11.0588 24.0683 13.6404 117 C 10.8766 22.7052 13.8934 118 C 9.7467 22.0909 13.3674 119 S 9.4997 20.3381 13.5932 120 O 12.9263 6.1540 24.8934 121 C 13.2034 5.9605 23.6837 122 O 12.7786 4.8261 23.0479 123 C 14.0638 6.9456 22.9805 124 C 
14.4549 8.0696 23.6526 125 C 15.2591 9.0138 23.0334 126 C 14.4588 6.6984 21.6964 127 C 15.2299 7.6414 21.0186 128 C 15.6149 8.7857 21.7003 129 S 16.7168 9.9509 20.9246 130 O 8.2575 11.7304 24.1396 131 C 7.8999 12.9261 24.2804 132 O 6.9545 13.2660 25.2089 133 C 8.5690 14.0017 23.5054 134 C 9.4860 13.6805 22.5485 135 C 10.0958 14.6911 21.8004 136 C 8.1741 15.2967 23.7233 137 C 8.7491 16.3339 23.0073 138 C 9.7320 16.0046 22.0633 139 S 10.7009 17.3328 21.3884 140 O 15.2152 26.4013 25.6714 141 C 14.1630 25.8819 24.9681 142 O 12.9970 25.9802 25.4243 143 C 14.4008 25.1684 23.6876 144 C 15.6719 25.1172 23.1756 145 C 15.9259 24.4249 21.9976 146 C 13.3518 24.5820 23.0466 147 C 13.5557 23.8798 21.8565 148 C 14.8579 23.7791 21.3716 149 S 15.1229 23.0473 19.7573 150 O 18.7761 27.9232 9.8809 151 C 19.7890 27.2717 10.5298 152 O 20.9438 27.2492 10.0363 153 C 19.5156 26.5413 11.7935 154 C 18.2447 26.5401 12.3023 155 C 17.9716 25.9084 13.5153 156 C 20.5406 25.9002 12.4334 157 C 20.3108 25.2219 13.6325 158 C 19.0167 25.2370 14.1485 159 S 18.6496 24.2213 15.5673 160 O 7.2160 26.2855 16.0714 161 C 7.4678 26.0761 17.2839 162 O 6.9422 26.8922 18.2478 163 C 8.3278 24.9322 17.6804 164 C 8.8177 24.1090 16.7046 165 C 9.6417 23.0461 17.0483 166 C 8.6026 24.7350 19.0016 167 C 9.4497 23.6989 19.3936 168 C 9.9096 22.8379 18.3996 169 S 10.8866 21.4260 18.8655 170 O 5.3392 13.6178 22.3230 171 C 4.8488 13.6274 21.0460 172 O 3.6752 13.2502 20.8054 173 C 5.7168 14.1110 19.9424 174 C 6.9978 14.4828 20.2343 175 C 7.8500 14.9150 19.2203 176 C 5.2128 14.1820 18.6726 177 C 6.0397 14.5741 17.6164 178 C 7.3279 14.9930 17.9283 179 S 8.3638 15.6346 16.6285 180 O 27.0647 8.3764 16.5869 181 C 27.2060 9.4855 15.7986 182 O 28.3205 10.0553 15.6947 183 C 26.0468 10.0336 15.0496 184 C 24.7997 9.5106 15.2514 185 C 23.7046 10.0266 14.5665 186 C 26.2498 11.0935 14.2074 187 C 25.1976 11.6025 13.4553 188 C 23.9248 11.0726 13.6733 189 S 22.5658 11.6968 12.7064 190 O 13.3181 2.8098 16.1343 191 C 12.4360 3.5734 16.5994 192 
O 11.1434 3.1507 16.7482 193 C 12.7987 4.9648 16.9704 194 C 14.1199 5.3103 17.0104 195 C 14.4837 6.6094 17.3456 196 C 11.8017 5.8537 17.2584 197 C 12.1199 7.1568 17.6253 198 C 13.4666 7.5128 17.6396 199 S 13.9109 9.1790 18.0795 200 O 5.2639 23.7908 21.2952 201 C 5.2738 23.5430 19.9499 202 O 4.6021 24.2569 19.1645 203 C 6.0539 22.3981 19.4154 204 C 6.7188 21.5830 20.2815 205 C 7.4877 20.5289 19.7995 206 C 6.1129 22.2183 18.0623 207 C 6.8226 21.1432 17.5335 208 C 7.4738 20.2962 18.4246 209 S 8.3658 18.8948 17.7815 210 O 24.3222 7.2969 17.8622 211 C 23.3342 6.5429 18.0436 212 O 23.4817 5.3540 18.7040 213 C 21.9697 7.0026 17.6804 214 C 21.7846 8.2848 17.2466 215 C 20.5067 8.7205 16.8943 216 C 20.9389 6.1056 17.7343 217 C 19.6438 6.5023 17.4195 218 C 19.4508 7.8165 16.9986 219 S 17.7839 8.3878 16.7303 220 Au 17.6679 16.3048 19.8974 221 Au 15.9037 16.3368 14.2653 222 Au 15.3538 18.7650 12.8583 223 Au 13.1948 16.9738 13.5075 224 Au 14.1376 14.2966 13.2156 225 Au 16.8736 14.4246 12.3913 226 Au 15.0618 17.2019 19.1703 227 Au 16.3367 18.7749 15.6445 228 Au 14.1457 17.0797 16.3223 229 Au 14.9939 14.3577 16.0504 230 Au 13.2335 18.4492 11.0553 231 Au 20.3660 15.4729 19.9614 232 Au 14.6737 13.6538 10.6055 233 Au 15.4308 15.5887 21.4806 234 Au 16.1827 18.5027 10.1124 235 Au 13.6247 19.5702 14.9385 236 Au 12.2707 15.1058 15.3783 237 Au 16.7790 12.2390 17.1022 238 Au 13.1619 15.1739 18.2063 239 Au 14.5736 19.5981 17.7536 240 Au 18.4838 21.0548 21.6925 241 Au 10.1377 17.5358 10.9213 242 Au 11.6854 12.4787 10.4076 243 Au 15.5208 12.8027 22.6804 244 Au 14.2499 18.1362 23.0666 245 Au 16.7868 21.2398 16.9654 246 Au 11.5068 17.8517 15.6834 247 Au 13.1856 12.4327 15.1693 248 Au 14.0196 12.4560 17.8115 249 Au 12.3857 17.9070 18.3346 250 Au 13.6857 20.9460 12.5803 251 Au 11.4745 19.1059 13.1684 252 Au 10.4577 16.2216 13.6725 253 Au 11.4607 13.5020 13.1473 254 Au 13.7895 11.6054 12.5345 255 Au 15.8809 11.8475 19.8045 256 Au 13.5916 13.5968 20.3815 257 Au 12.8747 16.4980 20.7085 258 Au 
13.7079 19.4149 20.3383 259 Au 16.4667 20.9303 11.5563 260 Au 18.6058 21.9350 14.9034 261 Au 11.8686 20.3061 16.9454 262 Au 10.5906 15.9651 17.5285 263 Au 11.3558 13.2448 17.1916 264 Au 14.8927 10.3780 16.0375 265 Au 16.3367 23.9560 15.6445 266 Au 9.2728 19.6522 15.7713 267 Au 12.0218 10.4478 18.1813 268 Au 19.6025 22.1438 12.1415 269 Au 9.1728 13.9057 15.3976 270 Au 15.4576 9.2546 12.5253 271 Au 9.7318 18.0140 19.4526 272 O 23.6510 24.4973 22.7280 273 C 22.8363 23.8824 23.4600 274 O 22.3158 24.4600 24.5856 275 C 22.4146 22.5069 23.0924 276 C 22.8776 21.9548 21.9361 277 C 22.4845 20.6691 21.5703 278 C 21.5827 21.8361 23.9474 279 C 21.1558 20.5522 23.6272 280 C 21.6466 19.9861 22.4424 281 S 21.0847 18.3508 22.0083 282 O 4.4787 14.6760 10.6237 283 C 5.2258 15.3375 9.6879 284 O 4.7069 16.2239 8.9649 285 C 6.6863 15.0849 9.5961 286 C 7.2373 14.0366 10.2793 287 C 8.6063 13.7800 10.1853 288 C 7.4342 15.8691 8.7583 289 C 8.8193 15.7056 8.7072 290 C 9.3762 14.6329 9.3960 291 S 11.1593 14.4397 9.3721 292 O 14.3572 17.3324 3.3284 293 C 13.9477 17.4155 4.5129 294 O 13.4089 18.5780 4.9921 295 C 14.1472 16.2810 5.4500 296 C 14.6342 15.0989 4.9880 297 C 14.8353 14.0267 5.8481 298 C 13.7812 16.4439 6.7681 299 C 13.9622 15.3967 7.6703 300 C 14.5323 14.2187 7.2062 301 S 14.8273 12.8725 8.3402 302 O 21.7319 12.3160 3.6899 303 C 22.0022 13.4464 4.4113 304 O 23.1725 13.8962 4.4867 305 C 20.9113 14.1215 5.1592 306 C 19.6261 13.6858 4.9913 307 C 18.5803 14.3170 5.6601 308 C 21.2184 15.1530 6.0031 309 C 20.1992 15.8039 6.7031 310 C 18.9002 15.3246 6.5691 311 S 17.5673 16.1757 7.4130 312 O 18.3320 23.3708 3.7094 313 C 18.2121 22.0092 3.7667 314 O 18.7283 21.2589 2.9018 315 C 17.4502 21.3887 4.8801 316 C 17.0173 22.1753 5.9042 317 C 16.3454 21.5993 6.9811 318 C 17.2462 20.0367 4.8512 319 C 16.5343 19.4259 5.8752 320 C 16.0553 20.2392 6.9060 321 S 15.1033 19.4916 8.1980 322 O 14.2368 28.8320 10.1746 323 C 14.5833 28.0627 11.1050 324 O 15.3228 28.5388 12.1528 325 C 14.1624 26.6390 11.0693 
326 C 13.4204 26.2080 10.0031 327 C 13.0331 24.8798 9.9123 328 C 14.5115 25.8042 12.0911 329 C 14.1273 24.4610 12.0543 330 C 13.4262 24.0188 10.9414 331 S 12.8003 22.3550 10.9063 332 O 28.4318 20.7900 16.0688 333 C 28.0721 20.6819 17.3842 334 O 28.5393 21.4606 18.2519 335 C 27.0933 19.6412 17.7893 336 C 26.6304 18.7621 16.8493 337 C 25.6984 17.7889 17.2034 338 C 26.6876 19.5958 19.0942 339 C 25.7886 18.6161 19.5044 340 C 25.2835 17.7487 18.5393 341 S 24.1135 16.4991 19.0381 342 O 15.0059 7.0448 6.4693 343 C 14.6634 6.6418 7.6085 344 O 15.0332 5.4019 8.0526 345 C 13.9562 7.5728 8.5242 346 C 13.5282 8.7805 8.0583 347 C 12.8603 9.6676 8.8990 348 C 13.6815 7.1516 9.8001 349 C 13.0152 8.0009 10.6734 350 C 12.6373 9.2610 10.2143 351 S 11.9853 10.4385 11.3873 352 O 22.0789 6.6875 9.7194 353 C 22.1447 7.5054 8.6249 354 O 22.7908 7.1683 7.6019 355 C 21.4652 8.8258 8.6321 356 C 20.7704 9.2075 9.7451 357 C 20.0964 10.4257 9.7583 358 C 21.5453 9.6118 7.5170 359 C 20.8962 10.8439 7.4863 360 C 20.1982 11.2337 8.6332 361 S 19.4821 12.8649 8.6793 362 O 25.4059 19.2245 6.1194 363 C 25.0044 20.1334 7.0597 364 O 25.8300 20.9140 7.5950 365 C 23.5773 20.1630 7.4691 366 C 22.6832 19.3858 6.7863 367 C 21.3364 19.3997 7.1333 368 C 23.1972 20.9769 8.5003 369 C 21.8623 21.0240 8.8822 370 C 20.9683 20.1770 8.2352 371 S 19.2542 20.2217 8.7143 372 O 28.9798 10.5353 19.6607 373 C 27.8650 10.0108 19.9048 374 O 27.7646 8.9132 20.7151 375 C 26.6406 10.5560 19.2653 376 C 26.7565 11.5956 18.3892 377 C 25.6196 12.1616 17.8222 378 C 25.4276 10.0336 19.6162 379 C 24.2565 10.5560 19.0624 380 C 24.3887 11.5769 18.1241 381 S 22.9555 12.0755 17.1951 382 O 23.1855 26.7892 19.3849 383 C 22.3230 26.1941 18.6924 384 O 21.2211 26.8664 18.2392 385 C 22.5135 24.7483 18.4121 386 C 23.6355 24.1422 18.9123 387 C 23.8665 22.7931 18.6794 388 C 21.6144 24.0683 17.6493 389 C 21.7966 22.7052 17.3963 390 C 22.9265 22.0909 17.9222 391 S 23.1735 20.3381 17.6964 392 O 19.7716 6.1713 6.2884 393 C 19.4698 5.9605 7.6060 394 O 
19.8598 4.9190 8.1897 395 C 18.6094 6.9456 8.3091 396 C 18.2183 8.0696 7.6371 397 C 17.4141 9.0138 8.2563 398 C 18.2144 6.6984 9.5933 399 C 17.4433 7.6414 10.2711 400 C 17.0583 8.7857 9.5893 401 S 15.9564 9.9509 10.3650 402 O 24.3838 11.6238 7.1626 403 C 24.7733 12.9261 7.0093 404 O 25.6413 13.2382 6.1568 405 C 24.1041 14.0017 7.7843 406 C 23.1872 13.6805 8.7411 407 C 22.5774 14.6911 9.4893 408 C 24.4991 15.2967 7.5663 409 C 23.9241 16.3339 8.2823 410 C 22.9412 16.0046 9.2263 411 S 21.9723 17.3328 9.9013 412 O 17.5441 26.3587 5.6759 413 C 18.5102 25.8819 6.3216 414 O 19.7802 25.9890 5.8246 415 C 18.2724 25.1684 7.6020 416 C 17.0013 25.1172 8.1141 417 C 16.7473 24.4249 9.2921 418 C 19.3214 24.5820 8.2430 419 C 19.1174 23.8798 9.4332 420 C 17.8153 23.7791 9.9181 421 S 17.5503 23.0473 11.5323 422 O 13.8141 27.8698 21.3557 423 C 12.8842 27.2717 20.7599 424 O 11.6264 27.2472 21.2973 425 C 13.1576 26.5413 19.4961 426 C 14.4284 26.5401 18.9874 427 C 14.7016 25.9084 17.7743 428 C 12.1325 25.9002 18.8562 429 C 12.3624 25.2219 17.6571 430 C 13.6564 25.2370 17.1412 431 S 14.0235 24.2213 15.7224 432 O 25.4796 26.3041 15.3264 433 C 25.2054 26.0761 14.0057 434 O 25.6880 26.8254 13.1207 435 C 24.3454 24.9322 13.6093 436 C 23.8555 24.1090 14.5851 437 C 23.0314 23.0461 14.2414 438 C 24.0705 24.7350 12.2880 439 C 23.2235 23.6989 11.8961 440 C 22.7636 22.8379 12.8901 441 S 21.7866 21.4260 12.4241 442 O 27.3340 13.6178 8.9666 443 C 27.8244 13.6274 10.2437 444 O 28.9980 13.2502 10.4843 445 C 26.9564 14.1110 11.3472 446 C 25.6753 14.4828 11.0553 447 C 24.8232 14.9150 12.0693 448 C 27.4604 14.1820 12.6171 449 C 26.6335 14.5741 13.6733 450 C 25.3452 14.9930 13.3613 451 S 24.3094 15.6346 14.6612 452 O 5.6085 8.3764 14.7028 453 C 5.4671 9.4855 15.4911 454 O 4.3526 10.0553 15.5950 455 C 6.6264 10.0336 16.2401 456 C 7.8734 9.5106 16.0382 457 C 8.9686 10.0266 16.7232 458 C 6.4234 11.0935 17.0822 459 C 7.4756 11.6025 17.8344 460 C 8.7484 11.0726 17.6164 461 S 10.1074 11.6968 18.5833 462 O 
19.3551 2.8098 15.1553 463 C 20.2372 3.5734 14.6903 464 O 21.5297 3.1507 14.5414 465 C 19.8744 4.9648 14.3193 466 C 18.5533 5.3103 14.2792 467 C 18.1895 6.6094 13.9441 468 C 20.8715 5.8537 14.0313 469 C 20.5533 7.1568 13.6643 470 C 19.2066 7.5128 13.6500 471 S 18.7623 9.1790 13.2102 472 O 27.4085 23.7706 10.1046 473 C 27.3994 23.5430 11.3397 474 O 28.1309 24.3205 12.1952 475 C 26.6193 22.3981 11.8743 476 C 25.9544 21.5830 11.0082 477 C 25.1855 20.5289 11.4902 478 C 26.5603 22.2183 13.2274 479 C 25.8505 21.1432 13.7562 480 C 25.1994 20.2962 12.8650 481 S 24.3074 18.8948 13.5082 482 O 8.2629 7.3641 13.4437 483 C 9.3390 6.5429 13.2461 484 O 9.2035 5.4513 12.6398 485 C 10.7035 7.0026 13.6093 486 C 10.8886 8.2848 14.0431 487 C 12.1664 8.7205 14.3954 488 C 11.7343 6.1056 13.5554 489 C 13.0294 6.5023 13.8701 490 C 13.2223 7.8165 14.2910 491 S 14.8893 8.3878 14.5594 492 Au 15.0053 16.3048 11.3923 493 Au 16.7695 16.3368 17.0244 494 Au 17.3194 18.7650 18.4314 495 Au 19.4784 16.9738 17.7822 496 Au 18.5356 14.2966 18.0741 497 Au 15.7996 14.4246 18.8984 498 Au 17.6114 17.2019 12.1194 499 Au 18.5275 17.0797 14.9674 500 Au 17.6793 14.3577 15.2393 501 Au 19.4396 18.4492 20.2343 502 Au 12.3072 15.4729 11.3283 503 Au 17.9995 13.6538 20.6842 504 Au 17.2424 15.5887 9.8091 505 Au 16.4904 18.5027 21.1772 506 Au 19.0485 19.5702 16.3512 507 Au 20.4025 15.1058 15.9114 508 Au 15.8942 12.2390 14.1874 509 Au 19.5113 15.1739 13.0834 510 Au 18.0995 19.5981 13.5361 511 Au 14.1894 21.0548 9.5972 512 Au 22.5355 17.5358 20.3683 513 Au 20.9878 12.4787 20.8821 514 Au 17.1524 12.8027 8.6093 515 Au 18.4232 18.1362 8.2230 516 Au 15.8864 21.2398 14.3243 517 Au 21.1664 17.8517 15.6062 518 Au 19.4876 12.4327 16.1204 519 Au 18.6535 12.4560 13.4782 520 Au 20.2875 17.9070 12.9551 521 Au 18.9875 20.9460 18.7094 522 Au 21.1987 19.1059 18.1213 523 Au 22.2155 16.2216 17.6171 524 Au 21.2125 13.5020 18.1423 525 Au 18.8837 11.6054 18.7551 526 Au 16.7922 11.8475 11.4852 527 Au 19.0815 13.5968 10.9081 528 Au 19.7985 
16.4980 10.5812 529 Au 18.9653 19.4149 10.9514 530 Au 16.2065 20.9303 19.7334 531 Au 14.0674 21.9350 16.3862 532 Au 20.8045 20.3061 14.3443 533 Au 22.0826 15.9651 13.7612 534 Au 21.3174 13.2448 14.0981 535 Au 17.7805 10.3780 15.2522 536 Au 23.4004 19.6522 15.5183 537 Au 20.6514 10.4478 13.1084 538 Au 13.0707 22.1438 19.1481 539 Au 23.5004 13.9057 15.8921 540 Au 17.2156 9.2546 18.7644 541 Au 22.9413 18.0140 11.8371 542 H 9.1139 22.5220 9.9959 543 H 9.8505 20.2153 10.6568 544 H 11.4184 22.3084 6.4104 545 H 12.2098 20.0047 7.0145 546 H 26.0700 13.3966 20.3880 547 H 23.6125 12.9360 20.5751 548 H 25.7264 16.6221 23.1593 549 H 23.2244 16.4008 23.1478 550 H 17.8022 14.9905 27.3652 551 H 17.4574 13.0677 25.8086 552 H 19.3232 17.3920 24.1839 553 H 19.0108 15.5005 22.5713 554 H 13.2545 12.8385 26.9604 555 H 15.1340 14.0308 25.8119 556 H 10.4137 15.4676 25.1598 557 H 12.2561 16.6678 23.9500 558 H 15.4768 23.2553 25.4080 559 H 16.6169 22.1942 23.4359 560 H 15.0308 19.4385 27.2655 561 H 16.3185 18.3457 25.4146 562 H 19.5417 26.9150 22.0712 563 H 20.2324 24.5182 22.2243 564 H 17.5818 26.1861 18.3518 565 H 18.3019 23.7801 18.4132 566 H 5.6807 18.8233 15.4720 567 H 7.3676 17.0824 14.8250 568 H 5.6043 20.3277 11.4758 569 H 7.1855 18.5300 10.7360 570 H 18.9622 9.0553 24.2754 571 H 20.1496 10.6462 22.7484 572 H 18.6876 6.1520 21.1616 573 H 19.8812 7.6879 19.5910 574 H 11.9294 8.5563 20.6646 575 H 13.1673 10.7363 20.6631 576 H 10.5539 9.2731 24.6415 577 H 11.7417 11.4829 24.6919 578 H 9.6446 18.7505 25.3256 579 H 12.0777 18.8259 24.7228 580 H 8.7322 21.5881 22.2676 581 H 11.1491 21.7069 21.6211 582 H 4.9262 11.9852 13.1577 583 H 6.9855 13.0336 14.1263 584 H 7.2994 9.2059 10.9586 585 H 9.4015 10.1772 11.9348 586 H 8.3193 24.7245 11.7909 587 H 7.9158 22.2956 12.2130 588 H 11.9268 24.5899 14.0569 589 H 11.6019 22.1395 14.4875 590 H 14.1309 8.2252 24.6869 591 H 15.6030 9.9052 23.5683 592 H 14.1712 5.7645 21.2021 593 H 15.5237 7.4827 19.9758 594 H 9.7442 12.6321 22.3667 595 H 10.8376 
14.4541 21.0305 596 H 7.4001 15.5120 24.4674 597 H 8.4468 17.3733 23.1727 598 H 16.4904 25.6240 23.6973 599 H 16.9344 24.3870 21.5727 600 H 12.3436 24.6614 23.4664 601 H 12.7174 23.4223 21.3208 602 H 17.4382 27.0379 11.7539 603 H 16.9696 25.9386 13.9558 604 H 21.5473 25.9189 12.0031 605 H 21.1202 24.6960 14.1496 606 H 8.5608 24.2877 15.6553 607 H 10.0681 22.3915 16.2810 608 H 8.1572 25.3916 19.7563 609 H 9.7417 23.5673 20.4407 610 H 7.3564 14.4411 21.2681 611 H 8.8907 15.1836 19.4293 612 H 4.1635 13.9321 18.4841 613 H 5.6856 14.5522 16.5805 614 H 24.6627 8.6826 15.9548 615 H 22.6992 9.6226 14.7248 616 H 27.2453 11.5423 14.1260 617 H 25.3618 12.3934 12.7160 618 H 14.8895 4.5665 16.7793 619 H 15.5356 6.9119 17.3772 620 H 10.7540 5.5405 17.2003 621 H 11.3399 7.8771 17.8934 622 H 6.6476 21.7584 21.3600 623 H 8.0820 19.9047 20.4748 624 H 5.6022 22.9193 17.3939 625 H 6.8668 20.9704 16.4531 626 H 22.6368 8.9687 17.1756 627 H 20.3380 9.7447 16.5454 628 H 21.1345 5.0687 18.0271 629 H 18.8034 5.8050 17.4998 630 H 23.5593 22.5220 21.2938 631 H 22.8227 20.2153 20.6328 632 H 21.2548 22.3084 24.8793 633 H 20.4634 20.0047 24.2752 634 H 6.6032 13.3966 10.9017 635 H 9.0607 12.9360 10.7146 636 H 6.9468 16.6221 8.1303 637 H 9.4488 16.4008 8.1419 638 H 14.8710 14.9905 3.9245 639 H 15.2157 13.0677 5.4810 640 H 13.3499 17.3920 7.1057 641 H 13.6623 15.5005 8.7183 642 H 19.4187 12.8385 4.3293 643 H 17.5392 14.0308 5.4778 644 H 22.2595 15.4676 6.1299 645 H 20.4171 16.6678 7.3397 646 H 17.1964 23.2553 5.8817 647 H 16.0563 22.1942 7.8537 648 H 17.6424 19.4385 4.0241 649 H 16.3547 18.3457 5.8751 650 H 13.1315 26.9150 9.2184 651 H 12.4408 24.5182 9.0653 652 H 15.0914 26.1861 12.9379 653 H 14.3713 23.7801 12.8764 654 H 26.9925 18.8233 15.8177 655 H 25.3056 17.0824 16.4646 656 H 27.0688 20.3277 19.8139 657 H 25.4877 18.5300 20.5537 658 H 13.7110 9.0553 7.0142 659 H 12.5235 10.6462 8.5413 660 H 13.9856 6.1520 10.1281 661 H 12.7920 7.6879 11.6987 662 H 20.7438 8.5564 10.6251 663 H 19.5059 10.7363 
10.6266 664 H 22.1192 9.2731 6.6482 665 H 20.9314 11.4829 6.5977 666 H 23.0286 18.7505 5.9640 667 H 20.5954 18.8259 6.5669 668 H 23.9410 21.5881 9.0221 669 H 21.5241 21.7069 9.6685 670 H 27.7470 11.9852 18.1319 671 H 25.6877 13.0336 17.1634 672 H 25.3738 9.2059 20.3311 673 H 23.2716 10.1772 19.3548 674 H 24.3538 24.7245 19.4988 675 H 24.7574 22.2956 19.0767 676 H 20.7464 24.5899 17.2327 677 H 21.0713 22.1395 16.8022 678 H 18.5423 8.2252 6.6027 679 H 17.0702 9.9052 7.7214 680 H 18.5020 5.7645 10.0875 681 H 17.1495 7.4827 11.3139 682 H 22.9290 12.6321 8.9230 683 H 21.8356 14.4541 10.2591 684 H 25.2731 15.5120 6.8223 685 H 24.2264 17.3733 8.1170 686 H 16.1828 25.6240 7.5924 687 H 15.7388 24.3870 9.7170 688 H 20.3296 24.6614 7.8233 689 H 19.9557 23.4223 9.9689 690 H 15.2350 27.0379 19.5357 691 H 15.7036 25.9386 17.3338 692 H 11.1258 25.9189 19.2866 693 H 11.5530 24.6960 17.1401 694 H 24.1124 24.2877 15.6344 695 H 22.6050 22.3915 15.0086 696 H 24.5160 25.3916 11.5334 697 H 22.9315 23.5673 10.8489 698 H 25.3168 14.4411 10.0215 699 H 23.7825 15.1836 11.8603 700 H 28.5097 13.9321 12.8056 701 H 26.9876 14.5522 14.7092 702 H 8.0105 8.6826 15.3349 703 H 9.9739 9.6226 16.5649 704 H 5.4279 11.5423 17.1637 705 H 7.3114 12.3934 18.5737 706 H 17.7836 4.5665 14.5104 707 H 17.1376 6.9119 13.9125 708 H 21.9191 5.5405 14.0893 709 H 21.3333 7.8771 13.3963 710 H 26.0255 21.7584 9.9296 711 H 24.5911 19.9047 10.8148 712 H 27.0710 22.9193 13.8958 713 H 25.8064 20.9704 14.8365 714 H 10.0364 8.9687 14.1141 715 H 12.3352 9.7447 14.7443 716 H 11.5386 5.0687 13.2626 717 H 13.8698 5.8050 13.7899 718 H 8.8058 25.4273 8.2171 719 H 28.9655 16.3170 22.1743 720 H 19.3030 19.1904 27.0579 721 H 10.1009 12.0294 28.0074 722 H 13.4508 21.7860 29.0982 723 H 18.0960 29.7795 20.9877 724 H 3.5997 21.5247 15.2763 725 H 17.1857 4.9556 23.9781 726 H 10.0873 5.8777 21.7743 727 H 5.8967 20.8056 24.0481 728 H 2.9266 10.0811 11.1426 729 H 11.3903 27.7973 12.7604 730 H 12.2353 4.3134 23.6774 731 H 6.6471 12.4283 
25.6066 732 H 14.8513 26.8220 26.4746 733 H 19.1579 28.3311 9.0794 734 H 6.4138 27.5720 17.7861 735 H 4.6229 13.2717 22.8901 736 H 27.9338 8.1800 16.9876 737 H 11.1144 2.2205 16.4509 738 H 4.6977 24.5740 21.4389 739 H 24.4400 5.2420 18.8581 740 H 22.7136 25.3502 24.6474 741 H 3.5571 14.9853 10.5262 742 H 13.3701 19.1904 4.2318 743 H 22.5722 12.0294 3.2822 744 H 18.8617 23.5737 2.9139 745 H 15.4922 29.4869 11.9888 746 H 29.0734 21.5247 16.0133 747 H 15.4875 4.9556 7.3116 748 H 22.5859 5.8777 9.5154 749 H 26.3648 19.3652 5.9954 750 H 28.6687 8.7149 21.0278 751 H 21.2829 27.7973 18.5292 752 H 20.3330 5.4193 6.0168 753 H 24.9353 11.0806 6.5665 754 H 19.7214 26.4843 4.9846 755 H 11.6575 27.7770 22.1176 756 H 26.0549 27.0924 15.3738 757 H 28.0503 13.2717 8.3996 758 H 4.7394 8.1800 14.3020 759 H 21.5588 2.2205 14.8388 760 H 28.5657 25.0069 11.6527 761 H 7.4695 6.8861 13.1328 .------------------------------------------------------------------------------. /| | / | | / | | / | | / | | / | | / | | / | | / | | / | H H | / | O O O | / | OC C | / | C H H | / | H C C OH H O | / | H C C O HOHH C | / | O H O C CHC C O | / | H O H C C C H CC H CHCH CHCC C | / | HO O O CC HC H C CH OC | / | H H HH CHCC C H C HCC SC HuC C C C H O | * | HO OC HHO C CSu HC S CO C H H | | | CCC C C S C C S HH C C | | | O CCH CCHS C Au C AuuH H H CC CC H OH H | | H O HC H C C SSAuH AuAu S AuS Cu C C CC H O | | | H CCHC AHCC H H O H C C O | | O C C O H CAC AuS CuuH Au AuH C S C CH O C | | | C OHHSC C C Au H C COHH Au C H H HC CO O H | | | CO C H Cu C AuC HOuu C H AuuC Au HC CCC | | H| C HC OAu CCH Au HCAuCCu S HAu C CHHAu HOCHHO H | | | C H HO S CCC AuC Au H COCu AHuC CC CC CHC C OH | | | HO CC H H Au H AH AuAuC H Auu AuAuSH C O CH C | | |H C CC Su AuAu Au S Au SHS C C | | | CCC AuH Au Au S Au AuAu C Hu Au CH C O | | | C C H C HC Au Au H S CAuC CC OC HO Au CHHC H | | O C| C H SSC C C HAu AuuHu C HC AuuAuC HC COC O | | H H H CAHCC O Au AuuAu H CCCHC CCHH C H | | H O| C C H C HAS H Auu SCS H C C | | H O H O 
CC C CH S SuC Au H C O | | O |CC OCCCHHCC OC HAuuH Cu C CCAH C C HH C C O | | H C .--HC--CH---CCH-SAH----HH-----O-C--O-------H----CC-----------------------------. | O/ H O H H C S HS HS OH HC HC OOHH / | / C S H Au CCH CCH C H / | / C C C H O C H C / | / H CC HH C CO O O / | / C H H C HC H / | / H H C HO C O / | / O HO H / | / H O O H / | / H / | / / | / / | / / | / / | / / | / / | / / | / / | / / |/ / *------------------------------------------------------------------------------* Unit Cell: Periodic X Y Z Points Spacing -------------------------------------------------------------------- 1. axis: yes 32.000000 0.000000 0.000000 240 0.1333 2. axis: yes 0.000000 32.000000 0.000000 240 0.1333 3. axis: yes 0.000000 0.000000 32.000000 240 0.1333 Grid-points per volume: 421.87 Effective grid-spacing: 0.1333 O-setup: name : Oxygen id : 5f3f27ba17355653aa2069308cb75aea Z : 8 valence: 6 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/O.LDA.gz cutoffs: 0.74(comp), 1.30(filt), 0.83(core), lmax=2 valence states: energy radius 2s(2) -23.752 0.741 2p(4) -9.195 0.741 *s 3.459 0.741 *p 18.016 0.741 *d 0.000 0.741 Using partial waves for O as LCAO basis S-setup: name : Sulfur id : 16df0b8f883bfd770ab5c435bc804428 Z : 16 valence: 6 core : 10 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/S.LDA.gz cutoffs: 0.85(comp), 1.49(filt), 1.66(core), lmax=2 valence states: energy radius 3s(2) -17.278 0.847 3p(4) -7.106 0.847 *s 9.933 0.847 *p 20.105 0.847 *d 0.000 0.847 Using partial waves for S as LCAO basis C-setup: name : Carbon id : d60576a1f549371a163e72552ca58787 Z : 6 valence: 4 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/C.LDA.gz cutoffs: 0.64(comp), 1.14(filt), 1.14(core), lmax=2 valence states: energy radius 2s(2) -13.639 0.635 2p(2) -5.414 0.635 *s 13.573 0.635 *p 21.797 0.635 *d 0.000 0.635 Using partial waves for C as LCAO basis H-setup: name : Hydrogen id : 4766778ce56282eaa64abeb28b7c1de3 Z : 1 valence: 1 core : 0 charge : 0.0 file : 
/soft/apps/gpaw-setups-0.5.3574/H.LDA.gz cutoffs: 0.48(comp), 0.85(filt), 0.53(core), lmax=2 valence states: energy radius 1s(1) -6.353 0.476 *s 20.858 0.476 *p 0.000 0.476 Using partial waves for H as LCAO basis Au-setup: name : Gold id : a44207148b704df7bec07bf25e8feca8 Z : 79 valence: 11 core : 68 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/Au.LDA.gz cutoffs: 1.32(comp), 2.33(filt), 2.81(core), lmax=2 valence states: energy radius 6s(1) -6.048 1.323 6p(0) -0.888 1.323 5d(10) -7.129 1.323 *s 21.163 1.323 *p 26.323 1.323 *d 20.082 1.323 Using partial waves for Au as LCAO basis Using the LDA Exchange-Correlation Functional. Spin-Paired Calculation Total Charge: 0.000000 Fermi Temperature: 0.100000 Mode: fd Eigen Solver: (3 nearest neighbors central finite-difference stencil) Diagonalizer: ScaLapack - grid: [nprow, npcol, nb] = [5, 5, 64] Inverse Cholesky: Lapack Poisson Solver: Jacobi (Mehrstellen finite-difference stencil) Interpolation: 6th Order Reference Energy: -53635200.580024 Gamma Point Calculation Total number of cores used: 2048 Using Domain Decomposition: 8 x 8 x 8 Parallelization Over bands on 4 Processors 1 k-point in the Irreducible Part of the Brillouin Zone (total: 1) Linear Mixing Parameter: 0.1 Pulay Mixing with 5 Old Densities Damping of Long Wave Oscillations: 100 Convergence Criteria: Total Energy Change per Atom: 0.001 eV / atom Integral of Absolute Density Change: 0.0001 electrons Integral of Absolute Eigenstate Change: 1e-09 Number of Bands in Calculation: 1728 Bands to Converge: Occupied States Only Number of Valence Electrons: 3366 log10-error: Total Iterations: Time WFS Density Energy Fermi Poisson iter: 1 10:39:58 +0.3 -4415.56673 3 105 iter: 2 10:43:36 -0.4 -5514.20755 5 iter: 3 10:47:14 -0.7 -5682.62208 4 iter: 4 10:51:34 +0.0 -0.9 -3933.90570 7 101 iter: 5 10:55:52 +0.0 -0.9 -3593.83801 15 93 iter: 6 11:00:10 +0.3 -1.0 -3028.82668 24 95 iter: 7 11:04:28 +0.4 -1.1 -2921.02831 26 94 iter: 8 11:08:43 +0.5 -1.1 -2940.70606 9 87 
iter: 9 11:12:56 +0.4 -1.2 -3094.01547 12 80 iter: 10 11:17:08 +0.4 -1.3 -3432.21824 6 78 iter: 11 11:21:18 +0.3 -1.3 -3637.38845 3 75 iter: 12 11:25:27 +0.2 -1.3 -4192.08658 7 70 iter: 13 11:29:34 +0.2 -1.2 -4307.12554 5 64 iter: 14 11:33:39 +0.1 -1.2 -4761.83105 3 60 iter: 15 11:37:52 +0.1 -1.2 -4388.01403 7 82 iter: 16 11:42:04 -0.1 -1.1 -4787.14716 7 79 iter: 17 11:46:18 -0.1 -1.1 -4643.95591 8 82 iter: 18 11:50:34 -0.0 -1.1 -5683.99034 15 90 iter: 19 11:54:46 -0.0 -1.1 -5193.98368 4 79 iter: 20 11:59:04 -0.2 -1.0 -4644.35219 17 95 Memory usage: 1.28 GB ============================================================ Timing: incl. excl. ============================================================ Initialization: 217.994 25.471 0.5% | Hamiltonian: 41.844 0.001 0.0% | Atomic: 0.000 0.000 0.0% | Communicate energies: 2.661 2.661 0.0% | Hartree integrate/restrict: 0.030 0.030 0.0% | Initialize Hamiltonian: 0.036 0.036 0.0% | Poisson: 38.204 38.204 0.7% | XC 3D grid: 0.907 0.907 0.0% | vbar: 0.006 0.006 0.0% | LCAO initialization: 150.678 9.376 0.2% | LCAO eigensolver: 22.300 0.015 0.0% | Atomic Hamiltonian: 0.000 0.000 0.0% | Blacs Orbital Layouts: 12.767 0.001 0.0% | General diagonalize: 12.658 12.658 0.2% | Redistribute coefs: 0.088 0.088 0.0% | Send coefs to domains: 0.020 0.020 0.0% | Calculate projections: 0.000 0.000 0.0% | Distribute overlap matrix: 9.503 2.039 0.0% | Distribute overlap matrix: 7.464 7.464 0.1% | Potential matrix: 0.015 0.015 0.0% | LCAO to grid: 0.076 0.076 0.0% | Set positions (LCAO WFS): 118.926 8.603 0.2% | Basic WFS set positions: 0.002 0.002 0.0% | Basis functions set positions: 0.134 0.134 0.0% | Distribute overlap matrix: 2.873 2.873 0.1% | TCI: Calculate S, T, P: 107.314 107.314 2.0% || SCF-cycle: 5130.082 4.613 0.1% | Density: 10.364 0.004 0.0% | Atomic density matrices: 0.002 0.002 0.0% | Mix: 1.231 1.231 0.0% | Multipole moments: 2.997 2.997 0.1% | Pseudo density: 6.130 6.130 0.1% | Hamiltonian: 612.315 0.010 0.0% | Atomic: 0.003 
0.003 0.0% | Communicate energies: 47.966 47.966 0.9% | Hartree integrate/restrict: 0.551 0.551 0.0% | Poisson: 547.331 547.331 10.2% |---| XC 3D grid: 16.342 16.342 0.3% | vbar: 0.111 0.111 0.0% | Orthonormalize: 1258.718 0.009 0.0% | Blacs Band Layouts: 7.651 0.002 0.0% | Inverse Cholesky: 7.649 7.649 0.1% | calc_matrix: 911.827 911.827 17.0% |------| rotate_psi: 339.232 339.232 6.3% |--| RMM-DIIS: 2424.579 1163.140 21.7% |--------| precondition: 1261.440 1261.440 23.6% |--------| Subspace diag: 819.492 0.008 0.0% | Blacs Band Layouts: 73.433 0.004 0.0% | Diagonalize: 73.417 73.417 1.4% || Distribute results: 0.012 0.012 0.0% | calc_matrix: 423.220 423.220 7.9% |--| rotate_psi: 322.831 322.831 6.0% |-| Other: 0.297 0.297 0.0% | ============================================================ Total: 5348.372 100.0% ============================================================ date: Wed Mar 31 11:59:42 2010 gpaw-24.1.0/doc/devel/Au_cluster/Au_cluster.txt_domain_4x8x16x8000066400000000000000000001270441454550013000242330ustar00rootroot00000000000000 ___ ___ ___ _ _ _ | | |_ | | | | | | | | | . | | | | |__ | _|___|_____| 0.7 |___|_| User: ???@ion-R42-1 Date: Tue Apr 6 17:48:41 2010 Arch: BGP Pid: 100 Dir: /gpaw/lib/python2.6/site-packages/gpaw ase: /gpaw/lib/python2.6/site-packages/ase version: 3.3.1 numpy: /gpaw/lib/python2.6/site-packages/numpy units: Angstrom and eV Extra parameters: {'blacs': 1} **NOTE**: please start using occupations=FermiDirac(width). 
Memory estimate --------------- Calculator 485.91 MiB Initial overhead 379.02 MiB Density 19.48 MiB Arrays 5.36 MiB Localized functions 6.17 MiB Mixer 2.06 MiB Interpolator 5.90 MiB Hamiltonian 24.84 MiB Arrays 3.50 MiB Restrictor 3.67 MiB XC 3D grid 1.65 MiB Poisson 15.55 MiB vbar 0.47 MiB Wavefunctions 62.57 MiB Arrays psit_nG 44.49 MiB Eigensolver 1.04 MiB Projectors 0.90 MiB Overlap op 15.75 MiB Kinetic operator 0.38 MiB Positions: 0 O 8.9496 24.5522 8.6269 1 C 9.8369 23.8824 7.8297 2 O 10.3147 24.4127 6.7962 3 C 10.2586 22.5069 8.1973 4 C 9.7956 21.9548 9.3535 5 C 10.1886 20.6691 9.7194 6 C 11.0905 21.8361 7.3423 7 C 11.5174 20.5522 7.6624 8 C 11.0266 19.9861 8.8472 9 S 11.5885 18.3508 9.2813 10 O 28.1333 14.7301 20.7426 11 C 27.4474 15.3375 21.6018 12 O 28.0125 16.3029 22.3892 13 C 25.9869 15.0849 21.6935 14 C 25.4359 14.0366 21.0104 15 C 24.0669 13.7800 21.1043 16 C 25.2389 15.8691 22.5314 17 C 23.8539 15.7056 22.5825 18 C 23.2969 14.6329 21.8936 19 S 21.5139 14.4397 21.9176 20 O 18.3160 17.3324 27.9612 21 C 18.7254 17.4155 26.7768 22 O 19.2643 18.5780 26.2976 23 C 18.5259 16.2810 25.8396 24 C 18.0389 15.0989 26.3016 25 C 17.8379 14.0267 25.4416 26 C 18.8919 16.4439 24.5216 27 C 18.7110 15.3967 23.6194 28 C 18.1409 14.2187 24.0835 29 S 17.8458 12.8725 22.9494 30 O 10.9413 12.3160 27.5998 31 C 10.6709 13.4464 26.8783 32 O 9.5007 13.8962 26.8030 33 C 11.7619 14.1215 26.1305 34 C 13.0471 13.6858 26.2984 35 C 14.0929 14.3170 25.6295 36 C 11.4548 15.1530 25.2865 37 C 12.4740 15.8039 24.5866 38 C 13.7730 15.3246 24.7206 39 S 15.1059 16.1757 23.8766 40 O 14.3510 23.2593 27.5755 41 C 14.4611 22.0092 27.5230 42 O 13.8989 21.1920 28.4650 43 C 15.2230 21.3887 26.4095 44 C 15.6558 22.1753 25.3855 45 C 16.3278 21.5993 24.3086 46 C 15.4270 20.0367 26.4385 47 C 16.1389 19.4259 25.4144 48 C 16.6179 20.2392 24.3836 49 S 17.5699 19.4916 23.0916 50 O 18.4673 28.9006 21.1980 51 C 18.0899 28.0627 20.1847 52 O 17.4109 28.4998 19.2227 53 C 18.5108 26.6390 20.2204 54 C 19.2527 
26.2080 21.2866 55 C 19.6400 24.8798 21.3773 56 C 18.1617 25.8042 19.1985 57 C 18.5459 24.4610 19.2353 58 C 19.2469 24.0188 20.3483 59 S 19.8728 22.3550 20.3833 60 O 4.2414 20.7900 15.2209 61 C 4.6011 20.6819 13.9054 62 O 4.1339 21.4606 13.0378 63 C 5.5798 19.6412 13.5003 64 C 6.0428 18.7621 14.4404 65 C 6.9748 17.7889 14.0863 66 C 5.9855 19.5958 12.1955 67 C 6.8846 18.6161 11.7853 68 C 7.3897 17.7487 12.7504 69 S 8.5597 16.4991 12.2516 70 O 17.6672 7.0448 24.8204 71 C 18.0098 6.6418 23.6811 72 O 17.6399 5.4019 23.2371 73 C 18.7169 7.5728 22.7654 74 C 19.1450 8.7805 23.2313 75 C 19.8129 9.6676 22.3906 76 C 18.9917 7.1516 21.4895 77 C 19.6580 8.0009 20.6163 78 C 20.0359 9.2610 21.0754 79 S 20.6879 10.4385 19.9024 80 O 10.5943 6.6875 21.5703 81 C 10.5284 7.5054 22.6648 82 O 9.8824 7.1683 23.6878 83 C 11.2079 8.8258 22.6575 84 C 11.9027 9.2075 21.5445 85 C 12.5768 10.4257 21.5313 86 C 11.1279 9.6118 23.7727 87 C 11.7769 10.8439 23.8034 88 C 12.4750 11.2337 22.6565 89 S 13.1910 12.8649 22.6104 90 O 7.3001 19.2989 25.0933 91 C 7.6688 20.1334 24.2300 92 O 6.7696 20.9836 23.6470 93 C 9.0959 20.1630 23.8205 94 C 9.9900 19.3858 24.5033 95 C 11.3368 19.3997 24.1564 96 C 9.4760 20.9769 22.7894 97 C 10.8109 21.0240 22.4074 98 C 11.7049 20.1770 23.0545 99 S 13.4190 20.2217 22.5753 100 O 3.5939 10.5821 11.6508 101 C 4.8082 10.0108 11.3849 102 O 4.9004 9.0031 10.6409 103 C 6.0326 10.5560 12.0243 104 C 5.9166 11.5956 12.9004 105 C 7.0536 12.1616 13.4675 106 C 7.2455 10.0336 11.6735 107 C 8.4167 10.5560 12.2273 108 C 8.2845 11.5769 13.1655 109 S 9.7177 12.0755 14.0945 110 O 9.4876 26.7892 11.9048 111 C 10.3502 26.1941 12.5972 112 O 11.4520 26.8664 13.0505 113 C 10.1597 24.7483 12.8776 114 C 9.0376 24.1422 12.3773 115 C 8.8067 22.7931 12.6103 116 C 11.0588 24.0683 13.6404 117 C 10.8766 22.7052 13.8934 118 C 9.7467 22.0909 13.3674 119 S 9.4997 20.3381 13.5932 120 O 12.9263 6.1540 24.8934 121 C 13.2034 5.9605 23.6837 122 O 12.7786 4.8261 23.0479 123 C 14.0638 6.9456 22.9805 124 C 
14.4549 8.0696 23.6526 125 C 15.2591 9.0138 23.0334 126 C 14.4588 6.6984 21.6964 127 C 15.2299 7.6414 21.0186 128 C 15.6149 8.7857 21.7003 129 S 16.7168 9.9509 20.9246 130 O 8.2575 11.7304 24.1396 131 C 7.8999 12.9261 24.2804 132 O 6.9545 13.2660 25.2089 133 C 8.5690 14.0017 23.5054 134 C 9.4860 13.6805 22.5485 135 C 10.0958 14.6911 21.8004 136 C 8.1741 15.2967 23.7233 137 C 8.7491 16.3339 23.0073 138 C 9.7320 16.0046 22.0633 139 S 10.7009 17.3328 21.3884 140 O 15.2152 26.4013 25.6714 141 C 14.1630 25.8819 24.9681 142 O 12.9970 25.9802 25.4243 143 C 14.4008 25.1684 23.6876 144 C 15.6719 25.1172 23.1756 145 C 15.9259 24.4249 21.9976 146 C 13.3518 24.5820 23.0466 147 C 13.5557 23.8798 21.8565 148 C 14.8579 23.7791 21.3716 149 S 15.1229 23.0473 19.7573 150 O 18.7761 27.9232 9.8809 151 C 19.7890 27.2717 10.5298 152 O 20.9438 27.2492 10.0363 153 C 19.5156 26.5413 11.7935 154 C 18.2447 26.5401 12.3023 155 C 17.9716 25.9084 13.5153 156 C 20.5406 25.9002 12.4334 157 C 20.3108 25.2219 13.6325 158 C 19.0167 25.2370 14.1485 159 S 18.6496 24.2213 15.5673 160 O 7.2160 26.2855 16.0714 161 C 7.4678 26.0761 17.2839 162 O 6.9422 26.8922 18.2478 163 C 8.3278 24.9322 17.6804 164 C 8.8177 24.1090 16.7046 165 C 9.6417 23.0461 17.0483 166 C 8.6026 24.7350 19.0016 167 C 9.4497 23.6989 19.3936 168 C 9.9096 22.8379 18.3996 169 S 10.8866 21.4260 18.8655 170 O 5.3392 13.6178 22.3230 171 C 4.8488 13.6274 21.0460 172 O 3.6752 13.2502 20.8054 173 C 5.7168 14.1110 19.9424 174 C 6.9978 14.4828 20.2343 175 C 7.8500 14.9150 19.2203 176 C 5.2128 14.1820 18.6726 177 C 6.0397 14.5741 17.6164 178 C 7.3279 14.9930 17.9283 179 S 8.3638 15.6346 16.6285 180 O 27.0647 8.3764 16.5869 181 C 27.2060 9.4855 15.7986 182 O 28.3205 10.0553 15.6947 183 C 26.0468 10.0336 15.0496 184 C 24.7997 9.5106 15.2514 185 C 23.7046 10.0266 14.5665 186 C 26.2498 11.0935 14.2074 187 C 25.1976 11.6025 13.4553 188 C 23.9248 11.0726 13.6733 189 S 22.5658 11.6968 12.7064 190 O 13.3181 2.8098 16.1343 191 C 12.4360 3.5734 16.5994 192 
O 11.1434 3.1507 16.7482 193 C 12.7987 4.9648 16.9704 194 C 14.1199 5.3103 17.0104 195 C 14.4837 6.6094 17.3456 196 C 11.8017 5.8537 17.2584 197 C 12.1199 7.1568 17.6253 198 C 13.4666 7.5128 17.6396 199 S 13.9109 9.1790 18.0795 200 O 5.2639 23.7908 21.2952 201 C 5.2738 23.5430 19.9499 202 O 4.6021 24.2569 19.1645 203 C 6.0539 22.3981 19.4154 204 C 6.7188 21.5830 20.2815 205 C 7.4877 20.5289 19.7995 206 C 6.1129 22.2183 18.0623 207 C 6.8226 21.1432 17.5335 208 C 7.4738 20.2962 18.4246 209 S 8.3658 18.8948 17.7815 210 O 24.3222 7.2969 17.8622 211 C 23.3342 6.5429 18.0436 212 O 23.4817 5.3540 18.7040 213 C 21.9697 7.0026 17.6804 214 C 21.7846 8.2848 17.2466 215 C 20.5067 8.7205 16.8943 216 C 20.9389 6.1056 17.7343 217 C 19.6438 6.5023 17.4195 218 C 19.4508 7.8165 16.9986 219 S 17.7839 8.3878 16.7303 220 Au 17.6679 16.3048 19.8974 221 Au 15.9037 16.3368 14.2653 222 Au 15.3538 18.7650 12.8583 223 Au 13.1948 16.9738 13.5075 224 Au 14.1376 14.2966 13.2156 225 Au 16.8736 14.4246 12.3913 226 Au 15.0618 17.2019 19.1703 227 Au 16.3367 18.7749 15.6445 228 Au 14.1457 17.0797 16.3223 229 Au 14.9939 14.3577 16.0504 230 Au 13.2335 18.4492 11.0553 231 Au 20.3660 15.4729 19.9614 232 Au 14.6737 13.6538 10.6055 233 Au 15.4308 15.5887 21.4806 234 Au 16.1827 18.5027 10.1124 235 Au 13.6247 19.5702 14.9385 236 Au 12.2707 15.1058 15.3783 237 Au 16.7790 12.2390 17.1022 238 Au 13.1619 15.1739 18.2063 239 Au 14.5736 19.5981 17.7536 240 Au 18.4838 21.0548 21.6925 241 Au 10.1377 17.5358 10.9213 242 Au 11.6854 12.4787 10.4076 243 Au 15.5208 12.8027 22.6804 244 Au 14.2499 18.1362 23.0666 245 Au 16.7868 21.2398 16.9654 246 Au 11.5068 17.8517 15.6834 247 Au 13.1856 12.4327 15.1693 248 Au 14.0196 12.4560 17.8115 249 Au 12.3857 17.9070 18.3346 250 Au 13.6857 20.9460 12.5803 251 Au 11.4745 19.1059 13.1684 252 Au 10.4577 16.2216 13.6725 253 Au 11.4607 13.5020 13.1473 254 Au 13.7895 11.6054 12.5345 255 Au 15.8809 11.8475 19.8045 256 Au 13.5916 13.5968 20.3815 257 Au 12.8747 16.4980 20.7085 258 Au 
13.7079 19.4149 20.3383 259 Au 16.4667 20.9303 11.5563 260 Au 18.6058 21.9350 14.9034 261 Au 11.8686 20.3061 16.9454 262 Au 10.5906 15.9651 17.5285 263 Au 11.3558 13.2448 17.1916 264 Au 14.8927 10.3780 16.0375 265 Au 16.3367 23.9560 15.6445 266 Au 9.2728 19.6522 15.7713 267 Au 12.0218 10.4478 18.1813 268 Au 19.6025 22.1438 12.1415 269 Au 9.1728 13.9057 15.3976 270 Au 15.4576 9.2546 12.5253 271 Au 9.7318 18.0140 19.4526 272 O 23.6510 24.4973 22.7280 273 C 22.8363 23.8824 23.4600 274 O 22.3158 24.4600 24.5856 275 C 22.4146 22.5069 23.0924 276 C 22.8776 21.9548 21.9361 277 C 22.4845 20.6691 21.5703 278 C 21.5827 21.8361 23.9474 279 C 21.1558 20.5522 23.6272 280 C 21.6466 19.9861 22.4424 281 S 21.0847 18.3508 22.0083 282 O 4.4787 14.6760 10.6237 283 C 5.2258 15.3375 9.6879 284 O 4.7069 16.2239 8.9649 285 C 6.6863 15.0849 9.5961 286 C 7.2373 14.0366 10.2793 287 C 8.6063 13.7800 10.1853 288 C 7.4342 15.8691 8.7583 289 C 8.8193 15.7056 8.7072 290 C 9.3762 14.6329 9.3960 291 S 11.1593 14.4397 9.3721 292 O 14.3572 17.3324 3.3284 293 C 13.9477 17.4155 4.5129 294 O 13.4089 18.5780 4.9921 295 C 14.1472 16.2810 5.4500 296 C 14.6342 15.0989 4.9880 297 C 14.8353 14.0267 5.8481 298 C 13.7812 16.4439 6.7681 299 C 13.9622 15.3967 7.6703 300 C 14.5323 14.2187 7.2062 301 S 14.8273 12.8725 8.3402 302 O 21.7319 12.3160 3.6899 303 C 22.0022 13.4464 4.4113 304 O 23.1725 13.8962 4.4867 305 C 20.9113 14.1215 5.1592 306 C 19.6261 13.6858 4.9913 307 C 18.5803 14.3170 5.6601 308 C 21.2184 15.1530 6.0031 309 C 20.1992 15.8039 6.7031 310 C 18.9002 15.3246 6.5691 311 S 17.5673 16.1757 7.4130 312 O 18.3320 23.3708 3.7094 313 C 18.2121 22.0092 3.7667 314 O 18.7283 21.2589 2.9018 315 C 17.4502 21.3887 4.8801 316 C 17.0173 22.1753 5.9042 317 C 16.3454 21.5993 6.9811 318 C 17.2462 20.0367 4.8512 319 C 16.5343 19.4259 5.8752 320 C 16.0553 20.2392 6.9060 321 S 15.1033 19.4916 8.1980 322 O 14.2368 28.8320 10.1746 323 C 14.5833 28.0627 11.1050 324 O 15.3228 28.5388 12.1528 325 C 14.1624 26.6390 11.0693 
326 C 13.4204 26.2080 10.0031 327 C 13.0331 24.8798 9.9123 328 C 14.5115 25.8042 12.0911 329 C 14.1273 24.4610 12.0543 330 C 13.4262 24.0188 10.9414 331 S 12.8003 22.3550 10.9063 332 O 28.4318 20.7900 16.0688 333 C 28.0721 20.6819 17.3842 334 O 28.5393 21.4606 18.2519 335 C 27.0933 19.6412 17.7893 336 C 26.6304 18.7621 16.8493 337 C 25.6984 17.7889 17.2034 338 C 26.6876 19.5958 19.0942 339 C 25.7886 18.6161 19.5044 340 C 25.2835 17.7487 18.5393 341 S 24.1135 16.4991 19.0381 342 O 15.0059 7.0448 6.4693 343 C 14.6634 6.6418 7.6085 344 O 15.0332 5.4019 8.0526 345 C 13.9562 7.5728 8.5242 346 C 13.5282 8.7805 8.0583 347 C 12.8603 9.6676 8.8990 348 C 13.6815 7.1516 9.8001 349 C 13.0152 8.0009 10.6734 350 C 12.6373 9.2610 10.2143 351 S 11.9853 10.4385 11.3873 352 O 22.0789 6.6875 9.7194 353 C 22.1447 7.5054 8.6249 354 O 22.7908 7.1683 7.6019 355 C 21.4652 8.8258 8.6321 356 C 20.7704 9.2075 9.7451 357 C 20.0964 10.4257 9.7583 358 C 21.5453 9.6118 7.5170 359 C 20.8962 10.8439 7.4863 360 C 20.1982 11.2337 8.6332 361 S 19.4821 12.8649 8.6793 362 O 25.4059 19.2245 6.1194 363 C 25.0044 20.1334 7.0597 364 O 25.8300 20.9140 7.5950 365 C 23.5773 20.1630 7.4691 366 C 22.6832 19.3858 6.7863 367 C 21.3364 19.3997 7.1333 368 C 23.1972 20.9769 8.5003 369 C 21.8623 21.0240 8.8822 370 C 20.9683 20.1770 8.2352 371 S 19.2542 20.2217 8.7143 372 O 28.9798 10.5353 19.6607 373 C 27.8650 10.0108 19.9048 374 O 27.7646 8.9132 20.7151 375 C 26.6406 10.5560 19.2653 376 C 26.7565 11.5956 18.3892 377 C 25.6196 12.1616 17.8222 378 C 25.4276 10.0336 19.6162 379 C 24.2565 10.5560 19.0624 380 C 24.3887 11.5769 18.1241 381 S 22.9555 12.0755 17.1951 382 O 23.1855 26.7892 19.3849 383 C 22.3230 26.1941 18.6924 384 O 21.2211 26.8664 18.2392 385 C 22.5135 24.7483 18.4121 386 C 23.6355 24.1422 18.9123 387 C 23.8665 22.7931 18.6794 388 C 21.6144 24.0683 17.6493 389 C 21.7966 22.7052 17.3963 390 C 22.9265 22.0909 17.9222 391 S 23.1735 20.3381 17.6964 392 O 19.7716 6.1713 6.2884 393 C 19.4698 5.9605 7.6060 394 O 
19.8598 4.9190 8.1897 395 C 18.6094 6.9456 8.3091 396 C 18.2183 8.0696 7.6371 397 C 17.4141 9.0138 8.2563 398 C 18.2144 6.6984 9.5933 399 C 17.4433 7.6414 10.2711 400 C 17.0583 8.7857 9.5893 401 S 15.9564 9.9509 10.3650 402 O 24.3838 11.6238 7.1626 403 C 24.7733 12.9261 7.0093 404 O 25.6413 13.2382 6.1568 405 C 24.1041 14.0017 7.7843 406 C 23.1872 13.6805 8.7411 407 C 22.5774 14.6911 9.4893 408 C 24.4991 15.2967 7.5663 409 C 23.9241 16.3339 8.2823 410 C 22.9412 16.0046 9.2263 411 S 21.9723 17.3328 9.9013 412 O 17.5441 26.3587 5.6759 413 C 18.5102 25.8819 6.3216 414 O 19.7802 25.9890 5.8246 415 C 18.2724 25.1684 7.6020 416 C 17.0013 25.1172 8.1141 417 C 16.7473 24.4249 9.2921 418 C 19.3214 24.5820 8.2430 419 C 19.1174 23.8798 9.4332 420 C 17.8153 23.7791 9.9181 421 S 17.5503 23.0473 11.5323 422 O 13.8141 27.8698 21.3557 423 C 12.8842 27.2717 20.7599 424 O 11.6264 27.2472 21.2973 425 C 13.1576 26.5413 19.4961 426 C 14.4284 26.5401 18.9874 427 C 14.7016 25.9084 17.7743 428 C 12.1325 25.9002 18.8562 429 C 12.3624 25.2219 17.6571 430 C 13.6564 25.2370 17.1412 431 S 14.0235 24.2213 15.7224 432 O 25.4796 26.3041 15.3264 433 C 25.2054 26.0761 14.0057 434 O 25.6880 26.8254 13.1207 435 C 24.3454 24.9322 13.6093 436 C 23.8555 24.1090 14.5851 437 C 23.0314 23.0461 14.2414 438 C 24.0705 24.7350 12.2880 439 C 23.2235 23.6989 11.8961 440 C 22.7636 22.8379 12.8901 441 S 21.7866 21.4260 12.4241 442 O 27.3340 13.6178 8.9666 443 C 27.8244 13.6274 10.2437 444 O 28.9980 13.2502 10.4843 445 C 26.9564 14.1110 11.3472 446 C 25.6753 14.4828 11.0553 447 C 24.8232 14.9150 12.0693 448 C 27.4604 14.1820 12.6171 449 C 26.6335 14.5741 13.6733 450 C 25.3452 14.9930 13.3613 451 S 24.3094 15.6346 14.6612 452 O 5.6085 8.3764 14.7028 453 C 5.4671 9.4855 15.4911 454 O 4.3526 10.0553 15.5950 455 C 6.6264 10.0336 16.2401 456 C 7.8734 9.5106 16.0382 457 C 8.9686 10.0266 16.7232 458 C 6.4234 11.0935 17.0822 459 C 7.4756 11.6025 17.8344 460 C 8.7484 11.0726 17.6164 461 S 10.1074 11.6968 18.5833 462 O 
19.3551 2.8098 15.1553 463 C 20.2372 3.5734 14.6903 464 O 21.5297 3.1507 14.5414 465 C 19.8744 4.9648 14.3193 466 C 18.5533 5.3103 14.2792 467 C 18.1895 6.6094 13.9441 468 C 20.8715 5.8537 14.0313 469 C 20.5533 7.1568 13.6643 470 C 19.2066 7.5128 13.6500 471 S 18.7623 9.1790 13.2102 472 O 27.4085 23.7706 10.1046 473 C 27.3994 23.5430 11.3397 474 O 28.1309 24.3205 12.1952 475 C 26.6193 22.3981 11.8743 476 C 25.9544 21.5830 11.0082 477 C 25.1855 20.5289 11.4902 478 C 26.5603 22.2183 13.2274 479 C 25.8505 21.1432 13.7562 480 C 25.1994 20.2962 12.8650 481 S 24.3074 18.8948 13.5082 482 O 8.2629 7.3641 13.4437 483 C 9.3390 6.5429 13.2461 484 O 9.2035 5.4513 12.6398 485 C 10.7035 7.0026 13.6093 486 C 10.8886 8.2848 14.0431 487 C 12.1664 8.7205 14.3954 488 C 11.7343 6.1056 13.5554 489 C 13.0294 6.5023 13.8701 490 C 13.2223 7.8165 14.2910 491 S 14.8893 8.3878 14.5594 492 Au 15.0053 16.3048 11.3923 493 Au 16.7695 16.3368 17.0244 494 Au 17.3194 18.7650 18.4314 495 Au 19.4784 16.9738 17.7822 496 Au 18.5356 14.2966 18.0741 497 Au 15.7996 14.4246 18.8984 498 Au 17.6114 17.2019 12.1194 499 Au 18.5275 17.0797 14.9674 500 Au 17.6793 14.3577 15.2393 501 Au 19.4396 18.4492 20.2343 502 Au 12.3072 15.4729 11.3283 503 Au 17.9995 13.6538 20.6842 504 Au 17.2424 15.5887 9.8091 505 Au 16.4904 18.5027 21.1772 506 Au 19.0485 19.5702 16.3512 507 Au 20.4025 15.1058 15.9114 508 Au 15.8942 12.2390 14.1874 509 Au 19.5113 15.1739 13.0834 510 Au 18.0995 19.5981 13.5361 511 Au 14.1894 21.0548 9.5972 512 Au 22.5355 17.5358 20.3683 513 Au 20.9878 12.4787 20.8821 514 Au 17.1524 12.8027 8.6093 515 Au 18.4232 18.1362 8.2230 516 Au 15.8864 21.2398 14.3243 517 Au 21.1664 17.8517 15.6062 518 Au 19.4876 12.4327 16.1204 519 Au 18.6535 12.4560 13.4782 520 Au 20.2875 17.9070 12.9551 521 Au 18.9875 20.9460 18.7094 522 Au 21.1987 19.1059 18.1213 523 Au 22.2155 16.2216 17.6171 524 Au 21.2125 13.5020 18.1423 525 Au 18.8837 11.6054 18.7551 526 Au 16.7922 11.8475 11.4852 527 Au 19.0815 13.5968 10.9081 528 Au 19.7985 
16.4980 10.5812 529 Au 18.9653 19.4149 10.9514 530 Au 16.2065 20.9303 19.7334 531 Au 14.0674 21.9350 16.3862 532 Au 20.8045 20.3061 14.3443 533 Au 22.0826 15.9651 13.7612 534 Au 21.3174 13.2448 14.0981 535 Au 17.7805 10.3780 15.2522 536 Au 23.4004 19.6522 15.5183 537 Au 20.6514 10.4478 13.1084 538 Au 13.0707 22.1438 19.1481 539 Au 23.5004 13.9057 15.8921 540 Au 17.2156 9.2546 18.7644 541 Au 22.9413 18.0140 11.8371 542 H 9.1139 22.5220 9.9959 543 H 9.8505 20.2153 10.6568 544 H 11.4184 22.3084 6.4104 545 H 12.2098 20.0047 7.0145 546 H 26.0700 13.3966 20.3880 547 H 23.6125 12.9360 20.5751 548 H 25.7264 16.6221 23.1593 549 H 23.2244 16.4008 23.1478 550 H 17.8022 14.9905 27.3652 551 H 17.4574 13.0677 25.8086 552 H 19.3232 17.3920 24.1839 553 H 19.0108 15.5005 22.5713 554 H 13.2545 12.8385 26.9604 555 H 15.1340 14.0308 25.8119 556 H 10.4137 15.4676 25.1598 557 H 12.2561 16.6678 23.9500 558 H 15.4768 23.2553 25.4080 559 H 16.6169 22.1942 23.4359 560 H 15.0308 19.4385 27.2655 561 H 16.3185 18.3457 25.4146 562 H 19.5417 26.9150 22.0712 563 H 20.2324 24.5182 22.2243 564 H 17.5818 26.1861 18.3518 565 H 18.3019 23.7801 18.4132 566 H 5.6807 18.8233 15.4720 567 H 7.3676 17.0824 14.8250 568 H 5.6043 20.3277 11.4758 569 H 7.1855 18.5300 10.7360 570 H 18.9622 9.0553 24.2754 571 H 20.1496 10.6462 22.7484 572 H 18.6876 6.1520 21.1616 573 H 19.8812 7.6879 19.5910 574 H 11.9294 8.5563 20.6646 575 H 13.1673 10.7363 20.6631 576 H 10.5539 9.2731 24.6415 577 H 11.7417 11.4829 24.6919 578 H 9.6446 18.7505 25.3256 579 H 12.0777 18.8259 24.7228 580 H 8.7322 21.5881 22.2676 581 H 11.1491 21.7069 21.6211 582 H 4.9262 11.9852 13.1577 583 H 6.9855 13.0336 14.1263 584 H 7.2994 9.2059 10.9586 585 H 9.4015 10.1772 11.9348 586 H 8.3193 24.7245 11.7909 587 H 7.9158 22.2956 12.2130 588 H 11.9268 24.5899 14.0569 589 H 11.6019 22.1395 14.4875 590 H 14.1309 8.2252 24.6869 591 H 15.6030 9.9052 23.5683 592 H 14.1712 5.7645 21.2021 593 H 15.5237 7.4827 19.9758 594 H 9.7442 12.6321 22.3667 595 H 10.8376 
14.4541 21.0305 596 H 7.4001 15.5120 24.4674 597 H 8.4468 17.3733 23.1727 598 H 16.4904 25.6240 23.6973 599 H 16.9344 24.3870 21.5727 600 H 12.3436 24.6614 23.4664 601 H 12.7174 23.4223 21.3208 602 H 17.4382 27.0379 11.7539 603 H 16.9696 25.9386 13.9558 604 H 21.5473 25.9189 12.0031 605 H 21.1202 24.6960 14.1496 606 H 8.5608 24.2877 15.6553 607 H 10.0681 22.3915 16.2810 608 H 8.1572 25.3916 19.7563 609 H 9.7417 23.5673 20.4407 610 H 7.3564 14.4411 21.2681 611 H 8.8907 15.1836 19.4293 612 H 4.1635 13.9321 18.4841 613 H 5.6856 14.5522 16.5805 614 H 24.6627 8.6826 15.9548 615 H 22.6992 9.6226 14.7248 616 H 27.2453 11.5423 14.1260 617 H 25.3618 12.3934 12.7160 618 H 14.8895 4.5665 16.7793 619 H 15.5356 6.9119 17.3772 620 H 10.7540 5.5405 17.2003 621 H 11.3399 7.8771 17.8934 622 H 6.6476 21.7584 21.3600 623 H 8.0820 19.9047 20.4748 624 H 5.6022 22.9193 17.3939 625 H 6.8668 20.9704 16.4531 626 H 22.6368 8.9687 17.1756 627 H 20.3380 9.7447 16.5454 628 H 21.1345 5.0687 18.0271 629 H 18.8034 5.8050 17.4998 630 H 23.5593 22.5220 21.2938 631 H 22.8227 20.2153 20.6328 632 H 21.2548 22.3084 24.8793 633 H 20.4634 20.0047 24.2752 634 H 6.6032 13.3966 10.9017 635 H 9.0607 12.9360 10.7146 636 H 6.9468 16.6221 8.1303 637 H 9.4488 16.4008 8.1419 638 H 14.8710 14.9905 3.9245 639 H 15.2157 13.0677 5.4810 640 H 13.3499 17.3920 7.1057 641 H 13.6623 15.5005 8.7183 642 H 19.4187 12.8385 4.3293 643 H 17.5392 14.0308 5.4778 644 H 22.2595 15.4676 6.1299 645 H 20.4171 16.6678 7.3397 646 H 17.1964 23.2553 5.8817 647 H 16.0563 22.1942 7.8537 648 H 17.6424 19.4385 4.0241 649 H 16.3547 18.3457 5.8751 650 H 13.1315 26.9150 9.2184 651 H 12.4408 24.5182 9.0653 652 H 15.0914 26.1861 12.9379 653 H 14.3713 23.7801 12.8764 654 H 26.9925 18.8233 15.8177 655 H 25.3056 17.0824 16.4646 656 H 27.0688 20.3277 19.8139 657 H 25.4877 18.5300 20.5537 658 H 13.7110 9.0553 7.0142 659 H 12.5235 10.6462 8.5413 660 H 13.9856 6.1520 10.1281 661 H 12.7920 7.6879 11.6987 662 H 20.7438 8.5564 10.6251 663 H 19.5059 10.7363 
10.6266 664 H 22.1192 9.2731 6.6482 665 H 20.9314 11.4829 6.5977 666 H 23.0286 18.7505 5.9640 667 H 20.5954 18.8259 6.5669 668 H 23.9410 21.5881 9.0221 669 H 21.5241 21.7069 9.6685 670 H 27.7470 11.9852 18.1319 671 H 25.6877 13.0336 17.1634 672 H 25.3738 9.2059 20.3311 673 H 23.2716 10.1772 19.3548 674 H 24.3538 24.7245 19.4988 675 H 24.7574 22.2956 19.0767 676 H 20.7464 24.5899 17.2327 677 H 21.0713 22.1395 16.8022 678 H 18.5423 8.2252 6.6027 679 H 17.0702 9.9052 7.7214 680 H 18.5020 5.7645 10.0875 681 H 17.1495 7.4827 11.3139 682 H 22.9290 12.6321 8.9230 683 H 21.8356 14.4541 10.2591 684 H 25.2731 15.5120 6.8223 685 H 24.2264 17.3733 8.1170 686 H 16.1828 25.6240 7.5924 687 H 15.7388 24.3870 9.7170 688 H 20.3296 24.6614 7.8233 689 H 19.9557 23.4223 9.9689 690 H 15.2350 27.0379 19.5357 691 H 15.7036 25.9386 17.3338 692 H 11.1258 25.9189 19.2866 693 H 11.5530 24.6960 17.1401 694 H 24.1124 24.2877 15.6344 695 H 22.6050 22.3915 15.0086 696 H 24.5160 25.3916 11.5334 697 H 22.9315 23.5673 10.8489 698 H 25.3168 14.4411 10.0215 699 H 23.7825 15.1836 11.8603 700 H 28.5097 13.9321 12.8056 701 H 26.9876 14.5522 14.7092 702 H 8.0105 8.6826 15.3349 703 H 9.9739 9.6226 16.5649 704 H 5.4279 11.5423 17.1637 705 H 7.3114 12.3934 18.5737 706 H 17.7836 4.5665 14.5104 707 H 17.1376 6.9119 13.9125 708 H 21.9191 5.5405 14.0893 709 H 21.3333 7.8771 13.3963 710 H 26.0255 21.7584 9.9296 711 H 24.5911 19.9047 10.8148 712 H 27.0710 22.9193 13.8958 713 H 25.8064 20.9704 14.8365 714 H 10.0364 8.9687 14.1141 715 H 12.3352 9.7447 14.7443 716 H 11.5386 5.0687 13.2626 717 H 13.8698 5.8050 13.7899 718 H 8.8058 25.4273 8.2171 719 H 28.9655 16.3170 22.1743 720 H 19.3030 19.1904 27.0579 721 H 10.1009 12.0294 28.0074 722 H 13.4508 21.7860 29.0982 723 H 18.0960 29.7795 20.9877 724 H 3.5997 21.5247 15.2763 725 H 17.1857 4.9556 23.9781 726 H 10.0873 5.8777 21.7743 727 H 5.8967 20.8056 24.0481 728 H 2.9266 10.0811 11.1426 729 H 11.3903 27.7973 12.7604 730 H 12.2353 4.3134 23.6774 731 H 6.6471 12.4283 
25.6066 732 H 14.8513 26.8220 26.4746 733 H 19.1579 28.3311 9.0794 734 H 6.4138 27.5720 17.7861 735 H 4.6229 13.2717 22.8901 736 H 27.9338 8.1800 16.9876 737 H 11.1144 2.2205 16.4509 738 H 4.6977 24.5740 21.4389 739 H 24.4400 5.2420 18.8581 740 H 22.7136 25.3502 24.6474 741 H 3.5571 14.9853 10.5262 742 H 13.3701 19.1904 4.2318 743 H 22.5722 12.0294 3.2822 744 H 18.8617 23.5737 2.9139 745 H 15.4922 29.4869 11.9888 746 H 29.0734 21.5247 16.0133 747 H 15.4875 4.9556 7.3116 748 H 22.5859 5.8777 9.5154 749 H 26.3648 19.3652 5.9954 750 H 28.6687 8.7149 21.0278 751 H 21.2829 27.7973 18.5292 752 H 20.3330 5.4193 6.0168 753 H 24.9353 11.0806 6.5665 754 H 19.7214 26.4843 4.9846 755 H 11.6575 27.7770 22.1176 756 H 26.0549 27.0924 15.3738 757 H 28.0503 13.2717 8.3996 758 H 4.7394 8.1800 14.3020 759 H 21.5588 2.2205 14.8388 760 H 28.5657 25.0069 11.6527 761 H 7.4695 6.8861 13.1328 .------------------------------------------------------------------------------. /| | / | | / | | / | | / | | / | | / | | / | | / | | / | H H | / | O O O | / | OC C | / | C H H | / | H C C OH H O | / | H C C O HOHH C | / | O H O C CHC C O | / | H O H C C C H CC H CHCH CHCC C | / | HO O O CC HC H C CH OC | / | H H HH CHCC C H C HCC SC HuC C C C H O | * | HO OC HHO C CSu HC S CO C H H | | | CCC C C S C C S HH C C | | | O CCH CCHS C Au C AuuH H H CC CC H OH H | | H O HC H C C SSAuH AuAu S AuS Cu C C CC H O | | | H CCHC AHCC H H O H C C O | | O C C O H CAC AuS CuuH Au AuH C S C CH O C | | | C OHHSC C C Au H C COHH Au C H H HC CO O H | | | CO C H Cu C AuC HOuu C H AuuC Au HC CCC | | H| C HC OAu CCH Au HCAuCCu S HAu C CHHAu HOCHHO H | | | C H HO S CCC AuC Au H COCu AHuC CC CC CHC C OH | | | HO CC H H Au H AH AuAuC H Auu AuAuSH C O CH C | | |H C CC Su AuAu Au S Au SHS C C | | | CCC AuH Au Au S Au AuAu C Hu Au CH C O | | | C C H C HC Au Au H S CAuC CC OC HO Au CHHC H | | O C| C H SSC C C HAu AuuHu C HC AuuAuC HC COC O | | H H H CAHCC O Au AuuAu H CCCHC CCHH C H | | H O| C C H C HAS H Auu SCS H C C | | H O H O 
CC C CH S SuC Au H C O | | O |CC OCCCHHCC OC HAuuH Cu C CCAH C C HH C C O | | H C .--HC--CH---CCH-SAH----HH-----O-C--O-------H----CC-----------------------------. | O/ H O H H C S HS HS OH HC HC OOHH / | / C S H Au CCH CCH C H / | / C C C H O C H C / | / H CC HH C CO O O / | / C H H C HC H / | / H H C HO C O / | / O HO H / | / H O O H / | / H / | / / | / / | / / | / / | / / | / / | / / | / / | / / |/ / *------------------------------------------------------------------------------* Unit Cell: Periodic X Y Z Points Spacing -------------------------------------------------------------------- 1. axis: yes 32.000000 0.000000 0.000000 240 0.1333 2. axis: yes 0.000000 32.000000 0.000000 240 0.1333 3. axis: yes 0.000000 0.000000 32.000000 240 0.1333 Grid-points per volume: 421.87 Effective grid-spacing: 0.1333 O-setup: name : Oxygen id : 5f3f27ba17355653aa2069308cb75aea Z : 8 valence: 6 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/O.LDA.gz cutoffs: 0.74(comp), 1.30(filt), 0.83(core), lmax=2 valence states: energy radius 2s(2) -23.752 0.741 2p(4) -9.195 0.741 *s 3.459 0.741 *p 18.016 0.741 *d 0.000 0.741 Using partial waves for O as LCAO basis S-setup: name : Sulfur id : 16df0b8f883bfd770ab5c435bc804428 Z : 16 valence: 6 core : 10 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/S.LDA.gz cutoffs: 0.85(comp), 1.49(filt), 1.66(core), lmax=2 valence states: energy radius 3s(2) -17.278 0.847 3p(4) -7.106 0.847 *s 9.933 0.847 *p 20.105 0.847 *d 0.000 0.847 Using partial waves for S as LCAO basis C-setup: name : Carbon id : d60576a1f549371a163e72552ca58787 Z : 6 valence: 4 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/C.LDA.gz cutoffs: 0.64(comp), 1.14(filt), 1.14(core), lmax=2 valence states: energy radius 2s(2) -13.639 0.635 2p(2) -5.414 0.635 *s 13.573 0.635 *p 21.797 0.635 *d 0.000 0.635 Using partial waves for C as LCAO basis H-setup: name : Hydrogen id : 4766778ce56282eaa64abeb28b7c1de3 Z : 1 valence: 1 core : 0 charge : 0.0 file : 
/soft/apps/gpaw-setups-0.5.3574/H.LDA.gz cutoffs: 0.48(comp), 0.85(filt), 0.53(core), lmax=2 valence states: energy radius 1s(1) -6.353 0.476 *s 20.858 0.476 *p 0.000 0.476 Using partial waves for H as LCAO basis Au-setup: name : Gold id : a44207148b704df7bec07bf25e8feca8 Z : 79 valence: 11 core : 68 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/Au.LDA.gz cutoffs: 1.32(comp), 2.33(filt), 2.81(core), lmax=2 valence states: energy radius 6s(1) -6.048 1.323 6p(0) -0.888 1.323 5d(10) -7.129 1.323 *s 21.163 1.323 *p 26.323 1.323 *d 20.082 1.323 Using partial waves for Au as LCAO basis Using the LDA Exchange-Correlation Functional. Spin-Paired Calculation Total Charge: 0.000000 Fermi Temperature: 0.100000 Mode: fd Eigen Solver: (3 nearest neighbors central finite-difference stencil) Diagonalizer: ScaLapack - grid: [nprow, npcol, nb] = [5, 5, 64] Inverse Cholesky: Lapack Poisson Solver: Jacobi (Mehrstellen finite-difference stencil) Interpolation: 6th Order Reference Energy: -53635200.580024 Gamma Point Calculation Total number of cores used: 4096 Using Domain Decomposition: 4 x 8 x 16 Parallelization Over bands on 8 Processors 1 k-point in the Irreducible Part of the Brillouin Zone (total: 1) Linear Mixing Parameter: 0.1 Pulay Mixing with 5 Old Densities Damping of Long Wave Oscillations: 100 Convergence Criteria: Total Energy Change per Atom: 0.001 eV / atom Integral of Absolute Density Change: 0.0001 electrons Integral of Absolute Eigenstate Change: 1e-09 Number of Bands in Calculation: 1728 Bands to Converge: Occupied States Only Number of Valence Electrons: 3366 log10-error: Total Iterations: Time WFS Density Energy Fermi Poisson iter: 1 17:55:51 +0.3 -4415.56673 3 105 iter: 2 17:58:12 -0.4 -5514.20755 5 iter: 3 18:00:33 -0.7 -5682.62208 4 iter: 4 18:03:37 +0.0 -0.9 -3933.90570 7 101 iter: 5 18:06:41 +0.0 -0.9 -3593.83801 15 93 iter: 6 18:09:43 +0.3 -1.0 -3028.82668 24 95 iter: 7 18:12:46 +0.4 -1.1 -2921.02831 26 94 iter: 8 18:15:45 +0.5 -1.1 -2940.70606 9 87 
iter: 9 18:18:40 +0.4 -1.2 -3094.01547 12 80 iter: 10 18:21:36 +0.4 -1.3 -3432.21825 6 78 iter: 11 18:24:30 +0.3 -1.3 -3637.38850 3 75 iter: 12 18:27:23 +0.2 -1.3 -4192.08640 7 70 iter: 13 18:30:15 +0.2 -1.2 -4307.12410 5 64 iter: 14 18:33:04 +0.1 -1.2 -4761.82594 3 60 iter: 15 18:36:00 +0.1 -1.2 -4388.02660 7 82 iter: 16 18:38:57 -0.1 -1.1 -4787.18221 7 79 iter: 17 18:41:53 -0.1 -1.1 -4644.09502 8 82 iter: 18 18:44:52 -0.0 -1.1 -5683.63590 15 90 iter: 19 18:47:47 -0.0 -1.1 -5192.80257 4 79 iter: 20 18:50:51 -0.2 -1.0 -4643.91388 17 95 Memory usage: 980.27 MB ============================================================ Timing: incl. excl. ============================================================ Initialization: 214.671 26.264 0.7% | Hamiltonian: 43.972 0.001 0.0% | Atomic: 0.000 0.000 0.0% | Communicate energies: 2.405 2.405 0.1% | Hartree integrate/restrict: 0.037 0.037 0.0% | Initialize Hamiltonian: 0.038 0.038 0.0% | Poisson: 40.576 40.576 1.1% | XC 3D grid: 0.910 0.910 0.0% | vbar: 0.006 0.006 0.0% | LCAO initialization: 144.435 8.211 0.2% | LCAO eigensolver: 20.053 0.014 0.0% | Atomic Hamiltonian: 0.000 0.000 0.0% | Blacs Orbital Layouts: 13.197 0.002 0.0% | General diagonalize: 13.060 13.060 0.3% | Redistribute coefs: 0.125 0.125 0.0% | Send coefs to domains: 0.010 0.010 0.0% | Calculate projections: 0.000 0.000 0.0% | Distribute overlap matrix: 6.834 1.064 0.0% | Distribute overlap matrix: 5.770 5.770 0.2% | Potential matrix: 0.008 0.008 0.0% | LCAO to grid: 0.038 0.038 0.0% | Set positions (LCAO WFS): 116.133 7.441 0.2% | Basic WFS set positions: 0.002 0.002 0.0% | Basis functions set positions: 0.134 0.134 0.0% | Distribute overlap matrix: 1.404 1.404 0.0% | TCI: Calculate S, T, P: 107.153 107.153 2.8% || SCF-cycle: 3553.883 214.291 5.7% |-| Density: 6.943 0.004 0.0% | Atomic density matrices: 0.002 0.002 0.0% | Mix: 1.268 1.268 0.0% | Multipole moments: 2.322 2.322 0.1% | Pseudo density: 3.347 3.347 0.1% | Hamiltonian: 639.934 0.010 0.0% | Atomic: 
0.003 0.003 0.0% | Communicate energies: 43.344 43.344 1.2% | Hartree integrate/restrict: 0.661 0.661 0.0% | Poisson: 579.460 579.460 15.4% |-----| XC 3D grid: 16.346 16.346 0.4% | vbar: 0.111 0.111 0.0% | Orthonormalize: 704.260 0.010 0.0% | Blacs Band Layouts: 7.733 0.002 0.0% | Inverse Cholesky: 7.732 7.732 0.2% | calc_matrix: 466.524 466.524 12.4% |----| rotate_psi: 229.993 229.993 6.1% |-| RMM-DIIS: 1359.575 586.224 15.6% |-----| precondition: 773.351 773.351 20.5% |-------| Subspace diag: 628.880 0.008 0.0% | Blacs Band Layouts: 74.455 0.004 0.0% | Diagonalize: 74.443 74.443 2.0% || Distribute results: 0.008 0.008 0.0% | calc_matrix: 327.276 327.276 8.7% |--| rotate_psi: 227.141 227.141 6.0% |-| Other: 0.301 0.301 0.0% | ============================================================ Total: 3768.855 100.0% ============================================================ date: Tue Apr 6 18:51:30 2010 gpaw-24.1.0/doc/devel/Au_cluster/Au_cluster.txt_domain_8x8x8x4000066400000000000000000001270451454550013000241550ustar00rootroot00000000000000 ___ ___ ___ _ _ _ | | |_ | | | | | | | | | . | | | | |__ | _|___|_____| 0.7 |___|_| User: ???@ion-R46-9 Date: Wed Mar 31 10:25:55 2010 Arch: BGP Pid: 100 Dir: /gpaw/lib/python2.6/site-packages/gpaw ase: /gpaw/lib/python2.6/site-packages/ase version: 3.3.1 numpy: /gpaw/lib/python2.6/site-packages/numpy units: Angstrom and eV Extra parameters: {'blacs': 1} **NOTE**: please start using occupations=FermiDirac(width). 
Memory estimate --------------- Calculator 531.69 MiB Initial overhead 375.15 MiB Density 19.14 MiB Arrays 5.36 MiB Localized functions 6.17 MiB Mixer 2.06 MiB Interpolator 5.56 MiB Hamiltonian 24.61 MiB Arrays 3.50 MiB Restrictor 3.60 MiB XC 3D grid 1.65 MiB Poisson 15.40 MiB vbar 0.47 MiB Wavefunctions 112.79 MiB Arrays psit_nG 88.99 MiB Eigensolver 1.04 MiB Projectors 0.90 MiB Overlap op 21.50 MiB Kinetic operator 0.36 MiB Positions: 0 O 8.9496 24.5522 8.6269 1 C 9.8369 23.8824 7.8297 2 O 10.3147 24.4127 6.7962 3 C 10.2586 22.5069 8.1973 4 C 9.7956 21.9548 9.3535 5 C 10.1886 20.6691 9.7194 6 C 11.0905 21.8361 7.3423 7 C 11.5174 20.5522 7.6624 8 C 11.0266 19.9861 8.8472 9 S 11.5885 18.3508 9.2813 10 O 28.1333 14.7301 20.7426 11 C 27.4474 15.3375 21.6018 12 O 28.0125 16.3029 22.3892 13 C 25.9869 15.0849 21.6935 14 C 25.4359 14.0366 21.0104 15 C 24.0669 13.7800 21.1043 16 C 25.2389 15.8691 22.5314 17 C 23.8539 15.7056 22.5825 18 C 23.2969 14.6329 21.8936 19 S 21.5139 14.4397 21.9176 20 O 18.3160 17.3324 27.9612 21 C 18.7254 17.4155 26.7768 22 O 19.2643 18.5780 26.2976 23 C 18.5259 16.2810 25.8396 24 C 18.0389 15.0989 26.3016 25 C 17.8379 14.0267 25.4416 26 C 18.8919 16.4439 24.5216 27 C 18.7110 15.3967 23.6194 28 C 18.1409 14.2187 24.0835 29 S 17.8458 12.8725 22.9494 30 O 10.9413 12.3160 27.5998 31 C 10.6709 13.4464 26.8783 32 O 9.5007 13.8962 26.8030 33 C 11.7619 14.1215 26.1305 34 C 13.0471 13.6858 26.2984 35 C 14.0929 14.3170 25.6295 36 C 11.4548 15.1530 25.2865 37 C 12.4740 15.8039 24.5866 38 C 13.7730 15.3246 24.7206 39 S 15.1059 16.1757 23.8766 40 O 14.3510 23.2593 27.5755 41 C 14.4611 22.0092 27.5230 42 O 13.8989 21.1920 28.4650 43 C 15.2230 21.3887 26.4095 44 C 15.6558 22.1753 25.3855 45 C 16.3278 21.5993 24.3086 46 C 15.4270 20.0367 26.4385 47 C 16.1389 19.4259 25.4144 48 C 16.6179 20.2392 24.3836 49 S 17.5699 19.4916 23.0916 50 O 18.4673 28.9006 21.1980 51 C 18.0899 28.0627 20.1847 52 O 17.4109 28.4998 19.2227 53 C 18.5108 26.6390 20.2204 54 C 19.2527 
26.2080 21.2866 55 C 19.6400 24.8798 21.3773 56 C 18.1617 25.8042 19.1985 57 C 18.5459 24.4610 19.2353 58 C 19.2469 24.0188 20.3483 59 S 19.8728 22.3550 20.3833 60 O 4.2414 20.7900 15.2209 61 C 4.6011 20.6819 13.9054 62 O 4.1339 21.4606 13.0378 63 C 5.5798 19.6412 13.5003 64 C 6.0428 18.7621 14.4404 65 C 6.9748 17.7889 14.0863 66 C 5.9855 19.5958 12.1955 67 C 6.8846 18.6161 11.7853 68 C 7.3897 17.7487 12.7504 69 S 8.5597 16.4991 12.2516 70 O 17.6672 7.0448 24.8204 71 C 18.0098 6.6418 23.6811 72 O 17.6399 5.4019 23.2371 73 C 18.7169 7.5728 22.7654 74 C 19.1450 8.7805 23.2313 75 C 19.8129 9.6676 22.3906 76 C 18.9917 7.1516 21.4895 77 C 19.6580 8.0009 20.6163 78 C 20.0359 9.2610 21.0754 79 S 20.6879 10.4385 19.9024 80 O 10.5943 6.6875 21.5703 81 C 10.5284 7.5054 22.6648 82 O 9.8824 7.1683 23.6878 83 C 11.2079 8.8258 22.6575 84 C 11.9027 9.2075 21.5445 85 C 12.5768 10.4257 21.5313 86 C 11.1279 9.6118 23.7727 87 C 11.7769 10.8439 23.8034 88 C 12.4750 11.2337 22.6565 89 S 13.1910 12.8649 22.6104 90 O 7.3001 19.2989 25.0933 91 C 7.6688 20.1334 24.2300 92 O 6.7696 20.9836 23.6470 93 C 9.0959 20.1630 23.8205 94 C 9.9900 19.3858 24.5033 95 C 11.3368 19.3997 24.1564 96 C 9.4760 20.9769 22.7894 97 C 10.8109 21.0240 22.4074 98 C 11.7049 20.1770 23.0545 99 S 13.4190 20.2217 22.5753 100 O 3.5939 10.5821 11.6508 101 C 4.8082 10.0108 11.3849 102 O 4.9004 9.0031 10.6409 103 C 6.0326 10.5560 12.0243 104 C 5.9166 11.5956 12.9004 105 C 7.0536 12.1616 13.4675 106 C 7.2455 10.0336 11.6735 107 C 8.4167 10.5560 12.2273 108 C 8.2845 11.5769 13.1655 109 S 9.7177 12.0755 14.0945 110 O 9.4876 26.7892 11.9048 111 C 10.3502 26.1941 12.5972 112 O 11.4520 26.8664 13.0505 113 C 10.1597 24.7483 12.8776 114 C 9.0376 24.1422 12.3773 115 C 8.8067 22.7931 12.6103 116 C 11.0588 24.0683 13.6404 117 C 10.8766 22.7052 13.8934 118 C 9.7467 22.0909 13.3674 119 S 9.4997 20.3381 13.5932 120 O 12.9263 6.1540 24.8934 121 C 13.2034 5.9605 23.6837 122 O 12.7786 4.8261 23.0479 123 C 14.0638 6.9456 22.9805 124 C 
14.4549 8.0696 23.6526 125 C 15.2591 9.0138 23.0334 126 C 14.4588 6.6984 21.6964 127 C 15.2299 7.6414 21.0186 128 C 15.6149 8.7857 21.7003 129 S 16.7168 9.9509 20.9246 130 O 8.2575 11.7304 24.1396 131 C 7.8999 12.9261 24.2804 132 O 6.9545 13.2660 25.2089 133 C 8.5690 14.0017 23.5054 134 C 9.4860 13.6805 22.5485 135 C 10.0958 14.6911 21.8004 136 C 8.1741 15.2967 23.7233 137 C 8.7491 16.3339 23.0073 138 C 9.7320 16.0046 22.0633 139 S 10.7009 17.3328 21.3884 140 O 15.2152 26.4013 25.6714 141 C 14.1630 25.8819 24.9681 142 O 12.9970 25.9802 25.4243 143 C 14.4008 25.1684 23.6876 144 C 15.6719 25.1172 23.1756 145 C 15.9259 24.4249 21.9976 146 C 13.3518 24.5820 23.0466 147 C 13.5557 23.8798 21.8565 148 C 14.8579 23.7791 21.3716 149 S 15.1229 23.0473 19.7573 150 O 18.7761 27.9232 9.8809 151 C 19.7890 27.2717 10.5298 152 O 20.9438 27.2492 10.0363 153 C 19.5156 26.5413 11.7935 154 C 18.2447 26.5401 12.3023 155 C 17.9716 25.9084 13.5153 156 C 20.5406 25.9002 12.4334 157 C 20.3108 25.2219 13.6325 158 C 19.0167 25.2370 14.1485 159 S 18.6496 24.2213 15.5673 160 O 7.2160 26.2855 16.0714 161 C 7.4678 26.0761 17.2839 162 O 6.9422 26.8922 18.2478 163 C 8.3278 24.9322 17.6804 164 C 8.8177 24.1090 16.7046 165 C 9.6417 23.0461 17.0483 166 C 8.6026 24.7350 19.0016 167 C 9.4497 23.6989 19.3936 168 C 9.9096 22.8379 18.3996 169 S 10.8866 21.4260 18.8655 170 O 5.3392 13.6178 22.3230 171 C 4.8488 13.6274 21.0460 172 O 3.6752 13.2502 20.8054 173 C 5.7168 14.1110 19.9424 174 C 6.9978 14.4828 20.2343 175 C 7.8500 14.9150 19.2203 176 C 5.2128 14.1820 18.6726 177 C 6.0397 14.5741 17.6164 178 C 7.3279 14.9930 17.9283 179 S 8.3638 15.6346 16.6285 180 O 27.0647 8.3764 16.5869 181 C 27.2060 9.4855 15.7986 182 O 28.3205 10.0553 15.6947 183 C 26.0468 10.0336 15.0496 184 C 24.7997 9.5106 15.2514 185 C 23.7046 10.0266 14.5665 186 C 26.2498 11.0935 14.2074 187 C 25.1976 11.6025 13.4553 188 C 23.9248 11.0726 13.6733 189 S 22.5658 11.6968 12.7064 190 O 13.3181 2.8098 16.1343 191 C 12.4360 3.5734 16.5994 192 
O 11.1434 3.1507 16.7482 193 C 12.7987 4.9648 16.9704 194 C 14.1199 5.3103 17.0104 195 C 14.4837 6.6094 17.3456 196 C 11.8017 5.8537 17.2584 197 C 12.1199 7.1568 17.6253 198 C 13.4666 7.5128 17.6396 199 S 13.9109 9.1790 18.0795 200 O 5.2639 23.7908 21.2952 201 C 5.2738 23.5430 19.9499 202 O 4.6021 24.2569 19.1645 203 C 6.0539 22.3981 19.4154 204 C 6.7188 21.5830 20.2815 205 C 7.4877 20.5289 19.7995 206 C 6.1129 22.2183 18.0623 207 C 6.8226 21.1432 17.5335 208 C 7.4738 20.2962 18.4246 209 S 8.3658 18.8948 17.7815 210 O 24.3222 7.2969 17.8622 211 C 23.3342 6.5429 18.0436 212 O 23.4817 5.3540 18.7040 213 C 21.9697 7.0026 17.6804 214 C 21.7846 8.2848 17.2466 215 C 20.5067 8.7205 16.8943 216 C 20.9389 6.1056 17.7343 217 C 19.6438 6.5023 17.4195 218 C 19.4508 7.8165 16.9986 219 S 17.7839 8.3878 16.7303 220 Au 17.6679 16.3048 19.8974 221 Au 15.9037 16.3368 14.2653 222 Au 15.3538 18.7650 12.8583 223 Au 13.1948 16.9738 13.5075 224 Au 14.1376 14.2966 13.2156 225 Au 16.8736 14.4246 12.3913 226 Au 15.0618 17.2019 19.1703 227 Au 16.3367 18.7749 15.6445 228 Au 14.1457 17.0797 16.3223 229 Au 14.9939 14.3577 16.0504 230 Au 13.2335 18.4492 11.0553 231 Au 20.3660 15.4729 19.9614 232 Au 14.6737 13.6538 10.6055 233 Au 15.4308 15.5887 21.4806 234 Au 16.1827 18.5027 10.1124 235 Au 13.6247 19.5702 14.9385 236 Au 12.2707 15.1058 15.3783 237 Au 16.7790 12.2390 17.1022 238 Au 13.1619 15.1739 18.2063 239 Au 14.5736 19.5981 17.7536 240 Au 18.4838 21.0548 21.6925 241 Au 10.1377 17.5358 10.9213 242 Au 11.6854 12.4787 10.4076 243 Au 15.5208 12.8027 22.6804 244 Au 14.2499 18.1362 23.0666 245 Au 16.7868 21.2398 16.9654 246 Au 11.5068 17.8517 15.6834 247 Au 13.1856 12.4327 15.1693 248 Au 14.0196 12.4560 17.8115 249 Au 12.3857 17.9070 18.3346 250 Au 13.6857 20.9460 12.5803 251 Au 11.4745 19.1059 13.1684 252 Au 10.4577 16.2216 13.6725 253 Au 11.4607 13.5020 13.1473 254 Au 13.7895 11.6054 12.5345 255 Au 15.8809 11.8475 19.8045 256 Au 13.5916 13.5968 20.3815 257 Au 12.8747 16.4980 20.7085 258 Au 
13.7079 19.4149 20.3383 259 Au 16.4667 20.9303 11.5563 260 Au 18.6058 21.9350 14.9034 261 Au 11.8686 20.3061 16.9454 262 Au 10.5906 15.9651 17.5285 263 Au 11.3558 13.2448 17.1916 264 Au 14.8927 10.3780 16.0375 265 Au 16.3367 23.9560 15.6445 266 Au 9.2728 19.6522 15.7713 267 Au 12.0218 10.4478 18.1813 268 Au 19.6025 22.1438 12.1415 269 Au 9.1728 13.9057 15.3976 270 Au 15.4576 9.2546 12.5253 271 Au 9.7318 18.0140 19.4526 272 O 23.6510 24.4973 22.7280 273 C 22.8363 23.8824 23.4600 274 O 22.3158 24.4600 24.5856 275 C 22.4146 22.5069 23.0924 276 C 22.8776 21.9548 21.9361 277 C 22.4845 20.6691 21.5703 278 C 21.5827 21.8361 23.9474 279 C 21.1558 20.5522 23.6272 280 C 21.6466 19.9861 22.4424 281 S 21.0847 18.3508 22.0083 282 O 4.4787 14.6760 10.6237 283 C 5.2258 15.3375 9.6879 284 O 4.7069 16.2239 8.9649 285 C 6.6863 15.0849 9.5961 286 C 7.2373 14.0366 10.2793 287 C 8.6063 13.7800 10.1853 288 C 7.4342 15.8691 8.7583 289 C 8.8193 15.7056 8.7072 290 C 9.3762 14.6329 9.3960 291 S 11.1593 14.4397 9.3721 292 O 14.3572 17.3324 3.3284 293 C 13.9477 17.4155 4.5129 294 O 13.4089 18.5780 4.9921 295 C 14.1472 16.2810 5.4500 296 C 14.6342 15.0989 4.9880 297 C 14.8353 14.0267 5.8481 298 C 13.7812 16.4439 6.7681 299 C 13.9622 15.3967 7.6703 300 C 14.5323 14.2187 7.2062 301 S 14.8273 12.8725 8.3402 302 O 21.7319 12.3160 3.6899 303 C 22.0022 13.4464 4.4113 304 O 23.1725 13.8962 4.4867 305 C 20.9113 14.1215 5.1592 306 C 19.6261 13.6858 4.9913 307 C 18.5803 14.3170 5.6601 308 C 21.2184 15.1530 6.0031 309 C 20.1992 15.8039 6.7031 310 C 18.9002 15.3246 6.5691 311 S 17.5673 16.1757 7.4130 312 O 18.3320 23.3708 3.7094 313 C 18.2121 22.0092 3.7667 314 O 18.7283 21.2589 2.9018 315 C 17.4502 21.3887 4.8801 316 C 17.0173 22.1753 5.9042 317 C 16.3454 21.5993 6.9811 318 C 17.2462 20.0367 4.8512 319 C 16.5343 19.4259 5.8752 320 C 16.0553 20.2392 6.9060 321 S 15.1033 19.4916 8.1980 322 O 14.2368 28.8320 10.1746 323 C 14.5833 28.0627 11.1050 324 O 15.3228 28.5388 12.1528 325 C 14.1624 26.6390 11.0693 
326 C 13.4204 26.2080 10.0031 327 C 13.0331 24.8798 9.9123 328 C 14.5115 25.8042 12.0911 329 C 14.1273 24.4610 12.0543 330 C 13.4262 24.0188 10.9414 331 S 12.8003 22.3550 10.9063 332 O 28.4318 20.7900 16.0688 333 C 28.0721 20.6819 17.3842 334 O 28.5393 21.4606 18.2519 335 C 27.0933 19.6412 17.7893 336 C 26.6304 18.7621 16.8493 337 C 25.6984 17.7889 17.2034 338 C 26.6876 19.5958 19.0942 339 C 25.7886 18.6161 19.5044 340 C 25.2835 17.7487 18.5393 341 S 24.1135 16.4991 19.0381 342 O 15.0059 7.0448 6.4693 343 C 14.6634 6.6418 7.6085 344 O 15.0332 5.4019 8.0526 345 C 13.9562 7.5728 8.5242 346 C 13.5282 8.7805 8.0583 347 C 12.8603 9.6676 8.8990 348 C 13.6815 7.1516 9.8001 349 C 13.0152 8.0009 10.6734 350 C 12.6373 9.2610 10.2143 351 S 11.9853 10.4385 11.3873 352 O 22.0789 6.6875 9.7194 353 C 22.1447 7.5054 8.6249 354 O 22.7908 7.1683 7.6019 355 C 21.4652 8.8258 8.6321 356 C 20.7704 9.2075 9.7451 357 C 20.0964 10.4257 9.7583 358 C 21.5453 9.6118 7.5170 359 C 20.8962 10.8439 7.4863 360 C 20.1982 11.2337 8.6332 361 S 19.4821 12.8649 8.6793 362 O 25.4059 19.2245 6.1194 363 C 25.0044 20.1334 7.0597 364 O 25.8300 20.9140 7.5950 365 C 23.5773 20.1630 7.4691 366 C 22.6832 19.3858 6.7863 367 C 21.3364 19.3997 7.1333 368 C 23.1972 20.9769 8.5003 369 C 21.8623 21.0240 8.8822 370 C 20.9683 20.1770 8.2352 371 S 19.2542 20.2217 8.7143 372 O 28.9798 10.5353 19.6607 373 C 27.8650 10.0108 19.9048 374 O 27.7646 8.9132 20.7151 375 C 26.6406 10.5560 19.2653 376 C 26.7565 11.5956 18.3892 377 C 25.6196 12.1616 17.8222 378 C 25.4276 10.0336 19.6162 379 C 24.2565 10.5560 19.0624 380 C 24.3887 11.5769 18.1241 381 S 22.9555 12.0755 17.1951 382 O 23.1855 26.7892 19.3849 383 C 22.3230 26.1941 18.6924 384 O 21.2211 26.8664 18.2392 385 C 22.5135 24.7483 18.4121 386 C 23.6355 24.1422 18.9123 387 C 23.8665 22.7931 18.6794 388 C 21.6144 24.0683 17.6493 389 C 21.7966 22.7052 17.3963 390 C 22.9265 22.0909 17.9222 391 S 23.1735 20.3381 17.6964 392 O 19.7716 6.1713 6.2884 393 C 19.4698 5.9605 7.6060 394 O 
19.8598 4.9190 8.1897 395 C 18.6094 6.9456 8.3091 396 C 18.2183 8.0696 7.6371 397 C 17.4141 9.0138 8.2563 398 C 18.2144 6.6984 9.5933 399 C 17.4433 7.6414 10.2711 400 C 17.0583 8.7857 9.5893 401 S 15.9564 9.9509 10.3650 402 O 24.3838 11.6238 7.1626 403 C 24.7733 12.9261 7.0093 404 O 25.6413 13.2382 6.1568 405 C 24.1041 14.0017 7.7843 406 C 23.1872 13.6805 8.7411 407 C 22.5774 14.6911 9.4893 408 C 24.4991 15.2967 7.5663 409 C 23.9241 16.3339 8.2823 410 C 22.9412 16.0046 9.2263 411 S 21.9723 17.3328 9.9013 412 O 17.5441 26.3587 5.6759 413 C 18.5102 25.8819 6.3216 414 O 19.7802 25.9890 5.8246 415 C 18.2724 25.1684 7.6020 416 C 17.0013 25.1172 8.1141 417 C 16.7473 24.4249 9.2921 418 C 19.3214 24.5820 8.2430 419 C 19.1174 23.8798 9.4332 420 C 17.8153 23.7791 9.9181 421 S 17.5503 23.0473 11.5323 422 O 13.8141 27.8698 21.3557 423 C 12.8842 27.2717 20.7599 424 O 11.6264 27.2472 21.2973 425 C 13.1576 26.5413 19.4961 426 C 14.4284 26.5401 18.9874 427 C 14.7016 25.9084 17.7743 428 C 12.1325 25.9002 18.8562 429 C 12.3624 25.2219 17.6571 430 C 13.6564 25.2370 17.1412 431 S 14.0235 24.2213 15.7224 432 O 25.4796 26.3041 15.3264 433 C 25.2054 26.0761 14.0057 434 O 25.6880 26.8254 13.1207 435 C 24.3454 24.9322 13.6093 436 C 23.8555 24.1090 14.5851 437 C 23.0314 23.0461 14.2414 438 C 24.0705 24.7350 12.2880 439 C 23.2235 23.6989 11.8961 440 C 22.7636 22.8379 12.8901 441 S 21.7866 21.4260 12.4241 442 O 27.3340 13.6178 8.9666 443 C 27.8244 13.6274 10.2437 444 O 28.9980 13.2502 10.4843 445 C 26.9564 14.1110 11.3472 446 C 25.6753 14.4828 11.0553 447 C 24.8232 14.9150 12.0693 448 C 27.4604 14.1820 12.6171 449 C 26.6335 14.5741 13.6733 450 C 25.3452 14.9930 13.3613 451 S 24.3094 15.6346 14.6612 452 O 5.6085 8.3764 14.7028 453 C 5.4671 9.4855 15.4911 454 O 4.3526 10.0553 15.5950 455 C 6.6264 10.0336 16.2401 456 C 7.8734 9.5106 16.0382 457 C 8.9686 10.0266 16.7232 458 C 6.4234 11.0935 17.0822 459 C 7.4756 11.6025 17.8344 460 C 8.7484 11.0726 17.6164 461 S 10.1074 11.6968 18.5833 462 O 
19.3551 2.8098 15.1553 463 C 20.2372 3.5734 14.6903 464 O 21.5297 3.1507 14.5414 465 C 19.8744 4.9648 14.3193 466 C 18.5533 5.3103 14.2792 467 C 18.1895 6.6094 13.9441 468 C 20.8715 5.8537 14.0313 469 C 20.5533 7.1568 13.6643 470 C 19.2066 7.5128 13.6500 471 S 18.7623 9.1790 13.2102 472 O 27.4085 23.7706 10.1046 473 C 27.3994 23.5430 11.3397 474 O 28.1309 24.3205 12.1952 475 C 26.6193 22.3981 11.8743 476 C 25.9544 21.5830 11.0082 477 C 25.1855 20.5289 11.4902 478 C 26.5603 22.2183 13.2274 479 C 25.8505 21.1432 13.7562 480 C 25.1994 20.2962 12.8650 481 S 24.3074 18.8948 13.5082 482 O 8.2629 7.3641 13.4437 483 C 9.3390 6.5429 13.2461 484 O 9.2035 5.4513 12.6398 485 C 10.7035 7.0026 13.6093 486 C 10.8886 8.2848 14.0431 487 C 12.1664 8.7205 14.3954 488 C 11.7343 6.1056 13.5554 489 C 13.0294 6.5023 13.8701 490 C 13.2223 7.8165 14.2910 491 S 14.8893 8.3878 14.5594 492 Au 15.0053 16.3048 11.3923 493 Au 16.7695 16.3368 17.0244 494 Au 17.3194 18.7650 18.4314 495 Au 19.4784 16.9738 17.7822 496 Au 18.5356 14.2966 18.0741 497 Au 15.7996 14.4246 18.8984 498 Au 17.6114 17.2019 12.1194 499 Au 18.5275 17.0797 14.9674 500 Au 17.6793 14.3577 15.2393 501 Au 19.4396 18.4492 20.2343 502 Au 12.3072 15.4729 11.3283 503 Au 17.9995 13.6538 20.6842 504 Au 17.2424 15.5887 9.8091 505 Au 16.4904 18.5027 21.1772 506 Au 19.0485 19.5702 16.3512 507 Au 20.4025 15.1058 15.9114 508 Au 15.8942 12.2390 14.1874 509 Au 19.5113 15.1739 13.0834 510 Au 18.0995 19.5981 13.5361 511 Au 14.1894 21.0548 9.5972 512 Au 22.5355 17.5358 20.3683 513 Au 20.9878 12.4787 20.8821 514 Au 17.1524 12.8027 8.6093 515 Au 18.4232 18.1362 8.2230 516 Au 15.8864 21.2398 14.3243 517 Au 21.1664 17.8517 15.6062 518 Au 19.4876 12.4327 16.1204 519 Au 18.6535 12.4560 13.4782 520 Au 20.2875 17.9070 12.9551 521 Au 18.9875 20.9460 18.7094 522 Au 21.1987 19.1059 18.1213 523 Au 22.2155 16.2216 17.6171 524 Au 21.2125 13.5020 18.1423 525 Au 18.8837 11.6054 18.7551 526 Au 16.7922 11.8475 11.4852 527 Au 19.0815 13.5968 10.9081 528 Au 19.7985 
16.4980 10.5812 529 Au 18.9653 19.4149 10.9514 530 Au 16.2065 20.9303 19.7334 531 Au 14.0674 21.9350 16.3862 532 Au 20.8045 20.3061 14.3443 533 Au 22.0826 15.9651 13.7612 534 Au 21.3174 13.2448 14.0981 535 Au 17.7805 10.3780 15.2522 536 Au 23.4004 19.6522 15.5183 537 Au 20.6514 10.4478 13.1084 538 Au 13.0707 22.1438 19.1481 539 Au 23.5004 13.9057 15.8921 540 Au 17.2156 9.2546 18.7644 541 Au 22.9413 18.0140 11.8371 542 H 9.1139 22.5220 9.9959 543 H 9.8505 20.2153 10.6568 544 H 11.4184 22.3084 6.4104 545 H 12.2098 20.0047 7.0145 546 H 26.0700 13.3966 20.3880 547 H 23.6125 12.9360 20.5751 548 H 25.7264 16.6221 23.1593 549 H 23.2244 16.4008 23.1478 550 H 17.8022 14.9905 27.3652 551 H 17.4574 13.0677 25.8086 552 H 19.3232 17.3920 24.1839 553 H 19.0108 15.5005 22.5713 554 H 13.2545 12.8385 26.9604 555 H 15.1340 14.0308 25.8119 556 H 10.4137 15.4676 25.1598 557 H 12.2561 16.6678 23.9500 558 H 15.4768 23.2553 25.4080 559 H 16.6169 22.1942 23.4359 560 H 15.0308 19.4385 27.2655 561 H 16.3185 18.3457 25.4146 562 H 19.5417 26.9150 22.0712 563 H 20.2324 24.5182 22.2243 564 H 17.5818 26.1861 18.3518 565 H 18.3019 23.7801 18.4132 566 H 5.6807 18.8233 15.4720 567 H 7.3676 17.0824 14.8250 568 H 5.6043 20.3277 11.4758 569 H 7.1855 18.5300 10.7360 570 H 18.9622 9.0553 24.2754 571 H 20.1496 10.6462 22.7484 572 H 18.6876 6.1520 21.1616 573 H 19.8812 7.6879 19.5910 574 H 11.9294 8.5563 20.6646 575 H 13.1673 10.7363 20.6631 576 H 10.5539 9.2731 24.6415 577 H 11.7417 11.4829 24.6919 578 H 9.6446 18.7505 25.3256 579 H 12.0777 18.8259 24.7228 580 H 8.7322 21.5881 22.2676 581 H 11.1491 21.7069 21.6211 582 H 4.9262 11.9852 13.1577 583 H 6.9855 13.0336 14.1263 584 H 7.2994 9.2059 10.9586 585 H 9.4015 10.1772 11.9348 586 H 8.3193 24.7245 11.7909 587 H 7.9158 22.2956 12.2130 588 H 11.9268 24.5899 14.0569 589 H 11.6019 22.1395 14.4875 590 H 14.1309 8.2252 24.6869 591 H 15.6030 9.9052 23.5683 592 H 14.1712 5.7645 21.2021 593 H 15.5237 7.4827 19.9758 594 H 9.7442 12.6321 22.3667 595 H 10.8376 
14.4541 21.0305 596 H 7.4001 15.5120 24.4674 597 H 8.4468 17.3733 23.1727 598 H 16.4904 25.6240 23.6973 599 H 16.9344 24.3870 21.5727 600 H 12.3436 24.6614 23.4664 601 H 12.7174 23.4223 21.3208 602 H 17.4382 27.0379 11.7539 603 H 16.9696 25.9386 13.9558 604 H 21.5473 25.9189 12.0031 605 H 21.1202 24.6960 14.1496 606 H 8.5608 24.2877 15.6553 607 H 10.0681 22.3915 16.2810 608 H 8.1572 25.3916 19.7563 609 H 9.7417 23.5673 20.4407 610 H 7.3564 14.4411 21.2681 611 H 8.8907 15.1836 19.4293 612 H 4.1635 13.9321 18.4841 613 H 5.6856 14.5522 16.5805 614 H 24.6627 8.6826 15.9548 615 H 22.6992 9.6226 14.7248 616 H 27.2453 11.5423 14.1260 617 H 25.3618 12.3934 12.7160 618 H 14.8895 4.5665 16.7793 619 H 15.5356 6.9119 17.3772 620 H 10.7540 5.5405 17.2003 621 H 11.3399 7.8771 17.8934 622 H 6.6476 21.7584 21.3600 623 H 8.0820 19.9047 20.4748 624 H 5.6022 22.9193 17.3939 625 H 6.8668 20.9704 16.4531 626 H 22.6368 8.9687 17.1756 627 H 20.3380 9.7447 16.5454 628 H 21.1345 5.0687 18.0271 629 H 18.8034 5.8050 17.4998 630 H 23.5593 22.5220 21.2938 631 H 22.8227 20.2153 20.6328 632 H 21.2548 22.3084 24.8793 633 H 20.4634 20.0047 24.2752 634 H 6.6032 13.3966 10.9017 635 H 9.0607 12.9360 10.7146 636 H 6.9468 16.6221 8.1303 637 H 9.4488 16.4008 8.1419 638 H 14.8710 14.9905 3.9245 639 H 15.2157 13.0677 5.4810 640 H 13.3499 17.3920 7.1057 641 H 13.6623 15.5005 8.7183 642 H 19.4187 12.8385 4.3293 643 H 17.5392 14.0308 5.4778 644 H 22.2595 15.4676 6.1299 645 H 20.4171 16.6678 7.3397 646 H 17.1964 23.2553 5.8817 647 H 16.0563 22.1942 7.8537 648 H 17.6424 19.4385 4.0241 649 H 16.3547 18.3457 5.8751 650 H 13.1315 26.9150 9.2184 651 H 12.4408 24.5182 9.0653 652 H 15.0914 26.1861 12.9379 653 H 14.3713 23.7801 12.8764 654 H 26.9925 18.8233 15.8177 655 H 25.3056 17.0824 16.4646 656 H 27.0688 20.3277 19.8139 657 H 25.4877 18.5300 20.5537 658 H 13.7110 9.0553 7.0142 659 H 12.5235 10.6462 8.5413 660 H 13.9856 6.1520 10.1281 661 H 12.7920 7.6879 11.6987 662 H 20.7438 8.5564 10.6251 663 H 19.5059 10.7363 
10.6266 664 H 22.1192 9.2731 6.6482 665 H 20.9314 11.4829 6.5977 666 H 23.0286 18.7505 5.9640 667 H 20.5954 18.8259 6.5669 668 H 23.9410 21.5881 9.0221 669 H 21.5241 21.7069 9.6685 670 H 27.7470 11.9852 18.1319 671 H 25.6877 13.0336 17.1634 672 H 25.3738 9.2059 20.3311 673 H 23.2716 10.1772 19.3548 674 H 24.3538 24.7245 19.4988 675 H 24.7574 22.2956 19.0767 676 H 20.7464 24.5899 17.2327 677 H 21.0713 22.1395 16.8022 678 H 18.5423 8.2252 6.6027 679 H 17.0702 9.9052 7.7214 680 H 18.5020 5.7645 10.0875 681 H 17.1495 7.4827 11.3139 682 H 22.9290 12.6321 8.9230 683 H 21.8356 14.4541 10.2591 684 H 25.2731 15.5120 6.8223 685 H 24.2264 17.3733 8.1170 686 H 16.1828 25.6240 7.5924 687 H 15.7388 24.3870 9.7170 688 H 20.3296 24.6614 7.8233 689 H 19.9557 23.4223 9.9689 690 H 15.2350 27.0379 19.5357 691 H 15.7036 25.9386 17.3338 692 H 11.1258 25.9189 19.2866 693 H 11.5530 24.6960 17.1401 694 H 24.1124 24.2877 15.6344 695 H 22.6050 22.3915 15.0086 696 H 24.5160 25.3916 11.5334 697 H 22.9315 23.5673 10.8489 698 H 25.3168 14.4411 10.0215 699 H 23.7825 15.1836 11.8603 700 H 28.5097 13.9321 12.8056 701 H 26.9876 14.5522 14.7092 702 H 8.0105 8.6826 15.3349 703 H 9.9739 9.6226 16.5649 704 H 5.4279 11.5423 17.1637 705 H 7.3114 12.3934 18.5737 706 H 17.7836 4.5665 14.5104 707 H 17.1376 6.9119 13.9125 708 H 21.9191 5.5405 14.0893 709 H 21.3333 7.8771 13.3963 710 H 26.0255 21.7584 9.9296 711 H 24.5911 19.9047 10.8148 712 H 27.0710 22.9193 13.8958 713 H 25.8064 20.9704 14.8365 714 H 10.0364 8.9687 14.1141 715 H 12.3352 9.7447 14.7443 716 H 11.5386 5.0687 13.2626 717 H 13.8698 5.8050 13.7899 718 H 8.8058 25.4273 8.2171 719 H 28.9655 16.3170 22.1743 720 H 19.3030 19.1904 27.0579 721 H 10.1009 12.0294 28.0074 722 H 13.4508 21.7860 29.0982 723 H 18.0960 29.7795 20.9877 724 H 3.5997 21.5247 15.2763 725 H 17.1857 4.9556 23.9781 726 H 10.0873 5.8777 21.7743 727 H 5.8967 20.8056 24.0481 728 H 2.9266 10.0811 11.1426 729 H 11.3903 27.7973 12.7604 730 H 12.2353 4.3134 23.6774 731 H 6.6471 12.4283 
25.6066 732 H 14.8513 26.8220 26.4746 733 H 19.1579 28.3311 9.0794 734 H 6.4138 27.5720 17.7861 735 H 4.6229 13.2717 22.8901 736 H 27.9338 8.1800 16.9876 737 H 11.1144 2.2205 16.4509 738 H 4.6977 24.5740 21.4389 739 H 24.4400 5.2420 18.8581 740 H 22.7136 25.3502 24.6474 741 H 3.5571 14.9853 10.5262 742 H 13.3701 19.1904 4.2318 743 H 22.5722 12.0294 3.2822 744 H 18.8617 23.5737 2.9139 745 H 15.4922 29.4869 11.9888 746 H 29.0734 21.5247 16.0133 747 H 15.4875 4.9556 7.3116 748 H 22.5859 5.8777 9.5154 749 H 26.3648 19.3652 5.9954 750 H 28.6687 8.7149 21.0278 751 H 21.2829 27.7973 18.5292 752 H 20.3330 5.4193 6.0168 753 H 24.9353 11.0806 6.5665 754 H 19.7214 26.4843 4.9846 755 H 11.6575 27.7770 22.1176 756 H 26.0549 27.0924 15.3738 757 H 28.0503 13.2717 8.3996 758 H 4.7394 8.1800 14.3020 759 H 21.5588 2.2205 14.8388 760 H 28.5657 25.0069 11.6527 761 H 7.4695 6.8861 13.1328 .------------------------------------------------------------------------------. /| | / | | / | | / | | / | | / | | / | | / | | / | | / | H H | / | O O O | / | OC C | / | C H H | / | H C C OH H O | / | H C C O HOHH C | / | O H O C CHC C O | / | H O H C C C H CC H CHCH CHCC C | / | HO O O CC HC H C CH OC | / | H H HH CHCC C H C HCC SC HuC C C C H O | * | HO OC HHO C CSu HC S CO C H H | | | CCC C C S C C S HH C C | | | O CCH CCHS C Au C AuuH H H CC CC H OH H | | H O HC H C C SSAuH AuAu S AuS Cu C C CC H O | | | H CCHC AHCC H H O H C C O | | O C C O H CAC AuS CuuH Au AuH C S C CH O C | | | C OHHSC C C Au H C COHH Au C H H HC CO O H | | | CO C H Cu C AuC HOuu C H AuuC Au HC CCC | | H| C HC OAu CCH Au HCAuCCu S HAu C CHHAu HOCHHO H | | | C H HO S CCC AuC Au H COCu AHuC CC CC CHC C OH | | | HO CC H H Au H AH AuAuC H Auu AuAuSH C O CH C | | |H C CC Su AuAu Au S Au SHS C C | | | CCC AuH Au Au S Au AuAu C Hu Au CH C O | | | C C H C HC Au Au H S CAuC CC OC HO Au CHHC H | | O C| C H SSC C C HAu AuuHu C HC AuuAuC HC COC O | | H H H CAHCC O Au AuuAu H CCCHC CCHH C H | | H O| C C H C HAS H Auu SCS H C C | | H O H O 
CC C CH S SuC Au H C O | | O |CC OCCCHHCC OC HAuuH Cu C CCAH C C HH C C O | | H C .--HC--CH---CCH-SAH----HH-----O-C--O-------H----CC-----------------------------. | O/ H O H H C S HS HS OH HC HC OOHH / | / C S H Au CCH CCH C H / | / C C C H O C H C / | / H CC HH C CO O O / | / C H H C HC H / | / H H C HO C O / | / O HO H / | / H O O H / | / H / | / / | / / | / / | / / | / / | / / | / / | / / | / / |/ / *------------------------------------------------------------------------------* Unit Cell: Periodic X Y Z Points Spacing -------------------------------------------------------------------- 1. axis: yes 32.000000 0.000000 0.000000 240 0.1333 2. axis: yes 0.000000 32.000000 0.000000 240 0.1333 3. axis: yes 0.000000 0.000000 32.000000 240 0.1333 Grid-points per volume: 421.87 Effective grid-spacing: 0.1333 O-setup: name : Oxygen id : 5f3f27ba17355653aa2069308cb75aea Z : 8 valence: 6 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/O.LDA.gz cutoffs: 0.74(comp), 1.30(filt), 0.83(core), lmax=2 valence states: energy radius 2s(2) -23.752 0.741 2p(4) -9.195 0.741 *s 3.459 0.741 *p 18.016 0.741 *d 0.000 0.741 Using partial waves for O as LCAO basis S-setup: name : Sulfur id : 16df0b8f883bfd770ab5c435bc804428 Z : 16 valence: 6 core : 10 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/S.LDA.gz cutoffs: 0.85(comp), 1.49(filt), 1.66(core), lmax=2 valence states: energy radius 3s(2) -17.278 0.847 3p(4) -7.106 0.847 *s 9.933 0.847 *p 20.105 0.847 *d 0.000 0.847 Using partial waves for S as LCAO basis C-setup: name : Carbon id : d60576a1f549371a163e72552ca58787 Z : 6 valence: 4 core : 2 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/C.LDA.gz cutoffs: 0.64(comp), 1.14(filt), 1.14(core), lmax=2 valence states: energy radius 2s(2) -13.639 0.635 2p(2) -5.414 0.635 *s 13.573 0.635 *p 21.797 0.635 *d 0.000 0.635 Using partial waves for C as LCAO basis H-setup: name : Hydrogen id : 4766778ce56282eaa64abeb28b7c1de3 Z : 1 valence: 1 core : 0 charge : 0.0 file : 
/soft/apps/gpaw-setups-0.5.3574/H.LDA.gz cutoffs: 0.48(comp), 0.85(filt), 0.53(core), lmax=2 valence states: energy radius 1s(1) -6.353 0.476 *s 20.858 0.476 *p 0.000 0.476 Using partial waves for H as LCAO basis Au-setup: name : Gold id : a44207148b704df7bec07bf25e8feca8 Z : 79 valence: 11 core : 68 charge : 0.0 file : /soft/apps/gpaw-setups-0.5.3574/Au.LDA.gz cutoffs: 1.32(comp), 2.33(filt), 2.81(core), lmax=2 valence states: energy radius 6s(1) -6.048 1.323 6p(0) -0.888 1.323 5d(10) -7.129 1.323 *s 21.163 1.323 *p 26.323 1.323 *d 20.082 1.323 Using partial waves for Au as LCAO basis Using the LDA Exchange-Correlation Functional. Spin-Paired Calculation Total Charge: 0.000000 Fermi Temperature: 0.100000 Mode: fd Eigen Solver: (3 nearest neighbors central finite-difference stencil) Diagonalizer: ScaLapack - grid: [nprow, npcol, nb] = [5, 5, 64] Inverse Cholesky: Lapack Poisson Solver: Jacobi (Mehrstellen finite-difference stencil) Interpolation: 6th Order Reference Energy: -53635200.580024 Gamma Point Calculation Total number of cores used: 2048 Using Domain Decomposition: 8 x 8 x 8 Parallelization Over bands on 4 Processors 1 k-point in the Irreducible Part of the Brillouin Zone (total: 1) Linear Mixing Parameter: 0.1 Pulay Mixing with 5 Old Densities Damping of Long Wave Oscillations: 100 Convergence Criteria: Total Energy Change per Atom: 0.001 eV / atom Integral of Absolute Density Change: 0.0001 electrons Integral of Absolute Eigenstate Change: 1e-09 Number of Bands in Calculation: 1728 Bands to Converge: Occupied States Only Number of Valence Electrons: 3366 log10-error: Total Iterations: Time WFS Density Energy Fermi Poisson iter: 1 10:35:21 +0.3 -4415.56673 3 105 iter: 2 10:38:59 -0.4 -5514.20755 5 iter: 3 10:42:38 -0.7 -5682.62208 4 iter: 4 10:46:59 +0.0 -0.9 -3933.90570 7 101 iter: 5 10:51:18 +0.0 -0.9 -3593.83801 15 93 iter: 6 10:55:37 +0.3 -1.0 -3028.82668 24 95 iter: 7 10:59:55 +0.4 -1.1 -2921.02831 26 94 iter: 8 11:04:11 +0.5 -1.1 -2940.70606 9 87 
iter: 9 11:08:25 +0.4 -1.2 -3094.01547 12 80 iter: 10 11:12:37 +0.4 -1.3 -3432.21824 6 78 iter: 11 11:16:49 +0.3 -1.3 -3637.38845 3 75 iter: 12 11:20:58 +0.2 -1.3 -4192.08659 7 70 iter: 13 11:25:06 +0.2 -1.2 -4307.12553 5 64 iter: 14 11:29:11 +0.1 -1.2 -4761.83104 3 60 iter: 15 11:33:25 +0.1 -1.2 -4388.01429 7 82 iter: 16 11:37:38 -0.1 -1.1 -4787.14776 7 79 iter: 17 11:41:52 -0.1 -1.1 -4643.96001 8 82 iter: 18 11:46:10 -0.0 -1.1 -5683.98847 15 90 iter: 19 11:50:23 -0.0 -1.1 -5193.97274 4 79 iter: 20 11:54:42 -0.2 -1.0 -4644.38145 17 95 Memory usage: 1.25 GB ============================================================ Timing: incl. excl. ============================================================ Initialization: 217.912 25.574 0.5% | Hamiltonian: 43.437 0.001 0.0% | Atomic: 0.000 0.000 0.0% | Communicate energies: 2.644 2.644 0.0% | Hartree integrate/restrict: 0.034 0.034 0.0% | Initialize Hamiltonian: 0.036 0.036 0.0% | Poisson: 39.808 39.808 0.7% | XC 3D grid: 0.907 0.907 0.0% | vbar: 0.006 0.006 0.0% | LCAO initialization: 148.901 8.141 0.2% | LCAO eigensolver: 22.379 0.015 0.0% | Atomic Hamiltonian: 0.000 0.000 0.0% | Blacs Orbital Layouts: 12.862 0.002 0.0% | General diagonalize: 12.755 12.755 0.2% | Redistribute coefs: 0.087 0.087 0.0% | Send coefs to domains: 0.019 0.019 0.0% | Calculate projections: 0.000 0.000 0.0% | Distribute overlap matrix: 9.486 2.067 0.0% | Distribute overlap matrix: 7.419 7.419 0.1% | Potential matrix: 0.015 0.015 0.0% | LCAO to grid: 0.077 0.077 0.0% | Set positions (LCAO WFS): 118.305 8.871 0.2% | Basic WFS set positions: 0.002 0.002 0.0% | Basis functions set positions: 0.133 0.133 0.0% | Distribute overlap matrix: 2.234 2.234 0.0% | TCI: Calculate S, T, P: 107.064 107.064 2.0% || SCF-cycle: 5147.785 6.595 0.1% | Density: 10.245 0.004 0.0% | Atomic density matrices: 0.002 0.002 0.0% | Mix: 1.195 1.195 0.0% | Multipole moments: 2.767 2.767 0.1% | Pseudo density: 6.278 6.278 0.1% | Hamiltonian: 633.099 0.010 0.0% | Atomic: 0.003 
0.003 0.0% | Communicate energies: 47.612 47.612 0.9% | Hartree integrate/restrict: 0.623 0.623 0.0% | Poisson: 568.398 568.398 10.6% |---| XC 3D grid: 16.342 16.342 0.3% | vbar: 0.111 0.111 0.0% | Orthonormalize: 1257.023 0.010 0.0% | Blacs Band Layouts: 7.574 0.002 0.0% | Inverse Cholesky: 7.572 7.572 0.1% | calc_matrix: 910.340 910.340 17.0% |------| rotate_psi: 339.099 339.099 6.3% |--| RMM-DIIS: 2428.998 1165.985 21.7% |--------| precondition: 1263.013 1263.013 23.5% |--------| Subspace diag: 811.825 0.008 0.0% | Blacs Band Layouts: 75.024 0.004 0.0% | Diagonalize: 75.016 75.016 1.4% || Distribute results: 0.004 0.004 0.0% | calc_matrix: 413.991 413.991 7.7% |--| rotate_psi: 322.801 322.801 6.0% |-| Other: 0.301 0.301 0.0% | ============================================================ Total: 5365.997 100.0% ============================================================ date: Wed Mar 31 11:55:21 2010 gpaw-24.1.0/doc/devel/Au_cluster/BGMAP_band_4x8x16x8000066400000000000000000001030001454550013000215540ustar00rootroot000000000000000 0 0 0 0 0 1 0 0 0 2 0 0 0 3 0 0 0 4 0 0 0 5 0 0 0 6 0 0 0 7 0 0 0 8 0 0 0 9 0 0 0 10 0 0 0 11 0 0 0 12 0 0 0 13 0 0 0 14 0 0 0 15 0 0 1 0 0 0 1 1 0 0 1 2 0 0 1 3 0 0 1 4 0 0 1 5 0 0 1 6 0 0 1 7 0 0 1 8 0 0 1 9 0 0 1 10 0 0 1 11 0 0 1 12 0 0 1 13 0 0 1 14 0 0 1 15 0 0 2 0 0 0 2 1 0 0 2 2 0 0 2 3 0 0 2 4 0 0 2 5 0 0 2 6 0 0 2 7 0 0 2 8 0 0 2 9 0 0 2 10 0 0 2 11 0 0 2 12 0 0 2 13 0 0 2 14 0 0 2 15 0 0 3 0 0 0 3 1 0 0 3 2 0 0 3 3 0 0 3 4 0 0 3 5 0 0 3 6 0 0 3 7 0 0 3 8 0 0 3 9 0 0 3 10 0 0 3 11 0 0 3 12 0 0 3 13 0 0 3 14 0 0 3 15 0 0 4 0 0 0 4 1 0 0 4 2 0 0 4 3 0 0 4 4 0 0 4 5 0 0 4 6 0 0 4 7 0 0 4 8 0 0 4 9 0 0 4 10 0 0 4 11 0 0 4 12 0 0 4 13 0 0 4 14 0 0 4 15 0 0 5 0 0 0 5 1 0 0 5 2 0 0 5 3 0 0 5 4 0 0 5 5 0 0 5 6 0 0 5 7 0 0 5 8 0 0 5 9 0 0 5 10 0 0 5 11 0 0 5 12 0 0 5 13 0 0 5 14 0 0 5 15 0 0 6 0 0 0 6 1 0 0 6 2 0 0 6 3 0 0 6 4 0 0 6 5 0 0 6 6 0 0 6 7 0 0 6 8 0 0 6 9 0 0 6 10 0 0 6 11 0 0 6 12 0 0 6 13 0 0 6 14 0 0 6 15 0 0 7 0 0 0 7 1 
0 0 7 2 0 0 7 3 0 0 7 4 0 0 7 5 0 0 7 6 0 0 7 7 0 0 7 8 0 0 7 9 0 0 7 10 0 0 7 11 0 0 7 12 0 0 7 13 0 0 7 14 0 0 7 15 0 0 0 0 1 0 0 1 1 0 0 2 1 0 0 3 1 0 0 4 1 0 0 5 1 0 0 6 1 0 0 7 1 0 0 8 1 0 0 9 1 0 0 10 1 0 0 11 1 0 0 12 1 0 0 13 1 0 0 14 1 0 0 15 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 1 0 1 4 1 0 1 5 1 0 1 6 1 0 1 7 1 0 1 8 1 0 1 9 1 0 1 10 1 0 1 11 1 0 1 12 1 0 1 13 1 0 1 14 1 0 1 15 1 0 2 0 1 0 2 1 1 0 2 2 1 0 2 3 1 0 2 4 1 0 2 5 1 0 2 6 1 0 2 7 1 0 2 8 1 0 2 9 1 0 2 10 1 0 2 11 1 0 2 12 1 0 2 13 1 0 2 14 1 0 2 15 1 0 3 0 1 0 3 1 1 0 3 2 1 0 3 3 1 0 3 4 1 0 3 5 1 0 3 6 1 0 3 7 1 0 3 8 1 0 3 9 1 0 3 10 1 0 3 11 1 0 3 12 1 0 3 13 1 0 3 14 1 0 3 15 1 0 4 0 1 0 4 1 1 0 4 2 1 0 4 3 1 0 4 4 1 0 4 5 1 0 4 6 1 0 4 7 1 0 4 8 1 0 4 9 1 0 4 10 1 0 4 11 1 0 4 12 1 0 4 13 1 0 4 14 1 0 4 15 1 0 5 0 1 0 5 1 1 0 5 2 1 0 5 3 1 0 5 4 1 0 5 5 1 0 5 6 1 0 5 7 1 0 5 8 1 0 5 9 1 0 5 10 1 0 5 11 1 0 5 12 1 0 5 13 1 0 5 14 1 0 5 15 1 0 6 0 1 0 6 1 1 0 6 2 1 0 6 3 1 0 6 4 1 0 6 5 1 0 6 6 1 0 6 7 1 0 6 8 1 0 6 9 1 0 6 10 1 0 6 11 1 0 6 12 1 0 6 13 1 0 6 14 1 0 6 15 1 0 7 0 1 0 7 1 1 0 7 2 1 0 7 3 1 0 7 4 1 0 7 5 1 0 7 6 1 0 7 7 1 0 7 8 1 0 7 9 1 0 7 10 1 0 7 11 1 0 7 12 1 0 7 13 1 0 7 14 1 0 7 15 1 0 0 0 2 0 0 1 2 0 0 2 2 0 0 3 2 0 0 4 2 0 0 5 2 0 0 6 2 0 0 7 2 0 0 8 2 0 0 9 2 0 0 10 2 0 0 11 2 0 0 12 2 0 0 13 2 0 0 14 2 0 0 15 2 0 1 0 2 0 1 1 2 0 1 2 2 0 1 3 2 0 1 4 2 0 1 5 2 0 1 6 2 0 1 7 2 0 1 8 2 0 1 9 2 0 1 10 2 0 1 11 2 0 1 12 2 0 1 13 2 0 1 14 2 0 1 15 2 0 2 0 2 0 2 1 2 0 2 2 2 0 2 3 2 0 2 4 2 0 2 5 2 0 2 6 2 0 2 7 2 0 2 8 2 0 2 9 2 0 2 10 2 0 2 11 2 0 2 12 2 0 2 13 2 0 2 14 2 0 2 15 2 0 3 0 2 0 3 1 2 0 3 2 2 0 3 3 2 0 3 4 2 0 3 5 2 0 3 6 2 0 3 7 2 0 3 8 2 0 3 9 2 0 3 10 2 0 3 11 2 0 3 12 2 0 3 13 2 0 3 14 2 0 3 15 2 0 4 0 2 0 4 1 2 0 4 2 2 0 4 3 2 0 4 4 2 0 4 5 2 0 4 6 2 0 4 7 2 0 4 8 2 0 4 9 2 0 4 10 2 0 4 11 2 0 4 12 2 0 4 13 2 0 4 14 2 0 4 15 2 0 5 0 2 0 5 1 2 0 5 2 2 0 5 3 2 0 5 4 2 0 5 5 2 0 5 6 2 0 5 7 2 0 5 8 2 0 5 9 2 0 5 10 2 0 5 11 2 0 5 12 2 0 5 13 2 0 5 14 2 0 5 15 2 0 6 
0 2 0 6 1 2 0 6 2 2 0 6 3 2 0 6 4 2 0 6 5 2 0 6 6 2 0 6 7 2 0 6 8 2 0 6 9 2 0 6 10 2 0 6 11 2 0 6 12 2 0 6 13 2 0 6 14 2 0 6 15 2 0 7 0 2 0 7 1 2 0 7 2 2 0 7 3 2 0 7 4 2 0 7 5 2 0 7 6 2 0 7 7 2 0 7 8 2 0 7 9 2 0 7 10 2 0 7 11 2 0 7 12 2 0 7 13 2 0 7 14 2 0 7 15 2 0 0 0 3 0 0 1 3 0 0 2 3 0 0 3 3 0 0 4 3 0 0 5 3 0 0 6 3 0 0 7 3 0 0 8 3 0 0 9 3 0 0 10 3 0 0 11 3 0 0 12 3 0 0 13 3 0 0 14 3 0 0 15 3 0 1 0 3 0 1 1 3 0 1 2 3 0 1 3 3 0 1 4 3 0 1 5 3 0 1 6 3 0 1 7 3 0 1 8 3 0 1 9 3 0 1 10 3 0 1 11 3 0 1 12 3 0 1 13 3 0 1 14 3 0 1 15 3 0 2 0 3 0 2 1 3 0 2 2 3 0 2 3 3 0 2 4 3 0 2 5 3 0 2 6 3 0 2 7 3 0 2 8 3 0 2 9 3 0 2 10 3 0 2 11 3 0 2 12 3 0 2 13 3 0 2 14 3 0 2 15 3 0 3 0 3 0 3 1 3 0 3 2 3 0 3 3 3 0 3 4 3 0 3 5 3 0 3 6 3 0 3 7 3 0 3 8 3 0 3 9 3 0 3 10 3 0 3 11 3 0 3 12 3 0 3 13 3 0 3 14 3 0 3 15 3 0 4 0 3 0 4 1 3 0 4 2 3 0 4 3 3 0 4 4 3 0 4 5 3 0 4 6 3 0 4 7 3 0 4 8 3 0 4 9 3 0 4 10 3 0 4 11 3 0 4 12 3 0 4 13 3 0 4 14 3 0 4 15 3 0 5 0 3 0 5 1 3 0 5 2 3 0 5 3 3 0 5 4 3 0 5 5 3 0 5 6 3 0 5 7 3 0 5 8 3 0 5 9 3 0 5 10 3 0 5 11 3 0 5 12 3 0 5 13 3 0 5 14 3 0 5 15 3 0 6 0 3 0 6 1 3 0 6 2 3 0 6 3 3 0 6 4 3 0 6 5 3 0 6 6 3 0 6 7 3 0 6 8 3 0 6 9 3 0 6 10 3 0 6 11 3 0 6 12 3 0 6 13 3 0 6 14 3 0 6 15 3 0 7 0 3 0 7 1 3 0 7 2 3 0 7 3 3 0 7 4 3 0 7 5 3 0 7 6 3 0 7 7 3 0 7 8 3 0 7 9 3 0 7 10 3 0 7 11 3 0 7 12 3 0 7 13 3 0 7 14 3 0 7 15 3 1 0 0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 0 4 0 1 0 5 0 1 0 6 0 1 0 7 0 1 0 8 0 1 0 9 0 1 0 10 0 1 0 11 0 1 0 12 0 1 0 13 0 1 0 14 0 1 0 15 0 1 1 0 0 1 1 1 0 1 1 2 0 1 1 3 0 1 1 4 0 1 1 5 0 1 1 6 0 1 1 7 0 1 1 8 0 1 1 9 0 1 1 10 0 1 1 11 0 1 1 12 0 1 1 13 0 1 1 14 0 1 1 15 0 1 2 0 0 1 2 1 0 1 2 2 0 1 2 3 0 1 2 4 0 1 2 5 0 1 2 6 0 1 2 7 0 1 2 8 0 1 2 9 0 1 2 10 0 1 2 11 0 1 2 12 0 1 2 13 0 1 2 14 0 1 2 15 0 1 3 0 0 1 3 1 0 1 3 2 0 1 3 3 0 1 3 4 0 1 3 5 0 1 3 6 0 1 3 7 0 1 3 8 0 1 3 9 0 1 3 10 0 1 3 11 0 1 3 12 0 1 3 13 0 1 3 14 0 1 3 15 0 1 4 0 0 1 4 1 0 1 4 2 0 1 4 3 0 1 4 4 0 1 4 5 0 1 4 6 0 1 4 7 0 1 4 8 0 1 4 9 0 1 4 10 0 1 4 11 0 1 4 12 0 1 4 13 0 1 4 14 0 1 
4 15 0 1 5 0 0 1 5 1 0 1 5 2 0 1 5 3 0 1 5 4 0 1 5 5 0 1 5 6 0 1 5 7 0 1 5 8 0 1 5 9 0 1 5 10 0 1 5 11 0 1 5 12 0 1 5 13 0 1 5 14 0 1 5 15 0 1 6 0 0 1 6 1 0 1 6 2 0 1 6 3 0 1 6 4 0 1 6 5 0 1 6 6 0 1 6 7 0 1 6 8 0 1 6 9 0 1 6 10 0 1 6 11 0 1 6 12 0 1 6 13 0 1 6 14 0 1 6 15 0 1 7 0 0 1 7 1 0 1 7 2 0 1 7 3 0 1 7 4 0 1 7 5 0 1 7 6 0 1 7 7 0 1 7 8 0 1 7 9 0 1 7 10 0 1 7 11 0 1 7 12 0 1 7 13 0 1 7 14 0 1 7 15 0 1 0 0 1 1 0 1 1 1 0 2 1 1 0 3 1 1 0 4 1 1 0 5 1 1 0 6 1 1 0 7 1 1 0 8 1 1 0 9 1 1 0 10 1 1 0 11 1 1 0 12 1 1 0 13 1 1 0 14 1 1 0 15 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1 4 1 1 1 5 1 1 1 6 1 1 1 7 1 1 1 8 1 1 1 9 1 1 1 10 1 1 1 11 1 1 1 12 1 1 1 13 1 1 1 14 1 1 1 15 1 1 2 0 1 1 2 1 1 1 2 2 1 1 2 3 1 1 2 4 1 1 2 5 1 1 2 6 1 1 2 7 1 1 2 8 1 1 2 9 1 1 2 10 1 1 2 11 1 1 2 12 1 1 2 13 1 1 2 14 1 1 2 15 1 1 3 0 1 1 3 1 1 1 3 2 1 1 3 3 1 1 3 4 1 1 3 5 1 1 3 6 1 1 3 7 1 1 3 8 1 1 3 9 1 1 3 10 1 1 3 11 1 1 3 12 1 1 3 13 1 1 3 14 1 1 3 15 1 1 4 0 1 1 4 1 1 1 4 2 1 1 4 3 1 1 4 4 1 1 4 5 1 1 4 6 1 1 4 7 1 1 4 8 1 1 4 9 1 1 4 10 1 1 4 11 1 1 4 12 1 1 4 13 1 1 4 14 1 1 4 15 1 1 5 0 1 1 5 1 1 1 5 2 1 1 5 3 1 1 5 4 1 1 5 5 1 1 5 6 1 1 5 7 1 1 5 8 1 1 5 9 1 1 5 10 1 1 5 11 1 1 5 12 1 1 5 13 1 1 5 14 1 1 5 15 1 1 6 0 1 1 6 1 1 1 6 2 1 1 6 3 1 1 6 4 1 1 6 5 1 1 6 6 1 1 6 7 1 1 6 8 1 1 6 9 1 1 6 10 1 1 6 11 1 1 6 12 1 1 6 13 1 1 6 14 1 1 6 15 1 1 7 0 1 1 7 1 1 1 7 2 1 1 7 3 1 1 7 4 1 1 7 5 1 1 7 6 1 1 7 7 1 1 7 8 1 1 7 9 1 1 7 10 1 1 7 11 1 1 7 12 1 1 7 13 1 1 7 14 1 1 7 15 1 1 0 0 2 1 0 1 2 1 0 2 2 1 0 3 2 1 0 4 2 1 0 5 2 1 0 6 2 1 0 7 2 1 0 8 2 1 0 9 2 1 0 10 2 1 0 11 2 1 0 12 2 1 0 13 2 1 0 14 2 1 0 15 2 1 1 0 2 1 1 1 2 1 1 2 2 1 1 3 2 1 1 4 2 1 1 5 2 1 1 6 2 1 1 7 2 1 1 8 2 1 1 9 2 1 1 10 2 1 1 11 2 1 1 12 2 1 1 13 2 1 1 14 2 1 1 15 2 1 2 0 2 1 2 1 2 1 2 2 2 1 2 3 2 1 2 4 2 1 2 5 2 1 2 6 2 1 2 7 2 1 2 8 2 1 2 9 2 1 2 10 2 1 2 11 2 1 2 12 2 1 2 13 2 1 2 14 2 1 2 15 2 1 3 0 2 1 3 1 2 1 3 2 2 1 3 3 2 1 3 4 2 1 3 5 2 1 3 6 2 1 3 7 2 1 3 8 2 1 3 9 2 1 3 10 2 1 3 11 2 1 3 12 2 1 3 13 2 
1 3 14 2 1 3 15 2 1 4 0 2 1 4 1 2 1 4 2 2 1 4 3 2 1 4 4 2 1 4 5 2 1 4 6 2 1 4 7 2 1 4 8 2 1 4 9 2 1 4 10 2 1 4 11 2 1 4 12 2 1 4 13 2 1 4 14 2 1 4 15 2 1 5 0 2 1 5 1 2 1 5 2 2 1 5 3 2 1 5 4 2 1 5 5 2 1 5 6 2 1 5 7 2 1 5 8 2 1 5 9 2 1 5 10 2 1 5 11 2 1 5 12 2 1 5 13 2 1 5 14 2 1 5 15 2 1 6 0 2 1 6 1 2 1 6 2 2 1 6 3 2 1 6 4 2 1 6 5 2 1 6 6 2 1 6 7 2 1 6 8 2 1 6 9 2 1 6 10 2 1 6 11 2 1 6 12 2 1 6 13 2 1 6 14 2 1 6 15 2 1 7 0 2 1 7 1 2 1 7 2 2 1 7 3 2 1 7 4 2 1 7 5 2 1 7 6 2 1 7 7 2 1 7 8 2 1 7 9 2 1 7 10 2 1 7 11 2 1 7 12 2 1 7 13 2 1 7 14 2 1 7 15 2 1 0 0 3 1 0 1 3 1 0 2 3 1 0 3 3 1 0 4 3 1 0 5 3 1 0 6 3 1 0 7 3 1 0 8 3 1 0 9 3 1 0 10 3 1 0 11 3 1 0 12 3 1 0 13 3 1 0 14 3 1 0 15 3 1 1 0 3 1 1 1 3 1 1 2 3 1 1 3 3 1 1 4 3 1 1 5 3 1 1 6 3 1 1 7 3 1 1 8 3 1 1 9 3 1 1 10 3 1 1 11 3 1 1 12 3 1 1 13 3 1 1 14 3 1 1 15 3 1 2 0 3 1 2 1 3 1 2 2 3 1 2 3 3 1 2 4 3 1 2 5 3 1 2 6 3 1 2 7 3 1 2 8 3 1 2 9 3 1 2 10 3 1 2 11 3 1 2 12 3 1 2 13 3 1 2 14 3 1 2 15 3 1 3 0 3 1 3 1 3 1 3 2 3 1 3 3 3 1 3 4 3 1 3 5 3 1 3 6 3 1 3 7 3 1 3 8 3 1 3 9 3 1 3 10 3 1 3 11 3 1 3 12 3 1 3 13 3 1 3 14 3 1 3 15 3 1 4 0 3 1 4 1 3 1 4 2 3 1 4 3 3 1 4 4 3 1 4 5 3 1 4 6 3 1 4 7 3 1 4 8 3 1 4 9 3 1 4 10 3 1 4 11 3 1 4 12 3 1 4 13 3 1 4 14 3 1 4 15 3 1 5 0 3 1 5 1 3 1 5 2 3 1 5 3 3 1 5 4 3 1 5 5 3 1 5 6 3 1 5 7 3 1 5 8 3 1 5 9 3 1 5 10 3 1 5 11 3 1 5 12 3 1 5 13 3 1 5 14 3 1 5 15 3 1 6 0 3 1 6 1 3 1 6 2 3 1 6 3 3 1 6 4 3 1 6 5 3 1 6 6 3 1 6 7 3 1 6 8 3 1 6 9 3 1 6 10 3 1 6 11 3 1 6 12 3 1 6 13 3 1 6 14 3 1 6 15 3 1 7 0 3 1 7 1 3 1 7 2 3 1 7 3 3 1 7 4 3 1 7 5 3 1 7 6 3 1 7 7 3 1 7 8 3 1 7 9 3 1 7 10 3 1 7 11 3 1 7 12 3 1 7 13 3 1 7 14 3 1 7 15 3 2 0 0 0 2 0 1 0 2 0 2 0 2 0 3 0 2 0 4 0 2 0 5 0 2 0 6 0 2 0 7 0 2 0 8 0 2 0 9 0 2 0 10 0 2 0 11 0 2 0 12 0 2 0 13 0 2 0 14 0 2 0 15 0 2 1 0 0 2 1 1 0 2 1 2 0 2 1 3 0 2 1 4 0 2 1 5 0 2 1 6 0 2 1 7 0 2 1 8 0 2 1 9 0 2 1 10 0 2 1 11 0 2 1 12 0 2 1 13 0 2 1 14 0 2 1 15 0 2 2 0 0 2 2 1 0 2 2 2 0 2 2 3 0 2 2 4 0 2 2 5 0 2 2 6 0 2 2 7 0 2 2 8 0 2 2 9 0 2 2 10 0 2 2 11 0 2 2 12 
0 2 2 13 0 2 2 14 0 2 2 15 0 2 3 0 0 2 3 1 0 2 3 2 0 2 3 3 0 2 3 4 0 2 3 5 0 2 3 6 0 2 3 7 0 2 3 8 0 2 3 9 0 2 3 10 0 2 3 11 0 2 3 12 0 2 3 13 0 2 3 14 0 2 3 15 0 2 4 0 0 2 4 1 0 2 4 2 0 2 4 3 0 2 4 4 0 2 4 5 0 2 4 6 0 2 4 7 0 2 4 8 0 2 4 9 0 2 4 10 0 2 4 11 0 2 4 12 0 2 4 13 0 2 4 14 0 2 4 15 0 2 5 0 0 2 5 1 0 2 5 2 0 2 5 3 0 2 5 4 0 2 5 5 0 2 5 6 0 2 5 7 0 2 5 8 0 2 5 9 0 2 5 10 0 2 5 11 0 2 5 12 0 2 5 13 0 2 5 14 0 2 5 15 0 2 6 0 0 2 6 1 0 2 6 2 0 2 6 3 0 2 6 4 0 2 6 5 0 2 6 6 0 2 6 7 0 2 6 8 0 2 6 9 0 2 6 10 0 2 6 11 0 2 6 12 0 2 6 13 0 2 6 14 0 2 6 15 0 2 7 0 0 2 7 1 0 2 7 2 0 2 7 3 0 2 7 4 0 2 7 5 0 2 7 6 0 2 7 7 0 2 7 8 0 2 7 9 0 2 7 10 0 2 7 11 0 2 7 12 0 2 7 13 0 2 7 14 0 2 7 15 0 2 0 0 1 2 0 1 1 2 0 2 1 2 0 3 1 2 0 4 1 2 0 5 1 2 0 6 1 2 0 7 1 2 0 8 1 2 0 9 1 2 0 10 1 2 0 11 1 2 0 12 1 2 0 13 1 2 0 14 1 2 0 15 1 2 1 0 1 2 1 1 1 2 1 2 1 2 1 3 1 2 1 4 1 2 1 5 1 2 1 6 1 2 1 7 1 2 1 8 1 2 1 9 1 2 1 10 1 2 1 11 1 2 1 12 1 2 1 13 1 2 1 14 1 2 1 15 1 2 2 0 1 2 2 1 1 2 2 2 1 2 2 3 1 2 2 4 1 2 2 5 1 2 2 6 1 2 2 7 1 2 2 8 1 2 2 9 1 2 2 10 1 2 2 11 1 2 2 12 1 2 2 13 1 2 2 14 1 2 2 15 1 2 3 0 1 2 3 1 1 2 3 2 1 2 3 3 1 2 3 4 1 2 3 5 1 2 3 6 1 2 3 7 1 2 3 8 1 2 3 9 1 2 3 10 1 2 3 11 1 2 3 12 1 2 3 13 1 2 3 14 1 2 3 15 1 2 4 0 1 2 4 1 1 2 4 2 1 2 4 3 1 2 4 4 1 2 4 5 1 2 4 6 1 2 4 7 1 2 4 8 1 2 4 9 1 2 4 10 1 2 4 11 1 2 4 12 1 2 4 13 1 2 4 14 1 2 4 15 1 2 5 0 1 2 5 1 1 2 5 2 1 2 5 3 1 2 5 4 1 2 5 5 1 2 5 6 1 2 5 7 1 2 5 8 1 2 5 9 1 2 5 10 1 2 5 11 1 2 5 12 1 2 5 13 1 2 5 14 1 2 5 15 1 2 6 0 1 2 6 1 1 2 6 2 1 2 6 3 1 2 6 4 1 2 6 5 1 2 6 6 1 2 6 7 1 2 6 8 1 2 6 9 1 2 6 10 1 2 6 11 1 2 6 12 1 2 6 13 1 2 6 14 1 2 6 15 1 2 7 0 1 2 7 1 1 2 7 2 1 2 7 3 1 2 7 4 1 2 7 5 1 2 7 6 1 2 7 7 1 2 7 8 1 2 7 9 1 2 7 10 1 2 7 11 1 2 7 12 1 2 7 13 1 2 7 14 1 2 7 15 1 2 0 0 2 2 0 1 2 2 0 2 2 2 0 3 2 2 0 4 2 2 0 5 2 2 0 6 2 2 0 7 2 2 0 8 2 2 0 9 2 2 0 10 2 2 0 11 2 2 0 12 2 2 0 13 2 2 0 14 2 2 0 15 2 2 1 0 2 2 1 1 2 2 1 2 2 2 1 3 2 2 1 4 2 2 1 5 2 2 1 6 2 2 1 7 2 2 1 8 2 2 1 9 2 2 1 10 2 2 1 
11 2 2 1 12 2 2 1 13 2 2 1 14 2 2 1 15 2 2 2 0 2 2 2 1 2 2 2 2 2 2 2 3 2 2 2 4 2 2 2 5 2 2 2 6 2 2 2 7 2 2 2 8 2 2 2 9 2 2 2 10 2 2 2 11 2 2 2 12 2 2 2 13 2 2 2 14 2 2 2 15 2 2 3 0 2 2 3 1 2 2 3 2 2 2 3 3 2 2 3 4 2 2 3 5 2 2 3 6 2 2 3 7 2 2 3 8 2 2 3 9 2 2 3 10 2 2 3 11 2 2 3 12 2 2 3 13 2 2 3 14 2 2 3 15 2 2 4 0 2 2 4 1 2 2 4 2 2 2 4 3 2 2 4 4 2 2 4 5 2 2 4 6 2 2 4 7 2 2 4 8 2 2 4 9 2 2 4 10 2 2 4 11 2 2 4 12 2 2 4 13 2 2 4 14 2 2 4 15 2 2 5 0 2 2 5 1 2 2 5 2 2 2 5 3 2 2 5 4 2 2 5 5 2 2 5 6 2 2 5 7 2 2 5 8 2 2 5 9 2 2 5 10 2 2 5 11 2 2 5 12 2 2 5 13 2 2 5 14 2 2 5 15 2 2 6 0 2 2 6 1 2 2 6 2 2 2 6 3 2 2 6 4 2 2 6 5 2 2 6 6 2 2 6 7 2 2 6 8 2 2 6 9 2 2 6 10 2 2 6 11 2 2 6 12 2 2 6 13 2 2 6 14 2 2 6 15 2 2 7 0 2 2 7 1 2 2 7 2 2 2 7 3 2 2 7 4 2 2 7 5 2 2 7 6 2 2 7 7 2 2 7 8 2 2 7 9 2 2 7 10 2 2 7 11 2 2 7 12 2 2 7 13 2 2 7 14 2 2 7 15 2 2 0 0 3 2 0 1 3 2 0 2 3 2 0 3 3 2 0 4 3 2 0 5 3 2 0 6 3 2 0 7 3 2 0 8 3 2 0 9 3 2 0 10 3 2 0 11 3 2 0 12 3 2 0 13 3 2 0 14 3 2 0 15 3 2 1 0 3 2 1 1 3 2 1 2 3 2 1 3 3 2 1 4 3 2 1 5 3 2 1 6 3 2 1 7 3 2 1 8 3 2 1 9 3 2 1 10 3 2 1 11 3 2 1 12 3 2 1 13 3 2 1 14 3 2 1 15 3 2 2 0 3 2 2 1 3 2 2 2 3 2 2 3 3 2 2 4 3 2 2 5 3 2 2 6 3 2 2 7 3 2 2 8 3 2 2 9 3 2 2 10 3 2 2 11 3 2 2 12 3 2 2 13 3 2 2 14 3 2 2 15 3 2 3 0 3 2 3 1 3 2 3 2 3 2 3 3 3 2 3 4 3 2 3 5 3 2 3 6 3 2 3 7 3 2 3 8 3 2 3 9 3 2 3 10 3 2 3 11 3 2 3 12 3 2 3 13 3 2 3 14 3 2 3 15 3 2 4 0 3 2 4 1 3 2 4 2 3 2 4 3 3 2 4 4 3 2 4 5 3 2 4 6 3 2 4 7 3 2 4 8 3 2 4 9 3 2 4 10 3 2 4 11 3 2 4 12 3 2 4 13 3 2 4 14 3 2 4 15 3 2 5 0 3 2 5 1 3 2 5 2 3 2 5 3 3 2 5 4 3 2 5 5 3 2 5 6 3 2 5 7 3 2 5 8 3 2 5 9 3 2 5 10 3 2 5 11 3 2 5 12 3 2 5 13 3 2 5 14 3 2 5 15 3 2 6 0 3 2 6 1 3 2 6 2 3 2 6 3 3 2 6 4 3 2 6 5 3 2 6 6 3 2 6 7 3 2 6 8 3 2 6 9 3 2 6 10 3 2 6 11 3 2 6 12 3 2 6 13 3 2 6 14 3 2 6 15 3 2 7 0 3 2 7 1 3 2 7 2 3 2 7 3 3 2 7 4 3 2 7 5 3 2 7 6 3 2 7 7 3 2 7 8 3 2 7 9 3 2 7 10 3 2 7 11 3 2 7 12 3 2 7 13 3 2 7 14 3 2 7 15 3 3 0 0 0 3 0 1 0 3 0 2 0 3 0 3 0 3 0 4 0 3 0 5 0 3 0 6 0 3 0 7 0 3 0 8 0 3 0 9 0 3 
0 10 0 3 0 11 0 3 0 12 0 3 0 13 0 3 0 14 0 3 0 15 0 3 1 0 0 3 1 1 0 3 1 2 0 3 1 3 0 3 1 4 0 3 1 5 0 3 1 6 0 3 1 7 0 3 1 8 0 3 1 9 0 3 1 10 0 3 1 11 0 3 1 12 0 3 1 13 0 3 1 14 0 3 1 15 0 3 2 0 0 3 2 1 0 3 2 2 0 3 2 3 0 3 2 4 0 3 2 5 0 3 2 6 0 3 2 7 0 3 2 8 0 3 2 9 0 3 2 10 0 3 2 11 0 3 2 12 0 3 2 13 0 3 2 14 0 3 2 15 0 3 3 0 0 3 3 1 0 3 3 2 0 3 3 3 0 3 3 4 0 3 3 5 0 3 3 6 0 3 3 7 0 3 3 8 0 3 3 9 0 3 3 10 0 3 3 11 0 3 3 12 0 3 3 13 0 3 3 14 0 3 3 15 0 3 4 0 0 3 4 1 0 3 4 2 0 3 4 3 0 3 4 4 0 3 4 5 0 3 4 6 0 3 4 7 0 3 4 8 0 3 4 9 0 3 4 10 0 3 4 11 0 3 4 12 0 3 4 13 0 3 4 14 0 3 4 15 0 3 5 0 0 3 5 1 0 3 5 2 0 3 5 3 0 3 5 4 0 3 5 5 0 3 5 6 0 3 5 7 0 3 5 8 0 3 5 9 0 3 5 10 0 3 5 11 0 3 5 12 0 3 5 13 0 3 5 14 0 3 5 15 0 3 6 0 0 3 6 1 0 3 6 2 0 3 6 3 0 3 6 4 0 3 6 5 0 3 6 6 0 3 6 7 0 3 6 8 0 3 6 9 0 3 6 10 0 3 6 11 0 3 6 12 0 3 6 13 0 3 6 14 0 3 6 15 0 3 7 0 0 3 7 1 0 3 7 2 0 3 7 3 0 3 7 4 0 3 7 5 0 3 7 6 0 3 7 7 0 3 7 8 0 3 7 9 0 3 7 10 0 3 7 11 0 3 7 12 0 3 7 13 0 3 7 14 0 3 7 15 0 3 0 0 1 3 0 1 1 3 0 2 1 3 0 3 1 3 0 4 1 3 0 5 1 3 0 6 1 3 0 7 1 3 0 8 1 3 0 9 1 3 0 10 1 3 0 11 1 3 0 12 1 3 0 13 1 3 0 14 1 3 0 15 1 3 1 0 1 3 1 1 1 3 1 2 1 3 1 3 1 3 1 4 1 3 1 5 1 3 1 6 1 3 1 7 1 3 1 8 1 3 1 9 1 3 1 10 1 3 1 11 1 3 1 12 1 3 1 13 1 3 1 14 1 3 1 15 1 3 2 0 1 3 2 1 1 3 2 2 1 3 2 3 1 3 2 4 1 3 2 5 1 3 2 6 1 3 2 7 1 3 2 8 1 3 2 9 1 3 2 10 1 3 2 11 1 3 2 12 1 3 2 13 1 3 2 14 1 3 2 15 1 3 3 0 1 3 3 1 1 3 3 2 1 3 3 3 1 3 3 4 1 3 3 5 1 3 3 6 1 3 3 7 1 3 3 8 1 3 3 9 1 3 3 10 1 3 3 11 1 3 3 12 1 3 3 13 1 3 3 14 1 3 3 15 1 3 4 0 1 3 4 1 1 3 4 2 1 3 4 3 1 3 4 4 1 3 4 5 1 3 4 6 1 3 4 7 1 3 4 8 1 3 4 9 1 3 4 10 1 3 4 11 1 3 4 12 1 3 4 13 1 3 4 14 1 3 4 15 1 3 5 0 1 3 5 1 1 3 5 2 1 3 5 3 1 3 5 4 1 3 5 5 1 3 5 6 1 3 5 7 1 3 5 8 1 3 5 9 1 3 5 10 1 3 5 11 1 3 5 12 1 3 5 13 1 3 5 14 1 3 5 15 1 3 6 0 1 3 6 1 1 3 6 2 1 3 6 3 1 3 6 4 1 3 6 5 1 3 6 6 1 3 6 7 1 3 6 8 1 3 6 9 1 3 6 10 1 3 6 11 1 3 6 12 1 3 6 13 1 3 6 14 1 3 6 15 1 3 7 0 1 3 7 1 1 3 7 2 1 3 7 3 1 3 7 4 1 3 7 5 1 3 7 6 1 3 7 7 1 3 7 8 1 
3 7 9 1 3 7 10 1 3 7 11 1 3 7 12 1 3 7 13 1 3 7 14 1 3 7 15 1 3 0 0 2 3 0 1 2 3 0 2 2 3 0 3 2 3 0 4 2 3 0 5 2 3 0 6 2 3 0 7 2 3 0 8 2 3 0 9 2 3 0 10 2 3 0 11 2 3 0 12 2 3 0 13 2 3 0 14 2 3 0 15 2 3 1 0 2 3 1 1 2 3 1 2 2 3 1 3 2 3 1 4 2 3 1 5 2 3 1 6 2 3 1 7 2 3 1 8 2 3 1 9 2 3 1 10 2 3 1 11 2 3 1 12 2 3 1 13 2 3 1 14 2 3 1 15 2 3 2 0 2 3 2 1 2 3 2 2 2 3 2 3 2 3 2 4 2 3 2 5 2 3 2 6 2 3 2 7 2 3 2 8 2 3 2 9 2 3 2 10 2 3 2 11 2 3 2 12 2 3 2 13 2 3 2 14 2 3 2 15 2 3 3 0 2 3 3 1 2 3 3 2 2 3 3 3 2 3 3 4 2 3 3 5 2 3 3 6 2 3 3 7 2 3 3 8 2 3 3 9 2 3 3 10 2 3 3 11 2 3 3 12 2 3 3 13 2 3 3 14 2 3 3 15 2 3 4 0 2 3 4 1 2 3 4 2 2 3 4 3 2 3 4 4 2 3 4 5 2 3 4 6 2 3 4 7 2 3 4 8 2 3 4 9 2 3 4 10 2 3 4 11 2 3 4 12 2 3 4 13 2 3 4 14 2 3 4 15 2 3 5 0 2 3 5 1 2 3 5 2 2 3 5 3 2 3 5 4 2 3 5 5 2 3 5 6 2 3 5 7 2 3 5 8 2 3 5 9 2 3 5 10 2 3 5 11 2 3 5 12 2 3 5 13 2 3 5 14 2 3 5 15 2 3 6 0 2 3 6 1 2 3 6 2 2 3 6 3 2 3 6 4 2 3 6 5 2 3 6 6 2 3 6 7 2 3 6 8 2 3 6 9 2 3 6 10 2 3 6 11 2 3 6 12 2 3 6 13 2 3 6 14 2 3 6 15 2 3 7 0 2 3 7 1 2 3 7 2 2 3 7 3 2 3 7 4 2 3 7 5 2 3 7 6 2 3 7 7 2 3 7 8 2 3 7 9 2 3 7 10 2 3 7 11 2 3 7 12 2 3 7 13 2 3 7 14 2 3 7 15 2 3 0 0 3 3 0 1 3 3 0 2 3 3 0 3 3 3 0 4 3 3 0 5 3 3 0 6 3 3 0 7 3 3 0 8 3 3 0 9 3 3 0 10 3 3 0 11 3 3 0 12 3 3 0 13 3 3 0 14 3 3 0 15 3 3 1 0 3 3 1 1 3 3 1 2 3 3 1 3 3 3 1 4 3 3 1 5 3 3 1 6 3 3 1 7 3 3 1 8 3 3 1 9 3 3 1 10 3 3 1 11 3 3 1 12 3 3 1 13 3 3 1 14 3 3 1 15 3 3 2 0 3 3 2 1 3 3 2 2 3 3 2 3 3 3 2 4 3 3 2 5 3 3 2 6 3 3 2 7 3 3 2 8 3 3 2 9 3 3 2 10 3 3 2 11 3 3 2 12 3 3 2 13 3 3 2 14 3 3 2 15 3 3 3 0 3 3 3 1 3 3 3 2 3 3 3 3 3 3 3 4 3 3 3 5 3 3 3 6 3 3 3 7 3 3 3 8 3 3 3 9 3 3 3 10 3 3 3 11 3 3 3 12 3 3 3 13 3 3 3 14 3 3 3 15 3 3 4 0 3 3 4 1 3 3 4 2 3 3 4 3 3 3 4 4 3 3 4 5 3 3 4 6 3 3 4 7 3 3 4 8 3 3 4 9 3 3 4 10 3 3 4 11 3 3 4 12 3 3 4 13 3 3 4 14 3 3 4 15 3 3 5 0 3 3 5 1 3 3 5 2 3 3 5 3 3 3 5 4 3 3 5 5 3 3 5 6 3 3 5 7 3 3 5 8 3 3 5 9 3 3 5 10 3 3 5 11 3 3 5 12 3 3 5 13 3 3 5 14 3 3 5 15 3 3 6 0 3 3 6 1 3 3 6 2 3 3 6 3 3 3 6 4 3 3 6 5 3 3 6 6 3 3 6 7 
3 3 6 8 3 3 6 9 3 3 6 10 3 3 6 11 3 3 6 12 3 3 6 13 3 3 6 14 3 3 6 15 3 3 7 0 3 3 7 1 3 3 7 2 3 3 7 3 3 3 7 4 3 3 7 5 3 3 7 6 3 3 7 7 3 3 7 8 3 3 7 9 3 3 7 10 3 3 7 11 3 3 7 12 3 3 7 13 3 3 7 14 3 3 7 15 3 4 0 0 0 4 0 1 0 4 0 2 0 4 0 3 0 4 0 4 0 4 0 5 0 4 0 6 0 4 0 7 0 4 0 8 0 4 0 9 0 4 0 10 0 4 0 11 0 4 0 12 0 4 0 13 0 4 0 14 0 4 0 15 0 4 1 0 0 4 1 1 0 4 1 2 0 4 1 3 0 4 1 4 0 4 1 5 0 4 1 6 0 4 1 7 0 4 1 8 0 4 1 9 0 4 1 10 0 4 1 11 0 4 1 12 0 4 1 13 0 4 1 14 0 4 1 15 0 4 2 0 0 4 2 1 0 4 2 2 0 4 2 3 0 4 2 4 0 4 2 5 0 4 2 6 0 4 2 7 0 4 2 8 0 4 2 9 0 4 2 10 0 4 2 11 0 4 2 12 0 4 2 13 0 4 2 14 0 4 2 15 0 4 3 0 0 4 3 1 0 4 3 2 0 4 3 3 0 4 3 4 0 4 3 5 0 4 3 6 0 4 3 7 0 4 3 8 0 4 3 9 0 4 3 10 0 4 3 11 0 4 3 12 0 4 3 13 0 4 3 14 0 4 3 15 0 4 4 0 0 4 4 1 0 4 4 2 0 4 4 3 0 4 4 4 0 4 4 5 0 4 4 6 0 4 4 7 0 4 4 8 0 4 4 9 0 4 4 10 0 4 4 11 0 4 4 12 0 4 4 13 0 4 4 14 0 4 4 15 0 4 5 0 0 4 5 1 0 4 5 2 0 4 5 3 0 4 5 4 0 4 5 5 0 4 5 6 0 4 5 7 0 4 5 8 0 4 5 9 0 4 5 10 0 4 5 11 0 4 5 12 0 4 5 13 0 4 5 14 0 4 5 15 0 4 6 0 0 4 6 1 0 4 6 2 0 4 6 3 0 4 6 4 0 4 6 5 0 4 6 6 0 4 6 7 0 4 6 8 0 4 6 9 0 4 6 10 0 4 6 11 0 4 6 12 0 4 6 13 0 4 6 14 0 4 6 15 0 4 7 0 0 4 7 1 0 4 7 2 0 4 7 3 0 4 7 4 0 4 7 5 0 4 7 6 0 4 7 7 0 4 7 8 0 4 7 9 0 4 7 10 0 4 7 11 0 4 7 12 0 4 7 13 0 4 7 14 0 4 7 15 0 4 0 0 1 4 0 1 1 4 0 2 1 4 0 3 1 4 0 4 1 4 0 5 1 4 0 6 1 4 0 7 1 4 0 8 1 4 0 9 1 4 0 10 1 4 0 11 1 4 0 12 1 4 0 13 1 4 0 14 1 4 0 15 1 4 1 0 1 4 1 1 1 4 1 2 1 4 1 3 1 4 1 4 1 4 1 5 1 4 1 6 1 4 1 7 1 4 1 8 1 4 1 9 1 4 1 10 1 4 1 11 1 4 1 12 1 4 1 13 1 4 1 14 1 4 1 15 1 4 2 0 1 4 2 1 1 4 2 2 1 4 2 3 1 4 2 4 1 4 2 5 1 4 2 6 1 4 2 7 1 4 2 8 1 4 2 9 1 4 2 10 1 4 2 11 1 4 2 12 1 4 2 13 1 4 2 14 1 4 2 15 1 4 3 0 1 4 3 1 1 4 3 2 1 4 3 3 1 4 3 4 1 4 3 5 1 4 3 6 1 4 3 7 1 4 3 8 1 4 3 9 1 4 3 10 1 4 3 11 1 4 3 12 1 4 3 13 1 4 3 14 1 4 3 15 1 4 4 0 1 4 4 1 1 4 4 2 1 4 4 3 1 4 4 4 1 4 4 5 1 4 4 6 1 4 4 7 1 4 4 8 1 4 4 9 1 4 4 10 1 4 4 11 1 4 4 12 1 4 4 13 1 4 4 14 1 4 4 15 1 4 5 0 1 4 5 1 1 4 5 2 1 4 5 3 1 4 5 4 1 4 5 5 1 4 5 
6 1 4 5 7 1 4 5 8 1 4 5 9 1 4 5 10 1 4 5 11 1 4 5 12 1 4 5 13 1 4 5 14 1 4 5 15 1 4 6 0 1 4 6 1 1 4 6 2 1 4 6 3 1 4 6 4 1 4 6 5 1 4 6 6 1 4 6 7 1 4 6 8 1 4 6 9 1 4 6 10 1 4 6 11 1 4 6 12 1 4 6 13 1 4 6 14 1 4 6 15 1 4 7 0 1 4 7 1 1 4 7 2 1 4 7 3 1 4 7 4 1 4 7 5 1 4 7 6 1 4 7 7 1 4 7 8 1 4 7 9 1 4 7 10 1 4 7 11 1 4 7 12 1 4 7 13 1 4 7 14 1 4 7 15 1 4 0 0 2 4 0 1 2 4 0 2 2 4 0 3 2 4 0 4 2 4 0 5 2 4 0 6 2 4 0 7 2 4 0 8 2 4 0 9 2 4 0 10 2 4 0 11 2 4 0 12 2 4 0 13 2 4 0 14 2 4 0 15 2 4 1 0 2 4 1 1 2 4 1 2 2 4 1 3 2 4 1 4 2 4 1 5 2 4 1 6 2 4 1 7 2 4 1 8 2 4 1 9 2 4 1 10 2 4 1 11 2 4 1 12 2 4 1 13 2 4 1 14 2 4 1 15 2 4 2 0 2 4 2 1 2 4 2 2 2 4 2 3 2 4 2 4 2 4 2 5 2 4 2 6 2 4 2 7 2 4 2 8 2 4 2 9 2 4 2 10 2 4 2 11 2 4 2 12 2 4 2 13 2 4 2 14 2 4 2 15 2 4 3 0 2 4 3 1 2 4 3 2 2 4 3 3 2 4 3 4 2 4 3 5 2 4 3 6 2 4 3 7 2 4 3 8 2 4 3 9 2 4 3 10 2 4 3 11 2 4 3 12 2 4 3 13 2 4 3 14 2 4 3 15 2 4 4 0 2 4 4 1 2 4 4 2 2 4 4 3 2 4 4 4 2 4 4 5 2 4 4 6 2 4 4 7 2 4 4 8 2 4 4 9 2 4 4 10 2 4 4 11 2 4 4 12 2 4 4 13 2 4 4 14 2 4 4 15 2 4 5 0 2 4 5 1 2 4 5 2 2 4 5 3 2 4 5 4 2 4 5 5 2 4 5 6 2 4 5 7 2 4 5 8 2 4 5 9 2 4 5 10 2 4 5 11 2 4 5 12 2 4 5 13 2 4 5 14 2 4 5 15 2 4 6 0 2 4 6 1 2 4 6 2 2 4 6 3 2 4 6 4 2 4 6 5 2 4 6 6 2 4 6 7 2 4 6 8 2 4 6 9 2 4 6 10 2 4 6 11 2 4 6 12 2 4 6 13 2 4 6 14 2 4 6 15 2 4 7 0 2 4 7 1 2 4 7 2 2 4 7 3 2 4 7 4 2 4 7 5 2 4 7 6 2 4 7 7 2 4 7 8 2 4 7 9 2 4 7 10 2 4 7 11 2 4 7 12 2 4 7 13 2 4 7 14 2 4 7 15 2 4 0 0 3 4 0 1 3 4 0 2 3 4 0 3 3 4 0 4 3 4 0 5 3 4 0 6 3 4 0 7 3 4 0 8 3 4 0 9 3 4 0 10 3 4 0 11 3 4 0 12 3 4 0 13 3 4 0 14 3 4 0 15 3 4 1 0 3 4 1 1 3 4 1 2 3 4 1 3 3 4 1 4 3 4 1 5 3 4 1 6 3 4 1 7 3 4 1 8 3 4 1 9 3 4 1 10 3 4 1 11 3 4 1 12 3 4 1 13 3 4 1 14 3 4 1 15 3 4 2 0 3 4 2 1 3 4 2 2 3 4 2 3 3 4 2 4 3 4 2 5 3 4 2 6 3 4 2 7 3 4 2 8 3 4 2 9 3 4 2 10 3 4 2 11 3 4 2 12 3 4 2 13 3 4 2 14 3 4 2 15 3 4 3 0 3 4 3 1 3 4 3 2 3 4 3 3 3 4 3 4 3 4 3 5 3 4 3 6 3 4 3 7 3 4 3 8 3 4 3 9 3 4 3 10 3 4 3 11 3 4 3 12 3 4 3 13 3 4 3 14 3 4 3 15 3 4 4 0 3 4 4 1 3 4 4 2 3 4 4 3 3 4 4 4 3 4 
4 5 3 4 4 6 3 4 4 7 3 4 4 8 3 4 4 9 3 4 4 10 3 4 4 11 3 4 4 12 3 4 4 13 3 4 4 14 3 4 4 15 3 4 5 0 3 4 5 1 3 4 5 2 3 4 5 3 3 4 5 4 3 4 5 5 3 4 5 6 3 4 5 7 3 4 5 8 3 4 5 9 3 4 5 10 3 4 5 11 3 4 5 12 3 4 5 13 3 4 5 14 3 4 5 15 3 4 6 0 3 4 6 1 3 4 6 2 3 4 6 3 3 4 6 4 3 4 6 5 3 4 6 6 3 4 6 7 3 4 6 8 3 4 6 9 3 4 6 10 3 4 6 11 3 4 6 12 3 4 6 13 3 4 6 14 3 4 6 15 3 4 7 0 3 4 7 1 3 4 7 2 3 4 7 3 3 4 7 4 3 4 7 5 3 4 7 6 3 4 7 7 3 4 7 8 3 4 7 9 3 4 7 10 3 4 7 11 3 4 7 12 3 4 7 13 3 4 7 14 3 4 7 15 3 5 0 0 0 5 0 1 0 5 0 2 0 5 0 3 0 5 0 4 0 5 0 5 0 5 0 6 0 5 0 7 0 5 0 8 0 5 0 9 0 5 0 10 0 5 0 11 0 5 0 12 0 5 0 13 0 5 0 14 0 5 0 15 0 5 1 0 0 5 1 1 0 5 1 2 0 5 1 3 0 5 1 4 0 5 1 5 0 5 1 6 0 5 1 7 0 5 1 8 0 5 1 9 0 5 1 10 0 5 1 11 0 5 1 12 0 5 1 13 0 5 1 14 0 5 1 15 0 5 2 0 0 5 2 1 0 5 2 2 0 5 2 3 0 5 2 4 0 5 2 5 0 5 2 6 0 5 2 7 0 5 2 8 0 5 2 9 0 5 2 10 0 5 2 11 0 5 2 12 0 5 2 13 0 5 2 14 0 5 2 15 0 5 3 0 0 5 3 1 0 5 3 2 0 5 3 3 0 5 3 4 0 5 3 5 0 5 3 6 0 5 3 7 0 5 3 8 0 5 3 9 0 5 3 10 0 5 3 11 0 5 3 12 0 5 3 13 0 5 3 14 0 5 3 15 0 5 4 0 0 5 4 1 0 5 4 2 0 5 4 3 0 5 4 4 0 5 4 5 0 5 4 6 0 5 4 7 0 5 4 8 0 5 4 9 0 5 4 10 0 5 4 11 0 5 4 12 0 5 4 13 0 5 4 14 0 5 4 15 0 5 5 0 0 5 5 1 0 5 5 2 0 5 5 3 0 5 5 4 0 5 5 5 0 5 5 6 0 5 5 7 0 5 5 8 0 5 5 9 0 5 5 10 0 5 5 11 0 5 5 12 0 5 5 13 0 5 5 14 0 5 5 15 0 5 6 0 0 5 6 1 0 5 6 2 0 5 6 3 0 5 6 4 0 5 6 5 0 5 6 6 0 5 6 7 0 5 6 8 0 5 6 9 0 5 6 10 0 5 6 11 0 5 6 12 0 5 6 13 0 5 6 14 0 5 6 15 0 5 7 0 0 5 7 1 0 5 7 2 0 5 7 3 0 5 7 4 0 5 7 5 0 5 7 6 0 5 7 7 0 5 7 8 0 5 7 9 0 5 7 10 0 5 7 11 0 5 7 12 0 5 7 13 0 5 7 14 0 5 7 15 0 5 0 0 1 5 0 1 1 5 0 2 1 5 0 3 1 5 0 4 1 5 0 5 1 5 0 6 1 5 0 7 1 5 0 8 1 5 0 9 1 5 0 10 1 5 0 11 1 5 0 12 1 5 0 13 1 5 0 14 1 5 0 15 1 5 1 0 1 5 1 1 1 5 1 2 1 5 1 3 1 5 1 4 1 5 1 5 1 5 1 6 1 5 1 7 1 5 1 8 1 5 1 9 1 5 1 10 1 5 1 11 1 5 1 12 1 5 1 13 1 5 1 14 1 5 1 15 1 5 2 0 1 5 2 1 1 5 2 2 1 5 2 3 1 5 2 4 1 5 2 5 1 5 2 6 1 5 2 7 1 5 2 8 1 5 2 9 1 5 2 10 1 5 2 11 1 5 2 12 1 5 2 13 1 5 2 14 1 5 2 15 1 5 3 0 1 5 3 1 1 5 3 2 1 5 3 3 1 
5 3 4 1 5 3 5 1 5 3 6 1 5 3 7 1 5 3 8 1 5 3 9 1 5 3 10 1 5 3 11 1 5 3 12 1 5 3 13 1 5 3 14 1 5 3 15 1 5 4 0 1 5 4 1 1 5 4 2 1 5 4 3 1 5 4 4 1 5 4 5 1 5 4 6 1 5 4 7 1 5 4 8 1 5 4 9 1 5 4 10 1 5 4 11 1 5 4 12 1 5 4 13 1 5 4 14 1 5 4 15 1 5 5 0 1 5 5 1 1 5 5 2 1 5 5 3 1 5 5 4 1 5 5 5 1 5 5 6 1 5 5 7 1 5 5 8 1 5 5 9 1 5 5 10 1 5 5 11 1 5 5 12 1 5 5 13 1 5 5 14 1 5 5 15 1 5 6 0 1 5 6 1 1 5 6 2 1 5 6 3 1 5 6 4 1 5 6 5 1 5 6 6 1 5 6 7 1 5 6 8 1 5 6 9 1 5 6 10 1 5 6 11 1 5 6 12 1 5 6 13 1 5 6 14 1 5 6 15 1 5 7 0 1 5 7 1 1 5 7 2 1 5 7 3 1 5 7 4 1 5 7 5 1 5 7 6 1 5 7 7 1 5 7 8 1 5 7 9 1 5 7 10 1 5 7 11 1 5 7 12 1 5 7 13 1 5 7 14 1 5 7 15 1 5 0 0 2 5 0 1 2 5 0 2 2 5 0 3 2 5 0 4 2 5 0 5 2 5 0 6 2 5 0 7 2 5 0 8 2 5 0 9 2 5 0 10 2 5 0 11 2 5 0 12 2 5 0 13 2 5 0 14 2 5 0 15 2 5 1 0 2 5 1 1 2 5 1 2 2 5 1 3 2 5 1 4 2 5 1 5 2 5 1 6 2 5 1 7 2 5 1 8 2 5 1 9 2 5 1 10 2 5 1 11 2 5 1 12 2 5 1 13 2 5 1 14 2 5 1 15 2 5 2 0 2 5 2 1 2 5 2 2 2 5 2 3 2 5 2 4 2 5 2 5 2 5 2 6 2 5 2 7 2 5 2 8 2 5 2 9 2 5 2 10 2 5 2 11 2 5 2 12 2 5 2 13 2 5 2 14 2 5 2 15 2 5 3 0 2 5 3 1 2 5 3 2 2 5 3 3 2 5 3 4 2 5 3 5 2 5 3 6 2 5 3 7 2 5 3 8 2 5 3 9 2 5 3 10 2 5 3 11 2 5 3 12 2 5 3 13 2 5 3 14 2 5 3 15 2 5 4 0 2 5 4 1 2 5 4 2 2 5 4 3 2 5 4 4 2 5 4 5 2 5 4 6 2 5 4 7 2 5 4 8 2 5 4 9 2 5 4 10 2 5 4 11 2 5 4 12 2 5 4 13 2 5 4 14 2 5 4 15 2 5 5 0 2 5 5 1 2 5 5 2 2 5 5 3 2 5 5 4 2 5 5 5 2 5 5 6 2 5 5 7 2 5 5 8 2 5 5 9 2 5 5 10 2 5 5 11 2 5 5 12 2 5 5 13 2 5 5 14 2 5 5 15 2 5 6 0 2 5 6 1 2 5 6 2 2 5 6 3 2 5 6 4 2 5 6 5 2 5 6 6 2 5 6 7 2 5 6 8 2 5 6 9 2 5 6 10 2 5 6 11 2 5 6 12 2 5 6 13 2 5 6 14 2 5 6 15 2 5 7 0 2 5 7 1 2 5 7 2 2 5 7 3 2 5 7 4 2 5 7 5 2 5 7 6 2 5 7 7 2 5 7 8 2 5 7 9 2 5 7 10 2 5 7 11 2 5 7 12 2 5 7 13 2 5 7 14 2 5 7 15 2 5 0 0 3 5 0 1 3 5 0 2 3 5 0 3 3 5 0 4 3 5 0 5 3 5 0 6 3 5 0 7 3 5 0 8 3 5 0 9 3 5 0 10 3 5 0 11 3 5 0 12 3 5 0 13 3 5 0 14 3 5 0 15 3 5 1 0 3 5 1 1 3 5 1 2 3 5 1 3 3 5 1 4 3 5 1 5 3 5 1 6 3 5 1 7 3 5 1 8 3 5 1 9 3 5 1 10 3 5 1 11 3 5 1 12 3 5 1 13 3 5 1 14 3 5 1 15 3 5 2 0 3 5 2 1 3 5 2 2 
3 5 2 3 3 5 2 4 3 5 2 5 3 5 2 6 3 5 2 7 3 5 2 8 3 5 2 9 3 5 2 10 3 5 2 11 3 5 2 12 3 5 2 13 3 5 2 14 3 5 2 15 3 5 3 0 3 5 3 1 3 5 3 2 3 5 3 3 3 5 3 4 3 5 3 5 3 5 3 6 3 5 3 7 3 5 3 8 3 5 3 9 3 5 3 10 3 5 3 11 3 5 3 12 3 5 3 13 3 5 3 14 3 5 3 15 3 5 4 0 3 5 4 1 3 5 4 2 3 5 4 3 3 5 4 4 3 5 4 5 3 5 4 6 3 5 4 7 3 5 4 8 3 5 4 9 3 5 4 10 3 5 4 11 3 5 4 12 3 5 4 13 3 5 4 14 3 5 4 15 3 5 5 0 3 5 5 1 3 5 5 2 3 5 5 3 3 5 5 4 3 5 5 5 3 5 5 6 3 5 5 7 3 5 5 8 3 5 5 9 3 5 5 10 3 5 5 11 3 5 5 12 3 5 5 13 3 5 5 14 3 5 5 15 3 5 6 0 3 5 6 1 3 5 6 2 3 5 6 3 3 5 6 4 3 5 6 5 3 5 6 6 3 5 6 7 3 5 6 8 3 5 6 9 3 5 6 10 3 5 6 11 3 5 6 12 3 5 6 13 3 5 6 14 3 5 6 15 3 5 7 0 3 5 7 1 3 5 7 2 3 5 7 3 3 5 7 4 3 5 7 5 3 5 7 6 3 5 7 7 3 5 7 8 3 5 7 9 3 5 7 10 3 5 7 11 3 5 7 12 3 5 7 13 3 5 7 14 3 5 7 15 3 6 0 0 0 6 0 1 0 6 0 2 0 6 0 3 0 6 0 4 0 6 0 5 0 6 0 6 0 6 0 7 0 6 0 8 0 6 0 9 0 6 0 10 0 6 0 11 0 6 0 12 0 6 0 13 0 6 0 14 0 6 0 15 0 6 1 0 0 6 1 1 0 6 1 2 0 6 1 3 0 6 1 4 0 6 1 5 0 6 1 6 0 6 1 7 0 6 1 8 0 6 1 9 0 6 1 10 0 6 1 11 0 6 1 12 0 6 1 13 0 6 1 14 0 6 1 15 0 6 2 0 0 6 2 1 0 6 2 2 0 6 2 3 0 6 2 4 0 6 2 5 0 6 2 6 0 6 2 7 0 6 2 8 0 6 2 9 0 6 2 10 0 6 2 11 0 6 2 12 0 6 2 13 0 6 2 14 0 6 2 15 0 6 3 0 0 6 3 1 0 6 3 2 0 6 3 3 0 6 3 4 0 6 3 5 0 6 3 6 0 6 3 7 0 6 3 8 0 6 3 9 0 6 3 10 0 6 3 11 0 6 3 12 0 6 3 13 0 6 3 14 0 6 3 15 0 6 4 0 0 6 4 1 0 6 4 2 0 6 4 3 0 6 4 4 0 6 4 5 0 6 4 6 0 6 4 7 0 6 4 8 0 6 4 9 0 6 4 10 0 6 4 11 0 6 4 12 0 6 4 13 0 6 4 14 0 6 4 15 0 6 5 0 0 6 5 1 0 6 5 2 0 6 5 3 0 6 5 4 0 6 5 5 0 6 5 6 0 6 5 7 0 6 5 8 0 6 5 9 0 6 5 10 0 6 5 11 0 6 5 12 0 6 5 13 0 6 5 14 0 6 5 15 0 6 6 0 0 6 6 1 0 6 6 2 0 6 6 3 0 6 6 4 0 6 6 5 0 6 6 6 0 6 6 7 0 6 6 8 0 6 6 9 0 6 6 10 0 6 6 11 0 6 6 12 0 6 6 13 0 6 6 14 0 6 6 15 0 6 7 0 0 6 7 1 0 6 7 2 0 6 7 3 0 6 7 4 0 6 7 5 0 6 7 6 0 6 7 7 0 6 7 8 0 6 7 9 0 6 7 10 0 6 7 11 0 6 7 12 0 6 7 13 0 6 7 14 0 6 7 15 0 6 0 0 1 6 0 1 1 6 0 2 1 6 0 3 1 6 0 4 1 6 0 5 1 6 0 6 1 6 0 7 1 6 0 8 1 6 0 9 1 6 0 10 1 6 0 11 1 6 0 12 1 6 0 13 1 6 0 14 1 6 0 15 1 6 1 0 1 6 1 
1 1 6 1 2 1 6 1 3 1 6 1 4 1 6 1 5 1 6 1 6 1 6 1 7 1 6 1 8 1 6 1 9 1 6 1 10 1 6 1 11 1 6 1 12 1 6 1 13 1 6 1 14 1 6 1 15 1 6 2 0 1 6 2 1 1 6 2 2 1 6 2 3 1 6 2 4 1 6 2 5 1 6 2 6 1 6 2 7 1 6 2 8 1 6 2 9 1 6 2 10 1 6 2 11 1 6 2 12 1 6 2 13 1 6 2 14 1 6 2 15 1 6 3 0 1 6 3 1 1 6 3 2 1 6 3 3 1 6 3 4 1 6 3 5 1 6 3 6 1 6 3 7 1 6 3 8 1 6 3 9 1 6 3 10 1 6 3 11 1 6 3 12 1 6 3 13 1 6 3 14 1 6 3 15 1 6 4 0 1 6 4 1 1 6 4 2 1 6 4 3 1 6 4 4 1 6 4 5 1 6 4 6 1 6 4 7 1 6 4 8 1 6 4 9 1 6 4 10 1 6 4 11 1 6 4 12 1 6 4 13 1 6 4 14 1 6 4 15 1 6 5 0 1 6 5 1 1 6 5 2 1 6 5 3 1 6 5 4 1 6 5 5 1 6 5 6 1 6 5 7 1 6 5 8 1 6 5 9 1 6 5 10 1 6 5 11 1 6 5 12 1 6 5 13 1 6 5 14 1 6 5 15 1 6 6 0 1 6 6 1 1 6 6 2 1 6 6 3 1 6 6 4 1 6 6 5 1 6 6 6 1 6 6 7 1 6 6 8 1 6 6 9 1 6 6 10 1 6 6 11 1 6 6 12 1 6 6 13 1 6 6 14 1 6 6 15 1 6 7 0 1 6 7 1 1 6 7 2 1 6 7 3 1 6 7 4 1 6 7 5 1 6 7 6 1 6 7 7 1 6 7 8 1 6 7 9 1 6 7 10 1 6 7 11 1 6 7 12 1 6 7 13 1 6 7 14 1 6 7 15 1 6 0 0 2 6 0 1 2 6 0 2 2 6 0 3 2 6 0 4 2 6 0 5 2 6 0 6 2 6 0 7 2 6 0 8 2 6 0 9 2 6 0 10 2 6 0 11 2 6 0 12 2 6 0 13 2 6 0 14 2 6 0 15 2 6 1 0 2 6 1 1 2 6 1 2 2 6 1 3 2 6 1 4 2 6 1 5 2 6 1 6 2 6 1 7 2 6 1 8 2 6 1 9 2 6 1 10 2 6 1 11 2 6 1 12 2 6 1 13 2 6 1 14 2 6 1 15 2 6 2 0 2 6 2 1 2 6 2 2 2 6 2 3 2 6 2 4 2 6 2 5 2 6 2 6 2 6 2 7 2 6 2 8 2 6 2 9 2 6 2 10 2 6 2 11 2 6 2 12 2 6 2 13 2 6 2 14 2 6 2 15 2 6 3 0 2 6 3 1 2 6 3 2 2 6 3 3 2 6 3 4 2 6 3 5 2 6 3 6 2 6 3 7 2 6 3 8 2 6 3 9 2 6 3 10 2 6 3 11 2 6 3 12 2 6 3 13 2 6 3 14 2 6 3 15 2 6 4 0 2 6 4 1 2 6 4 2 2 6 4 3 2 6 4 4 2 6 4 5 2 6 4 6 2 6 4 7 2 6 4 8 2 6 4 9 2 6 4 10 2 6 4 11 2 6 4 12 2 6 4 13 2 6 4 14 2 6 4 15 2 6 5 0 2 6 5 1 2 6 5 2 2 6 5 3 2 6 5 4 2 6 5 5 2 6 5 6 2 6 5 7 2 6 5 8 2 6 5 9 2 6 5 10 2 6 5 11 2 6 5 12 2 6 5 13 2 6 5 14 2 6 5 15 2 6 6 0 2 6 6 1 2 6 6 2 2 6 6 3 2 6 6 4 2 6 6 5 2 6 6 6 2 6 6 7 2 6 6 8 2 6 6 9 2 6 6 10 2 6 6 11 2 6 6 12 2 6 6 13 2 6 6 14 2 6 6 15 2 6 7 0 2 6 7 1 2 6 7 2 2 6 7 3 2 6 7 4 2 6 7 5 2 6 7 6 2 6 7 7 2 6 7 8 2 6 7 9 2 6 7 10 2 6 7 11 2 6 7 12 2 6 7 13 2 6 7 14 2 6 7 15 2 6 
0 0 3 6 0 1 3 6 0 2 3 6 0 3 3 6 0 4 3 6 0 5 3 6 0 6 3 6 0 7 3 6 0 8 3 6 0 9 3 6 0 10 3 6 0 11 3 6 0 12 3 6 0 13 3 6 0 14 3 6 0 15 3 6 1 0 3 6 1 1 3 6 1 2 3 6 1 3 3 6 1 4 3 6 1 5 3 6 1 6 3 6 1 7 3 6 1 8 3 6 1 9 3 6 1 10 3 6 1 11 3 6 1 12 3 6 1 13 3 6 1 14 3 6 1 15 3 6 2 0 3 6 2 1 3 6 2 2 3 6 2 3 3 6 2 4 3 6 2 5 3 6 2 6 3 6 2 7 3 6 2 8 3 6 2 9 3 6 2 10 3 6 2 11 3 6 2 12 3 6 2 13 3 6 2 14 3 6 2 15 3 6 3 0 3 6 3 1 3 6 3 2 3 6 3 3 3 6 3 4 3 6 3 5 3 6 3 6 3 6 3 7 3 6 3 8 3 6 3 9 3 6 3 10 3 6 3 11 3 6 3 12 3 6 3 13 3 6 3 14 3 6 3 15 3 6 4 0 3 6 4 1 3 6 4 2 3 6 4 3 3 6 4 4 3 6 4 5 3 6 4 6 3 6 4 7 3 6 4 8 3 6 4 9 3 6 4 10 3 6 4 11 3 6 4 12 3 6 4 13 3 6 4 14 3 6 4 15 3 6 5 0 3 6 5 1 3 6 5 2 3 6 5 3 3 6 5 4 3 6 5 5 3 6 5 6 3 6 5 7 3 6 5 8 3 6 5 9 3 6 5 10 3 6 5 11 3 6 5 12 3 6 5 13 3 6 5 14 3 6 5 15 3 6 6 0 3 6 6 1 3 6 6 2 3 6 6 3 3 6 6 4 3 6 6 5 3 6 6 6 3 6 6 7 3 6 6 8 3 6 6 9 3 6 6 10 3 6 6 11 3 6 6 12 3 6 6 13 3 6 6 14 3 6 6 15 3 6 7 0 3 6 7 1 3 6 7 2 3 6 7 3 3 6 7 4 3 6 7 5 3 6 7 6 3 6 7 7 3 6 7 8 3 6 7 9 3 6 7 10 3 6 7 11 3 6 7 12 3 6 7 13 3 6 7 14 3 6 7 15 3 7 0 0 0 7 0 1 0 7 0 2 0 7 0 3 0 7 0 4 0 7 0 5 0 7 0 6 0 7 0 7 0 7 0 8 0 7 0 9 0 7 0 10 0 7 0 11 0 7 0 12 0 7 0 13 0 7 0 14 0 7 0 15 0 7 1 0 0 7 1 1 0 7 1 2 0 7 1 3 0 7 1 4 0 7 1 5 0 7 1 6 0 7 1 7 0 7 1 8 0 7 1 9 0 7 1 10 0 7 1 11 0 7 1 12 0 7 1 13 0 7 1 14 0 7 1 15 0 7 2 0 0 7 2 1 0 7 2 2 0 7 2 3 0 7 2 4 0 7 2 5 0 7 2 6 0 7 2 7 0 7 2 8 0 7 2 9 0 7 2 10 0 7 2 11 0 7 2 12 0 7 2 13 0 7 2 14 0 7 2 15 0 7 3 0 0 7 3 1 0 7 3 2 0 7 3 3 0 7 3 4 0 7 3 5 0 7 3 6 0 7 3 7 0 7 3 8 0 7 3 9 0 7 3 10 0 7 3 11 0 7 3 12 0 7 3 13 0 7 3 14 0 7 3 15 0 7 4 0 0 7 4 1 0 7 4 2 0 7 4 3 0 7 4 4 0 7 4 5 0 7 4 6 0 7 4 7 0 7 4 8 0 7 4 9 0 7 4 10 0 7 4 11 0 7 4 12 0 7 4 13 0 7 4 14 0 7 4 15 0 7 5 0 0 7 5 1 0 7 5 2 0 7 5 3 0 7 5 4 0 7 5 5 0 7 5 6 0 7 5 7 0 7 5 8 0 7 5 9 0 7 5 10 0 7 5 11 0 7 5 12 0 7 5 13 0 7 5 14 0 7 5 15 0 7 6 0 0 7 6 1 0 7 6 2 0 7 6 3 0 7 6 4 0 7 6 5 0 7 6 6 0 7 6 7 0 7 6 8 0 7 6 9 0 7 6 10 0 7 6 11 0 7 6 12 0 7 6 13 0 7 6 14 0 
7 6 15 0 7 7 0 0 7 7 1 0 7 7 2 0 7 7 3 0 7 7 4 0 7 7 5 0 7 7 6 0 7 7 7 0 7 7 8 0 7 7 9 0 7 7 10 0 7 7 11 0 7 7 12 0 7 7 13 0 7 7 14 0 7 7 15 0 7 0 0 1 7 0 1 1 7 0 2 1 7 0 3 1 7 0 4 1 7 0 5 1 7 0 6 1 7 0 7 1 7 0 8 1 7 0 9 1 7 0 10 1 7 0 11 1 7 0 12 1 7 0 13 1 7 0 14 1 7 0 15 1 7 1 0 1 7 1 1 1 7 1 2 1 7 1 3 1 7 1 4 1 7 1 5 1 7 1 6 1 7 1 7 1 7 1 8 1 7 1 9 1 7 1 10 1 7 1 11 1 7 1 12 1 7 1 13 1 7 1 14 1 7 1 15 1 7 2 0 1 7 2 1 1 7 2 2 1 7 2 3 1 7 2 4 1 7 2 5 1 7 2 6 1 7 2 7 1 7 2 8 1 7 2 9 1 7 2 10 1 7 2 11 1 7 2 12 1 7 2 13 1 7 2 14 1 7 2 15 1 7 3 0 1 7 3 1 1 7 3 2 1 7 3 3 1 7 3 4 1 7 3 5 1 7 3 6 1 7 3 7 1 7 3 8 1 7 3 9 1 7 3 10 1 7 3 11 1 7 3 12 1 7 3 13 1 7 3 14 1 7 3 15 1 7 4 0 1 7 4 1 1 7 4 2 1 7 4 3 1 7 4 4 1 7 4 5 1 7 4 6 1 7 4 7 1 7 4 8 1 7 4 9 1 7 4 10 1 7 4 11 1 7 4 12 1 7 4 13 1 7 4 14 1 7 4 15 1 7 5 0 1 7 5 1 1 7 5 2 1 7 5 3 1 7 5 4 1 7 5 5 1 7 5 6 1 7 5 7 1 7 5 8 1 7 5 9 1 7 5 10 1 7 5 11 1 7 5 12 1 7 5 13 1 7 5 14 1 7 5 15 1 7 6 0 1 7 6 1 1 7 6 2 1 7 6 3 1 7 6 4 1 7 6 5 1 7 6 6 1 7 6 7 1 7 6 8 1 7 6 9 1 7 6 10 1 7 6 11 1 7 6 12 1 7 6 13 1 7 6 14 1 7 6 15 1 7 7 0 1 7 7 1 1 7 7 2 1 7 7 3 1 7 7 4 1 7 7 5 1 7 7 6 1 7 7 7 1 7 7 8 1 7 7 9 1 7 7 10 1 7 7 11 1 7 7 12 1 7 7 13 1 7 7 14 1 7 7 15 1 7 0 0 2 7 0 1 2 7 0 2 2 7 0 3 2 7 0 4 2 7 0 5 2 7 0 6 2 7 0 7 2 7 0 8 2 7 0 9 2 7 0 10 2 7 0 11 2 7 0 12 2 7 0 13 2 7 0 14 2 7 0 15 2 7 1 0 2 7 1 1 2 7 1 2 2 7 1 3 2 7 1 4 2 7 1 5 2 7 1 6 2 7 1 7 2 7 1 8 2 7 1 9 2 7 1 10 2 7 1 11 2 7 1 12 2 7 1 13 2 7 1 14 2 7 1 15 2 7 2 0 2 7 2 1 2 7 2 2 2 7 2 3 2 7 2 4 2 7 2 5 2 7 2 6 2 7 2 7 2 7 2 8 2 7 2 9 2 7 2 10 2 7 2 11 2 7 2 12 2 7 2 13 2 7 2 14 2 7 2 15 2 7 3 0 2 7 3 1 2 7 3 2 2 7 3 3 2 7 3 4 2 7 3 5 2 7 3 6 2 7 3 7 2 7 3 8 2 7 3 9 2 7 3 10 2 7 3 11 2 7 3 12 2 7 3 13 2 7 3 14 2 7 3 15 2 7 4 0 2 7 4 1 2 7 4 2 2 7 4 3 2 7 4 4 2 7 4 5 2 7 4 6 2 7 4 7 2 7 4 8 2 7 4 9 2 7 4 10 2 7 4 11 2 7 4 12 2 7 4 13 2 7 4 14 2 7 4 15 2 7 5 0 2 7 5 1 2 7 5 2 2 7 5 3 2 7 5 4 2 7 5 5 2 7 5 6 2 7 5 7 2 7 5 8 2 7 5 9 2 7 5 10 2 7 5 11 2 7 5 12 2 7 5 13 
2 7 5 14 2 7 5 15 2 7 6 0 2 7 6 1 2 7 6 2 2 7 6 3 2 7 6 4 2 7 6 5 2 7 6 6 2 7 6 7 2 7 6 8 2 7 6 9 2 7 6 10 2 7 6 11 2 7 6 12 2 7 6 13 2 7 6 14 2 7 6 15 2 7 7 0 2 7 7 1 2 7 7 2 2 7 7 3 2 7 7 4 2 7 7 5 2 7 7 6 2 7 7 7 2 7 7 8 2 7 7 9 2 7 7 10 2 7 7 11 2 7 7 12 2 7 7 13 2 7 7 14 2 7 7 15 2 7 0 0 3 7 0 1 3 7 0 2 3 7 0 3 3 7 0 4 3 7 0 5 3 7 0 6 3 7 0 7 3 7 0 8 3 7 0 9 3 7 0 10 3 7 0 11 3 7 0 12 3 7 0 13 3 7 0 14 3 7 0 15 3 7 1 0 3 7 1 1 3 7 1 2 3 7 1 3 3 7 1 4 3 7 1 5 3 7 1 6 3 7 1 7 3 7 1 8 3 7 1 9 3 7 1 10 3 7 1 11 3 7 1 12 3 7 1 13 3 7 1 14 3 7 1 15 3 7 2 0 3 7 2 1 3 7 2 2 3 7 2 3 3 7 2 4 3 7 2 5 3 7 2 6 3 7 2 7 3 7 2 8 3 7 2 9 3 7 2 10 3 7 2 11 3 7 2 12 3 7 2 13 3 7 2 14 3 7 2 15 3 7 3 0 3 7 3 1 3 7 3 2 3 7 3 3 3 7 3 4 3 7 3 5 3 7 3 6 3 7 3 7 3 7 3 8 3 7 3 9 3 7 3 10 3 7 3 11 3 7 3 12 3 7 3 13 3 7 3 14 3 7 3 15 3 7 4 0 3 7 4 1 3 7 4 2 3 7 4 3 3 7 4 4 3 7 4 5 3 7 4 6 3 7 4 7 3 7 4 8 3 7 4 9 3 7 4 10 3 7 4 11 3 7 4 12 3 7 4 13 3 7 4 14 3 7 4 15 3 7 5 0 3 7 5 1 3 7 5 2 3 7 5 3 3 7 5 4 3 7 5 5 3 7 5 6 3 7 5 7 3 7 5 8 3 7 5 9 3 7 5 10 3 7 5 11 3 7 5 12 3 7 5 13 3 7 5 14 3 7 5 15 3 7 6 0 3 7 6 1 3 7 6 2 3 7 6 3 3 7 6 4 3 7 6 5 3 7 6 6 3 7 6 7 3 7 6 8 3 7 6 9 3 7 6 10 3 7 6 11 3 7 6 12 3 7 6 13 3 7 6 14 3 7 6 15 3 7 7 0 3 7 7 1 3 7 7 2 3 7 7 3 3 7 7 4 3 7 7 5 3 7 7 6 3 7 7 7 3 7 7 8 3 7 7 9 3 7 7 10 3 7 7 11 3 7 7 12 3 7 7 13 3 7 7 14 3 7 7 15 3 gpaw-24.1.0/doc/devel/Au_cluster/BGMAP_band_8x8x8x4000066400000000000000000000400001454550013000214750ustar00rootroot000000000000000 0 0 0 0 0 1 0 0 0 2 0 0 0 3 0 0 0 4 0 0 0 5 0 0 0 6 0 0 0 7 0 0 1 0 0 0 1 1 0 0 1 2 0 0 1 3 0 0 1 4 0 0 1 5 0 0 1 6 0 0 1 7 0 0 2 0 0 0 2 1 0 0 2 2 0 0 2 3 0 0 2 4 0 0 2 5 0 0 2 6 0 0 2 7 0 0 3 0 0 0 3 1 0 0 3 2 0 0 3 3 0 0 3 4 0 0 3 5 0 0 3 6 0 0 3 7 0 0 4 0 0 0 4 1 0 0 4 2 0 0 4 3 0 0 4 4 0 0 4 5 0 0 4 6 0 0 4 7 0 0 5 0 0 0 5 1 0 0 5 2 0 0 5 3 0 0 5 4 0 0 5 5 0 0 5 6 0 0 5 7 0 0 6 0 0 0 6 1 0 0 6 2 0 0 6 3 0 0 6 4 0 0 6 5 0 0 6 6 0 0 6 7 0 0 7 0 0 0 7 1 0 0 7 2 0 0 7 3 0 0 7 4 0 0 7 5 0 0 7 6 0 0 7 
7 0 1 0 0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 0 4 0 1 0 5 0 1 0 6 0 1 0 7 0 1 1 0 0 1 1 1 0 1 1 2 0 1 1 3 0 1 1 4 0 1 1 5 0 1 1 6 0 1 1 7 0 1 2 0 0 1 2 1 0 1 2 2 0 1 2 3 0 1 2 4 0 1 2 5 0 1 2 6 0 1 2 7 0 1 3 0 0 1 3 1 0 1 3 2 0 1 3 3 0 1 3 4 0 1 3 5 0 1 3 6 0 1 3 7 0 1 4 0 0 1 4 1 0 1 4 2 0 1 4 3 0 1 4 4 0 1 4 5 0 1 4 6 0 1 4 7 0 1 5 0 0 1 5 1 0 1 5 2 0 1 5 3 0 1 5 4 0 1 5 5 0 1 5 6 0 1 5 7 0 1 6 0 0 1 6 1 0 1 6 2 0 1 6 3 0 1 6 4 0 1 6 5 0 1 6 6 0 1 6 7 0 1 7 0 0 1 7 1 0 1 7 2 0 1 7 3 0 1 7 4 0 1 7 5 0 1 7 6 0 1 7 7 0 2 0 0 0 2 0 1 0 2 0 2 0 2 0 3 0 2 0 4 0 2 0 5 0 2 0 6 0 2 0 7 0 2 1 0 0 2 1 1 0 2 1 2 0 2 1 3 0 2 1 4 0 2 1 5 0 2 1 6 0 2 1 7 0 2 2 0 0 2 2 1 0 2 2 2 0 2 2 3 0 2 2 4 0 2 2 5 0 2 2 6 0 2 2 7 0 2 3 0 0 2 3 1 0 2 3 2 0 2 3 3 0 2 3 4 0 2 3 5 0 2 3 6 0 2 3 7 0 2 4 0 0 2 4 1 0 2 4 2 0 2 4 3 0 2 4 4 0 2 4 5 0 2 4 6 0 2 4 7 0 2 5 0 0 2 5 1 0 2 5 2 0 2 5 3 0 2 5 4 0 2 5 5 0 2 5 6 0 2 5 7 0 2 6 0 0 2 6 1 0 2 6 2 0 2 6 3 0 2 6 4 0 2 6 5 0 2 6 6 0 2 6 7 0 2 7 0 0 2 7 1 0 2 7 2 0 2 7 3 0 2 7 4 0 2 7 5 0 2 7 6 0 2 7 7 0 3 0 0 0 3 0 1 0 3 0 2 0 3 0 3 0 3 0 4 0 3 0 5 0 3 0 6 0 3 0 7 0 3 1 0 0 3 1 1 0 3 1 2 0 3 1 3 0 3 1 4 0 3 1 5 0 3 1 6 0 3 1 7 0 3 2 0 0 3 2 1 0 3 2 2 0 3 2 3 0 3 2 4 0 3 2 5 0 3 2 6 0 3 2 7 0 3 3 0 0 3 3 1 0 3 3 2 0 3 3 3 0 3 3 4 0 3 3 5 0 3 3 6 0 3 3 7 0 3 4 0 0 3 4 1 0 3 4 2 0 3 4 3 0 3 4 4 0 3 4 5 0 3 4 6 0 3 4 7 0 3 5 0 0 3 5 1 0 3 5 2 0 3 5 3 0 3 5 4 0 3 5 5 0 3 5 6 0 3 5 7 0 3 6 0 0 3 6 1 0 3 6 2 0 3 6 3 0 3 6 4 0 3 6 5 0 3 6 6 0 3 6 7 0 3 7 0 0 3 7 1 0 3 7 2 0 3 7 3 0 3 7 4 0 3 7 5 0 3 7 6 0 3 7 7 0 4 0 0 0 4 0 1 0 4 0 2 0 4 0 3 0 4 0 4 0 4 0 5 0 4 0 6 0 4 0 7 0 4 1 0 0 4 1 1 0 4 1 2 0 4 1 3 0 4 1 4 0 4 1 5 0 4 1 6 0 4 1 7 0 4 2 0 0 4 2 1 0 4 2 2 0 4 2 3 0 4 2 4 0 4 2 5 0 4 2 6 0 4 2 7 0 4 3 0 0 4 3 1 0 4 3 2 0 4 3 3 0 4 3 4 0 4 3 5 0 4 3 6 0 4 3 7 0 4 4 0 0 4 4 1 0 4 4 2 0 4 4 3 0 4 4 4 0 4 4 5 0 4 4 6 0 4 4 7 0 4 5 0 0 4 5 1 0 4 5 2 0 4 5 3 0 4 5 4 0 4 5 5 0 4 5 6 0 4 5 7 0 4 6 0 0 4 6 1 0 4 6 2 0 4 6 3 0 4 6 4 0 4 6 5 0 4 6 6 0 4 6 7 0 4 7 0 0 4 7 
1 0 4 7 2 0 4 7 3 0 4 7 4 0 4 7 5 0 4 7 6 0 4 7 7 0 5 0 0 0 5 0 1 0 5 0 2 0 5 0 3 0 5 0 4 0 5 0 5 0 5 0 6 0 5 0 7 0 5 1 0 0 5 1 1 0 5 1 2 0 5 1 3 0 5 1 4 0 5 1 5 0 5 1 6 0 5 1 7 0 5 2 0 0 5 2 1 0 5 2 2 0 5 2 3 0 5 2 4 0 5 2 5 0 5 2 6 0 5 2 7 0 5 3 0 0 5 3 1 0 5 3 2 0 5 3 3 0 5 3 4 0 5 3 5 0 5 3 6 0 5 3 7 0 5 4 0 0 5 4 1 0 5 4 2 0 5 4 3 0 5 4 4 0 5 4 5 0 5 4 6 0 5 4 7 0 5 5 0 0 5 5 1 0 5 5 2 0 5 5 3 0 5 5 4 0 5 5 5 0 5 5 6 0 5 5 7 0 5 6 0 0 5 6 1 0 5 6 2 0 5 6 3 0 5 6 4 0 5 6 5 0 5 6 6 0 5 6 7 0 5 7 0 0 5 7 1 0 5 7 2 0 5 7 3 0 5 7 4 0 5 7 5 0 5 7 6 0 5 7 7 0 6 0 0 0 6 0 1 0 6 0 2 0 6 0 3 0 6 0 4 0 6 0 5 0 6 0 6 0 6 0 7 0 6 1 0 0 6 1 1 0 6 1 2 0 6 1 3 0 6 1 4 0 6 1 5 0 6 1 6 0 6 1 7 0 6 2 0 0 6 2 1 0 6 2 2 0 6 2 3 0 6 2 4 0 6 2 5 0 6 2 6 0 6 2 7 0 6 3 0 0 6 3 1 0 6 3 2 0 6 3 3 0 6 3 4 0 6 3 5 0 6 3 6 0 6 3 7 0 6 4 0 0 6 4 1 0 6 4 2 0 6 4 3 0 6 4 4 0 6 4 5 0 6 4 6 0 6 4 7 0 6 5 0 0 6 5 1 0 6 5 2 0 6 5 3 0 6 5 4 0 6 5 5 0 6 5 6 0 6 5 7 0 6 6 0 0 6 6 1 0 6 6 2 0 6 6 3 0 6 6 4 0 6 6 5 0 6 6 6 0 6 6 7 0 6 7 0 0 6 7 1 0 6 7 2 0 6 7 3 0 6 7 4 0 6 7 5 0 6 7 6 0 6 7 7 0 7 0 0 0 7 0 1 0 7 0 2 0 7 0 3 0 7 0 4 0 7 0 5 0 7 0 6 0 7 0 7 0 7 1 0 0 7 1 1 0 7 1 2 0 7 1 3 0 7 1 4 0 7 1 5 0 7 1 6 0 7 1 7 0 7 2 0 0 7 2 1 0 7 2 2 0 7 2 3 0 7 2 4 0 7 2 5 0 7 2 6 0 7 2 7 0 7 3 0 0 7 3 1 0 7 3 2 0 7 3 3 0 7 3 4 0 7 3 5 0 7 3 6 0 7 3 7 0 7 4 0 0 7 4 1 0 7 4 2 0 7 4 3 0 7 4 4 0 7 4 5 0 7 4 6 0 7 4 7 0 7 5 0 0 7 5 1 0 7 5 2 0 7 5 3 0 7 5 4 0 7 5 5 0 7 5 6 0 7 5 7 0 7 6 0 0 7 6 1 0 7 6 2 0 7 6 3 0 7 6 4 0 7 6 5 0 7 6 6 0 7 6 7 0 7 7 0 0 7 7 1 0 7 7 2 0 7 7 3 0 7 7 4 0 7 7 5 0 7 7 6 0 7 7 7 0 0 0 0 1 0 0 1 1 0 0 2 1 0 0 3 1 0 0 4 1 0 0 5 1 0 0 6 1 0 0 7 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 1 0 1 4 1 0 1 5 1 0 1 6 1 0 1 7 1 0 2 0 1 0 2 1 1 0 2 2 1 0 2 3 1 0 2 4 1 0 2 5 1 0 2 6 1 0 2 7 1 0 3 0 1 0 3 1 1 0 3 2 1 0 3 3 1 0 3 4 1 0 3 5 1 0 3 6 1 0 3 7 1 0 4 0 1 0 4 1 1 0 4 2 1 0 4 3 1 0 4 4 1 0 4 5 1 0 4 6 1 0 4 7 1 0 5 0 1 0 5 1 1 0 5 2 1 0 5 3 1 0 5 4 1 0 5 5 1 0 5 6 1 0 5 7 1 0 6 0 1 0 6 1 1 0 6 2 1 0 6 
3 1 0 6 4 1 0 6 5 1 0 6 6 1 0 6 7 1 0 7 0 1 0 7 1 1 0 7 2 1 0 7 3 1 0 7 4 1 0 7 5 1 0 7 6 1 0 7 7 1 1 0 0 1 1 0 1 1 1 0 2 1 1 0 3 1 1 0 4 1 1 0 5 1 1 0 6 1 1 0 7 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 3 1 1 1 4 1 1 1 5 1 1 1 6 1 1 1 7 1 1 2 0 1 1 2 1 1 1 2 2 1 1 2 3 1 1 2 4 1 1 2 5 1 1 2 6 1 1 2 7 1 1 3 0 1 1 3 1 1 1 3 2 1 1 3 3 1 1 3 4 1 1 3 5 1 1 3 6 1 1 3 7 1 1 4 0 1 1 4 1 1 1 4 2 1 1 4 3 1 1 4 4 1 1 4 5 1 1 4 6 1 1 4 7 1 1 5 0 1 1 5 1 1 1 5 2 1 1 5 3 1 1 5 4 1 1 5 5 1 1 5 6 1 1 5 7 1 1 6 0 1 1 6 1 1 1 6 2 1 1 6 3 1 1 6 4 1 1 6 5 1 1 6 6 1 1 6 7 1 1 7 0 1 1 7 1 1 1 7 2 1 1 7 3 1 1 7 4 1 1 7 5 1 1 7 6 1 1 7 7 1 2 0 0 1 2 0 1 1 2 0 2 1 2 0 3 1 2 0 4 1 2 0 5 1 2 0 6 1 2 0 7 1 2 1 0 1 2 1 1 1 2 1 2 1 2 1 3 1 2 1 4 1 2 1 5 1 2 1 6 1 2 1 7 1 2 2 0 1 2 2 1 1 2 2 2 1 2 2 3 1 2 2 4 1 2 2 5 1 2 2 6 1 2 2 7 1 2 3 0 1 2 3 1 1 2 3 2 1 2 3 3 1 2 3 4 1 2 3 5 1 2 3 6 1 2 3 7 1 2 4 0 1 2 4 1 1 2 4 2 1 2 4 3 1 2 4 4 1 2 4 5 1 2 4 6 1 2 4 7 1 2 5 0 1 2 5 1 1 2 5 2 1 2 5 3 1 2 5 4 1 2 5 5 1 2 5 6 1 2 5 7 1 2 6 0 1 2 6 1 1 2 6 2 1 2 6 3 1 2 6 4 1 2 6 5 1 2 6 6 1 2 6 7 1 2 7 0 1 2 7 1 1 2 7 2 1 2 7 3 1 2 7 4 1 2 7 5 1 2 7 6 1 2 7 7 1 3 0 0 1 3 0 1 1 3 0 2 1 3 0 3 1 3 0 4 1 3 0 5 1 3 0 6 1 3 0 7 1 3 1 0 1 3 1 1 1 3 1 2 1 3 1 3 1 3 1 4 1 3 1 5 1 3 1 6 1 3 1 7 1 3 2 0 1 3 2 1 1 3 2 2 1 3 2 3 1 3 2 4 1 3 2 5 1 3 2 6 1 3 2 7 1 3 3 0 1 3 3 1 1 3 3 2 1 3 3 3 1 3 3 4 1 3 3 5 1 3 3 6 1 3 3 7 1 3 4 0 1 3 4 1 1 3 4 2 1 3 4 3 1 3 4 4 1 3 4 5 1 3 4 6 1 3 4 7 1 3 5 0 1 3 5 1 1 3 5 2 1 3 5 3 1 3 5 4 1 3 5 5 1 3 5 6 1 3 5 7 1 3 6 0 1 3 6 1 1 3 6 2 1 3 6 3 1 3 6 4 1 3 6 5 1 3 6 6 1 3 6 7 1 3 7 0 1 3 7 1 1 3 7 2 1 3 7 3 1 3 7 4 1 3 7 5 1 3 7 6 1 3 7 7 1 4 0 0 1 4 0 1 1 4 0 2 1 4 0 3 1 4 0 4 1 4 0 5 1 4 0 6 1 4 0 7 1 4 1 0 1 4 1 1 1 4 1 2 1 4 1 3 1 4 1 4 1 4 1 5 1 4 1 6 1 4 1 7 1 4 2 0 1 4 2 1 1 4 2 2 1 4 2 3 1 4 2 4 1 4 2 5 1 4 2 6 1 4 2 7 1 4 3 0 1 4 3 1 1 4 3 2 1 4 3 3 1 4 3 4 1 4 3 5 1 4 3 6 1 4 3 7 1 4 4 0 1 4 4 1 1 4 4 2 1 4 4 3 1 4 4 4 1 4 4 5 1 4 4 6 1 4 4 7 1 4 5 0 1 4 5 1 1 4 5 2 1 4 5 3 1 4 5 4 1 4 5 
5 1 4 5 6 1 4 5 7 1 4 6 0 1 4 6 1 1 4 6 2 1 4 6 3 1 4 6 4 1 4 6 5 1 4 6 6 1 4 6 7 1 4 7 0 1 4 7 1 1 4 7 2 1 4 7 3 1 4 7 4 1 4 7 5 1 4 7 6 1 4 7 7 1 5 0 0 1 5 0 1 1 5 0 2 1 5 0 3 1 5 0 4 1 5 0 5 1 5 0 6 1 5 0 7 1 5 1 0 1 5 1 1 1 5 1 2 1 5 1 3 1 5 1 4 1 5 1 5 1 5 1 6 1 5 1 7 1 5 2 0 1 5 2 1 1 5 2 2 1 5 2 3 1 5 2 4 1 5 2 5 1 5 2 6 1 5 2 7 1 5 3 0 1 5 3 1 1 5 3 2 1 5 3 3 1 5 3 4 1 5 3 5 1 5 3 6 1 5 3 7 1 5 4 0 1 5 4 1 1 5 4 2 1 5 4 3 1 5 4 4 1 5 4 5 1 5 4 6 1 5 4 7 1 5 5 0 1 5 5 1 1 5 5 2 1 5 5 3 1 5 5 4 1 5 5 5 1 5 5 6 1 5 5 7 1 5 6 0 1 5 6 1 1 5 6 2 1 5 6 3 1 5 6 4 1 5 6 5 1 5 6 6 1 5 6 7 1 5 7 0 1 5 7 1 1 5 7 2 1 5 7 3 1 5 7 4 1 5 7 5 1 5 7 6 1 5 7 7 1 6 0 0 1 6 0 1 1 6 0 2 1 6 0 3 1 6 0 4 1 6 0 5 1 6 0 6 1 6 0 7 1 6 1 0 1 6 1 1 1 6 1 2 1 6 1 3 1 6 1 4 1 6 1 5 1 6 1 6 1 6 1 7 1 6 2 0 1 6 2 1 1 6 2 2 1 6 2 3 1 6 2 4 1 6 2 5 1 6 2 6 1 6 2 7 1 6 3 0 1 6 3 1 1 6 3 2 1 6 3 3 1 6 3 4 1 6 3 5 1 6 3 6 1 6 3 7 1 6 4 0 1 6 4 1 1 6 4 2 1 6 4 3 1 6 4 4 1 6 4 5 1 6 4 6 1 6 4 7 1 6 5 0 1 6 5 1 1 6 5 2 1 6 5 3 1 6 5 4 1 6 5 5 1 6 5 6 1 6 5 7 1 6 6 0 1 6 6 1 1 6 6 2 1 6 6 3 1 6 6 4 1 6 6 5 1 6 6 6 1 6 6 7 1 6 7 0 1 6 7 1 1 6 7 2 1 6 7 3 1 6 7 4 1 6 7 5 1 6 7 6 1 6 7 7 1 7 0 0 1 7 0 1 1 7 0 2 1 7 0 3 1 7 0 4 1 7 0 5 1 7 0 6 1 7 0 7 1 7 1 0 1 7 1 1 1 7 1 2 1 7 1 3 1 7 1 4 1 7 1 5 1 7 1 6 1 7 1 7 1 7 2 0 1 7 2 1 1 7 2 2 1 7 2 3 1 7 2 4 1 7 2 5 1 7 2 6 1 7 2 7 1 7 3 0 1 7 3 1 1 7 3 2 1 7 3 3 1 7 3 4 1 7 3 5 1 7 3 6 1 7 3 7 1 7 4 0 1 7 4 1 1 7 4 2 1 7 4 3 1 7 4 4 1 7 4 5 1 7 4 6 1 7 4 7 1 7 5 0 1 7 5 1 1 7 5 2 1 7 5 3 1 7 5 4 1 7 5 5 1 7 5 6 1 7 5 7 1 7 6 0 1 7 6 1 1 7 6 2 1 7 6 3 1 7 6 4 1 7 6 5 1 7 6 6 1 7 6 7 1 7 7 0 1 7 7 1 1 7 7 2 1 7 7 3 1 7 7 4 1 7 7 5 1 7 7 6 1 7 7 7 1 0 0 0 2 0 0 1 2 0 0 2 2 0 0 3 2 0 0 4 2 0 0 5 2 0 0 6 2 0 0 7 2 0 1 0 2 0 1 1 2 0 1 2 2 0 1 3 2 0 1 4 2 0 1 5 2 0 1 6 2 0 1 7 2 0 2 0 2 0 2 1 2 0 2 2 2 0 2 3 2 0 2 4 2 0 2 5 2 0 2 6 2 0 2 7 2 0 3 0 2 0 3 1 2 0 3 2 2 0 3 3 2 0 3 4 2 0 3 5 2 0 3 6 2 0 3 7 2 0 4 0 2 0 4 1 2 0 4 2 2 0 4 3 2 0 4 4 2 0 4 5 2 0 4 6 2 0 4 
7 2 0 5 0 2 0 5 1 2 0 5 2 2 0 5 3 2 0 5 4 2 0 5 5 2 0 5 6 2 0 5 7 2 0 6 0 2 0 6 1 2 0 6 2 2 0 6 3 2 0 6 4 2 0 6 5 2 0 6 6 2 0 6 7 2 0 7 0 2 0 7 1 2 0 7 2 2 0 7 3 2 0 7 4 2 0 7 5 2 0 7 6 2 0 7 7 2 1 0 0 2 1 0 1 2 1 0 2 2 1 0 3 2 1 0 4 2 1 0 5 2 1 0 6 2 1 0 7 2 1 1 0 2 1 1 1 2 1 1 2 2 1 1 3 2 1 1 4 2 1 1 5 2 1 1 6 2 1 1 7 2 1 2 0 2 1 2 1 2 1 2 2 2 1 2 3 2 1 2 4 2 1 2 5 2 1 2 6 2 1 2 7 2 1 3 0 2 1 3 1 2 1 3 2 2 1 3 3 2 1 3 4 2 1 3 5 2 1 3 6 2 1 3 7 2 1 4 0 2 1 4 1 2 1 4 2 2 1 4 3 2 1 4 4 2 1 4 5 2 1 4 6 2 1 4 7 2 1 5 0 2 1 5 1 2 1 5 2 2 1 5 3 2 1 5 4 2 1 5 5 2 1 5 6 2 1 5 7 2 1 6 0 2 1 6 1 2 1 6 2 2 1 6 3 2 1 6 4 2 1 6 5 2 1 6 6 2 1 6 7 2 1 7 0 2 1 7 1 2 1 7 2 2 1 7 3 2 1 7 4 2 1 7 5 2 1 7 6 2 1 7 7 2 2 0 0 2 2 0 1 2 2 0 2 2 2 0 3 2 2 0 4 2 2 0 5 2 2 0 6 2 2 0 7 2 2 1 0 2 2 1 1 2 2 1 2 2 2 1 3 2 2 1 4 2 2 1 5 2 2 1 6 2 2 1 7 2 2 2 0 2 2 2 1 2 2 2 2 2 2 2 3 2 2 2 4 2 2 2 5 2 2 2 6 2 2 2 7 2 2 3 0 2 2 3 1 2 2 3 2 2 2 3 3 2 2 3 4 2 2 3 5 2 2 3 6 2 2 3 7 2 2 4 0 2 2 4 1 2 2 4 2 2 2 4 3 2 2 4 4 2 2 4 5 2 2 4 6 2 2 4 7 2 2 5 0 2 2 5 1 2 2 5 2 2 2 5 3 2 2 5 4 2 2 5 5 2 2 5 6 2 2 5 7 2 2 6 0 2 2 6 1 2 2 6 2 2 2 6 3 2 2 6 4 2 2 6 5 2 2 6 6 2 2 6 7 2 2 7 0 2 2 7 1 2 2 7 2 2 2 7 3 2 2 7 4 2 2 7 5 2 2 7 6 2 2 7 7 2 3 0 0 2 3 0 1 2 3 0 2 2 3 0 3 2 3 0 4 2 3 0 5 2 3 0 6 2 3 0 7 2 3 1 0 2 3 1 1 2 3 1 2 2 3 1 3 2 3 1 4 2 3 1 5 2 3 1 6 2 3 1 7 2 3 2 0 2 3 2 1 2 3 2 2 2 3 2 3 2 3 2 4 2 3 2 5 2 3 2 6 2 3 2 7 2 3 3 0 2 3 3 1 2 3 3 2 2 3 3 3 2 3 3 4 2 3 3 5 2 3 3 6 2 3 3 7 2 3 4 0 2 3 4 1 2 3 4 2 2 3 4 3 2 3 4 4 2 3 4 5 2 3 4 6 2 3 4 7 2 3 5 0 2 3 5 1 2 3 5 2 2 3 5 3 2 3 5 4 2 3 5 5 2 3 5 6 2 3 5 7 2 3 6 0 2 3 6 1 2 3 6 2 2 3 6 3 2 3 6 4 2 3 6 5 2 3 6 6 2 3 6 7 2 3 7 0 2 3 7 1 2 3 7 2 2 3 7 3 2 3 7 4 2 3 7 5 2 3 7 6 2 3 7 7 2 4 0 0 2 4 0 1 2 4 0 2 2 4 0 3 2 4 0 4 2 4 0 5 2 4 0 6 2 4 0 7 2 4 1 0 2 4 1 1 2 4 1 2 2 4 1 3 2 4 1 4 2 4 1 5 2 4 1 6 2 4 1 7 2 4 2 0 2 4 2 1 2 4 2 2 2 4 2 3 2 4 2 4 2 4 2 5 2 4 2 6 2 4 2 7 2 4 3 0 2 4 3 1 2 4 3 2 2 4 3 3 2 4 3 4 2 4 3 5 2 4 3 6 2 4 3 7 2 4 4 0 2 4 4 
1 2 4 4 2 2 4 4 3 2 4 4 4 2 4 4 5 2 4 4 6 2 4 4 7 2 4 5 0 2 4 5 1 2 4 5 2 2 4 5 3 2 4 5 4 2 4 5 5 2 4 5 6 2 4 5 7 2 4 6 0 2 4 6 1 2 4 6 2 2 4 6 3 2 4 6 4 2 4 6 5 2 4 6 6 2 4 6 7 2 4 7 0 2 4 7 1 2 4 7 2 2 4 7 3 2 4 7 4 2 4 7 5 2 4 7 6 2 4 7 7 2 5 0 0 2 5 0 1 2 5 0 2 2 5 0 3 2 5 0 4 2 5 0 5 2 5 0 6 2 5 0 7 2 5 1 0 2 5 1 1 2 5 1 2 2 5 1 3 2 5 1 4 2 5 1 5 2 5 1 6 2 5 1 7 2 5 2 0 2 5 2 1 2 5 2 2 2 5 2 3 2 5 2 4 2 5 2 5 2 5 2 6 2 5 2 7 2 5 3 0 2 5 3 1 2 5 3 2 2 5 3 3 2 5 3 4 2 5 3 5 2 5 3 6 2 5 3 7 2 5 4 0 2 5 4 1 2 5 4 2 2 5 4 3 2 5 4 4 2 5 4 5 2 5 4 6 2 5 4 7 2 5 5 0 2 5 5 1 2 5 5 2 2 5 5 3 2 5 5 4 2 5 5 5 2 5 5 6 2 5 5 7 2 5 6 0 2 5 6 1 2 5 6 2 2 5 6 3 2 5 6 4 2 5 6 5 2 5 6 6 2 5 6 7 2 5 7 0 2 5 7 1 2 5 7 2 2 5 7 3 2 5 7 4 2 5 7 5 2 5 7 6 2 5 7 7 2 6 0 0 2 6 0 1 2 6 0 2 2 6 0 3 2 6 0 4 2 6 0 5 2 6 0 6 2 6 0 7 2 6 1 0 2 6 1 1 2 6 1 2 2 6 1 3 2 6 1 4 2 6 1 5 2 6 1 6 2 6 1 7 2 6 2 0 2 6 2 1 2 6 2 2 2 6 2 3 2 6 2 4 2 6 2 5 2 6 2 6 2 6 2 7 2 6 3 0 2 6 3 1 2 6 3 2 2 6 3 3 2 6 3 4 2 6 3 5 2 6 3 6 2 6 3 7 2 6 4 0 2 6 4 1 2 6 4 2 2 6 4 3 2 6 4 4 2 6 4 5 2 6 4 6 2 6 4 7 2 6 5 0 2 6 5 1 2 6 5 2 2 6 5 3 2 6 5 4 2 6 5 5 2 6 5 6 2 6 5 7 2 6 6 0 2 6 6 1 2 6 6 2 2 6 6 3 2 6 6 4 2 6 6 5 2 6 6 6 2 6 6 7 2 6 7 0 2 6 7 1 2 6 7 2 2 6 7 3 2 6 7 4 2 6 7 5 2 6 7 6 2 6 7 7 2 7 0 0 2 7 0 1 2 7 0 2 2 7 0 3 2 7 0 4 2 7 0 5 2 7 0 6 2 7 0 7 2 7 1 0 2 7 1 1 2 7 1 2 2 7 1 3 2 7 1 4 2 7 1 5 2 7 1 6 2 7 1 7 2 7 2 0 2 7 2 1 2 7 2 2 2 7 2 3 2 7 2 4 2 7 2 5 2 7 2 6 2 7 2 7 2 7 3 0 2 7 3 1 2 7 3 2 2 7 3 3 2 7 3 4 2 7 3 5 2 7 3 6 2 7 3 7 2 7 4 0 2 7 4 1 2 7 4 2 2 7 4 3 2 7 4 4 2 7 4 5 2 7 4 6 2 7 4 7 2 7 5 0 2 7 5 1 2 7 5 2 2 7 5 3 2 7 5 4 2 7 5 5 2 7 5 6 2 7 5 7 2 7 6 0 2 7 6 1 2 7 6 2 2 7 6 3 2 7 6 4 2 7 6 5 2 7 6 6 2 7 6 7 2 7 7 0 2 7 7 1 2 7 7 2 2 7 7 3 2 7 7 4 2 7 7 5 2 7 7 6 2 7 7 7 2 0 0 0 3 0 0 1 3 0 0 2 3 0 0 3 3 0 0 4 3 0 0 5 3 0 0 6 3 0 0 7 3 0 1 0 3 0 1 1 3 0 1 2 3 0 1 3 3 0 1 4 3 0 1 5 3 0 1 6 3 0 1 7 3 0 2 0 3 0 2 1 3 0 2 2 3 0 2 3 3 0 2 4 3 0 2 5 3 0 2 6 3 0 2 7 3 0 3 0 3 0 3 1 3 0 3 2 3 0 3 
3 3 0 3 4 3 0 3 5 3 0 3 6 3 0 3 7 3 0 4 0 3 0 4 1 3 0 4 2 3 0 4 3 3 0 4 4 3 0 4 5 3 0 4 6 3 0 4 7 3 0 5 0 3 0 5 1 3 0 5 2 3 0 5 3 3 0 5 4 3 0 5 5 3 0 5 6 3 0 5 7 3 0 6 0 3 0 6 1 3 0 6 2 3 0 6 3 3 0 6 4 3 0 6 5 3 0 6 6 3 0 6 7 3 0 7 0 3 0 7 1 3 0 7 2 3 0 7 3 3 0 7 4 3 0 7 5 3 0 7 6 3 0 7 7 3 1 0 0 3 1 0 1 3 1 0 2 3 1 0 3 3 1 0 4 3 1 0 5 3 1 0 6 3 1 0 7 3 1 1 0 3 1 1 1 3 1 1 2 3 1 1 3 3 1 1 4 3 1 1 5 3 1 1 6 3 1 1 7 3 1 2 0 3 1 2 1 3 1 2 2 3 1 2 3 3 1 2 4 3 1 2 5 3 1 2 6 3 1 2 7 3 1 3 0 3 1 3 1 3 1 3 2 3 1 3 3 3 1 3 4 3 1 3 5 3 1 3 6 3 1 3 7 3 1 4 0 3 1 4 1 3 1 4 2 3 1 4 3 3 1 4 4 3 1 4 5 3 1 4 6 3 1 4 7 3 1 5 0 3 1 5 1 3 1 5 2 3 1 5 3 3 1 5 4 3 1 5 5 3 1 5 6 3 1 5 7 3 1 6 0 3 1 6 1 3 1 6 2 3 1 6 3 3 1 6 4 3 1 6 5 3 1 6 6 3 1 6 7 3 1 7 0 3 1 7 1 3 1 7 2 3 1 7 3 3 1 7 4 3 1 7 5 3 1 7 6 3 1 7 7 3 2 0 0 3 2 0 1 3 2 0 2 3 2 0 3 3 2 0 4 3 2 0 5 3 2 0 6 3 2 0 7 3 2 1 0 3 2 1 1 3 2 1 2 3 2 1 3 3 2 1 4 3 2 1 5 3 2 1 6 3 2 1 7 3 2 2 0 3 2 2 1 3 2 2 2 3 2 2 3 3 2 2 4 3 2 2 5 3 2 2 6 3 2 2 7 3 2 3 0 3 2 3 1 3 2 3 2 3 2 3 3 3 2 3 4 3 2 3 5 3 2 3 6 3 2 3 7 3 2 4 0 3 2 4 1 3 2 4 2 3 2 4 3 3 2 4 4 3 2 4 5 3 2 4 6 3 2 4 7 3 2 5 0 3 2 5 1 3 2 5 2 3 2 5 3 3 2 5 4 3 2 5 5 3 2 5 6 3 2 5 7 3 2 6 0 3 2 6 1 3 2 6 2 3 2 6 3 3 2 6 4 3 2 6 5 3 2 6 6 3 2 6 7 3 2 7 0 3 2 7 1 3 2 7 2 3 2 7 3 3 2 7 4 3 2 7 5 3 2 7 6 3 2 7 7 3 3 0 0 3 3 0 1 3 3 0 2 3 3 0 3 3 3 0 4 3 3 0 5 3 3 0 6 3 3 0 7 3 3 1 0 3 3 1 1 3 3 1 2 3 3 1 3 3 3 1 4 3 3 1 5 3 3 1 6 3 3 1 7 3 3 2 0 3 3 2 1 3 3 2 2 3 3 2 3 3 3 2 4 3 3 2 5 3 3 2 6 3 3 2 7 3 3 3 0 3 3 3 1 3 3 3 2 3 3 3 3 3 3 3 4 3 3 3 5 3 3 3 6 3 3 3 7 3 3 4 0 3 3 4 1 3 3 4 2 3 3 4 3 3 3 4 4 3 3 4 5 3 3 4 6 3 3 4 7 3 3 5 0 3 3 5 1 3 3 5 2 3 3 5 3 3 3 5 4 3 3 5 5 3 3 5 6 3 3 5 7 3 3 6 0 3 3 6 1 3 3 6 2 3 3 6 3 3 3 6 4 3 3 6 5 3 3 6 6 3 3 6 7 3 3 7 0 3 3 7 1 3 3 7 2 3 3 7 3 3 3 7 4 3 3 7 5 3 3 7 6 3 3 7 7 3 4 0 0 3 4 0 1 3 4 0 2 3 4 0 3 3 4 0 4 3 4 0 5 3 4 0 6 3 4 0 7 3 4 1 0 3 4 1 1 3 4 1 2 3 4 1 3 3 4 1 4 3 4 1 5 3 4 1 6 3 4 1 7 3 4 2 0 3 4 2 1 3 4 2 2 3 4 2 3 3 4 2 4 3 4 2 
5 3 4 2 6 3 4 2 7 3 4 3 0 3 4 3 1 3 4 3 2 3 4 3 3 3 4 3 4 3 4 3 5 3 4 3 6 3 4 3 7 3 4 4 0 3 4 4 1 3 4 4 2 3 4 4 3 3 4 4 4 3 4 4 5 3 4 4 6 3 4 4 7 3 4 5 0 3 4 5 1 3 4 5 2 3 4 5 3 3 4 5 4 3 4 5 5 3 4 5 6 3 4 5 7 3 4 6 0 3 4 6 1 3 4 6 2 3 4 6 3 3 4 6 4 3 4 6 5 3 4 6 6 3 4 6 7 3 4 7 0 3 4 7 1 3 4 7 2 3 4 7 3 3 4 7 4 3 4 7 5 3 4 7 6 3 4 7 7 3 5 0 0 3 5 0 1 3 5 0 2 3 5 0 3 3 5 0 4 3 5 0 5 3 5 0 6 3 5 0 7 3 5 1 0 3 5 1 1 3 5 1 2 3 5 1 3 3 5 1 4 3 5 1 5 3 5 1 6 3 5 1 7 3 5 2 0 3 5 2 1 3 5 2 2 3 5 2 3 3 5 2 4 3 5 2 5 3 5 2 6 3 5 2 7 3 5 3 0 3 5 3 1 3 5 3 2 3 5 3 3 3 5 3 4 3 5 3 5 3 5 3 6 3 5 3 7 3 5 4 0 3 5 4 1 3 5 4 2 3 5 4 3 3 5 4 4 3 5 4 5 3 5 4 6 3 5 4 7 3 5 5 0 3 5 5 1 3 5 5 2 3 5 5 3 3 5 5 4 3 5 5 5 3 5 5 6 3 5 5 7 3 5 6 0 3 5 6 1 3 5 6 2 3 5 6 3 3 5 6 4 3 5 6 5 3 5 6 6 3 5 6 7 3 5 7 0 3 5 7 1 3 5 7 2 3 5 7 3 3 5 7 4 3 5 7 5 3 5 7 6 3 5 7 7 3 6 0 0 3 6 0 1 3 6 0 2 3 6 0 3 3 6 0 4 3 6 0 5 3 6 0 6 3 6 0 7 3 6 1 0 3 6 1 1 3 6 1 2 3 6 1 3 3 6 1 4 3 6 1 5 3 6 1 6 3 6 1 7 3 6 2 0 3 6 2 1 3 6 2 2 3 6 2 3 3 6 2 4 3 6 2 5 3 6 2 6 3 6 2 7 3 6 3 0 3 6 3 1 3 6 3 2 3 6 3 3 3 6 3 4 3 6 3 5 3 6 3 6 3 6 3 7 3 6 4 0 3 6 4 1 3 6 4 2 3 6 4 3 3 6 4 4 3 6 4 5 3 6 4 6 3 6 4 7 3 6 5 0 3 6 5 1 3 6 5 2 3 6 5 3 3 6 5 4 3 6 5 5 3 6 5 6 3 6 5 7 3 6 6 0 3 6 6 1 3 6 6 2 3 6 6 3 3 6 6 4 3 6 6 5 3 6 6 6 3 6 6 7 3 6 7 0 3 6 7 1 3 6 7 2 3 6 7 3 3 6 7 4 3 6 7 5 3 6 7 6 3 6 7 7 3 7 0 0 3 7 0 1 3 7 0 2 3 7 0 3 3 7 0 4 3 7 0 5 3 7 0 6 3 7 0 7 3 7 1 0 3 7 1 1 3 7 1 2 3 7 1 3 3 7 1 4 3 7 1 5 3 7 1 6 3 7 1 7 3 7 2 0 3 7 2 1 3 7 2 2 3 7 2 3 3 7 2 4 3 7 2 5 3 7 2 6 3 7 2 7 3 7 3 0 3 7 3 1 3 7 3 2 3 7 3 3 3 7 3 4 3 7 3 5 3 7 3 6 3 7 3 7 3 7 4 0 3 7 4 1 3 7 4 2 3 7 4 3 3 7 4 4 3 7 4 5 3 7 4 6 3 7 4 7 3 7 5 0 3 7 5 1 3 7 5 2 3 7 5 3 3 7 5 4 3 7 5 5 3 7 5 6 3 7 5 7 3 7 6 0 3 7 6 1 3 7 6 2 3 7 6 3 3 7 6 4 3 7 6 5 3 7 6 6 3 7 6 7 3 7 7 0 3 7 7 1 3 7 7 2 3 7 7 3 3 7 7 4 3 7 7 5 3 7 7 6 3 7 7 7 3 
gpaw-24.1.0/doc/devel/Au_cluster/BGMAP_domain_4x8x16x8000066400000000000000000001030001454550013000221170ustar00rootroot000000000000000 0 0 0 0 0 0 1 0 0 0 2 0 0 0 3 0 0 1 0 0 0 1 1 0 0 1 2 0 0 1 3 0 0 2 0 0 0 2 1 0 0 2 2 0 0 2 3 0 0 3 0 0 0 3 1 0 0 3 2 0 0 3 3 0 1 0 0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 1 0 0 1 1 1 0 1 1 2 0 1 1 3 0 1 2 0 0 1 2 1 0 1 2 2 0 1 2 3 0 1 3 0 0 1 3 1 0 1 3 2 0 1 3 3 0 2 0 0 0 2 0 1 0 2 0 2 0 2 0 3 0 2 1 0 0 2 1 1 0 2 1 2 0 2 1 3 0 2 2 0 0 2 2 1 0 2 2 2 0 2 2 3 0 2 3 0 0 2 3 1 0 2 3 2 0 2 3 3 0 3 0 0 0 3 0 1 0 3 0 2 0 3 0 3 0 3 1 0 0 3 1 1 0 3 1 2 0 3 1 3 0 3 2 0 0 3 2 1 0 3 2 2 0 3 2 3 0 3 3 0 0 3 3 1 0 3 3 2 0 3 3 3 0 4 0 0 0 4 0 1 0 4 0 2 0 4 0 3 0 4 1 0 0 4 1 1 0 4 1 2 0 4 1 3 0 4 2 0 0 4 2 1 0 4 2 2 0 4 2 3 0 4 3 0 0 4 3 1 0 4 3 2 0 4 3 3 0 5 0 0 0 5 0 1 0 5 0 2 0 5 0 3 0 5 1 0 0 5 1 1 0 5 1 2 0 5 1 3 0 5 2 0 0 5 2 1 0 5 2 2 0 5 2 3 0 5 3 0 0 5 3 1 0 5 3 2 0 5 3 3 0 6 0 0 0 6 0 1 0 6 0 2 0 6 0 3 0 6 1 0 0 6 1 1 0 6 1 2 0 6 1 3 0 6 2 0 0 6 2 1 0 6 2 2 0 6 2 3 0 6 3 0 0 6 3 1 0 6 3 2 0 6 3 3 0 7 0 0 0 7 0 1 0 7 0 2 0 7 0 3 0 7 1 0 0 7 1 1 0 7 1 2 0 7 1 3 0 7 2 0 0 7 2 1 0 7 2 2 0 7 2 3 0 7 3 0 0 7 3 1 0 7 3 2 0 7 3 3 1 0 0 0 1 0 0 1 1 0 0 2 1 0 0 3 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 1 0 2 0 1 0 2 1 1 0 2 2 1 0 2 3 1 0 3 0 1 0 3 1 1 0 3 2 1 0 3 3 1 1 0 0 1 1 0 1 1 1 0 2 1 1 0 3 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 3 1 1 2 0 1 1 2 1 1 1 2 2 1 1 2 3 1 1 3 0 1 1 3 1 1 1 3 2 1 1 3 3 1 2 0 0 1 2 0 1 1 2 0 2 1 2 0 3 1 2 1 0 1 2 1 1 1 2 1 2 1 2 1 3 1 2 2 0 1 2 2 1 1 2 2 2 1 2 2 3 1 2 3 0 1 2 3 1 1 2 3 2 1 2 3 3 1 3 0 0 1 3 0 1 1 3 0 2 1 3 0 3 1 3 1 0 1 3 1 1 1 3 1 2 1 3 1 3 1 3 2 0 1 3 2 1 1 3 2 2 1 3 2 3 1 3 3 0 1 3 3 1 1 3 3 2 1 3 3 3 1 4 0 0 1 4 0 1 1 4 0 2 1 4 0 3 1 4 1 0 1 4 1 1 1 4 1 2 1 4 1 3 1 4 2 0 1 4 2 1 1 4 2 2 1 4 2 3 1 4 3 0 1 4 3 1 1 4 3 2 1 4 3 3 1 5 0 0 1 5 0 1 1 5 0 2 1 5 0 3 1 5 1 0 1 5 1 1 1 5 1 2 1 5 1 3 1 5 2 0 1 5 2 1 1 5 2 2 1 5 2 3 1 5 3 0 1 5 3 1 1 5 3 2 1 5 3 3 1 6 0 0 1 6 0 1 1 6 0 2 1 6 0 3 1 6 1 0 1 6 1 1 1 6 1 2 1 6 1 3 1 6 2 0 1 
6 2 1 1 6 2 2 1 6 2 3 1 6 3 0 1 6 3 1 1 6 3 2 1 6 3 3 1 7 0 0 1 7 0 1 1 7 0 2 1 7 0 3 1 7 1 0 1 7 1 1 1 7 1 2 1 7 1 3 1 7 2 0 1 7 2 1 1 7 2 2 1 7 2 3 1 7 3 0 1 7 3 1 1 7 3 2 1 7 3 3 2 0 0 0 2 0 0 1 2 0 0 2 2 0 0 3 2 0 1 0 2 0 1 1 2 0 1 2 2 0 1 3 2 0 2 0 2 0 2 1 2 0 2 2 2 0 2 3 2 0 3 0 2 0 3 1 2 0 3 2 2 0 3 3 2 1 0 0 2 1 0 1 2 1 0 2 2 1 0 3 2 1 1 0 2 1 1 1 2 1 1 2 2 1 1 3 2 1 2 0 2 1 2 1 2 1 2 2 2 1 2 3 2 1 3 0 2 1 3 1 2 1 3 2 2 1 3 3 2 2 0 0 2 2 0 1 2 2 0 2 2 2 0 3 2 2 1 0 2 2 1 1 2 2 1 2 2 2 1 3 2 2 2 0 2 2 2 1 2 2 2 2 2 2 2 3 2 2 3 0 2 2 3 1 2 2 3 2 2 2 3 3 2 3 0 0 2 3 0 1 2 3 0 2 2 3 0 3 2 3 1 0 2 3 1 1 2 3 1 2 2 3 1 3 2 3 2 0 2 3 2 1 2 3 2 2 2 3 2 3 2 3 3 0 2 3 3 1 2 3 3 2 2 3 3 3 2 4 0 0 2 4 0 1 2 4 0 2 2 4 0 3 2 4 1 0 2 4 1 1 2 4 1 2 2 4 1 3 2 4 2 0 2 4 2 1 2 4 2 2 2 4 2 3 2 4 3 0 2 4 3 1 2 4 3 2 2 4 3 3 2 5 0 0 2 5 0 1 2 5 0 2 2 5 0 3 2 5 1 0 2 5 1 1 2 5 1 2 2 5 1 3 2 5 2 0 2 5 2 1 2 5 2 2 2 5 2 3 2 5 3 0 2 5 3 1 2 5 3 2 2 5 3 3 2 6 0 0 2 6 0 1 2 6 0 2 2 6 0 3 2 6 1 0 2 6 1 1 2 6 1 2 2 6 1 3 2 6 2 0 2 6 2 1 2 6 2 2 2 6 2 3 2 6 3 0 2 6 3 1 2 6 3 2 2 6 3 3 2 7 0 0 2 7 0 1 2 7 0 2 2 7 0 3 2 7 1 0 2 7 1 1 2 7 1 2 2 7 1 3 2 7 2 0 2 7 2 1 2 7 2 2 2 7 2 3 2 7 3 0 2 7 3 1 2 7 3 2 2 7 3 3 3 0 0 0 3 0 0 1 3 0 0 2 3 0 0 3 3 0 1 0 3 0 1 1 3 0 1 2 3 0 1 3 3 0 2 0 3 0 2 1 3 0 2 2 3 0 2 3 3 0 3 0 3 0 3 1 3 0 3 2 3 0 3 3 3 1 0 0 3 1 0 1 3 1 0 2 3 1 0 3 3 1 1 0 3 1 1 1 3 1 1 2 3 1 1 3 3 1 2 0 3 1 2 1 3 1 2 2 3 1 2 3 3 1 3 0 3 1 3 1 3 1 3 2 3 1 3 3 3 2 0 0 3 2 0 1 3 2 0 2 3 2 0 3 3 2 1 0 3 2 1 1 3 2 1 2 3 2 1 3 3 2 2 0 3 2 2 1 3 2 2 2 3 2 2 3 3 2 3 0 3 2 3 1 3 2 3 2 3 2 3 3 3 3 0 0 3 3 0 1 3 3 0 2 3 3 0 3 3 3 1 0 3 3 1 1 3 3 1 2 3 3 1 3 3 3 2 0 3 3 2 1 3 3 2 2 3 3 2 3 3 3 3 0 3 3 3 1 3 3 3 2 3 3 3 3 3 4 0 0 3 4 0 1 3 4 0 2 3 4 0 3 3 4 1 0 3 4 1 1 3 4 1 2 3 4 1 3 3 4 2 0 3 4 2 1 3 4 2 2 3 4 2 3 3 4 3 0 3 4 3 1 3 4 3 2 3 4 3 3 3 5 0 0 3 5 0 1 3 5 0 2 3 5 0 3 3 5 1 0 3 5 1 1 3 5 1 2 3 5 1 3 3 5 2 0 3 5 2 1 3 5 2 2 3 5 2 3 3 5 3 0 3 5 3 1 3 5 3 2 3 5 3 3 3 6 0 0 3 6 0 1 3 6 0 2 3 
6 0 3 3 6 1 0 3 6 1 1 3 6 1 2 3 6 1 3 3 6 2 0 3 6 2 1 3 6 2 2 3 6 2 3 3 6 3 0 3 6 3 1 3 6 3 2 3 6 3 3 3 7 0 0 3 7 0 1 3 7 0 2 3 7 0 3 3 7 1 0 3 7 1 1 3 7 1 2 3 7 1 3 3 7 2 0 3 7 2 1 3 7 2 2 3 7 2 3 3 7 3 0 3 7 3 1 3 7 3 2 3 7 3 3 0 0 4 0 0 0 4 1 0 0 4 2 0 0 4 3 0 0 5 0 0 0 5 1 0 0 5 2 0 0 5 3 0 0 6 0 0 0 6 1 0 0 6 2 0 0 6 3 0 0 7 0 0 0 7 1 0 0 7 2 0 0 7 3 0 1 4 0 0 1 4 1 0 1 4 2 0 1 4 3 0 1 5 0 0 1 5 1 0 1 5 2 0 1 5 3 0 1 6 0 0 1 6 1 0 1 6 2 0 1 6 3 0 1 7 0 0 1 7 1 0 1 7 2 0 1 7 3 0 2 4 0 0 2 4 1 0 2 4 2 0 2 4 3 0 2 5 0 0 2 5 1 0 2 5 2 0 2 5 3 0 2 6 0 0 2 6 1 0 2 6 2 0 2 6 3 0 2 7 0 0 2 7 1 0 2 7 2 0 2 7 3 0 3 4 0 0 3 4 1 0 3 4 2 0 3 4 3 0 3 5 0 0 3 5 1 0 3 5 2 0 3 5 3 0 3 6 0 0 3 6 1 0 3 6 2 0 3 6 3 0 3 7 0 0 3 7 1 0 3 7 2 0 3 7 3 0 4 4 0 0 4 4 1 0 4 4 2 0 4 4 3 0 4 5 0 0 4 5 1 0 4 5 2 0 4 5 3 0 4 6 0 0 4 6 1 0 4 6 2 0 4 6 3 0 4 7 0 0 4 7 1 0 4 7 2 0 4 7 3 0 5 4 0 0 5 4 1 0 5 4 2 0 5 4 3 0 5 5 0 0 5 5 1 0 5 5 2 0 5 5 3 0 5 6 0 0 5 6 1 0 5 6 2 0 5 6 3 0 5 7 0 0 5 7 1 0 5 7 2 0 5 7 3 0 6 4 0 0 6 4 1 0 6 4 2 0 6 4 3 0 6 5 0 0 6 5 1 0 6 5 2 0 6 5 3 0 6 6 0 0 6 6 1 0 6 6 2 0 6 6 3 0 6 7 0 0 6 7 1 0 6 7 2 0 6 7 3 0 7 4 0 0 7 4 1 0 7 4 2 0 7 4 3 0 7 5 0 0 7 5 1 0 7 5 2 0 7 5 3 0 7 6 0 0 7 6 1 0 7 6 2 0 7 6 3 0 7 7 0 0 7 7 1 0 7 7 2 0 7 7 3 1 0 4 0 1 0 4 1 1 0 4 2 1 0 4 3 1 0 5 0 1 0 5 1 1 0 5 2 1 0 5 3 1 0 6 0 1 0 6 1 1 0 6 2 1 0 6 3 1 0 7 0 1 0 7 1 1 0 7 2 1 0 7 3 1 1 4 0 1 1 4 1 1 1 4 2 1 1 4 3 1 1 5 0 1 1 5 1 1 1 5 2 1 1 5 3 1 1 6 0 1 1 6 1 1 1 6 2 1 1 6 3 1 1 7 0 1 1 7 1 1 1 7 2 1 1 7 3 1 2 4 0 1 2 4 1 1 2 4 2 1 2 4 3 1 2 5 0 1 2 5 1 1 2 5 2 1 2 5 3 1 2 6 0 1 2 6 1 1 2 6 2 1 2 6 3 1 2 7 0 1 2 7 1 1 2 7 2 1 2 7 3 1 3 4 0 1 3 4 1 1 3 4 2 1 3 4 3 1 3 5 0 1 3 5 1 1 3 5 2 1 3 5 3 1 3 6 0 1 3 6 1 1 3 6 2 1 3 6 3 1 3 7 0 1 3 7 1 1 3 7 2 1 3 7 3 1 4 4 0 1 4 4 1 1 4 4 2 1 4 4 3 1 4 5 0 1 4 5 1 1 4 5 2 1 4 5 3 1 4 6 0 1 4 6 1 1 4 6 2 1 4 6 3 1 4 7 0 1 4 7 1 1 4 7 2 1 4 7 3 1 5 4 0 1 5 4 1 1 5 4 2 1 5 4 3 1 5 5 0 1 5 5 1 1 5 5 2 1 5 5 3 1 5 6 0 1 5 6 1 1 5 6 2 1 5 6 3 1 5 7 0 1 
5 7 1 1 5 7 2 1 5 7 3 1 6 4 0 1 6 4 1 1 6 4 2 1 6 4 3 1 6 5 0 1 6 5 1 1 6 5 2 1 6 5 3 1 6 6 0 1 6 6 1 1 6 6 2 1 6 6 3 1 6 7 0 1 6 7 1 1 6 7 2 1 6 7 3 1 7 4 0 1 7 4 1 1 7 4 2 1 7 4 3 1 7 5 0 1 7 5 1 1 7 5 2 1 7 5 3 1 7 6 0 1 7 6 1 1 7 6 2 1 7 6 3 1 7 7 0 1 7 7 1 1 7 7 2 1 7 7 3 2 0 4 0 2 0 4 1 2 0 4 2 2 0 4 3 2 0 5 0 2 0 5 1 2 0 5 2 2 0 5 3 2 0 6 0 2 0 6 1 2 0 6 2 2 0 6 3 2 0 7 0 2 0 7 1 2 0 7 2 2 0 7 3 2 1 4 0 2 1 4 1 2 1 4 2 2 1 4 3 2 1 5 0 2 1 5 1 2 1 5 2 2 1 5 3 2 1 6 0 2 1 6 1 2 1 6 2 2 1 6 3 2 1 7 0 2 1 7 1 2 1 7 2 2 1 7 3 2 2 4 0 2 2 4 1 2 2 4 2 2 2 4 3 2 2 5 0 2 2 5 1 2 2 5 2 2 2 5 3 2 2 6 0 2 2 6 1 2 2 6 2 2 2 6 3 2 2 7 0 2 2 7 1 2 2 7 2 2 2 7 3 2 3 4 0 2 3 4 1 2 3 4 2 2 3 4 3 2 3 5 0 2 3 5 1 2 3 5 2 2 3 5 3 2 3 6 0 2 3 6 1 2 3 6 2 2 3 6 3 2 3 7 0 2 3 7 1 2 3 7 2 2 3 7 3 2 4 4 0 2 4 4 1 2 4 4 2 2 4 4 3 2 4 5 0 2 4 5 1 2 4 5 2 2 4 5 3 2 4 6 0 2 4 6 1 2 4 6 2 2 4 6 3 2 4 7 0 2 4 7 1 2 4 7 2 2 4 7 3 2 5 4 0 2 5 4 1 2 5 4 2 2 5 4 3 2 5 5 0 2 5 5 1 2 5 5 2 2 5 5 3 2 5 6 0 2 5 6 1 2 5 6 2 2 5 6 3 2 5 7 0 2 5 7 1 2 5 7 2 2 5 7 3 2 6 4 0 2 6 4 1 2 6 4 2 2 6 4 3 2 6 5 0 2 6 5 1 2 6 5 2 2 6 5 3 2 6 6 0 2 6 6 1 2 6 6 2 2 6 6 3 2 6 7 0 2 6 7 1 2 6 7 2 2 6 7 3 2 7 4 0 2 7 4 1 2 7 4 2 2 7 4 3 2 7 5 0 2 7 5 1 2 7 5 2 2 7 5 3 2 7 6 0 2 7 6 1 2 7 6 2 2 7 6 3 2 7 7 0 2 7 7 1 2 7 7 2 2 7 7 3 3 0 4 0 3 0 4 1 3 0 4 2 3 0 4 3 3 0 5 0 3 0 5 1 3 0 5 2 3 0 5 3 3 0 6 0 3 0 6 1 3 0 6 2 3 0 6 3 3 0 7 0 3 0 7 1 3 0 7 2 3 0 7 3 3 1 4 0 3 1 4 1 3 1 4 2 3 1 4 3 3 1 5 0 3 1 5 1 3 1 5 2 3 1 5 3 3 1 6 0 3 1 6 1 3 1 6 2 3 1 6 3 3 1 7 0 3 1 7 1 3 1 7 2 3 1 7 3 3 2 4 0 3 2 4 1 3 2 4 2 3 2 4 3 3 2 5 0 3 2 5 1 3 2 5 2 3 2 5 3 3 2 6 0 3 2 6 1 3 2 6 2 3 2 6 3 3 2 7 0 3 2 7 1 3 2 7 2 3 2 7 3 3 3 4 0 3 3 4 1 3 3 4 2 3 3 4 3 3 3 5 0 3 3 5 1 3 3 5 2 3 3 5 3 3 3 6 0 3 3 6 1 3 3 6 2 3 3 6 3 3 3 7 0 3 3 7 1 3 3 7 2 3 3 7 3 3 4 4 0 3 4 4 1 3 4 4 2 3 4 4 3 3 4 5 0 3 4 5 1 3 4 5 2 3 4 5 3 3 4 6 0 3 4 6 1 3 4 6 2 3 4 6 3 3 4 7 0 3 4 7 1 3 4 7 2 3 4 7 3 3 5 4 0 3 5 4 1 3 5 4 2 3 5 4 3 3 5 5 0 3 5 5 1 3 5 5 2 3 
5 5 3 3 5 6 0 3 5 6 1 3 5 6 2 3 5 6 3 3 5 7 0 3 5 7 1 3 5 7 2 3 5 7 3 3 6 4 0 3 6 4 1 3 6 4 2 3 6 4 3 3 6 5 0 3 6 5 1 3 6 5 2 3 6 5 3 3 6 6 0 3 6 6 1 3 6 6 2 3 6 6 3 3 6 7 0 3 6 7 1 3 6 7 2 3 6 7 3 3 7 4 0 3 7 4 1 3 7 4 2 3 7 4 3 3 7 5 0 3 7 5 1 3 7 5 2 3 7 5 3 3 7 6 0 3 7 6 1 3 7 6 2 3 7 6 3 3 7 7 0 3 7 7 1 3 7 7 2 3 7 7 3 0 0 8 0 0 0 8 1 0 0 8 2 0 0 8 3 0 0 9 0 0 0 9 1 0 0 9 2 0 0 9 3 0 0 10 0 0 0 10 1 0 0 10 2 0 0 10 3 0 0 11 0 0 0 11 1 0 0 11 2 0 0 11 3 0 1 8 0 0 1 8 1 0 1 8 2 0 1 8 3 0 1 9 0 0 1 9 1 0 1 9 2 0 1 9 3 0 1 10 0 0 1 10 1 0 1 10 2 0 1 10 3 0 1 11 0 0 1 11 1 0 1 11 2 0 1 11 3 0 2 8 0 0 2 8 1 0 2 8 2 0 2 8 3 0 2 9 0 0 2 9 1 0 2 9 2 0 2 9 3 0 2 10 0 0 2 10 1 0 2 10 2 0 2 10 3 0 2 11 0 0 2 11 1 0 2 11 2 0 2 11 3 0 3 8 0 0 3 8 1 0 3 8 2 0 3 8 3 0 3 9 0 0 3 9 1 0 3 9 2 0 3 9 3 0 3 10 0 0 3 10 1 0 3 10 2 0 3 10 3 0 3 11 0 0 3 11 1 0 3 11 2 0 3 11 3 0 4 8 0 0 4 8 1 0 4 8 2 0 4 8 3 0 4 9 0 0 4 9 1 0 4 9 2 0 4 9 3 0 4 10 0 0 4 10 1 0 4 10 2 0 4 10 3 0 4 11 0 0 4 11 1 0 4 11 2 0 4 11 3 0 5 8 0 0 5 8 1 0 5 8 2 0 5 8 3 0 5 9 0 0 5 9 1 0 5 9 2 0 5 9 3 0 5 10 0 0 5 10 1 0 5 10 2 0 5 10 3 0 5 11 0 0 5 11 1 0 5 11 2 0 5 11 3 0 6 8 0 0 6 8 1 0 6 8 2 0 6 8 3 0 6 9 0 0 6 9 1 0 6 9 2 0 6 9 3 0 6 10 0 0 6 10 1 0 6 10 2 0 6 10 3 0 6 11 0 0 6 11 1 0 6 11 2 0 6 11 3 0 7 8 0 0 7 8 1 0 7 8 2 0 7 8 3 0 7 9 0 0 7 9 1 0 7 9 2 0 7 9 3 0 7 10 0 0 7 10 1 0 7 10 2 0 7 10 3 0 7 11 0 0 7 11 1 0 7 11 2 0 7 11 3 1 0 8 0 1 0 8 1 1 0 8 2 1 0 8 3 1 0 9 0 1 0 9 1 1 0 9 2 1 0 9 3 1 0 10 0 1 0 10 1 1 0 10 2 1 0 10 3 1 0 11 0 1 0 11 1 1 0 11 2 1 0 11 3 1 1 8 0 1 1 8 1 1 1 8 2 1 1 8 3 1 1 9 0 1 1 9 1 1 1 9 2 1 1 9 3 1 1 10 0 1 1 10 1 1 1 10 2 1 1 10 3 1 1 11 0 1 1 11 1 1 1 11 2 1 1 11 3 1 2 8 0 1 2 8 1 1 2 8 2 1 2 8 3 1 2 9 0 1 2 9 1 1 2 9 2 1 2 9 3 1 2 10 0 1 2 10 1 1 2 10 2 1 2 10 3 1 2 11 0 1 2 11 1 1 2 11 2 1 2 11 3 1 3 8 0 1 3 8 1 1 3 8 2 1 3 8 3 1 3 9 0 1 3 9 1 1 3 9 2 1 3 9 3 1 3 10 0 1 3 10 1 1 3 10 2 1 3 10 3 1 3 11 0 1 3 11 1 1 3 11 2 1 3 11 3 1 4 8 0 1 4 8 1 1 4 8 2 1 4 8 3 1 4 9 0 1 
4 9 1 1 4 9 2 1 4 9 3 1 4 10 0 1 4 10 1 1 4 10 2 1 4 10 3 1 4 11 0 1 4 11 1 1 4 11 2 1 4 11 3 1 5 8 0 1 5 8 1 1 5 8 2 1 5 8 3 1 5 9 0 1 5 9 1 1 5 9 2 1 5 9 3 1 5 10 0 1 5 10 1 1 5 10 2 1 5 10 3 1 5 11 0 1 5 11 1 1 5 11 2 1 5 11 3 1 6 8 0 1 6 8 1 1 6 8 2 1 6 8 3 1 6 9 0 1 6 9 1 1 6 9 2 1 6 9 3 1 6 10 0 1 6 10 1 1 6 10 2 1 6 10 3 1 6 11 0 1 6 11 1 1 6 11 2 1 6 11 3 1 7 8 0 1 7 8 1 1 7 8 2 1 7 8 3 1 7 9 0 1 7 9 1 1 7 9 2 1 7 9 3 1 7 10 0 1 7 10 1 1 7 10 2 1 7 10 3 1 7 11 0 1 7 11 1 1 7 11 2 1 7 11 3 2 0 8 0 2 0 8 1 2 0 8 2 2 0 8 3 2 0 9 0 2 0 9 1 2 0 9 2 2 0 9 3 2 0 10 0 2 0 10 1 2 0 10 2 2 0 10 3 2 0 11 0 2 0 11 1 2 0 11 2 2 0 11 3 2 1 8 0 2 1 8 1 2 1 8 2 2 1 8 3 2 1 9 0 2 1 9 1 2 1 9 2 2 1 9 3 2 1 10 0 2 1 10 1 2 1 10 2 2 1 10 3 2 1 11 0 2 1 11 1 2 1 11 2 2 1 11 3 2 2 8 0 2 2 8 1 2 2 8 2 2 2 8 3 2 2 9 0 2 2 9 1 2 2 9 2 2 2 9 3 2 2 10 0 2 2 10 1 2 2 10 2 2 2 10 3 2 2 11 0 2 2 11 1 2 2 11 2 2 2 11 3 2 3 8 0 2 3 8 1 2 3 8 2 2 3 8 3 2 3 9 0 2 3 9 1 2 3 9 2 2 3 9 3 2 3 10 0 2 3 10 1 2 3 10 2 2 3 10 3 2 3 11 0 2 3 11 1 2 3 11 2 2 3 11 3 2 4 8 0 2 4 8 1 2 4 8 2 2 4 8 3 2 4 9 0 2 4 9 1 2 4 9 2 2 4 9 3 2 4 10 0 2 4 10 1 2 4 10 2 2 4 10 3 2 4 11 0 2 4 11 1 2 4 11 2 2 4 11 3 2 5 8 0 2 5 8 1 2 5 8 2 2 5 8 3 2 5 9 0 2 5 9 1 2 5 9 2 2 5 9 3 2 5 10 0 2 5 10 1 2 5 10 2 2 5 10 3 2 5 11 0 2 5 11 1 2 5 11 2 2 5 11 3 2 6 8 0 2 6 8 1 2 6 8 2 2 6 8 3 2 6 9 0 2 6 9 1 2 6 9 2 2 6 9 3 2 6 10 0 2 6 10 1 2 6 10 2 2 6 10 3 2 6 11 0 2 6 11 1 2 6 11 2 2 6 11 3 2 7 8 0 2 7 8 1 2 7 8 2 2 7 8 3 2 7 9 0 2 7 9 1 2 7 9 2 2 7 9 3 2 7 10 0 2 7 10 1 2 7 10 2 2 7 10 3 2 7 11 0 2 7 11 1 2 7 11 2 2 7 11 3 3 0 8 0 3 0 8 1 3 0 8 2 3 0 8 3 3 0 9 0 3 0 9 1 3 0 9 2 3 0 9 3 3 0 10 0 3 0 10 1 3 0 10 2 3 0 10 3 3 0 11 0 3 0 11 1 3 0 11 2 3 0 11 3 3 1 8 0 3 1 8 1 3 1 8 2 3 1 8 3 3 1 9 0 3 1 9 1 3 1 9 2 3 1 9 3 3 1 10 0 3 1 10 1 3 1 10 2 3 1 10 3 3 1 11 0 3 1 11 1 3 1 11 2 3 1 11 3 3 2 8 0 3 2 8 1 3 2 8 2 3 2 8 3 3 2 9 0 3 2 9 1 3 2 9 2 3 2 9 3 3 2 10 0 3 2 10 1 3 2 10 2 3 2 10 3 3 2 11 0 3 2 11 1 3 2 11 2 3 2 11 3 3 
3 8 0 3 3 8 1 3 3 8 2 3 3 8 3 3 3 9 0 3 3 9 1 3 3 9 2 3 3 9 3 3 3 10 0 3 3 10 1 3 3 10 2 3 3 10 3 3 3 11 0 3 3 11 1 3 3 11 2 3 3 11 3 3 4 8 0 3 4 8 1 3 4 8 2 3 4 8 3 3 4 9 0 3 4 9 1 3 4 9 2 3 4 9 3 3 4 10 0 3 4 10 1 3 4 10 2 3 4 10 3 3 4 11 0 3 4 11 1 3 4 11 2 3 4 11 3 3 5 8 0 3 5 8 1 3 5 8 2 3 5 8 3 3 5 9 0 3 5 9 1 3 5 9 2 3 5 9 3 3 5 10 0 3 5 10 1 3 5 10 2 3 5 10 3 3 5 11 0 3 5 11 1 3 5 11 2 3 5 11 3 3 6 8 0 3 6 8 1 3 6 8 2 3 6 8 3 3 6 9 0 3 6 9 1 3 6 9 2 3 6 9 3 3 6 10 0 3 6 10 1 3 6 10 2 3 6 10 3 3 6 11 0 3 6 11 1 3 6 11 2 3 6 11 3 3 7 8 0 3 7 8 1 3 7 8 2 3 7 8 3 3 7 9 0 3 7 9 1 3 7 9 2 3 7 9 3 3 7 10 0 3 7 10 1 3 7 10 2 3 7 10 3 3 7 11 0 3 7 11 1 3 7 11 2 3 7 11 3 0 0 12 0 0 0 12 1 0 0 12 2 0 0 12 3 0 0 13 0 0 0 13 1 0 0 13 2 0 0 13 3 0 0 14 0 0 0 14 1 0 0 14 2 0 0 14 3 0 0 15 0 0 0 15 1 0 0 15 2 0 0 15 3 0 1 12 0 0 1 12 1 0 1 12 2 0 1 12 3 0 1 13 0 0 1 13 1 0 1 13 2 0 1 13 3 0 1 14 0 0 1 14 1 0 1 14 2 0 1 14 3 0 1 15 0 0 1 15 1 0 1 15 2 0 1 15 3 0 2 12 0 0 2 12 1 0 2 12 2 0 2 12 3 0 2 13 0 0 2 13 1 0 2 13 2 0 2 13 3 0 2 14 0 0 2 14 1 0 2 14 2 0 2 14 3 0 2 15 0 0 2 15 1 0 2 15 2 0 2 15 3 0 3 12 0 0 3 12 1 0 3 12 2 0 3 12 3 0 3 13 0 0 3 13 1 0 3 13 2 0 3 13 3 0 3 14 0 0 3 14 1 0 3 14 2 0 3 14 3 0 3 15 0 0 3 15 1 0 3 15 2 0 3 15 3 0 4 12 0 0 4 12 1 0 4 12 2 0 4 12 3 0 4 13 0 0 4 13 1 0 4 13 2 0 4 13 3 0 4 14 0 0 4 14 1 0 4 14 2 0 4 14 3 0 4 15 0 0 4 15 1 0 4 15 2 0 4 15 3 0 5 12 0 0 5 12 1 0 5 12 2 0 5 12 3 0 5 13 0 0 5 13 1 0 5 13 2 0 5 13 3 0 5 14 0 0 5 14 1 0 5 14 2 0 5 14 3 0 5 15 0 0 5 15 1 0 5 15 2 0 5 15 3 0 6 12 0 0 6 12 1 0 6 12 2 0 6 12 3 0 6 13 0 0 6 13 1 0 6 13 2 0 6 13 3 0 6 14 0 0 6 14 1 0 6 14 2 0 6 14 3 0 6 15 0 0 6 15 1 0 6 15 2 0 6 15 3 0 7 12 0 0 7 12 1 0 7 12 2 0 7 12 3 0 7 13 0 0 7 13 1 0 7 13 2 0 7 13 3 0 7 14 0 0 7 14 1 0 7 14 2 0 7 14 3 0 7 15 0 0 7 15 1 0 7 15 2 0 7 15 3 1 0 12 0 1 0 12 1 1 0 12 2 1 0 12 3 1 0 13 0 1 0 13 1 1 0 13 2 1 0 13 3 1 0 14 0 1 0 14 1 1 0 14 2 1 0 14 3 1 0 15 0 1 0 15 1 1 0 15 2 1 0 15 3 1 1 12 0 1 1 12 1 1 1 12 
2 1 1 12 3 1 1 13 0 1 1 13 1 1 1 13 2 1 1 13 3 1 1 14 0 1 1 14 1 1 1 14 2 1 1 14 3 1 1 15 0 1 1 15 1 1 1 15 2 1 1 15 3 1 2 12 0 1 2 12 1 1 2 12 2 1 2 12 3 1 2 13 0 1 2 13 1 1 2 13 2 1 2 13 3 1 2 14 0 1 2 14 1 1 2 14 2 1 2 14 3 1 2 15 0 1 2 15 1 1 2 15 2 1 2 15 3 1 3 12 0 1 3 12 1 1 3 12 2 1 3 12 3 1 3 13 0 1 3 13 1 1 3 13 2 1 3 13 3 1 3 14 0 1 3 14 1 1 3 14 2 1 3 14 3 1 3 15 0 1 3 15 1 1 3 15 2 1 3 15 3 1 4 12 0 1 4 12 1 1 4 12 2 1 4 12 3 1 4 13 0 1 4 13 1 1 4 13 2 1 4 13 3 1 4 14 0 1 4 14 1 1 4 14 2 1 4 14 3 1 4 15 0 1 4 15 1 1 4 15 2 1 4 15 3 1 5 12 0 1 5 12 1 1 5 12 2 1 5 12 3 1 5 13 0 1 5 13 1 1 5 13 2 1 5 13 3 1 5 14 0 1 5 14 1 1 5 14 2 1 5 14 3 1 5 15 0 1 5 15 1 1 5 15 2 1 5 15 3 1 6 12 0 1 6 12 1 1 6 12 2 1 6 12 3 1 6 13 0 1 6 13 1 1 6 13 2 1 6 13 3 1 6 14 0 1 6 14 1 1 6 14 2 1 6 14 3 1 6 15 0 1 6 15 1 1 6 15 2 1 6 15 3 1 7 12 0 1 7 12 1 1 7 12 2 1 7 12 3 1 7 13 0 1 7 13 1 1 7 13 2 1 7 13 3 1 7 14 0 1 7 14 1 1 7 14 2 1 7 14 3 1 7 15 0 1 7 15 1 1 7 15 2 1 7 15 3 2 0 12 0 2 0 12 1 2 0 12 2 2 0 12 3 2 0 13 0 2 0 13 1 2 0 13 2 2 0 13 3 2 0 14 0 2 0 14 1 2 0 14 2 2 0 14 3 2 0 15 0 2 0 15 1 2 0 15 2 2 0 15 3 2 1 12 0 2 1 12 1 2 1 12 2 2 1 12 3 2 1 13 0 2 1 13 1 2 1 13 2 2 1 13 3 2 1 14 0 2 1 14 1 2 1 14 2 2 1 14 3 2 1 15 0 2 1 15 1 2 1 15 2 2 1 15 3 2 2 12 0 2 2 12 1 2 2 12 2 2 2 12 3 2 2 13 0 2 2 13 1 2 2 13 2 2 2 13 3 2 2 14 0 2 2 14 1 2 2 14 2 2 2 14 3 2 2 15 0 2 2 15 1 2 2 15 2 2 2 15 3 2 3 12 0 2 3 12 1 2 3 12 2 2 3 12 3 2 3 13 0 2 3 13 1 2 3 13 2 2 3 13 3 2 3 14 0 2 3 14 1 2 3 14 2 2 3 14 3 2 3 15 0 2 3 15 1 2 3 15 2 2 3 15 3 2 4 12 0 2 4 12 1 2 4 12 2 2 4 12 3 2 4 13 0 2 4 13 1 2 4 13 2 2 4 13 3 2 4 14 0 2 4 14 1 2 4 14 2 2 4 14 3 2 4 15 0 2 4 15 1 2 4 15 2 2 4 15 3 2 5 12 0 2 5 12 1 2 5 12 2 2 5 12 3 2 5 13 0 2 5 13 1 2 5 13 2 2 5 13 3 2 5 14 0 2 5 14 1 2 5 14 2 2 5 14 3 2 5 15 0 2 5 15 1 2 5 15 2 2 5 15 3 2 6 12 0 2 6 12 1 2 6 12 2 2 6 12 3 2 6 13 0 2 6 13 1 2 6 13 2 2 6 13 3 2 6 14 0 2 6 14 1 2 6 14 2 2 6 14 3 2 6 15 0 2 6 15 1 2 6 15 2 2 6 15 3 2 7 12 0 
2 7 12 1 2 7 12 2 2 7 12 3 2 7 13 0 2 7 13 1 2 7 13 2 2 7 13 3 2 7 14 0 2 7 14 1 2 7 14 2 2 7 14 3 2 7 15 0 2 7 15 1 2 7 15 2 2 7 15 3 3 0 12 0 3 0 12 1 3 0 12 2 3 0 12 3 3 0 13 0 3 0 13 1 3 0 13 2 3 0 13 3 3 0 14 0 3 0 14 1 3 0 14 2 3 0 14 3 3 0 15 0 3 0 15 1 3 0 15 2 3 0 15 3 3 1 12 0 3 1 12 1 3 1 12 2 3 1 12 3 3 1 13 0 3 1 13 1 3 1 13 2 3 1 13 3 3 1 14 0 3 1 14 1 3 1 14 2 3 1 14 3 3 1 15 0 3 1 15 1 3 1 15 2 3 1 15 3 3 2 12 0 3 2 12 1 3 2 12 2 3 2 12 3 3 2 13 0 3 2 13 1 3 2 13 2 3 2 13 3 3 2 14 0 3 2 14 1 3 2 14 2 3 2 14 3 3 2 15 0 3 2 15 1 3 2 15 2 3 2 15 3 3 3 12 0 3 3 12 1 3 3 12 2 3 3 12 3 3 3 13 0 3 3 13 1 3 3 13 2 3 3 13 3 3 3 14 0 3 3 14 1 3 3 14 2 3 3 14 3 3 3 15 0 3 3 15 1 3 3 15 2 3 3 15 3 3 4 12 0 3 4 12 1 3 4 12 2 3 4 12 3 3 4 13 0 3 4 13 1 3 4 13 2 3 4 13 3 3 4 14 0 3 4 14 1 3 4 14 2 3 4 14 3 3 4 15 0 3 4 15 1 3 4 15 2 3 4 15 3 3 5 12 0 3 5 12 1 3 5 12 2 3 5 12 3 3 5 13 0 3 5 13 1 3 5 13 2 3 5 13 3 3 5 14 0 3 5 14 1 3 5 14 2 3 5 14 3 3 5 15 0 3 5 15 1 3 5 15 2 3 5 15 3 3 6 12 0 3 6 12 1 3 6 12 2 3 6 12 3 3 6 13 0 3 6 13 1 3 6 13 2 3 6 13 3 3 6 14 0 3 6 14 1 3 6 14 2 3 6 14 3 3 6 15 0 3 6 15 1 3 6 15 2 3 6 15 3 3 7 12 0 3 7 12 1 3 7 12 2 3 7 12 3 3 7 13 0 3 7 13 1 3 7 13 2 3 7 13 3 3 7 14 0 3 7 14 1 3 7 14 2 3 7 14 3 3 7 15 0 3 7 15 1 3 7 15 2 3 7 15 3 4 0 15 0 4 0 15 1 4 0 15 2 4 0 15 3 4 0 14 0 4 0 14 1 4 0 14 2 4 0 14 3 4 0 13 0 4 0 13 1 4 0 13 2 4 0 13 3 4 0 12 0 4 0 12 1 4 0 12 2 4 0 12 3 4 1 15 0 4 1 15 1 4 1 15 2 4 1 15 3 4 1 14 0 4 1 14 1 4 1 14 2 4 1 14 3 4 1 13 0 4 1 13 1 4 1 13 2 4 1 13 3 4 1 12 0 4 1 12 1 4 1 12 2 4 1 12 3 4 2 15 0 4 2 15 1 4 2 15 2 4 2 15 3 4 2 14 0 4 2 14 1 4 2 14 2 4 2 14 3 4 2 13 0 4 2 13 1 4 2 13 2 4 2 13 3 4 2 12 0 4 2 12 1 4 2 12 2 4 2 12 3 4 3 15 0 4 3 15 1 4 3 15 2 4 3 15 3 4 3 14 0 4 3 14 1 4 3 14 2 4 3 14 3 4 3 13 0 4 3 13 1 4 3 13 2 4 3 13 3 4 3 12 0 4 3 12 1 4 3 12 2 4 3 12 3 4 4 15 0 4 4 15 1 4 4 15 2 4 4 15 3 4 4 14 0 4 4 14 1 4 4 14 2 4 4 14 3 4 4 13 0 4 4 13 1 4 4 13 2 4 4 13 3 4 4 12 0 4 4 12 1 4 4 12 2 4 
4 12 3 4 5 15 0 4 5 15 1 4 5 15 2 4 5 15 3 4 5 14 0 4 5 14 1 4 5 14 2 4 5 14 3 4 5 13 0 4 5 13 1 4 5 13 2 4 5 13 3 4 5 12 0 4 5 12 1 4 5 12 2 4 5 12 3 4 6 15 0 4 6 15 1 4 6 15 2 4 6 15 3 4 6 14 0 4 6 14 1 4 6 14 2 4 6 14 3 4 6 13 0 4 6 13 1 4 6 13 2 4 6 13 3 4 6 12 0 4 6 12 1 4 6 12 2 4 6 12 3 4 7 15 0 4 7 15 1 4 7 15 2 4 7 15 3 4 7 14 0 4 7 14 1 4 7 14 2 4 7 14 3 4 7 13 0 4 7 13 1 4 7 13 2 4 7 13 3 4 7 12 0 4 7 12 1 4 7 12 2 4 7 12 3 5 0 15 0 5 0 15 1 5 0 15 2 5 0 15 3 5 0 14 0 5 0 14 1 5 0 14 2 5 0 14 3 5 0 13 0 5 0 13 1 5 0 13 2 5 0 13 3 5 0 12 0 5 0 12 1 5 0 12 2 5 0 12 3 5 1 15 0 5 1 15 1 5 1 15 2 5 1 15 3 5 1 14 0 5 1 14 1 5 1 14 2 5 1 14 3 5 1 13 0 5 1 13 1 5 1 13 2 5 1 13 3 5 1 12 0 5 1 12 1 5 1 12 2 5 1 12 3 5 2 15 0 5 2 15 1 5 2 15 2 5 2 15 3 5 2 14 0 5 2 14 1 5 2 14 2 5 2 14 3 5 2 13 0 5 2 13 1 5 2 13 2 5 2 13 3 5 2 12 0 5 2 12 1 5 2 12 2 5 2 12 3 5 3 15 0 5 3 15 1 5 3 15 2 5 3 15 3 5 3 14 0 5 3 14 1 5 3 14 2 5 3 14 3 5 3 13 0 5 3 13 1 5 3 13 2 5 3 13 3 5 3 12 0 5 3 12 1 5 3 12 2 5 3 12 3 5 4 15 0 5 4 15 1 5 4 15 2 5 4 15 3 5 4 14 0 5 4 14 1 5 4 14 2 5 4 14 3 5 4 13 0 5 4 13 1 5 4 13 2 5 4 13 3 5 4 12 0 5 4 12 1 5 4 12 2 5 4 12 3 5 5 15 0 5 5 15 1 5 5 15 2 5 5 15 3 5 5 14 0 5 5 14 1 5 5 14 2 5 5 14 3 5 5 13 0 5 5 13 1 5 5 13 2 5 5 13 3 5 5 12 0 5 5 12 1 5 5 12 2 5 5 12 3 5 6 15 0 5 6 15 1 5 6 15 2 5 6 15 3 5 6 14 0 5 6 14 1 5 6 14 2 5 6 14 3 5 6 13 0 5 6 13 1 5 6 13 2 5 6 13 3 5 6 12 0 5 6 12 1 5 6 12 2 5 6 12 3 5 7 15 0 5 7 15 1 5 7 15 2 5 7 15 3 5 7 14 0 5 7 14 1 5 7 14 2 5 7 14 3 5 7 13 0 5 7 13 1 5 7 13 2 5 7 13 3 5 7 12 0 5 7 12 1 5 7 12 2 5 7 12 3 6 0 15 0 6 0 15 1 6 0 15 2 6 0 15 3 6 0 14 0 6 0 14 1 6 0 14 2 6 0 14 3 6 0 13 0 6 0 13 1 6 0 13 2 6 0 13 3 6 0 12 0 6 0 12 1 6 0 12 2 6 0 12 3 6 1 15 0 6 1 15 1 6 1 15 2 6 1 15 3 6 1 14 0 6 1 14 1 6 1 14 2 6 1 14 3 6 1 13 0 6 1 13 1 6 1 13 2 6 1 13 3 6 1 12 0 6 1 12 1 6 1 12 2 6 1 12 3 6 2 15 0 6 2 15 1 6 2 15 2 6 2 15 3 6 2 14 0 6 2 14 1 6 2 14 2 6 2 14 3 6 2 13 0 6 2 13 1 6 2 13 2 6 2 13 3 6 2 12 0 6 2 
12 1 6 2 12 2 6 2 12 3 6 3 15 0 6 3 15 1 6 3 15 2 6 3 15 3 6 3 14 0 6 3 14 1 6 3 14 2 6 3 14 3 6 3 13 0 6 3 13 1 6 3 13 2 6 3 13 3 6 3 12 0 6 3 12 1 6 3 12 2 6 3 12 3 6 4 15 0 6 4 15 1 6 4 15 2 6 4 15 3 6 4 14 0 6 4 14 1 6 4 14 2 6 4 14 3 6 4 13 0 6 4 13 1 6 4 13 2 6 4 13 3 6 4 12 0 6 4 12 1 6 4 12 2 6 4 12 3 6 5 15 0 6 5 15 1 6 5 15 2 6 5 15 3 6 5 14 0 6 5 14 1 6 5 14 2 6 5 14 3 6 5 13 0 6 5 13 1 6 5 13 2 6 5 13 3 6 5 12 0 6 5 12 1 6 5 12 2 6 5 12 3 6 6 15 0 6 6 15 1 6 6 15 2 6 6 15 3 6 6 14 0 6 6 14 1 6 6 14 2 6 6 14 3 6 6 13 0 6 6 13 1 6 6 13 2 6 6 13 3 6 6 12 0 6 6 12 1 6 6 12 2 6 6 12 3 6 7 15 0 6 7 15 1 6 7 15 2 6 7 15 3 6 7 14 0 6 7 14 1 6 7 14 2 6 7 14 3 6 7 13 0 6 7 13 1 6 7 13 2 6 7 13 3 6 7 12 0 6 7 12 1 6 7 12 2 6 7 12 3 7 0 15 0 7 0 15 1 7 0 15 2 7 0 15 3 7 0 14 0 7 0 14 1 7 0 14 2 7 0 14 3 7 0 13 0 7 0 13 1 7 0 13 2 7 0 13 3 7 0 12 0 7 0 12 1 7 0 12 2 7 0 12 3 7 1 15 0 7 1 15 1 7 1 15 2 7 1 15 3 7 1 14 0 7 1 14 1 7 1 14 2 7 1 14 3 7 1 13 0 7 1 13 1 7 1 13 2 7 1 13 3 7 1 12 0 7 1 12 1 7 1 12 2 7 1 12 3 7 2 15 0 7 2 15 1 7 2 15 2 7 2 15 3 7 2 14 0 7 2 14 1 7 2 14 2 7 2 14 3 7 2 13 0 7 2 13 1 7 2 13 2 7 2 13 3 7 2 12 0 7 2 12 1 7 2 12 2 7 2 12 3 7 3 15 0 7 3 15 1 7 3 15 2 7 3 15 3 7 3 14 0 7 3 14 1 7 3 14 2 7 3 14 3 7 3 13 0 7 3 13 1 7 3 13 2 7 3 13 3 7 3 12 0 7 3 12 1 7 3 12 2 7 3 12 3 7 4 15 0 7 4 15 1 7 4 15 2 7 4 15 3 7 4 14 0 7 4 14 1 7 4 14 2 7 4 14 3 7 4 13 0 7 4 13 1 7 4 13 2 7 4 13 3 7 4 12 0 7 4 12 1 7 4 12 2 7 4 12 3 7 5 15 0 7 5 15 1 7 5 15 2 7 5 15 3 7 5 14 0 7 5 14 1 7 5 14 2 7 5 14 3 7 5 13 0 7 5 13 1 7 5 13 2 7 5 13 3 7 5 12 0 7 5 12 1 7 5 12 2 7 5 12 3 7 6 15 0 7 6 15 1 7 6 15 2 7 6 15 3 7 6 14 0 7 6 14 1 7 6 14 2 7 6 14 3 7 6 13 0 7 6 13 1 7 6 13 2 7 6 13 3 7 6 12 0 7 6 12 1 7 6 12 2 7 6 12 3 7 7 15 0 7 7 15 1 7 7 15 2 7 7 15 3 7 7 14 0 7 7 14 1 7 7 14 2 7 7 14 3 7 7 13 0 7 7 13 1 7 7 13 2 7 7 13 3 7 7 12 0 7 7 12 1 7 7 12 2 7 7 12 3 4 0 11 0 4 0 11 1 4 0 11 2 4 0 11 3 4 0 10 0 4 0 10 1 4 0 10 2 4 0 10 3 4 0 9 0 4 0 9 1 4 0 9 2 4 0 9 3 
4 0 8 0 4 0 8 1 4 0 8 2 4 0 8 3 4 1 11 0 4 1 11 1 4 1 11 2 4 1 11 3 4 1 10 0 4 1 10 1 4 1 10 2 4 1 10 3 4 1 9 0 4 1 9 1 4 1 9 2 4 1 9 3 4 1 8 0 4 1 8 1 4 1 8 2 4 1 8 3 4 2 11 0 4 2 11 1 4 2 11 2 4 2 11 3 4 2 10 0 4 2 10 1 4 2 10 2 4 2 10 3 4 2 9 0 4 2 9 1 4 2 9 2 4 2 9 3 4 2 8 0 4 2 8 1 4 2 8 2 4 2 8 3 4 3 11 0 4 3 11 1 4 3 11 2 4 3 11 3 4 3 10 0 4 3 10 1 4 3 10 2 4 3 10 3 4 3 9 0 4 3 9 1 4 3 9 2 4 3 9 3 4 3 8 0 4 3 8 1 4 3 8 2 4 3 8 3 4 4 11 0 4 4 11 1 4 4 11 2 4 4 11 3 4 4 10 0 4 4 10 1 4 4 10 2 4 4 10 3 4 4 9 0 4 4 9 1 4 4 9 2 4 4 9 3 4 4 8 0 4 4 8 1 4 4 8 2 4 4 8 3 4 5 11 0 4 5 11 1 4 5 11 2 4 5 11 3 4 5 10 0 4 5 10 1 4 5 10 2 4 5 10 3 4 5 9 0 4 5 9 1 4 5 9 2 4 5 9 3 4 5 8 0 4 5 8 1 4 5 8 2 4 5 8 3 4 6 11 0 4 6 11 1 4 6 11 2 4 6 11 3 4 6 10 0 4 6 10 1 4 6 10 2 4 6 10 3 4 6 9 0 4 6 9 1 4 6 9 2 4 6 9 3 4 6 8 0 4 6 8 1 4 6 8 2 4 6 8 3 4 7 11 0 4 7 11 1 4 7 11 2 4 7 11 3 4 7 10 0 4 7 10 1 4 7 10 2 4 7 10 3 4 7 9 0 4 7 9 1 4 7 9 2 4 7 9 3 4 7 8 0 4 7 8 1 4 7 8 2 4 7 8 3 5 0 11 0 5 0 11 1 5 0 11 2 5 0 11 3 5 0 10 0 5 0 10 1 5 0 10 2 5 0 10 3 5 0 9 0 5 0 9 1 5 0 9 2 5 0 9 3 5 0 8 0 5 0 8 1 5 0 8 2 5 0 8 3 5 1 11 0 5 1 11 1 5 1 11 2 5 1 11 3 5 1 10 0 5 1 10 1 5 1 10 2 5 1 10 3 5 1 9 0 5 1 9 1 5 1 9 2 5 1 9 3 5 1 8 0 5 1 8 1 5 1 8 2 5 1 8 3 5 2 11 0 5 2 11 1 5 2 11 2 5 2 11 3 5 2 10 0 5 2 10 1 5 2 10 2 5 2 10 3 5 2 9 0 5 2 9 1 5 2 9 2 5 2 9 3 5 2 8 0 5 2 8 1 5 2 8 2 5 2 8 3 5 3 11 0 5 3 11 1 5 3 11 2 5 3 11 3 5 3 10 0 5 3 10 1 5 3 10 2 5 3 10 3 5 3 9 0 5 3 9 1 5 3 9 2 5 3 9 3 5 3 8 0 5 3 8 1 5 3 8 2 5 3 8 3 5 4 11 0 5 4 11 1 5 4 11 2 5 4 11 3 5 4 10 0 5 4 10 1 5 4 10 2 5 4 10 3 5 4 9 0 5 4 9 1 5 4 9 2 5 4 9 3 5 4 8 0 5 4 8 1 5 4 8 2 5 4 8 3 5 5 11 0 5 5 11 1 5 5 11 2 5 5 11 3 5 5 10 0 5 5 10 1 5 5 10 2 5 5 10 3 5 5 9 0 5 5 9 1 5 5 9 2 5 5 9 3 5 5 8 0 5 5 8 1 5 5 8 2 5 5 8 3 5 6 11 0 5 6 11 1 5 6 11 2 5 6 11 3 5 6 10 0 5 6 10 1 5 6 10 2 5 6 10 3 5 6 9 0 5 6 9 1 5 6 9 2 5 6 9 3 5 6 8 0 5 6 8 1 5 6 8 2 5 6 8 3 5 7 11 0 5 7 11 1 5 7 11 2 5 7 11 3 5 7 10 0 5 7 10 1 5 7 10 2 
5 7 10 3 5 7 9 0 5 7 9 1 5 7 9 2 5 7 9 3 5 7 8 0 5 7 8 1 5 7 8 2 5 7 8 3 6 0 11 0 6 0 11 1 6 0 11 2 6 0 11 3 6 0 10 0 6 0 10 1 6 0 10 2 6 0 10 3 6 0 9 0 6 0 9 1 6 0 9 2 6 0 9 3 6 0 8 0 6 0 8 1 6 0 8 2 6 0 8 3 6 1 11 0 6 1 11 1 6 1 11 2 6 1 11 3 6 1 10 0 6 1 10 1 6 1 10 2 6 1 10 3 6 1 9 0 6 1 9 1 6 1 9 2 6 1 9 3 6 1 8 0 6 1 8 1 6 1 8 2 6 1 8 3 6 2 11 0 6 2 11 1 6 2 11 2 6 2 11 3 6 2 10 0 6 2 10 1 6 2 10 2 6 2 10 3 6 2 9 0 6 2 9 1 6 2 9 2 6 2 9 3 6 2 8 0 6 2 8 1 6 2 8 2 6 2 8 3 6 3 11 0 6 3 11 1 6 3 11 2 6 3 11 3 6 3 10 0 6 3 10 1 6 3 10 2 6 3 10 3 6 3 9 0 6 3 9 1 6 3 9 2 6 3 9 3 6 3 8 0 6 3 8 1 6 3 8 2 6 3 8 3 6 4 11 0 6 4 11 1 6 4 11 2 6 4 11 3 6 4 10 0 6 4 10 1 6 4 10 2 6 4 10 3 6 4 9 0 6 4 9 1 6 4 9 2 6 4 9 3 6 4 8 0 6 4 8 1 6 4 8 2 6 4 8 3 6 5 11 0 6 5 11 1 6 5 11 2 6 5 11 3 6 5 10 0 6 5 10 1 6 5 10 2 6 5 10 3 6 5 9 0 6 5 9 1 6 5 9 2 6 5 9 3 6 5 8 0 6 5 8 1 6 5 8 2 6 5 8 3 6 6 11 0 6 6 11 1 6 6 11 2 6 6 11 3 6 6 10 0 6 6 10 1 6 6 10 2 6 6 10 3 6 6 9 0 6 6 9 1 6 6 9 2 6 6 9 3 6 6 8 0 6 6 8 1 6 6 8 2 6 6 8 3 6 7 11 0 6 7 11 1 6 7 11 2 6 7 11 3 6 7 10 0 6 7 10 1 6 7 10 2 6 7 10 3 6 7 9 0 6 7 9 1 6 7 9 2 6 7 9 3 6 7 8 0 6 7 8 1 6 7 8 2 6 7 8 3 7 0 11 0 7 0 11 1 7 0 11 2 7 0 11 3 7 0 10 0 7 0 10 1 7 0 10 2 7 0 10 3 7 0 9 0 7 0 9 1 7 0 9 2 7 0 9 3 7 0 8 0 7 0 8 1 7 0 8 2 7 0 8 3 7 1 11 0 7 1 11 1 7 1 11 2 7 1 11 3 7 1 10 0 7 1 10 1 7 1 10 2 7 1 10 3 7 1 9 0 7 1 9 1 7 1 9 2 7 1 9 3 7 1 8 0 7 1 8 1 7 1 8 2 7 1 8 3 7 2 11 0 7 2 11 1 7 2 11 2 7 2 11 3 7 2 10 0 7 2 10 1 7 2 10 2 7 2 10 3 7 2 9 0 7 2 9 1 7 2 9 2 7 2 9 3 7 2 8 0 7 2 8 1 7 2 8 2 7 2 8 3 7 3 11 0 7 3 11 1 7 3 11 2 7 3 11 3 7 3 10 0 7 3 10 1 7 3 10 2 7 3 10 3 7 3 9 0 7 3 9 1 7 3 9 2 7 3 9 3 7 3 8 0 7 3 8 1 7 3 8 2 7 3 8 3 7 4 11 0 7 4 11 1 7 4 11 2 7 4 11 3 7 4 10 0 7 4 10 1 7 4 10 2 7 4 10 3 7 4 9 0 7 4 9 1 7 4 9 2 7 4 9 3 7 4 8 0 7 4 8 1 7 4 8 2 7 4 8 3 7 5 11 0 7 5 11 1 7 5 11 2 7 5 11 3 7 5 10 0 7 5 10 1 7 5 10 2 7 5 10 3 7 5 9 0 7 5 9 1 7 5 9 2 7 5 9 3 7 5 8 0 7 5 8 1 7 5 8 2 7 5 8 3 7 6 11 0 7 6 11 1 7 6 
11 2 7 6 11 3 7 6 10 0 7 6 10 1 7 6 10 2 7 6 10 3 7 6 9 0 7 6 9 1 7 6 9 2 7 6 9 3 7 6 8 0 7 6 8 1 7 6 8 2 7 6 8 3 7 7 11 0 7 7 11 1 7 7 11 2 7 7 11 3 7 7 10 0 7 7 10 1 7 7 10 2 7 7 10 3 7 7 9 0 7 7 9 1 7 7 9 2 7 7 9 3 7 7 8 0 7 7 8 1 7 7 8 2 7 7 8 3 4 0 7 0 4 0 7 1 4 0 7 2 4 0 7 3 4 0 6 0 4 0 6 1 4 0 6 2 4 0 6 3 4 0 5 0 4 0 5 1 4 0 5 2 4 0 5 3 4 0 4 0 4 0 4 1 4 0 4 2 4 0 4 3 4 1 7 0 4 1 7 1 4 1 7 2 4 1 7 3 4 1 6 0 4 1 6 1 4 1 6 2 4 1 6 3 4 1 5 0 4 1 5 1 4 1 5 2 4 1 5 3 4 1 4 0 4 1 4 1 4 1 4 2 4 1 4 3 4 2 7 0 4 2 7 1 4 2 7 2 4 2 7 3 4 2 6 0 4 2 6 1 4 2 6 2 4 2 6 3 4 2 5 0 4 2 5 1 4 2 5 2 4 2 5 3 4 2 4 0 4 2 4 1 4 2 4 2 4 2 4 3 4 3 7 0 4 3 7 1 4 3 7 2 4 3 7 3 4 3 6 0 4 3 6 1 4 3 6 2 4 3 6 3 4 3 5 0 4 3 5 1 4 3 5 2 4 3 5 3 4 3 4 0 4 3 4 1 4 3 4 2 4 3 4 3 4 4 7 0 4 4 7 1 4 4 7 2 4 4 7 3 4 4 6 0 4 4 6 1 4 4 6 2 4 4 6 3 4 4 5 0 4 4 5 1 4 4 5 2 4 4 5 3 4 4 4 0 4 4 4 1 4 4 4 2 4 4 4 3 4 5 7 0 4 5 7 1 4 5 7 2 4 5 7 3 4 5 6 0 4 5 6 1 4 5 6 2 4 5 6 3 4 5 5 0 4 5 5 1 4 5 5 2 4 5 5 3 4 5 4 0 4 5 4 1 4 5 4 2 4 5 4 3 4 6 7 0 4 6 7 1 4 6 7 2 4 6 7 3 4 6 6 0 4 6 6 1 4 6 6 2 4 6 6 3 4 6 5 0 4 6 5 1 4 6 5 2 4 6 5 3 4 6 4 0 4 6 4 1 4 6 4 2 4 6 4 3 4 7 7 0 4 7 7 1 4 7 7 2 4 7 7 3 4 7 6 0 4 7 6 1 4 7 6 2 4 7 6 3 4 7 5 0 4 7 5 1 4 7 5 2 4 7 5 3 4 7 4 0 4 7 4 1 4 7 4 2 4 7 4 3 5 0 7 0 5 0 7 1 5 0 7 2 5 0 7 3 5 0 6 0 5 0 6 1 5 0 6 2 5 0 6 3 5 0 5 0 5 0 5 1 5 0 5 2 5 0 5 3 5 0 4 0 5 0 4 1 5 0 4 2 5 0 4 3 5 1 7 0 5 1 7 1 5 1 7 2 5 1 7 3 5 1 6 0 5 1 6 1 5 1 6 2 5 1 6 3 5 1 5 0 5 1 5 1 5 1 5 2 5 1 5 3 5 1 4 0 5 1 4 1 5 1 4 2 5 1 4 3 5 2 7 0 5 2 7 1 5 2 7 2 5 2 7 3 5 2 6 0 5 2 6 1 5 2 6 2 5 2 6 3 5 2 5 0 5 2 5 1 5 2 5 2 5 2 5 3 5 2 4 0 5 2 4 1 5 2 4 2 5 2 4 3 5 3 7 0 5 3 7 1 5 3 7 2 5 3 7 3 5 3 6 0 5 3 6 1 5 3 6 2 5 3 6 3 5 3 5 0 5 3 5 1 5 3 5 2 5 3 5 3 5 3 4 0 5 3 4 1 5 3 4 2 5 3 4 3 5 4 7 0 5 4 7 1 5 4 7 2 5 4 7 3 5 4 6 0 5 4 6 1 5 4 6 2 5 4 6 3 5 4 5 0 5 4 5 1 5 4 5 2 5 4 5 3 5 4 4 0 5 4 4 1 5 4 4 2 5 4 4 3 5 5 7 0 5 5 7 1 5 5 7 2 5 5 7 3 5 5 6 0 5 5 6 1 5 5 6 2 5 5 6 3 5 5 5 0 5 5 5 1 5 5 5 
2 5 5 5 3 5 5 4 0 5 5 4 1 5 5 4 2 5 5 4 3 5 6 7 0 5 6 7 1 5 6 7 2 5 6 7 3 5 6 6 0 5 6 6 1 5 6 6 2 5 6 6 3 5 6 5 0 5 6 5 1 5 6 5 2 5 6 5 3 5 6 4 0 5 6 4 1 5 6 4 2 5 6 4 3 5 7 7 0 5 7 7 1 5 7 7 2 5 7 7 3 5 7 6 0 5 7 6 1 5 7 6 2 5 7 6 3 5 7 5 0 5 7 5 1 5 7 5 2 5 7 5 3 5 7 4 0 5 7 4 1 5 7 4 2 5 7 4 3 6 0 7 0 6 0 7 1 6 0 7 2 6 0 7 3 6 0 6 0 6 0 6 1 6 0 6 2 6 0 6 3 6 0 5 0 6 0 5 1 6 0 5 2 6 0 5 3 6 0 4 0 6 0 4 1 6 0 4 2 6 0 4 3 6 1 7 0 6 1 7 1 6 1 7 2 6 1 7 3 6 1 6 0 6 1 6 1 6 1 6 2 6 1 6 3 6 1 5 0 6 1 5 1 6 1 5 2 6 1 5 3 6 1 4 0 6 1 4 1 6 1 4 2 6 1 4 3 6 2 7 0 6 2 7 1 6 2 7 2 6 2 7 3 6 2 6 0 6 2 6 1 6 2 6 2 6 2 6 3 6 2 5 0 6 2 5 1 6 2 5 2 6 2 5 3 6 2 4 0 6 2 4 1 6 2 4 2 6 2 4 3 6 3 7 0 6 3 7 1 6 3 7 2 6 3 7 3 6 3 6 0 6 3 6 1 6 3 6 2 6 3 6 3 6 3 5 0 6 3 5 1 6 3 5 2 6 3 5 3 6 3 4 0 6 3 4 1 6 3 4 2 6 3 4 3 6 4 7 0 6 4 7 1 6 4 7 2 6 4 7 3 6 4 6 0 6 4 6 1 6 4 6 2 6 4 6 3 6 4 5 0 6 4 5 1 6 4 5 2 6 4 5 3 6 4 4 0 6 4 4 1 6 4 4 2 6 4 4 3 6 5 7 0 6 5 7 1 6 5 7 2 6 5 7 3 6 5 6 0 6 5 6 1 6 5 6 2 6 5 6 3 6 5 5 0 6 5 5 1 6 5 5 2 6 5 5 3 6 5 4 0 6 5 4 1 6 5 4 2 6 5 4 3 6 6 7 0 6 6 7 1 6 6 7 2 6 6 7 3 6 6 6 0 6 6 6 1 6 6 6 2 6 6 6 3 6 6 5 0 6 6 5 1 6 6 5 2 6 6 5 3 6 6 4 0 6 6 4 1 6 6 4 2 6 6 4 3 6 7 7 0 6 7 7 1 6 7 7 2 6 7 7 3 6 7 6 0 6 7 6 1 6 7 6 2 6 7 6 3 6 7 5 0 6 7 5 1 6 7 5 2 6 7 5 3 6 7 4 0 6 7 4 1 6 7 4 2 6 7 4 3 7 0 7 0 7 0 7 1 7 0 7 2 7 0 7 3 7 0 6 0 7 0 6 1 7 0 6 2 7 0 6 3 7 0 5 0 7 0 5 1 7 0 5 2 7 0 5 3 7 0 4 0 7 0 4 1 7 0 4 2 7 0 4 3 7 1 7 0 7 1 7 1 7 1 7 2 7 1 7 3 7 1 6 0 7 1 6 1 7 1 6 2 7 1 6 3 7 1 5 0 7 1 5 1 7 1 5 2 7 1 5 3 7 1 4 0 7 1 4 1 7 1 4 2 7 1 4 3 7 2 7 0 7 2 7 1 7 2 7 2 7 2 7 3 7 2 6 0 7 2 6 1 7 2 6 2 7 2 6 3 7 2 5 0 7 2 5 1 7 2 5 2 7 2 5 3 7 2 4 0 7 2 4 1 7 2 4 2 7 2 4 3 7 3 7 0 7 3 7 1 7 3 7 2 7 3 7 3 7 3 6 0 7 3 6 1 7 3 6 2 7 3 6 3 7 3 5 0 7 3 5 1 7 3 5 2 7 3 5 3 7 3 4 0 7 3 4 1 7 3 4 2 7 3 4 3 7 4 7 0 7 4 7 1 7 4 7 2 7 4 7 3 7 4 6 0 7 4 6 1 7 4 6 2 7 4 6 3 7 4 5 0 7 4 5 1 7 4 5 2 7 4 5 3 7 4 4 0 7 4 4 1 7 4 4 2 7 4 4 3 7 5 7 0 7 5 7 1 7 5 7 2 7 5 7 3 7 5 6 
0 7 5 6 1 7 5 6 2 7 5 6 3 7 5 5 0 7 5 5 1 7 5 5 2 7 5 5 3 7 5 4 0 7 5 4 1 7 5 4 2 7 5 4 3 7 6 7 0 7 6 7 1 7 6 7 2 7 6 7 3 7 6 6 0 7 6 6 1 7 6 6 2 7 6 6 3 7 6 5 0 7 6 5 1 7 6 5 2 7 6 5 3 7 6 4 0 7 6 4 1 7 6 4 2 7 6 4 3 7 7 7 0 7 7 7 1 7 7 7 2 7 7 7 3 7 7 6 0 7 7 6 1 7 7 6 2 7 7 6 3 7 7 5 0 7 7 5 1 7 7 5 2 7 7 5 3 7 7 4 0 7 7 4 1 7 7 4 2 7 7 4 3 4 0 3 0 4 0 3 1 4 0 3 2 4 0 3 3 4 0 2 0 4 0 2 1 4 0 2 2 4 0 2 3 4 0 1 0 4 0 1 1 4 0 1 2 4 0 1 3 4 0 0 0 4 0 0 1 4 0 0 2 4 0 0 3 4 1 3 0 4 1 3 1 4 1 3 2 4 1 3 3 4 1 2 0 4 1 2 1 4 1 2 2 4 1 2 3 4 1 1 0 4 1 1 1 4 1 1 2 4 1 1 3 4 1 0 0 4 1 0 1 4 1 0 2 4 1 0 3 4 2 3 0 4 2 3 1 4 2 3 2 4 2 3 3 4 2 2 0 4 2 2 1 4 2 2 2 4 2 2 3 4 2 1 0 4 2 1 1 4 2 1 2 4 2 1 3 4 2 0 0 4 2 0 1 4 2 0 2 4 2 0 3 4 3 3 0 4 3 3 1 4 3 3 2 4 3 3 3 4 3 2 0 4 3 2 1 4 3 2 2 4 3 2 3 4 3 1 0 4 3 1 1 4 3 1 2 4 3 1 3 4 3 0 0 4 3 0 1 4 3 0 2 4 3 0 3 4 4 3 0 4 4 3 1 4 4 3 2 4 4 3 3 4 4 2 0 4 4 2 1 4 4 2 2 4 4 2 3 4 4 1 0 4 4 1 1 4 4 1 2 4 4 1 3 4 4 0 0 4 4 0 1 4 4 0 2 4 4 0 3 4 5 3 0 4 5 3 1 4 5 3 2 4 5 3 3 4 5 2 0 4 5 2 1 4 5 2 2 4 5 2 3 4 5 1 0 4 5 1 1 4 5 1 2 4 5 1 3 4 5 0 0 4 5 0 1 4 5 0 2 4 5 0 3 4 6 3 0 4 6 3 1 4 6 3 2 4 6 3 3 4 6 2 0 4 6 2 1 4 6 2 2 4 6 2 3 4 6 1 0 4 6 1 1 4 6 1 2 4 6 1 3 4 6 0 0 4 6 0 1 4 6 0 2 4 6 0 3 4 7 3 0 4 7 3 1 4 7 3 2 4 7 3 3 4 7 2 0 4 7 2 1 4 7 2 2 4 7 2 3 4 7 1 0 4 7 1 1 4 7 1 2 4 7 1 3 4 7 0 0 4 7 0 1 4 7 0 2 4 7 0 3 5 0 3 0 5 0 3 1 5 0 3 2 5 0 3 3 5 0 2 0 5 0 2 1 5 0 2 2 5 0 2 3 5 0 1 0 5 0 1 1 5 0 1 2 5 0 1 3 5 0 0 0 5 0 0 1 5 0 0 2 5 0 0 3 5 1 3 0 5 1 3 1 5 1 3 2 5 1 3 3 5 1 2 0 5 1 2 1 5 1 2 2 5 1 2 3 5 1 1 0 5 1 1 1 5 1 1 2 5 1 1 3 5 1 0 0 5 1 0 1 5 1 0 2 5 1 0 3 5 2 3 0 5 2 3 1 5 2 3 2 5 2 3 3 5 2 2 0 5 2 2 1 5 2 2 2 5 2 2 3 5 2 1 0 5 2 1 1 5 2 1 2 5 2 1 3 5 2 0 0 5 2 0 1 5 2 0 2 5 2 0 3 5 3 3 0 5 3 3 1 5 3 3 2 5 3 3 3 5 3 2 0 5 3 2 1 5 3 2 2 5 3 2 3 5 3 1 0 5 3 1 1 5 3 1 2 5 3 1 3 5 3 0 0 5 3 0 1 5 3 0 2 5 3 0 3 5 4 3 0 5 4 3 1 5 4 3 2 5 4 3 3 5 4 2 0 5 4 2 1 5 4 2 2 5 4 2 3 5 4 1 0 5 4 1 1 5 4 1 2 5 4 1 3 5 4 0 0 5 4 0 1 5 4 0 
2 5 4 0 3 5 5 3 0 5 5 3 1 5 5 3 2 5 5 3 3 5 5 2 0 5 5 2 1 5 5 2 2 5 5 2 3 5 5 1 0 5 5 1 1 5 5 1 2 5 5 1 3 5 5 0 0 5 5 0 1 5 5 0 2 5 5 0 3 5 6 3 0 5 6 3 1 5 6 3 2 5 6 3 3 5 6 2 0 5 6 2 1 5 6 2 2 5 6 2 3 5 6 1 0 5 6 1 1 5 6 1 2 5 6 1 3 5 6 0 0 5 6 0 1 5 6 0 2 5 6 0 3 5 7 3 0 5 7 3 1 5 7 3 2 5 7 3 3 5 7 2 0 5 7 2 1 5 7 2 2 5 7 2 3 5 7 1 0 5 7 1 1 5 7 1 2 5 7 1 3 5 7 0 0 5 7 0 1 5 7 0 2 5 7 0 3 6 0 3 0 6 0 3 1 6 0 3 2 6 0 3 3 6 0 2 0 6 0 2 1 6 0 2 2 6 0 2 3 6 0 1 0 6 0 1 1 6 0 1 2 6 0 1 3 6 0 0 0 6 0 0 1 6 0 0 2 6 0 0 3 6 1 3 0 6 1 3 1 6 1 3 2 6 1 3 3 6 1 2 0 6 1 2 1 6 1 2 2 6 1 2 3 6 1 1 0 6 1 1 1 6 1 1 2 6 1 1 3 6 1 0 0 6 1 0 1 6 1 0 2 6 1 0 3 6 2 3 0 6 2 3 1 6 2 3 2 6 2 3 3 6 2 2 0 6 2 2 1 6 2 2 2 6 2 2 3 6 2 1 0 6 2 1 1 6 2 1 2 6 2 1 3 6 2 0 0 6 2 0 1 6 2 0 2 6 2 0 3 6 3 3 0 6 3 3 1 6 3 3 2 6 3 3 3 6 3 2 0 6 3 2 1 6 3 2 2 6 3 2 3 6 3 1 0 6 3 1 1 6 3 1 2 6 3 1 3 6 3 0 0 6 3 0 1 6 3 0 2 6 3 0 3 6 4 3 0 6 4 3 1 6 4 3 2 6 4 3 3 6 4 2 0 6 4 2 1 6 4 2 2 6 4 2 3 6 4 1 0 6 4 1 1 6 4 1 2 6 4 1 3 6 4 0 0 6 4 0 1 6 4 0 2 6 4 0 3 6 5 3 0 6 5 3 1 6 5 3 2 6 5 3 3 6 5 2 0 6 5 2 1 6 5 2 2 6 5 2 3 6 5 1 0 6 5 1 1 6 5 1 2 6 5 1 3 6 5 0 0 6 5 0 1 6 5 0 2 6 5 0 3 6 6 3 0 6 6 3 1 6 6 3 2 6 6 3 3 6 6 2 0 6 6 2 1 6 6 2 2 6 6 2 3 6 6 1 0 6 6 1 1 6 6 1 2 6 6 1 3 6 6 0 0 6 6 0 1 6 6 0 2 6 6 0 3 6 7 3 0 6 7 3 1 6 7 3 2 6 7 3 3 6 7 2 0 6 7 2 1 6 7 2 2 6 7 2 3 6 7 1 0 6 7 1 1 6 7 1 2 6 7 1 3 6 7 0 0 6 7 0 1 6 7 0 2 6 7 0 3 7 0 3 0 7 0 3 1 7 0 3 2 7 0 3 3 7 0 2 0 7 0 2 1 7 0 2 2 7 0 2 3 7 0 1 0 7 0 1 1 7 0 1 2 7 0 1 3 7 0 0 0 7 0 0 1 7 0 0 2 7 0 0 3 7 1 3 0 7 1 3 1 7 1 3 2 7 1 3 3 7 1 2 0 7 1 2 1 7 1 2 2 7 1 2 3 7 1 1 0 7 1 1 1 7 1 1 2 7 1 1 3 7 1 0 0 7 1 0 1 7 1 0 2 7 1 0 3 7 2 3 0 7 2 3 1 7 2 3 2 7 2 3 3 7 2 2 0 7 2 2 1 7 2 2 2 7 2 2 3 7 2 1 0 7 2 1 1 7 2 1 2 7 2 1 3 7 2 0 0 7 2 0 1 7 2 0 2 7 2 0 3 7 3 3 0 7 3 3 1 7 3 3 2 7 3 3 3 7 3 2 0 7 3 2 1 7 3 2 2 7 3 2 3 7 3 1 0 7 3 1 1 7 3 1 2 7 3 1 3 7 3 0 0 7 3 0 1 7 3 0 2 7 3 0 3 7 4 3 0 7 4 3 1 7 4 3 2 7 4 3 3 7 4 2 0 7 4 2 1 7 4 2 2 7 4 2 3 7 4 1 
0 7 4 1 1 7 4 1 2 7 4 1 3 7 4 0 0 7 4 0 1 7 4 0 2 7 4 0 3 7 5 3 0 7 5 3 1 7 5 3 2 7 5 3 3 7 5 2 0 7 5 2 1 7 5 2 2 7 5 2 3 7 5 1 0 7 5 1 1 7 5 1 2 7 5 1 3 7 5 0 0 7 5 0 1 7 5 0 2 7 5 0 3 7 6 3 0 7 6 3 1 7 6 3 2 7 6 3 3 7 6 2 0 7 6 2 1 7 6 2 2 7 6 2 3 7 6 1 0 7 6 1 1 7 6 1 2 7 6 1 3 7 6 0 0 7 6 0 1 7 6 0 2 7 6 0 3 7 7 3 0 7 7 3 1 7 7 3 2 7 7 3 3 7 7 2 0 7 7 2 1 7 7 2 2 7 7 2 3 7 7 1 0 7 7 1 1 7 7 1 2 7 7 1 3 7 7 0 0 7 7 0 1 7 7 0 2 7 7 0 3 gpaw-24.1.0/doc/devel/Au_cluster/BGMAP_domain_8x8x8x4000066400000000000000000000400001454550013000220400ustar00rootroot000000000000000 0 0 0 0 0 0 1 0 0 0 2 0 0 0 3 0 0 1 0 0 0 1 1 0 0 1 2 0 0 1 3 0 1 0 0 0 1 0 1 0 1 0 2 0 1 0 3 0 1 1 0 0 1 1 1 0 1 1 2 0 1 1 3 0 2 0 0 0 2 0 1 0 2 0 2 0 2 0 3 0 2 1 0 0 2 1 1 0 2 1 2 0 2 1 3 0 3 0 0 0 3 0 1 0 3 0 2 0 3 0 3 0 3 1 0 0 3 1 1 0 3 1 2 0 3 1 3 0 4 0 0 0 4 0 1 0 4 0 2 0 4 0 3 0 4 1 0 0 4 1 1 0 4 1 2 0 4 1 3 0 5 0 0 0 5 0 1 0 5 0 2 0 5 0 3 0 5 1 0 0 5 1 1 0 5 1 2 0 5 1 3 0 6 0 0 0 6 0 1 0 6 0 2 0 6 0 3 0 6 1 0 0 6 1 1 0 6 1 2 0 6 1 3 0 7 0 0 0 7 0 1 0 7 0 2 0 7 0 3 0 7 1 0 0 7 1 1 0 7 1 2 0 7 1 3 1 0 0 0 1 0 0 1 1 0 0 2 1 0 0 3 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 1 1 0 0 1 1 0 1 1 1 0 2 1 1 0 3 1 1 1 0 1 1 1 1 1 1 1 2 1 1 1 3 1 2 0 0 1 2 0 1 1 2 0 2 1 2 0 3 1 2 1 0 1 2 1 1 1 2 1 2 1 2 1 3 1 3 0 0 1 3 0 1 1 3 0 2 1 3 0 3 1 3 1 0 1 3 1 1 1 3 1 2 1 3 1 3 1 4 0 0 1 4 0 1 1 4 0 2 1 4 0 3 1 4 1 0 1 4 1 1 1 4 1 2 1 4 1 3 1 5 0 0 1 5 0 1 1 5 0 2 1 5 0 3 1 5 1 0 1 5 1 1 1 5 1 2 1 5 1 3 1 6 0 0 1 6 0 1 1 6 0 2 1 6 0 3 1 6 1 0 1 6 1 1 1 6 1 2 1 6 1 3 1 7 0 0 1 7 0 1 1 7 0 2 1 7 0 3 1 7 1 0 1 7 1 1 1 7 1 2 1 7 1 3 2 0 0 0 2 0 0 1 2 0 0 2 2 0 0 3 2 0 1 0 2 0 1 1 2 0 1 2 2 0 1 3 2 1 0 0 2 1 0 1 2 1 0 2 2 1 0 3 2 1 1 0 2 1 1 1 2 1 1 2 2 1 1 3 2 2 0 0 2 2 0 1 2 2 0 2 2 2 0 3 2 2 1 0 2 2 1 1 2 2 1 2 2 2 1 3 2 3 0 0 2 3 0 1 2 3 0 2 2 3 0 3 2 3 1 0 2 3 1 1 2 3 1 2 2 3 1 3 2 4 0 0 2 4 0 1 2 4 0 2 2 4 0 3 2 4 1 0 2 4 1 1 2 4 1 2 2 4 1 3 2 5 0 0 2 5 0 1 2 5 0 2 2 5 0 3 2 5 1 0 2 5 1 1 2 5 1 2 2 5 1 3 2 6 0 0 2 6 0 1 
2 6 0 2 2 6 0 3 2 6 1 0 2 6 1 1 2 6 1 2 2 6 1 3 2 7 0 0 2 7 0 1 2 7 0 2 2 7 0 3 2 7 1 0 2 7 1 1 2 7 1 2 2 7 1 3 3 0 0 0 3 0 0 1 3 0 0 2 3 0 0 3 3 0 1 0 3 0 1 1 3 0 1 2 3 0 1 3 3 1 0 0 3 1 0 1 3 1 0 2 3 1 0 3 3 1 1 0 3 1 1 1 3 1 1 2 3 1 1 3 3 2 0 0 3 2 0 1 3 2 0 2 3 2 0 3 3 2 1 0 3 2 1 1 3 2 1 2 3 2 1 3 3 3 0 0 3 3 0 1 3 3 0 2 3 3 0 3 3 3 1 0 3 3 1 1 3 3 1 2 3 3 1 3 3 4 0 0 3 4 0 1 3 4 0 2 3 4 0 3 3 4 1 0 3 4 1 1 3 4 1 2 3 4 1 3 3 5 0 0 3 5 0 1 3 5 0 2 3 5 0 3 3 5 1 0 3 5 1 1 3 5 1 2 3 5 1 3 3 6 0 0 3 6 0 1 3 6 0 2 3 6 0 3 3 6 1 0 3 6 1 1 3 6 1 2 3 6 1 3 3 7 0 0 3 7 0 1 3 7 0 2 3 7 0 3 3 7 1 0 3 7 1 1 3 7 1 2 3 7 1 3 4 0 0 0 4 0 0 1 4 0 0 2 4 0 0 3 4 0 1 0 4 0 1 1 4 0 1 2 4 0 1 3 4 1 0 0 4 1 0 1 4 1 0 2 4 1 0 3 4 1 1 0 4 1 1 1 4 1 1 2 4 1 1 3 4 2 0 0 4 2 0 1 4 2 0 2 4 2 0 3 4 2 1 0 4 2 1 1 4 2 1 2 4 2 1 3 4 3 0 0 4 3 0 1 4 3 0 2 4 3 0 3 4 3 1 0 4 3 1 1 4 3 1 2 4 3 1 3 4 4 0 0 4 4 0 1 4 4 0 2 4 4 0 3 4 4 1 0 4 4 1 1 4 4 1 2 4 4 1 3 4 5 0 0 4 5 0 1 4 5 0 2 4 5 0 3 4 5 1 0 4 5 1 1 4 5 1 2 4 5 1 3 4 6 0 0 4 6 0 1 4 6 0 2 4 6 0 3 4 6 1 0 4 6 1 1 4 6 1 2 4 6 1 3 4 7 0 0 4 7 0 1 4 7 0 2 4 7 0 3 4 7 1 0 4 7 1 1 4 7 1 2 4 7 1 3 5 0 0 0 5 0 0 1 5 0 0 2 5 0 0 3 5 0 1 0 5 0 1 1 5 0 1 2 5 0 1 3 5 1 0 0 5 1 0 1 5 1 0 2 5 1 0 3 5 1 1 0 5 1 1 1 5 1 1 2 5 1 1 3 5 2 0 0 5 2 0 1 5 2 0 2 5 2 0 3 5 2 1 0 5 2 1 1 5 2 1 2 5 2 1 3 5 3 0 0 5 3 0 1 5 3 0 2 5 3 0 3 5 3 1 0 5 3 1 1 5 3 1 2 5 3 1 3 5 4 0 0 5 4 0 1 5 4 0 2 5 4 0 3 5 4 1 0 5 4 1 1 5 4 1 2 5 4 1 3 5 5 0 0 5 5 0 1 5 5 0 2 5 5 0 3 5 5 1 0 5 5 1 1 5 5 1 2 5 5 1 3 5 6 0 0 5 6 0 1 5 6 0 2 5 6 0 3 5 6 1 0 5 6 1 1 5 6 1 2 5 6 1 3 5 7 0 0 5 7 0 1 5 7 0 2 5 7 0 3 5 7 1 0 5 7 1 1 5 7 1 2 5 7 1 3 6 0 0 0 6 0 0 1 6 0 0 2 6 0 0 3 6 0 1 0 6 0 1 1 6 0 1 2 6 0 1 3 6 1 0 0 6 1 0 1 6 1 0 2 6 1 0 3 6 1 1 0 6 1 1 1 6 1 1 2 6 1 1 3 6 2 0 0 6 2 0 1 6 2 0 2 6 2 0 3 6 2 1 0 6 2 1 1 6 2 1 2 6 2 1 3 6 3 0 0 6 3 0 1 6 3 0 2 6 3 0 3 6 3 1 0 6 3 1 1 6 3 1 2 6 3 1 3 6 4 0 0 6 4 0 1 6 4 0 2 6 4 0 3 6 4 1 0 6 4 1 1 6 4 1 2 6 4 1 3 6 5 0 0 6 5 0 1 6 5 0 2 6 5 0 3 
6 5 1 0 6 5 1 1 6 5 1 2 6 5 1 3 6 6 0 0 6 6 0 1 6 6 0 2 6 6 0 3 6 6 1 0 6 6 1 1 6 6 1 2 6 6 1 3 6 7 0 0 6 7 0 1 6 7 0 2 6 7 0 3 6 7 1 0 6 7 1 1 6 7 1 2 6 7 1 3 7 0 0 0 7 0 0 1 7 0 0 2 7 0 0 3 7 0 1 0 7 0 1 1 7 0 1 2 7 0 1 3 7 1 0 0 7 1 0 1 7 1 0 2 7 1 0 3 7 1 1 0 7 1 1 1 7 1 1 2 7 1 1 3 7 2 0 0 7 2 0 1 7 2 0 2 7 2 0 3 7 2 1 0 7 2 1 1 7 2 1 2 7 2 1 3 7 3 0 0 7 3 0 1 7 3 0 2 7 3 0 3 7 3 1 0 7 3 1 1 7 3 1 2 7 3 1 3 7 4 0 0 7 4 0 1 7 4 0 2 7 4 0 3 7 4 1 0 7 4 1 1 7 4 1 2 7 4 1 3 7 5 0 0 7 5 0 1 7 5 0 2 7 5 0 3 7 5 1 0 7 5 1 1 7 5 1 2 7 5 1 3 7 6 0 0 7 6 0 1 7 6 0 2 7 6 0 3 7 6 1 0 7 6 1 1 7 6 1 2 7 6 1 3 7 7 0 0 7 7 0 1 7 7 0 2 7 7 0 3 7 7 1 0 7 7 1 1 7 7 1 2 7 7 1 3 0 0 2 0 0 0 2 1 0 0 2 2 0 0 2 3 0 0 3 0 0 0 3 1 0 0 3 2 0 0 3 3 0 1 2 0 0 1 2 1 0 1 2 2 0 1 2 3 0 1 3 0 0 1 3 1 0 1 3 2 0 1 3 3 0 2 2 0 0 2 2 1 0 2 2 2 0 2 2 3 0 2 3 0 0 2 3 1 0 2 3 2 0 2 3 3 0 3 2 0 0 3 2 1 0 3 2 2 0 3 2 3 0 3 3 0 0 3 3 1 0 3 3 2 0 3 3 3 0 4 2 0 0 4 2 1 0 4 2 2 0 4 2 3 0 4 3 0 0 4 3 1 0 4 3 2 0 4 3 3 0 5 2 0 0 5 2 1 0 5 2 2 0 5 2 3 0 5 3 0 0 5 3 1 0 5 3 2 0 5 3 3 0 6 2 0 0 6 2 1 0 6 2 2 0 6 2 3 0 6 3 0 0 6 3 1 0 6 3 2 0 6 3 3 0 7 2 0 0 7 2 1 0 7 2 2 0 7 2 3 0 7 3 0 0 7 3 1 0 7 3 2 0 7 3 3 1 0 2 0 1 0 2 1 1 0 2 2 1 0 2 3 1 0 3 0 1 0 3 1 1 0 3 2 1 0 3 3 1 1 2 0 1 1 2 1 1 1 2 2 1 1 2 3 1 1 3 0 1 1 3 1 1 1 3 2 1 1 3 3 1 2 2 0 1 2 2 1 1 2 2 2 1 2 2 3 1 2 3 0 1 2 3 1 1 2 3 2 1 2 3 3 1 3 2 0 1 3 2 1 1 3 2 2 1 3 2 3 1 3 3 0 1 3 3 1 1 3 3 2 1 3 3 3 1 4 2 0 1 4 2 1 1 4 2 2 1 4 2 3 1 4 3 0 1 4 3 1 1 4 3 2 1 4 3 3 1 5 2 0 1 5 2 1 1 5 2 2 1 5 2 3 1 5 3 0 1 5 3 1 1 5 3 2 1 5 3 3 1 6 2 0 1 6 2 1 1 6 2 2 1 6 2 3 1 6 3 0 1 6 3 1 1 6 3 2 1 6 3 3 1 7 2 0 1 7 2 1 1 7 2 2 1 7 2 3 1 7 3 0 1 7 3 1 1 7 3 2 1 7 3 3 2 0 2 0 2 0 2 1 2 0 2 2 2 0 2 3 2 0 3 0 2 0 3 1 2 0 3 2 2 0 3 3 2 1 2 0 2 1 2 1 2 1 2 2 2 1 2 3 2 1 3 0 2 1 3 1 2 1 3 2 2 1 3 3 2 2 2 0 2 2 2 1 2 2 2 2 2 2 2 3 2 2 3 0 2 2 3 1 2 2 3 2 2 2 3 3 2 3 2 0 2 3 2 1 2 3 2 2 2 3 2 3 2 3 3 0 2 3 3 1 2 3 3 2 2 3 3 3 2 4 2 0 2 4 2 1 2 4 2 2 2 4 2 3 2 4 3 0 2 4 3 1 
2 4 3 2 2 4 3 3 2 5 2 0 2 5 2 1 2 5 2 2 2 5 2 3 2 5 3 0 2 5 3 1 2 5 3 2 2 5 3 3 2 6 2 0 2 6 2 1 2 6 2 2 2 6 2 3 2 6 3 0 2 6 3 1 2 6 3 2 2 6 3 3 2 7 2 0 2 7 2 1 2 7 2 2 2 7 2 3 2 7 3 0 2 7 3 1 2 7 3 2 2 7 3 3 3 0 2 0 3 0 2 1 3 0 2 2 3 0 2 3 3 0 3 0 3 0 3 1 3 0 3 2 3 0 3 3 3 1 2 0 3 1 2 1 3 1 2 2 3 1 2 3 3 1 3 0 3 1 3 1 3 1 3 2 3 1 3 3 3 2 2 0 3 2 2 1 3 2 2 2 3 2 2 3 3 2 3 0 3 2 3 1 3 2 3 2 3 2 3 3 3 3 2 0 3 3 2 1 3 3 2 2 3 3 2 3 3 3 3 0 3 3 3 1 3 3 3 2 3 3 3 3 3 4 2 0 3 4 2 1 3 4 2 2 3 4 2 3 3 4 3 0 3 4 3 1 3 4 3 2 3 4 3 3 3 5 2 0 3 5 2 1 3 5 2 2 3 5 2 3 3 5 3 0 3 5 3 1 3 5 3 2 3 5 3 3 3 6 2 0 3 6 2 1 3 6 2 2 3 6 2 3 3 6 3 0 3 6 3 1 3 6 3 2 3 6 3 3 3 7 2 0 3 7 2 1 3 7 2 2 3 7 2 3 3 7 3 0 3 7 3 1 3 7 3 2 3 7 3 3 4 0 2 0 4 0 2 1 4 0 2 2 4 0 2 3 4 0 3 0 4 0 3 1 4 0 3 2 4 0 3 3 4 1 2 0 4 1 2 1 4 1 2 2 4 1 2 3 4 1 3 0 4 1 3 1 4 1 3 2 4 1 3 3 4 2 2 0 4 2 2 1 4 2 2 2 4 2 2 3 4 2 3 0 4 2 3 1 4 2 3 2 4 2 3 3 4 3 2 0 4 3 2 1 4 3 2 2 4 3 2 3 4 3 3 0 4 3 3 1 4 3 3 2 4 3 3 3 4 4 2 0 4 4 2 1 4 4 2 2 4 4 2 3 4 4 3 0 4 4 3 1 4 4 3 2 4 4 3 3 4 5 2 0 4 5 2 1 4 5 2 2 4 5 2 3 4 5 3 0 4 5 3 1 4 5 3 2 4 5 3 3 4 6 2 0 4 6 2 1 4 6 2 2 4 6 2 3 4 6 3 0 4 6 3 1 4 6 3 2 4 6 3 3 4 7 2 0 4 7 2 1 4 7 2 2 4 7 2 3 4 7 3 0 4 7 3 1 4 7 3 2 4 7 3 3 5 0 2 0 5 0 2 1 5 0 2 2 5 0 2 3 5 0 3 0 5 0 3 1 5 0 3 2 5 0 3 3 5 1 2 0 5 1 2 1 5 1 2 2 5 1 2 3 5 1 3 0 5 1 3 1 5 1 3 2 5 1 3 3 5 2 2 0 5 2 2 1 5 2 2 2 5 2 2 3 5 2 3 0 5 2 3 1 5 2 3 2 5 2 3 3 5 3 2 0 5 3 2 1 5 3 2 2 5 3 2 3 5 3 3 0 5 3 3 1 5 3 3 2 5 3 3 3 5 4 2 0 5 4 2 1 5 4 2 2 5 4 2 3 5 4 3 0 5 4 3 1 5 4 3 2 5 4 3 3 5 5 2 0 5 5 2 1 5 5 2 2 5 5 2 3 5 5 3 0 5 5 3 1 5 5 3 2 5 5 3 3 5 6 2 0 5 6 2 1 5 6 2 2 5 6 2 3 5 6 3 0 5 6 3 1 5 6 3 2 5 6 3 3 5 7 2 0 5 7 2 1 5 7 2 2 5 7 2 3 5 7 3 0 5 7 3 1 5 7 3 2 5 7 3 3 6 0 2 0 6 0 2 1 6 0 2 2 6 0 2 3 6 0 3 0 6 0 3 1 6 0 3 2 6 0 3 3 6 1 2 0 6 1 2 1 6 1 2 2 6 1 2 3 6 1 3 0 6 1 3 1 6 1 3 2 6 1 3 3 6 2 2 0 6 2 2 1 6 2 2 2 6 2 2 3 6 2 3 0 6 2 3 1 6 2 3 2 6 2 3 3 6 3 2 0 6 3 2 1 6 3 2 2 6 3 2 3 6 3 3 0 6 3 3 1 6 3 3 2 6 3 3 3 
6 4 2 0 6 4 2 1 6 4 2 2 6 4 2 3 6 4 3 0 6 4 3 1 6 4 3 2 6 4 3 3 6 5 2 0 6 5 2 1 6 5 2 2 6 5 2 3 6 5 3 0 6 5 3 1 6 5 3 2 6 5 3 3 6 6 2 0 6 6 2 1 6 6 2 2 6 6 2 3 6 6 3 0 6 6 3 1 6 6 3 2 6 6 3 3 6 7 2 0 6 7 2 1 6 7 2 2 6 7 2 3 6 7 3 0 6 7 3 1 6 7 3 2 6 7 3 3 7 0 2 0 7 0 2 1 7 0 2 2 7 0 2 3 7 0 3 0 7 0 3 1 7 0 3 2 7 0 3 3 7 1 2 0 7 1 2 1 7 1 2 2 7 1 2 3 7 1 3 0 7 1 3 1 7 1 3 2 7 1 3 3 7 2 2 0 7 2 2 1 7 2 2 2 7 2 2 3 7 2 3 0 7 2 3 1 7 2 3 2 7 2 3 3 7 3 2 0 7 3 2 1 7 3 2 2 7 3 2 3 7 3 3 0 7 3 3 1 7 3 3 2 7 3 3 3 7 4 2 0 7 4 2 1 7 4 2 2 7 4 2 3 7 4 3 0 7 4 3 1 7 4 3 2 7 4 3 3 7 5 2 0 7 5 2 1 7 5 2 2 7 5 2 3 7 5 3 0 7 5 3 1 7 5 3 2 7 5 3 3 7 6 2 0 7 6 2 1 7 6 2 2 7 6 2 3 7 6 3 0 7 6 3 1 7 6 3 2 7 6 3 3 7 7 2 0 7 7 2 1 7 7 2 2 7 7 2 3 7 7 3 0 7 7 3 1 7 7 3 2 7 7 3 3 0 0 4 0 0 0 4 1 0 0 4 2 0 0 4 3 0 0 5 0 0 0 5 1 0 0 5 2 0 0 5 3 0 1 4 0 0 1 4 1 0 1 4 2 0 1 4 3 0 1 5 0 0 1 5 1 0 1 5 2 0 1 5 3 0 2 4 0 0 2 4 1 0 2 4 2 0 2 4 3 0 2 5 0 0 2 5 1 0 2 5 2 0 2 5 3 0 3 4 0 0 3 4 1 0 3 4 2 0 3 4 3 0 3 5 0 0 3 5 1 0 3 5 2 0 3 5 3 0 4 4 0 0 4 4 1 0 4 4 2 0 4 4 3 0 4 5 0 0 4 5 1 0 4 5 2 0 4 5 3 0 5 4 0 0 5 4 1 0 5 4 2 0 5 4 3 0 5 5 0 0 5 5 1 0 5 5 2 0 5 5 3 0 6 4 0 0 6 4 1 0 6 4 2 0 6 4 3 0 6 5 0 0 6 5 1 0 6 5 2 0 6 5 3 0 7 4 0 0 7 4 1 0 7 4 2 0 7 4 3 0 7 5 0 0 7 5 1 0 7 5 2 0 7 5 3 1 0 4 0 1 0 4 1 1 0 4 2 1 0 4 3 1 0 5 0 1 0 5 1 1 0 5 2 1 0 5 3 1 1 4 0 1 1 4 1 1 1 4 2 1 1 4 3 1 1 5 0 1 1 5 1 1 1 5 2 1 1 5 3 1 2 4 0 1 2 4 1 1 2 4 2 1 2 4 3 1 2 5 0 1 2 5 1 1 2 5 2 1 2 5 3 1 3 4 0 1 3 4 1 1 3 4 2 1 3 4 3 1 3 5 0 1 3 5 1 1 3 5 2 1 3 5 3 1 4 4 0 1 4 4 1 1 4 4 2 1 4 4 3 1 4 5 0 1 4 5 1 1 4 5 2 1 4 5 3 1 5 4 0 1 5 4 1 1 5 4 2 1 5 4 3 1 5 5 0 1 5 5 1 1 5 5 2 1 5 5 3 1 6 4 0 1 6 4 1 1 6 4 2 1 6 4 3 1 6 5 0 1 6 5 1 1 6 5 2 1 6 5 3 1 7 4 0 1 7 4 1 1 7 4 2 1 7 4 3 1 7 5 0 1 7 5 1 1 7 5 2 1 7 5 3 2 0 4 0 2 0 4 1 2 0 4 2 2 0 4 3 2 0 5 0 2 0 5 1 2 0 5 2 2 0 5 3 2 1 4 0 2 1 4 1 2 1 4 2 2 1 4 3 2 1 5 0 2 1 5 1 2 1 5 2 2 1 5 3 2 2 4 0 2 2 4 1 2 2 4 2 2 2 4 3 2 2 5 0 2 2 5 1 2 2 5 2 2 2 5 3 2 3 4 0 2 3 4 1 
2 3 4 2 2 3 4 3 2 3 5 0 2 3 5 1 2 3 5 2 2 3 5 3 2 4 4 0 2 4 4 1 2 4 4 2 2 4 4 3 2 4 5 0 2 4 5 1 2 4 5 2 2 4 5 3 2 5 4 0 2 5 4 1 2 5 4 2 2 5 4 3 2 5 5 0 2 5 5 1 2 5 5 2 2 5 5 3 2 6 4 0 2 6 4 1 2 6 4 2 2 6 4 3 2 6 5 0 2 6 5 1 2 6 5 2 2 6 5 3 2 7 4 0 2 7 4 1 2 7 4 2 2 7 4 3 2 7 5 0 2 7 5 1 2 7 5 2 2 7 5 3 3 0 4 0 3 0 4 1 3 0 4 2 3 0 4 3 3 0 5 0 3 0 5 1 3 0 5 2 3 0 5 3 3 1 4 0 3 1 4 1 3 1 4 2 3 1 4 3 3 1 5 0 3 1 5 1 3 1 5 2 3 1 5 3 3 2 4 0 3 2 4 1 3 2 4 2 3 2 4 3 3 2 5 0 3 2 5 1 3 2 5 2 3 2 5 3 3 3 4 0 3 3 4 1 3 3 4 2 3 3 4 3 3 3 5 0 3 3 5 1 3 3 5 2 3 3 5 3 3 4 4 0 3 4 4 1 3 4 4 2 3 4 4 3 3 4 5 0 3 4 5 1 3 4 5 2 3 4 5 3 3 5 4 0 3 5 4 1 3 5 4 2 3 5 4 3 3 5 5 0 3 5 5 1 3 5 5 2 3 5 5 3 3 6 4 0 3 6 4 1 3 6 4 2 3 6 4 3 3 6 5 0 3 6 5 1 3 6 5 2 3 6 5 3 3 7 4 0 3 7 4 1 3 7 4 2 3 7 4 3 3 7 5 0 3 7 5 1 3 7 5 2 3 7 5 3 4 0 4 0 4 0 4 1 4 0 4 2 4 0 4 3 4 0 5 0 4 0 5 1 4 0 5 2 4 0 5 3 4 1 4 0 4 1 4 1 4 1 4 2 4 1 4 3 4 1 5 0 4 1 5 1 4 1 5 2 4 1 5 3 4 2 4 0 4 2 4 1 4 2 4 2 4 2 4 3 4 2 5 0 4 2 5 1 4 2 5 2 4 2 5 3 4 3 4 0 4 3 4 1 4 3 4 2 4 3 4 3 4 3 5 0 4 3 5 1 4 3 5 2 4 3 5 3 4 4 4 0 4 4 4 1 4 4 4 2 4 4 4 3 4 4 5 0 4 4 5 1 4 4 5 2 4 4 5 3 4 5 4 0 4 5 4 1 4 5 4 2 4 5 4 3 4 5 5 0 4 5 5 1 4 5 5 2 4 5 5 3 4 6 4 0 4 6 4 1 4 6 4 2 4 6 4 3 4 6 5 0 4 6 5 1 4 6 5 2 4 6 5 3 4 7 4 0 4 7 4 1 4 7 4 2 4 7 4 3 4 7 5 0 4 7 5 1 4 7 5 2 4 7 5 3 5 0 4 0 5 0 4 1 5 0 4 2 5 0 4 3 5 0 5 0 5 0 5 1 5 0 5 2 5 0 5 3 5 1 4 0 5 1 4 1 5 1 4 2 5 1 4 3 5 1 5 0 5 1 5 1 5 1 5 2 5 1 5 3 5 2 4 0 5 2 4 1 5 2 4 2 5 2 4 3 5 2 5 0 5 2 5 1 5 2 5 2 5 2 5 3 5 3 4 0 5 3 4 1 5 3 4 2 5 3 4 3 5 3 5 0 5 3 5 1 5 3 5 2 5 3 5 3 5 4 4 0 5 4 4 1 5 4 4 2 5 4 4 3 5 4 5 0 5 4 5 1 5 4 5 2 5 4 5 3 5 5 4 0 5 5 4 1 5 5 4 2 5 5 4 3 5 5 5 0 5 5 5 1 5 5 5 2 5 5 5 3 5 6 4 0 5 6 4 1 5 6 4 2 5 6 4 3 5 6 5 0 5 6 5 1 5 6 5 2 5 6 5 3 5 7 4 0 5 7 4 1 5 7 4 2 5 7 4 3 5 7 5 0 5 7 5 1 5 7 5 2 5 7 5 3 6 0 4 0 6 0 4 1 6 0 4 2 6 0 4 3 6 0 5 0 6 0 5 1 6 0 5 2 6 0 5 3 6 1 4 0 6 1 4 1 6 1 4 2 6 1 4 3 6 1 5 0 6 1 5 1 6 1 5 2 6 1 5 3 6 2 4 0 6 2 4 1 6 2 4 2 6 2 4 3 
6 2 5 0 6 2 5 1 6 2 5 2 6 2 5 3 6 3 4 0 6 3 4 1 6 3 4 2 6 3 4 3 6 3 5 0 6 3 5 1 6 3 5 2 6 3 5 3 6 4 4 0 6 4 4 1 6 4 4 2 6 4 4 3 6 4 5 0 6 4 5 1 6 4 5 2 6 4 5 3 6 5 4 0 6 5 4 1 6 5 4 2 6 5 4 3 6 5 5 0 6 5 5 1 6 5 5 2 6 5 5 3 6 6 4 0 6 6 4 1 6 6 4 2 6 6 4 3 6 6 5 0 6 6 5 1 6 6 5 2 6 6 5 3 6 7 4 0 6 7 4 1 6 7 4 2 6 7 4 3 6 7 5 0 6 7 5 1 6 7 5 2 6 7 5 3 7 0 4 0 7 0 4 1 7 0 4 2 7 0 4 3 7 0 5 0 7 0 5 1 7 0 5 2 7 0 5 3 7 1 4 0 7 1 4 1 7 1 4 2 7 1 4 3 7 1 5 0 7 1 5 1 7 1 5 2 7 1 5 3 7 2 4 0 7 2 4 1 7 2 4 2 7 2 4 3 7 2 5 0 7 2 5 1 7 2 5 2 7 2 5 3 7 3 4 0 7 3 4 1 7 3 4 2 7 3 4 3 7 3 5 0 7 3 5 1 7 3 5 2 7 3 5 3 7 4 4 0 7 4 4 1 7 4 4 2 7 4 4 3 7 4 5 0 7 4 5 1 7 4 5 2 7 4 5 3 7 5 4 0 7 5 4 1 7 5 4 2 7 5 4 3 7 5 5 0 7 5 5 1 7 5 5 2 7 5 5 3 7 6 4 0 7 6 4 1 7 6 4 2 7 6 4 3 7 6 5 0 7 6 5 1 7 6 5 2 7 6 5 3 7 7 4 0 7 7 4 1 7 7 4 2 7 7 4 3 7 7 5 0 7 7 5 1 7 7 5 2 7 7 5 3 0 0 6 0 0 0 6 1 0 0 6 2 0 0 6 3 0 0 7 0 0 0 7 1 0 0 7 2 0 0 7 3 0 1 6 0 0 1 6 1 0 1 6 2 0 1 6 3 0 1 7 0 0 1 7 1 0 1 7 2 0 1 7 3 0 2 6 0 0 2 6 1 0 2 6 2 0 2 6 3 0 2 7 0 0 2 7 1 0 2 7 2 0 2 7 3 0 3 6 0 0 3 6 1 0 3 6 2 0 3 6 3 0 3 7 0 0 3 7 1 0 3 7 2 0 3 7 3 0 4 6 0 0 4 6 1 0 4 6 2 0 4 6 3 0 4 7 0 0 4 7 1 0 4 7 2 0 4 7 3 0 5 6 0 0 5 6 1 0 5 6 2 0 5 6 3 0 5 7 0 0 5 7 1 0 5 7 2 0 5 7 3 0 6 6 0 0 6 6 1 0 6 6 2 0 6 6 3 0 6 7 0 0 6 7 1 0 6 7 2 0 6 7 3 0 7 6 0 0 7 6 1 0 7 6 2 0 7 6 3 0 7 7 0 0 7 7 1 0 7 7 2 0 7 7 3 1 0 6 0 1 0 6 1 1 0 6 2 1 0 6 3 1 0 7 0 1 0 7 1 1 0 7 2 1 0 7 3 1 1 6 0 1 1 6 1 1 1 6 2 1 1 6 3 1 1 7 0 1 1 7 1 1 1 7 2 1 1 7 3 1 2 6 0 1 2 6 1 1 2 6 2 1 2 6 3 1 2 7 0 1 2 7 1 1 2 7 2 1 2 7 3 1 3 6 0 1 3 6 1 1 3 6 2 1 3 6 3 1 3 7 0 1 3 7 1 1 3 7 2 1 3 7 3 1 4 6 0 1 4 6 1 1 4 6 2 1 4 6 3 1 4 7 0 1 4 7 1 1 4 7 2 1 4 7 3 1 5 6 0 1 5 6 1 1 5 6 2 1 5 6 3 1 5 7 0 1 5 7 1 1 5 7 2 1 5 7 3 1 6 6 0 1 6 6 1 1 6 6 2 1 6 6 3 1 6 7 0 1 6 7 1 1 6 7 2 1 6 7 3 1 7 6 0 1 7 6 1 1 7 6 2 1 7 6 3 1 7 7 0 1 7 7 1 1 7 7 2 1 7 7 3 2 0 6 0 2 0 6 1 2 0 6 2 2 0 6 3 2 0 7 0 2 0 7 1 2 0 7 2 2 0 7 3 2 1 6 0 2 1 6 1 2 1 6 2 2 1 6 3 2 1 7 0 2 1 7 1 
2 1 7 2 2 1 7 3 2 2 6 0 2 2 6 1 2 2 6 2 2 2 6 3 2 2 7 0 2 2 7 1 2 2 7 2 2 2 7 3 2 3 6 0 2 3 6 1 2 3 6 2 2 3 6 3 2 3 7 0 2 3 7 1 2 3 7 2 2 3 7 3 2 4 6 0 2 4 6 1 2 4 6 2 2 4 6 3 2 4 7 0 2 4 7 1 2 4 7 2 2 4 7 3 2 5 6 0 2 5 6 1 2 5 6 2 2 5 6 3 2 5 7 0 2 5 7 1 2 5 7 2 2 5 7 3 2 6 6 0 2 6 6 1 2 6 6 2 2 6 6 3 2 6 7 0 2 6 7 1 2 6 7 2 2 6 7 3 2 7 6 0 2 7 6 1 2 7 6 2 2 7 6 3 2 7 7 0 2 7 7 1 2 7 7 2 2 7 7 3 3 0 6 0 3 0 6 1 3 0 6 2 3 0 6 3 3 0 7 0 3 0 7 1 3 0 7 2 3 0 7 3 3 1 6 0 3 1 6 1 3 1 6 2 3 1 6 3 3 1 7 0 3 1 7 1 3 1 7 2 3 1 7 3 3 2 6 0 3 2 6 1 3 2 6 2 3 2 6 3 3 2 7 0 3 2 7 1 3 2 7 2 3 2 7 3 3 3 6 0 3 3 6 1 3 3 6 2 3 3 6 3 3 3 7 0 3 3 7 1 3 3 7 2 3 3 7 3 3 4 6 0 3 4 6 1 3 4 6 2 3 4 6 3 3 4 7 0 3 4 7 1 3 4 7 2 3 4 7 3 3 5 6 0 3 5 6 1 3 5 6 2 3 5 6 3 3 5 7 0 3 5 7 1 3 5 7 2 3 5 7 3 3 6 6 0 3 6 6 1 3 6 6 2 3 6 6 3 3 6 7 0 3 6 7 1 3 6 7 2 3 6 7 3 3 7 6 0 3 7 6 1 3 7 6 2 3 7 6 3 3 7 7 0 3 7 7 1 3 7 7 2 3 7 7 3 4 0 6 0 4 0 6 1 4 0 6 2 4 0 6 3 4 0 7 0 4 0 7 1 4 0 7 2 4 0 7 3 4 1 6 0 4 1 6 1 4 1 6 2 4 1 6 3 4 1 7 0 4 1 7 1 4 1 7 2 4 1 7 3 4 2 6 0 4 2 6 1 4 2 6 2 4 2 6 3 4 2 7 0 4 2 7 1 4 2 7 2 4 2 7 3 4 3 6 0 4 3 6 1 4 3 6 2 4 3 6 3 4 3 7 0 4 3 7 1 4 3 7 2 4 3 7 3 4 4 6 0 4 4 6 1 4 4 6 2 4 4 6 3 4 4 7 0 4 4 7 1 4 4 7 2 4 4 7 3 4 5 6 0 4 5 6 1 4 5 6 2 4 5 6 3 4 5 7 0 4 5 7 1 4 5 7 2 4 5 7 3 4 6 6 0 4 6 6 1 4 6 6 2 4 6 6 3 4 6 7 0 4 6 7 1 4 6 7 2 4 6 7 3 4 7 6 0 4 7 6 1 4 7 6 2 4 7 6 3 4 7 7 0 4 7 7 1 4 7 7 2 4 7 7 3 5 0 6 0 5 0 6 1 5 0 6 2 5 0 6 3 5 0 7 0 5 0 7 1 5 0 7 2 5 0 7 3 5 1 6 0 5 1 6 1 5 1 6 2 5 1 6 3 5 1 7 0 5 1 7 1 5 1 7 2 5 1 7 3 5 2 6 0 5 2 6 1 5 2 6 2 5 2 6 3 5 2 7 0 5 2 7 1 5 2 7 2 5 2 7 3 5 3 6 0 5 3 6 1 5 3 6 2 5 3 6 3 5 3 7 0 5 3 7 1 5 3 7 2 5 3 7 3 5 4 6 0 5 4 6 1 5 4 6 2 5 4 6 3 5 4 7 0 5 4 7 1 5 4 7 2 5 4 7 3 5 5 6 0 5 5 6 1 5 5 6 2 5 5 6 3 5 5 7 0 5 5 7 1 5 5 7 2 5 5 7 3 5 6 6 0 5 6 6 1 5 6 6 2 5 6 6 3 5 6 7 0 5 6 7 1 5 6 7 2 5 6 7 3 5 7 6 0 5 7 6 1 5 7 6 2 5 7 6 3 5 7 7 0 5 7 7 1 5 7 7 2 5 7 7 3 6 0 6 0 6 0 6 1 6 0 6 2 6 0 6 3 6 0 7 0 6 0 7 1 6 0 7 2 6 0 7 3 
6 1 6 0 6 1 6 1 6 1 6 2 6 1 6 3 6 1 7 0 6 1 7 1 6 1 7 2 6 1 7 3 6 2 6 0 6 2 6 1 6 2 6 2 6 2 6 3 6 2 7 0 6 2 7 1 6 2 7 2 6 2 7 3 6 3 6 0 6 3 6 1 6 3 6 2 6 3 6 3 6 3 7 0 6 3 7 1 6 3 7 2 6 3 7 3 6 4 6 0 6 4 6 1 6 4 6 2 6 4 6 3 6 4 7 0 6 4 7 1 6 4 7 2 6 4 7 3 6 5 6 0 6 5 6 1 6 5 6 2 6 5 6 3 6 5 7 0 6 5 7 1 6 5 7 2 6 5 7 3 6 6 6 0 6 6 6 1 6 6 6 2 6 6 6 3 6 6 7 0 6 6 7 1 6 6 7 2 6 6 7 3 6 7 6 0 6 7 6 1 6 7 6 2 6 7 6 3 6 7 7 0 6 7 7 1 6 7 7 2 6 7 7 3 7 0 6 0 7 0 6 1 7 0 6 2 7 0 6 3 7 0 7 0 7 0 7 1 7 0 7 2 7 0 7 3 7 1 6 0 7 1 6 1 7 1 6 2 7 1 6 3 7 1 7 0 7 1 7 1 7 1 7 2 7 1 7 3 7 2 6 0 7 2 6 1 7 2 6 2 7 2 6 3 7 2 7 0 7 2 7 1 7 2 7 2 7 2 7 3 7 3 6 0 7 3 6 1 7 3 6 2 7 3 6 3 7 3 7 0 7 3 7 1 7 3 7 2 7 3 7 3 7 4 6 0 7 4 6 1 7 4 6 2 7 4 6 3 7 4 7 0 7 4 7 1 7 4 7 2 7 4 7 3 7 5 6 0 7 5 6 1 7 5 6 2 7 5 6 3 7 5 7 0 7 5 7 1 7 5 7 2 7 5 7 3 7 6 6 0 7 6 6 1 7 6 6 2 7 6 6 3 7 6 7 0 7 6 7 1 7 6 7 2 7 6 7 3 7 7 6 0 7 7 6 1 7 7 6 2 7 7 6 3 7 7 7 0 7 7 7 1 7 7 7 2 7 7 7 3 gpaw-24.1.0/doc/devel/Au_cluster/README000066400000000000000000000016621454550013000173700ustar00rootroot00000000000000N. A. Romero naromero@alcf.anl.gov April 7, 2010 Summary ------- The benefits of appropriate mapping are demonstrated on Argonne National Laboratory's BlueGene/P Intrepid computer using the Au_cluster.py test case. Mapfile names reflect parallelization scheme. For example, BGMAP_band_XxYxZxB means that one should use --domain-decomposition=X,Y,Z, --state-parallelization=B Note that the value of Matrix.nblocks was change to keep the message size approximately constant between the calculations at 512 and 1024-nodes: Matrix.nblocks = 16 for state-parallelization=4 Matrix.nblocks = 8 for state-parallelization=8 At a 512-node partition (mid-plane), we see no difference between the two mapping types: band and domain. Note that band mode Mapfile is equivalent to use MAPPING=ZYXT for this case! At a 1024-node partition (1-rack), we see that there is a large difference. 
We need to be using band mode for ground-state DFT calculations. gpaw-24.1.0/doc/devel/Au_cluster/akka.sh000077500000000000000000000010041454550013000177440ustar00rootroot00000000000000#!/bin/bash ### SNAC project number, enter if applicable. ### NOTE! No spaces or slashes allowed #PBS -A HPC2N-2008-005 ### Requesting 64 nodes with 8 VP:s on each node #PBS -l nodes=64:ppn=8 ### Requesting time - 40 minutes #PBS -l walltime=00:40:00 # Change to Working Directory cd $PBS_O_WORKDIR module add openmpi/1.2.6/gcc gpawhome=${HOME}/gpaw export PYTHONPATH=${gpawhome}:${PYTHONPATH} mpiexec ${gpawhome}/build/bin.linux-x86_64-2.4/gpaw-python ../Au_cluster.py --sl_diagonalize=5,5,64 --gpaw=usenewlfc=1 gpaw-24.1.0/doc/devel/Au_cluster/intrepid.sh000077500000000000000000000014731454550013000206650ustar00rootroot00000000000000type=Au_cluster cwd=`pwd` acct=Gpaw queue=prod time=90 nodes=512 mode=vn mapping=ZYXT # mapfile=BGMAP_128_4x4x4x8 # mapping=$mapfile job=${type}_${nodes}_${mode}_${mapping} input=${type}.py pos=Au102_revised.xyz scratch=/intrepid-fs0/users/${USER}/persistent install=/soft/apps rm -rf $scratch/$job mkdir $scratch/$job cp $input $scratch/$job # cp $mapfile $scratch/$job cp $pos $scratch/$job cd $scratch/$job qsub -A $acct -n $nodes -t $time -q $queue --mode $mode --env BG_MAPPING=$mapping:MPIRUN_ENABLE_TTY_REPORTING=0:OMP_NUM_THREADS=1:GPAW_SETUP_PATH=$GPAW_SETUP_PATH:PYTHONPATH=${install}/gpaw-r6000:${install}/ase-r1438:$PYTHONPATH:LD_LIBRARY_PATH=$CN_LD_LIBRARY_PATH ${install}/gpaw-r6000/build/bin.linux-ppc64-2.6/gpaw-python ${type}.py --domain-decomposition=8,8,8 --state-parallelization=4 --sl_diagonalize=5,5,64 gpaw-24.1.0/doc/devel/Au_cluster/prepare.sh000077500000000000000000000014611454550013000205020ustar00rootroot00000000000000#!/bin/bash FORMAT=1 setFORMAT () { # function setFORMAT takes integer as the argument $1 # and returns integer in the format %05d or %d (printf' like format) # depending on the FORMAT variable (1 or 0) if [ ${FORMAT} -eq "1" ]; 
then integer_formatted=`echo $1 | awk '{if ($1<10) printf("0000%.0f", $1); else if ($1<100) printf("000%.0f", $1); else if ($1<1000) printf("00%.0f", $1); else if ($1<10000) printf("0%.0f", $1); else printf("%.0f", $1)}'` else integer_formatted=$1 fi echo $integer_formatted } if test -z $PATTERN; then echo "Error: no directory pattern provided" exit fi for p in 256 512 1024 2048 4096 do proc=`setFORMAT $p` dir="${PATTERN}_${proc}_" if [ ! -d "${dir}" ]; then mkdir ${dir} echo "${dir} created" fi done gpaw-24.1.0/doc/devel/ase_optimize/000077500000000000000000000000001454550013000170655ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/ase_optimize/agts.py000066400000000000000000000010071454550013000203730ustar00rootroot00000000000000# web-page: systems.db from ase.optimize.test.test import all_optimizers from ase.optimize.test.systems import create_database def workflow(): from myqueue.workflow import run with run(function=create_database): runs = [run(script='run_tests_emt.py')] for name in all_optimizers: if name == 'Berny': continue runs.append(run(script='run_tests.py', args=[name], cores=8, tmax='1d')) run(script='analyze.py', deps=runs) gpaw-24.1.0/doc/devel/ase_optimize/analyze.py000066400000000000000000000006041454550013000211020ustar00rootroot00000000000000# web-page: emt-iterations.csv, lcao-time.csv, systems.csv import ase.db from ase.optimize.test.analyze import analyze analyze('results-emt.db', 'emt') analyze('results-lcao.db', 'lcao') db1 = ase.db.connect('systems.db') with open('systems.csv', 'w') as f: print('test-name,description', file=f) for row in db1.select(): print(f'{row.name},{row.description}', file=f) gpaw-24.1.0/doc/devel/ase_optimize/ase_optimize.rst000066400000000000000000000022751454550013000223150ustar00rootroot00000000000000.. _optimizer_tests: =============== Optimizer tests =============== This page shows benchmarks of optimizations done with ASE's different :mod:`optimizers `. 
Note that the iteration number (steps) is not the same as the number of force evaluations. This is because some of the optimizers uses internal line searches or similar. The most important performance characteristics of an optimizer is the total optimization time. Different optimizers may perform the same number of steps, but along a different path, so the time spent on calculation of energy/forces may be different due to different convergence of the self-consistent field. Test systems ============ These are the test systems (:download:`systems.db`): .. csv-table:: :file: systems.csv :header-rows: 1 EMT calculations ================ Calculation done with :class:`~ase.calculators.emt.EMT`. Number of steps: .. csv-table:: :file: emt-iterations.csv :header-rows: 1 GPAW-LCAO calculations ====================== Parameters:: GPAW(mode='lcao', basis='dzp', kpts={'density': 2.0}) Absolute time relative to fastest optimizer: .. csv-table:: :file: lcao-time.csv :header-rows: 1 gpaw-24.1.0/doc/devel/ase_optimize/run_tests.py000066400000000000000000000012161454550013000214650ustar00rootroot00000000000000import sys import ase.db from ase.optimize.test.test import (test_optimizer, all_optimizers, get_optimizer) from gpaw import GPAW db1 = ase.db.connect('systems.db') db = ase.db.connect('results-lcao.db') def lcao(txt): return GPAW(mode='lcao', basis='dzp', kpts={'density': 2.0}, txt=txt) systems = [(row.name, row.toatoms()) for row in db1.select()] if len(sys.argv) > 1: optimizers = sys.argv[1:] else: optimizers = all_optimizers for opt in optimizers: optimizer = get_optimizer(opt) test_optimizer(systems, optimizer, lcao, 'lcao-', db) gpaw-24.1.0/doc/devel/ase_optimize/run_tests_emt.py000066400000000000000000000010041454550013000223250ustar00rootroot00000000000000import ase.db from ase.calculators.emt import EMT from ase.optimize.test.test import (test_optimizer, all_optimizers, get_optimizer) db1 = ase.db.connect('systems.db') db = ase.db.connect('results-emt.db', serial=True) 
systems = [(row.name, row.toatoms()) for row in db1.select() if row.formula != 'C5H12'] for opt in all_optimizers: if opt == 'Berny': continue optimizer = get_optimizer(opt) test_optimizer(systems, optimizer, EMT, 'emt-', db) gpaw-24.1.0/doc/devel/bugs.rst000066400000000000000000000053251454550013000160740ustar00rootroot00000000000000======================= Bugs in the latest GPAW ======================= See here: :ref:`bugs` ------------------ Handling segfaults ------------------ Segmentation faults are probably the hardest type of runtime error to track down, but they are also quite common during the *unstable* part of the release cycle. As a rule of thumb, if you get a segfault, start by checking that all array arguments passed from Python to C functions have the correct shapes and types. Apart from running GPAW in :ref:`debug mode `, please familiarize yourself with the :ref:`debugging tools ` for the Python and C code. If you experience segfaults or unexplained MPI crashes when running GPAW in parallel, it is recommended to try a :ref:`custom installation ` with a debugging flag in ``siteconfig.py``:: define_macros += [('GPAW_MPI_DEBUG', 1)] ---------------------- Common sources of bugs ---------------------- * General: - Elements of NumPy arrays are C ordered, BLAS and LAPACK routines expect Fortran ordering. * Python: - Always give contiguous arrays to C functions. If ``x`` is contiguous with ``dtype=complex``, then ``x.real`` is non-contiguous of ``dtype=float``. - Giving array arguments to a function is a *carte blanche* to alter the data:: def double(a): a *= 2 return a x = np.ones(5) print(double(x)) # x[:] is now 2. - Forgetting a ``n += 1`` statement in a for loop:: n = 0 for thing in things: thing.do_stuff(n) n += 1 Use this instead:: for n, thing in enumerate(things): thing.do_stuff(n) - Indentation errors like this one:: if ok: x = 1.0 else: x = 0.5 do_stuff(x) where ``do_stuff(x)`` should have been reached in both cases. 
Emacs: always use ``C-c >`` and ``C-c <`` for shifting in and out blocks of code (mark the block first). - Don't use mutables as default values:: class A: def __init__(self, a=[]): self.a = a # all instances get the same list! - There are subtle differences between ``x == y`` and ``x is y``. - If ``H`` is a numeric array, then ``H - x`` will subtract ``x`` from *all* elements - not only the diagonal, as in Matlab! * C: - Try building GPAW from scratch. - Typos like ``if (x = 0)`` which should have been ``if (x == 0)``. - Remember ``break`` in switch-case statements. - Check ``malloc-free`` pairs. Test for :ref:`memory leaks ` by repeating the call many times. - Remember to update reference counts of Python objects. - *Never* put function calls inside ``assert``'s. Compiling with ``-DNDEBUG`` will remove the call. gpaw-24.1.0/doc/devel/c_extension.rst000066400000000000000000000056151454550013000174540ustar00rootroot00000000000000.. _c_extension: ============ C extensions ============ The GPAW Python code makes use of some compiled C code in the dynamically linked extension ``_gpaw.so``. In the following it is demonstrated how a C function is made available to Python through a Python extension (more details can be found in the official `Python documentation`_. 
The wrapper code from ``c/blas.c`` shows how to wrap the two BLAS functions ``daxpy`` and ``zaxpy`` in Python:: PyObject* axpy(PyObject *self, PyObject *args) { PyObject* alpha; PyArrayObject* x; PyArrayObject* y; if (!PyArg_ParseTuple(args, "OOO", &alpha, &x, &y)) return NULL; integer n = x->dimensions[0]; for (int d = 1; d < x->nd; d++) n *= x->dimensions[d]; integer incx = 1; integer incy = 1; if (PyFloat_Check(alpha)) { PyFloatObject* palpha = (PyFloatObject*)alpha; daxpy_(&n, &(palpha->ob_fval), DOUBLEP(x), &incx, DOUBLEP(y), &incy); } else { PyComplexObject* palpha = (PyComplexObject*)alpha; zaxpy_(&n, (doublecomplex*)(&(palpha->cval)), (doublecomplex*)COMPLEXP(x), &incx, (doublecomplex*)COMPLEXP(y), &incy); } Py_RETURN_NONE; } In ``c/_gpaw.h`` and ``c/_gpaw_so.c``, we find:: static PyMethodDef functions[] = { {"axpy", axpy, METH_VARARGS, 0}, {0, 0, 0, 0} }; DL_EXPORT(void) init_gpaw(void) { PyObject* m = Py_InitModule3("_gpaw", functions, doc); if (m == NULL) return; import_array(); } We could use the C extension code directly as:: import numpy as np import _gpaw a = 2.7 x = np.array([1.1, 1.2, 1.3]) y = np.zeros(3) _gpaw.axpy(a, x, y) Instead, we wrap the code in a Python function ``axpy`` in the file ``gpaw/utilities/blas.py``:: def axpy(alpha, x, y): assert x.shape == y.shape assert x.flags.contiguous and y.flags.contiguous assert x.dtype == y.dtype if isinstance(alpha, complex): assert x.dtype == complex else: assert isinstance(alpha, float) _gpaw.axpy(alpha, x, y) if not debug: axpy = _gpaw.axpy The Python ``axpy`` function takes care of all value and type checking of the arguments to the function. There is therefore no need to do those checks in the C code (where it would be much more cumbersome to code). If the code is run in production mode (``debug == False``, default), then the Python wrapper function is bypassed for calls to the function. 
Versioning of the C-code ======================== Please update the version number in :git:`c/_gpaw.h` and :git:`gpaw/__init__.py` whenever changes are made that will make the Python code not work with the old C-code. Version numbers: 1) Jun 29, 2022: Compatibilty with numpy-1.23 (:mr:`1181`). 2) Sep 7, 2022: Remove old spherical harmonics C-code and add `\ell=7` functions (:mr:`914`) 3) ... .. _Python documentation: http://docs.python.org/extending/index.html gpaw-24.1.0/doc/devel/code_count.py000066400000000000000000000046641454550013000171030ustar00rootroot00000000000000# creates: lines.png import datetime as dt import os import subprocess import numpy as np import pylab as pl def count(dir, pattern): if not os.path.isdir(dir): return 0 files = subprocess.check_output(f'find {dir} -name {pattern}', shell=True).decode().split()[:-1] if not files: return 0 out = subprocess.check_output(f"wc -l {' '.join(files)} | tail -1", shell=True) return int(out.split()[0]) def polygon(x, y1, y2, *args, **kwargs): x = pl.concatenate((x, x[::-1])) y = pl.concatenate((y1, y2[::-1])) pl.fill(x, y, *args, **kwargs) def plot_count(dpi=70): year, month, f, c, py, test, doc, rst = np.loadtxt('lines.data').T date = year + (month - 1) / 12 fig = pl.figure(1, figsize=(10, 5), dpi=dpi) fig.add_subplot(111) polygon(date, c + py + test + doc, c + py + test + doc + rst, facecolor='m', label='Documentation (.rst)') polygon(date, c + py + test, c + py + test + doc, facecolor='c', label='Documentation (.py)') polygon(date, c + py, c + py + test, facecolor='y', label='Tests (.py)') polygon(date, c, c + py, facecolor='g', label='Python-code (.py) ') polygon(date, f, c, facecolor='r', label='C-code (.c, .h)') polygon(date, f, f, facecolor='b', label='Fortran-code') pl.axis('tight') pl.legend(loc='upper left') pl.title('Number of lines') pl.savefig('lines.png', dpi=dpi) def count_lines(): now = dt.date.today() stop = now.year, now.month year = 2005 month = 11 with open('lines.data', 'w') as fd: 
while (year, month) <= stop: hash = subprocess.check_output( 'git rev-list -n 1 --before="{}-{}-01 12:00" master' .format(year, month), shell=True).strip() print(year, month, hash) subprocess.call(['git', 'checkout', hash]) c = count('c', r'\*.[ch]') py = count('.', r'\*.py') test = count('gpaw/test', r'\*.py') test += count('test', r'\*.py') doc = count('doc', r'\*.py') py -= test + doc # avoid double counting rst = count('.', r'\*.rst') print(year, month, 0, c, py, test, doc, rst, file=fd) month += 1 if month == 13: month = 1 year += 1 plot_count() gpaw-24.1.0/doc/devel/codingstandard.rst000066400000000000000000000006551454550013000201210ustar00rootroot00000000000000.. _codingstandard: ================== Coding Conventions ================== Python Coding Conventions ========================= Follow ASE's :ref:`ase:coding conventions`. C-code ====== Code C in the C99 style:: for (int i = 0; i < 3; i++) { double f = 0.5; a[i] = 0.0; b[i + 1] = f * i; } and try to follow PEP7_. Use **M-x c++-mode** in emacs. .. _PEP7: http://www.python.org/dev/peps/pep-0007 gpaw-24.1.0/doc/devel/communicators.rst000066400000000000000000000004121454550013000200070ustar00rootroot00000000000000.. _communicators: MPI communicators ================= .. autoclass:: gpaw.mpi._Communicator :members: .. autofunction:: gpaw.mpi.send .. autofunction:: gpaw.mpi.receive .. autofunction:: gpaw.mpi.broadcast_array .. autofunction:: gpaw.mpi.broadcast_exception gpaw-24.1.0/doc/devel/debugging.rst000066400000000000000000000231311454550013000170620ustar00rootroot00000000000000.. _debugging: ========= Debugging ========= Python debugging ================ Even though some debugging can done just with the print function, a real debugger offers several advantages. It is possible, for example, to set breakpoints in certain files or functions, execute the code step by step, examine and change values of variables. Python contains a standard debugger *pdb*. 
A script can be started under the debugger control as *python3 -m pdb script.py*. Now before the execution of the script starts one enters the debugger prompt. The most important debugger commands are: h(elp) [command] b(reak) [[filename:]lineno|function[, condition]] Set a breakpoint. s(tep) Execute the current line, stop at the first possible occasion (either in a function that is called or on the next line in the current function). n(ext) Continue execution until the next line in the current function is reached or it returns. r(eturn) Continue execution until the current function returns. c(ont(inue)) Continue execution, only stop when a breakpoint is encountered. l(ist) [first[, last]] List source code for the current file. p expression Evaluate the expression in the current context and print its value. Note: "print" can also be used, but is not a debugger command -- this executes the Python print statement Most commands can be invoked with only the first letter. A full list of all the commands and their explanation can be found in the `Python debugger (PDB) documentation `_. An example session might look like:: corona1 ~/gpaw/trunk/test> python3 -m pdb H.py > /home/csc/jenkovaa/gpaw/trunk/test/H.py(1)?() -> from gpaw import GPAW (Pdb) l 11,5 11 hydrogen.SetCalculator(calc) 12 e1 = hydrogen.GetPotentialEnergy() 13 14 calc.Set(kpts=(1, 1, 1)) 15 e2 = hydrogen.GetPotentialEnergy() 16 equal(e1, e2) (Pdb) break 12 Breakpoint 1 at /home/csc/jenkovaa/gpaw/trunk/test/H.py:12 (Pdb) c ... output from the script... > /home/csc/jenkovaa/gpaw/trunk/test/H.py(12)?() -> e1 = hydrogen.GetPotentialEnergy() (Pdb) s --Call-- > /v/solaris9/appl/chem/CamposASE/ASE/ListOfAtoms.py(224)GetPotentialEnergy() -> def GetPotentialEnergy(self): (Pdb) p self [Atom('H', (2.0, 2.0, 2.0))] Emacs has a special mode for Python debugging which can be invoked as *M-x pdb*. After that one has to give the command to start the debugger (e.g. python3 -m pdb script.py). 
Emacs opens two windows, one for the debugger command prompt and one which shows the source code and the current point of execution. Breakpoints can be set also on the source-code window. C debugging =========== First, the C-extension should be compiled with the *-g* flag in order to get the debug information into the library. Also, the optimizations should be switched off which could be done in :ref:`siteconfig.py ` as:: extra_link_args += ['-g'] extra_compile_args += ['-O0', '-g'] There are several debuggers available, the following example session applies to *gdb*:: sepeli ~/gpaw/trunk/test> gdb python GNU gdb Red Hat Linux (6.1post-1.20040607.52rh) (gdb) break Operator_apply Function "Operator_apply" not defined. Make breakpoint pending on future shared library load? (y or [n]) y Breakpoint 1 (Operator_apply) pending. (gdb) run H.py Starting program: /usr/bin/python2.4 H.py ... output ... Breakpoint 2, Operator_apply (self=0x2a98f8f670, args=0x2a9af73b78) at c/operators.c:83 (gdb) One can also do combined C and Python debugging by starting the input script as ``run -m pdb H.py`` i.e:: sepeli ~/gpaw/trunk/test> gdb python GNU gdb Red Hat Linux (6.1post-1.20040607.52rh) (gdb) break Operator_apply Function "Operator_apply" not defined. Make breakpoint pending on future shared library load? (y or [n]) y Breakpoint 1 (Operator_apply) pending. (gdb) run -m pdb H.py Starting program: /usr/bin/python2.4 -m pdb H.py [Thread debugging using libthread_db enabled] [New Thread -1208371520 (LWP 1575)] > /home/jenkovaa/test/H.py(1)?() -> from gpaw import GPAW (Pdb) The basic gdb commands are the same as in pdb (or vice versa). Full documentation can be found in the `GDB user manual `_. Apart from the commands mentioned earlier, a few are worthy of mention here: backtrace [n | full] Print a backtrace of the entire stack: one line per frame for all frames in the stack ``full`` prints the values of the local variables also. 
``n`` specifies the number of frames to print jump linespec Resume execution at line ``linespec`` i.e. at the given location in the corresponding source code. Any location of the type ``filename:linenum`` will do, but the results may be bizarre if ``linespec`` is in a different function from the one currently executing. tbreak [[filename:]lineno|function[, condition]] Set a breakpoint similar to how ``break`` operates, but this type of breakpoint is automatically deleted after the first time your program stops there. p(rint) expr Inquire about the symbols (names of variables, functions and types) defined in a compiled program. ``expr`` may include calls to functions in the program being debugged. Can also be used to evaluate more complicated expressions or referring to static variables in other source files as ``'foo.c'::x``. .. hint:: Emacs can be used also with gdb. Start with *M-x gdb* and then continue as when starting from the command line. .. _memory_leaks: Tracking memory leaks --------------------- Although a C-extension runs fine, or so it seems, reference counting of Python objects and matching calls to ``malloc`` and ``free`` may not always be up to par. Frequently, the symptom of such disproportions is all too clear, resulting in segmentation faults (i.e. ``SIGSEGV``) e.g. when a memory address is accessed before it has been allocated or after it has been deallocated. Such situations can be debugged using *gdb* as described above. .. note:: Please refer to the Python/C API Reference Manual or the unofficial (but helpful) introduction to `reference counting in Python `_. On the other hand, neglecting the deallocation or forgetting to decrease the reference count of a Python object will lead to a build-up of unreachable memory blocks - a process known as memory leakage. Despite being non-critical bugs, severe memory leaks in C-code will eventually bring all computations to a halt when the program runs out of available memory.
Suppose you have written a Python script called ``test.py`` which appears to suffer from memory leaks. Having built GPAW with the *-g* flag as described, tracking down the source of the memory leak (in this case line 123 of ``myfile.c``) can be done using Valgrind_ as follows:: sepeli ~/gpaw/trunk/test> valgrind --tool=memcheck --leak-check=yes \ --show-reachable=yes --num-callers=20 --track-fds=yes gpaw-python test.py ==16442== 6,587,460 bytes in 29,943 blocks are definitely lost in loss record 85 of 85 ==16442== at 0x40053C0: malloc (vg_replace_malloc.c:149) ==16442== by 0x5322831: ??? ==16442== by 0x8087BD5: my_leaky_function (myfile.c:123) Note that Valgrind_ is more than just a memory profiler for C; it provides an entire instrumentation framework for building dynamic analysis tools and thus includes other debugging tools, e.g. a heap/stack/global array overrun detector. .. _Valgrind: http://valgrind.org .. _parallel_debugging: Parallel debugging ================== Debugging programs that are run in parallel with MPI is not as straightforward as in serial, but many of the same tools can be used (e.g. GDB and Valgrind). Note that one cannot use the Python debugger as described above because GPAW requires that a custom Python interpreter is built with the necessary MPI bindings. There are probably numerous ways to debug an MPI application with GDB, and experimentation is strongly encouraged, but the following method is recommended for interactive debugging. This approach builds upon advice in Open MPI's FAQ `Debugging applications in parallel `_, but is adapted for use with Python on a GNU/Linux development platform.
Prepend the following to your script:: import os, sys, time, math from gpaw.mpi import world gpaw_python_path = '/your/path/to/gpaw-python' ndigits = 1 + int(math.log10(world.size)) assert os.system('screen -S gdb.%0*d -dm gdb %s %d' \ % (ndigits, world.rank, gpaw_python_path, os.getpid())) == 0 time.sleep(ndigits) world.barrier() This runs ``gdb /path/to/gpaw-python pid`` from within each instance of the custom Python interpreter and detaches it into a `screen `_ session called ``gdb.0`` for rank 0 etc. You may now resume control of the debugger instances by running ``screen -rd gdb.0``, entering `c` to continue and so forth for all instances. .. hint:: Run ``screen -ls`` to get an overview of running sessions. Enable logging of an attached session with Ctrl+a H (capital H). Use Ctrl+a Ctrl+d to detach a session but leave it running. .. note:: This approach only works if the problem you're trying to address occurs *after* the GPAW executable has been loaded. In the alternate case, it is recommended to debug a single instance of the parallel program with the usual serial methods first. For details on using Valgrind on parallel programs, please refer to the online manual `Debugging MPI Parallel Programs with Valgrind `_ gpaw-24.1.0/doc/devel/density_and_hamiltonian.rst000066400000000000000000000002541454550013000220140ustar00rootroot00000000000000Density and hamiltonian objects =============================== .. autoclass:: gpaw.density.Density :members: .. autoclass:: gpaw.hamiltonian.Hamiltonian :members: gpaw-24.1.0/doc/devel/devel.rst000066400000000000000000000043661454550013000162370ustar00rootroot00000000000000.. _devel: =========== Development =========== To get started, take a look a our :ref:`development workflow`. GPAW development can be done by anyone! Just take a look at the `issue tracker`_ and find something that suits your talents. 
The primary source of information is still the :ref:`basic` and :ref:`documentation`, but as a developer you might need additional information which can be found here. For example the :ref:`code_overview`. As a developer, you should subscribe to the GPAW :ref:`mail list`. We would also like to encourage you to join our ``#gpaw`` channel for :ref:`chat`. .. _issue tracker: https://gitlab.com/gpaw/gpaw/issues/ .. toctree:: :maxdepth: 2 workflow .. note --- below toctrees are defined in separate files to make sure that the line spacing doesn't get very large (which is of course a bad hack) Development topics ================== When committing significant changes to the code, remember to add a note in the :ref:`releasenotes` at the top (development version) - the version to become the next release. .. toctree:: :maxdepth: 1 testing codingstandard c_extension writing_documentation formulas debugging turn_off_things profiling ase_optimize/ase_optimize bugs newrelease technology * Details about supported :ref:`platforms and architectures`. .. _PyLint: http://www.logilab.org/857 .. _the_big_picture: .. _code_overview: Code Overview ============= The developer guide provides an overview of the PAW quantities and how the corresponding objects are defined in the code: .. toctree:: :maxdepth: 2 overview developersguide proposals/proposals paw symmetry wavefunctions setups density_and_hamiltonian communicators others The GPAW logo ============= The GPAW-logo is available as an SVG-file: :download:`gpaw-logo.svg`. .. image:: gpaw-logo.svg Statistics ========== The image below shows the development in the volume of the code as per April 5 2016. .. image:: lines.png *Documentation* refers solely to the contents of this homepage. Inline documentation is included in the other line counts. Contributing to GPAW ==================== Getting commit access to the GPAW code works the same way as for the :ref:`ASE project `.
gpaw-24.1.0/doc/devel/developersguide.rst000066400000000000000000000255501454550013000203240ustar00rootroot00000000000000.. _developersguide: ======================== Developers guide to GPAW ======================== XXX Update page to new GPAW style (after guc merge) and mention NewLFCs. This page goes through the most important equations of a PAW calculation and has references to the code. It is a good idea to have :ref:`the big picture ` in front of you when reading this page. * Initial wave functions and densities (todo) * Finding the ground state (todo) * ... Wave functions ============== The central quantities in a PAW calculation are the pseudo wave-functions, `\tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r})`, from which the all-electron wave functions can be obtained: .. math:: \psi_{\sigma\mathbf{k}n}(\mathbf{r}) = \tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r}) + \sum_a \sum_i [\phi_i^a(\mathbf{r} - \mathbf{R}^a) - \tilde{\phi}_i^a(\mathbf{r} - \mathbf{R}^a)] \langle\tilde{p}_i^a | \tilde{\psi}_{\sigma\mathbf{k}n} \rangle, where .. math:: \langle\tilde{p}_i^a | \tilde{\psi}_{\sigma\mathbf{k}n} \rangle = \int d\mathbf{r} \tilde{p}_i^a(\mathbf{r} - \mathbf{R}^a) \tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r}). Here, `a` is the atom number, `\mathbf{R}^a` is the position of atom number `a` and `\tilde{p}_i^a`, `\tilde{\phi}_i^a` and `\phi_i^a` are the projector functions, pseudo partial waves, and all-electron partial waves respectively, of the atoms. See :ref:`overview_array_naming` for more information on the naming of arrays. Note that ``spos_c`` gives the position of the atom in scaled coordinates in the range [0:1[ (relative to the unit cell). Note, that in the code, ``i`` refers to `n`, `\ell` and `m` quantum numbers, and ``j`` refers to `n` and `\ell` only (see :ref:`overview_array_naming`). 
So, to put an atom-centered function like `\tilde{p}_{n\ell m}^a(\mathbf{r})` on the 3D grid, you need both the radial part `\tilde{p}_{n\ell}^a(r)` (one of the splines in ``paw.wfs.setups[a].pt_j``) and a spherical harmonic `Y_{\ell m}(\theta,\phi)`. Putting radial functions times spherical harmonics on a grid is done by the :class:`gpaw.lfc.LocalizedFunctionsCollection` class. See also :class:`gpaw.setup.Setup` and :class:`gpaw.spline.Spline`. .. _orthogonality: The wave-functions are orthonormalized such that the pseudo wave-functions obey the following orthogonality requirements: .. math:: \langle \psi_{\sigma\mathbf{k}n} | \psi_{\sigma\mathbf{k}m} \rangle = \langle \tilde{\psi}_{\sigma\mathbf{k}n} | \hat{O} | \tilde{\psi}_{\sigma\mathbf{k}m} \rangle = \delta_{nm}, where `\hat{O}` is the overlap operator in the PAW formalism. Refer to :ref:`Orthogonalizing the wave functions ` for details. .. _overlaps: Overlaps ========= The overlap operator is defined in terms of the PAW overlap corrections: .. math:: \hat{O} = 1 + \sum_a \sum_{i_1 i_2} |\tilde{p}_{i_1}^a\rangle \Delta O_{i_1 i_2}^a \langle\tilde{p}_{i_2}^a|. The constants `\Delta O_{i_1 i_2}^a` are found in ``paw.wfs.setups[a].dO_ii`` (``ndarray``). XXX Someone should rename ``dO_ii`` to ``dS_ii`` or `\hat{S}` to `\hat{O}`. .. math:: \Delta O_{i_1 i_2}^a = \int d\mathbf{r} [\phi_{i_1}^a(\mathbf{r})\phi_{i_2}^a(\mathbf{r}) - \tilde{\phi}_{i_1}^a(\mathbf{r})\tilde{\phi}_{i_2}^a(\mathbf{r})]. An approximate inverse overlap operator is similarly defined by: .. math:: \hat{O}^{\;-1}_\mathrm{approx.} = 1 + \sum_a \sum_{i_1 i_2} |\tilde{p}_{i_1}^a\rangle \Delta C_{i_1 i_2}^a \langle\tilde{p}_{i_2}^a|. The inverse overlap coefficients `\Delta C_{i_1 i_2}^a` are found in ``setup.dC_ii`` (``ndarray``) and are solutions to the system of linear equations: ..
math:: \Delta C_{i_1 i_2}^a + \Delta O_{i_1 i_2}^a + \sum_{i_3 i_4} \Delta C_{i_1 i_3}^a B_{i_3 i_4}^a \Delta O_{i_4 i_2}^a = 0 \qquad ,\forall i_1,i_2 , such that `\hat{O}^{\;-1}_\mathrm{approx.}\hat{O} = \hat{I}` provided `\langle\tilde{p}_{i_1}^a|\tilde{p}_{i_2}^{a'}\rangle = \delta_{a a'} \langle\tilde{p}_{i_1}^a|\tilde{p}_{i_2}^{a}\rangle`. These projector overlaps `B_{i_1 i_2}^a = \langle\tilde{p}_{i_1}^a|\tilde{p}_{i_2}^{a}\rangle` are likewise found in ``setup.B_ii``. .. _density: Densities ========= From the pseudo wave-functions, the pseudo electron spin-densities can be constructed (see :xkcd:`here <849>`): .. math:: \tilde{n}_\sigma(\mathbf{r}) = \frac{1}{N_s} \sum_{s=1}^{N_s} \hat{S}_s \left [ \sum_{n\mathbf{k}} f_{n\mathbf{k}\sigma} |\tilde{\psi}_{n\mathbf{k}\sigma}(\mathbf{r})|^2 + \frac{1}{2} \sum_a \tilde{n}_c^a(|\mathbf{r}-\mathbf{R}^a|) \right ]. Here, `\hat{S}_s` is one of the `N_s` symmetry operators of the system (see :class:`gpaw.symmetry.Symmetry`), `f_{n\mathbf{k}\sigma}` are the occupation numbers (adding up to the number of valence electrons), and `\tilde{n}_c^a(r)` is the pseudo core density for atom number `a`. The all-electron spin-densities are given as: .. math:: n_\sigma(\mathbf{r}) = \tilde{n}_\sigma(\mathbf{r}) + \sum_a [n_\sigma^a(\mathbf{r} - \mathbf{R}^a) - \tilde{n}_\sigma^a(\mathbf{r} - \mathbf{R}^a)], where .. math:: n_\sigma^a(\mathbf{r}) = \sum_{i_1 i_2} D_{\sigma i_1 i_2}^a \phi_{i_1}^a(\mathbf{r})\phi_{i_2}^a(\mathbf{r}) + \frac{1}{2} n_c^a(r), .. math:: \tilde{n}_\sigma^a(\mathbf{r}) = \sum_{i_1 i_2} D_{\sigma i_1 i_2}^a \tilde{\phi}_{i_1}^a(\mathbf{r})\tilde{\phi}_{i_2}^a(\mathbf{r}) + \frac{1}{2} \tilde{n}_c^a(r), are atom centered expansions, and ..
math:: D_{\sigma i_1 i_2}^a = \sum_{n\mathbf{k}} \langle \tilde{\psi}_{\sigma\mathbf{k}n} | \tilde{p}_{i_1}^a \rangle f_{n\mathbf{k}\sigma} \langle \tilde{p}_{i_2}^a | \tilde{\psi}_{\sigma\mathbf{k}n} \rangle is an atomic spin-density matrix, which must be symmetrized the same way as the pseudo electron spin-densities. .. list-table:: * - formula - object - type * - `\hat{S}_s` - ``paw.wfs.symmetry`` - :class:`gpaw.symmetry.Symmetry` * - `\tilde{n}_\sigma` - ``paw.density.nt_sG`` and ``paw.density.nt_sg`` - ``ndarray`` * - `\tilde{n}=\sum_\sigma\tilde{n}_\sigma` - ``paw.density.nt_g`` - ``ndarray`` * - `\tilde{n}_c^a(r)` - ``paw.wfs.setups[a].nct`` - :class:`gpaw.spline.Spline` * - `\tilde{n}_c^a(\mathbf{r}-\mathbf{R}^a)` - ``paw.density.nct`` - :class:`gpaw.lfc.LocalizedFunctionsCollection` * - `f_{\sigma\mathbf{k}n}` - ``paw.wfs.kpt_u[u].f_n`` - ``ndarray`` * - `D_{\sigma i_1 i_2}^a` - ``paw.density.D_asp[a]`` - ``ndarray`` From the all-electron and pseudo electron densities we can now construct corresponding total all-electron and pseudo charge densities: .. math:: \rho(\mathbf{r}) = \sum_\sigma n_\sigma(\mathbf{r}) + \sum_a Z^a(\mathbf{r} - \mathbf{R}^a), .. math:: \tilde{\rho}(\mathbf{r}) = \sum_\sigma \tilde{n}_\sigma(\mathbf{r}) + \sum_a \tilde{Z}^a(\mathbf{r} - \mathbf{R}^a). If `\mathbb{Z}^a` is the atomic number of atom number `a`, then `Z^a(\mathbf{r})=-\mathbb{Z}^a\delta(\mathbf{r})` (we count the electrons as positive charge and the protons as negative charge). The compensation charges are given as: .. math:: \tilde{Z}^a(\mathbf{r}) = \sum_{\ell=0}^{\ell_{\text{max}}} \sum_{m=-\ell}^\ell Q_{\ell m}^a \hat{g}_{\ell m}^a(\mathbf{r}) = \sum_{\ell=0}^{\ell_{\text{max}}} \sum_{m=-\ell}^\ell Q_{\ell m}^a \hat{g}_\ell^a(r) Y_{\ell m}(\theta,\phi), where `\hat{g}_\ell^a(r)\propto r^\ell\exp(-\alpha^a r^2)` are Gaussians. 
The compensation charges should make sure that the two atom centered densities `\rho^a=\sum_\sigma n_\sigma^a + Z^a` and `\tilde{\rho}^a=\sum_\sigma \tilde{n}_\sigma^a + \tilde{Z}^a` have identical multipole expansions outside the augmentation sphere. This gives the following equation for `Q_L^a`: .. math:: Q_L^a = \sum_{i_1 i_2} \Delta_{i_1 i_2 L}^a \sum_\sigma D_{\sigma i_1 i_2}^a + \Delta_0^a \delta_{\ell,0}, where .. math:: \Delta_{i_1 i_2 L}^a = \int d\mathbf{r} Y_L(\hat{\mathbf{r}}) r^\ell [\phi_{i_1}^a(\mathbf{r})\phi_{i_2}^a(\mathbf{r}) - \tilde{\phi}_{i_1}^a(\mathbf{r})\tilde{\phi}_{i_2}^a(\mathbf{r})], .. math:: \Delta_0^a = \int d\mathbf{r} Y_{00}(\hat{\mathbf{r}}) [-\mathbb{Z}^a \delta(\mathbf{r}) + n_c^a(\mathbf{r}) - \tilde{n}_c^a(\mathbf{r})]. .. list-table:: * - formula - object - type * - `\tilde{\rho}` - ``paw.density.rhot_g`` - ``ndarray`` * - `\mathbb{Z}^a` - ``setup.Z`` - ``int`` * - `\Delta_{i_1 i_2 L}^a` - ``setup.Delta_pL`` - ``ndarray`` * - `\Delta_0^a` - ``setup.Delta0`` - ``float`` * - `\hat{g}_\ell^a(r)` - ``setup.ghat_l`` - List of :class:`gpaw.spline.Spline`\ s * - `\hat{g}_L^a(\mathbf{r}-\mathbf{R}^a)` - ``paw.density.ghat`` - :class:`gpaw.lfc.LocalizedFunctionsCollection` * - `Q_L^a` - ``paw.density.Q_aL[a]`` - ``ndarray`` .. _developersguide_total_energy: The total energy ================ The total PAW energy is composed of a smooth part evaluated using pseudo quantities on the 3D grid, plus corrections for each atom evaluated on radial grids inside the augmentation spheres: `E=\tilde{E}+\sum_a(E^a - \tilde{E}^a)`. .. 
math:: \tilde{E} &= -\frac{1}{2} \sum_{\sigma\mathbf{k}n} f_{\sigma\mathbf{k}n} \int d\mathbf{r} \tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r}) \nabla^2 \tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r}) + \frac{1}{2}\int d\mathbf{r}d\mathbf{r}' \frac{\tilde{\rho}(\mathbf{r})\tilde{\rho}(\mathbf{r}')} {|\mathbf{r}-\mathbf{r}'|} \\ &\quad+ \sum_\sigma\sum_a\int d\mathbf{r}\tilde{n}_\sigma(\mathbf{r}) \bar{v}^a(|\mathbf{r}-\mathbf{R}^a|) + E_{\text{xc}}[\tilde{n}_\uparrow, \tilde{n}_\downarrow] % %.. math:: % \\ E^a &= -\frac{1}{2} 2\sum_i^{\text{core}} \int d\mathbf{r} \phi_i^a(\mathbf{r}) \nabla^2 \phi_i^a(\mathbf{r}) -\frac{1}{2} \sum_\sigma \sum_{i_1 i_2} D_{\sigma i_1 i_2}^a \int d\mathbf{r} \phi_{i_1}^a(\mathbf{r}) \nabla^2 \phi_{i_2}^a(\mathbf{r}) \\ &\quad+ \frac{1}{2}\int d\mathbf{r}d\mathbf{r}' \frac{\rho^a(\mathbf{r})\rho^a(\mathbf{r}')} {|\mathbf{r}-\mathbf{r}'|} + E_{\text{xc}}[n^a_\uparrow, n^a_\downarrow] % %.. math:: % \\ \tilde{E}^a &= -\frac{1}{2} \sum_\sigma\sum_{i_1 i_2} D_{\sigma i_1 i_2}^a \int d\mathbf{r} \tilde{\phi}_{i_1}^a(\mathbf{r}) \nabla^2 \tilde{\phi}_{i_2}^a(\mathbf{r}) + \frac{1}{2}\int d\mathbf{r}d\mathbf{r}' \frac{\tilde{\rho}^a(\mathbf{r})\tilde{\rho}^a(\mathbf{r}')} {|\mathbf{r}-\mathbf{r}'|} \\ &\quad+ \sum_\sigma \int d\mathbf{r}\tilde{n}^a_\sigma(\mathbf{r}) \bar{v}^a(r) + E_{\text{xc}}[\tilde{n}^a_\uparrow, \tilde{n}^a_\downarrow] In the last two equations, the integrations are limited to inside the augmentation spheres only. The electrostatic energy part of `\tilde{E}` is calculated as `\frac{1}{2}\int d\mathbf{r}\tilde{v}_H(\mathbf{r})\tilde{\rho}(\mathbf{r})`, where the Hartree potential is found by solving Poissons equation: `\nabla^2 \tilde{v}_H(\mathbf{r})=-4\pi\tilde{\rho}(\mathbf{r})` (see :class:`gpaw.poisson.FDPoissonSolver`). 
gpaw-24.1.0/doc/devel/formulas.rst000066400000000000000000000072611454550013000167650ustar00rootroot00000000000000======== Formulas ======== See also :xkcd:`Useful geometry formulas <2509>` and :xkcd:`What greek letters mean in equations <2586>`. Coulomb ======= .. math:: \frac{1}{|\br-\br'|} = \sum_\ell \sum_{m=-\ell}^\ell \frac{4\pi}{2\ell+1} \frac{r_<^\ell}{r_>^{\ell+1}} Y_{\ell m}^*(\hat\br) Y_{\ell m}(\hat\br') or .. math:: \frac{1}{r} = \int \frac{d\mathbf{G}}{(2\pi)^3}\frac{4\pi}{G^2} e^{i\mathbf{G}\cdot\br}. Fourier transforms ================== The Fourier transform of a radial function multiplied by a spherical harmonic is: .. math:: f(G)Y_{\ell m}(\hat G) = \int d\br e^{i\mathbf{G}\cdot\br} f(r)Y_{\ell m}(\hat\br), where .. math:: f(G) = 4\pi i^\ell \int_0^\infty r^2 dr j_\ell(Gr) f(r). .. note:: .. math:: e^{i \mathbf{G} \cdot \br} = 4 \pi \sum_{\ell m} i^\ell j_\ell(Gr) Y_{\ell m}(\hat{\br}) Y_{\ell m}(\hat{\mathbf{G}}). The `spherical Bessel function`_ is defined as: .. math:: j_\ell(x) = \text{Re}\{ \frac{e^{ix}}{x} \sum_{n=0}^\ell \frac{(-i)^{\ell+1-n}}{n!(2x)^n} \frac{(\ell+n)!}{(\ell-n)!} \}. This is implemented in this function: .. autofunction:: gpaw.atom.radialgd.fsbt .. _spherical Bessel function: http://en.wikipedia.org/wiki/Bessel_function #Spherical_Bessel_functions:_jn.2C_yn Gaussians ========= .. math:: n(r) = (\alpha/\pi)^{3/2} e^{-\alpha r^2}, .. math:: \int_0^\infty 4\pi r^2 dr n(r) = 1 Its Fourier transform is: .. math:: n(k) = \int d\br e^{i\mathbf{k}\cdot\br} n(r) = \int_0^\infty 4\pi r^2 dr \frac{\sin(kr)}{kr} n(r) = e^{-k^2/(4\alpha)}. With `\nabla^2 v=-4\pi n`, we get the potential: .. math:: v(r) = \frac{\text{erf}(\sqrt\alpha r)}{r}, and the energy: .. math:: \frac12 \int_0^\infty 4\pi r^2 dr n(r) v(r) = \sqrt{\frac{\alpha}{2\pi}}. Note: `\text{erf}(x) \simeq x\sqrt{4/\pi}` for small `x`. Shape functions --------------- GPAW uses Gaussians as shape functions for the PAW compensation charges: ..
math:: g_{\ell m}(\br) = \frac{\alpha^{\ell + 3 / 2} \ell ! 2^{2\ell + 2}} {\sqrt{\pi} (2\ell + 1) !} e^{-\alpha r^2} Y_{\ell m}(\hat{\br}). They are normalized as: .. math:: \int d \br g_{\ell m}(\br) Y_{\ell m}(\hat{\br}) r^\ell = 1. Hydrogen ======== The 1s orbital: .. math:: \psi_{\text{1s}}(r) = 2Y_{00} e^{-r}, and the density is: .. math:: n(r) = |\psi_{\text{1s}}(r)|^2 = e^{-2r}/\pi. Radial Schrödinger equation =========================== With `\psi_{n\ell m}(\br) = u(r) / r Y_{\ell m}(\hat\br)`, we have the radial Schrödinger equation: .. math:: -\frac12 \frac{d^2u}{dr^2} + \frac{\ell(\ell + 1)}{2r^2} u + v u = \epsilon u. We want to solve this equation on a non-equidistant radial grid with `r_g=r(g)` for `g=0,1,...`. Inserting `u(r) = a(g) r^{\ell+1}`, we get: .. math:: \frac{d^2 a}{dg^2} (\frac{dg}{dr})^2 r^2 + \frac{da}{dg}(r^2 \frac{d^2g}{dr^2} + 2 (\ell+1) r \frac{dg}{dr}) - 2 r^2 (v - \epsilon) a = 0. Including Scalar-relativistic corrections ----------------------------------------- The scalar-relativistic equation is: .. math:: -\frac{1}{2 M} \frac{d^2u}{dr^2} + \frac{\ell(\ell + 1)}{2Mr^2} u - \frac{1}{(2Mc)^2}\frac{dv}{dr}(\frac{du}{dr}-\frac{u}{r}) + v u = \epsilon u. where the relativistic mass is: .. math:: M = 1 - \frac{1}{2c^2} (v - \epsilon). With `u(r) = a(g) r^\alpha`, `\kappa = (dv/dr)/(2Mc^2)` and .. math:: \alpha = \sqrt{\ell^2 + \ell + 1 -(Z/c)^2}, we get: .. math:: \frac{d^2 a}{dg^2} (\frac{dg}{dr})^2 r^2 + \frac{da}{dg}(r^2 \kappa \frac{dg}{dr} + r^2 \frac{d^2g}{dr^2} + 2 \alpha r \frac{dg}{dr}) + [2 M r^2 (\epsilon - v) + \alpha (\alpha - 1) - \ell (\ell + 1) + \kappa (\alpha - 1) r] a = 0. 
gpaw-24.1.0/doc/devel/gpaw-logo.svg000066400000000000000000000322551454550013000170210ustar00rootroot00000000000000 image/svg+xml gpaw-24.1.0/doc/devel/lines.data000066400000000000000000000122331454550013000163430ustar00rootroot000000000000002005 11 0 4112 12144 1067 0 0 2005 12 0 4285 12817 1162 0 0 2006 1 0 4375 14474 1187 0 0 2006 2 0 5165 13659 1327 0 0 2006 3 0 5176 14084 1443 0 0 2006 4 0 5255 14581 1446 0 0 2006 5 0 5050 15628 1470 0 0 2006 6 0 5056 15810 1462 0 0 2006 7 0 5518 16068 1464 0 0 2006 8 0 5553 15649 1500 0 0 2006 9 0 5557 16212 1501 0 0 2006 10 0 5635 16514 1537 0 0 2006 11 0 5713 16279 1583 0 0 2006 12 0 6419 16371 1614 0 0 2007 1 0 6630 17646 1777 0 0 2007 2 0 6916 18576 1884 0 0 2007 3 0 6916 18941 1884 0 0 2007 4 0 6917 19017 2017 0 0 2007 5 0 6944 20981 2086 0 0 2007 6 0 6973 25241 2255 0 0 2007 7 0 7096 27413 2358 0 0 2007 8 0 11947 28371 2556 0 0 2007 9 0 12031 29365 2712 0 0 2007 10 0 12186 31796 2919 0 0 2007 11 0 12248 33088 3171 0 0 2007 12 0 14561 35874 3581 0 0 2008 1 0 14611 36558 3875 0 0 2008 2 0 14762 36424 4340 0 0 2008 3 0 15673 38394 4464 0 0 2008 4 0 15759 38508 4478 0 0 2008 5 0 15817 39397 4763 0 0 2008 6 0 17530 41853 4807 0 0 2008 7 0 17685 44125 5017 273 416 2008 8 0 17859 45332 5093 1377 7591 2008 9 0 17874 44876 5178 2630 7671 2008 10 0 18085 45641 5180 2636 7791 2008 11 0 19452 46470 5375 2669 8393 2008 12 0 19510 47082 5741 2671 8628 2009 1 0 19551 48552 5949 3166 9142 2009 2 0 19570 46208 6062 3166 9204 2009 3 0 20465 47744 7401 3912 9685 2009 4 0 21404 50424 8274 4122 10078 2009 5 0 21542 52380 8963 4126 10088 2009 6 0 21524 53744 9502 4245 10715 2009 7 0 22602 58219 10935 4322 11291 2009 8 0 23222 65960 12504 4525 11668 2009 9 0 23514 67590 13525 4650 11720 2009 10 0 23553 65054 14287 4846 11828 2009 11 0 24355 66768 13531 5323 11937 2009 12 0 24884 70942 14120 5400 12228 2010 1 0 25855 71482 14584 5430 12357 2010 2 0 25708 74149 14092 5673 12879 2010 3 0 26464 75996 14936 5956 13348 2010 4 0 27336 77515 15123 
6593 13452 2010 5 0 27372 80843 14878 6759 14035 2010 6 0 27378 83352 14802 7077 14240 2010 7 0 27459 79436 14999 8264 15221 2010 8 0 27545 81829 15184 8430 15623 2010 9 0 26392 82715 16044 8432 16089 2010 10 0 26395 83125 16057 8479 16155 2010 11 0 26033 82637 16130 8474 16376 2010 12 0 25221 82622 16940 8627 16684 2011 1 0 25326 82599 17221 8773 16786 2011 2 0 25340 78040 18593 8919 17664 2011 3 0 25356 78438 18781 9026 17965 2011 4 0 25357 79170 18931 9031 17999 2011 5 0 25935 79237 18799 9026 17977 2011 6 0 24564 80568 19180 9764 19098 2011 7 0 24613 81593 19376 9809 19157 2011 8 0 24613 82307 19528 9856 19587 2011 9 0 24613 82615 19631 9904 18880 2011 10 0 25230 83710 20014 9905 18987 2011 11 0 25218 84349 20094 10105 19185 2011 12 0 25515 85457 20311 10149 19182 2012 1 0 25515 86474 21163 10183 19218 2012 2 0 25898 86853 21497 10339 19277 2012 3 0 25397 87888 21996 10339 19370 2012 4 0 25539 89985 22815 10325 20019 2012 5 0 25539 90369 23401 10325 20032 2012 6 0 25786 91655 24370 10342 20072 2012 7 0 25787 92133 24372 10440 20403 2012 8 0 25793 92297 24379 10440 20541 2012 9 0 25793 92387 24379 10592 20804 2012 10 0 25793 92512 26236 10623 20907 2012 11 0 25793 92856 26247 10899 21022 2012 12 0 25793 93306 26805 10899 21064 2013 1 0 25826 93430 26861 11679 21131 2013 2 0 26473 94264 27084 11743 21351 2013 3 0 26473 94210 27079 11780 21442 2013 4 0 26477 94345 27254 11813 21484 2013 5 0 26728 94510 27344 11813 21701 2013 6 0 26733 95206 27461 12142 21898 2013 7 0 17139 98163 27536 12262 21712 2013 8 0 17139 98162 27555 12346 21841 2013 9 0 17194 98198 27556 12192 21841 2013 10 0 17241 98925 27853 12208 21842 2013 11 0 17237 98903 27862 12201 21853 2013 12 0 17238 99232 28079 12218 21962 2014 1 0 17250 101156 28360 12234 21940 2014 2 0 17237 100101 27866 12219 21969 2014 3 0 17238 100234 27957 12218 21972 2014 4 0 17238 100283 28086 12267 22002 2014 5 0 17386 101932 28225 12219 21972 2014 6 0 16645 99514 28087 12418 22229 2014 7 0 16645 100334 28246 12461 22254 
2014 8 0 16658 101425 28393 12779 22285 2014 9 0 16658 101614 28532 13018 23011 2014 10 0 16844 98234 28046 13241 23174 2014 11 0 16889 102626 28645 13881 23653 2014 12 0 16889 103689 28886 13905 23821 2015 1 0 16843 103095 28799 14184 23909 2015 2 0 16843 103198 28923 14192 23845 2015 3 0 16924 104895 29174 14393 23822 2015 4 0 16924 106089 29400 14551 23808 2015 5 0 16954 107088 29583 14205 23857 2015 6 0 17768 109216 31801 14278 24006 2015 7 0 17874 109635 31343 14841 24609 2015 8 0 17805 108811 31410 14946 24994 2015 9 0 18023 109807 30849 14941 24846 2015 10 0 18012 110166 30897 14941 24846 2015 11 0 17979 112191 31034 15271 25060 2015 12 0 17993 112509 31006 15386 25373 2016 1 0 17993 112761 31057 15417 25585 2016 2 0 17993 113884 31088 15418 25587 2016 3 0 18010 113952 30980 15461 25761 2016 4 0 18010 113657 31010 15414 25451 2016 5 0 18528 114034 30888 15572 25549 2016 6 0 18528 106467 29552 15421 25518 2016 7 0 18528 105483 29452 15379 25493 2016 8 0 18528 104286 28022 15379 25478 2016 9 0 18528 103891 28005 15358 25744 2016 10 0 18528 105447 28127 15353 25757 2016 11 0 18528 105552 28158 15365 25756 2016 12 0 18532 105810 28215 15852 25831 2017 1 0 17834 96832 28077 13722 25168 2017 2 0 17834 97197 28079 13931 25222 2017 3 0 17834 97333 28095 13980 25296 2017 4 0 17834 97433 28097 14002 25306 2017 5 0 17843 97445 28128 13938 25314 2017 6 0 18038 99050 28221 14088 25502 2017 7 0 18060 99311 28219 14077 25577 2017 8 0 18059 99078 28287 14096 25562 2017 9 0 18060 99723 28460 14041 25865 gpaw-24.1.0/doc/devel/memory_bandwidth/000077500000000000000000000000001454550013000177315ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/memory_bandwidth/H2Al110.py000066400000000000000000000070321454550013000212550ustar00rootroot00000000000000#!/usr/bin/env python # flake8: noqa from optparse import OptionParser code_choices = ['gpaw', 'dacapo'] parser = OptionParser(usage='%prog [options] package.\nExample of call:\n'+ 'python %prog\n', version='%prog 0.1') 
parser.add_option('--code', dest="code", type="choice", default=code_choices[0], choices=code_choices, help='code: which code to use.') parser.add_option("--runs", dest="runs", default=7, help='use that many runs to calculate the average.') parser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose mode.') opt, args = parser.parse_args() from os import remove from os.path import exists try: import numpy as np except ImportError: raise SystemExit('numpy is not installed!') try: import gpaw except ImportError: raise SystemExit('gpaw is not installed!') from gpaw.utilities.tools import gridspacing2cutoff try: import ase except ImportError: raise SystemExit('ase is not installed!') from ase import Atoms, Atom import time a = 4.00 d = a / 2**0.5 z = 1.1 b = 1.5 def memory_bandwidth(code='gpaw', runs=7): slab = Atoms([Atom('Al', (0, 0, 0)), Atom('Al', (a, 0, 0)), Atom('Al', (a/2, d/2, -d/2)), Atom('Al', (3*a/2, d/2, -d/2)), Atom('Al', (0, 0, -d)), Atom('Al', (a, 0, -d)), Atom('Al', (a/2, d/2, -3*d/2)), Atom('Al', (3*a/2, d/2, -3*d/2)), Atom('Al', (0, 0, -2*d)), Atom('Al', (a, 0, -2*d)), Atom('H', (a/2-b/2, 0, z)), Atom('H', (a/2+b/2, 0, z))], cell=(2*a, d, 5*d), pbc=(1, 1, 1)) h = 0.15 nbands = 28 kpts = (2, 6, 1) parameters = {} if code == 'gpaw': from gpaw import GPAW as Calculator from gpaw.mpi import rank parameters['convergence'] = {'eigenstates': 1e-5} parameters['h'] = h parameters['mode'] = 'fd' elif code == 'dacapo': from ase.calculators.dacapo import Dacapo as Calculator parameters['planewavecutoff'] = gridspacing2cutoff(h) parameters['densitycutoff'] = parameters['planewavecutoff']*1.5 rank = 0 t = 0.0 t_runs = [] for n in range(runs): t0 = time.time() for i in range(1): calc = Calculator( nbands=nbands, kpts=kpts, **parameters) slab.calc = calc e = slab.get_potential_energy() del calc if exists('out.nc'): remove('out.nc') t1 = time.time() t = t + t1 - t0 t_runs.append(t1 - t0) print('Run: ', n, ' energy ', e, ' rank: ', str(rank), 
' time: ', time.time() - t0) if rank == 0: print('Rank '+str(rank)+': time [sec]: avg '+str(round(np.average(t_runs),1))+', stddev '+str(round(np.std(t_runs),1))+', min '+str(round(min(t_runs),1))+', max '+str(round(max(t_runs),1))) if __name__ == '__main__': code = opt.code assert code in code_choices, code+' not in '+str(code_choices) if code == 'dacapo': try: import ASE except ImportError: raise SystemExit('ASE (2) is not installed!') runs = int(opt.runs) assert runs >= 1, runs+' must be >= 1' memory_bandwidth(code=code, runs=runs) gpaw-24.1.0/doc/devel/memory_bandwidth/analyse.py000066400000000000000000000005161454550013000217410ustar00rootroot00000000000000import os import shutil from gpaw.mpi import rank machine = os.environ.get('MACHINE', 'TEST') ncores = os.environ.get('NCORES', 8) if rank == 0: os.chdir(machine) os.system( 'python ../memory_bandwidth.py --runs=5 --startcores=' + str(ncores)) shutil.copy('memory_bandwidth_' + machine + '_py.png', '..') gpaw-24.1.0/doc/devel/memory_bandwidth/memory_bandwidth.py000066400000000000000000000216661454550013000236520ustar00rootroot00000000000000#!/usr/bin/env python import numpy as np import os from optparse import OptionParser parser = OptionParser(usage='%prog [options]', version='%prog 0.1') parser.add_option('--dir', dest="dir", default='.', help='Results directory') parser.add_option("--runs", dest="runs", default=5, help='use that many runs to calculate the average.') parser.add_option("--startcores", dest="startcores", default=1, help='use at lease that many cores.') opt, args = parser.parse_args() colors = [ 'black', 'brown', 'red', 'orange', 'yellow', 'green', 'blue', 'violet', 'gray', 'gray' ] def plot(xdata, ydata, std, title, xlabel, ylabel, label, color, num=1): # matplotlib.use('Agg') import pylab # all goes to figure num pylab.figure(num=num, figsize=(7, 5.5)) pylab.gca().set_position([0.10, 0.20, 0.85, 0.60]) # let the plot have fixed y-axis scale miny = min(ydata) maxy = max(ydata) ywindow = maxy 
- miny pylab.gca().set_ylim(miny - ywindow / 4.0, maxy + ywindow / 3.0) pylab.bar(xdata, ydata, 0.3, yerr=std, label=label, color=color) pylab.title(title) pylab.xlabel(xlabel) pylab.ylabel(ylabel) def plot_save(directory_name, out_prefix): from os.path import exists assert exists(directory_name) import pylab pylab.savefig(directory_name + os.path.sep + out_prefix + '.png') def analyse_benchmark(ncores=8, startcores=1, machine='TEST', runs=7): system = machine + '_py' systems_string = { 'carbon_py': 'gpaw 1865 on carbon', 'carbon': 'mkl 10.0.2.018 dsyev on carbon', 'niflheim_py': 'gpaw 1865 on niflheim', 'niflheim': 'acml 4.0.1 dsyev on niflheim' }.get(system, False) processes = { 'carbon_py': [1, 2, 4, 6, 8], 'carbon': [1, 2, 4, 8], 'niflheim_py': [1, 2, 3, 4], 'niflheim': [1, 2, 4] }.get(system, False) if not systems_string: systems_string = 'gpaw on ' + machine if not processes: processes = [startcores] for n in range(startcores + 1, ncores + 1): if n % 2 == 0: processes.append(n) timer_entries_all = [] if system.find('_py') == -1: for i in range(runs): timer_entries_all.append('run: ' + str(i)) else: for i in range(runs): timer_entries_all.append('Run: ' + str(i)) import re # Select timer entries selected_entries = range(runs) height = {} gpaw_versions = [] pre_results = {} results = {} timer_entries = [] timer_entries_re = {} for entry in selected_entries: height[entry] = [] timer_entries.append(timer_entries_all[entry]) timer_entries_re[timer_entries_all[entry]] = re.compile( timer_entries_all[entry]) # absolute path to directory root_abspath = os.path.abspath(opt.dir) # length of directory name ref_value_3300 = -44.85826 ref_value_3301 = -44.85709 # ref_value_3721 = -44.85666 # ref_SCF_3721 = 35 ref_value_5147 = -44.83504 ref_value_6383 = -44.84197 ref_value = ref_value_6383 tolerance = 0.0001 ref_failed = False h_failed = False for run in [str(p) + '_01' for p in processes]: # extract results rundir = os.path.join(root_abspath, system + run) file = 
os.path.join(rundir, 'out.txt') with open(file, 'r') as f: # print('Analysing ' + file, end=' ') # lines = f.readlines() # extract gpaw version for n, l in enumerate(lines): if l.startswith(' |__ | _|___|_____|'): gpaw_version = lines[n + 0].strip().split()[3].split('.')[-1] break if gpaw_version[-1] == 'M': gpaw_version = gpaw_version[:-1] if gpaw_version.rfind(':') != -1: gpaw_version = gpaw_version[:gpaw_version.rfind(':')] gpaw_version = int(gpaw_version) if len(str(gpaw_version)) > 1: if gpaw_version <= 6383: ref_value = ref_value_6383 if gpaw_version <= 5147: ref_value = ref_value_5147 if gpaw_version <= 3720: ref_value = ref_value_3301 if gpaw_version <= 3300: ref_value = ref_value_3300 elif len(str(gpaw_version)) == 1: if gpaw_version <= 4: ref_value = ref_value_3300 # ref_SCF = ref_SCF_3300 gpaw_versions.append(gpaw_version) # search for timings print('gpaw version %d' % gpaw_version) for entry in selected_entries: h = [] ref = [] for line in lines: m = timer_entries_re[timer_entries_all[entry]].search(line) if m is not None: h.append(float(line.split(':')[-1])) # break # stop after the first match for h_entry in h: if float(h_entry) < 0.0: h_failed = True break height[entry].append(h) for line in lines: m = re.compile('Zero').search(line) if m is not None: ref.append(float(line.split(':')[-1])) # break # stop after the first match for ref_entry in ref: if abs(float(ref_entry) - ref_value) > tolerance: ref_failed = True break # assert len(processes) == len(gpaw_versions) for p in range(len(processes)): assert gpaw_versions[p] == max( gpaw_versions), 'incompatible gpaw versions across cores' # if h_failed: print('Panic: negative time in ' + file) assert not h_failed if ref_failed: print('Panic: wrong Zero Kelvin: value in ' + file + ' - should be ' + str(ref_value) + ' +- ' + str(tolerance)) assert not ref_failed # arrange results for p in range(len(processes)): pre_results[processes[p]] = [] for i in range(len(height)): 
pre_results[processes[p]].append(height[i][p]) # # arrange results - calculate statistics for p in processes: results[p] = [] temp = [] for q in range(p): temp_q = [] for i in range(len(pre_results[p])): # print pre_results[p][i][q] temp_q.append(pre_results[p][i][q]) temp.append(pre_results[p][i][q]) # averages for a given core q results[p].append((np.average(temp_q), np.std(temp_q))) # max, avrg, and std across all cores results[p].append( (np.average(temp), np.std(temp), min(temp), max(temp))) # for p in processes: # #N = len(pre_results[p]) # #avg = sum(pre_results[p])/N # #q = sqrt(sum([(x-avg)**2/(N) for x in pre_results[p]])) # avg.append(np.average(pre_results[p])) # q.append(np.std(pre_results[p])) import matplotlib matplotlib.use('Agg') from matplotlib import pylab # from matplotlib.sourceforge.net/examples/dashtick.py DASHBASE = 5 DASHLEN = 25 DASHSTAGGER = 3 def dashlen(step): return DASHBASE + (DASHLEN * (step % DASHSTAGGER)) # print scaling results parameters = processes zero = [0.0 for i in range(len(parameters))] pylab.plot(parameters, zero, 'k-', label='_nolegend_') ay1 = pylab.gca() ay1.xaxis.set_ticks(parameters) ay1.xaxis.set_ticklabels([str(x) for x in parameters]) for p in processes: parameters = [] avg = [] std = [] for i in range(len(results[p]) - 1): parameters.append(p + 0.3 * i) # avg and std across processes avg.append(results[p][i][0]) std.append(results[p][i][1]) # height # print parameters, avg, std print('No. 
of processes ' + str(int(parameters[0])) + ': time [sec]: avg ' + str(round(results[p][-1][0], 1)) + ', stddev ' + str(round(results[p][-1][1], 1)) + ', min ' + str(round(results[p][-1][2], 1)) + ', max ' + str(round(results[p][-1][3], 1))) plot(parameters, avg, std, systems_string + ' version ' + str(gpaw_version), 'processes per node', 'time [s]', 'gpaw', (colors[p % 10]), num=1) # from two_scales.py plot_save(".", 'memory_bandwidth_' + system) pylab.close(1) # if __name__ == '__main__': from os import environ NCORES = int(environ.get('NCORES', 8)) MACHINE = environ.get('MACHINE', 'TEST') assert NCORES >= 1, str(NCORES) + ' must be >= 1' runs = int(opt.runs) assert runs >= 1, runs + ' must be >= 1' startcores = int(opt.startcores) assert startcores >= 1, startcores + ' must be >= 1' analyse_benchmark(NCORES, startcores, MACHINE, runs=runs) gpaw-24.1.0/doc/devel/memory_bandwidth/prepare.py000066400000000000000000000002131454550013000217350ustar00rootroot00000000000000import os import shutil machine = os.environ.get('MACHINE', 'TEST') shutil.rmtree(machine, ignore_errors=True) os.system('sh prepare.sh') gpaw-24.1.0/doc/devel/memory_bandwidth/prepare.sh000077500000000000000000000007611454550013000217320ustar00rootroot00000000000000#!/bin/sh if test -z $NCORES; then export NCORES=8 fi if test -z $MACHINE; then export MACHINE=TEST fi if [ ! -d "${MACHINE}" ]; then mkdir ${MACHINE} echo "${MACHINE} created" cd ${MACHINE} fi index=0 while [ "$index" -le "$NCORES" ]; do if [ "$index" -eq 0 ]; then p=1 else p=$index fi # if [ ! -d "${MACHINE}_py${p}_01" ]; then mkdir ${MACHINE}_py${p}_01 echo "${MACHINE}_py${p}_01 created" fi index=`expr $index + 2` done cd .. 
gpaw-24.1.0/doc/devel/memory_bandwidth/run.sh000077500000000000000000000023311454550013000210730ustar00rootroot00000000000000#!/bin/sh # Execute the GPAW code as one or more independent tasks if test -z $NCORES; then export NCORES=8 fi if test -z $STARTCORES; then export STARTCORES=0 fi if test -z $MACHINE; then export MACHINE=TEST fi # export PYTHONPATH=~/gpaw.mkl:${PYTHONPATH} export script=../../H2Al110.py ## CONFIGURE one of the following: if [ -f /home/camp/modulefiles.sh ]; then . /home/camp/modulefiles.sh module load openmpi fi # Using the GCC compiler and the AMD ACML library #. /usr/local/openmpi-1.2.5-gfortran/bin/mpivars-1.2.5.sh #export LD_LIBRARY_PATH=/opt/acml-4.0.1/gfortran64/lib:${LD_LIBRARY_PATH} # Using the Intel compiler and the Intel MKL library #. /usr/local/openmpi-1.2.7.intel/bin/mpivars-1.2.7.sh #. /opt/intel/cce/10.1.018/bin/iccvars.sh #. /opt/intel/mkl/10.0.4.023/tools/environment/mklvarsem64t.sh #. /opt/intel/fce/10.1.018/bin/ifortvars.sh export OMP_NUM_THREADS=1 index=${STARTCORES} while [ "$index" -le "$NCORES" ]; do if [ "$index" -eq 0 ]; then p=1 else p=$index fi # echo Benchmark for ${p} tasks started at `date` ( cd ${MACHINE}_py${p}_01; time mpiexec -np $p python $script > out.txt ) echo index=`expr $index + 2` done echo Benchmark runs ended at `date` gpaw-24.1.0/doc/devel/memory_bandwidth/run_numactl.py000066400000000000000000000003301454550013000226260ustar00rootroot00000000000000import os from gpaw.mpi import rank machine = os.environ.get('MACHINE', 'TEST') ncores = os.environ.get('NCORES', 8) if rank == 0: os.chdir(machine) os.system('STARTCORES=%d &&. 
../run_numactl.sh' % ncores) gpaw-24.1.0/doc/devel/memory_bandwidth/run_numactl.sh000077500000000000000000000025321454550013000226210ustar00rootroot00000000000000#!/bin/sh # Execute the GPAW code as one or more independent tasks if test -z $NCORES; then export NCORES=8 fi if test -z $STARTCORES; then export STARTCORES=0 fi if test -z $CORES_PER_SOCKET; then export CORES_PER_SOCKET=4 fi if test -z $MACHINE; then export MACHINE=TEST fi # export PYTHONPATH=~/gpaw.mkl:${PYTHONPATH} export script=../../H2Al110.py ## CONFIGURE one of the following: if [ -f /home/camp/modulefiles.sh ]; then . /home/camp/modulefiles.sh module load openmpi fi # Using the GCC compiler and the AMD ACML library #. /usr/local/openmpi-1.2.5-gfortran/bin/mpivars-1.2.5.sh #export LD_LIBRARY_PATH=/opt/acml-4.0.1/gfortran64/lib:${LD_LIBRARY_PATH} # Using the Intel compiler and the Intel MKL library #. /usr/local/openmpi-1.2.7.intel/bin/mpivars-1.2.7.sh #. /opt/intel/cce/10.1.018/bin/iccvars.sh #. /opt/intel/mkl/10.0.4.023/tools/environment/mklvarsem64t.sh #. 
/opt/intel/fce/10.1.018/bin/ifortvars.sh export OMP_NUM_THREADS=1 index=${STARTCORES} while [ "$index" -le "$NCORES" ]; do if [ "$index" -eq 0 ]; then p=1 else p=$index fi # echo Benchmark for ${p} tasks started at `date` ( cd ${MACHINE}_py${p}_01; time mpiexec -np $p ../../taskit.BINDING.one.node 0 ${CORES_PER_SOCKET} python $script --runs=5 > out.txt ) echo index=`expr $index + 2` done echo Benchmark runs ended at `date` gpaw-24.1.0/doc/devel/memory_bandwidth/taskit.BINDING.one.node000077500000000000000000000026001454550013000237310ustar00rootroot00000000000000#!/bin/sh #UTILISATION METTRE 0 ET 1 EN ARGUMENT OFFSET=$1 shift # number cores per socket CORES_PER_SOCKET=$1 shift NCPU=$(grep "^processor" /proc/cpuinfo |wc -l) if [ -n "$OMPI_MCA_ns_nds_vpid" ]; then MPI_RANK=$OMPI_MCA_ns_nds_vpid #Myrinet with MX drivers elif [ -n "$MXMPI_ID" ]; then DMPI_RANK=$MXMPI_ID #Myrinet with GM drivers elif [ -n "$GMPI_ID" ]; then MPI_RANK=$GMPI_ID #INTEL MPI elif [ -n "${PMI_RANK}" ]; then MPI_RANK=${PMI_RANK} #OPEN MPI elif [ -n "${OMPI_MCA_ns_nds_vpid}" ]; then MPI_RANK=${OMPI_MCA_ns_nds_vpid} #OPEN MPI >= 1.3 # http://osdir.com/ml/clustering.open-mpi.user/2008-07/msg00048.html elif [ -n "${OMPI_COMM_WORLD_RANK}" ]; then MPI_RANK=${OMPI_COMM_WORLD_RANK} # VOLTAIRE IB & MVAPICH elif [ -n "${MPIRUN_RANK}" ]; then MPI_RANK=${MPIRUN_RANK} else echo "Error getting MPI_RANK"; fi CPU=`echo "($OFFSET + $MPI_RANK)"|bc` case $CPU in 0) CPU=0 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 1) CPU=1 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 2) CPU=2 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 3) CPU=3 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 4) CPU=4 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 5) CPU=5 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 6) CPU=6 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; 7) CPU=7 MEM=`echo "$CPU / $CORES_PER_SOCKET" |bc`;; esac #ulimit -s unlimited CMD="numactl --membind=$MEM --physcpubind=$CPU $@" echo $CMD eval $CMD 
gpaw-24.1.0/doc/devel/memory_bandwidth/twiny.py000066400000000000000000000010251454550013000214530ustar00rootroot00000000000000from pylab import gcf, gca, draw_if_interactive def twiny(ay=None): """ Make a second axes overlay ay (or the current axes if ay is None) sharing the yaxis. The ticks for ay2 will be placed on the top, and the ay2 instance is returned. See examples/two_scales.py """ if ay is None: ay = gca() ay2 = gcf().add_axes(ay.get_position(), sharey=ay, frameon=False) ay2.xaxis.tick_top() ay2.xaxis.set_label_position('top') ay.xaxis.tick_bottom() draw_if_interactive() return ay2 gpaw-24.1.0/doc/devel/newrelease.rst000066400000000000000000000012721454550013000172630ustar00rootroot00000000000000.. _newrelease: =========== New release =========== * Update ``__version__`` in :git:`gpaw/__init__.py`. * If a new ase release is required to pass the tests modify ``__ase_version_required__`` in :git:`gpaw/__init__.py`. * Upload to PyPI:: $ python3 setup.py sdist $ twine upload dist/* * Push and make a tag. * Update :ref:`news`, :ref:`releasenotes` and :ref:`download` pages. * Increase the version number and push. * Send announcement email to the ``gpaw-users`` mailing list:: $ git shortlog -s -n 24.1.0.. | python3 -c " import sys names = [line.split(maxsplit=1)[1].strip() for line in sys.stdin] for name in sorted(names): print('*', name)" gpaw-24.1.0/doc/devel/others.rst000066400000000000000000000015621454550013000164370ustar00rootroot00000000000000Miscellaneous objects and functions =================================== .. autoclass:: gpaw.lfc.LocalizedFunctionsCollection :members: .. autoclass:: gpaw.lfc.BasisFunctions :members: .. autoclass:: gpaw.spline.Spline :members: .. autoclass:: gpaw.poisson.FDPoissonSolver :members: .. autoclass:: gpaw.xc.functional.XCFunctional :members: .. autoclass:: gpaw.xc.gga.GGA :members: .. autofunction:: gpaw.forces.calculate_forces .. autoclass:: gpaw.grid_descriptor.GridDescriptor :members: .. 
autoclass:: gpaw.scf.SCFLoop :members: .. autoclass:: gpaw.band_descriptor.BandDescriptor :members: .. autoclass:: gpaw.spinorbit.BZWaveFunctions :members: .. autoclass:: gpaw.spinorbit.WaveFunction :members: .. autoclass:: gpaw.kpt_descriptor.KPointDescriptor :members: .. autoclass:: gpaw.projections.Projections :members: gpaw-24.1.0/doc/devel/overview.rst000066400000000000000000000316021454550013000167770ustar00rootroot00000000000000.. _overview: ======== Overview ======== This document describes the most important objects used for a DFT calculation. More information can be found in the :git:`code <>`. PAW === This object is the central object for a GPAW calculation:: +----------+ |GPAWLogger| +-----------+ +----------+ --->|Hamiltonian| ^ / +-----------+ | ---- +------+ | / ---->|Setups| +-----+ +------+ / +------+ |Atoms|<-------------| GPAW |----- +-----+ +------+ \ / | \ \ +-----------+ +-------------+ / | --- ----------->|Occupations| |WaveFunctions|<-- v \ +-----------+ +-------------+ +-------+ \ +-------+ |Density| -->|SCFLoop| +-------+ +-------+ The implementation is in :git:`gpaw/calculator.py`. The :class:`~gpaw.calculator.GPAW` class doesn't do any part of the actual calculation - it only handles the logic of parsing the input parameters and setting up the necessary objects for doing the actual work (see figure above). A GPAW instance has the following attributes: ``atoms``, ``parameters``, ``wfs``, ``density``, ``setups``, ``hamiltonian``, ``scf``, ``log``, ``timer``, ``occupations``, ``initialized``, ``world`` and ``observers``. The :class:`~gpaw.calculator.GPAW` inherits from: * :class:`ase.calculators.calculator.Calculator` This implements the ASE calculator interface. .. note:: GPAW uses atomic units internally (`\hbar=e=m=1`) and ASE uses Ångström and eV (:mod:`~ase.units`). 
Generating a GPAW instance from scratch --------------------------------------- When a GPAW instance is created from scratch:: calc = GPAW(mode='fd', xc='LDA', nbands=7) the GPAW object is almost empty. In order to start a calculation, one will have to do something like:: atoms = Atoms(...) atoms.calc = calc atoms.get_potential_energy() ASE will then arrange to call the :meth:`~gpaw.calculator.GPAW.calculate` method with the correct arguments. This will trigger: 1) A call to the :meth:`~gpaw.calculator.GPAW.initialize` method, which will set up the objects needed for a calculation: :class:`~gpaw.density.Density`, :class:`~gpaw.hamiltonian.Hamiltonian`, :class:`~gpaw.wavefunctions.base.WaveFunctions`, :class:`~gpaw.setup.Setups` and a few more (see figure above). 2) A call to the :meth:`~gpaw.calculator.GPAW.set_positions` method, which will initialize everything that depends on the atomic positions: a) Pass on the atomic positions to the wave functions, Hamiltonian and density objects (call their ``set_positions()`` methods). b) Make sure the wave functions are initialized. c) Reset the :class:`~gpaw.scf.SCFLoop`. Generating a GPAW instance from a restart file ---------------------------------------------- When a GPAW instance is created like this:: calc = GPAW('restart.gpw') the :meth:`~gpaw.calculator.GPAW.initialize` method is called first, so that the parts read from the file can be placed inside the objects where they belong: the effective pseudo potential and the total energy are put in the Hamiltonian, the pseudo density is put in the density object and so on. After a restart, everything *should* be as before the restart file was written. However, there are a few exceptions: * The wave functions are only read when needed ... XXX * Atom centered functions (`\tilde{p}_i^a`, `\bar{v}^a`, `\tilde{n}_c^a` and `\hat{g}_{\ell m}^a`) are not initialized. ... 
XXX WaveFunctions ============= We currently have two representations for the wave functions: uniform 3-d grids and expansions in atom centered basis functions as implemented in the two classes :class:`~gpaw.wavefunctions.fd.FDWaveFunctions` and :class:`~gpaw.wavefunctions.lcao.LCAOWaveFunctions`. Both inherit from the :class:`~gpaw.wavefunctions.base.WaveFunctions` class, so the wave functions object will always have a :class:`~gpaw.grid_descriptor.GridDescriptor`, an :class:`~gpaw.eigensolvers.eigensolver.Eigensolver`, a :class:`~gpaw.setup.Setups` object and a list of :class:`~gpaw.kpoint.KPoint` objects. :: +--------------+ +-----------+ |GridDescriptor| |Eigensolver| +--------------+ +-----------+ ^ ^ |gd | \ | +------+ +-------------+ kpt_u +------+ |Setups|<-------|WaveFunctions|-------->|KPoint|+ +------+ +-------------+ +------+|+ ^ +------+| /_\ +------+ | | -------------------------------- | | +-----------------+ +-----------------+ |LCAOWaveFunctions| | FDWaveFunctions | +-----------------+ +-----------------+ | | / | | v |tci | |kin |pt +--------------+ | v | v |BasisFunctions| | +-------+ | +----------+ +--------------+ | |Overlap| | |Projectors| v +-------+ | +----------+ +------------------+ v |TwoCenterIntegrals| +---------------------+ +------------------+ |KineticEnergyOperator| +---------------------+ Attributes of the wave function object: ``gd``, ``nspins``, ``nbands``, ``mynbands``, ``dtype``, ``world``, ``kpt_comm``, ``band_comm``, ``gamma``, ``bzk_kc``, ``ibzk_kc``, ``weight_k``, ``symmetry``, ``kpt_comm``, ``rank_a``, ``nibzkpts``, ``kpt_u``, ``setups``, ``ibzk_qc``, ``eigensolver``, and ``timer``. .. 
_overview_xc: Exchange-correlation functionals module ======================================= The ``gpaw.xc`` module contains all the code for XC functionals in GPAW:: +--------------+ | XCFunctional | +--------------+ ^ ^ /_\ /_\ | | +-------+ | +------------------------+ | LDA | ----|vdW-DF/HybridXC/SIC/GLLB| +-------+ +------------------------+ ^ /_\ | +---+ |GGA| +---+ ^ /_\ | +----+ |MGGA| +----+ An :class:`~gpaw.xc.functional.XCFunctional` object is usually created using the :func:`gpaw.xc.XC` function: .. autofunction:: gpaw.xc.XC Example:: # Implementation of PBE from LibXC: from gpaw.xc import XC xc = XC('PBE') # alternative call: from gpaw.xc.libxc import LibXC from gpaw.xc.gga import GGA xc = GGA(LibXC('PBE')) # or, explicitly: xc = GGA(LibXC('GGA_X_PBE+GGA_C_PBE')) In this example, calling the ``calculate`` method of the ``xc`` object passing in a :class:`~gpaw.grid_descriptor.GridDescriptor`, an input density array and an output array for the potential, the :class:`~gpaw.xc.gga.GGA` object will calculate the gradient of the density and pass that and the density on to the libxc kernel. Refer to :ref:`manual_xc` for other examples. GPAW also has a few non-libxc kernels that one can use like this:: from gpaw.xc.kernel import XCKernel xc = XC(XCKernel('PBE')) .. _overview_array_naming: Naming convention for arrays ============================ A few examples: =========== =================== =========================================== name shape =========== =================== =========================================== ``spos_c`` ``(3,)`` **S**\ caled **pos**\ ition vector ``nt_sG`` ``(2, 24, 24, 24)`` Pseudo-density array `\tilde{n}_\sigma(\vec{r})` (``t`` means *tilde*): two spins, 24*24*24 grid points. ``cell_cv`` ``(3, 3)`` Unit cell vectors. 
=========== =================== =========================================== Commonly used indices: ======= ================================================== index description ======= ================================================== ``a`` Atom number ``c`` Unit cell axis-index (0, 1, 2) ``v`` *xyz*-index (0, 1, 2) ``k`` **k**-point index ``q`` **k**-point index (local, i.e. it starts at 0 on each processor) ``s`` Spin index (`\sigma`) ``u`` Combined spin and **k**-point index (local) ``G`` Three indices into the coarse 3D grid ``g`` Three indices into the fine 3D grid ``M`` LCAO orbital index (`\mu`) ``n`` Principal quantum number *or* band number ``l`` Angular momentum quantum number (s, p, d, ...) ``m`` Magnetic quantum number (0, 1, ..., 2*l) ``L`` ``l`` and ``m`` (``L = l**2 + m``) ``j`` Valence orbital number (``n`` and ``l``) ``i`` Valence orbital number (``n``, ``l`` and ``m``) ``q`` ``j1`` and ``j2`` pair ``p`` ``i1`` and ``i2`` pair ``r`` CPU-rank ======= ================================================== Array names and their definition -------------------------------- .. list-table:: * - name in the code - definition * - wfs.kpt_u[u].P_ani - `\langle\tilde{p}_i^a|\tilde{\psi}_{\sigma\mathbf{k}n} \rangle` * - density.D_asp - `D_{s i_1i_2}^a` * - ``hamiltonian.dH_sp`` - `\Delta H_{s i_1i_2}^a` * - setup.Delta_pL - `\Delta_{Li_1i_2}^a` * - setup.M_pp - `\Delta C_{i_1i_2i_3i_4}^a` eq. (C2) in [1]_ or eq. (47) in [2]_ * - wfs.kpt_u[u].psit_nG - `\tilde{\psi}_{\sigma\mathbf{k}n}(\mathbf{r})` * - setup.pt_j - `\tilde{p}_j^a(r)` * - wfs.pt - `\tilde{p}_i^a(\mathbf{r}-\mathbf{R}^a)` The :class:`~gpaw.setup.Setup` instances are stored in the :class:`~gpaw.setup.Setups` list, shared by the wfs, density, and Hamiltonian instances. E.g. paw.wfs.setups, paw.density.setups, or paw.hamiltonian.setups. 
Parallelization over spins, k-points domains and states ======================================================= When using parallelization over spins, **k**-points, bands and domains, four different :ref:`MPI communicators ` are used: * *mpi.world* Communicator containing all processors. * *domain_comm* One *domain_comm* communicator contains the whole real space domain for a selection of the spin/k-point pairs and bands. * *kpt_comm* One *kpt_comm* communicator contains all k-points and spins for a selection of bands over part of the real space domain. * *band_comm* One *band_comm* communicator contains all bands for a selection of k-points and spins over part of the real space domain. These communicators constitute MPI groups, of which the latter three are subsets of the ``world`` communicator. The number of members in a communicator group is signified by ``comm.size``. Within each group, every element (i.e. processor) is assigned a unique index ``comm.rank`` into the list of processor ids in the group. For instance, a *domain_comm* rank of zero signifies that the processor is first in the group, hence it functions as a domain master. For an example of how to use an MPI communicator to perform simple data communication, please refer to :git:`~doc/devel/parallelization.py`. 
To investigate the way GPAW distributes calculated quantities across the various MPI groups, simulating an MPI run can be done using ``gpaw-mpisim``:: $ gpaw-mpisim -v --dry-run=4 --spins=2 --kpoints=4 --bands=3 --domain-decomposition=2,1,1 Simulating: world.size = 4 parsize_c = (2, 1, 1) parsize_bands = 1 nspins = 2 nibzkpts = 4 nbands = 3 world: rank=0, ranks=None kpt_comm : rank=0, ranks=[0 2], mynks=4, kpt_u=[0^,1^,2^,3^] band_comm : rank=0, ranks=[0], mynbands=3, mybands=[0, 1, 2] domain_comm : rank=0, ranks=[0 1] world: rank=1, ranks=None kpt_comm : rank=0, ranks=[1 3], mynks=4, kpt_u=[0^,1^,2^,3^] band_comm : rank=0, ranks=[1], mynbands=3, mybands=[0, 1, 2] domain_comm : rank=1, ranks=[0 1] world: rank=2, ranks=None kpt_comm : rank=1, ranks=[0 2], mynks=4, kpt_u=[0v,1v,2v,3v] band_comm : rank=0, ranks=[2], mynbands=3, mybands=[0, 1, 2] domain_comm : rank=0, ranks=[2 3] world: rank=3, ranks=None kpt_comm : rank=1, ranks=[1 3], mynks=4, kpt_u=[0v,1v,2v,3v] band_comm : rank=0, ranks=[3], mynbands=3, mybands=[0, 1, 2] domain_comm : rank=1, ranks=[2 3] For the case of a `\Gamma`-point calculation without band-parallelization, all parallel communication is done in the one *domain_comm* communicator, which in this case is equal to *mpi.world*. .. [1] J. J. Mortensen, L. B. Hansen and K. W. Jacobsen, Phys. Rev. B 71 (2005) 035109. .. [2] C. Rostgaard, :download:`The Projector Augmented Wave Method <../documentation/paw_note.pdf>`. 
gpaw-24.1.0/doc/devel/parallelization.py000077500000000000000000000076161454550013000201560ustar00rootroot00000000000000import numpy as np from gpaw.mpi import world def mpi_debug(data, ordered=True): global msgcount if not isinstance(data, list): data = [data] if ordered: for i in range(world.rank): world.barrier() for txt in data: print('%02d-mpi%d, %s' % (msgcount, world.rank, txt)) if ordered: msgcount += 1 if ordered: for i in range(world.size - world.rank): world.barrier() W = world.size N = 32 assert N % W == 0 M = N // W # Create my share of data data = np.arange(world.rank * M, (world.rank + 1) * M) # Let's calculate the global sum slocal = data.sum() s = world.sum(slocal) mpi_debug('data: %s, slocal=%d, s=%d' % (data, slocal, s)) assert s == N * (N - 1) // 2 # Subtract the global mean data -= s / N mpi_debug(f'data: {data}') # ------------------------------------------------------------------- if world.rank == 0: print('-' * 16) # Who has global index 11? The master needs it! i = 11 rank, ilocal = divmod(i, M) mpi_debug('rank=%d, ilocal=%d, i=%d' % (rank, ilocal, i)) assert rank * M + ilocal == i # Do I have it? if world.rank == rank: # Yes, so extract data (must be an array) idata = np.array([data[ilocal]], dtype=data.dtype) else: # No, so just allocate space idata = np.empty(1, dtype=data.dtype) # Broadcast from owner to everyone else world.broadcast(idata, rank) """ # This does the same as broadcast with send/receive... # Do I have it? if world.rank == rank: # Yes, now send it to the others for other_rank in range(world.size): # We don't have to send it to ourselves if other_rank != rank: world.send(idata, other_rank, tag=123) else: # No, so receive from the one that own the data world.receive(idata, rank, tag=123) """ mpi_debug('idata=%d' % idata) # ------------------------------------------------------------------- if world.rank == 0: print('-' * 16) # The master just calculated auxiliary data. Distribute it. 
aux = np.empty(N, dtype=float) # Only master knows the data right now if world.rank == 0: rng = np.random.default_rng(1234567) aux[:] = rng.uniform(0, 1, size=N).round(2) print(f'MASTER aux: {aux}, mean={aux.mean():f}') # Allocate space for my part of the auxiliary data myaux = np.empty(M, dtype=float) # Scatter parts from master to everyone world.scatter(aux, myaux, 0) """ # This does the same as scatter with send/receive... # Are we the master? if world.rank == 0: # Yes, so extract my part directly myaux[:] = aux[0:M] # Now send parts to the slaves for slave_rank in range(1, world.size): youraux = aux[slave_rank*M:(slave_rank+1)*M] world.send(youraux, slave_rank, tag=123) else: # No, so receive from the master world.receive(myaux, 0, tag=123) """ # We don't need original data anymore del aux # Try to calculate mean now meanaux = world.sum(myaux.mean()) / world.size mpi_debug(f'myaux: {myaux}, mean={meanaux:f}') # ------------------------------------------------------------------- if world.rank == 0: print('-' * 16) # We've done something to our part of the auxiliary data. Master needs it all if world.rank == 0: result = np.empty(N, dtype=float) else: result = None # Do something to our auxiliary data myaux[:] = np.sin(2 * np.pi * myaux).round(3) mpi_debug(f'myaux: {myaux}') # Gather parts from everyone on the master world.gather(myaux, 0, result) """ # This does the same as gather with send/receive... # Are we the master? if world.rank == 0: # Yes, so extract my part directly result[0:M] = myaux[:] # Now receive parts from the slaves for slave_rank in range(1, world.size): youraux = np.empty(M, dtype=float) world.receive(youraux, slave_rank, tag=123) result[slave_rank*M:(slave_rank+1)*M] = youraux else: # No, so send to the master world.send(myaux, 0, tag=123) """ mpi_debug(f'result: {result}') gpaw-24.1.0/doc/devel/paw.rst000066400000000000000000000002131454550013000157120ustar00rootroot00000000000000The GPAW calculator object ========================== .. 
module:: gpaw.calculator .. autoclass:: GPAW :members: :inherited-members: gpaw-24.1.0/doc/devel/profiling.rst000066400000000000000000000042231454550013000171210ustar00rootroot00000000000000.. _profiling: ========= Profiling ========= profile ======= Python has a :mod:`cProfile` module to help you find the places in the code where the time is spent. Let's say you have a script ``script.py`` that you want to run through the profiler. This is what you do: >>> import profile >>> profile.run('import script', 'prof') This will run your script and generate a profile in the file ``prof``. You can also generate the profile by inserting a line like this in your script:: ... import cProfile cProfile.run('atoms.get_potential_energy()', 'prof') ... .. note:: Use:: import cProfile from gpaw.mpi import rank cProfile.run('atoms.get_potential_energy()', f'prof-{rank:04}') if you want to run in parallel. To analyse the results, you do this:: >>> import pstats >>> pstats.Stats('prof').strip_dirs().sort_stats('time').print_stats(20) Tue Oct 14 19:08:54 2008 prof 1093215 function calls (1091618 primitive calls) in 37.430 CPU seconds Ordered by: internal time List reduced from 1318 to 20 due to restriction <20> ncalls tottime percall cumtime percall filename:lineno(function) 37074 10.310 0.000 10.310 0.000 :0(calculate_spinpaired) 1659 4.780 0.003 4.780 0.003 :0(relax) 167331 3.990 0.000 3.990 0.000 :0(dot) 7559 3.440 0.000 3.440 0.000 :0(apply) 370 2.730 0.007 17.090 0.046 xc_correction.py:130(calculate_energy_and_derivatives) 37000 0.780 0.000 9.650 0.000 xc_functional.py:657(get_energy_and_potential_spinpaired) 37074 0.720 0.000 12.990 0.000 xc_functional.py:346(calculate_spinpaired) ... ... The list shows the 20 functions where the most time is spent. Check the :mod:`pstats` documentation if you want to do more fancy things. .. 
tip:: Since the :mod:`cProfile` module does not time calls to C-code, it is a good idea to run the code in debug mode - this will wrap calls to C-code in Python functions:: $ python3 -d script.py .. tip:: There is also a quick and simple way to profile a script:: $ python3 -m cProfile script.py gpaw-24.1.0/doc/devel/profiling/000077500000000000000000000000001454550013000163665ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/profiling/select.tau000066400000000000000000000032771454550013000203710ustar00rootroot00000000000000 BEGIN_EXCLUDE_LIST void get_point_gga(functionals_type *, double *, double *, double *) C void xc_gga_vxc(const xc_gga_type *, const double *, const double *, double *, double *, double *) C void xc_gga(const xc_gga_type *, const double *, const double *, double *, double *, double *, double *, double *, double *) C void func(const xc_gga_type *, double, double *, double *, double *, double *) C void gga_c_pbe(const void *, const double *, const double *, double *, double *, double *, double *, double *, double *) C void xc_perdew_params(const xc_gga_type *, const double *, const double *, int, xc_perdew_t *) C void xc_rho2dzeta(int, const double *, double *, double *) C void get_vxc_lda(functionals_type *, double *, double *, double *) C void lda_x(const void *, const double *, double *, double *, double *) C void xc_lda_vxc(const xc_lda_type *, const double *, double *, double *) C void xc_lda(const xc_lda_type *, const double *, double *, double *, double *, double *) C void func(const xc_lda_type *, double *, double, double *, double *, double *, double *, double *, double *) C void g(int, int, double *, double *, double *, double *) C void get_vxc_gga(functionals_type *, double *, double *, double *) C void pbe_eq7(int, int, double, double, double, double, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *, double *) C void pbe_eq8(int, int, double, double, double, double *, double *, double
*, double *, double *, double *, double *) C void xc_perdew_potentials(xc_perdew_t *, const double *, double, int, double *, double *, double *, double *, double *) C PyObject *spherical_harmonics(PyObject *, PyObject *) C END_EXCLUDE_LIST gpaw-24.1.0/doc/devel/proposals/000077500000000000000000000000001454550013000164175ustar00rootroot00000000000000gpaw-24.1.0/doc/devel/proposals/ase.py000066400000000000000000000004631454550013000175440ustar00rootroot00000000000000class ASECalculator: def __init__(self): self.atoms = None def get_potential_energy(self, atoms): if self.calculation_required(atoms, 'energy'): self.calculate(atoms) self.atoms = atoms.copy() # store copy of last configuration return None ... gpaw-24.1.0/doc/devel/proposals/density.py000066400000000000000000000016011454550013000204460ustar00rootroot00000000000000# flake8: noqa class Density: def __init__(self): self.gd = None self.mixer = None self.nt_sG = None self.nct = None def read(self, reader): self.gd = reader.get_grid_descriptor() self.mixer = reader.read('mixer') self.nt_sG = reader.get_array('...') def update(self, atoms, **kwargs): # Initialize stuff: if self.gd is None: self.gd = ... if self.mixer is None: self.mixer = Mixer(self.gd) if self.nct is None: self.nct = LFC(atoms) # Change stuff: if 'mixer' in kwargs: self.mixer = kwargs['mixer'] # Update stuff: self.nct.set_positions(atoms) def allocate(self, lfc=True): if lfc: self.nct.allocate() def memory_estimate(self): ... def write(self, foo): ... gpaw-24.1.0/doc/devel/proposals/initialization.rst000066400000000000000000000166121454550013000222060ustar00rootroot00000000000000============================== Initialization and I/O changes ============================== This is a proposal for some changes that will solve various issues with the maintainability and stability of the I/O code amongst other things. .. contents:: Rationale ========= Presently the gpw I/O is handled centrally by the module gpaw/io/__init__.py. 
If someone makes changes in setup.py or density.py, the I/O may break due to these "non-local correlations" (we in particular, being physicists, should know to appreciate locality), or it may misbehave in subtle ways for certain cases (TDDFT/LCAO/non-gamma-point/etc.). Most of this trouble can be avoided entirely by requiring that objects should know how to read and write themselves. Thus, responsibility for what to write (and how to read it back!) is delegated to various objects as per the usual 'object oriented' way. A different but sort of related issue: The output.py module writes lots of things to the log file, and those things would be better off 'writing themselves'. There are several bugs here waiting to be fixed: if the Poisson solver was created by hand with a non-standard stencil, then the wrong stencil is written. Scalapack/BLACS information is sometimes wrong (depending on the way it was specified). No information on the stridedness of the band descriptor is written (and thus parallelization info is incomplete). There are probably other issues. Object hierarchy ================ So all the objects above may implement functions to read and write their own parameters. They could also implement functions to read/write human-readable information to log files (which is highly desirable). On a somewhat deeper level, we could formalize the tree hierarchy between the major GPAW objects and define a mandatory interface for all major objects to implement (read/write, memory estimate, initialize/set_positions/allocate -- these procedures *all* involve traversing the same tree hierarchy). This might make it easier for new programmers to learn the structure of GPAW (a well-known problem). Example of what an object could look like: .. literalinclude:: density.py The PAW calculator object ========================= The following base class (rough sketch) should probably be moved to ASE, so that all ASE-calculators can use it: ..
literalinclude:: ase.py It should be possible to create the PAW calculator without knowing the atoms and also from a restart file - and both ways must be cheap. .. literalinclude:: paw.py Open questions -------------- The above pseudo code is not the final answer - it is an attempt to make the further discussions more concrete. There are several things to think about: * What should the ASECalculator look like? * How much should/can ``__init__()`` for the different objects do? Reading and writing =================== We should name things like described here: http://www.etsf.eu/fileformats (is there an ETSF XC library?) Things we need to write: * Version and units. * Atoms: atomic numbers, positions, initial magnetic moments, tags, boundary conditions, unit cell, charges. * Setups: lmax, fingerprints, setup types. * Basis set. * Hamiltonian: Poisson solver, XC functional, effective pseudopotential, non-local part of hamiltonian. * Density: Charge, density convergence criterion, density error, atomic density matrices, interpolation order, pseudoelectron density, multipole moments of compensation charges. * Mixer. * Occupations: fixed magnetic moment flag, smearing type, width, Fermi level, occupation numbers. * Symmetry: Symmetry matrices, atom maps. * Parameters that the result should not depend on: hund, random, maxiter, eigensolver?, parallel (domain, band, stridebands, scalapack). * SCF: energy convergence criterion, energy error. * Eigensolver: eigenstates convergence criterion, number of bands to converge, eigenstate error(s)? * Calculated stuff: Ekin, Epot, Ebar, Eext, Exc, S, forces, potential energy, magnetic moments, dipole moment. * Brillouin zone sampling: BZ, IBZ, weights, maps, symmetries. * Projections. * Pseudo wave functions. * Eigenvalues. What do we do with these: fixdensity, mode, Kohn Sham stencil, h, charge?
What do we need in order to better support: * DSCF * DFPT * response functions * GW * TDDFT * NEGF transport * LCAO * plane waves * other things? How should reading and writing work? * Should it be like pickle where the class name is written (example: gpaw.mixer.MixerSum). The reader will then create that object and read into it? * What methods should a reader have? ``get_object('name of object')``, ``get_array('name of array')``, ``get_atoms()``, ``get_grid_descriptor()``, ... * We need backwards compatibility. Also, there should be well defined interface so that it is easy to use different backends (.gpw, hdf5, ...). I think that the current io-interface ('__set_item__', 'dimension', 'add', 'fill', ...) is quite well defined, and if new IO scheme requires additions/modifications, they should be such that different backends can easily support them. Some thoughts about different backends: --------------------------------------- If/when each object writes/reads itself, some sort of hierarchical file format would be convenient. I am not that familiar with tarfile-interface used for .gpw files, but I think that it can support hierarchical structure (folders and files). Also, HDF5 supports hierarchical structure ("hierarchical data format"), basic structure of HDF5 file is groups and datasets. Other formats that one could think of are MPI-IO and netcdf, but that they do not really support hierarchical structure. Drawback of MPI-IO is also that the files are not necessarily portable (although it should be possible to ensure portability with the price of more expensive IO). Here is a prototype implementation of a hierarchical reader/writer framework: :download:`rw.py`. Parallel IO =========== For large calculations it will be more or less compulsory to perform IO in parallel. Even though a filesystem would not support parallel IO (meaning that read/write are not faster than in serial case), memory requirements can prohibit collecting the data into single process. 
As an example, in large calculation with e.g. 200**3 grid, collecting density into single process requires 8 * 400**3 ~ 500 MB. Some backends supporting parallel IO are MPI-IO, parallel-netcdf, and HDF5, and there are existing python interfaces to MPI-IO (mpi4py) and HDF5 (h5py and pytables). GPAW can already use h5py without parallel capabilities. Enabling parallel IO with h5py is quite simple as it requires adding only two simple functions to GPAW. At some point, we should start using mpi4py with GPAW. Backends ======== Tarfile ------- Relatively simple, portable and no external dependencies, but: * no parallel IO, single process has to collect data * no direct access with external software (visualization etc.) HDF5 ---- Portable, can perform parallel IO and external software can access the file directly, but: * additional dependencies (at least HDF5 library, a python interface could in principle be included in GPAW) * porting to more exotic architectures (Cray, Blue Gene) can be tricky? Directory --------- A bit like an extracted tarfile. Different CPUs could write different states. When the writing is done, one can tar the directory to get a standard gpw file. The tarfile format would have to be modified so that one can read pseudo wave functions from several files.
gpaw-24.1.0/doc/devel/proposals/paw.py000066400000000000000000000024461454550013000175660ustar00rootroot00000000000000# flake8: noqa class PAW(ASECalculator): def __init__(self, restart=None, **kwargs): ASECalculator.__init__(self) self.density = Density() self.hamiltonian = Hamiltonian() self.wfs = WaveFunctions() if restart: self.read(Reader(restart)) self.update(self.atoms, **kwargs) def read(self, reader): self.atoms = reader.read_atoms() self.density.read(reader) self.hamiltonian.read(reader) self.wfs = self.wfs.read(reader) def update(self, atoms, **kwargs): """Lazy update.""" self.density.update(self.atoms, kwargs) self.hamiltonian.update(self.atoms, kwargs) # If we change mode, we could get a completely new type of # wave function object: self.wfs = self.wfs.update(self.atoms, kwargs) def set(self, **kwargs): self.update(self,atoms, **kwargs) def allocate(self, wfs=False, lfc=True): self.density.allocate(lfc) self.hamiltonian.allocate(lfc) self.wfs.allocate(wfs, lfc) def calculate(self, atoms): self.update(atoms) self.allocate(wfs=True) ... def get_potential_energy(self, atoms): ASECalculator.get_potential_energy(self, atoms) return self.hamiltonian.energy gpaw-24.1.0/doc/devel/proposals/proposals.rst000066400000000000000000000002031454550013000211660ustar00rootroot00000000000000========================== GPAW enhancement proposals ========================== .. toctree:: :maxdepth: 1 initialization gpaw-24.1.0/doc/devel/proposals/rw.py000066400000000000000000000221331454550013000174220ustar00rootroot00000000000000# flake8: noqa """ File content:: 0: "IOASE..." 8: version 16: nitems (int64) 24: 32 (position of offsets, int64) 32: p0 (offset to json data, int64) 40: 8-byte aligned ndarrays p0: n (length of json data, int64) p0+8: json data p0+8+n: EOF """ # magig prefix?, ascii header? See hdf5 header, # ordereddict, endianness, todict? 
import numpy as np from ase.db.jsondb import encode, decode VERSION = 1 N1 = 42 # block size - max number of items: 1, N1, N1*N1, N1*N1*N1, ... def align(fd): """Advance file descriptor to 8 byte alignment and return position.""" pos = fd.tell() r = pos % 8 if r == 0: return pos fd.write(b'#' * (8 - r)) return pos + 8 - r def writeint(fd, n, pos=None): """Write 64 bit integer n at pos or current position.""" if pos is not None: fd.seek(pos) np.array(n, np.int64).tofile(fd) class Writer: def __init__(self, fd, mode='w', data=None): """Create writer object. The data dictionary holds: * data for type bool, int, float, complex and str * shape and dtype for ndarrays * class names for other objects These other objects must have a write() method and a static read() method.""" assert np.little_endian if data is None: data = {} if mode == 'w': self.nitems = 0 self.itemoffsets = 32 self.offsets = np.array([-1], np.int64) fd = open(fd, 'wb') # Write file format identifier: fd.write(b'IOASE...') np.array([VERSION, self.nitems, self.itemoffsets], np.int64).tofile(fd) self.offsets.tofile(fd) elif mode == 'a': fd = open(fd, 'r+b') version, self.nitems, self.itemoffsets, offsets = \ read_header(fd) assert version == VERSION n = 1 while self.nitems > n: n *= N1 padding = np.zeros(n - self.nitems, np.int64) self.offsets = np.concatenate((offsets, padding)) fd.seek(0, 2) else: 2 / 0 self.fd = fd self.data = data # Shape and dtype of array being filled: self.shape = (0,) self.dtype = None def add_array(self, name, shape, dtype=float, delayed_read=True): if isinstance(shape, int): shape = (shape,) i = align(self.fd) self.data[name] = {'_type': 'numpy.ndarray', 'shape': shape, 'dtype': np.dtype(dtype).name, 'offset': i} if delayed_read: self.data[name]['_delayed'] = True assert self.shape[0] == 0, 'last array not done' self.dtype = dtype self.shape = shape def fill(self, a): assert a.dtype == self.dtype if a.shape[1:] == self.shape[1:]: assert a.shape[0] <= self.shape[0] self.shape = 
(self.shape[0] - a.shape[0],) + self.shape[1:] else: assert a.shape == self.shape[1:] self.shape = (self.shape[0] - 1,) + self.shape[1:] assert self.shape[0] >= 0 a.tofile(self.fd) def sync(self): """Write data dictionary. Write bool, int, float, complex and str data, shapes and dtypes for ndarrays and class names for other objects.""" assert self.shape[0] == 0 i = self.fd.tell() s = encode(self.data).encode() writeint(self.fd, len(s)) self.fd.write(s) n = len(self.offsets) if self.nitems >= n: offsets = np.zeros(n * N1, np.int64) offsets[:n] = self.offsets self.itemoffsets = align(self.fd) offsets.tofile(self.fd) writeint(self.fd, self.itemoffsets, 24) self.offsets = offsets self.offsets[self.nitems] = i writeint(self.fd, i, self.itemoffsets + self.nitems * 8) self.nitems += 1 writeint(self.fd, self.nitems, 16) self.fd.flush() self.fd.seek(0, 2) # end of file self.data = {} def write(self, **kwargs): """Write data. Use:: writer.write(n=7, s='abc', a=np.zeros(3), density=density). """ for name, value in kwargs.items(): if isinstance(value, (bool, int, float, complex, dict, list, tuple, str)): self.data[name] = value elif isinstance(value, np.ndarray): self.add_array(name, value.shape, value.dtype, delayed_read=False) self.fill(value) else: self.data[name] = {'_type': value.__module__ + '.' + value.__class__.__name__} writer = Writer(self.fd, data=self.data[name]) value.write(writer) def close(self): self.sync() self.fd.close() def read_header(fd): fd.seek(0) assert fd.read(8) == b'IOASE...' version, nitems, itemoffsets = np.fromfile(fd, np.int64, 3) fd.seek(itemoffsets) offsets = np.fromfile(fd, np.int64, nitems) return version, nitems, itemoffsets, offsets class Reader: def __init__(self, fd, item=0, data=None): """Create hierarchy of readers. 
Store data as attributes for easy access and to allow tab-completion.""" assert np.little_endian if isinstance(fd, str): fd = open(fd, 'rb') self.fd = fd if data is None: self.version, self.nitems, self.itemoffsets, self.offsets = \ read_header(fd) data = self._read_data(item) for name, value in data.items(): if isinstance(value, dict) and '_type' in value: if value['_type'] == 'numpy.ndarray': read_now = '_delayed' not in value value = NDArrayReader(fd, value['shape'], np.dtype(value['dtype']), value['offset']) if read_now: value = value.read() else: value = Reader(self.fd, data=value) data[name] = value self.data = data def __dir__(self): return self.data.keys() def __getattr__(self, attr): value = self.data[attr] if isinstance(value, NDArrayReader): return value.read() return value def proxy(self, name): value = self.data[name] assert isinstance(value, NDArrayReader) return value def __len__(self): return self.nitems def _read_data(self, item): self.fd.seek(self.offsets[item]) size = np.fromfile(self.fd, np.int64, 1)[0] data = decode(self.fd.read(size).decode()) return data def __getitem__(self, i): data = self._read_data(i) return Reader(self.fd, data=data) read = Reader write = Writer class NDArrayReader: def __init__(self, fd, shape, dtype, offset): self.fd = fd self.shape = tuple(shape) self.dtype = dtype self.offset = offset self.ndim = len(self.shape) self.itemsize = dtype.itemsize self.size = np.prod(self.shape) self.nbytes = self.size * self.itemsize def __len__(self): return self.shape[0] def read(self): return self[:] def __getitem__(self, i): if isinstance(i, int): return self[i:i + 1][0] start, stop, step = i.indices(len(self)) offset = self.offset + start * self.nbytes // len(self) self.fd.seek(offset) count = (stop - start) * self.size // len(self) a = np.fromfile(self.fd, self.dtype, count) a.shape = (-1,) + self.shape[1:] if step != 1: return a[::step].copy() return a def main(): args = sys.argv[1:] r = Reader(args[0]) exec('x = ' + rags[1]) # 
csv for 2d ... print(x) if __name__ == '__main__': class A: def write(self, writer): writer.write(x=np.ones((2, 3))) @staticmethod def read(reader): a = A() a.x = reader.x return a w = Writer('a.ioase') w.write(a=A(), y=9) w.write(s='abc') w.sync() w.write(s='abc2') w.sync() w.write(s='abc3', z=np.ones(7, int)) w.close() print(w.data) r = Reader('a.ioase') print(r.y, r.s) print(A.read(r.a).x) print(r.a.x) print(r[1].s) print(r[2].s) print(r[2].z) w = Writer('a.ioase', 'a') print(w.nitems, w.offsets) w.write(d={'h': [1, 'asdf']}) w.add_array('psi', (4, 3)) w.fill(np.ones((1, 3))) w.fill(np.ones((1, 3)) * 2) w.fill(np.ones((2, 3)) * 3) w.close() print(Reader('a.ioase', 3).d) print(Reader('a.ioase')[2].z) print(Reader('a.ioase', 3).proxy('psi')[0:3]) gpaw-24.1.0/doc/devel/setups.rst000066400000000000000000000034261454550013000164570ustar00rootroot00000000000000Atomic PAW setups ================= .. _setup_matrix_elements_nabla: Calculating matrix elements of nabla ------------------------------------ This integral is needed for LrTDDFT and response function related quantities: .. math:: \langle\phi_i|\mathbf\nabla|\phi_{i'}\rangle - \langle\tilde\phi_i|\mathbf\nabla|\tilde\phi_{i'}\rangle, where `|\phi_i\rangle = \phi_i(\mathbf r) = \phi_j(r)Y_{\ell m}(\hat{\mathbf r})`, and `|\tilde\phi_i\rangle = \tilde\phi_i(\mathbf r) = \tilde\phi_j(r)Y_{\ell m}(\hat{\mathbf r})`. .. math:: \langle\phi_i|\mathbf\nabla|\phi_{i'}\rangle = \langle\phi_i|\frac{\partial}{\partial r}(\phi_{j'}/r^{\ell'}) \frac{\partial r}{\partial \mathbf r} r^{\ell'}Y_{\ell'm'}\rangle + \langle\phi_i|\frac{\phi_{j'}}{r^{\ell'}} \mathbf\nabla(r^{\ell'}Y_{\ell'm'})\rangle. Since we use real-valued spherical harmonics, we have: .. math:: \frac{\partial r}{\partial \mathbf r}= \hat{\mathbf r}=(x/r,y/r,z/r)= \sqrt{\frac{4\pi}{3}}(Y_{1m_x},Y_{1m_y},Y_{1m_z}). Splitting the integral in radial and angular parts, we get: .. 
math:: \langle\phi_i|\frac{\partial}{\partial x}|\phi_{i'}\rangle = \sqrt{\frac{4\pi}{3}} \int r^2dr \phi_j\frac{\partial}{\partial r}(\phi_{j'}/r^{\ell'})r^{\ell'} G_{1m_x,\ell'm'}^{\ell m} + \int r^2dr \phi_j\phi_{j'}/r \int d\hat{\mathbf r} Y_{\ell m}r^{1-\ell'}\frac{\partial}{\partial x} (r^{\ell'}Y_{\ell'm'}), where `G_{\ell m,\ell'm'}^{\ell''m''}` are Gaunt coefficents calculated with the :func:`~gpaw.gaunt.gaunt` function and the last angular integral has been calculated with the :func:`~gpaw.gaunt.nabla` function. .. autofunction:: gpaw.gaunt.gaunt .. autofunction:: gpaw.gaunt.nabla More stuff ---------- .. autoclass:: gpaw.setup.Setup :members: .. autoclass:: gpaw.setup.Setups :members: gpaw-24.1.0/doc/devel/symmetry.rst000066400000000000000000000054071454550013000170260ustar00rootroot00000000000000Symmetry ======== Let `\mathbf A^T=(\mathbf a_0,\mathbf a_1, \mathbf a_2)`, where `\mathbf a_0`, `\mathbf a_1` and `\mathbf a_2` are the lattice vectors of the unit cell. .. note:: `(\mathbf a_c)_v=\mathbf A_{cv}` is stored in ``gd.cell_cv[c, v]`` in units of Bohr and in ``atoms.cell[c, v]`` in Å units. The relation between scaled positions `\mathbf s` and xyz-positions `\mathbf r` is `\mathbf r=\mathbf A^T\mathbf s`. A crystal has a set of symmetry operations (``symmetry.op_scc``) in the form of matrices `\mathbf U` so that the lattice vectors are transformed to `\mathbf A'=\mathbf U\mathbf A` and `\mathbf r` is transformed to `\mathbf r'` as: .. math:: \mathbf r'= \mathbf A'^T\mathbf s= \mathbf A^T\mathbf U^T\mathbf s= \mathbf A^T\mathbf U^T\mathbf A^{-T}\mathbf r= \mathbf M\mathbf r, where `\mathbf M=\mathbf A^T\mathbf U^T\mathbf A^{-T}`. If we want to express `\mathbf r'` in terms of the original lattice vectors (`\mathbf r'=\mathbf A^T\mathbf s'`), we get: .. math:: \mathbf s' = \mathbf U^T\mathbf s. .. note:: The `\mathbf U` matrices contain only the integers -1, 0 and 1. Also note, that if `\mathbf U` is a symmetry operation, then `\mathbf U^{-1}` is too. 
Let `\tilde\psi_{\mathbf k}(\mathbf r)` be a Bloch wave function and `\mathbf R` any Bravais lattice vector: .. math:: \tilde\psi_{\mathbf k}(\mathbf r+\mathbf R)= e^{i\mathbf k^T\mathbf R}\tilde\psi_{\mathbf k}(\mathbf r). Transforming `\tilde\psi_{\mathbf k}` with our symmetry operation, we get `\tilde\psi'_{\mathbf k'}(\mathbf r)=\tilde\psi_{\mathbf k}(\mathbf M\mathbf r)` and: .. math:: \tilde\psi'_{\mathbf k'}(\mathbf r+\mathbf R)= \tilde\psi_{\mathbf k}(\mathbf M\mathbf r+\mathbf M\mathbf R)= e^{i\mathbf k^T\mathbf M\mathbf R} \tilde\psi_{\mathbf k}(\mathbf M\mathbf r)= e^{i\mathbf k^T\mathbf M\mathbf R} \tilde\psi'_{\mathbf k'}(\mathbf r). From this equation it is seen that `\mathbf k'=\mathbf M^T\mathbf k`. In terms of scaled k-points `\mathbf q`, where: .. math:: \mathbf k=2\pi\mathbf A^{-1}\mathbf q, we get `\mathbf q'=\mathbf U\mathbf q`. Besides crystal symmetry, there is also time reversal symmetry for all systems with no magnetic field. The wavefunctions for `{\mathbf k}` and `{-\mathbf k}` are related as: .. math:: \tilde\psi_{-\mathbf k}(\mathbf r) = \tilde\psi^{\ast}_{\mathbf k}(\mathbf r) If in addition the crystal has inversion symmetry, then the wavefunction should satisfy: .. math:: \tilde\psi_{\mathbf k}(\mathbf r) = \tilde\psi_{-\mathbf k}(-\mathbf r) = \tilde\psi^{\ast}_{\mathbf k}(-\mathbf r) .. note:: Time reversal symmetry operation is not included in ``symmetry.op_scc``. Details of the symmetry object ------------------------------ .. autoclass:: gpaw.symmetry.Symmetry :members: gpaw-24.1.0/doc/devel/technology.rst000066400000000000000000000030001454550013000172730ustar00rootroot00000000000000.. _technology: ========== Technology ========== List of important stuff: ================= ====================================================== Python_ An object oriented, interpreted language. `C`_ A compiled language. reStructuredText_ This document is written using reStructuredText_.
BLAS_ Basic Linear Algebra Subroutines LAPACK_ Linear Algebra PACKage `NumPy`_ Numeric Python provides array manipulation and computational capabilities similar to those found in IDL, Matlab, or Octave. distutils_ A suite of standard distribution utilities for Python MPI_ Message Passing Interface BLACS_ Basic Linear Algebra Communication Subprograms ScaLAPACK_ Scalable LAPACK ================= ====================================================== .. _Python: https://www.python.org .. _C: http://www.open-std.org/jtc1/sc22/open/n2794/n2794.pdf .. _reStructuredText: http://docutils.sourceforge.net/rst.html .. _docutils: http://docutils.sourceforge.net .. _BLAS: http://www.netlib.org/blas .. _LAPACK: http://www.netlib.org/lapack .. _NumPy: http://www.numpy.org .. _distutils: https://docs.python.org/library/distutils.html .. _MPI: http://www.mpi-forum.org .. _FFTW: http://www.fftw.org .. _BLACS: http://www.netlib.org/blacs .. _ScaLAPACK: http://www.netlib.org/scalapack/scalapack_home.html gpaw-24.1.0/doc/devel/testing.rst000066400000000000000000000126431454550013000166120ustar00rootroot00000000000000.. _testing: ============ Testing GPAW ============ Testing of gpaw is done by a nightly test suite consisting of many small and quick tests (with pytest) and by a weekly set of larger tests. Test suite with pytest ====================== The test suite consists of a large number of small and quick tests found in the :git:`gpaw/test/` directory. The tests run nightly in serial and in parallel modes. Running tests in serial mode ---------------------------- Use pytest_ to run the tests:: $ pytest --pyargs gpaw -v To speed up the test suite, use pytest-xdist_ to use multiple processes to run multiple tests at the same time (note: each test is still run in serial mode):: $ pytest --pyargs gpaw -v -n Please report errors to the ``gpaw-users`` mailing list so that we can fix them (see :ref:`mail list`). .. _pytest: http://doc.pytest.org/en/latest/contents.html .. 
_pytest-xdist: https://github.com/pytest-dev/pytest-xdist Running tests in parallel mode ------------------------------ In order to run the tests with MPI parallelization, do this:: $ mpiexec -n <number-of-processes> pytest --pyargs gpaw -v The tests should pass with 1, 2, 4, and 8 parallel tasks. .. hint:: If you observe issues (e.g. segmentation faults) when trying to run pytest, try this instead:: $ mpiexec -n <number-of-processes> gpaw python -m pytest --pyargs gpaw -v This should ensure that the correct environment is used. Please also report parallel errors to the mailing list so that we can fix them (see :ref:`mail list`). Running a subset of tests ------------------------- There are multiple options for running only a subset of tests. 1. Use markers to run tests with that mark, for example CI tests:: $ pytest --pyargs gpaw -v -m ci 2. Use module path to run tests in that path:: $ pytest --pyargs gpaw.test.lcao -v 3. Use file/directory path to run tests in that path:: $ pytest /root/of/gpaw/git/clone/gpaw/test/lcao Special fixtures and marks -------------------------- .. highlight:: python Tests that should only run in serial can be marked like this:: import pytest @pytest.mark.serial def test_something(): ... There are two special GPAW-fixtures: .. autofunction:: gpaw.test.conftest.in_tmp_dir .. autofunction:: gpaw.test.conftest.add_cwd_to_setup_paths .. autofunction:: gpaw.test.conftest.gpw_files Check the :git:`~gpaw/test/conftest.py` to see which gpw-files are available. Use a ``_wfs`` post-fix to get a gpw-file that contains the wave functions. .. autofunction:: gpaw.test.findpeak Adding new tests ---------------- A test script should fulfill a number of requirements: * It should be quick. Preferably not more than a few milliseconds. If the test takes several minutes or more, consider making the test a :ref:`big test <big-test>`. * It should not depend on other scripts. * It should be possible to run it on 1, 2, 4, and 8 cores. A test can produce standard output and files - it doesn't have to clean up.
Just add the ``in_tmp_dir`` fixture as an argument:: def test_something(in_tmp_dir): # make a mess ... Here is a parametrized test that uses :func:`pytest.approx` for comparing floating point numbers:: import pytest @pytest.mark.parametrize('x', [1.0, 1.5, 2.0]) def test_sqr(x): assert x**2 == pytest.approx(x * x) .. _big-test: .. _agts: Big tests ========= The directories in :git:`gpaw/test/big/` and :git:`doc/tutorialsexercises/` contain longer and more realistic tests that we run every weekend. These are submitted to a queuing system of a large computer. The scripts in the :git:`doc` folder are used both for testing GPAW and for generating up to date figures and CSV-file for inclusion in the documentation web-pages. Adding new tests ---------------- To add a new test, create a script somewhere in the file hierarchy ending with ``agts.py`` (e.g. ``submit.agts.py`` or just ``agts.py``). ``AGTS`` is short for Advanced GPAW Test System (or Another Great Time Sink). This script defines how a number of scripts should be submitted to Niflheim and how they depend on each other. Consider an example where one script, ``calculate.py``, calculates something and saves a ``.gpw`` file and another script, ``analyse.py``, analyses this output. Then the submit script should look something like:: def workflow(): from myqueue.workflow import run with run(script='calculate.py', cores=8, tmax='25m'): run(script='analyse.py') # 1 core and 10 minutes As shown, this script has to contain the definition of the function workflow_. Start the workflow with ``mq workflow -p agts.py .`` (see https://myqueue.readthedocs.io/ for more details). Scripts that generate figures or test files for inclusion in the GPAW web-pages should start with a special ``# web-page:`` comment like this:: # web-page: fig1.png, table1.csv ... # code that creates fig1.png and table1.csv ... .. _workflow: https://myqueue.readthedocs.io/en/latest/ workflows.html .. 
_code coverage: Code coverage ============= We use the coverage_ tool to generate a `coverage report`_ every night. It is not 100% accurate because it does not include coverage from running our test suite in parallel. Also not included are the :ref:`agts` and building this web-page which would add some extra coverage. .. _coverage: https://coverage.readthedocs.io/ .. _coverage report: https://wiki.fysik.dtu.dk/gpaw/htmlcov/index.html gpaw-24.1.0/doc/devel/turn_off_things.rst000066400000000000000000000016551454550013000203340ustar00rootroot00000000000000How to turn off things (and make development and debugging simpler) =================================================================== When developing new features or debugging it can be an advantage to simplify the problem by turning things off. No PAW corrections ------------------ * For hydrogen we have an "all-electron" potential (bare Coulomb potential). Use ``setups='ae'``. See :ref:`ae hydrogen atom` and :git:`gpaw/ae.py`. * For aluminium we have the Appelbaum-Hamann local pseudo potential. Use ``setups='ah'``. See :git:`gpaw/test/pseudopotential/test_ah.py` and :git:`gpaw/ah.py`. * For other elements we have norm-conserving non-local pseudo-potentials: :ref:`manual_setups`. No XC functional ---------------- Use ``xc={'name': 'null'}``. No Coulomb interactions ----------------------- Use ``poissonsolver=NoInteractionPoissonSolver()`` (and ``from gpaw.poisson import NoInteractionPoissonSolver``). gpaw-24.1.0/doc/devel/wavefunctions.rst000066400000000000000000000007331454550013000200250ustar00rootroot00000000000000Wave functions ============== .. autoclass:: gpaw.wavefunctions.base.WaveFunctions :members: .. autoclass:: gpaw.wavefunctions.fd.FDWaveFunctions :members: .. autoclass:: gpaw.wavefunctions.pw.PWWaveFunctions :members: .. autoclass:: gpaw.wavefunctions.pw.PW :members: .. autoclass:: gpaw.wavefunctions.lcao.LCAOWaveFunctions :members: .. autoclass:: gpaw.kpoint.KPoint :members: .. 
autoclass:: gpaw.eigensolvers.eigensolver.Eigensolver :members: gpaw-24.1.0/doc/devel/workflow.rst000066400000000000000000000103031454550013000167760ustar00rootroot00000000000000.. _development workflow: ==================== Development workflow ==================== .. _ASE: https://wiki.fysik.dtu.dk/ase/ .. _NumPy: http://docs.scipy.org/doc/numpy/reference/ .. _SciPy: http://docs.scipy.org/doc/scipy/reference/ .. _venv: https://docs.python.org/3/library/venv.html#module-venv .. _pip: https://pip.pypa.io/ .. _git: https://git-scm.com/ .. _GitLab issues: https://gitlab.com/gpaw/gpaw/issues .. _pytest: https://docs.pytest.org/en/6.2.x/ .. contents:: .. seealso:: * :ref:`writing documentation` * :ref:`testing` Setting up your development environment ======================================= Make a `virtual environment `_:: $ mkdir devel $ cd devel $ unset PYTHONPATH $ python3 -m venv venv $ source venv/bin/activate # venv/bin/ is now first in $PATH $ pip install --upgrade pip Install master branch of ASE_ in *editable* mode:: $ git clone git@gitlab.com:ase/ase $ pip install --editable ase/ Same thing for GPAW:: $ git clone git@gitlab.com:gpaw/gpaw $ echo "noblas = True; nolibxc = True" > gpaw/siteconfig.py $ pip install -e gpaw .. note:: Here we used a simple ``siteconfig.py`` that *should* always work: * ``noblas = True``: Use the BLAS library built into NumPy_ (usually OpenBLAS). * ``nolibxc = True``: Use GPAW's own XC-functionals (only LDA, PBE, revPBE, RPBE and PW91). See :ref:`siteconfig` for details. Download PAW datasets:: $ gpaw install-data --register ~/PAWDATA Run the tests ============= The test-suite can be found in :git:`gpaw/test/`. Run it like this:: $ pip install pytest-xdist $ cd gpaw $ pytest -n4 And with MPI (2, 4 and 8 cores):: $ mpiexec -n 2 pytest .. warning:: This will take forever! It's a good idea to learn and master pytest_'s command-line options for selecting the subset of all the tests that are relevant. 
Creating a merge request ======================== Request to become a member of the ``gpaw`` project on GitLab `here <https://gitlab.com/gpaw/gpaw>`__. This will allow you to push branches to the central repository (see below). Create a branch for your changes:: $ cd gpaw $ git switch -c fix-something .. note:: ``git switch -c fix-something`` is the same as any of these: * ``git branch fix-something && git switch fix-something`` * ``git branch fix-something && git checkout fix-something`` * ``git checkout -b fix-something`` :xkcd:`More git-tricks <1597>`. Make some changes and commit:: $ git add file1 file2 ... $ git commit -m "Short summary of changes" Push your branch to GitLab:: $ git push --set-upstream origin fix-something and click the link to create a merge-request (MR). Mark the MR as DRAFT to signal that it is work-in-progress and remove the DRAFT-marker once the MR is ready for code review. Every time you push your local repository changes upstream to the remote repository, you will trigger a continuous integration (CI) runner on the GitLab servers. The script that runs in CI is :git:`.gitlab-ci.yml`. Here is a short summary of what happens in CI: * install the code * ``pytest -m ci``: small selection of fast tests * ``mypy -p gpaw``: `Static code analysis`_ (type hints) * ``flake8``: pyflakes + pycodestyle (pep8) = flake8_ If CI fails, you will have to fix things and push your changes. It's a good idea to also run the CI-checks locally:: $ pip install flake8 mypy $ flake8 ... $ mypy ... $ pytest ... $ # fix things $ git add ... $ git commit ... $ git push # Git now knows your upstream .. tip:: You can use ``git push -o ci.skip`` if you want to skip CI. .. _Static code analysis: https://mypy.readthedocs.io/en/stable/ .. 
_flake8: https://flake8.pycqa.org/en/latest/ How to write a good MR ====================== A good MR * is short * does one thing * is not too old For MRs with code changes: * make sure there is a test that covers the new/fixed code * make sure all variable and functions have descriptive names. * remember docstrings - if needed (no need for an ``add_numbers()`` function to have an ``"""Add numbers."""`` docstring). For MRs with documentation changes, build the HTML-pages and make sure everything looks OK:: $ pip install sphinx-rtd-theme $ cd gpaw/doc $ make $ make browse gpaw-24.1.0/doc/devel/writing_documentation.rst000066400000000000000000000052741454550013000215530ustar00rootroot00000000000000.. _writing documentation: ===================== Writing documentation ===================== .. highlight:: bash We use the Sphinx_ tool to generate the GPAW documentation. First, you should take a look at the documentation for Sphinx_ and reStructuredText_. Also, read carefully the :ref:`Writing documentation for ASE ` page. .. _reStructuredText: http://docutils.sf.net/rst.html .. _Sphinx: http://www.sphinx-doc.org **Structure** When writing documentation easy accessibility and readability is key. To that end the documentation is split into several parts: :ref:`Documentation/Basic usage `: This part contains basic usage instructions for GPAW, including references to parameters for the GPAW calculator object. This part should not contain extended examples, theory or code references. :ref:`Documentation/Advanced topics `: This part contains explanations of the various features of GPAW. The focus here is on implementation specific information, not theory, as well as code references. :ref:`Documentation/Theory `: This is the place for theoretical descriptions of methods used in the code. Reference to literature should be given. :ref:`Tutorials and Exercises `: As the name suggests, this is the heading for any worked out examples, tutorials and exercises. 
Entries are further sorted into fields of physics or application. One should always link the different pages relating to one topic for easy navigation between theory, implementation and example sections. **Getting started** If you don't already have your own copy of the GPAW package, then see :ref:`development workflow` for how to set up your environment. You will also need to ``pip install sphinx-rtd-theme``. Then :command:`cd` to the :file:`doc` directory and build the html-pages:: $ cd ~/gpaw/doc $ make Make your changes to the ``.rst`` files, run the :command:`make` command again, check the results and if things look OK, commit:: $ emacs index.rst $ make $ make browse $ git add index.rst $ git commit -m "..." **Adding figures and tables** We don't want to have png and csv files committed to Git. Instead, you should add the Python scripts that generate the figures and table data so that we can always generate them again if needed. For quick scripts (no more than 5 seconds), see :ref:`ase:generated`. For more expensive scripts you can use :ref:`AGTS <agts>` for running long jobs that create figures or table data for this web-page. For an example, look at the source code :git:`here ` which will produce this: :ref:`stm tutorial`. .. automodule:: gpaw.doctools.aamath .. 
autoclass:: gpaw.typing.Vector gpaw-24.1.0/doc/documentation/000077500000000000000000000000001454550013000161475ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/acf_example.py000066400000000000000000000013041454550013000207630ustar00rootroot00000000000000# creates: acf_example.png import numpy as np import matplotlib.pyplot as plt from gpaw.core import UGDesc alpha = 4.0 rcut = 2.0 l = 0 gauss = (l, rcut, lambda r: (4 * np.pi)**0.5 * np.exp(-alpha * r**2)) grid = UGDesc(cell=[4.0, 2.0, 2.0], size=[40, 20, 20]) pos = [[0.25, 0.5, 0.5], [0.75, 0.5, 0.5]] acf_aR = grid.atom_centered_functions([[gauss], [gauss]], pos) coef_as = acf_aR.empty(dims=(2,)) coef_as[0] = [[1], [-1]] coef_as[1] = [[2], [1]] print(coef_as.data, coef_as[0]) f_sR = grid.zeros(2) acf_aR.add_to(f_sR, coef_as) x = grid.xyz()[:, 10, 10, 0] y1, y2 = f_sR.data[:, :, 10, 10] ax = plt.subplot(1, 1, 1) ax.plot(x, y1, 'o-') ax.plot(x, y2, 'x-') # plt.show() plt.savefig('acf_example.png') gpaw-24.1.0/doc/documentation/advanced.rst000066400000000000000000000007121454550013000204460ustar00rootroot00000000000000.. _advanced: Advanced topics --------------- Here is a list of specific advanced topics and functionalities of the GPAW calculator: .. toctree:: :maxdepth: 2 soc/soc poisson electrodynamics/electrodynamics cdft/cdft dscf/dscf dcdft/dcdft xc/exx external grids hyperfine lcao/lcao mom/mom do-gmf/do-gmf smearing ofdft/ofdft xc/rpa scissors/scissors xc/xc custom_convergence pm/pm sic/sic gpaw-24.1.0/doc/documentation/basic.rst000066400000000000000000001216001454550013000177620ustar00rootroot00000000000000.. _basic: ============ Introduction ============ GPAW calculations are controlled through scripts written in the programming language Python_. GPAW relies on the `Atomic Simulation Environment`_ (ASE), which is a Python package that helps us describe our atoms. The ASE package also handles molecular dynamics, analysis, visualization, geometry optimization and more. 
If you don't know anything about ASE, then it might be a good idea to familiarize yourself with it before continuing (at least read the :ref:`ase:about` section). Below, there will be Python code examples starting with ``>>>`` (and ``...`` for continuation lines). It is a good idea to start the Python interpreter and try some of the examples below. .. _Python: http://www.python.org .. _Atomic Simulation Environment: https://wiki.fysik.dtu.dk/ase The units used by the GPAW calculator correspond to the :mod:`ASE conventions `, most importantly electron volts and angstroms. ----------------------- Doing a PAW calculation ----------------------- To do a PAW calculation with the GPAW code, you need an ASE :class:`~ase.Atoms` object and a :class:`~gpaw.calculator.GPAW` calculator:: _____________ ____________ | | | | | Atoms |------->| GPAW | | | | | |_____________| |____________| atoms calc In Python code, it looks like this: .. literalinclude:: h2.py :start-after: creates If the above code was executed, a calculation for a single `\rm{H}_2` molecule would be started. The calculation would be done using a supercell of size `6.0 \times 6.0 \times 6.0` Å with cluster boundary conditions. The parameters for the PAW calculation are: * 2 electronic bands. * Local density approximation (LDA)\ [#LDA]_ for the exchange-correlation functional. * Spin-paired calculation. * `32 \times 32 \times 32` grid points. The values of these parameters can be found in the text output file: :download:`h2.txt`. The calculator will try to make sensible choices for all parameters that the user does not specify. Specifying parameters can be done like this: >>> calc = GPAW(mode='fd', ... nbands=1, ... xc='PBE', ... gpts=(24, 24, 24)) Here, we want to use one electronic band, the Perdew, Burke, Ernzerhof (PBE)\ [#PBE]_ exchange-correlation functional and 24 grid points in each direction. ---------- Parameters ---------- The complete list of all possible parameters and their defaults is shown below. 
A detailed description of the individual parameters is given in the following sections. .. list-table:: :header-rows: 1 :widths: 1 1 1 2 * - keyword - type - default value - description * - ``basis`` - ``str`` or dict - ``{}`` - Specification of :ref:`manual_basis` * - ``charge`` - ``float`` - ``0`` - Total :ref:`manual_charge` of the system * - ``communicator`` - Object - - :ref:`manual_communicator` * - ``convergence`` - ``dict`` - - :ref:`manual_convergence` * - ``eigensolver`` - ``str`` - ``'dav'`` - :ref:`manual_eigensolver` * - ``external`` - Object - - :ref:`manual_external` * - ``gpts`` - *seq* - - :ref:`manual_gpts` * - ``h`` - ``float`` - ``0.2`` - :ref:`manual_h` * - ``hund`` - ``bool`` - ``False`` - :ref:`Use Hund's rule ` * - ``kpts`` - *seq* - `\Gamma`-point - :ref:`manual_kpts` * - ``maxiter`` - ``int`` - ``333`` - :ref:`manual_maxiter` * - ``mixer`` - Object - - Pulay :ref:`manual_mixer` scheme * - ``mode`` - ``str`` or ``dict`` - - :ref:`manual_mode` * - ``nbands`` - ``int`` - - :ref:`manual_nbands` * - ``occupations`` - occ. obj. - - :ref:`manual_occ` * - ``parallel`` - ``dict`` - - :ref:`manual_parallel` * - ``poissonsolver`` - Object - - Specification of :ref:`Poisson solver ` or :ref:`dipole correction ` or :ref:`Advanced Poisson solver ` * - ``random`` - ``bool`` - ``False`` - Use random numbers for :ref:`manual_random` * - ``setups`` - ``str`` or ``dict`` - ``'paw'`` - :ref:`manual_setups` * - ``spinpol`` - ``bool`` - - :ref:`manual_spinpol` * - ``symmetry`` - ``dict`` - ``{}`` - :ref:`manual_symmetry` * - ``txt`` - ``str``, None, or file obj. - ``'-'`` (``sys.stdout``) - :ref:`manual_txt` * - ``xc`` - ``str`` - ``'LDA'`` - :ref:`manual_xc` *seq*: A sequence of three ``int``'s. .. 
note:: Parameters can be changed after the calculator has been constructed by using the :meth:`~gpaw.calculator.GPAW.set` method: >>> calc.set(txt='H2.txt', charge=1) This would send all output to a file named :file:`'H2.txt'`, and the calculation will be done with one electron removed. Deprecated keywords (in favour of the ``parallel`` keyword) include: ================= ========= ============= ============================ keyword type default value description ================= ========= ============= ============================ ``parsize`` *seq* Parallel :ref:`manual_parsize_domain` ``parsize_bands`` ``int`` ``1`` :ref:`manual_parsize_bands` ================= ========= ============= ============================ .. _manual_mode: Plane-wave, LCAO or Finite-difference mode? ------------------------------------------- Plane-waves: Expand the wave functions in plane-waves. Use ``mode='pw'`` if you want to use the default plane-wave cutoff of `E_{\text{cut}}=340` eV. The plane-waves will be those with `|\mathbf G+\mathbf k|^2/2` parameter for a DFT calculation. For now, users will get a warning when finite-difference mode is implicitly chosen. Please change your scripts to avoid this error/warning. Comparing PW, LCAO and FD modes ``````````````````````````````` Memory consumption: With LCAO, you have fewer degrees of freedom so memory usage is low. PW mode uses more memory and FD a lot more. Speed: For small systems with many **k**-points, PW mode beats everything else. For larger systems LCAO will be most efficient. Whereas PW beats FD for smallish systems, the opposite is true for large systems where FD will parallelize better. Absolute convergence: With LCAO, it can be hard to reach the complete basis set limit and get absolute convergence of energies, whereas with FD and PW mode it is quite easy to do by decreasing the grid spacing or increasing the plane-wave cutoff energy, respectively. 
Eggbox errors: With LCAO and FD mode you will get a small eggbox error: you get a small periodic energy variation as you translate atoms and the period of the variation will be equal to the grid-spacing used. GPAW's PW implementation doesn't have this problem. Features: FD mode is the oldest and has most features. Only PW mode can be used for calculating the stress-tensor and for response function calculations. Features in FD, LCAO and PW modes ````````````````````````````````` Some features are not available in all modes. Here is a (possibly incomplete) table of the features: .. list-table:: * - mode - FD - LCAO - PW * - GPU ground state calculations - (experimental) - - + * - Time-propagation TDDFT - + - + - * - Dielectric function - - - + * - Casida equation - + - - * - Hybrid functionals - (no forces, no **k**-points) - - + * - Stress tensor - - - + * - GW - - - + * - BSE - - - + * - Direct orbital optimization (generalized mode following) - - + - * - Non-collinear spin - - - + * - Solvent models - + - - * - MGGA - + - - + * - Constrained DFT - + - - * - Ehrenfest - + - - * - Spin-spirals - - - + .. _manual_nbands: Number of electronic bands -------------------------- This parameter determines how many bands are included in the calculation for each spin. For example, for a spin-unpolarized system with 10 valence electrons, ``nbands=5`` would include all the occupied states. In a spin-polarized system with 10 valence electrons and a magnetic moment of 2, a minimum of ``nbands=6`` is needed (6 occupied bands for spin-up, 4 occupied bands and 2 empty bands for spin down). The default number of electronic bands (``nbands``) is equal to 4 plus 1.2 times the number of occupied bands. For systems with the occupied states well separated from the unoccupied states, one could use just the number of bands needed to hold the occupied states. For metals, more bands are needed. Sometimes, adding more unoccupied bands will improve convergence. .. 
tip:: ``nbands=0`` will give zero empty bands, and ``nbands=-n`` will give ``n`` empty bands. .. tip:: ``nbands='n%'`` will give ``n/100`` times the number of occupied bands. .. tip:: ``nbands='nao'`` will use the same number of bands as there are atomic orbitals. This corresponds to the maximum ``nbands`` value that can be used in LCAO mode. .. _manual_xc: Exchange-Correlation functional ------------------------------- Some of the most commonly used exchange-correlation functionals are listed below. ============ =========================== =========================== ========== ``xc`` full libxc_ keyword description reference ============ =========================== =========================== ========== ``'LDA'`` ``'LDA_X+LDA_C_PW'`` Local density approximation [#LDA]_ ``'PBE'`` ``'GGA_X_PBE+GGA_C_PBE'`` Perdew, Burke, Ernzerhof [#PBE]_ ``'revPBE'`` ``'GGA_X_PBE_R+GGA_C_PBE'`` revised PBE [#revPBE]_ ``'RPBE'`` ``'GGA_X_RPBE+GGA_C_PBE'`` revised revPBE [#RPBE]_ ``'PBE0'`` ``'HYB_GGA_XC_PBEH'`` Known as PBE0 [#PBE0]_ ``'B3LYP'`` ``'HYB_GGA_XC_B3LYP'`` B3LYP (as in Gaussian Inc.) [#B3LYP]_ ============ =========================== =========================== ========== ``'LDA'`` is the default value. The next three ones are of generalized gradient approximation (GGA) type, and the last two are `hybrid functionals `_. For the list of all functionals available in GPAW see :ref:`overview_xc`. GPAW uses the functionals from libxc_ by default (except for LDA, PBE, revPBE, RPBE and PW91 where GPAW's own implementation is used). Keywords are based on the names in the libxc :file:`'xc_funcs.h'` header file (the leading ``'XC_'`` should be removed from those names). You should be able to find the file installed alongside LibXC. Valid keywords are strings or combinations of exchange and correlation string joined by **+** (plus). For example, "the" (most common) LDA approximation in chemistry corresponds to ``'LDA_X+LDA_C_VWN'``. 
XC functionals can also be specified as dictionaries. This is useful for functionals that depend on one or more parameters. For example, to use a stencil with two nearest neighbours for the density-gradient with the PBE functional, use ``xc={'name': 'PBE', 'stencil': 2}``. The ``stencil`` keyword applies to any GGA or MGGA. Some functionals may take other parameters; see their respective documentation pages. Hybrid functionals (the feature is described at :ref:`exx`) require the setups containing exx information to be generated. Check available setups for the presence of exx information, for example:: $ zcat $GPAW_SETUP_PATH/O.PBE.gz | grep ">> from ase.dft.kpoints import monkhorst_pack >>> kpts = monkhorst_pack((1, 1, 4)) >>> kpts array([[ 0. , 0. , -0.375], [ 0. , 0. , -0.125], [ 0. , 0. , 0.125], [ 0. , 0. , 0.375]]) >>> kpts+=(0,0,0.125) >>> kpts array([[ 0. , 0. , -0.25], [ 0. , 0. , 0. ], [ 0. , 0. , 0.25], [ 0. , 0. , 0.5 ]]) .. _manual_spinpol: Spinpolarized calculation ------------------------- If any of the atoms have magnetic moments, then the calculation will be spin-polarized - otherwise, a spin-paired calculation is carried out. This behavior can be overruled with the ``spinpol`` keyword (``spinpol=True``). .. _manual_gpts: Number of grid points --------------------- The number of grid points to use for the grid representation of the wave functions determines the quality of the calculation. More gridpoints (smaller grid spacing, *h*), gives better convergence of the total energy. For most elements, *h* should be 0.2 Å for reasonable convergence of total energies. If a ``n1`` `\times` ``n2`` `\times` ``n3`` grid is desired, use ``gpts=(n1, n2, n3)``, where ``n1``, ``n2`` and ``n3`` are positive ``int``'s all divisible by four. Alternatively, one can use something like ``h=0.25``, and the program will try to choose a number of grid points that gives approximately a grid-point density of `1/h^3`. For more details, see :ref:`grids`. 
If you are more used to think in terms of plane waves; a conversion formula between plane wave energy cutoffs and realspace grid spacings have been provided by Briggs *et al.* PRB **54**, 14362 (1996). The conversion can be done like this:: >>> from gpaw.utilities.tools import cutoff2gridspacing, gridspacing2cutoff >>> from ase.units import Rydberg >>> h = cutoff2gridspacing(50 * Rydberg) .. _manual_h: Grid spacing ------------ The parameter ``h`` specifies the grid spacing in Å that has to be used for the realspace representation of the smooth wave functions. Note, that this grid spacing in most cases is approximate as it has to fit to the unit cell (see :ref:`manual_gpts` above). In case you want to specify ``h`` exactly you have to choose the unit cell accordingly. This can be achieved by:: from gpaw.cluster import * d = 0.74 a = 6.0 atoms = Cluster('H2', positions=[(0, 0, 0), (0, 0, d)]) # set the amount of vacuum at least to 4 Å # and ensure a grid spacing of h=0.2 atoms.minimal_box(4., h=.2) .. _manual_symmetry: Use of symmetry --------------- The default behavior is to use all point-group symmetries and time-reversal symmetry to reduce the **k**-points to only those in the irreducible part of the Brillouin-zone. Moving the atoms so that a symmetry is broken will cause an error. This can be avoided by using:: symmetry={'point_group': False} This will reduce the number of applied symmetries to just the time-reversal symmetry (implying that the Hamiltonian is invariant under **k** -> -**k**). For some purposes you might want to have no symmetry reduction of the **k**-points at all (debugging, band-structure calculations, ...). This can be achieved by specifying:: symmetry={'point_group': False, 'time_reversal': False} or simply ``symmetry='off'`` which is a short-hand notation for the same thing. 
For full control, here are all the available keys of the ``symmetry`` dictionary: ================= ======== =============================== key default description ================= ======== =============================== ``point_group`` ``True`` Use point-group symmetries ``time_reversal`` ``True`` Use time-reversal symmetry ``symmorphic`` ``True`` Use only symmorphic symmetries ``tolerance`` ``1e-7`` Relative tolerance ================= ======== =============================== .. _manual_random: Wave function initialization ---------------------------- By default, a linear combination of atomic orbitals is used as initial guess for the wave functions. If the user wants to calculate more bands than there are precalculated atomic orbitals, random numbers will be used for the remaining bands. .. _manual_occ: Occupation numbers ------------------ The smearing of the occupation numbers is controlled like this:: from gpaw import GPAW calc = GPAW(..., occupations={'name': 'fermi-dirac', 'width': 0.05}, ...) The distribution looks like this (width = `k_B T`): .. math:: f(E) = \frac{1}{1 + \exp[E / (k_B T)]} For calculations with periodic boundary conditions, the default value is 0.1 eV and the total energies are extrapolated to *T* = 0 Kelvin. For a molecule (no periodic boundaries) the default value is ``width=0``, which gives integer occupation numbers. Other distribution functions: * ``{'name': 'marzari-vanderbilt', 'width': ...}`` * ``{'name': 'methfessel-paxton', 'width': ..., 'order': ...}`` For a spin-polarized calculation, one can fix the total magnetic moment at the initial value using:: occupations={'name': ..., 'width': ..., 'fixmagmom': True} .. 
figure:: occupation_numbers.png Occupation numbers for ``width=0.05`` For fixed occupations numbers use the :class:`gpaw.occupations.FixedOccupationNumbers` class like this:: from gpaw.occupations import FixedOccupationNumbers calc = GPAW(..., occupations=FixedOccupationNumbers([[1, 1, ..., 0, 0], [1, 1, ..., 0, 0]])) See also :ref:`smearing`. .. _manual_lmax: Compensation charges -------------------- The compensation charges are expanded with correct multipoles up to and including `\ell=\ell_{max}`. Default value: ``lmax=2``. .. _manual_charge: Charge ------ The default is charge neutral. The systems total charge may be set in units of the negative electron charge (i.e. ``charge=-1`` means one electron more than the neutral). .. _manual_convergence: Accuracy of the self-consistency cycle -------------------------------------- The ``convergence`` keyword is used to set the convergence criteria for the SCF cycle. The default value is this dictionary:: {'energy': 0.0005, # eV / electron 'density': 1.0e-4, # electrons / electron 'eigenstates': 4.0e-8, # eV^2 / electron 'bands': 'occupied'} In words: * The energy change (last 3 iterations) should be less than 0.5 meV per valence electron. (See :class:`~gpaw.convergence_criteria.Energy`.) * The change in density (integrated absolute value of density change) should be less than 0.0001 electrons per valence electron. (See :class:`~gpaw.convergence_criteria.Density`.) * The integrated value of the square of the residuals of the Kohn-Sham equations should be less than 4.0 `\times` 10\ :sup:`-8` eV\ :sup:`2` per valence electron. This criterion does not affect LCAO calculations. (See :class:`~gpaw.convergence_criteria.Eigenstates`.) * Only the bands that are occupied with electrons are converged. If only a partial dictionary is provided, the remaining criteria will be set to their default values. 
E.g., ``convergence={'energy': 0.0001}`` will set the convergence criterion of energy to 0.1 meV and place all other criteria at their defaults. Additional keywords, including ``'forces'``, ``'work function'``, and ``'minimum iterations'``, can be set. You can also write your own criteria, and change other things about how the default criteria operate. See :ref:`custom_convergence` for details on additional keywords and customization. As the total energy and charge density depend only on the occupied states, unoccupied states do not contribute to the convergence criteria. However, with the ``'bands'`` set to ``'all'``, it is possible to force convergence also for the unoccupied states. One can also use ``{'bands': 200}`` to converge the lowest 200 bands. One can also write ``{'bands': -10}`` to converge all bands except the last 10. It is often hard to converge the last few bands in a calculation. Finally, one can also use ``{'bands': 'CBM+5.0'}`` to specify that bands up to the conduction band minimum plus 5.0 eV should be converged (for a metal, CBM is taken as the Fermi level). .. _manual_maxiter: Maximum number of SCF-iterations -------------------------------- The calculation will stop with an error if convergence is not reached in ``maxiter`` self-consistent iterations. You can also set a minimum number of iterations by employing a :ref:`custom convergence criterion `. .. _manual_txt: Where to send text output ------------------------- The ``txt`` keyword defaults to the string ``'-'``, which means standard output. One can also give a ``file`` object (anything with a ``write`` method will do). If a string (different from ``'-'``) is passed to the ``txt`` keyword, a file with that name will be opened and used for output. Use ``txt=None`` to disable all text output. .. 
_manual_mixer: Density mixing -------------- Three parameters determine how GPAW does Pulay mixing of the densities: * ``beta``: linear mixing coefficient * ``nmaxold``: number of old densities to mix * ``weight``: when measuring the change from input to output density, long wavelength changes are weighted ``weight`` times higher than short wavelength changes For small molecules, the best choice is to use ``mixer=Mixer(beta=0.25, nmaxold=3, weight=1.0)``, which is what GPAW will choose if the system has zero-boundary conditions. If your system is a big molecule or a cluster, it is an advantage to use something like ``mixer=Mixer(beta=0.05, nmaxold=5, weight=50.0)``, which is also what GPAW will choose if the system has periodic boundary conditions in one or more directions. In spin-polarized calculations ``MixerDif`` will be used instead of ``Mixer``. See also the documentation on :ref:`density mixing `. .. _manual_fixdensity: Fixed density calculation ------------------------- When calculating band structures or when adding unoccupied states to calculation (and wanting to converge them) it is often useful to use existing density without updating it. This can be done using the :meth:`gpaw.calculator.GPAW.fixed_density` method. This will use the density (e.g. one read from .gpw or existing from previous calculation) throughout the SCF-cycles (so called Harris calculation). .. _manual_setups: PAW datasets or pseudopotentials -------------------------------- The ``setups`` keyword is used to specify the name(s) of the setup files used in the calculation. For a given element ``E``, setup name ``NAME``, and xc-functional 'XC', GPAW looks for the file :file:`E.NAME.XC` or :file:`E.NAME.XC.gz` (in that order) in the setup locations (see :ref:`installation of paw datasets`). Unless ``NAME='paw'``, in which case it will simply look for :file:`E.XC` (or :file:`E.XC.gz`). The ``setups`` keyword can be either a single string, or a dictionary. 
If specified as a string, the given name is used for all atoms. If specified as a dictionary, each key can be either a chemical symbol or an atom number (the index of an individual atom). The values state the individual setup names. The special key ``'default'`` can be used to specify the default setup name. Thus ``setups={'default': 'paw'}`` is equivalent to ``setups='paw'`` which is the GPAW default. As an example, the latest PAW setup of Na also includes the 6 semicore p states in the valence. To use the non-default setup with only the one s electron in the valence (:file:`Na.1.XC.gz`), one can specify ``setups={'Na': '1'}``. There exist four special names that, if used, do not specify a file name: * ``'ae'`` is used for specifying all-electron mode for an atom. I.e. no PAW or pseudo potential is used. * ``'sg15'`` specifies the `SG15 optimized norm-conserving Vanderbilt pseudopotentials`_ for the PBE functional. These have to be installed separately. Use :command:`gpaw install-data --sg15 {<dir>}` to download and unpack the pseudopotentials into :file:`{<dir>}/sg15_oncv_upf_{<version>}`. As of now, the SG15 pseudopotentials should still be considered experimental in GPAW. You can plot a UPF pseudopotential by running :file:`gpaw-upfplot {<pseudopotential>}`. Here, :file:`{<pseudopotential>}` can be either a direct path to a UPF file or the symbol or identifier to search for in the GPAW setup paths. * ``'hgh'`` is used to specify a norm-conserving Hartwigsen-Goedecker-Hutter pseudopotential (no installation necessary). Some elements have better semicore pseudopotentials. To use those, specify ``'hgh.sc'`` for the elements or atoms in question. * ``'ghost'`` is used to indicate a *ghost* atom in LCAO mode, see :ref:`ghost-atoms`. .. _SG15 optimized norm-conserving Vanderbilt pseudopotentials: http://www.quantum-simulation.org/potentials/sg15_oncv/ If a dictionary contains both chemical element specifications *and* atom number specifications, the latter is dominant. 
An example:: setups={'default': 'soft', 'Li': 'hard', 5: 'ghost', 'H': 'ae'} Indicates that the files named 'hard' should be used for lithium atoms, an all-electron potential is used for hydrogen atoms, atom number 5 is a ghost atom (even if it is a Li or H atom), and for all other atoms the files named 'soft' will be used. .. _manual_basis: Atomic basis set ---------------- The ``basis`` keyword can be used to specify the basis set which should be used in LCAO mode. This also affects the LCAO initialization in FD or PW mode, where initial wave functions are constructed by solving the Kohn-Sham equations in the LCAO basis. If ``basis`` is a string, :file:`basis='basisname'`, then GPAW will look for files named :file:`{symbol}.{basisname}.basis` in the setup locations (see :ref:`installation of paw datasets`), where :file:`{symbol}` is taken as the chemical symbol from the ``Atoms`` object. If a non-default setup is used for an element, its name is included as :file:`{symbol}.{setupname}.{basisname}.basis`. If ``basis`` is a dictionary, its keys specify atoms or species while its values are corresponding basis names which work as above. Distinct basis sets can be specified for each atomic species by using the atomic symbol as a key, or for individual atoms by using an ``int`` as a key. In the latter case the integer corresponds to the index of that atom in the ``Atoms`` object. As an example, ``basis={'H': 'sz', 'C': 'dz', 7: 'dzp'}`` will use the ``sz`` basis for hydrogen atoms, the ``dz`` basis for carbon, and the ``dzp`` for whichever atom is number 7 in the ``Atoms`` object. .. note:: If you want to use only the ``sz`` basis functions from a ``dzp`` basis set, then you can use this syntax: ``basis='sz(dzp)'``. This will read the basis functions for, say hydrogen, from the ``H.dzp.basis`` file. If the basis has a custom name, it is specified as ``'szp(mybasis.dzp)'``.
The value ``None`` (default) implies that the pseudo partial waves from the setup are used as a basis. This basis is always available; choosing anything else requires the existence of the corresponding basis set file in setup locations (see :ref:`installation of paw datasets`). For details on the LCAO mode and generation of basis set files, see the :ref:`LCAO ` documentation. .. _manual_eigensolver: Eigensolver ----------- The default solver for iterative diagonalization of the Kohn-Sham Hamiltonian is a simple Davidson method, (``eigensolver='dav'``), which seems to perform well in most cases. Sometimes more efficient/stable convergence can be obtained with a different eigensolver. One option is the RMM-DIIS (Residual minimization method - direct inversion in iterative subspace), (``eigensolver='rmm-diis'``), which performs well when only a few unoccupied states are calculated. Another option is the conjugate gradient method (``eigensolver='cg'``), which is stable but slower. If parallelization over bands is necessary, then Davidson or RMM-DIIS must be used. More control can be obtained by using directly the eigensolver objects:: from gpaw.eigensolvers import CG calc = GPAW(..., eigensolver=CG(niter=5, rtol=0.20), ...) Here, ``niter`` specifies the maximum number of conjugate gradient iterations for each band (within a single SCF step), and if the relative change in residual is less than ``rtol``, the iteration for the band is not continued. LCAO mode has its own eigensolvers. ``DirectLCAO`` eigensolver directly diagonalizes the Hamiltonian matrix instead of using an iterative method. One can also use Exponential Transformation Direct Minimization (ETDM) method (see :ref:`directmin`) but it is not recommended to use it for metals because occupation numbers are not found variationally in ETDM. .. _manual_poissonsolver: Poisson solver -------------- The ``poissonsolver`` keyword is used to specify a Poisson solver class or enable dipole correction.
The default Poisson solver in FD and LCAO mode is called ``FastPoissonSolver`` and uses a combination of Fourier and Fourier-sine transforms in combination with parallel array transposes. Meanwhile in PW mode, the Poisson equation is solved by dividing each planewave coefficient by the squared length of its corresponding wavevector. The old default Poisson solver uses a multigrid Jacobian method. This example corresponds to the old default Poisson solver:: from gpaw import GPAW, PoissonSolver calc = GPAW(..., poissonsolver=PoissonSolver( name='fd', nn=3, relax='J', eps=2e-10), ...) The ``nn`` argument is the stencil, see :ref:`manual_stencils`. The ``relax`` argument is the method, either ``'J'`` (Jacobian) or ``'GS'`` (Gauss-Seidel). The Gauss-Seidel method requires half as many iterations to solve the Poisson equation, but involves more communication. The Gauss-Seidel implementation also depends slightly on the domain decomposition used. The last argument, ``eps``, is the convergence criterion. .. note:: The Poisson solver is rarely a performance bottleneck, but it can sometimes perform poorly depending on the grid layout. This is mostly important in LCAO calculations, but can be good to know in general. See the LCAO notes on :ref:`Poisson performance `. .. _manual_dipole_correction: The ``poissonsolver`` keyword can also be used to specify that a dipole-layer correction should be applied along a given axis. The system should be non-periodic in that direction but periodic in the two other directions. :: from gpaw import GPAW correction = {'dipolelayer': 'xy'} calc = GPAW(..., poissonsolver=correction, ...) Without dipole correction, the potential will approach 0 at all non-periodic boundaries. With dipole correction, there will be a potential difference across the system depending on the size of the dipole moment. 
Other parameters in this dictionary are forwarded to the Poisson solver:: GPAW(..., poissonsolver={'dipolelayer': 'xy', 'name': 'fd', 'relax': 'GS'}, ...) An alternative Poisson solver based on Fourier transforms is available for fully periodic calculations:: GPAW(..., poissonsolver={'name': 'fft'}, ...) The FFT Poisson solver will reduce the dependence on the grid spacing and is in general less picky about the grid. It may be beneficial for non-periodic systems as well, but the system must be set up explicitly as periodic and hence should be well padded with vacuum in non-periodic directions to avoid unphysical interactions across the cell boundary. .. _manual_stencils: Finite-difference stencils -------------------------- GPAW can use finite-difference stencils for the Laplacian in the Kohn-Sham and Poisson equations. You can set the range of the stencil (number of neighbor grid points) used for the Poisson equation like this:: from gpaw import GPAW, PoissonSolver calc = GPAW(..., poissonsolver=PoissonSolver(nn=n), ...) This will give an accuracy of `O(h^{2n})`, where ``n`` must be between 1 and 6. The default value is ``n=3``. Similarly, for the Kohn-Sham equation, you can use:: from gpaw import GPAW, FD calc = GPAW(mode=FD(nn=n)) where the default value is also ``n=3``. In PW-mode, the interpolation of the density from the coarse grid to the fine grid is done with FFTs. In FD and LCAO mode, tri-quintic interpolation is used (5th-degree polynomial):: from gpaw import GPAW, FD calc = GPAW(mode=FD(interpolation=n)) # or from gpaw import GPAW, LCAO calc = GPAW(mode=LCAO(interpolation=n)) The order of the polynomial is `2n-1`, default value is ``n=3`` and ``n`` must be between 1 and 4 (linear, cubic, quintic, heptic). ..
_manual_hund: Using Hund's rule for guessing initial magnetic moments ------------------------------------------------------- With ``hund=True``, the calculation will become spin-polarized, and the initial occupations and magnetic moments of all atoms will be set to the values required by Hund's rule. You may further wish to specify that the total magnetic moment be fixed, by passing e.g. ``occupations={'name': ..., 'fixmagmom': True}``. Any user specified magnetic moment is ignored. Default is False. .. _manual_external: External potential ------------------ Example:: from gpaw.external import ConstantElectricField calc = GPAW(..., external=ConstantElectricField(2.0, [1, 0, 0]), ...) See also: :mod:`gpaw.external`. .. _manual_verbose: Output verbosity ---------------- By default, only a limited amount of information is printed out for each SCF step. It is possible to obtain more information (e.g. for investigating convergence problems in more detail) by using the ``verbose=1`` keyword. .. _manual_communicator: Communicator object ------------------- By specifying a communicator object, it is possible to use only a subset of processes for the calculator when calculating e.g. different atomic images in parallel. See :ref:`different_calculations_in parallel` for more details. .. .. _manual_parallel_calculations: .. .. --------------------- .. Parallel calculations .. --------------------- .. .. Information about running parallel calculations can be found on the .. :ref:`parallel_runs` page. .. _zero_energy: -------------- Total Energies -------------- The GPAW code calculates energies relative to the energy of separated reference atoms, where each atom is in a spin-paired, neutral, and spherically symmetric state - the state that was used to generate the setup. For a calculation of a molecule, the energy will be minus the atomization energy and for a solid, the resulting energy is minus the cohesive energy.
So, if you ever get positive energies from your calculations, your system is in an unstable state! .. note:: You don't get the true atomization/cohesive energy. The true number is always lower, because most atoms have a spin-polarized and non-spherical symmetric ground state, with an energy that is lower than that of the spin-paired, and spherically symmetric reference atom. ------------------------ Restarting a calculation ------------------------ The state of a calculation can be saved to a file like this: >>> calc.write('H2.gpw') The file :file:`H2.gpw` is a binary file containing wave functions, densities, positions and everything else (also the parameters characterizing the PAW calculator used for the calculation). If you want to restart the `\rm{H}_2` calculation in another Python session at a later time, this can be done as follows: >>> from gpaw import * >>> atoms, calc = restart('H2.gpw') >>> print(atoms.get_potential_energy()) Everything will be just as before we wrote the :file:`H2.gpw` file. Often, one wants to restart the calculation with one or two parameters changed slightly. This is simple to do. Suppose you want to change the number of grid points: >>> atoms, calc = restart('H2.gpw', gpts=(20, 20, 20)) >>> print(atoms.get_potential_energy()) .. tip:: There is an alternative way to do this, that can be handy sometimes: >>> atoms, calc = restart('H2.gpw') >>> calc.set(gpts=(20, 20, 20)) >>> print(atoms.get_potential_energy()) More details can be found on the :ref:`restart_files` page. --------------------------------------- Customizing behaviour through observers --------------------------------------- An *observer* function can be *attached* to the calculator so that it will be executed every *N* iterations during a calculation. The below example saves a differently named restart file every 5 iterations:: calc = GPAW(...) 
occasionally = 5 class OccasionalWriter: def __init__(self): self.iter = 0 def write(self): calc.write('filename.%03d.gpw' % self.iter) self.iter += occasionally calc.attach(OccasionalWriter().write, occasionally) See also :meth:`~gpaw.calculator.GPAW.attach`. .. _command line options: -------------------- Command-line options -------------------- In order to run GPAW in debug-mode, e.g. check consistency of arrays passed to C-extensions, use Python's :option:`python:-d` option: $ python3 -d script.py If you run Python through the ``gpaw python`` command, then you can run your script in dry-run mode like this:: $ gpaw python --dry-run=N script.py This will print out the computational parameters and estimate memory usage, and not perform an actual calculation. Parallelization settings that would be employed when run on ``N`` cores will also be printed. .. tip:: If you need extra parameters from the command-line for development work:: $ python3 -X a=1 -X b >>> import sys >>> sys._xoptions {'a': '1', 'b': True} See also Python's :option:`python:-X` option. .. [#LDA] J. P. Perdew and Y. Wang, Accurate and simple analytic representation of the electron-gas correlation energy *Phys. Rev. B* **45**, 13244-13249 (1992) .. [#PBE] J. P. Perdew, K. Burke, and M. Ernzerhof, Generalized Gradient Approximation Made Simple, *Phys. Rev. Lett.* **77**, 3865 (1996) .. [#revPBE] Y. Zhang and W. Yang, Comment on "Generalized Gradient Approximation Made Simple", *Phys. Rev. Lett.* **80**, 890 (1998) .. [#RPBE] B. Hammer, L. B. Hansen and J. K. Nørskov, Improved adsorption energetics within density-functional theory using revised Perdew-Burke-Ernzerhof functionals, *Phys. Rev. B* **59**, 7413 (1999) .. [#PBE0] C. Adamo and V. Barone, *J. Chem. Phys.* **110** 6158-6170 (1999) Toward reliable density functional methods without adjustable parameters: The PBE0 model .. [#B3LYP] P. J. Stephens, F. J. Devlin, C. F. Chabalowski, and M.J. Frisch, *J. Phys.
Chem.* **98** 11623-11627 (1994) Ab-Initio Calculation of Vibrational Absorption and Circular-Dichroism Spectra Using Density-Functional Force-Fields gpaw-24.1.0/doc/documentation/basic_usage.rst000066400000000000000000000002351454550013000211460ustar00rootroot00000000000000.. _basic_usage: =========== Basic usage =========== .. toctree:: :maxdepth: 1 basic parallel_runs/parallel_runs convergence restart_files gpaw-24.1.0/doc/documentation/bse/000077500000000000000000000000001454550013000167205ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/bse/Feynman.png000066400000000000000000000264221454550013000210310ustar00rootroot00000000000000PNG  IHDRh IDATxr0ѩ,|K$OR-Nwu')C*["U~C _M B_nuX]? c0a @0 a 0 1`c0a @0 a 0 1`c0a <.$?EYDatm6[~ʗ/c,8 59$,1`$$}M2pAil0`IX]צ&ImXfcXK{[0]=䧜1rcR1B c!$gc}뺮svE-ƘA'g,1,SNsm$޶}iquC?o15L%1X` ܇K1Kq R\pAiTk6s46U` 8p?!sgˇ 5E3Hc3]oOI@`;յS4j0vCBJ>d#9<` ~ X?r{59$\i6^[ao!c!{).Q?1N6dKqte%v-ld6! HIoEaQ( YmҰHy x.۰g,[R upo/pV??>}蓮CuؠAeسӁf,mkrĸ Aۏ8#:xڒ`zI3!߫>E#(C1oy aNɪk5L   _4ӟ}lڮ>og:4]CziƦ>Օ6c_=4XGhO/+>\55NQ h u4$6 #ykN5@](sEbE/N#=릣ue}Nrq::{Jp=,@Jd8e"_^H<GRZI7B=Sƚl3LЌտ)Ge!QſgWh253flm>9+h^]+֢U$܇Qhƃ/_ٙ0Ҫ]O,陴S8r8;f}QWN3 仳#A94獻Wկ 'C)~=Tg.Nm6!TgIȂgҞ'I~VEyn)q(J`Mv^9S_a w[IϴKf) \Nb<9<ϝ'd9G\aIIL8W^{h%dT᳐zHa(kRyvWԠGa'A|I &'q]50(7o=ŰmźbF[.DO1CGK1M;'縻-t >n\giԽ;zzha\zԷ@Gn9r -9#гb[ jYKB52[Ng)WӴ h['Afjk]搂C1&5.DFy Wk})l`J U뻌>p㞧2isyu| a<+1Kp kK0~ _Wz JajC x$S3UԝP ̙0&]#Zl`%S3QCNcVSdNjds{dN#SӢŀ97*jt/fgL*) dQ3*`kd3 ; cnPL-ȿ](02ɔ7-3? "tbb" BXשunl(t 37^u֦kjEY=1hdftYM#j<&i$k| xɾ7sy3|K䉣1{sJR}PMT3LEDa4zaA G\SzIIVo'k!bKɵgW!pNc#/WQQot́o˜VIso=R)xYZM{cuqcw3&L u,[;u)X8LlZi;ǞTi4{om,\zk W#B2ֲ[syoOv-Wo(M#,-nQi? 
]E'{(a>f򆝏f(9W+ knc*FO-ZЦn1߼)ح\C|Ek :P3viܣ̫Se^k/!=H$1 5cCmd^ ӹGsxFJ͘o-vP/tAtf72y]b7P3v.N8Wr9 Gٖ[QEĿ~1]Vqk8c:1arڻ=?1Lv+zEXf1Gŷ |ƁS^ 4[g SȚh Ax[aLبYf8bMelVs,1pW3UrHd˧YHo}PF(l^OH`6[Gi~"h{!)O_pJaB| =wggF)}Q?TrHD?t*sطk v).'jh_TSO~kBԤɸ{+-.l™˕>0k=D5ZF@l6-,i)ŐXE-Os>UfӆqInl ;*Q_A(JW[Cq Bւ57icD1Wה:kXp)'(Ӯowv)J-3/CC0jXٙfyR\gFKq\$:ƻc񈕂bV7wwR3IC=ώF켂JĨe'*Uj3߾$"|E*LkG]ZeW'ryuRtnK-!kGwO;ɮ@쬮 N?"EYPxwW 6^svpЛJ_ h|/99$4)|rEQߩ~rV;w0}Mͺ2eH}lWHi$Y=Yq΄:F^fwՑLa:?f<Fifbb(_uB'Q}U#Yk[1oC{&7_O7CPo>n;:gɉBRPO*\^qX\ARrz3t_oXhYD6)1S#yD_}0 #u^]T~ϏO 2*2^*$"*۟svac'hN1u{IftF!IdTڈ32u"ÈMMڽTRf(Ȁ S]nwQe7Z}GT#VC͸WkN=u5l w!F֒VhA{JהHW.^%3[L mA5ۑ$w}TgieLO^/8?Bg Huxvq}F?j u~CTZIꧮkcjr( uSш\4lelCC7Ԩ%S0Ԧ?=G'Kweʣzqp}U~λ^kOF3] 60txRfw??>gVp5.ťqWR4!X u 꺾q6m= o3N/$J1Ë]DqQ^*n`G6Cc%mlm:0-> [KgdGciq+>0B7~卞Է-Iߎbe^W-{8;3L3&_:3A~89~ČփfKHڼ%&B16W=4:_}$}u<7zpˡ'Bg>MY2S MbSN?[GIJ4]F4fx'O(?oK3EYPΦjUDL \P0 #Wpvyؤ)zKrH6 `f(ZyƎƧ.9z(2N/R ;fKF_fsYʧ8xjQ |e`'ЌAg{Ъܛ%ۑA;M%}M}XtNV]_0܇&X5 >>յqB"9$N{Yx?D$1+ʂ 1c[E~[CGC!܇Oc%y7VVj`H=YH_Sy0:}:|/G$ arl3ѷuZ^X@#K[YK~\Z*Z ZVhup#ncR\rhۼщel`,]q{ד_rpz60=\f =HGO䙙7_L0/_$KJ Ե_^3h~Bv<-#KOòN7HIۓC>u M"dh:Aݏg!5@4ڟ߭_{0@F#7 eIcNY^3<%R\<- k^BjHb0+ØLqjIi\{ܡk9π0v b g1]9>_6&'x}x}֗PmW4H6t!X z~KVpuf1Rl옭~ژK00SKgErH'XE r:+}MϺܐEYe8I<Βb??Ǒ'.A"i>8jN31SŰТ?I]N_SƘzWk:hGa ƽ~[휝=Ǣ Eg@ A:π0&6"ٹ/ðtzyt֒r !ILu *ЌGqj]]>^&5@+T1Ph:Z$T^"U`:@hSzK_(ӒftVh Gz/\ x"}[]rm|a Ya Tf<BV 3>7܇17Q9m~1pc 4v Jpwi\$0R=~a Tf<-sɒ3e[T>OrH-`Tf<(:O4ʎ{~ $|( _% #-jqQEYڸo|&Z_3U IDAT'FpALi"#dNHt"[y֙Fbl!Ԟ=x,6i~ 4c]u~).<zh.\ž1>qvĿ%L}&TG\FڍBdLMw?Z?pP|}sh|H+9r{Eܚw+Fn4׏e5(v1ۥ7򟴽hҧbi=DBL Ucյrήzhk#.sv6>#秜Qo%a_1+c.Z<@#1܇s5ýٙ7=cR\I5iv-ьivh|3~72A(䔱t_1'Bfm㜝Vz1⺖CԞs4hM,3Oy{%9$Fͷ&se-޴WQ9?w!;VrLޛ.}(osh6^fۍژRbCOlqڵ| Ҿ:~U}>beiԌB(6(^?t^#`CZT[}>jk91A}idʎxv>j"-?߻%u_"R*h{<ڥ؈P`tKL ͸Aک(rM5<L6аhlv[K/@/3q.>)TW:b}XSGhR\(FVK.Ņ<^}UQ͘Hx1=E.GB-ɕPc\KGCiЌgb&nyT4Stwi +_Lڝj`VH+dܔ9W1^q541Zu!EYT׊rn߷, t%qDS lKqD9.s/ (~K#dƇWsosK08pUOP+%ٗH A\Z *H~6NDxkrc_oF@;qhHe>6tJs7#ݜ!`"Ԧ-ws`ʑ`"'cg0>C"g%vKiOٛ^ jSĞa ЌݥfunlKu2=k#J߹=`Vs1vEBk' a Tf<>cu-ګ/U~k(g:f]ә'K'lsbA1Nqis).'zLQ wuT 
&+A`$rS[M[ȎN=6tֆ3tZ2s'H_-WEE9` oZX Jpj%dEq CrH6ZМQ: *6wz̡1 iKsvtbFc>PgI4b0K0U+k]Ϧ a-=6hS|D|~1]"=xI[N*K}Xo^`6!t֊}97܇ Sc qzӸV|& #78ԏmKqi_6#@t9;sI徳݂dgv"zkYS;0x_uhf Yf>=7%.ϬkNv^ V|o<+ЌMBO$|~|htAg1q4[1vCBQ(SwA,gt :4c>O慽8tc>N?jB`ٱT%K@3Ym`40;}{ P}]@$H5;fghMyv`߾o/_vO;SOyv80 a @y䐜qK^ZyHeQ]+zr 4p4,1ƌ/BO00S6H'&K y3]cL``9C)g z Ux󰡝Wv1w/1聄=uMG$YV׊1WZ{: V$( C[zᮽ9 @[<4٨dPE~뺦pdci}%s?Ȏ cYI?vTa1a5?qWwxS6꺎ˆN U/_Lj>a%ee@@7h\35l@,@3 a 0 1`c0a @0 a 0 1`c0a @4\MJCIENDB`gpaw-24.1.0/doc/documentation/bse/bse.rst000066400000000000000000000450341454550013000202310ustar00rootroot00000000000000.. _bse theory: ============================================ Bethe-Salpeter Equation - Theory ============================================ Introduction ============ The BSE object calculates optical and dielectric properties of extended systems including the electron-hole interaction (excitonic effects). The four point Bethe-Salpeter equation ====================================== Please refer to :ref:`df_theory` for the documentation on the density response function `\chi`. Most of the derivations in this page follow reference \ [#Review]_. The following diagrams \ [#Review]_ representing the four point Bethe-Salpeter equation: .. image:: Feynman.png :height: 200 px :align: center It can be written as: .. math:: :label: chi_4point &\chi(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3, \mathbf{r}_4; \omega) = \chi^{0}(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3, \mathbf{r}_4; \omega) \\ & + \int d \mathbf{r}_5 d \mathbf{r}_6 d \mathbf{r}_7 d \mathbf{r}_8 \chi^{0}(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_5, \mathbf{r}_6; \omega) K( \mathbf{r}_5, \mathbf{r}_6, \mathbf{r}_7, \mathbf{r}_8; \omega) \chi(\mathbf{r}_7, \mathbf{r}_8, \mathbf{r}_3, \mathbf{r}_4; \omega) where .. 
math:: K = V - W = \frac{1}{| \mathbf{r}_5 - \mathbf{r}_7|} \delta_{ \mathbf{r}_5, \mathbf{r}_6} \delta_{ \mathbf{r}_7, \mathbf{r}_8} - \int d \mathbf{r} \frac{\epsilon^{-1}( \mathbf{r}_5, \mathbf{r}; \omega )} {| \mathbf{r} - \mathbf{r}_6|} \delta_{ \mathbf{r}_5, \mathbf{r}_7} \delta_{ \mathbf{r}_6, \mathbf{r}_8} The density response function `\chi`, defined as `\chi(\mathrm{r}, \mathrm{r}^{\prime}) = \delta n(\mathrm{r}) / \delta V_{ext}(\mathrm{r}^{\prime})`, has a form of .. math:: :label: chi_2point \chi(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3, \mathbf{r}_4; \omega) = \chi(\mathbf{r}_1, \mathbf{r}_3; \omega) \delta_{ \mathbf{r}_1, \mathbf{r}_2} \delta_{ \mathbf{r}_3, \mathbf{r}_4} The above equation also applies for the non interacting density response function `\chi^0`. As a result, the four point Bethe-Salpeter equation :eq:`chi_4point` can be reduced to: .. math:: :label: chi_reduced \chi(\mathbf{r}, \mathbf{r}^{\prime}; \omega) &= \chi^0(\mathbf{r}, \mathbf{r}^{\prime}; \omega) + \int d \mathbf{r}_5 d \mathbf{r}_7 \chi^0(\mathbf{r}, \mathbf{r}_5; \omega) \frac{1}{| \mathbf{r}_5 - \mathbf{r}_7|} \chi(\mathbf{r}_7, \mathbf{r}^{\prime}; \omega) \\ &+ \int d \mathbf{r}_5 d \mathbf{r}_6 d \mathbf{r}^{\prime \prime} \chi^0(\mathbf{r},\mathbf{r}, \mathbf{r}_5, \mathbf{r}_6; \omega) \frac{\epsilon^{-1}( \mathbf{r}_5, \mathbf{r}^{\prime \prime}; \omega )} {| \mathbf{r}^{\prime \prime} - \mathbf{r}_6|} \chi(\mathbf{r}_5, \mathbf{r}_6, \mathbf{r}^{\prime}, \mathbf{r}^{\prime}; \omega) Transform using electron-hole pair basis ======================================== Since for each excitation, only a limited number of electron-hole pairs will contribute , the above equation can be effectively transformed to electron-hole pair space. Supposed that the eigenfunctions `\psi_{n}` of the effective Kohn-Sham hamiltonian form an orthonormal and complete basis set, any four point function `S` can then be transformed as .. 
math:: :label: S S(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3, \mathbf{r}_4; \omega) = \sum_{n_1 n_2 n_3 n_4} \psi^{\ast}_{n_{1}}(\mathbf{r}_1) \psi_{n_{2}}(\mathbf{r}_2) \psi_{n_{3}}(\mathbf{r}_3) \psi^{\ast}_{n_{4}}(\mathbf{r}_4) S_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} (\omega) The non interacting density response function `\chi^0` .. math:: :label: chi_0 \chi^0(\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3, \mathbf{r}_4; \omega) = \sum_{n n^{\prime}} \frac{f_n - f_{n^{\prime}}}{\epsilon_n - \epsilon_{n^{\prime}}-\omega} \psi^{\ast}_n(\mathbf{r}_1) \psi_{n^{\prime}}(\mathbf{r}_2) \psi_n(\mathbf{r}_3) \psi^{\ast}_{n^{\prime}}(\mathbf{r}_4) is then diagonal in the electron-hole basis with .. math:: :label: chi_0_eh \chi^0_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} (\omega) = \frac{f_{n_2} - f_{n_1}}{\epsilon_{n_2} - \epsilon_{n_1}-\omega} \delta_{n_1, n_3} \delta_{n_2, n_4} Substituting Eqs. :eq:`S` and :eq:`chi_0` into Eq. :eq:`chi_reduced` and using Eq. :eq:`chi_2point`, the four point Bethe-Salpeter equation in electron-hole pair space becomes .. math:: :label: chi_eh \chi_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} (\omega) = \chi^0_{n_1 n_2} (\omega) \left[ \delta_{n_1 n_3} \delta_{n_2 n_4} + \sum_{n_5 n_6} K_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} (\omega) \chi_{\begin{array}{l} n_5 n_6 \\ n_3 n_4 \end{array}} (\omega) \right] with `K = V - W` and .. math:: :label: V_2p V_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} = \int d \mathbf{r} d \mathbf{r}^{\prime} \psi_{n_1}(\mathbf{r}) \psi_{n_2}^{\ast}(\mathbf{r}) \frac{1}{| \mathbf{r}-\mathbf{r}^{\prime} |} \psi^{\ast}_{n_5}(\mathbf{r}^{\prime}) \psi_{n_6}(\mathbf{r}^{\prime}) ..
math:: :label: W_2p W_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} (\omega) = \int d \mathbf{r} d \mathbf{r}^{\prime} d \mathbf{r}^{\prime \prime} \psi_{n_1}(\mathbf{r}) \psi_{n_2}^{\ast}(\mathbf{r}^{\prime}) \frac{\epsilon^{-1}( \mathbf{r}, \mathbf{r}^{\prime \prime}; \omega )}{| \mathbf{r}^{\prime \prime}-\mathbf{r}^{\prime} |} \psi^{\ast}_{n_5}(\mathbf{r}) \psi_{n_6}(\mathbf{r}^{\prime}) Bethe-Salpeter equation as an effective two-particle Hamiltonian ================================================================ In order to solve Eq. :eq:`chi_eh`, one has to invert a matrix for each frequency. This problem can be reformulated as an effective eigenvalue problem. Rewrite Eq. :eq:`chi_eh` as .. math:: \sum_{n_5 n_6} \left[ \delta_{n_1 n_5} \delta_{n_2 n_6} - \chi^0_{n_1 n_2}(\omega) K_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} (\omega) \right] \chi_{\begin{array}{l} n_5 n_6 \\ n_3 n_4 \end{array}} (\omega) = \chi^0_{n_1 n_2}(\omega) Inserting Eq. :eq:`chi_0_eh` into the above equation, one gets .. math:: :label: chi_rewrite \sum_{n_5 n_6} \left[ (\epsilon_{n_2} - \epsilon_{n_1}-\omega) \delta_{n_1 n_5} \delta_{n_2 n_6} - (f_{n_2} - f_{n_1}) K_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} (\omega) \right] \chi_{\begin{array}{l} n_5 n_6 \\ n_3 n_4 \end{array}} (\omega) = f_{n_2} - f_{n_1} By using a static interaction kernel `K(\omega=0)`, an effective frequency-independent two-particle Hamiltonian is defined as: .. math:: :label: H_2p \mathcal{H}_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} \equiv (\epsilon_{n_2} - \epsilon_{n_1}) \delta_{n_1 n_5} \delta_{n_2 n_6} - (f_{n_2} - f_{n_1}) K_{\begin{array}{l} n_1 n_2 \\ n_5 n_6 \end{array}} Inserting the above effective Hamiltonian into Eq. :eq:`chi_rewrite`, one can then write ..
math:: :label: chi_2p \chi_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} = \left[ \mathcal{H} - I \omega \right]^{-1}_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} (f_{n_2} - f_{n_1}) where `I` is an identity matrix that has the same size as `\mathcal{H}`. In the following subsection, we will show that by diagonalizing the Hamiltonian matrix `\mathcal{H}`, the obtained eigenvalues are the excitation energies of elementary electronic excitations such as excitons or plasmons, while the eigenvectors are related to the strength of the electronic excitations. The spectral representation of the inverse two-particle Hamiltonian is .. math:: :label: spectral \left[ \mathcal{H} - I \omega \right]^{-1}_{\begin{array}{l} n_1 n_2 \\ n_3 n_4 \end{array}} = \sum_{\lambda \lambda^{\prime}} \frac{A^{n_1 n_2}_{\lambda} A^{n_3 n_4}_{\lambda^{\prime}} N^{-1}_{\lambda \lambda^{\prime}}}{E_{\lambda} - \omega} with the eigenvalues `E_{\lambda}` and eigenvectors `A_{\lambda}` given by .. math:: \mathcal{H} A_{\lambda} = E_{\lambda} A_{\lambda} and the overlap matrix `N_{\lambda \lambda^{\prime} }` defined by .. math:: N_{\lambda \lambda^{\prime}} \equiv \sum_{n_1 n_2} [A_{\lambda}^{n_1 n_2}]^{\ast} A_{\lambda^{\prime}}^{n_1 n_2} If the Hamiltonian `\mathcal{H}` is Hermitian, the eigenvectors `A_{\lambda}` are then orthogonal and .. math:: N_{\lambda \lambda^{\prime}} = \delta_{\lambda \lambda^{\prime}} Explicit kpoint dependence ========================== In this subsection, the kpoint dependence of the eigenstates is written explicitly. The effective two particle Hamiltonian in Eq. :eq:`H_2p` becomes ..
math:: \mathcal{H}_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) \equiv (\epsilon_{n_2 \mathbf{k}_1 + \mathbf{q}} - \epsilon_{n_1 \mathbf{k}_1}) \delta_{n_1 n_5} \delta_{n_2 n_6} \delta_{\mathbf{k}_1 \mathbf{k}_5} - (f_{n_2 \mathbf{k}_1 + \mathbf{q}} - f_{n_1 \mathbf{k}_1}) K_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) where `K=V-W` and according to Eq. :eq:`V_2p` and :eq:`W_2p`, .. math:: :label: V_eh V_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) = \int d \mathbf{r} d \mathbf{r}^{\prime} \psi_{n_1 \mathbf{k}_1}(\mathbf{r}) \psi_{n_2 \mathbf{k}_1 + \mathbf{q}}^{\ast}(\mathbf{r}) \frac{1}{| \mathbf{r}-\mathbf{r}^{\prime} |} \psi^{\ast}_{n_5 \mathbf{k}_5}(\mathbf{r}^{\prime}) \psi_{n_6 \mathbf{k}_5 + \mathbf{q}}(\mathbf{r}^{\prime}) .. math:: :label: W_eh W_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) = \int d \mathbf{r} d \mathbf{r}^{\prime} \psi_{n_1 \mathbf{k}_1}(\mathbf{r}) \psi_{n_2 \mathbf{k}_1 + \mathbf{q}}^{\ast}(\mathbf{r}^{\prime}) \frac{\epsilon^{-1}( \mathbf{r}, \mathbf{r}^{\prime}; \omega=0 )}{| \mathbf{r}-\mathbf{r}^{\prime} |} \psi^{\ast}_{n_5 \mathbf{k}_5}(\mathbf{r}) \psi_{n_6 \mathbf{k}_5 + \mathbf{q}}(\mathbf{r}^{\prime}) The response function in the electron-hole pair space, according to Eq. :eq:`chi_2p` and :eq:`spectral` becomes .. 
math:: :label: chi_ehk \chi_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_3 n_4 \mathbf{k}_3 \end{array}} (\mathbf{q}, \omega) = \sum_{\lambda \lambda^{\prime}} \frac{A^{n_1 n_2 \mathbf{k}_1}_{\lambda} A^{n_3 n_4 \mathbf{k}_3}_{\lambda^{\prime}} N^{-1}_{\lambda \lambda^{\prime}}}{E_{\lambda} - \omega} (f_{n_2 \mathbf{k}_1 + \mathbf{q}} - f_{n_1 \mathbf{k}_1}) Transform between electron-hole pair space and reciprocal space =============================================================== The physical quantities such as macroscopic dielectric function (refer to :ref:`macroscopic_dielectric_function`) are related to the long wavelength limit `(\mathbf{G}=0, \mathbf{G}^{\prime}=0)` component of the response function `\chi_{\mathbf{G} \mathbf{G}^{\prime}}`. Its relation to the response function in electron-hole pair space `\chi_{\begin{array}{l} n_1 n_2 \mathbf{k}_1\\ n_3 n_4 \mathbf{k}_3 \end{array}}` is written as .. math:: :label: chi_eh_G_transform \chi_{\mathbf{G} \mathbf{G}^{\prime}} (\mathbf{q}, \omega) = \frac{1}{\Omega} \sum_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_3 n_4 \mathbf{k}_3 \end{array}} \chi_{\begin{array}{l} n_1 n_2 \mathbf{k}_1\\ n_3 n_4 \mathbf{k}_3 \end{array}} (\mathbf{q},\omega) \ \ \rho_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_2 \mathbf{k}_1 + \mathbf{q} \end{array}} (\mathbf{G}) \ \ \rho^{\ast}_{\begin{array}{l} n_3 \mathbf{k}_3 \\ n_4 \mathbf{k}_3 + \mathbf{q} \end{array}} (\mathbf{G}^{\prime}) where the charge density matrix `\rho (\mathbf{G})` is defined as: .. math:: \rho_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_2 \mathbf{k}_1 + \mathbf{q} \end{array}} (\mathbf{G}) \equiv \langle \psi_{n_1 \mathbf{k}_1} | e^{-i(\mathbf{q}+\mathbf{G}) \cdot \mathbf{r} } | \psi_{n_2 \mathbf{k}_1 + \mathbf{q}} \rangle Employing Fourier transform .. 
math:: \frac{1}{| \mathbf{r}-\mathbf{r}^{\prime} |} = \frac{1}{\Omega} \sum_{\mathbf{q} \mathbf{G}} \frac{4\pi}{ | \mathbf{q} + \mathbf{G}|^2 } e^{i ( \mathbf{q} + \mathbf{G}) \cdot ( \mathbf{r} - \mathbf{r}^{\prime} ) } .. math:: \int d \mathbf{r}^{\prime \prime}\frac{\epsilon^{-1}(\mathbf{r},\mathbf{r}^{\prime \prime}) }{| \mathbf{r}^{\prime \prime}-\mathbf{r}^{\prime} |} = \frac{1}{\Omega} \sum_{\mathbf{q} \mathbf{G} \mathbf{G}^{\prime} } e^{i ( \mathbf{q} + \mathbf{G}) \cdot \mathbf{r} } \frac{4\pi \epsilon^{-1}_{\mathbf{G} \mathbf{G}^{\prime}} (\mathbf{q}) }{ | \mathbf{q} + \mathbf{G}^{\prime}|^2 } e^{-i ( \mathbf{q} + \mathbf{G}^{\prime}) \cdot \mathbf{r}^{\prime} } where `\Omega` is the volume of the unit cell, `V` and `W` in Eq. :eq:`V_eh` and :eq:`W_eh` can then be written respectively as .. math:: :label: V_eh_G V_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) =\sum_{\mathbf{G}} \rho^{\ast}_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_2 \mathbf{k}_1 + \mathbf{q} \end{array}} (\mathbf{G}) \ \frac{4\pi}{| \mathbf{q} + \mathbf{G}|^2} \ \rho_{\begin{array}{l} n_5 \mathbf{k}_5 \\ n_6 \mathbf{k}_5 + \mathbf{q} \end{array}} (\mathbf{G}) .. math:: W_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) = \sum_{\mathbf{G} \mathbf{G}^{\prime}} \rho^{\ast}_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_5 \mathbf{k}_5 \end{array}} (\mathbf{G}) \ \frac{4\pi \epsilon^{-1}_{\mathbf{G} \mathbf{G}^{\prime}} (\mathbf{k}_5-\mathbf{k}_1; \omega=0) }{| \mathbf{k}_5-\mathbf{k}_1 + \mathbf{G}|^2} \ \rho_{\begin{array}{l} n_2 \mathbf{k}_1 + \mathbf{q} \\ n_6 \mathbf{k}_5 + \mathbf{q} \end{array}} (\mathbf{G}^{\prime}) Dielectric function and its relation to spectra =============================================== The dielectric matrix is related to the density response matrix by .. 
math:: \epsilon^{-1}_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) = \delta_{\mathbf G \mathbf G^{\prime}} + \frac{4\pi}{|\mathbf q + \mathbf G|^2} \chi_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) The electron energy loss spectrum (EELS) is proportional to `-\mathrm{Im} \epsilon^{-1}_{00}`: .. math:: \mathrm{EELS} \propto -\mathrm{Im} \epsilon^{-1}_{00}(\mathbf q, \omega) = - \frac{4\pi}{|\mathbf{q}|^2} \mathrm{Im} \chi_{00}(\mathbf q, \omega) As shown in :ref:`macroscopic_dielectric_function`, the optical absorption spectrum (ABS) is `\mathrm{Im} \epsilon_M`. Instead of calculating from `\epsilon^{-1}_{00}`, `\epsilon_M` can also be constructed from a modified response function `\bar{\chi}` by .. math:: \epsilon_M (\omega) = 1 - \frac{4\pi}{|\mathbf{q}|^2} \bar{\chi}_{00}(\mathbf{q}\rightarrow 0, \omega) .. math:: \mathrm{ABS} = \mathrm{Im} \epsilon_M (\omega) = -\frac{4\pi}{|\mathbf{q}|^2} \mathrm{Im}\bar{\chi}_{00}(\mathbf{q}\rightarrow 0, \omega) The modified response function `\bar{\chi}` is constructed in the same way as `\chi`, except that the long range Coulomb interaction for kernel `V` in Eq. :eq:`V_eh_G` is excluded so that .. math:: \bar{V}_{\begin{array}{l} n_1 n_2 \mathbf{k}_1 \\ n_5 n_6 \mathbf{k}_5 \end{array}} ( \mathbf{q}) =\sum_{\mathbf{G} \neq 0} \rho^{\ast}_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_2 \mathbf{k}_1 + \mathbf{q} \end{array}} (\mathbf{G}) \ \frac{4\pi}{| \mathbf{q} + \mathbf{G}|^2} \ \rho_{\begin{array}{l} n_5 \mathbf{k}_5 \\ n_6 \mathbf{k}_5 + \mathbf{q} \end{array}} (\mathbf{G}) The implementation flowchart ============================ Here is a short summary for the actual implementation: 1. Construct the effective two particle Hamiltonian (using notation `S \equiv \left\{ n_1 n_2 \mathbf{k}_1; \mathbf{q} \right\}` and `S^{\prime} \equiv \left\{ n_3 n_4 \mathbf{k}_3; \mathbf{q} \right\}`) .. math:: \mathcal{H}_{SS^{\prime}} (\mathbf{q}) = \epsilon_S \delta_{SS^{\prime}} - f_S K_{SS^{\prime}} ( \mathbf{q}) where ..
math:: :label: epsilon_S \epsilon_S = \epsilon_{n_2 \mathbf{k}_1 + \mathbf{q}} - \epsilon_{n_1 \mathbf{k}_1} .. math:: f_S = f_{n_2 \mathbf{k}_1 + \mathbf{q}} - f_{n_1 \mathbf{k}_1} with `K=V-0.5W`, where 0.5 accounts for the fact that only singlet excitations are allowed in the optical absorption and `W` is diagonal in spin. The Coulomb interaction `V` is given by .. math:: V_{SS^{\prime}} (\mathbf{q}) = \sum_{\mathbf{G} \neq 0} \rho^{\ast}_S(\mathbf{G}) \frac{4\pi}{| \mathbf{q} + \mathbf{G}|^2} \rho_{S^{\prime}}(\mathbf{G}) \ \ (\mathrm{ABS}) .. math:: V_{SS^{\prime}} (\mathbf{q}) = \sum_{\mathbf{G}} \rho^{\ast}_S(\mathbf{G}) \frac{4\pi}{| \mathbf{q} + \mathbf{G}|^2} \rho_{S^{\prime}}(\mathbf{G}) \ \ (\mathrm{EELS}) where .. math:: \rho_{S}(\mathbf{G}) = \langle \psi_{n_1 \mathbf{k}_1} | e^{-i(\mathbf{q}+\mathbf{G}) \cdot \mathbf{r} } | \psi_{n_2 \mathbf{k}_1 + \mathbf{q}} \rangle The screened interaction kernel `W` is given by .. math:: W_{SS^{\prime}} ( \mathbf{q}) = \sum_{\mathbf{G} \mathbf{G}^{\prime}} \rho^{\ast}_{\begin{array}{l} n_1 \mathbf{k}_1 \\ n_3 \mathbf{k}_3 \end{array}} (\mathbf{G}) \ \frac{4\pi \epsilon^{-1}_{\mathbf{G} \mathbf{G}^{\prime}} (\mathbf{k}_3 - \mathbf{k}_1; \omega=0) }{| \mathbf{k}_3 - \mathbf{k}_1 + \mathbf{G}|^2} \ \rho_{\begin{array}{l} n_2 \mathbf{k}_1 + \mathbf{q} \\ n_4 \mathbf{k}_3 + \mathbf{q} \end{array}} (\mathbf{G}^{\prime}) 2. Diagonalize `\mathcal{H}_{SS^{\prime}}` with the eigenvalues `E_{\lambda}` and eigenvectors `A_{\lambda}` given by .. math:: \mathcal{H} A_{\lambda} = E_{\lambda} A_{\lambda} and the overlap matrix `N_{\lambda \lambda^{\prime} }` defined by .. math:: N_{\lambda \lambda^{\prime}} \equiv \sum_{S} [A_{\lambda}^{S}]^{\ast} A_{\lambda^{\prime}}^{S} The eigenvalues `E_{\lambda}`, which correspond to the poles of `\chi`, give the excitation energies of the elementary electron excitations. 3. The spectra (both EELS and ABS) are calculated by ..
math:: -\frac{4\pi}{|\mathbf{q}|^2} \mathrm{Im} \chi_{00}(\mathbf q, \omega) = - \frac{4\pi}{|\mathbf{q}|^2 \Omega} \sum_{\lambda \lambda^{\prime}} \sum_{SS^{\prime}} \frac{ f_S A^{S}_{\lambda} A^{S^{\prime}}_{\lambda^{\prime}} N^{-1}_{\lambda \lambda^{\prime}}}{E_{\lambda} - \omega} \ \rho_S(0) \rho_{S^{\prime}}(0) Tamm-Dancoff approximation ========================== The Tamm-Dancoff approximation corresponds to `\epsilon_S >= 0` in Eq. :eq:`epsilon_S`. .. [#Review] G. Onida, L. Reining and A. Rubio, Electronic excitations: density-functional versus many-body Green's-function approaches, *Rev. Mod. Phys.* **74**, 601 (2002) gpaw-24.1.0/doc/documentation/cdft/000077500000000000000000000000001454550013000170675ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/cdft/He2.py000066400000000000000000000045771454550013000200740ustar00rootroot00000000000000from ase import Atoms from gpaw import GPAW, FermiDirac, Davidson, Mixer from gpaw.cdft.cdft import CDFT from gpaw.cdft.cdft_coupling import CouplingParameters # Set up the system distance = 2.5 sys = Atoms('He2', positions=([0., 0., 0.], [0., 0., distance])) sys.center(3) sys.set_pbc(False) sys.set_initial_magnetic_moments([0.5, 0.5]) # Calculator for the initial state calc_a = GPAW( h=0.2, mode='fd', basis='dzp', charge=1, xc='PBE', symmetry='off', occupations=FermiDirac(0., fixmagmom=True), eigensolver=Davidson(3), spinpol=True, # only spin-polarized calculations are supported nbands=4, mixer=Mixer(beta=0.25, nmaxold=3, weight=100.0), txt=f'He2+_initial_{distance:3.2f}.txt', convergence={ 'eigenstates': 1.0e-4, 'density': 1.0e-1, 'energy': 1e-1, 'bands': 4}) # Set initial state cdft cdft_a = CDFT( calc=calc_a, atoms=sys, charge_regions=[[0]], # choose atom 0 as the constrained region charges=[1], # constrain +1 charge charge_coefs=[2.7], # initial guess for Vc method='L-BFGS-B', # Vc optimization method txt=f'He2+_initial_{distance:3.2f}.cdft', # cDFT output file minimizer_options={'gtol': 0.01}) # tolerance 
for cdft # Get cdft energy sys.calc = cdft_a sys.get_potential_energy() # the same for the final state calc_b = GPAW(h=0.2, mode='fd', basis='dzp', charge=1, xc='PBE', symmetry='off', occupations=FermiDirac(0., fixmagmom=True), eigensolver=Davidson(3), spinpol=True, nbands=4, mixer=Mixer(beta=0.25, nmaxold=3, weight=100.0), txt=f'He2+_final_{distance:3.2f}.txt', convergence={ 'eigenstates': 1.0e-4, 'density': 1.0e-1, 'energy': 1e-1, 'bands': 4}) cdft_b = CDFT( calc=calc_b, atoms=sys, charge_regions=[[1]], # choose atom 1 charges=[1], # constrained charge +1 charge_coefs=[2.7], method='L-BFGS-B', txt=f'He2+_final_{distance:3.2f}.cdft', minimizer_options={'gtol': 0.01}) sys.calc = cdft_b sys.get_potential_energy() # Now for the coupling parameter coupling = CouplingParameters(cdft_a, cdft_b, AE=False) # use pseudo orbitals H12 = coupling.get_coupling_term() # use original cDFT method gpaw-24.1.0/doc/documentation/cdft/agts.py000066400000000000000000000001141454550013000203730ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): run(script='He2.py') gpaw-24.1.0/doc/documentation/cdft/cdft.rst000066400000000000000000000211551454550013000205450ustar00rootroot00000000000000.. module:: gpaw.cdft .. _constrained_DFT: ====================== Constrained DFT (cDFT) ====================== Introduction ============ cDFT is a method for build charge/spin localized or diabatic states with a user-defined charge and/or spin state. As such cDFT, is a useful tool for widening the scope of ground-state DFT to excitation processes, correcting for self- interaction energy in current DFT functionals, excitation energy, and electron transfer as well as parametrizing model Hamiltonians, for example. Theoretical Background ====================== The general cDFT methodology is reviewed in [#cdft1]_ and the publication of the GPAW implementation is available in [#cdft2]_. In short, the cDFT works by specifying an additional constraining term to the KS functional. 
The role of this constraint is to enforce a user specified charge/spin state (`N_c`) on the chosen regions of atoms. The constrained regions are specified by a weight function `w(\mathbf{r})` and the strength of the constraining potential acting on the specified region is `V_c`. With these definitions the new energy functional with the constraint is .. math:: F[n(\mathbf{r}), V_c] = E^{KS}[n] + \sum_c V_c \sum_{\sigma}\left(\int d\mathbf{r}w_c^{\sigma}(\mathbf{r}) n^{\sigma}(\mathbf{r})-N_c\right) where `E^{KS}` is the Kohn-Sham energy, `c` specifies the region, and `\sigma` is the spin variable. It is also seen that `V_c` is also a Lagrange multiplier. The modified energy functional leads to an additional external potential .. math:: v_{\rm eff}^{\sigma}=\dfrac{\delta E^{KS}[n(\mathbf{r})]} {\delta n(\mathbf{r})} + \sum_c V_cw_c^{\sigma}(\mathbf{r}) This is just a sum of the usual KS potential and the constraining potential which is also used in the self-consistent calculation. The constraint is further enforced by introducing the convergence criteria .. math:: C \geq \bigg\lvert \sum_{\sigma} \int {\rm d}\mathbf{r} w_c^{\sigma}(\mathbf{r})n^{\sigma}(\mathbf{r}) - N_c \bigg\rvert \, ,\forall\, c The `V_c` is self-consistently optimized so that the specified constraints are satisfied. Formally, this is written as .. math:: F\left[V_{c}\right]= \min _{n} \max _{\left\{V_{c}\right\}} \left[E^{\mathrm{KS}}[n(\mathbf{r})]+ \sum_{c} V_{c}\left(\int \mathrm{d} \mathbf{r} w_{c}(\mathbf{r}) n(\mathbf{r})-N_{c}\right)\right] `V_c` is obtained from .. math:: \frac{\mathrm{d} F}{\mathrm{d} V_{c}} = \int \mathrm{d} \mathbf{r} w_{c}(\mathbf{r}) n(\mathbf{r})-N_{c}=0 In the end, one ends up with a modified electron/spin density localized on the chosen atoms. Notes on using cDFT =================== The weight function ------------------- In the GPAW implementation a Hirschfeld partition scheme with atom-centered Gaussian functions is used. 
These Gaussians have two tunable parameters: the cut-off `R_c` and the width `\mu`. If the constrained region cuts a covalent bond, the results are sensitive to the width parameter. There is no universal guide for choosing the width in such cases. A sensible choice is to match the Gaussian-Hirschfeld charges with e.g. Bader charges. The function :meth:`~gpaw.cdft.cdft.CDFT.get_number_of_electrons_on_atoms` helps in this process. .. autoclass:: gpaw.cdft.cdft.CDFT :members: Optimizing `V_c` ---------------- Updating and optimizing the Lagrange multipliers `V_c` is achieved using Scipy optimization routines. As it is easy to compute the gradient of the energy wrt `V_c`, gradient based optimizers are recommended. The best performing optimizer seems to be the :literal:`L-BFGS-B`. The accuracy of the optimization is controlled mainly by the :literal:`minimizer_options={'gtol':0.01}` parameter which measures the error between the set and computed charge/spin value. A typical :literal:`gtol` value is around 0.01-0.1. Choosing the constraint values and regions ------------------------------------------ Both charge and spin constraints can be specified. The charge constraints are specified relative to neutral atoms: specifying :literal:`charges = [1]` constrains the first region to have a net charge of +1. Note, that if the usual DFT calculation yields e.g. a Fe ion with charge +2.5, specifying the charge constraint +1 will result in +1, not an additional hole on Fe with a charge state +3.5! A constrained region may contain several atoms and also several constrained regions can be specified. However, converging more than two regions is usually very difficult. Tips for converging cDFT calculations ------------------------------------- Unfortunately, the cDFT sometimes exhibits poor convergence. 1. Choose meaningful constraints and regions 2. Try to provide a good initial guess for the `V_c`.
In the actual calculation this initial guess is given by the parameter :literal:`charge_coefs` or :literal:`spin_coefs`. 3. Use L-BFGS-B to set bounds for `V_c` by using :literal:`minimizer_options={'bounds':[min,max]}`. 4. Converge the underlying DFT calculation well i.e. use tight convergence criteria. Constructing diabatic Hamiltonians ================================== One of the main uses for cDFT is constructing diabatic Hamiltonians which utilize cDFT states as the diabats. For instance, a 2x2 Hamiltonian matrix would have diagonal elements `H_{11}` and `H_{22}` as well as off-diagonals `H_{12}` and `H_{21}`. `H_{11}` and `H_{22}` values are given directly by cDFT energies (not cDFT free energies). The off-diagonal coupling elements `H_{12}` and `H_{21}` are also needed and often utilized in e.g. Configuration Interaction-cDFT, in parametrizing model Hamiltonians or in computing non-adiabatic charge transfer rates. Note, that all parameters need to be computed at the same geometry. The coupling elements are computed using the CouplingParameters class. There are several options and optional inputs. There are two main methods for computing coupling elements: the original cDFT approach from [#cdft1]_ and a more general overlap or Migliore method detailed in [#cdft3]_. cDFT coupling constant ---------------------- This method is the original approach in the context of cDFT. It works well for simple systems. For complex systems it becomes quite sensitive to small errors in the Lagrange multipliers and constraints, and the overlap method is recommended instead. The inputs for the calculation are two cDFT objects with different constraints. The coupling constant is computed using :literal:`get_coupling_term`. Overlap coupling constant ------------------------- This approach has been found to perform very well for a range of systems. However, it has one major limitation: it can only be used if the two diabatic states have different energies.
In addition to the two cDFT states/objects also a normal DFT calculator without any constraints is needed. The coupling constant is computed using :literal:`get_migliore_coupling`. Additional comments ------------------- In [#cdft2]_ the coupling constants were computed by computing all-electron wave functions on a grid. However, this is quite slow and a much faster implementation utilizing only pseudo wave functions and atomic corrections has been added. For the tested cases both give equally good values for the coupling constant. Hence, it is recommended to use the pseudo wave functions which is set by :literal:`AE=False`. The quantities needed for computing the coupling constants can be parallelized only over the grid. Example of hole transfer in `He_2^+` ==================================== Both the cDFT calculation and extraction of the coupling element calculation are demonstrated for the simple `He_2^+` system. .. literalinclude:: He2.py The most important cDFT results are found in the .cdft files. For instance, the errors, iterations, and cDFT parameters are shown. Also, the energy can be found in this file. The most relevant energy is the Final cDFT Energy (not the free energy). References ========== .. [#cdft1] B. Kaduk, T. Kowalczyk, T. Van Voorhis, :doi:`Constrained Density Functional Theory <10.1021/cr200148b>`, *Chem. Rev.*, **112** 321–370 (2012) .. [#cdft2] M. Melander, E. Jónsson, J.J. Mortensen, T. Vegge, J.M. Garcia-Lastra, :doi:`Implementation of Constrained DFT for Computing Charge Transfer Rates within the Projector Augmented Wave Method <10.1021/acs.jctc.6b00815>`, *J. Chem. Theory Comput.*, **12**, 5367−5378 (2016) .. [#cdft3] A. Migliore, :doi:`Nonorthogonality Problem and Effective Electronic Coupling Calculation: Application to Charge Transfer in π-Stacks Relevant to Biochemistry and Molecular Electronics <10.1021/ct200192d>`, *J. Chem.
Theory Comput.*, **7**, 1712-1725 (2011) gpaw-24.1.0/doc/documentation/citations.py000066400000000000000000000065371454550013000205310ustar00rootroot00000000000000# creates: citations.png import os import datetime import matplotlib.pyplot as plt months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] def f(filename): papers = {} lines = open(filename).readlines() n = 0 dois = set() while n < len(lines): line = lines[n] tag = line[:2] if tag == 'TI': ntitle = n y = None m = 1 d = 15 elif tag == 'SO': title = ' '.join(lines[i][3:-1] for i in range(ntitle, n)) elif tag == 'DI': doi = line[3:-1] elif tag == 'PY': y = int(line.split()[1]) elif tag == 'PD': for w in line.split()[1:]: if w[0].isdigit(): w = int(w) if w < 100: d = w else: y = w else: if '-' in w: w = w.split('-')[-1] m = months.index(w) + 1 elif tag == '\n' and y is not None: date = datetime.date(y, m, d) if doi not in dois: dois.add(doi) papers[doi] = (date, title) n += 1 return papers # The papers here are: label_bib = { 'gpaw1': 'Mortensen et al., Phys. Rev. B (2005)', # http://doi.org/10.1103/PhysRevB.71.035109 'gpaw2': 'Enkovaara et al., J. Phys.: Condens. Matter (2010)', # http://doi.org/10.1088/0953-8984/22/25/253202 'lcao': 'Larsen et al., Phys. Rev. B (2009)', # http://doi.org/10.1103/PhysRevB.80.195112 'tddft': 'Walter et al., J. Chem. Phys. (2008)', # http://doi.org/10.1063/1.2943138 'response': 'Yan et al., Phys. Rev. 
B (2011)', # http://doi.org/10.1103/PhysRevB.83.245122 } plt.figure(figsize=(8, 4)) total = {} # for bib in ['gpaw1', 'tddft', 'lcao', 'gpaw2', 'response']: for bib in ['gpaw1', 'gpaw2']: papers = {} for line in open(bib + '.txt'): date, doi, title = line.split(' ', 2) papers[doi] = (datetime.date(*[int(x) for x in date.split('-')]), title.strip()) if os.path.isfile(bib + '.bib'): papers.update(f(bib + '.bib')) papers = sorted((papers[doi][0], doi, papers[doi][1]) for doi in papers) plt.plot([paper[0] for paper in papers], range(1, len(papers) + 1), '-o', label=label_bib[bib]) with open(bib + '.txt', 'w') as fd: for date, doi, title in papers: fd.write('%d-%02d-%02d %s %s\n' % (date.year, date.month, date.day, doi, title)) # assert '"' not in title, title total[doi] = (date, title) x = dict([(p[1], 0) for p in papers]) print((bib, len(papers), len(x), len(total))) allpapers = sorted((paper[0], doi, paper[1]) for doi, paper in total.items()) plt.plot([paper[0] for paper in allpapers], range(1, len(allpapers) + 1), '-o', label='Total') if 0: with open('citations.csv', 'w') as fd: n = len(allpapers) for date, doi, title in allpapers[::-1]: fd.write('%d,":doi:`%s <%s>`"\n' % (n, title, doi)) n -= 1 plt.xlabel('date') plt.ylabel('number of citations') plt.legend(loc='upper left') plt.savefig('citations.png') gpaw-24.1.0/doc/documentation/cmdline.rst000066400000000000000000000063151454550013000203210ustar00rootroot00000000000000.. program:: gpaw .. highlight:: bash .. index:: gpaw, command line interface, CLI .. 
_cli: ====================== Command line interface ====================== GPAW has a command line tool called :program:`gpaw` with the following sub-commands: ============== ===================================================== sub-command description ============== ===================================================== help Help for sub-command run Run calculation with GPAW info Show versions of GPAW and its dependencies dos Calculate (projected) density of states from gpw-file gpw Write summary of GPAW-restart file completion Add tab-completion for Bash atom Solve radial equation for an atom python Run GPAW's parallel Python interpreter sbatch Submit a GPAW Python script via sbatch dataset Calculate density of states from gpw-file symmetry Analyse symmetry (and show IBZ **k**-points) install-data Install PAW datasets, pseudopotential or basis sets ============== ===================================================== Example:: $ gpaw info Help ==== You can do:: $ gpaw --help $ gpaw sub-command --help to get help (or ``-h`` for short). Other command-line tools ======================== There are also CLI tools for: ===================================== ============================ description module ===================================== ============================ analysing :ref:`point groups` :mod:`gpaw.point_groups` :ref:`hyperfine` :mod:`gpaw.hyperfine` :ref:`fulldiag` :mod:`gpaw.fulldiag` Calculation of dipole matrix elements :mod:`gpaw.utilities.dipole` PAW-dataset convergence :mod:`gpaw.utilities.ekin` :ref:`elph` ``gpaw.elph.gpts`` ===================================== ============================ Try:: $ python3 -m --help .. module:: gpaw.fulldiag .. 
_fulldiag: Finding all or some unoccupied states ------------------------------------- If you have a gpw-file containing the ground-state density for a plane-wave calculation, then you can set up the full `H_{\mathbf{G}\mathbf{G}'}(\mathbf{k})` and `S_{\mathbf{G}\mathbf{G}'}(\mathbf{k})` matrices in your plane-wave basis and use direct diagonalization to find all the eigenvalues and eigenstates in one step. Usage:: $ python3 -m gpaw.fulldiag [options] Options: -h, --help Show this help message and exit -n BANDS, --bands=BANDS Number of bands to calculate. Defaults to all. -s SCALAPACK, --scalapack=SCALAPACK Number of cores to use for ScaLapack. Default is one. -d, --dry-run Just write out size of matrices. Typically, you will want to run this in parallel and distribute the matrices using ScaLapack:: $ gpaw -P 8 python -m gpaw.fulldiag abc.gpw --scalapack=8 ... .. _bash completion: Bash completion =============== You can enable bash completion like this:: $ gpaw completion This will append a line like this:: complete -o default -C /path/to/gpaw/gpaw/cli/complete.py gpaw to your ``~/.bashrc``.
gpaw-24.1.0/doc/documentation/co_wavefunctions.py000066400000000000000000000045351454550013000221040ustar00rootroot00000000000000# creates: 2sigma.png, co_wavefunctions.png import numpy as np import matplotlib.pyplot as plt from ase import Atoms from ase.units import Bohr from gpaw import GPAW from gpaw.spherical_harmonics import Y L = 6.0 d = 1.13 co = Atoms('CO', cell=[L, L, L], positions=[(0, 0, 0), (d, 0, 0)]) co.center() co.calc = GPAW(mode='lcao', txt='CO.txt') e = co.get_potential_energy() print(co.positions[:, 0] - L / 2) dpi = 100 C = 'g' N = 100 for a, pp in enumerate(co.calc.wfs.setups): rc = max(pp.rcut_j) print(pp.rcut_j) x = np.linspace(-rc, rc, 2 * N + 1) P_i = co.calc.wfs.kpt_qs[0][0].projections[a][1] / Bohr**1.5 phi_i = np.empty((len(P_i), len(x))) phit_i = np.empty((len(P_i), len(x))) i = 0 for l, phi_g, phit_g in zip(pp.l_j, pp.data.phi_jg, pp.data.phit_jg): f = pp.rgd.spline(phi_g, rc + 0.3, l).map(x[N:]) * x[N:]**l ft = pp.rgd.spline(phit_g, rc + 0.3, l).map(x[N:]) * x[N:]**l for m in range(2 * l + 1): ll = l**2 + m phi_i[i, N:] = f * Y(ll, 1, 0, 0) phi_i[i, N::-1] = f * Y(ll, -1, 0, 0) phit_i[i, N:] = ft * Y(ll, 1, 0, 0) phit_i[i, N::-1] = ft * Y(ll, -1, 0, 0) i += 1 x0 = co.positions[a, 0] - L / 2 symbol = co.symbols[a] print(symbol, x0, rc) plt.plot([x0], [0], 'o', ms=dpi * rc * 2 / 2.33 * 1.3 * Bohr, mfc='None', label='_nolegend_') plt.plot(x * Bohr + x0, P_i.dot(phit_i), C + '-', lw=1, label=r'$\tilde{\psi}^%s$' % symbol) plt.plot(x * Bohr + x0, P_i.dot(phi_i), C + '-', lw=2, label=r'$\psi^%s$' % symbol) C = 'r' psit = co.calc.get_pseudo_wave_function(band=1) n = len(psit) psit2 = psit[:, :, n // 2] psit1 = psit2[:, n // 2] x = np.linspace(-L / 2, L / 2, n, endpoint=False) plt.plot(x, psit1, 'bx', mew=2, label=r'$\tilde{\psi}$') plt.legend(loc='best') plt.xlabel('x [Å]') plt.ylabel(r'$\psi$') plt.ylim(ymin=-2, ymax=2) # plt.show() plt.savefig('co_wavefunctions.png', dpi=dpi) fig = plt.figure() ax = fig.add_subplot(111) m = 
abs(psit2).max() * 1.1 cax = ax.contour(x, x, psit2.T, np.linspace(-m, m, 31)) ax.text(-d / 2, 0, 'C', ha='center', va='center') ax.text(d / 2, 0, 'O', ha='center', va='center') cbar = fig.colorbar(cax) ax.set_xlabel('x (Angstrom)') ax.set_ylabel('y (Angstrom)') # plt.show() fig.savefig('2sigma.png') gpaw-24.1.0/doc/documentation/codegraph.py000066400000000000000000000001631454550013000204550ustar00rootroot00000000000000# creates: code.svg, abc.svg, acf.svg, da.svg, builder.svg, aa.svg from gpaw.doctools.codegraph import main main() gpaw-24.1.0/doc/documentation/convergence.rst000066400000000000000000000056541454550013000212110ustar00rootroot00000000000000.. _convergence: ================== Convergence Issues ================== *Try to use default parameters for the calculator. Simple and often useful.* Here you find a list of suggestions that should be considered when encountering convergence problems: 1) Make sure the geometry and spin-state is physically sound. Remember that ASE uses Ångström and not Bohr or nm. For spin polarized systems, make sure you have sensible initial magnetic moments. Don't do spin-paired calculations for molecules with an odd number of electrons. Before performing calculations of isolated atoms see :ref:`atomization_energy`. 2) Use less aggressive density mixing. Try something like ``mixer=Mixer(0.02, 5, 100)`` or ``mixer=MixerSum(0.02, 5, 100)``, ``mixer=MixerDif(0.02, 5, 100)`` for spin-polarized calculations and remember to import the mixer classes:: from gpaw import Mixer, MixerSum, MixerDif For some systems (for example transition metal atoms) it is helpful to reduce the number of history steps in the mixer to ``1`` (instead of ``5``). 3) Solve the eigenvalue problem more accurately at each scf-step. Import the Davidson eigensolver:: from gpaw import Davidson and increase the number iterations per scf-step ``eigensolver=Davidson(3)``. CG eigensolver tends converge fastest the unoccupied bands ``eigensolver='cg'``. 
4) Use a smoother distribution function for the occupation numbers. Remember that for systems without periodic boundary conditions (molecules) the :xkcd:`Fermi temperature <2780>` is set to zero by default. You might want to specify a finite Fermi temperature as described :ref:`here ` and check the convergence of the results with respect to the temperature! 5) Try adding more empty states. If you are specifying the :ref:`number of bands ` manually, try to increase the number of empty states. You might also let GPAW choose the default number, which is in general large enough. 6) Use enough k-points. Try something like ``kpts={'density': 3.5, 'even': True}`` (see :ref:`manual_kpts`). 7) Don't let your structure optimization algorithm take too large steps. 8) Better initial guess for the wave functions. The initial guess for the wave functions is always calculated using the LCAO scheme, with a default single-zeta basis, i.e. one orbital for each valence electron. It is possible to use ``basis='szp(dzp)'`` to extract the single-zeta polarization basis set from the double-zeta polarization basis sets that are distributed together with the latest PAW datasets. You can also try to make a better initial guess by enlarging the :ref:`manual_basis`. Note that you first need to generate the basis file, as described in :ref:`LCAO mode `. Warning: this may in some cases worsen the convergence, and improves it usually only when the number of empty states is significantly increased. gpaw-24.1.0/doc/documentation/core.rst000066400000000000000000000254701454550013000176410ustar00rootroot00000000000000============================== Introduction to GPAW internals ============================== .. testsetup:: from gpaw.fftw import * from gpaw.core.matrix import * from gpaw.core.atom_arrays import * This guide will contain graphs showing the relationship between objects that build up a DFT calculation engine. .. 
hint:: Here is a simple graph showing the relations between the classes ``A``, ``B`` and ``C``: .. image:: abc.svg Here, the object of type ``A`` has an attribute ``a`` of type ``int`` and an attribute ``b`` of type ``B`` or ``C``, where ``C`` inherits from ``B``. .. contents:: DFT components ============== The components needed for a DFT calculation are created by a "builder" that can be made with the :func:`~gpaw.new.builder.builder` function, an ASE :class:`ase.Atoms` object and some input parameters: >>> from ase import Atoms >>> atoms = Atoms('Li', cell=[2, 2, 2], pbc=True) >>> from gpaw.new.builder import builder >>> params = {'mode': 'pw', 'kpts': (5, 5, 5)} >>> b = builder(atoms, params) .. image:: builder.svg As seen in the figure above, there are builders for each of the modes: PW, FD and LCAO (builders for TB and ATOM modes are not shown). The :class:`~gpaw.new.input_parameters.InputParameters` object takes care of user parameters: * checks for errors * does normalization * handles backwards compatibility and deprecation warnings Normally, you will not need to create a DFT-components builder yourself. It will happen automatically when you create a DFT-calculation object like this: >>> from gpaw.new.calculation import DFTCalculation >>> calculation = DFTCalculation.from_parameters(atoms, params) or when you create an ASE-calculator interface: >>> from gpaw.new.ase_interface import GPAW >>> atoms.calc = GPAW(**params, txt='li.txt') Full picture ============ The :class:`ase.Atoms` object has an :class:`gpaw.new.ase_interface.ASECalculator` object attached created with the :func:`gpaw.new.ase_interface.GPAW` function: >>> atoms = Atoms('H2', ... positions=[(0, 0, 0), (0, 0, 0.75)], ... cell=[2, 2, 3], ... pbc=True) >>> atoms.calc = GPAW(mode='pw', txt='h2.txt') >>> atoms.calc ASECalculator(mode: {'name': 'pw'}) The ``atoms.calc`` object manages a :class:`gpaw.new.calculation.DFTCalculation` object that does the actual work. 
When we do this: >>> e = atoms.get_potential_energy() the :meth:`gpaw.new.ase_interface.ASECalculator.get_potential_energy` method gets called (``atoms.calc.get_potential_energy(atoms)``) and the following will happen: * create :class:`gpaw.new.calculation.DFTCalculation` object if not already done * update positions/unit cell if they have changed * start SCF loop and converge if needed * calculate energy * store a copy of the atoms .. image:: code.svg DFT-calculation object ====================== .. module:: gpaw.core An instance of the :class:`gpaw.new.calculation.DFTCalculation` class has the following attributes: .. list-table:: * - ``state`` - :class:`gpaw.new.calculation.DFTState` * - ``scf_loop`` - :class:`gpaw.new.scf.SCFLoop` * - ``pot_calc`` - :class:`gpaw.new.pot_calc.PotentialCalculator` and a the :class:`gpaw.new.calculation.DFTState` object has these attributes: .. list-table:: * - ``density`` - :class:`gpaw.new.density.Density` * - ``ibzwfs`` - :class:`gpaw.new.ibzwfs.IBZWaveFunctions` * - ``potential`` - :class:`gpaw.new.potential.Potential` Naming convention for arrays ============================ Commonly used indices: ======= ==================================================================== index description ======= ==================================================================== ``a`` Atom number ``c`` Unit cell axis-index (0, 1, 2) ``v`` *xyz*-index (0, 1, 2) ``K`` BZ **k**-point index ``k`` IBZ **k**-point index ``q`` IBZ **k**-point index (local, i.e. 
it starts at 0 on each processor) ``s`` Spin index (`\sigma`) ``s`` Symmetry index ``u`` Combined spin and **k**-point index (local) ``R`` Three indices into the coarse 3D grid ``r`` Three indices into the fine 3D grid ``G`` Index of plane-wave coefficient (wave function expansion, ``ecut``) ``g`` Index of plane-wave coefficient (densities, ``2 * ecut``) ``h`` Index of plane-wave coefficient (compensation charges, ``8 * ecut``) ``X`` ``R`` or ``G`` ``x`` ``r``, ``g`` or ``h`` ``x`` Zero or more extra dimensions ``M`` LCAO orbital index (`\mu`) ``n`` Band number ``n`` Principal quantum number ``l`` Angular momentum quantum number (s, p, d, ...) ``m`` Magnetic quantum number (0, 1, ..., 2*`ell` - 1) ``L`` ``l`` and ``m`` (``L = l**2 + m``) ``j`` Valence orbital number (``n`` and ``l``) ``i`` Valence orbital number (``n``, ``l`` and ``m``) ``q`` ``j1`` and ``j2`` pair ``p`` ``i1`` and ``i2`` pair ``r`` CPU-rank ======= ==================================================================== Examples: .. list-table:: * - ``density.D_asii`` - `D_{\sigma,i_1,i_2}^a` - :class:`~atom_arrays.AtomArrays` * - ``density.nt_sR`` - `\tilde{n}_\sigma(\mathbf{r})` - :class:`~UGArray` * - ``ibzwfs.wfs_qs[q][s].P_ain`` - `P_{\sigma \mathbf{k} in}^a` - :class:`~atom_arrays.AtomArrays` * - ``ibzwfs.wfs_qs[q][s].psit_nX`` - `\tilde{\psi}_{\sigma \mathbf{k} n}(\mathbf{r})` - :class:`~UGArray` | :class:`~PWArray` * - ``ibzwfs.wfs_qs[q][s].pt_aX`` - `\tilde{p}_{\sigma \mathbf{k} i}^a(\mathbf{r}-\mathbf{R}^a)` - :class:`~atom_centered_functions.AtomCenteredFunctions` Domain descriptors ================== GPAW has two different container types for storing one or more functions in a unit cell (wave functions, electron densities, ...): * :class:`~PWArray` * :class:`UGArray` .. 
image:: da.svg Uniform grids ------------- A uniform grid can be created with the :class:`UGDesc` class: >>> import numpy as np >>> from gpaw.core import UGDesc >>> a = 4.0 >>> n = 20 >>> grid = UGDesc(cell=a * np.eye(3), ... size=(n, n, n)) Given a :class:`UGDesc` object, one can create :class:`UGArray` objects like this >>> func_R = grid.empty() >>> func_R.data.shape (20, 20, 20) >>> func_R.data[:] = 1.0 >>> grid.zeros((3, 2)).data.shape (3, 2, 20, 20, 20) Here are the methods of the :class:`UGDesc` class: .. csv-table:: :file: ugd.csv and the :class:`UGArray` class: .. csv-table:: :file: uga.csv Plane waves ----------- A set of plane-waves are characterized by a cutoff energy and a uniform grid: >>> from gpaw.core import PWDesc >>> pw = PWDesc(ecut=100, cell=grid.cell) >>> func_G = pw.empty() >>> func_R.fft(out=func_G) PWArray(pw=PWDesc(ecut=100 , cell=[4.0, 4.0, 4.0], pbc=[True, True, True], comm=0/1, dtype=float64), dims=()) >>> G = pw.reciprocal_vectors() >>> G.shape (1536, 3) >>> G[0] array([0., 0., 0.]) >>> func_G.data[0] (1+0j) >>> func_G.ifft(out=func_R) UGArray(grid=UGDesc(size=[20, 20, 20], cell=[4.0, 4.0, 4.0], pbc=[True, True, True], comm=0/1, dtype=float64), dims=()) >>> round(func_R.data[0, 0, 0], 15) 1.0 Here are the methods of the :class:`~PWDesc` class: .. csv-table:: :file: pwd.csv and the :class:`~PWArray` class: .. csv-table:: :file: pwa.csv Atoms-arrays ============ .. image:: aa.svg Block boundary conditions ========================= ... Matrix elements =============== >>> psit_nG = pw.zeros(5) >>> def T(psit_nG): ... """Kinetic energy operator.""" ... out = psit_nG.new() ... out.data[:] = psit_nG.desc.ekin_G * psit_nG.data ... return out >>> H_nn = psit_nG.matrix_elements(psit_nG, function=T) Same as: >>> Tpsit_nG = T(psit_nG) >>> psit_nG.matrix_elements(Tpsit_nG, symmetric=True) Matrix(float64: 5x5) but faster. Atom-centered functions ======================= .. image:: acf.svg .. literalinclude:: acf_example.py .. 
figure:: acf_example.png Matrix object ============= .. module:: gpaw.core.matrix Here are the methods of the :class:`~Matrix` class: .. csv-table:: :file: m.csv A simple example that we can run with MPI on 4 cores:: from gpaw.core.matrix import Matrix from gpaw.mpi import world a = Matrix(5, 5, dist=(world, 2, 2, 2)) a.data[:] = world.rank print(world.rank, a.data.shape) Here, we have created a 5x5 :class:`Matrix` of floats distributed on a 2x2 BLACS grid with a block size of 2 and we then print the shapes of the ndarrays, which look like this (in random order):: 1 (2, 3) 2 (3, 2) 3 (2, 2) 0 (3, 3) Let's create a new matrix ``b`` and :meth:`redistribute <Matrix.redist>` from ``a`` to ``b``:: b = a.new(dist=(None, 1, 1, None)) a.redist(b) if world.rank == 0: print(b.array) This will output:: [[ 0. 0. 2. 2. 0.] [ 0. 0. 2. 2. 0.] [ 1. 1. 3. 3. 1.] [ 1. 1. 3. 3. 1.] [ 0. 0. 2. 2. 0.]] Matrix-matrix multiplication works like this:: c = a.multiply(a, opb='T') API === Core ---- .. autoclass:: gpaw.core.UGDesc :members: :undoc-members: .. autoclass:: gpaw.core.PWDesc :members: :undoc-members: .. autoclass:: gpaw.core.atom_centered_functions.AtomCenteredFunctions :members: :undoc-members: .. autoclass:: gpaw.core.UGArray :members: :undoc-members: .. autoclass:: gpaw.core.arrays.DistributedArrays :members: :undoc-members: .. autoclass:: gpaw.core.atom_arrays.AtomArrays :members: :undoc-members: .. autoclass:: gpaw.core.atom_arrays.AtomArraysLayout :members: :undoc-members: .. autoclass:: gpaw.core.atom_arrays.AtomDistribution :members: :undoc-members: .. autoclass:: gpaw.core.PWArray :members: :undoc-members: .. autoclass:: gpaw.core.plane_waves.Empty :members: :undoc-members: .. autoclass:: Matrix :members: :undoc-members: .. autoclass:: MatrixDistribution :members: :undoc-members: DFT --- .. autoclass:: gpaw.new.calculation.DFTCalculation :members: :undoc-members: .. autoclass:: gpaw.new.calculation.DFTState :members: :undoc-members: ..
autoclass:: gpaw.new.density.Density :members: :undoc-members: .. autofunction:: gpaw.new.builder.builder .. autoclass:: gpaw.new.ibzwfs.IBZWaveFunctions :members: :undoc-members: .. autoclass:: gpaw.new.potential.Potential :members: :undoc-members: .. autoclass:: gpaw.new.pot_calc.PotentialCalculator :members: :undoc-members: .. autoclass:: gpaw.new.scf.SCFLoop :members: :undoc-members: .. autoclass:: gpaw.new.input_parameters.InputParameters :members: :undoc-members: .. autoclass:: gpaw.new.pwfd.wave_functions.PWFDWaveFunctions :members: :undoc-members: .. autoclass:: gpaw.new.ase_interface.ASECalculator :members: :undoc-members: .. autofunction:: gpaw.new.ase_interface.GPAW FFTW ---- .. automodule:: gpaw.fftw :members: BLAS ---- .. autofunction:: gpaw.utilities.blas.mmm .. autofunction:: gpaw.utilities.blas.rk gpaw-24.1.0/doc/documentation/create_csv_files.py000066400000000000000000000014601454550013000220220ustar00rootroot00000000000000# creates: ugd.csv, uga.csv, pwd.csv, pwa.csv, m.csv from gpaw.core import PWArray, PWDesc, UGArray, UGDesc from gpaw.core.matrix import Matrix for cls in [UGDesc, PWDesc, UGArray, PWArray, Matrix]: name = ''.join(x for x in cls.__name__ if x.isupper()).lower() mod = cls.__module__ mod = mod.replace('.plane_waves', '') mod = mod.replace('.uniform_grid', '') print(name, mod) with open(f'{name}.csv', 'w') as fd: for name, meth in cls.__dict__.items(): if name[0] != '_': try: doc = meth.__doc__.splitlines()[0] except AttributeError: doc = '...' 
print(f':meth:`~{mod}.{cls.__name__}.{name}`, "{doc}"', file=fd) gpaw-24.1.0/doc/documentation/cu_agts.py000066400000000000000000000012321454550013000201440ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='cu_calc.py', cores=4, tmax='1h'): run(script='cu_plot.py') run(function=check) def check(): import numpy as np from ase.io import read energies = [] k = 20 for name in ['ITM', 'FD-0.05', 'MV-0.2']: e = read(f'Cu-{name}-{k}.txt').get_potential_energy() energies.append(e) # Extrapolate TM: e19 = read('Cu-TM-19.txt').get_potential_energy() e20 = read('Cu-TM-20.txt').get_potential_energy() e = np.polyval(np.polyfit([20**-2, 19**-2], [e20, e19], 1), 0) energies.append(e) assert max(energies) - min(energies) < 0.001 gpaw-24.1.0/doc/documentation/cu_calc.py000066400000000000000000000012511454550013000201110ustar00rootroot00000000000000from ase.build import bulk from gpaw import GPAW, PW cu = bulk('Cu', 'fcc', a=3.6) for smearing in [{'name': 'improved-tetrahedron-method'}, {'name': 'tetrahedron-method'}, {'name': 'fermi-dirac', 'width': 0.05}, {'name': 'marzari-vanderbilt', 'width': 0.2}]: name = ''.join(word[0].upper() for word in smearing['name'].split('-')) width = smearing.get('width') if width: name += f'-{width}' for k in range(8, 21): cu.calc = GPAW( mode=PW(400), kpts=(k, k, k), occupations=smearing, txt=f'Cu-{name}-{k}.txt') e = cu.get_potential_energy() gpaw-24.1.0/doc/documentation/cu_plot.py000066400000000000000000000013651454550013000201730ustar00rootroot00000000000000# web-page: cu.png import numpy as np import matplotlib.pyplot as plt from ase.io import read fig, ax = plt.subplots(constrained_layout=True) e0 = None k = np.arange(8, 21, dtype=float) for name in ['ITM', 'TM', 'FD-0.05', 'MV-0.2']: energies = [] for n in k: e = read(f'Cu-{name}-{int(n)}.txt').get_potential_energy() energies.append(e) if e0 is None: e0 = e ax.plot(k**-2, (np.array(energies) - e0) * 1000, label=name) ax.set_xlabel(r'$1/k^2$') 
ax.set_ylabel(r'$\Delta E$ [meV]') ax2 = ax.secondary_xaxis('top', functions=(lambda x: (x + 1e-10)**-0.5, lambda k: (k + 1e-10)**-2)) ax2.set_xlabel('Number of k-points (k)') plt.legend() plt.savefig('cu.png') # plt.show() gpaw-24.1.0/doc/documentation/custom_convergence.rst000066400000000000000000000175111454550013000225760ustar00rootroot00000000000000.. _custom_convergence: =========================== Custom convergence criteria =========================== Additional convergence keywords ------------------------------- There are additional keywords that you can provide to the ``convergence`` dictionary beyond those in the :ref:`default dictionary <manual_convergence>`. These include ``'forces'``, ``'work function'``, and ``'minimum iterations'``. (See :ref:`builtin_criteria` for a list of all available criteria and their parameters.) For example, to make sure that the work function changes by no more than 0.001 eV across the last three SCF iterations, you can do:: from gpaw import GPAW convergence={'work function': 0.001} calc = GPAW(..., convergence=convergence) In the example above, the default criteria (energy, eigenstates, and density) will still be present and enforced at their default values. The default convergence criteria are always active, but you can effectively turn them off by setting any of them to :code:`np.inf`. Changing criteria behavior -------------------------- You can change things about how some convergence criteria work through an alternative syntax. For example, the default syntax of :code:`convergence={'energy': 0.0005}` ensures that the last three values of the energy change by no more than 0.5 meV.
If you'd rather have it examine changes in the last *four* values of the energy, you can set your convergence dictionary to:: from gpaw.convergence_criteria import Energy convergence = {'energy': Energy(tol=0.0005, n_old=4)} (In fact, :code:`convergence={'energy': 0.0005}` is just a shortcut to :code:`convergence={'energy': Energy(0.0005)}`; the dictionary value :code:`0.0005` becomes the first positional argument to :code:`Energy`.) Converging forces ----------------- You can ensure that the forces are converged like:: convergence = {'forces': 0.01} This requires that the maximum change in the magnitude of the vector representing the difference in forces for each atom is less than 0.01 eV/ Angstrom, compared to the previous iteration. Since calculating the atomic forces takes computational time and memory, by default this waits until all other convergence criteria are met before beginning to check the forces. If you'd rather have it check the forces at every SCF iteration you can instead do:: from gpaw.convergence_criteria import Forces convergence = {'forces': Forces(0.01, calc_last=False)} You can also choose to converge forces relative to the current maximum force acting on all atoms in your system. This is particularly useful for example in the case of geometry optimizations far from local minima where large forces mean that strict SCF (and therefore forces) convergence is not necessary. For this one can do:: # Converge forces to 10% of the highest force. convergence = {'forces': Forces(atol=np.inf, rtol=0.1)} If both ``atol`` and ``rtol`` are supplied, then forces are converged to whichever is the stricter convergence for that SCF cycle:: # During a geometry optimization, converge forces to 0.01 eV/Ang # between successive SCF iterations until forces are below # 0.1 eV/Ang, then 10% of maximum force in system. 
convergence = {'forces': Forces(atol=0.01, rtol=0.1)} Example: fixed iterations ------------------------- You can use this approach to tell the SCF cycle to run for a fixed number of iterations. To do this, set all the default criteria to :code:`np.inf` to turn them off, then use the :class:`~gpaw.convergence_criteria.MinIter` class to set a minimum number of iterations. (Also be sure your :ref:`maxiter <manual_maxiter>` keyword is set higher than this value!) For example, to run for exactly 10 iterations:: convergence = {'energy': np.inf, 'eigenstates': np.inf, 'density': np.inf, 'minimum iterations': 10} The :class:`~gpaw.convergence_criteria.MinIter` class can work in concert with other convergence criteria as well; that is, it can act simply to define a minimum number of iterations that must be run, even if all other criteria have been met. Writing your own criteria ------------------------- You can write your own custom convergence criteria if you structure them like this:: from gpaw.convergence_criteria import Criterion class MyCriterion(Criterion): name = 'my criterion' # must be a unique name tablename = 'mycri' # <=5 char, prints as header in the SCF table calc_last = False # if True, waits until all other criteria are met # before checking (for expensive criteria) def __init__(self, ...): ... # your code here; note if you save all arguments directly # (as self.a, self.b, ...) then todict() and __repr__ methods # will work automatically. # The next line prints at the top of the log file. self.description = 'My custom criterion with tolerance ...' def __call__(self, context): ... # your code here # 'context' is an object containing references to the current # state of the calculation, such as the hamiltonian and wave # functions converged = ... # True or False if your criterion is met entry = ... # a string with up to 5 characters to print in SCF table return converged, entry def reset(self): ...
# your code here to clear anything saved whenever # the SCF restarts calc = GPAW(..., convergence={'custom': [MyCriterion(0.01, 4)]} ) All user-written criteria must enter the dictionary through the special ``custom`` keyword, and you can include as many criteria as you like in the list. .. note:: If you have written your own criterion and you save your calculator instance (that is, :code:`calc.write('out.gpw')`), GPAW won't know how to load your custom criterion when it opens"out.gpw". You will need to add your custom criteria back manually. .. note:: If you are running multiple GPAW calculator instances simultaneously, make sure each calculator instance gets its own unique instance of your custom criterion. (You do not need to worry about this for any of the built-in criteria, as it makes an internal copy.) .. _builtin_criteria: Built-in criteria ----------------- The built-in criteria, along with their shortcut names that you can use to access them in the :code:`convergence` dictionary, are below. The criteria marked as defaults are present in the default convergence dictionary and will always be present; the others are optional. .. list-table:: :header-rows: 1 :widths: 1 1 1 1 1 * - class - name attribute - default? - calc_last? - override_others? * - :class:`~gpaw.convergence_criteria.Energy` - ``energy`` - Yes - No - No * - :class:`~gpaw.convergence_criteria.Density` - ``density`` - Yes - No - No * - :class:`~gpaw.convergence_criteria.Eigenstates` - ``eigenstates`` - Yes - No - No * - :class:`~gpaw.convergence_criteria.Forces` - ``forces`` - No - Yes - No * - :class:`~gpaw.convergence_criteria.WorkFunction` - ``work function`` - No - No - No * - :class:`~gpaw.convergence_criteria.MinIter` - ``minimum iterations`` - No - No - No * - :class:`~gpaw.convergence_criteria.MaxIter` - ``maximum iterations`` - No - No - Yes Full descriptions for the built-in criteria follow. .. autoclass:: gpaw.convergence_criteria.Energy .. 
autoclass:: gpaw.convergence_criteria.Density .. autoclass:: gpaw.convergence_criteria.Eigenstates .. autoclass:: gpaw.convergence_criteria.Forces .. autoclass:: gpaw.convergence_criteria.WorkFunction .. autoclass:: gpaw.convergence_criteria.MinIter .. autoclass:: gpaw.convergence_criteria.MaxIter gpaw-24.1.0/doc/documentation/dcdft/000077500000000000000000000000001454550013000172335ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/dcdft/dcdft.rst000066400000000000000000000076431454550013000210630ustar00rootroot00000000000000.. _dcdft_exercise: ========================================================================== DeltaCodesDFT - Comparing Solid State DFT Codes, Basis Sets and Potentials ========================================================================== .. note:: This exercise is currently broken. Please skip it. The webpage https://molmod.ugent.be/deltacodesdft provides a method for measuring the precision of a given calculation method against a chosen reference method (computational or experimental) for parameters of the equation of state (see :ref:`aluminium_exercise`) of elementary solids. When performing any benchmark calculations, especially involving a large number of systems, it is important to be aware of the fact that we humans tend to make mistakes. Therefore the motto of this exercise is taken from Karl Popper's "All life is problem solving": `the novelty in the scientific approach is that we actively seek to eliminate our attempted solutions `_. In this exercise, in addition to the traditional, error-prone method of writing output files generated using separate scripts on disk, we write the results into a database. All calculations are performed using one script, and therefore not only is sharing the results with other researchers easy (by granting access to the database) but also the precise method of performing the calculations should be shared (by presenting the script).
Please consult the introduction to the :mod:`ase.db` module for details. You can find out more about "reproducible science" with ASE in the following talks: `Emacs + org-mode + python in reproducible research `_ or `How Python & the iPython notebook can revamp quantum chemical research `_. We will compare PBE numbers from GPAW with http://www.wien2k.at/ for K, Ca and Ti. We use default PAW-datasets, a plane-wave cutoff of 340 eV, a **k**-point density of 3.5 points per Å\ `^{-1}` and a Fermi-Dirac distribution with a width of 0.1 eV. Copy this :download:`dcdft_gpaw.py` to a place in your file area: .. literalinclude:: dcdft_gpaw.py .. highlight:: bash Read the script and try to understand it. Run the script by typing:: $ python3 dcdft_gpaw.py It should take about 15 minutes to run the script. Note that you can start several instances of the script simultaneously in order to speed things up. The script will generate ``.txt`` files and an SQLite3 database file. Watch the progress as the calculations run:: $ ase db dcdft.db -c +x,time Examine the equation of state (see :ref:`aluminium_exercise`) using :command:`ase gui`:: $ ase gui dcdft.db@name=Ca .. note:: The PBE reference values from https://molmod.ugent.be/deltacodesdft are: ======= ================== ========= element `V` [Å\ `^3`/atom] `B` [GPa] ======= ================== ========= K 73.68 3.6 Ca 42.20 17.1 Ti 17.39 112.2 ======= ================== ========= Extract the results from the database in order to calculate the parameters of the equation of state:: K 73.6852 3.6070 19.7231 Ca 42.5903 24.2170 -13.2987 Ti 17.3542 113.9593 3.5966 and use the script available from https://molmod.ugent.be/deltacodesdft to calculate the Delta factors. * How well do the obtained values agree with the references? Do you think they can be further improved (hint: check out https://wiki.fysik.dtu.dk/gpaw/setups/dcdft.html)? Do you agree with Karl Popper?
gpaw-24.1.0/doc/documentation/dcdft/dcdft_gpaw.py000066400000000000000000000022131454550013000217050ustar00rootroot00000000000000from time import time import numpy as np import ase.db from ase.test.tasks.dcdft import DeltaCodesDFTCollection as Collection from gpaw import GPAW, PW, FermiDirac c = ase.db.connect('dcdft.db') ecut = 340 kptdensity = 3.5 width = 0.10 collection = Collection() for name in ['K', 'Ca', 'Ti']: atoms = collection[name] cell = atoms.get_cell() # Loop over volumes: for n, x in enumerate(np.linspace(0.98, 1.02, 5)): id = c.reserve(name=name, x=x) if id is None: # This calculation has been or is being done: continue atoms.set_cell(cell * x, scale_atoms=True) atoms.calc = GPAW(txt='%s-%d.txt' % (name, n), mode=PW(ecut), xc='PBE', kpts={'density': kptdensity}, parallel={'band': 1}, occupations=FermiDirac(width)) t1 = time() atoms.get_potential_energy() t2 = time() # Write to database: c.write(atoms, name=name, x=x, time=t2 - t1, ecut=ecut, kptdensity=kptdensity, width=width) del c[id] gpaw-24.1.0/doc/documentation/dcdft/extract.py000066400000000000000000000031741454550013000212640ustar00rootroot00000000000000import sys import numpy as np from numpy.linalg.linalg import LinAlgError from ase.units import kJ import ase.db from ase.test.tasks.dcdft import DeltaCodesDFTCollection as Collection from ase.test.tasks.dcdft import FullEquationOfState as EquationOfState collection = Collection() db = sys.argv[1] c = ase.db.connect(db) def analyse(c, collection): A = [] for name in collection.names: ve = [] # volume, energy pairs for d in c.select(name=name): try: ve.append((abs(np.linalg.det(d.cell)), d.energy)) except AttributeError: ve.append((np.nan, np.nan)) # sort according to volume ves = sorted(ve, key=lambda x: x[0]) # EOS eos = EquationOfState([t[0] for t in ves], [t[1] for t in ves]) try: v, e, B0, B1, R = eos.fit() except (ValueError, TypeError, LinAlgError): (v, e, B0, B1, R) = (np.nan, np.nan, np.nan, np.nan, np.nan) e = e / 
len(collection[name]) v = v / len(collection[name]) B0 = B0 / kJ * 1.0e24 # GPa A.append((e, v, B0, B1, R)) return np.array(A).T E, V, B0, B1, R = analyse(c, collection) with open(db + '_raw.txt', 'w') as fd: for name, e, v, b0, b1, r, in zip(collection.names, E, V, B0, B1, R): if not np.isnan(e): print('%2s %8.4f %8.4f %8.4f' % (name, v, b0, b1), file=fd) with open(db + '_raw.csv', 'w') as fd: for name, e, v, b0, b1, r, in zip(collection.names, E, V, B0, B1, R): if not np.isnan(e): print(f'{name}, {v:8.4f}, {b0:8.4f}, {b1:8.4f}', file=fd) gpaw-24.1.0/doc/documentation/dcdft/testdb.py000066400000000000000000000001171454550013000210710ustar00rootroot00000000000000import os os.system('ase db dcdft.db name=Ca -c +ecut,kpts,width,x,time,iter') gpaw-24.1.0/doc/documentation/defects_theory.rst000066400000000000000000000231541454550013000217150ustar00rootroot00000000000000.. _defects_theory: ======================================================================= Localised electrostatic charges in non-uniform dielectric media: Theory ======================================================================= For examples, see :ref:`defects_tutorial`. Introduction ============ The purpose of this section is to enable us to calculate the formation energy of charged defects. In the Zhang-Northrup formula, this is given by .. math:: E^f[X^q] = E[X^q] - E_0 - \sum_i\mu_in_i + q (\epsilon_v + \epsilon_F) In this formula, `X` labels the type of defect and `q` its charge state, i.e. the net charge contained in some volume surrounding the defect. `q` is defined such that `q=-1` for an electron. `E[X^q]` is the total energy of the sample with the defect, and `E_0` the energy of the pristine (bulklike) sample. These quantities are usually calculated in a supercell approach with periodic boundary conditions. That is, we create the defect we are interested in, and place it in an environment containing many repetitions of the pristine unit cell. 
In the infinite supercell limit, we can describe the properties of the isolated defect. When we employ periodic boundary conditions, our calculation includes spurious electrostatic interactions between localised charge distribution of the defect state, and all its periodically repeated images. These long-ranged interactions mean that the convergence with respect to supercell size is slow. To accelerate this convergence, we employ an electrostatic correction scheme, and write .. math:: E^f[X^q]_{\mathrm{corrected}} = E^f[X^q]_{\mathrm{uncorrected}} - E_{\mathrm{periodic}} + E_{\mathrm{isolated}} + q\Delta V This assumes that DFT describes the bonding and energies of the system well, but contains errors in the electrostatics. The correction thus consists in subtracting the spurious interactions between the periodic images (the term included in the DFT calculation) and adding in the energy of an isolated charge distribution in the given dielectric environment. Finally, the potentials between the charged and neutral states must be aligned to the same reference. Implementing this scheme consists of the following steps, which will be described in further detail in the sections below. First, we determine the `z`-dependent dielectric function of the 2D layers. Knowing this, we use a model charge distribution to emulate the behaviour of the defect charge state, and find the potential associated with this model distribution by solving the Poisson equation. We do this twice: first with periodic boundary conditions, and then with zero boundary conditions. The electrostatic energy associated with the charge distribution in a given potential can then be found from the usual formula, .. math:: U = \frac{1}{2}\int_{\Omega} \rho V. Defining the dielectric response of 2D layers ============================================= In two dimensions, the bulk dielectric response is poorly defined, and some model must be used. 
The approach used here, following Ref [#Komsa]_ is to assume that the dielectric function of the isolated layer is isotropic in plane, and varies only in the `z` direction. Additionally, the screening is assumed to follow the density distribution of the system so that we can write .. math:: \varepsilon^{i}(z) = k^i\cdot n(z) + 1, Where `i` varies over "in-plane" and "out-of-plane", and `n` is the in-plane averaged density of the system. The normalization constants, `k^i`, are chosen such that .. math:: \frac{1}{L} \int \mathrm{d} z\, \varepsilon^{\parallel}(z) &= \varepsilon^{\parallel}_{\mathrm{DFT}} \\ \frac{1}{L} \int \mathrm{d} z\, \left(\varepsilon^{\perp}(z)\right)^{-1} &= \left(\varepsilon^{\perp}_{\mathrm{DFT}}\right)^{-1} Calculating the energy of the periodic images ============================================= Once we have the dielectric response, we proceed by solving the poisson equation .. math:: \nabla \cdot \boldsymbol{\varepsilon}(z) \odot \nabla \phi(\mathbf r) = -\rho(\mathbf r). Here `\odot` is the elementwise (Hadamard) product, and `\boldsymbol{\varepsilon} = (\varepsilon^{\parallel}, \varepsilon^{\parallel}, \varepsilon^{\perp})`. `\rho(\mathbf r)` is a model charge distribution that we use to describe the charge distribution of the defect state. For convenience, a Gaussian is often chosen, so that .. math:: \rho(\mathbf r) = \frac{1}{\left(\sqrt{2\pi}\sigma\right)^3} e^{-(\mathbf r - \mathbf r_0)^2/(2\sigma^2)}. In terms of the coordinate axes, the poisson equation reduces to .. math:: \varepsilon^{\parallel}(z)\frac{\partial^2 V}{\partial x^2} + \varepsilon^{\parallel}(z)\frac{\partial^2 V}{\partial y^2} + \varepsilon^{\perp}(z)\frac{\partial^2 V}{\partial z^2} + \frac{\partial \varepsilon^{\perp}}{\partial z} \frac{\partial V}{\partial z} = -\rho(\mathbf r) Since we wish to solve this equation with periodic boundary conditions, we Fourier transform the above equation, giving .. 
math:: \varepsilon^{\parallel}(G_z) * \left[(G_x^2 + G_y^2)V(\mathbf G)\right] + \varepsilon^{\perp}(G_z) * \left[G_z^2 V(\mathbf G)\right] + \left[G_z \varepsilon^{\perp}(G_z)\right] * \left[ G_z V(\mathbf G)\right] = \rho(\mathbf G), where `*` denotes a convolution along the `z` axis. Writing out these convolutions, we finally arrive at the expression .. math:: \rho_{G_x,G_y,G_z} = \sum_{G_z'} \left[\varepsilon^\parallel_{G_z - G_z'}\left(G_x^2 + G_y^2\right) + \varepsilon^{\perp}_{G_z - G_z'}G_zG_z'\right] V_{G_x, G_y, G_z'}. For each value of `(G_x, G_y)`, we can thus calculate the corresponding potential `V_{G_z}` through a matrix inversion, and use that to calculate the energy of the model charge distribution. We can also use the potential `V_{G_z}` to calculate the alignment term, `\Delta V`. We can Fourier transform this to get the real-space potential of the model charge distribution. If we have described the electrostatics of the system well, this potential should be similar to the true potential of the defect charge distribution, up to a constant shift. Defining .. math:: \Delta V(\vec{r}) = V(\vec{r}) - [V^{X^q}_\mathrm{el}(\vec{r}) - V^{0}_\mathrm{el}(\vec{r}) ], we set the alignment term `\Delta V` to the value of `\Delta V(\vec{r})` far from the defect, where this difference has levelled off to a constant. Calculating the energy of the isolated system ============================================= We start as before, with the Poisson equation, but since we would like to describe the energy of the isolated defect, we do not impose periodic boundary conditions and Fourier transform. Instead, following Ref. [#Ping]_ we can exploit the in-plane symmetry of the problem and expand `\phi` using cylindrical Bessel functions. .. math:: \phi(\mathbf r) = \int_0^\infty \mathrm{d}k'\, 2qe^{-k'^2\sigma^2/2} \varphi_{k'}(z) J_0(\rho k') Inserting this into the above equation and using the orthogonality relation `\int \rho\mathrm{d}\rho J_0(\rho k)J_0(\rho k') = \delta(k - k') / k` we find that `\varphi_k` must obey the Poisson equation ..
math:: -\frac{\partial}{\partial z}\left(\varepsilon^{\perp}(z) \frac{\partial \varphi_k(z)}{\partial z}\right) + k^2\varepsilon^{\parallel}(z) \varphi_k(z) = \frac{1}{\sqrt{2\pi}\sigma}e^{-\left(z - z_0\right)^2/\left(2\sigma^2\right)}, where `z_0` is the center of the Gaussian density along the `z` direction. The normalization of `\varphi_k` defined above was chosen precisely so that the right hand side of this equation is a normalized Gaussian along the `z` direction. We solve this equation by separating the response into two components: the bulk response, describing the screening far away from the material, and the remaining `z`-dependent response close to the system. We thus define `\Delta \varepsilon^i(z) = \varepsilon^i(z) - \varepsilon^{i}_{\mathrm{bulk}}` and the Green's function of the bulk response `\hat K = (-\varepsilon^{\perp}_{\mathrm{bulk}} \frac{\partial^2}{\partial z^2} + k^2\varepsilon^{\parallel}_{\mathrm{bulk}})^{-1}`. As an implementation detail, we note that for 2D materials, the bulk response is generally 1. The equation for `\varphi_k` can then be written as .. math:: \hat{K}^{-1} \varphi_k - \frac{\partial}{\partial z}\left(\Delta\varepsilon^{\perp} \frac{\partial \varphi_k}{\partial z}\right) + k^2\Delta\varepsilon^{\parallel}\varphi_k = \frac{1}{\sqrt{2\pi}\sigma}e^{-\left(z - z_0\right)^2/\left(2\sigma^2\right)}. Only the first term on the left hand side is affected by the boundary conditions on `\varphi_k`. We can solve this by Fourier transforming along the `z` axis and Wigner-Seitz truncating the Green's function, which yields the following equation .. math:: \sum_{G_z} D_{G_zG_z'} \left(\varphi_k\right)_{G_z} = e^{-i G_z'z_0 - G_z'^2\sigma^2/2}, with the matrix `D` given by .. math:: \frac{1}{L}D_{G_z'G_z} = \frac{\varepsilon^{\parallel}_{\mathrm{b}}k^2 + \varepsilon^{\perp}_{\mathrm{b}}G_z^2}{1 - e^{-kL/2}\cos(G_zL/2)}\delta_{G_zG_z'} + \Delta\varepsilon^{\parallel}_{G_z - G_z'}k^2 + \Delta\varepsilon^{\perp}_{G_z - G_z'}G_zG_z'.
Finding `\varphi_k` is thus just a simple matrix inversion. Once we have solved the Poisson equation, we calculate the total energy. .. math:: U &= \frac{1}{2} \int_{\Omega} \rho(\mathbf r) \phi(\mathbf r) \\ &= q^2 \int k \mathrm{d}k e^{-k^2 \sigma^2} U_k, with .. math:: U_k = \int \mathrm{d}z\, \varphi_k(z) \frac{1}{\sqrt{2\pi}\sigma}e^{-\left(z - z_0\right)^2/\left(2\sigma^2\right)} Using the solution to the Poisson equation, this reduces to .. math:: U_k = \sum_{G_z,G_z'} e^{i(G_z - G_z')z_0 - (G_z^2 + G_z'^2)\sigma^2 / 2} \left(D^{-1}\right)_{G_zG_z'}, with `D` defined as above. References ========== .. [#Komsa] H.-P. Komsa, T. T. Rantala and A. Pasquarello *Phys. Rev. B* **86**, 045112 (2012) .. [#Ping] R. Sundararaman and Y. Ping *J. Chem. Phys.* **146**, 104109 (2017) gpaw-24.1.0/doc/documentation/densitymix/000077500000000000000000000000001454550013000203445ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/densitymix/densitymix.rst000066400000000000000000000115531454550013000233000ustar00rootroot00000000000000.. _densitymix: ============== Density Mixing ============== Pulay Mixing ------------ The density is updated using Pulay-mixing [#Pulay1980]_, [#Kresse1996]_. Pulay mixing (or direct inversion of the iterative subspace (DIIS)) attempts to find a good approximation of the final solution as a linear combination of a set of trial vectors `\{n_i\}` generated during an iterative solution of a problem. If the error associated with a given solution is given as `\{R_i\}` then Pulay mixing assumes that the error of a linear combination of the trial vectors is given as the same linear combination of errors .. math:: n_{i+1}=\sum \alpha_i n_i \quad,\quad R_{i+1}=\sum \alpha_i R_i The norm of `R_{i+1}` is thus given as .. math:: \langle R_{i+1}|R_{i+1}\rangle=\bar{\alpha}^T \bar{\bar{R}}\bar{\alpha} where the elements of the matrix are given as `\bar{\bar{R}}_{ij}=\langle R_{i}|R_{j}\rangle`. The norm can thus be minimized by solving ..
math:: \frac{\delta \langle R_{i+1}|R_{i+1}\rangle}{\delta \bar{\alpha}^T}=2 \bar{\bar{R}}\bar{\alpha}=0 In density mixing the error of a given input density is given as .. math:: R_i = n_i^{out}[n_i^{in}]-n_i^{in} The original Pulay mixing only uses `n_i^{out}` to calculate the errors and thereby the mixing parameters. To more efficiently cover solution space it can be an advantage to include them with a certain weight, given as the input parameter `\beta`. .. math:: n_{i+1}^{in}=\sum \alpha_i (n_i^{in}+\beta R_i) Special Metric -------------- Convergence is improved by an optimized metric `\hat{M}` for calculation of scalar products in the mixing scheme, `\langle A | B \rangle _s = \langle A | \hat{M} | B \rangle`, where `\langle \rangle _s` is the scalar product with the special metric and `\langle \rangle` is the usual scalar product. The metric is based on the rationale that contributions for small wave vectors are more important than contributions for large wave vectors [#Kresse1996]_. Using a metric that weighs short wave density changes more than long wave changes can reduce charge sloshing significantly. It has been found [#Kresse1996]_ that the metric .. math:: \hat{M} = \sum_q | q \rangle f_q \langle q |, \quad f_q = 1 + \frac{w}{q^2} is particularly useful (`w` is a suitably chosen weight). This is easy to apply in plane wave codes, as it is local in reciprocal space. Expressed in real space, this metric is .. math:: \hat{M} = \sum_{R R'} | R \rangle f(R' - R) \langle R' |, \quad f(R) = \sum_q f_q e^{i q R} As this is fully nonlocal in real space, it would be very costly to apply. Instead we use a semilocal stencil with only three nearest neighbors: .. math:: f(R) = \begin{cases} 1 + w/8 & R = 0 \\ w / 16 & R = \text{nearest neighbor dist.} \\ w / 32 & R = \text{2nd nearest neighbor dist.} \\ w / 64 & R = \text{3rd nearest neighbor dist.} \\ 0 & \text{otherwise} \end{cases} which corresponds to the reciprocal space metric .. 
math:: f_q = 1 + \frac{w}{8} (1 + \cos q_x + \cos q_y + \cos q_z + \cos q_x \cos q_y + \cos q_y \cos q_z + \cos q_x \cos q_z + \cos q_x \cos q_y \cos q_z) With the nice property that it is a monotonically decaying function from `f_q = w + 1` at `q = 0` to `f_q = 1` anywhere at the zone boundary in reciprocal space. A comparison of the two metrics is displayed in the figure below .. image:: metric.png :align: center Specifying a Mixing Scheme in GPAW ---------------------------------- Specifying the mixing scheme and metric is done using the ``mixer`` keyword of the GPAW calculator:: from gpaw import GPAW, Mixer calc = GPAW(..., mixer=Mixer(beta=0.05, nmaxold=5, weight=50.0), ...) which are the recommended values if the default fails to converge. The class ``Mixer`` indicates one of the possible mixing schemes. The Pulay mixing can be based on: 1. The spin densities separately, ``Mixer`` (This will *not* work for a spin-polarized system, unless the magnetic moment is fixed) 2. The total density, ``MixerSum2`` 3. Spin channels separately for the density matrices, and the summed channels for the pseudo electron density, ``MixerSum`` 4. The total density and magnetization densities separately, ``MixerDif`` Where the magnetization density is the difference between the two spin densities. All mixer classes take the arguments ``(beta=0.25, nmaxold=3, weight=50.0)``. In addition, the ``MixerDif`` also takes the arguments ``(beta_m=0.7, nmaxold_m=2, weight_m=10.0)`` which are the corresponding mixing parameters for the magnetization density. Here ``beta`` is the linear mixing coefficient, ``nmaxold`` is the number of old densities used, and ``weight`` is the weight used by the metric, if any. MixerDif seems to be a good choice for spin-polarized molecules. MixerSum is sometimes better for bulk systems. References ---------- .. [#Pulay1980] Pulay, Chem. Phys. Lett. **73**, 393 (1980) .. [#Kresse1996] Kresse, Phys. Rev. 
B **54**, 11169 (1996) gpaw-24.1.0/doc/documentation/densitymix/metric.py000066400000000000000000000026631454550013000222100ustar00rootroot00000000000000# creates: metric.png import numpy as np import matplotlib.pyplot as plt from math import pi, cos # Special points in the BZ of a simple cubic cell G = pi * np.array([0., 0., 0.]) R = pi * np.array([1., 1., 1.]) X = pi * np.array([1., 0., 0.]) M = pi * np.array([1., 1., 0.]) # The path for the band plot path = [X, G, R, X, M, G] textpath = [r'$X$', r'$\Gamma$', r'$R$', r'$X$', r'$M$', r'$\Gamma$'] # Make band data qvec = [] lines = [0] previous = path[0] for next in path[1:]: Npoints = int(round(20 * np.linalg.norm(next - previous))) lines.append(lines[-1] + Npoints) for t in np.linspace(0, 1, Npoints): qvec.append((1 - t) * previous + t * next) previous = next vasp = [1 / max(np.linalg.norm(q), 1e-6)**2 for q in qvec] gpaw = [(1 + cos(qx) + cos(qy) + cos(qz) + cos(qx) * cos(qy) + cos(qx) * cos(qz) + cos(qy) * cos(qz) + cos(qx) * cos(qy) * cos(qz)) / 8. 
for qx, qy, qz in qvec] # Plot band data fig = plt.figure(1, figsize=(5, 3), dpi=90) fig.subplots_adjust(left=.1, right=.95) lim = [0, lines[-1], 0, 1.25] plt.plot(vasp, 'k:', label='VASP') plt.plot(gpaw, 'k-', label='GPAW') for q in lines: plt.plot([q, q], lim[2:], 'k-') plt.xticks(lines, textpath) plt.yticks([0, 1], [r'$1$', r'$w+1$']) plt.axis(lim) kwpad = {'borderpad': 0.2, 'borderaxespad': 0.06} plt.legend(loc='upper right', **kwpad) plt.title('Special metric for density changes') plt.savefig('metric.png', dpi=90) # plt.show() gpaw-24.1.0/doc/documentation/directmin/000077500000000000000000000000001454550013000201255ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/directmin/__init__.py000066400000000000000000000000001454550013000222240ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/directmin/agts.py000066400000000000000000000004471454550013000214420ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): run(script='h2o_lcao.py') r1 = run(script='g2_dm_ui_vs_scf.py', cores=8, tmax='4h') with r1: run(script='plot_g2.py') r2 = run(script='wm_dm_vs_scf.py', cores=8, tmax='1h') with r2: run(script='plot_h2o.py') gpaw-24.1.0/doc/documentation/directmin/directmin.rst000066400000000000000000000247371454550013000226520ustar00rootroot00000000000000.. _directmin: ================================ Direct Minimization Methods ================================ Direct minimization methods are an alternative to self-consistent field eigensolvers avoiding density mixing and diagonalization of the Kohn-Sham Hamiltonian matrix. PW and FD mode -------------- The energy is minimized w.r.t. orbitals subject to orthonomality constraints .. math:: E_0 = \min_{{\bf\Psi} {\bf \Psi^\dagger} = I} E[{\bf\Psi}]. Orbitals are updated at each step according to iteratives: .. 
math:: {\bf\Psi}^{(k+1)} \leftarrow {\bf\Psi}^{(k)} + \alpha {\bf V}^{(k)}, where the search direction, :math:`{\bf V}^{(k)}`, is calculated according to the L-BFGS algorithm and projected on the tangent space to the orbitals. After each iteration, an orthonormalization procedure is applied to satisfy the orthonormality constraints. For details of the implementation see Ref. [#Ivanov2021pwfd]_. Example ~~~~~~~~ .. literalinclude:: h2o_pw.py If you want to converge the unoccupied orbitals too then set: * ``converge_unocc=True``. LCAO mode ---------- Exponential Transformation Direct Minimization ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The orbitals are expanded into a finite basis set: .. math:: \phi_{i} ({\bf r}) = \sum_{\mu=1..M} O_{\mu i} \chi_{\mu}({\bf r}), \quad i = 1 .. M and the energy needs to be minimized with respect to the expansion coefficients subject to orthonormality constraints: .. math:: E_0 = \min_{O^{\dagger}SO = I} E\left(O\right) If we have some orthonormal reference orbitals with known coefficient matrix (c.m.) `C`, then *any* c.m. `O` can be obtained from the reference c.m. `C` by some unitary transformation: .. math:: O = C U where `U` is a unitary matrix. Thus, the objective is to find the unitary matrix that transforms the reference c.m. into an optimal c.m., minimizing the energy of the electronic system. A unitary matrix can be parametrized as the exponential of a skew-hermitian matrix `A`: .. math:: U = \exp(A) This parametrization is advantageous since the orthonormality constraints are automatically satisfied: .. math:: UU^{\dagger} = \exp(A)\exp(A^{\dagger}) = \exp(A)\exp(-A) = I If the reference c.m. is fixed, then the energy is a function of `A`: .. math:: F\left(A\right) = E\left(C e^A \right) Skew-hermitian matrices form a linear space and, therefore, conventional unconstrained minimization algorithms can be applied to minimize the energy with respect to `A`. 
Example ~~~~~~~~ To run an LCAO calculation with direct minimization, it is necessary to specify the following in the calculator: * ``nbands='nao'``. Ensures that the number of bands used in the calculation is equal to the number of atomic orbitals. * ``mixer={'backend': 'no-mixing'}``. No density mixing. * ``occupations={'name': 'fixed-uniform'}``. Uniform distribution of the occupation numbers (same number of occupied bands for each **k**-point per spin). Here is an example of how to run a calculation with direct minimization in LCAO: .. literalinclude:: h2o_lcao.py As one can see, it is possible to specify the amount of memory used in the L-BFGS algorithm. The larger the memory, the fewer iterations required to reach convergence. Default value is 3. One cannot use a memory larger than the number of iterations after which the reference orbitals are updated to the canonical orbitals (specified by the keyword ``update_ref_orbs_counter`` in ``LCAOETDM``, default value is 20). **Important:** The exponential matrix is calculated here using the SciPy function *expm*. In order to obtain good performance, please make sure that your SciPy library is optimized. Otherwise see `Implementation Details`_. When all occupied orbitals of a given spin channel have the same occupation number, as in the example above, the functional is unitary invariant and a more efficient algorithm for computing the matrix exponential should be used (see also `Implementation Details`_): .. code-block:: python calc = GPAW(eigensolver=LCAOETDM(matrix_exp='egdecomp-u-invar', representation='u-invar'), ...) .. _Performance: Performance ~~~~~~~~~~~~~ G2 Molecular Set ````````````````` Here we compare the number of energy and gradient evaluations in direct minimization using the L-BFGS algorithm (memory=3) with preconditioning and the number of iterations in the SCF LCAO eigensolver with default density mixing. The left panel of the figure below shows several examples for molecules from the G2 set. 
The right panel shows the results of direct minimization and SCF for molecules that are difficult to converge; these molecules are radicals and the calculations are carried out within spin-polarized DFT. Direct minimization demonstrates stable performance in all cases. Note that by choosing different parameters for the density mixing one may improve the convergence of the SCF methods. The calculations were run with the script :download:`g2_dm_ui_vs_scf.py`, while the figure was generated using :download:`plot_g2.py`. .. image:: g2.png 32-128 Water Molecules ``````````````````````` In this test, the ground state of liquid water configurations with 32, 64, 128 molecules and the TZDP basis set is calculated. The geometries are taken from `here `_. The GPAW parameters used in this test include: PBE functional, grid spacing h=0.2 Å, and 8-core domain decomposition. The convergence criterion is a change in density smaller than `10^{-6}` electrons per valence electron. The ratio of the elapsed times spent by the default LCAO eigensolver and the direct minimization methods as a function of the number of water molecules is shown below. In direct minimization, the unitary invariant representation has been used [#Hutter]_ (see `Implementation Details`_). As can be seen, direct minimization converges faster by around a factor of 1.5 for 32 molecules and around a factor of 2 for 128 molecules. The calculations were run with the script :download:`wm_dm_vs_scf.py`, while the figure was generated using :download:`plot_h2o.py`. .. image:: water.png :width: 100% :align: center Implementation Details ~~~~~~~~~~~~~~~~~~~~~~ The implementation follows ref. [#Ivanov2021]_ The iteratives are: .. math:: A^{(k+1)} = A^{(k)} + \gamma^{(k)} Q^{(k)} where `Q` is the search direction and `\gamma` is step length. 
The search direction is calculated according to the L-BFGS algorithm with preconditioning, and the step length satisfies the Strong Wolfe Conditions [#Nocedal]_ and/or approximate Wolfe Conditions [#Hager]_. The last two conditions are important as they guarantee stability and fast convergence of the L-BFGS algorithm [#Nocedal]_. Apart from the L-BFGS algorithm, one can use a limited-memory symmetric rank-one (L-SR1, default memory 20) quasi-Newton algorithm, which has also been shown to have good convergence performance and is especially recommended for calculations of excited states [#Levi2020]_ (see also :ref:`mom`). There is also an option to use a conjugate gradient algorithm, but it is less efficient. Here are the three algorithms that can be used to calculate the matrix exponential: 1. The scaling and squaring algorithm, which is based on the equation: .. math:: \exp(A) = \exp(A/2^{m})^{2^{m}} Since :math:`A/2^{m}` has a small norm, :math:`\exp(A/2^{m})` can be effectively estimated using a Padé approximant of order :math:`[q/q]`. Here :math:`q` and :math:`m` are positive integers. The scaling and squaring algorithm of Al-Mohy and Higham [#AlMoly]_ from the SciPy library is used. 2. Using the eigendecomposition of the matrix :math:`iA`. Let :math:`\Omega` be a diagonal real-valued matrix with elements corresponding to the eigenvalues of the matrix :math:`iA`, and let :math:`U` be the matrix having as columns the eigenvectors of :math:`iA`. Then the matrix exponential of :math:`A` is: .. math:: \exp(A) = U \exp(-i\Omega) U^{\dagger} 3. For a unitary invariant functional, the matrix `A` can be parametrized as [#Hutter]_: .. math:: A = \begin{pmatrix} 0 & A_{ov} \\ -A_{ov}^{\dagger} & 0 \end{pmatrix} where :math:`A_{ov}` is a :math:`N \times (M-N)` matrix, where :math:`N` is the number of occupied states and :math:`M` is the number of basis functions, while `0` is an :math:`N \times N` zero matrix. 
In this case the matrix exponential can be calculated as [#Hutter]_: .. math:: \exp(A) = \begin{pmatrix} \cos(P^{1/2}) & P^{-1/2} \sin(P^{1/2}) A_{ov}\\ -A_{ov}^{\dagger} P^{-1/2} \sin(P^{1/2}) & I_{M-N} + A_{ov}^{\dagger} \left(\cos(P^{1/2}) - I_N\right) P^{-1} A_{ov} \end{pmatrix} where :math:`P = A_{ov}A_{ov}^{\dagger}`. The first method is the default choice. To use the second algorithm do the following: .. code-block:: python from gpaw.directmin.etdm_lcao import LCAOETDM calc = GPAW(eigensolver=LCAOETDM(matrix_exp='egdecomp'), ...) To use the third method, first ensure that your functional is unitary invariant and then do the following: .. code-block:: python from gpaw.directmin.etdm_lcao import LCAOETDM calc = GPAW(eigensolver=LCAOETDM(matrix_exp='egdecomp-u-invar', representation='u-invar'), ...) The last option is the most efficient but it is valid only for unitary invariant functionals (e.g. when all occupied orbitals of a given spin channel have the same occupation number). For all three algorithms, the unitary invariant representation can be chosen. ScaLAPACK and the parallelization over bands are currently not supported. It is also not recommended to use the direct minimization for metals because the occupation numbers are not found variationally but rather fixed during the calculation. References ~~~~~~~~~~ .. [#Ivanov2021pwfd] A. V. Ivanov, G. Levi, E.Ö. Jónsson, and H. Jónsson, *J. Chem. Theory Comput.*, **17**, 5034, (2021). .. [#Ivanov2021] A. V. Ivanov, E.Ö. Jónsson, T. Vegge, and H. Jónsson, *Comput. Phys. Commun.*, **267**, 108047 (2021). .. [#Levi2020] G. Levi, A. V. Ivanov, and H. Jónsson, *J. Chem. Theory Comput.*, **16**, 6968, (2020). .. [#Hutter] J. Hutter, M. Parrinello, and S. Vogel, *J. Chem. Phys.* **101**, 3862 (1994) .. [#Nocedal] J. Nocedal and S. J. Wright, *Numerical Optimization*, 2nd ed. (Springer, New York, NY, USA, 2006). .. [#Hager] W. W. Hager and H. Zhang, *SIAM Journal on Optimization* **16**, 170 (2006). .. [#AlMoly] A. H. Al-Mohy and N. 
J. Higham, *SIAM J. Matrix Anal. Appl.*, **31(3)**, 970–989, (2009). gpaw-24.1.0/doc/documentation/directmin/g2_dm_ui_vs_scf.py000066400000000000000000000034451454550013000235350ustar00rootroot00000000000000# Generate the data visualized in web-page: g2.png from ase.collections import g2 from doc.documentation.directmin import tools_and_data from gpaw import LCAO, ConvergenceError from ase.parallel import paropen # Results (total energy, number of iterations) obtained # in a previous calculation. Used to compare with the # current results. saved_results = \ {0: tools_and_data.read_data(tools_and_data.data_g2_scf), 1: tools_and_data.read_data(tools_and_data.data_g2_dm)} calc_args = {'xc': 'PBE', 'h': 0.15, 'convergence': {'density': 1.0e-6, 'eigenstates': 100}, 'maxiter': 333, 'basis': 'dzp', 'mode': LCAO(), 'symmetry': 'off'} eig_string = ['scf', 'dm'] with paropen('dm-g2-results.txt', 'w') as fdm, \ paropen('scf-g2-results.txt', 'w') as fscf: fd = {0: fscf, 1: fdm} for name in saved_results[0].keys(): atoms = g2[name] atoms.center(vacuum=7.0) for dm in [0, 1]: txt = name + eig_string[dm] + '.txt' tools_and_data.set_calc(atoms, calc_args, txt, dm) try: e, iters, t = tools_and_data.get_energy_and_iters(atoms, dm) # Compare with saved results from previous calculation e_diff_saved_calc = abs(saved_results[dm][name][1] - e) iters_diff_saved_calc = abs(saved_results[dm][name][0] - iters) tools_and_data.compare_calculated_and_saved_results( e_diff_saved_calc, iters_diff_saved_calc, eig_string, name, dm) print(name + "\t{}".format(iters), file=fd[dm], flush=True) except ConvergenceError: print(name + "\t{}".format(None), file=fd[dm], flush=True) gpaw-24.1.0/doc/documentation/directmin/h2o_lcao.py000066400000000000000000000012451454550013000221670ustar00rootroot00000000000000from gpaw import GPAW, LCAO from ase import Atoms import numpy as np from gpaw.directmin.etdm_lcao import LCAOETDM # Water molecule: d = 0.9575 t = np.pi / 180 * 104.51 H2O = Atoms('OH2', 
positions=[(0, 0, 0), (d, 0, 0), (d * np.cos(t), d * np.sin(t), 0)]) H2O.center(vacuum=5.0) calc = GPAW(mode=LCAO(), basis='dzp', eigensolver=LCAOETDM( searchdir_algo={'name': 'l-bfgs-p', 'memory': 10}), occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao' ) H2O.set_calculator(calc) H2O.get_potential_energy() gpaw-24.1.0/doc/documentation/directmin/h2o_pw.py000066400000000000000000000011121454550013000216700ustar00rootroot00000000000000import numpy as np from ase import Atoms from gpaw import GPAW from gpaw.directmin.etdm_fdpw import FDPWETDM # Water molecule: d = 0.9575 t = np.pi / 180 * 104.51 H2O = Atoms('OH2', positions=[(0, 0, 0), (d, 0, 0), (d * np.cos(t), d * np.sin(t), 0)]) H2O.center(vacuum=5.0) calc = GPAW(mode='pw', eigensolver=FDPWETDM(converge_unocc=False), mixer={'backend': 'no-mixing'}, occupations={'name': 'fixed-uniform'}, spinpol=True) H2O.set_calculator(calc) H2O.get_potential_energy() gpaw-24.1.0/doc/documentation/directmin/plot_g2.py000066400000000000000000000037131454550013000220510ustar00rootroot00000000000000# web-page: g2.png import matplotlib.pyplot as plt import numpy as np from doc.documentation.directmin import tools_and_data def read_molecules(filename, molnames): with open(filename, 'r') as fd: calculated_data_string = fd.read() calculated_data = \ tools_and_data.read_data(calculated_data_string) data2return = [] for _ in molnames: data2return.append(_) data2return.append(calculated_data[_][0]) return data2return f = plt.figure(figsize=(12, 4), dpi=240) plt.subplot(121) mollist = \ ['PH3', 'P2', 'CH3CHO', 'H2COH', 'CS', 'OCHCHO', 'C3H9C', 'CH3COF', 'CH3CH2OCH3', 'HCOOH'] data = read_molecules('scf-g2-results.txt', mollist) # scf x = data[::2] y = data[1::2] plt.xticks(range(len(x)), x, rotation=45) plt.grid(color='k', linestyle=':', linewidth=0.3) plt.plot(range(len(x)), y, 'b^-', label='SCF', fillstyle='none') # direct_min data = read_molecules('dm-g2-results.txt', mollist) x = data[::2] # 2 is added to 
account for the diagonalization # performed at the beginning and at the end of etdm y = np.asarray(data[1::2]) + 2 plt.plot(range(len(x)), y, 'ro-', label='ETDM', fillstyle='none') plt.legend() plt.ylabel('Number of iterations (energy and gradients calls)') plt.subplot(122) # direct_min mollist = \ ['NO', 'CH', 'OH', 'ClO', 'SH'] data = read_molecules('dm-g2-results.txt', mollist) x = data[::2] y = np.asarray(data[1::2]) + 2 plt.xticks(range(len(x)), x, rotation=45) plt.grid(color='k', linestyle=':', linewidth=0.3) plt.plot(range(len(x)), y, 'ro-', label='ETDM', fillstyle='none') # scf data = read_molecules('scf-g2-results.txt', mollist) x = data[::2] y = np.asarray(data[1::2]) plt.xticks(range(len(x)), x, rotation=45) plt.grid(color='k', linestyle=':', linewidth=0.3) plt.plot(range(len(x)), y, 'bo-', label='SCF', fillstyle='none') plt.legend() plt.ylabel('Number of iterations (energy and gradients calls)') f.savefig("g2.png", bbox_inches='tight') gpaw-24.1.0/doc/documentation/directmin/plot_h2o.py000066400000000000000000000017271454550013000222340ustar00rootroot00000000000000# web-page: water.png import matplotlib.pyplot as plt import numpy as np # Data from wm_dm_vs_scf.py calculated_data = np.genfromtxt('water-results.txt') # x should be number of water molecules. # First column is number of atoms, so divide by 3 to # obtain the number of water molecules. 
x = calculated_data[:, 0] / 3 f = plt.figure(figsize=(12, 4), dpi=240) plt.subplot(121) plt.grid(color='k', linestyle=':', linewidth=0.3) plt.title('Ratio of total elapsed times') plt.ylabel(r'$T_{scf}$ / $T_{etdm}$') plt.xlabel('Number of water molecules') plt.ylim(1.0, 3.0) plt.yticks(np.arange(1, 3.1, 0.5)) plt.plot(x, calculated_data[:, 1], 'bo-') plt.subplot(122) plt.grid(color='k', linestyle=':', linewidth=0.3) plt.title('Ratio of elapsed times per iteration') plt.ylabel(r'$T_{scf}$ / $T_{etdm}$') plt.xlabel('Number of water molecules') plt.ylim(1.0, 3.0) plt.yticks(np.arange(1, 3.1, 0.5)) plt.plot(x, calculated_data[:, 2], 'ro-') f.savefig("water.png", bbox_inches='tight') gpaw-24.1.0/doc/documentation/directmin/tools_and_data.py000066400000000000000000000330441454550013000234560ustar00rootroot00000000000000import time import numpy as np import warnings from gpaw import GPAW, FermiDirac from gpaw.directmin.etdm_lcao import LCAOETDM def read_data(output): saved_data = {} for i in output.splitlines(): if i == '': continue mol = i.split() if 'None' in mol[1:]: saved_data[mol[0]] = np.array([None for _ in mol[1:]]) else: saved_data[mol[0]] = np.array([float(_) for _ in mol[1:]]) return saved_data def set_calc(atoms, calc_args, txt, dm): if dm: calc = GPAW(**calc_args, txt=txt, eigensolver=LCAOETDM(matrix_exp='egdecomp-u-invar', representation='u-invar'), mixer={'backend': 'no-mixing'}, nbands='nao', occupations={'name': 'fixed-uniform'}) else: calc = GPAW(**calc_args, txt=txt, occupations=FermiDirac(width=0.0, fixmagmom=True)) atoms.set_calculator(calc) def get_energy_and_iters(atoms, dm): t1 = time.time() e = atoms.get_potential_energy() t2 = time.time() if dm: iters = atoms.calc.wfs.eigensolver.eg_count else: iters = atoms.calc.get_number_of_iterations() return e, iters, t2 - t1 def compare_calculated_and_saved_results(e_diff, iters_diff, eig_string, name, dm): if e_diff > 1.0e-2: warnings.warn('Absolute difference in total energy ' 'for ' + eig_string[dm] + ' 
calculation of ' + name + ' with respect to saved results ' 'is %f eV' % e_diff) if iters_diff > 3: warnings.warn('Absolute difference in total number of ' 'iterations for ' + eig_string[dm] + ' calculation of ' + name + ' with respect ' 'to saved results is %d' % iters_diff) # Coordinates of liquid water configuration with 32 molecules positions = [ (-0.069, 0.824, -1.295), (0.786, 0.943, -0.752), (-0.414, -0.001, -0.865), (-0.282, -0.674, -3.822), (0.018, -0.147, -4.624), (-0.113, -0.080, -3.034), (2.253, 1.261, 0.151), (2.606, 0.638, -0.539), (2.455, 0.790, 1.019), (3.106, -0.276, -1.795), (2.914, 0.459, -2.386), (2.447, -1.053, -1.919), (6.257, -0.625, -0.626), (7.107, -1.002, -0.317), (5.526, -1.129, -0.131), (5.451, -1.261, -2.937), (4.585, -0.957, -2.503), (6.079, -0.919, -2.200), (-0.515, 3.689, 0.482), (-0.218, 3.020, -0.189), (0.046, 3.568, 1.382), (-0.205, 2.640, -3.337), (-1.083, 2.576, -3.771), (-0.213, 1.885, -2.680), (0.132, 6.301, -0.278), (1.104, 6.366, -0.068), (-0.148, 5.363, -0.112), (-0.505, 6.680, -3.285), (-0.674, 7.677, -3.447), (-0.965, 6.278, -2.517), (4.063, 3.342, -0.474), (4.950, 2.912, -0.663), (3.484, 2.619, -0.125), (2.575, 2.404, -3.170), (1.694, 2.841, -3.296), (3.049, 2.956, -2.503), (6.666, 2.030, -0.815), (7.476, 2.277, -0.316), (6.473, 1.064, -0.651), (6.860, 2.591, -3.584), (6.928, 3.530, -3.176), (6.978, 2.097, -2.754), (2.931, 6.022, -0.243), (3.732, 6.562, -0.004), (3.226, 5.115, -0.404), (2.291, 7.140, -2.455), (1.317, 6.937, -2.532), (2.586, 6.574, -1.669), (6.843, 5.460, 1.065), (7.803, 5.290, 0.852), (6.727, 5.424, 2.062), (6.896, 4.784, -2.130), (6.191, 5.238, -2.702), (6.463, 4.665, -1.259), (0.398, 0.691, 4.098), (0.047, 1.567, 3.807), (1.268, 0.490, 3.632), (2.687, 0.272, 2.641), (3.078, 1.126, 3.027), (3.376, -0.501, 2.793), (6.002, -0.525, 4.002), (6.152, 0.405, 3.660), (5.987, -0.447, 4.980), (0.649, 3.541, 2.897), (0.245, 4.301, 3.459), (1.638, 3.457, 3.084), (-0.075, 5.662, 4.233), (-0.182, 6.512, 3.776), 
(-0.241, 5.961, 5.212), (3.243, 2.585, 3.878), (3.110, 2.343, 4.817), (4.262, 2.718, 3.780), (5.942, 2.582, 3.712), (6.250, 3.500, 3.566), (6.379, 2.564, 4.636), (2.686, 5.638, 5.164), (1.781, 5.472, 4.698), (2.454, 6.286, 5.887), (6.744, 5.276, 3.826), (6.238, 5.608, 4.632), (7.707, 5.258, 4.110), (8.573, 8.472, 0.407), (9.069, 7.656, 0.067), (8.472, 8.425, 1.397), (8.758, 8.245, 2.989), (9.294, 9.091, 3.172), (7.906, 8.527, 3.373), (4.006, 7.734, 3.021), (4.685, 8.238, 3.547), (3.468, 7.158, 3.624), (5.281, 6.089, 6.035), (5.131, 7.033, 6.378), (4.428, 5.704, 5.720), (5.067, 7.323, 0.662), (5.785, 6.667, 0.703), (4.718, 7.252, 1.585) ] # Results (total energy, number of iterations) of LCAO calculations on # liquid water configurations. Obtained with wm_dm_vs_scf.py. # 0: scf # 1: dm wm_saved_results = {0: np.array([[-449.2501666690716, 22], [-899.7732083940263, 21], [-1802.1232238298205, 21]]), 1: np.array([[-449.29433888653887, 15], [-899.8689779482846, 15], [-1802.1980642103324, 15]])} # Results (total energy, number of iterations) of calculations # on the molecules from the G2 set using direct minimization # in LCAO. Obtained with g2_dm_ui_vs_scf.py. 
data_g2_dm = \ """ PH3 10 -15.067594250491323 P2 8 -8.379318635747365 CH3CHO 17 -37.75204876792297 H2COH 16 -23.69898686685713 CS 13 -9.417455952101529 OCHCHO 15 -35.666514372774785 C3H9C 15 -66.59136951997174 CH3COF 16 -38.4905594550757 CH3CH2OCH3 14 -61.30296354126676 HCOOH 16 -28.408002357246865 HCCl3 14 -17.40322721170416 HOCl 19 -9.763721177128842 H2 6 -6.490747304345913 SH2 10 -10.506039275136805 C2H2 10 -21.831120179124515 C4H4NH 15 -60.36706614168592 CH3SCH3 13 -42.75640371331927 SiH2_s3B1d 11 -8.439696476805787 CH3SH 12 -26.575973842997186 CH3CO 17 -32.72729035439991 CO 11 -13.815908836466543 ClF3 18 -7.279906905738146 SiH4 9 -18.407226340441163 C2H6CHOH 15 -61.72625777640576 CH2NHCH2 15 -42.17124083425262 isobutene 16 -63.67223781609167 HCO 20 -16.124831216748227 bicyclobutane 14 -54.503393011455486 LiF 14 -5.817805188634345 C2H6 12 -39.5282496643079 CN 18 -12.014894676426977 ClNO 20 -13.63804219450336 SiF4 11 -25.20937860100401 H3CNH2 14 -34.54290009661039 methylenecyclopropane 15 -54.6879018075723 CH3CH2OH 15 -45.38183883038578 NaCl 15 -3.9915002448726358 CH3Cl 13 -21.364341391719453 CH3SiH3 12 -34.95738850941009 AlF3 12 -18.708233835513557 C2H3 15 -24.91262778147432 ClF 13 -3.083951458122166 PF3 14 -17.6200414942034 PH2 11 -10.333093382175694 CH3CN 16 -35.40005851345955 cyclobutene 13 -55.09356789335748 CH3ONO 18 -37.48477835032511 SiH3 11 -13.414712732958558 C3H6_D3h 12 -47.16406379242271 CO2 10 -21.686869966483524 NO 47 -11.327631125222853 trans-butane 13 -71.89450752855547 H2CCHCl 15 -29.238625168100576 LiH 11 -3.531257198110949 NH2 13 -12.766498080792353 CH 26 -5.674378327614047 CH2OCH2 15 -36.76143971942084 C6H6 12 -73.85217535506382 CH3CONH2 18 -50.16888351727816 cyclobutane 12 -63.53872939601613 H2CCHCN 18 -43.162802485831776 butadiene 14 -55.3102844458418 H2CO 14 -21.136756510681643 CH3COOH 17 -44.92291892250132 HCF3 14 -23.33193072036996 CH3S 13 -21.690397937558533 CS2 11 -15.408202617521624 SiH2_s1A1d 10 -9.141455540051073 C4H4S 16 
-52.33896491532939 N2H4 14 -29.04826077564009 OH 23 -6.982508936175612 CH3OCH3 16 -44.99441429004591 C5H5N 16 -69.10577872701126 H2O 12 -13.248879020254805 HCl 9 -5.362280651746735 CH2_s1A1d 11 -10.757688251327073 CH3CH2SH 15 -42.77182919987737 CH3NO2 16 -37.59598912842743 BCl3 12 -14.568120361655941 C4H4O 16 -54.632472747679074 CH3O 14 -23.459267720379835 CH3OH 14 -29.060866334893923 C3H7Cl 15 -53.83123625718132 isobutane 13 -71.94266082590747 CCl4 12 -15.275852686988774 CH3CH2O 16 -39.641252099285346 H2CCHF 15 -30.904670428100868 C3H7 15 -50.22805517183567 CH3 11 -17.49384850623045 O3 16 -12.433529583245473 C2H4 11 -30.958765485095768 NCCN 12 -30.658037813517044 S2 8 -6.219282082506314 AlCl3 13 -12.98573282335161 SiCl4 13 -16.78447293181344 SiO 15 -10.262784811760204 C3H4_D2d 15 -38.56777011081724 COF2 15 -22.14128628537746 2-butyne 14 -54.939470685326455 C2H5 14 -33.8500443324274 BF3 11 -21.572371839064136 N2O 14 -20.102195376334976 F2O 15 -7.399576680971737 SO2 13 -14.870794217160935 H2CCl2 14 -19.399637762862064 CF3CN 16 -34.906808720109986 HCN 14 -18.73012010449129 C2H6NH 15 -50.52630869093685 OCS 14 -18.601453536910835 ClO 52 -4.6936828818312275 C3H8 13 -55.717607762841126 HF 11 -7.0029471215133015 O2 8 -9.108226430211426 SO 14 -7.920249959323364 NH 11 -7.489024231960305 C2F4 13 -30.24116221475319 NF3 15 -14.064869244897391 CH2_s3B1d 12 -11.464140729402992 CH3CH2Cl 15 -37.64619843915724 CH3COCl 18 -36.57929939296139 NH3 12 -18.687024847371475 C3H9N 15 -66.57679757132048 CF4 12 -23.61462611454026 C3H6_Cs 15 -47.320065716714126 Si2H6 10 -30.15440392023656 HCOOCH3 16 -44.2664512112896 CCH 16 -14.799136984583923 Si2 9 -4.793225261681442 C2H6SO 17 -48.02622324542428 C5H8 13 -70.91759596894674 H2CF2 13 -22.97018289970631 Li2 7 -1.3181686820255385 CH2SCH2 13 -34.71535928912935 C2Cl4 14 -23.59447986714585 C3H4_C3v 16 -38.41646304753126 CH3COCH3 19 -54.26543917872743 F2 9 -2.7777443960323573 CH4 11 -23.399963889551103 SH 41 -5.401168291262951 H2CCO 15 
-29.572163404216557 CH3CH2NH2 14 -50.82292789668282 N2 8 -15.608612050282868 Cl2 9 -2.535359129835213 H2O2 14 -16.95818938312642 Na2 10 -1.064315921432062 BeH 10 -2.9158725067581828 C3H4_C2v 14 -37.68429215491533 NO2 15 -17.169478949870694 """ # Results (total energy, number of iterations) of calculations # on the molecules from the G2 set using the standard SCF eigensolver # in LCAO . Obtained with g2_dm_ui_vs_scf.py. data_g2_scf = \ """ PH3 18 -15.067939739162565 P2 14 -8.379364578606403 CH3CHO 17 -37.752640153790544 H2COH 18 -23.700059799196794 CS 15 -9.417393294333172 OCHCHO 17 -35.66811875892615 C3H9C 19 -66.59205808003622 CH3COF 17 -38.4908567580481 CH3CH2OCH3 17 -61.30320737242064 HCOOH 18 -28.40857295188662 HCCl3 16 -17.403156748455167 HOCl 16 -9.765718189549954 H2 12 -6.490910413647257 SH2 18 -10.506376963353159 C2H2 15 -21.8315594881508 C4H4NH 17 -60.36737073551913 CH3SCH3 18 -42.75755391907749 SiH2_s3B1d 15 -8.439652470852263 CH3SH 19 -26.57600571342069 CH3CO 15 -32.727526205821974 CO 14 -13.817286401313696 ClF3 15 -7.280418158543331 SiH4 16 -18.406906983682326 C2H6CHOH 16 -61.727709924460434 CH2NHCH2 17 -42.170691521207104 isobutene 17 -63.67196360060958 HCO 17 -16.12661908530326 bicyclobutane 15 -54.50318222862183 LiF 18 -5.818530975164816 C2H6 18 -39.52761899945607 CN 17 -12.013574497449207 ClNO 18 -13.637173578862363 SiF4 15 -25.21531130705909 H3CNH2 15 -34.542907090515556 methylenecyclopropane 17 -54.687880878011434 CH3CH2OH 15 -45.38267490450617 NaCl 16 -3.991481648956574 CH3Cl 18 -21.364671136168802 CH3SiH3 18 -34.957867383396355 AlF3 15 -18.70969513486594 C2H3 20 -24.912867583547744 ClF 15 -3.0836445438877313 PF3 16 -17.620133982392183 PH2 20 -10.333156669751412 CH3CN 17 -35.399551652561215 cyclobutene 15 -55.09416925492654 CH3ONO 22 -37.484991962456576 SiH3 19 -13.414822942888721 C3H6_D3h 15 -47.164632312754364 CO2 14 -21.690591409424943 NO 75 -11.327965166239705 trans-butane 17 -71.89452926449687 H2CCHCl 19 -29.238622363016635 LiH 17 
-3.5313140027080223 NH2 13 -12.76557469532906 CH 101 -5.673636310132295 CH2OCH2 18 -36.76395515833223 C6H6 15 -73.85339953604031 CH3CONH2 19 -50.168313481824775 cyclobutane 16 -63.539274905937184 H2CCHCN 18 -43.1621120373009 butadiene 19 -55.31058655984656 H2CO 15 -21.139006626605422 CH3COOH 19 -44.924174089136265 HCF3 16 -23.33279099725403 CH3S 17 -21.690728087340183 CS2 17 -15.408150740029571 SiH2_s1A1d 19 -9.141474384102668 C4H4S 19 -52.339132780598575 N2H4 15 -29.04650645552515 OH 92 -6.984028868322686 CH3OCH3 16 -44.99641117875515 C5H5N 19 -69.10427719659162 H2O 16 -13.250986473888151 HCl 16 -5.36259443336188 CH2_s1A1d 17 -10.75750241996795 CH3CH2SH 18 -42.77177811221571 CH3NO2 17 -37.59617685073973 BCl3 15 -14.567528269012527 C4H4O 18 -54.63443308676421 CH3O 17 -23.46123621228536 CH3OH 15 -29.061696621373784 C3H7Cl 18 -53.83150398571328 isobutane 18 -71.94286533151197 CCl4 15 -15.275216282079997 CH3CH2O 17 -39.64358031935646 H2CCHF 16 -30.90462662415892 C3H7 19 -50.22810186101624 CH3 15 -17.49423441816605 O3 17 -12.436218920717389 C2H4 15 -30.960041413667845 NCCN 17 -30.656829904791742 S2 17 -6.219212745370212 AlCl3 15 -12.985497496691243 SiCl4 16 -16.784060452785397 SiO 17 -10.264395373463588 C3H4_D2d 16 -38.567667331353746 COF2 16 -22.143807001114947 2-butyne 19 -54.940138451098214 C2H5 17 -33.851029358002215 BF3 14 -21.57253092881569 N2O 19 -20.101453880304287 F2O 15 -7.401790642950211 SO2 15 -14.870076420824926 H2CCl2 19 -19.399891835301172 CF3CN 19 -34.90747376810633 HCN 17 -18.729247544127606 C2H6NH 15 -50.52638663455311 OCS 17 -18.602824279134886 ClO 168 -4.694492551841379 C3H8 18 -55.71715479135624 HF 15 -7.004240357122936 O2 15 -9.111654239498307 SO 35 -7.921946288251748 NH 16 -7.488300012131835 C2F4 14 -30.241856053715637 NF3 15 -14.066501151063182 CH2_s3B1d 16 -11.46398769462341 CH3CH2Cl 18 -37.64667833838129 CH3COCl 17 -36.58066815772619 NH3 13 -18.686996789673476 C3H9N 16 -66.5765684297463 CF4 17 -23.614422687205757 C3H6_Cs 18 -47.31966084193544 
Si2H6 15 -30.154437351754027 HCOOCH3 19 -44.26973461011855 CCH 16 -14.799409011958037 Si2 17 -4.793115114511537 C2H6SO 18 -48.02735877864575 C5H8 15 -70.91659810523136 H2CF2 16 -22.971190008076956 Li2 21 -1.3181781742775984 CH2SCH2 20 -34.71520519895414 C2Cl4 17 -23.595139969720563 C3H4_C3v 16 -38.416472047250096 CH3COCH3 17 -54.26678298560979 F2 16 -2.7789800209690525 CH4 15 -23.40050852386761 SH 52 -5.401240463979583 H2CCO 15 -29.57475371469119 CH3CH2NH2 16 -50.82201231180034 N2 18 -15.60692193925049 Cl2 15 -2.535145846065773 H2O2 17 -16.958825223285647 Na2 11 -1.0643211358816327 BeH 45 -2.9158429067846203 C3H4_C2v 16 -37.684405607781144 NO2 16 -17.167930472676215 """ gpaw-24.1.0/doc/documentation/directmin/wm_dm_vs_scf.py000066400000000000000000000050651454550013000231530ustar00rootroot00000000000000# Generate the data visualized in web-page: water.png import numpy as np from doc.documentation.directmin import tools_and_data from ase import Atoms from gpaw import LCAO, ConvergenceError from ase.parallel import paropen from gpaw.mpi import world from gpaw.atom.basis import BasisMaker from gpaw import setup_paths setup_paths.insert(0, '.') for symbol in ['H', 'O']: bm = BasisMaker(symbol, xc='PBE') basis = bm.generate(zetacount=3, polarizationcount=2) basis.write_xml() positions = tools_and_data.positions L = 9.8553729 r = [[1, 1, 1], [2, 1, 1], [2, 2, 1]] calc_args = {'xc': 'PBE', 'h': 0.2, 'convergence': {'density': 1.0e-6, 'eigenstates': 100}, 'maxiter': 333, 'basis': 'tzdp', 'mode': LCAO(), 'symmetry': 'off', 'parallel': {'domain': world.size}} # Results (total energy, number of iterations) obtained # in a previous calculation. Used to compare with the # current results. 
saved_results = tools_and_data.wm_saved_results eig_string = ['scf', 'dm'] t = np.zeros(2) iters = np.zeros(2) with paropen('water-results.txt', 'w') as fd: for i, x in enumerate(r): atoms = Atoms('32(OH2)', positions=positions) atoms.set_cell((L, L, L)) atoms.set_pbc(1) atoms = atoms.repeat(x) name = str(len(atoms) // 3) + '_H2Omlcls' try: for dm in [0, 1]: txt = name + '_' + eig_string[dm] + '.txt' tools_and_data.set_calc(atoms, calc_args, txt, dm) e, iters[dm], t[dm] = \ tools_and_data.get_energy_and_iters(atoms, dm) # Compare with saved results from previous calculation e_diff_saved_calc = abs(saved_results[dm][i, 0] - e) iters_diff_saved_calc = \ abs(saved_results[dm][i, 1] - iters[dm]) tools_and_data.compare_calculated_and_saved_results( e_diff_saved_calc, iters_diff_saved_calc, eig_string, name, dm) # Ratio of elapsed times per iteration # 2 is added to account for the diagonalization # performed at the beginning and at the end of etdm ratio_per_iter = (t[0] / iters[0]) / (t[1] / (iters[1] + 2)) print("{}\t{}\t{}".format( len(atoms), t[0] / t[1], ratio_per_iter), flush=True, file=fd) except ConvergenceError: print("{}\t{}\t{}".format( len(atoms), None, None), flush=True, file=fd) gpaw-24.1.0/doc/documentation/do-gmf/000077500000000000000000000000001454550013000173205ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/do-gmf/H2_instability.py000066400000000000000000000026611454550013000225630ustar00rootroot00000000000000from gpaw import GPAW, LCAO from gpaw.directmin.derivatives import Davidson from ase import Atoms calc = GPAW(xc='PBE', mode=LCAO(), h=0.2, basis='dzp', spinpol=True, eigensolver='etdm-lcao', occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='H2_GS.txt') atoms = Atoms('H2', positions=[(0, 0, 0), (0, 0, 2.0)]) atoms.center(vacuum=5.0) atoms.set_pbc(False) atoms.calc = calc # Ground state calculation E_GS_spin_symmetric = atoms.get_potential_energy() # Stability analysis using the 
generalized Davidson method davidson = Davidson(calc.wfs.eigensolver, 'davidson_H2_S.txt', seed=42) davidson.run(calc.wfs, calc.hamiltonian, calc.density) # Break the instability by displacing along the eigenvector of the electronic # Hessian corresponding to the negative eigenvalue C_ref = [calc.wfs.kpt_u[x].C_nM.copy() for x in range(len(calc.wfs.kpt_u))] davidson.break_instability(calc.wfs, n_dim=[10, 10], c_ref=C_ref, number=1) # Reconverge the electronic structure calc.calculate(properties=['energy'], system_changes=['positions']) E_GS_broken_spin_symmetry = atoms.get_potential_energy() # Repeat stability analysis to confirm that a minimum was found davidson = Davidson(calc.wfs.eigensolver, 'davidson_H2_BS.txt', seed=42) davidson.run(calc.wfs, calc.hamiltonian, calc.density) gpaw-24.1.0/doc/documentation/do-gmf/N-Phenylpyrrole.xyz000066400000000000000000000033031454550013000231420ustar00rootroot0000000000000020 Lattice="20.27909865320759 0.0 0.0 0.0 20.194227899817882 0.0 0.0 0.0 24.238478373740524" Properties=species:S:1:pos:R:3:magmoms:R:1 energy=-128.80117597214567 dipole="6.016048366334884e-14 8.648069526606396e-14 0.45606466144580454" magmom=-1.6765558975898102e-14 free_energy=-128.80117597214567 pbc="F F F" C 10.13954933 10.09711395 12.38421108 -0.00000000 C 11.34646202 10.09711395 13.07460199 0.00000000 C 8.93263664 10.09711395 13.07460199 -0.00000000 C 10.13954933 10.09711395 15.16101197 -0.00000000 C 11.34465495 10.09711395 14.46510328 -0.00000000 C 8.93444370 10.09711395 14.46510328 0.00000000 C 10.13954933 11.21630306 10.16939448 -0.00000000 C 10.13954933 8.97792484 10.16939448 -0.00000000 C 10.13954933 10.80761594 8.85675649 0.00000000 C 10.13954933 9.38661196 8.85675649 0.00000000 N 10.13954933 10.09711395 10.96513494 0.00000000 H 12.26979504 10.09711395 12.51888639 -0.00000000 H 8.00930362 10.09711395 12.51888639 -0.00000000 H 12.27909865 10.09711395 15.00190089 -0.00000000 H 8.00000000 10.09711395 15.00190089 -0.00000000 H 10.13954933 
10.09711395 16.23847837 0.00000000 H 10.13954933 12.19422790 10.60962753 -0.00000000 H 10.13954933 8.00000000 10.60962753 -0.00000000 H 10.13954933 11.45309491 8.00000000 -0.00000000 H 10.13954933 8.74113299 8.00000000 -0.00000000 gpaw-24.1.0/doc/documentation/do-gmf/agts.py000066400000000000000000000035641454550013000206400ustar00rootroot00000000000000from pathlib import Path from myqueue.workflow import run def workflow(): with run(script='H2_instability.py', cores=8): run(function=check_instability) with run(script='tPP.py', cores=8, tmax='1h'): run(function=check_tPP) with run(script='ethylene.py', cores=8): run(function=check_ethylene) def check_ethylene(): text = Path('Ethylene_EX_DO-GMF.txt').read_text() for line in text.splitlines(): if line.startswith('Extrapolated:'): gmf = float(line.split()[-1]) assert abs(gmf + 18.783) < 0.01 def check_instability(): get = 10 text = Path('davidson_H2_S.txt').read_text() for line in text.splitlines(): if line.startswith('Eigenvalues:'): get = 0 continue get += 1 if get == 3: temp = line.split() eigv_s = [float(temp[0]), float(temp[1])] if get == 8: temp = line.split() if temp[0][-1] == 'c' and temp[1][-1] == 'c': break get = 10 text = Path('davidson_H2_BS.txt').read_text() for line in text.splitlines(): if line.startswith('Eigenvalues:'): get = 0 continue get += 1 if get == 3: temp = line.split() eigv_bs = [float(temp[0]), float(temp[1])] if get == 8: temp = line.split() if temp[0][-1] == 'c' and temp[1][-1] == 'c': break assert abs(eigv_s[0] + 0.118) < 0.01 assert abs(eigv_s[1] - 0.891) < 0.01 assert abs(eigv_bs[0] - 0.198) < 0.01 assert abs(eigv_bs[1] - 0.492) < 0.01 def check_tPP(): text = Path('N-Phenylpyrrole_EX_DO-GMF.txt').read_text() for line in text.splitlines(): if line.startswith('Dipole moment:'): gmf = float(line.split()[-2].replace(')', '')) assert abs(gmf * 4.803 + 10.227) < 0.01 
gpaw-24.1.0/doc/documentation/do-gmf/do-gmf.rst000066400000000000000000000324501454550013000212270ustar00rootroot00000000000000.. _do-gmf: ================================================================================== Excited State Calculations with Direct Optimization and Generalized Mode Following ================================================================================== The direct optimization generalized mode following (DO-GMF) method can be used to perform variational calculations of excited electronic states, where, contrary to :ref:`linear response TDDFT `, the orbitals are variationally optimized for the excited state. The main challenge of variational density functional calculations of excited states is that excited states often correspond to saddle points on the surface describing the variation of the energy as a function of the electronic degrees of freedom (the orbital variations). :ref:`Standard self-consistent field (SCF) algorithms ` typically perform well in ground state calculations, as the latter is a minimum of the energy, but face convergence issues in excited state calculations. As an alternative, direct optimization (DO) approaches can be used, which have been found to converge more robustly than the standard eigensolvers for excited states, especially in the vicinity of electronic degeneracies. One option is to use quasi-Newton algorithms that can converge to saddle points of arbitrary order in conjunction with the :ref:`maximum overlap method (MOM) `, which can reduce the risk of converging to a minimum or lower-energy saddle point (variational collapse). This is the DO-MOM method implemented in GPAW and illustrated :ref:`here `. However, DO-MOM can still be affected by variational collapse in challenging cases. GPAW also implements an alternative DO approach using a generalized mode following (GMF) method. 
DO-GMF targets a stationary solution with a specific saddle point order and is more robust than both DO-MOM and the standard SCF algorithms, while being inherently free from variational collapse. On the other hand, DO-GMF has a bigger computational cost than DO-MOM, because it requires more energy/gradient evaluations per iteration due to the partial diagonalization of the Hessian. -------------------------- Generalized mode following -------------------------- ~~~~~~~~~~~~~~ Implementation ~~~~~~~~~~~~~~ The implementation of the DO-GMF method is presented in [#dogmfgpaw1]_. For the moment, the method can be used only in the LCAO mode. GMF is a generalization of the minimum mode following method traditionally used to optimize first-order saddle points on the potential energy surface for atomic rearrangements. The method recasts the challenging saddle point search as a minimization by inverting the projection of the gradient on the lowest eigenmode of the Hessian. It is generalized to target an `n`-th-order saddle point on the electronic energy surface by inverting the projections on the eigenmodes, `v_i`, of the electronic Hessian corresponding to the `n` lowest eigenvalues, yielding the modified gradient .. math:: g^{\mathrm{\,mod}} = g - 2\sum_{i = 1}^{n}v_{i}v_{i}^{\mathrm{T}}g if the energy surface is concave along all target eigenvectors, or .. math:: g^{\mathrm{\,mod}} = -\sum_{i = 1, \lambda_{i} > 0}^{n}v_{i}v_{i}^{\mathrm{T}}g if any target eigenvalue, `\lambda_i`, is positive. Notice that in the latter case only the target eigenvectors along which the energy surface is convex are followed to increase stability of the method. The target eigenvalues and eigenvectors of the electronic Hessian matrix are obtained by using a finite difference generalized Davidson method [#gendavidson]_. This method can also be used for stability analysis of an electronic solution (see :ref:`stabanalysisexample` below). 
~~~~~~~~~~~~~~~~~ How to use DO-GMF ~~~~~~~~~~~~~~~~~ To provide initial guess orbitals for an excited state DO-GMF calculation, a ground state calculation is typically performed first. Then, a DO-GMF calculation can be requested as follows:: from gpaw.directmin.etdm_lcao import LCAOETDM calc.set(eigensolver=LCAOETDM( partial_diagonalizer={'name': 'Davidson', 'logfile': None}, linesearch_algo={'name': 'max-step'}, searchdir_algo={'name': 'l-bfgs-p_gmf'}, need_init_orbs=False), occupations={'name': 'mom', 'numbers': f, 'use_fixed_occupations': True}) where a log file for the partial Hessian diagonalization can be specified and ``f`` contains the occupation numbers of the excited state (see :ref:`ethyleneexample` and :ref:`tPPexample`). Line search algorithms cannot be applied for saddle point searches, so a maximum step length is used. Any of the search direction algorithms implemented in GPAW (see :ref:`directmin`) can be used by appending ``_gmf`` to the ``name`` keyword of the ETDM search direction algorithms (e.g. specify ``l-bfgs-p_gmf`` to use the ``l-bfgs-p`` search direction with GMF). A helper function can be used to create the list of excited state occupation numbers:: from gpaw.directmin.tools import excite f = excite(calc, i, a, spin=(si, sa)) which will promote an electron from occupied orbital ``i`` in spin channel ``si`` to unoccupied orbital ``a`` in spin channel ``sa`` (the index of HOMO and LUMO is 0). For example, ``excite(calc, -1, 2, spin=(0, 1))`` will remove an electron from the HOMO-1 in spin channel 0 and add an electron to LUMO+2 in spin channel 1. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Estimating the saddle point order of the target excited state ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The DO-GMF method requires an estimation of the saddle point order of the sought excited state ahead of the actual calculation. 
GPAW estimates the saddle point order at the initial guess using the following efficient diagonal approximation of the electronic Hessian: .. math:: :label: eq:hessapprox \mathscr{H}_{ijij} \approx 2\left(f_{j} - f_{i}\right)\left(\epsilon_{i} - \epsilon_{j}\right) where `f_{i}` and `\epsilon_{i}` are the orbital occupation numbers and energies of the initial guess orbitals, respectively. This approximation gives one negative eigenvalue for each pair of occupied-unoccupied orbitals where the unoccupied orbital has lower energy than the occupied one. For example, for a calculation initialized from an excitation from the ground state HOMO to the ground state LUMO + 1, there will be two unoccupied orbitals (ground state HOMO and LUMO) lower in energy than an occupied orbital (ground state LUMO + 1) and therefore the estimated saddle point order is 2. This is usually a good estimation for low-lying valence and Rydberg excitations. For excitations involving significant charge transfer (see :ref:`tPPexample`), the energy ordering of the orbitals of the converged solution can differ from the order of the initial guess orbitals. In such cases, the diagonal Hessian approximation at the initial guess does not provide a good enough estimation of the saddle point order. As shown in :ref:`tPPexample`, a better estimation is given by first performing a constrained optimization with DO-MOM and then evaluating the saddle point order using either the diagonal approximation of the Hessian or partial diagonalization of the full Hessian (the latter is preferred). 
This can be done using:: from gpaw.directmin.derivatives import Davidson davidson = Davidson(calc.wfs.eigensolver, eps=1e-2, seed=42) appr_sp_order = davidson.estimate_sp_order(calc, method='full-hess', target_more=3) The estimated saddle point order then needs to be specified when requesting a DO-GMF calculation:: from gpaw.directmin.etdm_lcao import LCAOETDM calc.set(eigensolver=LCAOETDM( partial_diagonalizer={'name': 'Davidson', 'sp_order': appr_sp_order}, ...) .. _ethyleneexample: ------------------------------------------- Example I: Doubly excited state of ethylene ------------------------------------------- In this example, the lowest doubly excited state of ethylene is obtained with the DO-GMF method. First, a ground state calculation is performed and then the DO-GMF calculation is initialized by promoting one electron from the HOMO to the LUMO in both spin channels simultaneously. According to the diagonal Hessian approximation, eq. :any:`eq:hessapprox`, the excited state is targeted as a second-order saddle point on the electronic energy surface. .. literalinclude:: ethylene.py It is recommended to deactivate updates of the reference orbitals by setting the ``update_ref_orbs_counter`` keyword to a large value (e.g. 1000). The unitary invariant representation should be used (if the density functional is orbital density independent) because the redundant rotations among the occupied orbitals introduce many degenerate eigenvectors of the electronic Hessian with zero curvature, which can lead to convergence problems of the generalized Davidson method. The keyword ``use_fixed_occupations`` is set to ``True`` to deactivate the use of the maximum overlap method, which is not needed here because variational collapse is impossible with the DO-GMF method. .. 
_tPPexample: ------------------------------------------------------------ Example II: Charge transfer excited state of N-phenylpyrrole ------------------------------------------------------------ In this example, a charge transfer excited state of the N-phenylpyrrole molecule is calculated using the DO-GMF method. Since the target state is open-shell, the calculation gives the energy of a mixed-spin solution. The energy of the mixed-spin solution can be purified as shown in :ref:`h2oexample`, but this is not done in this example. The excited state calculation is initialized by a single electron excitation from the HOMO to the LUMO in one spin channel using the ground state orbitals. This target saddle point order cannot be estimated using eq. :any:`eq:hessapprox` because the charge transfer excitation leads to a large energetic rearrangement of the orbitals. To take this energetic rearrangement into account and achieve a better estimation of the saddle point order, we first perform a constrained optimization with DO-MOM freezing the hole and excited electron and minimizing all other electronic degrees of freedom (see also :ref:`directopt`). Then, the saddle point order is estimated from partial diagonalization of the full Hessian. .. literalinclude:: estimate_sp_order.py The saddle point order estimated by partial diagonalization of the Hessian is 9. However, closer inspection of the negative eigenvalues from the log file of the Davidson calculation reveals that two of them are significantly closer to 0 than the others, pointing towards a target saddle point order closer to 7 rather than 9. It is then recommended to perform three trial calculations targeting saddle points of order `n-1`, `n` and `n+1`, respectively, where `n` is the estimated approximate saddle point order (here 7). Finally, the wanted excited state solution needs to be identified by inspecting the character of each of the calculated solutions. 
Below we target with DO-GMF a sixth-order saddle point only, corresponding to the calculation with `n-1`, because the wanted solution has been previously identified to be a sixth-order saddle point. .. literalinclude:: tPP.py DO-GMF converges to a sixth-order saddle point with a dipole moment of -10.227 D consistent with the charge transfer character of the wanted excited state. Note that an unconstrained optimization of this excited state with DO-MOM starting from an initial guess made of ground state orbitals leads to variational collapse to a lower-energy saddle point with pronounced mixing between the HOMO and LUMO and a small dipole moment of -3.396 D [#dogmfgpaw1]_. .. _stabanalysisexample: ----------------------------------------------------------------------------------- Example III: Stability analysis and breaking instability of ground state dihydrogen ----------------------------------------------------------------------------------- In this example, the generalized Davidson method is used for stability analysis of the ground state of the dihydrogen molecule. The molecule is stretched beyond the Coulson-Fischer point, at which both a ground state solution with conserved symmetry and two lower-energy degenerate ground state solutions with broken spin symmetry exist. First, a spin-polarized direct minimization is performed starting from the GPAW default initial guess for the orbitals. Stability analysis confirms that the obtained solution is a first-order saddle point on the electronic energy surface, meaning that the symmetry-conserving solution is obtained. Second, the electronic structure is displaced along the eigenvector of the electronic Hessian corresponding to its lowest, negative eigenvalue, and thereby, the instability is broken. This displaced electronic structure is reoptimized yielding a lower-energy solution with broken spin symmetry. 
Stability analysis is applied to this solution to confirm that it is a minimum on the electronic energy surface. .. literalinclude:: H2_instability.py ---------- References ---------- .. [#dogmfgpaw1] Y. L. A. Schmerwitz, G. Levi, H. Jónsson :doi:`Calculations of Excited Electronic States by Converging on Saddle Points Using Generalized Mode Following <10.48550/ARXIV.2302.05912>`, (2023). .. [#gendavidson] M. Crouzeix, B. Philippe, M. Sadkane :doi:`The Davidson Method <10.1137/0915004>`, *SIAM J. Sci. Comput.*, (1994). gpaw-24.1.0/doc/documentation/do-gmf/estimate_sp_order.py000066400000000000000000000030111454550013000233750ustar00rootroot00000000000000from ase.io import read from gpaw import GPAW, LCAO from gpaw.mom import prepare_mom_calculation from gpaw.directmin.tools import excite from gpaw.directmin.etdm_lcao import LCAOETDM from gpaw.directmin.derivatives import Davidson calc = GPAW(xc='PBE', mode=LCAO(), h=0.2, basis='dzp', spinpol=True, eigensolver={'name': 'etdm-lcao', 'representation': 'u-invar'}, occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='N-Phenylpyrrole_GS.txt') atoms = read('N-Phenylpyrrole.xyz') atoms.center(vacuum=5.0) atoms.set_pbc(False) atoms.calc = calc # Ground state calculation E_GS = atoms.get_potential_energy() h = 26 # Hole p = 27 # Excited electron # Constrained optimization freezing hole and excited electron calc.set(eigensolver=LCAOETDM(constraints=[[[h], [p]], []], need_init_orbs=False), txt='N-Phenylpyrrole_EX_constrained.txt') # Spin-mixed open-shell occupation numbers f = excite(calc, 0, 0, spin=(0, 0)) # Direct optimization maximum overlap method calculation prepare_mom_calculation(calc, atoms, f) E_EX_constrained = atoms.get_potential_energy() # Stability analysis using the generalized Davidson method davidson = Davidson( calc.wfs.eigensolver, 'davidson_tPP_constrained.txt', eps=1e-2, seed=42) appr_sp_order = davidson.estimate_sp_order( calc, method='full-hess', 
target_more=3) print(appr_sp_order) gpaw-24.1.0/doc/documentation/do-gmf/ethylene.py000066400000000000000000000030611454550013000215070ustar00rootroot00000000000000from ase.io import read from gpaw import GPAW, LCAO from gpaw.directmin.etdm_lcao import LCAOETDM from gpaw.directmin.tools import excite calc = GPAW(xc='PBE', mode=LCAO(), h=0.2, basis='dzp', spinpol=True, eigensolver='etdm-lcao', occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='Ethylene_GS.txt') atoms = read('ethylene.xyz') atoms.center(vacuum=5.0) atoms.set_pbc(False) atoms.calc = calc # Ground state calculation E_GS = atoms.get_potential_energy() # Occupation numbers for double LUMO<-HOMO excitation in both spin channels f0 = excite(calc, 0, 0, spin=(0, 0)) f1 = excite(calc, 0, 0, spin=(1, 1)) f = [f0[0], f1[1]] # Direct approach using ground state orbitals with changed occupation numbers calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-bfgs-p_gmf'}, linesearch_algo={'name': 'max-step'}, partial_diagonalizer={ 'name': 'Davidson', 'logfile': 'davidson_ethylene.txt', 'seed': 42}, update_ref_orbs_counter=1000, representation='u-invar', need_init_orbs=False), occupations={'name': 'mom', 'numbers': f, 'use_fixed_occupations': True}, txt='Ethylene_EX_DO-GMF.txt') E_EX = atoms.get_potential_energy() gpaw-24.1.0/doc/documentation/do-gmf/ethylene.xyz000066400000000000000000000005071454550013000217130ustar00rootroot000000000000006 C 10.57612940 10.92886724 10.00012352 C 11.91481242 10.92869525 10.00001619 H 10.00013211 9.99998861 10.00005100 H 9.99990807 11.85768578 10.00005093 H 12.49103480 9.99987695 10.00008869 H 12.49080945 11.85757444 10.00008877 gpaw-24.1.0/doc/documentation/do-gmf/tPP.py000066400000000000000000000030441454550013000203760ustar00rootroot00000000000000from ase.io import read from gpaw import GPAW, LCAO from gpaw.directmin.etdm_lcao import LCAOETDM from gpaw.directmin.tools import excite calc = GPAW(xc='PBE', mode=LCAO(), 
h=0.2, basis='dzp', spinpol=True, eigensolver='etdm-lcao', occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='N-Phenylpyrrole_GS.txt') atoms = read('N-Phenylpyrrole.xyz') atoms.center(vacuum=5.0) atoms.set_pbc(False) atoms.calc = calc # Ground state calculation E_GS = atoms.get_potential_energy() # Spin-mixed open-shell occupation numbers f = excite(calc, 0, 0, spin=(0, 0)) # Direct approach using ground state orbitals with changed occupation numbers calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-bfgs-p_gmf'}, linesearch_algo={'name': 'max-step'}, partial_diagonalizer={ 'name': 'Davidson', 'logfile': 'davidson_tPP.txt', 'sp_order': 6, 'seed': 42}, update_ref_orbs_counter=1000, representation='u-invar', need_init_orbs=False), occupations={'name': 'mom', 'numbers': f, 'use_fixed_occupations': True}, txt='N-Phenylpyrrole_EX_DO-GMF.txt') E_EX = atoms.get_potential_energy() gpaw-24.1.0/doc/documentation/documentation.rst000066400000000000000000000003271454550013000215540ustar00rootroot00000000000000.. _documentation: ============= Documentation ============= .. toctree:: :maxdepth: 1 basic_usage advanced theory core cmdline gpu utilities/utilities reports_presentations_and_theses gpaw-24.1.0/doc/documentation/dscf/000077500000000000000000000000001454550013000170665ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/dscf/co.py000066400000000000000000000026501454550013000200440ustar00rootroot00000000000000 from ase.build import molecule from gpaw import GPAW from gpaw import dscf # Ground state calculation calc = GPAW(mode='fd', nbands=8, h=0.2, xc='PBE', spinpol=True, convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-9, 'bands': -1}) CO = molecule('CO') CO.center(vacuum=3) CO.calc = calc E_gs = CO.get_potential_energy() # Obtain the pseudowavefunctions and projector overlaps of the # state which is to be occupied. 
n=5,6 is the 2pix and 2piy orbitals n = 5 molecule = [0, 1] wf_u = [kpt.psit_nG[n] for kpt in calc.wfs.kpt_u] p_uai = [dict([(molecule[a], P_ni[n]) for a, P_ni in kpt.P_ani.items()]) for kpt in calc.wfs.kpt_u] # Excited state calculation calc_es = GPAW(mode='fd', nbands=8, h=0.2, xc='PBE', spinpol=True, convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-9, 'bands': -1}) CO.calc = calc_es lumo = dscf.AEOrbital(calc_es, wf_u, p_uai) # lumo = dscf.MolecularOrbital(calc, weights={0: [0, 0, 0, 1], # 1: [0, 0, 0, -1]}) dscf.dscf_calculation(calc_es, [[1.0, lumo, 1]], CO) E_es = CO.get_potential_energy() print('Excitation energy: ', E_es - E_gs) gpaw-24.1.0/doc/documentation/dscf/dscf.rst000066400000000000000000000110401454550013000205330ustar00rootroot00000000000000.. _dscf: =========================== Delta Self-Consistent Field =========================== -------------------------------------------- Linear expansion Delta Self-Consistent Field -------------------------------------------- The method of linear expansion Delta Self-Consistent Field \ [#delscf]_ adds the density of a specified orbital `\varphi_a(r)` to the total density in each step of the self-consistency cycle. The extra charge is usually taken from the fermi level to keep the system neutral: .. math:: n(r) = \sum_nf_{N-1}(T,\varepsilon_n)|\varphi_n(r)|^2 + |\varphi_a(r)|^2. with `N` being the total number of electrons and `f_{N-1}(T,\varepsilon_n)` is the Fermi-Dirac distribution of the `N-1` electron system . To get the band energy right `\varphi_a(r)` needs to be expanded in Kohn-Sham orbitals: .. math:: |\varphi_a\rangle = \sum_nc_{na}|\varphi_n\rangle, \qquad c_{na} = \langle\varphi_n|\varphi_a\rangle and the band energy of the orbital becomes .. math:: \varepsilon_a = \sum_n|c_{na}|^2\varepsilon_n. 
The method is a generalization of traditional Delta Self-Consistent Field where only the occupation numbers are modified and it will reduce to that, if only one (normalized) term is included in the expansion of `\varphi_a(r)`. ---------------- Simple molecules ---------------- The example below calculates the excitation energy of the `5\sigma\rightarrow2\pi` transition in CO. We only specify that the `2\pi` orbital should be occupied ([[1.0, lumo, 1]] means 1.0 electrons in lumo with spin 1) and the method will take the electron from the highest occupied orbital, which in this case is `5\sigma`. The lumo is an instance of the class AEOrbital which calculates the expansion of the saved `2\pi` state in each iteration step. In order to obtain the all-electron overlaps `\langle\varphi_n|2\pi\rangle` we need to supply the projector overlaps in addition to the pseudowavefunction. Exciting the LUMO in CO (:download:`co.py`): .. literalinclude:: co.py The commented lines ``lumo = dscf.Molecular...`` use another class to specify the `2\pi` orbital of CO which does not require a ground state calculation of the molecule. In the simple example above the two methods give identical results, but for more complicated systems the AEOrbital class should be used \ [#des]_. When using the AEOrbital class a new calculator object must be constructed for the dscf calculation. In the example above we only specify a single state, but the function ``dscf.dscf_calculation`` takes a list of orbitals as input and we could for example have given the argument [[1.0, lumo, 1], [-1.0, pi, 0]] which would force the electron to be taken from the `\pi` orbital with spin 0. The pi should of course be another instance of the AEOrbital class. --------------------- Exciting an adsorbate --------------------- The method of linear expansion Delta Self-Consistent Field was designed for calculations with strongly hybridized orbitals, for example molecules chemisorbed on transition metals. 
In such cases the traditional Delta Self-Consistent Field breaks down since the orbital to be occupied is no longer well described by a single Kohn-Sham state. The script :git:`~doc/documentation/dscf/homo.py` calculates the HOMO energy of CO adsorbed on-top Pt(111). The script starts from scratch, but usually one would start from an optimized configuration saved in a file ``gs.gpw``. The script only calculates the total energy of the excited state so the excitation energy is obtained as the difference between ground and excited state energies. First a calculation of gas-phase CO is performed and the HOMO pseudo-wavefunctions and the projector overlaps are saved. The energy range [-100.0, 0.0] means we only include states below the Fermi level (default is states above). The script :git:`~doc/documentation/dscf/lumo.py` calculates the LUMO energy of the same system, but is slightly more complicated due to the degeneracy of the `2\pi` orbital. We would like to occupy the `2\pi_y` orbital and we need to figure out which band (5 or 6) this orbital corresponds to in each k-point before we start the slab calculation. .. [#delscf] J. Gavnholt, T. Olsen, M. Engelund and J. Schiøtz, Delta Self-Consistent Field as a method to obtain potential energy surfaces of excited molecules on surfaces, *Phys. Rev. B* **78**, 075441 (2008) .. [#des] T. Olsen, J. Gavnholt and J. Schiøtz, Hot electron mediated desorption rates calculated from excited state potential energy surfaces, *Phys. Rev. 
B* **79**, 035403 (2009) gpaw-24.1.0/doc/documentation/dscf/gs.xyz000066400000000000000000000017641454550013000202630ustar00rootroot0000000000000014 Pt 0.000000000000000 0.000000000000000 0.000000000000000 Pt 1.417500000000000 2.455182019728883 0.000000000000000 Pt 0.000000000000000 3.273576026305178 2.314777500000000 Pt 1.417500000000000 0.818394006576295 2.314777500000000 Pt 0.000000000000000 1.636788013152589 4.641355000000001 Pt 1.417500000000000 4.091970032881473 4.641355000000001 Pt 2.835000000000000 0.000000000000000 0.000000000000000 Pt 4.252500000000000 2.455182019728883 0.000000000000000 Pt 2.835000000000000 3.273576026305178 2.314777500000000 Pt 4.252500000000000 0.818394006576295 2.314777500000000 Pt 2.835000000000000 1.636788013152589 4.641355000000001 Pt 4.252500000000000 4.091970032881473 4.641355000000001 C 1.417500015396857 4.092053726367659 6.510235842827654 O 1.417500024461528 4.091934739974321 7.668105648600448 gpaw-24.1.0/doc/documentation/dscf/homo.py000066400000000000000000000032701454550013000204040ustar00rootroot00000000000000from ase.visualize import view from ase.build import fcc111, add_adsorbate from gpaw import GPAW from gpaw.mixer import MixerSum import gpaw.dscf as dscf filename = 'homo' c_mol = GPAW(mode='fd', nbands=9, h=0.2, xc='RPBE', kpts=(8, 6, 1), spinpol=True, convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-9, 'bands': 'occupied'}, txt='CO_homo.txt') calc = GPAW(mode='fd', nbands=80, h=0.2, xc='RPBE', kpts=(8, 6, 1), eigensolver='cg', spinpol=True, mixer=MixerSum(nmaxold=5, beta=0.1, weight=100), convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-7, 'bands': -10}, txt=filename + '.txt') # Import Slab with relaxed CO slab = fcc111('Pt', size=(1, 2, 3), orthogonal=True) add_adsorbate(slab, 'C', 2.0, 'ontop') add_adsorbate(slab, 'O', 3.15, 'ontop') slab.center(axis=2, vacuum=4.0) view(slab) molecule = slab.copy() del molecule[:-2] # Molecule molecule.calc = c_mol molecule.get_potential_energy() # 
Homo wavefunction wf_u = [kpt.psit_nG[4] for kpt in c_mol.wfs.kpt_u] # Homo projector overlaps mol = range(len(slab))[-2:] p_uai = [dict([(mol[a], P_ni[4]) for a, P_ni in kpt.P_ani.items()]) for kpt in c_mol.wfs.kpt_u] # Slab with adsorbed molecule slab.calc = calc orbital = dscf.AEOrbital(calc, wf_u, p_uai, Estart=-100.0, Eend=0.0) dscf.dscf_calculation(calc, [[-1.0, orbital, 1]], slab) slab.get_potential_energy() gpaw-24.1.0/doc/documentation/dscf/lumo.py000066400000000000000000000044431454550013000204210ustar00rootroot00000000000000from numpy import reshape, dot from ase.visualize import view from ase.build import fcc111, add_adsorbate from gpaw import GPAW from gpaw.mixer import MixerSum import gpaw.dscf as dscf filename = 'lumo' c_mol = GPAW(mode='fd', nbands=9, h=0.2, xc='RPBE', kpts=(8, 6, 1), spinpol=True, convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-9, 'bands': -2}, txt='CO_lumo.txt') calc = GPAW(mode='fd', nbands=80, h=0.2, xc='RPBE', kpts=(8, 6, 1), eigensolver='cg', spinpol=True, mixer=MixerSum(nmaxold=5, beta=0.1, weight=100), convergence={'energy': 100, 'density': 100, 'eigenstates': 1.0e-7, 'bands': -10}, txt=filename + '.txt') # Import Slab with relaxed CO slab = fcc111('Pt', size=(1, 2, 3), orthogonal=True) add_adsorbate(slab, 'C', 2.0, 'ontop') add_adsorbate(slab, 'O', 3.15, 'ontop') slab.center(axis=2, vacuum=4.0) view(slab) molecule = slab.copy() del molecule[:-2] # Molecule molecule.calc = c_mol molecule.get_potential_energy() # Find band corresponding to lumo lumo = c_mol.get_pseudo_wave_function(band=5, kpt=0, spin=1) lumo = reshape(lumo, -1) wf1_k = [c_mol.get_pseudo_wave_function(band=5, kpt=k, spin=1) for k in range(c_mol.wfs.kd.nibzkpts)] wf2_k = [c_mol.get_pseudo_wave_function(band=6, kpt=k, spin=1) for k in range(c_mol.wfs.kd.nibzkpts)] band_k = [] for k in range(c_mol.wfs.kd.nibzkpts): wf1 = reshape(wf1_k[k], -1) wf2 = reshape(wf2_k[k], -1) p1 = abs(dot(wf1, lumo)) p2 = abs(dot(wf2, lumo)) if p1 > p2: 
band_k.append(5) else: band_k.append(6) # Lumo wavefunction wf_u = [kpt.psit_nG[band_k[kpt.k]] for kpt in c_mol.wfs.kpt_u] # Lumo projector overlaps mol = range(len(slab))[-2:] p_uai = [dict([(mol[a], P_ni[band_k[kpt.k]]) for a, P_ni in kpt.P_ani.items()]) for kpt in c_mol.wfs.kpt_u] # Slab with adsorbed molecule slab.calc = calc orbital = dscf.AEOrbital(calc, wf_u, p_uai) dscf.dscf_calculation(calc, [[1.0, orbital, 1]], slab) slab.get_potential_energy() gpaw-24.1.0/doc/documentation/ehrenfest/000077500000000000000000000000001454550013000201325ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/ehrenfest/ehrenfest_theory.rst000066400000000000000000000201461454550013000242440ustar00rootroot00000000000000.. _ehrenfest_theory: ====================================== Ehrenfest dynamics (TDDFT/MD) - Theory ====================================== Molecular dynamics (MD) simulations usually rely on the Born-Oppenheimer approximation, where the electronic system is assumed to react so much faster than the ionic system that it reaches its ground state at each timestep. Thus, forces for the dynamics are calculated from the DFT groundstate density. While this approximation is sufficiently valid in most situations, there are cases where the explicit dynamics of the electronic system can affect the molecular dynamics, or the movement of the atoms can affect averaged spectral or other properties. These cases can be handled using so-called Ehrenfest dynamics, i.e. time-dependent density functional theory molecular dynamics (TDDFT/MD). This guide describes the basics of the Ehrenfest dynamics implementation in GPAW from a theoretical point of view. For examples, see :ref:`Ehrenfest dynamics `. The original implementation by Ari Ojanpera is described in Ref. \ [#Ojanpera2012]_. 
Time-dependent DFT in the PAW formalism ======================================= In the past decades, time-dependent DFT has become a popular method for calculating materials properties related to excited electronic states as well as for simulating processes, in which nonadiabatic electron-ion dynamics plays a significant role. There are two main realizations of TDDFT: the time-propagation scheme and the linear-response method. The most general realization of TDDFT is the former scheme, in which the time-dependent Kohn-Sham equations are integrated over the time domain. The starting point of time-propagation TDDFT is the all-electron time-dependent Kohn-Sham (TDKS) equation, .. math:: \begin{equation} i \frac{\partial \psi_n ({\bf r}, t)}{\partial t} = \hat {H} (t) \psi_n ({\bf r},t), \end{equation} where `\psi_n` is the Kohn-Sham wavefunction of electronic state `n`, and `\hat{H}` is the electronic Hamiltonian. Using the PAW approximation `\psi_n ({\bf r}, t) = \hat{\cal T} \tilde{\psi}_n ({\bf r}, t)` and operating from the left with the adjoint of the PAW operator `\hat{\cal T}^{\dagger}`, we obtain the following equation: .. math:: :label: pawt_tdks \begin{equation} i \hat{\cal T}^{\dagger} \hat{\cal T} \frac{\partial \tilde{\psi}_n ({\bf r}, t)}{\partial t} = [\hat{\cal T}^{\dagger} \hat{H} \hat{\cal T} -i \hat{\cal T}^{\dagger} \frac{\partial \hat{\cal T}} {\partial t}]\tilde{\psi}_n ({\bf r}, t). \end{equation} Next, we define the PAW Hamiltonian `\tilde{H}`, the PAW overlap operator `\tilde{S}` and the `\tilde{P}` term, which corrects for the movement of the atoms in the TDKS equation, in the following manner: .. math:: \begin{align} \tilde{S} &= \hat{\cal T}^{\dagger} \hat{\cal T},\\ \tilde{H} &= \hat{\cal T}^{\dagger} \hat{H} \hat{\cal T}, \\ \tilde{P} &= -i \hat{\cal T}^{\dagger} \frac{\partial \hat{\cal T}}{\partial t}. \end{align} Using these definitions, the PAW-transformed TDKS equation (Eq. :eq:`pawt_tdks`) is reduced to a more compact form, .. 
math:: :label: tdks_paw \begin{equation} i \tilde{S} \frac{\partial \tilde{\psi}_n}{\partial t} = [\tilde{H} + \tilde{P}] \tilde{\psi}_n ({\bf r}, t) \end{equation} In order to solve Eq. :eq:`tdks_paw` in practice, a method called semi-implicit Crank-Nicolson (SICN) is used in GPAW. The Crank-Nicolson propagator is often used in time-propagation TDDFT calculations, since it is both unitary and time-reversible. Semi-implicit means that a predictor-corrector scheme is used for accounting for the non-linearity of the Hamiltonian. At each time step, one first assumes the Hamiltonian to be constant during the time step, and solves the predictor equation to obtain the predicted future wavefunctions, .. math:: \begin{equation} [\tilde{S} + i \frac{\Delta t}{2} (\tilde{H} (t) + \tilde{P})] \tilde{\psi}^{\text{pred}} (t + \Delta t) = [\tilde{S} - i \frac{\Delta t}{2}(\tilde{H} (t) + \tilde{P})] \tilde{\psi}(t), \end{equation} where the position-dependence of `\tilde{\psi}_n` is dropped for convenience. Note that `\tilde{S}` and `\tilde{P}` do not depend explicitly on time, but only implicitly through the atomic positions and velocities. The predicted Hamiltonian `\tilde{H}^{\text{pred}}` is calculated from the predicted wavefunctions. The Hamiltonian in the middle of the time step is obtained by taking the average .. math:: \begin{equation} \tilde{H}(t + \Delta t/2) = \frac{1}{2} [\tilde{H}(t) + \tilde{H}^{\text{pred}} (t + \Delta t)]. \end{equation} Finally, the propagated wavefunctions are obtained from the corrector equation, .. math:: \begin{equation} [\tilde{S} + i \frac{\Delta t}{2} (\tilde{H} (t + \Delta t/2) + \tilde{P})] \tilde{\psi} (t + \Delta t) = [\tilde{S} - i \frac{\Delta t}{2}(\tilde{H} (t + \Delta t/2) + \tilde{P})] \tilde{\psi}(t). \end{equation} Time-propagation of the electron-ion system =========================================== However, this only covers the propagation of the electronic subsystem. 
In order to propagate the coupled electron-ion system, the following splitting of electronic and nuclear propagation is employed, .. math:: :label: uen \begin{equation} \hat{U}_{N,e} (t, t + \Delta t) = \hat{U}_N (t, t + \Delta t/2) \hat{U}_e (t, t + \Delta t) \hat{U}_N (t + \Delta t/2, t + \Delta t), \end{equation} where the propagator for the nuclei (`\hat{U}_N`) is the Velocity Verlet algorithm. In practice, Eq. (:eq:`uen`) means that the nuclei are first propagated forward by `\Delta t/2`, while the electronic subsystem is kept unchanged. Then, the positions of the nuclei remain fixed, while the electronic subsystem is propagated by `\Delta t`. Finally, the nuclei are propagated by the remaining `\Delta t/2`. The following five-step scheme describes the propagation of electrons and nuclei in the GPAW implementation of Ehrenfest dynamics: .. math:: \begin{align} \ddot{\bf R}(t) &= \frac{\mathbf{F}(\mathbf{R}(t), n (t))}{M} \\ \mathbf{R} (t + \Delta t/2) &= \mathbf{R}(t) + \dot{\bf R} (t) \frac{\Delta t}{2} + \frac{1}{2} \ddot{\bf R}(t) \left(\frac{\Delta t}{2}\right)^2 \\ \dot{\bf R}(t+ \Delta t/4) &= \dot{\bf R}(t) + \frac{1}{2} \ddot{\bf R}(t) \frac{\Delta t}{2} \end{align} | .. math:: \begin{align} \ddot{\bf R} (t + \Delta t/2) &= \frac{\mathbf{F} (\mathbf{R}(t+ \Delta t /2), n(t))}{M} \\ \dot{\bf R} (t + \Delta t/2) &= \dot{\bf R} (t + \Delta t /4) + \frac{1}{2} \ddot{\bf R} (t + \Delta t/2) \frac{\Delta t}{2} \end{align} | .. math:: \begin{align} \tilde{\psi}_n(t + \Delta t; {\bf R} (t+ \Delta t/2)) = \hat{U}^{\text{SICN}} (t, t+\Delta t) \tilde{\psi}_n (t; {\bf R} (t+ \Delta t/2)) \end{align} | .. 
math:: \begin{align} \ddot{\bf R}(t + \Delta t/2) &= \frac{\mathbf{F}( \mathbf{R}(t+\Delta t/2), n(t+\Delta t))}{M} \\ \mathbf{R}(t + \Delta t) &= \mathbf{R}(t+\Delta t/2) + \dot{\bf R}(t + \Delta t/2) \frac{\Delta t}{2} + \frac{1}{2} \ddot{\bf R}(t+\Delta t/2) \left( \frac{\Delta t}{2}\right)^2\\ \dot{\bf R}(t+ 3\Delta t/4) &= \dot{\bf R}(t + \Delta t/2) + \frac{1}{2} \ddot{\bf R}(t + \Delta t/2) \frac{\Delta t}{2} \end{align} .. math:: \begin{align} &\dot{\bf R} ( t+ \Delta t) = \dot{\bf R}(t+ 3\Delta t/4) + \frac{1}{2} \ddot{\bf R}(t+ \Delta t) \frac{\Delta t}{2} \\ &\text{update } n (t + \Delta t, {\bf R} (t + \Delta t/2)) \rightarrow n (t + \Delta t, {\bf R}(t + \Delta t)), \end{align} where `{\bf R}`, `M` and `{\bf F}` denote the positions of the nuclei, atomic masses and atomic forces, respectively, and `n` denotes the electron density. Calculation of the atomic forces is tricky in PAW-based Ehrenfest dynamics due to the atomic position-dependent PAW transformation. In the GPAW program the force is derived on the grounds that the total energy of the quantum-classical system is conserved. The atomic forces in Ehrenfest dynamics are thoroughly analysed and explained in Ref. [#Ojanpera2012]_. References ========== .. [#Ojanpera2012] A. Ojanpera, V. Havu, L. Lehtovaara, M. Puska, "Nonadiabatic Ehrenfest molecular dynamics within the projector augmented-wave method", *J. Chem. Phys.* **136**, 144103 (2012). gpaw-24.1.0/doc/documentation/eigenvalues_of_core_states.rst000066400000000000000000000035131454550013000242710ustar00rootroot00000000000000.. _eigenvalues_of_core_states: ========================== Eigenvalues of core states ========================== Calculating eigenvalues for core states can be useful for XAS, XES and core-level shift calculations. The eigenvalue of a core state `k` with a wave function `\phi_k^a(\mathbf{r})` located on atom number `a`, can be calculated using this formula: .. 
math:: \epsilon_k = \frac{\partial E}{\partial f_k} = \frac{\partial}{\partial f_k}(\tilde{E} - \tilde{E}^a + E^a), where `f_k` is the occupation of the core state. When `f_k` is varied, `Q_L^a` and `n_c^a(r)` will also vary: .. math:: \frac{\partial Q_L^a}{\partial f_k} = \int d\mathbf{r} Y_{00} [\phi_k^a(\mathbf{r})]^2 \delta_{\ell,0} = Y_{00}, .. math:: \frac{\partial n_c^a(r)}{\partial f_k} = [\phi_k^a(\mathbf{r})]^2. Using the PAW expressions for the :ref:`energy contributions`, we get: .. math:: \frac{\partial \tilde{E}}{\partial f_k} = Y_{00} \int d\mathbf{r} \int d\mathbf{r}' \frac{\tilde{\rho}(\mathbf{r}') \hat{g}_{00}^a(\mathbf{r} - \mathbf{R}^a)} {|\mathbf{r} - \mathbf{r}'|} = Y_{00} \int d\mathbf{r} \tilde{v}_H(\mathbf{r}) \hat{g}_{00}^a(\mathbf{r} - \mathbf{R}^a), .. math:: \frac{\partial \tilde{E}^a}{\partial f_k} = Y_{00} \int_{rPhys. Rev. B 6, 4370-4379 (1972)" COMMENTS: "Room temperature" DATA: - type: tabulated nk data: | 0.1879 1.28 1.188 0.1916 1.32 1.203 0.1953 1.34 1.226 0.1993 1.33 1.251 0.2033 1.33 1.277 0.2073 1.30 1.304 0.2119 1.30 1.350 0.2164 1.30 1.387 0.2214 1.30 1.427 0.2262 1.31 1.460 0.2313 1.30 1.497 0.2371 1.32 1.536 0.2426 1.32 1.577 0.2490 1.33 1.631 0.2551 1.33 1.688 0.2616 1.35 1.749 0.2689 1.38 1.803 0.2761 1.43 1.847 0.2844 1.47 1.869 0.2924 1.49 1.878 0.3009 1.53 1.889 0.3107 1.53 1.893 0.3204 1.54 1.898 0.3315 1.48 1.883 0.3425 1.48 1.871 0.3542 1.50 1.866 0.3679 1.48 1.895 0.3815 1.46 1.933 0.3974 1.47 1.952 0.4133 1.46 1.958 0.4305 1.45 1.948 0.4509 1.38 1.914 0.4714 1.31 1.849 0.4959 1.04 1.833 0.5209 0.62 2.081 0.5486 0.43 2.455 0.5821 0.29 2.863 0.6168 0.21 3.272 0.6595 0.14 3.697 0.7045 0.13 4.103 0.7560 0.14 4.542 0.8211 0.16 5.083 0.8920 0.17 5.663 0.9840 0.22 6.350 1.0880 0.27 7.150 1.2160 0.35 8.145 1.3930 0.43 9.519 1.6100 0.56 11.21 1.9370 0.92 13.78 gpaw-24.1.0/doc/documentation/electrodynamics/electrodynamics.rst000066400000000000000000000012401454550013000252500ustar00rootroot00000000000000.. 
_electrodynamics: ========================= Classical electrodynamics ========================= GPAW can perform classical electrodynamics simulations using quasistatic finite-difference time-domain (QSFDTD) method. In these calculations you must specify the regions with classically polarizable material, as well as their permittivities `\epsilon(\mathbf{r}, \omega)`. The electric field and the polarization charge density are propagated in time under an influence of external perturbation. The QSFDTD method can be also merged with time-propagation simulation, which yields a hybrid multiscale method. .. toctree:: :maxdepth: 2 qsfdtd hybridscheme gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere/000077500000000000000000000000001454550013000251575ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere/calculate.py000066400000000000000000000073361454550013000274770ustar00rootroot00000000000000from ase import Atoms from gpaw.fdtd.poisson_fdtd import QSFDTD from gpaw.fdtd.polarizable_material import (PermittivityPlus, PolarizableMaterial, PolarizableSphere) from gpaw.tddft import photoabsorption_spectrum import numpy as np # Nanosphere radius (Angstroms) radius = 7.40 # Geometry atom_center = np.array([30., 15., 15.]) sphere_center = np.array([15., 15., 15.]) simulation_cell = np.array([40., 30., 30.]) # Atoms object atoms = Atoms('Na2', atom_center + np.array([[-1.5, 0.0, 0.0], [1.5, 0.0, 0.0]])) # Permittivity of Gold # J. Chem. Phys. 
135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 eps_gold = PermittivityPlus(data=[[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]]) # 1) Nanosphere only classical_material = PolarizableMaterial() classical_material.add_component(PolarizableSphere(center=sphere_center, radius=radius, permittivity=eps_gold)) qsfdtd = QSFDTD(classical_material=classical_material, atoms=None, cells=simulation_cell, spacings=[2.0, 0.5], remove_moments=(1, 1)) energy = qsfdtd.ground_state('gs.gpw', mode='fd', nbands=1, symmetry='off') qsfdtd.time_propagation('gs.gpw', kick_strength=[0.001, 0.000, 0.000], time_step=10, iterations=1500, dipole_moment_file='dm.dat') photoabsorption_spectrum('dm.dat', 'spec.1.dat', width=0.15) # 2) Na2 only (radius=0) classical_material = PolarizableMaterial() classical_material.add_component(PolarizableSphere(center=sphere_center, radius=0.0, permittivity=eps_gold)) qsfdtd = QSFDTD(classical_material=classical_material, atoms=atoms, cells=(simulation_cell, 4.0), # vacuum = 4.0 Ang spacings=[2.0, 0.5], remove_moments=(1, 1)) energy = qsfdtd.ground_state('gs.gpw', mode='fd', nbands=-1, symmetry='off') qsfdtd.time_propagation('gs.gpw', kick_strength=[0.001, 0.000, 0.000], time_step=10, iterations=1500, dipole_moment_file='dm.dat') photoabsorption_spectrum('dm.dat', 'spec.2.dat', width=0.15) # 3) Nanosphere + Na2 classical_material = PolarizableMaterial() classical_material.add_component(PolarizableSphere(center=sphere_center, radius=radius, permittivity=eps_gold)) qsfdtd = QSFDTD(classical_material=classical_material, atoms=atoms, cells=(simulation_cell, 4.0), # vacuum = 4.0 Ang spacings=[2.0, 0.5], remove_moments=(1, 1)) energy = qsfdtd.ground_state('gs.gpw', mode='fd', nbands=-1, symmetry='off') qsfdtd.time_propagation('gs.gpw', kick_strength=[0.001, 0.000, 0.000], time_step=10, iterations=1500, 
dipole_moment_file='dm.dat') photoabsorption_spectrum('dm.dat', 'spec.3.dat', width=0.15) gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere/plot.py000066400000000000000000000013161454550013000265100ustar00rootroot00000000000000# web-page: hybrid.png import numpy as np import pylab as plt # Plot spectrum with r=0nm and r=5nm spec0 = np.loadtxt('spec.1.dat') # AuNP spec1 = np.loadtxt('spec.2.dat') # Na2 spec2 = np.loadtxt('spec.3.dat') # AuNP+Na2 plt.figure() plt.plot(spec0[:, 0], spec0[:, 1], 'r', label='Au nanoparticle') plt.plot(spec1[:, 0], spec1[:, 1], 'g', label='Na$_2$') plt.plot(spec1[:, 0], spec1[:, 1] + spec0[:, 1], 'k:', label='Sum of Na$_2$ and Au nanoparticle') plt.plot(spec2[:, 0], spec2[:, 1], 'b', label='Na$_2$ near Au nanoparticle') plt.legend(loc=1) plt.xlabel('Energy [eV]', fontsize=12) plt.ylabel('Dipole strength [1/eV]', fontsize=12) plt.xlim((0, 5.0)) plt.ylim((-1, 22.5)) plt.savefig('hybrid.png') gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere/plot_geom.py000066400000000000000000000057071454550013000275270ustar00rootroot00000000000000# web-page: geom.png import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap from ase.units import Bohr from ase.visualize.plot import plot_atoms from gpaw.tddft import TDDFT # Initialize TDDFT and QSFDTD td_calc = TDDFT('gs.gpw') def generate_xygrid(d, g, box): vslice = 2 # yx # Determine the array lengths in each dimension ng = d.shape X = None Y = None U = None V = None # Slice data d_slice = np.rollaxis(d, vslice)[g[vslice], :, :] d_proj = np.zeros(d_slice.shape) for ind, val in np.ndenumerate(d_slice): d_proj[ind] = np.where( np.append( np.rollaxis(d, vslice)[:, ind[0], ind[1]], 1.0) != 0)[0][0] # Grids x = np.linspace(0, box[1], ng[1]) y = np.linspace(0, box[0], ng[0]) # Meshgrid and corresponding data X, Y = np.meshgrid(x, y) U = np.real(d_slice[1]) # y V = np.real(d_slice[0]) # x # Spacing dx = x[1] - x[0] dy = y[1] - y[0] return 
d_slice, d_proj, (x, y, dx, dy), (X, Y, U, V) poisson_solver = td_calc.hamiltonian.poisson atoms = td_calc.atoms box = np.diagonal(poisson_solver.cl.gd.cell_cv) * Bohr # in Ang # create figure plt.figure(1, figsize=(4, 4)) plt.rcParams['font.size'] = 14 # prepare data plotData = poisson_solver.classical_material.beta[0] ng = plotData.shape axis = 2 ax = plt.subplot(1, 1, 1) g = [None, None, ng[2] // 2] dmy1, d_proj, (x, y, dx, dy), dmy2 = generate_xygrid(plotData, g, box) # choose the colourmap for the polarizable material here plt.imshow(d_proj, interpolation='bicubic', origin='lower', cmap=ListedColormap(["goldenrod", "white"]), extent=[x[0] - dx / 2, x[-1] + dx / 2, y[0] - dy / 2, y[-1] + dy / 2]) # Plot atoms # switch x and y orientation for yx plot pos = atoms.get_positions() pos[:, [0, 1]] = pos[:, [1, 0]] atoms.set_positions(pos) cell = atoms.get_cell() cell[:, [0, 1]] = cell[:, [1, 0]] atoms.set_cell(cell) # ASE plot atoms function i, j = 1, 0 offset = np.array( [poisson_solver.qm.corner1[i], poisson_solver.qm.corner1[j]]) * 2 * Bohr bbox = np.array( [poisson_solver.qm.corner1[i], poisson_solver.qm.corner1[j], poisson_solver.qm.corner2[i], poisson_solver.qm.corner2[j]]) * Bohr plot_atoms(atoms, ax=None, show_unit_cell=2, offset=offset, bbox=bbox) ax.autoscale() # Classical grid dmy1, dmy_proj, (x, y, dx, dy), dmy3 = generate_xygrid(plotData, g, box) xx, yy = np.meshgrid(x, y) plt.scatter(xx, yy, s=0.75, c='k', marker='o') # Quantum grid dmy1, dmy_proj, (x, y, dx, dy), dmy3 = generate_xygrid( plotData, g, box=np.diagonal(poisson_solver.qm.gd.cell_cv) * Bohr) xx, yy = np.meshgrid(x, y) plt.scatter(poisson_solver.qm.corner1[i] * Bohr + xx, poisson_solver.qm.corner1[j] * Bohr + yy, s=0.25, c='k', marker='o') # Labels plt.xlabel('y [Ang]') plt.ylabel('x [Ang]') # Plot plt.tight_layout() plt.savefig('geom.png') 
gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere/submit.agts.py000066400000000000000000000002551454550013000277730ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='calculate.py', cores=8, tmax='1h'): run(script='plot_geom.py') run(script='plot.py') gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere_inducedfield/000077500000000000000000000000001454550013000276565ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere_inducedfield/calculate.py000066400000000000000000000063671454550013000322010ustar00rootroot00000000000000from ase import Atoms from gpaw import GPAW from gpaw.fdtd.poisson_fdtd import FDTDPoissonSolver from gpaw.fdtd.polarizable_material import (PermittivityPlus, PolarizableMaterial, PolarizableSphere) from gpaw.tddft import TDDFT, DipoleMomentWriter, photoabsorption_spectrum from gpaw.inducedfield.inducedfield_tddft import TDDFTInducedField from gpaw.inducedfield.inducedfield_fdtd import FDTDInducedField from gpaw.mpi import world import numpy as np # Nanosphere radius (Angstroms) radius = 7.40 # Geometry atom_center = np.array([30., 15., 15.]) sphere_center = np.array([15., 15., 15.]) simulation_cell = np.array([40., 30., 30.]) # Atoms object atoms = Atoms('Na2', atom_center + np.array([[-1.5, 0.0, 0.0], [1.5, 0.0, 0.0]])) # Permittivity of Gold # J. Chem. Phys. 
135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 eps_gold = PermittivityPlus(data=[[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]]) # 3) Nanosphere + Na2 classical_material = PolarizableMaterial() classical_material.add_component(PolarizableSphere(center=sphere_center, radius=radius, permittivity=eps_gold)) # Combined Poisson solver poissonsolver = FDTDPoissonSolver(classical_material=classical_material, qm_spacing=0.5, cl_spacing=2.0, cell=simulation_cell, communicator=world, remove_moments=(1, 1)) poissonsolver.set_calculation_mode('iterate') # Combined system atoms.set_cell(simulation_cell) atoms, qm_spacing, gpts = poissonsolver.cut_cell(atoms, vacuum=4.0) # Initialize GPAW gs_calc = GPAW(mode='fd', gpts=gpts, nbands=-1, poissonsolver=poissonsolver, symmetry={'point_group': False}) atoms.calc = gs_calc # Ground state energy = atoms.get_potential_energy() # Save state gs_calc.write('gs.gpw', 'all') # Initialize TDDFT and FDTD kick = [0.001, 0.000, 0.000] time_step = 10 iterations = 1500 td_calc = TDDFT('gs.gpw') DipoleMomentWriter(td_calc, 'dm.dat') # Attach InducedFields to the calculation frequencies = [2.05, 2.60] width = 0.15 cl_ind = FDTDInducedField(paw=td_calc, frequencies=frequencies, width=width) qm_ind = TDDFTInducedField(paw=td_calc, frequencies=frequencies, width=width) # Propagate TDDFT and FDTD td_calc.absorption_kick(kick_strength=kick) td_calc.propagate(time_step, iterations) # Save results td_calc.write('td.gpw', 'all') cl_ind.write('cl.ind') qm_ind.write('qm.ind') photoabsorption_spectrum('dm.dat', 'spec.3.dat', width=width) gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere_inducedfield/plot.py000066400000000000000000000045541454550013000312160ustar00rootroot00000000000000# web-page: cl_field.ind_Ffe.png, qm_field.ind_Ffe.png, tot_field.ind_Ffe.png from gpaw.mpi import world import 
numpy as np import matplotlib.pyplot as plt from gpaw.inducedfield.inducedfield_base import BaseInducedField from gpaw.tddft.units import aufrequency_to_eV assert world.size == 1, 'This script should be run in serial mode.' # Helper function def do_plot(d_g, ng, box, atoms): # Take slice of data array d_yx = d_g[:, :, ng[2] // 2] y = np.linspace(0, box[0], ng[0] + 1)[:-1] dy = box[0] / (ng[0] + 1) y += dy * 0.5 ylabel = u'x / Å' x = np.linspace(0, box[1], ng[1] + 1)[:-1] dx = box[1] / (ng[1] + 1) x += dx * 0.5 xlabel = u'y / Å' # Plot plt.figure() ax = plt.subplot(1, 1, 1) X, Y = np.meshgrid(x, y) plt.contourf(X, Y, d_yx, 40) plt.colorbar() for atom in atoms: pos = atom.position plt.scatter(pos[1], pos[0], s=50, c='k', marker='o') plt.xlabel(xlabel) plt.ylabel(ylabel) plt.xlim([x[0], x[-1]]) plt.ylim([y[0], y[-1]]) ax.set_aspect('equal') for fname, name in zip(['cl_field.ind', 'qm_field.ind', 'tot_field.ind'], ['Classical subsystem', 'Quantum subsystem', 'Total hybrid system']): # Read InducedField object ind = BaseInducedField(fname, readmode='all') # Choose array w = 0 # Frequency index freq = ind.omega_w[w] * aufrequency_to_eV # Frequency box = np.diag(ind.atoms.get_cell()) # Calculation box d_g = ind.Ffe_wg[w] # Data array ng = d_g.shape # Size of grid atoms = ind.atoms # Atoms do_plot(d_g, ng, box, atoms) plt.title(f'{name}\nField enhancement @ {freq:.2f} eV') plt.savefig(fname + '_Ffe.png', bbox_inches='tight') # Imaginary part of density d_g = ind.Frho_wg[w].imag ng = d_g.shape do_plot(d_g, ng, box, atoms) plt.title('%s\nImaginary part of induced charge density @ %.2f eV' % (name, freq)) plt.savefig(fname + '_Frho.png', bbox_inches='tight') # Imaginary part of potential d_g = ind.Fphi_wg[w].imag ng = d_g.shape do_plot(d_g, ng, box, atoms) plt.title(f'{name}\nImaginary part of induced potential @ {freq:.2f} eV') plt.savefig(fname + '_Fphi.png', bbox_inches='tight') 
gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere_inducedfield/postprocess.py000066400000000000000000000013451454550013000326170ustar00rootroot00000000000000from gpaw.tddft import TDDFT from gpaw.inducedfield.inducedfield_fdtd import ( FDTDInducedField, calculate_hybrid_induced_field) from gpaw.inducedfield.inducedfield_tddft import TDDFTInducedField td_calc = TDDFT('td.gpw') # Classical subsystem cl_ind = FDTDInducedField(filename='cl.ind', paw=td_calc) cl_ind.calculate_induced_field(gridrefinement=2) cl_ind.write('cl_field.ind', mode='all') # Quantum subsystem qm_ind = TDDFTInducedField(filename='qm.ind', paw=td_calc) qm_ind.calculate_induced_field(gridrefinement=2) qm_ind.write('qm_field.ind', mode='all') # Total system, interpolate/extrapolate to a grid with spacing h tot_ind = calculate_hybrid_induced_field(cl_ind, qm_ind, h=1.0) tot_ind.write('tot_field.ind', mode='all') gpaw-24.1.0/doc/documentation/electrodynamics/gold+na2_nanosphere_inducedfield/submit.agts.py000066400000000000000000000003021454550013000324630ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='calculate.py', cores=8, tmax='1h'): with run(script='postprocess.py', cores=8): run(script='plot.py') gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere/000077500000000000000000000000001454550013000245035ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere/calculate.py000066400000000000000000000033511454550013000270140ustar00rootroot00000000000000from gpaw.fdtd.poisson_fdtd import QSFDTD from gpaw.fdtd.polarizable_material import (PermittivityPlus, PolarizableMaterial, PolarizableSphere) from gpaw.tddft import photoabsorption_spectrum from gpaw.mpi import world import numpy as np # Nanosphere radius (Angstroms) radius = 50.0 # Whole simulation cell (Angstroms) large_cell = np.array([3 * radius, 3 * radius, 3 * radius]) # Permittivity of Gold # J. Chem. Phys. 
135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 gold = [[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]] # Initialize classical material classical_material = PolarizableMaterial() # Classical nanosphere classical_material.add_component( PolarizableSphere(center=0.5 * large_cell, radius=radius, permittivity=PermittivityPlus(data=gold))) # Quasistatic FDTD qsfdtd = QSFDTD(classical_material=classical_material, atoms=None, cells=large_cell, spacings=[8.0, 1.0], communicator=world, remove_moments=(4, 1)) # Run ground state energy = qsfdtd.ground_state('gs.gpw', mode='fd', nbands=-1, symmetry='off') # Run time evolution qsfdtd.time_propagation('gs.gpw', time_step=10, iterations=1000, kick_strength=[0.001, 0.000, 0.000], dipole_moment_file='dm.dat') # Spectrum photoabsorption_spectrum('dm.dat', 'spec.dat', width=0.0) gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere/plot.py000066400000000000000000000022641454550013000260370ustar00rootroot00000000000000# web-page: qsfdtd_vs_mie.png import numpy as np import pylab as plt from ase.units import Hartree, Bohr from gpaw.fdtd.polarizable_material import PermittivityPlus, _eps0_au # Nanosphere radius (Angstroms) radius = 50.0 # Permittivity of Gold # J. Chem. Phys. 135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 gold = [[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]] # Plot calculated spectrum and compare with Mie theory spec = np.loadtxt('spec.dat') perm = PermittivityPlus(data=gold).value(spec[:, 0] / Hartree) plt.figure() plt.plot(spec[:, 0], spec[:, 1], 'r', label='QSFDTD') plt.plot(spec[:, 0], 3. * (4. / 3. * np.pi * (radius / Bohr)**3) * (spec[:, 0] / Hartree) / (2. 
* np.pi**2) / Hartree * np.imag((perm - _eps0_au) / (perm + 2. * _eps0_au)), 'b', label='Mie theory') plt.legend(loc=2) plt.xlabel('Energy [eV]', fontsize=12) plt.ylabel('Dipole strength [1/eV]', fontsize=12) plt.xlim((0, 5.0)) plt.ylim((-1, 3500)) plt.savefig('qsfdtd_vs_mie.png') gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere/submit.agts.py000066400000000000000000000002011454550013000273060ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='calculate.py', tmax='1h'): run(script='plot.py') gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere_inducedfield/000077500000000000000000000000001454550013000272025ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere_inducedfield/calculate.py000066400000000000000000000054451454550013000315210ustar00rootroot00000000000000from ase import Atoms from gpaw import GPAW from gpaw.fdtd.poisson_fdtd import FDTDPoissonSolver from gpaw.fdtd.polarizable_material import (PermittivityPlus, PolarizableMaterial, PolarizableSphere) from gpaw.tddft import TDDFT, DipoleMomentWriter, photoabsorption_spectrum from gpaw.inducedfield.inducedfield_fdtd import FDTDInducedField from gpaw.mpi import world import numpy as np # Nanosphere radius (Angstroms) radius = 50.0 # Whole simulation cell (Angstroms) large_cell = np.array([3 * radius, 3 * radius, 3 * radius]) # Permittivity of Gold # J. Chem. Phys. 
135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 gold = [[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]] # Initialize classical material classical_material = PolarizableMaterial() # Classical nanosphere classical_material.add_component( PolarizableSphere(center=0.5 * large_cell, radius=radius, permittivity=PermittivityPlus(data=gold))) # Poisson solver poissonsolver = FDTDPoissonSolver(classical_material=classical_material, cl_spacing=8.0, qm_spacing=1.0, cell=large_cell, communicator=world, remove_moments=(4, 1)) poissonsolver.set_calculation_mode('iterate') # Dummy quantum system atoms = Atoms('H', [0.5 * large_cell], cell=large_cell) atoms, qm_spacing, gpts = poissonsolver.cut_cell(atoms) del atoms[:] # Remove atoms, quantum system is empty # Initialize GPAW gs_calc = GPAW(mode='fd', gpts=gpts, nbands=-1, poissonsolver=poissonsolver, symmetry={'point_group': False}) atoms.calc = gs_calc # Ground state energy = atoms.get_potential_energy() # Save state gs_calc.write('gs.gpw', 'all') # Initialize TDDFT and FDTD kick = [0.001, 0.000, 0.000] time_step = 10 iterations = 1000 td_calc = TDDFT('gs.gpw') DipoleMomentWriter(td_calc, 'dm0.dat') # Attach InducedField to the calculation frequencies = [2.45] width = 0.0 ind = FDTDInducedField(paw=td_calc, frequencies=frequencies, width=width) # Propagate TDDFT and FDTD td_calc.absorption_kick(kick_strength=kick) td_calc.propagate(time_step, iterations) # Save results td_calc.write('td.gpw', 'all') ind.write('td.ind') # Spectrum photoabsorption_spectrum('dm0.dat', 'spec.dat', width=width) # Induced field ind.calculate_induced_field(gridrefinement=2) ind.write('field.ind', mode='all') gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere_inducedfield/plot.py000066400000000000000000000042721454550013000305370ustar00rootroot00000000000000# web-page: field.ind_Ffe.png from 
gpaw.mpi import world import numpy as np import matplotlib.pyplot as plt from gpaw.inducedfield.inducedfield_base import BaseInducedField from gpaw.tddft.units import aufrequency_to_eV assert world.size == 1, 'This script should be run in serial mode.' # Helper function def do_plot(d_g, ng, box, atoms): # Take slice of data array d_yx = d_g[:, :, ng[2] // 2] y = np.linspace(0, box[0], ng[0] + 1)[:-1] dy = box[0] / (ng[0] + 1) y += dy * 0.5 ylabel = u'x / Å' x = np.linspace(0, box[1], ng[1] + 1)[:-1] dx = box[1] / (ng[1] + 1) x += dx * 0.5 xlabel = u'y / Å' # Plot plt.figure() ax = plt.subplot(1, 1, 1) X, Y = np.meshgrid(x, y) plt.contourf(X, Y, d_yx, 40) plt.colorbar() for atom in atoms: pos = atom.position plt.scatter(pos[1], pos[0], s=50, c='k', marker='o') plt.xlabel(xlabel) plt.ylabel(ylabel) plt.xlim([x[0], x[-1]]) plt.ylim([y[0], y[-1]]) ax.set_aspect('equal') for fname, name in zip(['field.ind'], ['Classical system']): # Read InducedField object ind = BaseInducedField(fname, readmode='all') # Choose array w = 0 # Frequency index freq = ind.omega_w[w] * aufrequency_to_eV # Frequency box = np.diag(ind.atoms.get_cell()) # Calculation box d_g = ind.Ffe_wg[w] # Data array ng = d_g.shape # Size of grid atoms = ind.atoms # Atoms do_plot(d_g, ng, box, atoms) plt.title(f'{name}\nField enhancement @ {freq:.2f} eV') plt.savefig(fname + '_Ffe.png', bbox_inches='tight') # Imaginary part of density d_g = ind.Frho_wg[w].imag ng = d_g.shape do_plot(d_g, ng, box, atoms) plt.title('%s\nImaginary part of induced charge density @ %.2f eV' % (name, freq)) plt.savefig(fname + '_Frho.png', bbox_inches='tight') # Imaginary part of potential d_g = ind.Fphi_wg[w].imag ng = d_g.shape do_plot(d_g, ng, box, atoms) plt.title(f'{name}\nImaginary part of induced potential @ {freq:.2f} eV') plt.savefig(fname + '_Fphi.png', bbox_inches='tight') 
gpaw-24.1.0/doc/documentation/electrodynamics/gold_nanosphere_inducedfield/submit.agts.py000066400000000000000000000002011454550013000320050ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='calculate.py', tmax='1h'): run(script='plot.py') gpaw-24.1.0/doc/documentation/electrodynamics/hybridscheme.rst000066400000000000000000000131751454550013000245430ustar00rootroot00000000000000.. _hybridscheme: =============================== Hybrid Quantum/Classical Scheme =============================== The basic idea is to separate the calculation into two parts: the first one is the quantum subsystem, which is propagated using :ref:`timepropagation` scheme, and the second one is the classical subsystem that is treated using :ref:`qsfdtd`. The subsystems are propagated separately in their own real space grids, but they share a common electrostatic potential. In the :ref:`timepropagation` part of the calculation the electrostatic potential is known as the Hartree potential `V^{\rm{qm}}(\mathbf{r}, t)` and it is solved from the Poisson equation `\nabla^2 V^{\rm{qm}}(\mathbf{r}, t) = -4\pi\rho^{\rm{qm}}(\mathbf{r}, t)` In the :ref:`qsfdtd` the electrostatic potential is solved from the Poisson equation as well: `\nabla^2 V^{\rm{cl}}(\mathbf{r}, t) = -4\pi\rho^{\rm{cl}}(\mathbf{r}, t).` The hybrid scheme is created by replacing in both schemes the electrostatic (Hartree) potential by a common potential: `\nabla^2 V^{\rm{tot}}(\mathbf{r}, t) = -4\pi\left[\rho^{\rm{cl}}(\mathbf{r}, t)+\rho^{\rm{qm}}(\mathbf{r}, t)\right].` ----------- Double grid ----------- The observables of the quantum and classical subsystems are defined in their own grids, which are overlapping but can have different spacings. 
The following restrictions must hold: * The quantum grid must fit completely inside the classical grid * The spacing of the classical grid `h_{\rm{cl}}` must be equal to `2^n h_{\rm{qm}}`, where `h_{\rm{qm}}` is the spacing of the quantum grid and n is an integer. When these conditions hold, the potential from one subsystem can be transferred to the other one. The grids are automatically adjusted so that some grid points are common. -------------------------------------------- Transferring the potential between two grids -------------------------------------------- * Transferring the potential from classical subsystem to the quantum grid is performed by interpolating the classical potential to the denser grid of the quantum subsystem. The interpolation only takes place in the small subgrid around the quantum mechanical region. * Transferring the potential from quantum subsystem to the classical one is done in another way: instead of the potential itself, it is the quantum mechanical electron density `\rho^{\rm{qm}}(\mathbf{r}, t)` that is copied to the coarser classical grid. Its contribution to the total electrostatic potential is then determined by solving the Poisson equation in that grid. * Altogether this means that although there is only one potential to be determined `(V^{\rm{tot}}(\mathbf{r}, t))`, three Poisson equations must be solved: 1. `V^{\rm{cl}}(\mathbf{r}, t)` in classical grid 2. `V^{\rm{qm}}(\mathbf{r}, t)` in quantum grid 3. `V^{\rm{qm}}(\mathbf{r}, t)` in classical grid When these are ready and `V^{\rm{cl}}(\mathbf{r}, t)` is transferred to the quantum grid, `V^{\rm{tot}}(\mathbf{r}, t)` is determined in both grids. ---------------------------------------------------------------------------- Example: photoabsorption of Na2 near gold nanosphere ---------------------------------------------------------------------------- This example calculates the photoabsorption of `\text{Na}_2` molecule in (i) presence and (ii) absence of a gold nanosphere: .. 
literalinclude:: gold+na2_nanosphere/calculate.py |enhanced_absorption| .. |enhanced_absorption| image:: gold+na2_nanosphere/hybrid.png The optical response of the molecule apparently enhances when it is located near the metallic nanoparticle, see Ref. \ [#Sakko]_ for more examples. The geometry and the distribution of the grid points are shown in the following figure (generated with :download:`this script `): |geometry| .. |geometry| image:: gold+na2_nanosphere/geom.png .. _hybrid-inducedfield: ---------------------------------------------------------------------------- Advanced example: Near field enhancement of hybrid system ---------------------------------------------------------------------------- In this example we calculate the same hybrid Na2 + gold nanoparticle system as above, but using the advanced syntax instead of the :code:`QSFDTD` wrapper. This allows us to include :code:`InducedField` observers in the calculation, see :ref:`TDDFTInducedField module documentation `: .. literalinclude:: gold+na2_nanosphere_inducedfield/calculate.py The :code:`TDDFTInducedField` records the quantum part of the calculation and the :code:`FDTDInducedField` records the classical part. We can calculate the individual and the total induced field by the following script: .. literalinclude:: gold+na2_nanosphere_inducedfield/postprocess.py All the :code:`InducedField` objects can be analyzed in the same way as described in :ref:`TDDFTInducedField module documentation `. Here we show an example script for plotting (run in serial mode, i.e., with one process): .. literalinclude:: gold+na2_nanosphere_inducedfield/plot.py This produces the following figures for the electric near field: |cl_fe| |qm_fe| |tot_fe| .. |cl_fe| image:: gold+na2_nanosphere_inducedfield/cl_field.ind_Ffe.png :scale: 70 % .. |qm_fe| image:: gold+na2_nanosphere_inducedfield/qm_field.ind_Ffe.png :scale: 70 % .. 
|tot_fe| image:: gold+na2_nanosphere_inducedfield/tot_field.ind_Ffe.png :scale: 70 % ---------- References ---------- .. [#Sakko] A. Sakko, T. P. Rossi and R. M. Nieminen, Dynamical coupling of plasmons and molecular excitations by hybrid quantum/classical calculations: time-domain approach *J. Phys.: Condens. Matter* **26**, 315013 (2014) gpaw-24.1.0/doc/documentation/electrodynamics/plot_permittivity.py000066400000000000000000000045111454550013000255160ustar00rootroot00000000000000# web-page: Au.yml.png import numpy as np import matplotlib.pyplot as plt from ase.units import _hplanck, _c, _e, Hartree from gpaw.fdtd.polarizable_material import PermittivityPlus _eps0_au = 1.0 / (4.0 * np.pi) def eV_from_um(um_i): return _hplanck / _e * _c / (um_i * 1e-6) def plot(fname, fiteps): with open(fname, 'r') as yml: for line in yml: if line.strip().startswith('data'): data_ij = np.array([[float(d) for d in line.split()] for line in yml]) energy_j = eV_from_um(data_ij[:, 0]) n_j = data_ij[:, 1] k_j = data_ij[:, 2] eps_j = (n_j ** 2 - k_j ** 2) + 1.0j * 2 * n_j * k_j energy_e = np.linspace(1.0, 6.0, 100) fiteps_e = np.array([fiteps.value(energy / Hartree) / _eps0_au for energy in energy_e]) plt.figure(figsize=(12, 6)) plt.subplot(1, 2, 1) plt.plot(energy_j, eps_j.real, 'bv', label='data') plt.plot(energy_e, fiteps_e.real, 'b-', label='fit') plt.xlim(energy_e.min(), energy_e.max()) # plt.ylim(fiteps_e.real.min(), fiteps_e.real.max()) plt.ylim(-70, 0) plt.xlabel('Energy (eV)') plt.ylabel(r'Real($\epsilon$)') plt.legend(loc='best') plt.subplot(1, 2, 2) plt.plot(energy_j, eps_j.imag, 'bv') plt.plot(energy_e, fiteps_e.imag, 'b-') plt.xlim(energy_e.min(), energy_e.max()) # plt.ylim(fiteps_e.imag.min(), fiteps_e.imag.max()) plt.ylim(0, 7) plt.xlabel('Energy (eV)') plt.ylabel(r'Imaginary($\epsilon$)') plt.tight_layout() plt.savefig(f'{fname}.png') # Permittivity of Gold # Source: # http://refractiveindex.info/?shelf=main&book=Au&page=Johnson # Direct download link: # wget 
https://refractiveindex.info/database/data/main/Au/Johnson.yml ymlfname = 'Au.yml' # Fit to the permittivity # J. Chem. Phys. 135, 084121 (2011); http://dx.doi.org/10.1063/1.3626549 fiteps = PermittivityPlus(data=[[0.2350, 0.1551, 95.62], [0.4411, 0.1480, -12.55], [0.7603, 1.946, -40.89], [1.161, 1.396, 17.22], [2.946, 1.183, 15.76], [4.161, 1.964, 36.63], [5.747, 1.958, 22.55], [7.912, 1.361, 81.04]]) plot(ymlfname, fiteps) gpaw-24.1.0/doc/documentation/electrodynamics/qsfdtd.rst000066400000000000000000000261221454550013000233560ustar00rootroot00000000000000.. _qsfdtd: ================================================ Quasistatic Finite-Difference Time-Domain method ================================================ The optical properties of all materials depend on how they respond (absorb and scatter) to external electromagnetic fields. In classical electrodynamics, this response is described by the Maxwell equations. One widely used method for solving them numerically is the finite-difference time-domain (FDTD) approach. \ [#Taflove]_. It is based on propagating the electric and magnetic fields in time under the influence of an external perturbation (light) in such a way that the observables are expressed in real space grid points. The optical constants are obtained by analyzing the resulting far-field pattern. In the microscopic limit of classical electrodynamics the quasistatic approximation is valid and an alternative set of time-dependent equations for the polarization charge, polarization current, and the electric field can be derived.\ [#coomar]_ The quasistatic formulation of FDTD is implemented in GPAW. It can be used to model the optical properties of metallic nanostructures (i) purely classically, or (ii) in combination with :ref:`timepropagation`, which yields :ref:`hybridscheme`. .. 
TODO: a schematic picture of classical case and hybrid case ------------------------- Quasistatic approximation ------------------------- The quasistatic approximation of classical electrodynamics means that the retardation effects due to the finite speed of light are neglected. It is valid at very small length scales, typically below ~50 nm. Compared to full FDTD, the quasistatic formulation has some advantageous features. The magnetic field is negligible and only the longitudinal electric field needs to be considered, so the number of degrees of freedom is smaller. Because the retardation effects and propagating solutions are excluded, longer time steps and a simpler treatment of the boundary conditions can be used. ------------ Permittivity ------------ In the current implementation, the permittivity of the classical material is parametrized as a linear combination of Lorentzian oscillators .. math:: \epsilon(\mathbf{r}, \omega) = \epsilon_{\infty} + \sum_j \frac{\epsilon_0 \beta_j(\mathbf{r})}{\bar{\omega}_j^2(\mathbf{r})-\mbox{i}\omega\alpha_j(\mathbf{r})-\omega^2}, where `\alpha_j, \beta_j, \bar{\omega}_j` are fitted to reproduce the experimental permittivity. For gold and silver they can be found in Ref. \ [#Coomar]_. Permittivity defines how classical charge density polarizes when it is subject to external electric fields. The time-evolution for the charges in GPAW is performed with the leap-frog algorithm, following Ref. \ [#Gao]_. To test the quality of the fit, one can use :download:`this script <plot_permittivity.py>`. This gives the following plot for Au permittivity fitting. .. 
image:: Au.yml.png :scale: 50 % ------------------- Geometry components ------------------- Several routines are available to generate the basic shapes: * `\text{PolarizableBox}(\mathbf{r}_1, \mathbf{r}_2, \epsilon({\mathbf{r}, \omega}))` where `\mathbf{r}_1` and `\mathbf{r}_2` are the corner points, and `\epsilon({\mathbf{r}, \omega})` is the permittivity inside the structure * `\text{PolarizableSphere}(\mathbf{p}, r, \epsilon({\mathbf{r}, \omega}))` where `\mathbf{p}` is the center and `r` is the radius of the sphere * `\text{PolarizableEllipsoid}(\mathbf{p}, \mathbf{r}, \epsilon({\mathbf{r}, \omega}))` where `\mathbf{p}` is the center and `\mathbf{r}` is the array containing the three radii * `\text{PolarizableRod}(\mathbf{p}, r, \epsilon({\mathbf{r}, \omega}), c)` where `\mathbf{p}` is an array of subsequent corner coordinates, `r` is the radius, and `c` is a boolean denoting whether the corners are rounded * `\text{PolarizableTetrahedron}(\mathbf{p}, \epsilon({\mathbf{r}, \omega}))` where `\mathbf{p}` is an array containing the four corner points of the tetrahedron These routines can generate many typical geometries, and for general cases a set of tetrahedra can be used. ---------------- Optical response ---------------- The QSFDTD method can be used to calculate the optical photoabsorption spectrum just like in :ref:`timepropagation`: The classical charge density is first perturbed with an instantaneous electric field, and then the time dependence of the induced dipole moment is recorded. Its Fourier transform gives the photoabsorption spectrum. ------------------------------------------- Example: photoabsorption of gold nanosphere ------------------------------------------- This example calculates the photoabsorption spectrum of a nanosphere that has a diameter of 10 nm, and compares the result with the analytical Mie scattering limit. .. 
literalinclude:: gold_nanosphere/calculate.py Here the *QSFDTD* object generates a dummy quantum system that is treated using GPAW in *qsfdtd.ground_state*. One can pass the GPAW arguments, like *xc* or *nbands*, to this function: in the example script one empty KS-orbital was included (*nbands* =1) because GPAW needs to propagate something. Similarly, the arguments for TDDFT (such as *propagator*) can be passed to the *time_propagation* method. Note that the permittivity was initialized as PermittivityPlus, where Plus indicates that a renormalizing Lorentzian term is included; this extra term brings the static limit to vacuum value, i.e., `\epsilon(\omega=0)=\epsilon_0`, see Ref. \ [#Sakko]_ for detailed explanation. The above script generates the photoabsorption spectrum and compares it with the analytical formula of the Mie theory: .. math:: S(\omega) = \frac{3V\omega}{2\pi^2}\mbox{Im}\left[\frac{\epsilon(\omega)-1}{\epsilon(\omega)+2}\right], where *V* is the nanosphere volume: |qsfdtd_vs_mie| .. |qsfdtd_vs_mie| image:: gold_nanosphere/qsfdtd_vs_mie.png The general shape of Mie spectrum, and especially the localized surface plasmon resonance (LSPR) at 2.5 eV, is clearly reproduced by QSFDTD. The shoulder at 1.9 eV and the stronger overall intensity are examples of the inaccuracies of the used discretization scheme: the shoulder originates from spurious surface scattering, and the intensity from the larger volume of the nanosphere defined in the grid. For a better estimate of the effective volume, you can take a look at the standard output where the "Fill ratio" tells that 18.035% of the grid points are located inside the sphere. This means that the volume (and intensity) is roughly 16% too large: `\frac{V}{V_{\text{sphere}}}\approx\frac{0.18035\times(15\text{nm})^3}{\frac{4}{3}\pi\times(5\text{nm})^3}\approx1.16`. 
---------------------------------------- Advanced example: Near field enhancement ---------------------------------------- This example shows how to calculate the induced electric near field enhancement of the same nanosphere considered in the previous example. The induced field calculations can be included by using the advanced syntax instead of the simple :code:`QSFDTD` wrapper. In the example one can also see how the dummy empty quantum system is generated. .. literalinclude:: gold_nanosphere_inducedfield/calculate.py The contents of the obtained file :code:`field.ind` can be visualized like described in :ref:`hybrid-inducedfield`. We obtain a following plot of the field: |cl_fe| .. |cl_fe| image:: gold_nanosphere_inducedfield/field.ind_Ffe.png :scale: 70 % Note that the oscillations in the induced field (and density) inside the material are caused by numerical limitations of the current implementation. ----------- Limitations ----------- * The scattering from the spurious surfaces of materials, which are present because of the representation of the polarizable material in uniformly spaced grid points, can cause unphysical broadening of the spectrum. * Nonlinear response (hyperpolarizability) of the classical material is not supported, so do not use too large external fields. In addition to nonlinear media, also other special cases (nonlocal permittivity, natural birefringence, dichroism, etc.) are not enabled. * The frequency-dependent permittivity of the classical material must be represented as a linear combination of Lorentzian oscillators. Other forms, such as Drude terms, should be implemented in the future. Also, the high-frequency limit must be vacuum permittivity. Future implementations should get rid of also this limitation. * Only the grid-mode of GPAW (not e.g. LCAO) is supported. ----------------- Technical remarks ----------------- * Double grid technique: the calculation always uses two grids: one for the classical part and one for the TDDFT part. 
In purely classical simulations, such as the ones discussed on this page, the quantum subsystem contains one empty Kohn-Sham orbital. For more information, see the description of :ref:`hybridscheme` because there the double grid is very important. * Parallelizability: QSFDTD calculations can be parallelized only over domains, so use either *communicator=serial_comm* or *communicator=world* when initializing *QSFDTD* (or *FDTDPoissonSolver*) class. The domain parallelization of QSFDTD does not affect the parallelization of DFT calculation. * Multipole corrections to Poissonsolver: QSFDTD module is mainly intended for nanoplasmonic simulations. There the charge oscillations are strong and the usual zero boundary conditions for the electrostatic potential can give inaccurate results if the simulation box is not large enough. In some cases, such as for single nanospheres, one can improve the situation by defining remove_moments argument in FDTDPoissonSolver: this will then use the multipole moments correction scheme, see e.g. Ref. \ [#Castro]_. ---- TODO ---- * Dielectrics (`\epsilon_{\infty}\neq\epsilon_0`) * Geometries from 3D model files * Sub-cell averaging * Full FDTD (retardation effects) or interface to an external FDTD software * Fix grid-dependent oscillations in the induced density ---------------------- Combination with TDDFT ---------------------- The QSFDTD module is mainly aimed to be used in combination with :ref:`timepropagation`: see :ref:`hybridscheme` for more information. ---------- References ---------- .. [#Taflove] A. Taflove and S. Hagness, Computational Electrodynamics: The Finite-Difference Time-Domain Method (3rd ed.), Artech House, Norwood, MA (2005). .. [#Coomar] A. Coomar, C. Arntsen, K. A. Lopata, S. Pistinner and D. Neuhauser, Near-field: a finite-difference time-dependent method for simulation of electrodynamics on small scales, *J. Chem. Phys.* **135**, 084121 (2011) .. [#Gao] Y. Gao and D. 
Neuhauser, Dynamical quantum-electrodynamics embedding: Combining time-dependent density functional theory and the near-field method *J. Chem. Phys.* **137**, 074113 (2012) .. [#Sakko] A. Sakko, T. P. Rossi and R. M. Nieminen, Dynamical coupling of plasmons and molecular excitations by hybrid quantum/classical calculations: time-domain approach *J. Phys.: Condens. Matter* **26**, 315013 (2014) .. [#Castro] A. Castro, A. Rubio, and M. J. Stott Solution of Poisson's equation for finite systems using plane wave methods *Canad. J. Phys.:* **81**, 1151 (2003) gpaw-24.1.0/doc/documentation/electrodynamics/submit.agts.py000066400000000000000000000001321454550013000241420ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): run(script='plot_permittivity.py') gpaw-24.1.0/doc/documentation/electrostatic_potential.rst000066400000000000000000000035131454550013000236270ustar00rootroot00000000000000.. _electrostatic potential: =============================== Note on electrostatic potential =============================== In the PAW formalism, the electrostatic potential from the pseudo charge `\tilde{\rho}(\mathbf{r})` is obtained by solving a Poisson equation: .. math:: \nabla^2 \tilde{v}_H(\mathbf{r})=-4\pi\tilde{\rho}(\mathbf{r}). To get the *real* all-electron electrostatic potential, we need the all-electron charge density: .. math:: \rho(\mathbf{r}) = \tilde{\rho}(\mathbf{r}) + \sum_a \Delta\tilde{\rho}^a(\mathbf{r} - \mathbf{R}^a), where `\Delta\tilde{\rho}^a` is an atomic PAW correction to the pseudo charge density: .. math:: \Delta\tilde{\rho}^a(\mathbf{r}) = n_c^a(r) - \tilde{n}_c^a(r) - \mathbb{Z}^a\delta(\mathbf{r}) - \sum_{\ell=0}^{\ell_{\text{max}}} \sum_{m=-\ell}^\ell Q_{\ell m}^a \hat{g}_{\ell m}^a(\mathbf{r}) + \sum_{\sigma i_1 i_2} D_{\sigma i_1 i_2}^a (\phi_{i_1}^a(\mathbf{r})\phi_{i_2}^a(\mathbf{r}) - \tilde{\phi}_{i_1}^a(\mathbf{r})\tilde{\phi}_{i_2}^a(\mathbf{r})). See :ref:`here ` for details. So, the all-electron potential is: .. 
math:: v_H(\mathbf{r}) = \tilde{v}_H(\mathbf{r}) + \sum_a \Delta\tilde{v}_H^a(\mathbf{r} - \mathbf{R}^a) and .. math:: \Delta\tilde{v}_H^a(\mathbf{r}) = \int d\mathbf{r}' \frac{\Delta\tilde{\rho}^a(\mathbf{r}')} {|\mathbf{r}-\mathbf{r}'|}. Notice that the `Q_{\ell m}^a` have been chosen so that all multipole moments of `\Delta\tilde{\rho}^a` are zero and therefore, the potential from these correction charges (`\Delta\tilde{v}_H^a`) will be non-zero only inside the atomic augmentation spheres. The :meth:`~gpaw.calculator.GPAW.get_electrostatic_corrections` method will return an array of integrated corrections: .. math:: \int d\mathbf{r} \Delta\tilde{v}_H^a(\mathbf{r}) in units of eV Å\ :sup:`3`. gpaw-24.1.0/doc/documentation/elph/000077500000000000000000000000001454550013000170775ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/elph/elph.rst000066400000000000000000000106661454550013000205720ustar00rootroot00000000000000.. _elphtheory: =============================== Electron-Phonon Coupling Theory =============================== Phonons can interact with the electrons in a variety of ways. For example, when an electron moves through the crystal, it can scatter off of a phonon, thereby transferring some of its energy to the lattice. Conversely, when a phonon vibrates, it can create an oscillating electric field that can interact with the electrons, inducing a change in their energies and momenta. The coupling between electrons and lattice vibrations is responsible for a range of interesting and important phenomena, from electrical and thermal conductivity to superconductivity. The first order electron-phonon coupling matrix `g_{mn}^\nu(\mathbf{k}, \mathbf{q})` couples the electronic states `m(\mathbf{k}+ \mathbf{q}),n(\mathbf{k})` via phonons `\nu` at wave vectors `\mathbf{q}` and frequencies `\omega_\nu`:[#Giustino2017]_ .. math:: g_{mn}^\nu(\mathbf{k}, \mathbf{q}) = \sqrt{ \frac{\hbar}{2 m_0 \omega_\nu}} M_{mn}^\nu(\mathbf{k}, \mathbf{q}) . with .. 
math:: M_{mn}^\nu(\mathbf{k}, \mathbf{q}) = \langle \psi_{m \mathbf{k}+ \mathbf{q}} \vert \nabla_u V^{KS} \cdot \mathbf{e}_\nu \vert \psi_{n\mathbf{k}} \rangle. Here `m_0` is the sum of the masses of all the atoms in the unit cell and `\nabla_u` denotes the gradient with respect to atomic displacements. For the three translational modes at `\vert \mathbf{q} \vert = 0` the matrix elements `g_{mn}^\nu = 0`, as a consequence of the acoustic sum rule. -------------- Implementation -------------- Within the PAW framework the Kohn-Sham potential can be split into a local part `V(\mathbf{r})` represented on a regular grid and a nonlocal part `\Delta H^a_{i_1 i_2}`: .. math:: V^{KS} = V + \Delta H^a_{i_1 i_2}. In GPAW `\nabla_u V^{KS}(\mathbf{r})` is determined using the finite difference method in a supercell. The potential at the displaced coordinates is computed by the :meth:`~gpaw.elph.DisplacementRunner` class, which is based on ASE's ``ase.phonon.Displacement`` class. The central difference derivative, as evaluated in the :meth:`~gpaw.elph.Supercell` class, consists of four contributions: .. 
math:: \langle \psi_{i} \vert \nabla_u V^{KS} \vert \psi_{j} \rangle = \langle \tilde \psi_{i} \vert \nabla_u V(\mathbf{r}) \vert \tilde \psi_{j} \rangle + \sum_{a, ij} \langle \tilde \psi_{i} \vert \tilde p^a_i \rangle (\nabla_u \Delta H^a_{i_1 i_2}) \langle \tilde p^a_j \vert \tilde \psi_{j} \rangle + \sum_{a, ij} \langle \tilde \psi_{i} \vert \nabla_u \tilde p^a_i \rangle \Delta H^a_{i_1 i_2} \langle \tilde p^a_j \vert \tilde \psi_{j} \rangle + \sum_{a, ij} \langle \tilde \psi_{i} \vert \tilde p^a_i \rangle \Delta H^a_{i_1 i_2} \langle \nabla_u \tilde p^a_j \vert \tilde \psi_{j} \rangle Here we do not actually project the derivatives onto electronic states, but rather onto LCAO orbitals `\Psi_{NM}`, where `N` denotes the cell index and `M` the orbital index. We use a Fourier transform from the `\mathbf{k}`-space Bloch representation to the real space representation so that we can later compute `M_{mn}^\nu` for arbitrary `\mathbf{q}`: .. math:: \mathbf{g}_{\substack{N M\\ N^\prime M^\prime}}^{sc} = FFT\left[ \langle \Psi_{NM}(\mathbf{k}) \vert \nabla_u V^{KS} \vert \Psi_{N^\prime M^\prime}(\mathbf{k}) \rangle \right]. Finally, the electron-phonon coupling matrix is obtained by projecting the supercell matrix into the primitive unit cell bands `m, n` and phonon modes `\nu` in :meth:`~gpaw.elph.ElectronPhononMatrix`: .. math:: M_{mn}^\nu(\mathbf{k}, \mathbf{q}) = \sum_{\substack{N M\\ N^\prime M^\prime}} C_{mM}^{\star} C_{nM^\prime} \mathbf{g}_{\mu}^{sc} \cdot \mathbf{u}_{q \nu} e^{2\pi i [(\mathbf{k}+\mathbf{q})\cdot \mathbf{R_N} - \mathbf{k}\cdot \mathbf{R_N^\prime}]}, where `C_{nM}` are the LCAO coefficients and `\mathbf{u}_{q \nu}` are the mass-scaled phonon displacement vectors. Check out :ref:`elph` for an example and exercise, and :ref:`raman` and :ref:`elphraman` for an application. ---------- References ---------- .. 
[#Giustino2017] F. Giustino, "Electron-phonon interactions from first principles", Reviews of Modern Physics 89, 015003 (2017). Code ==== .. autoclass:: gpaw.elph.DisplacementRunner :members: .. autoclass:: gpaw.elph.Supercell :members: .. autoclass:: gpaw.elph.ElectronPhononMatrix :members: gpaw-24.1.0/doc/documentation/elph/raman.rst000066400000000000000000000111171454550013000207300ustar00rootroot00000000000000.. _raman: ======================== Raman spectroscopy ======================== GPAW offers two ways of calculating Raman intensities. One can use the `ASE Raman `_ utility together with the GPAW LRTDDFT module as shown in the Resonant Raman tutorial :ref:`resonant_raman_water`. GPAW also implements Raman spectroscopy for zone-center phonons of extended systems using the electron-phonon coupling (see :ref:`elphtheory` and :ref:`elph`) within 3rd order perturbation theory :dfn:`Taghizadeh et al.` [#Taghizadeh2020]_ , which is discussed here. This method is currently only implemented for the LCAO mode. The Stokes Raman intensity can be written as .. math:: I(\omega) = I_0 \sum_\nu \frac{n_\nu+1}{\omega_\nu} \vert \sum_{\alpha, \beta} u_{in}^\alpha R_{\alpha \beta}^\nu u_{out}^\beta \vert^2 \delta(\omega-\omega_\nu) where `\nu` denotes phonon modes and `\alpha`, `\beta` denote polarisations of the incoming and outgoing laser light. The Raman tensor `R_{\alpha \beta}^\nu` has six terms and is given by Ref. [#Taghizadeh2020]_ Eq. (10) .. 
math:: R_{\alpha \beta}^\nu \equiv \sum_{ijmn \mathbf{k}} \left[ \frac{p_{ij}^\alpha (g_{jm}^\nu \delta_{in} - g_{ni}^\nu \delta_{jm})p_{mn}^\beta}{(\hbar \omega_{in}-\varepsilon_{ji})(\hbar \omega_{out}-\varepsilon_{mn})} + \frac{p_{ij}^\alpha (p_{jm}^\beta \delta_{in} - p_{ni}^\beta \delta_{jm})g_{mn}^\nu}{(\hbar \omega_{in}-\varepsilon_{ji})(\hbar \omega_{\nu}-\varepsilon_{mn})} + \\ \frac{p_{ij}^\beta (g_{jm}^\nu \delta_{in} - g_{ni}^\nu \delta_{jm})p_{mn}^\alpha}{(-\hbar \omega_{out}-\varepsilon_{ji})(-\hbar \omega_{in}-\varepsilon_{mn})} + \frac{p_{ij}^\beta (p_{jm}^\alpha \delta_{in} - p_{ni}^\alpha \delta_{jm})g_{mn}^\nu}{(-\hbar \omega_{out}-\varepsilon_{ji})(\hbar \omega_{\nu}-\varepsilon_{mn})} + \\ \frac{g_{ij}^\nu (p_{jm}^\alpha \delta_{in} - p_{ni}^\alpha \delta_{jm})p_{mn}^\beta}{(-\hbar \omega_{\nu}-\varepsilon_{ji})(\hbar \omega_{out}-\varepsilon_{mn})} + \frac{g_{ij}^\nu (p_{jm}^\beta \delta_{in} - p_{ni}^\beta \delta_{jm})p_{mn}^\alpha}{(-\hbar \omega_{\nu}-\varepsilon_{ji})(-\hbar \omega_{in}-\varepsilon_{mn})} \right] f_i(1-f_j)f_n(1-f_m) The first term is considered to be the resonant term of the expression, the other terms represent different time orderings of the interaction in the Feynman diagrams. To compute the Raman intensity we need these ingredients: The momentum matrix elements `p_{ij}^\alpha=\langle i \mathbf{k} | \hat p^\alpha| j \mathbf{k} \rangle`, the electron-phonon matrix `g_{ij}^\nu = \langle i \mathbf{k} \vert \partial_{\nu{q=0}} V^{KS} \vert j \mathbf{k} \rangle` in the optical limit `\mathbf{q}=0` and of course knowledge of the electronic states and phonon modes throughout the Brillouin zone. For these calculations we can employ in the :meth:`~gpaw.lcao.dipoletransition.get_momentum_transitions` method, the GPAW electron-phonon module :ref:`elph` and the ASE phonon module, respectively. It is required to calculate all properties for the full Brillouin zone using `symmetry=off`. 
By default the routine saves a file called ``mom_skvnm.npy`` containing the momentum matrix. This can be deactivated using the ``savetofile`` switch. The matrix is always the return value of :meth:`~gpaw.lcao.dipoletransition.get_momentum_transitions`. Energy changes for phonons and potential changes for electron-phonon couplings are both computed using a finite displacement technique. Both quantities can be obtained simultaneously. In principle, the phonon modes can be obtained in any fashion that yields an ASE phonon object. For small systems the finite displacement method has the disadvantage of leading to an interaction of a displaced atom with its periodic images. This can lead to large errors especially in the electron-phonon matrix. This can be avoided by using a sufficiently large supercell for the finite displacement simulations. If phonon and effective potential are calculated simultaneously, results are saved in the same cache files with default name `elph`. Some more details are elaborated in the related tutorial :ref:`elphraman`. ---------- References ---------- .. [#Taghizadeh2020] A. Taghizadeh, U. Leffers, T.G. Pedersen, K.S. Thygesen, "A library of ab initio Raman spectra for automated identification of 2D materials", *Nature Communications* **11**, 3011 (2020). ---- Code ---- .. autoclass:: gpaw.elph.ResonantRamanCalculator :members: .. autoclass:: gpaw.elph.RamanData :members: .. autofunction:: gpaw.lcao.dipoletransition.get_momentum_transitions gpaw-24.1.0/doc/documentation/external.rst000066400000000000000000000026211454550013000205240ustar00rootroot00000000000000.. module:: gpaw.external External potential ================== External potentials are applied to all charged particles, i.e. electrons and nuclei. Examples -------- >>> # 2.5 V/Ang along z: >>> from gpaw.external import ConstantElectricField >>> calc = GPAW(external=ConstantElectricField(2.5, [0, 0, 1]), ...) .. 
autoclass:: ConstantElectricField >>> # Two point-charges (in units of |e|): >>> from gpaw.external import PointChargePotential >>> pc = PointChargePotential([-1, 1], [[4.0, 4.0, 0.0], [4.0, 4.0, 10.0]]) >>> calc = GPAW(external=pc, ...) .. autoclass:: PointChargePotential >>> # Step potential in z-direction >>> from gpaw.external import StepPotentialz >>> step = StepPotentialz(zstep=10, value_right=-7) >>> calc = GPAW(external=step, ...) .. autoclass:: StepPotentialz .. autoclass:: gpaw.bfield.BField Several potentials ------------------ A collection of potentials can be applied using :class:`PotentialCollection`: >>> from gpaw.external import ConstantElectricField, PointChargePotential >>> from gpaw.external import PotentialCollection >>> ext1 = ConstantElectricField(1) >>> ext2 = PointChargePotential([1, -5], positions=((0, 0, -10), (0, 0, 10))) >>> collection = PotentialCollection([ext1, ext2]) >>> calc = GPAW(external=collection, ...) .. autoclass:: PotentialCollection Your own potential ------------------ See an example here: :git:`gpaw/test/ext_potential/test_harmonic.py`. 
gpaw-24.1.0/doc/documentation/gpaw1.txt000066400000000000000000003747051454550013000177470ustar00rootroot000000000000002005-11-18 10.1103/PhysRevLett.95.216401 Bayesian error estimation in density-functional theory 2006-04-15 10.1002/pssb.200541328 Implementation of linear-scaling plane wave density functional theory on parallel computers 2006-04-15 10.1002/pssb.200541348 Three real-space discretization techniques in electronic structure calculations 2006-04-15 10.1002/pssb.200541391 Car-Parrinello molecular dynamics using real space wavefunctions 2006-05-07 10.1063/1.2193514 A general and efficient pseudopotential Fourier filtering scheme for real space methods using mask functions 2007-01-12 10.1103/PhysRevLett.98.026804 First-principles study of electron-conduction properties of C-60 bridges 2007-02-15 10.1103/PhysRevB.75.075108 Evolution-operator method for density functional theory 2008-01-15 10.1039/b801632c The solid state 2008-03-26 10.1021/ja800594p On the structure of thiolate-protected Au-25 2008-06-28 10.1063/1.2943138 Time-dependent density-functional theory in the projector augmented-wave method 2008-07-07 10.1063/1.2949547 Daubechies wavelets as a basis set for density functional pseudopotential calculations 2008-07-08 10.1073/pnas.0801001105 A unified view of ligand-protected gold clusters as superatom complexes 2008-08-15 10.1103/PhysRevB.78.075441 Delta self-consistent field method to obtain potential energy surfaces of excited molecules on surfaces 2008-09-15 10.1103/PhysRevB.78.104102 Compression curves of transition metals in the Mbar range: Experiments and projector augmented-wave calculations 2009-01-15 10.1002/chem.200900301 Synthesis, Stabilization, Functionalization and, DFT Calculations of Gold Nanoparticles in Fluorous Phases (PTFE and Ionic Liquids) 2009-01-15 10.1039/b904491d A density functional investigation of thiolate-protected bimetal PdAu24(SR)(18)(z) clusters: doping the superatom complex 2009-01-15 10.1039/b907148b Density 
functional theory for transition metals and transition metal chemistry 2009-01-15 10.1039/b911944b The interplay of van der Waals and weak chemical forces in the adsorption of salicylic acid on NaCl(001) 2009-01-15 10.1103/PhysRevB.79.035403 Hot-electron-mediated desorption rates calculated from excited-state potential energy surfaces 2009-01-15 10.1109/ICPPW.2009.42 Analyzing Checkpointing Trends for Applications on the IBM Blue Gene/P System 2009-01-26 10.1063/1.3075216 Gold in graphene: In-plane adsorption and diffusion 2009-01-28 10.1063/1.3055419 Effect of subsurface Ti-interstitials on the bonding of small gold clusters on rutile TiO2(110) 2009-03-13 10.1103/PhysRevLett.102.106102 Bright Beaches of Nanoscale Potassium Islands on Graphite in STM Imaging 2009-03-14 10.1063/1.3086040 Density functional study of the adsorption and van der Waals binding of aromatic and conjugated compounds on the basal plane of MoS2 2009-04-16 10.1021/jp811214k First-Principles Study on Even-Odd Conductance Oscillation of Pt Atomic Nanowires 2009-05-15 10.1103/PhysRevB.79.195326 First-principles study of leakage current through a Si/SiO2 interface 2009-05-15 10.1103/PhysRevB.79.195405 Hot-electron-assisted femtochemistry at surfaces: A time-dependent density functional theory approach 2009-05-22 10.1103/PhysRevLett.102.206801 Quantum Well States in Two-Dimensional Gold Clusters on MgO Thin Films 2009-06-15 10.1002/jcc.21138 Linear Augmented Slater-Type Orbital Method for Free Standing Clusters 2009-06-15 10.1103/PhysRevB.79.235414 Inelastic scattering in a local polaron model with quadratic coupling to bosons 2009-07-28 10.1063/1.3193462 Density functional for van der Waals forces accounts for hydrogen bond in benchmark set of water hexamers 2009-08-05 10.1021/ja903069x 2D-3D Transition for Cationic and Anionic Gold Clusters: A Kinetic Energy Density Functional Study 2009-08-07 10.1063/1.3176508 All-electron density functional theory and time-dependent density functional theory 
with high-order finite elements 2009-09-09 10.1021/ja905182g Characterization of Iron-Carbonyl-Protected Gold Clusters 2009-09-10 10.1021/jp9023298 Ligand-Protected Gold Alloy Clusters: Doping the Superatom 2009-09-30 10.1088/0953-8984/21/39/395502 QUANTUM ESPRESSO: a modular and open-source software project for quantum simulations of materials 2009-11-15 10.1016/j.commatsci.2009.07.013 Density-functional tight-binding for beginners 2009-11-15 10.1103/PhysRevB.80.195112 Localized atomic basis set in the projector augmented wave method 2009-11-25 10.1021/ja906865f Low Temperature CO Oxidation over Supported Ultrathin MgO Films 2009-12-04 10.1103/PhysRevLett.103.238301 Origin of Power Laws for Reactions at Metal Surfaces Mediated by Hot Electrons 2009-12-10 10.1016/j.jcat.2009.09.016 The effect of Co-promotion on MoS2 catalysts for hydrodesulfurization of thiophene: A density functional study 2010-01-14 10.1021/jp909308k Insight from First-Principles Calculations into the Interactions between Hydroxybenzoic Acids and Alkali Chloride Surfaces 2010-01-15 10.1002/anie.201003851 Formation of Gold(I) Edge Oxide at Flat Gold Nanoclusters on an Ultrathin MgO Film under Ambient Conditions 2010-01-15 10.1002/chem.201000327 Gold Sulfide Nanoclusters: A Unique Core-In-Cage Structure 2010-01-15 10.1039/b917723j Adsorption and activation of O-2 at Au chains on MgO/Mo thin films 2010-01-15 10.1039/b926414k Complementarity between high-energy photoelectron and L-edge spectroscopy for probing the electronic structure of 5d transition metal catalysts 2010-01-15 10.1051/sfn/201112004 Wavelets for electronic structure calculations 2010-01-15 10.1088/1742-6596/209/1/012030 Theoretical and experimental factors affecting measurements of semiconductor mean inner potentials 2010-01-20 10.1088/0953-8984/22/2/022201 Chemical accuracy for the van der Waals density functional 2010-01-21 10.1021/jz9002422 Treatment of Layered Structures Using a Semilocal meta-GGA Density Functional 2010-02-15 
10.1016/j.actamat.2009.10.049 Atomistic modeling of interfaces and their impact on microstructure and properties 2010-02-15 10.1103/PhysRevB.81.081408 Graphene on metals: A van der Waals density functional study 2010-02-15 10.1103/PhysRevB.81.085103 Fully self-consistent GW calculations for molecules 2010-02-21 10.1063/1.3298994 Communications: Elementary oxygen electrode reactions in the aprotic Li-air battery 2010-03-15 10.1016/j.elspec.2010.02.004 Theoretical approximations to X-ray absorption spectroscopy of liquid water and ice 2010-03-15 10.1103/PhysRevB.81.115443 Vibrationally mediated control of single-electron transmission in weakly coupled molecule-metal junctions 2010-03-15 10.1137/060651653 Numerical Methods for Electronic Structure Calculations of Materials 2010-04-15 10.1038/NCHEM.589 Quantum size effects in ambient CO oxidation catalysed by ligand-protected gold clusters 2010-04-22 10.1021/jp9116062 Water Dissociation on MgO/Ag(100): Support Induced Stabilization or Electron Pairing? 
2010-05-13 10.1021/jp101265v Experimental and Density Functional Theory Analysis of Serial Introductions of Electron-Withdrawing Ligands into the Ligand Shell of a Thiolate-Protected Au-25 Nanoparticle 2010-05-15 10.1002/pssb.200945474 How to observe the oxidation of magnesia-supported Pd clusters by scanning tunnelling microscopy 2010-05-15 10.1007/s11244-010-9443-6 RPBE-vdW Description of Benzene Adsorption on Au(111) 2010-05-15 10.1038/NNANO.2010.45 Penetration of thin C-60 films by metal nanoparticles 2010-05-20 10.1021/jz1002988 Oligomeric Gold-Thiolate Units Define the Properties of the Molecular Junction between Gold and Benzene Dithiols 2010-05-26 10.1016/j.cplett.2010.04.034 Alkane dimers interaction: A semi-local MGGA functional study 2010-06-04 10.1103/PhysRevB.81.245105 Projector augmented wave formulation of Hartree-Fock calculations of electronic structure 2010-06-14 10.1063/1.3451265 Electrochemical control of quantum interference in anthraquinone-based molecular switches 2010-06-16 10.1021/ja102934q Chirality and Electronic Structure of the Thiolate-Protected Au-38 Nanocluster 2010-06-22 10.1103/PhysRevB.81.245429 Modeling nanoscale gas sensors under realistic conditions: Computational screening of metal-doped carbon nanotubes 2010-06-25 10.1103/PhysRevLett.104.256102 Site Specificity in Femtosecond Laser Desorption of Neutral H Atoms from Graphite(0001) 2010-06-30 10.1088/0953-8984/22/25/253202 Electronic structure calculations with GPAW: a real-space implementation of the projector augmented-wave method 2010-07-15 10.1007/s11249-009-9566-8 Atomistic Insights into the Running-in, Lubrication, and Failure of Hydrogenated Diamond-Like Carbon Coatings 2010-07-21 10.1063/1.3457947 Quantum corrected Langevin dynamics for adsorbates on metal surfaces interacting with hot electrons 2010-08-15 10.1021/nn101330c Electron Stimulation of Internal Torsion of a Surface-Mounted Molecular Rotor 2010-08-19 10.1021/jp1045436 Theoretical Characterization of Cyclic 
Thiolated Copper, Silver, and Gold Clusters 2010-08-28 10.1063/1.3464481 CO oxidation on PdO surfaces 2010-09-07 10.1103/PhysRevB.82.115106 Kohn-Sham potential with discontinuity for band gap materials 2010-09-15 10.1016/j.susc.2010.05.024 A Density Functional Theory study on gold cyanide interactions: The fundamentals of ore cleaning 2010-09-16 10.1103/PhysRevB.82.121412 Quantifying transition voltage spectroscopy of molecular junctions: Ab initio calculations 2010-09-24 10.1103/PhysRevB.82.125444 Combined experimental and ab initio study of the electronic structure of narrow-diameter single-wall carbon nanotubes with predominant (6,4),(6,5) chirality 2010-09-30 10.1021/jp1015438 Thiolate-Protected Au-25 Superatoms as Building Blocks: Dimers and Crystals 2010-09-30 10.1021/jp9097342 On the Structure of a Thiolated Gold Cluster: Au-44(SR)(28)(2-) 2010-10-07 10.1063/1.3490247 Memory effects in nonadiabatic molecular dynamics at metal surfaces 2010-10-14 10.1063/1.3492449 DFT plus U study of defects in bulk rutile TiO2 2010-10-28 10.1021/jp1045707 First Principles Studies of the Effect of Nickel Carbide Catalyst Composition on Carbon Nanotube Growth 2010-11-05 10.1103/PhysRevB.82.195411 First-principles calculations of graphene nanoribbons in gaseous environments: Structural and electronic properties 2010-11-10 10.1088/1367-2630/12/11/113016 Graphene on the Ir(111) surface: from van der Waals to strong bonding 2010-11-15 10.1016/j.cpc.2010.07.036 Electronic structure packages: Two implementations of the projector augmented wave (PAW) formalism 2010-11-15 10.1016/j.cplett.2010.10.040 Desorption of H atoms from graphite (0001) using XUV free electron laser pulses 2010-11-16 10.1103/PhysRevB.82.205115 Real-space electronic structure calculations with full-potential all-electron precision for transition metals 2010-11-23 10.1103/PhysRevB.82.201410 Effective elastic properties of a van der Waals molecular monolayer at a metal surface 2010-11-30 
10.1016/j.electacta.2010.02.056 The oxygen reduction reaction mechanism on Pt(111) from density functional theory calculations 2010-12-14 10.1063/1.3512618 Low O-2 dissociation barrier on Pt(111) due to adsorbate-adsorbate interactions 2010-12-15 10.1002/pssb.201000171 Designing multifunctional chemical sensors using Ni and Cu doped carbon nanotubes 2010-12-15 10.1002/pssb.201000226 Mechanism study of floating catalyst CVD synthesis of SWCNTs 2010-12-15 10.1002/pssb.201000373 A combined photoemission and ab initio study of the electronic structure of (6,4)/(6,5) enriched single wall carbon nanotubes 2010-12-15 10.1007/s11467-010-0133-4 First-principles modelling of scanning tunneling microscopy using non-equilibrium Green's functions 2011-01-15 10.1002/anie.201104381 "Ligand-Free" Cluster Quantized Charging in an Ionic Liquid 2011-01-15 10.1007/978-3-642-15748-6_3 Ligand Protected Gold Alloy Clusters as Superatoms 2011-01-15 10.1016/j.procs.2011.04.003 GPAW - massively parallel electronic structure calculations with Python-based software 2011-01-15 10.1039/c1cp20406h Thermodynamic and kinetic properties of hydrogen defect pairs in SrTiO3 from density functional theory 2011-01-15 10.1039/c1cp20454h Mechanism of the initial stages of nitrogen-doped single-walled carbon nanotube growth 2011-01-15 10.1039/c1cp20924h Graphical prediction of quantum interference-induced transmission nodes in functionalized organic molecules 2011-01-15 10.1039/c1sc00060h A 58-electron superatom-complex model for the magic phosphine-protected gold clusters (Schmid-gold, Nanogold (R)) of 1.4-nm dimension 2011-03-11 10.1103/PhysRevB.83.113401 Van der Waals effect in weak adsorption affecting trends in adsorption, reactivity, and the view of substrate nobility 2011-03-23 10.1021/ja111077e Electronic and Vibrational Signatures of the Au-102(p-MBA)(44) Cluster 2011-04-05 10.1103/PhysRevB.83.155407 Improving transition voltage spectroscopy of molecular junctions 2011-04-05 
10.1103/PhysRevLett.106.146803 Nonlocal Screening of Plasmons in Graphene by Semiconducting and Metallic Substrates: First-Principles Calculations 2011-04-07 10.1063/1.3574836 Robust acceleration of self consistent field calculations for density functional theory 2011-04-11 10.1016/j.cplett.2011.03.001 Electronic hole localization in rutile and anatase TiO2 - Self-interaction correction in Delta-SCF DFT 2011-04-14 10.1103/PhysRevB.83.165418 Charge-transfer model for carbonaceous electrodes in polar environments 2011-04-28 10.1063/1.3563632 Pyrene: Hydrogenation, hydrogen evolution, and pi-band model 2011-05-05 10.1021/jp112114p Size and Shape Dependence of the Electronic and Spectral Properties in TiO2 Nanoparticles 2011-05-12 10.1021/jp2011827 Atomic Layer Deposition of Aluminum Oxide on TiO2 and Its Impact on N3 Dye Adsorption from First Principles 2011-05-15 10.1177/1094342010369118 Understanding Checkpointing Overheads on Massive-Scale Systems: Analysis of the IBM Blue Gene/P System 2011-05-16 10.1016/j.cattod.2010.12.022 The role of transition metal interfaces on the electronic transport in lithium-air batteries 2011-05-21 10.1063/1.3589861 Adsorption properties versus oxidation states of rutile TiO2(110) 2011-05-31 10.1103/PhysRevB.83.184119 Optimized orthogonal tight-binding basis: Application to iron 2011-06-10 10.1016/j.jcp.2011.03.018 A mesh-free convex approximation scheme for Kohn-Sham density functional theory 2011-06-15 10.1002/ejic.201100374 The Al50Cp*(12) Cluster - A 138-Electron Closed Shell (L=6) Superatom 2011-06-15 10.1038/NCHEM.1032 Visible-light-enhanced catalytic oxidation reactions on plasmonic silver nanostructures 2011-06-24 10.1103/PhysRevB.83.245122 Linear density response function in the projector augmented wave method: Applications to solids, surfaces, and interfaces 2011-07-15 10.1140/epjd/e2011-10603-9 Derivatives of the thiolate-protected gold cluster Au-25(SR)(18)(-1) 2011-08-04 10.1021/jp200893w Ab Initio Calculations of the 
Electronic Properties of Polypyridine Transition Metal Complexes and Their Adsorption on Metal Surfaces in the Presence of Solvent and Counterions 2011-08-11 10.1021/jp203274a Interaction of Au-16 Nanocluster with Defects in Supporting Graphite: A Density-Functional Study 2011-08-15 10.1007/s10562-011-0632-0 Finite Size Effects in Chemical Bonding: From Small Clusters to Solids 2011-08-18 10.1103/PhysRevB.84.085101 Construction and performance of fully numerical optimum atomic basis sets 2011-08-25 10.1021/jp204886n ALD Grown Aluminum Oxide Submonolayers in Dye-Sensitized Solar Cells: The Effect on Interfacial Electron Transfer and Performance 2011-08-28 10.1063/1.3624529 Self-consistent meta-generalized gradient approximation study of adsorption of aromatic molecules on noble metal surfaces 2011-09-07 10.1063/1.3632087 Evidence of superatom electronic shells in ligand-stabilized aluminum clusters 2011-09-12 10.1103/PhysRevB.84.104514 Tuning MgB2(0001) surface states through surface termination 2011-09-15 10.1021/jz201059k Electronic Structure and Bonding of Icosahedral Core-Shell Gold-Silver Nanoalloy Clusters Au144-xAgx(SR)(60) 2011-09-15 10.1166/mex.2011.1027 Computing C1s X-ray Absorption for Single-Walled Carbon Nanotubes with Distinct Electronic Type 2011-09-19 10.1103/PhysRevLett.107.136102 Direct Evidence for Ethanol Dissociation on Rutile TiO2(110) 2011-10-03 10.1103/PhysRevLett.107.156401 Dispersive and Covalent Interactions between Graphene and Metal Surfaces from the Random Phase Approximation 2011-10-06 10.1021/jp204360c Role of the Interface between Pd and PdO in Methane Dissociation 2011-10-21 10.1063/1.3653790 Controlling the transmission line shape of molecular t-stubs and potential thermoelectric applications 2011-10-25 10.1103/PhysRevB.84.153410 First-principles study for the adsorption of segments of BPA-PC on alpha-Al2O3(0001) 2011-11-02 10.1088/0953-8984/23/43/434001 Real-space finite-difference approach for multi-body systems: path-integral 
renormalization group method and direct energy minimization method 2011-11-04 10.1103/PhysRevLett.107.195502 Oxidation of Pt(111) under Near-Ambient Conditions 2011-11-08 10.1103/PhysRevB.84.193402 Au-40: A large tetrahedral magic cluster 2011-11-15 10.1007/s11244-011-9736-4 On the Importance of Gradient-Corrected Correlation for van der Waals Density Functionals 2011-11-15 10.1016/j.elspec.2011.05.004 An implementation of core level spectroscopies in a real space Projector Augmented Wave density functional theory code 2011-11-16 10.1103/PhysRevB.84.205434 Steps on rutile TiO2(110): Active sites for water and methanol dissociation 2011-11-22 10.1016/j.cattod.2011.03.072 Thermally activated transformation of the adsorption configurations of a complex molecule on a Cu(111) surface 2011-11-30 10.1016/j.electacta.2011.08.045 Trends in oxygen reduction and methanol activation on transition metal chalcogenides 2011-12-07 10.1063/1.3663385 Electrical conductivity in Li2O2 and its role in determining capacity limitations in non-aqueous Li-O-2 batteries 2011-12-08 10.1021/jp2040345 Ab Initio van der Waals Interactions in Simulations of Water Alter Structure from Mainly Tetrahedral to High-Density-Like 2011-12-14 10.1103/PhysRevB.84.245429 Electronic shell structure and chemisorption on gold nanoparticles 2011-12-15 10.1007/s11249-011-9864-9 Formation and Oxidation of Linear Carbon Chains and Their Role in the Wear of Carbon Materials 2011-12-20 10.1103/PhysRevB.84.235430 First-principles study of surface plasmons on Ag(111) and H/Ag(111) 2011-12-22 10.1021/jp209198g Progressive Shortening of sp-Hybridized Carbon Chains through Oxygen-Induced Cleavage 2012-01-05 10.1021/jz2013853 Scanning Tunneling Microscopy Measurements of the Full Cycle of a Heterogeneous Asymmetric Hydrogenation Reaction on Chirally Modified Pt(111) 2012-01-14 10.1063/1.3675494 Water monomer interaction with gold nanoclusters from van der Waals density functional theory 2012-01-15 10.1039/c1ce05737e 
Solvent driven formation of silver embedded resorcinarene nanorods 2012-01-15 10.1039/c2cp23229d The electronic structure of Ge-9[Si(SiMe3)(3)](3)(-): a superantiatom complex 2012-01-15 10.1039/c2cp40715a DFT studies of oxidation routes for Pd-9 clusters supported on gamma-alumina 2012-01-15 10.1039/c2nr30377a The halogen analogs of thiolated gold nanoclusters 2012-01-15 10.1137/110856976 PYCLAW: ACCESSIBLE, EXTENSIBLE, SCALABLE TOOLS FOR WAVE PROPAGATION PROBLEMS 2012-01-21 10.1063/1.3675834 X-ray emission spectroscopy and density functional study of CO/Fe(100) 2012-02-02 10.1021/jz201616z Ethanol Diffusion on Rutile TiO2(110) Mediated by H Adatoms 2012-02-08 10.1103/PhysRevB.85.085412 Physisorption of benzene on a tin dioxide surface: van der Waals interaction 2012-02-14 10.1103/PhysRevB.85.085422 Nonmagnetic and magnetic thiolate-protected Au-25 superatoms on Cu(111), Ag(111), and Au(111) surfaces 2012-02-15 10.1002/pssb.201100786 Ab initio derived force-field parameters for molecular dynamics simulations of deprotonated amorphous-SiO2/water interfaces 2012-02-15 10.1039/c1ee02717d Computational screening of perovskite metal oxides for optimal solar light capture 2012-02-23 10.1021/jp211749g Supramolecular Environment-Dependent Electronic Properties of Metal-Organic Interfaces. 
2012-02-28 10.1063/1.3685849 Promoter effect of BaO on CO oxidation on PdO surfaces 2012-03-15 10.1021/jz300069s The Active Phase of Palladium during Methane Oxidation 2012-03-15 10.1088/0034-4885/75/3/036503 O(N) methods in electronic structure calculations 2012-03-19 10.1103/PhysRevLett.108.126101 Systematic Study of Au-6 to Au-12 Gold Clusters on MgO(100) F Centers Using Density-Functional Theory 2012-03-23 10.1103/PhysRevB.85.115317 Phonon-limited mobility in n-type single-layer MoS2 from first principles 2012-04-05 10.1021/jp210869r Study of Alkylthiolate Self-assembled Monolayers on Au(111) Using a Semilocal meta-GGA Density Functional 2012-04-14 10.1063/1.3700800 Nonadiabatic Ehrenfest molecular dynamics within the projector augmented-wave method 2012-04-21 10.1063/1.4704546 Perspective on density functional theory 2012-05-03 10.1021/jp300514f Adsorption and Cyclotrimerization Kinetics of C2H2 at a Cu(110) Surface 2012-05-07 10.1063/1.4707952 Adsorption, mobility, and dimerization of benzaldehyde on Pt(111) 2012-05-09 10.1021/ja211121m Oxidation State and Symmetry of Magnesia-Supported Pd13Ox Nanocatalysts Influence Activation Barriers of CO Oxidation 2012-05-15 10.1039/c2ee03590a Understanding the electrocatalysis of oxygen reduction on platinum and its alloys 2012-05-22 10.1103/PhysRevB.85.205437 Magnetism in nanoscale graphite flakes as seen via electron spin resonance 2012-06-07 10.1021/jp209506d Methanol Oxidation on Model Elemental and Bimetallic Transition Metal Surfaces 2012-06-07 10.1103/PhysRevLett.108.236103 Packing Defects into Ordered Structures: Strands on TiO2 2012-06-13 10.1021/ja3003765 Balance of Nanostructure and Bimetallic Interactions in Pt Model Fuel Cell Catalysts: In Situ XAS and DFT Study 2012-06-13 10.1088/0953-8984/24/23/233202 Time-dependent density-functional theory in massively parallel computer architectures: the OCTOPUS project 2012-06-15 10.1007/s11244-012-9801-7 Construction of New Electronic Density Functionals with Error 
Estimation Through Fitting 2012-06-15 10.1166/jctn.2012.2102 Progress in Understanding Controlled Single-Walled Carbon Nanotube Growth from Computer Simulations 2012-06-27 10.1103/PhysRevB.85.235149 Density functionals for surface science: Exchange-correlation model development with Bayesian error estimation 2012-07-19 10.1103/PhysRevB.86.045208 Optical properties of bulk semiconductors and graphene/boron nitride: The Bethe-Salpeter equation with derivative discontinuity-corrected density functional energies 2012-08-01 10.1021/ja305004a Structure and Mobility of Metal Clusters in MOFs: Au, Pd, and AuPd Clusters in MOF-74 2012-08-13 10.1103/PhysRevB.86.075429 Anomalous insulator-metal transition in boron nitride-graphene hybrid atomic layers 2012-08-15 10.1002/cctc.201100450 Evidence of Scrambling over Ruthenium-based Catalysts in Supercritical-water Gasification 2012-08-15 10.1021/ja3032339 Structural and Theoretical Basis for Ligand Exchange on Thiolate Monolayer Protected Gold Nanoclusters 2012-08-15 10.1103/PhysRevB.86.081103 Extending the random-phase approximation for electronic correlation energies: The renormalized adiabatic local density approximation 2012-08-16 10.1021/jp304946n Phase Transition of Mg during Hydrogenation of Mg-Nb2O5 Evaporated Composites 2012-08-30 10.1021/jp3004213 Understanding Charge Transfer in Donor-Acceptor/Metal Systems: A Combined Theoretical and Experimental Study 2012-08-30 10.1021/jp3056653 Reactivity and Morphology of Oxygen-Modified Au Surfaces 2012-08-30 10.1021/jp306376r Initial Stages of Growth of Nitrogen-Doped Single-Walled Carbon Nanotubes 2012-09-15 10.1007/s10562-012-0870-9 Modeling van der Waals Interactions in Zeolites with Periodic DFT: Physisorption of n-Alkanes in ZSM-22 2012-09-15 10.1021/ct300172m Ab Initio Parametrized Force Field for the Flexible Metal-Organic Framework MIL-53(Al) 2012-09-27 10.1021/jp306885u Effects of Silver Doping on the Geometric and Electronic Structure and Optical Absorption Spectra of 
the Au25-nAgn(SH)(18)(-) (n=1, 2, 4, 6, 8, 10, 12) Bimetallic Nanoclusters 2012-10-15 10.1016/j.cpc.2012.05.007 LIBXC: A library of exchange and correlation functionals for density functional theory 2012-10-15 10.1039/c2ee22341d New cubic perovskites for one- and two-photon water splitting using the computational materials repository 2012-10-24 10.1088/0953-8984/24/42/424210 Physisorption of nucleobases on graphene: a comparative van der Waals study 2012-10-24 10.1088/0953-8984/24/42/424212 Desorption of n-alkanes from graphene: a van der Waals density functional study 2012-11-07 10.1063/1.4764356 A van der Waals density functional study of chloroform and other trihalomethanes on graphene 2012-11-08 10.1021/jp307608k Global Minima of Protonated Water Clusters (H2O)(20)H+ Revisited 2012-11-15 10.1002/cctc.201200140 Volcano Relations for Oxidation of Hydrogen Halides over Rutile Oxide Surfaces 2012-11-15 10.1140/epjd/e2012-30485-5 Density functional theory molecular dynamics study of the Au-25(SR)(18)(-) cluster 2012-11-26 10.1103/PhysRevB.86.195429 TDDFT study of time-dependent and static screening in graphene 2012-12-15 10.1007/s11244-012-9908-x Finite-Size Effects in O and CO Adsorption for the Late Transition Metals 2012-12-15 10.1016/j.ijhydene.2012.09.129 Resolving the stability and structure of strontium chloride amines from equilibrium pressures, XRD and DFT 2012-12-20 10.1103/PhysRevB.86.241404 Conventional and acoustic surface plasmons on noble metal surfaces: A time-dependent density functional theory study 2013-01-03 10.1016/j.cplett.2012.10.055 The structural and electronic properties of small osmium clusters (2-14): A density functional theory study 2013-01-03 10.1016/j.cplett.2012.11.025 Avoiding pitfalls in the modeling of electrochemical interfaces 2013-01-10 10.1103/PhysRevB.87.045411 Charging properties of gold clusters in different environments 2013-01-15 10.1002/anie.201208443 The Redox Chemistry of Gold with High-Valence Doped Calcium Oxide 
2013-01-15 10.1007/s10562-012-0918-x The Oxygen Reduction Reaction on Nitrogen-Doped Graphene 2013-01-15 10.1007/s10562-012-0947-5 CO and CO2 Hydrogenation to Methanol Calculated Using the BEEF-vdW Functional 2013-01-15 10.1039/c3cp50257k Generalized trends in the formation energies of perovskite oxides 2013-01-15 10.1039/c3cp50349f First principles investigation of zinc-anode dissolution in zinc-air batteries 2013-01-15 10.1039/c3cp51083b pH in atomic scale simulations of electrochemical interfaces 2013-01-15 10.1039/c3cp51295a A DFT study of adsorption of perylene on clean and altered anatase (101) TiO2 2013-01-15 10.1039/c3cp51685g Porphyrin adsorbed on the (10(1)over-bar0) surface of the wurtzite structure of ZnO - conformation induced effects on the electron transfer characteristics 2013-01-15 10.1039/c3cp53160k Electrochemical ammonia production on molybdenum nitride nanoclusters 2013-01-15 10.1039/c3ra42651c The stabilization of Fe, Ru, and Os clusters upon hydrogenation 2013-01-15 10.1140/epjd/e2012-30537-x Structural and electronic properties of AuIr nanoalloys 2013-01-15 10.1149/05802.0053ecst Activity and Selectivity for O-2 Reduction to H2O2 on Transition Metal Surfaces 2013-01-15 10.1155/2013/871706 Methanol Adsorption on Graphene 2013-01-15 10.2533/chimia.2013.271 Methane Catalytic Combustion on Pd-9/gamma-Al2O3 with Different Degrees of Pd Oxidation 2013-01-21 10.1063/1.4773242 Theoretical evidence for low kinetic overpotentials in Li-O-2 electrochemistry 2013-01-30 10.1103/PhysRevB.87.045428 Stratified graphene/noble metal systems for low-loss plasmonics applications 2013-02-07 10.1021/jz3021155 Understanding Trends in the Electrocatalytic Activity of Metals and Enzymes for CO2 Reduction to CO 2013-02-13 10.1103/PhysRevB.87.075111 Random phase approximation applied to solids, molecules, and graphene-metal interfaces: From van der Waals to covalent bonding 2013-02-14 10.1063/1.4790368 Thiolate adsorption on Au(hkl) and equilibrium shape of large 
thiolate-covered gold nanoparticles 2013-02-15 10.1109/JPROC.2012.2197810 Quantum Transport Modeling From First Principles 2013-02-15 10.1140/epjd/e2012-30486-4 Modeling thiolate-protected gold clusters with density-functional tight-binding 2013-02-21 10.1021/jp305303q Refractive Index Functions of TiO2 Nanoparticles 2013-02-25 10.1103/PhysRevB.87.075207 Formation energies of group I and II metal oxides using random phase approximation 2013-03-15 10.1002/cctc.201200635 First-Principles Calculations of FischerTropsch Processes Catalyzed by Nitrogenase Enzymes 2013-03-15 10.1016/j.scriptamat.2012.10.026 Strain field of interstitial hydrogen atom in body-centered cubic iron and its effect on hydrogen-dislocation interaction 2013-03-18 10.1063/1.4798511 Chemisorption of hydrogen on Fe clusters through hybrid bonding mechanisms 2013-04-15 10.1016/j.carbon.2012.12.008 Binding of atomic oxygen on graphene from small epoxy clusters to a fully oxidized surface 2013-04-15 10.1016/j.jcat.2013.01.009 Guest-host interactions of arenes in H-ZSM-5 and their impact on methanol-to-hydrocarbons deactivation processes 2013-04-25 10.1021/jp400980y 1,3-Diketone Fluids and Their Complexes with Iron 2013-04-28 10.1063/1.4800754 Surface adsorption in strontium chloride ammines 2013-04-28 10.1063/1.4801943 A full implementation of the response iteration scheme for density functional calculations 2013-05-07 10.1103/PhysRevB.87.205410 Screened empirical bond-order potentials for Si-C 2013-05-09 10.1021/jp306172k Electrochemical CO2 and CO Reduction on Metal-Functionalized Porphyrin-like Graphene 2013-05-09 10.1021/jp311980h Li-ion Conduction in the LiBH4:Lil System from Density Functional Theory Calculations and Quasi-Elastic Neutron Scattering 2013-05-15 10.1140/epjb/e2013-40113-5 Carbon nanotubes as heat dissipaters in microelectronics 2013-05-23 10.1021/jp400287h Polycyclic Aromatic Hydrocarbons: Trends for Bonding Hydrogen 2013-06-06 10.1021/jp4024684 First-Principles Study of Excited 
State Evolution in a Protected Gold Complex 2013-06-19 10.1103/PhysRevB.87.235312 Acoustic phonon limited mobility in two-dimensional semiconductors: Deformation potential and piezoelectric scattering in monolayer MoS2 from first principles 2013-06-21 10.1126/science.1238187 Direct Imaging of Covalent Bond Structure in Single-Molecule Chemical Reactions 2013-06-26 10.1103/PhysRevB.87.235433 Visualizing hybridized quantum plasmons in coupled nanowires: From classical to tunneling regime 2013-07-07 10.1063/1.4812398 Excited-state potential-energy surfaces of metal-adsorbed organic molecules from linear expansion Delta-self-consistent field density-functional theory (Delta SCF-DFT) 2013-07-11 10.1103/PhysRevB.88.035418 Charge localization on a redox-active single-molecule junction and its influence on coherent electron transport 2013-07-11 10.1103/PhysRevLett.111.027601 Layer-Resolved Study of Mg Atom Incorporation at the MgO/Ag(001) Buried Interface 2013-07-15 10.7566/JPSJ.82.074709 A Comparative Density-Functional Theory Investigation of Oxygen Adsorption on Stepped Ni Surfaces 3(hkl) x (111) [hkl = (111), (100), (110)]: Role of Terrace Orientation 2013-08-13 10.1021/cm400541n Lithium Chalcogenidotetrelates: LiChT-Synthesis and Characterization of New Li+ Ion Conducting Li/Sn/Se Compounds 2013-08-15 10.1021/jp400486r Stability of Pt-Modified Cu(111) in the Presence of Oxygen and Its Implication on the Overall Electronic Structure 2013-09-04 10.1021/ja4059074 Protected but Accessible: Oxygen Activation by a Calixarene-Stabilized Undecagold Cluster 2013-09-05 10.1021/jz401553p Gold and Methane: A Noble Combination for Delicate Oxidation 2013-09-15 10.1016/j.jmmm.2013.04.025 Phase stability of chromium based compensated ferrimagnets with inverse Heusler structure 2013-09-18 10.1103/PhysRevB.88.115131 Beyond the random phase approximation: Improved description of short-range correlation by a renormalized adiabatic local density approximation 2013-09-24 10.1021/la402565b 
H-2 Dissociation over NbO: The First Step toward Hydrogenation of Mg 2013-10-01 10.1016/j.jpowsour.2013.03.110 Li+ adsorption at prismatic graphite surfaces enhances interlayer cohesion 2013-10-03 10.1021/jp4003092 (H2O)(20) Water Clusters at Finite Temperatures 2013-10-10 10.1021/jp407494v Electronic Structure and Optical Properties of the Thiolate-Protected Au-28(SMe)(20) Cluster 2013-10-15 10.1016/j.susc.2013.06.014 Methane oxidation over Pd and Pt studied by DFT and kinetic modeling 2013-10-15 10.1021/ct400520e Real-Space Density Functional Theory on Graphical Processing Units: Computational Approach and Comparison to Gaussian Basis Set Methods 2013-10-17 10.1021/jp404569m Equilibrium Crystal Shape of Ni from First Principles 2013-10-28 10.1063/1.4827078 CO dissociation on iron nanoparticles: Size and geometry effects 2013-10-28 10.1088/1367-2630/15/10/105026 Stability and bandgaps of layered perovskites for one- and two-photon water splitting 2013-11-01 10.1016/j.jpowsour.2013.04.109 Crystal structure analysis and first principle investigation of F doping in LiFePO4 2013-11-05 10.1103/PhysRevA.88.052501 Koopmans' condition in self-interaction-corrected density-functional theory 2013-11-07 10.1021/jp405670v Improving the Adsorption of Au Atoms and Nanoparticles on Graphite via Li Intercalation 2013-11-11 10.1002/chem.201301450 The Isolation of Single MMX Chains from Solution: Unravelling the Assembly-Disassembly Process 2013-11-15 10.1021/nn4046634 Birth of the Localized Surface Plasmon Resonance in Mono layer-Protected Gold Nanoclusters 2013-11-21 10.1063/1.4829539 Self-interaction corrected density functional calculations of molecular Rydberg states 2013-11-28 10.1063/1.4829640 Interfacial oxygen under TiO2 supported Au clusters revealed by a genetic algorithm search 2013-12-05 10.1021/jp409479h The Influence of Functionals on Density Functional Theory Calculations of the Properties of Reducible Transition Metal Oxide Catalysts 2013-12-12 10.1021/jp410379u 
Competition between Icosahedral Motifs in AgCu, AgNi, and AgCo Nanoalloys: A Combined Atomistic-DFT Study 2013-12-15 10.1016/j.cpc.2013.07.014 Graphics Processing Unit acceleration of the Random Phase Approximation in the projector augmented wave method 2013-12-15 10.1016/j.jallcom.2013.02.044 The catalytic effect of Nb, NbO and Nb2O5 with different surface planes on dehydrogenation in MgH2: Density functional theory study 2013-12-15 10.1038/NMAT3795 Enabling direct H2O2 production through rational electrocatalyst design 2013-12-21 10.1063/1.4840515 A density functional theory study of atomic steps on stoichiometric rutile TiO2(110) 2013-12-27 10.1103/PhysRevB.88.245204 Calculated formation and reaction energies of 3d transition metal oxides using a hierachy of exchange-correlation functionals 2014-01-01 10.1080/10408436.2013.772503 Error Estimates for Solid-State Density-Functional Theory Predictions: An Overview by Means of the Ground-State Elemental Crystals 2014-01-15 10.1007/978-3-319-01427-2_5 Plane-Wave Approaches to the Electronic Structure of Semiconductor Nanostructures 2014-01-15 10.1016/j.susc.2013.09.020 Formation of metastable, heterolytic H-pairs on the RuO2(110) surface 2014-01-15 10.1021/nn406219x Supramolecular Functionalization and Concomitant Enhancement in Properties of Au-25 Clusters 2014-01-15 10.1039/c3cp53922a Interactions of polymers with reduced graphene oxide: van der Waals binding energies of benzene on graphene with defects 2014-01-15 10.1039/c3cp54491e The molecular and magnetic structure of carbon-enclosed and partially covered Fe-55 particles 2014-01-15 10.1039/c3py00853c Soluble and stable alternating main-chain merocyanine copolymers through quantitative spiropyran-merocyanine conversion 2014-01-15 10.1039/c3ra47390b Selective poisoning of Li-air batteries for increased discharge capacity 2014-01-15 10.1039/c3ra47784c Dissociation of oxygen on pristine and nitrogen-doped carbon nanotubes: a spin-polarized density functional study 
2014-01-15 10.1039/c4cp00705k H-2 production through electro-oxidation of SO2: identifying the fundamental limitations 2014-01-15 10.1039/c4cp03133d Designing mixed metal halide ammines for ammonia storage using density functional theory and genetic algorithms 2014-01-15 10.1039/c4sc01646g Ultrafast structural dynamics in Rydberg excited N,N,N ',N '-tetramethylethylenediamine: conformation dependent electron lone pair interaction and charge delocalization 2014-01-28 10.1103/PhysRevB.89.014304 Ab initio based thermal property predictions at a low cost: An error analysis 2014-01-31 10.3762/bjnano.5.11 The role of oxygen and water on molybdenum nanoclusters for electro catalytic ammonia production 2014-02-03 10.3762/bjnano.5.12 Core level binding energies of functionalized and defective graphene 2014-02-15 10.1007/s11244-013-0160-9 Genetic Algorithm Procreation Operators for Alloy Nanoparticle Catalysts 2014-02-15 10.1007/s11244-013-0173-4 Modeling Methyl Chloride Photo Oxidation by Oxygen Species on TiO2(110) 2014-02-15 10.1007/s11244-013-0181-4 Calculated Pourbaix Diagrams of Cubic Perovskites for Water Splitting: Stability Against Corrosion 2014-02-15 10.1016/j.jnucmat.2013.11.021 Effect of impurities on vacancy migration energy in Fe-based alloys 2014-02-15 10.1021/nn405114z Revealing the Adsorption Mechanisms of Nitroxides on Ultrapure, Metallicity-Sorted Carbon Nanotubes 2014-02-15 10.1039/c3ee42446d Lithium and oxygen vacancies and their role in Li2O2 charge transport in Li-O-2 batteries 2014-02-27 10.1021/jp500800n Graphene Edges Dictate the Morphology of Nanoparticles during Catalytic Channeling 2014-03-05 10.1002/jcc.23487 A Survey of the Parallel Performance and Accuracy of Poisson Solvers for Electronic Structure Calculations 2014-03-12 10.1103/PhysRevB.89.115412 Density functional theory based calculations of the transfer integral in a redox-active single-molecule junction 2014-03-15 10.1016/j.jssc.2013.12.006 Ionic conductivity and the formation of cubic 
CaH2 in the LiBH4-Ca(BH4)(2) composite 2014-03-15 10.1021/ct400931p Configurational Entropy in Ice Nanosystems: Tools for Structure Generation and Screening 2014-03-28 10.1063/1.4869212 Communication: The influence of CO2 poisoning on overvoltages and discharge capacity in non-aqueous Li-Air batteries 2014-03-28 10.1103/RevModPhys.86.253 First-principles calculations for point defects in solids 2014-04-02 10.1021/ja412141j Single Crystal XRD Structure and Theoretical Analysis of the Chiral Au30S(S-t-Bu)(18) Cluster 2014-04-08 10.1038/srep04598 Low temperature hydrogenation of iron nanoparticles on graphene 2014-04-14 10.1063/1.4870397 mBEEF: An accurate semi-local Bayesian error estimation density functional 2014-04-15 10.1021/cs400875k Remote Activation of Chemical Bonds in Heterogeneous Catalysis 2014-04-15 10.1109/MCSE.2013.51 A Multiscale Code for Flexible Hybrid Simulations Using ASE Framework 2014-04-22 10.1021/cm4042007 X-ray Absorption Study of Structural Coupling in Photomagnetic Prussian Blue Analogue Core@Shell Particles 2014-05-15 10.1016/j.jcat.2014.04.006 Methanol-to-hydrocarbons conversion: The alkene methylation pathway 2014-05-15 10.1021/ct500087v Quasiparticle Level Alignment for Photocatalytic Interfaces 2014-06-15 10.1021/cs500202f Identification of the Catalytic Site at the Interface Perimeter of Au Clusters on Rutile TiO2(110) 2014-06-27 10.1103/PhysRevB.89.245445 Quasiparticle scattering from topological crystalline insulator SnTe (001) surface states 2014-07-15 10.1021/cs500328c Intermetallic Alloys as CO Electroreduction Catalysts-Role of Isolated Active Sites 2014-07-15 10.1088/0965-0393/22/5/055002 EON: software for long time simulations of atomic scale systems 2014-07-15 10.1088/0965-0393/22/5/055007 Designing rules and probabilistic weighting for fast materials discovery in the Perovskite structure 2014-07-15 10.1117/1.OE.53.7.071808 Optical and other material properties of SiO2 from ab initio studies 2014-07-17 10.1021/jz500850s Direct 
Dynamics Studies of a Binuclear Metal Complex in Solution: The Interplay Between Vibrational Relaxation, Coherence, and Solvent Effects 2014-07-24 10.1021/jp501581g Atomic Structure, Electronic Properties, and Reactivity of In-Plane Heterostructures of Graphene and Hexagonal Boron Nitride 2014-07-24 10.1021/jp5035147 Hydrogen-Induced Reconstruction of Cu(100): Two-Dimensional and One-Dimensional Structures of Surface Hydride 2014-08-01 10.1093/mnras/stu869 Do cement nanoparticles exist in space? 2014-08-06 10.1088/0953-8984/26/28/315013 Dynamical coupling of plasmons and molecular excitations by hybrid quantum/classical calculations: time-domain approach 2014-08-15 10.1016/j.ssc.2014.04.023 MoS2 nanostructures: Semiconductors with metallic edges 2014-08-15 10.1063/1.4893495 Bandgap calculations and trends of organometal halide perovskites 2014-08-21 10.1039/c4cp02204a Enhancing the hydrogen storage capacity of TiFe by utilizing clusters 2014-08-21 10.1039/c4dt01329h Chloride-bridged, defect-dicubane {Ln(4)} core clusters: syntheses, crystal structures and magnetic properties 2014-08-28 10.1021/jp505462m TDDFT Analysis of Optical Properties of Thiol Monolayer-Protected Gold and Intermetallic Silver-Gold Au-144(SR)(60) and Au84Ag60(SR)(60) Clusters 2014-09-11 10.1103/PhysRevLett.113.115501 Silicon-Carbon Bond Inversions Driven by 60-keV Electrons in Graphene 2014-09-15 10.1088/0031-8949/2014/T162/014019 Ab initio study of structural and electronic properties of partially reduced graphene oxide 2014-09-18 10.1021/jp412360b Electronic and Vibrational Properties of meso-Tetraphenylporphyrin on Silver Substrates 2014-09-18 10.1021/jp501185q Superatomic S-2 Silver Clusters Stabilized by a Thiolate-Phosphine Monolayer: Insight into Electronic and Optical Properties of Ag-14(SC6H3F2)(12)(PPh3)(8) and Ag-16(SC6H3F2)(14)(DPPE)(4) 2014-09-18 10.1021/jp506158c Periodic DFT Study of Benzene Adsorption on Pd(100) and Pd(110) at Medium and Saturation Coverage 2014-09-22 
10.1103/PhysRevB.90.125433 Induced work function changes at Mg-doped MgO/Ag(001) interfaces: Combined Auger electron diffraction and density functional study 2014-10-02 10.1021/jp507519a Ab Initio Thermodynamic Modeling of Electrified Metal-Oxide Interfaces: Consistent Treatment of Electronic and Ionic Chemical Potentials 2014-10-15 10.1016/j.carbon.2014.06.060 Atomic and electronic structure of tetrahedral amorphous carbon surfaces from density functional theory: Properties and simulation strategies 2014-10-15 10.1016/j.ssc.2014.07.008 Physical properties of alpha-Fe upon the introduction of H, He, C, and N 2014-10-15 10.1016/j.susc.2014.05.017 Investigating energetics of Au-8 on graphene/Ru(0001) using a genetic algorithm and density functional theory 2014-10-15 10.1021/nl5029045 High-Conductive Organometallic Molecular Wires with De localized Electron Systems Strongly Coupled to Metal Electrodes 2014-10-16 10.1021/jp507349k Coverage-Dependent Adsorption of Bifunctional Molecules: Detailed Insights into Interactions between Adsorbates 2014-10-21 10.1039/c4cp00753k Segregation effects on the properties of (AuAg)(147) 2014-10-23 10.1021/jp508076c Temperature- and Pressure-Induced Changes in the Crystal Structure of Sr(NH3)(8)Cl-2 2014-11-07 10.1063/1.4900628 Thermodynamic aspects of dehydrogenation reactions on noble metal surfaces 2014-11-07 10.1063/1.4900838 Simplified continuum solvent model with a smooth cavity based on volumetric data 2014-11-15 10.1016/j.susc.2014.03.008 Structure determination of chemisorbed chirality transfer complexes: Accelerated STM analysis and exchange-correlation functional sensitivity 2014-11-15 10.1107/S2052252514020181 Location of Cu2+ in CHA zeolite investigated by X-ray diffraction using the Rietveld/maximum entropy method 2014-11-17 10.1002/anie.201406246 Squeezing, Then Stacking: From Breathing Pores to Three-Dimensional Ionic Self-Assembly under Electrochemical Control 2014-11-17 10.1002/anie.201406528 A Surface Coordination 
Network Based on Copper Adatom Trimers 2014-12-04 10.1021/jp509510j Atomic-Scale View on the H2O Formation Reaction from H-2 on O-Rich RuO2(110) 2014-12-07 10.1063/1.4902249 Nucleation and growth of Pt nanoparticles on reduced and oxidized rutile TiO2 (110) 2014-12-11 10.1021/jp506508x Au-36(SPh)(24) Nanomolecules: X-ray Crystal Structure, Optical Spectroscopy, Electrochemistry, and Theoretical Analysis 2014-12-11 10.1021/jp509970y Collective Diffusion of Gold Clusters and F-Centers at MgO(100) and CaO(100) Surfaces 2014-12-15 10.1002/pssb.201451171 PFO-BPy solubilizers for SWNTs: Modelling of polymers from oligomers 2014-12-15 10.1002/pssb.201451174 Theoretical electron energy loss spectroscopy of isolated graphene 2014-12-15 10.1364/OE.22.030725 Effects of exchange correlation functional on optical permittivity of gold and electromagnetic responses 2014-12-16 10.1021/la504056v Triazatriangulene as Binding Group for Molecular Electronics 2014-12-18 10.1021/jp505394e Operando Characterization of an Amorphous Molybdenum Sulfide Nanoparticle Catalyst during the Hydrogen Evolution Reaction 2014-12-21 10.1063/1.4903450 Orbital-free density functional theory implementation with the projector augmented-wave method 2015-01-13 10.1103/PhysRevB.91.045418 pi-plasmon dispersion in free-standing graphene by momentum-resolved electron energy-loss spectroscopy 2015-01-15 10.1002/cpe.3199 Design and performance characterization of electronic structure calculations on massively parallel supercomputers: a case study of GPAW on the Blue Gene/P architecture 2015-01-15 10.1007/978-3-319-18747-1_3 Treating Relativistic Effects in Transition Metal Complexes 2015-01-15 10.1007/978-3-319-18747-1_5 Background 2015-01-15 10.1016/B978-0-08-100086-1.00008-7 Electronic Structure: Shell Structure and the Superatom Concept 2015-01-15 10.1016/j.jcp.2014.10.052 Real-time adaptive finite element solution of time-dependent Kohn-Sham equation 2015-01-15 10.1016/j.jssc.2014.09.014 Solid solution 
barium-strontium chlorides with tunable ammonia desorption properties and superior storage capacity 2015-01-15 10.1021/jp508932x Influence of Adsorbed Water on the Oxygen Evolution Reaction on Oxides 2015-01-15 10.1039/c4cc09467k Iron oxide cluster induced barrier-free conversion of nitric oxide to ammonia 2015-01-15 10.1039/c4cp02789b A DFT study of the effect of OH groups on the optical, electronic, and structural properties of TiO2 nanoparticles 2015-01-15 10.1039/c4cy01044b Single-chiral-catalytic-surface-sites: STM and DFT study of stereodirecting complexes formed between (R)-1-(1-naphthyl)ethylamine and ketopantolactone on Pt(111) 2015-01-15 10.1039/c5cc04513d UV photoexcitation of a dissolved metalloid Ge-9 cluster compound and its extensive ultrafast response 2015-01-15 10.1039/c5cp00071h First principles investigation of the activity of thin film Pt, Pd and Au surface alloys for oxygen reduction 2015-01-15 10.1039/c5cp00298b A DFT-based genetic algorithm search for AuCu nanoalloy electrocatalysts for CO2 reduction 2015-01-15 10.1039/c5cp00435g An old workhorse for new applications: Fe(dpm)(3) as a precursor for low-temperature PECVD of iron(III) oxide 2015-01-15 10.1039/c5cp01065a Widely available active sites on Ni2P for electrochemical hydrogen evolution - insights from first principles calculations 2015-01-15 10.1039/c5cp01211b Optimizing a parametrized Thomas-Fermi-Dirac-Weizsacker density functional for atoms 2015-01-15 10.1039/c5cp01222h A real-space stochastic density matrix approach for density functional electronic structure 2015-01-15 10.1039/c5cp03382a Low temperature pollutant trapping and dissociation over two-dimensional tin 2015-01-15 10.1039/c5cp90198g Real-space numerical grid methods in quantum chemistry 2015-01-15 10.1039/c5nr04324g Pd2Au36(SR)(24) cluster: structure studies 2015-01-15 10.1039/c5py00141b High molecular weight mechanochromic spiropyran main chain copolymers via reproducible microwave-assisted Suzuki polycondensation 
2015-01-15 10.1039/c5ta01586c Calculated optical absorption of different perovskite phases 2015-01-21 10.1002/aenm.201400915 New Light-Harvesting Materials Using Accurate and Efficient Bandgap Calculations 2015-01-22 10.1103/PhysRevB.91.045204 Strain sensitivity of band gaps of Sn-containing semiconductors 2015-01-28 10.1021/ja5109968 A Critical Size for Emergence of Nonbulk Electronic and Geometric Structures in Dodecanethiolate-Protected Au Clusters 2015-02-02 10.1103/PhysRevB.91.081401 Calculation of the graphene C 1s core level binding energy 2015-02-05 10.1021/jz502637b Copper Induces a Core Plasmon in Intermetallic Au(144,145)-xCux(SR)(60) Nanoclusters 2015-03-03 10.1103/PhysRevB.91.094104 High-pressure neutron scattering of the magnetoelastic Ni-Cr Prussian blue analog 2015-03-07 10.1063/1.4913739 Nanoplasmonics simulations at the basis set limit through completeness-optimized, local numerical basis sets 2015-03-09 10.1103/PhysRevB.91.125410 Density functional theory based direct comparison of coherent tunneling and electron hopping in redox-active single-molecule junctions 2015-03-14 10.1063/1.4906048 Chemical insight from density functional modeling of molecular adsorption: Tracking the bonding and diffusion of anthracene derivatives on Cu(111) with molecular orbitals 2015-03-14 10.1063/1.4908062 Selection of conformational states in self-assembled surface structures formed from an oligo(naphthylene-ethynylene) 3-bit binary switch 2015-03-15 10.1016/j.susc.2014.11.006 Detection of adsorbate overlayer structural transitions using sum-frequency generation spectroscopy 2015-03-15 10.1021/ct501155k Removing External Degrees of Freedom from Transition-State Search Methods using Quaternions 2015-03-15 10.1088/2053-1583/2/1/014001 The growth of Fe clusters over graphene/Cu(111) 2015-03-18 10.1002/adfm.201404388 Molecular Heterojunctions of Oligo(phenylene ethynylene)s with Linear to Cruciform Framework 2015-03-26 10.1021/acs.jpca.5b01797 Ultrafast Structural 
Pathway of Charge Transfer in N,N,N ',N '-Tetramethylethylenediamine 2015-03-28 10.1063/1.4915265 Density functional theory and chromium: Insights from the dimers 2015-04-01 10.1016/j.cplett.2015.02.013 Relationship between unbranched alkane dimer interaction energies using different theoretical methods and correlations with thermodynamic properties 2015-04-02 10.1021/acs.jpcc.5b00734 Isolating a Reaction Intermediate in the Hydrogenation of 2,2,2-Trifluoroacetophenone on Pt(111) 2015-04-08 10.1002/aenm.201401082 Design Principles for Metal Oxide Redox Materials for Solar-Driven Isothermal Fuel Production 2015-04-15 10.1515/ntrev-2012-0047 On the interaction between gold and silver metal atoms and DNA/RNA nucleobases - a comprehensive computational study of ground state properties 2015-04-16 10.1021/jp5125475 Impacts of Copper Position on the Electronic Structure of [Au25-xCux(SH)(18)](-) Nanoclusters 2015-04-24 10.1103/PhysRevB.91.165309 Band-gap engineering of functional perovskites through quantum confinement and tunneling 2015-04-30 10.1021/acs.jpcc.5b01068 The Role of the Anchor Atom in the Ligand of the Monolayer-Protected Au-25(XR)(18)(-) Nanocluster 2015-05-08 10.1088/0953-8984/27/17/175007 The effect of point defects on diffusion pathway within alpha-Fe 2015-05-13 10.1088/0953-8984/27/18/183202 Subsystem density-functional theory as an effective tool for modeling ground and excited states, their dynamics and many-body interactions 2015-05-14 10.1021/acs.jpcc.5b01580 Improved Tight-Binding Charge Transfer Model and Calculations of Energetics of a Step on the Rutile TiO2(110) Surface 2015-05-15 10.1021/cs501673g A Consistent Reaction Scheme for the Selective Catalytic Reduction of Nitrogen Oxides with Ammonia 2015-05-21 10.1021/jp510926q Theoretical Analysis of the M12Ag32(SR)(40)(4-) and X@M12Ag32(SR)(30)(4-) Nanoclusters (M = Au, Ag; X = H, Mn) 2015-06-06 10.1098/rsfs.2014.0084 Rational design of metal nitride redox materials for solar-driven ammonia 
synthesis 2015-06-08 10.1002/cssc.201500239 Design Principles of Perovskites for Thermochemical Oxygen Separation 2015-06-15 10.1002/anie.201410974 Ketene as a Reaction Intermediate in the Carbonylation of Dimethyl Ether to Methyl Acetate over Mordenite 2015-06-20 10.1016/j.electacta.2015.04.006 First principles study of (Cd, Hg, In, Tl, Sn, Pb, As, Sb, Bi, Se) modified Pt (111), Pt(100) and Pt(211) electrodes as CO oxidation catalysts 2015-06-24 10.1021/cr500551h Quantum-Chemical Characterization of the Properties and Reactivities of Metal-Organic Frameworks 2015-07-14 10.1021/acs.chemmater.5b00446 Accelerated DFT-Based Design of Materials for Ammonia Storage 2015-07-15 10.1002/ente.201500065 Carbon Dioxide Reforming of Methane using an Isothermal Redox Membrane Reactor 2015-07-15 10.1021/acscatal.5b00754 Tailoring Gold Nanoparticle Characteristics and the Impact on Aqueous-Phase Oxidation of Glycerol 2015-07-15 10.1021/cs501542n Mechanistic Pathway in the Electrochemical Reduction of CO2 on RuO2 2015-07-15 10.1595/205651315X687975 Atomic-Scale Modelling and its Application to Catalytic Materials Science Developing an interdisciplinary approach to modelling 2015-07-16 10.1021/acs.jpclett.5b01043 Electrochemical Barriers Made Simple 2015-08-13 10.1021/acs.jpcc.5b04432 Role of Li2O2@Li2CO3 Interfaces on Charge Transport in Nonaqueous Li-Air Batteries 2015-08-20 10.1021/acs.jpcc.5b04985 Real-Time Study of CVD Growth of Silicon Oxide on Rutile TiO2(110) Using Tetraethyl Orthosilicate 2015-08-20 10.1021/acs.jpcc.5b05894 Optical Properties of Monolayer-Protected Aluminum Clusters: Time-Dependent Density Functional Theory Study 2015-08-21 10.1063/1.4928646 Indication of non-thermal contribution to visible femtosecond laser-induced CO oxidation on Ru(0001) 2015-08-27 10.1021/acs.jpcc.5b05392 Using G(0)W(0) Level Alignment to Identify Catechol's Structure on TiO2(110) 2015-09-05 10.1002/qua.24945 Unoccupied titanium 3d states due to subcluster formation in stoichiometric 
TiO2 nanoparticles 2015-09-15 10.1016/j.ccr.2015.05.002 The role of density functional theory methods in the prediction of nanostructured gas-adsorbent materials 2015-09-15 10.1016/j.molcata.2015.04.016 Identifying the active sites for CO dissociation on Fe-BCC nanoclusters 2015-09-20 10.1016/j.electacta.2015.01.136 Theoretical modeling of the PEMFC catalyst layer: A review of atomistic methods 2015-10-15 10.1016/j.susc.2015.03.011 Optical laser-induced CO desorption from Ru(0001) monitored with a free-electron X-ray laser: DFT prediction and X-ray confirmation of a precursor state 2015-10-15 10.1016/j.susc.2015.04.013 Dissociative adsorption of water on Au/MgO/Ag(001) from first principles calculations 2015-10-15 10.1021/acs.jpcc.5b06132 Coexistence of Square Pyramidal Structures of Oxo Vanadium (+5) and (+4) Species Over Low-Coverage VOX/TiO2 (101) and (001) Anatase Catalysts 2015-10-15 10.1021/acscatal.5b01254 Comparison between the Oxygen Reduction Reaction Activity of Pd5Ce and Pt5Ce: The Importance of Crystal Structure 2015-10-16 10.1103/PhysRevD.92.083517 Modulation effects in dark matter-electron scattering experiments 2015-10-25 10.1016/j.jallcom.2015.05.253 Dependence of constituent elements of AB(5) type metal hydrides on hydrogenation degradation by CO2 poisoning 2015-12-01 10.1103/PhysRevLett.115.236804 Quantized Evolution of the Plasmonic Response in a Stretched Nanorod 2015-12-15 10.1016/j.ultramic.2015.07.011 Surface effects on mean inner potentials studied using density functional theory 2016-01-15 10.1016/j.carbon.2015.09.062 Planar versus three-dimensional growth of metal nanostructures at graphene 2016-01-15 10.1016/j.commatsci.2015.09.013 AiiDA: automated interactive infrastructure and database for computational science 2016-01-15 10.1039/c5cy01016k Graphene decorated with Fe nanoclusters for improving the hydrogen sorption kinetics of MgH2 - experimental and theoretical evidence 2016-01-15 10.1039/c5cy01839k Gold assisted oxygen dissociation 
on a molybdenum-doped CaO(001) surface 2016-01-15 10.1039/c5dt04542h Hydrophobic and antioxidant effects in In, Sn, and Sb based two dimensional materials 2016-01-15 10.1039/c5fd00203f Effects of particle size and edge structure on the electronic structure, spectroscopic features, and chemical properties of Au(111)-supported MoS2 nanoparticles 2016-01-15 10.1039/c5nr08122j Tuning Ag-29 nanocluster light emission from red to blue with one and two-photon excitation 2016-01-15 10.1039/c5sc03042k Charge transfer and ultrafast nuclear motions: the complex structural dynamics of an electronically excited triamine 2016-01-15 10.1039/c6cc06876f Selection of conformational states in surface self-assembly for a molecule with eight possible pairs of surface enantiomers 2016-01-15 10.1039/c6cp04194a Decoupling strain and ligand effects in ternary nanoparticles for improved ORR electrocatalysis 2016-01-15 10.1039/c6cp05681d A DFT study of the effect of SO4 groups on the properties of TiO2 nanoparticles 2016-01-15 10.1039/c6cy00756b H-2/D-2 exchange reaction on mono-disperse Pt clusters: enhanced activity from minute O-2 concentrations 2016-01-15 10.1039/c6cy01820c Nitrate-nitrite equilibrium in the reaction of NO with a Cu-CHA catalyst for NH3-SCR 2016-01-15 10.1039/c6ee01010e Computer calculations across time and length scales in photovoltaic solar cells 2016-01-15 10.1039/c6ra21668d Ab initio calculation of halide ligand passivation on PbSe quantum dot facets 2016-01-15 10.1039/c6sc01360k Conformations of cyclopentasilane stereoisomers control molecular junction conductance 2016-01-15 10.1149/2.0111609jes Electrochemical Investigation of 1-Ethyl-3-methylimidazolium Bromide and Tetrafluoroborate Mixture at Bi(111) Electrode Interface 2016-01-19 10.1016/j.ssc.2015.11.017 Strain engineering of electronic properties of transition metal dichalcogenide monolayers 2016-01-19 10.1038/srep19375 Machine learning bandgaps of double perovskites 2016-01-19 10.1103/PhysRevB.93.041302 
Transformation of metallic boron into substitutional dopants in graphene on 6H-SiC(0001) 2016-01-28 10.1021/acs.jpcc.5b10025 Photoinduced Absorption within Single-Walled Carbon Nanotube Systems 2016-01-28 10.1039/c5cp04694g Correlation between diffusion barriers and alloying energy in binary alloys 2016-02-09 10.1088/0957-4484/27/7/075501 B-40 fullerene as a highly sensitive molecular device for NH3 detection at low bias: a first-principles study 2016-02-10 10.1016/j.electacta.2016.01.070 Targeted design of alpha-MnO2 based catalysts for oxygen reduction 2016-02-15 10.1007/s11082-015-0370-4 A model for terahertz plasmons in graphene 2016-02-15 10.1016/j.commatsci.2015.11.013 Material synthesis and design from first principle calculations and machine learning 2016-02-15 10.1021/acscatal.5b02369 Catalytic Activities of Sulfur Atoms in Amorphous Molybdenum Sulfide for the Electrochemical Hydrogen Evolution Reaction 2016-02-15 10.1021/acscatal.5b02409 Oxygen Reduction Reaction on Pt Overlayers Deposited onto a Gold Film: Ligand, Strain, and Ensemble Effect 2016-02-15 10.1038/NNANO.2015.255 Field-induced conductance switching by charge-state alternation in organometallic single-molecule junctions 2016-02-17 10.1021/acsami.5b11792 Selective Growth of Noble Gases at Metal/Oxide Interface 2016-02-18 10.1002/cctc.201501049 Exploring Scaling Relations for Chemisorption Energies on Transition-Metal-Exchanged Zeolites ZSM-22 and ZSM-5 2016-02-26 10.1038/srep21990 Unravelling Site-Specific Photo-Reactions of Ethanol on Rutile TiO2(110) 2016-02-28 10.1063/1.4942665 Structure and role of metal clusters in a metal-organic coordination network determined by density functional theory 2016-03-15 10.1016/j.commatsci.2015.12.012 Plasmonic and dielectric properties of ideal graphene 2016-03-15 10.1021/acs.cgd.6b00003 Designing Square Two-Dimensional Gold and Platinum 2016-03-15 10.1038/ncomms11013 Charge localization in a diamine cation provides a test of energy functionals and 
self-interaction correction 2016-03-31 10.1021/acs.jpca.5b12739 Reversible Hydrogen Uptake by BN and BC3 Monolayers Functionalized with Small Fe Clusters: A Route to Effective Energy Storage 2016-03-31 10.1021/acs.jpcc.5b11211 When Conductance Is Less than the Sum of Its Parts: Exploring Interference in Multiconnected Molecules 2016-03-31 10.1021/acs.jpcc.5b12611 Theoretical Insight into the Internal Quantum Efficiencies of Polymer/C-60 and Polymer/SWNT Photovoltaic Devices 2016-04-06 10.1002/adma.201505498 Extremely Weak van der Waals Coupling in Vertical ReS2 Nanowalls for High-Current-Density Lithium-Ion Batteries 2016-04-15 10.1002/qua.25106 Update to ACE-molecule: Projector augmented wave method on lagrange-sinc basis set 2016-04-15 10.1016/j.jcp.2016.01.034 Development of an exchange-correlation functional with uncertainty quantification capabilities for density functional theory 2016-04-15 10.1016/j.susc.2015.11.018 A comparative study of diastereomeric complexes formed by a prochiral substrate and three structurally analogous chiral molecules on Pt(111) 2016-04-15 10.1038/NCHEM.2454 Effects of correlated parameters and uncertainty in electronic-structure-based chemical kinetic modelling 2016-04-21 10.1021/acs.jpcc.5b12448 Atomic-Scale Analysis of the RuO2/Water Interface under Electrochemical Conditions 2016-04-27 10.1002/adma.201504650 Evidence of Porphyrin-Like Structures in Natural Melanin Pigments Using Electrochemical Fingerprinting 2016-04-27 10.1088/0022-3727/49/16/165303 Effect of interface geometry on electron tunnelling in Al/Al2O3/Al junctions 2016-04-28 10.1063/1.4947225 Pyridine adsorption and diffusion on Pt(111) investigated with density functional theory 2016-05-01 10.1016/j.jcp.2016.02.023 Grid-based electronic structure calculations: The tensor decomposition approach 2016-05-05 10.1021/acs.jpcc.6b01828 Tuning Conductance in Aromatic Molecules: Constructive and Counteractive Substituent Effects 2016-05-21 10.1063/1.4950828 An approach to 
develop chemical intuition for atomistic electron transport calculations using basis set rotations 2016-06-15 10.1016/j.cpc.2016.02.005 High performance Python for direct numerical simulations of turbulent flows 2016-06-21 10.1063/1.4954003 All-silicon tandem solar cells: Practical limits for energy conversion and possible routes for improvement 2016-06-23 10.1021/acs.jpcc.6b05207 Revealing the Multibonding State between Hydrogen and GrapheneSupported Ti Clusters 2016-06-30 10.1103/PhysRevB.93.235162 mBEEF-vdW: Robust fitting of error estimation density functionals 2016-07-01 10.1103/PhysRevB.94.041401 Stark shift and electric-field-induced dissociation of excitons in monolayer MoS2 and hBN/MoS2 heterostructures 2016-07-07 10.1039/c6cp02274j The reaction mechanism for the SCR process on monomer V5+ sites and the effect of modified Bronsted acidity 2016-07-12 10.1103/PhysRevB.94.035128 Limitations of effective medium theory in multilayer graphite/hBN heterostructures 2016-07-14 10.1021/acs.jpcc.6b05068 Machine Learning Assisted Predictions of Intrinsic Dielectric Breakdown Strength of ABX(3) Perovskites 2016-07-25 10.1103/PhysRevB.94.041112 Offset-corrected Delta-Kohn-Sham scheme for semiempirical prediction of absolute x-ray photoelectron energies in molecules and solids 2016-08-05 10.7498/aps.65.157303 First-principles study on thermodynamic properties of CdxZn1-xO alloys 2016-08-15 10.1016/j.enconman.2016.05.069 Computational study on oxynitride perovskites for CO2 photoreduction 2016-08-15 10.1016/j.engfracmech.2016.04.024 Hydrogen induced amorphisation around nanocracks in aluminium 2016-08-15 10.1021/acs.jctc.6b00456 Minimal Basis Iterative Stockholder: Atoms in Molecules for Force-Field Development 2016-08-16 10.1103/PhysRevB.94.064105 Quantification of uncertainty in first-principles predicted mechanical properties of solids: Application to solid ion conductors 2016-08-18 10.1021/acs.jpcc.6b06163 Disentangling Vacancy Oxidation on Metallicity-Sorted Carbon 
Nanotubes 2016-08-23 10.1021/acs.chemmater.6b01956 Thermodynamic Insight in the High-Pressure Behavior of UiO-66: Effect of Linker Defects and Linker Expansion 2016-09-05 10.1002/anie.201604269 Controlling the Adsorption of Carbon Monoxide on Platinum Clusters by Dopant-Induced Electronic Structure Modification 2016-09-15 10.1209/0295-5075/115/57002 Ab initio study of M2SnBr6 (M = K, Rb, Cs): Electronic and optical properties 2016-09-19 10.1002/anie.201605559 Charge Transport and Conductance Switching of Redox-Active Azulene Derivatives 2016-09-19 10.1021/acs.inorgchem.6b01635 Reactivity of Two-Dimensional Au-9, Pt-9, and Au18Pt18 against Common Molecules 2016-09-28 10.1039/c6cp05188j Strong 1D localization and highly anisotropic electron-hole masses in heavy-halogen functionalized graphenes 2016-09-29 10.1021/acs.jpcc.6b06254 Gold/lsophorone Interaction Driven by Keto/Enol Tautomerization 2016-09-29 10.1103/PhysRevB.94.125444 Band-gap control in phosphorene/BN structures from first-principles calculations 2016-10-06 10.1038/ncomms12962 Metal-free photochemical silylations and transfer hydrogenations of benzenoid hydrocarbons and graphene 2016-10-10 10.1038/ncomms13040 Isotope analysis in the transmission electron microscope 2016-10-15 10.1007/s00706-016-1795-6 Bias-induced conductance switching in single molecule junctions containing a redox-active transition metal complex 2016-10-15 10.1088/0953-8984/28/39/393001 Applications of large-scale density functional theory in biology 2016-10-15 10.15199/62.2016.10.11 Role of support in industrial catalytic processes. 
Theoretical modeling 2016-10-17 10.1021/acs.inorgchem.6b01840 Electron Transfer and Solvent-Mediated Electronic Localization in Molecular Photocatalysis 2016-10-17 10.1038/srep35605 Spectromicroscopy of C-60 and azafullerene C59N: Identifying surface adsorbed water 2016-10-21 10.1039/c6cp05014j Enhanced hydrogen desorption properties of LiAlH4 by doping lithium metatitanate 2016-10-21 10.1063/1.4964671 Accelerating the search for global minima on potential energy surfaces using machine learning 2016-10-27 10.1103/PhysRevB.94.155310 Strain dependence of band gaps and exciton energies in pure and mixed transition-metal dichalcogenides 2016-10-30 10.1002/jcc.24477 Density Functional Theory for Molecular and Periodic Systems Using Density Fitting and Continuous Fast Multipole Method: Analytical Gradients 2016-11-01 10.1016/j.jphotochem.2016.08.007 Does organic/organic interface mimic band bending by deforming structure? 2016-11-08 10.1002/cctc.201601014 Relation between Hydrogen Evolution and Hydrodesulfurization Catalysis 2016-11-10 10.1021/acs.jpcc.6b06638 A DFT Structural Investigation of New Bimetallic PtSnx Surface Alloys Formed on the Pt(110) Surface and Their Interaction with Carbon Monoxide 2016-11-15 10.1016/j.cpc.2016.06.012 A wavelet-based Projector Augmented-Wave (PAW) method: Reaching frozen-core all-electron precision with a systematic, adaptive and localized wavelet basis set 2016-11-15 10.1016/j.nanoen.2016.04.011 Beyond the top of the volcano? 
- A unified approach to electrocatalytic oxygen reduction and oxygen evolution 2016-11-15 10.1016/j.nanoen.2016.05.044 Characterization of oxygenated species at water/Pt(111) interfaces from DFT energetics and XPS simulations 2016-11-15 10.1021/acs.jctc.6b00815 Implementation of Constrained DFT for Computing Charge Transfer Rates within the Projector Augmented Wave Method 2016-11-15 10.1021/acscatal.6b01848 Operando Raman Spectroscopy of Amorphous Molybdenum Sulfide (MoSx) during the Electrochemical Hydrogen Evolution Reaction: Identification of Sulfur Atoms as Catalytically Active Sites for H+ Reduction 2016-11-21 10.1039/c6cp04575h Investigating the coverage dependent behaviour of CO on Gd/Pt(111) 2016-12-07 10.1063/1.4971010 Density and localized states' impact on amorphous carbon electron transport mechanisms 2016-12-15 10.1016/j.jcat.2016.09.004 Atomic scale analysis of sterical effects in the adsorption of 4,6-dimethyldibenzothiophene on a CoMoS hydrotreating catalyst 2016-12-15 10.1021/acsnano.6b04671 Symmetry-Driven Band Gap Engineering in Hydrogen Functionalized Graphene 2016-12-15 10.1021/acsnano.6b05823 Tomography of a Probe Potential Using Atomic Sensors on Graphene 2016-12-22 10.1021/acs.jpcc.6b09682 Field-Induced Conformational Change in a Single-Molecule-Graphene-Nanoribbon Junction: Effect of Vibrational Energy Redistribution 2016-12-22 10.1038/ncomms14036 Atomic-level insights in optimizing reaction paths for hydroformylation reaction over Rh/CoO single-atom catalyst 2016-12-29 10.1021/acs.jpcc.6b09019 pH in Grand Canonical Statistics of an Electrochemical Interface 2017-01-03 10.1073/pnas.1615733114 Enhanced strength and temperature dependence of mechanical properties of Li at small scales and its implications for Li metal anodes 2017-01-03 10.1103/PhysRevB.95.035109 Nonuniform sampling schemes of the Brillouin zone for many-electron perturbation-theory calculations in reduced dimensionality 2017-01-04 10.1103/PhysRevB.95.014101 Role of 
descriptors in predicting the dissolution energy of embedded oxides and the bulk modulus of oxide-embedded iron 2017-01-06 10.1103/PhysRevB.95.045407 Principles and simulations of high-resolution STM imaging with a flexible tip apex 2017-01-14 10.1039/c6cp06445k A hybrid method using the widely-used WIEN2k and VASP codes to calculate the complete set of XAS/EELS edges in a hundred-atoms system 2017-01-14 10.1088/1361-6455/50/1/015102 Electronic structure transformation in small bare Au clusters as seen by x-ray photoelectron spectroscopy 2017-01-15 10.1016/j.flatc.2016.11.002 Designing Mg-7 cluster-assembled two dimensional crystal 2017-01-15 10.1021/acscatal.6b02765 Quantitative Differences in Sulfur Poisoning Phenomena over Ruthenium and Palladium: An Attempt To Deconvolute Geometric and Electronic Poisoning Effects Using Model Catalysts 2017-01-15 10.1039/9781782626862-00221 DFT calculations of atoms and molecules in Cartesian grids 2017-01-15 10.1039/c7ra06130g Lead-free and stable antimony-silver-halide double perovskite (CH3NH3)(2)AgSbI6 2017-01-18 10.1088/0953-8984/29/2/024001 Linear-scaling density functional theory using the projector augmented wave method 2017-01-20 10.1103/PhysRevLett.118.036101 Orbitalwise Coordination Number for Predicting Adsorption Properties of Metal Nanocatalysts 2017-01-21 10.1039/c6cp06881b Modeling the active sites of Co-promoted MoS2 particles by DFT 2017-01-24 10.3762/bjnano.8.25 Colorimetric gas detection by the varying thickness of a thin film of ultrasmall PTSA-coated TiO2 nanoparticles on a Si substrate 2017-01-26 10.1021/acs.jpcc.6b10251 Toward Two-Dimensional Superatomic Honeycomb Structures. 
Evaluation of [Ge-9(Si(SiMe3))(3)](-) as Source of Ge-9-Cluster Building Blocks for Extended Materials 2017-01-28 10.1039/c6nr08958e Grain boundary-mediated nanopores in molybdenum disulfide grown by chemical vapor deposition 2017-02-14 10.1021/acs.chemmater.6b04216 Solution-Synthesized In4SnSe4 Semiconductor Microwires with a Direct Band Gap 2017-02-14 10.1103/PhysRevB.95.054110 Unveiling descriptors for predicting the bulk modulus of amorphous carbon 2017-02-15 10.1016/j.jcat.2016.12.017 A complete reaction mechanism for standard and fast selective catalytic reduction of nitrogen oxides on low coverage VOx/TiO2(001) catalysts 2017-02-15 10.1021/acs.jctc.6b00809 Theory and Applications of Generalized Pipek-Mezey Wannier Functions 2017-02-15 10.1103/PhysRevB.95.085422 Alternative structure of TiO2 with higher energy valence band edge 2017-02-25 10.1016/j.jallcom.2016.11.153 Global structural optimization and growth mechanism of cobalt oxide nanoclusters by genetic algorithm with spin-polarized DFT 2017-02-28 10.1039/c6ta06644e Tunable thermodynamic activity of LaxSr1-xMnyAl1-yO3-delta (0 <= x <= 1, 0 <= y <= 1) perovskites for solar thermochemical fuel synthesis 2017-03-02 10.1021/acs.jpcc.6b11953 Adsorption and Activation of Water on Cuboctahedral Rhodium and Platinum Nanoparticles 2017-03-07 10.1039/c6cy01904h Reaction mechanism of dimethyl ether carbonylation to methyl acetate over mordenite a combined DFT/experimental study 2017-03-15 10.1002/sia.6106 Probing CO/Fe(100) surfaces from firstprinciples: structures, energetics, and vibrations 2017-03-15 10.1021/acscatal.6b02590 STM Study of Ketopantolactone/(R)-1-(1-Naphthyl)ethylamine Complexes on Pt(111): Comparison of Prochiral and Enantiomeric Ratios and Examination of the Contribution of CH center dot center dot center dot OC Bonding 2017-03-15 10.1093/mnrasl/slw226 Missing Fe: hydrogenated iron nanoparticles 2017-03-23 10.1021/acs.jpcc.6b09289 Synergetic Surface Sensitivity of Photoelectrochemical Water 
Oxidation on TiO2 (Anatase) Electrodes 2017-03-30 10.1038/s41524-017-0017-z A high-throughput framework for determining adsorption energies on solid surfaces 2017-04-06 10.1021/acs.jpca.7b01248 Substituent Correlations Characterized by Hammett Constants in the Spiropyran Merocyanine Transition 2017-04-07 10.1002/cctc.201601662 A New Type of Scaling Relations to Assess the Accuracy of Computational Predictions of Catalytic Activities Applied to the Oxygen Evolution Reaction 2017-04-07 10.1039/c7dt00372b Structural stability and electronic properties of an octagonal allotrope of two dimensional boron nitride 2017-04-10 10.1002/cssc.201601869 High Redox Capacity of Al-Doped La1-xSrxMnO3- Perovskites for Splitting CO2 and H2O at Mn-Enriched Surfaces 2017-04-15 10.1016/j.commatsci.2017.01.031 Growth of two-dimensional Au patches in graphene pores: A density-functional study 2017-04-20 10.1021/acs.jpcc.7b00283 Effects of Aromaticity and Connectivity on the Conductance of Five-Membered Rings 2017-05-01 10.1039/c6mh00465b Unexpected length dependence of excited-state charge transfer dynamics for surface-confined perylenediimide ensembles 2017-05-07 10.1039/c7tc00336f Probing lattice vibration and surface electronic state in a layered (NH4)(2)V3O8 single crystal 2017-05-09 10.1002/cssc.201601632 Determination of Conduction and Valence Band Electronic Structure of LaTiOxNy Thin Film 2017-05-10 10.1002/smll.201604161 One-Step In Situ Growth of Iron-Nickel Sulfide Nanosheets on FeNi Alloy Foils: High-Performance and Self-Supported Electrodes for Water Oxidation 2017-05-15 10.1007/s11244-016-0701-0 Understanding Structure and Stability of Monoclinic Zirconia Surfaces from First-Principles Calculations 2017-05-15 10.1016/j.cplett.2017.02.018 Catalysis in real time using X-ray lasers 2017-05-15 10.1021/acs.accounts.6b00516 Structure and Dynamics of Individual Diastereomeric Complexes on Platinum: Surface Studies Related to Heterogeneous Enantioselective Catalysis 2017-05-18 
10.1002/ejic.201700008 Rapid Ultrasound-Assisted Synthesis of Mesoporous Manganese Oxides for Low-Concentration NO Elimination with Superior Water-Resistance 2017-05-25 10.1021/acs.jpcc.6b10618 Analysis of the Electronic Structure of Non-Spherical Ligand-Protected Metal Nanoclusters: The Case of a Box-Like Ag-67 2017-05-25 10.1021/acs.jpcc.6b12004 Isophorone on Au/MgO/Ag(001): Physisorption with Electrostatic Site Selection 2017-05-26 10.1103/PhysRevB.95.195158 Convergence behavior of the random phase approximation renormalized correlation energy 2017-05-28 10.1063/1.4984047 Descriptors for predicting the lattice constant of body centered cubic crystal 2017-06-01 10.1002/anie.201701135 Reversible Supracolloidal Self-Assembly of Cobalt Nanoparticles to Hollow Capsids and Their Superstructures 2017-06-07 10.1039/c7cp01440f Stability, electronic structure, and optical properties of protected gold-doped silver Ag29-xAux (x=0-5) nanoclusters 2017-06-15 10.1016/j.cattod.2017.02.028 Single site porphyrine-like structures advantages over metals for selective electrochemical CO2 reduction 2017-06-15 10.1016/j.micromeso.2017.02.065 Distribution of open sites in Sn-Beta zeolite 2017-06-15 10.1038/NCHEM.2753 Monitoring interconversion between stereochemical states in single chirality-transfer complexes on a platinum surface 2017-06-28 10.1021/jacs.7b04755 Formation of Germa-ketenimine on the Ge(100) Surface by Adsorption of tert-Butyl Isocyanide 2017-07-01 10.1016/j.camwa.2016.12.003 Multi-domain muffin tin finite element density functional calculations for small molecules 2017-07-12 10.1088/1361-648X/aa680e The atomic simulation environment-a Python library for working with atoms 2017-07-15 10.1021/acs.nanolett.7b01592 The Role of Through-Space Interactions in Modulating Constructive and Destructive Interference Effects in Benzene 2017-07-17 10.1103/PhysRevB.96.045419 Adsorption sites of individual metal atoms on ultrathin MgO(100) films 2017-07-18 10.1103/PhysRevB.96.035422 
Anisotropic plasmons, excitons, and electron energy loss spectroscopy of phosphorene 2017-07-20 10.1021/acs.jpcc.7b02608 Defect Chemistry and Electrical Conductivity of Sm-Doped La1-xSrxCoO3-delta for Solid Oxide Fuel Cells 2017-08-02 10.1021/jacs.7b05599 Extreme Conductance Suppression in Molecular Siloxanes 2017-08-07 10.1039/c7ta02081c Design principles of perovskites for solar-driven thermochemical splitting of CO2 2017-08-11 10.1038/s41598-017-08651-1 Spectroscopic observation of oxygen dissociation on nitrogen-doped graphene 2017-08-14 10.1088/1361-6455/aa7d2c Spectroscopic signatures of triplet states in acenes 2017-08-15 10.1021/acsnano.7b03484 Supramolecular Corrals on Surfaces Resulting from Aromatic Interactions of Nonplanar Triazoles 2017-08-15 10.1103/PhysRevB.96.085421 Quantum interference in coherent tunneling through branched molecular junctions containing ferrocene centers 2017-08-21 10.1038/s41467-017-00385-y Visualizing atomic-scale redox dynamics in vanadium oxide-based catalysts 2017-08-31 10.1021/acs.jpcc.7b04974 Role of the Band Gap for the Interaction Energy of Coadsorbed Fragments 2017-08-31 10.1103/PhysRevLett.119.096102 Structure of the SnO2(110)-(4 x 1) Surface 2017-09-01 10.1088/1361-651X/aa7320 libvdwxc: a library for exchange-correlation functionals in the vdW-DF family 2017-09-07 10.1021/acs.jpcc.7b02500 Charge Storage Mechanism of RuO2/Water Interfaces 2017-09-07 10.1103/PhysRevB.96.115304 Angle-resolved electron energy loss spectroscopy in hexagonal boron nitride 2017-09-08 10.1103/PhysRevMaterials.1.044003 Graphene/MoS2 heterostructures as templates for growing two-dimensional metals: Predictions from ab initio calculations 2017-09-14 10.1039/c7cp03576d Modelling pH and potential in dynamic structures of the water/Pt(111) interface on the atomic scale 2017-09-14 10.1063/1.4991033 Electronic structure of boron based single and multi-layer two dimensional materials 2017-09-15 10.1002/sia.6238 Unsaturated surface in CO saturation 
2017-09-20 10.1038/s41598-017-12009-y Atomic Scale Formation Mechanism of Edge Dislocation Relieving Lattice Strain in a GeSi overlayer on Si(001) 2017-09-21 10.1039/c7nj02365k Combining theory and experiment in the design of a lead-free ((CH3NH3)(2)AgBiI6) double perovskite 2017-09-21 10.1039/c7py00987a Alkyl-substituted spiropyrans: electronic effects, model compounds and synthesis of aliphatic main-chain copolymers 2017-09-28 10.1088/1361-6455/aa83d1 Model of resonant high harmonic generation in multi-electron systems 2017-10-05 10.1103/PhysRevB.96.155407 Effect of edge plasmons on the optical properties of MoS2 monolayer flakes 2017-10-07 10.1039/c7ta04063f Trends in the phase stability and thermochemical oxygen exchange of ceria doped with potentially tetravalent metals 2017-10-14 10.1039/c7cp05244h Coherence in nonradiative transitions: internal conversion in Rydberg-excited N-methyl and N-ethyl morpholine 2017-10-15 10.1016/j.jcis.2017.06.017 Investigation of anti-solvent induced optical properties change of cesium lead bromide iodide mixed perovskite (CsPbBr3-xIx) quantum dots 2017-10-15 10.1016/j.susc.2017.06.004 Adsorption and desorption of propane on Pd (111): A van der Waals density functional study. 
Energy binding sites and geometries 2017-10-15 10.1021/acs.jctc.7b00589 Kohn-Sham Decomposition in Real-Time Time-Dependent Density-Functional Theory: An Efficient Tool for Analyzing Plasmonic Excitations 2017-10-16 10.1038/s41467-017-01035-z Understanding activity and selectivity of metal-nitrogen-doped carbon catalysts for electrochemical reduction of CO2 2017-10-17 10.1021/acs.langmuir.7b02207 Influence of CH center dot center dot center dot N Interaction in the Self-Assembly of an Oligo(isoquinolyne-ethynylyne) Molecule with Distinct Conformational States 2017-10-20 10.1103/PhysRevLett.119.166402 Importance of sigma Bonding Electrons for the Accurate Description of Electron Correlation in Graphene 2017-10-26 10.1103/PhysRevB.96.134426 Quantifying confidence in density functional theory predictions of magnetic ground states 2017-10-27 10.1103/PhysRevB.96.165436 Nanoribbon edges of transition-metal dichalcogenides: Stability and electronic properties 2017-10-28 10.1039/c7cp02978k Ultrafast X-ray absorption study of longitudinal-transverse phonon coupling in electrolyte aqueous solution 2017-11-09 10.1021/acs.jpcc.7b04685 First-Principles Screening of Lead-Free Methylammonium Metal Iodine Perovskites for Photovoltaic Application 2017-11-15 10.1002/pssb.201700188 Structure and Energetics of Embedded Si Patterns in Graphene 2017-11-15 10.1007/s12678-017-0375-9 New Platinum Alloy Catalysts for Oxygen Electroreduction Based on Alkaline Earth Metals 2017-11-15 10.1016/j.joule.2017.08.020 Surface Restructuring of Nickel Sulfide Generates Optimally Coordinated Active Sites for Oxygen Reduction Catalysis 2017-11-16 10.1021/acs.jpcc.7b08269 Stability and Effects of Subsurface Oxygen in Oxide-Derived Cu Catalyst for CO2 Reduction 2017-11-16 10.1021/acs.jpcc.7b08971 Methane Adsorption in Zr-Based MOFs: Comparison and Critical Evaluation of Force Fields 2017-11-17 10.1002/cphc.201700736 Electrochemical CO2 Reduction: A Classification Problem 2017-12-07 
10.1021/acs.jpcc.7b10154 Analysis of Localized Surface Plasmon Resonances in Spherical Jellium Clusters and Their Assemblies 2017-12-08 10.3762/bjnano.8.265 Patterning of supported gold monolayers via chemical lift-off lithography 2017-12-13 10.1002/smll.201702379 Reversible Anion-Driven Switching of an Organic 2D Crystal at a Solid-Liquid Interface 2017-12-15 10.1016/j.apsusc.2017.07.064 Improving the photocatalytic properties of anatase TiO2(101) surface by co-doping with Cu and N: Ab initio study 2017-12-15 10.1016/j.cocom.2017.08.008 Investigation of density fluctuations in graphene using the fluctuation-dissipation relations 2017-12-15 10.1021/acs.jctc.7b00621 Grid-Based Projector Augmented Wave (GPAW) Implementation of Quantum Mechanics/Molecular Mechanics (QM/MM) Electrostatic Embedding and Application to a Solvated Diplatinum Complex 2017-12-15 10.1021/acscatal.7b02712 From 3D to 2D Co and Ni Oxyhydroxide Catalysts: Elucidation of the Active Site and Influence of Doping on the Oxygen Evolution Activity 2017-12-21 10.1063/1.5000908 Self-consistent assessment of Englert-Schwinger model on atomic properties 2017-12-21 10.1103/PhysRevB.96.241411 Symmetry-forbidden intervalley scattering by atomic defects in monolayer transition-metal dichalcogenides 2018-01-07 10.1063/1.5010963 Designing a tunable magnet using cluster-assembled iron 2018-01-10 10.1103/PhysRevB.97.035411 Atlas for the properties of elemental two-dimensional metals 2018-01-11 10.1021/acs.jpcc.7b10760 The Influence of Inert Ions on the Reactivity of Manganese Oxides 2018-01-11 10.1039/c7cc08014j Au70S20(PPh3)(12): an intermediate sized metalloid gold cluster stabilized by the Au4S4 ring motif and Au-PPh3 groups 2018-01-12 10.1103/PhysRevLett.120.026102 On-the-Fly Machine Learning of Atomic Potential in Density Functional Theory Structure Optimization 2018-01-15 10.1016/j.apsusc.2017.09.124 Multivariate analysis for scanning tunneling spectroscopy data 2018-01-15 10.1016/j.cplett.2017.10.063 
Revealing the multi hydrogen bonding state within iron doped amorphous carbon 2018-01-15 10.1016/j.solidstatesciences.2017.11.007 O/F-substitution in BiVO4: Defect structures, phase stability and optical properties 2018-01-15 10.1021/acsnano.7b07079 Exciting H-2 Molecules for Graphene Functionalization 2018-01-15 10.1039/c8ra01818a Aryl fluoride functionalized graphene oxides for excellent room temperature ammonia sensitivity/selectivity 2018-01-15 10.1080/10420150.2018.1442456 Modelling relativistic effects in momentum-resolved electron energy loss spectroscopy of graphene 2018-01-22 10.1002/anie.201709142 A Simply Synthesized, Tough Polyarylene with Transient Mechanochromic Response 2018-01-23 10.1002/cssc.201701653 Revealing the Chemistry between Band Gap and Binding Energy for Lead-/Tin-Based Trihalide Perovskite Solar Cell Semiconductors 2018-01-24 10.1103/PhysRevB.97.024112 Neural-network-based depth-resolved multiscale structural optimization using density functional theory and electron diffraction data 2018-02-12 10.1140/epjb/e2017-80280-7 Computational investigation of CO adsorbed on Au-x, Ag-x and (AuAg)(x) nanoclusters (x=1-5, 147) and monometallic Au and Ag low-energy surfaces 2018-02-13 10.1021/acs.chemmater.7b04618 Adiabatic and Nonadiabatic Charge Transport in Li-S Batteries 2018-02-14 10.1088/1361-648X/aaa471 Accelerating the discovery of hidden two-dimensional magnets using machine learning and first principle calculations 2018-02-15 10.1016/j.cplett.2017.12.069 Adsorption of NO on Fe3O4(111) 2018-02-15 10.1016/j.jcat.2017.12.001 Modeling the adsorption of sulfur containing molecules and their hydrodesulfurization intermediates on the Co-promoted MoS2 catalyst by DFT 2018-02-15 10.1021/acs.jctc.7b01070 Efficient Transition State Optimization of Periodic Structures through Automated Relaxed Potential Energy Surface Scans 2018-02-15 10.1021/acsanm.7b00403 Tuning the Electronic Structure of an Aluminum Phosphide Nanotube through Configuration of the 
Lattice Geometry 2018-02-15 10.1021/acsenergylett.7b01312 High-Throughput Computational Assessment of Previously Synthesized Semiconductors for Photovoltaic and Photoelectrochemical Devices 2018-02-27 10.1021/acs.chemmater.7b03760 Atomic Structure of Intrinsic and Electron-Irradiation-Induced Defects in MoTe2 2018-02-28 10.1063/1.5009405 Cyanographone and isocyanographone - Two asymmetrically functionalized graphene pseudohalides and their potential use in chemical sensing 2018-03-01 10.1021/acs.jpcc.7b12258 Benchmark Database of Transition Metal Surface and Adsorption Energies from Many-Body Perturbation Theory 2018-03-07 10.1063/1.5012549 Adsorption of 3d, 4d, and 5d transition-metal atoms on single-layer boron nitride 2018-03-09 10.1002/cssc.201702229 Ultrathin Bismuth Nanosheets as a Highly Efficient CO2 Reduction Electrocatalyst 2018-03-15 10.1007/s12274-017-1786-x High-metallic-phase-concentration Mo1-xWxS2 nanosheets with expanded interlayers as efficient electrocatalysts 2018-03-15 10.1016/j.susc.2017.11.003 The structure of reconstructed chalcopyrite surfaces 2018-03-15 10.1021/acs.jpcc.7b10614 DFT-Computed Trends in the Properties of Bimetallic Precious Metal Nanoparticles with Core@Shell Segregation 2018-03-15 10.1021/acsnano.7b07759 Fullerene-Functionalized Monolayer-Protected Silver Clusters: [Ag-29(BDT)(12)(C-60)(n)](3-) (n=1-9) 2018-03-15 10.1021/acsphotonics.7b01479 Searching for Hidden Perovskite Materials for Photovoltaic Systems by Combining Data Science and First Principle Calculations 2018-03-15 10.1103/PhysRevApplied.9.034010 Electronic Transport Properties of Carbon-Nanotube Networks: The Effect of Nitrate Doping on Intratube and Intertube Conductances 2018-03-15 10.1107/S1600577517016964 Anisotropy enhanced X-ray scattering from solvated transition metal complexes 2018-03-21 10.1039/c7cp08181b Understanding proton capture and cation-induced dimerization of [Ag-29(BDT)(12)](3-) clusters by ion mobility mass spectrometry 2018-03-21 
10.1103/PhysRevB.97.115140 Rocksalt or cesium chloride: Investigating the relative stability of the cesium halide structures with random phase approximation based methods 2018-03-28 10.1063/1.5017581 Rotation and diffusion of naphthalene on Pt(111) 2018-04-05 10.1021/acs.jpcc.8b00301 Solution Structure and Ultrafast Vibrational Relaxation of the PtPOP Complex Revealed by Delta SCF-QM/MM Direct Dynamics Simulations 2018-04-12 10.1021/acs.jpcc.8b02448 First-Principles Screening of All-Inorganic Lead-Free ABX(3) Perovskites 2018-04-15 10.1016/j.jcat.2018.01.035 Ab initio study of CO2 hydrogenation mechanisms on inverse ZnO/Cu catalysts 2018-04-15 10.1016/j.mcat.2018.01.023 Ab initio studies of ethanol dehydrogenation at binary AuPd nanocatalysts 2018-04-15 10.7569/JRM.2017.634183 Tuning Intermolecular Interaction Between Lignin and Carbon Nanotubes in Fiber Composites - A Combined Experimental and Ab-Initio Modeling Study 2018-04-28 10.1039/c8cp01476k Fundamental limitation of electrocatalytic methane conversion to methanol 2018-05-04 10.1103/PhysRevB.97.195406 Structural details of Al/Al2O3 junctions and their role in the formation of electron tunnel barriers 2018-05-15 10.1021/acs.jctc.8b00039 Tight-Binding Approximation-Enhanced Global Optimization 2018-05-17 10.1021/acs.jpcc.8b01713 Oxidation of Ethylene Carbonate on Li Metal Oxide Surfaces 2018-05-21 10.1103/PhysRevB.97.195435 Quantum plasmons with optical-range frequencies in doped few-layer graphene 2018-05-30 10.1021/jacs.7b13621 Structure of Copper-Cobalt Surface Alloys in Equilibrium with Carbon Monoxide Gas 2018-05-31 10.1088/1361-648X/aabcfb Simplified DFT methods for consistent structures and energies of large systems 2018-06-05 10.1002/qua.25542 Electronic structure of octagonal boron nitride nanotubes 2018-06-14 10.1021/acs.jpcc.8b02165 Spin Uncoupling in Chemisorbed OCCO and CO2: Two High-Energy Intermediates in Catalytic CO2 Reduction 2018-06-15 10.1007/s12274-017-1891-x Enhanced O-2 reduction on 
atomically thin Pt-based nanoshells by integrating surface facet, interfacial electronic, and substrate stabilization effects 2018-06-15 10.1016/j.softx.2017.11.002 Recent developments in LIBXC - A comprehensive library of functionals for density functional theory 2018-06-15 10.1021/acs.jctc.8b00067 Adiabatic Connection without Coupling Constant Integration 2018-06-15 10.1021/acsnano.8b00125 Topotactic Growth of Edge-Terminated MoS2 from MoO2 Nanocrystals 2018-06-15 10.1093/mnras/sty607 Tetrahedral hydrocarbon nanoparticles in space: X-ray spectra 2018-06-15 10.1134/S0030400X18060073 Two Methods of Amplification of Coherent Extreme Ultraviolet Radiation During Harmonic Generation in Plasmas 2018-06-20 10.1103/PhysRevB.97.235136 From semilocal density functionals to random phase approximation renormalized perturbation theory: A methodological assessment of structural phase transitions 2018-06-21 10.1038/s41586-018-0197-9 Comprehensive suppression of single-molecule conductance using destructive sigma-interference 2018-06-22 10.1002/cssc.201800225 Combined DFT and Differential Electrochemical Mass Spectrometry Investigation of the Effect of Dopants in Secondary Zinc-Air Batteries 2018-06-22 10.1140/epjb/e2018-90166-9 Gradient-level and nonlocal density functional descriptions of Cu-Au intermetallic compounds 2018-07-05 10.1002/cphc.201800141 Uncovering Periodicity and Hidden Trends Responsible for Predicting the Magnetic Moment of Body Centered Cubic Crystal 2018-07-07 10.1039/c8cp03052a Incorporation of oxygen atoms as a mechanism for photoluminescence enhancement of chemically treated MoS2 2018-07-15 10.1016/j.cpc.2018.03.001 Brillouin zone grid refinement for highly resolved ab initio THz optical properties of graphene 2018-07-15 10.1021/acs.jctc.8b00238 Charge Transfer Excitations with Range Separated Functionals Using Improved Virtual Orbitals 2018-07-21 10.1039/c8cp02590h DFT investigation on the adsorption of munition compounds on alpha-Fe2O3: similarity and 
differences with alpha-Al2O3 2018-07-21 10.1063/1.5029329 Effects of the cooperative interaction on the diffusion of hydrogen on MgO(100) 2018-08-02 10.1021/acs.jpclett.8b01790 Thermodynamic and Kinetic Limitations for Peroxide and Superoxide Formation in Na-O-2 Batteries 2018-08-08 10.1021/acs.chemrev.7b00577 Recent Advances and Perspectives on Nonadiabatic Mixed Quantum-Classical Dynamics 2018-08-13 10.1002/cctc.201800310 Unveiling Hidden Catalysts for the Oxidative Coupling of Methane based on Combining Machine Learning with Literature Data 2018-08-15 10.1016/j.nanoen.2018.05.052 Significantly enhanced electrocatalytic activity of Au-25 clusters by single platinum atom doping 2018-08-15 10.1016/j.nme.2018.07.004 Study on synergistic effects of H and He in alpha-Fe 2018-08-22 10.1002/chem.201801587 Active-Phase Formation and Stability of Gd/Pt(111) Electrocatalysts for Oxygen Reduction: An In Situ Grazing Incidence X-Ray Diffraction Study 2018-08-22 10.1103/PhysRevLett.121.086804 3D Dirac Plasmons in the Type-II Dirac Semimetal PtTe2 2018-08-28 10.1063/1.5044765 Toward quantum-chemical method development for arbitrary basis functions 2018-08-30 10.1021/acs.jpcc.8b03220 Detection of [Au-25(PET)(18)(O-2)(n)](-) ( n=1, 2, 3) Species by Mass Spectrometry 2018-09-06 10.1021/acs.jpcc.8b02191 X-ray Absorption Near-Edge Spectroscopy Calculations on Pristine and Modified Chalcopyrite Surfaces 2018-09-07 10.1063/1.5037794 Discrete discontinuous basis projection method for large-scale electronic structure calculations 2018-09-24 10.1103/PhysRevB.98.115433 Beyond ideal two-dimensional metals: Edges, vacancies, and polarizabilities 2018-10-07 10.1063/1.5048290 Machine learning enhanced global optimization by clustering local environments to enable bundled atomic energies 2018-10-15 10.1016/j.susc.2018.02.008 The most stable adsorption geometries of two chiral modifiers on Pt(111) 2018-10-15 10.1021/acscatal.8b01432 Quantifying Confidence in DFT Predicted Surface Pourbaix 
Diagrams and Associated Reaction Pathways for Chlorine Evolution 2018-10-15 10.1021/acscatal.8b01615 Cooperative Catalysis by Surface Lewis Acid/Silanol for Selective Fructose Etherification on Sn-SPP Zeolite 2018-10-15 10.1021/acscatal.8b02022 Oxygen Evolution Reaction on Perovskites: A Multieffect Descriptor Study Combining Experimental and Theoretical Methods 2018-10-17 10.1021/jacs.8b06964 Resonant Transport in Single Diketopyrrolopyrrole Junctions 2018-10-28 10.1063/1.5051510 An extended chiral surface coordination network based on Ag-7-clusters 2018-11-07 10.1021/jacs.8b10296 Large Variations in the Single-Molecule Conductance of Cyclic and Bicyclic Silanes 2018-11-14 10.1039/c8nr05989f A thirty-fold photoluminescence enhancement induced by secondary ligands in monolayer protected silver clusters 2018-11-15 10.1016/j.apcatb.2018.05.038 Importance of the Cu oxidation state for the SO2-poisoning of a Cu-SAPO-34 catalyst in the NH3-SCR reaction 2018-11-15 10.1016/j.ceramint.2018.08.120 Influence of interfacial Sn-doping on band alignment of ZnO-nanorods/MAPbI(3) interface: The density functional calculation 2018-11-15 10.1021/acs.jpclett.8b02253 Dipole-Induced Transition Orbitals: A Novel Tool for Investigating Optical Transitions in Extended Systems 2018-11-16 10.1103/PhysRevMaterials.2.113603 Temperature and loading rate dependent rupture forces from universal paths in mechanochemistry 2018-11-26 10.1103/PhysRevMaterials.2.114007 Understanding trends in lithium binding at two-dimensional materials 2018-11-29 10.1021/acs.jpcc.8b05661 Reverse Bond-Length Alternation in Cumulenes: Candidates for Increasing Electronic Transmission with Length 2018-12-06 10.1103/PhysRevMaterials.2.125401 Robust high-fidelity DFT study of the lithium-graphite phase diagram 2018-12-15 10.1021/acs.nanolett.8b02919 Atom-by-Atom Construction of a Cyclic Artificial Molecule in Silicon 2018-12-15 10.1021/acscatal.8b02596 Unraveling the Role of the Rh-ZrO2 Interface in the Water-Gas-Shift 
Reaction via a First-Principles Microkinetic Study 2018-12-17 10.1063/1.5080006 Structural basis for a naphthyl end-capped oligothiophene with embedded metallic nanoparticles for organic field-effect transistors 2018-12-20 10.1021/acs.jpclett.8b03432 The Bicyclo[2.2.2]octane Motif: A Class of Saturated Group 14 Quantum Interference Based Single-Molecule Insulators 2018-12-21 10.1063/1.5044579 Atom-specific activation in CO oxidation 2019-01-02 10.1002/anie.201809469 Camouflaging Structural Diversity: Co-crystallization of Two Different Nanoparticles Having Different Cores But the Same Shell 2019-01-05 10.1002/qua.25725 Continuum embeddings in condensed-matter simulations 2019-01-07 10.1021/acssuschemeng.8b04173 Climbing the 3D Volcano for the Oxygen Reduction Reaction Using Porphyrin Motifs 2019-01-07 10.1063/1.5064602 Phonon properties and thermal conductivity from first principles, lattice dynamics, and the Boltzmann transport equation 2019-01-09 10.1103/PhysRevB.99.045414 Broadband excitation spectrum of bulk crystals and thin layers of PtTe2 2019-01-15 10.1016/j.commatsci.2018.09.026 An automated algorithm for reliable equation of state fitting of magnetic systems 2019-01-15 10.1021/acsaelm.8b00036 Functionalized Single-Atom-Embedded Bilayer Graphene and Hexagonal Boron Nitride 2019-01-15 10.1021/acscatal.8b03664 Lewis Acid Site and Hydrogen-Bond-Mediated Polarization Synergy in the Catalysis of Diels-Alder Cycloaddition by Band-Gap Transition-Metal Oxides 2019-01-15 10.1126/sciadv.aau7555 Rapid isotopic exchange in nanoparticles 2019-01-28 10.1016/j.apsusc.2018.09.152 A DFT study on dimethyl oxalate synthesis over Pd-ML/Ni(111) and Pd-ML/Co(111) surfaces 2019-01-28 10.1063/1.5047829 Grand-canonical approach to density functional theory of electrocatalytic systems: Thermodynamics of solid-liquid interfaces at constant ion and electrode potentials 2019-01-28 10.1063/1.5049674 Face-centered tetragonal (FCT) Fe and Co alloys of Pt as catalysts for the oxygen 
reduction reaction (ORR): A DFT study 2019-01-28 10.1063/1.5056167 Quantifying robustness of DFT predicted pathways and activity determining elementary steps for electrochemical reactions 2019-02-07 10.1021/acs.jpcc.8b11875 Spectroscopic Properties of Chalcopyrite Nanoparticles 2019-02-07 10.1063/1.5078432 Space partitioning of exchange-correlation functionals with the projector augmented-wave method 2019-02-13 10.1103/PhysRevLett.122.063001 Ultrafast X-Ray Scattering Measurements of Coherent Structural Dynamics on the Ground-State Potential Energy Surface of a Diplatinum Molecule 2019-02-15 10.1134/S0965542519020064 On the Calculation of the Interaction Potential in Multiatomic Systems 2019-02-15 10.1142/S0219633619500032 Structural, electronic and magnetic properties of stoichiometric cobalt oxide clusters (CoO)(n)(q) (n=3-10, q=0,+1): A modified basin-hopping Monte Carlo algorithm with spin-polarized DFT 2019-02-19 10.1103/PhysRevB.99.064202 Density functional theory description of random Cu-Au alloys 2019-02-21 10.1021/acs.jpcc.8b11689 Uncertainty Quantification in First-Principles Predictions of Harmonic Vibrational Frequencies of Molecules and Molecular Complexes 2019-02-21 10.1039/c8cp06567e Excited-state solvation structure of transition metal complexes from molecular dynamics simulations and assessment of partial atomic charge methods 2019-02-28 10.1021/acs.jpcc.8b12214 Amorphous, Periodic Model of a Copper Electrocatalyst with Subsurface Oxygen for Enhanced CO Coverage and Dimerization 2019-03-14 10.1021/acs.jpcc.8b11571 Solvent-Adsorbate Interactions and Adsorbate-Specific Solvent Structure in Carbon Dioxide Reduction on a Stepped Cu Surface 2019-03-14 10.1103/PhysRevMaterials.3.034003 Definition of a scoring parameter to identify low-dimensional materials components 2019-03-15 10.1016/j.actamat.2018.12.050 Atomistic simulations of early stage clusters in Al-Mg alloys 2019-03-15 10.1021/acsnano.8b08703 Plasmon-Induced Direct Hot-Carrier Transfer at 
Metal-Acceptor Interfaces 2019-03-15 10.1140/epjd/e2019-90441-5 Ab initio molecular dynamics studies of Au-38(SR)(24) isomers under heating 2019-03-15 10.3144/expresspolymlett.2019.24 Mechanochemically aminated multilayer graphene for carbon/polypropylene graft polymers and nanocomposites 2019-03-20 10.1016/j.joule.2018.12.015 High-Entropy Alloys as a Discovery Platform for Electrocatalysis 2019-03-20 10.1103/PhysRevB.99.115428 Density functional theory based electron transport study of coherent tunneling through cyclic molecules containing Ru and Os as redox active centers 2019-03-22 10.1002/admi.201801874 Multiple Reaction Paths for CO Oxidation on a 2D SnOx Nano-Oxide on the Pt(110) Surface: Intrinsic Reactivity and Spillover 2019-03-28 10.1021/acs.jpcc.9b00272 Globally Optimized Equilibrium Shapes of Zirconia-Supported Rh and Pt Nanoclusters: Insights into Site Assembly and Reactivity 2019-03-28 10.1039/c8cp07169a Experimental and theoretical 2p core-level spectra of size-selected gas-phase aluminum and silicon cluster cations: chemical shifts, geometric structure, and coordination-dependent screening 2019-04-03 10.1021/jacs.8b12101 Functional Role of Fe-Doping in Co-Based Perovskite Oxide Catalysts for Oxygen Evolution Reaction 2019-04-04 10.1021/acs.jpca.9b00927 DFTB-Assisted Global Structure Optimization of 13-and 55-Atom Late Transition Metal Clusters 2019-04-15 10.1002/adts.201800142 Benchmarking Computational Alchemy for Carbide, Nitride, and Oxide Catalysts 2019-04-15 10.1002/adts.201800177 Modeling Gas Adsorption in Flexible Metal-Organic Frameworks via Hybrid Monte Carlo/Molecular Dynamics Schemes 2019-04-15 10.1007/s00214-019-2445-y QTAIM method for accelerated prediction of band gaps in perovskites 2019-04-17 10.1038/s41598-019-41165-6 A potential sensing mechanism for DNA nucleobases by optical properties of GO and MoS2 Nanopores 2019-04-23 10.1021/acs.chemmater.9b00668 Roles of Precursor Conformation and Adatoms in Ullmann Coupling: An Inverted 
Porphyrin on Cu(111) 2019-05-01 10.1039/c8fd00154e Direct hot-carrier transfer in plasmonic catalysis 2019-05-07 10.1039/c8sc05464a Helical orbitals and circular currents in linear carbon wires 2019-05-15 10.1007/s00894-019-4016-5 Functionalized graphene pieces to trap the insecticide imidacloprid: a theoretical analysis 2019-05-15 10.1016/j.cpc.2018.12.001 Efficient technique for ab-initio calculation of magnetocrystalline anisotropy energy 2019-05-15 10.1021/acs.jctc.8b01229 R-NEB: Accelerated Nudged Elastic Band Calculations by Use of Reflection Symmetry 2019-05-15 10.1021/acs.jctc.8b01297 Modeling the Structural and Thermal Properties of Loaded Metal-Organic Frameworks. An Interplay of Quantum and Anharmonic Fluctuations 2019-05-15 10.1021/acsnano.8b09826 Plasmon Excitations in Mixed Metallic Nanoarrays 2019-05-16 10.1021/acs.jpcc.8b11092 When Current Does Not Follow Bonds: Current Density in Saturated Molecules 2019-05-23 10.1021/acs.jpcb.9b00654 Stability and IR Spectroscopy of Zwitterionic Form of beta-Alanine in Water Clusters 2019-06-12 10.1103/PhysRevMaterials.3.063801 Design and analysis of machine learning exchange-correlation functionals via rotationally invariant convolutional descriptors 2019-06-15 10.1016/j.apsusc.2019.02.112 A DFT study and microkinetic analysis of CO oxidation to dimethyl oxalate over Pd stripe and Pd single atom-doped Cu(111) surfaces 2019-06-15 10.1021/acs.nanolett.9b00183 Cavity Control of Excitons in Two-Dimensional Materials 2019-06-15 10.1021/acscatal.9b00330 Free Standing Nanoporous Palladium Alloys as CO Poisoning Tolerant Electrocatalysts for the Electrochemical Reduction of CO2 to Formate 2019-06-26 10.1103/PhysRevB.99.245149 All-electron product basis set: Application to plasmon anisotropy in simple metals 2019-06-28 10.1063/1.5097553 Optical signatures of pentacene in soft rare-gas environments 2019-07-01 10.1088/2515-7639/ab084b From DFT to machine learning: recent approaches to materials science-a review 2019-07-07 
10.1039/c9cp02059d Chemically-resolved determination of hydrogenated graphene-substrate interaction 2019-07-09 10.3389/fchem.2019.00377 The GW Compendium: A Practical Guide to Theoretical Photoemission Spectroscopy 2019-07-15 10.1021/acsanm.9b00758 Tailoring Organic-Organic Poly(vinylpyrrolidone) Microparticles and Fibers with Multiwalled Carbon Nanotubes for Reinforced Composites 2019-07-18 10.1021/acs.jpclett.9b01394 Data Driven Determination in Growth of Silver from Clusters to Nanoparticles and Bulk 2019-07-21 10.1039/c9cy00363k Powerful CO2 electroreduction performance with N-carbon doped with single Ni atoms 2019-07-21 10.1039/c9nj02132a Linear acene molecules in plasmonic cavities: mapping evolution of optical absorption spectra and electric field intensity enhancements 2019-07-25 10.1103/PhysRevB.100.045135 Treating different bonding situations: Revisiting Au-Cu alloys using the random phase approximation 2019-07-26 10.1038/s41467-019-11315-5 Strong plasmon-molecule coupling at the nanoscale revealed by first-principles modeling 2019-07-29 10.1103/PhysRevB.100.035439 Adsorption on transition metal surfaces: Transferability and accuracy of DFT using the ADS41 dataset 2019-08-07 10.1063/1.5108871 Atomistic structure learning 2019-08-07 10.1088/1361-648X/ab18ea van der Waals exchange-correlation functionals over bulk and surface properties of transition metals 2019-08-15 10.1002/ijch.201900026 Overview of Computational Simulations in Quantum Dots 2019-08-15 10.1016/j.jcat.2019.07.011 Nitrogen-doped graphene as metal free basic catalyst for coupling reactions 2019-09-01 10.1016/j.apsusc.2019.04.249 Water adsorption and dissociation on gold catalysts supported on anatase-TiO2(101) 2019-09-06 10.1103/PhysRevB.100.115409 Electron and hole transport in disordered monolayer MoS2: Atomic vacancy induced short-range and Coulomb disorder scattering 2019-09-15 10.1007/s12678-019-00546-1 Hydrogen Oxidation in Alkaline Media: the Bifunctional Mechanism for Water Formation 
2019-09-15 10.1016/j.physe.2019.04.027 Electric field hotspots of all-inorganic off-stoichiometric APbX(3) (A = Cs, Rb and X = Cl, Br, I) perovskite quantum dots 2019-09-15 10.1021/acscatal.9b01899 Electrochemical CO2 Reduction: Classifying Cu Facets 2019-09-15 10.12693/APhysPolA.136.486 Electronic Band Structure and Complex Dielectric Function of zb-AlP: A First Principles Study 2019-09-16 10.1088/1367-2630/ab3d78 Strain and electric field tuning of 2D hexagonal boron arsenide 2019-09-20 10.1103/PhysRevB.100.125143 Symmetry-adapted real-space density functional theory for cylindrical geometries: Application to large group-IV nanotubes 2019-09-23 10.1103/PhysRevB.100.115148 Real time time-dependent density functional theory using higher order finite-element methods 2019-10-15 10.1088/2053-1591/ab3fd4 A study of properties of palladium metal as a component of fuel cells 2019-10-15 10.1140/epjb/e2019-100310-6 Finite element density functional calculations for light molecules using a cusp factor to mitigate the Coulomb potential 2019-10-15 10.4208/cicp.OA-2018-0302 Implementation of the Projector Augmented-Wave Method: The Use of Atomic Datasets in the Standard PAW-XML Format 2019-10-17 10.1021/acs.jpcc.9b06602 Ab Initio Simulation of Position-Dependent Electron Energy Loss and Its Application to the Plasmon Excitation of Nanographene 2019-10-21 10.1103/PhysRevA.100.043412 Spatiotemporal analysis of a final-state shape resonance in interferometric photoemission from Cu(111) surfaces 2019-10-23 10.1021/acsami.9b12533 Thermal Engineering of Metal-Organic Frameworks for Adsorption Applications: A Molecular Simulation Perspective 2019-10-28 10.1039/c9cp03982a Elucidating the optical spectra of [Au-25(SR)(18)](q) nanoclusters 2019-10-28 10.1063/1.5126261 Escaping scaling relationships for water dissociation at interfacial sites of zirconia-supported Rh and Pt clusters 2019-11-05 10.1002/jcc.26033 Density functional theory for molecular and periodic systems using density 
fitting and continuous fast multipole method: Stress tensor 2019-11-07 10.1063/1.5121721 Optical excitations of chlorophyll a and b monomers and dimers 2019-11-15 10.1126/sciadv.aaw1634 Fast nonadiabatic dynamics of many-body quantum systems 2019-11-21 10.1021/acs.jpclett.9b02717 Descriptors for Electrolyte-Renormalized Oxidative Stability of Solvents in Lithium-Ion Batteries 2019-11-21 10.1039/c9gc02265a Catalyst design criteria and fundamental limitations in the electrochemical synthesis of dimethyl carbonate 2019-11-25 10.1103/PhysRevB.100.174431 Effect of H adsorption on the magnetic properties of an Fe island on a W(110) surface 2019-11-25 10.1103/PhysRevB.100.205423 Charge density wave hampers exciton condensation in 1T-TiSe2 2019-11-28 10.1021/acs.jpcc.9b10110 Interfacial Charge Transfer Transitions in Colloidal TiO2 Nanoparticles Functionalized with Salicylic acid and 5-Aminosalicylic acid: A Comparative Photoelectron Spectroscopy and DFT Study 2019-12-05 10.1002/qua.26021 Electric field amplification of plasmon-molecule hybrids revealed by first-principles time dependent density functional theory calculations 2019-12-09 10.1103/PhysRevB.100.241405 Signatures of adatom effects in the quasiparticle spectrum of Li-doped graphene 2019-12-14 10.1039/c9nj03784e Dopant-induced localized light absorption in CsPbX3 (X = Cl, Br, I) perovskite quantum dots 2019-12-15 10.1007/s11468-019-01001-z Probing Subnanometric-Scale Hotspots in Metallic Interfaces 2019-12-15 10.1016/j.mtener.2019.100359 Metal (M = Ru, Pd and Co) embedded in C2N with enhanced lithium storage properties 2019-12-15 10.1021/acs.jctc.9b00777 Polarizable Embedding with a Transferable H2O Potential Function I: Formulation and Tests on Dimer 2019-12-15 10.1021/acs.jctc.9b00778 Polarizable Embedding with a Transferable H2O Potential Function II: Application to (H2O)(n) Clusters and Liquid Water 2019-12-15 10.1021/acsenergylett.9b02306 Computational Screening of Current Collectors for Enabling Anode-Free 
Lithium Metal Batteries 2019-12-16 10.1103/PhysRevB.100.235429 Optical absorption and energy loss spectroscopy of single-walled carbon nanotubes 2019-12-19 10.1103/PhysRevB.100.235129 Substituent effects on the Su-Schrieffer-Heeger electron-phonon coupling in conjugated polyenes 2019-12-19 10.1103/PhysRevB.100.235436 Constructing convex energy landscapes for atomistic structure optimization 2019-12-24 10.1021/acs.chemmater.9b02905 Length- and Thickness-Dependent Optical Response of Liquid-Exfoliated Transition Metal Dichalcogenides 2019-12-26 10.1021/acs.jpcc.9b07715 Effects of Gas-Phase Conditions and Particle Size on the Properties of Cu(111)-Supported ZnyOx Particles Revealed by Global Optimization and Ab Initio Thermodynamics 2019-12-28 10.1063/1.5129397 Large-Z limit in atoms and solids from first principles 2019-12-28 10.1063/1.5132332 Uncertainty quantification of DFT-predicted finite temperature thermodynamic properties within the Debye model 2020-01-02 10.1080/10420150.2020.1718133 Relativistic effects in the interaction of fast charged particles with graphene 2020-01-03 10.1021/acscatal.9b02799 A Challenge to the G similar to 0 Interpretation of Hydrogen Evolution 2020-01-14 10.1021/acs.chemmater.9b04530 Intercluster Reactions Resulting in Silver-Rich Trimetallic Nanoclusters 2020-01-14 10.1039/c9cp06086c On the interplay of solvent and conformational effects in simulated excited-state dynamics of a copper phenanthroline photosensitizer 2020-01-15 10.1007/s10853-019-04042-1 Controlling electronic structure of single-layered HfX3 (X=S, Se) trichalcogenides through systematic Zr doping 2020-01-15 10.1021/acs.jctc.9b00584 Ab Initio Wavelength-Dependent Raman Spectra: Placzek Approximation and Beyond 2020-01-15 10.1021/acsanm.9b02555 First-Principles Design of Cu12shellFecore Core-Shell Clusters Assembled with K3O into Hexameric Rings: Implications for Gas-Storage Materials 2020-01-15 10.2320/matertrans.MT-MK2019009 Doping of Interstitials (H, He, C, N) in 
CrCoFeNi High Entropy Alloy: A DFT Study 2020-01-16 10.1021/acs.jpcc.9b10217 First-Principles Screening of Lead-Free Mixed-Anion Perovskites for Photovoltaics 2020-01-31 10.1103/PhysRevB.101.045433 Atomistic T-matrix theory of disordered two-dimensional materials: Bound states, spectral properties, quasiparticle scattering, and transport 2020-02-05 10.1021/jacs.9b11370 Understanding the Electron-Doping Mechanism in Potassium-Intercalated Single-Walled Carbon Nanotubes 2020-02-07 10.1021/acscatal.9b04343 High-Entropy Alloys as Catalysts for the CO2 and CO Reduction Reactions 2020-02-07 10.1039/c9cp03677f Time-resolved observation of transient precursor state of CO on Ru(0001) using carbon K-edge spectroscopy 2020-02-21 10.1063/1.5141931 The effect of CO2 contamination in rechargeable non-aqueous sodium-air batteries 2020-03-06 10.1021/acscatal.9b04682 Relative Abundances of Surface Diastereomeric Complexes Formed by Two Chiral Modifiers That Differ by a Methyl Group 2020-03-14 10.1039/c9nr10487a Understanding the structural diversity of freestanding Al2O3 ultrathin films through a DFTB-aided genetic algorithm 2020-03-15 10.1016/j.mcat.2019.110731 Dimethyl oxalate synthesis via CO oxidation on Pd-doped Ag(111) surface: A theoretic study 2020-03-15 10.1021/acsanm.0c00385 DNA Sequencing with Single-Stranded DNA Rectification in a Nanogap Gated by N-Terminated Carbon Nanotube Electrodes 2020-03-16 10.1021/acs.inorgchem.9b03461 Crystal Chemistry and Bonding Patterns of Bismuth-Based Topological Insulators 2020-03-16 10.1103/PhysRevLett.124.117401 Parametric Excitation of an Optically Silent Goldstone-Like Phonon Mode 2020-03-21 10.1039/c9sc05897d Oxygen evolution reaction: a perspective on a decade of atomic scale simulations 2020-03-25 10.1021/acsami.0c00328 Light-Triggered Reversible Supracolloidal Self-Assembly of Precision Gold Nanoclusters 2020-03-31 10.1063/1.5142502 Octopus, a computational framework for exploring light-driven phenomena and quantum dynamics in 
extended and finite systems 2020-03-31 10.1063/5.0002959 ACE-Molecule: An open-source real-space quantum chemistry package 2020-04-01 10.1039/c9re00430k High redox performance of Y0.5Ba0.5CoO3-delta for thermochemical oxygen production and separation 2020-04-01 10.1088/2515-7655/ab783a Effect of high oxygen deficiency in nano-confined bismuth sesquioxide 2020-04-02 10.1103/PhysRevB.101.155404 Conductance of quantum spin Hall edge states from first principles: The critical role of magnetic impurities and inter-edge scattering 2020-04-08 10.1149/1945-7111/ab836b Engineering Solid Electrolyte Interphase Composition by Assessing Decomposition Pathways of Fluorinated Organic Solvents in Lithium Metal Batteries 2020-04-15 10.1007/s42452-020-2488-7 Elucidating the stability of ligand-protected Au nanoclusters under electrochemical reduction of CO2 2020-04-15 10.1016/j.commatsci.2020.109540 Electronic and magnetic properties of group-V TMDs monolayers with defects: A first-principles study 2020-04-15 10.1016/j.jcat.2020.02.003 Understanding the interplay of bifunctional and electronic effects: Microkinetic modeling of the CO electro-oxidation reaction 2020-04-20 10.1103/PhysRevMaterials.4.045002 Density functional simulations of pressurized Mg-Zn and Al-Zn alloys 2020-04-21 10.1039/c9qi01688k Semi-sacrificial template synthesis of single-atom Ni sites supported on hollow carbon nanospheres for efficient and stable electrochemical CO2 reduction 2020-04-21 10.1039/c9sc05768d The role of an interface in stabilizing reaction intermediates for hydrogen evolution in aprotic electrolytes 2020-04-23 10.1021/acs.jpcc.9b11768 Machine Learning for Predicting the Band Gaps of ABX(3) Perovskites from Elemental Properties 2020-04-25 10.1016/j.jallcom.2019.153343 Thermodynamics of the iron-nitrogen system with vacancies. 
From first principles to applications 2020-05-13 10.1021/acs.nanolett.9b04789 Microscopic Theory of Plasmons in Substrate-Supported Borophene 2020-05-15 10.1016/j.mcat.2020.110855 Ab initio studies of propene oxide formation at gold nanocatalysts supported on anatase-TiO2 2020-05-15 10.1038/s41563-020-0622-y Bandgap tuning of two-dimensional materials by sphere diameter engineering 2020-05-20 10.1103/PhysRevB.101.195137 Understanding plasmon dispersion in nearly free electron metals: Relevance of exact constraints for exchange-correlation kernels within time-dependent density functional theory 2020-05-26 10.1021/acsaelm.0c00179 Electronic Structure and Trap States of Two-Dimensional Ruddlesden-Popper Perovskites with the Relaxed Goldschmidt Tolerance Factor 2020-06-03 10.1021/acs.cgd.0c00281 Modeling of Grazing-Incidence X-ray Diffraction from Naphthyl End-Capped Oligothiophenes in Organic Field-Effect Transistors 2020-06-05 10.1021/acscatal.0c01085 Insights in the Oxygen Reduction Reaction: From Metallic Electrocatalysts to Diporphyrins 2020-06-07 10.1039/d0ra03126g Boron and pyridinic nitrogen-doped graphene as potential catalysts for rechargeable non-aqueous sodium-air batteries 2020-06-11 10.1021/acs.jpca.0c01512 Monte Carlo Simulations of Au-38(SCH3)(24) Nanocluster Using Distance-Based Machine Learning Methods 2020-06-11 10.1021/acs.jpcc.0c02889 Charge Transfer Plasmons in Dimeric Electron Clusters 2020-06-11 10.1103/PhysRevB.101.235132 Plasmon excitations in chemically heterogeneous nanoarrays 2020-06-15 10.1007/s11468-019-01062-0 Strong Collectivity of Optical Transitions in Lead Halide Perovskite Quantum Dots 2020-06-15 10.1007/s42452-020-2788-y Experimental and computational investigation of PVDF-BaTiO3 interface for impact sensing and energy harvesting applications 2020-06-15 10.1016/j.cplett.2020.137358 Orbital-free density functional theory calculation applying semi-local machine-learned kinetic energy density functional and kinetic potential 
2020-06-15 10.1038/s41467-020-16529-6 A library of ab initio Raman spectra for automated identification of 2D materials 2020-06-18 10.1038/s41524-020-0345-2 Graph theory approach to determine configurations of multidentate and high coverage adsorbates for heterogeneous catalysis 2020-06-25 10.1021/acs.jpcc.0c04512 Thermodynamic Adsorption States of TNT and DNAN on Corundum and Hematite 2020-07-02 10.1021/acs.jpcc.0c02953 Supramolecular Ordering and Reactions of a Chlorophenyl Porphyrin on Ag(111) 2020-07-09 10.1021/acs.jpcc.0c03383 Fullerene-Mediated Aggregation of M-25(SR)(18)(-) (M = Ag, Au) Nanoclusters 2020-07-10 10.1016/j.ijhydene.2020.04.242 HER activity of MxNi1-x (M = Cr, Mo and W; x approximate to 0.2) alloy in acid and alkaline media 2020-07-10 10.1103/PhysRevMaterials.4.075401 Theoretical investigation of the structural, elastic, electronic, and dielectric properties of alkali-metal-based bismuth ternary chalcogenides 2020-07-14 10.1021/acs.chemmater.0c00933 Exploiting Colorimetry for Fidelity in Data Visualization 2020-07-14 10.1063/5.0012901 The CECAM electronic structure library and the modular software development paradigm 2020-07-15 10.1002/admi.201902090 Metastability at Defective Metal Oxide Interfaces and Nanoconfined Structures 2020-07-15 10.1002/qua.26247 Calculation of core-level electron spectra of ionic liquids 2020-07-15 10.1016/j.ultramic.2020.113012 Conductivity models for electron energy loss spectroscopy of graphene in a scanning transmission electron microscope with high energy resolution 2020-07-15 10.1038/s41563-020-0655-2 Universal chemomechanical design rules for solid-ion conductors to prevent dendrite formation in lithium metal batteries 2020-07-15 10.1088/1361-648X/ab82d2 f90wrap: an automated tool for constructing deep Python interfaces to modern Fortran codes 2020-07-15 10.3390/en13143516 Halide Pb-Free Double-Perovskites: Ternary vs. 
Quaternary Stoichiometry 2020-07-28 10.1039/d0tc02371j Transition of wide-band gap semiconductor h-BN(BN)/P heterostructure via single-atom-embedding 2020-07-28 10.1063/5.0015571 Gaussian representation for image recognition and reinforcement learning of atomistic structure 2020-07-29 10.1021/jacs.0c02399 Metal-Assisted and Solvent-Mediated Synthesis of Two-Dimensional Triazine Structures on Gram Scale 2020-08-06 10.1021/acs.jpcc.0c01151 Hydrogen Evolution Reaction over Single-Atom Catalysts Based on Metal Adatoms at Defected Graphene and h-BN 2020-08-10 10.1002/anie.202004016 Ambient Bistable Single Dipole Switching in a Molecular Monolayer 2020-08-11 10.1021/acs.jctc.0c00137 Projector Augmented Wave Method with Gauss-Type Atomic Orbital Basis: Implementation of the Generalized Gradient Approximation and Mesh Grid Quadrature 2020-08-11 10.1021/acs.jctc.9b01251 Reliable Computational Prediction of the Supramolecular Ordering of Complex Molecules under Electrochemical Conditions 2020-08-12 10.1103/PhysRevB.102.075427 Atomistic structure learning algorithm with surrogate energy model relaxation 2020-08-13 10.1021/acs.jpca.0c03992 Computational Comparative Analysis of Small Atomically Precise Copper Clusters 2020-08-14 10.1103/PhysRevMaterials.4.083805 Uncertainty quantification in first-principles predictions of phonon properties and lattice thermal conductivity 2020-08-15 10.1002/pssr.202000182 Air Stable, High-Efficiency, Pt-Based Halide Perovskite Solar Cells with Long Carrier Lifetimes 2020-08-15 10.1016/j.actamat.2020.05.050 Precipitate formation in aluminium alloys: Multi-scale modelling approach 2020-08-15 10.1038/s41565-020-0717-2 High oscillator strength interlayer excitons in two-dimensional heterostructures for mid-infrared photodetection 2020-08-17 10.1103/PhysRevMaterials.4.084003 Naphthylene-gamma: 1D and 2D carbon allotropes based on the fusion of phenyl- and naphthyl-like groups 2020-08-25 10.1021/acsnano.0c03004 Hot-Carrier Generation in Plasmonic 
Nanoparticles: The Importance of Atomic Structure 2020-09-05 10.1002/jcc.26370 SuSMoST: Surface Science Modeling and Simulation Toolkit 2020-09-07 10.1039/d0cp02792h Ternary multicomponent Ba/Mg/Si compounds with inherent bonding hierarchy and rattling Ba atoms toward low lattice thermal conductivity 2020-09-07 10.1039/d0cy00413h First-principles insight into CO hindered agglomeration of Rh and Pt single atoms onm-ZrO2 2020-09-09 10.1103/PhysRevLett.125.116802 Integrated Plasmonics: Broadband Dirac Plasmons in Borophene 2020-09-14 10.1039/d0tc01206h Two-dimensional penta-SiAs2: a potential metal-free photocatalyst for overall water splitting 2020-09-21 10.1039/d0dt01448f Experimental and theoretical investigation of the chemical exfoliation of Cr-based MAX phase particles 2020-09-23 10.1088/1361-648X/ab94f2 Structure prediction of surface reconstructions by deep reinforcement learning 2020-09-25 10.1103/PhysRevB.102.115307 Strain-engineered widely tunable perfect absorption angle in black phosphorus from first principles 2020-09-30 10.1088/1361-648X/ab99ea LCAO-TDDFT-k-omega: spectroscopy in the optical limit 2020-10-01 10.1016/j.matchemphys.2020.123407 Application of the training of density functional theory potentials within machine learning to adsorptions and reaction paths on Platinum surfaces 2020-10-06 10.1021/acs.langmuir.0c01652 Not All Fluorination Is the Same: Unique Effects of Fluorine Functionalization of Ethylene Carbonate for Tuning Solid-Electrolyte Interphase in Li Metal Batteries 2020-10-07 10.1063/5.0023611 Marcus-Hush-Chidsey kinetics at electrode-electrolyte interfaces 2020-10-15 10.1002/chem.202000659 pH Tuning of Water-Soluble Arylazopyrazole Photoswitches 2020-10-15 10.1007/s11468-020-01163-1 Electric Near-field Modulations of Charged Deoxyribonucleic Acid Nucleobases 2020-10-21 10.1039/d0cp03667f Design criteria for the competing chlorine and oxygen evolution reactions: avoid the OCl adsorbate to enhance chlorine selectivity 2020-10-22 
10.1021/acs.jpcc.0c06692 Robust Active Site Design of Single-Atom Catalysts for Electrochemical Ammonia Synthesis 2020-10-22 10.1021/acs.jpcc.0c07140 Dithiol-Induced Contraction in Ag-14 Clusters and Its Manifestation in Electronic Structures 2020-10-27 10.1073/pnas.2008841117 Design rules for liquid crystalline electrolytes for enabling dendrite-free lithium metal batteries 2020-10-28 10.1039/d0cp02574g Tripentaphenes: two-dimensional acepentalene-based nanocarbon allotropes 2020-10-29 10.1021/acs.jpcc.0c05161 Electronic Current Mapping of Transport through Defective Zigzag Graphene Nanoribbons 2020-11-01 10.1002/qua.26343 Multiscale electrostatic embedding simulations for modeling structure and dynamics of molecules in solution: A tutorial review 2020-11-01 10.1063/5.0028002 Accuracy of XAS theory for unraveling structural changes of adsorbates: CO on Ni(100) 2020-11-01 10.1080/00223131.2020.1779143 Behavior of hydrogen at Fe/W interface: a first-principle calculation study 2020-11-03 10.1073/pnas.2001923117 Design principles for self-forming interfaces enabling stable lithium-metal anodes 2020-11-06 10.1021/acscatal.0c03686 PdSO4 Surfaces in Methane Oxidation Catalysts: DFT Studies on Stability, Reactivity, and Water Inhibition 2020-11-07 10.1039/d0cp04216a Localized surface plasmon resonances of a metal nanoring 2020-11-10 10.1021/acs.jctc.0c00597 Variational Density Functional Calculations of Excited States via Direct Optimization 2020-11-10 10.1021/acs.jctc.0c00729 Tackling Solvent Effects by Coupling Electronic and Molecular Density Functional Theory 2020-11-10 10.1021/acs.jctc.0c00842 Adventures in DFTB: Toward an Automatic Parameterization Scheme 2020-11-11 10.1021/jacs.0c08962 Anisotropic Strain Tuning of L1(0) Ternary Nanoparticles for Oxygen Reduction 2020-11-11 10.1103/PhysRevB.102.195118 Real-space orthogonal projector-augmented-wave method 2020-11-12 10.1021/acs.jpcc.0c07004 Atomistic Insight into Cation Effects on Binding Energies in Cu-Catalyzed 
Carbon Dioxide Reduction 2020-11-13 10.1021/acsenergylett.0c01815 Beyond Transition Metal Oxide Cathodes for Electric Aviation: The Case of Rechargeable CFx 2020-11-13 10.1103/PhysRevMaterials.4.114006 Proximity-induced magnetization in graphene: Towards efficient spin gating 2020-11-15 10.1016/j.cpc.2020.107365 CONUNDrum: A program for orbital-free density functional theory calculations 2020-11-15 10.1038/s41563-020-0717-5 P-block single-metal-site tin/nitrogen-doped carbon fuel cell cathode catalyst for oxygen reduction reaction 2020-11-15 10.1166/mex.2020.1848 Optical and electronic properties of croconates dye molecules adsorbed on TiO2 brookite nanocluster for dye sensitized solar cells application 2020-11-19 10.1103/PhysRevB.102.205121 Formation energy puzzle in intermetallic alloys: Random phase approximation fails to predict accurate formation energies 2020-11-21 10.1039/d0py00973c Semifluorinated, kinked polyarylenes via direct arylation polycondensation 2020-11-21 10.1063/5.0021821 Smart local orbitals for efficient calculations within density functional theory and beyond 2020-11-24 10.1103/PhysRevB.102.184428 Critical assessment of Co-Cu phase diagram from first-principles calculations 2020-11-30 10.1103/PhysRevB.102.195433 Analysis of the plasmonic excitations in assemblies of three-dimensional electron clusters 2020-12-01 10.1039/d0fd00064g Variational calculations of excited states via direct optimization of the orbitals in DFT 2020-12-01 10.1103/PhysRevB.102.245301 Origin of weak Fermi level pinning at the graphene/silicon interface 2020-12-16 10.1021/jacs.0c09000 Direct Evidence of Photoinduced Charge Transport Mechanism in 2D Conductive Metal Organic Frameworks 2020-12-16 10.1103/PhysRevMaterials.4.124004 Monomeric two-dimensionally ordered WO3 clusters on anatase TiO2 (101) 2021-01-06 10.1103/PhysRevMaterials.5.015401 Electron microscopy and spectroscopic study of structural changes, electronic properties, and conductivity in annealed LixCoO2 
2021-01-14 10.1021/acs.jpcc.0c08597 Trends in Carbon, Oxygen, and Nitrogen Core in the X-ray Absorption Spectroscopy of Carbon Nanomaterials: A Guide for the Perplexed 2021-01-14 10.1039/d0cp05221c Half metallicity and ferromagnetism of vanadium nitride nanoribbons: a first-principles study 2021-01-15 10.1007/s11837-020-04436-6 Uncertainty Quantification in Atomistic Modeling of Metals and Its Effect on Mesoscale and Continuum Modeling: A Review 2021-01-15 10.1021/acscatal.0c04733 Engendering Unprecedented Activation of Oxygen Evolution via Rational Pinning of Ni Oxidation State in Prototypical Perovskite: Close Juxtaposition of Synthetic Approach and Theoretical Conception 2021-01-15 10.1021/acscatal.0c04878 Three-Dimensional Carbon Electrocatalysts for CO2 or CO Reduction 2021-01-15 10.1088/2515-7655/abc96f An accurate machine learning calculator for the lithium-graphite system 2021-01-25 10.1021/acsaem.0c02798 Influence of the Artificial Nanostructure on the LiF Formation at the Solid-Electrolyte Interphase of Carbon-Based Anodes 2021-02-03 10.1002/cphc.202000771 Field Effect and Local Gating in Nitrogen-Terminated Nanopores (NtNP) and Nanogaps (NtNG) in Graphene 2021-02-10 10.1103/PhysRevMaterials.5.023801 Tight-binding bond parameters for dimers across the periodic table from density-functional theory 2021-03-14 10.1063/5.0042302 Electronic and optical properties of fluorinated graphene within many-body Green's function framework 2021-03-15 10.1002/wcms.1492 First-principles dynamics of photoexcited molecules and materials towards a quantum description 2021-03-15 10.1016/j.cocom.2020.e00524 The effect of non-centrosymmetricity on optical and electronic properties of BaHfO3 perovskite 2021-03-17 10.1088/1361-648X/abc407 Improved band gaps and structural properties from Wannier-Fermi-Lowdin self-interaction corrections for periodic systems 
gpaw-24.1.0/doc/documentation/gpaw2.txt000066400000000000000000004457541454550013000177530ustar00rootroot000000000000002010-01-15 10.1002/anie.201003851 Formation of Gold(I) Edge Oxide at Flat Gold Nanoclusters on an Ultrathin MgO Film under Ambient Conditions 2010-09-16 10.1103/PhysRevB.82.121412 Quantifying transition voltage spectroscopy of molecular junctions: Ab initio calculations 2010-10-14 10.1063/1.3492449 DFT plus U study of defects in bulk rutile TiO2 2010-10-15 10.1021/nl101688a The Relation between Structure and Quantum Interference in Single Molecule Junctions 2010-11-05 10.1103/PhysRevB.82.195411 First-principles calculations of graphene nanoribbons in gaseous environments: Structural and electronic properties 2010-12-30 10.1021/jp1076774 Ab Initio Adsorption Thermodynamics of H2S and H-2 on Ni(111): The Importance of Thermal Corrections and Multiple Reaction Equilibria 2011-01-15 10.1002/anie.201104381 "Ligand-Free" Cluster Quantized Charging in an Ionic Liquid 2011-01-15 10.1016/j.procs.2011.04.003 GPAW - massively parallel electronic structure calculations with Python-based software 2011-01-15 10.1021/nn102887x Solid-State Reactions in Binary Molecular Assemblies of F16CuPc and Pentacene 2011-01-15 10.1039/c1cp20406h Thermodynamic and kinetic properties of hydrogen defect pairs in SrTiO3 from density functional theory 2011-01-15 10.1039/c1cp20924h Graphical prediction of quantum interference-induced transmission nodes in functionalized organic molecules 2011-01-15 10.1039/c1sc00060h A 58-electron superatom-complex model for the magic phosphine-protected gold clusters (Schmid-gold, Nanogold (R)) of 1.4-nm dimension 2011-03-04 10.1103/PhysRevB.83.115108 Self-consistent GW calculations of electronic transport in thiol- and amine-linked molecular junctions 2011-03-23 10.1021/ja111077e Electronic and Vibrational Signatures of the Au-102(p-MBA)(44) Cluster 2011-04-05 10.1103/PhysRevB.83.155407 Improving transition voltage spectroscopy of molecular 
junctions 2011-04-05 10.1103/PhysRevLett.106.146803 Nonlocal Screening of Plasmons in Graphene by Semiconducting and Metallic Substrates: First-Principles Calculations 2011-04-07 10.1063/1.3574836 Robust acceleration of self consistent field calculations for density functional theory 2011-04-11 10.1016/j.cplett.2011.03.001 Electronic hole localization in rutile and anatase TiO2 - Self-interaction correction in Delta-SCF DFT 2011-04-15 10.1103/PhysRevB.83.165423 Stacking and band structure of van derWaals bonded graphane multilayers 2011-04-28 10.1063/1.3563632 Pyrene: Hydrogenation, hydrogen evolution, and pi-band model 2011-05-05 10.1021/jp112114p Size and Shape Dependence of the Electronic and Spectral Properties in TiO2 Nanoparticles 2011-05-12 10.1021/jp2011827 Atomic Layer Deposition of Aluminum Oxide on TiO2 and Its Impact on N3 Dye Adsorption from First Principles 2011-05-16 10.1016/j.cattod.2010.12.022 The role of transition metal interfaces on the electronic transport in lithium-air batteries 2011-05-19 10.1021/jp1121799 Density Functional Theory Study on Propane and Propene Adsorption on Pt(111) and PtSn Alloy Surfaces 2011-05-21 10.1063/1.3589861 Adsorption properties versus oxidation states of rutile TiO2(110) 2011-05-31 10.1103/PhysRevB.83.184119 Optimized orthogonal tight-binding basis: Application to iron 2011-06-15 10.1002/ejic.201100374 The Al50Cp*(12) Cluster - A 138-Electron Closed Shell (L=6) Superatom 2011-06-15 10.1103/PhysRevB.83.235419 PbSe nanocrystals remain intrinsic after surface adsorption of hydrazine 2011-06-24 10.1103/PhysRevB.83.245122 Linear density response function in the projector augmented wave method: Applications to solids, surfaces, and interfaces 2011-07-11 10.1088/0953-8984/23/27/276004 Tight-binding simulation of transition-metal alloys 2011-07-15 10.1007/s10562-011-0637-8 Atomic-Scale Modeling of Particle Size Effects for the Oxygen Reduction Reaction on Pt 2011-07-15 10.1140/epjd/e2011-10603-9 Derivatives of the 
thiolate-protected gold cluster Au-25(SR)(18)(-1) 2011-07-22 10.1103/PhysRevB.84.035117 Self-consistent meta-generalized gradient approximation within the projector-augmented-wave method 2011-07-25 10.1103/PhysRevB.84.035434 Electronic structure of gold, aluminum, and gallium superatom complexes 2011-07-29 10.1103/PhysRevB.84.041412 Multiterminal single-molecule-graphene-nanoribbon junctions with the thermoelectric figure of merit optimized via evanescent mode transport and gate voltage 2011-08-04 10.1021/jp200893w Ab Initio Calculations of the Electronic Properties of Polypyridine Transition Metal Complexes and Their Adsorption on Metal Surfaces in the Presence of Solvent and Counterions 2011-08-11 10.1021/jp203274a Interaction of Au-16 Nanocluster with Defects in Supporting Graphite: A Density-Functional Study 2011-08-15 10.1007/s10562-011-0632-0 Finite Size Effects in Chemical Bonding: From Small Clusters to Solids 2011-08-25 10.1021/jp204886n ALD Grown Aluminum Oxide Submonolayers in Dye-Sensitized Solar Cells: The Effect on Interfacial Electron Transfer and Performance 2011-08-28 10.1063/1.3624529 Self-consistent meta-generalized gradient approximation study of adsorption of aromatic molecules on noble metal surfaces 2011-09-01 10.1021/jz200513h Nonspectral Methods for Solving the Schrodinger Equation for Electronic and Vibrational Problems 2011-09-07 10.1063/1.3632087 Evidence of superatom electronic shells in ligand-stabilized aluminum clusters 2011-09-12 10.1103/PhysRevB.84.104514 Tuning MgB2(0001) surface states through surface termination 2011-09-15 10.1166/mex.2011.1027 Computing C1s X-ray Absorption for Single-Walled Carbon Nanotubes with Distinct Electronic Type 2011-09-19 10.1103/PhysRevLett.107.136102 Direct Evidence for Ethanol Dissociation on Rutile TiO2(110) 2011-09-29 10.1103/PhysRevB.84.121203 Electronic hole transfer in rutile and anatase TiO2: Effect of a delocalization error in the density functional theory on the charge transfer barrier 
height 2011-10-03 10.1103/PhysRevLett.107.156401 Dispersive and Covalent Interactions between Graphene and Metal Surfaces from the Random Phase Approximation 2011-10-06 10.1021/jp204360c Role of the Interface between Pd and PdO in Methane Dissociation 2011-10-14 10.1063/1.3646510 Robust conductance of dumbbell molecular junctions with fullerene anchoring groups 2011-10-14 10.1103/PhysRevB.84.155113 Adaptation of the projector-augmented-wave formalism to the treatment of orbital-dependent exchange-correlation functionals 2011-10-15 10.1002/cctc.201100160 Tailoring the Activity for Oxygen Evolution Electrocatalysis on Rutile TiO2(110) by Transition-Metal Substitution 2011-10-20 10.1103/PhysRevB.84.153104 Pseudopotential approximation in van derWaals density functional calculations 2011-10-21 10.1063/1.3651239 All-electron time-dependent density functional theory with finite elements: Time-propagation approach 2011-10-21 10.1063/1.3653790 Controlling the transmission line shape of molecular t-stubs and potential thermoelectric applications 2011-10-25 10.1103/PhysRevB.84.153410 First-principles study for the adsorption of segments of BPA-PC on alpha-Al2O3(0001) 2011-11-04 10.1103/PhysRevLett.107.195502 Oxidation of Pt(111) under Near-Ambient Conditions 2011-11-08 10.1103/PhysRevB.84.193402 Au-40: A large tetrahedral magic cluster 2011-11-09 10.3762/bjnano.2.82 Towards quantitative accuracy in first-principles transport calculations: The GW method applied to alkane/gold junctions 2011-11-15 10.1016/j.elspec.2011.05.004 An implementation of core level spectroscopies in a real space Projector Augmented Wave density functional theory code 2011-11-16 10.1103/PhysRevB.84.205434 Steps on rutile TiO2(110): Active sites for water and methanol dissociation 2011-11-30 10.1016/j.electacta.2011.08.045 Trends in oxygen reduction and methanol activation on transition metal chalcogenides 2011-12-07 10.1063/1.3663385 Electrical conductivity in Li2O2 and its role in determining capacity 
limitations in non-aqueous Li-O-2 batteries 2011-12-08 10.1021/jp2040345 Ab Initio van der Waals Interactions in Simulations of Water Alter Structure from Mainly Tetrahedral to High-Density-Like 2011-12-14 10.1103/PhysRevB.84.245429 Electronic shell structure and chemisorption on gold nanoparticles 2011-12-15 10.1007/s11249-011-9864-9 Formation and Oxidation of Linear Carbon Chains and Their Role in the Wear of Carbon Materials 2011-12-20 10.1103/PhysRevB.84.235430 First-principles study of surface plasmons on Ag(111) and H/Ag(111) 2011-12-22 10.1021/jp209198g Progressive Shortening of sp-Hybridized Carbon Chains through Oxygen-Induced Cleavage 2012-01-05 10.1021/jz2013853 Scanning Tunneling Microscopy Measurements of the Full Cycle of a Heterogeneous Asymmetric Hydrogenation Reaction on Chirally Modified Pt(111) 2012-01-14 10.1063/1.3675494 Water monomer interaction with gold nanoclusters from van der Waals density functional theory 2012-01-15 10.1039/c1cp23212f Solar hydrogen production with semiconductor metal oxides: new directions in experiment and theory 2012-01-15 10.1039/c2cp23229d The electronic structure of Ge-9[Si(SiMe3)(3)](3)(-): a superantiatom complex 2012-01-15 10.1039/c2cp40715a DFT studies of oxidation routes for Pd-9 clusters supported on gamma-alumina 2012-01-15 10.1039/c2nr30377a The halogen analogs of thiolated gold nanoclusters 2012-01-15 10.1039/c2nr30444a One-pot synthesis and characterization of subnanometre-size benzotriazolate protected copper clusters 2012-01-21 10.1063/1.3675834 X-ray emission spectroscopy and density functional study of CO/Fe(100) 2012-02-02 10.1021/jz201616z Ethanol Diffusion on Rutile TiO2(110) Mediated by H Adatoms 2012-02-08 10.1103/PhysRevB.85.085412 Physisorption of benzene on a tin dioxide surface: van der Waals interaction 2012-02-14 10.1103/PhysRevB.85.085422 Nonmagnetic and magnetic thiolate-protected Au-25 superatoms on Cu(111), Ag(111), and Au(111) surfaces 2012-02-15 10.1002/pssb.201100786 Ab initio 
derived force-field parameters for molecular dynamics simulations of deprotonated amorphous-SiO2/water interfaces 2012-02-15 10.1016/j.susc.2011.11.007 Preservation of the Pt(100) surface reconstruction after growth of a continuous layer of graphene 2012-02-15 10.1039/c1ee02717d Computational screening of perovskite metal oxides for optimal solar light capture 2012-02-16 10.1103/PhysRevB.85.085424 Channeling of charge carrier plasmons in carbon nanotubes 2012-02-22 10.1088/0953-8984/24/7/075604 A self-consistent DFT+DMFT scheme in the projector augmented wave method: applications to cerium, Ce2O3 and Pu2O3 with the Hubbard I solver and comparison to DFT+U 2012-02-23 10.1021/jp211749g Supramolecular Environment-Dependent Electronic Properties of Metal-Organic Interfaces. 2012-02-28 10.1063/1.3685849 Promoter effect of BaO on CO oxidation on PdO surfaces 2012-03-15 10.1007/s10825-012-0386-y First-principles quantum transport modeling of thermoelectricity in single-molecule nanojunctions with graphene nanoribbon electrodes 2012-03-19 10.1103/PhysRevLett.108.126101 Systematic Study of Au-6 to Au-12 Gold Clusters on MgO(100) F Centers Using Density-Functional Theory 2012-03-23 10.1103/PhysRevB.85.115317 Phonon-limited mobility in n-type single-layer MoS2 from first principles 2012-04-05 10.1021/jp210869r Study of Alkylthiolate Self-assembled Monolayers on Au(111) Using a Semilocal meta-GGA Density Functional 2012-04-05 10.1021/jz300051d Understanding Periodic Dislocations in 2D Supramolecular Crystals: The PFP/Ag(111) Interface 2012-04-14 10.1063/1.3700800 Nonadiabatic Ehrenfest molecular dynamics within the projector augmented-wave method 2012-04-15 10.1021/cs200693g NaBr Poisoning of Au/TiO2 Catalysts: Effects on Kinetics, Poisoning Mechanism, and Estimation of the Number of Catalytic Active Sites 2012-04-23 10.1103/PhysRevB.85.155441 First-principles analysis of photocurrent in graphene PN junctions 2012-04-23 10.1103/PhysRevB.85.165440 Unraveling the acoustic 
electron-phonon interaction in graphene 2012-04-27 10.1103/PhysRevB.85.155140 Ab initio nonequilibrium quantum transport and forces with the real-space projector augmented wave method 2012-05-01 10.1016/j.cplett.2012.03.031 Ab-initio calculations of the direct and hydrogen-assisted dissociation of CO on Fe(310) 2012-05-03 10.1021/jp300514f Adsorption and Cyclotrimerization Kinetics of C2H2 at a Cu(110) Surface 2012-05-07 10.1063/1.4707952 Adsorption, mobility, and dimerization of benzaldehyde on Pt(111) 2012-05-09 10.1021/ja211121m Oxidation State and Symmetry of Magnesia-Supported Pd13Ox Nanocatalysts Influence Activation Barriers of CO Oxidation 2012-05-22 10.1103/PhysRevB.85.205437 Magnetism in nanoscale graphite flakes as seen via electron spin resonance 2012-05-25 10.1103/PhysRevB.85.184426 Magnetoresistance and negative differential resistance in Ni/graphene/Ni vertical heterostructures driven by finite bias voltage: A first-principles study 2012-06-07 10.1021/jp209506d Methanol Oxidation on Model Elemental and Bimetallic Transition Metal Surfaces 2012-06-07 10.1103/PhysRevLett.108.236103 Packing Defects into Ordered Structures: Strands on TiO2 2012-06-10 10.1088/0004-637X/752/1/3 EXPERIMENTAL EVIDENCE FOR THE FORMATION OF HIGHLY SUPERHYDROGENATED POLYCYCLIC AROMATIC HYDROCARBONS THROUGH H ATOM ADDITION AND THEIR CATALYTIC ROLE IN H-2 FORMATION 2012-06-13 10.1021/ja3003765 Balance of Nanostructure and Bimetallic Interactions in Pt Model Fuel Cell Catalysts: In Situ XAS and DFT Study 2012-06-15 10.1007/s11244-012-9801-7 Construction of New Electronic Density Functionals with Error Estimation Through Fitting 2012-06-15 10.1016/j.jcat.2012.03.007 Elementary steps of syngas reactions on Mo2C(001): Adsorption thermochemistry and bond dissociation 2012-06-27 10.1103/PhysRevB.85.235149 Density functionals for surface science: Exchange-correlation model development with Bayesian error estimation 2012-07-05 10.1002/jcc.22987 ERKALEuA flexible program package for X-ray 
properties of atoms and molecules 2012-07-12 10.1021/jp302424g Scanning Tunneling Microscopy Evidence for the Dissociation of Carbon Monoxide on Ruthenium Steps 2012-07-15 10.1016/j.carbon.2012.02.035 Computational study of linear carbon chains on gold and silver surfaces 2012-07-16 10.1103/PhysRevB.86.041406 Edge currents and nanopore arrays in zigzag and chiral graphene nanoribbons as a route toward high-ZT thermoelectrics 2012-07-19 10.1103/PhysRevB.86.045208 Optical properties of bulk semiconductors and graphene/boron nitride: The Bethe-Salpeter equation with derivative discontinuity-corrected density functional energies 2012-08-01 10.1021/ja305004a Structure and Mobility of Metal Clusters in MOFs: Au, Pd, and AuPd Clusters in MOF-74 2012-08-03 10.1103/PhysRevB.86.085405 Graphene on metal surfaces and its hydrogen adsorption: A meta-GGA functional study 2012-08-08 10.1103/PhysRevB.86.075417 Reversible graphene-metal contact through hydrogenation 2012-08-13 10.1103/PhysRevB.86.075429 Anomalous insulator-metal transition in boron nitride-graphene hybrid atomic layers 2012-08-15 10.1002/cctc.201100450 Evidence of Scrambling over Ruthenium-based Catalysts in Supercritical-water Gasification 2012-08-15 10.1021/ja3032339 Structural and Theoretical Basis for Ligand Exchange on Thiolate Monolayer Protected Gold Nanoclusters 2012-08-15 10.1103/PhysRevB.86.081103 Extending the random-phase approximation for electronic correlation energies: The renormalized adiabatic local density approximation 2012-08-27 10.1103/PhysRevB.86.075146 Tuning the magnetic moments in zigzag graphene nanoribbons: Effects of metal substrates 2012-08-30 10.1021/jp3004213 Understanding Charge Transfer in Donor-Acceptor/Metal Systems: A Combined Theoretical and Experimental Study 2012-08-30 10.1021/jp3056653 Reactivity and Morphology of Oxygen-Modified Au Surfaces 2012-08-30 10.1021/jp306376r Initial Stages of Growth of Nitrogen-Doped Single-Walled Carbon Nanotubes 2012-09-15 
10.1007/s10562-012-0870-9 Modeling van der Waals Interactions in Zeolites with Periodic DFT: Physisorption of n-Alkanes in ZSM-22 2012-09-15 10.1021/ct300172m Ab Initio Parametrized Force Field for the Flexible Metal-Organic Framework MIL-53(Al) 2012-09-27 10.1021/jp306885u Effects of Silver Doping on the Geometric and Electronic Structure and Optical Absorption Spectra of the Au25-nAgn(SH)(18)(-) (n=1, 2, 4, 6, 8, 10, 12) Bimetallic Nanoclusters 2012-10-04 10.1021/jp306303y Importance of Correlation in Determining Electrocatalytic Oxygen Evolution Activity on Cobalt Oxides 2012-10-09 10.1103/PhysRevB.86.155115 DFT-based tight-binding modeling of iron-carbon 2012-10-10 10.1103/RevModPhys.84.1419 Maximally localized Wannier functions: Theory and applications 2012-10-15 10.1016/j.cpc.2012.05.007 LIBXC: A library of exchange and correlation functionals for density functional theory 2012-10-15 10.1039/c2ee22341d New cubic perovskites for one- and two-photon water splitting using the computational materials repository 2012-10-18 10.1021/jz301261x Experimental and Theoretical Determination of the Optical Gap of the Au-144(SC2H4Ph)(60) Cluster and the (Au/Ag)(144)(SC2H4Ph)(60) Nanoalloys 2012-10-24 10.1088/0953-8984/24/42/424212 Desorption of n-alkanes from graphene: a van der Waals density functional study 2012-10-24 10.1088/0953-8984/24/42/424215 Rationale for switching to nonlocal functionals in density functional theory 2012-10-24 10.1088/0953-8984/24/42/424219 Nonequilibrium thermodynamics of interacting tunneling transport: variational grand potential, density functional formulation and nature of steady-state forces 2012-10-25 10.1021/jp3055894 Divide and Protect: Passivating Cu(111) by Cu-(benzotriazole)(2) 2012-11-08 10.1021/jp307608k Global Minima of Protonated Water Clusters (H2O)(20)H+ Revisited 2012-11-15 10.1021/nn3040588 Graphene Coatings: Probing the Limits of the One Atom Thick Protection Layer 2012-11-15 10.1063/1.4765721 Optical and elastic properties of 
diamond-like carbon with metallic inclusions: A theoretical study 2012-11-15 10.1140/epjd/e2012-30485-5 Density functional theory molecular dynamics study of the Au-25(SR)(18)(-) cluster 2012-11-26 10.1103/PhysRevB.86.195429 TDDFT study of time-dependent and static screening in graphene 2012-12-05 10.1021/ja309619n Au-40(SR)(24) Cluster as a Chiral Dimer of 8-Electron Superatoms: Structure and Optical Properties 2012-12-13 10.1021/jp3066794 Energetics of Oxygen Adatoms, Hydroxyl Species and Water Dissociation on Pt(111) 2012-12-15 10.1007/s11244-012-9908-x Finite-Size Effects in O and CO Adsorption for the Late Transition Metals 2012-12-15 10.1038/NMAT3454 Singular characteristics and unique chemical bond activation mechanisms of photocatalytic reactions on plasmonic nanostructures 2012-12-15 10.1039/c2ee22721e Oxidative trends of TiO2-hole trapping at anatase and rutile surfaces 2012-12-20 10.1021/jz301806b Thermodynamics of Pore Filling Metal Clusters in Metal Organic Frameworks: Pd in UiO-66 2012-12-20 10.1103/PhysRevB.86.241404 Conventional and acoustic surface plasmons on noble metal surfaces: A time-dependent density functional theory study 2012-12-26 10.1103/PhysRevB.86.245129 Spatially resolved quantum plasmon modes in metallic nano-films from first-principles 2013-01-03 10.1021/jz3018286 Investigation of Catalytic Finite-Size-Effects of Platinum Metal Clusters 2013-01-10 10.1021/jp310667r Electronic Origin of the Surface Reactivity of Transition-Metal-Doped TiO2(110) 2013-01-10 10.1103/PhysRevB.87.045411 Charging properties of gold clusters in different environments 2013-01-15 10.1002/anie.201208443 The Redox Chemistry of Gold with High-Valence Doped Calcium Oxide 2013-01-15 10.1007/s10562-012-0918-x The Oxygen Reduction Reaction on Nitrogen-Doped Graphene 2013-01-15 10.1007/s10562-012-0947-5 CO and CO2 Hydrogenation to Methanol Calculated Using the BEEF-vdW Functional 2013-01-15 10.1039/c3cp44641g DFT based study of transition metal nano-clusters for 
electrochemical NH3 production 2013-01-15 10.1039/c3cp51083b pH in atomic scale simulations of electrochemical interfaces 2013-01-15 10.1039/c3cp51295a A DFT study of adsorption of perylene on clean and altered anatase (101) TiO2 2013-01-15 10.1039/c3cp51685g Porphyrin adsorbed on the (10(1)over-bar0) surface of the wurtzite structure of ZnO - conformation induced effects on the electron transfer characteristics 2013-01-15 10.1039/c3cp53160k Electrochemical ammonia production on molybdenum nitride nanoclusters 2013-01-15 10.1039/c3cp54050b Computational screening of functionalized zinc porphyrins for dye sensitized solar cells 2013-01-15 10.1109/IPDPS.2013.106 Early Experience on the Blue Gene/Q Supercomputing System 2013-01-15 10.1140/epjd/e2012-30537-x Structural and electronic properties of AuIr nanoalloys 2013-01-15 10.1149/05701.2449ecst Coverage Dependent Thermodynamics for Sulfur Poisoning of Ni based Anodes 2013-01-15 10.2533/chimia.2013.271 Methane Catalytic Combustion on Pd-9/gamma-Al2O3 with Different Degrees of Pd Oxidation 2013-01-30 10.1103/PhysRevB.87.045428 Stratified graphene/noble metal systems for low-loss plasmonics applications 2013-02-07 10.1021/jz3021155 Understanding Trends in the Electrocatalytic Activity of Metals and Enzymes for CO2 Reduction to CO 2013-02-13 10.1103/PhysRevB.87.075111 Random phase approximation applied to solids, molecules, and graphene-metal interfaces: From van der Waals to covalent bonding 2013-02-15 10.1140/epjd/e2012-30486-4 Modeling thiolate-protected gold clusters with density-functional tight-binding 2013-02-21 10.1021/jp305303q Refractive Index Functions of TiO2 Nanoparticles 2013-02-25 10.1103/PhysRevB.87.075207 Formation energies of group I and II metal oxides using random phase approximation 2013-03-15 10.1147/JRD.2013.2238371 Argonne applications for the IBM Blue Gene/Q, Mira 2013-03-20 10.1088/0953-8984/25/11/115502 Environmental tight-binding modeling of nickel and cobalt clusters 2013-03-21 
10.1021/jp3107809 DFT plus U Study of Polaronic Conduction in Li2O2 and Li2CO3: Implications for Li-Air Batteries 2013-04-15 10.1016/j.carbon.2012.12.008 Binding of atomic oxygen on graphene from small epoxy clusters to a fully oxidized surface 2013-04-15 10.1016/j.jcat.2013.01.009 Guest-host interactions of arenes in H-ZSM-5 and their impact on methanol-to-hydrocarbons deactivation processes 2013-04-15 10.1021/cs300722w Methanol to Dimethyl Ether over ZSM-22: A Periodic Density Functional Theory Study 2013-04-25 10.1021/jp400980y 1,3-Diketone Fluids and Their Complexes with Iron 2013-05-02 10.1021/jp400082u Absorption Spectra of Trapped Holes in Anatase TiO2 2013-05-07 10.1103/PhysRevB.87.205410 Screened empirical bond-order potentials for Si-C 2013-05-09 10.1021/jp306172k Electrochemical CO2 and CO Reduction on Metal-Functionalized Porphyrin-like Graphene 2013-05-09 10.1021/jp311980h Li-ion Conduction in the LiBH4:Lil System from Density Functional Theory Calculations and Quasi-Elastic Neutron Scattering 2013-05-15 10.1021/nl400830u Adsorption and Diffusion of Lithium on Layered Silicon for Li-Ion Storage 2013-05-15 10.1021/nn400780x Controlling Hydrogenation of Graphene on Ir(111) 2013-05-15 10.1140/epjb/e2013-40113-5 Carbon nanotubes as heat dissipaters in microelectronics 2013-05-23 10.1021/jp400287h Polycyclic Aromatic Hydrocarbons: Trends for Bonding Hydrogen 2013-06-06 10.1021/jp4024684 First-Principles Study of Excited State Evolution in a Protected Gold Complex 2013-06-19 10.1103/PhysRevB.87.235312 Acoustic phonon limited mobility in two-dimensional semiconductors: Deformation potential and piezoelectric scattering in monolayer MoS2 from first principles 2013-06-21 10.1063/1.4811455 Optoelectronic properties of single-layer, double-layer, and bulk tin sulfide: A theoretical study 2013-06-21 10.1126/science.1238187 Direct Imaging of Covalent Bond Structure in Single-Molecule Chemical Reactions 2013-06-24 10.1103/PhysRevB.87.235132 Quasiparticle GW 
calculations for solids, molecules, and two-dimensional materials 2013-06-26 10.1103/PhysRevB.87.235433 Visualizing hybridized quantum plasmons in coupled nanowires: From classical to tunneling regime 2013-07-04 10.1021/jp404240h Interaction between Coronene and Graphite from Temperature-Programmed Desorption and DFT-vdW Calculations: Importance of Entropic Effects and Insights into Graphite Interlayer Binding 2013-07-07 10.1063/1.4812398 Excited-state potential-energy surfaces of metal-adsorbed organic molecules from linear expansion Delta-self-consistent field density-functional theory (Delta SCF-DFT) 2013-07-11 10.1103/PhysRevB.88.035418 Charge localization on a redox-active single-molecule junction and its influence on coherent electron transport 2013-07-11 10.1103/PhysRevLett.111.027601 Layer-Resolved Study of Mg Atom Incorporation at the MgO/Ag(001) Buried Interface 2013-07-15 10.1038/ncomms3121 Direct measurement and modulation of single-molecule coordinative bonding forces in a transition metal complex 2013-07-15 10.7566/JPSJ.82.074709 A Comparative Density-Functional Theory Investigation of Oxygen Adsorption on Stepped Ni Surfaces 3(hkl) x (111) [hkl = (111), (100), (110)]: Role of Terrace Orientation 2013-08-07 10.1063/1.4817001 A variational method for density functional theory calculations on metallic systems with thousands of atoms 2013-08-13 10.1021/cm400541n Lithium Chalcogenidotetrelates: LiChT-Synthesis and Characterization of New Li+ Ion Conducting Li/Sn/Se Compounds 2013-08-15 10.1021/jp400486r Stability of Pt-Modified Cu(111) in the Presence of Oxygen and Its Implication on the Overall Electronic Structure 2013-08-15 10.1021/jp4043045 CO Intercalation of Graphene on Ir(111) in the Millibar Regime 2013-08-20 10.1103/PhysRevLett.111.085503 Interlayer Carbon Bond Formation Induced by Hydrogen Adsorption in Few-Layer Supported Graphene 2013-09-04 10.1021/ja4059074 Protected but Accessible: Oxygen Activation by a Calixarene-Stabilized Undecagold 
Cluster 2013-09-05 10.1021/jz401553p Gold and Methane: A Noble Combination for Delicate Oxidation 2013-09-11 10.1021/ja405997s Theoretical Investigation of the Activity of Cobalt Oxides for the Electrochemical Oxidation of Water 2013-09-11 10.1088/0953-8984/25/36/365403 Modelling the lattice dynamics in SixGe1-x alloys 2013-09-15 10.1038/ncomms3422 All-thiol-stabilized Ag-44 and Au12Ag32 nanoparticles with single-crystal structures 2013-09-18 10.1103/PhysRevB.88.115131 Beyond the random phase approximation: Improved description of short-range correlation by a renormalized adiabatic local density approximation 2013-10-03 10.1021/jp4003092 (H2O)(20) Water Clusters at Finite Temperatures 2013-10-10 10.1021/jp407494v Electronic Structure and Optical Properties of the Thiolate-Protected Au-28(SMe)(20) Cluster 2013-10-15 10.1007/s10853-013-7448-9 Performance of genetic algorithms in search for water splitting perovskites 2013-10-15 10.1016/j.susc.2013.06.014 Methane oxidation over Pd and Pt studied by DFT and kinetic modeling 2013-10-15 10.1021/ct400520e Real-Space Density Functional Theory on Graphical Processing Units: Computational Approach and Comparison to Gaussian Basis Set Methods 2013-10-17 10.1021/jp404569m Equilibrium Crystal Shape of Ni from First Principles 2013-10-17 10.1103/PhysRevA.88.043202 Hybridization of angular-momentum eigenstates in nonspherical sodium clusters 2013-10-23 10.1103/PhysRevB.88.155128 Plasmons in metallic monolayer and bilayer transition metal dichalcogenides 2013-10-24 10.1021/jp406819f Role of the Central Gold Atom in Ligand-Protected Biicosahedral Au-24 and Au-25 Clusters 2013-10-28 10.1063/1.4827078 CO dissociation on iron nanoparticles: Size and geometry effects 2013-10-28 10.1088/1367-2630/15/10/105026 Stability and bandgaps of layered perovskites for one- and two-photon water splitting 2013-11-01 10.1016/j.jpowsour.2013.04.109 Crystal structure analysis and first principle investigation of F doping in LiFePO4 2013-11-07 
10.1021/jp405670v Improving the Adsorption of Au Atoms and Nanoparticles on Graphite via Li Intercalation 2013-11-07 10.1021/jz402054e Breakdown of the Graphene Coating Effect under Sequential Exposure to O-2 and H2S 2013-11-11 10.1002/chem.201301450 The Isolation of Single MMX Chains from Solution: Unravelling the Assembly-Disassembly Process 2013-11-14 10.1063/1.4829520 Energy level alignment and quantum conductance of functionalized metal-molecule junctions: Density functional theory versus GW calculations 2013-11-15 10.1002/pssb.201349217 Quantitatively accurate calculations of conductance and thermopower of molecular junctions 2013-11-15 10.1021/nn4046634 Birth of the Localized Surface Plasmon Resonance in Mono layer-Protected Gold Nanoclusters 2013-11-21 10.1063/1.4829539 Self-interaction corrected density functional calculations of molecular Rydberg states 2013-11-28 10.1063/1.4829640 Interfacial oxygen under TiO2 supported Au clusters revealed by a genetic algorithm search 2013-12-05 10.1021/jp409479h The Influence of Functionals on Density Functional Theory Calculations of the Properties of Reducible Transition Metal Oxide Catalysts 2013-12-12 10.1021/jp410379u Competition between Icosahedral Motifs in AgCu, AgNi, and AgCo Nanoalloys: A Combined Atomistic-DFT Study 2013-12-15 10.1016/j.cpc.2013.07.014 Graphics Processing Unit acceleration of the Random Phase Approximation in the projector augmented wave method 2013-12-15 10.1021/cs400566y Selectivity in Propene Dehydrogenation on Pt and Pt3Sn Surfaces from First Principles 2013-12-21 10.1063/1.4840515 A density functional theory study of atomic steps on stoichiometric rutile TiO2(110) 2013-12-26 10.1103/PhysRevB.88.245309 How dielectric screening in two-dimensional crystals affects the convergence of excited-state calculations: Monolayer MoS2 2013-12-27 10.1103/PhysRevB.88.245204 Calculated formation and reaction energies of 3d transition metal oxides using a hierachy of exchange-correlation functionals 
2013-12-28 10.1063/1.4849178 Phonon interference effects in molecular junctions 2014-01-01 10.1080/10408436.2013.772503 Error Estimates for Solid-State Density-Functional Theory Predictions: An Overview by Means of the Ground-State Elemental Crystals 2014-01-14 10.1103/PhysRevB.89.035120 Electronic stopping power from first-principles calculations with account for core electron excitations and projectile ionization 2014-01-15 10.1016/j.jcat.2013.10.015 Thermochemistry and micro-kinetic analysis of methanol synthesis on ZnO (0001) 2014-01-15 10.1016/j.susc.2013.09.020 Formation of metastable, heterolytic H-pairs on the RuO2(110) surface 2014-01-15 10.1021/nn406219x Supramolecular Functionalization and Concomitant Enhancement in Properties of Au-25 Clusters 2014-01-15 10.1039/c3cp53922a Interactions of polymers with reduced graphene oxide: van der Waals binding energies of benzene on graphene with defects 2014-01-15 10.1039/c3cp54491e The molecular and magnetic structure of carbon-enclosed and partially covered Fe-55 particles 2014-01-15 10.1039/c3py00853c Soluble and stable alternating main-chain merocyanine copolymers through quantitative spiropyran-merocyanine conversion 2014-01-15 10.1039/c3ra47390b Selective poisoning of Li-air batteries for increased discharge capacity 2014-01-15 10.1039/c3ra47784c Dissociation of oxygen on pristine and nitrogen-doped carbon nanotubes: a spin-polarized density functional study 2014-01-15 10.1039/c4cp01289e Optimizing porphyrins for dye sensitized solar cells using large-scale ab initio calculations 2014-01-15 10.1039/c4cp02742f Water and carbon oxides on monoclinic zirconia: experimental and computational insights 2014-01-15 10.1039/c4cy00262h Verification of the dual cycle mechanism for methanol-to-olefin conversion in HSAPO-34: a methylbenzene-based cycle from DFT calculations 2014-01-15 10.1039/c4fd00083h Exploring coherent transport through pi-stacked systems for molecular electronic devices 2014-01-15 10.1039/c4nr01255k 
Solvation chemistry of water-soluble thiol-protected gold nanocluster Au-102 from DOSY NMR spectroscopy and DFT calculations 2014-01-15 10.1039/c4nr02455a A gate controlled molecular switch based on picene-F(4)TCNQ charge-transfer material 2014-01-15 10.1039/c4nr04286g Quantum state engineering with ultra-short-period (AlN)(m)/(GaN)(n) superlattices for narrowband deep-ultraviolet detection 2014-01-15 10.1039/c4sc01646g Ultrafast structural dynamics in Rydberg excited N,N,N ',N '-tetramethylethylenediamine: conformation dependent electron lone pair interaction and charge delocalization 2014-01-15 10.1088/1742-6596/526/1/012003 Analysis of energy gap opening in graphene oxide 2014-01-28 10.1103/PhysRevB.89.014304 Ab initio based thermal property predictions at a low cost: An error analysis 2014-01-31 10.3762/bjnano.5.11 The role of oxygen and water on molybdenum nanoclusters for electro catalytic ammonia production 2014-02-15 10.1007/s11244-013-0160-9 Genetic Algorithm Procreation Operators for Alloy Nanoparticle Catalysts 2014-02-15 10.1007/s11244-013-0173-4 Modeling Methyl Chloride Photo Oxidation by Oxygen Species on TiO2(110) 2014-02-15 10.1007/s11244-013-0181-4 Calculated Pourbaix Diagrams of Cubic Perovskites for Water Splitting: Stability Against Corrosion 2014-02-15 10.1021/nn405114z Revealing the Adsorption Mechanisms of Nitroxides on Ultrapure, Metallicity-Sorted Carbon Nanotubes 2014-02-15 10.1088/0268-1242/29/2/023002 Hard x-ray emission spectroscopy: a powerful tool for the characterization of magnetic semiconductors 2014-02-15 10.3390/polym6020491 Computational Study of a Heterostructural Model of Type I Collagen and Implementation of an Amino Acid Potential Method Applicable to Large Proteins 2014-02-20 10.1016/j.electacta.2013.12.047 Balance of the interfacial interactions of 4,4 '-bipyridine at Bi(111) surface 2014-02-21 10.1103/PhysRevB.89.085420 Temperature effects on quantum interference in molecular junctions 2014-03-12 
10.1103/PhysRevB.89.115412 Density functional theory based calculations of the transfer integral in a redox-active single-molecule junction 2014-03-13 10.1098/rsta.2013.0270 Density functional theory in the solid state 2014-03-15 10.1016/j.jssc.2013.12.006 Ionic conductivity and the formation of cubic CaH2 in the LiBH4-Ca(BH4)(2) composite 2014-03-15 10.1021/ct400931p Configurational Entropy in Ice Nanosystems: Tools for Structure Generation and Screening 2014-03-19 10.1021/ja500809p Chiral Phase Transfer and Enantioenrichment of Thiolate-Protected Au-102 Clusters 2014-03-28 10.1063/1.4869212 Communication: The influence of CO2 poisoning on overvoltages and discharge capacity in non-aqueous Li-Air batteries 2014-04-02 10.1021/ja412141j Single Crystal XRD Structure and Theoretical Analysis of the Chiral Au30S(S-t-Bu)(18) Cluster 2014-04-14 10.1063/1.4870397 mBEEF: An accurate semi-local Bayesian error estimation density functional 2014-04-15 10.1021/cs400875k Remote Activation of Chemical Bonds in Heterogeneous Catalysis 2014-04-15 10.1021/nn500963m Designer Titania-Supported Au-Pd Nanoparticles for Efficient Photocatalytic Hydrogen Production 2014-04-17 10.1021/jz500449k A DFT Study of Linear Gold-Thiolate Superclusters Absorbing in the Therapeutic NIR Window 2014-04-18 10.1371/journal.pone.0095390 Performance Analysis of Electronic Structure Codes on HPC Systems: A Case Study of SIESTA 2014-04-22 10.1021/cm4042007 X-ray Absorption Study of Structural Coupling in Photomagnetic Prussian Blue Analogue Core@Shell Particles 2014-04-28 10.1063/1.4871875 Static correlation beyond the random phase approximation: Dissociating H-2 with the Bethe-Salpeter equation and time-dependent GW 2014-05-01 10.1021/jz500482z Reactivity Descriptor in Solid Acid Catalysis: Predicting Turnover Frequencies for Propene Methylation in Zeotypes 2014-05-07 10.1063/1.4874775 Structural stability and electronic properties of low-index surfaces of SnS 2014-05-15 10.1007/s00339-013-8034-3 
Atomistic approach for simulating plasmons in nanostructures 2014-05-15 10.1016/j.jcat.2014.04.006 Methanol-to-hydrocarbons conversion: The alkene methylation pathway 2014-05-15 10.1021/ct500087v Quasiparticle Level Alignment for Photocatalytic Interfaces 2014-05-21 10.1103/PhysRevLett.112.203001 Accurate Ground-State Energies of Solids and Molecules from Time-Dependent Density-Functional Theory 2014-06-12 10.1021/jp5033959 Electronic Structure and Optical Properties of the Intrinsically Chiral 16-Electron Superatom Complex [Au-20(PP3)(4)](4+) 2014-06-15 10.1021/cs500202f Identification of the Catalytic Site at the Interface Perimeter of Au Clusters on Rutile TiO2(110) 2014-06-27 10.1103/PhysRevB.89.245445 Quasiparticle scattering from topological crystalline insulator SnTe (001) surface states 2014-07-15 10.1088/0965-0393/22/5/055007 Designing rules and probabilistic weighting for fast materials discovery in the Perovskite structure 2014-07-24 10.1021/jp501581g Atomic Structure, Electronic Properties, and Reactivity of In-Plane Heterostructures of Graphene and Hexagonal Boron Nitride 2014-07-24 10.1021/jp5035147 Hydrogen-Induced Reconstruction of Cu(100): Two-Dimensional and One-Dimensional Structures of Surface Hydride 2014-07-24 10.1021/jp504709d Effect of Magnetic States on the Reactivity of an FCC(111) Iron Surface 2014-07-30 10.1021/ie502170y Kinetic Modeling of Ethyl Benzoylformate Enantioselective Hydrogenation over Pt/Al2O3 2014-08-01 10.1093/mnras/stu869 Do cement nanoparticles exist in space? 
2014-08-06 10.1088/0953-8984/26/28/315013 Dynamical coupling of plasmons and molecular excitations by hybrid quantum/classical calculations: time-domain approach 2014-08-06 10.1088/0953-8984/26/31/315013 Dynamical coupling of plasmons and molecular excitations by hybrid quantum/classical calculations: time-domain approach 2014-08-07 10.1021/jp505464z Ultrafast Electronic Relaxation and Vibrational Cooling Dynamics of Au-144(SC2H4Ph)(60) Nanocluster Probed by Transient Mid-IR Spectroscopy 2014-08-11 10.1103/PhysRevB.90.075115 Simultaneous description of conductance and thermopower in single-molecule junctions from many-body ab initio calculations 2014-08-15 10.1016/j.ssc.2014.04.023 MoS2 nanostructures: Semiconductors with metallic edges 2014-08-15 10.1063/1.4893495 Bandgap calculations and trends of organometal halide perovskites 2014-08-21 10.1039/c4dt01329h Chloride-bridged, defect-dicubane {Ln(4)} core clusters: syntheses, crystal structures and magnetic properties 2014-08-28 10.1021/jp503494g Mechanism of Trichloroethene Hydrodehalogenation: A First-Principles Kinetic Monte Carlo Study 2014-08-28 10.1021/jp505462m TDDFT Analysis of Optical Properties of Thiol Monolayer-Protected Gold and Intermetallic Silver-Gold Au-144(SR)(60) and Au84Ag60(SR)(60) Clusters 2014-09-09 10.1103/PhysRevB.90.125413 Quantum interference in off-resonant transport through single molecules 2014-09-11 10.1103/PhysRevLett.113.115501 Silicon-Carbon Bond Inversions Driven by 60-keV Electrons in Graphene 2014-09-15 10.1021/nl502571b Controlling Catalytic Selectivity on Metal Nanoparticles by Direct Photoexcitation of Adsorbate-Metal Bonds 2014-09-15 10.1088/0031-8949/2014/T162/014019 Ab initio study of structural and electronic properties of partially reduced graphene oxide 2014-09-15 10.1088/2053-1583/1/2/025002 Sequential oxygen and alkali intercalation of epitaxial graphene on Ir(111): enhanced many-body effects and formation of pn-interfaces 2014-09-18 10.1021/jp501185q Superatomic S-2 
Silver Clusters Stabilized by a Thiolate-Phosphine Monolayer: Insight into Electronic and Optical Properties of Ag-14(SC6H3F2)(12)(PPh3)(8) and Ag-16(SC6H3F2)(14)(DPPE)(4) 2014-09-18 10.1021/jp506158c Periodic DFT Study of Benzene Adsorption on Pd(100) and Pd(110) at Medium and Saturation Coverage 2014-09-22 10.1103/PhysRevB.90.125433 Induced work function changes at Mg-doped MgO/Ag(001) interfaces: Combined Auger electron diffraction and density functional study 2014-09-23 10.1088/1367-2630/16/9/093029 GOLLUM: a next-generation simulation tool for electron, thermal and spin transport 2014-10-15 10.1016/j.susc.2014.05.017 Investigating energetics of Au-8 on graphene/Ru(0001) using a genetic algorithm and density functional theory 2014-10-15 10.1016/j.susc.2014.06.001 Interplay of hydrogen bonding and molecule-substrate interaction in self-assembled adlayer structures of a hydroxyphenyl-substituted porphyrin 2014-10-15 10.1021/nl5029045 High-Conductive Organometallic Molecular Wires with De localized Electron Systems Strongly Coupled to Metal Electrodes 2014-10-16 10.1021/jp507349k Coverage-Dependent Adsorption of Bifunctional Molecules: Detailed Insights into Interactions between Adsorbates 2014-10-16 10.1103/PhysRevB.90.155428 Understanding intercalation structures formed under graphene on Ir(111) 2014-10-21 10.1039/c4cp00753k Segregation effects on the properties of (AuAg)(147) 2014-10-29 10.1103/PhysRevB.90.161410 Plasmons on the edge of MoS2 nanostructures 2014-11-06 10.1021/jp506935a Physical Factors Affecting Charge Transfer at the Pe-COOH-TiO2 Anatase Interface 2014-11-07 10.1063/1.4900628 Thermodynamic aspects of dehydrogenation reactions on noble metal surfaces 2014-11-07 10.1063/1.4900838 Simplified continuum solvent model with a smooth cavity based on volumetric data 2014-11-10 10.1016/j.electacta.2014.09.056 Ti atoms in Ru0.3Ti0.7O2 mixed oxides form active and selective sites for electrochemical chlorine evolution 2014-11-15 10.1016/j.susc.2014.03.021 
Walking-like diffusion of two-footed asymmetric aromatic adsorbates on Pt(111) 2014-11-15 10.1107/S2052252514020181 Location of Cu2+ in CHA zeolite investigated by X-ray diffraction using the Rietveld/maximum entropy method 2014-11-17 10.1002/anie.201406246 Squeezing, Then Stacking: From Breathing Pores to Three-Dimensional Ionic Self-Assembly under Electrochemical Control 2014-11-17 10.1002/anie.201406528 A Surface Coordination Network Based on Copper Adatom Trimers 2014-11-19 10.1021/ja5095099 Active and Selective Conversion of CO2 to CO on Ultrathin Au Nanowires 2014-11-20 10.1021/jp509505j Toward Stronger Al-BN Nanotube Composite Materials: Insights into Bonding at the Al/BN Interface from First-Principles Calculations 2014-11-27 10.1021/jp508557w Dynamics of the Photogenerated Hole at the Rutile TiO2(110)/Water Interface: A Nonadiabatic Simulation Study 2014-12-04 10.1021/jp509510j Atomic-Scale View on the H2O Formation Reaction from H-2 on O-Rich RuO2(110) 2014-12-07 10.1063/1.4902249 Nucleation and growth of Pt nanoparticles on reduced and oxidized rutile TiO2 (110) 2014-12-08 10.1063/1.4903340 Designing pi-stacked molecular structures to control heat transport through molecular junctions 2014-12-11 10.1021/jp506508x Au-36(SPh)(24) Nanomolecules: X-ray Crystal Structure, Optical Spectroscopy, Electrochemistry, and Theoretical Analysis 2014-12-11 10.1021/jp509970y Collective Diffusion of Gold Clusters and F-Centers at MgO(100) and CaO(100) Surfaces 2014-12-15 10.1002/pssb.201451174 Theoretical electron energy loss spectroscopy of isolated graphene 2014-12-15 10.1021/cs5014267 Methanol-Alkene Reactions in Zeotype Acid Catalysts: Insights from a Descriptor-Based Approach and Microkinetic Modeling 2014-12-15 10.1038/ncomms6803 Hydrogen bond rotations as a uniform structural tool for analyzing protein architecture 2014-12-15 10.1364/OE.22.030725 Effects of exchange correlation functional on optical permittivity of gold and electromagnetic responses 2014-12-18 
10.1021/jp505394e Operando Characterization of an Amorphous Molybdenum Sulfide Nanoparticle Catalyst during the Hydrogen Evolution Reaction 2014-12-21 10.1063/1.4902383 Self-interaction corrected density functional calculations of Rydberg states of molecular clusters: N,N-dimethylisopropylamine 2014-12-31 10.1021/ja510335z Electrochemical Control of Single-Molecule Conductance by FermiLevel Tuning and Conjugation Switching 2015-01-13 10.1103/PhysRevB.91.045418 pi-plasmon dispersion in free-standing graphene by momentum-resolved electron energy-loss spectroscopy 2015-01-15 10.1002/cctc.201402756 Enhancing Activity for the Oxygen Evolution Reaction: The Beneficial Interaction of Gold with Manganese and Cobalt Oxides 2015-01-15 10.1002/cpe.3199 Design and performance characterization of electronic structure calculations on massively parallel supercomputers: a case study of GPAW on the Blue Gene/P architecture 2015-01-15 10.1007/978-3-319-18747-1_3 Treating Relativistic Effects in Transition Metal Complexes 2015-01-15 10.1007/978-3-319-18747-1_5 Background 2015-01-15 10.1016/j.jcat.2014.10.009 Ab initio prediction of the equilibrium shape of supported Ag nanoparticles on alpha-Al2O3(0001) 2015-01-15 10.1016/j.jcp.2014.10.052 Real-time adaptive finite element solution of time-dependent Kohn-Sham equation 2015-01-15 10.1021/cs5015749 Understanding the Early Stages of the Methanol-to-Olefin Conversion on H-SAPO-34 2015-01-15 10.1021/jp508932x Influence of Adsorbed Water on the Oxygen Evolution Reaction on Oxides 2015-01-15 10.1021/n1503518q1 Single-Molecule Electrochemical Transistor Utilizing a Nickel-Pyridyl Spinterface 2015-01-15 10.1021/nl503518q Single-Molecule Electrochemical Transistor Utilizing a Nickel-Pyridyl Spinterface 2015-01-15 10.1038/npjcompumats.2015.8 A bridge for accelerating materials by design 2015-01-15 10.1039/c4cp02789b A DFT study of the effect of OH groups on the optical, electronic, and structural properties of TiO2 nanoparticles 2015-01-15 
10.1039/c4cy01044b Single-chiral-catalytic-surface-sites: STM and DFT study of stereodirecting complexes formed between (R)-1-(1-naphthyl)ethylamine and ketopantolactone on Pt(111) 2015-01-15 10.1039/c4cy01692k Transition-state scaling relations in zeolite catalysis: influence of framework topology and acid-site reactivity 2015-01-15 10.1039/c4sc03835e Design of two-photon molecular tandem architectures for solar cells by ab initio theory 2015-01-15 10.1039/c5cc04513d UV photoexcitation of a dissolved metalloid Ge-9 cluster compound and its extensive ultrafast response 2015-01-15 10.1039/c5cp00298b A DFT-based genetic algorithm search for AuCu nanoalloy electrocatalysts for CO2 reduction 2015-01-15 10.1039/c5cp00351b Real-space grids and the Octopus code as tools for the development of new simulation approaches for electronic systems 2015-01-15 10.1039/c5cp00352k Configuration interaction singles based on the real-space numerical grid method: Kohn-Sham versus Hartree-Fock orbitals 2015-01-15 10.1039/c5cp00435g An old workhorse for new applications: Fe(dpm)(3) as a precursor for low-temperature PECVD of iron(III) oxide 2015-01-15 10.1039/c5cp01912e First principles study of the atomic layer deposition of alumina by TMA-H2O-process 2015-01-15 10.1039/c5cp04034e Mechanistic insights into nitrogen fixation by nitrogenase enzymes 2015-01-15 10.1039/c5nr04324g Pd2Au36(SR)(24) cluster: structure studies 2015-01-15 10.1039/c5py00141b High molecular weight mechanochromic spiropyran main chain copolymers via reproducible microwave-assisted Suzuki polycondensation 2015-01-15 10.1039/c5ta01586c Calculated optical absorption of different perovskite phases 2015-01-15 10.1103/PhysRevB.91.041112 Unified picture of the doping dependence of superconducting transition temperatures in alkali metal/ammonia intercalated FeSe 2015-01-21 10.1002/aenm.201400915 New Light-Harvesting Materials Using Accurate and Efficient Bandgap Calculations 2015-01-22 10.1021/jp511037x Silver Sulfide 
Nanoclusters and the Superatom Model 2015-01-22 10.1103/PhysRevB.91.045204 Strain sensitivity of band gaps of Sn-containing semiconductors 2015-01-28 10.1021/ja5109968 A Critical Size for Emergence of Nonbulk Electronic and Geometric Structures in Dodecanethiolate-Protected Au Clusters 2015-02-02 10.1103/PhysRevB.91.081401 Calculation of the graphene C 1s core level binding energy 2015-02-05 10.1021/jz502637b Copper Induces a Core Plasmon in Intermetallic Au(144,145)-xCux(SR)(60) Nanoclusters 2015-02-05 10.1038/srep08276 Real-space Wigner-Seitz Cells Imaging of Potassium on Graphite via Elastic Atomic Manipulation 2015-02-15 10.1016/j.intermet.2014.09.006 First-principles investigation of the Cu-Ni, Cu-Pd, and Ni-Pd binary alloy systems 2015-02-28 10.1063/1.4913290 Interference enhanced thermoelectricity in quinoid type structures 2015-03-03 10.1103/PhysRevB.91.094104 High-pressure neutron scattering of the magnetoelastic Ni-Cr Prussian blue analog 2015-03-07 10.1063/1.4913739 Nanoplasmonics simulations at the basis set limit through completeness-optimized, local numerical basis sets 2015-03-09 10.1103/PhysRevB.91.125410 Density functional theory based direct comparison of coherent tunneling and electron hopping in redox-active single-molecule junctions 2015-03-14 10.1063/1.4906048 Chemical insight from density functional modeling of molecular adsorption: Tracking the bonding and diffusion of anthracene derivatives on Cu(111) with molecular orbitals 2015-03-14 10.1063/1.4908062 Selection of conformational states in self-assembled surface structures formed from an oligo(naphthylene-ethynylene) 3-bit binary switch 2015-03-15 10.1007/s10562-015-1495-6 CatMAP: A Software Package for Descriptor-Based Microkinetic Mapping of Catalytic Trends 2015-03-15 10.1016/j.susc.2014.11.006 Detection of adsorbate overlayer structural transitions using sum-frequency generation spectroscopy 2015-03-15 10.1021/ct501155k Removing External Degrees of Freedom from Transition-State Search 
Methods using Quaternions 2015-03-15 10.1021/nn506711a Molecule-like Photodynamics of Au-102(pMBA)(44) Nano cluster 2015-03-15 10.1088/2053-1583/2/1/014001 The growth of Fe clusters over graphene/Cu(111) 2015-03-18 10.1002/adfm.201404388 Molecular Heterojunctions of Oligo(phenylene ethynylene)s with Linear to Cruciform Framework 2015-03-24 10.1103/PhysRevB.91.115431 Localized surface plasmon resonance in silver nanoparticles: Atomistic first-principles time-dependent density-functional theory calculations 2015-03-26 10.1021/acs.jpca.5b01797 Ultrafast Structural Pathway of Charge Transfer in N,N,N ',N '-Tetramethylethylenediamine 2015-03-28 10.1063/1.4915265 Density functional theory and chromium: Insights from the dimers 2015-04-01 10.1016/j.cplett.2015.02.013 Relationship between unbranched alkane dimer interaction energies using different theoretical methods and correlations with thermodynamic properties 2015-04-02 10.1021/acs.jpcc.5b00734 Isolating a Reaction Intermediate in the Hydrogenation of 2,2,2-Trifluoroacetophenone on Pt(111) 2015-04-05 10.1002/jcc.23834 van der Waals Interactions are Critical in Car-Parrinello Molecular Dynamics Simulations of Porphyrin-Fullerene Dyads 2015-04-08 10.1002/aenm.201401082 Design Principles for Metal Oxide Redox Materials for Solar-Driven Isothermal Fuel Production 2015-04-15 10.1515/ntrev-2012-0047 On the interaction between gold and silver metal atoms and DNA/RNA nucleobases - a comprehensive computational study of ground state properties 2015-04-16 10.1021/jp5125475 Impacts of Copper Position on the Electronic Structure of [Au25-xCux(SH)(18)](-) Nanoclusters 2015-04-16 10.1103/PhysRevLett.114.156101 Strong Influence of Coadsorbate Interaction on CO Desorption Dynamics on Ru(0001) Probed by Ultrafast X-Ray Spectroscopy and Ab Initio Simulations 2015-04-24 10.1103/PhysRevB.91.165309 Band-gap engineering of functional perovskites through quantum confinement and tunneling 2015-04-30 10.1021/acs.jpcc.5b01068 The Role of the 
Anchor Atom in the Ligand of the Monolayer-Protected Au-25(XR)(18)(-) Nanocluster 2015-05-07 10.1021/acs.jpclett.5b00353 Two-Dimensional Metal Dichalcogenides and Oxides for Hydrogen Evolution: A Computational Screening Approach 2015-05-08 10.1088/0953-8984/27/17/175007 The effect of point defects on diffusion pathway within alpha-Fe 2015-05-13 10.1088/0953-8984/27/18/183202 Subsystem density-functional theory as an effective tool for modeling ground and excited states, their dynamics and many-body interactions 2015-05-14 10.1021/acs.jpcc.5b01580 Improved Tight-Binding Charge Transfer Model and Calculations of Energetics of a Step on the Rutile TiO2(110) Surface 2015-05-14 10.1038/srep10163 Silver (I) as DNA glue: Ag+-mediated guanine pairing revealed by removing Watson-Crick constraints 2015-05-15 10.1021/cs501673g A Consistent Reaction Scheme for the Selective Catalytic Reduction of Nitrogen Oxides with Ammonia 2015-05-21 10.1021/jp510926q Theoretical Analysis of the M12Ag32(SR)(40)(4-) and X@M12Ag32(SR)(30)(4-) Nanoclusters (M = Au, Ag; X = H, Mn) 2015-06-03 10.1103/PhysRevB.91.235201 Heats of formation of solids with error estimation: The mBEEF functional with and without fitted reference energies 2015-06-06 10.1098/rsfs.2014.0084 Rational design of metal nitride redox materials for solar-driven ammonia synthesis 2015-06-08 10.1002/cssc.201500239 Design Principles of Perovskites for Thermochemical Oxygen Separation 2015-06-11 10.1021/acs.jpcc.5b02950 Computational 2D Materials Database: Electronic Structure of Transition-Metal Dichalcogenides and Oxides 2015-06-11 10.1021/jp512627e Importance of the Reorganization Energy Barrier in Computational Design of Porphyrin-Based Solar Cells with Cobalt-Based Redox Mediators 2015-06-15 10.1002/anie.201410974 Ketene as a Reaction Intermediate in the Carbonylation of Dimethyl Ether to Methyl Acetate over Mordenite 2015-06-20 10.1016/j.electacta.2015.04.006 First principles study of (Cd, Hg, In, Tl, Sn, Pb, As, Sb, Bi, Se) 
modified Pt (111), Pt(100) and Pt(211) electrodes as CO oxidation catalysts 2015-06-24 10.1021/cr500551h Quantum-Chemical Characterization of the Properties and Reactivities of Metal-Organic Frameworks 2015-06-25 10.1021/acs.jpcc.5b04106 From Chemistry to Functionality: Trends for the Length Dependence of the Thermopower in Molecular Junctions 2015-07-01 10.1016/j.comptc.2015.03.026 Hydrogen oxidation reaction on Pd(111) electrode in alkaline media: Ab-initio DFT study of OH effects 2015-07-02 10.1021/acs.jpclett.5b01045 Impact of Ga-V Codoping on Interfacial Electron Transfer in Dye-Sensitized TiO2 2015-07-14 10.1021/acs.chemmater.5b00446 Accelerated DFT-Based Design of Materials for Ammonia Storage 2015-07-15 10.1002/ente.201500065 Carbon Dioxide Reforming of Methane using an Isothermal Redox Membrane Reactor 2015-07-15 10.1021/acs.nanolett.5b01251 Dielectric Genome of van der Waals Heterostructures 2015-07-15 10.1021/acscatal.5b00754 Tailoring Gold Nanoparticle Characteristics and the Impact on Aqueous-Phase Oxidation of Glycerol 2015-07-15 10.1021/cs501542n Mechanistic Pathway in the Electrochemical Reduction of CO2 on RuO2 2015-07-15 10.1595/205651315X687975 Atomic-Scale Modelling and its Application to Catalytic Materials Science Developing an interdisciplinary approach to modelling 2015-07-16 10.1021/acs.jpclett.5b01043 Electrochemical Barriers Made Simple 2015-07-30 10.1021/acs.jpcc.5b04977 Self-Metalation of Phthalocyanine Molecules with Silver Surface Atoms by Adsorption on Ag(110) 2015-08-05 10.1088/1367-2630/17/8/083006 Calculations of Al dopant in alpha-quartz using a variational implementation of the Perdew-Zunger self-interaction correction 2015-08-11 10.1103/PhysRevB.92.081109 Influence of molecular conformations on the electronic structure of organic charge transfer salts 2015-08-13 10.1021/acs.jpcc.5b04432 Role of Li2O2@Li2CO3 Interfaces on Charge Transport in Nonaqueous Li-Air Batteries 2015-08-15 10.1021/acs.nanolett.5b02188 Electric-Field 
Control of Interfering Transport Pathways in a Single-Molecule Anthraquinone Transistor 2015-08-20 10.1021/acs.jpcc.5b04985 Real-Time Study of CVD Growth of Silicon Oxide on Rutile TiO2(110) Using Tetraethyl Orthosilicate 2015-08-20 10.1021/acs.jpcc.5b05894 Optical Properties of Monolayer-Protected Aluminum Clusters: Time-Dependent Density Functional Theory Study 2015-08-21 10.1063/1.4928646 Indication of non-thermal contribution to visible femtosecond laser-induced CO oxidation on Ru(0001) 2015-08-26 10.1038/srep13382 Edge state magnetism in zigzag-interfaced graphene via spin susceptibility measurements 2015-08-27 10.1021/acs.jpcc.5b05392 Using G(0)W(0) Level Alignment to Identify Catechol's Structure on TiO2(110) 2015-08-27 10.1021/acs.jpcc.5b05580 Tuning the Schottky Barrier at the Graphene/MoS2 Interface by Electron Doping: Density Functional Theory and Many-Body Calculations 2015-08-27 10.1021/acs.jpcc.5b07622 Photodynamics of a Molecular Water-Soluble Nanocluster Identified as Au-130(pMBA)(50) 2015-08-27 10.1021/acs.jpcc.5b07672 Photodynamics of a Molecular Water-Soluble Nanocluster Identified as Au-130(pMBA)(50) 2015-09-05 10.1002/qua.24945 Unoccupied titanium 3d states due to subcluster formation in stoichiometric TiO2 nanoparticles 2015-09-10 10.1021/acs.jpcc.5b05824 Superatom Model for Ag-S Nanocluster with Delocalized Electrons 2015-09-14 10.1063/1.4919236 Adiabatic-connection fluctuation-dissipation DFT for the structural properties of solids-The renormalized ALDA and electron gas kernels 2015-09-15 10.1016/j.ccr.2015.05.002 The role of density functional theory methods in the prediction of nanostructured gas-adsorbent materials 2015-09-15 10.1016/j.molcata.2015.04.016 Identifying the active sites for CO dissociation on Fe-BCC nanoclusters 2015-09-15 10.1021/acsnano.5b03199 In Situ Detection of Active Edge Sites in Single-Layer MoS2 Catalysts 2015-09-20 10.1016/j.electacta.2015.01.136 Theoretical modeling of the PEMFC catalyst layer: A review of 
atomistic methods 2015-09-21 10.1103/PhysRevB.92.115140 Improved description of metal oxide stability: Beyond the random phase approximation with renormalized kernels 2015-10-01 10.1021/acs.jpclett.5b01746 Surface Tension Effects on the Reactivity of Metal Nanoparticles 2015-10-15 10.1016/j.susc.2015.03.011 Optical laser-induced CO desorption from Ru(0001) monitored with a free-electron X-ray laser: DFT prediction and X-ray confirmation of a precursor state 2015-10-15 10.1016/j.susc.2015.04.013 Dissociative adsorption of water on Au/MgO/Ag(001) from first principles calculations 2015-10-15 10.1021/acs.jpcc.5b06132 Coexistence of Square Pyramidal Structures of Oxo Vanadium (+5) and (+4) Species Over Low-Coverage VOX/TiO2 (101) and (001) Anatase Catalysts 2015-10-15 10.1021/acs.jpclett.5b01864 The Role of Hydrogen Bonds in the Stabilization of Silver-Mediated Cytosine Tetramers 2015-10-20 10.1016/j.electacta.2015.08.003 Adsorption of 4,4 '-bipyridine on the Cd(0001) single crystal electrode surface 2015-10-20 10.1016/j.electacta.2015.08.101 The electrocatalytic properties of doped TiO2 2015-11-18 10.1002/aenm.201500991 Toward an Active and Stable Catalyst for Oxygen Evolution in Acidic Media: Ti-Stabilized MnO2 2015-11-20 10.1103/PhysRevB.92.201205 Anharmonic stabilization and band gap renormalization in the perovskite CsSnI3 2015-11-23 10.1002/anie.201506026 Controlling Electrical Conductance through a pi-Conjugated Cruciform Molecule by Selective Anchoring to Gold Electrodes 2015-12-01 10.1103/PhysRevLett.115.236804 Quantized Evolution of the Plasmonic Response in a Stretched Nanorod 2015-12-03 10.1021/acs.jpcc.5b10407 Illusory Connection between Cross-Conjugation and Quantum Interference 2015-12-07 10.1063/1.4936409 Electron transport in molecular junctions with graphene as protecting layer 2015-12-15 10.1016/j.jallcom.2015.08.241 First-principles modelling of solid Ni-Rh (nickel-rhodium) alloys 2015-12-15 10.1016/j.ultramic.2015.07.011 Surface effects on mean 
inner potentials studied using density functional theory 2015-12-15 10.1021/acsnano.5b02850 Dynamic Diglyme-Mediated Self-Assembly of Gold Nanoclusters 2015-12-16 10.1088/0953-8984/27/49/495501 Exploiting the locality of periodic subsystem density-functional theory: efficient sampling of the Brillouin zone 2015-12-17 10.1103/PhysRevB.92.245123 Excitons in van der Waals heterostructures: The important role of dielectric screening 2015-12-24 10.1021/acs.jpcc.5b10182 Charge Transfer at the Hybrid Interfaces in the Presence of Water: A Theoretical Study 2016-01-13 10.1021/jacs.5b09401 Intercluster Reactions between Au-25(SR)(18) and Ag-44(SR)(30) 2016-01-15 10.1016/j.carbon.2015.09.062 Planar versus three-dimensional growth of metal nanostructures at graphene 2016-01-15 10.1016/j.cpc.2015.09.010 An object oriented Python interface for atomistic simulations 2016-01-15 10.1038/ncomms10401 Conformation and dynamics of the ligand shell of a water-soluble Au-102 nanoparticle 2016-01-15 10.1038/ncomms10545 Evidence and implications of direct charge excitation as the dominant mechanism in plasmon-mediated photocatalysis 2016-01-15 10.1039/c5cy01839k Gold assisted oxygen dissociation on a molybdenum-doped CaO(001) surface 2016-01-15 10.1039/c5dt03141a Magnetic structures of the low temperature phase of Mn-3(VO4)(2) - towards understanding magnetic ordering between adjacent Kagome layers 2016-01-15 10.1039/c5fd00203f Effects of particle size and edge structure on the electronic structure, spectroscopic features, and chemical properties of Au(111)-supported MoS2 nanoparticles 2016-01-15 10.1039/c5nr08122j Tuning Ag-29 nanocluster light emission from red to blue with one and two-photon excitation 2016-01-15 10.1039/c5nr08671j Tunable charge transfer properties in metal-phthalocyanine heterojunctions 2016-01-15 10.1039/c5sc03042k Charge transfer and ultrafast nuclear motions: the complex structural dynamics of an electronically excited triamine 2016-01-15 10.1039/c6cc06876f 
Selection of conformational states in surface self-assembly for a molecule with eight possible pairs of surface enantiomers 2016-01-15 10.1039/c6cp04194a Decoupling strain and ligand effects in ternary nanoparticles for improved ORR electrocatalysis 2016-01-15 10.1039/c6cp05681d A DFT study of the effect of SO4 groups on the properties of TiO2 nanoparticles 2016-01-15 10.1039/c6cy01820c Nitrate-nitrite equilibrium in the reaction of NO with a Cu-CHA catalyst for NH3-SCR 2016-01-15 10.1039/c6nr00931j Isolation of atomically precise mixed ligand shell PdAu24 clusters 2016-01-15 10.1039/c6nr05267c Covalently linked multimers of gold nanoclusters Au-102(p-MBA)(44) and Au-similar to 250(p-MBA)(n) 2016-01-15 10.1039/c6ra21668d Ab initio calculation of halide ligand passivation on PbSe quantum dot facets 2016-01-15 10.1149/2.0111609jes Electrochemical Investigation of 1-Ethyl-3-methylimidazolium Bromide and Tetrafluoroborate Mixture at Bi(111) Electrode Interface 2016-01-19 10.1016/j.ssc.2015.11.017 Strain engineering of electronic properties of transition metal dichalcogenide monolayers 2016-01-19 10.1103/PhysRevB.93.041302 Transformation of metallic boron into substitutional dopants in graphene on 6H-SiC(0001) 2016-01-25 10.1103/PhysRevB.93.035133 Hubbard-U corrected Hamiltonians for non-self-consistent random-phase approximation total-energy calculations: A study of ZnS, TiO2, and NiO 2016-01-26 10.1002/anie.201507631 First Principles Calculations for Hydrogenation of Acrolein on Pd and Pt: Chemoselectivity Depends on Steric Effects on the Surface 2016-01-28 10.1021/acs.jpcc.5b10025 Photoinduced Absorption within Single-Walled Carbon Nanotube Systems 2016-02-02 10.1103/PhysRevLett.116.056401 Simple Screened Hydrogen Model of Excitons in Two-Dimensional Materials 2016-02-09 10.1088/0957-4484/27/7/075501 B-40 fullerene as a highly sensitive molecular device for NH3 detection at low bias: a first-principles study 2016-02-10 10.1016/j.electacta.2016.01.070 Targeted design 
of alpha-MnO2 based catalysts for oxygen reduction 2016-02-14 10.1039/c5cp06969f The electrooxidation-induced structural changes of gold di-superatomic molecules: Au-23 vs. Au-25 2016-02-15 10.1007/s11082-015-0370-4 A model for terahertz plasmons in graphene 2016-02-15 10.1016/j.jcp.2015.12.014 RESCU: A real space electronic structure method 2016-02-15 10.1021/acscatal.5b02369 Catalytic Activities of Sulfur Atoms in Amorphous Molybdenum Sulfide for the Electrochemical Hydrogen Evolution Reaction 2016-02-15 10.1038/NNANO.2015.255 Field-induced conductance switching by charge-state alternation in organometallic single-molecule junctions 2016-02-24 10.1103/PhysRevB.93.085135 Self-consistent parametrization of DFT plus U framework using linear response approach: Application to evaluation of redox potentials of battery cathodes 2016-02-25 10.1021/acs.jpcc.5b11489 Comparative Ab-Initio Study of Substituted Norbornadiene-Quadricyclane Compounds for Solar Thermal Storage 2016-02-26 10.1038/srep21990 Unravelling Site-Specific Photo-Reactions of Ethanol on Rutile TiO2(110) 2016-02-28 10.1063/1.4942665 Structure and role of metal clusters in a metal-organic coordination network determined by density functional theory 2016-03-03 10.1080/10408436.2015.1053603 Understanding the Growth of Interfacial Reaction Product Layers between Dissimilar Materials 2016-03-15 10.1016/j.carbon.2015.10.098 Activation and mechanochemical breaking of C-C bonds initiate wear of diamond (110) surfaces in contact with silica 2016-03-15 10.1016/j.commatsci.2015.12.012 Plasmonic and dielectric properties of ideal graphene 2016-03-15 10.1021/acs.jctc.5b01053 A Unified AMBER-Compatible Molecular Mechanics Force Field for Thiolate-Protected Gold Nanoclusters 2016-03-15 10.1038/ncomms11013 Charge localization in a diamine cation provides a test of energy functionals and self-interaction correction 2016-03-31 10.1021/acs.jpcc.5b11211 When Conductance Is Less than the Sum of Its Parts: Exploring 
Interference in Multiconnected Molecules 2016-03-31 10.1021/acs.jpcc.5b12611 Theoretical Insight into the Internal Quantum Efficiencies of Polymer/C-60 and Polymer/SWNT Photovoltaic Devices 2016-04-01 10.1016/j.commatsci.2015.12.023 Modeling of complex ternary structures: Cu-Ni-Pd alloys via first-principles 2016-04-07 10.1021/acs.jpcc.5b11696 Structural Changes in RuO2 during Electrochemical Hydrogen Evolution 2016-04-14 10.1557/jmr.2016.99 Perturbation theory for weakly coupled two-dimensional layers 2016-04-15 10.1002/ejic.201501270 The Stability of Copper Oxo Species in Zeolite Frameworks 2016-04-15 10.1002/qua.25106 Update to ACE-molecule: Projector augmented wave method on lagrange-sinc basis set 2016-04-15 10.1016/j.jcp.2016.01.034 Development of an exchange-correlation functional with uncertainty quantification capabilities for density functional theory 2016-04-15 10.1016/j.susc.2015.11.018 A comparative study of diastereomeric complexes formed by a prochiral substrate and three structurally analogous chiral molecules on Pt(111) 2016-04-15 10.1021/acs.nanolett.5b04513 Defect-Tolerant Monolayer Transition Metal Dichalcogenides 2016-04-15 10.1038/NCHEM.2454 Effects of correlated parameters and uncertainty in electronic-structure-based chemical kinetic modelling 2016-04-21 10.1021/acs.jpcc.5b12448 Atomic-Scale Analysis of the RuO2/Water Interface under Electrochemical Conditions 2016-04-21 10.1021/acs.jpcc.6b02342 Correlation between sp(3)-to-sp(2) Ratio and Surface Oxygen Functionalities in Tetrahedral Amorphous Carbon (ta-C) Thin Film Electrodes and Implications of Their Electrochemical Properties 2016-04-27 10.1002/adma.201504650 Evidence of Porphyrin-Like Structures in Natural Melanin Pigments Using Electrochemical Fingerprinting 2016-04-27 10.1088/0022-3727/49/16/165303 Effect of interface geometry on electron tunnelling in Al/Al2O3/Al junctions 2016-04-27 10.1103/PhysRevB.93.165437 Band-gap engineering by Bi intercalation of graphene on Ir(111) 
2016-04-28 10.1021/acs.jpcc.6b01710 Stability and Polaronic Motion of Self-Trapped Holes in Silver Halides: Insight through DFT plus U Calculations 2016-04-28 10.1039/c5cp07624b The structure-function relationship for alumina supported platinum during the formation of ammonia from nitrogen oxide and hydrogen in the presence of oxygen 2016-04-28 10.1063/1.4947225 Pyridine adsorption and diffusion on Pt(111) investigated with density functional theory 2016-05-17 10.1038/srep25988 Prospect of quantum anomalous Hall and quantum spin Hall effect in doped kagome lattice Mott insulators 2016-05-21 10.1063/1.4950828 An approach to develop chemical intuition for atomistic electron transport calculations using basis set rotations 2016-05-28 10.1063/1.4951686 Ab initio electronic structure of quasi-two-dimensional materials: A "native" Gaussian-plane wave approach 2016-06-02 10.1021/acs.jpclett.6b00742 Self-Interaction Corrected Functional Calculations of a Dipole-Bound Molecular Anion 2016-06-02 10.1103/PhysRevB.93.241401 Effective gating and tunable magnetic proximity effects in two-dimensional heterostructures 2016-06-30 10.1103/PhysRevB.93.235162 mBEEF-vdW: Robust fitting of error estimation density functionals 2016-07-01 10.1103/PhysRevB.94.041401 Stark shift and electric-field-induced dissociation of excitons in monolayer MoS2 and hBN/MoS2 heterostructures 2016-07-07 10.1039/c6cp02274j The reaction mechanism for the SCR process on monomer V5+ sites and the effect of modified Bronsted acidity 2016-07-21 10.1021/acs.jpcc.6b04769 Toward a Janus Cluster: Regiospecific Decarboxylation of Ag-44(4-MBA)(30)@Ag Nanoparticles 2016-07-25 10.1088/1367-2630/18/7/073043 Exciton ionization in multilayer transition-metal dichalcogenides 2016-07-25 10.1103/PhysRevB.94.041112 Offset-corrected Delta-Kohn-Sham scheme for semiempirical prediction of absolute x-ray photoelectron energies in molecules and solids 2016-08-09 10.1371/journal.pone.0159168 Involving High School Students in 
Computational Physics University Research: Theory Calculations of Toluene Adsorbed on Graphene 2016-08-15 10.1016/j.enconman.2016.05.069 Computational study on oxynitride perovskites for CO2 photoreduction 2016-08-15 10.1021/acs.jctc.6b00456 Minimal Basis Iterative Stockholder: Atoms in Molecules for Force-Field Development 2016-08-16 10.1103/PhysRevB.94.064105 Quantification of uncertainty in first-principles predicted mechanical properties of solids: Application to solid ion conductors 2016-08-18 10.1021/acs.jpcc.6b06163 Disentangling Vacancy Oxidation on Metallicity-Sorted Carbon Nanotubes 2016-08-23 10.1021/acs.chemmater.6b01956 Thermodynamic Insight in the High-Pressure Behavior of UiO-66: Effect of Linker Defects and Linker Expansion 2016-09-05 10.1002/anie.201604269 Controlling the Adsorption of Carbon Monoxide on Platinum Clusters by Dopant-Induced Electronic Structure Modification 2016-09-07 10.1063/1.4961868 An automated nudged elastic band method 2016-09-15 10.1016/j.jmgm.2016.07.008 Open source molecular modeling 2016-09-15 10.1021/acs.jpclett.6b01543 Chemical Bond Activation Observed with an X-ray Laser 2016-09-15 10.1038/ncomms12809 Plasmonic twinned silver nanoparticles with molecular precision 2016-09-19 10.1002/anie.201605559 Charge Transport and Conductance Switching of Redox-Active Azulene Derivatives 2016-09-20 10.1103/PhysRevB.94.125136 Reduction of magnetic interlayer coupling in barlowite through isoelectronic substitution 2016-09-22 10.1021/acs.jpcc.6b02126 Surface Chemistry Controls Magnetism in Cobalt Nanoclusters 2016-09-28 10.1039/c6cp05188j Strong 1D localization and highly anisotropic electron-hole masses in heavy-halogen functionalized graphenes 2016-09-28 10.1063/1.4963338 Molecular spin on surface: From strong correlation to dispersion interactions 2016-09-29 10.1021/acs.jpcc.6b06141 Atomic Layer Deposition of Zinc Oxide: Diethyl Zinc Reactions and Surface Saturation from First-Principles 2016-09-29 10.1021/acs.jpcc.6b06254 
Gold/lsophorone Interaction Driven by Keto/Enol Tautomerization 2016-09-29 10.1103/PhysRevB.94.125444 Band-gap control in phosphorene/BN structures from first-principles calculations 2016-10-05 10.1002/qua.25193 Improvement of initial guess via grid-cutting for efficient grid-based density functional calculations 2016-10-06 10.1103/PhysRevB.94.155406 Efficient many-body calculations for two-dimensional materials using exact limits for the screened potential: Band gaps of MoS2, h-BN, and phosphorene 2016-10-10 10.1038/ncomms13040 Isotope analysis in the transmission electron microscope 2016-10-13 10.1021/acs.jpcc.6b07283 Atomically Thin Ordered Alloys of Transition Metal Dichalcogenides: Stability and Band Structures 2016-10-15 10.1016/j.cpc.2016.05.010 Amp: A modular approach to machine learning in atomistic simulations 2016-10-15 10.1016/j.jcat.2016.07.014 Comparison of mechanistic understanding and experiments for CO methanation over nickel 2016-10-15 10.1021/acscatal.6b01310 Reaction Pathways and Intermediates in Selective Ring Opening of Biomass-Derived Heterocyclic Compounds by Iridium 2016-10-15 10.1088/0953-8984/28/39/393001 Applications of large-scale density functional theory in biology 2016-10-15 10.15199/62.2016.10.11 Role of support in industrial catalytic processes. 
Theoretical modeling 2016-10-17 10.1021/acs.inorgchem.6b01840 Electron Transfer and Solvent-Mediated Electronic Localization in Molecular Photocatalysis 2016-10-17 10.1038/srep35605 Spectromicroscopy of C-60 and azafullerene C59N: Identifying surface adsorbed water 2016-10-18 10.1103/PhysRevB.94.155309 Ab initio density functional theory study on the atomic and electronic structure of GaP/Si(001) heterointerfaces 2016-10-21 10.1063/1.4964671 Accelerating the search for global minima on potential energy surfaces using machine learning 2016-10-27 10.1103/PhysRevB.94.155310 Strain dependence of band gaps and exciton energies in pure and mixed transition-metal dichalcogenides 2016-11-01 10.1016/j.jphotochem.2016.08.007 Does organic/organic interface mimic band bending by deforming structure? 2016-11-03 10.1021/acs.jpclett.6b01998 Band Gap Tuning and Defect Tolerance of Atomically Thin Two-Dimensional Organic-Inorganic Halide Perovskites 2016-11-07 10.1063/1.4966259 The influence of coronene super-hydrogenation on the coronene-graphite interaction 2016-11-08 10.1002/cctc.201601014 Relation between Hydrogen Evolution and Hydrodesulfurization Catalysis 2016-11-09 10.1021/jacs.6b09007 [Ag-67(SPhMe2)(32)(PPh3)(8)](3+): Synthesis, Total Structure, and Optical Properties of a Large Box-Shaped Silver Nanocluster 2016-11-09 10.1103/RevModPhys.88.045004 Time-dependent density-functional description of nuclear dynamics 2016-11-10 10.1021/acs.jpcc.6b06638 A DFT Structural Investigation of New Bimetallic PtSnx Surface Alloys Formed on the Pt(110) Surface and Their Interaction with Carbon Monoxide 2016-11-10 10.1038/ncomms13447 Structure-conserving spontaneous transformations between nanoparticles 2016-11-15 10.1016/j.cpc.2016.06.012 A wavelet-based Projector Augmented-Wave (PAW) method: Reaching frozen-core all-electron precision with a systematic, adaptive and localized wavelet basis set 2016-11-15 10.1016/j.jcat.2016.03.016 Mechanism of CO2 reduction by H-2 on Ru(0001) and 
general selectivity descriptors for late-transition metal catalysts 2016-11-15 10.1016/j.nanoen.2016.04.011 Beyond the top of the volcano? - A unified approach to electrocatalytic oxygen reduction and oxygen evolution 2016-11-15 10.1021/acs.jctc.6b00815 Implementation of Constrained DFT for Computing Charge Transfer Rates within the Projector Augmented Wave Method 2016-11-15 10.1021/acscatal.6b01848 Operando Raman Spectroscopy of Amorphous Molybdenum Sulfide (MoSx) during the Electrochemical Hydrogen Evolution Reaction: Identification of Sulfur Atoms as Catalytically Active Sites for H+ Reduction 2016-11-21 10.1039/c6cp04575h Investigating the coverage dependent behaviour of CO on Gd/Pt(111) 2016-11-24 10.1021/acs.jpcc.6b08986 Three-Dimensional Uracil Network with Sodium as a Linker 2016-12-02 10.1103/PhysRevB.94.235106 Designing in-plane heterostructures of quantum spin Hall insulators from first principles: 1T'-MoS2 with adsorbates 2016-12-14 10.1063/1.4971786 Outstanding performance of configuration interaction singles and doubles using exact exchange Kohn-Sham orbitals in real-space numerical grid method 2016-12-15 10.1002/advs.201600126 Single Crystal Sub-Nanometer Sized Cu-6(SR)(6) Clusters: Structure, Photophysical Properties, and Electrochemical Sensing 2016-12-15 10.1016/j.jcat.2016.09.004 Atomic scale analysis of sterical effects in the adsorption of 4,6-dimethyldibenzothiophene on a CoMoS hydrotreating catalyst 2016-12-15 10.1021/acsnano.6b04671 Symmetry-Driven Band Gap Engineering in Hydrogen Functionalized Graphene 2016-12-15 10.1021/acsnano.6b05823 Tomography of a Probe Potential Using Atomic Sensors on Graphene 2016-12-22 10.1021/acs.jpcc.6b09682 Field-Induced Conformational Change in a Single-Molecule-Graphene-Nanoribbon Junction: Effect of Vibrational Energy Redistribution 2016-12-29 10.1021/acs.jpcc.6b09019 pH in Grand Canonical Statistics of an Electrochemical Interface 2017-01-04 10.1103/PhysRevB.95.014101 Role of descriptors in predicting the 
dissolution energy of embedded oxides and the bulk modulus of oxide-embedded iron 2017-01-06 10.1103/PhysRevB.95.045407 Principles and simulations of high-resolution STM imaging with a flexible tip apex 2017-01-14 10.1088/1361-6455/50/1/015102 Electronic structure transformation in small bare Au clusters as seen by x-ray photoelectron spectroscopy 2017-01-15 10.1016/j.procs.2017.05.203 Performance Analysis of Parallel Python Applications 2017-01-15 10.1021/acscatal.6b02054 Tuning Catalytic Performance through a Single or Sequential Post Synthesis Reaction(s) in a Gas Phase 2017-01-15 10.1021/acscatal.6b02856 Pd/C-Catalyzed Hydrosilylation of Enals and Enones with Triethylsilane: Conformer Populations Control the Stereoselectivity 2017-01-15 10.1080/23746149.2017.1308230 Atomic scale simulations of heterogeneous electrocatalysis: recent advances 2017-01-15 10.1117/12.2283318 Ab initio calculation of transport properties between PbSe quantum dots facets with iodide ligands 2017-01-20 10.1103/PhysRevLett.118.036101 Orbitalwise Coordination Number for Predicting Adsorption Properties of Metal Nanocatalysts 2017-01-21 10.1039/c6cp06881b Modeling the active sites of Co-promoted MoS2 particles by DFT 2017-01-24 10.3762/bjnano.8.25 Colorimetric gas detection by the varying thickness of a thin film of ultrasmall PTSA-coated TiO2 nanoparticles on a Si substrate 2017-01-26 10.1021/acs.jpcc.6b10926 Fe-Ni Nanoparticles: A Multiscale First-Principles Study to Predict Geometry, Structure, and Catalytic Activity 2017-01-28 10.1039/c6nr08958e Grain boundary-mediated nanopores in molybdenum disulfide grown by chemical vapor deposition 2017-02-14 10.1063/1.4975193 Macroscopic dielectric function within time-dependent density functional theory-Real time evolution versus the Casida approach 2017-02-15 10.1007/s11581-016-1813-z Double-layer capacitance for a charged surface 2017-02-15 10.1016/j.jcat.2016.12.017 A complete reaction mechanism for standard and fast selective catalytic 
reduction of nitrogen oxides on low coverage VOx/TiO2(001) catalysts 2017-02-15 10.1021/acs.jctc.6b00809 Theory and Applications of Generalized Pipek-Mezey Wannier Functions 2017-02-15 10.1021/acs.jctc.6b01207 A Generalized Grid-Based Fast Multipole Method for Integrating Helmholtz Kernels 2017-02-15 10.1021/acs.nanolett.6b04275 Interlayer Excitons and Band Alignment in MoS2/hBN/WSe2 van der Waals Heterostructures 2017-02-15 10.1515/zkri-2016-1961 Synthesis and characterization of metastable transition metal oxides and oxide nitrides 2017-02-25 10.1016/j.jallcom.2016.11.153 Global structural optimization and growth mechanism of cobalt oxide nanoclusters by genetic algorithm with spin-polarized DFT 2017-02-28 10.1039/c6ta06644e Tunable thermodynamic activity of LaxSr1-xMnyAl1-yO3-delta (0 <= x <= 1, 0 <= y <= 1) perovskites for solar thermochemical fuel synthesis 2017-03-02 10.1021/acs.jpcc.6b11953 Adsorption and Activation of Water on Cuboctahedral Rhodium and Platinum Nanoparticles 2017-03-07 10.1039/c6cy01904h Reaction mechanism of dimethyl ether carbonylation to methyl acetate over mordenite a combined DFT/experimental study 2017-03-15 10.1002/sia.6106 Probing CO/Fe(100) surfaces from firstprinciples: structures, energetics, and vibrations 2017-03-15 10.1007/s40830-016-0089-5 Thin NiTi Films Deposited on Graphene Substrates 2017-03-15 10.1016/j.jallcom.2016.12.116 Influence of titanium doping on the Raman spectra of nanocrystalline ZnAl2O4 2017-03-15 10.1016/j.physe.2016.11.009 GW approach to electron-electron interactions within the Anderson impurity model: Kondo correlated quantum transport through two coupled molecules 2017-03-15 10.1021/acscatal.6b02590 STM Study of Ketopantolactone/(R)-1-(1-Naphthyl)ethylamine Complexes on Pt(111): Comparison of Prochiral and Enantiomeric Ratios and Examination of the Contribution of CH center dot center dot center dot OC Bonding 2017-03-15 10.1093/mnrasl/slw226 Missing Fe: hydrogenated iron nanoparticles 2017-03-16 
10.1021/acs.jpclett.6b03014 Stabilization of the Perovskite Phase of Formamidinium Lead Triiodide by Methylammonium, Cs, and/or Rb Doping 2017-03-22 10.1021/acsami.6b12261 First-Principles Study of the Band Diagrams and Schottky-Type Barrier Heights of Aqueous Ta3N5 Interfaces 2017-03-23 10.1021/acs.jpcc.6b09289 Synergetic Surface Sensitivity of Photoelectrochemical Water Oxidation on TiO2 (Anatase) Electrodes 2017-03-29 10.1021/jacs.7b01081 Quantitative and Atomic-Scale View of CO-Induced Pt Nanoparticle Surface Reconstruction at Saturation Coverage via DFT Calculations Coupled with in Situ TEM and IR 2017-03-30 10.1038/s41524-017-0017-z A high-throughput framework for determining adsorption energies on solid surfaces 2017-04-06 10.1021/acs.jpcc.7b02005 A Strategy to Suppress Phonon Transport in Molecular Junctions Using pi-Stacked Systems 2017-04-10 10.1002/cssc.201601869 High Redox Capacity of Al-Doped La1-xSrxMnO3- Perovskites for Splitting CO2 and H2O at Mn-Enriched Surfaces 2017-04-11 10.1038/s41699-017-0003-9 Probing the local nature of excitons and plasmons in few-layer MoS2 2017-04-14 10.1039/c6cp06965g Facile embedding of single vanadium atoms at the anatase TiO2(101) surface 2017-04-15 10.1016/j.commatsci.2017.01.031 Growth of two-dimensional Au patches in graphene pores: A density-functional study 2017-04-20 10.1021/acs.jpcc.7b00283 Effects of Aromaticity and Connectivity on the Conductance of Five-Membered Rings 2017-04-20 10.1103/PhysRevB.95.165130 Real-space and plane-wave hybrid method for electronic structure calculations for two-dimensional materials 2017-04-24 10.1038/ncomms15133 Band structure engineered layered metals for low-loss plasmonics 2017-05-01 10.1016/j.cattod.2017.02.004 Screening the bulk properties and reducibility of Fe-doped Mn2O3 from first principles calculations 2017-05-01 10.1039/c6mh00465b Unexpected length dependence of excited-state charge transfer dynamics for surface-confined perylenediimide ensembles 2017-05-09 
10.1002/cssc.201601632 Determination of Conduction and Valence Band Electronic Structure of LaTiOxNy Thin Film 2017-05-15 10.1007/s11244-016-0701-0 Understanding Structure and Stability of Monoclinic Zirconia Surfaces from First-Principles Calculations 2017-05-15 10.1016/j.cattod.2016.09.023 Computational screening of perovskite redox materials for solar thermochemical ammonia synthesis from N-2 and H2O 2017-05-15 10.1016/j.cplett.2017.02.018 Catalysis in real time using X-ray lasers 2017-05-21 10.1063/1.4983697 Single-molecule spin orientation control by an electric field 2017-05-25 10.1021/acs.jpcc.6b10618 Analysis of the Electronic Structure of Non-Spherical Ligand-Protected Metal Nanoclusters: The Case of a Box-Like Ag-67 2017-05-25 10.1021/acs.jpcc.6b12004 Isophorone on Au/MgO/Ag(001): Physisorption with Electrostatic Site Selection 2017-06-01 10.1002/anie.201701135 Reversible Supracolloidal Self-Assembly of Cobalt Nanoparticles to Hollow Capsids and Their Superstructures 2017-06-07 10.1039/c7cp01440f Stability, electronic structure, and optical properties of protected gold-doped silver Ag29-xAux (x=0-5) nanoclusters 2017-06-08 10.1021/acs.jpcc.7b01819 Comparative Analysis of the Electronic Structure and Nonlinear Optical Susceptibility of alpha-TeO2 and beta-TeO3 Crystals 2017-06-15 10.1016/j.cattod.2017.02.028 Single site porphyrine-like structures advantages over metals for selective electrochemical CO2 reduction 2017-06-15 10.1016/j.cpc.2017.02.001 Lattice dynamics calculations based on density-functional perturbation theory in real space 2017-06-15 10.1016/j.micromeso.2017.02.065 Distribution of open sites in Sn-Beta zeolite 2017-06-15 10.1021/acsnano.7b01912 Manifestation of Geometric and Electronic Shell Structures of Metal Clusters in Intercluster Reactions 2017-06-15 10.1038/NCHEM.2753 Monitoring interconversion between stereochemical states in single chirality-transfer complexes on a platinum surface 2017-06-15 10.1088/2053-1583/aa6531 Band structure 
engineering in van der Waals heterostructures via dielectric screening: the G Delta W method 2017-06-15 10.1126/sciadv.1700176 Buckyball sandwiches 2017-06-15 10.24200/sci.2017.4143 Petahertz-frequency plasmons in graphene nanopore and their application to nanoparticle sensing 2017-06-28 10.1021/jacs.7b04755 Formation of Germa-ketenimine on the Ge(100) Surface by Adsorption of tert-Butyl Isocyanide 2017-06-28 10.3389/fninf.2017.00040 The NEST Dry-Run Mode: Efficient Dynamic Analysis of Neuronal Network Simulation Code 2017-06-29 10.1021/acs.jpcc.7b05270 Two-Dimensional MXenes as Catalysts for Electrochemical Hydrogen Evolution: A Computational Screening Study 2017-06-30 10.1038/s41598-017-04683-9 Computational insights and the observation of SiC nanograin assembly: towards 2D silicon carbide 2017-07-01 10.1016/j.camwa.2016.12.003 Multi-domain muffin tin finite element density functional calculations for small molecules 2017-07-12 10.1088/1361-648X/aa680e The atomic simulation environment-a Python library for working with atoms 2017-07-15 10.1021/acs.nanolett.7b01592 The Role of Through-Space Interactions in Modulating Constructive and Destructive Interference Effects in Benzene 2017-07-17 10.1103/PhysRevB.96.045419 Adsorption sites of individual metal atoms on ultrathin MgO(100) films 2017-07-18 10.1103/PhysRevB.96.035422 Anisotropic plasmons, excitons, and electron energy loss spectroscopy of phosphorene 2017-07-20 10.1021/acs.jpcc.7b02608 Defect Chemistry and Electrical Conductivity of Sm-Doped La1-xSrxCoO3-delta for Solid Oxide Fuel Cells 2017-07-31 10.1103/PhysRevMaterials.1.024411 Rare-earth/transition-metal magnetic interactions in pristine and (Ni,Fe)-doped YCo5 and GdCo5 2017-08-01 10.1038/s41598-017-07456-6 Excitation-dependent fluorescence from atomic/molecular layer deposited sodium-uracil thin films 2017-08-07 10.1039/c7ta02081c Design principles of perovskites for solar-driven thermochemical splitting of CO2 2017-08-14 10.1088/1361-6455/aa7d2c 
Spectroscopic signatures of triplet states in acenes 2017-08-15 10.1021/acsnano.7b03484 Supramolecular Corrals on Surfaces Resulting from Aromatic Interactions of Nonplanar Triazoles 2017-08-15 10.1103/PhysRevB.96.085421 Quantum interference in coherent tunneling through branched molecular junctions containing ferrocene centers 2017-08-17 10.1021/acs.jpclett.7b01549 Real-Time Elucidation of Catalytic Pathways in CO Hydrogenation on Ru 2017-08-21 10.1038/s41467-017-00385-y Visualizing atomic-scale redox dynamics in vanadium oxide-based catalysts 2017-08-22 10.1038/s41467-017-00412-y Layered van der Waals crystals with hyperbolic light dispersion 2017-08-24 10.1021/acs.jpcc.7b07437 II-IV-V-2 and III-III-V-2 Polytypes as Light Absorbers for Single Junction and Tandem Photovoltaic Devices 2017-08-31 10.1021/acs.jpcc.7b04974 Role of the Band Gap for the Interaction Energy of Coadsorbed Fragments 2017-09-01 10.1088/1361-651X/aa7320 libvdwxc: a library for exchange-correlation functionals in the vdW-DF family 2017-09-07 10.1021/acs.jpcc.7b02500 Charge Storage Mechanism of RuO2/Water Interfaces 2017-09-08 10.1103/PhysRevMaterials.1.044003 Graphene/MoS2 heterostructures as templates for growing two-dimensional metals: Predictions from ab initio calculations 2017-09-14 10.1039/c7cp03576d Modelling pH and potential in dynamic structures of the water/Pt(111) interface on the atomic scale 2017-09-14 10.1063/1.4991033 Electronic structure of boron based single and multi-layer two dimensional materials 2017-09-15 10.1002/sia.6238 Unsaturated surface in CO saturation 2017-09-15 10.1016/j.susc.2017.05.007 Initial stages of Lutetium growth on Si (111)-7 x 7 probed by STM and core-level photoelectron spectroscopy 2017-09-20 10.1038/s41598-017-12009-y Atomic Scale Formation Mechanism of Edge Dislocation Relieving Lattice Strain in a GeSi overlayer on Si(001) 2017-09-21 10.1039/c7nj02365k Combining theory and experiment in the design of a lead-free ((CH3NH3)(2)AgBiI6) double perovskite 
2017-09-21 10.1039/c7py00987a Alkyl-substituted spiropyrans: electronic effects, model compounds and synthesis of aliphatic main-chain copolymers 2017-09-22 10.1103/PhysRevB.96.125143 Assessing the performance of the random phase approximation for exchange and superexchange coupling constants in magnetic crystalline solids 2017-10-05 10.1103/PhysRevB.96.155407 Effect of edge plasmons on the optical properties of MoS2 monolayer flakes 2017-10-06 10.3762/bjnano.8.209 Electronic structure, transport, and collective effects in molecular layered systems 2017-10-07 10.1039/c7ta04063f Trends in the phase stability and thermochemical oxygen exchange of ceria doped with potentially tetravalent metals 2017-10-14 10.1039/c7cp05244h Coherence in nonradiative transitions: internal conversion in Rydberg-excited N-methyl and N-ethyl morpholine 2017-10-15 10.1016/j.jcis.2017.06.017 Investigation of anti-solvent induced optical properties change of cesium lead bromide iodide mixed perovskite (CsPbBr3-xIx) quantum dots 2017-10-15 10.1016/j.susc.2017.06.012 Methylbenzenes on graphene 2017-10-15 10.1021/acs.jctc.7b00589 Kohn-Sham Decomposition in Real-Time Time-Dependent Density-Functional Theory: An Efficient Tool for Analyzing Plasmonic Excitations 2017-10-15 10.1021/acsomega.7b01089 Silver-Mediated Double Helix: Structural Parameters for a Robust DNA Building Block 2017-10-16 10.1038/s41467-017-01035-z Understanding activity and selectivity of metal-nitrogen-doped carbon catalysts for electrochemical reduction of CO2 2017-10-17 10.1021/acs.langmuir.7b02207 Influence of CH center dot center dot center dot N Interaction in the Self-Assembly of an Oligo(isoquinolyne-ethynylyne) Molecule with Distinct Conformational States 2017-10-27 10.1103/PhysRevB.96.165436 Nanoribbon edges of transition-metal dichalcogenides: Stability and electronic properties 2017-10-28 10.1039/c7cp02978k Ultrafast X-ray absorption study of longitudinal-transverse phonon coupling in electrolyte aqueous solution 
2017-11-09 10.1021/acs.jpcc.7b04685 First-Principles Screening of Lead-Free Methylammonium Metal Iodine Perovskites for Photovoltaic Application 2017-11-15 10.1002/pssb.201700188 Structure and Energetics of Embedded Si Patterns in Graphene 2017-11-15 10.1007/s12678-017-0375-9 New Platinum Alloy Catalysts for Oxygen Electroreduction Based on Alkaline Earth Metals 2017-11-15 10.3139/146.111517 Si- and Sn-containing SiOCN-based nanocomposites as anode materials for lithium ion batteries: synthesis, thermodynamic characterization and modeling 2017-11-16 10.1021/acs.jpcc.7b08269 Stability and Effects of Subsurface Oxygen in Oxide-Derived Cu Catalyst for CO2 Reduction 2017-11-16 10.1021/acs.jpcc.7b08971 Methane Adsorption in Zr-Based MOFs: Comparison and Critical Evaluation of Force Fields 2017-11-17 10.1002/cphc.201700736 Electrochemical CO2 Reduction: A Classification Problem 2017-11-21 10.1063/1.4999779 Structure and electronic states of a graphene double vacancy with an embedded Si dopant 2017-11-29 10.1021/jacs.7b10329 Two-Dimensional Boron Polymorphs for Visible Range Plasmonics: A First-Principles Exploration 2017-11-30 10.1103/PhysRevB.96.205206 Simple vertex correction improves GW band energies of bulk and two-dimensional crystals 2017-12-07 10.1021/acs.jpcc.7b10154 Analysis of Localized Surface Plasmon Resonances in Spherical Jellium Clusters and Their Assemblies 2017-12-08 10.3762/bjnano.8.265 Patterning of supported gold monolayers via chemical lift-off lithography 2017-12-13 10.1002/smll.201702379 Reversible Anion-Driven Switching of an Organic 2D Crystal at a Solid-Liquid Interface 2017-12-15 10.1016/j.cocom.2017.08.008 Investigation of density fluctuations in graphene using the fluctuation-dissipation relations 2017-12-15 10.1021/acs.jctc.7b00621 Grid-Based Projector Augmented Wave (GPAW) Implementation of Quantum Mechanics/Molecular Mechanics (QM/MM) Electrostatic Embedding and Application to a Solvated Diplatinum Complex 2017-12-15 
10.1021/acsnano.7b07787 Dynamic Stabilization of the Ligand-Metal Interface in Atomically Precise Gold Nanoclusters Au-68 and Au-144 Protected by meta-Mercaptobenzoic Acid 2017-12-15 10.1039/c7ee02702h Sulfide perovskites for solar energy conversion applications: computational screening and synthesis of the selected compound LaYS3 2017-12-21 10.1103/PhysRevB.96.241411 Symmetry-forbidden intervalley scattering by atomic defects in monolayer transition-metal dichalcogenides 2017-12-28 10.1021/acs.jpca.7b10159 Effective Polarizability Models 2018-01-10 10.1103/PhysRevB.97.035411 Atlas for the properties of elemental two-dimensional metals 2018-01-11 10.1021/acs.jpcc.7b10760 The Influence of Inert Ions on the Reactivity of Manganese Oxides 2018-01-11 10.1021/acs.jpcc.7b10912 Chemical Oxidation of Graphite: Evolution of the Structure and Properties 2018-01-15 10.1016/j.apsusc.2017.09.124 Multivariate analysis for scanning tunneling spectroscopy data 2018-01-15 10.1016/j.solidstatesciences.2017.11.007 O/F-substitution in BiVO4: Defect structures, phase stability and optical properties 2018-01-15 10.1021/acsnano.7b07079 Exciting H-2 Molecules for Graphene Functionalization 2018-01-15 10.1080/10420150.2018.1442456 Modelling relativistic effects in momentum-resolved electron energy loss spectroscopy of graphene 2018-01-15 10.1117/12.2319498 Computational Plasmonics with Applications to Bulk and Nanosized Systems 2018-01-22 10.1002/anie.201709142 A Simply Synthesized, Tough Polyarylene with Transient Mechanochromic Response 2018-01-23 10.1002/cssc.201701653 Revealing the Chemistry between Band Gap and Binding Energy for Lead-/Tin-Based Trihalide Perovskite Solar Cell Semiconductors 2018-01-24 10.1103/PhysRevB.97.024112 Neural-network-based depth-resolved multiscale structural optimization using density functional theory and electron diffraction data 2018-01-28 10.1063/1.5007739 Driving spin transition at interface: Role of adsorption configurations 2018-02-13 
10.1021/acs.chemmater.7b04618 Adiabatic and Nonadiabatic Charge Transport in Li-S Batteries 2018-02-14 10.1039/c7cp06831j Ligand mediated evolution of size dependent magnetism in cobalt nanoclusters 2018-02-15 10.1002/wcms.1340 PYSCF: the Python-based simulations of chemistry framework 2018-02-15 10.1007/s00607-017-0573-6 MPI-Performance-Aware-Reallocation: method to optimize the mapping of processes applied to a cloud infrastructure 2018-02-15 10.1016/j.cplett.2017.12.069 Adsorption of NO on Fe3O4(111) 2018-02-15 10.1016/j.jcat.2017.12.001 Modeling the adsorption of sulfur containing molecules and their hydrodesulfurization intermediates on the Co-promoted MoS2 catalyst by DFT 2018-02-15 10.1021/acsenergylett.7b01312 High-Throughput Computational Assessment of Previously Synthesized Semiconductors for Photovoltaic and Photoelectrochemical Devices 2018-02-15 10.1021/acsnano.7b08650 Local Plasmon Engineering in Doped Graphene 2018-02-28 10.1063/1.5009405 Cyanographone and isocyanographone - Two asymmetrically functionalized graphene pseudohalides and their potential use in chemical sensing 2018-03-15 10.1016/j.susc.2017.11.003 The structure of reconstructed chalcopyrite surfaces 2018-03-15 10.1021/acs.jpcc.7b10614 DFT-Computed Trends in the Properties of Bimetallic Precious Metal Nanoparticles with Core@Shell Segregation 2018-03-15 10.1021/acsnano.7b07759 Fullerene-Functionalized Monolayer-Protected Silver Clusters: [Ag-29(BDT)(12)(C-60)(n)](3-) (n=1-9) 2018-03-15 10.1103/PhysRevApplied.9.034010 Electronic Transport Properties of Carbon-Nanotube Networks: The Effect of Nitrate Doping on Intratube and Intertube Conductances 2018-03-15 10.1107/S1600577517016964 Anisotropy enhanced X-ray scattering from solvated transition metal complexes 2018-03-21 10.1039/c7cp08181b Understanding proton capture and cation-induced dimerization of [Ag-29(BDT)(12)](3-) clusters by ion mobility mass spectrometry 2018-03-28 10.1063/1.5017581 Rotation and diffusion of naphthalene on 
Pt(111) 2018-03-28 10.1063/1.5021407 NH3 adsorption on anatase-TiO2(101) 2018-03-29 10.1021/acs.jpcc.8b01046 DFT Prediction of Enhanced Reducibility of Monoclinic Zirconia upon Rhodium Deposition 2018-04-05 10.1021/acs.jpcc.8b00301 Solution Structure and Ultrafast Vibrational Relaxation of the PtPOP Complex Revealed by Delta SCF-QM/MM Direct Dynamics Simulations 2018-04-12 10.1021/acs.jpcc.7b11469 Atomic Layer Deposition of Zinc Oxide: Study on the Water Pulse Reactions from First-Principles 2018-04-12 10.1021/acs.jpcc.8b02448 First-Principles Screening of All-Inorganic Lead-Free ABX(3) Perovskites 2018-04-15 10.1016/j.jcat.2018.01.035 Ab initio study of CO2 hydrogenation mechanisms on inverse ZnO/Cu catalysts 2018-04-15 10.1016/j.mcat.2018.01.023 Ab initio studies of ethanol dehydrogenation at binary AuPd nanocatalysts 2018-04-15 10.1038/s41929-018-0047-z Electrocatalytic transformation of HF impurity to H-2 and LiF in lithium-ion batteries 2018-04-15 10.7569/JRM.2017.634183 Tuning Intermolecular Interaction Between Lignin and Carbon Nanotubes in Fiber Composites - A Combined Experimental and Ab-Initio Modeling Study 2018-04-21 10.1039/c7cp07939g On the thickness of the double layer in ionic liquids 2018-05-04 10.1103/PhysRevB.97.195406 Structural details of Al/Al2O3 junctions and their role in the formation of electron tunnel barriers 2018-05-15 10.1016/j.actamat.2018.03.005 Ab initio simulation of hydrogen-induced decohesion in cementite-containing microstructures 2018-05-15 10.1016/j.cplett.2018.03.040 Insight into the adsorption of chloramphenicol on a vermiculite surface 2018-05-15 10.1021/acs.jctc.8b00039 Tight-Binding Approximation-Enhanced Global Optimization 2018-05-15 10.1021/acsnano.8b01191 Implanting Germanium into Graphene 2018-05-16 10.1103/PhysRevB.97.195424 Neural-network-enhanced evolutionary algorithm applied to supported metal nanoparticles 2018-05-17 10.1002/zaac.201800135 Topology and Equilibrium Analysis of the Monovalent Aluminum Compound 
Al4Cp4*(Ph) 2018-05-17 10.1021/acs.jpcc.8b01713 Oxidation of Ethylene Carbonate on Li Metal Oxide Surfaces 2018-05-21 10.1103/PhysRevB.97.195435 Quantum plasmons with optical-range frequencies in doped few-layer graphene 2018-06-07 10.1038/s41467-018-04615-9 Visualizing hydrogen-induced reshaping and edge activation in MoS2 and Co-promoted MoS2 catalyst clusters 2018-06-14 10.1021/acs.jpcc.8b02165 Spin Uncoupling in Chemisorbed OCCO and CO2: Two High-Energy Intermediates in Catalytic CO2 Reduction 2018-06-15 10.1016/j.softx.2017.11.002 Recent developments in LIBXC - A comprehensive library of functionals for density functional theory 2018-06-15 10.1021/acsnano.8b00125 Topotactic Growth of Edge-Terminated MoS2 from MoO2 Nanocrystals 2018-06-15 10.1093/mnras/sty607 Tetrahedral hydrocarbon nanoparticles in space: X-ray spectra 2018-06-20 10.1103/PhysRevB.97.235136 From semilocal density functionals to random phase approximation renormalized perturbation theory: A methodological assessment of structural phase transitions 2018-06-22 10.1002/cssc.201800225 Combined DFT and Differential Electrochemical Mass Spectrometry Investigation of the Effect of Dopants in Secondary Zinc-Air Batteries 2018-06-22 10.1140/epjb/e2018-90166-9 Gradient-level and nonlocal density functional descriptions of Cu-Au intermetallic compounds 2018-07-05 10.1021/acs.jpcc.8b00464 Controlling Band Alignment in Molecular Junctions: Utilizing Two-Dimensional Transition-Metal Dichalcogenides as Electrodes for Thermoelectric Devices 2018-07-07 10.1039/c8cp03052a Incorporation of oxygen atoms as a mechanism for photoluminescence enhancement of chemically treated MoS2 2018-07-09 10.1002/cctc.201800293 Racemization of Secondary-Amine-Containing Natural Products Using Heterogeneous Metal Catalysts 2018-07-12 10.1021/acs.jpcc.8b04502 Toward an Accurate Tight-Binding Model of Graphene's Electronic Properties under Strain 2018-07-15 10.1016/j.cpc.2018.03.001 Brillouin zone grid refinement for highly resolved 
ab initio THz optical properties of graphene 2018-07-15 10.1021/acs.jctc.8b00238 Charge Transfer Excitations with Range Separated Functionals Using Improved Virtual Orbitals 2018-07-21 10.1039/c8cp02590h DFT investigation on the adsorption of munition compounds on alpha-Fe2O3: similarity and differences with alpha-Al2O3 2018-07-21 10.1063/1.5029329 Effects of the cooperative interaction on the diffusion of hydrogen on MgO(100) 2018-07-23 10.1103/PhysRevMaterials.2.074005 Intrinsic core level photoemission of suspended monolayer graphene 2018-07-27 10.1038/s41467-018-05372-5 Real-space imaging with pattern recognition of a ligand-protected Ag-374 nanocluster at sub-molecular resolution 2018-07-30 10.1002/jcc.25208 Real-space grid representation of momentum and kinetic energy operators for electronic structure calculations 2018-08-02 10.1021/acs.jpclett.8b01790 Thermodynamic and Kinetic Limitations for Peroxide and Superoxide Formation in Na-O-2 Batteries 2018-08-15 10.1002/qua.25622 On the achievement of high fidelity and scalability for large-scale diagonalizations in grid-based DFT simulations 2018-08-15 10.1016/j.nanoen.2018.05.052 Significantly enhanced electrocatalytic activity of Au-25 clusters by single platinum atom doping 2018-08-15 10.1021/acs.nanolett.8b02406 Electron-Beam Manipulation of Silicon Dopants in Graphene 2018-08-15 10.3390/ijms19082346 Optical Properties of Silver-Mediated DNA from Molecular Dynamics and Time Dependent Density Functional Theory 2018-08-15 10.7567/JJAP.57.08RF01 Ab initio calculation of electronic transport properties between PbSe quantum dots facets with halide ligands (Cl, Br, I) 2018-08-16 10.1021/acs.jpclett.8b01908 Silver-Stabilized Guanine Duplex: Structural and Optical Properties 2018-08-22 10.1038/s41467-018-05584-9 Co-crystallization of atomically precise metal nanoparticles driven by magic atomic and electronic shells 2018-08-22 10.1103/PhysRevLett.121.086804 3D Dirac Plasmons in the Type-II Dirac Semimetal PtTe2 
2018-08-23 10.1021/acs.jpcc.8b04753 Role of Disorder in NaO2 and Its Implications for Na-O-2 Batteries 2018-08-28 10.1063/1.5044765 Toward quantum-chemical method development for arbitrary basis functions 2018-08-30 10.1021/acs.jpcc.8b03220 Detection of [Au-25(PET)(18)(O-2)(n)](-) ( n=1, 2, 3) Species by Mass Spectrometry 2018-09-06 10.1021/acs.jpcc.8b02191 X-ray Absorption Near-Edge Spectroscopy Calculations on Pristine and Modified Chalcopyrite Surfaces 2018-09-15 10.1016/j.cpc.2018.04.010 Kohn-Sham approach for fast hybrid density functional calculations in real-space numerical grid methods 2018-09-15 10.1093/nsr/nwy034 Thiol-stabilized atomically precise, superatomic silver nanoparticles for catalysing cycloisomerization of alkynyl amines 2018-09-24 10.1103/PhysRevB.98.115433 Beyond ideal two-dimensional metals: Edges, vacancies, and polarizabilities 2018-09-28 10.1103/PhysRevB.98.125206 Direct and indirect excitons in boron nitride polymorphs: A story of atomic configuration and electronic correlation 2018-10-07 10.1063/1.5048290 Machine learning enhanced global optimization by clustering local environments to enable bundled atomic energies 2018-10-09 10.1103/PhysRevMaterials.2.105402 Promising quaternary chalcogenides as high-band-gap semiconductors for tandem photoelectrochemical water splitting devices: A computational screening approach 2018-10-15 10.1021/acscatal.8b01432 Quantifying Confidence in DFT Predicted Surface Pourbaix Diagrams and Associated Reaction Pathways for Chlorine Evolution 2018-10-15 10.1021/acscatal.8b01615 Cooperative Catalysis by Surface Lewis Acid/Silanol for Selective Fructose Etherification on Sn-SPP Zeolite 2018-10-15 10.1021/acscatal.8b02022 Oxygen Evolution Reaction on Perovskites: A Multieffect Descriptor Study Combining Experimental and Theoretical Methods 2018-10-15 10.1088/2053-1583/aacfc1 The Computational 2D Materials Database: high-throughput modeling and discovery of atomically thin crystals 2018-10-15 
10.1088/2053-1583/aadc28 Unraveling the not-so-large trion binding energy in monolayer black phosphorus 2018-10-28 10.1063/1.5051510 An extended chiral surface coordination network based on Ag-7-clusters 2018-11-01 10.1021/acs.jpca.8b07923 Point Group Symmetry Analysis of the Electronic Structure of Bare and Protected Metal Nanocrystals 2018-11-01 10.1021/acs.jpcc.8b05792 Efficient Charge Separation in 2D Janus van der Waals Structures with Built-in Electric Fields and Intrinsic p-n Doping 2018-11-13 10.1021/acs.chemmater.8b03353 Reactivity of Amorphous Carbon Surfaces: Rationalizing the Role of Structural Motifs in Functionalization Using Machine Learning 2018-11-15 10.1016/j.apcatb.2018.05.038 Importance of the Cu oxidation state for the SO2-poisoning of a Cu-SAPO-34 catalyst in the NH3-SCR reaction 2018-11-15 10.1016/j.cpc.2018.05.016 Tensor decompositions for the bubbles and cube numerical framework 2018-11-15 10.1016/j.nanoen.2018.08.027 CO2 electroreduction on copper- cobalt nanoparticles: Size and composition effect 2018-11-15 10.1021/acs.jpclett.8b02253 Dipole-Induced Transition Orbitals: A Novel Tool for Investigating Optical Transitions in Extended Systems 2018-11-15 10.1021/acsnano.8b03754 In Pursuit of 2D Materials for Maximum Optical Response 2018-11-15 10.1103/PhysRevLett.121.206003 Water Dissociation and Hydroxyl Ordering on Anatase TiO2 (001)-(1 x 4) 2018-11-16 10.1103/PhysRevMaterials.2.113603 Temperature and loading rate dependent rupture forces from universal paths in mechanochemistry 2018-11-26 10.1103/PhysRevMaterials.2.114007 Understanding trends in lithium binding at two-dimensional materials 2018-12-05 10.1016/j.apcatb.2018.05.091 Local dynamics of copper active sites in zeolite catalysts for selective catalytic reduction of NOx with NH3 2018-12-06 10.1103/PhysRevMaterials.2.125401 Robust high-fidelity DFT study of the lithium-graphite phase diagram 2018-12-10 10.1002/chem.201803276 The Influence of Tin(II) Incorporation on Visible Light 
Absorption and Photocatalytic Activity in Defect-Pyrochlores 2018-12-15 10.1021/acs.nanolett.8b02919 Atom-by-Atom Construction of a Cyclic Artificial Molecule in Silicon 2018-12-15 10.1021/acscatal.8b02596 Unraveling the Role of the Rh-ZrO2 Interface in the Water-Gas-Shift Reaction via a First-Principles Microkinetic Study 2018-12-21 10.1063/1.5044579 Atom-specific activation in CO oxidation 2019-01-07 10.1063/1.5064602 Phonon properties and thermal conductivity from first principles, lattice dynamics, and the Boltzmann transport equation 2019-01-07 10.1063/1.5064687 Electric-field control of spin orientation of manganocene: An insight into molecule-substrate interactions 2019-01-09 10.1103/PhysRevB.99.045414 Broadband excitation spectrum of bulk crystals and thin layers of PtTe2 2019-01-15 10.1002/qua.25671 GPUs as boosters to analyze scalar and vector fields in quantum chemistry 2019-01-15 10.1016/j.commatsci.2018.09.026 An automated algorithm for reliable equation of state fitting of magnetic systems 2019-01-15 10.1021/acscatal.8b03664 Lewis Acid Site and Hydrogen-Bond-Mediated Polarization Synergy in the Catalysis of Diels-Alder Cycloaddition by Band-Gap Transition-Metal Oxides 2019-01-15 10.1088/2053-1583/aaf06d Calculating critical temperatures for ferromagnetic order in two-dimensional materials 2019-01-21 10.1021/acssuschemeng.8b05413 Energy Efficient Formaldehyde Synthesis by Direct Hydrogenation of Carbon Monoxide in Functionalized Metal-Organic Frameworks 2019-01-21 10.1039/c8cp06528d Exploring new approaches towards the formability of mixed-ion perovskites by DFT and machine learning 2019-01-22 10.1103/PhysRevB.99.045431 DFT-based study of electron transport through ferrocene compounds with different anchor groups in different adsorption configurations of an STM setup 2019-01-23 10.1021/acsami.8b03482 Study of Li Adsorption on Graphdiyne Using Hybrid DFT Calculations 2019-01-28 10.1063/1.5047829 Grand-canonical approach to density functional theory of 
electrocatalytic systems: Thermodynamics of solid-liquid interfaces at constant ion and electrode potentials 2019-01-28 10.1063/1.5049674 Face-centered tetragonal (FCT) Fe and Co alloys of Pt as catalysts for the oxygen reduction reaction (ORR): A DFT study 2019-01-28 10.1063/1.5056167 Quantifying robustness of DFT predicted pathways and activity determining elementary steps for electrochemical reactions 2019-01-30 10.1021/jacs.8b08839 Electrochemical CO Reduction: A Property of the Electrochemical Interface 2019-02-07 10.1021/acs.jpcc.8b11875 Spectroscopic Properties of Chalcopyrite Nanoparticles 2019-02-07 10.1039/c8cp04905j Probing the gas-phase structure of charge-tagged intermediates of a proline catalyzed aldol reaction - vibrational spectroscopy distinguishes oxazolidinone from enamine species 2019-02-07 10.1063/1.5078432 Space partitioning of exchange-correlation functionals with the projector augmented-wave method 2019-02-13 10.1103/PhysRevLett.122.063001 Ultrafast X-Ray Scattering Measurements of Coherent Structural Dynamics on the Ground-State Potential Energy Surface of a Diplatinum Molecule 2019-02-15 10.1016/j.commatsci.2018.11.021 Effects of vacancies at Al(111)/6H-SiC(0001) interfaces on deformation behavior: A first-principle study 2019-02-15 10.1016/j.ultramic.2018.11.002 Efficient first principles simulation of electron scattering factors for transmission electron microscopy 2019-02-15 10.1142/S0219633619500032 Structural, electronic and magnetic properties of stoichiometric cobalt oxide clusters (CoO)(n)(q) (n=3-10, q=0,+1): A modified basin-hopping Monte Carlo algorithm with spin-polarized DFT 2019-02-19 10.1103/PhysRevB.99.064202 Density functional theory description of random Cu-Au alloys 2019-02-21 10.1021/acs.jpcc.8b11689 Uncertainty Quantification in First-Principles Predictions of Harmonic Vibrational Frequencies of Molecules and Molecular Complexes 2019-02-21 10.1039/c8cp06567e Excited-state solvation structure of transition metal 
complexes from molecular dynamics simulations and assessment of partial atomic charge methods 2019-02-27 10.1021/acsami.8b22104 Mechanistic Understanding of Cu-CHA Catalyst as Sensor for Direct NH3-SCR Monitoring: The Role of Cu Mobility 2019-02-27 10.1088/1361-648X/aaf76b Hydrogen interaction with graphene on Ir(111): a combined intercalation and functionalization study 2019-02-28 10.1021/acs.jpcc.8b12214 Amorphous, Periodic Model of a Copper Electrocatalyst with Subsurface Oxygen for Enhanced CO Coverage and Dimerization 2019-02-28 10.1038/s42005-019-0122-z Interlayer exciton dynamics in van der Waals heterostructures 2019-02-28 10.1103/PhysRevMaterials.3.024005 Discovering two-dimensional topological insulators from high-throughput computations 2019-03-01 10.1039/c8ee03426e Towards an atomistic understanding of electrocatalytic partial hydrocarbon oxidation: propene on palladium 2019-03-14 10.1021/acs.jpcc.8b11571 Solvent-Adsorbate Interactions and Adsorbate-Specific Solvent Structure in Carbon Dioxide Reduction on a Stepped Cu Surface 2019-03-14 10.1103/PhysRevMaterials.3.034003 Definition of a scoring parameter to identify low-dimensional materials components 2019-03-15 10.1016/j.actamat.2018.12.050 Atomistic simulations of early stage clusters in Al-Mg alloys 2019-03-15 10.1021/acs.jctc.8b01089 Assessment of Initial Guesses for Self-Consistent Field Calculations. 
Superposition of Atomic Potentials: Simple yet Efficient 2019-03-15 10.1021/acsnano.8b08703 Plasmon-Induced Direct Hot-Carrier Transfer at Metal-Acceptor Interfaces 2019-03-15 10.1088/1367-2630/ab05ed Coherent diffraction of hydrogen through the 246 pm lattice of graphene 2019-03-15 10.1140/epjd/e2019-90441-5 Ab initio molecular dynamics studies of Au-38(SR)(24) isomers under heating 2019-03-15 10.3144/expresspolymlett.2019.24 Mechanochemically aminated multilayer graphene for carbon/polypropylene graft polymers and nanocomposites 2019-03-20 10.1016/j.joule.2018.12.015 High-Entropy Alloys as a Discovery Platform for Electrocatalysis 2019-03-20 10.1103/PhysRevB.99.115428 Density functional theory based electron transport study of coherent tunneling through cyclic molecules containing Ru and Os as redox active centers 2019-03-22 10.1002/admi.201801874 Multiple Reaction Paths for CO Oxidation on a 2D SnOx Nano-Oxide on the Pt(110) Surface: Intrinsic Reactivity and Spillover 2019-03-28 10.1021/acs.jpcc.9b00272 Globally Optimized Equilibrium Shapes of Zirconia-Supported Rh and Pt Nanoclusters: Insights into Site Assembly and Reactivity 2019-03-28 10.1039/c8cp07169a Experimental and theoretical 2p core-level spectra of size-selected gas-phase aluminum and silicon cluster cations: chemical shifts, geometric structure, and coordination-dependent screening 2019-04-03 10.1021/jacs.8b12101 Functional Role of Fe-Doping in Co-Based Perovskite Oxide Catalysts for Oxygen Evolution Reaction 2019-04-04 10.1021/acs.jpca.9b00927 DFTB-Assisted Global Structure Optimization of 13-and 55-Atom Late Transition Metal Clusters 2019-04-04 10.1021/acs.jpcc.8b06378 Computational Study of Adsorption of CO2, SO2, and H2CO on Free Standing and Molybdenum-Supported CaO Films 2019-04-10 10.1021/jacs.9b01204 Chiral Inversion of Thiolate-Protected Gold Nanoclusters via Core Reconstruction without Breaking a Au-S Bond 2019-04-10 10.1038/s41524-019-0181-4 Genetic algorithms for computational materials 
discovery accelerated by machine learning 2019-04-11 10.1021/acs.jpcc.9b00217 Efficient Photoelectrochemical Performance of gamma Irradiated g-C3N4 and Its g-C3N4@BiVO4 Heterojunction for Solar Water Splitting 2019-04-15 10.1002/adts.201800177 Modeling Gas Adsorption in Flexible Metal-Organic Frameworks via Hybrid Monte Carlo/Molecular Dynamics Schemes 2019-04-15 10.1007/s00214-019-2445-y QTAIM method for accelerated prediction of band gaps in perovskites 2019-04-15 10.1103/PhysRevLett.122.156001 Low-Scaling Algorithm for Nudged Elastic Band Calculations Using a Surrogate Machine Learning Model 2019-04-17 10.1038/s41598-019-41165-6 A potential sensing mechanism for DNA nucleobases by optical properties of GO and MoS2 Nanopores 2019-04-23 10.1021/acs.chemmater.9b00668 Roles of Precursor Conformation and Adatoms in Ullmann Coupling: An Inverted Porphyrin on Cu(111) 2019-05-01 10.1039/c8fd00154e Direct hot-carrier transfer in plasmonic catalysis 2019-05-15 10.1007/s00894-019-4016-5 Functionalized graphene pieces to trap the insecticide imidacloprid: a theoretical analysis 2019-05-15 10.1016/j.cpc.2018.12.001 Efficient technique for ab-initio calculation of magnetocrystalline anisotropy energy 2019-05-15 10.1021/acs.jctc.8b01229 R-NEB: Accelerated Nudged Elastic Band Calculations by Use of Reflection Symmetry 2019-05-15 10.1021/acsnano.8b09826 Plasmon Excitations in Mixed Metallic Nanoarrays 2019-05-15 10.1021/acsnano.9b02052 Atomically Precise, Thiolated Copper-Hydride Nanoclusters as Single-Site Hydrogenation Catalysts for Ketones in Mild Conditions 2019-05-15 10.1038/s41557-019-0246-5 N-heterocyclic carbene-functionalized magic-number gold nanoclusters 2019-05-16 10.1021/acs.jpcc.8b11092 When Current Does Not Follow Bonds: Current Density in Saturated Molecules 2019-05-23 10.1021/acs.jpcb.9b00654 Stability and IR Spectroscopy of Zwitterionic Form of beta-Alanine in Water Clusters 2019-05-23 10.1021/acs.jpcc.9b01894 Silicon Substitution in Nanotubes and Graphene via 
Intermittent Vacancies 2019-05-28 10.1038/s41597-019-0081-y Catalysis-Hub.org an open electronic structure database for surface reactions 2019-06-15 10.1007/s10825-019-01317-3 Theoretical investigation of the structural, electronic and thermodynamic properties of cubic and orthorhombic XZrS3 (X=Ba,Sr,Ca) compounds 2019-06-15 10.1021/acs.nanolett.9b00183 Cavity Control of Excitons in Two-Dimensional Materials 2019-06-28 10.1063/1.5097553 Optical signatures of pentacene in soft rare-gas environments 2019-07-01 10.1088/2515-7639/ab084b From DFT to machine learning: recent approaches to materials science-a review 2019-07-07 10.1039/c9cp02059d Chemically-resolved determination of hydrogenated graphene-substrate interaction 2019-07-09 10.3389/fchem.2019.00377 The GW Compendium: A Practical Guide to Theoretical Photoemission Spectroscopy 2019-07-11 10.1021/acs.jpcc.8b11757 Phthalocyanine and Metal Phthalocyanines Adsorbed on Graphene: A Density Functional Study 2019-07-15 10.1021/acsanm.9b00758 Tailoring Organic-Organic Poly(vinylpyrrolidone) Microparticles and Fibers with Multiwalled Carbon Nanotubes for Reinforced Composites 2019-07-21 10.1039/c9nj02132a Linear acene molecules in plasmonic cavities: mapping evolution of optical absorption spectra and electric field intensity enhancements 2019-07-26 10.1038/s41467-019-11315-5 Strong plasmon-molecule coupling at the nanoscale revealed by first-principles modeling 2019-07-29 10.1103/PhysRevB.100.035439 Adsorption on transition metal surfaces: Transferability and accuracy of DFT using the ADS41 dataset 2019-07-31 10.1021/jacs.9b03009 Combinatorial Identification of Hydrides in a Ligated Ag-40 Nanocluster with Noncompact Metal Core 2019-07-31 10.1088/1361-648X/ab18f3 Crystal field coefficients for yttrium analogues of rare-earth/transition-metal magnets using density-functional theory in the projector-augmented wave formalism 2019-08-07 10.1088/1361-648X/ab18ea van der Waals exchange-correlation functionals over bulk and 
surface properties of transition metals 2019-08-15 10.1002/vjch.201900068 Theoretical study on the adsorption of benzylpenicillin molecule onto vermiculite surface 2019-08-15 10.1016/j.jcat.2019.07.011 Nitrogen-doped graphene as metal free basic catalyst for coupling reactions 2019-08-15 10.1021/acsaelm.9b00354 Probing Charge Carrier Movement in Organic Semiconductor Thin Films via Nanowire Conductance Spectroscopy 2019-08-18 10.1039/c9cc04914b Chiral footprint of the ligand layer in the all-alkynyl-protected gold nanocluster Au-144(CCPhF)(60) 2019-08-22 10.1021/acs.jpcc.9b05863 Role of Nanocrystal Symmetry in the Crossover Region from Molecular to Metallic Gold Nanoparticles 2019-08-28 10.1088/1361-648X/ab20a1 Structural, vibrational and electronic properties of SnMBO4 (M = Al, Ga): a predictive hybrid DFT study 2019-09-01 10.1016/j.apsusc.2019.04.249 Water adsorption and dissociation on gold catalysts supported on anatase-TiO2(101) 2019-09-03 10.1038/s41467-019-12031-w A method for structure prediction of metal-ligand interfaces of hybrid nanoparticles 2019-09-05 10.1103/PhysRevB.100.104103 Local Bayesian optimizer for atomic structures 2019-09-06 10.1103/PhysRevB.100.115409 Electron and hole transport in disordered monolayer MoS2: Atomic vacancy induced short-range and Coulomb disorder scattering 2019-09-15 10.1016/j.physe.2019.04.027 Electric field hotspots of all-inorganic off-stoichiometric APbX(3) (A = Cs, Rb and X = Cl, Br, I) perovskite quantum dots 2019-09-15 10.1021/acscatal.9b01899 Electrochemical CO2 Reduction: Classifying Cu Facets 2019-09-16 10.1088/1367-2630/ab3d78 Strain and electric field tuning of 2D hexagonal boron arsenide 2019-09-17 10.1002/cphc.201900633 Hierarchy in the Halogen Activation During Surface-Promoted Ullmann Coupling 2019-09-23 10.1021/acs.organomet.9b00351 Fundamental Insights into the Reactivity and Utilization of Open Metal Sites in Cu(I)-MFU-4l 2019-09-26 10.1103/PhysRevB.100.104114 Materials property prediction using 
symmetry-labeled graphs as atomic position independent descriptors 2019-10-02 10.1021/jacs.9b06965 Permethylation Introduces Destructive Quantum Interference in Saturated Silanes 2019-10-03 10.1103/PhysRevMaterials.3.101401 Temperature-dependent magnetocrystalline anisotropy of rare earth/transition metal permanent magnets from first principles: The light RCo5 (R = Y, La-Gd) intermetallics 2019-10-07 10.1063/1.5124239 Calculating spin crossover temperatures by a first-principles LDA plus U scheme with parameter U evaluated from GW 2019-10-07 10.1103/PhysRevMaterials.3.105403 Theoretical characterization of structural disorder in the tetramer model structure of eumelanin 2019-10-15 10.1088/2053-1583/ab2c43 High throughput computational screening for 2D ferromagnetic materials: the critical role of anisotropy and local correlations 2019-10-15 10.1088/2053-1591/ab3fd4 A study of properties of palladium metal as a component of fuel cells 2019-10-15 10.4208/cicp.OA-2018-0302 Implementation of the Projector Augmented-Wave Method: The Use of Atomic Datasets in the Standard PAW-XML Format 2019-10-17 10.1021/acs.jpcc.9b06602 Ab Initio Simulation of Position-Dependent Electron Energy Loss and Its Application to the Plasmon Excitation of Nanographene 2019-10-21 10.1103/PhysRevA.100.043412 Spatiotemporal analysis of a final-state shape resonance in interferometric photoemission from Cu(111) surfaces 2019-10-28 10.1039/c9cp03982a Elucidating the optical spectra of [Au-25(SR)(18)](q) nanoclusters 2019-10-28 10.1063/1.5126261 Escaping scaling relationships for water dissociation at interfacial sites of zirconia-supported Rh and Pt clusters 2019-11-07 10.1021/acs.jpclett.9b02529 Nonstoichiometric Phases of Two-Dimensional Transition-Metal Dichalcogenides: From Chalcogen Vacancies to Pure Metal Membranes 2019-11-07 10.1063/1.5121721 Optical excitations of chlorophyll a and b monomers and dimers 2019-11-11 10.1038/s41524-019-0242-8 Beyond the RPA and GW methods with adiabatic 
xc-kernels for accurate ground state and quasiparticle energies 2019-11-15 10.1007/s10853-019-03866-1 Elastic, phononic, magnetic and electronic properties of quasi-one-dimensional PbFeBO4 2019-11-15 10.1021/acs.jctc.9b00869 Accelerated Saddle Point Refinement through Full Exploitation of Partial Hessian Diagonalization 2019-11-15 10.1021/acsnano.9b06698 Classifying the Electronic and Optical Properties of Janus Monolayers 2019-11-15 10.1126/sciadv.aaw1634 Fast nonadiabatic dynamics of many-body quantum systems 2019-11-19 10.1002/cphc.201900509 Ab Initio Cyclic Voltammetry on Cu(111), Cu(100) and Cu(110) in Acidic, Neutral and Alkaline Solutions 2019-11-21 10.1021/acs.jpclett.9b02717 Descriptors for Electrolyte-Renormalized Oxidative Stability of Solvents in Lithium-Ion Batteries 2019-11-21 10.1039/c9gc02265a Catalyst design criteria and fundamental limitations in the electrochemical synthesis of dimethyl carbonate 2019-11-25 10.1103/PhysRevB.100.174431 Effect of H adsorption on the magnetic properties of an Fe island on a W(110) surface 2019-11-25 10.1103/PhysRevB.100.205423 Charge density wave hampers exciton condensation in 1T-TiSe2 2019-11-26 10.1021/acs.chemmater.9b02049 Understanding X-ray Spectroscopy of Carbonaceous Materials by Combining Experiments, Density Functional Theory, and Machine Learning. 
Part I: Fingerprint Spectra 2019-11-28 10.1021/acs.jpcc.9b10110 Interfacial Charge Transfer Transitions in Colloidal TiO2 Nanoparticles Functionalized with Salicylic acid and 5-Aminosalicylic acid: A Comparative Photoelectron Spectroscopy and DFT Study 2019-12-05 10.1002/qua.26021 Electric field amplification of plasmon-molecule hybrids revealed by first-principles time dependent density functional theory calculations 2019-12-09 10.1103/PhysRevB.100.241405 Signatures of adatom effects in the quasiparticle spectrum of Li-doped graphene 2019-12-14 10.1039/c9nj03784e Dopant-induced localized light absorption in CsPbX3 (X = Cl, Br, I) perovskite quantum dots 2019-12-15 10.1002/adfm.201901327 Electron-Beam Manipulation of Silicon Impurities in Single-Walled Carbon Nanotubes 2019-12-15 10.1007/s11468-019-01001-z Probing Subnanometric-Scale Hotspots in Metallic Interfaces 2019-12-15 10.1021/acs.jctc.9b00777 Polarizable Embedding with a Transferable H2O Potential Function I: Formulation and Tests on Dimer 2019-12-15 10.1021/acs.jctc.9b00778 Polarizable Embedding with a Transferable H2O Potential Function II: Application to (H2O)(n) Clusters and Liquid Water 2019-12-15 10.1021/acsenergylett.9b02306 Computational Screening of Current Collectors for Enabling Anode-Free Lithium Metal Batteries 2019-12-15 10.1557/mrc.2019.117 Theory and simulations of critical temperatures in CrI3 and other 2D materials: easy-axis magnetic order and easy-plane Kosterlitz-Thouless transitions 2019-12-16 10.1103/PhysRevB.100.235429 Optical absorption and energy loss spectroscopy of single-walled carbon nanotubes 2019-12-17 10.1103/PhysRevLett.123.254801 Single-Shot Multi-keV X-Ray Absorption Spectroscopy Using an Ultrashort Laser-Wakefield Accelerator Source 2019-12-19 10.1103/PhysRevB.100.235129 Substituent effects on the Su-Schrieffer-Heeger electron-phonon coupling in conjugated polyenes 2019-12-26 10.1021/acs.jpcc.9b07715 Effects of Gas-Phase Conditions and Particle Size on the Properties of 
Cu(111)-Supported ZnyOx Particles Revealed by Global Optimization and Ab Initio Thermodynamics 2019-12-28 10.1063/1.5129397 Large-Z limit in atoms and solids from first principles 2019-12-28 10.1063/1.5132332 Uncertainty quantification of DFT-predicted finite temperature thermodynamic properties within the Debye model 2020-01-01 10.1088/1361-648X/ab4007 QuantumATK: an integrated platform of electronic and atomic-scale modelling tools 2020-01-02 10.1080/10420150.2020.1718133 Relativistic effects in the interaction of fast charged particles with graphene 2020-01-03 10.1021/acscatal.9b02799 A Challenge to the G similar to 0 Interpretation of Hydrogen Evolution 2020-01-14 10.1021/acs.chemmater.9b04530 Intercluster Reactions Resulting in Silver-Rich Trimetallic Nanoclusters 2020-01-14 10.1039/c9cp06086c On the interplay of solvent and conformational effects in simulated excited-state dynamics of a copper phenanthroline photosensitizer 2020-01-15 10.1002/admi.201901265 Atomic Scale Understanding of the Epitaxy of Perovskite Oxides on Flexible Mica Substrate 2020-01-15 10.1007/s40820-019-0345-2 Experimental and DFT Studies of Au Deposition Over WO3/g-C3N4 Z-Scheme Heterojunction 2020-01-15 10.1016/j.cpc.2019.07.016 DFT-FE - A massively parallel adaptive finite-element code for large-scale density functional theory calculations 2020-01-15 10.1021/acs.jctc.9b00584 Ab Initio Wavelength-Dependent Raman Spectra: Placzek Approximation and Beyond 2020-01-15 10.2320/matertrans.MT-MK2019009 Doping of Interstitials (H, He, C, N) in CrCoFeNi High Entropy Alloy: A DFT Study 2020-01-16 10.1021/acs.jpcc.9b10217 First-Principles Screening of Lead-Free Mixed-Anion Perovskites for Photovoltaics 2020-01-30 10.1016/j.jallcom.2019.152249 Mechanism of hydrogen modification of titanium-dioxide 2020-01-31 10.1103/PhysRevB.101.045433 Atomistic T-matrix theory of disordered two-dimensional materials: Bound states, spectral properties, quasiparticle scattering, and transport 2020-02-01 
10.1039/c9se00775j Boron-doped graphene-supported manganese oxide nanotubes as an efficient non-metal catalyst for the oxygen reduction reaction 2020-02-05 10.1021/jacs.9b11370 Understanding the Electron-Doping Mechanism in Potassium-Intercalated Single-Walled Carbon Nanotubes 2020-02-06 10.1038/s42005-020-0299-1 Broken adiabaticity induced by Lifshitz transition in MoS2 and WS2 single layers 2020-02-07 10.1021/acscatal.9b04343 High-Entropy Alloys as Catalysts for the CO2 and CO Reduction Reactions 2020-02-07 10.1039/c9cp03677f Time-resolved observation of transient precursor state of CO on Ru(0001) using carbon K-edge spectroscopy 2020-02-15 10.1007/s42341-019-00148-0 Analysis of Structural and Electronic Properties of Novel (PMMA/Al2O3, PMMA/Al2O3-Ag, PMMA/ZrO2, PMMA/ZrO2-Ag, PMMA-Ag) Nanocomposites for Low Cost Electronics and Optics Applications 2020-02-19 10.1021/acsami.9b18019 Steric Effects Control Dry Friction of H- and F-Terminated Carbon Surfaces 2020-02-19 10.1103/PhysRevB.101.085130 Valley selectivity induced by magnetic adsorbates: Triplet oxygen on monolayer MoS2 2020-02-21 10.1063/1.5141931 The effect of CO2 contamination in rechargeable non-aqueous sodium-air batteries 2020-03-01 10.1039/c9se01092k Stabilization of Li-S batteries with a lean electrolyte via ion-exchange trapping of lithium polysulfides using a cationic, polybenzimidazolium binder 2020-03-06 10.1021/acscatal.9b04682 Relative Abundances of Surface Diastereomeric Complexes Formed by Two Chiral Modifiers That Differ by a Methyl Group 2020-03-09 10.1063/1.5135640 Tunability of the spin reorientation transitions with pressure in NdCo5 2020-03-14 10.1039/c9nr10487a Understanding the structural diversity of freestanding Al2O3 ultrathin films through a DFTB-aided genetic algorithm 2020-03-15 10.1021/acsanm.0c00385 DNA Sequencing with Single-Stranded DNA Rectification in a Nanogap Gated by N-Terminated Carbon Nanotube Electrodes 2020-03-15 10.1177/1094342019845046 The static parallel 
distribution algorithms for hybrid density-functional calculations in HONPAS package 2020-03-16 10.1103/PhysRevB.101.121110 Electrically controlled dielectric band gap engineering in a two-dimensional semiconductor 2020-03-21 10.1039/c9sc05897d Oxygen evolution reaction: a perspective on a decade of atomic scale simulations 2020-03-25 10.1021/acsami.0c00328 Light-Triggered Reversible Supracolloidal Self-Assembly of Precision Gold Nanoclusters 2020-03-31 10.1063/5.0002959 ACE-Molecule: An open-source real-space quantum chemistry package 2020-04-01 10.1039/c9re00430k High redox performance of Y0.5Ba0.5CoO3-delta for thermochemical oxygen production and separation 2020-04-01 10.1088/2515-7655/ab783a Effect of high oxygen deficiency in nano-confined bismuth sesquioxide 2020-04-02 10.1103/PhysRevB.101.155404 Conductance of quantum spin Hall edge states from first principles: The critical role of magnetic impurities and inter-edge scattering 2020-04-08 10.1021/acs.nanolett.0c00670 Formation of Defects in Two-Dimensional MoS2 in the Transmission Electron Microscope at Electron Energies below the Knock-on Threshold: The Role of Electronic Excitations 2020-04-08 10.1149/1945-7111/ab836b Engineering Solid Electrolyte Interphase Composition by Assessing Decomposition Pathways of Fluorinated Organic Solvents in Lithium Metal Batteries 2020-04-14 10.1038/s41524-020-0307-8 Machine-learning structural and electronic properties of metal halide perovskites using a hierarchical convolutional neural network 2020-04-15 10.1007/s42452-020-2488-7 Elucidating the stability of ligand-protected Au nanoclusters under electrochemical reduction of CO2 2020-04-15 10.1016/j.jcat.2020.02.003 Understanding the interplay of bifunctional and electronic effects: Microkinetic modeling of the CO electro-oxidation reaction 2020-04-15 10.1016/j.mcat.2020.110845 A theoretical study of the influence of gold nanoplatelets sites in C-C coupling reaction 2020-04-15 10.1515/zpch-2019-1476 Synthesis and Doping 
Strategies to Improve the Photoelectrochemical Water Oxidation Activity of BiVO4 Photoanodes 2020-04-16 10.1021/acs.jpclett.0c00786 Electron Binding in a Superatom with a Repulsive Coulomb Barrier: The Case of [Ag-44(SC6H3F2)(30)](4-) in the Gas Phase 2020-04-20 10.1103/PhysRevMaterials.4.045002 Density functional simulations of pressurized Mg-Zn and Al-Zn alloys 2020-04-21 10.1039/c9sc05768d The role of an interface in stabilizing reaction intermediates for hydrogen evolution in aprotic electrolytes 2020-04-25 10.1016/j.jallcom.2019.153343 Thermodynamics of the iron-nitrogen system with vacancies. From first principles to applications 2020-04-30 10.1016/j.apsusc.2020.145470 First principle studies of oxygen reduction reaction on N doped graphene: Impact of N concentration, position and co-adsorbate effect 2020-04-30 10.1021/acs.jpcc.0c01286 Engineering Atomically Sharp Potential Steps and Band Alignment at Solid Interfaces using 2D Janus Layers 2020-05-05 10.1073/pnas.1917448117 Large H2O solubility in dense silica and its implications for the interiors of water-rich planets 2020-05-06 10.1038/s41467-020-16062-6 Solvent-mediated assembly of atom-precise gold-silver nanoclusters to semiconducting one-dimensional materials 2020-05-07 10.1039/d0cp00158a The mechanism of Mg2+ conduction in ammine magnesium borohydride promoted by a neutral molecule 2020-05-13 10.1021/acs.nanolett.9b04789 Microscopic Theory of Plasmons in Substrate-Supported Borophene 2020-05-15 10.1016/j.mcat.2020.110855 Ab initio studies of propene oxide formation at gold nanocatalysts supported on anatase-TiO2 2020-05-15 10.1016/j.susc.2020.121588 A molecular level insight into adsorption of beta-lactam antibiotics on vermiculite surface 2020-05-15 10.1038/s41563-020-0622-y Bandgap tuning of two-dimensional materials by sphere diameter engineering 2020-05-15 10.1038/s41586-020-2241-9 Engineering covalently bonded 2D layered materials by self-intercalation 2020-05-26 10.1021/acsaelm.0c00179 
Electronic Structure and Trap States of Two-Dimensional Ruddlesden-Popper Perovskites with the Relaxed Goldschmidt Tolerance Factor 2020-05-28 10.1021/acs.jpcc.0c01635 Efficient Ab Initio Modeling of Dielectric Screening in 2D van der Waals Materials: Including Phonons, Substrates, and Doping 2020-06-01 10.1103/PhysRevMaterials.4.065601 Atomic structure and origin of chirality of DNA-stabilized silver clusters 2020-06-03 10.1021/acs.cgd.0c00281 Modeling of Grazing-Incidence X-ray Diffraction from Naphthyl End-Capped Oligothiophenes in Organic Field-Effect Transistors 2020-06-07 10.1039/d0cp01239d Tuning the binding energy of excitons in the MoS2 monolayer by molecular functionalization and defective engineering 2020-06-07 10.1039/d0ra03126g Boron and pyridinic nitrogen-doped graphene as potential catalysts for rechargeable non-aqueous sodium-air batteries 2020-06-09 10.1021/acs.jctc.0c00214 Validation of Pseudopotential Calculations for the Electronic Band Gap of Solids 2020-06-09 10.1021/acs.jctc.9b01167 Enabling Large-Scale Condensed-Phase Hybrid Density Functional Theory Based Ab Initio Molecular Dynamics. 1. 
Theory, Algorithm, and Performance 2020-06-11 10.1021/acs.jpca.0c01512 Monte Carlo Simulations of Au-38(SCH3)(24) Nanocluster Using Distance-Based Machine Learning Methods 2020-06-11 10.1021/acs.jpcc.0c02889 Charge Transfer Plasmons in Dimeric Electron Clusters 2020-06-11 10.1103/PhysRevB.101.235132 Plasmon excitations in chemically heterogeneous nanoarrays 2020-06-15 10.1007/s11468-019-01062-0 Strong Collectivity of Optical Transitions in Lead Halide Perovskite Quantum Dots 2020-06-15 10.1007/s42452-020-2788-y Experimental and computational investigation of PVDF-BaTiO3 interface for impact sensing and energy harvesting applications 2020-06-15 10.1038/s41467-020-16529-6 A library of ab initio Raman spectra for automated identification of 2D materials 2020-06-15 10.1038/s41563-020-0631-x Atomically thin half-van der Waals metals enabled by confinement heteroepitaxy 2020-06-15 10.3938/jkps.76.1071 Effects of Oxygen Adsorption on Morphological Evolution of Epitaxial Ag Island Films Grown on Si 2020-06-22 10.1103/PhysRevB.101.245433 Three-particle states and brightening of intervalley excitons in a doped MoS2 monolayer 2020-06-25 10.1021/acs.jpcc.0c04512 Thermodynamic Adsorption States of TNT and DNAN on Corundum and Hematite 2020-07-02 10.1021/acs.jpcc.0c02953 Supramolecular Ordering and Reactions of a Chlorophenyl Porphyrin on Ag(111) 2020-07-06 10.1103/PhysRevB.102.024407 Shortcomings of meta-GGA functionals when describing magnetism 2020-07-09 10.1021/acs.jpcc.0c03383 Fullerene-Mediated Aggregation of M-25(SR)(18)(-) (M = Ag, Au) Nanoclusters 2020-07-10 10.1016/j.ijhydene.2020.04.242 HER activity of MxNi1-x (M = Cr, Mo and W; x approximate to 0.2) alloy in acid and alkaline media 2020-07-14 10.1063/5.0006074 Recent developments in the PySCF program package 2020-07-15 10.1002/admi.201902090 Metastability at Defective Metal Oxide Interfaces and Nanoconfined Structures 2020-07-15 10.1002/qua.26247 Calculation of core-level electron spectra of ionic liquids 2020-07-15 
10.1016/j.ultramic.2020.113012 Conductivity models for electron energy loss spectroscopy of graphene in a scanning transmission electron microscope with high energy resolution 2020-07-15 10.1038/s41563-020-0655-2 Universal chemomechanical design rules for solid-ion conductors to prevent dendrite formation in lithium metal batteries 2020-07-17 10.1103/RevModPhys.92.035001 Advances and challenges in single-molecule electron transport 2020-07-20 10.1364/OE.397167 Low-loss hyperbolic dispersion and anisotropic plasmonic excitation in nodal-line semimetallic yttrium nitride 2020-07-22 10.1088/1361-648X/ab7f6a Generation of low-symmetry perovskite structures for ab initio computation 2020-07-23 10.1021/acs.jpcc.0c03837 Promoting Z-to-E Thermal Isomerization of Azobenzene Derivatives by Noncovalent Interaction with Phosphorene: Theoretical Prediction and Experimental Study 2020-07-25 10.1039/d0cc03334k A topological isomer of the Au-25(SR)(18)(-)nanocluster 2020-07-28 10.1063/5.0015571 Gaussian representation for image recognition and reinforcement learning of atomistic structure 2020-07-29 10.1021/jacs.0c02399 Metal-Assisted and Solvent-Mediated Synthesis of Two-Dimensional Triazine Structures on Gram Scale 2020-07-29 10.1103/PhysRevApplied.14.014091 Spin Orientation and Magnetostriction of Tb1-xDyxFe2 from First Principles 2020-08-05 10.1016/j.matt.2020.04.016 Crystal Site Feature Embedding Enables Exploration of Large Chemical Spaces 2020-08-05 10.1088/1361-648X/ab8664 First principles Heisenberg models of 2D magnetic materials: the importance of quantum corrections to the exchange coupling 2020-08-06 10.1021/acs.jpcc.0c01151 Hydrogen Evolution Reaction over Single-Atom Catalysts Based on Metal Adatoms at Defected Graphene and h-BN 2020-08-07 10.1039/d0cp02251a Biofouling affects the redox kinetics of outer and inner sphere probes on carbon surfaces drastically differently - implications to biosensing 2020-08-07 10.1063/5.0015872 An accurate machine-learning calculator 
for optimization of Li-ion battery cathodes 2020-08-10 10.1002/anie.202004016 Ambient Bistable Single Dipole Switching in a Molecular Monolayer 2020-08-11 10.1021/acs.jctc.0c00137 Projector Augmented Wave Method with Gauss-Type Atomic Orbital Basis: Implementation of the Generalized Gradient Approximation and Mesh Grid Quadrature 2020-08-11 10.1021/acs.jctc.9b01251 Reliable Computational Prediction of the Supramolecular Ordering of Complex Molecules under Electrochemical Conditions 2020-08-12 10.1103/PhysRevB.102.075427 Atomistic structure learning algorithm with surrogate energy model relaxation 2020-08-13 10.1021/acs.jpca.0c03992 Computational Comparative Analysis of Small Atomically Precise Copper Clusters 2020-08-14 10.1103/PhysRevMaterials.4.083805 Uncertainty quantification in first-principles predictions of phonon properties and lattice thermal conductivity 2020-08-15 10.1002/pssr.202000182 Air Stable, High-Efficiency, Pt-Based Halide Perovskite Solar Cells with Long Carrier Lifetimes 2020-08-15 10.1016/j.actamat.2020.05.050 Precipitate formation in aluminium alloys: Multi-scale modelling approach 2020-08-15 10.1038/s41565-020-0717-2 High oscillator strength interlayer excitons in two-dimensional heterostructures for mid-infrared photodetection 2020-08-17 10.1103/PhysRevMaterials.4.084003 Naphthylene-gamma: 1D and 2D carbon allotropes based on the fusion of phenyl- and naphthyl-like groups 2020-08-18 10.1021/acsomega.0c02679 Density Functional Theory Study of NiFeCo Trinary Oxy-Hydroxides for an Efficient and Stable Oxygen Evolution Reaction Catalyst 2020-08-25 10.1021/acsnano.0c03004 Hot-Carrier Generation in Plasmonic Nanoparticles: The Importance of Atomic Structure 2020-08-26 10.1002/adfm.202002122 A Library of Late Transition Metal Alloy Dielectric Functions for Nanophotonic Applications 2020-09-03 10.1021/acs.jpcc.0c07051 Three Distinct Torsion Profiles of Electronic Transmission through Linear Carbon Wires 2020-09-03 10.1021/acs.jpclett.0c01727 
Simultaneous Suppression of pi- and sigma-Transmission in pi-Conjugated Molecules 2020-09-05 10.1002/jcc.26370 SuSMoST: Surface Science Modeling and Simulation Toolkit 2020-09-07 10.1039/d0cy00413h First-principles insight into CO hindered agglomeration of Rh and Pt single atoms onm-ZrO2 2020-09-09 10.1103/PhysRevLett.125.116802 Integrated Plasmonics: Broadband Dirac Plasmons in Borophene 2020-09-14 10.1039/d0dt00406e Rapid fabrication of oxygen defective alpha-Fe2O3(110) for enhanced photoelectrochemical activities 2020-09-14 10.1063/5.0021237 Anisotropic properties of monolayer 2D materials: An overview from the C2DB database 2020-09-15 10.1016/j.cpc.2020.107204 The dynamic parallel distribution algorithm for hybrid density-functional calculations in HONPAS package 2020-09-17 10.1021/acs.jpcc.0c04367 Realistic Cyclic Voltammograms from Ab Initio Simulations in Alkaline and Acidic Electrolytes 2020-09-23 10.1088/1361-648X/ab94f2 Structure prediction of surface reconstructions by deep reinforcement learning 2020-09-25 10.1103/PhysRevB.102.115307 Strain-engineered widely tunable perfect absorption angle in black phosphorus from first principles 2020-09-28 10.1063/5.0020555 Effective mass path integral simulations of quasiparticles in condensed phases 2020-09-30 10.1088/1361-648X/ab99ea LCAO-TDDFT-k-omega: spectroscopy in the optical limit 2020-10-01 10.1016/j.matchemphys.2020.123407 Application of the training of density functional theory potentials within machine learning to adsorptions and reaction paths on Platinum surfaces 2020-10-06 10.1021/acs.langmuir.0c01652 Not All Fluorination Is the Same: Unique Effects of Fluorine Functionalization of Ethylene Carbonate for Tuning Solid-Electrolyte Interphase in Li Metal Batteries 2020-10-07 10.1063/5.0027641 Surface effects on temperature-driven spin crossover in Fe(phen)(2)(NCS)(2) 2020-10-15 10.1002/chem.202000659 pH Tuning of Water-Soluble Arylazopyrazole Photoswitches 2020-10-15 10.1007/s11468-020-01163-1 Electric 
Near-field Modulations of Charged Deoxyribonucleic Acid Nucleobases 2020-10-15 10.1016/j.jcat.2020.07.033 Nanometer-thick films of antimony oxide nanoparticles grafted on defective graphenes as heterogeneous base catalysts for coupling reactions 2020-10-21 10.1038/s41524-020-00428-x High-throughput computational screening for two-dimensional magnetic materials based on experimental databases of three-dimensional compounds 2020-10-21 10.1039/d0cp03667f Design criteria for the competing chlorine and oxygen evolution reactions: avoid the OCl adsorbate to enhance chlorine selectivity 2020-10-22 10.1021/acs.jpcc.0c06716 How Do the Coadsorbates Affect the Oxygen Reduction Reaction Activity of Undoped and N-Doped Graphene Nanoribbon Edges? A Density Functional Theory Study 2020-10-22 10.1021/acs.jpcc.0c07140 Dithiol-Induced Contraction in Ag-14 Clusters and Its Manifestation in Electronic Structures 2020-10-28 10.1039/d0cp02574g Tripentaphenes: two-dimensional acepentalene-based nanocarbon allotropes 2020-10-29 10.1021/acs.jpcc.0c05161 Electronic Current Mapping of Transport through Defective Zigzag Graphene Nanoribbons 2020-11-01 10.1002/qua.26343 Multiscale electrostatic embedding simulations for modeling structure and dynamics of molecules in solution: A tutorial review 2020-11-01 10.1063/5.0028002 Accuracy of XAS theory for unraveling structural changes of adsorbates: CO on Ni(100) 2020-11-01 10.1080/00223131.2020.1779143 Behavior of hydrogen at Fe/W interface: a first-principle calculation study 2020-11-01 10.1364/JOSAB.399078 Theory and numerical aspects of fundamental light-matter interactions 2020-11-03 10.1073/pnas.2001923117 Design principles for self-forming interfaces enabling stable lithium-metal anodes 2020-11-05 10.1021/acs.jpca.0c05739 Carbon Bond Breaking under Ar+-Ion Irradiation in Dependence on sp Hybridization: Car-Parrinello, Ehrenfest, and Classical Dynamics Study 2020-11-06 10.1021/acscatal.0c03686 PdSO4 Surfaces in Methane Oxidation Catalysts: DFT 
Studies on Stability, Reactivity, and Water Inhibition 2020-11-07 10.1039/d0cp04216a Localized surface plasmon resonances of a metal nanoring 2020-11-10 10.1021/acs.jctc.0c00597 Variational Density Functional Calculations of Excited States via Direct Optimization 2020-11-10 10.1021/acs.jctc.0c00729 Tackling Solvent Effects by Coupling Electronic and Molecular Density Functional Theory 2020-11-10 10.1021/acs.jctc.0c00842 Adventures in DFTB: Toward an Automatic Parameterization Scheme 2020-11-11 10.1021/jacs.0c08962 Anisotropic Strain Tuning of L1(0) Ternary Nanoparticles for Oxygen Reduction 2020-11-11 10.1103/PhysRevB.102.195118 Real-space orthogonal projector-augmented-wave method 2020-11-12 10.1021/acs.jpcc.0c07004 Atomistic Insight into Cation Effects on Binding Energies in Cu-Catalyzed Carbon Dioxide Reduction 2020-11-13 10.1021/acsenergylett.0c01815 Beyond Transition Metal Oxide Cathodes for Electric Aviation: The Case of Rechargeable CFx 2020-11-13 10.1103/PhysRevMaterials.4.114006 Proximity-induced magnetization in graphene: Towards efficient spin gating 2020-11-15 10.1007/s10762-019-00634-9 Vibrational Response of Felodipine in the THz Domain: Optical and Neutron Spectroscopy Versus Plane-Wave DFT Modeling 2020-11-15 10.1007/s11244-020-01290-3 Binding Behavior of Carbonmonoxide to Gold Atoms on Ag(001) 2020-11-15 10.1016/j.cpc.2020.107372 PWDFT.jl: A Julia package for electronic structure calculation using density functional theory and plane wave basis 2020-11-15 10.1038/s41566-020-0689-7 Tunable free-electron X-ray radiation from van der Waals materials 2020-11-21 10.1039/d0py00973c Semifluorinated, kinked polyarylenes via direct arylation polycondensation 2020-11-21 10.1063/5.0021821 Smart local orbitals for efficient calculations within density functional theory and beyond 2020-11-24 10.1103/PhysRevB.102.184428 Critical assessment of Co-Cu phase diagram from first-principles calculations 2020-11-30 10.1103/PhysRevB.102.195433 Analysis of the plasmonic 
excitations in assemblies of three-dimensional electron clusters 2020-12-01 10.1039/d0fd00064g Variational calculations of excited states via direct optimization of the orbitals in DFT 2020-12-01 10.1103/PhysRevB.102.245301 Origin of weak Fermi level pinning at the graphene/silicon interface 2020-12-01 10.1103/PhysRevMaterials.4.121601 NH3 on anatase TiO2(101): Diffusion mechanisms and the effect of intermolecular repulsion 2020-12-09 10.1021/acsami.0c14696 Engineering Dielectric Screening for Potential-well Arrays of Excitons in 2D Materials 2020-12-14 10.1039/d0cp05229a Polarization consistent basis sets using the projector augmented wave method: a renovation brought by PAW into Gaussian basis sets 2020-12-14 10.1039/d0nr07366k Dynamics of weak interactions in the ligand layer of meta-mercaptobenzoic acid protected gold nanoclusters Au-68(m-MBA)(32) and Au-144(m-MBA)(40) 2020-12-15 10.1016/j.cocom.2020.e00491 ATiO(3)/TiO (A=Pb, Sn) superlattice: Bridging ferroelectricity and conductivity 2020-12-16 10.1021/jacs.0c09000 Direct Evidence of Photoinduced Charge Transport Mechanism in 2D Conductive Metal Organic Frameworks 2020-12-16 10.1103/PhysRevMaterials.4.124004 Monomeric two-dimensionally ordered WO3 clusters on anatase TiO2 (101) 2020-12-21 10.1063/5.0033778 Machine learning with bond information for local structure optimizations in surface science 2021-01-06 10.1103/PhysRevMaterials.5.015401 Electron microscopy and spectroscopic study of structural changes, electronic properties, and conductivity in annealed LixCoO2 2021-01-11 10.1002/anie.202011780 A Homoleptic Alkynyl-Ligated [Au13Ag16L24](3-) Cluster as a Catalytically Active Eight-Electron Superatom 2021-01-13 10.1021/acs.nanolett.0c04260 Voltage-Induced Single-Molecule Junction Planarization 2021-01-14 10.1021/acs.jpcc.0c08597 Trends in Carbon, Oxygen, and Nitrogen Core in the X-ray Absorption Spectroscopy of Carbon Nanomaterials: A Guide for the Perplexed 2021-01-15 10.1021/acscatal.0c04733 Engendering 
Unprecedented Activation of Oxygen Evolution via Rational Pinning of Ni Oxidation State in Prototypical Perovskite: Close Juxtaposition of Synthetic Approach and Theoretical Conception 2021-01-15 10.1088/2515-7655/abc96f An accurate machine learning calculator for the lithium-graphite system 2021-01-25 10.1021/acsaem.0c02798 Influence of the Artificial Nanostructure on the LiF Formation at the Solid-Electrolyte Interphase of Carbon-Based Anodes 2021-01-29 10.1038/s41524-020-00480-7 Towards fully automatized GW band structure calculations: What we can learn from 60.000 self-energy evaluations 2021-02-03 10.1002/cphc.202000771 Field Effect and Local Gating in Nitrogen-Terminated Nanopores (NtNP) and Nanogaps (NtNG) in Graphene 2021-02-10 10.1103/PhysRevMaterials.5.023801 Tight-binding bond parameters for dimers across the periodic table from density-functional theory 2021-02-15 10.1016/j.cpc.2020.107676 NanoNET : An extendable Python framework for semi-empirical tight-binding models 2021-02-15 10.3390/molecules26040955 Density Functional Theory Study of Optical and Electronic Properties of (TiO2)(n=5,8,68) Clusters for Application in Solar Cells 2021-02-17 10.1103/PhysRevB.103.054201 Compositional ordering in relaxor ferroelectric Pb(BB ')O-3: Nearest neighbor approach 2021-03-01 10.1016/j.jphotochem.2020.113103 Fluorescence quenching mechanism of 9-hydroxyphenal-1-one carbon quantum dots by Cu2+ ions: An experimental and computational investigation 2021-03-14 10.1063/5.0042302 Electronic and optical properties of fluorinated graphene within many-body Green's function framework 2021-03-15 10.1002/lpor.202000346 Ultrafast Carrier and Lattice Dynamics in Plasmonic Nanocrystalline Copper Sulfide Films 2021-03-15 10.1016/j.cocom.2020.e00524 The effect of non-centrosymmetricity on optical and electronic properties of BaHfO3 perovskite 2021-03-17 10.1088/1361-648X/abc407 Improved band gaps and structural properties from Wannier-Fermi-Lowdin self-interaction corrections for 
periodic systems 2021-03-23 10.1088/1361-648X/abd5f6 Electronic structure and transport properties of coupled CdS/ZnSe quantum dots 2021-04-16 10.1088/1361-6528/abdb64 Electronic excitation in graphene under single-particle irradiation 2021-05-01 10.35848/1347-4065/abd6de Temperature dependence of resistivity increases induced by thiols adsorption in gold nanosheets gpaw-24.1.0/doc/documentation/gpu.rst000066400000000000000000000042611454550013000174770ustar00rootroot00000000000000.. _gpu: GPU === Ground-state calculations on a GPU is an new feature with some limitations: * only PW-mode * it has only been implemented in the new GPAW code * only parallelization over **k**-points See :git:`gpaw/test/gpu/test_pw.py` for an example. .. tip:: >>> import numpy as np >>> from gpaw.gpu import cupy as cp >>> a_cpu = np.zeros(...) >>> a_gpu = cp.asarray(a_cpu) # from CPU to GPU >>> b_cpu = a_gpu.get() # from GPU to CPU The gpaw.gpu module =================== .. module:: gpaw.gpu .. data:: cupy :mod:`cupy` module (or :mod:`gpaw.gpu.cpupy` if :mod:`cupy` is not available) .. data:: cupyx ``cupyx`` module (or :mod:`gpaw.gpu.cpupyx` if ``cupyx`` is not available) .. autodata:: cupy_is_fake .. autodata:: is_hip .. autofunction:: as_np .. autofunction:: as_xp .. autofunction:: cupy_eigh Fake cupy library ================= .. module:: gpaw.gpu.cpupy .. module:: gpaw.gpu.cpupyx The implementation uses cupy_. In the code, we don't do ``import cupy as cp``. Instead we use ``from gpaw.gpu import cupy as cp``. This allows us to use a fake ``cupy`` implementation so that we can run GPAW's ``cupy`` code without having a physical GPU. To enable the fake ``cupy`` module, do:: GPAW_CPUPY=1 python ... This allows users without a GPU to find out if their code interferes with the GPU implementation, simply by running the tests. .. 
_cupy: https://cupy.dev/ CuPy enabled container objects ============================== The following objects: * :class:`~gpaw.core.UGArray` * :class:`~gpaw.core.PWArray` * :class:`~gpaw.core.atom_arrays.AtomArrays` * :class:`~gpaw.core.matrix.Matrix` can have their data (``.data`` attribute) stored in a :class:`cupy.ndarray` array instead of, as normal, a :class:`numpy.ndarray` array. In additions, these objects now have an ``xp`` attribute that can be :mod:`numpy` or :mod:`cupy`. Also, the :class:`~gpaw.core.atom_centered_functions.AtomCenteredFunctions` object can do its operations on the GPU. GPU-aware MPI ============= Use a GPU-aware MPI implementation and set the :envvar:`GPAW_GPU` when compiling GPAW's C-extension. .. envvar:: GPAW_GPU Add support for passing :class:`cupy.ndarray` objects to MPI gpaw-24.1.0/doc/documentation/grids.rst000066400000000000000000000025331454550013000200140ustar00rootroot00000000000000.. _grids: ===== Grids ===== Assume that we have an ``Atoms`` object contained in a cubic unit cell of sidelength ``L``:: L = 2.0 atoms = Atoms(cell=(L, L, L), pbc=True) and we use a calculator with a grid spacing of ``h=0.25`` Å or ``gpts=(8, 8, 8)``. Since we have periodic boundary conditions, the *x*-axis will look like this (the *y* and *z*-axes look the same):: 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 0 -+---------------+---------------+---------------+- -L 0 L 2*L Wave functions are represented on 8x8x8 grids, where the grid points are numbered from 0 to 7. If we use zero boundary conditions (``pbc=False``), then the *x*-axis will look like this:: 0 1 2 3 4 5 6 +---------------+ 0 L Here the wave functions are exactly zero at *x*\ =0 Å and *x*\ =\ *L*, and only the non-zero values are stored in 7x7x7 grids (grid points numbered from 0 to 6). Update this XXX how about padding? An example: >>> L = 2.0 >>> atoms = Atoms(..., ... cell=(L, L, L), ... 
pbc=False) >>> calc = GPAW(..., gpts=(8, 8, 8)) >>> atoms.SetCalculator(calc) >>> e = atoms.get_potential_energy() >>> wf = calc.get_pseudo_wave_function(band=0) >>> wf.shape (7, 7, 7) >>> calc.GetGridSpacings() array([ 0.25, 0.25]) gpaw-24.1.0/doc/documentation/gw_theory/000077500000000000000000000000001454550013000201565ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/gw_theory/gw_theory.rst000066400000000000000000000233501454550013000227220ustar00rootroot00000000000000.. _gw_theory: ======================================================= Quasi-particle spectrum in the GW approximation: theory ======================================================= The foundations of the GW method are described in Refs. \ [#Hedin1965]_ and \ [#Hybertsen1986]_. The implementation in GPAW is documented in Ref. \ [#Hueser2013]_. For examples, see :ref:`gw tutorial`. Introduction ============ Quasi-particle energies are obtained by replacing the DFT exchange- correlation contributions by the GW self energy and exact exchange: .. math:: E_{n \mathbf{k}} = \epsilon_{n \mathbf{k}} + Z_{n \mathbf{k}} \cdot \text{Re} \left(\Sigma_{n \mathbf{k}}^{\vphantom{\text{XC}}} + \epsilon^{\text{EXX}}_{n \mathbf{k}} - V^{\text{XC}}_{n \mathbf{k}} \right) where `n` and `\mathbf{k}` are band and k-point indices, respectively. The different contributions are: `\epsilon_{n \mathbf{k}}`: Kohn-Sham eigenvalues taken from a groundstate calculation `V^{\text{XC}}_{n \mathbf{k}}`: DFT exchange-correlation contributions extracted from a groundstate calculation `\epsilon^{\text{EXX}}_{n \mathbf{k}}`: exact exchange contributions The renormalization factor is given by: .. math:: Z_{n \mathbf{k}} = \left(1 - \text{Re}\left< n \mathbf{k}\middle| \frac{\partial}{\partial\omega} \Sigma(\omega)_{|\omega = \epsilon_{n \mathbf{k}}}\middle| n \mathbf{k}\right>\right)^{-1} `\left| n \mathbf{k} \right>` denotes the Kohn-Sham wavefunction which is taken from the groundstate calculation. 
The self energy is expanded in plane waves, denoted by `\mathbf{G}` and `\mathbf{G}'`: .. math:: \Sigma_{n \mathbf{k}}(\omega = \epsilon_{n \mathbf{k}}) =& \left< n \mathbf{k} \middle| \Sigma(\omega) \middle| n \mathbf{k} \right>\\ =& \frac{1}{\Omega} \sum\limits_{\mathbf{G} \mathbf{G}'} \sum\limits_{\vphantom{\mathbf{G}}\mathbf{q}}^{1. \text{BZ}} \sum\limits_{\vphantom{\mathbf{G}}m}^{\text{all}} \frac{i}{2 \pi} \int\limits_{-\infty}^\infty\!d\omega'\, W_{\mathbf{G} \mathbf{G}'}(\mathbf{q}, \omega') \, \cdot \\ & \frac{\rho^{n \mathbf{k}}_{m \mathbf{k} - \mathbf{q}}(\mathbf{G}) \rho^{n \mathbf{k}*}_{m \mathbf{k} - \mathbf{q}}(\mathbf{G}')}{\omega + \omega' - \epsilon_{m \, \mathbf{k} - \mathbf{q}} + i \eta \, \text{sgn}(\epsilon_{m \, \mathbf{k} - \mathbf{q}} - \mu)}
Therefore, the `\mathbf{q} = 0` term can be evaluated analytically, which yields: .. math:: W_{\mathbf{00}}(\mathbf{q}=0, \omega) = \frac{2\Omega}{\pi} \left(\frac{6\pi^2}{\Omega}\right)^{1/3} \varepsilon^{-1}_{\mathbf{00}}(\mathbf{q} \rightarrow 0, \omega) for the head and similarly .. math:: W_{\mathbf{G0}}(\mathbf{q}=0, \omega) = \frac{1}{|\mathbf{G}|} \frac{\Omega}{\pi} \left(\frac{6\pi^2}{\Omega}\right)^{2/3} \varepsilon^{-1}_{\mathbf{G0}}(\mathbf{q} \rightarrow 0, \omega) for the wings of the screened potential. Here, the dielectric function is used in the optical limit. This is only relevant for the terms with `n = m`, as otherwise the pair density matrix elements vanish: `\rho^{n \mathbf{k}}_{m \mathbf{k}} = 0` for `n \neq m`. Frequency integration ===================== `\rightarrow` ``domega0, omega2`` The frequency integration is performed numerically on a user-defined grid for positive values only. This is done by rewriting the integral as: .. math:: & \int\limits_{-\infty}^\infty\!d\omega'\, \frac{W(\omega')}{\omega + \omega' - \epsilon_{m \, \mathbf{k} - \mathbf{q}} \pm i \eta}\\ =& \int\limits_{0}^\infty\!d\omega'\, W(\omega') \left(\frac{1}{\omega + \omega' - \epsilon_{m \, \mathbf{k} - \mathbf{q}} \pm i \eta} + \frac{1}{\omega - \omega' - \epsilon_{m \, \mathbf{k} - \mathbf{q}} \pm i \eta}\right) with the use of `W(\omega') = W(-\omega')`. The frequency grid is the same as that used for the dielectric function. Read more about it here: :ref:`frequency grid`. .. _gw_theory_ppa: Plasmon Pole Approximation ========================== `\rightarrow` ``ppa = True`` Within the plasmon pole approximation (PPA), the dielectric function is modelled as a single peak at the main plasmon frequency `\tilde{\omega}_{\mathbf{G}\mathbf{G}'}(\mathbf{q})`: .. 
math:: \varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, \omega) = R _{\mathbf{G}\mathbf{G}'}(\mathbf{q}) \left(\frac{1}{\omega - \tilde{\omega}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}) + i\eta} - \frac{1}{\omega + \tilde{\omega}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}) - i\eta}\right) The two parameters are found by fitting this expression to the full dielectric function for the values `\omega = 0` and `\omega = i E_0`: .. math:: \varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, 0) =& \frac{-2 R}{\tilde{\omega}} \hspace{0.5cm} \varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, iE_0) = \frac{-2 R \tilde{\omega}}{E_0^2 + \tilde{\omega}^2}\\ \Rightarrow \tilde{\omega}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}) =& E_0 \sqrt{\frac{\varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, iE_0)} {\varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, 0) - \varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, iE_0)}}\\ R _{\mathbf{G}\mathbf{G}'}(\mathbf{q}) =& -\frac {\tilde{\omega}_{\mathbf{G}\mathbf{G}'}(\mathbf{q})}{2} \varepsilon^{-1}_{\mathbf{G}\mathbf{G}'}(\mathbf{q}, 0) In this way, the frequency integration for the self energy can be evaluated analytically. The fitting value `E_0` has to be chosen carefully. By default, it is 1 H. Hilbert transform ================= The self-energy is evaluated using the Hilbert transform technique described in \ [#Kresse2006]_ . Parallelization =============== `\rightarrow` ``nblocks = int`` By default, the calculation is fully parallelized over k-points and bands. If more memory is required for storing the response function in the plane wave basis, additional block parallelization is possible. This distributes the matrix amongst the number of CPUs specified by ``nblocks``, resulting in a lower total memory requirement of the node. ``nblocks`` needs to be an integer divisor of the number of requested CPUs. I/O === All necessary informations of the system are read from ``calc = 'filename.gpw'`` which must contain the wavefunctions. 
This is done by performing ``calc.write('groundstate.gpw', 'all')`` after the groundstate calculation. GW supports spin-paired planewave calculations. The exchange-correlation contribution to the Kohn-Sham eigenvalues is stored in ``'filename.vxc.npy'`` and the exact-exchange eigenvalues are stored in ``'filename.exx.npy'``. The resulting output is written to ``'filename_results.pckl'`` and a summary of input as well as a output parameters are given in the human-readable ``'filename.txt'`` file. Information about the calculation of the screened coulomb interaction is printed in ``'filename.w.txt'``. Convergence =========== The results must be converged with respect to: - the number of k-points from the groundstate calculation A much finer k-point sampling might be required for converging the GW results than for the DFT bandstructure. - the number of bands included in the calculation of the self energy ``nbands`` - the planewave energy cutoff ``ecut`` ``ecut`` and ``nbands`` do not converge independently. As a rough estimation, ``ecut`` should be around the energy of the highest included band. If ``nbands`` is not specified it will be set equal to the amount of plane waves determined by ``ecut``. - the number of frequency points ``domega0, omega2`` The grid needs to resolve the features of the DFT spectrum. - the broadening ``eta`` This parameter is only used for the response function and in the plasmon pole approximation. Otherwise, it is automatically set to `\eta = 0.1`. Parameters ========== For input parameters, see :ref:`gw tutorial`. References ========== .. [#Hedin1965] L. Hedin, "New Method for Calculating the One-Particle Green's Function with Application to the Electron-Gas Problem", *Phys. Rev.* **139**, A796 (1965). .. [#Hybertsen1986] M.S. Hybertsen and S.G. Louie, "Electron correlation in semiconductors and insulators: Band gaps and quasiparticle energies", *Phys. Rev. B* **34**, 5390 (1986). .. [#Hueser2013] F. Hüser, T. Olsen, and K. S. 
Thygesen, "Quasiparticle GW calculations for solids, molecules, and two-dimensional materials", *Phys. Rev. B* **87**, 235132 (2013). .. [#Kresse2006] M. Shishkin and G. Kresse, "Implementation and performance of the frequency-dependent GW method within the PAW framework", *Phys. Rev. B* **74**, 035101 (2006). gpaw-24.1.0/doc/documentation/h2.py000066400000000000000000000004671454550013000170410ustar00rootroot00000000000000# creates: h2.txt from ase import Atoms from gpaw import GPAW d = 0.74 a = 6.0 atoms = Atoms('H2', positions=[(0, 0, 0), (0, 0, d)], cell=(a, a, a)) atoms.center() calc = GPAW(mode='fd', nbands=2, txt='h2.txt') atoms.calc = calc print(atoms.get_forces()) gpaw-24.1.0/doc/documentation/hyperfine.rst000066400000000000000000000025741454550013000207020ustar00rootroot00000000000000.. module:: gpaw.hyperfine .. _hyperfine: Isotropic and anisotropic hyperfine coupling paramters ====================================================== .. contents:: Python API and CLI ------------------ Use the :func:`hyperfine_parameters` function or the CLI tool:: $ python3 -m gpaw.hyperfine --help .. autofunction:: hyperfine_parameters For details, see :doi:`Peter E. Blöchl <10.1103/PhysRevB.62.6158>` and :doi:`Oleg V. Yazyev et al. <10.1103/PhysRevB.71.115110>`. The results should be divided by the net mangetic moments averaged over an entire supercell. If one wants to calculate the localized HF effects on an atom or group of atoms in an anti-ferromagnetic material, one needs to divide the HF constants with the average magnetic moment of that atom or group of atoms. As, an anti-ferromagnetic system, overall should have a net magnetic moment of zero. G-factors --------- Here is a list of g-factors (from Wikipedia_): .. csv-table:: :file: g-factors.csv .. _Wikipedia: https://en.wikipedia.org/wiki/Gyromagnetic_ratio Hydrogen 21 cm line ------------------- Here is how to calculate the famous hydrogen spectral line of 21 cm: .. 
literalinclude:: hyperfine_21.py :end-before: assert The output will be ``23.2 cm``. It's slightly off because the LDA spin-density at the position of the hydrogen nucleus is a bit too low (should be `1/\pi` in atomic units). gpaw-24.1.0/doc/documentation/hyperfine_21.py000066400000000000000000000007501454550013000210160ustar00rootroot00000000000000import numpy as np from ase import Atoms import ase.units as units from gpaw import GPAW, PW from gpaw.hyperfine import hyperfine_parameters h = Atoms('H', magmoms=[1]) h.center(vacuum=3) h.calc = GPAW(mode=PW(400), txt=None) e = h.get_potential_energy() A = hyperfine_parameters(h.calc)[0] * 5.586 a = np.trace(A) / 3 frequency = a * units._e / units._hplanck # Hz wavelength = units._c / frequency # meters print(f'{wavelength * 100:.1f} cm') assert abs(wavelength - 0.232) < 0.0005 gpaw-24.1.0/doc/documentation/hyperfine_agts.py000066400000000000000000000001251454550013000215260ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): run(script='hyperfine_21.py') gpaw-24.1.0/doc/documentation/hyperfine_csv.py000066400000000000000000000005731454550013000213720ustar00rootroot00000000000000# creates: g-factors.csv from math import pi import ase.units as units from gpaw.hyperfine import gyromagnetic_ratios with open('g-factors.csv', 'w') as fd: print('Nucleus, g-factor', file=fd) for symbol, (n, ratio) in gyromagnetic_ratios.items(): g = ratio * 1e6 * 4 * pi * units._mp / units._e print(f'":math:`^{{{n}}}`\\ {symbol}", {g:.3f}', file=fd) gpaw-24.1.0/doc/documentation/introduction_to_paw.rst000066400000000000000000000031371454550013000227770ustar00rootroot00000000000000.. _introduction_to_paw: =================== Introduction to PAW =================== A simple example ================ We look at the `2\sigma`\ * orbital of a CO molecule: |ts| .. |ts| image:: 2sigma.png The main quantity in the PAW method is the pseudo wave-function (blue crosses) defined in all of the simulation box: .. 
math:: \tilde{\psi}(\mathbf{r}) = \tilde{\psi}(ih, jh, kh), where `h` is the grid spacing and `(i, j, k)` are the indices of the grid points. .. figure:: co_wavefunctions.png In order to get the all-electron wave function, we add and subtract one-center expansions of the all-electron (thick lines) and pseudo wave-functions (thin lines): .. math:: \tilde{\psi}^a(\mathbf{r}) = \sum_i C_i^a \tilde{\phi}_i^a(\mathbf{r}) .. math:: \psi^a(\mathbf{r}) = \sum_i C_i^a \phi_i^a(\mathbf{r}), where `a` is C or O and `\phi_i` and `\tilde{\phi}_i` are atom centered basis functions formed as radial functions on logarithmic radial grid multiplied by spherical harmonics. The expansion coefficients are given as: .. math:: C_i^a = \int d\mathbf{r} \tilde{p}^a_i(\mathbf{r} - \mathbf{R}^a) \tilde{\psi}(\mathbf{r}). Approximations ============== * Frozen core orbitals. * Truncated angular momentum expansion of compensation charges. * Finite number of basis functions and projector functions. More information on PAW ======================= You can find additional information on the :ref:`reports presentations and theses ` page, or by reading the :download:`paw note `. Script ====== .. 
literalinclude:: co_wavefunctions.py gpaw-24.1.0/doc/documentation/lcao/000077500000000000000000000000001454550013000170655ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/lcao/basisgeneration.py000066400000000000000000000016431454550013000226200ustar00rootroot00000000000000from gpaw.atom.generator import Generator from gpaw.atom.configurations import parameters from gpaw.atom.basis import BasisMaker symbol = 'Au' args = parameters[symbol] # Dictionary of default setup parameters args['rcut'] = 2.6 # Set cutoff of augmentation sphere generator = Generator(symbol, 'RPBE') generator.N *= 2 # Increase grid resolution generator.run(write_xml=False, **args) bm = BasisMaker(generator, name='special', run=False) # Create double-zeta RPBE basis where p orbital is considered a valence state # (ordinary dzp basis would use a smaller p-type Gaussian for polarization) # The list jvalues indicates which states should be included, and the # ordering corresponds to the valence states in the setup. basis = bm.generate(zetacount=2, polarizationcount=0, energysplit=0.1, jvalues=[0, 1, 2], rcutmax=12.0) basis.write_xml() # Dump to file 'Au.special.dz.basis' gpaw-24.1.0/doc/documentation/lcao/lcao.rst000066400000000000000000000275021454550013000205430ustar00rootroot00000000000000.. _lcao: ========= LCAO Mode ========= .. highlight:: bash GPAW supports an alternative mode of calculation, :dfn:`LCAO mode` [LCAO-article]_, which will use a basis set of atomic orbital-like functions rather than grid-based wave functions. This makes calculations considerably cheaper, although the accuracy will be limited by the quality of the chosen basis. The sections below explain briefly how LCAO mode works, how to generate basis sets and how to use them in calculations. LCAO mode is available for TD-DFT via the :ref:`LCAOTDDFT ` module. 
Introduction ------------ In the LCAO mode the Kohn-Sham pseudo wave functions `\tilde{\psi}_n(\mathbf r)` are expanded onto a set of atomic-like orbitals `\Phi_{nlm}(\mathbf r)`, in the same spirit as the SIESTA method [Siesta]_ : .. math:: \tilde{\psi}_n(\mathbf r) = \sum_\mu c_{\mu n} \Phi_{\mu}(\mathbf r) The basis functions are constructed as products of numerical radial functions and spherical harmonics .. math:: \Phi_{nlm}(\mathbf{r}) = \Phi_{nlm}(\mathbf r^a + \mathbf R^a) = \varphi_{nl}(r^a) Y_{lm}(\hat{\mathbf{r}}^a) where `\mathbf R^a` is the position of nucleus `a`, and `\mathbf r^a = \mathbf r - \mathbf R^a`. In this approximation the variational parameters are the coefficients `c_{\mu n}` rather than the real space wave function. The eigenvalue problem then becomes .. math:: \sum_\nu H_{\mu\nu} c_{\nu n} = \sum_{\nu} S_{\mu\nu} c_{\nu n} \epsilon_n which can be solved by directly diagonalizing the Hamiltonian in the basis of the atomic orbitals. Some detailed information can be found in the master theses :download:`1 <../askhl_master.pdf>` and :download:`2 <../marco_master.pdf>`. Basis-set generation -------------------- In order to perform an LCAO calculation, a basis-set must be generated for every element in your system. This can be done by using the :command:`gpaw-basis` tool, located in your :file:`{gpaw-directory}/tools` directory. For example, typing:: $ gpaw-basis H Cl will generate the basis-set files :file:`H.dzp.basis` and :file:`Cl.dzp.basis` for hydrogen and chlorine with sensible default parameters. Note that :file:`dzp` stands for ``double zeta polarized`` which is the default basis-set type. The basis-set should be placed in the same directory as the GPAW setups (see :ref:`installation of paw datasets` for details). For a complete list of the parameters do:: $ gpaw-basis --help For technical reasons, the basis set generator always generates the corresponding PAW setup, even if the latter exists on the user's system.
Use the ``--save-setup`` option to save the calculated setup along with the basis set. Running a calculation --------------------- In order to run an LCAO calculation, the ``lcao`` mode and a basis-set should be set in the calculator:: >>> calc = GPAW(mode='lcao', >>> basis='dzp', >>> ...) The calculator can then be used in the usual way. The ``basis`` keyword accepts the same types of values as the ``setups`` keyword, such as ``basis={'H' : 'dzp', 'O' : 'mine', 'C' : 'szp'}``. For larger systems, to get good performance, be sure to enable ScaLAPACK to parallelize the cubic-scaling diagonalization step and distribute many matrices. If possible, install and enable Elpa [Elpa]_ to further save time. See the :ref:`parallel keyword `. Example ------- The following example will relax a water molecule using the LCAO calculator. The ``QuasiNewton`` minimizer will use the forces calculated using the localized basis set. .. literalinclude:: lcao_h2o.py It is possible to switch to the Finite Difference mode and further relax the structure simply by doing:: >>> calc.set(mode='fd') >>> dyn.run(fmax=0.05) More on basis sets ------------------ A minimal basis set consists of one atomic orbital-like function for each valence state of the atom. Extra radial functions can be added to improve the span of the basis; basis sets are called *single-zeta* (sz), *double-zeta* (dz) and so on, depending on the number of such radial functions per valence state. It is normally desirable to add a basis function corresponding to the lowest unoccupied angular momentum quantum number. This is called a *polarization* function. *Double-zeta polarized* basis sets are normally required *and* sufficient to obtain results of reasonable accuracy; they are also the basis type generated by default. A dzp basis set for nitrogen will have 2s and 2p valence states, each with two radial functions, plus a polarization function of type d, for a total of 5 distinct radial functions. 
Each will be degenerate by `2l+1`, meaning that GPAW will use a total of 13 basis functions to represent the atom during a calculation. Transition metals, having s- and d-type valence states, get a p-type polarization function and thus a total of 15 basis functions. To plot already generated basis functions, use the :command:`gpaw-analyse-basis` command like:: $ gpaw-analyse-basis -f H.dzp.basis O.dzp.basis This will plot the basis functions in the specified files. If the ``-f`` option is not included, the script will look for the first matching file in the GPAW setups paths, rather than the precise specified files. Run ``gpaw-analyse-basis --help`` for more options. .. _ghost-atoms: Ghost atoms and basis set superposition errors ---------------------------------------------- In the vicinity of a surface with many basis functions, an adsorbate can "benefit" from the degrees of freedom from the surface basis functions, resulting in a lower energy compared to a calculation on the isolated adsorbate and thus too strong a binding energy. This error is referred to as the *basis set superposition error*. It can be eliminated by adding *ghost* atoms to the calculation on the isolated adsorbate. Ghost atoms possess basis functions as normal but do not otherwise affect the calculation (no projectors, compensation charges and so on), thereby ensuring that the same degrees of freedom are available to the wave functions in any calculation. A calculation with ghost atoms is performed precisely like a normal calculation, in the sense that the ASE atoms object should contain all the involved atoms including those which are ghosts, with the only difference being that ghost atoms have their setup type set to ``ghost``. It is stressed that the *only* difference between an ordinary atom and the corresponding ghost atom is the setup type.
Perform a calculation using ghost copper atoms and ordinary oxygen and hydrogen atoms:: >>> GPAW(setups={'Cu' : 'ghost', 'O' : 'paw', 'H' : 'paw'}, basis='dzp', mode='lcao', ...) Perform a calculation where atom 17 and atom 42 (designated by their indices in the ``Atoms`` object) use ordinary setups, while all other atoms are ghosts:: >>> GPAW(setups={'default': 'ghost', 17: 'paw', 42:'paw'}, basis='dzp', mode='lcao', ...) .. _poisson_performance: Notes on performance -------------------- For larger LCAO calculations, it is crucial to use ScaLAPACK and recommended to also use Elpa. See the dedicated section on :ref:`manual_ScaLAPACK` for more information. Below are some hints on how to obtain good performance for operations not related to ScaLAPACK. The *only* difference between the FD (grid-based finite-difference) and LCAO modes is the way in which pseudo wave functions are represented. The usual real-space grid methods are still used for the density and potential. The associated computations will therefore take a larger percentage of the CPU time compared to FD mode, where operations on the wave functions usually dominate. Thus it makes sense to pay some attention to the performance of these operations. This example shows the :ref:`most important parameters ` to achieve good performance with LCAO. The example is actually much too small to make much use of parallelism, but these parameters will provide good performance for large-scale systems. .. literalinclude:: lcao_opt.py .. note:: The following paragraph refers to the old ``FDPoissonSolver``. This has since been replaced by ``FastPoissonSolver`` which always performs well, and for which the paragraph does not apply. The multigrid method used in the FD Poisson solver relies on alternating interpolations and restrictions of the density on grids of different sizes. Make sure that the grid point count along each axis is divisible by 8, by specifying e.g. 
``gpts=(96, 96, 96)`` when creating the calculator -- this will *dramatically* reduce the number of required Poisson iterations in large or very oblong systems in those cases where the code would otherwise have chosen a grid point count not divisible by 8. By default, the FD Poisson solver uses the *Jacobi method*. To increase performance further use the *Gauss-Seidel* method instead, which usually reduces the Poisson iteration count by around 40% (ideally 50%). Again, please note that none of the above applies to the ``FastPoissonSolver`` which is now default. Advanced basis generation ------------------------- The class :class:`gpaw.atom.basis.BasisMaker` is the backend of the basis generation programme. Use this to create basis sets with specialized parameters that cannot be set using the command line interface. In particular, the basis generator relies on the *setup* generator to define the basis functions; therefore, any parameters that apply to setup generation will equally apply to basis set generation. .. autoclass:: gpaw.atom.basis.BasisMaker This example shows how to generate an RPBE double-zeta basis set for gold, in which the otherwise empty p-state is considered a valence state, and using a non-standard size of the augmentation sphere. .. literalinclude:: basisgeneration.py Miscellaneous remarks --------------------- In FD or PW mode, a single LCAO iteration is used to initialize the wave functions and density. Specifying a basis to the calculator in FD or PW mode can be used to increase the quality of the initial guess, but does not in any other way affect the subsequent iterations:: >>> calc = GPAW(mode='fd', basis='dzp', ...) In either mode, if a basis is not specified to the calculator, the calculator will use the pseudo partial waves `\tilde \phi_i^a(\mathbf r)`, smoothly truncated to 8 Bohr radii, as a basis. This corresponds roughly to a single-zeta basis in most cases. 
Depending on the unoccupied states defined on the PAW setups, it may be roughly equivalent to a single-zeta polarized basis set for certain elements. .. _los in lcao: Local Orbitals -------------- In LCAO mode, it is possible to obtain a reduced basis set of localised orbitals that can be used to define effective tight-binding Hamiltonians. Contrary to Wannier functions (WFs), the local orbital (LO) construction is not based on a projection of the Kohn-Sham states and does not require any physical input such as the initial guesses for the WFs. In fact, the LOs are obtained directly from a sub-diagonalization of the LCAO Hamiltonian. The LOs are constructed for any atom in the system through a sub-diagonalization of the Hamiltonian block of its AOs. This procedure yields a set of LOs which are atomic-like functions and are by construction atom-centred and orthogonal within the same atom (but not among different atoms). Furthermore, the LO representation can coexist with the original AO one, in the sense that one can sub-diagonalize only a subset of atoms in the system. This is useful if one is particularly interested in a limited part of a system, such as a molecular bridge in a quantum junction, or an adsorbate on a substrate. More details and examples can be found in the :ref:`los tutorial` tutorial. .. [LCAO-article] A. H. Larsen, M. Vanin, J. J. Mortensen, K. S. Thygesen, and K. W. Jacobsen, Phys. Rev. B 80, 195112 (2009) .. [Siesta] J.M. Soler et al., J. Phys. Cond. Matter 14, 2745-2779 (2002) .. [Elpa] A. Marek et al., J. Phys.: Condens.
Matter 26 213201 (2014) gpaw-24.1.0/doc/documentation/lcao/lcao_h2o.py000066400000000000000000000006761454550013000211360ustar00rootroot00000000000000from ase import Atoms from ase.optimize import QuasiNewton from gpaw import GPAW a = 6 b = a / 2 mol = Atoms('H2O', [(b, 0.7633 + b, -0.4876 + b), (b, -0.7633 + b, -0.4876 + b), (b, b, 0.1219 + b)], cell=[a, a, a]) calc = GPAW(nbands=4, h=0.2, mode='lcao', basis='dzp') mol.calc = calc dyn = QuasiNewton(mol, trajectory='lcao_h2o.traj') dyn.run(fmax=0.05) gpaw-24.1.0/doc/documentation/lcao/lcao_opt.py000066400000000000000000000011761454550013000212440ustar00rootroot00000000000000from ase.build import molecule from ase.optimize import QuasiNewton from gpaw import GPAW atoms = molecule('CH3CH2OH', vacuum=4.0) atoms.rattle(stdev=0.1) # displace positions randomly a bit calc = GPAW(mode='lcao', basis='dzp', nbands='110%', parallel=dict(band=2, # band parallelization augment_grids=True, # use all cores for XC/Poisson sl_auto=True, # enable ScaLAPACK parallelization use_elpa=True)) # enable Elpa eigensolver atoms.calc = calc opt = QuasiNewton(atoms, trajectory='opt.traj') opt.run(fmax=0.05) gpaw-24.1.0/doc/documentation/lcao/submit.agts.py000066400000000000000000000001661454550013000217020ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): run(script='basisgeneration.py') run(script='lcao_h2o.py') gpaw-24.1.0/doc/documentation/mom/000077500000000000000000000000001454550013000167375ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/mom/N-Phenylpyrrole.xyz000066400000000000000000000033031454550013000225610ustar00rootroot0000000000000020 Lattice="20.27909865320759 0.0 0.0 0.0 20.194227899817882 0.0 0.0 0.0 24.238478373740524" Properties=species:S:1:pos:R:3:magmoms:R:1 energy=-128.80117597214567 dipole="6.016048366334884e-14 8.648069526606396e-14 0.45606466144580454" magmom=-1.6765558975898102e-14 free_energy=-128.80117597214567 pbc="F F F" C 10.13954933 10.09711395 12.38421108 -0.00000000 C 
11.34646202 10.09711395 13.07460199 0.00000000 C 8.93263664 10.09711395 13.07460199 -0.00000000 C 10.13954933 10.09711395 15.16101197 -0.00000000 C 11.34465495 10.09711395 14.46510328 -0.00000000 C 8.93444370 10.09711395 14.46510328 0.00000000 C 10.13954933 11.21630306 10.16939448 -0.00000000 C 10.13954933 8.97792484 10.16939448 -0.00000000 C 10.13954933 10.80761594 8.85675649 0.00000000 C 10.13954933 9.38661196 8.85675649 0.00000000 N 10.13954933 10.09711395 10.96513494 0.00000000 H 12.26979504 10.09711395 12.51888639 -0.00000000 H 8.00930362 10.09711395 12.51888639 -0.00000000 H 12.27909865 10.09711395 15.00190089 -0.00000000 H 8.00000000 10.09711395 15.00190089 -0.00000000 H 10.13954933 10.09711395 16.23847837 0.00000000 H 10.13954933 12.19422790 10.60962753 -0.00000000 H 10.13954933 8.00000000 10.60962753 -0.00000000 H 10.13954933 11.45309491 8.00000000 -0.00000000 H 10.13954933 8.74113299 8.00000000 -0.00000000 gpaw-24.1.0/doc/documentation/mom/agts.py000066400000000000000000000026061454550013000202530ustar00rootroot00000000000000from pathlib import Path from ase.io import read from myqueue.workflow import run def workflow(): with run(script='domom_co.py', cores=8): run(function=check_co) with run(script='mom_h2o.py', cores=8): run(function=check_h2o) with run(script='constraints.py', cores=8): run(function=check_constraints) def check_co(): for tag in ['spinpol', 'spinpaired']: co = read('co_' + tag + '.txt') assert abs(co.get_distance(0, 1) - 1.248) < 0.01 def check_h2o(): text = Path('h2o_energies.txt').read_text() for line in text.splitlines(): if line.startswith('Excitation energy triplet'): et = float(line.split()[-2]) elif line.startswith('Excitation energy singlet'): es = float(line.split()[-2]) assert abs(et - 9.21) < 0.005 assert abs(es - 9.68) < 0.005 def check_constraints(): text = Path('N-Phenylpyrrole_EX_direct.txt').read_text() for line in text.splitlines(): if line.startswith('Dipole moment:'): direct = float(line.split()[-2].replace(')', '')) 
text = Path('N-Phenylpyrrole_EX_from_constrained.txt').read_text() for line in text.splitlines(): if line.startswith('Dipole moment:'): constrained = float(line.split()[-2].replace(')', '')) assert abs(direct * 4.803 + 3.396) < 0.01 assert abs(constrained * 4.803 + 10.227) < 0.01 gpaw-24.1.0/doc/documentation/mom/constraints.py000066400000000000000000000047241454550013000216670ustar00rootroot00000000000000from ase.io import read from gpaw import GPAW, LCAO from gpaw.mom import prepare_mom_calculation from gpaw.directmin.tools import excite from gpaw.directmin.etdm_lcao import LCAOETDM calc = GPAW(xc='PBE', mode=LCAO(), h=0.2, basis='dzp', spinpol=True, eigensolver='etdm-lcao', occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='N-Phenylpyrrole_GS.txt') atoms = read('N-Phenylpyrrole.xyz') atoms.center(vacuum=5.0) atoms.set_pbc(False) atoms.calc = calc # Ground state calculation E_GS = atoms.get_potential_energy() # Ground state LCAO coefficients and occupation numbers C_GS = [calc.wfs.kpt_u[x].C_nM.copy() for x in range(len(calc.wfs.kpt_u))] f_gs = [calc.wfs.kpt_u[x].f_n.copy() for x in range(len(calc.wfs.kpt_u))] # Direct approach using ground state orbitals with changed occupation numbers calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-sr1p'}, linesearch_algo={'name': 'max-step'}, need_init_orbs=False), txt='N-Phenylpyrrole_EX_direct.txt') # Spin-mixed open-shell occupation numbers f = excite(calc, 0, 0, spin=(0, 0)) # Direct optimization maximum overlap method calculation prepare_mom_calculation(calc, atoms, f) E_EX_direct = atoms.get_potential_energy() # Reset LCAO coefficients and occupation numbers to ground state solution for k, kpt in enumerate(calc.wfs.kpt_u): kpt.C_nM = C_GS[k] kpt.f_n = f_gs[k] h = 26 # Hole p = 27 # Excited electron # Constrained optimization freezing hole and excited electron calc.set(eigensolver=LCAOETDM(constraints=[[[h], [p]], []], need_init_orbs=False), 
txt='N-Phenylpyrrole_EX_constrained.txt') # Spin-mixed open-shell occupation numbers f = excite(calc, 0, 0, spin=(0, 0)) # Direct optimization maximum overlap method calculation prepare_mom_calculation(calc, atoms, f) E_EX_constrained = atoms.get_potential_energy() # Unconstrained optimization using constrained solution as initial guess calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-sr1p'}, linesearch_algo={'name': 'max-step'}, need_init_orbs=False), txt='N-Phenylpyrrole_EX_from_constrained.txt') # Direct optimization maximum overlap method calculation E_EX_from_constrained = atoms.get_potential_energy() gpaw-24.1.0/doc/documentation/mom/domom_co.py000066400000000000000000000042171454550013000211110ustar00rootroot00000000000000from ase.build import molecule from ase.optimize import BFGS from ase.parallel import paropen from gpaw import GPAW, LCAO from gpaw.mom import prepare_mom_calculation from gpaw.directmin.tools import excite from gpaw.directmin.etdm_lcao import LCAOETDM for spinpol in [True, False]: if spinpol: tag = 'spinpol' else: tag = 'spinpaired' atoms = molecule('CO') atoms.center(vacuum=5) calc = GPAW(xc='PBE', mode=LCAO(force_complex_dtype=True), h=0.2, basis='dzp', spinpol=spinpol, eigensolver='etdm-lcao', occupations={'name': 'fixed-uniform'}, mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off', txt='co_' + tag + '.txt') atoms.calc = calc # Ground-state calculation E_gs = atoms.get_potential_energy() # Prepare initial guess for complex pi* orbitals by taking # linear combination of real pi*x and pi*y orbitals lumo = 5 # lumo is pi*x or pi*y orbital for kpt in calc.wfs.kpt_u: pp = kpt.C_nM[lumo] + 1.0j * kpt.C_nM[lumo + 1] pm = kpt.C_nM[lumo] - 1.0j * kpt.C_nM[lumo + 1] kpt.C_nM[lumo][:] = pm kpt.C_nM[lumo + 1][:] = pp calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-sr1p'}, linesearch_algo={'name': 'max-step'}, need_init_orbs=False)) # Occupation numbers for sigma->pi* excited state: # Remove one electron from homo (sigma) 
and add one electron to lumo (pi*) f = excite(calc, 0, 0, spin=(0, 0)) if not spinpol: f[0] /= 2 # Prepare excited-state DO-MOM calculation prepare_mom_calculation(calc, atoms, f) opt = BFGS(atoms, logfile='co_' + tag + '.log', maxstep=0.05) opt.run(fmax=0.05) d = atoms.get_distance(0, 1) with paropen('co_' + tag + '.log', 'a') as fd: print(f'Optimized CO bond length sigma->pi* state: {d:.2f} Å', file=fd) # https://doi.org/10.1007/978-1-4757-0961-2 print('Experimental CO bond length sigma->pi* state: 1.24 Å', file=fd) gpaw-24.1.0/doc/documentation/mom/mom.rst000066400000000000000000000375451454550013000202770ustar00rootroot00000000000000.. _mom: ============================================================================== Excited-State Calculations with Maximum Overlap Method and Direct Optimization ============================================================================== The maximum overlap method (MOM) can be used to perform variational calculations of excited states. It is an alternative to the linear expansion :ref:`dscf` for obtaining excited states within a time-independent DFT framework. Since MOM calculations are variational, atomic forces are readily available from the method ``get_forces`` and can, therefore, be used to perform geometry optimization and molecular dynamics in the excited state. Excited-state solutions of the SCF equations are obtained for non-Aufbau orbital occupations. MOM is a simple strategy to choose non-Aufbau occupation numbers consistent with the initial guess for an excited state during optimization of the wave function, thereby facilitating convergence to the target excited state and avoiding variational collapse to lower energy solutions. Even if MOM is used, an excited-state calculation can still be difficult to convergence with the SCF algorithms based on diagonalization of the Hamiltonian matrix that are commonly employed in ground-state calculations. 
One of the main problems is that excited states often correspond to saddle points of the energy as a function of the electronic degrees of freedom (the orbital variations), but these algorithms perform better for minima (ground states usually correspond to minima). Moreover, standard SCF algorithms tend to fail when degenerate or nearly degenerate orbitals are unequally occupied, a situation that is more common in excited-state rather than ground-state calculations (see :ref:`coexample` below). In GPAW, excited-state calculations can be performed via a :ref:`direct optimization ` (DO) of the orbital (implemented for the moment only in LCAO). DO can converge to a generic stationary point, and not only to a minimum and has been shown to be more robust than diagonalization-based :ref:`SCF algorithms ` using density mixing in excited-state calculations of molecules [#momgpaw1]_ [#momgpaw2]_ [#momgpaw3]_; therefore, it is the recommended method for obtaining excited-state solutions with MOM. ---------------------- Maximum overlap method ---------------------- ~~~~~~~~~~~~~~ Implementation ~~~~~~~~~~~~~~ The MOM approach implemented in GPAW is the initial maximum overlap method [#imom]_. The implementation is presented in [#momgpaw1]_ (real space grid and plane waves approaches) and [#momgpaw2]_ (LCAO approach). The orbitals `\{|\psi_{i}\rangle\}` used as initial guess for an excited-state calculation are taken as fixed reference orbitals for MOM. The implementation in GPAW supports the use of fractional occupation numbers. Let `\{|\psi_{n}\rangle\}_{s}` be a subspace of `N` initial guess orbitals with occupation number `f_{s}` and `\{|\psi_{m}^{(k)}\rangle\}` the orbitals determined at iteration `k` of the wave-function optimization. An occupation number of `f_{s}` is given to the first `N` orbitals with the biggest numerical weights, evaluated as [#dongmom]_: .. 
math:: :label: eq:mommaxoverlap P_{m}^{(k)} = \max_{n}\left( |O_{nm}^{(k)}| \right) where `O_{nm}^{(k)} = \langle\psi_n | \psi_{m}^{(k)}\rangle`. Alternatively, the numerical weights can be evaluated as the following projections onto the manifold `\{|\psi_{n}\rangle\}_{s}` [#imom]_: .. math:: :label: eq:momprojections P_{m}^{(k)} = \left(\sum_{n=1}^{N} |O_{nm}^{(k)}|^{2} \right)^{1/2} In :ref:`plane-waves` or :ref:`finite-difference ` modes, the elements of the overlap matrix are calculated from: .. math:: O_{nm}^{(k)} = \langle\tilde{\psi}_n | \tilde{\psi}_{m}^{(k)}\rangle + \sum_{a, i_1, i_2} \langle\tilde{\psi}_n | \tilde{p}_{i_1}^{a}\rangle \left( \langle\phi_{i_1}^{a} | \phi_{i_2}^{a}\rangle - \langle\tilde{\phi}_{i_1}^{a} | \tilde{\phi}_{i_2}^{a}\rangle \right) \langle\tilde{p}_{i_2}^{a} | \tilde{\psi}_{m}^{(k)}\rangle where `|\tilde{\psi}_{n}\rangle` and `|\tilde{\psi}_{m}^{(k)}\rangle` are the pseudo orbitals, `|\tilde{p}_{i_1}^{a}\rangle`, `|\phi_{i_1}^{a}\rangle` and `|\tilde{\phi}_{i_1}^{a}\rangle` are projector functions, partial waves and pseudo partial waves localized on atom `a`, respectively. In :ref:`LCAO `, the overlaps `O_{nm}^{(k)}` are calculated as: .. math:: O_{nm}^{(k)} = \sum_{\mu\nu} c^*_{\mu n}S_{\mu\nu}c^{(k)}_{\nu m}, \qquad S_{\mu\nu} = \langle\Phi_{\mu} | \Phi_{\nu}\rangle + \sum_{a, i_1, i_2} \langle\Phi_{\mu} | \tilde{p}_{i_1}^{a}\rangle \left( \langle\phi_{i_1}^{a} | \phi_{i_2}^{a}\rangle - \langle\tilde{\phi}_{i_1}^{a} | \tilde{\phi}_{i_2}^{a}\rangle \right) \langle\tilde{p}_{i_2}^{a} | \Phi_{\nu}\rangle where `c^*_{\mu n}` and `c^{(k)}_{\nu m}` are the expansion coefficients for the initial guess orbitals and orbitals at iteration `k`, while `|\Phi_{\nu}\rangle` are the basis functions. ~~~~~~~~~~~~~~ How to use MOM ~~~~~~~~~~~~~~ Initial guess orbitals for the excited-state calculation are first needed. Typically, they are obtained from a ground-state calculation. 
Then, to prepare the calculator for a MOM excited-state calculation, the function ``mom.prepare_mom_calculation`` can be used:: from gpaw import mom mom.prepare_mom_calculation(calc, atoms, f) where ``f`` contains the occupation numbers of the excited state (see examples below). Alternatively, the MOM calculation can be initialized by setting ``calc.set(occupations={'name': 'mom', 'numbers': f})``. A helper function can be used to create the list of excited-state occupation numbers:: from gpaw.directmin.tools import excite f = excite(calc, i, a, spin=(si, sa)) which will promote an electron from occupied orbital ``i`` in spin channel ``si`` to unoccupied orbital ``a`` in spin channel ``sa`` (the index of HOMO and LUMO is 0). For example, ``excite(calc, -1, 2, spin=(0, 1))`` will remove an electron from the HOMO-1 in spin channel 0 and add an electron to LUMO+2 in spin channel 1. The default is to use eq. :any:`eq:mommaxoverlap` to compute the numerical weights used to assign the occupation numbers. This was found to be more stable in the presence of diffuse virtual orbitals [#dongmom]_. In order to use eq. :any:`eq:momprojections`, instead, corresponding to the original MOM approach [#imom]_, one has to specify:: mom.prepare_mom_calculation(..., use_projections=True, ...) .. autofunction:: gpaw.mom.prepare_mom_calculation .. _directopt: ------------------- Direct optimization ------------------- Direct optimization (DO) can be performed using the implementation of exponential transformation direct minimization (ETDM) [#momgpaw1]_ [#momgpaw2]_ [#momgpaw3]_ described in :ref:`directmin`. This method uses the exponential transformation and efficient quasi-Newton algorithms to find stationary points of the energy in the space of unitary matrices. For excited-state calculations, the recommended quasi-Newton algorithm is a limited-memory symmetric rank-one (L-SR1) method [#momgpaw2]_ with unit step.
In order to use this algorithm, the following ``eigensolver`` has to be specified:: from gpaw.directmin.etdm_lcao import LCAOETDM calc.set(eigensolver=LCAOETDM(searchdir_algo={'name': 'l-sr1p'}, linesearch_algo={'name': 'max-step', 'max_step': 0.20})) The maximum step length avoids taking too large steps at the beginning of the wave function optimization. The default maximum step length is 0.20, which has been found to provide an adequate balance between stability and speed of convergence for calculations of excited states of molecules [#momgpaw2]_. However, a different value might improve the convergence for specific cases. If the target excited state shows pronounced charge transfer character, variational collapse can sometimes not be prevented even if DO and MOM are used in conjunction. In such cases, it can be worthwhile to first perform a constrained optimization in which the electron and hole orbitals involved in the target excitation are frozen, and a minimization is done in the remaining subspace, before performing a full unconstrained optimization. The constrained minimization takes care of a large part of the prominent orbital relaxation effect in charge transfer excited states and thereby significantly simplifies the subsequent saddle point search, preventing variational collapse. Constrained optimization can be performed by using the ``constraints`` keyword:: calc.set(eigensolver=LCAOETDM(constraints=[[[h11], [h12],..., [p11], [p12],...], [[h21], [h22],..., [p21], [p22],...], ...])) Each ``hij`` refers to the index of the ``j``-th hole in the ``i``-th K-point, each ``pij`` to the index of the ``j``-th excited electron in the ``i``-th K-point. For example, if an excited state calculation is initialized by promoting an electron from the ground state HOMO to the ground state LUMO, one needs to specify the indices of the ground state HOMO (hole) and LUMO (excited electron) in the spin channel where the excitation is performed.
All rotations involving these orbitals are frozen during the constrained optimization resulting in these orbitals remaining unaltered after the optimization. It is also possible to constrain selected orbital rotations without completely freezing the involved orbitals by specifying lists of two orbital indices instead of lists of single orbital indices. However, care has to be taken in that case since constraining a single orbital rotation may not fully prevent mixing between those two orbitals during the constrained optimization. .. _h2oexample: --------------------------------------------------- Example I: Excitation energy Rydberg state of water --------------------------------------------------- In this example, the excitation energies of the singlet and triplet states of water corresponding to excitation from the HOMO-1 non-bonding (`n`) to the LUMO `3s` Rydberg orbitals are calculated. In order to calculate the energy of the open-shell singlet state, first a calculation of the mixed-spin state obtained for excitation within the same spin channel is performed, and, then, the spin-purification formula [#spinpur]_ is used: `E_s=2E_m-E_t`, where `E_m` and `E_t` are the energies of the mixed-spin and triplet states, respectively. The calculations use the Finite Difference mode to obtain an accurate representation of the diffuse Rydberg orbital [#momgpaw1]_. .. literalinclude:: mom_h2o.py .. _coexample: ---------------------------------------------------------------- Example II: Geometry relaxation excited-state of carbon monoxide ---------------------------------------------------------------- In this example, the bond length of the carbon monoxide molecule in the lowest singlet `\Pi(\sigma\rightarrow \pi^*)` excited state is optimized using two types of calculations, each based on a different approximation to the potential energy curve of an open-shell excited singlet state. 
The first is a spin-polarized calculation of the mixed-spin state as defined in :ref:`h2oexample`. The second is a spin-paired calculation where the occupation numbers of the open-shell orbitals are set to 1 [#levi2018]_. Both calculations use LCAO basis and the :ref:`direct optimization ` (DO) method. In order to obtain the correct angular momentum of the excited state, the electron is excited into a complex `\pi^*_{+1}` or `\pi^*_{-1}` orbital, where +1 or −1 is the eigenvalue of the z-component angular momentum operator. The use of complex orbitals provides an excited-state density with the uniaxial symmetry consistent with the symmetry of the molecule [#momgpaw1]_. .. literalinclude:: domom_co.py The electronic configuration of the `\Pi(\sigma\rightarrow \pi^*)` state includes two unequally occupied, degenerate `\pi^*` orbitals. Because of this, convergence to this excited state is more difficult when using SCF eigensolvers with density mixing instead of DO, unless symmetry constraints on the density are enforced during the calculation. Convergence of such excited-state calculations with an SCF eigensolver can be improved by using a Gaussian smearing of the holes and excited electrons [#levi2018]_. Gaussian smearing is implemented in MOM and can be used by specifying a ``width`` in eV for the Gaussian smearing function:: mom.prepare_mom_calculation(..., width=0.01, ...) For difficult cases, the ``width`` can be increased at regular intervals by specifying a ``width_increment=...``. *Note*, however, that too extended smearing can lead to discontinuities in the potentials and forces close to crossings between electronic states [#momgpaw2]_, so this feature should be used with caution and only at geometries far from state crossings. .. 
_ppexample: -------------------------------------------------------------------------------------- Example III: Constrained optimization charge transfer excited state of N-phenylpyrrole -------------------------------------------------------------------------------------- In this example, a calculation of a charge transfer excited state of the N-phenylpyrrole molecule is carried out. After a ground state calculation, a single excitation is performed from the HOMO to the LUMO in one spin channel. No spin purification is used, meaning that only the mixed-spin open-shell determinant is optimized. If an unconstrained optimization is performed from this initial guess, the calculation collapses to a first-order saddle point with pronounced mixing between the HOMO and LUMO and a small dipole moment of -3.396 D, which is not consistent with the wanted charge transfer excited state. Variational collapse is avoided here by performing first a constrained optimization freezing the hole and excited electron of the initial guess. Then the new orbitals are used as the initial guess of an unconstrained optimization, which converges to a higher-energy saddle point with a large dipole moment of -10.227 D consistent with the wanted charge transfer state. .. literalinclude:: constraints.py ---------- References ---------- .. [#momgpaw1] A. V. Ivanov, G. Levi, H. Jónsson :doi:`Method for Calculating Excited Electronic States Using Density Functionals and Direct Orbital Optimization with Real Space Grid or Plane-Wave Basis Set <10.1021/acs.jctc.1c00157>`, *J. Chem. Theory Comput.*, (2021). .. [#momgpaw2] G. Levi, A. V. Ivanov, H. Jónsson :doi:`Variational Density Functional Calculations of Excited States via Direct Optimization <10.1021/acs.jctc.0c00597>`, *J. Chem. Theory Comput.*, **16** 6968–6982 (2020). .. [#momgpaw3] G. Levi, A. V. Ivanov, H. 
"""Excitation energies of the n->3s Rydberg states of water with MOM.

The triplet state and the mixed-spin state are each converged with the
maximum overlap method starting from ground-state occupation numbers.
The open-shell singlet energy is then obtained from the spin-purification
formula E_s = 2 E_m - E_t.
"""
import copy

from ase.build import molecule
from ase.parallel import paropen
from gpaw import GPAW
from gpaw.mom import prepare_mom_calculation


def excited_occupations(f_sn, hole, electron):
    """Return a copy of *f_sn* with one electron moved from hole to electron.

    *hole* and *electron* are ``(spin, band)`` index pairs.  The input
    occupation numbers are left untouched.
    """
    f_new = copy.deepcopy(f_sn)
    f_new[hole[0]][hole[1]] -= 1.
    f_new[electron[0]][electron[1]] += 1.
    return f_new


atoms = molecule('H2O')
atoms.center(vacuum=7)

calc = GPAW(mode='fd',
            basis='dzp',
            nbands=6,
            h=0.2,
            xc='PBE',
            spinpol=True,
            symmetry='off',
            convergence={'bands': -1},
            txt='h2o.txt')
atoms.calc = calc

# Ground-state energy and occupation numbers (one array per spin channel)
E_gs = atoms.get_potential_energy()
f_gs = [calc.get_occupation_numbers(spin=s) for s in range(2)]

# Triplet n->3s: remove one electron from homo-1 (n) spin up,
# add one electron to lumo (3s) spin down
f_t = excited_occupations(f_gs, hole=(0, 2), electron=(1, 4))
prepare_mom_calculation(calc, atoms, f_t)
E_t = atoms.get_potential_energy()

# Mixed-spin n->3s: remove one electron from homo-1 (n) spin up,
# add one electron to lumo (3s) spin up
f_m = excited_occupations(f_gs, hole=(0, 2), electron=(0, 4))
prepare_mom_calculation(calc, atoms, f_m)
E_m = atoms.get_potential_energy()

# Spin purified energy
E_s = 2 * E_m - E_t

report = (
    f'Excitation energy triplet n->3s state: {E_t - E_gs:.2f} eV',
    f'Excitation energy singlet n->3s state: {E_s - E_gs:.2f} eV',
    # Experimental values: https://doi.org/10.1021/acs.jctc.8b00406
    'Experimental excitation energy triplet n->3s state: 9.46 eV',
    'Experimental excitation energy singlet n->3s state: 9.67 eV',
)

with paropen('h2o_energies.txt', 'w') as fd:
    for line in report:
        print(line, file=fd)
"""Orbital-free DFT total energy of an isolated atom.

For every element in ``elements`` a single atom is placed at the centre
of a cubic cell and its total energy is written, one ``symbol<TAB>energy``
line per element, to ``atoms_lambda_<lambda_coeff>.dat``.
"""
from ase import Atoms
from ase.parallel import paropen
from gpaw import GPAW
from gpaw.mixer import Mixer
from gpaw.eigensolvers import CG
from gpaw.poisson import PoissonSolver
from gpaw import setup_paths

# Look for setup files in the current directory first
setup_paths.insert(0, '.')

# Usual GPAW definitions
h = 0.18
a = 12.00
c = a / 2

# XC functional + kinetic functional (minus the Tw contribution) to be used
xcname = '1.0_LDA_K_TF+1.0_LDA_X+1.0_LDA_C_PW'

# Fraction of Tw
lambda_coeff = 1.0

# The same name selects the setups and labels the output file
name = f'lambda_{lambda_coeff}'

filename = 'atoms_' + name + '.dat'

elements = ['N']

# Use a context manager so the result file is always flushed and closed,
# also when one of the calculations raises.
with paropen(filename, 'w') as fd:
    for symbol in elements:
        mixer = Mixer()
        eigensolver = CG(tw_coeff=lambda_coeff)
        poissonsolver = PoissonSolver()
        molecule = Atoms(symbol,
                         positions=[(c, c, c)],
                         cell=(a, a, a))

        calc = GPAW(mode='fd',
                    h=h,
                    xc=xcname,
                    maxiter=240,
                    eigensolver=eigensolver,
                    mixer=mixer,
                    setups=name,
                    poissonsolver=poissonsolver)

        molecule.calc = calc

        E = molecule.get_total_energy()

        fd.write(f'{symbol}\t{E}\n')
Q]b*:322888xxxH^^^^^ޒ%K/_m۶YfKDDD.Zii}%, JٳǎKLLڵ /pɪwTaSS>W^U&FDDRu9PpffdkaÆjj._ܷo9iii׮]kٲen62w00 /A722R gzzzmZH$m۶Qo'NPFjJ|W&2Ly2???++M:@rrr7iYֈT$\!\{n 8F*?~xŊmSSSªm*((W^njѢ322&""ҵZ*))y>jppbillkxxsʷUtgUiJ.?|022iӦ 4h׮_]M-[䮮/,OJozwWAx~Š <q1((0#EǪCaa^ϯꊫWUej2 /Ag(*r`h!3d^ϯjTCNN7T**  << :}{2믿>{,SSSUE}H=Z,XLDEn߾ 88888X1gϞ:t9wWƍmll]\\ʷb}-&^PP ^)Z(@ DD*++k[RRgu¢k׮xkM׌[sv({bHD?BDTׯptt0wu}GGݫ#]5 9ȣj\[1 A*wYXXt͛f̘ѥK6mXXXTmM׌e`e:#jc16U ..SLO<-_c͕{ծ-FazhSSGL`UDDt+Wݸq;rH~>#FFF6l}L&߽{C ݻ׬Y7oV>Ն"Jg^P~q&Sfuza DDdX5& QZt ""2 ,:^+qZt""2,ckD!""AXj=sBRfqah#1K|):;` F:+D D &[ZZFFF޽{Wx:EbbXnnnӦMS,:r}s, :GyK.=sرciӦ]>zHt.{IqQ,'ai^4=`$: ̺uZn]>پ};wN8Q`*EEEn%%%&LxWsRRR4SRA2QAƔ/zٳg;88wU"}ܹSʠmڵݥvM" 4… :|𫯾*Hf͚.Ξ=;x`[[[D痞^mƏxdǎ(NAϜ9s>Ttrrjҭ[O޵kWūRǏK :yzM 2Ϥs&$OޯFH)^gY;t萖vիWؿb#GмysGGOMM511ӧ7|ewww[[\}n￿qƕ+Wř3gt}k 4ʪAe֭[|jѢEdd\.?Mfmm }M{RO pzzz&M{|ܼyWXiӦ?e˖ 4?5w*Q\xMXX^,w>-?]m;D-Z*(z۷o?f=zhӦL&SL?PE~W[lYRRv횙ԩSuaME~ZhcŤL&~תhiiiN߱K1M… mllؼN^kݺuzj?e哣FrqqILLiSʓ-2227n\-,]DQ21KV,--;w\>zur~7o_v/+~d#lmm>}9w\Sӿ=z8vXjs U8--mV}qHH+kP{pz =-T`jj?Dw^6`+\3kRQQСCϜ9榼ZyL{5kLyidff}\ ϟ/S>U|_|ERL&*kP{!=-(,,,--[|Ij`ZZח,YthFc1v-֮ 4HDQ\\<| ϫUqWy=ݴ߹sGy;w7n.lllLLLN|1a0QЁ58x`JJG-ܴiՈ#4@45 i[iiQ>o߾g.QUW_MMM-{%JXXXteΝu_?Tq]XXX=z_;+ L&{D ;Y=ڰaC71D Np6HLgϞ1co̙3ldɒG*չ|ѣ7_T>J߳g͛cee5w\wj*TiӦÇرcܹU7X#EEEk֬9yٳg5լyZܥ:(๡x䉓S߾}QMmܸՍTG-v;ɋTl|8Y> x֬YM6U^_W'o޺u ӧQr<%%:$$勞ߗ*_^u{{ KC IIIAeU.--322z饗n^6GeggO*NΔ[zuttT*uqqVNMRT;#0n4F2 I ,ťm"ɔVyW^=~… qa6fp|""E|\.߽{7E899mllRi||Ǐ/^6555(((!!!,,ڦk\>~xm.sG{6'""C$tJKK̞o>qqqIII/^|S׮]ϟ߮];j8p`bbѣm \.oٲ37 TF'%Sg흂j')A;`?ONDD,6 fQ BDD=`5;#y%bXMN'tkz""p&`QHDT?&|'C^;A=54=ຩ / DDkIS߉r) Ѱf\gF D!""bXMNt`%\2F$""Ma. 
\6c DD;I߉R(H߰\C?kXAHGX<[erQ/BZC8$: 0,BDD8///**W"*/ qrrptt}^|Fuy֭ZT2cQHg_zuvvv/͕H$3fXn̙3ϟ?i5ׯߟoڴ1,,l˖-}TchDDD\/d2111Ri++:j45{l##|diii PmB-}t&&8Ք kDd2bRsYY{,KXb#*醞`U>z(''gɖ&MRŶӦMqݻw?䓋/Θ1CiU7 7qY`""6S!%%CFڴis!C|W,--n5a)+6`DDU{27;vXbbKPPP=pmݻ799yȑ{Ue[Jш77EDD!2{*?rKðaZj`tD"ٷo9ܼy322^v[գ֔&aJ_hiDDCj陞^$I۶msrr… ڵST_N::tIUK0 ,DD% 6Q ].Qc͚5;{ӧO-,,sN8ammmmmFkMbJ\b6/w 99mllRi||Ǐ/^6555(((!!!,,ڦx~M2|ΝG]pae75<Wp/Q}0 tGuX':i{j}y~WpEwMDT?L O_nFAV0S85/tw"=`KGt||D!"ZaXMz~!A9̅ "?iv1V0 &"o &"2tIl{,v {j;DD0U`""%g N Dg!")E!"ыn!ғ08/LDqS<':Հt \q a-: Q0U'):JO p^^^TTD"122U^daa8xL޳gOKK^xaԨQn/u}AH%zZ^}v;;;__J$3f[n̙ϟ?}MO}yƍW\?<~XÿuAH5r$?JUFGGԀO3g>jG+ϵݕ^O{5`jjjbbFS'NkR7o{n?Z BDDGL>>jgH$^^^(nX2\B IQ rŽ@ 8F*?~xŊmSSSªm̙3?;udlly戈 5f&f=^t""6n.+ٙ;w|d4u^zښ{{{ƖPo?r^&:a^^cXmHzjvtEWY ߆D5fX*\˸|E!"ge01 DD p7 dAo}$AXR:㼩uFD8/Kt""Bkx.YRt""C0J't\t""!ua\nv`': `j1|":znV{?Lt""àU=a|̿{^t""w ?£hvh': 5` kX/:Q}\LA5z f74 \PE!" r$>0AXh"Dg!"_EhD!" '蟃Fh$: b4޽Ћoh "qԁ0[Eg!"G|ީ",C^ IDATBDT_TzmY.o޼yʕΝj׮݋/hffv޽_|qͳqǏ6sO::ɓ,JV~+kDKd2⇘R ^xQy2)) ƍPGg//HhhoZYa9riӬ*\;;{-++ҵIK[n<֭[Hg>'𞈉.pϛ7O_{5ͅѤҼ˗[ZZN4I222xyyi9j&31s>o6YJǏOKKeFyxx߶m[C=Biiȑ#8ܳgOw]~q{xOt""mHEEEիnwoڴ)66@~mllRi|||~~~jj%$$x77nX޾[P 0_LD!xŋ.\آE 5z7UVJJ*xdqppbillkxxsʷMNN|j33P ޔ +/N`;cVIc8%^xo1nܸN:ic`{4FODAP_{q7h@;6t~w$"h4)ԟ `YS|*:ih҅SN :ŋ̙sAm"=!>܂-qQt"ޭ[WFDD?ࢬ/r6/X<E!";/ :} pǎO:lG";~߈BDTGT_O:al54irm"cUX5sQ,: Q]P}67733oܸakkHzw'tBDTT_{jժ(66O> FhV"6GW={[n/a>S=zڵkZMJ=TnCRx0.H=wܴ///m2= fBDuz XXXh#a!BT6`&: vE&eEh7t٢i'aeffVq~~~ttÇ5j!Ot""CUinժ_Νcbb233._yC:;;JKΓ1y!BDdY_|>KJJRܰaC33̙aZ=ROAx'܌~H[D^.,,۷o{yyٳiӦd(X sg8*;, C0s0Gt""`;, q:e"ZBDyLTg8O4ޏDDTS,T[0,~A 0Ֆ9x*: `& ~^Z pN+=o%V~ϯ DDlkkѬYiӦ;wNuE Bt""P}>tХKz;vx{{%$$<}ʫ}hDm&:Pۊ+n޼e33q5k,::ŋZ+Hrssfdd899YXX8::44T_ kpIK[n<֭[jTV`L9S|IQeT Ͽqƶmʫ3stѣG999'O4i*[eddrz ` DD޽{CBB*_iСCN@E5uppHIIv{͙3gРAX}? 
DDuC=!CTV}5B&=PqqqǎKLLtqq b BaaahhhAA;4U-zfMdATCz=gtqر7ML__IʃO#6UV ,8x`e+ :̙3iiinnn \Nlމ[% aUCjpӧ.\}{ӦM/_K/޽O>N陞$I۶msrr*[xުDڠ-8!8DDSБgϞtRzzKΜ9#H}+W4nxI`mmDMd2d~~~VVVeQF>|x߾}]mҁN9#:ީhѢ?Pyp?{7o^ttcݻwloo`666R4>>Nj/Vl`ʔ){쉈vڥXͭCjoy "Pt""}R 4HIIyfryzzz 4}w~~~vvv殮Ν+699@bbb"##UQTH!Y"wy,,:Qih_<<fwڤIwڵܹs۶mpС.]h#%=ڏh/(DD/c8|ҥKO{oo&H vp-I!qwfID "aixy2plG*SN :ŋ̙sA-g:n |!:QEnE-[b7_}NOO֭իW#""LLw/xWacy.rEg!"L,+u)""R"ϟ??((˗///;v(66O>BcLh"+wt,2y; blٲ{zzΚ5K&vҥKK4i>Ǣ!1$_c\]~=4X*/Ξ;wnZZZIII@@@LLf>C+xe/vB'Yzw65h666ڈbXXa/.,dI ꩹{WwabR/ 0)c֒+\BDOifh#߿۶mׯ_'Nh#A`֒x @t"_ğ!1C0|ȷ!ļ;۷wuu533Fr6 _M38D!zVU_[l=}t2kL%S"ґ38c"OAK$Tࠍ.`z]e!Eg!Wŷ߅!uފ7 ,Q뢳Q'|&LdY}O333u|=cdBDulxX1_~ҥi/V^^^TTD"122U^daa8x*M0522ݻ*.%fcv J`"Ҟ\侏o¨dͪٳ+[駟j:_?>lذΝ;?y$--M*/MJJڻwo.]4irΝ߼yĉ>>>7Pgg .]iӦ.ƯGpډBDuM)J{g8ەţ\/d2111Ri++.xdRR7 z= 66A#3J>uwtܹxUspp055511pi֭'ܺuK`<a DT!mZQ*s 6ܻw炂sΕ/J=ə>> Rvk$"2#|"&Fo1t=y2ѣG哶*nwׯAAA)));wbЂC=222p D!j ""**F{xOyfetQeffD z1z#GXYY-Xz̙T77-LWIa^>Q}pgVbWPt=`OOڴ HڶmS ÇHMM}V0i:s/.yǣ0*1px˟I5D&)%ʪv5ɾ5ZJeVŋ7{0"_4!: .:PŃ8T9\.߽{7~ӦM~~~mcc#JSSS{ 555(((!!!,, oqƈ:tP*AFAqxhgE#E""ďQ&2`VކvmFM6MyRZZ ...))ŋ?vrrڵ۵II8p`bbѣk4^Z`]suի %"w6,_l("2pp5PB,fak-|""OK4'A'F(:B"<ֽ0 ⡨DDd(faVtPj 0e`t٦aA=VF*ND|s>r2,d Fhpu=z ֜ :\侅a-T}ʓBx XO\5_~x1Q®:Sbڍ0QZ* C#<~m"Oaܦ` c&a$/ H8C_k#!*yT5^)AI/!s0Gt".B8j;,&nvAEg!" 
P*^Y0`=Yz/BDL'x$!i BH<|g0Q8q\t&S)JoYŢe"s#j![u=֧"Ut"ҩ؀ ZLu#ŷc0": H1`dLAQSz.˱$N6B#YHf`Fv`(hc|1` D];sg;,~ G,Cx[6$@=m&A#:-U_b7NMT8YHJQr,^x ZOfl@Dg!"BU\k`lX`mރ=ZJD:XiZ a6,%(~| : V#~|/k{_|Ql l¦o,DT+7qs4F:T_b7NMld"0Qt"RCiiMؤ4f¨!2KP": ,;Nt`7NM.91ZVlccHKW+NUEEEJ$##\!!!NNN̬ &[ZZFFF޽{Wn0Fl˰Lt"R1{'#.]|8vXgg .]6999;;ںMQa vh7CE!ͱ[,%K2LCLL TZʊqtttK/^<`ƍj4Lo?:R/_?*u_~m<^Tq 988TuʓnݺFSTx#0nBDA;Dg<=-(,,|QNNɓ---'MVj0`#!BDϚF0 zz XjQ&ݛ3gϠAjo<  TZBt"8 YiH3A<+),(Q}øcǎ%&&U;z044`ǎϜdiST,h9cP&: l_ $hYO8[>LիWGGGKRZ(,,lժ+[hȐ!'NHKKb*M)UzOoxop F~N:۩OA{zzצDҶmۜV(..>|xFFFjjjU۔2ں_qql6]V_T~H0גlmm_Md29*kiiQ>V)ѿ CTa,ޢh\!\{n 8F*?~xŊmSSSL2eϞ=ۮ]븹uСڦq^.":QR7"!:NhZ*)A~~~vvv殮Ν+699@bbb"##Ui zQi"o-y*Kk| ^ ,ŗ1mC|x(: Q0o7 fzz Ht 8Du{x/GpDߪVH?w P : Q]+aAlϣ27l,"6c",JGzs4b} IDATD[ExC4n'vwV_b& L`]܍B,DuJ*Rc~koU 0Q5vl?cYld-׮8 zMAJLȰ_㮪LhԞi,"R\1Lhq9C*:En cE<`Gzw&)7~Y 0Q͸| [@q C4Y}q4QWb8;=&atLE+®0!Mt"٘-:~a&RS؊#1BNIs1WtL{c>H!zLy':>ė1Pc86#bk={o~owEgHD8,: ^}ѷT_b&1sDg!6nB0VH3v<Eg!n MQW84:蟏(:]~K'a,HZu R ɘ,:\¥X}ULa(E?,HH\r '\iW2aZᢳ̪&Lպ0ژ:0n36"Pt}D #.")Cyq"&&#V_b&"+Xd#1iҗr "":Ab&. $ƿm`3ği,AډbEEEJ$##\!!!NNN̬ &[ZZFFF޽{5 fdd4~x*D0 ^+J)J'c@xYtի۷o}~innD"1cƺufΜyyӧOWҥKϜ93vظÇoڴk׮=zfoѣ 4/Cg(BqOb9#/Eq \/d2111Ri++.xdRR7*|Q7l`ee5n8UGGz77I'BTc77^ez66A0SSS nZy2 [g.Zy&-uC1t+): Q \ЕXi#q~Gaaaiii^^---U,dee}'O42?)҅}Br,2?7Gh,ƊRwp TTSj7wޜ9s||| #"""~mooo%R~ď0Wk`&:Q.݊=Ct:E)hL@;v,11%(( ;v(?_sݥK@ҵfh#~=ѓCI? $NjpffPgO*?NY+P= cB, ڈND?(C8E:_LWF m۶/_lÇgdd$''Ͽvړ'O|`֭/¡CoCTdR,H/q&09B!]|߿Fd21YYYʕUYiiQ>]m۶MKKS3p={Ο?IaX$"gc6XHQר= prݻdggHNNwtt0p@gggoooTŋ+MMM JHH 0eʔ={DDDovR֡CF)J*gbbL"舎{ñl [щ΋Z{5~~~vvv殮Ν+699@bbb"##+/Abɋ"r_忊B⟟bk;jI{K_t/no㢪>FaXcG6PAHxUEQz34.ݫe@a%.)LI1-J2* oI "$ǹ3!p_1gaw|SA}G @cQ]%񈗀>;3'qon\/Q]ܝ0>tJ8LE؎LȀc ` #!ϣ BV=*! 
b;3vo{9I,؎38j/4K?繘 0 c;$|v`> F0q A2< nBَCS8<LH؛p"V 1ۉH#`<ƻ<[’8 AXK|IZвOD2b*q&4L q8C8AM  Pe4F'!l!MDN@q%B:s[>Gb$5q)B7ssW!A_WOx~O`v¾t#~;o7RyPL>k&0qa;aY&byG~4-En$"aDsV2w݂-˱JRP=/x]q7t'<λ5yWq* )-BۉIK=x<rԼW~la۷I/Q&Oq>31s5V?#^IIAd$n“'(+CT].h(l%%@'2{~útoDаd?cg۷07n07m}OBAuTEz 0G.r bv0--v)Tj- !|6 t!&be\^a{֠DD12v#1T}5* 0h .pC\#E H_,㎾x ׮J,bun:2AC|x Q$N>,޽9Vuv}v#%4>+FjalG#,d;@0,33ѱM֮].=;tИɓ''$$MOH?`b_*X=ׅ KHބMS0e"lgUb8Dv.FؿH$555I' xǎWt{)--𐳣ׯ]s%__߿cs8ϼ\?)!} Mۣ8z&a\̝Y0f;;D |sMa k75"j/Ny`QF9::2WTuu֬Yˋlllϝ;'?6]MH'hñ8٘=mavЂ38-MG9s0gF(;vlvv񜜜JKKB[[ۼyBaVV C BȳC1/f4g!+C XLt?"e!+ Yylނ-`;/zzz>>>=D$^\[[ѱ`seddvK55pԩ%K(!D.tp1Wp%q1q: ~?\"1M9|/8D":u&M0}tkkk>_VV\[[ +++(((%%%""kinnnfΜiaaꪧw}^zظ3X4MH4.ùx7{P6?U\AN6a __g8܀RGG<888==@RRRjjjIIIss?gggf>00СC-|NMEGG޽mnݺlnni&33nR&q>?˸܂>p1 ] E(??w;ܽ?*̀+ 0!RBE(h4FhG8v8[**QY;Sb`'g8uƍ(@FXP&DuMdU06 ac 3Щ{W T%5CݯW V5m`1=+*j 0!}U!>xP:UըnDcan;0L`bCr vB@w{:A! 0!*B B!,L! 0!*B B!,L! 0!*B B!,L! 0!*B B!,L! 0!*B B!,L! 0!*B Dׯãl󫈚+Vx{{x<S^^.T(XZZjii=)@dmmm;;躺N>}zʔ)/^TAB!Rl۷9-:--//x+W411ٻwOnnMmڴqŻwȸv횞g}lٲXmmš!!ND"`W^ҒTg&ʴV\ӄjSgQOүKy~ՅW!AzLSSSCC˥GPUUL&''7Ž K!`E455FFFjkk/]TB!f2''秦ZYYihh&$$0y!Qs`Yffc444]=44S]]]]]qƸ8[[ÇYuݺuMO!dSж:::T9‹/:tkذaW\ǏݸqC:ɓ's :::F!/~8//[:xm#ϝ;wԨQׯ?s̳Vnmm3gNAA@ 744qƴiӤsAEEȑ#j0!~;vlvv񜜜JKKB[[ۼyBaVVV*;677W2{t!S===l"d'kkke:::,Xpܹtٮ6cΜ9222sN>mbbbmmݳ Bzp$_ ڵk222'M 00ښ뗕%''777oذ6+++(((%%%""eN<uǏ3ۻ 9rP(LJJ0!Rѹt`fibbI \/\TT$ݖ:t;vMEGGKoll6552dH ! 
L ]LD!9h%BX@Ba`B!T !P&BX@Ba?^b7p8ϳŋNjdd4tP4%W0RV6l؂ 7Q=:@dmmm;;躺J (5mR<J/ CBB,--KxYsp8*2K)1ٳg9ddd_3N>=e]]]}}}oo/v ܾ}ȑ#OheOWϟ>}Gvuss󈈈Jԣ/_,88XKK+66??~|MMM!%"rnoée&;::fΜ̊G3g444|3YPPp㕞w:)))LMM~Di{ԔR(1<+O^*LhJJojjutt׿*9)1}F%ONN7nУ7y*sΝ[\\jkkrs~'P(\Jۉh]&liiijj*--eM1녧LJJp8RHy]][n{T+++ [[ۄE>FݐƌsܹYf۷vZZZPP۹蘗'ωeee---<t444]=44,_i;^Y wX33LGGGtxHo|vuC>zʺux<^zz:3;s̻wFGGꫯVUU}{i\ZΙ3@ ۳+m'ëO(Z;wQ֯_\"& ޽׮]"Ç̶jj[)SX[[;`=7nܘ6mtN@@@ 9rպ7ӯ;;;˖ &TWWKo 6޽{7==LJP6o֭gxk6w[ZZڰaΞ=b`Y=zmtttՙl9ᙻ˞ew;^HXU,,, k+m'ë/K IDATO(d'kkkէ/'f̘!C#;yԩj//> (s!sikkkͪ˸H$7m] ##|ҤIRRR"""]yŊ .>}e˸\c.\;||&L4hP(/Uyeٲe'OǙ]ZMyxV% eee6lPCe#affizbaaꪧw}YYYk#J UUUeccsP3=[{{`f)СC,H'OlhhKo߾o``r|~bbX,Vi9fו:gPbI&r\;;_~Y? y'Ǐ700Դw^ /HMMM |AE2p$}gB!ȢsB B!,L! 0!*B B!,L! 0!*B B!,L! 0!*B B!,L! 0!*(ݻ9а\p͛7UiӦt(66VWWWE:|||윷z̬GZ*00HzlݺuD)#DAT Qĉ+W>!RT Q^x@{{tNVV֋/CCCl///gook.֎;6f---''Ǐ+0,,M4);;[=z٩ɓ'ޫVklld---jkk?ݚԈN<caaqAEVTT,X̌>|MMMrrZpaQQ}T"}JBQO?իW>|xʕ & >Y!++KCCcԩ~W_}```P^^,]f uss۱cӧO3Kϟ?p222RRR̓LeqAA#GN>={!C+yeffIwZ__WVVVVVv5]]]O>{Wd[ٳ\";JCCߗH$Vr ҥN0B㑑k9ѡ에Aa #FJW8qȑ#ۙɊ/_\fMy}CH$gVd'Ovss.vNy}}}2:uvvv:ٙ[n?K$?ԩ.Ӷq8?sviĉ3fv5B QK.Θ1ɓ'yyydִ2eŋjkk˞ H._K/ ^^^#F6L[[@ ;w3gРA!!!999ʕ+E|>A\̙3Ņiir3<L8qĈFv7o޵kO?$?dݻ׋ *(ٸq|||&O`nٲÇbqo阙544H'd<ɓ'Z[[-,,dv҃wڥ%㣏>Wp&&&KMMM@)eZ{Z{{e>]XXXnn/mZ|UBB (qi.i=U$7,xxNNNǏ/~i2`O(x|BVA %L("""JX^^L&@"""u\[ZZzyygdJJJ7nfJ=zUV---5jDGG?>P ǃJ<$O(xЫ]Яԭ[;wcǎ]hQKDD$YRգw+x +L u&ԦqΝpuzc4DDD,8S"### ,:ߤ>/'ܹ@\]qLDD}Է ZG /t:p98ηCD~kܼ֬nݺ سgOĉ˗/WkH*d ͕?,,p@DDnݺ4G/_,ͼ,`MѓE~>ݻ! [WiHљ4ǧQFʟg̘nݺS~_}`T x.RUt""1Lҵk׍7fff|aÜLLL\\\yϟ+?.]ʝ^::.dFl^%:{>v;w|ݺu4Zjqqq˖-p™3g|r|˖-/^1W#T x0üt[Bt""$''SNnѢE˖-/]ԬYUZYY8;;[՘xѢW)#^. 
^Ot""1(᚛jժ֭[ښo_7oY?Eܺ5BNAD$ƽ{={G}С .;v @1HPt.^J}#A8pI֭l۶-((hm۶mܸIQRR@g/%Ջ6YoA DDnݺǏOAHNo=3` &"""m‡1؆m /DD-[ hDBDDO,l$Fn6A0{` cH[jAfhvD!""}_ ` l)H]N4oKIDevرSN8|Ÿyk֬`]vWnjddk``~e+`sF]%:i}X[[v1"";v6[]WrR}Tp":QQQƱ |̘1O< B9sE?(>|u ?r䈡/cǎ[[[+ߝ9sqÆ :t[n)'Nd>};::)g177o۶mHHÇWr=zsNӴi^RO z EEkٲ3/ B7oU+ǔRwqq5j߮]z*_޽{RJSL)Ǐs΍7ʗ޽Szz\.wwwׯ_1 ,"<<  ( _Yq4h #8":Iի>|hѢbllܣG2Yf S ˨T:utԩSK322)xP(A׭[WsNNNxxy3g^@A&?e/#[':IQrr kew*'䤦ZXXfBϷ,P(RSS\Tf+WVZ=}4;;VZg)xٳܯo-xW.-P}؛ЦÇ;wlaaaeeYrGp*钻wfffN8;w9vXYQ؃VTZٳ_ӧ LSȥK E5# 6L<[nK,133xbeUUkB&cBhҤh={z{{ϙ3FaiiY9&&&m۶ݽ{ŋ{>}zҤI[L&k׮ݑ#G̙ƍz)sɕ+WveT"(w}w_~ ;#,`"*U9\jhhXf͂nnn_}O>ѣǛoh"ssٳg3… }}}.]qƙ_xyyyyyM6ɓ'ϟg5+U+Saڱ _p!|U-7z"*},^}z*u5|S6jٳNNNҥK]vmݺήgϞ#Gl߾}͛7rqq={v^{7ntڵ^_ ӎiiicǎ]|yBBSPPЌ3dNl+1R}|zHDD:F}#ԎnԨQRRҥKw?|~a3/|~%I\.OKK+x|G n޼?O97޸pǏ YZ1C/c~oVAz""N Ւ1ਨ(BUnݺ޽{ssq~P ""mRbY΂V^-߉7g\Q> `#^(B[*УUn<5XHDDEXT VhDS;i .C|?BDDz? 4'p+PO.hM@ ܂-_5o5!""5ְ>㢃c5wS.袞3o@DD9VVlt &`&lR@ X,Ww>b SH}x֫ *TDD-x@.;E!""~-^LDD~/ DDXeg3:$hqaTDD-xp숎Kt""5,7`DDkX%艞(ѢNa1q#6BDD:'a4AxXDD'aT5vlt T&czt TK6B""Rp @G"""UIXTW$,a`BDD\1q36NADD\qZt""zQG۶m5d:/5^""1qXsY`… Qϳ HDE].I}'aiG1O?uЂpd˱\%"" bO~~~ݺuO:Uձ^GTd""^?;vuP:.QHh_ !HP\.OKK+ximm]ݴ{8¢ d^] ]5ygp ^^IB}$7)Ȼw,gkT0dL Hg,!bcc ^zykٹs{ŕWOaW 5 wAWT.G""I-]At""&-W|bŊk׮M>]t2Y9BDDIhmΟ?߷oߤ$fff[ly뭷JKRa\]BDD%S_HryZZZKkkk]vmѢE``qHHȎ;BBBWd2oVtۖK}!,x׫W˗/߼yX9[nW^MLL,~i𘉙Q #FXhEEDDxզM/6m;v,55U9D.s1zLTSBDDbH---LUŋ_|ibbiiiii{qLЎwcǎСC#Gc DDZztjIOdL+ZHt7ma{E!""X"%XDDD>,`ěrȏ DDi,`d-yA0a 6wE!""b &l>/ X< X< X8&"7,`I H߰b "`""=  XBb`&2yc,""}|` DDv%?e)22_LNNɱuwwիfRGJ (Z|+m۶\ҥKM6^zJ>}zիO4>RG2кp`E\sP[ȑ# ֪U+CCݻw˖-IIIǏwwwWG8E!"w xŊϿo>gg-Z! 
hiñzW`$: ^PAK xc$FxAzGǎ`'$٢Z333ԩ3jԨѣG7h@TB{Gz?b DDK.贴={:uJPxzz=zȐ!666ȡ&Z]ш>7q f)ǀݻu֭[^~ظO>Gѣ `x=':IXΝ  yW^L*|7QDD(`?/T(6m/`c0..BAz _um۶ݽ{J*VG *|'crMTbqHHHpps v:f̘~j&bELA7Ft""#`tvvrss4i2f̘#G֪UK9D7 5B8 WYIMMVڰaƌӦMu^t,˸;E!"/ _~cƌ\:V:SYj?h-: YEg lEHta500vYf=zT^'""fС\L$F7;IѡC;w<8??j?˗/7o޼r'==]cY|YJYw.44$&&QF;v;wn||FBgϞQQQqqqbcc[llٲ>ui49o-Y]]rr:Rdd}4o144Tc$B7f6fKYBJ.`cc㴴"߿ommHcll\x˗5@:< xS""mWr{{{S׮]`ҨQ}eeeeeei,t|>Xt""* xɒ%W\qss9s\._vǍ7.\@r<iӦݼy3(((99ۣFRqz EX$:_-Ξ=;<<<77g͚5SGHOOς/^l2吷W^FFFO>-~i2uoiY1&h": .|_䮜gϞYYY#RZZZlllK//nݺUjZj5nܸA/~i:vta$N_D!"eֿǏ?|pϞ=prZ튮,!+**SRRf̘qqU**&&f„ ֭۰aѣ'LPbʨ x_""-nРAǎ=<pZ\p,DD\NPaىp9I#!®N$: N4$> X|Oc:/I"",`1+R0E!"R=z Z1zTHGݦMI<]c DDT : VZ.]@&*Ol{Q J.cǎݸq̮ IDAT駟;vK rsf` DDT=044tÆ N3foAW^VAN X[t""'ٳg>}ZڵWqzXL+0Hŋ֭kѢE"/C6mŋoܸqƍSS?֭[UV3g:JS|:䐋BDDSr͟?ذa'n߾}ٲe/_1cƓ'O8o᭩*: 9uVjՊL^[ԫW/==]HUd}o`A"Eg!"\u OQ(6m[.Ǐ٩+03E!"|# 1e˖ {={.]k.ǎk۶sREfhvډHJk-:w\vvq۶m-ZԥKFFF&&&Z<\ @@DTzBxiժUe2: a g>xmejժiQ}7c_Kt""}WrܺuKO-G7`Zyo""MyRӦM/]u1_kAZ~ӧ%pJoBD$i"OªYfjjjvvQ;r9sCrrrV^ݸqp1.bwtEl HDw V޽?>99'%%E9u;aܻxWt""}T߿k̚5ѣ*ѡCر#66vǏ) NRLHH8~x߾}^2*oR(( >rrr={feeebb "44C~ǵkw@nnn֭|]ݻWHudR!p9\(Q""!iHW~7oA*7;~'"R^z3fxw4H8.~ɭ,c .:"wAլYSW7p9'-L`nci+DzdѢZ2MmxQy3锒="""((hƍɤB <-IDG.5kfggYZZ~߱\N}@5Zw nHD=rHu$mL TCsgD""%:>@vܹHLӧ{pCEDQVvvgϤj4 X%;Xt""pž}?gdd_txf0؂-k,ΊBD#^[yyyʟsss:M"ɩOEg!"%_L At 3|vWa DDZLe`]537G#"NJJRބ2-- { Ip$A+|}UUD!"V=Z&8eHj2YBD!"R/woԱ> _ᫎ yZJ""t|h : |!+9i6 x,: "G633~G$Qx>H˔8BbTB_K;s?E!":5#1=#Xt""U.hh+` H;#`MHދDD:'ۣ 苾v+ͼ,rbd!߅]KcoxNDkᚲw/Bwtu.'0I\>wc,A>!F0H 4N8/zg_f0+Y&$$c)LE"&0¡8^ۢA/a i^{-DDҕ8k] µˉL(zwщ$0+X7cu,rbJAlX 0A}C̳8{'.ЭE]u\N,`v9ٍk>øIA.rq?шnVm6e\N,`q;|[L~0H_8387F7t.\N,`1?.p s=<<233Ë׶m[;;lܸ1!!!22e˖/Lz9"t;G#7z^ D"}8gЩ3:{û!NW*_3f)"޽޿[{w (Z#D=^ZmІT 4򒒒.]jff6aщ1{6f'!-^T{>7roF4'ˍ{1KhSwٳj֬o5jD%ZS0e 0_>ćNp^=|(7p# Qq>qm0Zp[Óiii/Khu={޽{~m^~R<6L !8~ B]oxo:AtF|Ĝ(DE#:Bh}۠MTQ^Wj\{(22ӳe9 +++A5:zhIXD{Gq(ʨ,Np*lN|ao%K0|x1}q2. 
&/84Gh-[4\Rt$,77,ԴI&ׯ_WU$"e1#!NXNM*PUҴ`L C@0xld_+r/\IGzs4of𘈉Mє{M@[ZZzyyi\^甔ϗx0I]ԭc1 8cXe/:(xC-yl-}lc\wɸvv_Rsn4hfpBP34 TNP(bccw@Ϟ=ԩneeߧGCh b*$?G".C=5-Т1vRkvk1j8݅ە ? 'Cyyy*wA֭۾}kڷo?gΜMX&R<]s8͕{/?RzmVEn4hRKkpB -ё KMXDj%&n"61q1nfnps[4]܍G]-Cꣾr;7DC?c.LXX#G Z\N,`" {'h ܸHW+\롞 \ꡞ#K>,' d$ ! H{ HH@B<_ *TG▸cEb"x1 W),rb oMT&U)}Npڎpt`Ze;#$^xT*R? 6jF:ʢK\N,`"iBV<ĻAYH:iG}`jYom k+XY梿d #HOEj2S<}' ((<c;vF효Y FiJD.'0֑CuSlr#[<ԞH}HE,am3sWA*b#kX0V>|ef^%x!!/ i@F*Rӑgx\%,`ejVUZ5T_VEUckڰˉL\]q$(PمiH{rȕݜx,iHC^lS涁! JeT6 lanseˠԝJdIѓx.,~eTKx{9"Cq#`dlKhHZ&"c|zMIDATP_pLDD$ H0,`"""XDDD` &""LDD$ H0,`"""XDDD` &""LDD$ H0,`"""XDDD` &DG(A %LXa'x'""!pry@@ԩSEg!""R#`\VںYV^Ecu1K99y_\.OMMUNIMM02*+p"""!vss(߽{733sĉ6عsͱc4H---J&MҳgOoo9s0i)P(BCC رc*U||| аf͚E&i\.4hP˩S;xPDDD"iJDDD0,`"""XDDD` #QFUZ¢GW1633:thbb#;vLoիWWGJJJ 455dYo9V],S` lX 7n\\\\MC)G6,Sڳg;88'**JyTIjdcǖcRGEvÇҥK}||.^XF~?mٲ͛322> .XXXH9ҪU?0j1ܹYnZui7`k`ê<6E?>z:u\zu͚5aaaRmX񦦦ӧO{ƍ"##[l)Hـʕ+s ?8q+Wf̘rTvA߿v]tQtppݷo+?SkixLfo[f` GV]ʭ Xذ# ذa/W$uDR6_5k0IRp&L(J=t\rͭM޹s˗pNNN}&&&/_x$///sss ܾ}[i]i67`Iy={@fDWF6JKK~I*R$$ xׯ[N&{!p ӧ-Z((?>bX$-,,h4Ѐwӣ8ݽ}]RNMM ###555}}}/P|ѴP5҅☘V.݇gVMHPC92ߏwY19bLyyI}}FS bܺu ihhRqLf0VVV/_KJJHJojkk?~/4Նpaa!66fϞ=8&SIj966ގUՑtNNNMF"BBB2||Ἴ<6IPTɓ'</,,f#qE777==+W$&&lKKKcc7t:ĉ ,L= A\֭[]Z1(VytGNLL|Iee%6ҽxijQP&IRSSSGt*ҠFGGl6F344ܻwoOO|E?h4L&@*rsڵFD@H$K㾴Re*1(VytJOa?6-((ڼy3vvv^b$]+UA x@p 4`Ѐ@p 4`Ѐ@p 4`Ѐ@p 4`ЀPҥKgieweffӺrي#'OhP5" Tk.LXЀXYaa7N8455wubc[QAAAzzlEELz2"99Ͽr`@`eQQQ}Q^^;wLݻ_gggB lڴ AD"xo&LR]ۖOOO2loo8۵kvA"*++Տbh4LjiiQъ lQggju䬬Obܭtuu.MlYYV/..vttp8ŊT@Q*::zjjJENEqqq]]]V ..^ D"Q[[4?\zF a{SRRPuuumjj_~% 7o666rKJJt֭[×MAP;::(W^y]Ecbb Bcll<88888#l6###sݸqwQMI}}= mmmӧe2Yrr2Ӻ1ʄkٜK-,,b րvvv_}vDmĤ$l3%%E!Ͼ;vX\\6Ai{ݶmM"ɌVgQWWW(.駟RTujkkMTjgg'MIVV sss9992 ;-,,Tٴb@ |KZsY!!!+ \%hVVVVr-[`7BݻwСC+mll|}}oݺ%?PQ;;;> L&{v0b魷ޢP(؈^DD?΢mmmb2WDQt߾}``-%PU,..f2xxx*]~^Z"⒞ݭ:LMM*`@`eo񆷷Oll,DD"TĄ|Xq/HCdbbb~~Rq&''%I~~>IٳgB+ݲei4] (b-%HDH{{{ooodduΝA3g0 ::"(5: cw^04ںy֙FGG_0::ݨڦMPThRIIIǎ4 ㊃cccT*UөeffST*onVVvuZ$33s kARRRt311affF3`4>Y'H׮]?o߾<ÃG D"777:ۊ鹻WUUG*n`0Rmrsuu---i-ι(bעd2W 577 
dǏbJOO߷o_HHÙIKKP(NRg?<000++ጏ$Isss?NӅBa{{; gϞ]شC9rD(ZXXOXw9??…={H$M@{.rmmP(UzG'%%577mxxfŽzzz$((hٜ</,,DX``F΀XYRRRdd$' rl6رc۶mSgڀ<444>>S]\\ݻgggwԩ?ÇXgRgȌlkkk%訨Č OOϰ0u&Tݡ&?...666^z8..L&+݊Fstttcbbzzz}||)JR*SSUl FW/@`oooicX|>{֛Kxx3g@djj:883>>ۻyfm-|ymM ׯ_OHHeh| G=zT(WVVj"r~Mk28p7a8 0%ON}IENDB`gpaw-24.1.0/doc/documentation/ofdft/ofdft.rst000066400000000000000000000561441454550013000211170ustar00rootroot00000000000000.. index:: OFDFT .. _ofdft: ============================================== Orbital-free Density Functional Theory (OFDFT) ============================================== This page introduces the orbital-free DFT method in a comprehensive way. If you are already familiar with the theory and want to learn how to use the orbital-free GPAW module you can skip the introduction and go directly to the section running the code. Theoretical introduction ======================== Orbital-based (Kohn-Sham) density functional theory --------------------------------------------------- Density functional theory (DFT) has become possibly the most popular method for electronic structure calculations. This is due to its balance between accuracy and computational cost. However, the success of DFT mostly relies on the introduction of the Kohn-Sham single-particle ansatz. [#kohn-sham]_ DFT, as formulated by Hohenberg and Kohn in their seminal paper, [#hohenberg-kohn]_ is an exact theory. In principle all the properties of a system of interacting electrons in an external potential (for example that determined by the charged atomic nuclei) can be derived from the knowledge of the electronic density `n` and the universal energy functional `E[n]` , where the electronic density can be obtained variationally as the density that minimizes `E[n]` . The general form of this functional is .. 
math:: E[n] = \langle \Psi | \hat{T} | \Psi \rangle + \langle \Psi | \hat{V} | \Psi \rangle, where `\hat{T}` and `\hat{V}` are the kinetic and potential energy operators, respectively, and `|\Psi \rangle` is the many-body wave function. The exact kinetic energy functional is then .. math:: T[n] = \langle \Psi | \hat{T} | \Psi \rangle. In practice, the form of the universal density functional is unknown and we must rely on approximations. As we have already mentioned, introducing the Kohn-Sham single-particle ansatz is the most popular strategy to tackle this problem. Kohn and Sham proposed that the electronic density can be expressed as a sum of the density of a set of `N` non-interacting single-particle wave functions, also called orbitals: .. math:: n(\textbf{r}) = \sum_{i=1}^N | \psi_i (\textbf{r})|^2. The Kohn-Sham energy functional (in atomic units) now becomes .. math:: E_\text{KS} [n] = -\frac{1}{2} \sum_{i=1}^{N} \langle \psi_i | \nabla_i^2 | \psi_i \rangle + \int \text{d}\textbf{r} \, V_\text{ext} (\textbf{r}) \, n(\textbf{r}) + \frac{1}{2} \int \int \text{d}\textbf{r} \, \text{d}\textbf{r}' \, \frac{n (\textbf{r}) \, n (\textbf{r}')}{|\textbf{r} - \textbf{r}'|} + E_\text{xc} [n(\textbf{r})]. The first term, denoted Kohn-Sham kinetic energy functional `T_\text{s}[n]` , now depends explicitly on the orbitals. All the other terms, including the exchange-correlation term `E_\text{xc} [n]` , depend only implicitly on the orbitals, because the density is calculated from them. Applying a variational principle to the expression for the total Kohn-Sham energy (e.g. that it is minimal with respect to changes in the wave functions), this formulation in turn leads to `N` Kohn-Sham Schrödinger-like equations (one per orbital): .. math:: \hat{H}_\text{KS} \, \psi_i (\textbf{r}) = \epsilon_i \, \psi_i (\textbf{r}), that need to be solved in order to obtain the orbitals. 
The aim of orbital-free DFT is to avoid the need to solve the `N` equations by removing the explicit dependence of the kinetic energy term on the orbitals, effectively obtaining a kinetic energy functional `T[n]` that depends explicitly only on the density. The motivation for this objective is straightforward: by reducing the complexity of the problem from `N` particles to one "particle" the computational cost is greatly reduced. In particular, the scaling law for the time cost versus system size is reduced from cubic (Kohn-Sham DFT) to linear (orbital-free DFT). The question that follows is a no-brainer: if orbital-free DFT is so wonderful why is it not the standard implementation of DFT? Orbital-free density functional theory -------------------------------------- An orbital-free formulation of DFT is more in line with the original spirit of the Hohenberg-Kohn theorems, [#hohenberg-kohn]_ whereby the universal energy functional can be cast in terms of the electronic density alone. The success of the Kohn-Sham method relies on the fact that it provides an accurate description of the kinetic energy, which is the leading term in the total energy. All the many-body effects neglected by the Kohn-Sham independent-particle formulation are "pushed" into the exchange-correlation energy functional, which is then estimated by e.g. local-density, generalized-gradient or hybrid-functional (which typically include Hartree-Fock exchange) approximations. Therefore the accuracy that can be achieved within the realm of orbital-free DFT calculations heavily depends on the quality of approximated orbital-free kinetic energy functionals. As a historically important development and to illustrate how critical the quality of the kinetic energy functional is, consider the kinetic energy functional of the non-interacting homogeneous electron gas, also known as the Thomas-Fermi kinetic functional: .. 
math:: T_\text{TF} = \frac{3}{10} (3 \pi^2)^{2/3} \int \text{d}\textbf{r} \, [ n (\textbf{r})]^{5/3}. When the Thomas-Fermi functional is used to represent the kinetic energy of electrons in matter, one obtains results that are quantitatively quite far from reality but, more importantly, are also qualitatively incorrect. For instance, DFT calculations based on the Thomas-Fermi functional fail to reproduce molecular bonding of simple diatomic molecules, such as H2, N2, O2, CO, etc. [#teller]_ On the other hand, calculations based on local-density approximations (LDAs) for the exchange-correlation functional (i.e. at the same level of approximation as the TF functional) used in combination with Kohn-Sham kinetic energies have been quite successful at describing qualitative and quantitative properties of matter, such as shell structure, molecular bonding, phase diagrams, elastic and structural properties, and so on. It becomes clear at this point that the prospects of orbital-free DFT becoming a successful electronic structure method rely on refining the approximation of the kinetic energy functional as an explicit functional of the density alone. We shall come back to this issue later on. Orbital-free implementation in GPAW: reusing a Kohn-Sham calculator ------------------------------------------------------------------- Many years of development and popularization of DFT have left us with a variety of efficient codes to solve the Kohn-Sham equations and an active community hungry for new functionals. It would then be a great advantage if OFDFT calculations could be carried out reusing the computational tools already available. Levy et al. [#levy]_ showed that it is possible to reformulate the orbital-free problem in such a convenient way. The total orbital-free (i.e. explicitly density-dependent) energy functional can be expressed as .. 
math:: E_\text{OF} [n] = \underbrace{\int \text{d}\textbf{r} \, n^{1/2} (\textbf{r}) \left( - \frac{1}{2} \nabla^2 \right) \, n^{1/2} (\textbf{r})}_{T_\text{W} [n]} + J[n] + V[n] + E_\text{xc} [n] + T_\text{s} [n] - T_\text{W} [n], where the first and last terms, known as the Weizsäcker functional, are just subtracting each other. `J[n]` and `V[n]` are the classical electrostatic energies due to electron-electron and electron-nuclei interactions, respectively, and `E_\text{xc}[n]` is the exchange-correlation energy functional, whose approximate form can correspond to any of the usual LDAs or GGAs developed for Kohn-Sham DFT available for GPAW. The kinetic energy functional `T_\text{s} [n]` is the non-interacting Kohn-Sham kinetic energy, and the last two terms combined are known as the Pauli functional, .. math:: T_\theta [n] = T_\text{s}[n] - T_\text{W} [n]. Levy et al. showed that a Kohn-Sham-like equation, derived variationally from the equation above, holds for the square root of the density: .. math:: \left( - \frac{1}{2} \nabla^2 + V_\text{eff}(\textbf{r}) \right) \, n^{1/2} (\textbf{r}) = \mu \, n^{1/2} (\textbf{r}), where `\mu` is the negative of the ionization energy. By making the equivalence between a single orbital and the square root of the density, .. math:: \psi_0 (\textbf{r}) = n^{1/2} (\textbf{r}) , with the condition that `\psi_0 (\textbf{r})` renormalizes to the total number of electrons in the system, i.e. .. math:: \int \text{d} \textbf{r} \, |\psi_0 (\textbf{r})|^2 = N , we can rewrite Levy's expression in terms of this orbital, .. math:: \left( - \frac{1}{2} \nabla^2 + V_\text{eff}(\textbf{r}) \right) \, \psi_0 (\textbf{r}) = \mu \, \psi_0 (\textbf{r}), and use GPAW's Kohn-Sham solver truncated to a single orbital with its occupancy set to the total number of electrons. [#lehtomaki]_ This effectively orbital-free equation can be solved self-consistently using GPAW's iterative algorithms originally designed to solve the Kohn-Sham equations. 
The development of accurate orbital-free kinetic functionals will focus on obtaining a close approximation to `T_\text{s} [n]`. Historically, proposed orbital-free kinetic functionals incorporate only a fraction of the von Weizsäcker term, parametrized by `\lambda`, and the full Thomas-Fermi contribution, or the other way around, where the Thomas-Fermi part is considered to be the correction to the Weizsäcker term. This dichotomy is known as the "`\lambda` and `\gamma` controversy". [#ludena]_ Both OF approximations to `T_\text{s} [n]` derive from the more general form .. math:: T_\text{s} [n] \approx \gamma T_\text{TF} [n] + \lambda T_\text{W} [n] , and the corresponding Pauli functional is .. math:: T_\theta [n] \approx \gamma T_\text{TF} [n] + (\lambda - 1) T_\text{W} [n]. Since one could choose to construct a kinetic functional which does not explicitly include the Thomas-Fermi part (Thomas-Fermi is only one among many possible OF kinetic functionals), we can express `T_\text{s} [n]` in a more general form as .. math:: T_\text{s} [n] \approx T_\text{r} [n] + \lambda T_\text{W} [n], where r stands for "rest", referring to the approximation to the total kinetic functional minus the included fraction of Weizsäcker. In practice, the "rest" term will be included in the code as part of the definition of the exchange-correlation functional, and the Weizsäcker contribution will be included via an additional parameter (see the next subsection on "λ scaling"). This more general form leads to the Pauli functional expressed as .. math:: T_\theta [n] \approx T_\text{r} [n] + (\lambda - 1) T_\text{W} [n]. `\lambda` scaling When using only a fraction of the Weizsäcker term, the orbital-free equation needs to be rearranged in the following way due to convergence issues and practicalities of the implementation: [#lehtomaki]_ ..
math:: \left( - \frac{1}{2} \nabla^2 + \frac{1}{\lambda} V_\text{eff}' (\textbf{r}) \right) \, \psi_0 (\textbf{r}) = \frac{\mu}{\lambda} \, \psi_0 (\textbf{r}). The new modified effective potential has the form: .. math:: V_\text{eff}' (\textbf{r}) = \frac{\delta}{\delta n} \left( T_\text{r} [n] + J[n] + V[n] + E_\text{xc} [n] \right). Because of practical considerations, the term `T_\text{r} [n]` is included as part of a parametrized exchange-correlation energy functional when running GPAW's OFDFT module, as explained in detail in the section on running the code. Read through the next section to learn how the kinetic functional is defined in terms of how the present GPAW OFDFT implementation works. Construction and suitability of orbital-free kinetic energy functionals Although in principle any kinetic energy functional available from LibXC can be used to run OFDFT calculations in GPAW, we have only tested extensively a parametrized combination of Thomas-Fermi and von Weizsäcker, in combination with LDA exchange and correlation. On the list of ongoing research is the derivation of more accurate kinetic energy functionals. The recurrent (parametrized) form of the kinetic energy functional used in the examples below is .. math:: E_\text{OF} [n; \lambda , \gamma] = \lambda T_\text{W}[n] + \gamma T_\text{TF}[n] + J[n] + V[n] + E_\text{xc}^\text{PW}[n], where the fractions of Thomas-Fermi and von Weizsäcker are given by `\gamma` and `\lambda` , respectively, and the exchange-correlation energy functional is the Perdew-Wang LDA (although we could have chosen any other LDA or GGA functional). An extensive and detailed study on the performance of this parametrized functional for atoms has been presented in the paper by Espinosa Leal et al.
[#espinosa]_ An important thing to note is that because of how the implementation is done in GPAW, your kinetic energy functional must always contain a fraction of Weizsäcker, `\lambda T_\text{W} [n]` , where `\lambda` is set by the use of the keyword tw_coeff. The definition of the remainder of the kinetic functional, `T_\text{r} [n] = T[n] - \lambda T_\text{W} [n]` , is done through the definition of the XC functional choosing a kinetic functional from those available in LibXC and prepending a number for the corresponding fraction to be incorporated into `T[n]` . For instance, in the example above, `T[n] - \lambda T_\text{W} [n] = \gamma T_\text{TF} [n]` . When defining this kinetic functional in GPAW, say for `\lambda = 0.2 , \, \gamma = 0.8` , we would do:: lambda_coeff = 0.2 gamma = '0.8' # Fraction of Weizsacker introduced through eigensolver definition eigensolver = CG(tw_coeff=lambda_coeff) # Fraction of Thomas-Fermi included in the definition of the XC functional xcname = gamma + '_LDA_K_TF+1.0_LDA_X+1.0_LDA_C_PW' A note on convergence --------------------- Convergence problems have been one of the historical obstacles to the development and spreading of OFDFT. Convergence instabilities of the self-consistency cycle have been attributed to the quality of the kinetic energy functional. [#karasiev]_ As a general rule, the more inaccurate the approximated orbital-free kinetic functional, the more severe convergence problems will be. If you experience convergence problems, chances are that you are using an unreasonable approximation for the kinetic functional of your system. Running the OFDFT GPAW module ============================= Running GPAW's OFDFT module consists of two steps. The first thing to do is to generate the OFDFT PAW setups for each element and functional of interest. This needs to be done only once. The second step is to run the calculation itself. Both steps are described in detail below.
Setup generation ---------------- Before an orbital-free calculation can be carried out the PAW setups need to be generated. Currently, only a 1s projector can be used for setup generation, but the plan is to extend this capability in the future to be able to use a more flexible basis. Below we give a code example to generate the setup for an N atom with `\lambda = 1, \, \gamma = 1` and Perdew-Wang LDA exchange-correlation. The code includes the optimum cutoff distances for the augmentation spheres for all the atoms in the first three rows of the periodic table. N and the different functional options can be replaced by the desired values. Note that the definition of the functional is done separately for the Weizsäcker part (through the tw_coeff keyword) and the rest (Thomas-Fermi in the present case) which is done through the definition of the exchange-correlation functional. The orbital-free mode is enabled through the option orbital_free=True. Also note that the parametrized exchange-correlation functionality allows one to use a linear combination of the different exchange-correlation functionals available from LibXC by changing the number prepended. For instance, xcname='1.0_LDA_X+0.5_LDA_C_PW+0.5_LDA_C_PZ' would combine half Perdew-Wang with half Perdew-Zunger LDA exchange-correlation functionals. .. literalinclude:: setup_generator.py Running a simple OFDFT calculation ---------------------------------- Once the needed setups have been generated, an OFDFT calculation can be run similarly to any standard Kohn-Sham GPAW calculation. Remember to make the path where you saved your OFDFT setups available to GPAW via the setup_paths list, as in the example below, where we run a PAW calculation for an N atom. Also remember the name of your XC functional needs to match the name of the corresponding setup you generated. GPAW will recognize the setup as an OFDFT setup and the orbital-free mode will be automatically enabled. ..
literalinclude:: N_ofdft.py Another example calculation --------------------------- Here you will learn how to run a GPAW OFDFT calculation for the binding energy of an N2 molecule. Any other GPAW method, as explained in the different GPAW tutorials, can also be used with OFDFT by employing the definition of the GPAW calculator detailed here. In the present example, our kinetic energy functional will be `T [n] = T_\text{W} + T_\text{TF}` , corresponding to `\lambda = 1, \, \gamma = 1` , and our XC functional will be Perdew-Wang LDA. Below you will find the steps you need to follow in this tutorial. Generate the setups Follow the instructions given in the section on running OFDFT for PAW setup generation for N. This should generate a file called "N.lambda_1.0.1.0_LDA_K_TF+1.0_LDA_X+1.0_LDA_C_PW" in your current directory, which contains the OFDFT PAW setup information for N generated with the desired energy functional. Run the grid calculations We will now run grid calculations for atomic N and molecular N2 using the setup generated in the preceding step. For the atomic calculation use the code given in the second part of the running OFDFT section. The energy calculated by GPAW on the grid is given with respect to the total energy of the atomic calculation done during setup generation, and should be close to zero. For the N2 molecule, we first need to optimize the bond length. In order to do so, plot the system's energy as a function of interatomic distance and look for the minimum, for instance by adding the following loop to your script (since the experimental bond length is about 1.098 Å we will start searching in that region):: for d in [0.9, 1.0, 1.1, 1.2, 1.3, 1.4]: molecule = Atoms('N2', positions=([c - d/2, c,c], [c+d/2,c,c]), cell=(a,a,a)) We plot the output, which looks like this: .. 
image:: n2.png :align: center Since errors with OFDFT can be quite large, the initial range was very wide (between 0.9 Å and 1.4 Å), and a 4th-order polynomial is required to fit the data. The analysis reveals that for this particular energy functional the equilibrium bond length of N2 is close to 1.2 Å, which allows us to refine the search by adding further data points around that value: .. image:: finen2.png :align: center Refining the range allows us to establish the interatomic distance in :mol:`N_2` at approximately 1.229 Å for this functional, about +12% error compared to the experimental value. The energy calculation for :mol:`N_2` is thus performed for this value. The results are summarized in the table below: System Energy (eV) Bond length (Å) N 0.00723 n/a N2 -13.25142 1.229 The binding energy, `2 E(\text{N}) - E(\text{N}_2)`, is 13.266 eV. For reference, the experimental value is about 9.79 eV. Accuracy -------- For PAW calculations, as well as for pseudopotentials, the formalism itself will introduce (hopefully small) errors compared to full potential calculations, often referred to as "all-electron calculations" in the context of the frozen-core formalism. These errors also affect usual Kohn-Sham calculations, not only OFDFT. In the case of GPAW, the main sources of error will be the cutoff of the augmentation spheres and the grid spacing, both in the radial grid for atomic setup generation and the regular grid for PAW calculations. For N2 there is a full potential OFDFT calculation that we can use for reference, by Chan et al. [#chan]_ Since this reference calculation did not include correlation, in order to compare our result to Chan's we need to perform the same calculation as above removing the 1.0_LDA_C_PW from the definition of the XC functional, which yields 12.602 eV for our PAW calculation. The reference full-potential binding energy from Chan is 12.599 eV (0.463 Hartree) giving a deviation of only 0.004 eV.
As is the case for Kohn-Sham PAW calculations in GPAW, one would also need to check the effect of varying the cutoff radius of the setups and the grid spacing on the results. For the comparison with Chan's reference binding energy of N2, the table below summarizes the effect on the PAW calculation of changing the cutoff during setup generation while keeping the other parameters unchanged: Rcut (Bohr) N2 binding energy deviation (eV) 0.9 -0.091 1.0 -0.075 1.1 -0.031 1.2 -0.004 Note that as the cutoff is reduced the potential becomes harder (less smooth) and a finer grid would be required to keep the error small in the PAW calculation (this roughly corresponds to increasing the size of the basis in plane-waves calculations). The "default" values given in the scripts of the present tutorial correspond to our own optimization of these values, but depending on your own requirements for accuracy and your specific system under study you might have to consider optimizing these values yourself. A much larger source of error than the technical parameters discussed above, and even more so for OFDFT calculations, is the choice of functional. For a test on the performance of different possible kinetic functionals, you can vary the values of `\lambda` and `\gamma` and repeat the calculation, in order to check how the choice of OF functional affects the values of bond length and binding energy. Citation information -------------------- If you use GPAW's OFDFT module for the compilation of published work, remember to add (in addition to the general GPAW and PAW references) a citation to the implementation paper [#lehtomaki]_ . References ---------- .. [#kohn-sham] W. Kohn and L. J. Sham, Self-Consistent Equations Including Exchange and Correlation Effects, Phys. Rev. 140, A1133 (1965). .. [#hohenberg-kohn] P. Hohenberg and W. Kohn, Inhomogeneous Electron Gas, Phys. Rev. 136, B864 (1964). .. [#teller] E. Teller, On the Stability of Molecules in the Thomas-Fermi Theory, Rev. Mod. 
Phys. 34, 627 (1962). .. [#levy] M. Levy, J. P. Perdew, and V. Sahni, Exact differential equation for the density and ionization energy of a many-particle system, Phys. Rev. A 30, 2745 (1984). .. [#lehtomaki] J. Lehtomäki, I. Makkonen, M. A. Caro, A. Harju, and O. Lopez-Acevedo, Orbital-free density functional theory implementation with the projector augmented-wave method, J. Chem. Phys. 141, 234102 (2014). .. [#espinosa] L. A. Espinosa Leal, A. Karpenko, M. A. Caro, and O. Lopez-Acevedo, Optimizing a parametrized Thomas-Fermi-Dirac-Weizsäcker density functional for atoms, Phys. Chem. Chem. Phys. , (2015) :doi:`10.1039/C5CP01211B`. .. [#karasiev] V. V. Karasiev, S. B. Trickey, Issues and challenges in orbital-free density functional calculations, Comp. Phys. Comm. 183, 2519 (2012). .. [#chan] G. K.-L. Chan, A. J. Cohen and N. C. Handy, Thomas-Fermi-Dirac-von Weizsäcker models in finite systems, J. Chem. Phys. 114, 631 (2001). .. [#ludena] E. V. Ludeña and V. V. Karasiev, Kinetic energy functionals: history, challenges and prospects, in Reviews of Modern Quantum Chemistry Vol. 1, pp612 (World Scientific, Singapore, 2002). 
gpaw-24.1.0/doc/documentation/ofdft/setup_generator.py000066400000000000000000000021351454550013000230320ustar00rootroot00000000000000from gpaw.atom.generator import Generator # List of elements for which setups will be generated elements = ['N'] # Fraction of Weizsacker lambda_coeff = 1.0 # Fraction of Thomas-Fermi gamma_coeff = 1.0 # Select optimum cutoff and grid for symbol in elements: gpernode = 800 if symbol == 'H': rcut = 0.9 elif symbol in ['He' or 'Li']: rcut = 1.0 elif symbol in ['Be', 'B', 'C', 'N', 'O', 'F', 'Ne']: rcut = 1.2 elif symbol in ['Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar']: rcut = 1.4 else: rcut = 1.0 # If the lambda scaling is used change name to differentiate the setup name = f'lambda_{lambda_coeff}' # Use of Kinetic functional (minus the Tw contribution) inside the # xc definition pauliname = f'{gamma_coeff}_LDA_K_TF+1.0_LDA_X+1.0_LDA_C_PW' # Calculate OFDFT density g = Generator(symbol, xcname=pauliname, scalarrel=False, orbital_free=True, tw_coeff=lambda_coeff, gpernode=gpernode) g.run(exx=False, name=name, rcut=rcut, write_xml=True) gpaw-24.1.0/doc/documentation/orthogonalization.rst000066400000000000000000000046421454550013000224610ustar00rootroot00000000000000.. _orthogonalization: ================================== Orthogonalizing the wave functions ================================== Let `\tilde{\Psi}_{nG}` be an element of a wave function matrix holding the value of `\tilde{\psi}_{n}(\mathbf{r}_G)` (state number `n` and grid point number `G`). Then we can write the :ref:`orthogonality requirement ` like this: .. math:: \Delta v \tilde{\mathbf{\Psi}}^T \hat{\mathbf{O}} \tilde{\mathbf{\Psi}} = \mathbf{1}, where `\Delta v` is the volume per grid point and .. math:: \hat{\mathbf{O}} = \mathbf{1} + \sum_a \tilde{\mathbf{P}}^a \mathbf{\Delta O}^a (\tilde{\mathbf{P}}^a)^T \Delta v is the matrix form of the overlap operator. 
This matrix is very sparse because the projector functions `\tilde{P}^a_{iG} = \tilde{p}^a_i(\mathbf{r}_G - \mathbf{R}^a)` are localized inside the augmentation spheres. The `\Delta O^a_{i_1i_2}` atomic PAW overlap corrections are small `N_p^a \times N_p^a` matrices (`N_p^a \sim 10`) defined :ref:`here `. Gram-Schmidt procedure ====================== The traditional sequential Gram-Schmidt orthogonalization procedure is not very efficient, so we do some linear algebra to allow us to use efficient matrix-matrix products. Let `\tilde{\mathbf{\Psi}}_0` be the non-orthogonal wave functions. We calculate the overlap matrix: .. math:: \mathbf{S} = \Delta v \tilde{\mathbf{\Psi}}_0^T \hat{\mathbf{O}} \tilde{\mathbf{\Psi}}_0, from the raw overlap `\tilde{\mathbf{\Psi}}_0^T \tilde{\mathbf{\Psi}}_0` and the projections `(\tilde{\mathbf{P}}^a)^T \tilde{\mathbf{\Psi}}_0`. This can be Cholesky factored into `\mathbf{S} = \mathbf{L}^T \mathbf{L}` and we can get the orthogonalized wave functions as: .. math:: \tilde{\mathbf{\Psi}} = \tilde{\mathbf{\Psi}}_0 \mathbf{L}^{-1}. Parallelization =============== The orthogonalization can be parallelized over **k**-points, spins, domains, and bands. **k**-points and spins ---------------------- Each **k**-point and each spin can be treated separately. Domains ------- Each domain will have its contribution to the overlap matrix, and these will have to be summed up using the domain communicator. The dense linear algebra can be performed in a replicated fashion on all MPI tasks using LAPACK or in parallel on a subset of MPI tasks using ScaLAPACK. Bands ----- Band parallelization is described at :ref:`Band parallelization `.
gpaw-24.1.0/doc/documentation/parallel_runs/000077500000000000000000000000001454550013000210125ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/parallel_runs/band_parallelization/000077500000000000000000000000001454550013000251705ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/parallel_runs/band_parallelization/band_parallelization.rst000066400000000000000000000030551454550013000321030ustar00rootroot00000000000000.. _band_parallelization: ==================== Band parallelization ==================== The orthogonalization can be parallelized over **k**-points, spins, domains (see :ref:`orthogonalization`), and bands, described below. Let's say we split the bands in five groups and give each group of wave functions to one of five processes: The overlap matrix contains 5x5 blocks. These are the steps:: rank: 1 2 3 4 5 A . . . . . B . . . . . C . . . . . . . . . . . . . . . . . . A . . . . . B . . . . . C . . . . . . S: . . . . . . . . . . . . A . . . . . B . . . . . C C . . . . . . . . . . . . . . . . . A . . . . . B B . . . . . C . . . . . . . . . . . . . . . . . A A. Each process calculates its block in the diagonal and sends a copy of its wave functions to the right (rank 5 sends to rank 1). B. Rank 1 now has the wave functions from rank 5, so it can do the row 5, column 1 block of `\mathbf{S}`. Rank 2 can do the row 1, column 2 block and so on. Shift wave functions to the right. C. Rank 1 now has the wave functions from rank 4, so it can do the row 4, column 1 block of `\mathbf{S}` and so on. Since `\mathbf{S}` is symmetric, we have all we need:: A B C . . . A B C . . . A B C C . . A B B C . . A With `B` blocks, we need `(B - 1) / 2` shifts. Now we can calculate `\mathbf{L}^{-1}` and do the matrix product `\tilde{\mathbf{\Psi}}_0 \mathbf{L}^{-1}` which requires `B - 1` shifts of the wave functions.
gpaw-24.1.0/doc/documentation/parallel_runs/gpaw-qsub.dtu-databar000077500000000000000000000030561454550013000250410ustar00rootroot00000000000000#!/usr/bin/env python # This version of gpaw-qsub works in the DTU databar as per August 2012. from optparse import OptionParser import os import sys queue='hpc' # Other option is app maxcores = 8 p = OptionParser() p.disable_interspersed_args() p.add_option('-p', '--procs', type=int, default=1, help='number of processes') p.add_option('-t', '--time', type=int, default=72, help='max running time in hours') opts, args = p.parse_args() # How many nodes? if opts.procs <= maxcores: # Single node nodeclause = "nodes=1:ppn="+str(opts.procs) print "Job will be submitted to a single nodes with %i cores." % (opts.procs,) elif opts.procs % maxcores == 0: # Use an integer number of nodes nodeclause = "nodes=%i:ppn=%i" % (opts.procs // maxcores, maxcores) print "Job will be submitted to %i nodes each with %i cores." % (opts.procs // maxcores, maxcores) else: whole = opts.procs // maxcores remainder = opts.procs % maxcores nodeclause = "nodes=%i:ppn=%i+1:ppn=%i" % (whole, maxcores, remainder) print "Job will be submitted to %i nodes with %i cores, and one node with %i cores. THIS IS NOT OPTIMAL." % (whole, maxcores, remainder) print "Node clause is:", nodeclause jobdir = os.getcwd() #jobname = os.path.join(os.getcwd(), args[0]) jobname = args[0] qsub = '''#!/bin/bash cd %s mpirun gpaw-python %s %s''' % (jobdir, jobname, ' '.join(args[1:])) pipe = os.popen('qsub -N %s -q %s -l %s -l walltime=%i:00:00' % (jobname, queue, nodeclause, opts.time), 'w') pipe.write(qsub) pipe.close() gpaw-24.1.0/doc/documentation/parallel_runs/parallel_atomization.py000066400000000000000000000024351454550013000256020ustar00rootroot00000000000000"""This script calculates the atomization energy of nitrogen using two processes, each process working on a separate system.""" from gpaw import GPAW, mpi import numpy as np from ase import Atoms cell = (8., 8., 8.) 
p = 4. separation = 1.103 rank = mpi.world.rank # Master process calculates energy of N, while the other one takes N2 if rank == 0: system = Atoms('N', [(p, p, p)], magmoms=[3], cell=cell) elif rank == 1: system = Atoms('N2', [(p, p, p + separation / 2.), (p, p, p - separation / 2.)], cell=cell) else: raise Exception('This example uses only two processes') # Open different files depending on rank output = '%d.txt' % rank calc = GPAW(mode='fd', communicator=[rank], txt=output, xc='PBE') system.calc = calc energy = system.get_potential_energy() # Now send the energy from the second process to the first process, if rank == 1: # Communicators work with arrays from Numeric only: mpi.world.send(np.array([energy]), 0) else: # The first process receives the number and prints the atomization energy container = np.array([0.]) mpi.world.receive(container, 1) # Ea = E[molecule] - 2 * E[atom] atomization_energy = container[0] - 2 * energy print(f'Atomization energy: {atomization_energy:.4f} eV') gpaw-24.1.0/doc/documentation/parallel_runs/parallel_runs.rst000066400000000000000000000363541454550013000244220ustar00rootroot00000000000000.. _parallel_runs: ============= Parallel runs ============= .. toctree:: :maxdepth: 1 .. _parallel_running_jobs: Running jobs in parallel ======================== Parallel calculations are done primarily with MPI. The parallelization can be done over the **k**-points, bands, and using real-space domain decomposition. The code will try to make a sensible domain decomposition that match both the number of processors and the size of the unit cell. This choice can be overruled, see :ref:`manual_parallelization_types`. Complementary OpenMP parallelization can improve the performance in some cases, see :ref:`manual_openmp`. 
Before starting a parallel calculation, it might be useful to check how the parallelization corresponding to the given number of processes would be done with the ``--dry-run=N`` command line option:: $ gpaw python --dry-run=8 script.py The output will also contain the "Calculator" RAM Memory estimate per process. In order to run GPAW in parallel, you can do one of the following:: $ mpiexec -n <cores> gpaw python script.py $ gpaw -P <cores> python script.py $ mpiexec -n <cores> python3 script.py The first two are the recommended ones: The *gpaw* script will make sure that imports are done in an efficient way. .. tip:: You can use the :envvar:`GPAW_MPI_OPTIONS` to pass options to ``mpiexec``. Example:: GPAW_MPI_OPTIONS="--oversubscribe" .. envvar:: GPAW_MPI_OPTIONS Options for ``mpiexec``. Submitting a job to a queuing system ==================================== You can write a shell-script that contains this line:: mpiexec gpaw python script.py and then submit that with ``sbatch``, ``qsub`` or some other command. Alternatives: * If you are on a SLURM system: use the :ref:`sbatch ` sub-command of the ``gpaw`` command-line tool:: $ gpaw sbatch -- [sbatch options] script.py [script options] * Use MyQueue_:: $ mq submit "script.py [script options]" -R <resources> * Write your own *submit* script. See this example: :git:`doc/platforms/gbar/qsub.py`. .. _MyQueue: https://myqueue.readthedocs.io/ Alternative submit tool ======================= Alternatively, the script gpaw-runscript can be used, try:: $ gpaw-runscript -h to get the architectures implemented and the available options. As an example, use:: $ gpaw-runscript script.py 32 to write a job submission script running script.py on 32 cpus. The tool tries to guess the architecture/host automatically.
By default it uses the following environment variables to write the runscript: =============== =================================== variable meaning =============== =================================== HOSTNAME name used to assign host type PYTHONPATH path for Python GPAW_SETUP_PATH where to find the setups GPAW_MAIL where to send emails about the jobs =============== =================================== Writing to files ================ Be careful when writing to files in a parallel run. Instead of ``f = open('data', 'w')``, use: >>> from ase.parallel import paropen >>> f = paropen('data', 'w') Using ``paropen``, you get a real file object on the master node, and dummy objects on the slaves. It is equivalent to this: >>> from ase.parallel import world >>> if world.rank == 0: ... f = open('data', 'w') ... else: ... f = open('/dev/null', 'w') If you *really* want all nodes to write something to files, you should make sure that the files have different names: >>> from ase.parallel import world >>> f = open('data.{}'.format(world.rank), 'w') Writing text output =================== Text output written by the ``print`` function is written by all nodes. To avoid this use: >>> from ase.parallel import parprint >>> print('This is written by all nodes') >>> parprint('This is written by the master only') which is equivalent to >>> from ase.parallel import world >>> print('This is written by all nodes') >>> if world.rank == 0: ... print('This is written by the master only') .. _different_calculations_in parallel: Running different calculations in parallel ========================================== A GPAW calculator object will per default distribute its work on all available processes. If you want to use several different calculators at the same time, however, you can specify a set of processes to be used by each calculator. The processes are supplied to the constructor, either by specifying an :ref:`MPI Communicator object `, or simply a list of ranks.
Thus, you may write:: from gpaw import GPAW import gpaw.mpi as mpi # Create a calculator using ranks 0, 3 and 4 from the mpi world communicator ranks = [0, 3, 4] comm = mpi.world.new_communicator(ranks) if mpi.world.rank in ranks: calc = GPAW(communicator=comm, ...) ... Be sure to specify different output files to each calculator, otherwise their outputs will be mixed up. Here is an example which calculates the atomization energy of a nitrogen molecule using two processes: .. literalinclude:: parallel_atomization.py .. _manual_parallelization_types: .. _manual_parallel: Parallelization options ======================= In version 0.7, a new keyword called ``parallel`` was introduced to provide a unified way of specifying parallelization-related options. Similar to the way we :ref:`specify convergence criteria ` with the ``convergence`` keyword, a Python dictionary is used to contain all such options in a single keyword. The default value corresponds to this Python dictionary:: {'kpt': None, 'domain': None, 'band': None, 'order': 'kdb', 'stridebands': False, 'augment_grids': False, 'sl_auto': False, 'sl_default': None, 'sl_diagonalize': None, 'sl_inverse_cholesky': None, 'sl_lcao': None, 'sl_lrtddft': None, 'use_elpa': False, 'elpasolver': '2stage', 'buffer_size': None} In words: * ``'kpt'`` is an integer and denotes the number of groups of k-points over which to parallelize. k-point parallelization is the most efficient type of parallelization for most systems with many electrons and/or many k-points. If unspecified, the calculator will choose a parallelization itself which maximizes the k-point parallelization unless that leads to load imbalance; in that case, it may prioritize domain decomposition. Note: parallelization over spin is not possible in :ref:`GPAW 20.10.0 and newer versions `. * The ``'domain'`` value specifies either an integer ``n`` or a tuple ``(nx,ny,nz)`` of 3 integers for :ref:`domain decomposition `. If not specified (i.e. 
``None``), the calculator will try to determine the best domain parallelization size based on number of kpoints etc. * The ``'band'`` value specifies the number of parallelization groups to use for :ref:`band parallelization `. If not specified (i.e. ``None``), the calculator will try to determine the best band parallelization size based on number of kpoints etc. * ``'order'`` specifies how different parallelization modes are nested within the calculator's world communicator. Must be a permutation of the characters ``'kdb'`` which is the default. The characters denote k-point, domain or band parallelization respectively. The last mode will be assigned contiguous ranks and thus, depending on network layout, probably becomes more efficient. Usually for static calculations the most efficient order is ``'kdb'`` whereas for TDDFT it is ``'kbd'``. * The ``'stridebands'`` value only applies when band parallelization is used, and can be used to toggle between grouped and strided band distribution. * If ``'augment_grids'`` is ``True``, all cores will be used for XC/Poisson solver. When parallelizing over k-points or bands, in the planewave mode, and using ScaLAPACK, setting ``'augment_grids'`` to True will make use of all cores including those for k-point and band parallelization. * If ``'sl_auto'`` is ``True``, ScaLAPACK will be enabled with automatically chosen parameters and using all available CPUs. * The other ``'sl_...'`` values are for using ScaLAPACK with different parameters in different operations. Each can be specified as a tuple ``(m,n,mb)`` of 3 integers to indicate an ``m*n`` grid of CPUs and a block size of ``mb``. If any of the three latter keywords are not specified (i.e. ``None``), they default to the value of ``'sl_default'``. Presently, ``'sl_inverse_cholesky'`` must equal ``'sl_diagonalize'``. * If the Elpa library is installed, enable it by setting ``use_elpa`` to ``True``. Elpa will be used to diagonalize the Hamiltonian. 
The Elpa distribution relies on BLACS and ScaLAPACK, and hence can only be used alongside ``sl_auto``, ``sl_default``, or a similar keyword. Enabling Elpa is highly recommended as it significantly speeds up the diagonalization step. See also :ref:`lcao`. * ``elpasolver`` indicates which solver to use with Elpa. By default it uses the two-stage solver, ``'2stage'``. The other allowed value is ``'1stage'``. This setting will only have effect if Elpa is enabled. * The ``'buffer_size'`` is specified as an integer and corresponds to the size of the buffer in KiB used in the 1D systolic parallel matrix multiply algorithm. The default value corresponds to sending all wavefunctions simultaneously. A reasonable value would be the size of the largest cache (L2 or L3) divided by the number of MPI tasks per CPU. Values larger than the default value are nonsensical and internally reset to the default value. .. note:: With the exception of ``'stridebands'``, these parameters all have an equivalent command line argument which can equally well be used to specify these parallelization options. Note however that the values explicitly given in the ``parallel`` keyword to a calculator will override those given via the command line. As such, the command line arguments thus merely redefine the default values which are used in case the ``parallel`` keyword doesn't specifically state otherwise. .. _manual_parsize_domain: Domain decomposition -------------------- Any choice for the domain decomposition can be forced by specifying ``domain`` in the ``parallel`` keyword. It can be given in the form ``parallel={'domain': (nx,ny,nz)}`` to force the decomposition into ``nx``, ``ny``, and ``nz`` boxes in x, y, and z direction respectively. Alternatively, one may just specify the total number of domains to decompose into, leaving it to an internal cost-minimizer algorithm to determine the number of domains in the x, y and z directions such that parallel efficiency is optimal.
This is achieved by giving the ``domain`` argument as ``parallel={'domain': n}`` where ``n`` is the total number of boxes. .. tip:: ``parallel={'domain': world.size}`` will force all parallelization to be carried out solely in terms of domain decomposition, and will in general be much more efficient than e.g. ``parallel={'domain': (1,1,world.size)}``. You might have to add ``from gpaw.mpi import world`` to the script to define ``world``. .. _manual_parsize_bands: Band parallelization -------------------- Parallelization over Kohn-Sham orbitals (i.e. bands) becomes favorable when the number of bands `N` is so large that `\mathcal{O}(N^2)` operations begin to dominate in terms of computational time. Linear algebra for orthonormalization and diagonalization of the wavefunctions is the most noticeable contributor in this regime, and therefore, band parallelization can be used to distribute the computational load over several CPUs. This is achieved by giving the ``band`` argument as ``parallel={'band': nbg}`` where ``nbg`` is the number of band groups to parallelize over. .. tip:: Whereas band parallelization in itself will reduce the amount of operations each CPU has to carry out to calculate e.g. the overlap matrix, the actual linear algebra necessary to solve such linear systems is in fact still done using serial LAPACK by default. It is therefor advisable to use both band parallelization and ScaLAPACK in conjunction to reduce this potential bottleneck. More information about these topics can be found here: .. toctree:: :maxdepth: 1 band_parallelization/band_parallelization .. _manual_ScaLAPACK: ScaLAPACK --------- ScaLAPACK improves performance of calculations beyond a certain size. This size depends on whether using FD, LCAO, or PW mode. 
In FD or PW mode, ScaLAPACK operations are applied to arrays of size nbands by nbands, whereas in LCAO mode, the arrays are generally the number of orbitals by the number of orbitals and therefore larger, making ScaLAPACK particularly important for LCAO calculations. With LCAO, it starts to become an advantage to use ScaLAPACK at around 800 orbitals which corresponds to about 50 normal (non-hydrogen, non-semicore) atoms with standard DZP basis set. In FD mode, calculations with nbands > 500 will benefit from ScaLAPACK; otherwise, the default serial LAPACK might as well be used. The ScaLAPACK parameters are defined using the parallel keyword dictionary, e.g., ``sl_default=(m, n, block)``. A block size of 64 has been found to be a universally good choice both in all modes. In LCAO mode, it is normally best to assign as many cores as possible, which means that ``m`` and ``n`` should multiply to the total number of cores divided by the k-point parallelization. For example with 128 cores and parallelizing by 4 over k-points, there are 32 cores per k-point available per scalapack and a sensible choice is ``m=8``, ``n=4``. You can use ``sl_auto=True`` to make such a choice automatically. In FD or PW mode, a good guess for these parameters on most systems is related to the numbers of bands. We recommend for FD/PW:: mb = 64 m = floor(sqrt(nbands/mb)) n = m There are a total of four ``'sl_...'`` keywords. Most people will be fine just using ``'sl_default'`` or even ``'sl_auto'``. Here we use the same ScaLAPACK parameters in three different places: i) general eigensolve in the LCAO intilization ii) standard eigensolve in the FD calculation and iii) Cholesky decomposition in the FD calculation. 
It is currently possible to use different ScaLAPACK parameters in the LCAO initialization and the FD calculation by using two of the ScaLAPACK keywords in tandem, e.g:: GPAW(..., parallel={'sl_lcao': (p, q, p), 'sl_default': (m, n, mb)}) where ``p``, ``q``, ``pb``, ``m``, ``n``, and ``mb`` all have different values. The most general case is the combination of three ScaLAPACK keywords. Note that some combinations of keywords may not be supported. .. _manual_openmp: Hybrid OpenMP/MPI parallelization --------------------------------- In some hardware the performance of large FD and LCAO and calculations can be improved by using OpenMP parallelization in addition to MPI. When GPAW is built with OpenMP support, hybrid parallelization is enabled by setting the OMP_NUM_THREADS environment variable:: export OMP_NUM_THREADS=4 mpiexec -n 512 gpaw python script.py This would run the calculation with a total of 2048 CPU cores. As the optimum MPI task / OpenMP thread ratio depends a lot on the particular input and underlying hardware, it is recommended to experiment with different settings before production calculations. 
gpaw-24.1.0/doc/documentation/paw_note/000077500000000000000000000000001454550013000177635ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/paw_note/make_paw_note.py000066400000000000000000000020161454550013000231450ustar00rootroot00000000000000# creates: paw_note.pdf import os import shutil import subprocess from pathlib import Path import matplotlib.pyplot as plt from gpaw.atom.aeatom import AllElectronAtom ae = AllElectronAtom('Pt') ae.run() ae.plot_wave_functions(show=False) rcut = 2.5 lim = [0, 3.5, -2, 4] plt.plot([rcut, rcut], lim[2:], 'k--', label='_nolegend_') plt.axis(lim) plt.text(0.6, 2, '[Pt] = [Xe]4f$^{14}$5d$^9$6s$^1$') plt.savefig('Pt.png', dpi=80) dir = os.environ.get('PDF_FILE_DIR') if dir: shutil.copyfile(Path(dir) / 'paw_note.pdf', 'paw_note.pdf') else: try: subprocess.run( 'pdflatex -interaction=nonstopmode paw_note > /dev/null && ' 'bibtex paw_note > /dev/null && ' 'pdflatex -interaction=nonstopmode paw_note > /dev/null && ' 'pdflatex -interaction=nonstopmode paw_note > /dev/null', shell=True, check=True) except subprocess.CalledProcessError: subprocess.run('echo "No pdflatex" > paw_note.pdf', shell=True) subprocess.run(['cp', 'paw_note.pdf', '..']) gpaw-24.1.0/doc/documentation/paw_note/paw_note.tex000066400000000000000000002501341454550013000223260ustar00rootroot00000000000000\documentclass[a4paper]{article} \usepackage{amsmath} \usepackage{graphicx} %\usepackage{a4wide} \newcommand{\gpaw}{\textsc{gpaw}} \newcommand{\trans}[1]{{#1}^{\mbox{\tiny $T$}}} \newcommand{\f}[1]{\mathbf{#1}} \newcommand{\fs}[1]{\mathbf{\tilde{#1}}} \newcommand{\s}[1]{\tilde{#1}} \newcommand{\ws}[1]{\widetilde{#1}} \newcommand{\h}[1]{\hat{#1}} \newcommand{\wh}[1]{\widehat{#1}} \newcommand{\ext}{\text{ext}} \newcommand{\br}{\mathbf{r}} \newcommand{\rr}{|\mathbf{r} - \mathbf{r}'|} \newcommand{\bk}{\mathbf{k}} \newcommand{\bR}{\mathbf{R}} \newcommand{\T}{\hat{\mathcal{T}}} \newcommand{\Z}{\mathcal{Z}} \newcommand{\Ham}{\widehat{H}} \newcommand{\bra}[1]{\langle 
#1 |} \newcommand{\ket}[1]{| #1 \rangle} \newcommand{\braket}[2]{\langle #1 | #2 \rangle} \newcommand{\psit}{\tilde{\psi}} \newcommand{\phit}{\tilde{\phi}} \newcommand{\pt}{\tilde{p}} \title{\vspace*{-21mm}% \textbf{The Projector Augmented-wave Method}} \author{Carsten Rostgaard} \date{February 3, 2010} \begin{document} \thispagestyle{empty} \maketitle \begin{abstract} The purpose of this text is to give a self-contained description of the basic theory of the projector augmented-wave (PAW) method, as well as most of the details required to make the method work in practice. These two topics are covered in the first two sections, while the last is dedicated to examples of how to apply the PAW transformation when extracting non-standard quantities from a density-functional theory (DFT) calculation. The formalism is based on Bl{\"o}chl's original formulation of PAW \cite{Blochl1994}, and the notation and extensions follow those used and implemented in the \gpaw\cite{gpaw} code. \end{abstract} %\vspace*{15mm} \tableofcontents \clearpage \section{Formalism}\label{sec: paw} By the requirement of orthogonality, DFT wave functions have very sharp features close to the nuclei, as all the states are non-zero in this region. Further out only the valence states are non-zero, resulting in much smoother wave functions in this interstitial region. The oscillatory behavior in the core regions, requires a very large set of plane waves, or equivalently a very fine grid, to be described correctly. One way of solving this problem is the use of pseudopotentials in which the collective system of nuclei and core electrons are described by an effective, much smoother, potential. The KS equations are then solved for the valence electrons only. The pseudopotentials are constructed such that the correct scattering potential is obtained beyond a certain radius from the core. 
This method reduces the number of wave functions to be calculated, since the pseudo potentials only have to be calculated and tabulated once for each type of atom, so that only calculations on the valence states are needed. It justifies the neglect of relativistic effects in the KS equations, since the valence electrons are non-relativistic (the pseudopotentials describing core states are of course constructed with full consideration of relativistic effects). The technique also removes the unwanted singular behavior of the ionic potential at the lattice points. \par The drawback of the method is that all information on the full wave function close to the nuclei is lost. This can influence the calculation of certain properties, such as hyperfine parameters, and electric field gradients. Another problem is that one has no before hand knowledge of when the approximation yields reliable results. \par A different approach is the augmented-plane-wave method (APW), in which space is divided into atom-centered augmentation spheres inside which the wave functions are taken as some atom-like partial waves, and a bonding region outside the spheres, where some envelope functions are defined. The partial waves and envelope functions are then matched at the boundaries of the spheres. \par A more general approach is the projector augmented wave method (PAW) presented here, which offers APW as a special case\cite{Blochl2003}, and the pseudopotential method as a well defined approximation\cite{Kresse1999}. The PAW method was first proposed by Bl{\"o}chl in 1994\cite{Blochl1994}. \subsection{The Transformation Operator}\label{sec: transformation operator} The features of the wave functions, are very different in different regions of space. In the bonding region it is smooth, but near the nuclei it displays rapid oscillations, which are very demanding on the numerical representation of the wave functions. 
To address this problem, we seek a linear transformation $\T$ which takes us from an auxiliary smooth wave function $\ket{\s{\psi}_n}$ to the true all electron Kohn-Sham single particle wave function $\ket{\psi_n}$ % \begin{equation} \ket{\psi_n}=\T\ket{\s{\psi}_n}, \end{equation} % where $n$ is the quantum state label, containing a $\f{k}$ index, a band index, and a spin index. \par This transformation yields the transformed KS equations % \begin{equation}\label{eq: paw ks equations} \T^\dagger \Ham \T \ket{\s{\psi}_n}=\epsilon_n\T^\dagger\T\ket{\s{\psi}_n}, \end{equation} % which needs to be solved instead of the usual KS equation. Now we need to define $\T$ in a suitable way, such that the auxiliary wave functions obtained from solving \ref{eq: paw ks equations} becomes smooth. \par Since the true wave functions are already smooth at a certain minimum distance from the core, $\T$ should only modify the wave function close to the nuclei. We thus define % \begin{equation} \T = 1 + \sum_a \T^a, \end{equation} % where $a$ is an atom index, and the atom-centered transformation, $\T^a$, has no effect outside a certain atom-specific augmentation region $|\f{r}-\f{R}^a|r_c^a \end{equation*} % where $\phi_i^a(\f{r})=\braket{\f{r}}{\phi_i^a}$ and likewise for $\s{\phi}_i^a$. \par If the smooth partial waves form a complete set inside the augmentation sphere, we can formally expand the smooth all electron wave functions as % \begin{equation}\label{eq: smooth psi expansion} \ket{\s{\psi}_n} = \sum_i P_{ni}^a \ket{\s{\phi}_i^a} \text{, for } |\f{r}-\f{R}^a|r_c^a$. \par Note that the completeness relation \ref{eq: phi p completeness} is equivalent to the requirement that $\s{p}_i^a$ should produce the correct expansion coefficients of \ref{eq: smooth psi expansion}-\ref{eq: psi expansion}, while \ref{eq: phi p orthogonal} is merely an implication of this restriction. 
Translating \ref{eq: phi p completeness} to an explicit restriction on the projector functions is a rather involved procedure, but according to Bl{\"o}chl, \cite{Blochl1994}, the most general form of the projector functions is: % \begin{equation}\label{eq: projector general} \bra{\s{p}_i^a} = \sum_j (\{\braket{f_k^a}{\s{\phi}_l^a}\})^{-1}_{ij}\bra{f_j^a}, \end{equation} % where $\ket{f_j^a}$ is any set of linearly independent functions. The projector functions will be localized if the functions $\ket{f_j^a}$ are localized. \par Using the completeness relation \ref{eq: phi p completeness}, we see that % \begin{equation*} \T^a =\sum_i \T^a\ket{\s{\phi}_i^a}\bra{\s{p}_i^a} = \sum_i \big(\ket{\phi_i^a} - \ket{\s{\phi}_i^a}\big) \bra{\s{p}_i^a}, \end{equation*} % where the first equality is true in all of space, since \ref{eq: phi p completeness} holds inside the augmentation spheres and outside $\T^a$ is zero, so anything can be multiplied with it. The second equality is due to \ref{eq: ta} (remember that $\ket{\phi_i^a} - \ket{\s{\phi}_i^a}=0$ outside the augmentation sphere). Thus we conclude that % \begin{equation}\label{eq: T operator} \T =1+ \sum_a\sum_i \big(\ket{\phi_i^a} - \ket{\s{\phi}_i^a}\big) \bra{\s{p}_i^a}. \end{equation} % \par To summarize, we obtain the all electron KS wave function $\psi_n(\f{r})=\braket{\f{r}}{\psi_n}$ from the transformation % \begin{equation}\label{eq: psi transform in r} \psi_n(\f{r}) = \s{\psi}_n(\f{r})+\sum_a\sum_i \big( \phi_i^a(\f{r}) - \s{\phi}_i^a(\f{r}) \big)\braket{\s{p}_i^a}{\s{\psi}_n}, \end{equation} % where the smooth (and thereby numerically convenient) auxiliary wave function $\s{\psi}_n(\f{r})$ is obtained by solving the eigenvalue equation \ref{eq: paw ks equations}. % Note that although the double sum has been indicated by a single % summation symbol, the order of summation must be preserved, since % the number of states $i$ depends on the nature of the current atom % (index $a$). 
\par The transformation \ref{eq: psi transform in r} is expressed in terms of the three components: a) the partial waves $\phi_i^a(\f{r})$, b) the smooth partial waves $\s{\phi}_i^a(\f{r})$, and c) the smooth projector functions $\s{p}_i^a(\f{r})$. \par The restriction on the choice of these sets of functions are: a) Since the partial- and smooth partial wave functions are used to expand the all electron wave functions, i.e. are used as atom-specific basis sets, these must be complete (inside the augmentation spheres). b) the smooth projector functions must satisfy \ref{eq: phi p completeness}, i.e. be constructed according to \ref{eq: projector general}. All remaining degrees of freedom are used to make the expansions converge as fast as possible, and to make the functions termed `smooth', as smooth as possible. For a specific choice of these sets of functions, see section \ref{sec: partial wave basis}. As the partial- and smooth partial waves are merely used as basis sets they can be chosen as real functions (any imaginary parts of the functions they expand, are then introduced through the complex expansion coefficients $P_{ni}^a$). In the remainder of this document $\phi$ and $\s{\phi}$ will be assumed real. \par Note that the sets of functions needed to define the transformation are system independent, and as such they can conveniently be pre-calculated and tabulated for each element of the periodic table. \par For future convenience, we also define the one center expansions \begin{subequations} \begin{align} \psi_{n}^a(\f{r}) &= \sum_i \phi_i^a(\f{r})\braket{\s{p}_i^a}{\s{\psi}_n},\\ \s{\psi}_{n}^a(\f{r}) &= \sum_i \s{\phi}_i^a(\f{r})\braket{\s{p}_i^a}{\s{\psi}_n}. \end{align} \end{subequations} % In terms of these, the all electron KS wave function is % \begin{equation} \psi_n(\f{r})=\s{\psi}_n(\f{r})+\sum_{a} \big( \psi_{n}^{a}(\f{r}-\f{R}^a) - \s{\psi}_{n}^a(\f{r}-\f{R}^a) \big). \end{equation} \par So what have we achieved by this transformation? 
The trouble of the original KS wave functions, was that they displayed rapid oscillations in some parts of space, and smooth behavior in other parts of space. By the decomposition \ref{eq: psi transform in r} we have separated the original wave functions into auxiliary wave functions which are smooth everywhere and a contribution which contains rapid oscillations, but only contributes in certain, small, areas of space. This decomposition is shown on the front page for the hydrogen molecule. Having separated the different types of waves, these can be treated individually. The localized atom centered parts, are indicated by a superscript $a$, and can efficiently be represented on atom centered radial grids. Smooth functions are indicated by a tilde \~{}. The delocalized parts (no superscript $a$) are all smooth, and can thus be represented on coarse Fourier- or real space grids. \subsection{The Frozen Core Approximation}\label{sec: frozen core} In the frozen core approximation, it is assumed that the core states are naturally localized within the augmentation spheres, and that the core states of the isolated atoms are not changed by the formation of molecules or solids. Thus the core KS states are identical to the atomic core states % \begin{figure} \begin{center} \includegraphics[scale=0.6]{Pt.png}% \caption{The core states of Platinum.}\label{fig: frozen core}% \end{center} \end{figure} % \begin{equation*} \ket{\psi^c_n} = \ket{\phi_\alpha^{a,\text{core}}}, \end{equation*} % where the index $n$ on the left hand site refers to both a specific atom, $a$, and an atomic state, $\alpha$. \par In this approximation only valence states are included in the expansions of $\ket{\psi_n}$, \ref{eq: psi expansion}, and $\ket{\s{\psi}_n}$, \ref{eq: smooth psi expansion}. \par Figure \ref{fig: frozen core}, shows the atomic states of Platinum in its ground state, obtained with an atomic DFT program at an LDA level, using spherical averaging, i.e. 
a spin-compensated calculation, assuming the degenerate occupation 9/10 of all 5d states, and both of the 6s states half filled. It is seen that at the typical length of atomic interaction (the indicated cut-off $r_c=2.5$ Bohr is approximately half the inter-atomic distance in bulk Pt), only the 5d and 6s states are non-zero. Note that the frozen core approximation is not a prerequisite for PAW. See e.g. \cite{Marsman2006} for a relaxation of this requirement. \subsection{Expectation Values}\label{sec: expectation values} The expectation value of an operator $\wh{O}$ is, within the frozen core approximation, given by % \begin{equation} \langle \wh{O} \rangle = \sum_n^\text{val} f_n \bra{\psi_n}\wh{O}\ket{\psi_n} + \sum_a \sum_\alpha^\text{core} \bra{\phi_\alpha^{a,\text{core}}}\wh{O}\ket{\phi_\alpha^{a,\text{core}}}. \end{equation} % Using that $\bra{\psi_n}\wh{O}\ket{\psi_n} = \bra{\s{\psi}_n}\T^\dagger \wh{O}\T\ket{\s{\psi}_n}$, and skipping the state index for notational convenience, we see that % \begin{equation} \begin{split} \bra{\psi}\wh{O}\ket{\psi} &= \bra{\s{\psi} + \sum_a(\psi^a - \s{\psi}^a)} \wh{O} \ket {\s{\psi} + \sum_a(\psi^a - \s{\psi}^a)}\\ &= \bra{\s{\psi}}\wh{O}\ket{\s{\psi}} + \sum_{aa'} \bra{\psi^a - \s{\psi}^a}\wh{O}\ket{\psi^{a'} - \s{\psi}^{a'}} + \sum_a \left(\bra{\s{\psi}}\wh{O}\ket{\psi^a - \s{\psi}^a} + \bra{\psi^a - \s{\psi}^a}\wh{O}\ket{\s{\psi}}\right)\\ &= \bra{\s{\psi}}\wh{O}\ket{\s{\psi}} + \sum_{a}\left( \bra{\psi^a}\wh{O}\ket{\psi^a} - \bra{\s{\psi}^a}\wh{O}\ket{\s{\psi}^a} \right)\\ & \hspace{48pt}+ \sum_a \left( \bra{\psi^a - \s{\psi}^a}\wh{O}\ket{\s{\psi} - \s{\psi}^a} + \bra{\s{\psi} - \s{\psi}^a}\wh{O}\ket{\psi^a - \s{\psi}^a} \right)\\ & \hspace{48pt}+ \sum_{a\neq a'} \bra{\psi^a - \s{\psi}^a}\wh{O}\ket{\psi^{a'} - \s{\psi}^{a'}}. \end{split} \end{equation} % For local operators\footnote{Local operator $\wh{O}$: An operator which does not correlate separate parts of space, i.e. 
$\bra{\br}\wh{O}\ket{\br'} = 0$ if $\br\neq \br'$.} the last two lines does not contribute. The first line, because $\ket{\psi^a - \s{\psi}^a}$ is only non-zero inside the spheres, while $\ket{\s{\psi} - \s{\psi}^a}$ is only non-zero outside the spheres. The second line simply because $\ket{\psi^a - \s{\psi}^a}$ is zero outside the spheres, so two such states centered on different nuclei have no overlap (provided that the augmentation spheres do not overlap). \par Reintroducing the partial waves in the one-center expansions, we see that % \begin{equation} \sum_n^\text{val} f_n \bra{\psi_{n}^{a}}\wh{O}\ket{\psi_{n}^{a}} = \sum_n^\text{val} f_n \sum_{i_1i_2} \bra{\phi_{i_1}^{a}P^a_{ni_1}} \wh{O} \ket{\phi_{i_2}^{a} P_{ni_2}^{a}} = \sum_{i_1i_2}\bra{\phi_{i_1}^{a}}\wh{O}\ket{\phi_{i_2}^a} \sum_n^\text{val} f_n P_{ni_1}^{a*}P_{ni_2}^a, \end{equation} % and likewise for the smooth waves.%: $\sum_n f_n \bra{\s{\psi}_{n}^{a}}\wh{O}\ket{\s{\psi}_{n}^{a}} = \sum_{i_1i_2}^\text{val} \bra{\s{\phi}_{i_1}^{a}}\wh{O}\ket{\s{\phi}_{i_2}^a} \sum_n^\text{val}f_n P_{i_1}^{a*}P_{i_2}^a$. \par Introducing the Hermitian one-center density matrix % \begin{equation}\label{eq: density matrix} D_{i_1i_2}^a =\sum_n f_n P_{ni_1}^{a*} P_{ni_2}^{a} = \sum_n f_n \braket{\s{\psi}_n}{\s{p}_{i_1}^a} \braket{\s{p}_{i_2}^a}{\s{\psi}_n}. \end{equation} % We conclude that for any local operator $\wh{O}$, the expectation value is % \begin{equation}\label{eq: local exp values} \langle \wh{O} \rangle = \sum_n^\text{val} f_n \bra{\s{\psi}_n}\wh{O}\ket{\s{\psi}_n} + \sum_a \sum_{i_1i_2} \left( \bra{\phi_{i_1}^{a}}\wh{O}\ket{\phi_{i_2}^a} - \bra{\s{\phi}_{i_1}^{a}}\wh{O}\ket{\s{\phi}_{i_2}^a} \right)D_{i_1i_2}^a + \sum_a \sum_\alpha^\text{core} \bra{\phi_\alpha^{a,\text{core}}}\wh{O}\ket{\phi_\alpha^{a,\text{core}}}. 
\end{equation} % \subsection{Densities}\label{sec: densities} The electron density is obviously a very important quantity in DFT, as all observables in principle are calculated as functionals of the density. In reality the kinetic energy is calculated as a functional of the orbitals, and some specific exchange-correlation functionals also rely on KS-orbitals rather then the density for their evaluation, but these are still \emph{implicit} functionals of the density. \par To obtain the electron density we need to determine the expectation value of the real-space projection operator $\ket{\f{r}}\bra{\f{r}}$ % \begin{equation} n(\f{r}) = \sum_n f_n \braket{\psi_n}{\f{r}}\braket{\f{r}}{\psi_n} = \sum_n f_n|\psi_n(\f{r})|^2, \end{equation} % where $f_n$ are the occupation numbers. \par As the real-space projection operator is obviously a local operator, we can use the results \ref{eq: local exp values} of the previous section, and immediately arrive at % \begin{equation}\label{eq: electron density} n(\f{r}) = \sum_n^\text{val} f_n |\s{\psi}_n|^2 + \sum_a \sum_{i_1i_2} \left(\phi_{i_1}^{a} \phi_{i_2}^{a} - \s{\phi}_{i_1}^{a} \s{\phi}_{i_2}^{a} \right) D_{i_1i_2}^a + \sum_a \sum_\alpha^\text{core} |\phi_\alpha^{a,\text{core}}|^2. \end{equation} % \par To ensure that \ref{eq: electron density} reproduce the correct density even though some of the core states are not strictly localized within the augmentation spheres, a smooth core density, $\s{n}_c(\f{r})$, is usually constructed, which is identical to the core density outside the augmentation sphere, and a smooth continuation inside. 
Thus the density is typically evaluated as % \begin{equation}\label{eq: PAW density} n(\f{r}) = \s{n}(\f{r}) + \sum_a \left( n^a(\f{r}) - \s{n}^a(\f{r}) \right), \end{equation} % where % \begin{subequations}\label{eq: density contributions} \begin{align} \s{n}(\f{r}) &= \sum_n^\text{val} f_n |\s{\psi}_n(\f{r})|^2 + \s{n}_c(\f{r})\label{eq: smooth n}\\ n^a(\f{r}) &= \sum_{i_1i_2} D_{i_1i_2}^a \phi_{i_1}^a(\f{r})\phi_{i_2}^{a}(\f{r}) + n_c^a(\f{r})\label{eq: partial n}\\ \s{n}^a(\f{r}) &= \sum_{i_1i_2} D_{i_1i_2}^a \s{\phi}_{i_1}^a(\f{r})\s{\phi}_{i_2}^{a}(\f{r}) + \s{n}_c^a(\f{r})\label{eq: smooth partial n} \end{align} \end{subequations} % \subsection{Total Energies}\label{sec: total energies} The total energy of the electronic system is given by: % \begin{equation} E[n] = T_s[n]+U_H[n]+V_{ext}[n]+E_{xc}[n]. \end{equation} % In this section, the usual energy expression above, is sought re-expressed in terms of the PAW quantities: the smooth waves and the auxiliary partial waves. \par For the local and semi-local functionals, we can utilize \ref{eq: local exp values}, while the nonlocal parts needs more careful consideration. 
\subsubsection{The Semi-local Contributions} \par The kinetic energy functional $T_s = \sum_n f_n \bra{\psi_n} -\frac{1}{2}\nabla^2\ket{\psi_n}$ is obviously a (semi-) local functional, so we can apply \ref{eq: local exp values} and immediately arrive at: % \begin{equation} \begin{split} T_s[\{\psi_n\}] &= \sum_n f_n \bra{\psi_n} -\tfrac{1}{2}\nabla^2\ket{\psi_n}\\ &= \sum_n^\text{val} f_n \bra{\s{\psi}_n} - \tfrac{1}{2} \nabla^2\ket{\s{\psi}_n} + \sum_a \left(T_c^a + \Delta T_{i_1i_2}^a D^a_{i_1i_2} \right), \end{split} \end{equation} % where % \begin{equation} T_c^a = \sum_\alpha^\text{core} \bra{\phi_\alpha^{a,\text{core}}} - \tfrac{1}{2}\nabla^2 \ket{\phi_\alpha^{a,\text{core}}} \quad \text{and} \quad \Delta T_{i_1i_2}^a = \bra{\phi_{i_1}^{a}} - \tfrac{1}{2}\nabla^2\ket{\phi_{i_2}^a} - \bra{\s{\phi}_{i_1}^{a}} - \tfrac{1}{2} \nabla^2 \ket{\s{\phi}_{i_2}^a}. \end{equation} % For LDA and GGA type exchange-correlation functionals, $E_{xc}$ is likewise, per definition, a semi-local functional, such that it can be expressed as % \begin{equation} E_{xc}[n] = E_{xc}[\s{n}] + \sum_a \left( E_{xc}[n^a] - E_{xc}[\s{n}^a] \right). \end{equation} % By virtue of \ref{eq: partial n}-\ref{eq: smooth partial n} we can consider the atomic corrections as functionals of the density matrix defined in \ref{eq: density matrix}, i.e. % \begin{equation} E_{xc}[n] = E_{xc}[\s{n}] + \sum_a \Delta E_{xc}^a[\{D^a_{i_1i_2}\}], \end{equation} % where % \begin{equation} \Delta E_{xc}^a[\{D^a_{i_1i_2}\}] = E_{xc}[n^a] - E_{xc}[\s{n}^a]. \end{equation} % \subsubsection{The Nonlocal Contributions} The Hartree term is both nonlinear and nonlocal, so more care needs to be taken when introducing the PAW transformation for this expression. \par In the following we will assume that there is no `true' external field, such that $V_\text{ext}[n]$ is only due to the static nuclei, i.e. 
it is a sum of the classical interaction of the electron density with the static ionic potential, and the electrostatic energy of the nuclei. \par We define the total classical electrostatic energy functional as % \begin{equation}\label{eq: coulomb energy} \begin{split} E_C[n] &= U_H[n] + V_\text{ext}[n] = \frac{1}{2} ((n)) + (n|\textstyle\sum_a Z^a) + \frac{1}{2} \sum_{a\neq a'} (Z^a | Z^{a'}),%\\ % &= \frac{1}{2}((n+\textstyle\sum_a Z^a)) - \frac{1}{2} \sum_a ((Z^a)) \end{split} \end{equation} % where the notation (f|g) indicates the Coulomb integral % \begin{equation} (f|g) = \iint d\br d\br' \frac{f^*(\br) g(\br') }{|\br-\br'|} \end{equation} % and I have introduced the short hand notation $((f)) = (f|f)$. In \ref{eq: coulomb energy}, $Z^a(\br)$ is the charge density of the nucleus at atomic site $a$, which in the classical point approximation is given by % \begin{equation} Z^a(\br) = -\Z^a\delta(\br-\bR^a) \end{equation} % with $\Z^a$ being the atomic number of the nuclei. As the Hartree energy of a density with non-zero total charge is numerically inconvenient, we introduce the charge neutral total density % \begin{equation} \rho(\br) = n(\br) + \sum_a Z^a(\br) \quad (= n_\text{electrons} + n_\text{nuclei}). \end{equation} % In terms of this, the coulombic energy of the system can be expressed by % \begin{equation}\label{eq: coulomb energy reduced} E_C[n] = U_H'[\rho] = \frac{1}{2}((n+{\textstyle\sum_a Z^a}))' \end{equation} % where the prime indicates that one should remember the self-interaction error of the nuclei introduced in the Hartree energy of the total density. This correction is obviously ill defined, and different schemes exist for making this correction. As it turns out, this correction is handled very naturally in the PAW formalism. \par For now, we will focus on the term $((\rho)) = ((n+\textstyle\sum_a Z^a))$. 
If one were to directly include
Exploiting this feature, the Coulomb integral reduce to % \begin{align*} ((n+\textstyle\sum_a Z^a)) %&= ((n+\textstyle\sum_a \s{Z}^a + \textstyle\sum_a Z^a - \s{Z}^a))\\ &= ((\s{n} + \textstyle\sum_a \s{Z}^a)) + \sum_{a}((n^a - \s{n}^a + Z^a - \s{Z}^a)) + 2\sum_a(\s{n}^a+\s{Z}^a|n^a - \s{n}^a + Z^a - \s{Z}^a)\\ % &= ((\s{n} + \textstyle\sum_a \s{Z}^a)) + \sum_{a}(n^a + \s{n}^a + Z^a + \s{Z}^a|n^a - \s{n}^a + Z^a - \s{Z}^a)\\ &= ((\s{n} + \textstyle\sum_a \s{Z}^a)) + \sum_{a}\left( ((n^a + Z^a)) - ((\s{n}^a + \s{Z}^a)) \right) \end{align*} % where it has been used that inside the augmentation spheres $\s{n} = \s{n}^a$. In this expression, we have circumvented all of the previous problems. None of the terms correlates functions on different grids, there is only a single summation over the atomic sites, and furthermore the only thing that has to be evaluated in the full space is the Hartree energy of $\s{n}(\br) + \sum_a \s{Z}^a(\br)$ which is charge neutral (see eq. \ref{eq: rhot charge neutral}). \par Inserting the final expression in \ref{eq: coulomb energy}, we see that % \begin{equation} \begin{split} E_C[n] &= \frac{1}{2}((\s{n} + {\textstyle\sum_a} \s{Z}^a)) + \frac{1}{2}\sum_a \left(((n^a + Z^a))' - ((\s{n}^a + \s{Z}^a))\right)\\ &=U_H[\s{\rho}] + \frac{1}{2}\sum_a \left( ((n^a)) + 2(n^a|Z^a) - ((\s{n}^a + \s{Z}^a))\right) \end{split} \end{equation} % where we have introduced the smooth total density % \begin{equation} \s{\rho}(\br) = \s{n} + \sum_a \s{Z}^a(\br). \end{equation} % Note that the problem with the self interaction error of the nuclei could easily be resolved once it was moved to the atom centered part, as handling charged densities is not a problem on radial grids. 
\par To obtain an explicit expression for the compensation charges, we make a multipole expansion of $\s{Z}^a(\br)$ % \begin{equation}\label{eq: compensation expansion} \s{Z}^a = \sum_L Q_L^a ~\s{g}_L^a(\br), \end{equation} % where $\s{g}_L^a(\br)$ is any smooth function localized within $|\br-\bR^a|^{l+1}} Y_L^*(\h{\br})Y_L(\h{\br}') \end{equation} % with $r_< = \min(r, r')$ and $r_> = \max(r, r')$. Using this it is seen that for any density with a known angular dependence, e.g. the density $R(r) Y_L(\h{\br})$, the potential can be determined by % \begin{equation}\label{eq: radial potential} \begin{split} v[R(r) Y_L(\h{\br})](\br) &= \int d\br' \frac{R(r') Y_L(\h{\br}')}{|\br - \br'|}\\ &= \frac{4\pi}{2l+1} Y_L(\h{\br}) \int_0^\infty r'^2dr' R(r') \frac{r_<^l}{r_>^{l+1}}\\ &= \frac{4\pi}{2l+1} Y_L(\h{\br}) \left[\int_0^r dr' R(r')r'\Big(\frac{r'}{r}\Big)^{l+1} + \int_r^\infty dr' R(r')r'\Big(\frac{r}{r'}\Big)^l \right] \end{split} \end{equation} % if the angular dependence is not a spherical harmonic, one can always do a multipole expansion, and use the above expression on the individual terms. \par In the case of a radial density $n(\br) = n(r)$, the Hartree potential becomes % \begin{equation} u_H(r) = \frac{4\pi}{r}\int_0^r dr' n(r')r'^2 + 4\pi\int_r^\infty dr' n(r')r' \end{equation} % A purely radial dependent density also implies that the xc-potential is a radial function. Using this, the entire KS equation can be reduced to a 1D problem in $r$, while the angular part is treated analytically. 
\subsubsection{The Radial Kohn-Sham Equation} For a spherical KS potential, and using that $Y_L$ are eigenstates of the Laplacian, the KS equation can be reduced to the simpler one-dimensional second order eigenvalue problem % \begin{equation}\label{eq: radial KS equation} \left[ -\frac{1}{2}\frac{d^2}{dr^2} - \frac{1}{r}\frac{d}{dr} + \frac{l(l+1)}{2r^2} + v_s(r)\right]R_j(r) = \epsilon_j R_j(r) \end{equation} % If we introduce the radial wave function $u_j(r)$ defined by % \begin{equation} r R_j(r) = u_j(r) \end{equation} % the KS equation can be written as % \begin{equation} u_j''(r) + \left(2\epsilon_j - 2v_s(r)- \frac{l(l+1)}{r^2}\right) u_j(r) = 0 \end{equation} % which is easily integrated using standard techniques. See e.g. \cite[chapter 6]{Fiolhais2003}. \subsection{The Atomic Data Set of PAW}\label{sec: partial wave basis} The very large degree of freedom when choosing the functions defining the PAW transformation means that the choice varies a great deal between different implementations. In any actual implementation expansions are obviously finite, and many numerical considerations must be made when choosing these basis sets, to ensure fast and reliable convergence. This section provides an overview of the information needed for uniquely defining the PAW transformation, and the level of freedom when choosing the parameters. \subsubsection*{The Partial Waves} The basis functions, $\phi_i^a$, for the expansion of $\ket{\psi_n}$ should be chosen to ensure a fast convergence to the KS wave function. For this reason we choose the partial waves as the eigenstates of the Schr{\"o}dinger equation for the isolated spin-saturated atoms. Thus the index $i$ is a combination of main-, angular-, and magnetic quantum number, $(n,l,m)$. 
And the explicit form is % \begin{equation*} \phi_i^a(\f{r})=\phi_{nl}^a(r)Y_{lm}(\hat{\f{r}}) \end{equation*} % where $\phi_{nl}^a(r)$ are the solutions of the radial KS Schr{\"o}dinger equation \ref{eq: radial KS equation}, and $Y_{lm}$ are the spherical harmonics. For convenience we choose $\phi_i^a(\f{r})$ to be real, i.e. we use the real spherical harmonics instead of the complex-valued ones. This choice of partial waves implies that the smooth partial waves and the smooth projector functions can also be chosen real, and as products of some radial functions and the same real spherical harmonic. \par Note that including unbound states of the radial KS equation in the partial waves is not a problem, as the diverging tail is exactly canceled by the smooth partial waves. In practice we only integrate the KS equation outward from the origin to the cutoff radius for unbound states, thus making the energies free parameters. In principle the same could be done for the bound states, but in \gpaw, the energies of bound states are fixed by making the inward integration for these states and doing the usual matching (see e.g. \cite[chapter 6]{Fiolhais2003}), i.e. the energies are chosen as the eigen energies of the system. \subsubsection*{The Smooth Partial Waves} \par The smooth partial waves $\s{\phi}_i^a(\br)$ are per construction identical to the partial waves outside the augmentation sphere. Inside the spheres, we can choose them as any smooth continuation. Presently \gpaw{} uses simple 6'th order polynomials of even powers only (as odd powers in $r$ result in a kink in the functions at the origin, i.e. that the first derivatives are not defined at this point), where the coefficients are used to match the partial waves smoothly at $r=r_c$. Other codes use Bessel functions\cite{Kresse1999} or Gaussians. \subsubsection*{The Smooth Projector Functions} \par The smooth projector functions are a bit more tricky.
Making them orthonormal to $\s{\phi}_i^a(\f{r})$ is a simple task of applying an orthonormalization procedure. This is the only formal requirement, but in any actual implementation all expansions are necessarily finite, and we therefore want them to converge as fast as possible, so only a few terms need to be evaluated. \par A popular choice is to determine the smooth projector functions according to % \begin{equation}\label{eq: construct projector} \ket{\s{p}_i^a} = \left( -\tfrac{1}{2} \nabla^2 + \s{v}_s - \epsilon_i\right) \ket{\s{\phi}^a_i} \end{equation} % or equivalently % \begin{equation} \s{p}_{j}^a(r) = \left[-\frac{1}{2}\frac{d^2}{dr^2} - \frac{1}{r}\frac{d}{dr} + \frac{l(l+1)}{2r^2} + \s{v}_s(r) - \epsilon_j \right] \s{\phi}^a_j(r) \end{equation} % where $\s{v}_s(r)$ is the smooth KS potential $u_H[\s{\rho}](r) + v_{xc}[\s{n}](r)$. One then enforces the complementary orthogonality condition $\braket{\s{p}_{j}^a}{\s{\phi}^a_{j'}} = \delta_{j,j'}$ inside the augmentation sphere, e.g. by a standard Gram-Schmidt procedure. Using this procedure ensures that the reference atom is described correctly despite the finite number of projectors. \subsubsection*{The Smooth Compensation Charge Expansion Functions}\label{sec: choosing comp charge} The smooth compensation charges $\s{g}_L^a(\br)$, are products of spherical harmonics, and radial functions $\s{g}_l^a(r)$ satisfying that % \begin{equation} \int d\f{r} r^l Y_L(\hat{\f{r}})\s{g}_{L'}^a(\f{r}) = \delta_{LL'} \end{equation} % In \gpaw{} the radial functions are chosen as generalized Gaussians according to (here shown for $\bR^a=0$): % \begin{equation}\label{eq: generalized gaussians} \s{g}_L^a(\f{r}) = \s{g}_l^a(r) Y_L(\hat{\f{r}})~~,\quad \s{g}_l^a(r) = \frac{1}{\sqrt{4\pi}}\frac{l!}{(2l+1)!}(4\alpha^a)^{l+3/2}r^le^{-\alpha^ar^2} \end{equation} % where the atom-dependent decay factor $\alpha^a$ is chosen such that the charges are localized within the augmentation sphere.
%\par With this choice of compensation charges, the tensors %$N_{L_1L_2}^a$ and the potential part of the $M_{i_1i_2L}^a$ tensors %can be evaluated analytically, see \cite{Obara:1986}. \subsubsection*{The Core- and Smooth Core Densities } The core density follows directly from the all electron partial waves by % \begin{equation}\label{eq: core density} n_c(r) = \sum_i^\text{core} |\phi_i(\br)|^2 = \sum_j^\text{core} 2(2l+1) |\phi_j(r)|^2 / 4\pi \end{equation} % \par The smooth core densities $\s{n}_c^a(\br)$ are like the smooth partial waves expanded in a few (two or three) Bessel functions, Gaussians, polynomials or otherwise, fitted such that it matches the true core density smoothly at the cut-off radius. \subsubsection*{The Localized Potential} An additional freedom in PAW is that for any operator $\wh{L}$, localized within the augmentation spheres, we can exploit the identity \ref{eq: phi p completeness} % \begin{equation} \sum_i \ket{\s{\phi}_i^a}\bra{\s{p}_i^a} = 1 %\qquad\text{, for }|\f{r}-\f{R}^a|r_c^a$) we get the identity % \begin{equation*} 0 = \int d\br \s{n}(\br) \sum_a \bar{v}^a(\br) - \sum_a \int d\br \s{n}^a \bar{v}^a(\br) \end{equation*} % This expression can be used as an `intelligent zero'. Using this, we can make the replacement of the smooth potential % \begin{equation} \s{v}_\text{eff}(\br) = u_H[\s{\rho}](\br) + v_{xc}[\s{n}](\br) \to \s{v}_\text{eff}(\br) = u_H[\s{\rho}](\br) + v_{xc}[\s{n}](\br) + \bar{v}(\br) \end{equation} % if we at the same time add % \begin{equation} B^a + \sum_{i_1i_2} B^a_{i_1i_2} D^a_{i_1i_2} \end{equation} % to the energy corrections $\Delta E^a$, where % \begin{equation} B^a = -\int d\br \s{n}_c^a\bar{v}^a(\br) \quad\text{and}\quad \Delta B^a_{i_1i_2} = -\int d\br \s{\phi}^a_{i_1}\s{\phi}^a_{i_2}\bar{v}^a(\br) \end{equation} % This also implies that $B^a_{i_1i_2}$ should be added to $\Delta H^a_{i_1i_2}$. 
\par The advantage of doing this is that the Hartree potential and the xc-potential might not be optimally smooth close to the nuclei, but if we define the localized potential properly, the sum of the three potentials might still be smooth. Thus one can initially evaluate $u_H[\s{\rho}](\br)$ and $v_{xc}[\s{n}](\br)$ on an extra fine grid, add $\bar{v}(\br)$ and then restrict the total potential to the coarse grid again before solving the KS equation. \par The typical way of constructing the localized potentials $\bar{v}^a$ is by expanding it in some basis, and then choosing the coefficients such that the potential $u_H[\s{\rho}](\br) + v_{xc}[\s{n}](\br) + \bar{v}(\br)$ is optimally smooth at the core for the reference system. \par Inclusion of $\bar{v}^a(\br)$ changes the forces on each atom only through the redefinitions of $\s{v}_\text{eff}(\br)$ and $\Delta H^a_{i_1i_2}$. \subsubsection*{Summary} When constructing a data set for a specific atom, one must specify the following quantities, all defined within the augmentation spheres only: % \begin{enumerate} \item $\phi_i^a$ from radial KS equation \item $\s{\phi}_i^a$ by appropriate smooth continuation of $\phi_i^a$ \item $\s{p}_i^a$ from equation \ref{eq: construct projector} \item $\s{g}_L^a$ localized within $r^{l+1}} u^a_{j_1}(r)u^a_{j_c}(r) u^a_{j_c}(r')u^a_{j_2}(r'). \end{split} \end{equation} % Although the valence-core interaction is computationally trivial to include, it is not unimportant, giving rise to shifts in the valence eigenvalues of up to 1eV (though only a few kcal/mol in atomization energies), and we note that this contribution is unavailable in pseudopotential schemes. The core-core exchange is simply a reference energy, and will not affect self-consistency or energy differences. 
For the iterative minimization schemes used in real-space and plane wave codes, the explicit form of the non-local Fock operator $v^\text{NL}(\br, \br')$ is never needed, and would indeed be impossible to represent on any realistic grid. Instead only the action of the operator on a state is needed. As with the Hamiltonian operator, the action on the pseudo waves is derived via the relation $f_n \hat{v}^\text{NL} \ket{\tilde{\psi}_n} = \partial E_\text{xx} / \partial \bra{\tilde{\psi}_n}$. Referring to \cite{Paier2005} for a derivation, we merely state the result \begin{multline}\label{eq: nonlocal exchange} \h{v}^\text{NL} \ket{\s{\psi}_n} = \sum_m f_m \s{v}_{nm}(\br) \ket{\s{\psi}_m} \\ + \sum_a \sum_{i_1i_2} \ket{\s{p}_{i_1}^a} \left[ \sum_m v_{nm,i_1i_2}^a P^a_{mi_2} - X^a_{i_1i_2} P^a_{ni_2} - 2 \left( \sum_{i_3i_4}C^a_{i_1i_3i_2i_4}D^a_{i_3i_4} \right) P^a_{ni_2} \right] \end{multline} where $\tilde{v}_{nm}$ is the solution of $\nabla^2 \tilde{v}_{nm}(\br) = -4\pi \tilde{\rho}_{nm}(\br)$, and $v_{nm,i_1i_2}^a = \sum_L \Delta^a_{Li_1i_2}\int d\br\s{g}^a_L(\br) \tilde{v}_{nm}(\br)$. Again the computationally demanding first term is related to smooth pseudo quantities only, which can be accurately represented on coarse grids, making it possible to do basis set converged self-consistent EXX calculations at a relatively modest cost. Applying the Fock operator is however still expensive, as a Poisson equation must be solved for all pairs of orbitals. As a technical consideration, note that the effect of the atomic corrections due to valence-valence, valence-core, and core-core exchange interactions can simply be incorporated into the standard equations by redefining equations \ref{eq:coulom tensor 2}, \ref{eq:coulom tensor 1}, and \ref{eq:coulom tensor 0} respectively, which will also take care of the last two terms in the Fock operator above. 
The introduction of the pair orbital compensation charges does however lead to a non-trivial correction to the Fock operator; the term proportional to $v^a_{nm, i_1i_2}$. This term also leads to a distinct contribution when calculating the kinetic energy via the eigenvalues as done in equation \ref{eq:kinetic-energy}. The additional term (besides those related to redefining \ref{eq:coulom tensor 0}--\ref{eq:coulom tensor 2}) \begin{equation} \sum_{nm}f_n\left[f_{m} \delta_{\sigma_n,\sigma_{m}} \int d\br \tilde{v}_{nm}(\br) \psit_n^*(\br)\psit_{m}(\br) - \sum_a\sum_{i_1i_2} P_{ni_1}^aP_{mi_2}^a v^a_{nm,i_1i_2}\right], \end{equation} should be added to the right hand side of \ref{eq:kinetic-energy} on inclusion of exact exchange. In a similar fashion, the compensation charges lead to an additional force contribution in equation \ref{eq: force no exx} given by \begin{equation}\label{eq:paw-exx-force} \begin{split} \mathbf{F}^a_{xx} = \sum_{nm} f_{n}f_{m}\delta_{\sigma_{n} \sigma_{m}}\Bigg\{ &\int d\br' \tilde{v}_{nm}(\br') \sum_{i_1i_2} P^{a*}_{ni_1}P^{a}_{mi_2}\sum_L\Delta^a_{Li_1i_2}\frac{\partial \tilde{g}^a_L(\br')}{\partial \bR^a}\\ & + \sum_{i_1i_2} v^a_{nm,i_1i_2}\left( P^{a*}_{n i_1}\braket{\frac{d \pt^a_{i_2}}{d\bR^a}}{\psit_{m}} + \braket{\psit_{n}}{\frac{d \pt^a_{i_1}}{d\bR^a}} P^a_{m i_2} \right)\Bigg\}. \end{split} \end{equation} \subsubsection{Optimized Effective Potential} The optimized effective potential (OEP) method is a way of converting the non-local Fock operator $\hat{v}^\text{NL}_x$ into a local form $\hat{v}^\text{L}_x = v^\text{L}_x(\br)$. One way to derive the OEP equations in standard KS-DFT, is to use perturbation theory along the adiabatic connection (G\"orling-Levy perturbation theory \cite{Gorling1994}). On converting the OEP equation to the PAW formalism, it should be remembered that local potentials in PAW transform to a local pseudo part plus non-local atomic corrections.
Hence we want to arrive at a potential of the form \begin{equation}\label{eq: local exchange} \h{v}_x^\text{L} = \s{v}_x^\text{L}(\br) + \sum_a \sum_{i_1i_2} \ket{\s{p}_{i_1}^a}\Delta v_{i_1i_2}^a \bra{\s{p}_{i_2}^a}, \end{equation} where both the pseudo part $\s{v}_x^\text{L}$ as well as the coefficients $\Delta v_{i_1i_2}^a$ should be determined. The derivation is more or less straightforward, if one remembers that the PAW KS equation is a generalized eigenvalue problem, that the variational quantity is the pseudo orbitals, and that the first order shift in the density has both a pseudo and an atomic part. The result is \begin{subequations}\label{eq: paw oep2} \begin{align} \sum_n f_n \s{\psi}_n^*(\br) \sum_{m\neq n} \s{\psi}_m(\br) \frac{\bra{\s{\psi}_m} \h{v}_x^\text{NL} - \h{v}_x^\text{L} \ket{\s{\psi}_n}}{\epsilon_n - \epsilon_m} + c.c. &= 0\\ \sum_n f_n P_{ni_1}^{a*}\sum_{m\neq n}P_{mi_2}^{a} \frac{\bra{\s{\psi}_m} \h{v}_x^\text{NL} - \h{v}_x^\text{L} \ket{\s{\psi}_n}}{\epsilon_n - \epsilon_m} + c.c. &= 0 \end{align} \end{subequations} where $\hat{v}^\text{NL}_x$ is the non-local exchange operator of equation \ref{eq: nonlocal exchange} and $\hat{v}^\text{L}_x$ is the local version in \ref{eq: local exchange}. These can be solved iteratively starting from a local density-functional approximation to the exchange potential in the spirit of \cite{Kummel2003}. It might seem that OEP is just extra work on top of the already expensive non-local operator, but it can in some cases be faster, as the number of SCF iterations in the KS cycle is greatly reduced. \clearpage \addcontentsline{toc}{section}{References} \bibliographystyle{unsrt} \bibliography{references} \end{document} gpaw-24.1.0/doc/documentation/paw_note/references.bib000066400000000000000000000245501454550013000225700ustar00rootroot00000000000000% This file was created with JabRef 2.5. % Encoding: ISO8859_1 @ARTICLE{Blochl1994, author = {P. E.
Bl{\"o}chl}, title = {Projector augmented-wave method}, journal = {Physical Review B}, year = {1994}, volume = {50}, pages = {17953--17979}, number = {24}, month = {Dec}, doi = {10.1103/PhysRevB.50.17953}, numpages = {26}, publisher = {American Physical Society}, review = {The original PAW paper.} } @ARTICLE{Blochl2003, author = {P. E. Bl{\"o}chl and C. J. F{\"o}rst and J. Schimpl}, title = {Projector Augmented Wave Method: ab-initio molecular dynamics with full wave functions}, journal = {Bulletin of Materials Science}, year = {2003}, volume = {26}, pages = {33--41}, review = {Review of the PAW method by Bl{\"o}chl and coworkers. Easier to read than the original.} } @ARTICLE{Dudarev1998, author = {Dudarev, S. L. and Botton, G. A. and Savrasov, S. Y. and Humphreys, C. J. and Sutton, A. P.}, title = {Electron-energy-loss spectra and the structural stability of nickel oxide: An LSDA+U study}, journal = {Phys. Rev. B}, year = {1998}, volume = {57}, pages = {1505--1509}, number = {3}, month = {Jan}, doi = {10.1103/PhysRevB.57.1505}, numpages = {4}, publisher = {American Physical Society} } @ARTICLE{Ferretti2007, author = {A Ferretti and A Calzolari and B Bonferroni and R Di Felice}, title = {Maximally localized Wannier functions constructed from projector-augmented waves or ultrasoft pseudopotentials}, journal = {Journal of Physics: Condensed Matter}, year = {2007}, volume = {19}, pages = {036215 (16pp)}, number = {3}, abstract = {We report a theoretical scheme that enables the calculation of maximally localized Wannier functions within the formalism of projector-augmented waves (PAW), which also includes the ultrasoft pseudopotential (USPP) approach. We give a description of the basic underlying formalism and explicitly write out all the required matrix elements using the common ingredients of the PAW/USPP theory. We report an implementation of the method in a form suitable for accepting the input electronic structure from USPP plane-wave DFT simulations. 
We apply the method to the calculation of Wannier functions, dipole moments and spontaneous polarizations for a range of test cases. A comparison with norm-conserving pseudopotentials is reported as a benchmark.}, doi = {10.1088/0953-8984/19/3/036215}, url = {http://stacks.iop.org/0953-8984/19/036215} } @ARTICLE{Gorling1994, author = {A. G{\"o}rling and M. Levy}, title = {Exact Kohn-Sham scheme based on perturbation theory}, journal = {Physical Review A}, year = {1994}, volume = {50}, pages = {196--204}, review = {Scheme for doing exact xc} } @ARTICLE{Kummel2003, author = {S. K{\"u}mmel and J. P. Perdew}, title = {Simple Iterative Construction of the Optimized Effective Potential for Orbital Functionals, Including Exact Exchange}, journal = {Physical Review Letters}, year = {2003}, volume = {90}, pages = {043004}, review = {Iterate from KLI to OEP: short} } @ARTICLE{Kresse1999, author = {G. Kresse and D. Joubert}, title = {From ultrasoft pseudopotentials to the projector augmented-wave method}, journal = {Physical Review B}, year = {1999}, volume = {59}, pages = {1758--1775}, month = {Jan}, abstract = {The formal relationship between ultrasoft (US) Vanderbilt-type pseudopotentials and Bl{\"o}chl's projector augmented wave (PAW) method is derived. It is shown that the total energy functional for US pseudopotentials can be obtained by linearization of two terms in a slightly modified PAW total energy functional. The Hamilton operator, the forces, and the stress tensor are derived for this modified PAW functional. A simple way to implement the PAW method in existing plane-wave codes supporting US pseudopotentials is pointed out. In addition, critical tests are presented to compare the accuracy and efficiency of the PAW and the US pseudopotential method with relaxed core all electron methods. These tests include small molecules (H2, H2O, Li2, N2, F2, BF3, SiF4) and several bulk systems (diamond, Si, V, Li, Ca, CaF2, Fe, Co, Ni).
Particular attention is paid to the bulk properties and magnetic energies of Fe, Co, and Ni.}, doi = {10.1103/PhysRevB.59.1758}, numpages = {17}, publisher = {American Physical Society}, review = {From USPP to PAW} } @ARTICLE{Marsman2006, author = {M. Marsman and G. Kresse}, title = {Relaxed core projector-augmented-wave method}, journal = {The Journal of Chemical Physics}, year = {2006}, volume = {125}, pages = {104101}, number = {10}, eid = {104101}, doi = {10.1063/1.2338035}, keywords = {density functional theory; SCF calculations}, numpages = {12}, publisher = {AIP}, url = {http://link.aip.org/link/?JCP/125/104101/1} } @ARTICLE{Paier2005, author = {Joachim Paier and Robin Hirschl and Martijn Marsman and Georg Kresse}, title = {The Perdew--Burke--Ernzerhof exchange-correlation functional applied to the G2-1 test set using a plane-wave basis set}, journal = {The Journal of Chemical Physics}, year = {2005}, volume = {122}, pages = {234102}, number = {23}, eid = {234102}, doi = {10.1063/1.1926272}, keywords = {molecular configurations; dissociation energies; ab initio calculations; electron correlations}, numpages = {13}, publisher = {AIP}, review = {Implementation of exact exchange in VASP, (PAW program), and very large test on molecular systems.}, url = {http://link.aip.org/link/?JCP/122/234102/1} } @ARTICLE{Rohrbach2004, author = {Rohrbach, A. and Hafner, J. and Kresse, G.}, title = {Molecular adsorption on the surface of strongly correlated transition-metal oxides: A case study for CO/NiO(100)}, journal = {Physical Review B}, year = {2004}, volume = {69}, pages = {075413}, number = {7}, month = {Feb}, doi = {10.1103/PhysRevB.69.075413}, numpages = {13}, publisher = {American Physical Society} } @Article{Rostgaard2010, title = {Fully self-consistent GW calculations for molecules}, author = {Rostgaard, C. and Jacobsen, K. W. and Thygesen, K. S.}, journal = {Phys. Rev. 
B}, volume = {81}, number = {8}, pages = {085103}, numpages = {10}, year = {2010}, month = {Feb}, doi = {10.1103/PhysRevB.81.085103}, publisher = {American Physical Society} } @ARTICLE{Tang2009, author = {W Tang and E Sanville and G Henkelman}, title = {A grid-based Bader analysis algorithm without lattice bias}, journal = {Journal of Physics: Condensed Matter}, year = {2009}, volume = {21}, pages = {084204 (7pp)}, number = {8}, abstract = {A computational method for partitioning a charge density grid into Bader volumes is presented which is efficient, robust, and scales linearly with the number of grid points. The partitioning algorithm follows the steepest ascent paths along the charge density gradient from grid point to grid point until a charge density maximum is reached. In this paper, we describe how accurate off-lattice ascent paths can be represented with respect to the grid points. This improvement maintains the efficient linear scaling of an earlier version of the algorithm, and eliminates a tendency for the Bader surfaces to be aligned along the grid directions. As the algorithm assigns grid points to charge density maxima, subsequent paths are terminated when they reach previously assigned grid points. It is this grid-based approach which gives the algorithm its efficiency, and allows for the analysis of the large grids generated from plane-wave-based density functional theory calculations.}, pdf = {Tang2009.pdf}, url = {http://stacks.iop.org/0953-8984/21/084204} } @ARTICLE{Thygesen2005, author = {Thygesen, K. S. and Hansen, L. B. and Jacobsen, K. W.}, title = {Partly occupied Wannier functions: Construction and applications}, journal = {Physical Review B}, year = {2005}, volume = {72}, pages = {125119}, number = {12}, month = sep, abstract = {We have developed a practical scheme to construct partly occupied, maximally localized Wannier functions (WFs) for a wide range of systems. 
We explain and demonstrate how the inclusion of selected unoccupied states in the definition of the WFs can improve both their localization and symmetry properties. A systematic selection of the relevant unoccupied states is achieved by minimizing the spread of the resulting WFs. The method is applied to a silicon cluster, a copper crystal, and a Cu(100) surface with nitrogen adsorbed. In all cases we demonstrate the existence of a set of WFs with particularly good localization and symmetry properties, and we show that this set of WFs is characterized by a maximal average localization.} } @ARTICLE{Walter2008, author = {Michael Walter and Hannu H{\"a}kkinen and Lauri Lehtovaara and Martti Puska and Jussi Enkovaara and Carsten Rostgaard and Jens J{\o}rgen Mortensen}, title = {Time-dependent density-functional theory in the projector augmented-wave method}, journal = {The Journal of Chemical Physics}, year = {2008}, volume = {128}, pages = {244101}, number = {24}, eid = {244101}, doi = {10.1063/1.2943138}, keywords = {density functional theory; excited states; photoacoustic spectra; potential energy surfaces}, numpages = {10}, publisher = {AIP}, url = {http://link.aip.org/link/?JCP/128/244101/1} } @ARTICLE{Yin2009, author = {F. Yin and J. Akola and P. Koskinen and M. Manninen and R. E. Palmer}, title = {Bright Beaches of Nanoscale Potassium Islands on Graphite in STM Imaging}, journal = {Physical Review Letters}, year = {2009}, volume = {102}, pages = {106102}, number = {10}, eid = {106102}, doi = {10.1103/PhysRevLett.102.106102}, numpages = {4}, publisher = {APS}, review = {GPAW with linear external potential applied to STM simulation.}, url = {http://link.aps.org/abstract/PRL/v102/e106102} } @BOOK{Fiolhais2003, title = {A Primer in Density Functional Theory}, publisher = {Springer}, year = {2003}, editor = {C. Fiolhais and F. Nogueira and M. 
Marques}, volume = {620}, series = {Lecture Notes in Physics}, review = {Very good DFT book.} } @MISC{gpaw, title = {{The real-space PAW-DFT code GPAW is part of the CAMP Open-Source (CAMPOS) project.}}, note = {GPAW is freely available at https://wiki.fysik.dtu.dk/gpaw.}, review = {Where to find GPAW} } @comment{jabref-meta: selector_publisher:} @comment{jabref-meta: selector_author:} @comment{jabref-meta: selector_journal:} @comment{jabref-meta: selector_keywords:} gpaw-24.1.0/doc/documentation/paw_papers.rst000066400000000000000000000015341454550013000210450ustar00rootroot00000000000000.. _paw_papers: Articles on the PAW formalism ----------------------------- The original article introducing the PAW formalism: | P. E. Blöchl | :doi:`Projector augmented-wave method <10.1103/PhysRevB.50.17953>` | Physical Review B, Vol. **50**, 17953, 1994 A different formulation of PAW by Kresse and Joubert designed to make the transition from USPP to PAW easy. | G. Kresse and D. Joubert | :doi:`From ultrasoft pseudopotentials to the projector augmented-wave method <10.1103/PhysRevB.59.1758>` | Physical Review B, Vol. **59**, 1758, 1999 A second, more pedagogical, article on PAW by Blöchl and co-workers. | P. E. Blöchl, C. J. Först, and J. Schimpl | :doi:`Projector Augmented Wave Method: ab-initio molecular dynamics with full wave functions <10.1007/BF02712785>` | Bulletin of Materials Science, Vol. **26**, 33, 2003 gpaw-24.1.0/doc/documentation/pm/000077500000000000000000000000001454550013000165635ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/pm/pm.rst000066400000000000000000000050451454550013000177350ustar00rootroot00000000000000.. module:: gpaw.pipekmezey .. _pipek_mezey_wannier: ============================= Pipek-Mezey Wannier Functions ============================= Introduction ============ Pipek-Mezey [#pm]_ Wannier functions (PMWFs) are an alternative to the maximally localized (Foster-Boys) Wannier functions (MLWFs).
PMWFs are highly localized orbitals with chemical intuition where a distinction is maintained between `\sigma` and `\pi` type orbitals. The PMWFs are as localized as the MLWFs as measured by the spread function, whereas the MLWFs frequently mix chemically distinct orbitals [#pmwfs]_. Theoretical Background ====================== In PMWFs the objective function which is maximized is .. math:: \mathcal{P}(\mathbf{W}) = \sum^{N_\mathrm{occ}}_n \sum_{a}^{N_a} \mid Q^a_{nn}(\mathbf{W}) \mid^p where the quantity `Q^a_{nn}` is a diagonal element of the atomic partial charge matrix of atom `a`. `\mathbf{W}` is a unitary matrix which connects the canonical orbitals `R` to the localized orbitals `n` .. math:: \psi_n(\mathbf{r}) = \sum_R W_{Rn}\phi_R(\mathbf{r}) The atomic partial charge is defined by partitioning the total electron density, in real-space, with suitable atomic centered weight functions .. math:: n_a(\mathbf{r}) = w_a(\mathbf{r})n(\mathbf{r}) Formulated in this way the atomic charge matrix is defined as .. math:: Q^a_{mn} = \int \psi^*_m(\mathbf{r})w_a(\mathbf{r})\psi_n(\mathbf r)d^3r where the number of electrons localized on atom `a` follows .. math:: \sum_n^{N_\mathrm{occ}}Q^a_{nn}=n_a A choice of Wigner-Seitz or Hirshfeld weight functions is provided, but the orbital localization is insensitive to the choice of weight function [#genpm]_. ------------ Localization ------------ The PMWFs are applicable to LCAO, PW and FD mode, and to both open and periodic boundary conditions. For periodic simulations a uniform Monkhorst-Pack grid must be used. ---------- References ---------- .. [#pm] J. Pipek, P. G. Mezey :doi:`A fast intrinsic localization procedure applicable for ab initio and semiempirical linear combination of atomic orbital wave functions <10.1063/1.456588>`, *J. Chem. Phys.*, (1989) .. [#pmwfs] E. Ö. Jónsson, S. Lehtola, M. Puska, H. Jónsson :doi:`Theory and Application of Generalized Pipek-Mezey Wannier Functions <10.1021/acs.jctc.6b00809>`, *J. Chem.
Theory Comput.*, (2017) .. [#genpm] S. Lehtola, H. Jónsson :doi:`Pipek-Mezey Orbital Localization Using Various Partial Charge Estimates <10.1021/ct401016x>`, *J. Chem. Theory Comput.*, (2014) gpaw-24.1.0/doc/documentation/poisson.rst000066400000000000000000000160031454550013000203730ustar00rootroot00000000000000.. _advancedpoisson: ======================== Advanced Poisson solvers ======================== The ``PoissonSolver`` with default parameters uses zero boundary conditions on the cell boundaries. This becomes a problem in systems involving a large dipole moment, for example (due to, e.g., plasmonic charge oscillation on a nanoparticle). The potential due to the dipole is long-ranged and, thus, the converged potential requires large vacuum sizes. However, in the LCAO approach a large vacuum size is often unnecessary. Thus, to avoid using large vacuum sizes but still get a converged potential, one can use two approaches or their combination: 1) use multipole moment corrections or 2) solve the Poisson equation on an extended grid. These two approaches are implemented in ``MomentCorrectionPoissonSolver`` and ``ExtraVacuumPoissonSolver``. In any nano-particle plasmonics calculation, it is necessary to use multipole correction. Without corrections more than 10Å of vacuum is required for converged results. Multipole moment corrections ---------------------------- The boundary conditions can be improved by adding multipole moment corrections to the density so that the corresponding multipoles of the density vanish. The potential of these corrections is added to the obtained potential. For a description of the method, see [#Castro2003]_. This can be accomplished by the following solver:: from gpaw.poisson import PoissonSolver from gpaw.poisson_moment import MomentCorrectionPoissonSolver poissonsolver = MomentCorrectionPoissonSolver(poissonsolver=PoissonSolver(), moment_corrections=4) This corrects the 4 first multipole moments, i.e., `s`, `p_x`, `p_y`, and `p_z` type multipoles.
The potential of the corrected density is solved with the given ``poissonsolver``. The range of multipoles can be changed by changing the ``moment_corrections`` parameter. For example, ``moment_corrections=9`` includes in addition to the previous multipoles, also `d_{xx}`, `d_{xy}`, `d_{yy}`, `d_{yz}`, and `d_{zz}` type multipoles. This setting suffices usually for spherical-like metallic nanoparticles, but more complex geometries require inclusion of very high multipoles or, alternatively, a multicenter multipole approach. For this, consider the advanced syntax of ``moment_corrections``. The previous code snippet is equivalent to:: from gpaw.poisson import PoissonSolver from gpaw.poisson_moment import MomentCorrectionPoissonSolver poissonsolver = MomentCorrectionPoissonSolver(poissonsolver=PoissonSolver(), moment_corrections=[{'moms': range(4), 'center': None}]) Here ``moment_corrections`` is a list of dictionaries with the following keywords: ``moms`` specifies the considered multipole moments, e.g., ``range(4)`` corresponds to `s`, `p_x`, `p_y`, and `p_z` multipoles, and ``center`` specifies the center of the added corrections in atomic units (``None`` corresponds to the center of the cell). As an example, consider a metallic nanoparticle dimer where the nanoparticle centers are at ``(x1, y1, z1)`` Å and ``(x2, y2, z2)`` Å. In this case, the following settings for the ``MomentCorrectionPoissonSolver`` may be tried out:: import numpy as np from gpaw.poisson import PoissonSolver from gpaw.poisson_moment import MomentCorrectionPoissonSolver moms = range(4) center1 = np.array([x1, y1, z1]) center2 = np.array([x2, y2, z2]) poissonsolver = MomentCorrectionPoissonSolver(poissonsolver=PoissonSolver(), moment_corrections=[{'moms': moms, 'center': center1}, {'moms': moms, 'center': center2}]) When multiple centers are used, the multipole moments are calculated on non-overlapping regions of the calculation cell. Each point in space is associated to its closest center. 
See `Voronoi diagrams <https://en.wikipedia.org/wiki/Voronoi_diagram>`_ for an analogous illustration of the partitioning of a plane. .. [#Castro2003] A. Castro, A. Rubio, and M. J. Stott, Solution of Poisson's equation for finite systems using plane-wave methods, *Can. J. Phys.* **81**, 1151 (2003). :doi:`10.1139/p03-078` Adding extra vacuum to the Poisson grid --------------------------------------- The multipole correction scheme is not always successful for complex system geometries. For these cases, one can use a separate large grid just for solving the Hartree potential. Such a large grid can be set up by using the ``ExtraVacuumPoissonSolver`` wrapper:: from gpaw.poisson import PoissonSolver from gpaw.poisson_extravacuum import ExtraVacuumPoissonSolver poissonsolver = ExtraVacuumPoissonSolver(gpts=(256, 256, 256), poissonsolver_large=PoissonSolver()) This uses the given ``poissonsolver_large`` to solve the Poisson equation on a large grid defined by the number of grid points ``gpts``. The size of the grid is given **in the units of the Poisson grid** (this is usually the same as the fine grid). If using the ``FDPoissonSolver``, it is important to use grid sizes that are divisible by high powers of 2 to accelerate the multigrid scheme. To speed up the calculation of the Hartree potential on the large grid, one can apply additional coarsening:: from gpaw.poisson import PoissonSolver from gpaw.poisson_extravacuum import ExtraVacuumPoissonSolver poissonsolver = ExtraVacuumPoissonSolver(gpts=(256, 256, 256), poissonsolver_large=PoissonSolver(), coarses=1, poissonsolver_small=PoissonSolver()) The ``coarses`` parameter describes how many times the given large grid is coarsened before the ``poissonsolver_large`` is used to solve the Poisson equation there. With the given value ``coarses=1``, the grid is coarsened once and the actual calculation grid is of size ``(128, 128, 128)`` with the grid spacing twice as large compared to the original one. 
The obtained coarse potential is used to correct the boundary conditions of the potential calculated on the original small and fine grid by ``poissonsolver_small``. As ``ExtraVacuumPoissonSolver`` is wrapper, it can be combined with any ``PoissonSolver`` instance. For example, one can define multiple subsequently larger grids via:: from gpaw.poisson import PoissonSolver from gpaw.poisson_extravacuum import ExtraVacuumPoissonSolver poissonsolver0 = ExtraVacuumPoissonSolver(gpts=(256, 256, 256), poissonsolver_large=PoissonSolver(), coarses=1, poissonsolver_small=PoissonSolver()) poissonsolver = ExtraVacuumPoissonSolver(gpts=(256, 256, 256), poissonsolver_large=poissonsolver0, coarses=1, poissonsolver_small=PoissonSolver()) See ``poissonsolver.get_description()`` or the ``txt`` output for the corresponding grids. gpaw-24.1.0/doc/documentation/pw_and_exx.rst000066400000000000000000000051731454550013000210430ustar00rootroot00000000000000============================= Planewaves and exact exchange ============================= With `N=N_1N_2N_3` grid points: `\br^T=(g_1/N_1,g_2/N_2,g_3/N_3)\mathbf A`, where `g_c=0,1,...,N_c-1`, we get a plane wave expansion of the wave function as: .. math:: \tilde\psi_{k n}(\br) = \frac{1}{N} \sum_\bG e^{i(\bG+\bk)\cdot \br}c_{\bk n}(\bG), where the coefficients are given as: .. math:: c_{\bk n}(\bG) = \sum_\br e^{-i(\bG+\bk)\cdot\br}\tilde\psi_{\bk n}(\br) **Exact exchange** From the pair densities: .. math:: \tilde\rho_{\bk_1n_1 \bk_2n_2}(\br) = \tilde\psi_{\bk_1n_1}(\br)^* \tilde\psi_{\bk_2n_2}(\br) + ... = \\ \frac{1}{N^2} \sum_{\bG\bG'} e^{i(\bG-\bk_1+\bk_2)\cdot \br} c_{\bk_1n_1}(\bG)^* c_{\bk_2n_2}(\bG+\bG') = \sum_\bG e^{i(\bG-\bk_1+\bk_2)\cdot \br}C_{\bk_1n_1\bk_2n_2}(\bG), we get the exact exchange energy: .. math:: E_x = -\pi\Omega \sum_{\bk_1n_1} \sum_{\bk_2n_2} f_{\bk_1n_1}f_{\bk_2n_2} \sum_\bG \frac{|C_{\bk_1n_1\bk_2n_2}(\bG)|^2}{|\bk_1-\bk_2-\bG|^2}, where the weight of a `\bk`-point is included in `f_{\bk n}`. 
Let `E_x'` be defined as the sum above excluding the divergent terms for `\bk_1=\bk_2` and `\bG=0`. With .. math:: F(\bG)=\frac{e^{-\alpha G^2}}{G^2}, we get (see [#Sorouri]_): .. math:: E_x = E_x' -\pi\Omega\sum_{\bk_1n_1n_2}f_{\bk_1n_1}f_{\bk_1n_2} |C_{\bk_1n_1\bk_1n_2}(0)|^2 \left(\sum_{\bk_2\bG}F(\bk_1-\bk_2-\bG)- \sum_{\bk_2}\sum_{\bG\neq\bk_1-\bk_2}F(\bk_1-\bk_2-\bG)\right). In the limit of an infinitely dense sampling of the BZ and a not too small `\alpha`, we get .. math:: \sum_{\bk_2\bG}F(\bk_1-\bk_2-\bG)= \frac{N_k\Omega}{(2\pi)^3}\int_{\text{BZ}}F(\bk)d\bk= \frac{N_k\Omega}{(2\pi)^2}\sqrt{\pi/\alpha}, where `N_k` is the number of `\bk`-points. Finally: .. math:: E_x = E_x' -\pi\Omega\sum_{\bk_1n_1n_2}f_{\bk_1n_1}f_{\bk_1n_2} |C_{\bk_1n_1\bk_1n_2}(0)|^2\gamma, where .. math:: \gamma = \frac{\Omega}{(2\pi)^2}\sqrt{\pi/\alpha}- \sum_{\bk}\sum_{\bG\neq\bk}F(\bk-\bG). The gradient is: .. math:: \frac{\partial E_x}{\partial\tilde\psi_{\bk_1n_1}(\br)}= -\pi\Omega\sum_{\bk_2n_2}f_{\bk_1n_1}f_{\bk_2n_2} e^{i(\bk_1-\bk_2)\cdot\br}\tilde\psi_{\bk_2n_2}(\br) \frac1N\sum_\bG\frac{C_{\bk_1n_1\bk_2n_2}(G)^*}{|\bk_1-\bk_2-\bG|^2} e^{-i\bG\cdot\br}, where `1/|\bk_1-\bk_2-\bG|^2` is replaced by `\gamma` for the term where `\bk_1=\bk_2` and `\bG=0`. .. [#Sorouri] *Accurate and Efficient Method for the Treatment of Exchange in a Plane-Wave Basis*, A. Sorouri, W.M.C. Foulkes, and N.D.M. Hine, J. Chem. Phys. 124, 064105-1 -- 064105-7 (2006) gpaw-24.1.0/doc/documentation/qeh/000077500000000000000000000000001454550013000167245ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/qeh/qeh.rst000066400000000000000000000107561454550013000202440ustar00rootroot00000000000000============================================================= The Quantum Electrostatic Heterostructure (QEH) model: Theory ============================================================= We follow the notation of Ref [#andersen2015]_. 
For each monolayer in a heterostructure, the monolayer response function `\widetilde{\chi}(\mathbf{r}, \mathbf{r}', q_\parallel, \omega)` is first calculated. We assume here that `\widetilde{\chi}` is isotropic, i.e. only a function of `q_\parallel = |\mathbf{q}_\parallel|`, and independent of the direction of `\mathbf{q}_\parallel`. The response function is averaged over the in-plane coordinates, and we define .. math:: :label: eq_chizz \widetilde{\chi}(z, z', q_\parallel, \omega) = \frac{1}{A} \int_A \int_A \mathrm{d} \mathbf{{r}}_\parallel \mathrm{d} \mathbf{{r}}'_\parallel \widetilde{\chi}(\mathbf{r}, \mathbf{r}', q_\parallel, \omega), where the integration is over the in-plane coordinates, and `A` is the in-plane area of the supercell. The `z`-dependence can be approximated in a monopole-dipole basis, in which we express `\widetilde{\chi}` as a `2 \times 2` matrix `\chi_{\alpha \alpha'}`, where `\alpha=0` corresponds to a monopole component, while `\alpha = 1` corresponds to a dipole component, and likewise for `\alpha'`. These components are given by .. math:: :label: eq_chi_alpha \widetilde{\chi}_{\alpha \alpha'} (q_\parallel, \omega) = \int \int \mathrm{d}z \mathrm{d}z' (z-z_c)^\alpha \widetilde{\chi}(z, z', q_\parallel, \omega) (z'-z_c)^{\alpha'}, where each integral runs over the interval `[z_c - \frac{L}{2}, z_c + \frac{L}{2}]`, where `L` is the thickness of the layer, and `z_c` the position of the middle of the layer. To make explicit the monopole/dipole structure, we label the components of the `\chi_{\alpha \alpha'}` matrix as `\alpha \in {M, D}`, where `M` corresponds to `\alpha=0` and `D` to `\alpha = 1.` This corresponds to the naming convention used in the GPAW implementation. Expressed in a plane-wave basis, we have .. 
math:: \widetilde{\chi}(\mathbf{r}, \mathbf{r}', q_\parallel, \omega) = \frac{1}{\Omega} \sum_{\mathbf{G} \mathbf{G}'} e^{i(\mathbf{q}_\parallel + \mathbf{G})\cdot \mathbf{r}} \widetilde{\chi}_{\mathbf{G}\mathbf{G}'}(q_\parallel, \omega) e^{-i(\mathbf{q}_\parallel + \mathbf{G'})\cdot \mathbf{r}'}, `\Omega` being the volume of the supercell. Integrating over the plane corresponds to taking `\mathbf{G}_\parallel = \mathbf{G}_\parallel' = 0`, such that equation :eq:`eq_chizz` becomes .. math:: \widetilde{\chi}(z, z', q_\parallel, \omega) = \frac{1}{L} \sum_{G_z G_z'} e^{iG_z z} \widetilde{\chi}_{G_z G_z'}(q_\parallel, \omega) e^{-iG_z' z'} The integrals over `z` in equation :eq:`eq_chi_alpha` can then be carried out analytically, and we find .. math:: \begin{aligned} &\widetilde{\chi}_M(q_\parallel, \omega) = L \widetilde{\chi}_{G_z = 0, G_z' = 0} \\ &\widetilde{\chi}_{MD}(q_\parallel, \omega) = \sum_{G_z' \neq 0} \widetilde{\chi}_{0,G_z'} z_F^*(G_z') \\ &\widetilde{\chi}_{DM}(q_\parallel, \omega) = \sum_{G_z \neq 0} z_F(G_z) \widetilde{\chi}_{G_z,0} \\ &\widetilde{\chi}_{D}(q_\parallel, \omega) = \frac{1}{L} \sum_{G_z \neq 0, G_z' \neq 0} z_F(G_z) \widetilde{\chi}_{G_z G_z'}z_F^*(G_z'), \end{aligned} where the so-called *z-factor* `z_F` is .. math:: z_F(G_z) = \int_{z_c - \frac{L}{2}}^{z_c + \frac{L}{2}} e^{i G_z z} z \mathrm{d}z = -\frac{i e^{i G_z z_c}}{G_z^2} \left[G_z L \cos\left(\frac{G_z L}{2}\right) - 2 \sin\left(\frac{G_z L}{2}\right)\right], and `z_F^*` is the complex conjugate of `z_F`. For systems with mirror symmetry in the out of plane (`z`) direction, the off-diagonal elements `\chi_{MD}` and `\chi_{DM}` must vanish. This can be seen from the following: the mirror symmetry implies that `\chi(z,z') = \chi(-z, -z')`, where we have set `z_c = 0` for simplicity, and we have then for e.g. `\chi_{DM}` that .. 
math:: \chi_{DM} = \int z \chi(z,z') \mathrm{d}z\mathrm{d}z' = \int z \chi(-z,-z') \mathrm{d}z \mathrm{d}z' = \int (-z) \chi(z,z') \mathrm{d}z \mathrm{d}z' = - \chi_{DM} where for the last equality we made the substitution `z \rightarrow-z` and `z' \rightarrow- z'`. A similar result holds for `\chi_{MD}`. Therefore one only needs to calculate the off-diagonal elements for materials that do not have mirror symmetry. .. [#andersen2015] Andersen, Kirsten, Simone Latini, and Kristian S. Thygesen. Dielectric genome of van der Waals heterostructures, *Nano letters* 15.7 (2015): 4616-4621. gpaw-24.1.0/doc/documentation/reports_presentations_and_theses.rst000066400000000000000000000024221454550013000255520ustar00rootroot00000000000000.. _reports_presentations_and_theses: Reports, presentations, and theses using gpaw --------------------------------------------- * Slides from the "GPAW 2021 Users and developers meeting": `A tour of the GPAW source code `__ * Summer-school 2014 talk about PAW, GPAW and ASE: :download:`ss14.pdf` * A short note on the basics of PAW: :download:`paw_note.pdf` * A master thesis on the inclusion of non-local exact exchange in the PAW formalism, and the implementation in gpaw: :download:`rostgaard_master.pdf` * A master thesis on the inclusion of a localized basis in the PAW formalism, plus implementation and test results in GPAW: :download:`marco_master.pdf` * A master thesis on the inclusion of localized basis sets in the PAW formalism, focusing on basis set generation and force calculations: :download:`askhl_master.pdf` * A course report on a project involving the optimization of the setups (equivalent of pseudopotentials) in gpaw: :download:`askhl_10302_report.pdf` * Slides from a talk about PAW: :download:`mortensen_paw.pdf` * Slides from a talk about GPAW development: :download:`mortensen_gpaw-dev.pdf` * Slides from a mini symposium during early development stage: 
:download:`mortensen_mini2003talk.pdf`gpaw-24.1.0/doc/documentation/restart_files.rst000066400000000000000000000016661454550013000215600ustar00rootroot00000000000000.. _restart_files: ============= Restart files ============= Writing restart files ===================== Use ``calc.write('xyz.gpw')`` or ``calc.write('xyz.gpw', mode='all')`` to include also the wave functions. You can register an automatic call to the ``write`` method, every ``n``'th iteration of the SCF cycle like this:: calc.attach(calc.write, n, 'xyz.gpw') or:: calc.attach(calc.write, n, 'xyz.gpw', mode='all') This can be useful for very expensive calculations, where the SCF cycle may be interrupted before it completes. In this way, you can resume the calculation from an intermediate electronic structure. Reading restart files ===================== The calculation can be read from file like this:: calc = GPAW('xyz.gpw') or this:: atoms, calc = restart('xyz.gpw') By adding the option txt=None you can suppress text output when restarting (e.g. when plotting a DOS):: atoms, calc = restart('xyz.gpw', txt=None) gpaw-24.1.0/doc/documentation/rmm-diis.rst000066400000000000000000000047701454550013000204320ustar00rootroot00000000000000.. _RMM-DIIS: ============================ Residual minimization method ============================ Algorithm --------- 1) Initial guess for wave functions (`\tilde{\psi}_n`). 2) Orthogonalize wavefunctions (make sure `\langle \tilde{\psi}_n | \hat{S} | \tilde{\psi}_m \rangle = \delta_{nm}`). 3) Calculate density (`\tilde{n}`, `D_{ij}^a`). 4) Calculate potential (`\tilde{v}`, `\Delta H_{ij}^a`). 5) Apply hamiltonian (`\hat{H}\tilde{\psi}_n`). 6) Subspace diagonalization (rotate `\tilde{\psi}_n` so that `\langle \tilde{\psi}_n | \hat{H} | \tilde{\psi}_m \rangle = \delta_{nm} \epsilon_n`). 7) Calculate residuals (`R_n = \hat{H}\tilde{\psi}_n - \epsilon_n \hat{S}\tilde{\psi}_n`). 8) Improve wave functions using the RMM-DIIS algorithm (see below). 9) Back to (2). 
RMM-DIIS step ------------- For each wave function we calculate the residual: .. math:: R_n = (\hat{H} - \epsilon_n \hat{S}) \tilde{\psi}_n New improved wave function: `\tilde{\psi}_n' = \tilde{\psi}_n + \lambda \hat{P} R_n`, where `\hat{P}` is a preconditioner_. Find step length `\lambda` by minimizing the norm of: .. math:: R_n' = (\hat{H} - \epsilon_n \hat{S}) \tilde{\psi}_n' Since we already have `R_n'`, we might as well use it to take an extra step (with the same step length as for the first step): .. math:: \tilde{\psi}_n \leftarrow \tilde{\psi}_n' + \lambda \hat{P} R_n' = \tilde{\psi}_n + \lambda \hat{P} R_n + \lambda \hat{P} R_n' See [Kresse96]_ for details. .. _preconditioner: Preconditioning --------------- .. hhhh image:: images/preconditioning.png :width: 3cm :align: center The ideal preconditioner would be: .. math:: \hat{P} = -(\hat{H} - \epsilon_n \hat{S})^{-1}. For the short wavelength parts of the residuals, `\hat{H} - \epsilon_n \hat{S}` will be dominated by the kinetic energy operator, so we have approximately `\hat{P} \simeq -\hat{T}^{-1}`. We calculate preconditioned residuals (`\tilde{R}_n = \hat{P} R_n`) by solving `\hat{T} \tilde{R}_n = -R_n` or equivalently .. math:: \frac{1}{2} \nabla^2 \tilde{R}_n = R_n approximately using multigrid techniques as described in [Briggs95]_. References ---------- .. [Kresse96] G. Kresse, J. Furthmüller: Phys. Rev. B 54, 11169 - 11186 (1996) "Efficient iterative schemes for ab initio total-energy calculations using a plane-wave basis set" .. [Briggs95] E. L. Briggs, D. J. Sullivan and J. Bernholc: Phys. Rev. 
B 52, R5471 (1995), "Large Scale Electronic Structure Calculations with Multigrid Acceleration" gpaw-24.1.0/doc/documentation/scissors/000077500000000000000000000000001454550013000200175ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/scissors/mos2ws2.py000066400000000000000000000014371454550013000217120ustar00rootroot00000000000000from ase.build import mx2 from gpaw import GPAW from gpaw.lcao.scissors import Scissors a12 = mx2(formula='MoS2', kind='2H', a=3.184, thickness=3.13, size=(1, 1, 1)) a12 += mx2(formula='WS2', kind='2H', a=3.184, thickness=3.15, size=(1, 1, 1)) a12.positions[3:, 2] += 3.6 + 3.184 a12.center(vacuum=3.0, axis=2) k = 6 a12.calc = GPAW(mode='lcao', basis='dzp', nbands='nao', kpts=(k, k, 1), eigensolver=Scissors([(-0.5, 0.5, 3), (-0.3, 0.3, 3)]), txt='12.txt') a12.get_potential_energy() a12.calc.write('12.gpw') bp = a12.cell.bandpath('GMKG', npoints=80) c12 = a12.calc.fixed_density(kpts=bp, symmetry='off') bs = c12.band_structure() bs.write('12bs.json') gpaw-24.1.0/doc/documentation/scissors/scissors.rst000066400000000000000000000024151454550013000224230ustar00rootroot00000000000000.. _scissors operator: =============================== Scissors operator for LCAO mode =============================== .. warning:: **Work in progress** .. module:: gpaw.lcao.scissors .. autoclass:: Scissors In :ref:`lcao` we solve the following generalized eigenvalue problem: .. math:: \sum_\nu (H + \Delta H)_{\mu\nu} C_{\nu n} = \sum_{\nu} S_{\mu\nu} C_{\nu n} \epsilon_n, where `\Delta H` is a scissors operator. Space is divided into regions `\Omega_i` and for each region we define desired shifts of the occupied and unoccupied bands: `\Delta_{i,\text{occ}}` and `\Delta_{i,\text{unocc}}`. The scissors operator is given as: .. math:: \Delta H = \sum_i(\Delta H^{i,\text{occ}}+\Delta H^{i,\text{unocc}}), where .. 
math:: \Delta H_{\mu\nu}^{i,\text{occ}} = \Delta_{i,\text{occ}} \sum_{n,n'}^{\text{occ}} \sum_{\mu',\nu'\in\Omega_i} C_{n\mu}^{-1} C_{\mu'n} S_{\mu'\nu'} C_{\nu'n'} C_{n'\nu}^{-1}, .. math:: \Delta H_{\mu\nu}^{i,\text{unocc}} = \Delta_{i,\text{unocc}} \sum_{n,n'}^{\text{unocc}} \sum_{\mu',\nu'\in\Omega_i} C_{n\mu}^{-1} C_{\mu'n} S_{\mu'\nu'} C_{\nu'n'} C_{n'\nu}^{-1}. Example ======= :download:`mos2ws2.py`. gpaw-24.1.0/doc/documentation/sic/000077500000000000000000000000001454550013000167255ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/sic/sic.rst000066400000000000000000000035721454550013000202440ustar00rootroot00000000000000.. _sic: ====================================================== The Perdew-Zunger Self-Interaction Correction (PZ-SIC) ====================================================== The self-interaction corrected density functional with PZ-SIC [#Perdew]_ has the following form: .. math:: E^{PZ-SIC}[\{n_i\}] = E^{DFA}[n] - \beta \sum_{i=1}^{N} \left(E_H[n_i] + E^{DFA}_{xc}[n_i]\right) Here `E^{DFA}` is the density functional approximation, `E_H` is the Hartree energy and `E^{DFA}_{xc}` is the exchange-correlation part of the density functional approximation. `n` is the total density and `n_i = |\psi_i (\mathbf{r})|^{2}` is the orbital density. `\beta` is a scaling factor. The SIC functional is not a unitary invariant functional and is dependent on orbital densities. Therefore, the :ref:`fully variational approach <directmin>` [#Ivanov2021pwfd]_, [#Ivanov2021]_ is used to find the optimal orbitals which provide the ground state energy. Example -------- The implementation supports all three modes (PW, FD, and LCAO) using the direct minimization described :ref:`here <directmin>`. Since the functional is not a unitary invariant functional, it is necessary to employ complex orbitals to find the lowest energy state. Here is an example using FD mode: .. literalinclude:: sic_example_fd.py To use PW mode, just import PW mode and replace FD with PW. Here is the example for LCAO mode: .. 
literalinclude:: sic_example_lcao.py If you use this module, please refer to implementation papers Refs. [#Ivanov2021pwfd]_, [#Ivanov2021]_. References ---------- .. [#Perdew] J. P. Perdew and Alex Zunger *Phys. Rev. B* **23**, 5048 (1981) .. [#Ivanov2021pwfd] A. V. Ivanov, G. Levi, E.Ö. Jónsson, and H. Jónsson, *J. Chem. Theory Comput.*, **17**, 5034, (2021). .. [#Ivanov2021] A. V. Ivanov, E.Ö. Jónsson, T. Vegge, and H. Jónsson, *Comput. Phys. Commun.*, **267**, 108047 (2021). gpaw-24.1.0/doc/documentation/sic/sic_example_fd.py000077500000000000000000000016371454550013000222530ustar00rootroot00000000000000import numpy as np from ase import Atoms from gpaw import FD, GPAW from gpaw.directmin.etdm_fdpw import FDPWETDM # Water molecule: d = 0.9575 t = np.pi / 180 * 104.51 H2O = Atoms('OH2', positions=[(0, 0, 0), (d, 0, 0), (d * np.cos(t), d * np.sin(t), 0)]) H2O.center(vacuum=5.0) calc = GPAW(mode=FD(force_complex_dtype=True), xc='PBE', occupations={'name': 'fixed-uniform'}, eigensolver=FDPWETDM(localizationtype='PM_PZ', functional={'name': 'PZ-SIC', 'scaling_factor': (0.5, 0.5)}, grad_tol_pz_localization=1.0e-4), mixer={'backend': 'no-mixing'}, symmetry='off' ) H2O.set_calculator(calc) H2O.get_potential_energy() H2O.get_forces() gpaw-24.1.0/doc/documentation/sic/sic_example_lcao.py000077500000000000000000000015621454550013000225750ustar00rootroot00000000000000import numpy as np from ase import Atoms from gpaw import GPAW, LCAO from gpaw.directmin.etdm_lcao import LCAOETDM # Water molecule: d = 0.9575 t = np.pi / 180 * 104.51 H2O = Atoms('OH2', positions=[(0, 0, 0), (d, 0, 0), (d * np.cos(t), d * np.sin(t), 0)]) H2O.center(vacuum=5.0) calc = GPAW(mode=LCAO(force_complex_dtype=True), xc='PBE', occupations={'name': 'fixed-uniform'}, eigensolver=LCAOETDM(localizationtype='PM_PZ', functional={'name': 'PZ-SIC', 'scaling_factor': (0.5, 0.5)}), mixer={'backend': 'no-mixing'}, nbands='nao', symmetry='off' ) H2O.calc = calc H2O.get_potential_energy() H2O.get_forces() 
gpaw-24.1.0/doc/documentation/sjm/000077500000000000000000000000001454550013000167405ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/sjm/agts.py000066400000000000000000000005341454550013000202520ustar00rootroot00000000000000from myqueue.workflow import run def workflow(): with run(script='run-vary-potential.py', cores=72, tmax='4h'): run(script='plot-overview.py') run(script='plot-traces.py') with run(script='run-vary-charge.py', cores=72, tmax='4h'): run(script='plot-delta-ne-phi.py') run(script='plot-charge-potential.py') gpaw-24.1.0/doc/documentation/sjm/plot-charge-potential.py000066400000000000000000000015711454550013000235200ustar00rootroot00000000000000# web-page: charge-potential.png import numpy as np from matplotlib import pyplot from scipy.stats import linregress fig, ax = pyplot.subplots() fig.subplots_adjust(top=0.99, right=0.99) potentials, charges = [], [] with open('potential.txt', 'r') as f: lines = f.read().splitlines() for line in lines: potential, charge = line.split(',') potentials += [float(potential)] charges += [float(charge)] line = linregress(charges, potentials) x_line = [min(charges), max(charges)] y_line = [line.slope * _ + line.intercept for _ in x_line] ax.plot(x_line, y_line, '-', color='C1') ax.plot(charges, potentials, 'o', color='C0') ax.text(np.mean(x_line) + 0.0 * np.ptp(x_line), np.mean(y_line) + 0.1 * np.ptp(y_line), '$R^2 = {:.4f}$'.format(line.rvalue**2)) ax.set_xlabel('excess electrons') ax.set_ylabel('potential, V') fig.savefig('charge-potential.png') gpaw-24.1.0/doc/documentation/sjm/plot-delta-ne-phi.py000066400000000000000000000042061454550013000225370ustar00rootroot00000000000000# web-page: delta-ne-phi.png import pickle import numpy as np from matplotlib import pyplot import ase.io def get_potential_trace(filename): with open(filename, 'rb') as f: phi = pickle.load(f) phi = phi.mean(axis=(0, 1)) return phi def get_electron_trace(filename): with open(filename, 'rb') as f: n = pickle.load(f) # Multiply each 
point by its local cell volume to convert from # electron density to electrons in each cube. vol_adjust = atoms.cell.volume / np.prod(n.shape) n *= vol_adjust z_trace = np.sum(n, axis=(0, 1)) z_trace = np.cumsum(z_trace) return z_trace excess_electrons = [-0.2, -0.1, 0., 0.1, 0.2] atoms = ase.io.read('atoms0.0000.traj') fig, axes = pyplot.subplots(nrows=2, figsize=(6.4, 4.0), sharex=True) fig.subplots_adjust(right=0.99, top=0.99) ########################### # Potential axis. ########################### ax = axes[0] z_trace0 = get_potential_trace('esp0.0000.pckl') z_trace0 -= z_trace0[0] for excess_electron in excess_electrons: label = '{:.4f}'.format(excess_electron) z_trace = get_potential_trace(f'esp{label}.pckl') z_trace -= z_trace[0] diff = z_trace - z_trace0 zs = np.linspace(0., atoms.cell[2][2], num=len(diff)) ax.plot(zs, diff, color='C1', linewidth=2) ax.text(zs[-1] + 1., diff[-1], '{:+.1f}'.format(excess_electron), ha='left', va='center', color='C1') ########################### # Electrons axis. ########################### ax = axes[1] z_trace0 = get_electron_trace('allelectrondensity0.0000.pckl') for excess_electron in excess_electrons: label = '{:.4f}'.format(excess_electron) z_trace = get_electron_trace(f'allelectrondensity{label}.pckl') diff = z_trace - z_trace0 ax.plot(zs, diff, color='C0') ax.text(zs[-1] + 1., diff[-1], '{:+.1f}'.format(excess_electron), ha='left', va='center', color='C0') ########################### # Touch up. ########################### axes[0].set_xlim(0., zs[-1] + 4.) 
axes[0].set_ylabel(r'$\Delta \phi_{xy}$, V') axes[1].set_ylabel(r'$\Delta n_{xy}$') axes[1].set_xlabel(r'$z$ coordinate, $\mathrm{\AA}$') fig.savefig('delta-ne-phi.png') gpaw-24.1.0/doc/documentation/sjm/plot-overview.py000066400000000000000000000041121454550013000221320ustar00rootroot00000000000000# web-page: overview.png import numpy as np from ase.io.cube import read_cube from matplotlib import pyplot from ase.data import covalent_radii as radii from ase.data.colors import jmol_colors from matplotlib.patches import Circle fig, ax = pyplot.subplots(figsize=(6., 1.8)) fig.subplots_adjust(left=0.08, right=0.99, bottom=0.23, top=0.99) ########################### # Plot solvent. ########################### with open('sjm_traces4.4V-cube.out/cavity.cube') as f: data = read_cube(f) atoms = data['atoms'] cube = data['data'] flat = np.mean(cube, axis=0) ax.imshow(flat, cmap='Blues', interpolation='spline16', origin='lower', vmax=1.5, extent=(0., atoms.cell[2, 2], 0., atoms.cell[1, 1])) ########################### # Add atoms as circles. ########################### cell = atoms.cell atomslist = [atom for atom in atoms] atomslist = sorted(atomslist, key=lambda atom: atom.x) atomslist.reverse() # Add the atoms to the plot as circles. for atom in atomslist: color = jmol_colors[atom.number] radius = radii[atom.number] circle = Circle((atom.z, atom.y), radius, facecolor=color, edgecolor='k', linewidth=1) ax.add_patch(circle) xlim = ax.get_xlim() ylim = ax.get_ylim() ########################### # Add the jellium region. ########################### with open('sjm_traces4.4V.out/background_charge.txt', 'r') as f: lines = f.read().splitlines() zs = [] jelliums = [] for line in lines: z, jellium = line.split() zs.append(float(z)) jelliums.append(float(jellium)) jelliums = np.array(jelliums) * atoms.cell[1][1] where = [True if jellium != 0 else False for jellium in jelliums] ax.fill_between(zs, jelliums, where=where, hatch='///', ec='0.2') ########################### # Touch up. 
########################### ax.text(24., max(jelliums) / 2., 'jellium', ha='center', va='center') ax.text(17.8, max(jelliums) / 2., 'solvent', ha='center', va='center', rotation=90.) ax.set_xlim(xlim) ax.set_ylim(ylim) ax.set_xlabel(r'$z$, $\mathrm{\AA}$') ax.set_ylabel(r'$y$, $\mathrm{\AA}$') fig.savefig('overview.png') gpaw-24.1.0/doc/documentation/sjm/plot-traces.py000066400000000000000000000060601454550013000215510ustar00rootroot00000000000000# web-page: traces.png import pickle import numpy as np import ase.io from ase.data import covalent_radii as radii from ase.data.colors import jmol_colors from matplotlib import pyplot from matplotlib.patches import Circle from matplotlib.ticker import (MultipleLocator, AutoMinorLocator) def get_electron_trace(filename): with open(filename, 'rb') as f: n = pickle.load(f) # Multiply each point by its local cell volume to convert from # electron density to electrons in each cube. vol_adjust = atoms.cell.volume / np.prod(n.shape) n *= vol_adjust z_trace = np.sum(n, axis=(0, 1)) z_trace = np.cumsum(z_trace) return z_trace def get_potential_trace(filename): with open(filename, 'rb') as f: phi = pickle.load(f) phi = phi.mean(axis=(0, 1)) return phi fig, ax_potential = pyplot.subplots(figsize=(6.4, 2.2)) fig.subplots_adjust(bottom=0.2, top=0.99, left=0.11, right=0.90) atoms = ase.io.read('atoms.traj') ########################### # Potential axis. ########################### z_trace44 = get_potential_trace('esp4.4V.pckl') z_trace43 = get_potential_trace('esp4.3V.pckl') z_trace44 -= z_trace44[0] z_trace43 -= z_trace43[0] diff = z_trace43 - z_trace44 zs = np.linspace(0., atoms.cell[2][2], num=len(diff)) ax_potential.plot(zs, diff, color='C1', linewidth=2) label_at = int(len(zs) * 0.80) ax_potential.text(zs[label_at], diff[label_at] + 0.01, 'potential', ha='center', va='center', color='C1') ########################### # Electrons axis. 
########################### ax_electrons = ax_potential.twinx() # ax1.set_zorder(ax2.get_zorder()+1) z_trace44 = get_electron_trace('all4.4V.pckl') z_trace43 = get_electron_trace('all4.3V.pckl') diff = z_trace43 - z_trace44 ax_electrons.plot(zs, diff) ax_electrons.text(zs[label_at], diff[label_at] - 0.003, 'electrons', ha='center', va='center', color='C0') ########################### # Atoms axis. ########################### ax_atoms = ax_potential.twinx() # Sort atoms by x value. cell = atoms.cell atomslist = [atom for atom in atoms] atomslist = sorted(atomslist, key=lambda atom: atom.x) atomslist.reverse() # Add the atoms to the plot as circles. for atom in atomslist: color = jmol_colors[atom.number] radius = radii[atom.number] circle = Circle((atom.z, atom.y), radius, facecolor='none', edgecolor='0.5', linewidth=0.5) ax_atoms.add_patch(circle) ########################### # Touch up. ########################### ax_atoms.axis('equal') ax_potential.set_xlabel(r'$z$ coordinate, $\mathrm{\AA}$') ax_atoms.set_yticks([]) ax_potential.set_zorder(2) ax_electrons.set_zorder(1) ax_potential.patch.set_visible(False) ax_potential.set_ylabel(r'$\phi_{xy}$(4.4 V) - $\phi_{xy}$(4.3 V), V') ax_potential.yaxis.set_major_locator(MultipleLocator(0.1)) ax_potential.yaxis.set_minor_locator(AutoMinorLocator(5)) ax_electrons.set_ylabel(r'$n_{xy}$(4.4 V) - $n_{xy}$(4.3 V)') ax_electrons.yaxis.set_major_locator(MultipleLocator(0.01)) ax_electrons.yaxis.set_minor_locator(AutoMinorLocator(2)) fig.savefig('traces.png') gpaw-24.1.0/doc/documentation/sjm/run-vary-charge.py000066400000000000000000000122011454550013000223200ustar00rootroot00000000000000import pickle import numpy as np from ase import Atoms from ase.units import Pascal, m from ase.parallel import paropen from gpaw.solvation.sjm import SJM, SJMPower12Potential from gpaw import FermiDirac from gpaw.solvation import ( EffectivePotentialCavity, LinearDielectric, GradientSurface, SurfaceInteraction ) def write_everything(label): 
"""Writes out all the useful data after the SJM calculation.""" atoms.write(f'atoms{label}.traj') esp = atoms.calc.get_electrostatic_potential() with paropen(f'esp{label}.pckl', 'wb') as f: pickle.dump(esp, f) n = calc.get_all_electron_density() with open(f'allelectrondensity{label}.pckl', 'wb') as f: pickle.dump(n, f) def log_potential(): """Saves charge and potential in simple text file for subsequent plot.""" with paropen('potential.txt', 'a') as f: f.write('{:10.4f}, {:10.4f}\n' .format(calc.results['electrode_potential'], calc.results['excess_electrons'])) atoms = Atoms(symbols='Pt27OH2OH2OH2OH2OH2OH2', pbc=np.array([True, True, False]), cell=np.array( [[8.42164176, 0.00000000, 0.00000000], [4.21082088, 7.29335571, 0.00000000], [0.00000000, 0.00000000, 30.00000000]]), positions=np.array( [[1.40360696, 0.81037286, 6.65200000], [4.21082088, 0.81037286, 6.65200000], [7.01803480, 0.81037286, 6.65200000], [2.80721392, 3.24149143, 6.65200000], [5.61442784, 3.24149143, 6.65200000], [8.42164176, 3.24149143, 6.65200000], [4.21082088, 5.67261000, 6.65200000], [7.01803480, 5.67261000, 6.65200000], [9.82524872, 5.67261000, 6.65200000], [0.03036456, 1.64159455, 8.98780067], [2.83952601, 1.63656373, 8.96913616], [5.65700108, 1.63548404, 8.98839035], [1.43694079, 4.06184422, 8.97011747], [4.25013151, 4.06794136, 8.97319238], [7.04795258, 4.05874015, 8.98685186], [2.83797345, 6.50356945, 8.97451149], [5.64860418, 6.49353012, 8.97149139], [8.46007009, 6.50339758, 8.97110938], [0.04167726, 0.01659389, 11.33367526], [2.84872686, 0.01609218, 11.33191349], [5.65812910, 0.01711834, 11.39819957], [1.45292097, 2.44351232, 11.38081839], [4.25298704, 2.44406661, 11.32946091], [7.06279504, 2.44664945, 11.44031710], [2.84886720, 4.87885276, 11.32743866], [5.65767156, 4.88199267, 11.38959530], [8.46251173, 4.88143235, 11.31836346], [1.39280453, 2.54002304, 14.97410114], [1.91436258, 3.34474212, 15.30247756], [1.60746426, 2.47760171, 14.01227886], [2.81907167, -0.18784312, 15.85479905], 
[3.75807226, -0.09412171, 15.55917086], [2.37285739, 0.64492180, 15.59351972], [5.59285245, -0.22157210, 15.07552505], [6.07745845, 0.54995965, 15.43008777], [5.64779097, -0.12087038, 14.09724225], [7.28795735, 2.71603215, 14.82288807], [8.34375424, 2.73525339, 14.98389958], [6.79086560, 3.65674631, 14.98624863], [5.91582226, 4.82631629, 14.97807974], [6.34845728, 5.68351739, 15.30313705], [5.73153676, 4.98072249, 14.02008620], [2.76657910, 4.66918882, 15.85175518], [3.71162532, 4.72529333, 15.60227465], [2.36035150, 5.52417539, 15.56370623]])) # Solvated jellium parameters. sj = {'excess_electrons': 0.} # Implicit solvent parameters (to SolvationGPAW). epsinf = 78.36 # dielectric constant of water at 298 K gamma = 18.4 * 1e-3 * Pascal * m cavity = EffectivePotentialCavity( effective_potential=SJMPower12Potential(H2O_layer=True), temperature=298.15, # K surface_calculator=GradientSurface()) dielectric = LinearDielectric(epsinf=epsinf) interactions = [SurfaceInteraction(surface_tension=gamma)] calc = SJM(mode='fd', txt='gpaw-charge.txt', kpts=(4, 4, 1), gpts=(48, 48, 192), xc='PBE', occupations=FermiDirac(0.1), convergence={'work function': 0.001}, # Solvated jellium parameters. sj=sj, # Implicit solvent parameters. 
cavity=cavity, dielectric=dielectric, interactions=interactions) atoms.set_calculator(calc) for excess_electrons in [-0.2, -0.1, 0., .1, .2]: sj['excess_electrons'] = excess_electrons calc.set(sj=sj) atoms.get_potential_energy() log_potential() write_everything(label='{:.4f}'.format(excess_electrons)) gpaw-24.1.0/doc/documentation/sjm/run-vary-potential.py000066400000000000000000000122011454550013000230660ustar00rootroot00000000000000import numpy as np import pickle from ase import Atoms from ase.units import Pascal, m from ase.parallel import paropen from gpaw.solvation.sjm import SJM, SJMPower12Potential from gpaw import FermiDirac from gpaw.solvation import ( EffectivePotentialCavity, LinearDielectric, GradientSurface, SurfaceInteraction ) def write_potential_and_charge(label): """Dumps the full potential and charge to pickle files for analysis by separate script.""" esp = atoms.calc.get_electrostatic_potential() with paropen(f'esp{label}.pckl', 'wb') as f: pickle.dump(esp, f) n = calc.get_all_electron_density() with paropen(f'all{label}.pckl', 'wb') as f: pickle.dump(n, f) atoms = Atoms(symbols='Pt27OH2OH2OH2OH2OH2OH2', pbc=np.array([True, True, False]), cell=np.array( [[8.42164176, 0.00000000, 0.00000000], [4.21082088, 7.29335571, 0.00000000], [0.00000000, 0.00000000, 30.00000000]]), positions=np.array( [[1.40360696, 0.81037286, 6.65200000], [4.21082088, 0.81037286, 6.65200000], [7.01803480, 0.81037286, 6.65200000], [2.80721392, 3.24149143, 6.65200000], [5.61442784, 3.24149143, 6.65200000], [8.42164176, 3.24149143, 6.65200000], [4.21082088, 5.67261000, 6.65200000], [7.01803480, 5.67261000, 6.65200000], [9.82524872, 5.67261000, 6.65200000], [0.03036456, 1.64159455, 8.98780067], [2.83952601, 1.63656373, 8.96913616], [5.65700108, 1.63548404, 8.98839035], [1.43694079, 4.06184422, 8.97011747], [4.25013151, 4.06794136, 8.97319238], [7.04795258, 4.05874015, 8.98685186], [2.83797345, 6.50356945, 8.97451149], [5.64860418, 6.49353012, 8.97149139], [8.46007009, 
6.50339758, 8.97110938], [0.04167726, 0.01659389, 11.33367526], [2.84872686, 0.01609218, 11.33191349], [5.65812910, 0.01711834, 11.39819957], [1.45292097, 2.44351232, 11.38081839], [4.25298704, 2.44406661, 11.32946091], [7.06279504, 2.44664945, 11.44031710], [2.84886720, 4.87885276, 11.32743866], [5.65767156, 4.88199267, 11.38959530], [8.46251173, 4.88143235, 11.31836346], [1.39280453, 2.54002304, 14.97410114], [1.91436258, 3.34474212, 15.30247756], [1.60746426, 2.47760171, 14.01227886], [2.81907167, -0.18784312, 15.85479905], [3.75807226, -0.09412171, 15.55917086], [2.37285739, 0.64492180, 15.59351972], [5.59285245, -0.22157210, 15.07552505], [6.07745845, 0.54995965, 15.43008777], [5.64779097, -0.12087038, 14.09724225], [7.28795735, 2.71603215, 14.82288807], [8.34375424, 2.73525339, 14.98389958], [6.79086560, 3.65674631, 14.98624863], [5.91582226, 4.82631629, 14.97807974], [6.34845728, 5.68351739, 15.30313705], [5.73153676, 4.98072249, 14.02008620], [2.76657910, 4.66918882, 15.85175518], [3.71162532, 4.72529333, 15.60227465], [2.36035150, 5.52417539, 15.56370623]])) # Solvated jellium parameters. sj = {'excess_electrons': 0.45235, 'target_potential': 4.4} # Implicit solvent parameters (to SolvationGPAW). epsinf = 78.36 # dielectric constant of water at 298 K gamma = 18.4 * 1e-3 * Pascal * m cavity = EffectivePotentialCavity( effective_potential=SJMPower12Potential(H2O_layer=True), temperature=298.15, # K surface_calculator=GradientSurface()) dielectric = LinearDielectric(epsinf=epsinf) interactions = [SurfaceInteraction(surface_tension=gamma)] calc = SJM(mode='fd', txt='gpaw-potential.txt', kpts=(4, 4, 1), gpts=(48, 48, 192), xc='PBE', occupations=FermiDirac(0.1), convergence={'work function': 0.001}, # Solvated jellium parameters. sj=sj, # Implicit solvent parameters. cavity=cavity, dielectric=dielectric, interactions=interactions) atoms.set_calculator(calc) atoms.get_potential_energy() # Write output for all the figures. 
atoms.write('atoms.traj') calc.write_sjm_traces(path='sjm_traces4.4V.out') # *.out for .gitignore calc.write_sjm_traces(path='sjm_traces4.4V-cube.out', style='cube') # *.out for .gitignore write_potential_and_charge('4.4V') # Vary the potential for the traces figure. sj = {'target_potential': 4.3, 'excess_electrons': 0.47848} atoms.calc.set(sj=sj) atoms.get_potential_energy() write_potential_and_charge('4.3V') gpaw-24.1.0/doc/documentation/sjm/sjm.rst000066400000000000000000000263151454550013000202720ustar00rootroot00000000000000.. module:: gpaw.solvation.sjm .. _sjm: ====================================================== Solvated Jellium (constant-potential electrochemistry) ====================================================== Overview ======== The Solvated Jellium method (:class:`~gpaw.solvation.sjm.SJM`) is a simple method for the simulation of electrochemical interfaces in DFT. A full description of the approach can be found in [Kastlunger2018]_. The method allows you to control the simulated electrode potential (manifested as the topside work function) by varying the number of electrons in the simulation; calculations can be run in either constant-charge or constant-potential mode. The :class:`~gpaw.solvation.sjm.SJM` calculator can be used just like the standard GPAW calculator; it returns energy and forces, but can do so at a fixed potential. (Please see the note below on the Legendre-transform of the energy.) The potential is controlled by a simple iterative technique; in practice if you are running a trajectory (such as a relaxation or nudged elastic band) the first image will take longer than a conventional calculation as the potential equilibrates, but the computational cost is much less on subsequent images; practically, we estimate the extra cost to be <50% compared to a traditional DFT calculation of a full trajectory. For a practical guide on the use of the method, please see the :ref:`solvated_jellium_method` tutorial. 
Theoretical background ====================== The philosophy of the solvated jellium method is to construct the simplest model that captures the physics of interest, without introducing spurious effects. The solvated jellium approach consists of two components: jellium and an implicit solvent. A schematic is shown below: .. image:: overview.png :width: 600 px :align: center In this figure, the jellium is shown by the hashed marks, while the implicit solvent is shown in the blue shaded region. Note that an explicit solvent (the water molecules) is also conventionally used in this approach, as the major purpose of the implicit solvent is not to simulate the solvation of individual species but rather to screen the net field. A more detailed discussion of both of these components follows. The jellium slab: charging -------------------------- In a periodic system we cannot have a net charge; therefore, any additional, fractional electrons that are added to the system must be compensated by an equal amount of counter charge. In GPAW, this is conveniently accomplished with a :class:`~gpaw.jellium.JelliumSlab`. This adds a smeared-out background charge in a fixed region of the simulation cell; in the figure above it is shown as the dashed region to the right of the atoms. The :class:`~gpaw.jellium.JelliumSlab` also increases the number of electrons in the simulation by an amount equal to the total charge of the slab. When you run a simulation, you should see that these excess electrons localize on the `+z` side (the right side in these figures) of the metal atoms that simulate the electrode surface, and not on the `-z` (left) side, which simulates the bulk. This is accomplished by only putting the jellium region on one side of the simulation, and employing a dipole correction (included by default when you run SJM) to electrostatically decouple the two sides of the cell. The figure below shows the difference in two simulations, one run at 4.4 V and one run at 4.3 V. 
The orange curve shows where the potential drops off, and the blue curve shows where the electrons localize. .. image:: traces.png :width: 600 px :align: center The jellium region is conventionally thought of as a region of smeared-out positive charge, accompanied by a positive number of electrons. However, the signs can readily be reversed, making the jellium region a smeared-out negative region accompanied by a reduction in the total number of electrons. In this way, the same tool can be used to perturb the electrons in either a positive or negative direction, and thus vary the potential in either direction in order to find its target. Note also that the jellium region does not overlap any atoms, separating this from approaches that employ a homogeneous background charge throughout the unit cell (in which spurious interactions can occur). This is important to not distort the electronic structure of the atoms and molecules being simulated. Additionally, note that the jellium is enclosed in a regular slab geometry in the figure above, but this need not be the case; it can, for example, follow the cavity of the implicit solvent if this is preferred (by using the :code:`jelliumregion` keyword as described in the :class:`~gpaw.solvation.sjm.SJM` documentation). The solvation: screening ------------------------ By itself, the excess electrons and the jellium counter charge would set up an artificially high potential field in the region of the reaction. To screen this large field, an implicit solvent is added to the simulation in the region above the explicit solvent, completely surrounding the jellium counter charge. For this purpose, the solvated jellium method employs the implicit solvation model of Held and Walter [Held2014]_, which changes the dielectric constant of the vacuum region. (You can learn more about the solvation method in the :ref:`continuum_solvent_model` tutorial.) 
Here, the primary purpose of the implicit solvent is *not* to solvate the species reacting at the surface; explicit solvent (shown by the water molecules above) is typically employed in SJ simulations for this purpose. The implicit solvent is located above the explicit solvent (and therefore may provide some solvent stabilization to the explicit solvent molecules). This can be seen in the figure above, where the implicit solvent is shown as the blue shaded region. In this figure, the small amount of solvent that is apparent at a `z` coordinate corresponding to the water layer is just the result of the implicit solvent penetrating slightly into the cavity at the center of a hexagonal ice-like water structure. It is important that the implicit solvent not be present in the region of the reaction, as this would be "double"-solvating those parts. If this occurs, "ghost" atoms can be added to exclude the solvent from specific regions. Generalized Poisson equation ---------------------------- In net, the SJ method is manifested as two changes to the generalized Poisson equation, .. math:: \vec{\nabla} \cdot \Big(\epsilon(\br) \vec{\nabla} \phi(\br)\Big) = -4\pi \Big[ \rho_\mathrm{explicit}(\br) + \rho_\mathrm{jellium} (\br) \Big], where `\epsilon(\br)` accounts for the solvation; that is, the dielectric constant is spatially variant, and the spatially-resolved charge density is modified by the presence of the `\rho_\mathrm{jellium}(\br)` term, which contains the smeared-out counter charge in a region away from all of the atoms (and electronic density) of the system. `\rho_\mathrm{explicit} (\br)` contains the standard charge density of the system; that is, due to the electrons and nuclei. Since the changes to the Poisson equation are relatively simple, it can be solved without relying on linearization. 
The electrode potential ----------------------- The electrode potential (`\phi_\mathrm{e}`) is then defined as the Fermi-level energy (`\mu`) referenced to a point deep in the solvent (`\Phi_\mathrm{w}`), where the whole charge on the electrode has been screened and no electric field is present. (This is equivalently the topside work function of the slab.) This is divided by the unit electronic charge `e` to convert from energy (typically in eV) to potential (typically in V) dimensions. .. math:: \phi_\mathrm{e} = \frac{\Phi_\mathrm{w} - \mu}{e} . Note that this gives the potential with respect to vacuum; if you would like your potential on a reference electrode scale, such as SHE, please see the :ref:`solvated_jellium_method` tutorial. .. _grand-potential-energy: Legendre-transformed energy --------------------------- The energy used in the analysis of electrode reactions is the grand-potential energy .. math:: \Omega \equiv E_\mathrm{tot} + N_\mathrm{e}e \phi_\mathrm{e} . Whereas :math:`E_\mathrm{tot}` is consistent with the forces in traditional electronic structure calculations, the grand-potential energy :math:`\Omega` is consistent with the forces in electronically grand-canonical (that is, constant-potential) simulations. This means that relaxations that follow forces will find local minima in :math:`\Omega`, and generally methods that rely on consistent force and energy information (such as BFGSLineSearch or NEB) will work fine as long as :math:`\Omega` is employed. Thus, this calculator returns :math:`\Omega` by default, rather than :math:`E_\mathrm{tot}`. Potential control ================= The below figure shows both the localization of excess electrons and the local change in potential, when the total number of electrons in an example simulation are changed. .. 
image:: delta-ne-phi.png :width: 600 px :align: center As mentioned above, the excess electrons localize only on the top side of the slab, which is meant to represent the electrode surface, and not on the bottom side, which is meant to represent the bulk. The potential drop is seen to localize in the Stern layer where the reaction takes place. Over reasonable deviations, the relationship between the number of excess electrons and the potential :math:`\phi` is approximately linear: .. image:: charge-potential.png :width: 600 px :align: center Due to the simple relationship between the excess electrons and the potential, reaching a desired potential is typically a fast process. If you are running a trajectory---for example, a relaxation, a molecular dynamics simulation, or a saddle-point search---the first image will often take a few repetitions (that is, sequential constant-electron calculations) until the desired potential is reached. Atoms typically move relatively little from image-to-image in a trajectory; therefore, subsequent images are often already at the target potential and no equilibration steps are necessary; when equilibration steps are required, the slope (of potential vs. number of electrons) is recalled from the last adjustment, and it often only takes a single equilibration step. Typically, over the course of a full trajectory, the added computational cost of working in the constant-potential ensemble is minimal, generally <50% greater computational time compared to a constant-charge calculation. As described in the :ref:`solvated_jellium_method` tutorial, this can sometimes be further improved by simultaneously optimizing the potential with the atomic positions. References ========== .. [Kastlunger2018] G. Kastlunger, P. Lindgren, A. A. Peterson, :doi:`Controlled-Potential Simulation of Elementary Electrochemical Reactions: Proton Discharge on Metal Surfaces <10.1021/acs.jpcc.8b02465>`, *J. Phys. Chem. C* **122** (24), 12771 (2018) .. [Held2014] A.
Held and M. Walter, :doi:`Simplified continuum solvent model with a smooth cavity based on volumetric data <10.1063/1.4900838>`, *J. Chem. Phys.* **141**, 174108 (2014). Class documentation =================== .. autoclass:: gpaw.solvation.sjm.SJM .. autoclass:: gpaw.solvation.sjm.SJMPower12Potential gpaw-24.1.0/doc/documentation/smearing.rst000066400000000000000000000014321454550013000205060ustar00rootroot00000000000000.. _smearing: Occupation number smearing ========================== .. module:: gpaw.occupations .. seealso:: :ref:`manual_occ` Convergence of the bulk Cu energy with respect to the number of k-points for different smearing methods: .. literalinclude:: cu_calc.py .. figure:: cu.png (made with :download:`cu_plot.py`). See also figure 3 in :doi:`Blöchl et al. <10.1103/PhysRevB.49.16223>`. .. autofunction:: create_occ_calc .. autofunction:: fermi_dirac .. autofunction:: marzari_vanderbilt .. autofunction:: methfessel_paxton .. autoclass:: OccupationNumberCalculator :members: .. autoclass:: FixedOccupationNumbers .. autoclass:: ParallelLayout .. autofunction:: occupation_numbers Tetrahedron method ------------------ .. module:: gpaw.tetrahedron ..
autoclass:: TetrahedronMethod gpaw-24.1.0/doc/documentation/soc/000077500000000000000000000000001454550013000167335ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/soc/VCl2.py000066400000000000000000000020611454550013000200520ustar00rootroot00000000000000from gpaw.new.ase_interface import GPAW from ase import Atoms import numpy as np a = 6.339 d = 1.331 atoms = Atoms('V3Cl6', cell=[a, a, 1, 90, 90, 60], pbc=[1, 1, 0], scaled_positions=[ [0, 0, 0], [1 / 3, 1 / 3, 0], [2 / 3, 2 / 3, 0], [0, 2 / 3, d], [0, 1 / 3, -d], [1 / 3, 0, d], [1 / 3, 2 / 3, -d], [2 / 3, 1 / 3, d], [2 / 3, 0, -d]]) atoms.center(axis=2, vacuum=5) m = 3.0 magmoms = np.zeros((9, 3)) magmoms[0] = [m, 0, 0] magmoms[1] = [-m / 2, m * 3**0.5 / 2, 0] magmoms[2] = [-m / 2, -m * 3**0.5 / 2, 0] atoms.calc = GPAW(mode={'name': 'pw', 'ecut': 400}, magmoms=magmoms, symmetry='off', kpts={'size': (2, 2, 1), 'gamma': True}, parallel={'domain': 1, 'band': 1}, txt='VCl2_gs.txt') atoms.get_potential_energy() atoms.calc.write('VCl2_gs.gpw') gpaw-24.1.0/doc/documentation/soc/agts.py000066400000000000000000000010511454550013000202400ustar00rootroot00000000000000from math import cos, pi, sin from gpaw.new.ase_interface import GPAW from myqueue.workflow import run def workflow(): with run(script='VCl2.py', cores=4): run(script='plot.py') run(function=check) def check(): calc = GPAW('VCl2_gs.gpw') M_v, M_av = calc.calculation.state.density.calculate_magnetic_moments() print(M_v) print(M_av) m = 2.50 for x, m_v in zip([0, 2 * pi / 3, 4 * pi / 3], M_av): assert abs(m_v - [m * cos(x), m * sin(x), 0]).max() < 0.01 if __name__ == '__main__': check() gpaw-24.1.0/doc/documentation/soc/plot.py000066400000000000000000000021651454550013000202670ustar00rootroot00000000000000# web-page: mag1d.png, mag2d.png from gpaw.new.ase_interface import GPAW import matplotlib.pyplot as plt import numpy as np calc = GPAW('VCl2_gs.gpw') dens = calc.calculation.densities() grid_spacing = calc.atoms.cell[2, 2] / 200 nt = 
dens.pseudo_densities(grid_spacing) n = dens.all_electron_densities(grid_spacing=grid_spacing) i = nt.desc.size[2] // 2 x, y = n.desc.xyz()[:, :, i, :2].transpose((2, 0, 1)) uv = n.data[1:3, :, :, i] m = (uv**2).sum(0)**0.5 u, v = uv / m fig, ax = plt.subplots() ct = ax.contourf(x, y, m) cbar = fig.colorbar(ct) cbar.ax.set_ylabel('magnetization [Å$^{-3}$]') ax.quiver(*(a[::3, ::3] for a in [x, y, u, v])) ax.axis('equal') ax.set_xlabel('x [Å]') ax.set_ylabel('y [Å]') fig.savefig('mag2d.png') fig, ax = plt.subplots() x, y = n.xy(1, ..., 0, i) x, yt = nt.xy(1, ..., 0, i) j = len(x) // 2 L = calc.atoms.cell[0, 0] x = np.concatenate((x[j:] - L, x[:j])) y = np.concatenate((y[j:], y[:j])) yt = np.concatenate((yt[j:], yt[:j])) ax.plot(x, y, label='all-electron') ax.plot(x, yt, label='pseudo') ax.legend() ax.set_xlabel('x [Å]') ax.set_ylabel('magnetization [Å$^{-3}$]') fig.savefig('mag1d.png') gpaw-24.1.0/doc/documentation/soc/soc.rst000066400000000000000000000006741454550013000202600ustar00rootroot00000000000000.. _soc: Spin-orbit coupling and non-collinear calculations ================================================== https://journals.aps.org/prb/abstract/10.1103/PhysRevB.62.11556 .. literalinclude:: VCl2.py .. literalinclude:: plot.py .. figure:: mag2d.png .. figure:: mag1d.png Experiential: https://doi.org/10.3390/cryst7050121 Theoretical: https://doi.org/10.1088/0953-8984/10/22/004 DFT: https://doi.org/10.1063/1.4791437 gpaw-24.1.0/doc/documentation/tddft/000077500000000000000000000000001454550013000172545ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/tddft/dielectric_response.rst000066400000000000000000000413251454550013000240400ustar00rootroot00000000000000.. 
_df_theory: ======================================================== Linear dielectric response of an extended system: theory ======================================================== Introduction ============ The DF (dielectric function) object calculates optical and dielectric properties of extended systems. It computes the linear response function of an interacting many-body system from its ground state electronic structure, which is obtained from GPAW in the real-space grid scheme. The frequency and wave-vector dependent density response function is calculated within the Time-Dependent Density-Functional Theory formalism using the projector-augmented wave method. The random phase approximation and the adiabatic local density approximation are used for the exchange-correlation functional. Local field effects, which reflect the inhomogeneity of the system, are included by constructing the density response matrix in reciprocal space. Fast Fourier Transforms (FFT) are used to transform between real and reciprocal space. Refer to :ref:`df_tutorial` for getting started with examples. Non-interacting density response function ========================================= The non-interacting density response function in real space is written as .. math:: \chi^0(\mathbf{r}, \mathbf{r}^{\prime}, \omega) = \sum_{\mathbf{k}, \mathbf{q}}^{\mathrm{BZ}} \sum_{n, n^{\prime}} \frac{f_{n\mathbf{k}}-f_{n^{\prime} \mathbf{k} + \mathbf{q}}}{\omega + \epsilon_{n\mathbf{k}} - \epsilon_{n^{\prime} \mathbf{k} + \mathbf{q} } + i\eta} \psi_{n\mathbf{k}}^{\ast}(\mathbf{r}) \psi_{n^{\prime} \mathbf{k} + \mathbf{q} }(\mathbf{r}) \psi_{n\mathbf{k}}(\mathbf{r}^{\prime}) \psi^{\ast}_{n^{\prime} \mathbf{k} + \mathbf{q} }(\mathbf{r}^{\prime}), where `\epsilon_{n \mathbf{k}}` and `\psi_{n \mathbf{k}}(\mathbf{r})` are the eigenvalue and the eigenfunction, the latter normalized to 1 in the crystal volume `\Omega (= \Omega_{\mathrm{cell}} N_k)`.
The sum of occupation `f_{n \mathbf{k}}` should be the total number of electrons in the crystal, satisfying .. math:: \sum_{n \mathbf{k}} f_{n \mathbf{k}}= N_k N where `N` is the number of electrons in a single unit cell and `N_k` is the number of unit cells (kpoints). For translation invariant systems, `\chi^0` can be expanded in planewave basis as .. math:: \chi^0(\mathbf{r}, \mathbf{r}^{\prime}, \omega) = \frac{1}{\Omega} \sum_{\mathbf{q}}^{\mathrm{BZ}} \sum_{\mathbf{G} \mathbf{G}^{\prime}} e^{i(\mathbf{q} + \mathbf{G}) \cdot \mathbf{r}} \chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega) e^{-i(\mathbf{q} + \mathbf{G}^{\prime}) \cdot \mathbf{r}^{\prime}} where `\mathbf q` stands for the Bloch vector of the incident wave and `\mathbf G (\mathbf G^{\prime})` are reciprocal lattice vectors. The Fourier coefficients, derived by Adler \ [#Adler]_ and Wiser \ [#Wiser]_, are written as .. math:: \chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega) = \frac{1}{\Omega} \sum_{\mathbf{k}}^{\mathrm{BZ}} \sum_{n, n^{\prime}} \frac{f_{n\mathbf{k}}-f_{n^{\prime} \mathbf{k} + \mathbf{q} }}{\omega + \epsilon_{n\mathbf{k}} - \epsilon_{n^{\prime} \mathbf{k} + \mathbf{q} } + i\eta} \langle \psi_{n \mathbf{k}} | e^{-i(\mathbf{q} + \mathbf{G}) \cdot \mathbf{r}} | \psi_{n^{\prime} \mathbf{k} + \mathbf{q} } \rangle_{\Omega_{\mathrm{cell}}} \langle \psi_{n\mathbf{k}} | e^{i(\mathbf{q} + \mathbf{G}^{\prime}) \cdot \mathbf{r}^{\prime}} | \psi_{n^{\prime} \mathbf{k} + \mathbf{q} } \rangle_{\Omega_{\mathrm{cell}}} Interacting density response function ===================================== The full interacting density response function is obtained by solving Dyson's equation: .. 
math:: \chi(\mathbf r, \mathbf{r^{\prime}}, \omega) = \chi^0(\mathbf r, \mathbf{r^{\prime}}, \omega) + \iint_{\Omega} d\mathbf{r}_1 d\mathbf{r}_2 \ \chi^0(\mathbf r, \mathbf{r}_1, \omega) K(\mathbf{r}_1, \mathbf{r}_2) \chi(\mathbf{r}_2, \mathbf{r^{\prime}} ,\omega), where the kernel is the sum of the Coulomb and exchange-correlation interactions .. math:: K(\mathbf{r}_1, \mathbf{r}_2) = \frac{1}{|\mathbf{r}_1 -\mathbf{r}_2|} + \frac{\partial V_{xc}[n]}{\partial n}. By Fourier transform, Dyson's equation in reciprocal space becomes .. math:: \chi_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) = \chi^0_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) + \sum_{\mathbf G_1 \mathbf G_2} \chi^0_{\mathbf G \mathbf G_1}(\mathbf q, \omega) K_{\mathbf G_1 \mathbf G_2}(\mathbf q) \chi_{\mathbf G_2 \mathbf G^{\prime}}(\mathbf q, \omega). Note, the Coulomb kernel becomes diagonal in reciprocal space .. math:: K^{\mathrm{Coulomb}}_{\mathbf G_1 \mathbf G_2}(\mathbf q) = \frac{4\pi}{|\mathbf q+\mathbf G_1|^2} \delta_{\mathbf G_1 \mathbf G_2} The exchange-correlation (xc) kernel is obtained using the adiabatic local density approximation (ALDA): .. math:: K^{xc.ALDA}_{\mathbf G_1 \mathbf G_2}(\mathbf q) = \frac{1}{\Omega} \int d\mathbf{r} f_{xc}[n(\mathbf{r})] e^{-i(\mathbf{G}_1-\mathbf{G}_2)\cdot \mathbf{r}} with .. math:: f_{xc}[n(\mathbf{r})] = \left. \frac{\partial^2 E_{xc}[n]}{\partial n^2} \right|_{n_0(\mathbf{r})}. .. _macroscopic_dielectric_function: Dielectric function and its relation to spectra =============================================== The dielectric matrix is related to the density response matrix by .. math:: \epsilon^{-1}_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) = \delta_{\mathbf G \mathbf G^{\prime}} + \frac{4\pi}{|\mathbf q + \mathbf G|^2} \chi_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) Within the RPA, the dielectric matrix can also be written as ..
math:: \epsilon^{\mathrm{RPA}}_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) = \delta_{\mathbf G \mathbf G^{\prime}} - \frac{4\pi}{|\mathbf q + \mathbf G|^2} \chi^0_{\mathbf G \mathbf G^{\prime}}(\mathbf q, \omega) The macroscopic dielectric function is defined by .. math:: \epsilon_M(\mathbf q, \omega) = \frac{1}{\epsilon^{-1}_{00}(\mathbf q, \omega)} Optical absorption spectrum is obtained through .. math:: \mathrm{ABS} = \mathrm{Im} \epsilon_M(\mathbf q \rightarrow 0, \omega) Electron energy loss spectrum is .. math:: \mathrm{EELS} = -\mathrm{Im}\frac{1}{\epsilon_M(\mathbf q, \omega)} The f-sum rule ============== The scalar dielectric function is related to the dielectric tensor by .. math:: \epsilon_M(\mathbf q, \omega) = \mathrm{lim}_{\mathbf q \rightarrow 0} \ \hat{q}_{\alpha} \epsilon_{\alpha \beta}(\mathbf q, \omega) \hat{q}_{\beta}, and the dielectric tensor `\epsilon_{\alpha \beta}(\omega)` satisfies the "f-sum rule" .. math:: \int_0^{\infty} d\omega \ \omega \ \mathrm{Im} \epsilon_{\alpha \beta}(\omega) = \frac{2\pi^2N}{\Omega_{\mathrm{cell}}} \delta_{\alpha \beta} where `N` is the number of electrons in the unit cell and `\frac{N}{\Omega_{\mathrm{cell}}}` is the electron density. Optical limit (q -> 0) ====================== In the above sections we have derived the longitudinal dielectric function `\epsilon(\mathbf q, \omega)`. For external perturbation by a transverse electro-magnetic field, the full dielectric tensor should be calculated. However, in the long-wavelength limit, which is the case for light absorption, the dielectric tensor can be recovered from the scalar or longitudinal dielectric function by considering different directions of `\hat{\mathbf q}`. Although `\mathbf q` is close to zero, we can't use the approximation `\mathbf q = 0` because the Coulomb kernel (`\frac{4\pi}{|\mathbf q + \mathbf G|^2}`) diverges at `\mathbf q = \mathbf G = 0`.
In this section we will focus on evaluating `\chi_{\mathbf G \mathbf G^{\prime}}^0(\mathbf q, \omega)` in the limit of `\mathbf q \rightarrow 0` and `\mathbf G = 0` \ [#Louie]_. The dipole transition matrix `\langle \psi_{n \mathbf k} | e^{-i (\mathbf q + \mathbf G) \cdot \mathbf r} | \psi_{n^{\prime} \mathbf k + \mathbf q} \rangle` with `\mathbf G = 0` becomes .. math:: \langle \psi_{n \mathbf k} | e^{-i (\mathbf q + \mathbf G) \cdot \mathbf r} | \psi_{n^{\prime} \mathbf k + \mathbf q} \rangle = \langle u_{n \mathbf k} | u_{n^{\prime} \mathbf k + \mathbf q} \rangle Note, `\psi_{n \mathbf k}` is all-electron wavefunction with band index `n` at kpoint `\mathbf k` , and `u_{n \mathbf k}` is the periodic part of the Bloch wave written as `\psi_{n \mathbf k}(\mathbf r) = u_{n \mathbf k}(\mathbf r) e^{i \mathbf k \cdot \mathbf r}`. Employing second order perturbation theory, `u_{n^{\prime} \mathbf k + \mathbf q}` can be expanded in terms of other orbitals written as .. math:: | u_{n^{\prime} \mathbf k + \mathbf q} \rangle = | u_{n^{\prime} \mathbf k } \rangle + \sum_{m \neq n^{\prime}} \frac{ \langle u_{m \mathbf k} | \tilde V | u_{n^{\prime} \mathbf k} \rangle }{\epsilon_{n^{\prime} \mathbf k} - \epsilon_{m \mathbf k} } | u_{m \mathbf k} \rangle where the perturbation `\tilde V` is obtained in the following through k.p perturbation theory. The k.p Hamiltonian is expressed as .. math:: H(\mathbf k) u_{n \mathbf k}(\mathbf r) = \left[ -\frac{\hbar^2}{2m}(\nabla + i\mathbf k)^2 + V(\mathbf r) \right] u_{n \mathbf k}(\mathbf r) = \epsilon_{n \mathbf k} u_{n \mathbf k}(\mathbf r), where `V(\mathbf r)` is the periodic crystal potential. The perturbation Hamiltonian `\tilde V` is calculated by (atomic unit): .. 
math:: \tilde V = H(\mathbf k + \mathbf q) - H(\mathbf k) = -i\mathbf q \cdot (\nabla + i \mathbf k) Substituting `\tilde V` into the expression of `| u_{n^{\prime} \mathbf k + \mathbf q} \rangle`, multiplying by `\langle u_{n \mathbf k} |` from the left, and applying the orthonormality condition for the all-electron wavefunction `\langle u_{n \mathbf k} | u_{m \mathbf k} \rangle = \delta_{nm}`, we get .. math:: \langle \psi_{n \mathbf k} | e^{-i (\mathbf q + \mathbf G) \cdot \mathbf r} | \psi_{n^{\prime} \mathbf k + \mathbf q} \rangle_{\mathbf q \rightarrow 0, \mathbf G=0} = -i \mathbf q \cdot \frac{ \langle u_{n \mathbf k} | \nabla + i \mathbf k |u_{n^{\prime} \mathbf k} \rangle }{\epsilon_{n^{\prime} \mathbf k} - \epsilon_{n \mathbf k}} = -i \mathbf q \cdot \frac{ \langle \psi_{n \mathbf k} | \nabla |\psi_{n^{\prime} \mathbf k} \rangle }{\epsilon_{n^{\prime} \mathbf k} - \epsilon_{n \mathbf k}} Hilbert Transform ================= The non-interacting density response function `\chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega)` can be calculated through a Hilbert transform, written as .. math:: \chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega) = \int_{-\infty}^{\infty} d\omega^{\prime} \frac{A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime})} {\omega - \omega^{\prime}+ i\eta} where the spectral function `A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime})` is defined as ..
math:: A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime}) = \frac{1}{\Omega} \sum_{\mathbf{k}}^{\mathrm{BZ}} \sum_{n, n^{\prime}} ( f_{n\mathbf{k}}-f_{n^{\prime} \mathbf{k} + \mathbf{q}} ) \langle \psi_{n \mathbf{k}} | e^{-i(\mathbf{q} + \mathbf{G}) \cdot \mathbf{r}} | \psi_{n^{\prime} \mathbf{k} + \mathbf{q} } \rangle_{\Omega_{\mathrm{cell}}} \langle \psi_{n\mathbf{k}} | e^{i(\mathbf{q} + \mathbf{G}^{\prime}) \cdot \mathbf{r}^{\prime}} | \psi_{n^{\prime} \mathbf{k} + \mathbf{q} } \rangle_{\Omega_{\mathrm{cell}}} \times \delta( \omega^{\prime} + \epsilon_{n\mathbf{k}} - \epsilon_{n^{\prime} \mathbf{k} + \mathbf{q} } ) Note that the integration above requires both positive and negative frequencies. In the following derivation, the integration will be reduced to only half of the frequency domain. In a system that possesses time-reversal symmetry, the Bloch states have the following properties .. math:: \epsilon_{n, -\mathbf{k}} = \epsilon_{n, \mathbf{k}} f_{n, -\mathbf{k}} = f_{n, \mathbf{k}} \psi_{n, -\mathbf{k}}(\mathbf{r}) = \psi^{\ast}_{n, \mathbf{k}}(\mathbf{r}) Changing the indices in `A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime})` as .. math:: n, \mathbf{k} \rightarrow n^{\prime}, -\mathbf{k}-\mathbf{q} n^{\prime}, \mathbf{k}+\mathbf{q} \rightarrow n, -\mathbf{k} and employing time-reversal symmetry, one gets .. math:: A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime}) = - A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, -\omega^{\prime}) Substituting this into the integral at the beginning of this section, one gets ..
math:: \chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega) = \int_0^{\infty} d\omega^{\prime} \frac{ A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime})}{\omega-\omega^{\prime}+i\eta} + \int_{-\infty}^{0} d\omega^{\prime} \frac{ A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime})}{\omega-\omega^{\prime}+i\eta} = \int_0^{\infty} d\omega^{\prime} \left[ \frac{1}{ \omega-\omega^{\prime}+i\eta } - \frac{1}{ \omega+\omega^{\prime}+i\eta }\right] A_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega^{\prime}) Applying the Hilbert transform can make the calculations of `\chi^0_{\mathbf{G} \mathbf{G}^{\prime}}(\mathbf{q}, \omega)` `Nw / 2` times faster, where `Nw` is the number of frequency points used. For the delta function, we use either a triangular function, which is described in \ [#DeltaFunc]_ and is normalized to 1, or a Gaussian function, which is in principle normalized but in practice is not, due to the finite number of frequency points used. We tried both and it turns out that the spectrum does not sensitively depend on the function applied. PAW terms ========= The PAW terms come in when calculating the dipole transition matrix .. math:: \langle \psi_{n \mathbf k} | e^{-i (\mathbf q + \mathbf G) \cdot \mathbf r} | \psi_{n^{\prime} \mathbf k + \mathbf q} \rangle = \langle \tilde{\psi}_{n \mathbf k} | e^{-i (\mathbf q + \mathbf G) \cdot \mathbf r} | \tilde{\psi}_{n^{\prime} \mathbf k + \mathbf q} \rangle + \sum_{a,ij} \langle \tilde{\psi}_{n \mathbf k} | \tilde{p}_i^a \rangle^{\ast} \langle \tilde{\psi}_{n^{\prime} \mathbf k + \mathbf q} | \tilde{p}_j^a \rangle \left[ \langle \phi_i^a | e^{-i(\mathbf{q} + \mathbf{G}) \cdot \mathbf{r}} | \phi_j^a \rangle - \langle \tilde{\phi}_i^a | e^{-i(\mathbf{q} + \mathbf{G}) \cdot \mathbf{r}} | \tilde{\phi}_j^a \rangle \right] We calculate the last term in the above equation by expanding the plane wave as ..
math:: e^{i \mathbf{k} \cdot \mathbf{r}} = 4 \pi \sum_{lm} i^l j_l(kr) Y_{lm}(\hat{\mathbf{r}}) Y_{lm}(\hat{\mathbf{k}}) where `j_l` is spherical bessel function and write (for simplicity, define `\mathbf{k} = \mathbf{q} + \mathbf{G}`) .. math:: \langle \phi_i^a | e^{-i \mathbf{k} \cdot \mathbf{r}} | \phi_j^a \rangle - \langle \tilde{\phi}_i^a | e^{-i \mathbf{k} \cdot \mathbf{r}} | \tilde{\phi}_j^a \rangle = 4 \pi e^{-i \mathbf{k} \cdot \mathbf{R}_a} \sum_{lm} (-i)^l Y_{lm}(\hat{\mathbf{k}}) \int dr \ r^2 j_l(kr) \left[ \phi^{a}_{n_1 l_1}(r) \phi^{a}_{n_2 l_2}(r) - \tilde{\phi}^{a}_{n_1 l_1}(r) \tilde{\phi}^{a}_{n_2 l_2}(r) \right] \int d\Omega \ Y_{lm} Y_{l_1 m_1} Y_{l_2 m_2} where `\mathbf{R}_a` are the positions of atoms in the unit cell. For optical limit calculation, the dipole matrix related is .. math:: \langle \psi_{n \mathbf{k}} | \nabla | \psi_{n^{\prime} \mathbf{k}} \rangle = \langle \tilde{\psi}_{n \mathbf{k}} | \nabla | \tilde{\psi}_{n^{\prime} \mathbf{k}} \rangle + \sum_{a,ij} \langle \tilde{\psi}_{n \mathbf k} | \tilde{p}_i^a \rangle^{\ast} \langle \tilde{\psi}_{n^{\prime} \mathbf k} | \tilde{p}_j^a \rangle \left[ \langle \phi_i^a | \nabla_{\mathbf{r}} | \phi_j^a \rangle - \langle \tilde{\phi}_i^a | \nabla_{\mathbf{r}} | \tilde{\phi}_j^a \rangle \right] Refer to :ref:`setup_matrix_elements_nabla` for calculation of `\langle \phi_i^a | \nabla_{\mathbf{r}} | \phi_j^a \rangle - \langle \tilde{\phi}_i^a | \nabla_{\mathbf{r}} | \tilde{\phi}_j^a \rangle` .. [#Adler] S. L. Adler, Quantum theory of the dielectric constant in real solids, *Phys. Rev.* **126**, 413 (1962) .. [#Wiser] N. Wiser, Dielectric constant with local field effects included, *Phys. Rev.* **129**, 62 (1963). .. [#Louie] M. S. Hybertsen and S. G. Louie, Ab initio static dielectric matrices from the density-functional approach. I. Formulation and application to semiconductors and insulators, *Phys. Rev. B* **35**, 5585 (1987). .. [#DeltaFunc] M. Shishkin and G. 
Kresse, Implementation and performance of the frequency-dependent GW method within the PAW framework, *Phys. Rev. B* **74**, 035101 (2006). gpaw-24.1.0/doc/documentation/theory.rst000066400000000000000000000007051454550013000202150ustar00rootroot00000000000000.. _theory: Theory ------ Theory in order of abstraction. .. toctree:: :maxdepth: 1 introduction_to_paw paw_papers rmm-diis orthogonalization densitymix/densitymix directmin/directmin eigenvalues_of_core_states pw_and_exx defects_theory electrostatic_potential tddft/dielectric_response gw_theory/gw_theory bse/bse xas/xas ehrenfest/ehrenfest_theory elph/elph elph/raman sjm/sjm qeh/qeh gpaw-24.1.0/doc/documentation/utilities/000077500000000000000000000000001454550013000201625ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/utilities/utilities.rst000066400000000000000000000005551454550013000227340ustar00rootroot00000000000000.. module:: gpaw.utilities Utilities ========= .. autoclass:: gpaw.utilities.partition.AtomPartition :members: .. module:: gpaw.utilities.dipole .. autofunction:: gpaw.utilities.dipole.dipole_matrix_elements .. autofunction:: gpaw.utilities.dipole.dipole_matrix_elements_from_calc .. module:: gpaw.utilities.ekin .. autofunction:: gpaw.utilities.ekin.ekin gpaw-24.1.0/doc/documentation/xas/000077500000000000000000000000001454550013000167425ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/xas/xas.rst000066400000000000000000000066231454550013000202760ustar00rootroot00000000000000.. _xas: =================================== X-Ray Absorption Spectroscopy (XAS) =================================== Schematic illustration of XAS (from [Nil04]_): .. figure:: xas_illustration.png :width: 250 px The oscillator strengths are proportional to `|\langle \phi_{1s}| \mathbf{r} | \psi_n \rangle|^2`, where the one-center expansion of `\psi_n` for the core-hole atom can be used. Introduction ============ The pseudo wave-functions are solutions to this generalized eigenvalue problem: .. 
math:: H \tilde{\psi}_n = \epsilon_n S \tilde{\psi}_n. This can be transformed into a standard eigenvalue problem: .. math:: S^{-1/2} H S^{-1/2} \psi_n = \epsilon_n \psi_n, where `\psi_n = S^{1/2} \tilde{\psi}_n` is an all-electron wave function. XAS cross section ================= For the cross section, we need this quantity: .. math:: \langle \psi_n | x | \phi^a \rangle = \sum_i \langle \tilde{\psi}_n | \tilde{p}_i^a \rangle \langle \phi_i^a | x | \phi^a \rangle = \langle \tilde{\psi}_n | \tilde{\phi}^a \rangle, where `\phi^a` is the core state localized on atom `a` and `\tilde{\phi}^a = \sum_i \langle \phi_i^a | x | \phi^a \rangle \tilde{p}_i^a`. Now, the cross section is: .. math:: \sum_n |\langle \tilde{\psi}_n | \tilde{\phi}^a \rangle|^2 \delta(\epsilon_n - E) = \sum_n \langle \tilde{\phi}^a | S^{-1/2} | \psi_n \rangle \delta(\epsilon_n - E) \langle \psi_n | S^{-1/2} | \tilde{\phi}^a \rangle. By introducing `G(E) = (E - S^{-1/2} H S^{-1/2} + i \gamma)^{-1}`, we get: .. math:: \text{Im}[\langle S^{-1/2} \tilde{\phi}^a | G(E) | S^{-1/2} \tilde{\phi}^a \rangle]. Recursion method ================ Instead of working with the `u_i` functions from the Taillefumier paper, we introduce `w_i=S^{1/2}u_i` which are the actual functions that we need to find. We now define `y_i` and `z_i` as: .. math:: w_i = S z_i, .. math:: y_i = H z_i. With these definitions, the recursion formula reads: .. math:: y_i = a_i w_i + b_{i+1} w_{i+1} + b_i w_{i-1}, where: .. math:: a_i = \langle z_i | y_i \rangle, and .. math:: b_i = \langle z_i | y_{i-1} \rangle = \langle z_{i-1} | y_i \rangle. The `w_i` functions should be normalized as: .. math:: \langle w_i | S^{-1} | w_i \rangle = \langle w_i | z_i \rangle = 1, and the recursion is started with `w_0 \propto \tilde{\phi}^a`. Inverting the S matrix ====================== The S (or O) operator is defined as: .. 
math:: \hat O = 1 + \sum_a \sum_{i_1 i_2} |\tilde p^a_{i_1}> O^a_{i_1 i_2}< \tilde p^q_{i_2}| Where `O^a_{i_1 i_2} = <\phi ^a_{i_1}| \phi ^a_{i_2}> - <\tilde \phi ^a_{i_1}| \tilde \phi ^a_{i_2}>` Assume that `\hat O^{-1}` can be written as .. math:: \hat O^{-1} = 1 + \sum_a \sum_{i_1 i_2} |\tilde p^a_{i_1}> P^a_{i_1 i_2}< \tilde p^a_{i_2}| Then according to [P.J. Hasnip et al, Comp. Phys. Comm. 174 (2006) 24-29 ] the coefficients `P^a_{i_1 i_2}` are given by .. math:: P^a_{i_1 i_2} = -O^a_{i_1 j} ( 1 + B^a_{kl} O^a_{lm} )^{-1}_{j i_2} .. math:: B^a_{kl} = < \tilde p^a_{k}| \tilde p^a_{l}> With summation over equal indices (except a). These formulas ignore overlap between projectors on different atoms. The accuracy of the `\hat O^{-1}` operator can be checked for example by doing: .. math:: <\tilde \phi_{i_1}| \hat O \hat O^{-1} \hat O |\tilde \phi_{i_2}> - \delta_{i_1 i_2} which should be zero for all normalized, orthogonalized `\tilde \phi` gpaw-24.1.0/doc/documentation/xc/000077500000000000000000000000001454550013000165615ustar00rootroot00000000000000gpaw-24.1.0/doc/documentation/xc/E_rpa.png000066400000000000000000001264351454550013000203300ustar00rootroot00000000000000PNG  IHDR XvpsBIT|d pHYsaa?i IDATxy\M׹m*mRJh,Caƾ%Twwb0c-0 3}a̘B; | ([7T|y}s>$ """""`BDDDDD:t  """""& DDDDD3L@Hg0!"""""aBDDDDD:t  """""& DDDDD3L@Hg0!"""""aBDDDDD:t  """""& DDDDD3L@Hg0!"""""aBDDDDD:t  "KGB1c6nBC!=_ LLLP(P!s)))055UqFSSS1eL::H$wS_(1~HOOGw!!!VVVpppCEP`ҤIU}1o_ff&8'uְWoooXE>G"=& Dzn֭T~ۧbٲeXt)sC)۷/]2s:uzxYYY3gN*J8;;cܹߑdbժU033C\\}<ıQaBΟ?/w9/f||<~7 0044&O4u'NW^1|M|P*6 =$$$9JHHPS*MFFFᅬ۷k:uR_W_a&߼y&LoSSSXZZe˖?~[TTT۷/W###ԪU cǎݻw[YYYoի۷/~T;v 7n֭ ###ڢW^8sk駟0`899666w}/^hBsѣG:u*6l*U@PԩS QﷴDm6/ 6mRP(`kkQWϛ7U筊uӦMy^>$  rrr0}"C >x`TZQ9%H/eddkkk!IBqi!I066ק^H$$IQ0`@QQQ1$IBsjɓ's-!IP(bXH$VźumllD*UmG%$Icƌɳ}ɒ%y՗zxuGT(ZT^]ԪIxwJX~044$@abb"|||DBBBL`1"##ŧ-)Hjjhذ$I["++Kٳb„ "--M=--M]VXYY IĊ+WLP8~A ȹs۷o/bccB5k Ixi'+++ѷo_quu~I-RygϪtf⯿:_ܱ5nXDEE]rE4kLH$%xB1b!IhРEffBl)|M!Iׯ_LXZZM6ϟ 
!xHMMBѽ{w!I077ׯWݸ{>|044!0a$Ixzzj$^zt)P!F]ɫr' B䀘W$tzk;^"*{L@$IL2%K IDÆ 5}wdoW|ܹPjC$ѤI<U|}}mu1; ID@@@}EM@^x!:w,$IkIII꧲{nuw RP{祐ٳ|wudK$:wq+W?`NX c޼yB$ѽ{wmÅ$I.UըQC$''~;*ǏW߽yƱܹs…qCTT$I}#G!_b_;wƏq'''?ΏI&(nݺxl'|Mq&j„ 8rV|o/q<СCiӦ,_͛0Ǒ$ 3fиO> EO5hʔ)ZxQz|7n ggg!UcZj///P_WG&<;;;cذahдiSmB^: 6@TIk 45kL}fΜY`սZl~'H˗ʪ$HmذЦM4h >;;;BuRRR It颵ݻ xn߾o$Ihݺƾ?MJJJaChѢEXn u<xr{),Y///X=i%@wll,5и=wUy]d{gϞAqI\xw)7$˗kkTo^تcǎZ۴kZ͛7/ڵkE*]Zò0|ڵkظqcsN"1rR"= P*~~~^~*YԪUKk;'''5ԸPgUmmU8*ݻwc$ -B^4q1m4?~> ]kZ^oR%P{G!99YKIX.T?y333 ;w*]Uƻ7*Fjc޽GA\\ &>,m7n Vp >B㫯*00!3R^ZZ}tYxɓ=z4ݻwÇHKKý{;wxMУu?TwZ$Iµkאڗꚗ}]V*E;*=,xϟ?GΝ7hmӧOC$毿*Xz ·(mݺuNMu,ǶÇ <v]7?ܵjJcչjRjѲeK\f͚Pn]899Adylz AӧOk]S7@Ν5k֨uJEx~ǰGJJ .\XH{Ő!CT*͛7 R"*ML@Ȗ-[T*amm^zL뭷B MFPȪUտ{U|||JKLTbРAr W~urvZiiihNrmHMM-rVVVx_~y /^Ğ={z;˻i[Zܔ \rV*mzz:<ƠAoXaL/PRMF߸q"??b?U߇5k`ʕ-CBTbȐ! -ѣoD$& DzDH'ϤbmT6oެ~5kք[lZBsƌƣGХK<%}O:.]ǰŧ~ZS+5}"""`bb{^zӰaCԩSBʕ+}o CCC!iӦbg7n@npe/3 t'::Z O?T>$ISo;wz{ժUEݺuEݺuСC?vzf"vՠU ^Y%B;,}-ܧmu IQ֗HLLT;pz?ժfffB$ahh(vءwر|3&O_5ͽPdA#;v&&&yVRuźB$Ѷm[1$I2϶zk\Ղsͷ/++K;68F  }NϞ= l[} Q6 jm۶<M ߿%; DzBU]v-TƍaÆ$)OuYf᫯BVP(p$&&{ģcǎv>c4jˉ5ԩSqڵW(|+˶>x ݟ=zѣlllKKKG oEpp07nl$&&"111ϝםq7xѢE ̛7/_[SISNaӧs(J8;;W^ꫯvu\mO022իqi=$ Q0{l\pk.qsٳg1g999ҥ ~|I֯(xV=BV%y_uA OSY$<|gI!% G d $!115C.]S |̈*!""*c .|dggo"o 䩤cƌƍ!IO999F\\1a]NDs/CDDDz)VX+V@PȒ$aK޽SLGSH`T^]קADL@JiӦ q5044DݺuѶm[7>{ 000 &L)S tЉHg8P(ڶiii5k~mXYY 6ĢET*u-Q>]vҲPIvZxyya툈@^0c b""""`FFF4icժUpqq#G 쓞B*UoE|||ŮƜ9s`aaI&Y4iHJJ** V*sb 8q%>ÇA-J%0vXxxxx={駟Rc3LMMK<& ܾ}ݺuz=%ѣG$Iz]xQ!… x8uERtE(JBCC//+44JI!^iР֮][kFBBΝ+W";;x?)_{Wk׮hҤ W\...P>EjAAAX|y*RI_ϻoO'hiܼ9M[,_pTSJW IQ*]={Z~CҕRixT1!"""*Gҕa6eo?2bBDDDTNd*3/;}XʄG#g=~-bו]fT2Yxן @?[sD~Xx|Qb^""""O)SpdԑJ5\;;;DGg^Y%x+3& DDDD2:}w􅝙bc*daÆ/t`$R(l邦5VJWD0!"""ұܕ5C#UJWD,""""Tf"`B/Nc;+dsW1!"""ґoG?{a0PC"9& DDDD:4"""2v"ڬk@ kL@+] !07j.+]s@JYJWyf?"V"*"""RJWD9 DDDD Q m Q1*]>tETBDDDT tET6H&zq$** :u*T .iӦXd pb~ J.\y兓'OJk X[[bBDDeWWW8;;#22ŋO>WqY,Zyܕ|bM5lNT0)mwdgg#''GsZ`dd_$ C@@>SXXX*d"""cfr-r HܺuPk׮;w"$"""+]U$L@B]V"*!UX'N6$ 11L>*qqq)g0p̙R?.tET1$)u/?^&%""򋕮*6_Zx( QJWD3335ԎHQ堳|1zl끇Y銨I?Դʟ 'w{_ؚ"& \J@' 
HϞ=+1|a+ʇQ#c1|p m<Q%YfXr%RRRt5$U@L7s`>>R rrrpa 6 8q".\ЈH&aa輹32D,U$IB.]uV_Xjz-@JJ VZVZUVXjRSSʐհ0}~JWD@2VVV0aΞ=XzBĉQfM >;\"""*VwL Mt@$7www,[ wŞ={УG C׮]JQ#t 헷cktEgUbdd~a˖-OP0T.?þ10C""3;W !c߾}̄(stDDDT\'N5EL@ 'rܺu 6lMnhh={QqņahS ‡s95ݻ~z8q999}nnn/eKώ9Qs0H鵆͉, HLL ֯_;wɓ'i5V""t̟?cƌA͚5u5$PJW{!Q9fΜ䃈B`w#~eADZ ?#:xdddȑ#O>ŹsP(СC#%""W HBBϫ |}}q@1p2H3!>?9A_|Q*XZ(J`ر(rݻw#""W^-興4]*3s’-B_Qvm̜9oߖ+""rk輹3Vohh% M$""ݻ ݺuÞ={xW>{|0}"FF.Ux% ;wq]|pssCNN"""0h Ԯ]3f(o""&+; c}7.U % *իWk׮رc>|8LLLp=|x7еkW!""nv9 [mAW0Q!{[e$%%믿FƍH 4j9W*nt"GF.U:*Qĉs<߿-o!C,""TN'Fum-Y銈*rdffb˖-ر#qq@ݺuwwwdggc׮]h޼9Ξ=+sDDD%vt 7D +]QUn˗/㣏>B͚51j(a6<<ai;D"2'$t"""}  0,T[~v\z'N At&!DT镛2DDDQVv~ì<n;e nӦ-1R""ݐ-Q(000( 5kD׮]l2:Qd-vۂkm~g^exDDЯϟ޽{Ĕ)SаaC:uJKCumqED#Tw>^g?Q&#G˘9s&^x $;v ;v쀉 KT^֮]w޸z*jԨ!i壭ҕa6IH?*7GGGիWǡCꚯ͘1c0{lt 8<뇠 tׯ__ pDDD~' zG#\z:F:H.=5|aڵcڵwΝ ҥKIDDDQUg(6yXxן @?[sD~Xx&"=Y233kzzz?z[Νa``7oeDDDqaㅍ 9Y!:zM[g?Rik%" [ 33B$ wz R2R0`L8-`DÆ _(:"GGlmmSmӧVZZjYHDDT8[_Dok""1鑚qƩܾ}ϟ?KYJDDx{P(u;Q #XfBxx8Ν; b„ D͚5!I޽(|w>0k,u<<<:""SJWZ!?lNDDɖ4nv°ap}䛰'!447V{)FQF4n""_B,<3&ñ:Q"[{Fll,,X={{  =j5mKt```& 5k`ՈCrr2Փ9:""6݌FQ%[o@$DFFN:$ G (8=@o$'l H\\PNB ""*賽X#?oھ)wHDDlex '""*Ў;iS'4ހQ)-i׮={Q>B,8C F92ͪQ![2e?Dzz\aeegG?<2o 2899᫯'|ƍ?Daoo8g+]l $IoƔ)S .$IaDDX銈Hwd]Dҹ_מg{XUds"" /v\ށQ?BkDD: [,DDXxr!fMc]ulND#>EDDkYY`<6\؀``{8JWI@ȶ!|ׯ.^• pQڵKQe6k@D4""ɖ`̙044IJeː {{{meDDT 츼ޛVݍevB|g ֶ:t\xQWQ' 0tP h4##Yfmɓ'P(0nܸ׶9޽Ȉc+"K; ]vҲI-[oɳFe"?t;$""zl Μ9+Wݽ?h֬ 7XjU !`ii 2Tg{XUBL@ 'S=էOE lO?0`2Uś&MT6EmODDJWDDl |'''buz_VV_%K୷_7775J1;w+V"KKKAXpX*V""xd{GnݰzjYF} ԬY066Y|J;vlԩ|}}ѨQ#cݘ5k`޽e1~a+"I7o .`֬Yضm233/022ñ`888{(tԩPm/\Mbɒ%HNN… 59sܩS'XYYaѢE8uڷooPPl񁏏Ob!"lR2R0p@?M}7!Q%^ ccc|ӧahXn #""t12*>퇇V^ __~EDD/*]YX"?nJBܹ*U`*KI IDATr@DD+;n䃈-133) !X!`@2DDl RSSq]B "r +; 0 ~,KDTɖAy,y*>\܂M}7a\%"dK@wKbÆ C\\\ nBu_#bd ٪`@$cƍشilll`iiY`?&*DD+]/LDTq !/0 k2 zC*rEDD:&[2zh&""„&`1sBw@"""0h l HBB\UV-pɃ(dK@ wq5?ӊR,Y IHHZaydAc0CezfPY2eIZ(LŒ5IO:es0SD~z=\uS<|}] 4044TDDTqqp s!b}bѱIG#Q '֭[#;;=wr9:t蠆dDDp; nz""Rh!^9K. 
:TшA>>wNWDDT) χ6V^9 pvv˗a``9s9%PQg`I| =;]QVXZZ"88B6m3}y氳CLL 455{n4iO؈Ēkk.|in"""zQ333}vx\\\۷oobD$""UQ ^7oܹsx"ҠP(`aa~ b$"銈%#ŎBDD/ 'E/8DDDBDD5;]Quw@(TbO>?7QaBDDŲ0vX{pF& DDdNQHDDT!""v"""&t"z.z8v9C⃈ "zH \EDTQ=NWDD$& DD;]D/@~ O>Eaa+߽{WMɈꖢNW:tEDDQ*ݻŊ@DToKއI&iCS#Q=%ZuW^puu45ˏ DD#D.gWO ر ̟?7n+QNWDDTVܺu OŊ@DTg*td("v$""""oB722ꜢNW?f+""qD{zNϟΉOG>W#'8 SLB@ppX}GOQ$Z2w\5 -b+^"7 ƅ(Q6DDTcԩS|r -[`۶mP?˃аaC4hΈDTWihʁ~〛ݑETHU> Hzz:ۇDddd`cc7774mTr9O3fֶR禥.\#Fxt֭:bQT(|X*`81/~[΢#"" Q \O>7nDaaav킯/|}}jժJ݅xYtt4U/{@ff&֮][)J޽{ׯOϟccct"ݲ0vX7 VȈzJhn֭#VD":+,, aaa%ƲDJSwZL2U[[666h޼9 55 (((|wu+++4e˖HII?n BQMT2 zzz.57n xwJ[nz+3lڴ *%޳{p sAZNNN:.u9 ?ֶ3֭SS>x+WЫW/  ,.>|}}?++ k֬A@@0n89277]|̜93g,q,55&&&o-*oӺX턉O돮GO:6عsɈތhЃhWPccc[˖- رcZ%дiSpww/f͚%=z_vvv-Q-/yw9}%""@"[5mO>Efff˲иqc"==]M KD۶mq̙۷%N>]L&a}2 2LMݫpDDu@V^ƅg:2SzL;Q  6lP/ 9C~HHKI'Y|Q'ZRԕjXd rrrJƲe˰j*Z3Ux phѢ6oތ2Khjj?Ě5kDHHDT銈;Q mmm`駟 lllf͚J* 1] Rh+aD!jRyĎADTm^tsNL1UHDDDQ]v?>0i9i89$7QŧGWk#'VMĎDDD$* H$hҤ ̙Sb/_^񈈪;]&AH/6YZYY!99XeH$(*V\rzBbb"ŎCDIYEa|NWDDu߯9qppnݺXe""uzղӧtEDD *ftEDDT1nB'"/w:18Fm]ٳgx;wx!OXXDDD '''#--MHIIӛЉ)'L<0TTkvFYA06|,Fvɑ,>TPk@KKKDDtEDDjEr=EbG!zN'𱈾NWDDDAm޽{SeeeۻsA@VVV~UP"rēlNDDV\t %JqX|y#"RG0"l &VbG"""V`ſ߽{7tuu1~r7KR[n3f 5jDDŊ:]4Cٜ ?~<Ə_ݻw;wTW"JccEed2r945?ZÆ all9%5tEDD$.C&ATYZZBTڵk%fgg#++ D1)' v&?=D+@z-͛cחj*(JtAn`+""CdРAǏ͞=R{E׮]1i$6lx{{j;]-D&www\ryyycݺu֭[ 99<<<0o<1Q-%˓a\8DݏBk7  -5>{l899!<<>1 ggg$Z~}G9pb 8!DDD5hȫXYYO>;RtEDDTs[DTQV#Jff&ҥdTƶ,=]#tT(7@j)@˗/!)##}zΝ5|$|R oDj+@D"aBT-Zϋ>.EHG ""R888TIDtR2^`|<;sĥb#"" Ve5HOW DDDTc  @q@xN@Ҍxv-UF ƒˍDTJsEBs05*ģ]eXn1.*,,/Zl ,ZTǬO{HIoooHR899U8J~[N Y !q __)ѹtΝG`SSS+zdܸq!HХK%4jSLA`` ݋ŋ=gdd$ahhR'f͚!.. 
o>9:bI@&APTxw!-- 999hݺk]+::Ϊ_н{w 33k׮}k,,, */?>KyzzT<ƅC(`ZO.W$""q!,,XVVHi ^z!""ϟ+ZײBPPJs[lc֭P(єJ% !ɠmmm^344-[ĠATiӦ %`Ku"""SY_rz)Q Z先,^Ν+#Gzj)S^ZVykא3gb̙%_,[ҹK.5  ;͉8 OOOl߾ϟ>CB]b\.テ=ڵkWjv"""Dxydd$veX @CC}Ehh("""``` fT() JxDD222x%tEDDTv&MI&APӧP(hܸ-z޽{wqqBHD ;]hR)j bTHDT:]fQ=&Z" "^tNWDDDh{@5k\.ODj~8:I[McADDTωV888 77T[":];#:@H%""" KA3gĊADՠS qHff&>S|'ҥ &M~ W9`+"""z '''H$\|Wꫯ}ֆ H$lkKTUD瀼WrueNWDDDJ e=9j7cx-8DDDT.6 &&2LDՙgX|+vxX * 1\lO܎b*H%}ADDDh6 a`` V"zM<̽3v wOo#Q-!ZҦMܽ{+U˝""0@#Q-"z bϞ=bE J2w߈eADDD&ZdtѣGŊAD*c?@66DDDZD[uV 6 ۷o+tғЗ/_ƔD$6n)?2#C'v,"""D+@K>)) IIIs荈V888TDR I233.%C.׀D/9C@FTv"""*#Z-֥dddo_ܹ:(HDxoZ "¢E)>v_@%<5։ DKVigmxI'v'"""zs-zY~~>N:dff`ccAA[[[Du[\ NWRw#""" m۶O> AJ755ŪU0sL}B<hw88 |sT MMhnC@@@5k-ZRSSL̞=w_|!VT:I'>wdM-.cVVtrAAA B@Րnφ.``R AJDDD]ĉ .Ķmʜ#v؁>0i$# d=@~KCW{z96O2~-Ξ=ٳgcprr*~ÇG )Sֹ2Fb|bٔ """Vhhho̹nnnTe#D(D}ۇxٳ 3&Q!6n)?<Ffs"""QD-@ND\)cs-q/gΟq985!7 pwDދDk07Q%Z_!!! 4pեK޽{AjJHT;R0x({ l;PHDDDDm}FHHwU8ovڥdDGBZ9b|bX|Q'Zr *ApETkq;`illKDDDhȵkנ޽{W8W^kԐfb6m\:3lKDDDhȟ ###hhhT8WKK FFFx\rƿODZ?.*mBoذ!!$+* @WWWMĕ ?ut)r45apy1*MM6v.\rnLL aeetD@߾sg u$}$N n=ĎIDDDZD[5dŋQXXX,^0x`d#ӢE)>oJ(w‘-qbG$"""zm ̓.bccRs._ gggBWW~Iҥdv/~cuZAq@?ljj'і`5o8q".^[[[4m/AT*EPPZl)V\"i@X.@W8Q-$Z0119sp}#==Ĝv!00H)KCp _ nV*5ћCGtt4.^tH$XXX_~pttT*Jb ŀUϟHKK>UVGjHLQv~6]nڧCAt[[>pj/ ĠA0h )22044ep銈j0)M!A;DuKOODZcT;zhcǎغu+Ǝ[|{hݺ59==rr$$$ׯG}Tv={SN4Q`+"""SGIIIǼy/DU"E{ĥqDDDD*R[RϞ=C`` z^z!00YYYBF`dİ.Q%ԩ}aذaH$z*͛f͚ OVW$Jc+"""7%XZZZ3f =`ժUhӦ VAApMu#* c5% M]Oͱl2ܾ}*]v?ELJ\)ǜcsɏ~6DDDDF H$prr·~Ǐ#00GnеkWR=a#t%;Fځkf ԸwRFFF={6pU̝;&&&7n;#EbqL.v$"""ZOm)CAFocJKColق Qw\]M]p9QQk1cÇu BKс<<<C7a f <q9QR?Ç/7nwE^^"Pjx_Q5P[TݻXh$ n߾oV]ɕr}|t#c+"""j͚5,--vZ,^<"u)teݼy)̙3nRW$NW1c銈H Զ ]&I&hРA[l VW$:]h ;]̺SAQ~Z* uEzCpu@kֈ⃈HMНAetEDDD$>$77w. ɓ'WK6J9>8N-škٜHZd2L:H$ Z1~xs ;Ffs""""ySEwI*!0d11;QݻQĴDm mv""""VXZZRD^tu?s9Q TjXatEDDDTԚ$777o;pr{'}{E~IDDDDW ϟ/%|}}ŎC5Xv~6F1b>90t;w :w JB-ɰyflٲO>!%&tEDDDT;3f `kk ;;;4i~~~P*R7+W,.>\\\θTK$%.وaADDDTHAA ׯC@@ VZ#G"22aٲex뭷jv""""]V...2dA@DD;[Ň;V\ +++uEZDlۄN~1`lNDDDT 9p?ė_~Y<>w\,X7oa``p;F\)? 
_cQEX3p 7j{oA"`ť-Z_~%*˝lg+"""ZFmw@>} ccc4mZz9!0j(uEZj? )6QFDHbZ"F6bcŬؑ5p xnCZDxDDDD A_b~D.Q-'CCCc @Nj(*E5\)LJ?~~X;p-7Q ''GD KuZ  =Nu_v~6.]? 5CC~}v""""gsrss1k,X&&&*#GO?G}{{{bop֭jN]s%%.<bcX|QM:R)))j%ڴi)GGGH5mʕ+!JqYP PsfP˗/,XCcccnÇC8pstEDT\z3f@ǎa``===m&Lcx6؛^666HOODV&HĬY`ccSEH,\ زe Ο? +((hkk/}RRRՅA=1`lNDTG)J,^҂#\]] 22?#|}}QC޽{HLL,~P̝;hٲέ[pʕ*JEbaR\ӧcƌԹ]dŋ׿U<~EYuAk8R#>.~vZn6'"VXh۶mㅅرcn޼)RBM5n7;`tt4RJ_+ Xvm7l0oK,ٳgsa…ШpMZ+,,쵾w1Q92331mBt2;K6m!233kn߾kעI&(U|̙S1w\t 6d_pE033СCqĉ9EΝ;W ^+%%~~~ݻ7 mbܹ~ؽ{7M6;{0}tj h֬MV~GF `jj ///R)M_~...066F `oo_ϧ|AJw6mZcǎԄ lllnݺJLzO6z-T۲eK[nB@VV !ɠWjU---DDD`bظq#5k 6mu%Úʗ}=pH(| ϏGlֺڵ Jfͪ^ohxx8'''9r111/pY;we |]]]5 m۶ӧOqyٳC Q{Qe™3g ggg <RW\_'N 11  44׮]all EaR$>>C AAAF#%%{#I&A__^^^055ɓ'aooFg}8;;#)) ٳg:Ͻ}6L 6 &M{ȑ#8y$uo߾ GGG`ܹx;ʕ+ܹ3wBCC\=zxQ17;,,,WbϞ=%Ʈ\^zU%%H$W~mذAz$<\HKK$k׮2&&& Īv.Q``!$Ee :+B_1ԩl꺞 H3gT꼴4AT_v H~)SD">>oܸZv޽r&A׮]qio?|7O.J]LgϟaۙDDejj;׹#//ر#ѵkWhiiDff&.\h׮ 'NO>8p ={~ zR]bӦMЭ[75 mڴAVVΟ?kkkܹ`kkĉǀ}o߾%ZX===x{{c۶mÑt 86l30zh'N6ۇaÆOٳФIASSS033}]ԩS%={LLMMK.͛2biiY9 ҥK-ZZZZB-ӧ ˜/~zCvۣG ;vtuu]]]}ĉcǎzG }F BRRX .\}}}X=ztAx0c E`nn.L0AqF+WRi6mZ_P. 
-Zn-hii RB6޽|};\Cp#,n&K-a7fs\6DSSQdeFNMb T]KWt|=C>{]߫|{ !lllIJeDAAAǁi>JnDQdV$2̏ g7p+""%Zg8QM憕+WbŊ-ݝSsz,W""""j=^HMá8ٜU+)+ ^^p"ll[KDDDD1!Q0jԨ-KPEL  +([X^mu#p+""""j @7o3ԓ+]3JWDDDDԜ8p qAW8)DDDD1*WwW""""fJWDDDDԒ8)()?bf͘=xvKw:  88 nu@HJЭC['|Q TTT_׫l^^o߾Ԅ)q̙j fffd066ѣo߾4ت|IӃ:兘jocccd2 Gχ:߿$c&^i~#1|?{N6'"w-Aaa!$Is{az* I&!77aaax~xzz*ٱc___XYY֭[ɓ+V4{)ٳg?]!8u ooohhh 55QQQ8x 1|2VªU`ll_~HKKCDD: J5ٸLP(FUQQ!>KLH+%11l(,-lQu3z-$e|M!IPHOOO:u@a_݅HJJRHOKKBKKKȑ#E=We$Ib յ +JKK>B___ddd(9{PQQ={TZ $I>kXڛ<`Cee=+]Qc8p۶mN߿$SH¬Yp}n݂޽{w١uj>WkC d22}􁝝>|{PRR;;;tU#q-uFEE|||i***puu1eBKK 4^0ig J 0a|yKlڌ/T$DD2220|̙3 #;;iVVV8c"++ III _akk 33>v؁ L>u*dt _EFFBg";;cǎUZgdd${aȑHKKk3fڵk4i9RUs@tEDDf͂!֭[z:ulW^ BzHH0rHL8Q>$"" mpJ $W_6߅  ddd 22=z޽{䍊+fffHOOGDD<<eWS#hP;TO>x"V^->|}}ѳg*yv ___?֭[ݻwǔ)S`bbRLW&LO__|B /kkk$&&>H:bp+"+ep8;,5Y}+W` 55/ƬYWիW#&&;v˗ꊼ<۷{.TTo= –-[0qD>|믿vZ8B0ʓL-`l߾0k֬TTT?~UV!66ǎ- sbҥܹ3O,]?pڵ*W:`eeUe> 0U---VcH&x%|i''1L!qvui&oՐ$I~@$f͂1>esUgϞիqĨQs`˖-ٳ's/i&\t ;vŋaccetCBB T闁A}CDD WqæM8p/" ::xqFy{~-#*cǎ!((HiʂʾTTTԩDu*(ÂXf=;/Gc>ds"6H[]ID?044T̙9s&K̙3ֶ̰~z_^!}ǎ!C( NNNJ'~ɓ_AUয়~$I~fݻQ\\3f@SS^e`̘1˗HFQ%yʕ+6lDyyWD-HTPR^1{QPTTT%=11ϟȑ#ѧO߿Q޽$W^rrr{.@WWQmdN>IVV(Wel\022ȑ#\*~*W""6?WrJ?3g#33Ʋ2W](88G?ӝG"!!NNN}W\AXX455қ҅ py888`Рt;wUSRRqFhhhC>j(@XX-Z޽{?~qqqu%%%O&zV!OturItEDDw}@ $$D~M 8cǎE޽QRR8\p/l٢P)V\K'ND^_!<<> cc:`UN>ꇿ?$I ,--Q^^d>|ظq0gl޼|z ex뭷*S\\Phiiaڴi5]d pdggCUU]t+zKՓ/Æ W_}055'-Zgggyި*WQꪾ&$ҶIIIh8GχO?울+]B}4!:wAlZu=y7Mj*֡vX~ >^N>l~Eoj5VH+ !\銈 жJWDDDDpH+4aNz; @Z)vS.K @0!""""fIDDDM(%%@D s0!""jzzziӦpO1*ezv5>}GAAAKwHOO}in{ @ՎЉ0F||T~O+**j឴a$ixbdgg 1f̘۽{$I=zʾÇ~)-k.7nܸqƍ[+vU/$!۶mSnݺU nnn8rHu߿_e߽{]Ekܸqصkz--:Oqq1_qƵtWڬXXX 00$ \t b" @i9SSSL:IDDDDMٹЦqz=deeѣxk׵kW8::",, pY DDDDaIBsb̙#tuu?8vy{=z 66VO?aܸqܹsŋcРA :^F߾}_~XbӧO+P^^ 4777HOO-[W^y1vXt2 prrBhhB۷ocҥ d21e\~J_~%899!<<\!ʕ+Rvg::|0н{wd2/^TףGɓ9s&젦'ļy mmmرcU"((;w&lmmy*v '''B]]&&&ٳg򥥥U;3g>Ǐ#TTTp%IMMEXX1j(DGGWO#=accŋysss1n8,[ &&&q֮]#FԩSpppP{THFuWxz뭷`cc555/xq)8qJSN)HZKTWN¸q0tPB__QQQxwq-|{'q6"***D^ĠAB$%% I|oΝB$~y۷Bb+&LPc$M65HΝ;U233 C̜9***?Iܗ<>O3$$DH$;r oV!}޼yBGGGܿ6_.$I?}M0 X{OH$~*i oy 
I_>zha`` g]cjx Q3$ 򿔪=Xy.@&d2)chiiaʔ)CS155fii cccdffӄuV۪t5iS~&$$@t{zzÇ|OUI5kMiTק5D5~:~ @!p]|Ǹt-Z>39sŸtV^ GGG;V^ϒ%Koaݻw;ov"22>>>mK. IC[[jǧWv{Yyy9=z4Z|/L&ƌg0zZYY`?UYZZBMM ]t9spΝm_"3gLLL0o-v-z!,]hhhZ UUUt}怴nݻwIIIPDFF ###!Ix׫u%qY.yahh(Ν;WcZݻI6@$&&'OO>D WWWѣ덎$5kT-1D3F:tH8qBBUUU]VѰϺ'֍Q3qwwzzzT۷OH$>}ZH$֭['T ///|V*m+99YH$?4_]U9Չ$s焭$IH$ Ć T7>,X@H$rrr<+$I{5V߿D5:u AAABMMMdgg !Y'D~BL ''W^=W%$Ihշg,\6m۬m3x`oHOOύ|]\\j,>} ==9JT:yROj_W_~PH8p ˑ qu@~B _`ll# IDAT sssXYYx^z;wMMMD***pyt޽J[صk޽{?!5ʿo|ذaO^rv킪* RmV)j P|'|{{{12Crr2\]]kWXXHM_@a2DnnJmKWqE)z|1yd 4HR޽{qaxxxGHH"##pB_XwE||<YB ""BVh%O+с`Ŋ%n߾۷#!!ʪUΘddd ""F޾T:?t EEE8v_|Q^3MHǏ?oTiDo6|||0~x,\zzz8x m???t@>sP·;6lÆ BUUUwwwq|bB SSS1i$\[N 0@hhh---1dȐj,+tuuEAA3cc5J}rsuuBq)*̄"$$J}qqq*** u+{aK'd?{)3gѵkW&Ŵiĵkת5w\ѿ-E׮]ŴiDrrB۷!C(ǟx۫^Nj/ oooѥK&xÇBSSSd2amm-ϟ_ܜ~~ !/\\\dO>bŊDu=GmS]yp:5 DDDDDlQaBDDDDD͆5 DDDDDlQQ[Op"vdʕ/Ooӧf̘jH] "bBDNYXX7sssHT|7>|8VZ]$"Q;$I233[VV qY899VZ毄Q4hUUU@TTT : DD:uUWVVgφ=333xxx`޽ڵ+^{5_5ʕ+:u*,,,nݺaܹ̬ulGĉѥKd2hhhK.pssÚ5kPPPPkDDltyݾ}wX[[+_m\044Ľ{߿{v"$I?Ӄ&233}v:tgΜAΝJKK!Ituuq}lٲxڵk```UUUܹsYYY1hР7""jBDA!pL4 022?<:::;w.bccΝ;֭[}a5ꫯ)))E~~>8dffbҥUݼy'OFii)piL&ŋy X$asݻ;wÇ8s y4 QIBҝ "rJ^[b;}߿^GDD_5}pss!66Jc޼yF^^|. شiLMMqe*MNNƠA#Hr}appp5&""j^BDNݹsGa+))$Ix!nݺU\ eƏHMMEvv<$aٲeJM0PTT!;wn I&)WM999(**h%0!"j*<!)) HHH;vUn:5 fffd啷0 !j `4R?_~߇$I(ToĈ͛7aoo>gϞEYYYuQ`BDAd2888`֭:yO 0K,?hhhLPJն4JSW""|$I2d(\HIIRk֬gr`ӦMɩWZ$PBDD-Qr-ذa Æ  BL>r 5j޽tRݻ;v,Ν;qsxxx@CCCioGΝ;n*//O?%K<<` page. A comparison of the atomization energies of the g2-1 test-set calculated in VASP, Gaussian03, and GPAW is shown in the below two figures for the PBE and the PBE0 functional respectively. .. image:: g2test_pbe.png .. image:: g2test_pbe0.png In the last figure, the curve marked ``GPAW (nonself.)`` is a non self-consistent PBE0 calculation using self-consistent PBE orbitals. It should be noted, that the implementation lacks an optimized effective potential. Therefore the unoccupied states utilizing EXX as implemented in GPAW usually approximate (excited) electron affinities. 
Therefore calculations utilizing Hartree-Fock exchange are usually a bad basis for the calculation of optical excitations by lrTDDFT. As a remedy, the improved virtual orbitals (IVOs, [HA71]_) were implemented. The requested excitation basis can be chosen by the keyword ``excitation`` and the state by ``excited`` where the state is counted from the HOMO downwards: .. literalinclude:: ivo_hft.py Support for IVOs in lrTDDFT is done along the work of Berman and Kaldor [BK79]_. If the number of bands in the calculation exceeds the number of bands delivered by the datasets, GPAW initializes the missing bands randomly. Calculations utilizing Hartree-Fock exchange can only use the RMM-DIIS eigensolver. Therefore the states might not converge to the energetically lowest states. To circumvent this problem on can made a calculation using a semi-local functional like PBE and uses this wave-functions as a basis for the following calculation utilizing Hartree-Fock exchange as shown in the following code snippet which uses PBE0 in conjuncture with the IVOs: .. literalinclude:: rsf_ivo_nacl.py .. [AB98] C. Adamo and V. Barone. *Toward Chemical Accuracy in the Computation of NMR Shieldings: The PBE0 Model.*. Chem. Phys. Lett. 298.1 (11. Dec. 1998), S. 113–119. .. [Ba94] V. Barone. *Inclusion of Hartree–Fock exchange in density functional methods. Hyperfine structure of second row atoms and hydrides*. Jour. Chem. Phys. 101.8 (1994), S. 6834–6838. .. [BK79] M. Berman and U. Kaldor. *Fast calculation of excited-state potentials for rare-gas diatomic molecules: Ne2 and Ar2*. Chem. Phys. 43.3 (1979), S. 375–383. .. [HA71] S. Huzinaga and C. Arnau. *Virtual Orbitals in Hartree–Fock Theory. II*. Jour. Chem. Phys. 54.5 (1. Ma. 1971), S. 1948–1951. gpaw-24.1.0/doc/documentation/xc/functionals.rst000066400000000000000000000124241454550013000216430ustar00rootroot00000000000000.. 
_xc_functionals: ==================================== Exchange and correlation functionals ==================================== .. index:: libxc Libxc ===== We used the functionals from libxc_. ... Calculation of GGA potential ============================ In libxc_ we have (see also "Standard subroutine calls" on ccg_dft_design_) `\sigma_0=\sigma_{\uparrow\uparrow}`, `\sigma_1=\sigma_{\uparrow\downarrow}` and `\sigma_2=\sigma_{\downarrow\downarrow}` with .. math:: \sigma_{ij} = \mathbf{\nabla}n_i \cdot \mathbf{\nabla}n_j .. _libxc: http://www.tddft.org/programs/octopus/wiki/index.php/Libxc .. _ccg_dft_design: http://www.cse.scitech.ac.uk/ccg/dft/design.html Uniform 3D grid =============== We use a finite-difference stencil to calculate the gradients: .. math:: \mathbf{\nabla}n_g = \sum_{g'} \mathbf{D}_{gg'} n_{g'}. The `x`-component of `\mathbf{D}_{gg'}` will be non-zero only when `g` and `g'` grid points are neighbors in the `x`-direction, where the values will be `1/(2h)` when `g'` is to the right of `g` and `-1/(2h)` when `g'` is to the left of `g`. Similar story for the `y` and `z` components. Let's look at the spin-`k` XC potential from the energy expression `\sum_g\epsilon(\sigma_{ijg})`: .. math:: v_{kg} = \sum_{g'} \frac{\partial \epsilon(\sigma_{ijg'})}{\partial n_{kg}} = \sum_{g'} \frac{\partial \epsilon(\sigma_{ijg'})}{\partial \sigma_{ijg'}} \frac{\partial \sigma_{ijg'}}{\partial n_{kg}} Using `v_{ijg}=\partial \epsilon(\sigma_{ijg})/\partial \sigma_{ijg}`, `\mathbf{D}_{gg'}=-\mathbf{D}_{g'g}` and .. math:: \frac{\partial \sigma_{ijg'}}{\partial n_{kg}} = (\delta_{jk} \mathbf{D}_{g'g} \cdot \mathbf{\nabla}n_{ig'} + \delta_{ik} \mathbf{D}_{g'g} \cdot \mathbf{\nabla}n_{jg'}), we get: .. math:: v_{kg} = -\sum_{g'} \mathbf{D}_{gg'} \cdot (v_{ijg'} [\delta_{jk} \mathbf{\nabla}n_{ig'} + \delta_{ik} \mathbf{\nabla}n_{jg'}]). The potentials from the general energy expression `\sum_g\epsilon(\sigma_{0g}, \sigma_{1g}, \sigma_{2g})` will be: .. 
math:: v_{\uparrow g} = -\sum_{g'} \mathbf{D}_{gg'} \cdot (2v_{\uparrow\uparrow g'} \mathbf{\nabla}n_{\uparrow g'} + v_{\uparrow\downarrow g'} \mathbf{\nabla}n_{\downarrow g'}) and .. math:: v_{\downarrow g} = -\sum_{g'} \mathbf{D}_{gg'} \cdot (2v_{\downarrow\downarrow g'} \mathbf{\nabla}n_{\downarrow g'} + v_{\uparrow\downarrow g'} \mathbf{\nabla}n_{\uparrow g'}). PAW correction ============== Spin-paired case: .. math:: \Delta E = \sum_g 4 \pi w r_g^2 \Delta r_g [\epsilon(n_g, \sigma_g) - \epsilon(\tilde n_g, \tilde\sigma_g)], where `w` is the weight ... .. math:: n_g = \sum_{i_1i_2} D_{i_1i_2} \phi_{j_1g} Y_{L_1} \phi_{j_2g} Y_{L_2} + n_c(r_g) = \sum_L n_{Lg} Y_L, where .. math:: n_{Lg} = \sum_q D_{Lq} n_{qg} + \delta_{L,0} \sqrt{4 \pi} n_c(r_g) and .. math:: D_{Lq} = \sum_p D_p G_{L_1L_2}^L \delta_{q_p,q} = \sum_p D_p B_{Lpq}. .. math:: \mathbf{\nabla} n_g = \sum_L Y_L \sum_{g'} D_{gg'} n_{Lg'} \hat{\mathbf{r}} + \sum_L \frac{n_{Lg}}{r_g} r \mathbf{\nabla} Y_L = a_g \hat{\mathbf{r}} + \mathbf{b}_g / r_g. Notice that `r \mathbf{\nabla} Y_L` is independent of `r` - just as `Y_L` is. From the two contributions, which are orthogonal (`\hat{\mathbf{r}} \cdot \mathbf{b}_g = 0`), we get .. math:: \sigma_g = a_g^2 + \mathbf b_g \cdot \mathbf b_g / r_g^2. .. math:: \frac{\partial \Delta E}{\partial n_{Lg}} = 4 \pi w \sum_{g'} r_{g'}^2 \Delta r_{g'} \frac{\partial \epsilon}{\partial \sigma_{g'}} \frac{\partial \sigma_{g'}}{\partial n_{Lg}}. Inserting .. math:: \frac{\partial \sigma_{g'}}{\partial n_{Lg}} = 2 a_{g'} Y_L D_{g'g} + 2 \mathbf b_g \cdot (r \mathbf{\nabla} Y_L) \delta_{gg'} / r_g^2, we get .. math:: \frac{\partial \Delta E}{\partial n_{Lg}} = 8 \pi w \sum_{g'} r_{g'}^2 \Delta r_{g'} \frac{\partial \epsilon}{\partial \sigma_{g'}} a_{g'} Y_L D_{g'g} + 8 \pi w \Delta r_g \frac{\partial \epsilon}{\partial \sigma_g} \mathbf b_g \cdot (r \mathbf{\nabla} Y_L). Non-collinear case ------------------ .. math:: \mathbf{m}_g = \sum_L \mathbf{M}_{Lg} Y_L. ..
math:: n_{\alpha g} = (n_g + \alpha m_g) / 2. .. math:: 2 \mathbf{\nabla} n_{\alpha g} = \mathbf{\nabla} n_g + \alpha \sum_L ( Y_L \sum_{g'} D_{gg'} \frac{\mathbf{m}_g \cdot \mathbf{M}_{Lg'}}{m_g} \hat{\mathbf{r}} + \frac{\mathbf{m}_g \cdot \mathbf{M}_{Lg}}{m_g r_g} r \mathbf{\nabla} Y_L) .. math:: = (a_g + \alpha c_g) \hat{\mathbf{r}} + (\mathbf{b}_g + \alpha \mathbf{d}_g) / r_g. .. math:: 4 \sigma_{\alpha \beta g} = (a_g + \alpha c_g) (a_g + \beta c_g) + (\mathbf{b}_g + \alpha \mathbf{d}_g) \cdot (\mathbf{b}_g + \beta \mathbf{d}_g) / r_g^2. .. math:: \frac{\partial c_g}{\partial \mathbf{M}_{Lg'}} = \frac{Y_L}{m_g} ( D_{gg'} \mathbf{m}_g + \delta_{gg'} \mathbf{m}_g' - \delta_{gg'} \frac{\mathbf{m}_g \cdot \mathbf{m}_g'}{m_g^2} \mathbf{m}_g). .. math:: \frac{\partial (\mathbf{d}_g)_\gamma}{\partial \mathbf{M}_{Lg'}} = \frac{Y_L \delta_{gg'}}{m_g} ( \mathbf{m}_g r \nabla_\gamma Y_L + \sum_{L'} \mathbf{M}_{L'g} r \nabla_\gamma Y_{L'} - \frac{\mathbf{m}_g}{m_g^2} \sum_{L'} \mathbf{m}_g \cdot \mathbf{M}_{L'g} r \nabla_\gamma Y_{L'}). gpaw-24.1.0/doc/documentation/xc/hydrogen_atom.py000066400000000000000000000026551454550013000220020ustar00rootroot00000000000000"""EXX hydrogen atom. Compare self-consistent EXX calculation with non self-consistent EXX calculation on top of LDA. """ from ase import Atoms from ase.units import Ry from gpaw import GPAW, PW from gpaw.hybrids.eigenvalues import non_self_consistent_eigenvalues from gpaw.hybrids.energy import non_self_consistent_energy atoms = Atoms('H', magmoms=[1.0]) atoms.center(vacuum=5.0) # Self-consistent calculation: atoms.calc = GPAW(mode=PW(600), xc='EXX:backend=pw') eexx = atoms.get_potential_energy() + atoms.calc.get_reference_energy() # Check energy eexxref = -1.0 * Ry assert abs(eexx - eexxref) < 0.001 # ... and eigenvalues eig1, eig2 = (atoms.calc.get_eigenvalues(spin=spin)[0] for spin in [0, 1]) eigref1 = -1.0 * Ry eigref2 = ... # ? 
assert abs(eig1 - eigref1) < 0.03 # assert abs(eig2 - eigref2) < 0.03 # LDA: atoms.calc = GPAW(mode=PW(600), xc='LDA') atoms.get_potential_energy() # Check non self-consistent eigenvalues result = non_self_consistent_eigenvalues(atoms.calc, 'EXX', snapshot='h-hse-snapshot.json') eiglda, vlda, vexx = result eig1b, eig2b = (eiglda - vlda + vexx)[:, 0, 0] assert abs(eig1b - eig1) < 0.04 assert abs(eig2b - eig2) < 1.1 # ... and energy energies = non_self_consistent_energy(atoms.calc, 'EXX') eexxb = energies.sum() + atoms.calc.get_reference_energy() assert abs(eexxb - eexx) < 0.03 gpaw-24.1.0/doc/documentation/xc/ivo_hft.py000066400000000000000000000026351454550013000205770ustar00rootroot00000000000000"""Calculate the excitation energy of NaCl by an RSF using IVOs.""" from ase.build import molecule from ase.units import Hartree from gpaw import GPAW, setup_paths from gpaw.mpi import world from gpaw.occupations import FermiDirac from gpaw.test import gen from gpaw.eigensolvers import RMMDIIS from gpaw.cluster import Cluster from gpaw.lrtddft import LrTDDFT h = 0.3 # Gridspacing e_singlet = 4.3 e_singlet_lr = 4.3 if setup_paths[0] != '.': setup_paths.insert(0, '.') gen('Na', xcname='PBE', scalarrel=True, exx=True, yukawa_gamma=0.40) gen('Cl', xcname='PBE', scalarrel=True, exx=True, yukawa_gamma=0.40) c = {'energy': 0.005, 'eigenstates': 1e-2, 'density': 1e-2} mol = Cluster(molecule('NaCl')) mol.minimal_box(5.0, h=h) calc = GPAW(mode='fd', txt='NaCl.txt', xc='LCY-PBE:omega=0.40:excitation=singlet', eigensolver=RMMDIIS(), h=h, occupations=FermiDirac(width=0.0), spinpol=False, convergence=c) mol.calc = calc mol.get_potential_energy() (eps_homo, eps_lumo) = calc.get_homo_lumo() e_ex = eps_lumo - eps_homo assert abs(e_singlet - e_ex) < 0.15 calc.write('NaCl.gpw') lr = LrTDDFT(calc, txt='LCY_TDDFT_NaCl.log', restrict={'istart': 6, 'jend': 7}, nspins=2) lr.write('LCY_TDDFT_NaCl.ex.gz') if world.rank == 0: lr2 = LrTDDFT.read('LCY_TDDFT_NaCl.ex.gz') lr2.diagonalize() ex_lr = 
lr2[1].get_energy() * Hartree assert abs(e_singlet_lr - e_singlet) < 0.05 gpaw-24.1.0/doc/documentation/xc/libvdwxc-example.py000066400000000000000000000006601454550013000224100ustar00rootroot00000000000000from ase.build import molecule from gpaw import GPAW atoms = molecule('H2O') atoms.center(vacuum=3.0) # There are these functionals: vdW-DF, vdW-DF2, vdW-DF-cx, optPBE-vdW, # optB88-vdW, C09-vdW, BEEF-vdW, and mBEEF-vdW. # There are three modes: serial, mpi, and pfft. Default is auto. calc = GPAW(mode='fd', xc={'name': 'BEEF-vdW', 'backend': 'libvdwxc', 'mode': 'mpi'}) atoms.calc = calc atoms.get_potential_energy() gpaw-24.1.0/doc/documentation/xc/libvdwxc-pfft-example.py000066400000000000000000000005501454550013000233430ustar00rootroot00000000000000from ase.build import bulk from gpaw import GPAW, PW from gpaw.xc.libvdwxc import vdw_df_cx # "Large" system: atoms = bulk('Cu').repeat((2, 2, 2)) calc = GPAW(mode=PW(600), kpts=(4, 4, 4), xc=vdw_df_cx(mode='pfft', pfft_grid=(2, 2)), parallel=dict(kpt=4, augment_grids=True)) atoms.calc = calc atoms.get_potential_energy() gpaw-24.1.0/doc/documentation/xc/libvdwxc.rst000066400000000000000000000023621454550013000211400ustar00rootroot00000000000000.. _libvdwxc-doc: libvdwxc ======== `libvdwxc `_ is a library which provides fast and scalable implementations of non-local van der Waals density functionals in the vdW-DF family. To use libvdwxc, you need to install it and compile GPAW with it. libvdwxc can be used with other semilocal functionals like optPBE, optB88, and BEEF-vdW. `Install `_ libvdwxc, making sure that its dependencies FFTW3 and FFTW3-MPI are available on the system. For truly large systems, you may install PFFT to achieve better scalability. For realistically-sized systems, FFTW3-MPI is efficient and might be a bit faster than PFFT. Run a calculation by specifying backend, like {'name':'BEEF-vdW', 'backend':'libvdwxc'}, as in this example: .. 
literalinclude:: libvdwxc-example.py libvdwxc will automatically parallelize with as many cores as are available for domain decomposition. If you parallelize over *k*-points or bands, and *especially* if you use planewave mode, be sure to pass the parallelization keyword ``augment_grids=True`` to make use of *all* cores including those for *k*-point and band parallelization (see :ref:`parallel_runs`). Note that libvdwxc 0.4 has no stress term implementation. gpaw-24.1.0/doc/documentation/xc/qna.rst000066400000000000000000000013721454550013000200750ustar00rootroot00000000000000.. module:: gpaw.xc.qna .. _qna: ================================================== Quasi-non-local exchange correlation approximation ================================================== Rationale --------- Using QNA --------- Code example:: from gpaw import GPAW, PW from ase.lattice.compounds import L1_2 QNA = {'alpha': 2.0, 'name': 'QNA', 'orbital_dependent': False, 'parameters': {'Au': (0.125, 0.1), 'Cu': (0.0795, 0.005)}, 'setup_name': 'PBE', 'type': 'qna-gga'} atoms = L1_2(['Au','Cu'],latticeconstant=3.74) calc = GPAW(mode=PW(300), xc = QNA, kpts=kpts, txt='AuCu3_QNA.txt') atoms.calc = calc atoms.get_potential_energy() .. autoclass:: gpaw.xc.qna.QNA :members: gpaw-24.1.0/doc/documentation/xc/range_separated_functionals.rst000066400000000000000000000171761454550013000250600ustar00rootroot00000000000000.. _rsf: ================================= Range separated functionals (RSF) ================================= Introduction ============ Range separated functionals (RSF) are a subgroup of hybrid functionals. While conventional (global) hybrid functionals like PBE0 or B3LYP use fixed fractions of Hartree-Fock (HFT, E\ :sub:`XX`\ ) and DFT (E\ :sub:`X`\ ) exchange for exchange, f.e. 1/4 E\ :sub:`XX`\ and 3/4 E\ :sub:`X`\ in the case of PBE0, RSFs mix the two contributions by the spatial distance between two points, `r_{12}`, using a soft function `\omega_\mathrm{RSF}(\gamma, r_{12})`. 
To achieve this, the coulomb interaction kernel, `\frac{1}{r_{12}} = \frac{1}{|r_1 - r_2|}`, which appears in the exchange integral from HFT is split into two parts: `\frac{1}{r_{12}} = \underbrace{\frac{1 - [\alpha + \beta ( 1 - \omega_\mathrm{RSF} (\gamma, r_{12}))]}{r_{12}}}_{\text{SR, DFT}} + \underbrace{\frac{\alpha + \beta ( 1 - \omega_\mathrm{RSF} (\gamma, r_{12}))}{r_{12}}}_{\text{LR, HFT}}`, the short-range (SR) part is handled by the exchange from a (semi-)local LDA or GGA functional such as PBE, while the long-range part (LR) is handled by the exchange from HFT. `\alpha` and `\beta` are functional dependent mixing parameters. `\alpha \ne 0` and `\beta = 0` resembles conventional global hybrids. RSFs with `\alpha = 0` and `\beta \ne 0` are usually denoted by ``LC`` and the name of the semi-local functional, f.e. LC-PBE. RSFs with `\alpha \ne 0` and `\beta \ne 0` are usually denoted by ``CAM`` and the name of the semi-local functional, f.e. CAM-BLYP. For the separating function `\omega_\mathrm{RSF}`, two functions are in common use: either the complementary error function, `\omega_\mathrm{RSF} = \mathrm{erfc}(\gamma r_{12})`, or the Slater-function, `\omega_\mathrm{RSF} = e^{(-\gamma r_{12})}`. While the use of the complementary error function is computationally fortunate for codes utilizing Gaussian type basis sets, the Slater-function gives superior results in the calculation of Rydberg-state and charge transfer excitations. To distinguish between these two functions, functionals using the Slater-function append the letter "Y" to the RSF marker, f.e. LCY-PBE or CAMY-B3LYP, while functionals using the complementary error function keep the marker as it is, f.e. LC-PBE or CAM-B3LYP. Besides `r_{12}`, the separation functions use a second parameter, the screening factor `\gamma`. The optimal value for `\gamma` is under discussion. A density dependence is stated. 
For most RSF standard values for `\gamma` are defined, although it is possible to tune `\gamma` to optimal values for calculations investigating ionization potentials, charge transfer excitations and the binding curves of bi-radical cations. Implementation ============== The implementation of RSFs in gpaw consists of two parts: * once the implementation of the semi-local functional part. This is done in libxc. * once the implementation of the Hartree-Fock exchange. This is done in ``hybrid.py``. As range separating function the Slater-function, `\omega_\mathrm{RSF} = e^{(-\gamma r_{12})}`, is used. Besides the possibility to set `\gamma` to an arbitrary value, the following functionals were implemented: ========== ======== ======= ===================== ========= Functional `\alpha` `\beta` `\gamma` (`a_0^{-1}`) Reference ========== ======== ======= ===================== ========= CAMY-BLYP 0.2 0.8 0.44 [AT08]_ CAMY-B3LYP 0.19 0.46 0.34 [SZ12]_ LCY-BLYP 0.0 1.0 0.75 [SZ12]_ LCY-PBE 0.0 1.0 0.75 [SZ12]_ ========== ======== ======= ===================== ========= As the implementation of RSFs in gpaw is based on the finite difference exact exchange code (hybrid.py), the implementation inherits its positive and negative properties, in summary: * self-consistent calculations using RSFs * calculations can only be done for the `\Gamma` point * only non-periodic boundary conditions can be used * only RMMDIIS can be used as eigensolver Important: As one of the major benefits of the RSF is to retain the `\frac{1}{r}` asymptote of the exchange potential, one has to use large boxes if neutral or anionic systems are considered. Large boxes start at 6Å vacuum around each atom. For anionic systems "large" should be extended. Further information about the implementation and RSFs can be found in [WW18]_ and in detail in [Wu16]_. Simple usage ============ In general calculations using RSF can simply be done choosing the appropriate functional as in the following snippet: .. 
literalinclude:: rsf_simple.py Three main points can be seen already in this small snippet. Even if choosing the RSF is quite simple by choosing ``xc=LCY-PBE``, one has to choose RMMDIIS as eigensolver, ``eigensolver=RMMDIIS()``, and has to decrease the convergence criteria a little. Improving results ================= However, there are a few drawbacks, at first in an SCF calculation the contributions from the core electrons are also needed, which have to be calculated during the generation of the PAW datasets. Second: for the calculation of the exchange on the Cartesian grid, the (screened) Poisson equation has to be solved numerically. For a charged system, as f.e. the exchange of a state with itself, one has to neutralize the charge by subtracting a Gaussian representing the "over-charge", solve the (screened) Poisson-equation for the neutral system and add the solution for the Gaussian to the solution for the neutral system. However, if the charge to remove is "off-center", the center of the neutralizing charge should match the center of the "over-charge" preventing an artificial dipole. The latter is done by using a Poisson solver which uses the charge center for removal: ``poissonsolver=PoissonSolver(use_charge_center=True)``. The next listing shows these two steps: .. literalinclude:: rsf_setup_poisson.py The generation of PAW-datasets can also be done by ``gpaw-setup -f PBE -x --gamma=0.75 C O`` Tuning `\gamma` =============== As stated in the introduction, the optimal value for `\gamma` is under discussion. One way to find the optimal value for `\gamma` for ionization potentials is to tune `\gamma` in a way, that the negative eigenvalue of the HOMO matches the calculated IP. To use different values of `\gamma`, one has to pass the desired value of `\gamma` to the variable ``omega``. .. 
literalinclude:: rsf_gamma.py linear response TDDFT ===================== One of the major benefits of RSF is their ability to describe long-range charge transfer by linear response time-dependent DFT (lrTDDFT). If one uses RSF with lrTDDFT one has at least to activate the use of the Fock operator (FO) on the unoccupied states. Also the charge centered compensation of the over charge should be activated, see [Wu16]_ for details. The use of the FO on the unoccupied states is activated by the keyword ``unocc=True`` as in the following code: .. literalinclude:: rsf_lrtddft.py .. [AT08] Y. Akinaga and S. Ten-no. *Range-separation by the Yukawa potential in long-range corrected density functional theory with Gaussian-type basis functions*. Chemical Physics Letters 462.4 (10. Sep. 2008), S. 348–351. .. [SZ12] M. Seth and T. Ziegler. *Range-Separated Exchange Functionals with Slater-Type Functions*. J. Chem. Theory Comput. 8.3 (2012), S. 901–907. .. [Wu16] R. Würdemann. *Berechnung optischer Spektren und Grundzustandseigenschaften neutraler und geladener Moleküle mittels Dichtefunktionaltheorie*, PhD-Thesis. :doi:`10.6094/UNIFR/11315` .. [WW18] R. Würdemann and M. Walter. *Charge Transfer Excitations with Range Separated Functionals Using Improved Virtual Orbitals*. J. Chem. Theory Comput. 14.7 (2018), S. 3667-3676 gpaw-24.1.0/doc/documentation/xc/rpa.rst000066400000000000000000000166231454550013000201050ustar00rootroot00000000000000.. _rpa: ======================= RPA correlation energy ======================= The correlation energy within the Random Phase Approximation (RPA) can be written .. math:: E_c^{RPA} = \int_0^{\infty}\frac{d\omega}{2\pi}\text{Tr}\Big[\text{ln}\{1-\chi^0(i\omega)v\}+\chi^0(i\omega)v\Big], where `\chi^0(i\omega)` is the non-interacting (Kohn-Sham) response function evaluated at complex frequencies, `\text{Tr}` is the Trace and `\it{v}` is the Coulomb interaction. 
The response function and Coulomb interaction are evaluated in a plane wave basis as described in :ref:`df_tutorial` and :ref:`df_theory` and for periodic systems the Trace therefore involves a summation over `\mathbf{q}`-points, which are determined from the Brillouin zone sampling used when calculating `\chi^0(i\omega)`. The RPA correlation energy is obtained by:: from gpaw.xc.rpa import RPACorrelation rpa = RPACorrelation(calc, txt='rpa_correlation.txt', ecut=400) E_rpa = rpa.calculate() where calc is either a calculator object containing converged wavefunctions from a ground state calculation or a string reference to a .gpw file containing wavefunctions. If calc is a calculator object it should be loaded in serial since the RPA parallelization scheme is rather different from that of standard DFT calculations. txt denotes the output file. The RPACorrelation also takes a number of optional keywords described below. The calculate() function performs the actual calculation at the cutoff energy specified by ecut (in eV). In addition the rpa calculator will calculate the correlation energy at four values for the cutoff energies up to the specified cutoff, but one can also give a list of cutoff values instead. By default, the response function is calculated with the same number of bands as the number of plane waves, but one can also specify that it should use N bands with nbands=N in the calculate() function. Parameters ========== =================== ================== =================== ================================================================== keyword type default value description =================== ================== =================== ================================================================== ``nfrequencies`` ``int`` 16 Number of Gauss-Legendre points used in the integration. ``frequency_cut`` ``float`` 800. (eV) The maximum frequency is the largest frequency included in the Gauss-Legendre integration. 
The integral is always an approximation to the infinite integral, but the max frequency determines the distribution of frequencies. ``frequency_scale`` ``float`` 2.0 (eV) The frequency scale sets the density of frequency points near `\omega = 0`. ``frequencies`` ``numpy.ndarray`` None Specifies frequency points used to integrate the correlation integrand. Ex: numpy.linspace(0,20,201). If None, the Gauss-Legendre method is used. ``weights`` ``numpy.ndarray`` None Should be used in conjunction with frequencies (i.e. when not using the Gauss-Legendre integration). For example np.array([0.5,1,1,...,1,1,0.5]) gives a trapezoid integration ``skip_gamma`` ``bool`` False For metals the `\mathbf{q} = 0` point can give rise to divergent contributions and it may be faster to converge the k-point sampling if this point is excluded. ``nblocks`` ``int`` 1 **G**-vector parallelization. Default parallelization scheme is over kpoints, spin and bands. If memory becomes an issue it can be an advantage to use **G**-vector parallelization also. ``filename`` ``str`` None Restart file. For calculations with k-point sampling, the contributions from different q-points are calculated sequentially and written to filename such that these do not have to be recalculated when a calculation is restarted. =================== ================== =================== ================================================================== In addition to the usual kpoint and plane wave cutoff, the RPA correlation energy needs to be converged with respect to a plane wave cutoff in the response function (set by ecut) and the frequency integration. As it turns out, the integrand is usually rather smooth and one can perform the integration with 8-16 (special!) Gauss-Legendre frequency points, but see the tutorial :ref:`rpa_tut` for an example of converging the frequency integration. 
Convergence =========== A major complication with the RPA correlation energy is that it converges very slowly with the number of unoccupied bands included in the evaluation of `\chi^0(i\omega)`. However, as described in Ref. \ [#Harl1]_ the high energy part of the response function resembles the Lindhard function, which for high energies gives a correlation energy converging as .. math:: E_c^{Lindhard}(E^{\chi}_{cut}) = E_c^{\infty}+\frac{A}{(E^{\chi}_{cut})^{3/2}}, where `E^{\chi}_{cut}` is cutoff energy used in the evaluation of `\chi^0`. With an external potential, the number of unoccupied bands is an additional convergence parameter, but for reproducing the scaling of the Lindhard function, it is natural to set the total number of bands equal to the number of plane waves used. Thus, to obtain a converged RPA correlation energy one should proceed in three steps. * Perform a ground state calculation with a lot of converged unoccupied bands. * Define a list of cutoff energies - typically something like [200, 225, 250, 275, 300] (eV). For each cutoff energy perform an RPA correlation energy calculation with the number bands `n` set equal to the number of plane waves defined by that cutoff energy. * Fit the list of obtained correlation energies to `E_c^{RPA}(E) = E_c^{\infty}+A/E^{3/2}` to obtain `E_c^{\infty}=E_c^{RPA}`. Per default, the rpa module defines a list of five cutoff energies up to the specified value and performs the extrapolation at the end of the calculation. If one is not interested in the total correlation energy, but only energy differences between similar systems, it is sometimes possible to avoid the extrapolation procedure and the rpa correlation energy can be obtained at a single point by specifying a list with one element (for example ecut=[400]). .. [#Harl1] J. Harl and G. Kresse, *Phys. Rev. B* **77**, 045136 (2008) .. [#Harl2] J. Harl and L. Schimka and G. Kresse, *Phys. Rev. 
B* **81**, 115126 (2010) gpaw-24.1.0/doc/documentation/xc/rsf_gamma.py000066400000000000000000000021101454550013000210610ustar00rootroot00000000000000"""Calculation utilizing RSF with optimized gamma.""" from ase import Atoms from gpaw import GPAW, setup_paths from gpaw.poisson import PoissonSolver from gpaw.eigensolvers import RMMDIIS from gpaw.occupations import FermiDirac from gpaw.test import gen # IP for CO using LCY-PBE with gamma=0.81 after # dx.doi.org/10.1021/acs.jctc.8b00238 IP = 14.31 if setup_paths[0] != '.': setup_paths.insert(0, '.') for atom in ['C', 'O']: gen(atom, xcname='PBE', scalarrel=True, exx=True, yukawa_gamma=0.81) h = 0.30 co = Atoms('CO', positions=[(0, 0, 0), (0, 0, 1.15)]) co.minimal_box(5) # c = {'energy': 0.005, 'eigenstates': 1e-4} # Usable values c = {'energy': 0.1, 'eigenstates': 3, 'density': 3} # Values for test calc = GPAW(mode='fd', txt='CO.txt', xc='LCY-PBE:omega=0.81', convergence=c, eigensolver=RMMDIIS(), h=h, poissonsolver=PoissonSolver(use_charge_center=True), occupations=FermiDirac(width=0.0), spinpol=False) co.calc = calc co.get_potential_energy() (eps_homo, eps_lumo) = calc.get_homo_lumo() assert abs(eps_homo - -IP) < 0.15 gpaw-24.1.0/doc/documentation/xc/rsf_ivo_nacl.py000066400000000000000000000034571454550013000216100ustar00rootroot00000000000000"""Test calculation for unoccupied states using IVOs.""" from ase.build import molecule from gpaw.cluster import Cluster from gpaw import GPAW, KohnShamConvergenceError, FermiDirac from gpaw.eigensolvers import CG, RMMDIIS calc_parms = [ {'xc': 'PBE0:unocc=True', 'eigensolver': RMMDIIS(niter=5), 'convergence': { 'energy': 0.005, 'bands': -2, 'eigenstates': 1e-4, 'density': 1e-3}}, {'xc': 'PBE0:excitation=singlet', 'convergence': { 'energy': 0.005, 'bands': 'occupied', 'eigenstates': 1e-4, 'density': 1e-3}}] def calc_me(atoms, nbands): """Do the calculation.""" molecule_name = atoms.get_chemical_formula() atoms.set_initial_magnetic_moments([-1.0, 1.0]) fname = 
'.'.join([molecule_name, 'PBE-SIN']) calc = GPAW(mode='fd', h=0.25, xc='PBE', eigensolver=CG(niter=5), nbands=nbands, txt=fname + '.log', occupations=FermiDirac(0.0, fixmagmom=True), convergence={ 'energy': 0.005, 'bands': nbands, 'eigenstates': 1e-4, 'density': 1e-3}) atoms.calc = calc try: atoms.get_potential_energy() except KohnShamConvergenceError: pass if calc.scf.converged: for calcp in calc_parms: calc.set(**calcp) try: calc.calculate(system_changes=[]) except KohnShamConvergenceError: break if calc.scf.converged: calc.write(fname + '.gpw', mode='all') loa = Cluster(molecule('NaCl')) loa.minimal_box(border=6.0, h=0.25, multiple=16) loa.center() loa.translate([0.001, 0.002, 0.003]) nbands = 25 calc_me(loa, nbands) gpaw-24.1.0/doc/documentation/xc/rsf_lrtddft.py000066400000000000000000000032321454550013000214500ustar00rootroot00000000000000"""Check TDDFT ionizations with Yukawa potential.""" from ase.structure import molecule from ase.units import Hartree from gpaw import GPAW from gpaw.mpi import world from gpaw.cluster import Cluster from gpaw.occupations import FermiDirac from gpaw.eigensolvers import RMMDIIS from gpaw.lrtddft import LrTDDFT h2o = Cluster(molecule('H2O')) h2o.set_initial_magnetic_moments([2, -1, -1]) h2o.minimal_box(3.0, h=0.3) h2o_plus = Cluster(molecule('H2O')) h2o_plus.set_initial_magnetic_moments([2, -0.5, -0.5]) h2o_plus.minimal_box(3.0, h=0.3) def get_paw(): """Return calculator object.""" c = {'energy': 0.001, 'eigenstates': 0.001, 'density': 0.001} return GPAW(mode='fd', convergence=c, eigensolver=RMMDIIS(), xc='LCY-PBE:omega=0.83:unocc=True', parallel={'domain': world.size}, h=0.3, occupations=FermiDirac(width=0.0, fixmagmom=True)) calc = get_paw() calc.set(txt='H2O_LCY_PBE_083.log') calc_plus = get_paw() calc_plus.set(txt='H2O_plus_LCY_PBE_083.log', charge=1) h2o.calc = calc e_h2o = h2o.get_potential_energy() h2o_plus.calc = calc_plus e_h2o_plus = h2o_plus.get_potential_energy() e_ion = e_h2o_plus - e_h2o print(e_ion, 12.62) 
assert abs(e_ion - 12.62) < 0.1 lr = LrTDDFT(calc_plus, txt='LCY_TDDFT_H2O.log', jend=4) assert lr.xc.omega == 0.83 lr.write('LCY_TDDFT_H2O.ex.gz') # reading is problematic with EXX on more than one core if world.rank == 0: lr2 = LrTDDFT('LCY_TDDFT_H2O.ex.gz') lr2.diagonalize() assert lr2.xc.omega == 0.83 for i, ip_i in enumerate([14.74, 18.51]): ion_i = lr2[i].get_energy() * Hartree + e_ion print(ion_i, ip_i) assert abs(ion_i - ip_i) < 0.6 gpaw-24.1.0/doc/documentation/xc/rsf_setup_poisson.py000066400000000000000000000016331454550013000227220ustar00rootroot00000000000000"""Calculations using RSF with dedicated datasets and Poisson-solver.""" from ase import Atoms from gpaw import GPAW, setup_paths from gpaw.poisson import PoissonSolver from gpaw.eigensolvers import RMMDIIS from gpaw.occupations import FermiDirac from gpaw.test import gen if setup_paths[0] != '.': setup_paths.insert(0, '.') for atom in ['C', 'O']: gen(atom, xcname='PBE', scalarrel=True, exx=True, yukawa_gamma=0.75) h = 0.30 co = Atoms('CO', positions=[(0, 0, 0), (0, 0, 1.15)]) co.center(5) # c = {'energy': 0.005, 'eigenstates': 1e-4} # Usable values c = {'energy': 0.1, 'eigenstates': 3, 'density': 3} # Values for test calc = GPAW(mode='fd', txt='CO.txt', xc='LCY-PBE', convergence=c, eigensolver=RMMDIIS(), h=h, poissonsolver=PoissonSolver(use_charge_center=True), occupations=FermiDirac(width=0.0), spinpol=False) co.calc = calc co.get_potential_energy() gpaw-24.1.0/doc/documentation/xc/rsf_simple.py000066400000000000000000000011001454550013000212660ustar00rootroot00000000000000"""First example for using RSF.""" from ase import Atoms from gpaw import GPAW from gpaw.eigensolvers import RMMDIIS from gpaw.occupations import FermiDirac h = 0.30 co = Atoms('CO', positions=[(0, 0, 0), (0, 0, 1.15)]) co.center(5) # c = {'energy': 0.005, 'eigenstates': 1e-4} # Usable values c = {'energy': 0.1, 'eigenstates': 3, 'density': 3} # Values for test calc = GPAW(mode='fd', txt='CO.txt', xc='LCY-PBE', convergence=c, 
eigensolver=RMMDIIS(), h=h, occupations=FermiDirac(width=0.0), spinpol=False) co.calc = calc co.get_potential_energy() gpaw-24.1.0/doc/documentation/xc/s26_set.py000066400000000000000000000037031454550013000204230ustar00rootroot00000000000000import sys from ase import Atoms from ase.parallel import paropen from ase.data.s22 import data from ase.calculators.vdwcorrection import vdWTkatchenko09prl from gpaw import GPAW, FermiDirac from gpaw.cluster import Cluster from gpaw.analyse.hirshfeld import HirshfeldPartitioning from gpaw.analyse.vdwradii import vdWradii try: from dftd4 import D4_model except ModuleNotFoundError: pass h = 0.18 box = 4. xc = 'TS09' if len(sys.argv) > 1: xc = sys.argv[1] f = paropen('energies_' + xc + '.dat', 'w') print('# h=', h, file=f) print('# box=', box, file=f) print('# molecule E[1] E[2] E[1+2] E[1]+E[2]-E[1+2]', file=f) for molecule in data: print(molecule, end=' ', file=f) ss = Cluster(Atoms(data[molecule]['symbols'], data[molecule]['positions'])) # split the structures s1 = ss.find_connected(0) s2 = ss.find_connected(-1) assert len(ss) == len(s1) + len(s2) if xc == 'TS09' or xc == 'TPSS' or xc == 'M06-L' or xc == 'dftd4': c = GPAW(mode='fd', xc='PBE', h=h, nbands=-6, occupations=FermiDirac(width=0.1)) else: c = GPAW(mode='fd', xc=xc, h=h, nbands=-6, occupations=FermiDirac(width=0.1)) E = [] for s in [s1, s2, ss]: s.calc = c s.minimal_box(box, h=h) if xc == 'TS09': s.get_potential_energy() cc = vdWTkatchenko09prl(HirshfeldPartitioning(c), vdWradii(s.get_chemical_symbols(), 'PBE')) s.calc = cc elif xc == 'dftd4': s.get_potential_energy() cc = D4_model(xc='PBE', calc=c) s.calc = cc if xc == 'TPSS' or xc == 'M06-L': ene = s.get_potential_energy() ene += c.get_xc_difference(xc) E.append(ene) else: E.append(s.get_potential_energy()) print(E[0], E[1], E[2], end=' ', file=f) print(E[0] + E[1] - E[2], file=f) f.flush() f.close() 
gpaw-24.1.0/doc/documentation/xc/tpss.rst000066400000000000000000000023761454550013000203140ustar00rootroot00000000000000========== TPSS notes ========== Kinetic energy density ====================== Inside the augmentation sphere of atom `a` (`r` library. Note that these use different kernels and hence will yield slightly different results. Several vdW-DF [#vdW-DF1a]_ type XC functionals are implemented self-consistently in GPAW, and also the BEEF-vdW [#BEEF-vdW]_ density functional. The vdW-DF variants include vdW-DF [#vdW-DF1a]_, [#vdW-DF1b]_, vdW-DF2 [#vdW-DF2]_, vdW-DF-cx, [#vdW-DF-cx]_, optPBE-vdW [#opt-vdW]_, optB88-vdW [#opt-vdW]_, and C09-vdW [#C09-vdW]_. Of these, vdW-DF-cx is available only through libvdwxc. The spin-polarized generalization of the vdW-DF functionals, [#svdW-DF]_, is also only available with libvdwxc. The self-consistent implementation uses the Perez-Soler [#soler]_ FFT algorithm to evaluate the total energy and potential of the Rutgers-Chalmers nonlocal correlation, which is originally a six dimensional integral in real space. However, a non self-consistent method which directly sums up the real-space integral is also available. Doing a vdW-DF calculation ================================== The self-consistent FFT method is highly recommended over the real-space method. Often, the vdW-DF electron density will be very similar to an ordinary GGA density, so non self-consistent evaluations of a vdW-DF type total energy using the FFT method is often ok. However, vdW-DF forces obviously require a self-consistent potential. As the examples below illustrate, FFT-based vdW-DF calculations are most easily done by setting e.g. "xc='vdW-DF'" in the GPAW calculator object. However, parameters of the FFT algorithm can be assigned non-default values by importing the vdW-DF base class. For larger systems, the van der Waals functionals may be computationally expensive. 
Consider using :ref:`libvdwxc <libvdwxc-doc>` which typically increases the efficiency of the van der Waals evaluation by an order of magnitude, and parallelizes to any desired system size. Selfconsistent vdW-DF calculations ------------------------------------- >>> from ase import * >>> from gpaw import GPAW >>> vdw = 'vdW-DF' >>> atoms = ... >>> calc = GPAW(xc=vdw, ...) >>> atoms.calc = calc >>> e = atoms.get_potential_energy() Perturbative vdW-DF calculations (non self-consistent) -------------------------------------------------------- >>> from gpaw import GPAW >>> xc = 'vdW-DF' >>> calc = GPAW('input.gpw') >>> GGA_energy = calc.get_potential_energy() >>> vdWDF_diff = calc.get_xc_difference(xc) >>> vdWDF_energy = GGA_energy + vdWDF_diff In the above examples, other vdW-DF type functionals can be used by substituting 'vdW-DF2', 'vdW-DF-cx' (if GPAW is compiled with libvdwxc), 'optPBE-vdW', 'optB88-vdW', or 'C09-vdW' for 'vdW-DF'. To explicitly use the faster libvdwxc backend, use e.g. ``xc={'name': 'vdW-DF', 'backend': 'libvdwxc'}``. Note that libvdwxc uses a different kernel parametrization, which will slightly affect calculated values. Non-default FFT parameters for vdW-DF calculations ----------------------------------------------------- A number of parameters determine the spline interpolation of the vdW-DF nonlocal kernel. These may be assigned non-default values if the vdW-DF base class is explicitly initialized with new settings. The example below redefines the number of interpolating cubic splines (Nalpha) used in a vdW-DF2 calculation. >>> from ase import * >>> from gpaw import GPAW >>> from gpaw.xc.vdw import VDWFunctional >>> vdw = VDWFunctional('vdW-DF2', Nalpha=24) >>> atoms = ... >>> calc = GPAW(xc=vdw, ...) 
>>> atoms.calc = calc >>> e = atoms.get_potential_energy() Real-space method vdW-DF ------------------------------------ It is also possible to use the much slower real-space method for non self-consistent evaluations of the nonlocal correlation energy, which might make sense for (very) small systems. To use the real-space method one must import a class and set a few parameters: >>> from gpaw.xc.vdw import VDWFunctional >>> vdw = VDWFunctional('vdW-DF', fft=False, nspins=1, ncut=0.0005) where nspins=1 is for spin-paired systems and nspins=2 is used for spin-polarized calculations. A cutoff, ncut, defines how small a density must be in order not to be included in the 6D integral. BEEF-vdW functional =================== The BEEF-vdW density functional uses the vdW-DF2 nonlocal correlation energy and potential. It is implemented self-consistently in GPAW. Furthermore, the BEEF-vdW construction allows the user to calculate an estimate of the error to be expected on the quantity calculated self-consistently with BEEF-vdW (i.e. an error estimate on relative energies, not on total energies). This estimate stems from non self-consistently applying an ensemble of XC functionals to BEEF-vdW electron densities. The ensemble error estimate is then computed from the variance of the ensemble predictions of the quantity of interest. 
Below is an example which calculates the BEEF-vdW binding energy of molecular H2 (E_bind), as well as an ensemble estimate of the binding energy error (dE_bind) >>> from ase import * >>> from gpaw import GPAW >>> from ase.dft.bee import BEEFEnsemble >>> xc = 'BEEF-vdW' >>> h2 = Atoms('H2',[[0.,0.,0.],[0.,0.,0.75]]) >>> h2.center(vacuum=3) >>> cell = h2.get_cell() >>> calc = GPAW(mode='fd', xc=xc) >>> h2.calc = calc >>> e_h2 = h2.get_potential_energy() >>> ens = BEEFEnsemble(calc) >>> de_h2 = ens.get_ensemble_energies() >>> del h2, calc, ens >>> h = Atoms('H') >>> h.set_cell(cell) >>> h.center() >>> calc = GPAW(mode='fd', xc=xc) >>> h.calc = calc >>> e_h = h.get_potential_energy() >>> ens = BEEFEnsemble(calc) >>> de_h = ens.get_ensemble_energies() >>> E_bind = 2*e_h - e_h2 >>> dE_bind = 2*de_h[:] - de_h2[:] >>> dE_bind = dE_bind.std() Note that the BEEFEnsemble module has recently been moved from GPAW to the ASE package. The default number of ensemble XC functionals is 2000, for which well-converged error estimates should be ensured. Therefore, "de_h2" and "de_h" in the example are both arrays of 2000 perturbations of a BEEF-vdW total energy. The syntax "ens.get_ensemble_energies(N)" changes this number to N. The calculator object input to the BEEFEnsemble class could of course stem from a restarted GPAW calculation. It is very important to calculate the ensemble statistics correctly. Computing the standard deviation of each array of total energy perturbations makes little sense, only the standard deviation of the relative energy perturbations should be used for the BEEF-vdW ensemble error estimates on a quantity. .. [#vdW-DF1a] M. Dion, H. Rydberg, E. Schroder, D.C. Langreth, and B. I. Lundqvist, Van der Waals density functional for general geometries, Physical Review Letters, 92, 246401 (2004) .. [#BEEF-vdW] J. Wellendorff, K. T. Lundgaard, A. Mogelhoj, V. Petzold, D. D. Landis, J. K. Norskov, T. Bligard, and K. W. Jacobsen, Physical Review B, 85, 235149 (2012) .. 
[#vdW-DF1b] M. Dion, H. Rydberg, E. Schroder, D.C. Langreth, and B. I. Lundqvist, Erratum: Van der Waals density functional for general geometries, Physical Review Letters, 95, 109902 (2005) .. [#vdW-DF2] K. Lee, D. E. Murray, L. Kong, B. I. Lundqvist, and D. C. Langreth, Higher-accuracy van der Waals density functional, Physical Review B, 82, 081101 (2010) .. [#vdW-DF-cx] K. Berland and P. Hyldgaard, Exchange functional that tests the robustness of the plasmon description of the van der Waals density functional, Physical Review B 89, 035412 (2014) .. [#opt-vdW] J. Klimes, D. R. Bowler, and A. Michaelides, Chemical accuracy for the van der Waals density functional, Journal of Physics: Condensed Matter, 22, 022201 (2010) .. [#C09-vdW] V. R. Cooper, Van der Waals density functional: An appropriate exchange functional, Physical Review B, 81, 161104(R) (2010) .. [#soler] Guillermo Román-Pérez and José M. Soler, Efficient Implementation of a van der Waals Density Functional: Application to Double-Wall Carbon Nanotubes, Physical Review Letters 103, 096102 (2009) .. [#svdW-DF] T. Thonhauser, S. Zuluaga, C. A. Arter, K. Berland, E. Schröder, and P. Hyldgaard, Spin Signature of Nonlocal Correlation Binding in Metal-Organic Frameworks, Physical Review Letters 115, 136402 (2015) gpaw-24.1.0/doc/documentation/xc/vdwcorrection.rst000066400000000000000000000025751454550013000222140ustar00rootroot00000000000000.. _vdwcorrection: ======================== van der Waals correction ======================== A correction on top of the PBE functional has been proposed by Tkatchenko and Scheffler [#TS09]_. While nearly all parameters are obtained from ab-initio calculations, the method requires nearly no additional computational cost and performs very well: ======================= === ===== ====== ======= ==== ========= . 
PBE TPSS vdW-DF vdW-DF2 TS09 Grimme D4 ======================= === ===== ====== ======= ==== ========= Mean absolute deviation 115 154 76 48 16 14 RMS deviation 108 128 60 42 21 14 ======================= === ===== ====== ======= ==== ========= Error in energies compared to CCSD results of the S26 test set. All values in meV. GPAW calculations were done with h=0.18 and at least 4 A vacuum. The TS09 results are in good agreement to the results obtained with the FHI-aims code [#Hanke11jcc]_. Grimme D4 is available at github_. Calculating the S26 test set ============================ As an example of the usage, here the S26 (S22 plus 4 other pairs) test set is calculated: .. literalinclude:: s26_set.py .. [#TS09] :doi:`Tkatchenko and Scheffler Phys. Rev. Lett. 102 (2009) <10.1002/jcc.21724>` .. [#Hanke11jcc] Felix Hanke J. Comp. Chem. 32 (2011) 1424 .. _github: https://github.com/dftd4/dftd4 gpaw-24.1.0/doc/documentation/xc/xc.rst000066400000000000000000000003041454550013000177220ustar00rootroot00000000000000.. _xc: ============== XC Functionals ============== .. toctree:: :maxdepth: 2 functionals exx range_separated_functionals rpa tpss vdw libvdwxc vdwcorrection qna gpaw-24.1.0/doc/ext.py000066400000000000000000000007521454550013000144540ustar00rootroot00000000000000from ase.utils.sphinx import mol_role from ase.utils.sphinx import git_role_tmpl from ase.utils.sphinx import create_png_files def git_role(role, rawtext, text, lineno, inliner, options={}, content=[]): return git_role_tmpl('https://gitlab.com/gpaw/gpaw/blob/master/', role, rawtext, text, lineno, inliner, options, content) def setup(app): app.add_role('mol', mol_role) app.add_role('git', git_role) create_png_files() gpaw-24.1.0/doc/faq.rst000066400000000000000000000131621454550013000146020ustar00rootroot00000000000000.. _faq: ========================== Frequently Asked Questions ========================== .. contents:: General ======= .. _citation: Citation: How should I cite GPAW? 
--------------------------------- If you find GPAW useful in your research please cite the original reference: | J. J. Mortensen, L. B. Hansen, and K. W. Jacobsen | :doi:`Real-space grid implementation of the projector augmented wave method <10.1103/PhysRevB.71.035109>` | Phys. Rev. B **71**, 035109 (2005) and the major GPAW review: | J. Enkovaara, C. Rostgaard, J. J. Mortensen et al. | :doi:`Electronic structure calculations with GPAW: a real-space implementation of the projector augmented-wave method <10.1088/0953-8984/22/25/253202>` | J. Phys.: Condens. Matter **22**, 253202 (2010) together with the ASE review (see :ref:`ase:cite`). Please also cite those of the following that are relevant to your work: * `Libxc `_ for XC-functionals other than LDA, PBE, revPBE, RPBE and PW91: *S. Lehtola, C. Steigemann, M. J. T. Oliveira and M. A. L. Marques.*, :doi:`Recent developments in LIBXC — a comprehensive library of functionals for density functional theory <10.1016/j.softx.2017.11.002>`, SoftwareX **7**, 1 (2018) * :ref:`timepropagation` or :ref:`lrtddft`: *M. Walter, H. Häkkinen, L. Lehtovaara, M. Puska, J. Enkovaara, C. Rostgaard and J. J. Mortensen*, :doi:`Time-dependent density-functional theory in the projector augmented-wave method <10.1063/1.2943138>`, J. Chem. Phys. **128**, 244101 (2008) * :ref:`Localized basis set calculations ` (LCAO): *A. H. Larsen, M. Vanin, J. J. Mortensen, K. S. Thygesen, and K. W. Jacobsen*, :doi:`Localized atomic basis set in the projector augmented wave method <10.1103/PhysRevB.80.195112>`, Phys. Rev. B **80**, 195112 (2009) * :ref:`Linear dielectric response of an extended system `: *J. Yan, J. J. Mortensen, K. W. Jacobsen, and K. S. Thygesen*, :doi:`Linear density response function in the projector augmented wave method: Applications to solids, surfaces, and interfaces <10.1103/PhysRevB.83.245122>`, Phys. Rev. B **83**, 245122 (2011) * :ref:`Quasi-particle spectrum in the GW approximation `: *F. Hüser, T. Olsen, and K. S. 
Thygesen*, :doi:`Quasiparticle GW calculations for solids, molecules, and two-dimensional materials <10.1103/PhysRevB.87.235132>`, Phys. Rev. B **87**, 235132 (2013) * :ref:`continuum_solvent_model`: *A. Held and M. Walter*, :doi:`Simplified continuum solvent model with a smooth cavity based on volumetric data <10.1063/1.4900838>`, J. Chem. Phys. **141**, 174108 (2014) * :ref:`lcaotddft`: *M. Kuisma, A. Sakko, T. P. Rossi, A. H. Larsen, J. Enkovaara, L. Lehtovaara, and T. T. Rantala*, :doi:`Localized surface plasmon resonance in silver nanoparticles: Atomistic first-principles time-dependent density functional theory calculations <10.1103/PhysRevB.91.115431>`, Phys. Rev. B **91**, 115431 (2015) * :ref:`ksdecomposition` and :ref:`lcaotddft`: *T. P. Rossi, M. Kuisma, M. J. Puska, R. M. Nieminen, and P. Erhart*, :doi:`Kohn--Sham Decomposition in Real-Time Time-Dependent Density-Functional Theory: An Efficient Tool for Analyzing Plasmonic Excitations <10.1021/acs.jctc.7b00589>`, J. Chem. Theory Comput. **13**, 4779 (2017) * :ref:`sjm`: *G. Kastlunger, P. Lindgren, A.A. Peterson*, :doi:`Controlled-potential simulation of elementary electrochemical reactions: proton discharge on metal surfaces <10.1021/acs.jpcc.8b02465>`, J. Phys. Chem. C **122**, 12771 (2018) Citations of the GPAW method papers ----------------------------------- .. image:: documentation/citations.png :width: 750 (updated on 18 Mar 2021) The total number of citations above is the number of publications citing at least one of the other papers, not the sum of all citation counts. BibTex (:git:`doc/GPAW.bib`): .. literalinclude:: GPAW.bib :language: bibtex How do you pronounce GPAW? -------------------------- In English: "geepaw" with a long "a". In Danish: Først bogstavet "g", derefter "pav": "g-pav". In Finnish: supisuomalaisittain "kee-pav". In Polish: "gyeh" jak `"Gie"rek `_, "pav" jak `paw `_: "gyeh-pav". 
Compiling the C-code ==================== For architecture dependent settings see the :ref:`platforms and architectures` page. Compilation of the C part failed:: [~]$ python2.4 setup.py build_ext building '_gpaw' extension pgcc -fno-strict-aliasing -DNDEBUG -O2 -g -pipe -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -m64 -D_GNU_SOURCE -fPIC -fPIC -I/usr/include/python2.4 -c c/localized_functions.c -o build/temp.linux-x86_64-2.4/c/localized_functions.o -Wall -std=c99 pgcc-Warning-Unknown switch: -fno-strict-aliasing PGC-S-0040-Illegal use of symbol, _Complex (/usr/include/bits/cmathcalls.h: 54) You are probably using another compiler, than was used for compiling python. Undefine the environment variables CC, CFLAGS and LDFLAGS with:: # sh/bash users: unset CC; unset CFLAGS; unset LDFLAGS # csh/tcsh users: unsetenv CC; unsetenv CFLAGS; unsetenv LDFLAGS and try again. Calculation does not converge ============================= Consult the :ref:`convergence` page. Poisson solver did not converge! ================================ If you are doing a spin-polarized calculation for an isolated molecule, then you should set the Fermi temperature to a low value. You can also try to set the number of grid points to be divisible by 8. Consult the :ref:`poisson_performance` page. gpaw-24.1.0/doc/images.py000066400000000000000000000134441454550013000151230ustar00rootroot00000000000000""" TODO: 1. we should find a good way in which to store files elsewhere than static 2. currently the files that are not generated by weekly tests are copied from srcpath. This needs to be documented. Make sure that downloaded files are copied to build dir on build This must (probably) be done *after* compilation because otherwise dirs may not exist. 
""" try: from urllib2 import urlopen, HTTPError except ImportError: from urllib.request import urlopen from urllib.error import HTTPError import ssl import os srcpath = 'https://wiki.fysik.dtu.dk/gpaw-files' agtspath = 'https://wiki.fysik.dtu.dk' def get(path, names, target=None, source=None): """Get files from web-server. Returns True if something new was fetched.""" if target is None: target = path if source is None: source = srcpath got_something = False # We get images etc from a web server with a self-signed certificate # That cause trouble on some machines. ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE for name in names: src = os.path.join(source, path, name) dst = os.path.join(target, name) if not os.path.isfile(dst): print(dst, end=' ') try: data = urlopen(src, context=ctx).read() with open(dst, 'wb') as sink: sink.write(data) print('OK') got_something = True except HTTPError: print('HTTP Error!') return got_something literature = """ askhl_10302_report.pdf mortensen_gpaw-dev.pdf rostgaard_master.pdf askhl_master.pdf mortensen_mini2003talk.pdf marco_master.pdf mortensen_paw.pdf ss14.pdf """.split() get('doc/literature', literature, 'documentation') # Note: bz-all.png is used both in an exercise and a tutorial. Therefore # we put it in the common dir so far, rather than any of the two places get('.', ['bz-all.png'], 'static') # These files have different destinations after webpage refactor then they # used to have. Might need to keep track of this. 
get('exercises/wavefunctions', ['co_bonding.jpg'], target='tutorialsexercises/wavefunctions/wavefunctions') get('exercises/lrtddft', ['spectrum.png'], target='tutorialsexercises/opticalresponse/lrtddft') get('tutorials/wannier90', ['GaAs.png', 'Cu.png', 'Fe.png'], target='tutorialsexercises/wavefunctions/wannier90') get('tutorials/xas', ['h2o_xas_3.png', 'h2o_xas_4.png'], target='tutorialsexercises/opticalresponse/xas') get('tutorials/xas', ['xas_illustration.png'], target='documentation/xas') # This files is not used anymore? # get('tutorialsexercises/opticalresponse/xas', ['xas_h2o_convergence.png']) # ---- get('documentation/xc', 'g2test_pbe0.png g2test_pbe.png results.png'.split()) get('performance', 'dacapoperf.png goldwire.png gridperf.png'.split(), 'static') get('bgp', ['bgp_mapping_intranode.png', 'bgp_mapping1.png', 'bgp_mapping2.png'], 'platforms/BGP') # workshop 2013 and 2016 photos: get('workshop13', ['workshop13_01_33-1.jpg'], 'static') get('workshop16', ['gpaw2016-photo.jpg'], 'static') # files from https://wiki.fysik.dtu.dk/gpaw-files/things/ # Warning: for the moment dcdft runs are not run (files are static)! dcdft_pbe_aims_stuff = """ dcdft_aims.tight.01.16.db.csv dcdft_aims.tight.01.16.db_raw.csv dcdft_aims.tight.01.16.db_Delta.txt """.split() get('things', dcdft_pbe_aims_stuff, target='setups') # Warning: for the moment dcdft runs are not run (files are static)! dcdft_pbe_gpaw_pw_stuff = """ dcdft_pbe_gpaw_pw.csv dcdft_pbe_gpaw_pw_raw.csv dcdft_pbe_gpaw_pw_Delta.txt """.split() get('things', dcdft_pbe_gpaw_pw_stuff, target='setups') # Warning: for the moment dcdft runs are not run (files are static)! dcdft_pbe_jacapo_stuff = """ dcdft_pbe_jacapo.csv dcdft_pbe_jacapo_raw.csv dcdft_pbe_jacapo_Delta.txt """.split() get('things', dcdft_pbe_jacapo_stuff, target='setups') # Warning: for the moment dcdft runs are not run (files are static)! 
dcdft_pbe_abinit_fhi_stuff = """ dcdft_pbe_abinit_fhi.csv dcdft_pbe_abinit_fhi_raw.csv dcdft_pbe_abinit_fhi_Delta.txt """.split() get('things', dcdft_pbe_abinit_fhi_stuff, target='setups') g2_1_stuff = """ pbe_gpaw_nrel_ea_vs.csv pbe_gpaw_nrel_ea_vs.png pbe_gpaw_nrel_opt_ea_vs.csv pbe_gpaw_nrel_opt_distance_vs.csv pbe_nwchem_def2_qzvppd_opt_ea_vs.csv pbe_nwchem_def2_qzvppd_opt_distance_vs.csv """.split() get('things', g2_1_stuff, target='setups') get('things', ['datasets.json'], 'setups') # Carlsberg foundation figure: get('.', ['carlsberg.png']) get('static', ['NOMAD_Logo_supported_by.png']) # Summer school 2022 get('summerschool2018', ['CreateTunnelWin.png', 'JupyterRunningMac.png', 'JupyterRunningWin.png', 'Logged_in_Mac.png', 'Logged_in_Win.png', 'Moba_ssh.png', 'UseTunnelWin.png'], target='summerschools/summerschool22') get('summerschool2018', ['organometal.master.db'], target='summerschools/summerschool22/machinelearning') get('summerschool2018', ['C144Li18.png', 'C64.png', 'final.png', 'initial.png', 'Li2.png', 'lifepo4_wo_li.traj', 'NEB_init.traj'], target='summerschools/summerschool22/batteries') get('summerschool2022', ['Intro_projects_CAMD2022.pdf'], target='summerschools/summerschool22') def setup(app): # Get png and csv files and other stuff from the AGTS scripts that run # every weekend: from gpaw.doctools.agts_crontab import find_created_files for path in find_created_files(): # the files are saved by the weekly tests under agtspath/agts-files # now we are copying them back to their original run directories if path.is_file(): continue print(path, 'copied from', agtspath) get('agts-files', [path.name], str(path.parent), source=agtspath) gpaw-24.1.0/doc/index.rst000066400000000000000000000175101454550013000151430ustar00rootroot00000000000000=============================================================== GPAW: DFT and beyond within the projector-augmented wave method =============================================================== GPAW is a 
density-functional theory (DFT) Python_ code based on the projector-augmented wave (:ref:`PAW `) method and the atomic simulation environment (ASE_). The wave functions can be described with: * Plane-waves (:ref:`pw `) * Real-space uniform grids, multigrid methods and the finite-difference approximation (:ref:`fd `) * Atom-centered basis-functions (:ref:`lcao `) >>> # H2-molecule example: >>> import numpy as np >>> from ase import Atoms >>> from gpaw import GPAW, PW >>> h2 = Atoms('H2', [(0, 0, 0), (0, 0, 0.74)]) >>> h2.center(vacuum=2.5) >>> h2.cell Cell([5.0, 5.0, 5.74]) >>> h2.positions array([[2.5 , 2.5 , 2.5 ], [2.5 , 2.5 , 3.24]]) >>> h2.calc = GPAW(xc='PBE', ... mode=PW(300), ... txt='h2.txt') >>> energy = h2.get_potential_energy() >>> print(f'Energy: {energy:.3f} eV') Energy: -6.631 eV >>> forces = h2.get_forces() >>> forces.shape (2, 3) >>> print(f'Force: {forces[0, 2]:.3f} eV/Å') Force: -0.639 eV/Å .. image:: https://badge.fury.io/py/gpaw.svg :target: https://pypi.org/project/gpaw/ .. _Python: http://www.python.org .. _ASE: https://wiki.fysik.dtu.dk/ase .. _news: News ==== * :ref:`GPAW version 24.1.0 ` released (Jan 4, 2024). * :ref:`GPAW version 23.9.1 ` released (Sep 15, 2023). * :ref:`GPAW version 23.9.0 ` released (Sep 13, 2023). * Monthly *response code* two-day sprints will start on the last Monday of the month and continue the next day (Aug 28. 2023). * Monthly *general maintenance* one-day sprints will start on the Tuesday in the week after the monthly response sprints (this will typically be the first Tuesday of the month, but it can also be the second Tuesday) (Aug 28. 2023). * :ref:`GPAW version 23.6.1 ` released (Jul 5, 2023). * :ref:`GPAW version 23.6.0 ` released (Jun 9, 2023). * :ref:`GPAW version 22.8.0 ` released (Aug 18, 2022). * :ref:`GPAW version 22.1.0 ` released (Jan 12, 2022). * :ref:`GPAW version 21.6.0 ` released (Jun 24, 2021). * Slides from the "GPAW 2021 Users and developers meeting" are now available `here `__ (Jun 2, 2021). 
* Upcoming workshop: The `GPAW 2021 Users and developers meeting `__ will be held online on June 1--4, 2021. See also announcement on `Psi-k `__ (Mar 1, 2021). * :ref:`GPAW version 21.1.0 ` released (Jan 18, 2021). * :ref:`GPAW version 20.10.0 ` released (Oct 19, 2020). * :ref:`GPAW version 20.1.0 ` released (Jan 30, 2020). * :ref:`GPAW version 19.8.1 ` released (Aug 8, 2019). * :ref:`GPAW version 19.8.0 ` released (Aug 1, 2019). * :ref:`GPAW version 1.5.2 ` released (May 8, 2019). * :ref:`GPAW version 1.5.1 ` released (Jan 23, 2019). * :ref:`GPAW version 1.5.0 ` released (Jan 11, 2019). * :ref:`GPAW version 1.4.0 ` released (May 29, 2018). * :ref:`GPAW version 1.3.0 ` released (Oct 2, 2017). * Supported by NOMAD_ (Mar 1, 2017) .. image:: static/NOMAD_Logo_supported_by.png :width: 100 px :target: NOMAD_ * Code-sprints moved to first Tuesday of every month (Feb 17, 2017) * :ref:`GPAW version 1.2 ` released (Feb 7, 2017) * It has been decided to have monthly GPAW/ASE code-sprints at DTU in Lyngby. The sprints will be the first Wednesday of every month starting December 7, 2016 (Nov 11, 2016) * Slides from the talks at :ref:`workshop16` are now available (Sep 5, 2016) * :ref:`GPAW version 1.1 ` released (Jun 22, 2016) * :ref:`GPAW version 1.0 ` released (Mar 18, 2016) * Web-page now use the `Read the Docs Sphinx Theme `_ (Mar 18, 2016) * :ref:`GPAW version 0.11 ` released (Jul 22, 2015) * :ref:`GPAW version 0.10 ` released (Apr 8, 2014) * GPAW is part of the `PRACE Unified European Application Benchmark Suite`_ (Oct 17, 2013) * May 21-23, 2013: :ref:`GPAW workshop ` at the Technical University of Denmark (Feb 8, 2013) * Prof. 
Häkkinen has received `18 million CPU hour grant`_ for GPAW based research project (Nov 20, 2012) * A new :ref:`setups` bundle released (Oct 26, 2012) * :ref:`GPAW version 0.9 ` released (March 7, 2012) * :ref:`GPAW version 0.8 ` released (May 25, 2011) * GPAW is part of benchmark suite for `CSC's supercomputer procurement`_ (Apr 19, 2011) * New features: Calculation of the linear :ref:`dielectric response ` of an extended system (RPA and ALDA kernels) and calculation of :ref:`rpa` (Mar 18, 2011) * Massively parallel GPAW calculations presented at `PyCon 2011`_. See William Scullin's talk here: `Python for High Performance Computing`_ (Mar 12, 2011) * :ref:`GPAW version 0.7.2 ` released (Aug 13, 2010) * :ref:`GPAW version 0.7 ` released (Apr 23, 2010) * GPAW is `\Psi_k` `scientific highlight of the month`_ (Apr 3, 2010) * A third GPAW code sprint was successfully hosted at CAMD (Oct 20, 2009) * :ref:`GPAW version 0.6 ` released (Oct 9, 2009) * `QuantumWise `_ adds GPAW-support to `Virtual NanoLab`_ (Sep 8, 2009) * Join the new IRC channel ``#gpaw`` on FreeNode (Jul 15, 2009) * :ref:`GPAW version 0.5 ` released (Apr 1, 2009) * A new :ref:`setups` bundle released (Mar 27, 2009) * A second GPAW code sprint was successfully hosted at CAMD (Mar 20, 2009) * :ref:`GPAW version 0.4 ` released (Nov 13, 2008) * The :ref:`tutorialsexercises` are finally ready for use in the `CAMd summer school 2008`_ (Aug 15, 2008) * This site is now powered by Sphinx_ (Jul 31, 2008) * GPAW is now based on numpy_ instead of Numeric (Jan 22, 2008) * :ref:`GPAW version 0.3 ` released (Dec 19, 2007) * CSC_ is organizing a `GPAW course`_: "Electronic structure calculations with GPAW" (Dec 11, 2007) * The `code sprint 2007`_ was successfully finished (Nov 16, 2007) * The source code is now in the hands of SVN and Trac (Oct 22, 2007) * A GPAW Sprint will be held on November 16 in Lyngby (Oct 18, 2007) * Work on atomic basis-sets begun (Sep 25, 2007) .. _numpy: http://numpy.scipy.org/ .. 
_CSC: http://www.csc.fi .. _GPAW course: http://www.csc.fi/english/csc/courses/archive/gpaw-2008-01 .. _Sphinx: http://www.sphinx-doc.org .. _CAMd summer school 2008: http://www.camd.dtu.dk/English/Events/CAMD_Summer_School_2008/Programme.aspx .. _code sprint 2007: http://www.dtu.dk/Nyheder/Nyt_fra_Institutterne.aspx?guid={38B92D63-FB09-4DFA-A074-504146A2D678} .. _Virtual NanoLab: http://www.quantumwise.com/products/12-products/28-atk-se-200906#GPAW .. _scientific highlight of the month: http://www.psi-k.org/newsletters/News_98/Highlight_98.pdf .. _pycon 2011: http://us.pycon.org/2011/schedule/presentations/226/ .. _Python for High Performance Computing: http://pycon.blip.tv/file/4881240/ .. _CSC's supercomputer procurement: http://www.csc.fi/english/pages/hpc2011 .. _18 million CPU hour grant: http://www.prace-ri.eu/PRACE-5thRegular-Call .. _PRACE Unified European Application Benchmark Suite: http://www.prace-ri.eu/ueabs .. _NOMAD: http://repository.nomad-coe.eu/ .. toctree:: algorithms install documentation/documentation tutorialsexercises/tutorialsexercises setups/setups releasenotes contact faq devel/devel summerschools/summerschools workshops/workshops bugs gpaw-24.1.0/doc/install.rst000066400000000000000000000202531454550013000155000ustar00rootroot00000000000000.. _installation: ============ Installation ============ .. toctree:: :hidden: troubleshooting platforms/platforms GPAW relies on the Python library *atomic simulation environment* (ASE_), so you need to :ref:`install ASE ` first. GPAW itself is written mostly in the Python programming language, but there are also some C-code used for: * performance critical parts * allowing Python to talk to external numerical libraries (BLAS_, LibXC_, MPI_ and ScaLAPACK_) So, in order to make GPAW work, you need to compile some C-code. You will need to build a dynamically linked library (``_gpaw.so``) that the standard Python interpreter can load. 
There are several ways to install GPAW: * Directly from PyPI_: ``python3 -m pip install gpaw``. More details here: :ref:`installation using pip`. * Alternatively, you can :ref:`download ` the source code and then install with ``python3 -m pip install .``. * There may be a package for your Linux distribution that you can use (named ``gpaw``). * If you are a developer who needs to change the code, you should look at this description: :ref:`development workflow`. .. seealso:: * :ref:`siteconfig` * Using :ref:`homebrew` on MacOSX * Using :ref:`anaconda` * This `docker image`_ * Tips and tricks for installation on many :ref:`platforms and architectures` * :ref:`troubleshooting` * In case of trouble: :ref:`Our mail list and #gpaw chat channel ` Requirements ============ * Python_ 3.8 or later * ASE_ * NumPy_ * SciPy_ * LibXC_ * A C compiler * A BLAS_ library Optional (highly recommended for increased performance): * An MPI_ library (required for parallel calculations) * BLACS_ and ScaLAPACK_ libraries * FFTW_ See :ref:`releasenotes` for version requirements. .. _Python: http://www.python.org/ .. _NumPy: http://docs.scipy.org/doc/numpy/reference/ .. _SciPy: http://docs.scipy.org/doc/scipy/reference/ .. _LibXC: http://www.tddft.org/programs/libxc/ .. _MPI: http://www.mpi-forum.org/ .. _BLAS: http://www.netlib.org/blas/ .. _BLACS: http://www.netlib.org/blacs/ .. _ScaLAPACK: http://www.netlib.org/scalapack/ .. _PyPI: https://pypi.org/project/gpaw .. _PIP: https://pip.pypa.io/en/stable/ .. _ASE: https://wiki.fysik.dtu.dk/ase .. _FFTW: http://www.fftw.org/ .. _docker image: https://hub.docker.com/r/marcindulak/gpaw-openmpi .. _installation using pip: Installation using ``pip`` ========================== .. 
highlight:: bash The simplest way to install GPAW is using pip_ and the GPAW package from the Python package index (PyPI_):: $ python3 -m pip install gpaw This will compile and install GPAW (both ``_gpaw.so`` and all the Python files) in your ``~/.local/lib/pythonX.Y/site-packages`` folder where Python can automatically find it. Pip will also place the command line tool :command:`gpaw` in the ``~/.local/bin`` folder, so make sure you have that in your ``$PATH`` environment variable. Check that you have installed everything in the correct places:: $ gpaw info Install PAW datasets ==================== Install the datasets into the folder ```` using this command:: $ gpaw install-data See :ref:`installation of paw datasets` for more details. Now you should be ready to use GPAW, but before you start, please run the tests as described below. .. index:: test .. _run a tests: Run a simple test calculation ============================= Make sure that everything works by running a simple test:: $ gpaw test If this worked then try also a parallel calculation:: $ gpaw -P 4 test Please report errors to the ``gpaw-users`` mailing list so that we can fix them (see :ref:`mail list`). .. seealso:: :ref:`testing` for how to run the complete test suite. .. _download: Getting the source code ======================= You can get the source from a tar-file or from Git: :Tar-file: You can get the source as a tar-file for the latest stable release (gpaw-24.1.0.tar.gz_) or the latest development snapshot (``_). Unpack and make a soft link:: $ tar -xf gpaw-24.1.0.tar.gz $ ln -s gpaw-24.1.0 gpaw Here is a `list of tarballs `__. :Git clone: Alternatively, you can get the source for the latest stable release from https://gitlab.com/gpaw/gpaw like this:: $ git clone -b 24.1.0 https://gitlab.com/gpaw/gpaw.git or if you want the development version:: $ git clone https://gitlab.com/gpaw/gpaw.git .. note:: We also have Git tags for older stable versions of GPAW. 
See the :ref:`releasenotes` for which tags are available. Also the dates of older releases can be found there. .. _gpaw-24.1.0.tar.gz: https://pypi.org/packages/source/g/gpaw/gpaw-24.1.0.tar.gz .. _siteconfig: Customizing installation ======================== The install script may need a little help finding your libraries (BLAS, FFTW, ScaLAPACK, libxc, libvdwxc, ...). This can be done by adding a ``siteconfig.py`` file in one of these three places: 1) the file that ``$GPAW_CONFIG`` points at 2) ``/siteconfig.py`` 3) ``~/.gpaw/siteconfig.py`` The first one found will be used. As an example, ``siteconfig.py`` might contain the following lines:: libraries = ['myblas'] library_dirs = ['path_to_myblas'] Now, GPAW would be built with "``-Lpath_to_myblas -lmyblas``" linker flags. Look at the file :git:`siteconfig_example.py` for more possible options. :ref:`platforms and architectures` provides examples of ``siteconfig.py`` files for different platforms. .. _parallel installation: Parallel installation ===================== By default, setup checks whether :program:`mpicc` is available, and if setup finds one, a parallel version is built. If the setup does not find :program:`mpicc`, a user can specify one in the ``siteconfig.py`` file. Additionally a user may want to enable ScaLAPACK, setting in ``siteconfig.py``:: scalapack = True and, in this case, provide BLACS/ScaLAPACK ``libraries`` and ``library_dirs`` as described in :ref:`siteconfig`. Part of the code can be parallelized also with OpenMP. OpenMP threading can be enabled by setting in ``siteconfig.py``:: extra_compile_args += ['-fopenmp'] extra_link_args += ['-fopenmp'] Note that a multithreaded BLAS library should be used together with OpenMP parallelization. Also, the MPI library needs to support the MPI_THREAD_MULTIPLE thread support level. Instructions for running parallel calculations can be found in the :ref:`user manual `. 
FFTW ==== In order to use FFTW_ instead of :mod:`numpy.fft`, add something like this to your ``siteconfig.py``:: fftw = True libraries += ['fftw3'] .. _libxc installation: Libxc Installation ================== If you OS does not have a LibXC package you can use then you can download and install LibXC as described `here `_. A few extra tips: * Libxc installation requires both a C compiler and a Fortran compiler. * We've tried Intel and gnu compilers and haven't noticed much of a performance difference. Use whatever is easiest. * Libxc shared libraries can be built with the "--enable-shared" option to configure. This might be slightly preferred because it reduces memory footprints for executables. * Typically when building GPAW one has to modify ``siteconfig.py`` in a manner similar to the following:: library_dirs += ['/my/path/to/libxc/5.2.0/install/lib'] include_dirs += ['/my/path/to/libxc/5.2.0/install/include'] or if you don't want to modify your ``siteconfig.py``, you can add these lines to your ``.bashrc``:: export C_INCLUDE_PATH=/my/path/to/libxc/5.2.0/install/include export LIBRARY_PATH=/my/path/to/libxc/5.2.0/install/lib export LD_LIBRARY_PATH=/my/path/to/libxc/5.2.0/install/lib Example:: wget http://www.tddft.org/programs/octopus/down.php?file=libxc/5.2.0/libxc-5.2.0.tar.gz -O libxc-5.2.0.tar.gz tar -xf libxc-5.2.0.tar.gz cd libxc-5.2.0 ./configure --enable-shared --disable-fortran --prefix=$HOME/libxc-5.2.0 make make install # add these to your .bashrc: XC=~/libxc-5.2.0 export C_INCLUDE_PATH=$XC/include export LIBRARY_PATH=$XC/lib export LD_LIBRARY_PATH=$XC/lib gpaw-24.1.0/doc/platforms/000077500000000000000000000000001454550013000153055ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/AIX/000077500000000000000000000000001454550013000157265ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/AIX/ibmsc.rst000066400000000000000000000007411454550013000175570ustar00rootroot00000000000000.. 
_ibmcsc: ============ ibmsc.csc.fi ============ Here you find information about the system ``_. Debug like this:: p690m ~/gpaw/demo> dbx /p/bin/python2.3 Type 'help' for help. reading symbolic information ...warning: no source compiled with -g (dbx) run Python 2.3.4 (#4, May 28 2004, 15:30:35) [C] on aix5 Type "help", "copyright", "credits" or "license" for more information. >>> import grr gpaw-24.1.0/doc/platforms/AIX/jump.rst000066400000000000000000000012731454550013000174360ustar00rootroot00000000000000.. _jump: ================== jump.fz-juelich.de ================== Here you find information about the system ``_. The only way we found to compile numpy is using python2.3 and numpy-1.0.4. The next version numpy-1.1.0 did not work unfortunately. In addition the usage of the generic IBM lapack/blas in numpy does not work, hence one has to use site.cfg:: : diff site.cfg site.cfg.example 58,60c58,60 < [DEFAULT] < library_dirs = < include_dirs = --- > #[DEFAULT] > #library_dirs = /usr/local/lib > #include_dirs = /usr/local/include With his change numpy compiles and the installation of ASE and gpaw does not cause problems. gpaw-24.1.0/doc/platforms/AIX/seaborg.rst000066400000000000000000000015661454550013000201120ustar00rootroot00000000000000.. _seaborg: ================= seaborg.nersc.gov ================= Here you find information about the system ``_. We need to use the mpi-enabled compiler ``mpcc`` and we should link to LAPACK before ESSL. Make sure LAPACK is added:: $ module add lapack and use this customize.py:: from os import environ mpicompiler = 'mpcc' libraries = ['f'] extra_link_args += [environ['LAPACK'], '-lessl'] The Numeric Python extension is not installed on NERSC, so we should install it. 
Get the Numeric-24.2 and do this:: $ wget http://downloads.sourceforge.net/numpy/Numeric-24.2.tar.gz $ gunzip -c Numeric-24.2.tar.gz | tar xf - $ cd Numeric-24.2 $ python setup.py install --home=$HOME and put the :file:`$HOME/lib/python/Numeric` directory in your :envvar:`PYTHONPATH`. Now we are ready to :ref:`compile GPAW ` gpaw-24.1.0/doc/platforms/BGP/000077500000000000000000000000001454550013000157155ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/BGP/Makefile.ramdisk000066400000000000000000000010431454550013000210040ustar00rootroot00000000000000PROFILE=gpaw BGPHOME=unknown BGPBASE=/bgsys/drivers/ppcfloor BGP_LINUX_OS_PATH=/bgsys/linux/1.4.120091014 .PHONY: all clean install ramdisk all: @echo "Make Targets" @echo " install - copy ramdisk into kernel location" @echo " ramdisk - builds the ramdisk" @echo " clean - removes targets" install: cp ramdisk /bgsys/argonne-utils/profiles/$(PROFILE)/ramdisk chgrp bgpadmin /bgsys/argonne-utils/profiles/$(PROFILE)/ramdisk ramdisk: $(BGPBASE)/build-ramdisk --runos $(BGP_LINUX_OS_PATH) --addtree fs clean: rm -f ramdisk gpaw-24.1.0/doc/platforms/BGP/intrepid-ramdisk.sh000077500000000000000000000031141454550013000215210ustar00rootroot00000000000000type=Au_bulk6x6x6 cwd=`pwd` acct=Gpaw queue=prod time=45 nodes=8192 mode=vn # uncomment below for mapfile # mapfile=BGMAP_8x8x4x16 # 1024 nodes # mapping=$mapfile mapping=ZYXT input=${type}.py scratch=/intrepid-fs0/users/${USER}/persistent install=/gpfs/home/${USER} setups=/soft/apps/gpaw-setups-0.6.6300 gpawversion=7800 sharelibs=/lib:/gpaw/V1R4M2 job=${type}_${nodes}_${mode}_${mapping}_r${gpawversion} taulibs=/gpaw/tau-2.19.2 bin=gpaw-python kernel=gpaw rm -rf $scratch/$job mkdir $scratch/$job cp $input $scratch/$job cp $mapfile $scratch/$job # cp $pos $scratch/$job cd $scratch/$job # Many of the values for the DMCF variables below are defaults # pristine qsub --kernel $kernel -A $acct -n $nodes -t $time -q $queue --mode $mode --env 
DCMF_EAGER=8388608:BG_MAPPING=$mapping:MPIRUN_ENABLE_TTY_REPORTING=0:OMP_NUM_THREADS=1:GPAW_SETUP_PATH=$setups:PYTHONHOME=/gpaw:LD_LIBRARY_PATH=$sharelibs ${install}/gpaw-r${gpawversion}/build/bin.linux-ppc64-2.6/${bin} ${type}.py --domain-decomposition=8,8,4 --state-parallelization=16 # TAU manual instrumentation # qsub --kernel $kernel -A $acct -n $nodes -t $time -q $queue --mode $mode --env TAU_VERBOSE=1:TAU_CALLPATH=0:TAU_CALLPATH_DEPTH=10:TAU_COMM_MATRIX=0:TAU_TRACK_MESSAGE=0:TAU_THROTTLE=0:TAU_COMPENSATE=1:TAU_METRICS=BGPTIMERS:DCMF_EAGER=8388608:BG_MAPPING=$mapping:MPIRUN_ENABLE_TTY_REPORTING=0:OMP_NUM_THREADS=1:GPAW_SETUP_PATH=$setups:PYTHONHOME=/gpaw:PYTHONPATH=${taulibs}:LD_LIBRARY_PATH=$sharelibs:${taulibs} ${install}/gpaw-r${gpawversion}/build/bin.linux-ppc64-2.6/${bin} ${type}.py --domain-decomposition=8,8,16 --state-parallelization=32 gpaw-24.1.0/doc/platforms/BGP/jugene.rst000066400000000000000000000056721454550013000177360ustar00rootroot00000000000000.. _jugene: ==================== jugene.fz-juelich.de ==================== Here you find information about the system ``_. 
Numpy needs to be built with powerpc-bgp-linux-gfortran instead of the gfortran compiler, so in order to build numpy specify the environment variable F90:: $ export F90=/bgsys/drivers/ppcfloor/gnu-linux/powerpc-bgp-linux/bin/gfortran After that, numpy can be installed to $HOME/python as:: $ ldpath=/bgsys/drivers/ppcfloor/gnu-linux/lib $ p=/bgsys/drivers/ppcfloor/gnu-linux/bin/python $ LD_LIBRARY_PATH="$ldpath" $p setup.py install --home=$HOME/python In order to build GPAW, use the following customize.py:: scalapack = True libraries = [ 'scalapack', 'blacsCinit', 'blacsF77init', 'blacs', 'lapack', 'esslbg', 'xl', 'xlopt', 'xlf90_r', 'xlfmath', 'pthread', 'xlomp_ser', ] library_dirs = [ '/bgsys/local/lib/', '/opt/ibmcmp/xlf/bg/11.1/lib', '/opt/ibmcmp/xlsmp/bg/1.7/lib', '/bgsys/drivers/ppcfloor/gnu-linux/lib' ] extra_compile_args += ['-std=c99'] define_macros += [('GPAW_AIX', '1')] define_macros += [('GPAW_BGP', '1')] define_macros += [('GPAW_NO_UNDERSCORE_BLAS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_LAPACK', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [('GPAW_NO_UNDERSCORE_BLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_SCALAPACK', '1')] Because of the missing ``popen3`` function you need to remove all the contents of the :file:`gpaw/version.py` file after ``version = '0.4'``. The same holds for :file:`ase/version.py` in the ase installation! Suggestions on how to skip the ``popen3`` testing in :file:`gpaw/version.py` on BGP are welcome!
Here is an example of batch job script:: #!/bin/bash # @ job_name = hello # @ output = $(job_name).o$(jobid) # @ error = $(job_name).e$(jobid) # @ wall_clock_limit = 00:12:00 # @ notification = never # @ notify_user = my_email@csc.fi # @ job_type = bluegene # @ bg_size = 1 # @ queue home=/homea/prace/prace025 prog=${home}/python/bin/gpaw-python args="${home}/test-gpaw/CH4.py" mpirun=/bgsys/drivers/ppcfloor/bin/mpirun ldpath="/bgsys/local/lib/ibmcmp/lib/bglib" pythonpath="${home}/python/lib/python/" gpaw_setups="${home}/gpaw-setups-0.4.2039" runargs="-np 4" runargs="$runargs -cwd $PWD" runargs="$runargs -mode SMP" runargs="$runargs -env LD_LIBRARY_PATH=$ldpath -env PYTHONPATH=$pythonpath -envGPAW_SETUP_PATH=$gpaw_setups" echo "Hello. This is `hostname` at `date` `pwd`" echo "$mpirun $runargs -exe $prog $args" /usr/bin/time $mpirun $runargs -exe $prog -args $args echo "Program completed at `date` with exit code $?." The batch jobs are submitted with ``llsubmit``:: $ llsubmit job_file gpaw-24.1.0/doc/platforms/BGP/performance.rst000066400000000000000000000241231454550013000207520ustar00rootroot00000000000000.. _bgp_performance: ============================== Maximizing performance on BG/P ============================== Begin by reading up on the GPAW parallelization strategies (:ref:`parallel_runs`) and the `BG/P architecture `_. In particular, :ref:`band_parallelization` will be needed to scale your calculation to large number of cores. The BG/P systems at the `Argonne Leadership Computing Facility `_ uses Cobalt for scheduling and it will be referred to frequently below. Other schedulers should have similar functionality. There are four key aspects that require careful considerations: 1) Choosing a parallelization strategy. #) Selecting the correct partition size (number of nodes) and mapping. #) Choosing an appropriate value of ``buffer_size``. The use of ``nblocks`` is no longer recommended. #) Setting the appropriate DCMF environmental variables. 
In the sections that follow, we aim to cultivate an understanding of how to choose these parameters. Parallelization Strategy ==================================== Parallelization options are specified at the ``gpaw-python`` command line. Domain decomposition with ``--domain-decomposition=Nx,Ny,Nz`` and band parallelization with ``--state-parallelization=B``. Additionally, the ``parallel`` keyword is also available. The smallest calculation that can benefit from band/state parallelization is *nbands = 1000*. If you are using fewer bands, you are possibly *not* in need of a leadership class computing facility. Note that only the :ref:`RMM-DIIS` eigensolver is compatible with band parallelization. Furthermore, the RMM-DIIS eigensolver requires some unoccupied bands in order to converge properly. Recommended range is:: spinpol=False nbands = valence electrons/2*[1.0 - 1.2] spinpol=True nbands = max(up valence electrons, down valence electrons)*[1.0 - 1.2] It was empirically determined that you need to have *nbands/B > 256* for reasonable performance. It is also possible to use smaller groups, *nbands/B < 256*, but this may require large domains. It is *required* that *nbands/B* be integer-divisible. The best values for B are 2, 4, 8, 16, 32, 64, and 128. Obviously, the number of total cores must equal:: Nx*Ny*Nz*B The parallelization strategy will require careful consideration of the partition size and mapping. And, obviously, also memory! Partition size and Mapping ======================================== The BG/P partition dimensions (Px, Py, Pz, T) for Surveyor and Intrepid at the Argonne Leadership Computing Facility are `available here `_, where T represents the number of MPI tasks per node (not whether a torus network is available).
The number of cores per node which execute MPI tasks is specified by the Cobalt flag:: --mode={smp,dual,vn} Hence, the possible values of T are:: T = 1 for smp T = 2 for dual T = 4 for vn Note that there are 4 cores per node and 2 GB per node on BG/P. As GPAW is presently an MPI-only code, vn mode is preferred since all cores will perform computational work. It is essential to think of the BG/P network as a 4-dimensional object with 3 spatial dimensions and a T-dimension. For optimum scalability it would seem necessary to maximize the locality of two distinct communication patterns arising in the canonical O(N^3) DFT algorithm: 1) H*Psi products #) parallel matrix multiplies. However, it turns out that this is *not* necessary. The mesh network can handle small messages rather efficiently such that the time to send a small message to a nearest-neighbor node versus a node half-way across the machine is comparable. Hence, it is only necessary to optimize the mapping for the communication arising from the parallel matrix multiply which is a simple 1D systolic communication pattern. Here we show examples of different mappings on a 512-node BG/P partition. Band groups are color coded. *(Left)* Inefficient mapping for four groups of bands (B = 4). This mapping leads to contention on network links in the z-direction. *(Right)* Efficient mapping for eight groups of bands (B=8). Correct mapping maximizes scalability and single-core peak performance. |mapping1| |mapping2| .. |mapping1| image:: bgp_mapping1.png :width: 40 % .. |mapping2| image:: bgp_mapping2.png :width: 40 % For the mapping in the *(Right)* image above, there are two communication patterns (and hence mappings) that are worth distinguishing. |intranode| .. |intranode| image:: bgp_mapping_intranode.png :width: 60 % Each box in these images represents a node and the numbers inside the box represent the distinct cores in the node (four for BG/P).
Intuitively, the communication pattern of the *(Left)* image should lead to less network contention than the *(Right)*. However, this is not the case due to a lack of optimization in the intranode implementation of MPI. The performance of these communication patterns is presently identical, though this may change in future versions of the BG/P implementation of MPI. Mapping is accomplished by the Cobalt flag:: --env=BG_MAPPING=<mapping> where *<mapping>* can be one of the canonical BG/P mappings (permutations of XYZT with T at the beginning or end) or a mapfile. Lastly, it is important to note that GPAW orders the MPI tasks as follows:: Z, Y, X, bands, kpoints, and spins. A list of mappings is provided below. Note that this list is not exhaustive. The constraint on the mapping comes from the value of *B*; only *one* of these constraints must be true: 1) The last dimension in the canonical BG/P mapping equals the value of *B*. #) For canonical BG/P mappings which end in T, the product of T and the last cartesian dimension in the mapping equals *B*. #) If a canonical mapping is not immediately suitable, the keyword ``order`` in the ``parallel`` dictionary can be used to rectify the problem. See the documentation on :ref:`parallel_runs`. B = 2 -------- Simply set the following variables in your submission script:: mode = dual mapping = any canonical mapping ending with a T The constraint on the domain-decomposition is simply:: Nx*Ny*Nz = Px*Py*Pz B = 4 -------- Similar to the *B=2* case, but with:: mode = vn B = 8, 16, 32, 64, or 128 -------------------------- This is left as an exercise for the user. Setting the value of buffer_size ================================ Use ``buffer_size=2048``. Refer to :ref:`manual_parallel` for more information about the ``buffer_size`` keyword. Larger values require increasing the default value of DCMF_RECFIFO. For those interested in more technical details, continue reading this section.
The computation of the Hamiltonian and overlap matrix elements, as well as the computation of the new wavefunctions, is accomplished by a hand-coded parallel matrix-multiply ``hs_operators.py`` employing a 1D systolic ring algorithm. Under the *original* implementation of the matrix-multiply algorithm, it was necessary to select appropriate values for the number of blocks ``nblocks``:: from gpaw.hs_operators import MatrixOperator MatrixOperator.nblocks = K MatrixOperator.async = True (default) where the ``B`` groups of bands are further divided into ``K`` blocks. It was also required that *nbands/B/K* be integer-divisible. The value of ``K`` should be chosen so that 2 MB of wavefunctions are interchanged. The special cases of B=2, 4 as described above permit blocks of wavefunctions larger than 2 MB to be interchanged since there is only intranode communication. The size of the wavefunction being interchanged is given by:: gpts = (Gx, Gy, Gz) size of wavefunction block in MB = (Gx/Nx, Gy/Ny, Gz/Nz)*(nbands/B/K)*8/1024^2 The constraints on the value of nbands are: 1) ``nbands/B`` must be integer divisible #) ``nbands/B/K`` must be integer divisible. #) size of wavefunction block ~ 2 MB #) ``nbands`` must be sufficiently large so that the RMM-DIIS eigensolver converges The second constraint above is no longer applicable as of SVN version 7520. Important DCMF environment variables =============================================== `DCMF `_ is one of the lower layers in the BG/P implementation of the MPI software stack. To understand the DCMF environment variables in greater detail, please read the appropriate sections of the IBM System Blue Gene Solution: `Blue Gene/P Application Development `_ DCMF_EAGER and DCMF_RECFIFO ----------------------------------- Communication and computation are overlapped to the extent allowed by the hardware by using non-blocking sends (Isend) and receives (Irecv).
It will be also be necessary to pass to Cobalt:: --env=DCMF_EAGER=8388608 which corresponds to the larger size message that can be overlapped (8 MB). Note that the number is specified in bytes and not megabytes. This is larger than the target 2 MB size, but we keep this for historical reasons since it is possible to use larger blocks of wavefunctions in the case of *smp* or *dual* mode. This is also equal to the default size of the DCMF_RECFIFO. If the following warning is obtained,:: A DMA unit reception FIFO is full. Automatic recovery occurs for this event, but performance might be improved by increasing the FIFO size the default value of the DCMF_RECFIFO should be increased:: --env=DCMF_RECFIFO= DCMF_REUSE_STORAGE ------------------------- If you receive allocation error on MPI_Allreduce, please add the following environment variables:: --env=DCMF_REDUCE_REUSE_STORAGE=N:DCMF_ALLREDUCE_REUSE_STORAGE=N:DCMF_REDUCE=RECT It is very likely that your calculation is low on memory. Simply try using more nodes. DCMF_ALLTOALL_PREMALLOC ------------------------------- HDF5 uses MPI_Alltoall which can consume a significant amount of memory. The default behavior for MPI collectives on Blue Gene/P is to not release memory between calls due to peformance reasons. We recommend setting this environment variable to overide the default behavior:: --env DCMF_ALLTOALL_PREMALLOC=N: gpaw-24.1.0/doc/platforms/BGQ/000077500000000000000000000000001454550013000157165ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/BGQ/bgq_xlc.py000077500000000000000000000030721454550013000177140ustar00rootroot00000000000000#!/usr/bin/env python """bgp_xlc.py is a wrapper for the BGP xlc compiler, converting/removing incompatible gcc args. 
""" import sys from subprocess import call from glob import glob args2change = {"-fno-strict-aliasing":"", "-fmessage-length=0":"", "-Wall":"", "-std=c99":"-qlanglvl=extc99", "-fPIC":"", "-g":"", "-D_FORTIFY_SOURCE=2":"", "-DNDEBUG":"", "-UNDEBUG":"", "-pthread":"", "-shared":"-qmkshrobj", "-Xlinker":"", "-export-dynamic":"", "-Wstrict-prototypes":"", "-dynamic":"", "-O3":"", "-O2":"", "-O1":"", "-fwrapv":""} fragile_files = ["test.c"] qhot_files = ["c/blas.c", "c/utilities.c","c/lfc.c","c/localized_functions.c"] non_c99files = glob('c/libxc/src/*.c') cmd = "" opt = 1 for arg in sys.argv[1:]: cmd += " " t = arg.strip() if t in fragile_files: opt = 2 if t in non_c99files: opt = 3 if t in qhot_files: opt = 4 if t in args2change: cmd += args2change[t] else: cmd += arg flags_list = {1: "-g -O3 -qlanglvl=extc99 -qflag=w:w", 2: "-g -O3 -qlanglvl=extc99 -qflag=w:w", 3: "-g -O3 -qflag=w:w ", 4: "-g -O3 -qnohot -qsimd=noauto -qlanglvl=extc99 -qflag=w:w", } flags = flags_list[opt] cmd = f"mpixlc_r {flags} {cmd}" print(f"\nexecmd: {cmd}\n") call(cmd, shell=True) gpaw-24.1.0/doc/platforms/BGQ/bgq_xlc_linker.py000077500000000000000000000030531454550013000212570ustar00rootroot00000000000000#!/usr/bin/env python """bgp_xlc.py is a wrapper for the BGP xlc compiler, converting/removing incompatible gcc args. 
""" import sys from subprocess import call from glob import glob args2change = {"-fno-strict-aliasing":"", "-fmessage-length=0":"", "-Wall":"", "-std=c99":"-qlanglvl=extc99", "-fPIC":"", "-g":"", "-D_FORTIFY_SOURCE=2":"", "-DNDEBUG":"", "-UNDEBUG":"", "-pthread":"", "-shared":"-qmkshrobj", "-Xlinker":"", "-export-dynamic":"", "-Wstrict-prototypes":"", "-dynamic":"", "-O3":"", "-O2":"", "-O1":"", "-fwrapv":""} fragile_files = ["test.c"] qhot_files = ["c/blas.c", "c/utilities.c","c/lfc.c","c/localized_functions.c"] non_c99files = glob('c/libxc/src/*.c') cmd = "" opt = 1 for arg in sys.argv[1:]: cmd += " " t = arg.strip() if t in fragile_files: opt = 2 if t in non_c99files: opt = 3 if t in qhot_files: opt = 4 if t in args2change: cmd += args2change[t] else: cmd += arg flags_list = {1: "-g -O3 -qlanglvl=extc99 -qflag=w:w", 2: "-g -O3 -qstrict -qlanglvl=extc99 -qflag=w:w", 3: "-g -O3 -qflag=w:w", 4: "-g -O3 -qlanglvl=extc99 -qflag=w:w", } flags = flags_list[opt] cmd = f"bgxlc_r {flags} {cmd}" print(f"\nexecmd: {cmd}\n") call(cmd, shell=True) gpaw-24.1.0/doc/platforms/BGQ/build_gpaw.sh000077500000000000000000000011201454550013000203640ustar00rootroot00000000000000#!/bin/bash export ibmcmp_base=/soft/compilers/ibmcmp-nov2012 python=/soft/apps/python/scalable-python-2.6.7-cnk-gcc/bin/python #python=/bgsys/tools/Python-2.6/bin/python echo $python ${python} setup.py clean # manually remove _hdf5.so due to bug in build system rm -f build/lib.linux-ppc64-2.6/_hdf5.so # rm -f build/temp.linux-ppc64-2.6/*.o # compile _gpaw.so # ${python} setup.py build_ext --customize=customize_mira_xlc_serial.py >& build_xlc_serial.log # compile gpaw-python ${python} setup.py build_ext --ignore-numpy --customize=customize_mira_xlc_mpi.py 2>&1 | tee build_xlc_mpi.log gpaw-24.1.0/doc/platforms/BGQ/build_numpy.sh000077500000000000000000000010021454550013000205750ustar00rootroot00000000000000#!/bin/sh export CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc export 
BASECFLAGS="-fno-strict-aliasing" export LD_LIBRARY_PATH=/bgsys/drivers/ppcfloor/gnu-linux/lib64 export PYTHONHOME=/soft/apps/python/scalable-python-2.6.7-cnk-gcc export PYTHON=${PYTHONHOME}/bin/python # root=/soft/apps/python/scalable-python-2.6.7-cnk-gcc buildir=build rm -rf ${builddir} # ${PYTHON} setup.py install --root="$root" 2>&1 | tee numpy-1.3.0.log.mira ${PYTHON} setup.py install 2>&1 | tee numpy-1.3.0.log.miragpaw-24.1.0/doc/platforms/BGQ/build_scalable_python.sh000077500000000000000000000011011454550013000225740ustar00rootroot00000000000000#!/bin/bash export CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc export CXX=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-g++ export MPICC=mpicc export CCSHARED=-fPIC export LINKFORSHARED='-Xlinker -export-dynamic -dynamic' export MPI_LDFLAGS_SHARED='-Xlinker -export-dynamic -dynamic' ./configure --prefix=/soft/apps/python/scalable-python-2.6.7-cnk-gcc --enable-mpi --disable-ipv6 2>&1 | tee mira-conf make 2>&1 | tee mira-make make mpi 2>&1 | tee mira-make-mpi make install 2>&1 | tee mira-inst make install-mpi 2>&1 | tee mira-inst-mpi gpaw-24.1.0/doc/platforms/BGQ/config_mira.py000066400000000000000000000431671454550013000205600ustar00rootroot00000000000000# Copyright (C) 2006 CSC-Scientific Computing Ltd. # Please see the accompanying LICENSE file for further information. 
import os import sys import re import distutils.util from packaging.version import Version from distutils.sysconfig import get_config_var, get_config_vars from glob import glob from os.path import join from stat import ST_MTIME def check_packages(packages, msg, include_ase, import_numpy): """Check the python version and required extra packages If ASE is not installed, the `packages` list is extended with the ASE modules if they are found.""" if sys.version_info < (2, 3, 0, 'final', 0): raise SystemExit('Python 2.3.1 or later is required!') if import_numpy: try: import numpy except ImportError: raise SystemExit('numpy is not installed!') else: msg += ['* numpy is not installed.', ' "include_dirs" in your customize.py must point to "numpy/core/include".'] if not include_ase: if import_numpy: try: import ase except ImportError: import_ase = True else: import_ase = False else: import_ase = False if include_ase or import_ase: # Find ASE directories: # include_ase works in case: # cd gpaw # top-level gpaw source directory # tar zxf ~/python-ase-3.1.0.846.tar.gz # ln -s python-ase-3.1.0.846/ase . ase_root = 'ase' if include_ase: assert os.path.isdir(ase_root), ase_root+': No such file or directory' ase = [] for root, dirs, files in os.walk(ase_root): if 'CVS' in dirs: dirs.remove('CVS') if '.svn' in dirs: dirs.remove('.svn') if '__init__.py' in files: ase.append(root.replace('/', '.')) if len(ase) == 0: msg += ['* ASE is not installed! You may be able to install', " gpaw, but you can't use it without ASE!"] else: packages += ase def find_file(arg, dir, files): #looks if the first element of the list arg is contained in the list files # and if so, appends dir to to arg. 
To be used with the os.path.walk if arg[0] in files: arg.append(dir) def get_system_config(define_macros, undef_macros, include_dirs, libraries, library_dirs, extra_link_args, extra_compile_args, runtime_library_dirs, extra_objects, msg, import_numpy): undef_macros += ['NDEBUG'] if import_numpy: import numpy include_dirs += [numpy.get_include()] include_dirs += ['c/libxc'] machine = os.uname()[4] if machine == 'sun4u': # _ # |_ | ||\ | # _||_|| \| # extra_compile_args += ['-Kpic', '-fast'] # Suppress warning from -fast (-xarch=native): f = open('cc-test.c', 'w') f.write('int main(){}\n') f.close() stderr = os.popen3('cc cc-test.c -fast')[2].read() arch = re.findall('-xarch=(\S+)', stderr) os.remove('cc-test.c') if len(arch) > 0: extra_compile_args += [f'-xarch={arch[-1]}'] # We need the -Bstatic before the -lsunperf and -lfsu: # forum.java.sun.com/thread.jspa?threadID=5072537&messageID=9265782 extra_link_args += ['-Bstatic', '-lsunperf', '-lfsu', '-Bdynamic'] cc_version = os.popen3('cc -V')[2].readline().split()[3] if Version(cc_version) > '5.6': libraries.append('mtsk') else: extra_link_args.append('-lmtsk') #define_macros.append(('NO_C99_COMPLEX', '1')) msg += ['* Using SUN high performance library'] elif sys.platform in ['aix5', 'aix6']: # # o|_ _ _ # ||_)| | | # extra_compile_args += ['-qlanglvl=stdc99'] # setting memory limit is necessary on aix5 if sys.platform == 'aix5': extra_link_args += ['-bmaxdata:0x80000000', '-bmaxstack:0x80000000'] libraries += ['f', 'lapack', 'essl'] define_macros.append(('GPAW_AIX', '1')) elif machine == 'x86_64': # _ # \/|_||_ |_ |_| # /\|_||_| _ |_| | # extra_compile_args += ['-Wall', '-std=c99'] # Look for ACML libraries: acml = glob('/opt/acml*/g*64/lib') if len(acml) > 0: library_dirs += [acml[-1]] libraries += ['acml'] if acml[-1].find('gfortran') != -1: libraries.append('gfortran') if acml[-1].find('gnu') != -1: libraries.append('g2c') extra_link_args += ['-Wl,-rpath=' + acml[-1]] msg += ['* Using ACML library'] else: atlas = 
False for dir in ['/usr/lib', '/usr/local/lib']: if glob(join(dir, 'libatlas.a')) != []: atlas = True break if atlas: libraries += ['lapack', 'atlas', 'blas'] library_dirs += [dir] msg += ['* Using ATLAS library'] else: libraries += ['blas', 'lapack'] msg += ['* Using standard lapack'] elif machine =='ia64': # _ _ # |_ | o # _||_|| # extra_compile_args += ['-Wall', '-std=c99'] libraries += ['mkl','mkl_lapack64'] elif machine == 'i686': # _ # o|_ |_||_ # ||_||_||_| # extra_compile_args += ['-Wall', '-std=c99'] if 'MKL_ROOT' in os.environ: mklbasedir = [os.environ['MKL_ROOT']] else: mklbasedir = glob('/opt/intel/mkl*') libs = ['libmkl_ia32.a'] if mklbasedir != []: os.path.walk(mklbasedir[0],find_file, libs) libs.pop(0) if libs != []: libs.sort() libraries += ['mkl_lapack', 'mkl_ia32', 'guide', 'pthread', 'mkl']#, 'mkl_def'] library_dirs += libs msg += [f'* Using MKL library: {library_dirs[-1]}'] #extra_link_args += ['-Wl,-rpath=' + library_dirs[-1]] else: atlas = False for dir in ['/usr/lib', '/usr/local/lib']: if glob(join(dir, 'libatlas.a')) != []: atlas = True break if atlas: libraries += ['lapack', 'atlas', 'blas'] library_dirs += [dir] msg += ['* Using ATLAS library'] else: libraries += ['blas', 'lapack'] msg += ['* Using standard lapack'] # add libg2c if available g2c=False for dir in ['/usr/lib', '/usr/local/lib']: if glob(join(dir, 'libg2c.so')) != []: g2c=True break if glob(join(dir, 'libg2c.a')) != []: g2c=True break if g2c: libraries += ['g2c'] elif sys.platform == 'darwin': extra_compile_args += ['-Wall', '-std=c99'] include_dirs += ['/usr/include/malloc'] if glob('/System/Library/Frameworks/vecLib.framework') != []: extra_link_args += ['-framework vecLib'] msg += ['* Using vecLib'] else: libraries += ['blas', 'lapack'] msg += ['* Using standard lapack'] return msg def get_parallel_config(mpi_libraries,mpi_library_dirs,mpi_include_dirs, mpi_runtime_library_dirs,mpi_define_macros): globals = {} exec(open('gpaw/mpi/config.py').read(), globals) mpi = 
globals['get_mpi_implementation']() if mpi == '': mpicompiler = None elif mpi == 'sun': mpi_include_dirs += ['/opt/SUNWhpc/include'] mpi_libraries += ['mpi'] mpi_library_dirs += ['/opt/SUNWhpc/lib'] mpi_runtime_library_dirs += ['/opt/SUNWhpc/lib'] mpicompiler = get_config_var('CC') elif mpi == 'poe': mpicompiler = 'mpcc_r' else: #Try to use mpicc mpicompiler = 'mpicc' return mpicompiler def get_scalapack_config(define_macros): # check ScaLapack settings define_macros.append(('GPAW_WITH_SL', '1')) def mtime(path, name, mtimes): """Return modification time. The modification time of a source file is returned. If one of its dependencies is newer, the mtime of that file is returned. This function fails if two include files with the same name are present in different directories.""" include = re.compile('^#\s*include "(\S+)"', re.MULTILINE) if name in mtimes: return mtimes[name] t = os.stat(os.path.join(path, name))[ST_MTIME] for name2 in include.findall(open(os.path.join(path, name)).read()): path2, name22 = os.path.split(name2) if name22 != name: t = max(t, mtime(os.path.join(path, path2), name22, mtimes)) mtimes[name] = t return t def check_dependencies(sources): # Distutils does not do deep dependencies correctly. We take care of # that here so that "python setup.py build_ext" always does the right # thing! 
mtimes = {} # modification times # Remove object files if any dependencies have changed: plat = distutils.util.get_platform() + '-' + sys.version[0:3] remove = False for source in sources: path, name = os.path.split(source) t = mtime(path + '/', name, mtimes) o = f'build/temp.{plat}/{source[:-2]}.o' # object file if os.path.exists(o) and t > os.stat(o)[ST_MTIME]: print('removing', o) os.remove(o) remove = True so = f'build/lib.{plat}/_gpaw.so' if os.path.exists(so) and remove: # Remove shared object C-extension: # print 'removing', so os.remove(so) def test_configuration(): raise NotImplementedError def write_configuration(define_macros, include_dirs, libraries, library_dirs, extra_link_args, extra_compile_args, runtime_library_dirs, extra_objects, mpicompiler, mpi_libraries, mpi_library_dirs, mpi_include_dirs, mpi_runtime_library_dirs, mpi_define_macros): # Write the compilation configuration into a file try: out = open('configuration.log', 'w') except IOError as x: print(x) return print("Current configuration", file=out) print("libraries", libraries, file=out) print("library_dirs", library_dirs, file=out) print("include_dirs", include_dirs, file=out) print("define_macros", define_macros, file=out) print("extra_link_args", extra_link_args, file=out) print("extra_compile_args", extra_compile_args, file=out) print("runtime_library_dirs", runtime_library_dirs, file=out) print("extra_objects", extra_objects, file=out) if mpicompiler is not None: print(file=out) print("Parallel configuration", file=out) print("mpicompiler", mpicompiler, file=out) print("mpi_libraries", mpi_libraries, file=out) print("mpi_library_dirs", mpi_library_dirs, file=out) print("mpi_include_dirs", mpi_include_dirs, file=out) print("mpi_define_macros", mpi_define_macros, file=out) print("mpi_runtime_library_dirs", mpi_runtime_library_dirs, file=out) out.close() def build_interpreter(define_macros, include_dirs, libraries, library_dirs, extra_link_args, extra_compile_args, runtime_library_dirs, 
extra_objects, mpicompiler, mpilinker, mpi_libraries, mpi_library_dirs, mpi_include_dirs, mpi_runtime_library_dirs, mpi_define_macros): #Build custom interpreter which is used for parallel calculations cfgDict = get_config_vars() plat = distutils.util.get_platform() + '-' + sys.version[0:3] cfiles = glob('c/[a-zA-Z_]*.c') + ['c/bmgs/bmgs.c'] cfiles += glob('c/libxc/src/*.c') if ('HDF5', 1) in define_macros: cfiles += glob('h5py/c/*.c') cfiles += glob('h5py/c/lzf/*.c') cfiles2remove = ['c/libxc/src/test.c', 'c/libxc/src/xc_f.c', 'c/libxc/src/work_gga_x.c', 'c/libxc/src/work_lda.c' ] for c2r in glob('c/libxc/src/funcs_*.c'): cfiles2remove.append(c2r) for c2r in cfiles2remove: cfiles.remove(c2r) sources = ['c/bc.c', 'c/localized_functions.c', 'c/mpi.c', 'c/_gpaw.c', 'c/operators.c', 'c/transformers.c', 'c/compiled_WITH_SL.c', 'c/blacs.c', 'c/utilities.c'] objects = ' '.join(['build/temp.%s/' % plat + x[:-1] + 'o' for x in cfiles]) if not os.path.isdir(f'build/bin.{plat}/'): os.makedirs(f'build/bin.{plat}/') exefile = f'build/bin.{plat}/' + '/gpaw-python' # if you want static linked MPI libraries, uncomment the line below libraries += mpi_libraries library_dirs += mpi_library_dirs define_macros += mpi_define_macros include_dirs += mpi_include_dirs runtime_library_dirs += mpi_runtime_library_dirs define_macros.append(('PARALLEL', '1')) define_macros.append(('GPAW_INTERPRETER', '1')) macros = ' '.join(['-D%s=%s' % x for x in define_macros if x[0].strip()]) include_dirs.append(cfgDict['INCLUDEPY']) include_dirs.append(cfgDict['CONFINCLUDEPY']) includes = ' '.join(['-I' + incdir for incdir in include_dirs]) library_dirs.append(cfgDict['LIBPL']) lib_dirs = ' '.join(['-L' + lib for lib in library_dirs]) libs = ' '.join(['-l' + lib for lib in libraries if lib.strip()]) # BlueGene/P can statically link everything except # python, runtime and pthread library libs += ' -Wl,-dy' libpl = cfgDict['LIBPL'] if glob(libpl + '/libpython*mpi*'): libs += f" 
-lpython{cfgDict['VERSION']}_mpi" else: libs += f" -lpython{cfgDict['VERSION']}" # libs += ' -lpython%s ' % cfgDict['VERSION'] # if you want dynamic linked MPI libraries, uncomment the line below # only really needed for TAU profiling # libs += ' '.join(['-l' + lib for lib in mpi_libraries]) libs += ' -lrt -lpthread' libs = ' '.join([libs, cfgDict['LIBS'], cfgDict['LIBM']]) #Hack taken from distutils to determine option for runtime_libary_dirs if sys.platform[:6] == 'darwin': # MacOSX's linker doesn't understand the -R flag at all runtime_lib_option = '-L' elif sys.platform[:5] == 'hp-ux': runtime_lib_option = '+s -L' elif os.popen('mpicc --showme 2> /dev/null', 'r').read()[:3] == 'gcc': runtime_lib_option = '-Wl,-R' elif os.popen('mpicc -show 2> /dev/null', 'r').read()[:3] == 'gcc': runtime_lib_option = '-Wl,-R' else: runtime_lib_option = '-R' runtime_libs = ' '.join([ runtime_lib_option + lib for lib in runtime_library_dirs]) extra_link_args.append(cfgDict['LDFLAGS']) if sys.platform in ['aix5', 'aix6']: extra_link_args.append(cfgDict['LINKFORSHARED'].replace('Modules', cfgDict['LIBPL'])) elif sys.platform == 'darwin': pass else: extra_link_args.append(cfgDict['LINKFORSHARED']) if ('IO_WRAPPERS', 1) in define_macros: extra_link_args += ['-Wl,-wrap,fread', '-Wl,-wrap,_IO_getc', '-Wl,-wrap,getc_unlocked', '-Wl,-wrap,fgets', '-Wl,-wrap,ungetc', '-Wl,-wrap,feof', '-Wl,-wrap,ferror', '-Wl,-wrap,fflush', '-Wl,-wrap,fseek', '-Wl,-wrap,rewind', # '-Wl,-wrap,fileno', '-Wl,-wrap,flockfile', '-Wl,-wrap,funlockfile', '-Wl,-wrap,clearerr', '-Wl,-wrap,fgetpos', '-Wl,-wrap,fsetpos', '-Wl,-wrap,setbuf', '-Wl,-wrap,setvbuf', '-Wl,-wrap,ftell', '-Wl,-wrap,fstat', '-Wl,-wrap,fstat64', '-Wl,-wrap,fgetc', # '-Wl,-wrap,fputc', # '-Wl,-wrap,fputs', # '-Wl,-wrap,fwrite', # '-Wl,-wrap,_IO_putc', '-Wl,-wrap,fopen', '-Wl,-wrap,fopen64', '-Wl,-wrap,fclose', ] # Compile the parallel sources for src in sources: obj = f'build/temp.{plat}/' + src[:-1] + 'o' cmd = ('%s %s %s %s -o %s -c %s ' ) % 
\ (mpicompiler, macros, ' '.join(extra_compile_args), includes, obj, src) print(cmd) if '--dry-run' not in sys.argv: error=os.system(cmd) if error != 0: msg = ['* compiling FAILED! Only serial version of code will work.'] break # Link the custom interpreter cmd = ('%s -o %s %s %s %s %s %s %s' ) % \ (mpilinker, exefile, objects, ' '.join(extra_objects), lib_dirs, libs, runtime_libs, ' '.join(extra_link_args)) msg = ['* Building a custom interpreter'] print(cmd) if '--dry-run' not in sys.argv: error=os.system(cmd) if error != 0: msg += ['* linking FAILED! Only serial version of code will work.'] return error, msg gpaw-24.1.0/doc/platforms/BGQ/customize_mira_xlc_mpi.py000066400000000000000000000062221454550013000230370ustar00rootroot00000000000000define_macros += [('GPAW_NO_UNDERSCORE_BLAS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_LAPACK', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [('GPAW_NO_UNDERSCORE_BLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_SCALAPACK', '1')] define_macros += [('GPAW_ASYNC', 1)] # define_macros += [('GPAW_MR3',1)] # requires developmental ScaLAPACK # uncomment two lines below for FLOP rate measurement # define_macros += [('GPAW_HPM',1)] define_macros += [('GPAW_PERFORMANCE_REPORT',1)] # define_macros += [('GPAW_MPI_DEBUG', 1)] # debugging # define_macros += [('GPAW_OMP',1)] # not really working scalapack = True hdf5 = True # If you are using threading, you probably # need to change the following library: # xlomp_ser -> xlsmp # # DO NOT INTERCHANGE THE ORDER OF LAPACK # & ESSL, LAPACK SHOULD BE LINKED FIRST. # # Goto appears to be much faster for general # DGEMM operations, particularly those with: # alpha != 1.0 and beta != 0.0 # # Goto is hand-tuned assembly, it will most # likely always be faster than ESSL-4.x. # NAR: Goto appears to cause core dumps for # some problems, use at your own risk. 
# Disabling the stackground seems to make # the problem go away, but this is not # recommended. # --env BG_STACKGUARDENABLE=0 #multi threaded libraries = [ 'scalapack', 'lapack', 'esslsmpbg', 'xlf90_r', 'xlopt', 'xl', 'xlfmath', 'xlsmp', ] #single threaded # libraries = [ # 'scalapack', # 'lapack', # 'esslbg', # 'xlf90_r', # 'xlopt', # 'xl', # 'xlfmath', # 'xlomp_ser', # ] import os ibmcmp_base = os.environ['ibmcmp_base'] python_base = '/soft/apps/python/scalable-python-2.6.7-cnk-gcc' library_dirs = [ '/soft/libraries/alcf/current/xl/LAPACK/lib', '/soft/libraries/alcf/current/xl/SCALAPACK/lib', '/soft/libraries/essl/5.1.1-0/lib64', f'{ibmcmp_base}/xlf/bg/14.1/bglib64', f'{ibmcmp_base}/xlsmp/bg/3.1/bglib64', # plain vanilla Python # '/bgsys/tools/Python-2.6/lib64', # scalable Python 2.6.7 f'{python_base}', '/soft/libraries/unsupported/hdf5-1.8.8/lib/', ] # plain vanilla Python # include_dirs += [ # '/soft/apps/python/python-2.6.6-cnk-gcc/bgsys/tools/Python-2.6/lib64/python2.6/site-packages/numpy/core/include' # ] # scalable Python 2.6.7 include_dirs += [ f'{python_base}/lib/python2.6/site-packages/numpy/core/include', '/soft/libraries/unsupported/hdf5-1.8.8/include/' ] mpi_libraries = [ # 'mpihpm_smp', 'hdf5', 'mpich', 'opa', 'mpl', 'pami', 'SPI', 'SPI_cnk', 'stdc++', # 'bgpm', ] mpi_library_dirs = [ '/bgsys/drivers/ppcfloor/comm/xl.legacy/lib', '/bgsys/drivers/ppcfloor/comm/sys/lib', '/bgsys/drivers/ppcfloor/spi/lib', '/soft/perftools/hpctw', '/soft/perftools/bgpm/lib', ] extra_link_args = ['-Wl,-export-dynamic'] compiler = "./bgq_xlc.py" mpicompiler = "./bgq_xlc.py" mpilinker = "./bgq_xlc_linker.py" gpaw-24.1.0/doc/platforms/BGQ/mira.rst000066400000000000000000000041301454550013000173760ustar00rootroot00000000000000.. _mira: ======================= Blue Gene/Q - Mira ======================= The build instructions here are representative of the Blue Gene/Q system at the Argonne Leadership Computing Facility. 
Users will need to adapt these instructions for specific installations at their respective sites. In addition to the standard libraries needed on other platforms, Scalable Python ``_ is required for running effectively on Blue Gene/Q. A build script for Scalable Python is provided below: .. literalinclude:: build_scalable_python.sh NumPy 1.3.0 or later is recommended. Distutils does not work well on PPC architectures and a compiler must be explicitly specified. A build script for NumPy 1.3.0 is provided below: .. literalinclude:: build_numpy.sh GPAW will build with the XL legacy MPI wrapper script. It is recommended that you statically link as many libraries as possible into GPAW to avoid potential performance bottlenecks in loading shared libraries at scale. This can be done with some modification of the stock GPAW config.py file :download:`config_mira.py`. Lastly, we recommend that GPAW is compiled with both ScaLAPACK (v. 2.0.2 or later) as well as HDF5 support. Here is an example customization file: .. literalinclude:: customize_mira_xlc_mpi.py which requires a number of wrappers for the XL compilers (:download:`bgq_xlc.py` and :download:`bgq_xlc_linker.py`). A build script for GPAW is provided for convenience :download:`build_gpaw.sh`. After all Python modules are installed, they should be byte compiled before running GPAW. This can be accomplished by going to the top level directory for each Python library (Scalable Python, NumPy, ASE, and GPAW) and executing the command:: ${python} -m compileall . where ``${python}`` is the explicit path to the Scalable Python interpreter. Some users have noticed that the Python interpreter may unnecessarily re-compile Python modules. This is problematic at large (>10,000) number of MPI tasks and we recommend that users set the environment variable:: PYTHONDONTBYTECOMPILE=1 in the job submission script.
gpaw-24.1.0/doc/platforms/BSD/000077500000000000000000000000001454550013000157155ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/BSD/FreeBSD.rst000066400000000000000000000006441454550013000176650ustar00rootroot00000000000000.. _FreeBSD: ======= FreeBSD ======= Here you find information about the system ``_. To build gpaw add to the ``gpaw/customize.py``:: compiler='gcc44' extra_compile_args += ['-Wall', '-std=c99'] library_dirs += ['/usr/local/lib'] libraries += ['blas', 'lapack', 'gfortran'] If you want to build a parallel version install mpich2 (net/mpich2). Openmpi does currently not work with gpaw! gpaw-24.1.0/doc/platforms/Bull/000077500000000000000000000000001454550013000162035ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Bull/build_scalable_python_curie.sh000066400000000000000000000010671454550013000242600ustar00rootroot00000000000000#!/bin/bash export CC=gcc export CXX=g++ export MPICC=mpicc # export XTPE_LINK_TYPE=dynamic export LINKFORSHARED='-Wl,-export-dynamic -dynamic' export MPI_LINKFORSHARED='-Wl,-export-dynamic -dynamic' install_prefix=/my_prefix/scalable-gnu # Make zlib built-in to the interpreter sed -i -e 's/^#zlib.*/zlib zlibmodule.c -lz/' Modules/Setup.dist ./configure --prefix=$install_prefix --enable-mpi --disable-ipv6 2>&1 | tee log-conf make 2>&1 | tee log-make make install 2>&1 | tee log-inst make mpi 2>&1 | tee log-make-mpi make install-mpi 2>&1 | tee log-inst-mpi gpaw-24.1.0/doc/platforms/Bull/curie.rst000066400000000000000000000047201454550013000200470ustar00rootroot00000000000000.. _curie: =========================================================== Curie (BullX cluster, Intel Nehalem, Infiniband QDR, MKL) =========================================================== .. note:: These instructions are up-to-date as of October 2014 Here you find information about the system http://www-hpc.cea.fr/en/complexe/tgcc-curie.htm For large calculations, it is suggested that one utilizes the Scalable Python interpreter. 
Small to medium size calculations are fine with standard Python; for these one can use the system's default Python (which contains NumPy), and thus skip directly to the LibXC/GPAW instructions. Scalable Python =============== The standard Python interpreter has a serious bottleneck in large-scale parallel calculations when many MPI tasks perform the same I/O operations during import statements. Scalable Python ``_ reduces the import time by having only a single MPI task perform the import-related I/O operations and then using MPI to broadcast the data. First, download the source code and switch to the GNU environment:: git clone git@gitorious.org:scalable-python/scalable-python.git module switch intel gnu export OMPI_MPICC=gcc Use the following build script (change the installation prefix to a proper one): .. literalinclude:: build_scalable_python_curie.sh Then add ``install_prefix/bin`` to your PATH, and download and install NumPy:: export PATH=install_prefix/bin:$PATH wget http://sourceforge.net/projects/numpy/files/NumPy/1.8.1/numpy-1.8.1.tar.gz tar xzf numpy-1.8.1.tar.gz cd numpy-1.8.1 python setup.py install LibXC ===== Download libxc:: wget http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.2.0.tar.gz Configure and make:: ./configure --prefix=install_prefix CFLAGS=-fPIC make make install GPAW ==== The Intel compiler gives slightly better performance for GPAW, so one should switch back to the Intel environment after the Scalable Python/LibXC installation:: module switch gnu intel unset OMPI_MPICC Furthermore, in order to utilize HDF5, load the module:: module load hdf5/1.8.12_parallel Use the compiler wrapper file :git:`~doc/platforms/Linux/icc.py` .. literalinclude:: icc.py and the following configuration file :download:`customize_curie_icc.py`. ..
literalinclude:: customize_curie_icc.py GPAW can now build in a normal way (make sure Scalable Python is in the PATH):: python setup.py install --home=path_to_install_prefix gpaw-24.1.0/doc/platforms/Bull/curie_gpu.rst000066400000000000000000000021531454550013000207200ustar00rootroot00000000000000.. _curie_gpu: ==================== curie.ccc.cea.fr GPU ==================== Here you find information about the system http://www-hpc.cea.fr/en/complexe/tgcc-curie.htm. **Warning**: May 14 2013: rpa-gpu-expt branch fails to compile due to cublasZdgmm missing in cuda-4.2. The system runs Bull Linux. The installation assumes *bash* shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - module files are located under ``~/CAMd/modulefiles``:: mkdir ~/CAMd/modulefiles - download the :download:`customize_curie_gpu.py` file: .. literalinclude:: customize_curie_gpu.py - download packages with :download:`download_curie_gpu.sh`: .. literalinclude:: download_curie_gpu.sh - install packages, deploy modules and test with :download:`install_curie_gpu.sh`: .. literalinclude:: install_curie_gpu.sh **Note** that every time you wish to install a new version of a package, and deploy new module file, better keep the old module file. - submit a test job:: ccc_msub msub_curie_gpu.sh using the following :file:`msub_curie_gpu.sh`: .. 
literalinclude:: msub_curie_gpu.sh gpaw-24.1.0/doc/platforms/Bull/customize_curie_gcc.py000066400000000000000000000006631454550013000226070ustar00rootroot00000000000000import os scalapack = True hdf5 = True mpicompiler = 'mpicc' libraries = [] # MKL flags mkl_flags = os.environ['MKL_SCA_LIBS'] extra_link_args = [mkl_flags] # HDF5 flags include_dirs += [os.environ['HDF5_INC_DIR']] libraries += ['hdf5'] library_dirs += [os.environ['HDF5_LIB_DIR']] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Bull/customize_curie_gpu.py000066400000000000000000000034641454550013000226500ustar00rootroot00000000000000import os scalapack = True hdf5 = False # ld: /usr/local/phdf5-1.8.5/lib/libhdf5.a(H5.o): relocation R_X86_64_32 against `.rodata.str1.4' can not be used when making a shared object; recompile with -fPIC compiler = 'icc' mpi='/opt/mpi/bullxmpi/1.1.16.5' mkl='/usr/local/Intel_compilers/c/composer_xe_2011_sp1.7.256/mkl/lib/intel64' intel='/usr/local/Intel_compilers/c/composer_xe_2011_sp1.7.256/compiler/lib/intel64' hdf='/usr/local/phdf5-1.8.5' # # cublasZdgmm does not exist in cuda 4.2 # /tmp/ipo_iccjq2M5h1.o: In function `cudgmm': # ipo_out1.c:(.text.hot0001d+0x522b): undefined reference to `cublasZdgmm' # strings /usr/local/cuda-4.2/lib64/libcublas.so | grep "cublasZdgmm" cuda='/usr/local/cuda-4.2' # comment out if no cuda libraries =[ 'cublas', 'cufft', 'cuda', # comment out if no cuda 'cudart', # comment out if no cuda #'mkl_def', 'mkl_scalapack_lp64', 'mkl_intel_lp64', 'mkl_sequential', 'mkl_core', 'mkl_blacs_openmpi_lp64', #'hdf5', 'mpi', ] library_dirs =[ intel, os.path.join(mpi, 'lib'), mkl, os.path.join(cuda, 'lib64'), # comment out if no cuda #os.path.join(hdf, 'lib'), ] include_dirs +=[ os.path.join(mpi, 'include'), os.path.join(cuda, 'include'), # comment out if no cuda #os.path.join(hdf, 'include'), ] extra_link_args =[ '-Wl,-rpath=' + 
intel + ',-rpath=' + os.path.join(mpi, 'lib') + ',-rpath=' + os.path.join(cuda, 'lib64') + # comment out if no cuda ',-rpath=' + mkl #',-rpath=' + os.path.join(hdf, 'lib') ] extra_compile_args =['-xHOST', '-O3', '-ipo', '-std=c99', '-fPIC', '-Wall'] extra_objects += ['./c/cukernels.o'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] mpicompiler = os.path.join(mpi, 'bin', 'mpicc') mpilinker = mpicompiler gpaw-24.1.0/doc/platforms/Bull/customize_curie_icc.py000066400000000000000000000012141454550013000226020ustar00rootroot00000000000000import os extra_compile_args = ['-std=c99'] compiler = './icc.py' mpicompiler = './icc.py' mpilinker = './icc.py' #libxc library_dirs = ['/ccc/cont005/home/pawp72/enkovaaj/libxc/lib'] include_dirs += ['/ccc/cont005/home/pawp72/enkovaaj/libxc/include'] libraries = ['z', 'xc'] scalapack = True hdf5 = True mkl_flags = os.environ['MKL_SCA_LIBS'] extra_link_args = [mkl_flags] # HDF5 flags include_dirs += [os.environ['HDF5_INCDIR']] libraries += ['hdf5'] library_dirs += [os.environ['HDF5_LIBDIR']] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Bull/download_curie_gpu.sh000077500000000000000000000016041454550013000224140ustar00rootroot00000000000000export APPS="readlink -f ~/CAMd" export MODULEFILES="${APPS}/modulefiles" # warning - firewall blocks that, so download on other machine and scp! 
cd ${APPS} # download packages nose_version=1.1.2 wget http://pypi.python.org/packages/source/n/nose/nose-${nose_version}.tar.gz numpy_version=1.5.1 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz scipy_version=0.9.0 wget https://downloads.sourceforge.net/project/scipy/scipy/${scipy_version}/scipy-${scipy_version}.tar.gz ase_version=3.7.0.3168 wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz gpaw_version=0.9.0.8965 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.9.9672 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz # OK, curie does not allow svn! svn co https://svn.fysik.dtu.dk/projects/gpaw/branches/rpa-gpu-expt gpaw-24.1.0/doc/platforms/Bull/icc.py000077500000000000000000000021141454550013000173140ustar00rootroot00000000000000#!/usr/bin/python """icc.py is a wrapper for the Intel compiler, converting/removing incompatible gcc args. """ import sys from subprocess import call args2change = {"-fno-strict-aliasing":"", "-fmessage-length=0":"", "-Wall":"", "-std=c99":"-qlanglvl=extc99", "-fPIC":"", "-g":"", "-D_FORTIFY_SOURCE=2":"", "-DNDEBUG":"", "-UNDEBUG":"", "-pthread":"", "-shared":"-qmkshrobj", "-Xlinker":"", "-export-dynamic":"", "-Wstrict-prototypes":"", "-dynamic":"", "-O3":"", "-O3":"", "-O2":"", "-O1":""} fragile_files = ["test.c"] cmd = "" fragile = False for arg in sys.argv[1:]: cmd += " " t = arg.strip() if t in fragile_files: fragile = True if t in args2change: cmd += args2change[t] else: cmd += arg flags = "-w -O3 -std=c99" cmd = f"mpicc {flags} {cmd}" call(cmd, shell=True) gpaw-24.1.0/doc/platforms/Bull/install_curie_gpu.sh000077500000000000000000000122321454550013000222520ustar00rootroot00000000000000export APPS=`readlink -f ~/CAMd` export CAMD_MODULEFILES="${APPS}/modulefiles" export GPAW_PLATFORM=`python -c "from distutils import util, sysconfig; print util.get_platform()+'-'+sysconfig.get_python_version()"` export 
PYTHONVERSION=`python -c "from distutils import sysconfig; print sysconfig.get_python_version()"` # build packages nose_version=1.1.2 tar zxf nose-${nose_version}.tar.gz cd nose-${nose_version} python setup.py install --root=${APPS}/nose-${nose_version}-1 cd .. mkdir -p ${CAMD_MODULEFILES}/nose cat < ${CAMD_MODULEFILES}/nose/${nose_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path PATH \$apps_path/nose-${nose_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/nose-${nose_version}-1/usr/lib/python${PYTHONVERSION}/site-packages/ unset apps_path EOF numpy_version=1.5.1 tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} # atlas on curie is built without -fPIC # /usr/bin/ld: /usr/local/atlas-3.9.72/lib/libcblas.a(cblas_dgemm.o): relocation R_X86_64_32 against `.rodata.str1.8' can not be used when making a shared object; recompile with -fPIC # and atlas-devel is not installed! # so hack! ln -s /usr/lib64/atlas/libatlas.so.3.0 libatlas.so ln -s /usr/lib64/atlas/libcblas.so.3.0 libcblas.so ln -s /usr/lib64/atlas/libclapack.so.3 libclapack.so ln -s /usr/lib64/atlas/libf77blas.so.3 libf77blas.so ln -s /usr/lib64/atlas/liblapack.so.3 liblapack.so echo "[DEFAULT]" > site.cfg echo "library_dirs = $PWD" >> site.cfg echo "include_dirs = /usr/local/atlas-3.9.72/include" >> site.cfg # avoid "Both g77 and gfortran runtimes linked in lapack_lite !" setting --fcompiler=gnu95 # note that this forces /usr/bin/gfortran to be used python setup.py build --fcompiler=gnu95 2>&1 | tee build.log python setup.py install --root=${APPS}/numpy-${numpy_version}-1 2>&1 | tee install.log cd .. mkdir -p ${CAMD_MODULEFILES}/numpy cat < ${CAMD_MODULEFILES}/numpy/${numpy_version}-1 #%Module1.0 set apps_path ${APPS} prereq nose prepend-path PATH \$apps_path/numpy-${numpy_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/numpy-${numpy_version}-1/usr/lib64/python${PYTHONVERSION}/site-packages/ unset apps_path EOF # the atlas is missing on the hybrid (only?) nodes so hack again! 
mkdir atlas && cd atlas cp -p /usr/lib64/atlas/liblapack.so.3 . cp -p /usr/lib64/atlas/libf77blas.so.3 . cp -p /usr/lib64/atlas/libcblas.so.3 . cp -p /usr/lib64/atlas/libatlas.so.3 . cd .. module use $CAMD_MODULEFILES module load nose module load numpy # scipy build needs numpy! scipy_version=0.9.0 tar zxf scipy-${scipy_version}.tar.gz cd scipy-${scipy_version} # avoid g77 - leads to Segmentation faults # note that this forces /usr/bin/gfortran to be used python setup.py build --fcompiler=gnu95 2>&1 | tee build.log python setup.py install --root=${APPS}/scipy-${scipy_version}-1 2>&1 | tee install.log cd .. mkdir -p ${CAMD_MODULEFILES}/scipy cat < ${CAMD_MODULEFILES}/scipy/${scipy_version}-1 #%Module1.0 set apps_path ${APPS} prereq nose prepend-path PATH \$apps_path/scipy-${scipy_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/scipy-${scipy_version}-1/usr/lib64/python${PYTHONVERSION}/site-packages/ unset apps_path EOF ase_version=3.7.0.3168 tar zxf python-ase-${ase_version}.tar.gz mkdir -p ${CAMD_MODULEFILES}/python-ase cat < ${CAMD_MODULEFILES}/python-ase/${ase_version}-1 #%Module1.0 set apps_path ${APPS} prereq numpy prepend-path PATH \$apps_path/python-ase-${ase_version}/tools prepend-path PYTHONPATH \$apps_path/python-ase-${ase_version}/ unset apps_path EOF gpaw_setups_version=0.9.9672 tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz mkdir -p ${CAMD_MODULEFILES}/gpaw-setups cat < ${CAMD_MODULEFILES}/gpaw-setups/${gpaw_setups_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path GPAW_SETUP_PATH \$apps_path/gpaw-setups-${gpaw_setups_version} unset apps_path EOF gpaw_version=0.9.0.8965 tar zxf gpaw-${gpaw_version}.tar.gz mkdir -p ${CAMD_MODULEFILES}/gpaw cat < ${CAMD_MODULEFILES}/gpaw/${gpaw_version}-1 #%Module1.0 set apps_path ${APPS} prereq python-ase prereq gpaw-setups prepend-path PATH \$apps_path/gpaw-${gpaw_version}/tools prepend-path PATH \$apps_path/gpaw-${gpaw_version}/build/bin.${GPAW_PLATFORM} prepend-path PYTHONPATH 
\$apps_path/gpaw-${gpaw_version}/ prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/build/lib.${GPAW_PLATFORM} setenv OMP_NUM_THREADS 1 unset apps_path EOF module load nose module load numpy module load scipy # test numpy and scipy python -c "import numpy; numpy.test()" python -c "import scipy; scipy.test()" # test ase module load python-ase mkdir -p testase cd testase testase.py --no-display 2>&1 | tee testase.log cd .. # build gpaw cd gpaw-${gpaw_version} module load cuda # if on rpa-gpu-expt branch rm -f c/cukernels.o cd c nvcc -arch sm_20 -c cukernels.cu -Xcompiler -fPIC cd .. # wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/config.py # fixed in trunk python setup.py build_ext --customize=../customize_curie_gpu.py --remove-default-flags 2>&1 | tee build_ext.log cd .. module load gpaw-setups module load gpaw gpaw-24.1.0/doc/platforms/Bull/msub_curie_gpu.sh000077500000000000000000000010171454550013000215510ustar00rootroot00000000000000#/bin/sh #MSUB -n 8 # number of tasks #MSUB -T 4600 # time #MSUB -q hybrid # use hybrid for GPU #MSUB -A paxxxx set -x cd ${BRIDGE_MSUB_PWD} module use ${HOME}/CAMd/modulefiles module load nose # blas/lapack/atlas missing export LD_LIBRARY_PATH=${HOME}/CAMd/atlas:${LD_LIBRARY_PATH} module load numpy module load scipy module load python-ase module load gpaw-setups/0.8.7929-1 #module load gpaw-setups/0.9.9672-1 module load gpaw #ccc_mprun gpaw-python gpaw-0.9.0.8965/gpaw/test/2Al.py ccc_mprun gpaw-python `which gpaw-test` gpaw-24.1.0/doc/platforms/Cray/000077500000000000000000000000001454550013000162035ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Cray/Setup_jaguar000066400000000000000000000427721454550013000205730ustar00rootroot00000000000000# -*- makefile -*- # The file Setup is used by the makesetup script to construct the files # Makefile and config.c, from Makefile.pre and config.c.in, # respectively. 
The file Setup itself is initially copied from # Setup.dist; once it exists it will not be overwritten, so you can edit # Setup to your heart's content. Note that Makefile.pre is created # from Makefile.pre.in by the toplevel configure script. # (VPATH notes: Setup and Makefile.pre are in the build directory, as # are Makefile and config.c; the *.in and *.dist files are in the source # directory.) # Each line in this file describes one or more optional modules. # Modules enabled here will not be compiled by the setup.py script, # so the file can be used to override setup.py's behavior. # Lines have the following structure: # # <module> ... [<sourcefile> ...] [<cpparg> ...] [<library> ...] # # <sourcefile> is anything ending in .c (.C, .cc, .c++ are C++ files) # <cpparg> is anything starting with -I, -D, -U or -C # <library> is anything ending in .a or beginning with -l or -L # <module> is anything else but should be a valid Python # identifier (letters, digits, underscores, beginning with non-digit) # # (As the makesetup script changes, it may recognize some other # arguments as well, e.g. *.so and *.sl as libraries. See the big # case statement in the makesetup script.) # # Lines can also have the form # # <name> = <value> # # which defines a Make variable definition inserted into Makefile.in # # Finally, if a line contains just the word "*shared*" (without the # quotes but with the stars), then the following modules will not be # built statically. The build process works like this: # # 1. Build all modules that are declared as static in Modules/Setup, # combine them into libpythonxy.a, combine that into python. # 2. Build all modules that are listed as shared in Modules/Setup. # 3. Invoke setup.py. That builds all modules that # a) are not builtin, and # b) are not listed in Modules/Setup, and # c) can be built on the target # # Therefore, modules declared to be shared will not be # included in the config.c file, nor in the list of objects to be # added to the library archive, and their linker options won't be # added to the linker options.
Rules to create their .o files and # their shared libraries will still be added to the Makefile, and # their names will be collected in the Make variable SHAREDMODS. This # is used to build modules as shared libraries. (They can be # installed using "make sharedinstall", which is implied by the # toplevel "make install" target.) (For compatibility, # *noconfig* has the same effect as *shared*.) # # In addition, *static* explicitly declares the following modules to # be static. Lines containing "*static*" and "*shared*" may thus # alternate throughout this file. # NOTE: As a standard policy, as many modules as can be supported by a # platform should be present. The distribution comes with all modules # enabled that are supported by most platforms and don't require you # to ftp sources from elsewhere. # Some special rules to define PYTHONPATH. # Edit the definitions below to indicate which options you are using. # Don't add any whitespace or comments! # Directories where library files get installed. # DESTLIB is for Python modules; MACHDESTLIB for shared libraries. DESTLIB=$(LIBDEST) MACHDESTLIB=$(BINLIBDEST) # NOTE: all the paths are now relative to the prefix that is computed # at run time! # Standard path -- don't edit. # No leading colon since this is the first entry. # Empty since this is now just the runtime prefix. DESTPATH= # Site specific path components -- should begin with : if non-empty SITEPATH= # Standard path components for test modules TESTPATH= # Path components for machine- or system-dependent modules and shared libraries MACHDEPPATH=:plat-$(MACHDEP) EXTRAMACHDEPPATH= # Path component for the Tkinter-related modules # The TKPATH variable is always enabled, to save you the effort. 
TKPATH=:lib-tk COREPYTHONPATH=$(DESTPATH)$(SITEPATH)$(TESTPATH)$(MACHDEPPATH)$(EXTRAMACHDEPPATH)$(TKPATH) PYTHONPATH=$(COREPYTHONPATH) # The modules listed here can't be built as shared libraries for # various reasons; therefore they are listed here instead of in the # normal order. # This only contains the minimal set of modules required to run the # setup.py script in the root of the Python source tree. posix posixmodule.c # posix (UNIX) system calls errno errnomodule.c # posix (UNIX) errno values pwd pwdmodule.c # this is needed to find out the user's home dir # if $HOME is not set _sre _sre.c # Fredrik Lundh's new regular expressions _codecs _codecsmodule.c # access to the builtin codecs and codec registry # The zipimport module is always imported at startup. Having it as a # builtin module avoids some bootstrapping problems and reduces overhead. zipimport zipimport.c # The rest of the modules listed in this file are all commented out by # default. Usually they can be detected and built as dynamically # loaded modules by the new setup.py script added in Python 2.1. If # you're on a platform that doesn't support dynamic loading, want to # compile modules statically into the Python binary, or need to # specify some odd set of compiler switches, you can uncomment the # appropriate lines below. # ====================================================================== # The Python symtable module depends on .h files that setup.py doesn't track _symtable symtablemodule.c # The SGI specific GL module: GLHACK=-Dclear=__GLclear #gl glmodule.c cgensupport.c -I$(srcdir) $(GLHACK) -lgl -lX11 # Pure module. Cannot be linked dynamically. 
# -DWITH_QUANTIFY, -DWITH_PURIFY, or -DWITH_ALL_PURE #WHICH_PURE_PRODUCTS=-DWITH_ALL_PURE #PURE_INCLS=-I/usr/local/include #PURE_STUBLIBS=-L/usr/local/lib -lpurify_stubs -lquantify_stubs #pure puremodule.c $(WHICH_PURE_PRODUCTS) $(PURE_INCLS) $(PURE_STUBLIBS) # Uncommenting the following line tells makesetup that all following # modules are to be built as shared libraries (see above for more # detail; also note that *static* reverses this effect): #*shared* # GNU readline. Unlike previous Python incarnations, GNU readline is # now incorporated in an optional module, configured in the Setup file # instead of by a configure script switch. You may have to insert a # -L option pointing to the directory where libreadline.* lives, # and you may have to change -ltermcap to -ltermlib or perhaps remove # it, depending on your system -- see the GNU readline instructions. # It's okay for this to be a shared library, too. readline readline.c -L/usr/lib64/termcap -lreadline -ltermcap # Modules that should always be present (non UNIX dependent): array arraymodule.c # array objects cmath cmathmodule.c # -lm # complex math library functions math mathmodule.c # -lm # math library functions, e.g. sin() _struct _struct.c # binary structure packing/unpacking time timemodule.c # -lm # time operations and variables operator operator.c # operator.add() and similar goodies _weakref _weakref.c # basic weak reference support _testcapi _testcapimodule.c # Python C API test module _random _randommodule.c # Random number generator collections collectionsmodule.c # Container types itertools itertoolsmodule.c # Functions creating iterators for efficient looping strop stropmodule.c # String manipulations unicodedata unicodedata.c # static Unicode character database # access to ISO C locale support #_locale _localemodule.c # -lintl # Modules with some UNIX dependencies -- on by default: # (If you have a really backward UNIX, select and socket may not be # supported...) 
#fcntl fcntlmodule.c # fcntl(2) and ioctl(2) #spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V # Memory-mapped files (also works on Win32). mmap mmapmodule.c # CSV file helper #_csv _csv.c # Socket module helper for socket(2) #_socket socketmodule.c # Socket module helper for SSL support; you must comment out the other # socket line above, and possibly edit the SSL variable: #SSL=/usr/local/ssl #_ssl _ssl.c \ # -DUSE_SSL -I$(SSL)/include -I$(SSL)/include/openssl \ # -L$(SSL)/lib -lssl -lcrypto # The crypt module is now disabled by default because it breaks builds # on many systems (where -lcrypt is needed), e.g. Linux (I believe). # # First, look at Setup.config; configure may have set this for you. #crypt cryptmodule.c # -lcrypt # crypt(3); needs -lcrypt on some systems # Some more UNIX dependent modules -- off by default, since these # are not supported by all UNIX systems: #nis nismodule.c -lnsl # Sun yellow pages -- not everywhere #termios termios.c # Steen Lumholt's termios module resource resource.c # Jeremy Hylton's rlimit interface # Multimedia modules -- off by default. # These don't work for 64-bit platforms!!! # #993173 says audioop works on 64-bit platforms, though. # These represent audio samples or images as strings: #audioop audioop.c # Operations on audio samples #imageop imageop.c # Operations on images #rgbimg rgbimgmodule.c # Read SGI RGB image files (but coded portably) # Note that the _md5 and _sha modules are normally only built if the # system does not have the OpenSSL libs containing an optimized version. # The _md5 module implements the RSA Data Security, Inc. MD5 # Message-Digest Algorithm, described in RFC 1321. The necessary files # md5.c and md5.h are included here. _md5 md5module.c md5.c # The _sha module implements the SHA checksum algorithm. # (NIST's Secure Hash Algorithm.) 
_sha shamodule.c _sha256 sha256module.c _sha512 sha512module.c # SGI IRIX specific modules -- off by default. # These module work on any SGI machine: # *** gl must be enabled higher up in this file *** #fm fmmodule.c $(GLHACK) -lfm -lgl # Font Manager #sgi sgimodule.c # sgi.nap() and a few more # This module requires the header file # /usr/people/4Dgifts/iristools/include/izoom.h: #imgfile imgfile.c -limage -lgutil -lgl -lm # Image Processing Utilities # These modules require the Multimedia Development Option (I think): #al almodule.c -laudio # Audio Library #cd cdmodule.c -lcdaudio -lds -lmediad # CD Audio Library #cl clmodule.c -lcl -lawareaudio # Compression Library #sv svmodule.c yuvconvert.c -lsvideo -lXext -lX11 # Starter Video # The FORMS library, by Mark Overmars, implements user interface # components such as dialogs and buttons using SGI's GL and FM # libraries. You must ftp the FORMS library separately from # ftp://ftp.cs.ruu.nl/pub/SGI/FORMS. It was tested with FORMS 2.2a. # NOTE: if you want to be able to use FORMS and curses simultaneously # (or both link them statically into the same binary), you must # compile all of FORMS with the cc option "-Dclear=__GLclear". # The FORMS variable must point to the FORMS subdirectory of the forms # toplevel directory: #FORMS=/ufs/guido/src/forms/FORMS #fl flmodule.c -I$(FORMS) $(GLHACK) $(FORMS)/libforms.a -lfm -lgl # SunOS specific modules -- off by default: #sunaudiodev sunaudiodev.c # A Linux specific module -- off by default; this may also work on # some *BSDs. #linuxaudiodev linuxaudiodev.c # George Neville-Neil's timing module: #timing timingmodule.c # The _tkinter module. # # The command for _tkinter is long and site specific. Please # uncomment and/or edit those parts as indicated. If you don't have a # specific extension (e.g. Tix or BLT), leave the corresponding line # commented out. (Leave the trailing backslashes in! 
If you # experience strange errors, you may want to join all uncommented # lines and remove the backslashes -- the backslash interpretation is # done by the shell's "read" command and it may not be implemented on # every system.) # *** Always uncomment this (leave the leading underscore in!): # _tkinter _tkinter.c tkappinit.c -DWITH_APPINIT \ # *** Uncomment and edit to reflect where your Tcl/Tk libraries are: # -L/usr/local/lib \ # *** Uncomment and edit to reflect where your Tcl/Tk headers are: # -I/usr/local/include \ # *** Uncomment and edit to reflect where your X11 header files are: # -I/usr/X11R6/include \ # *** Or uncomment this for Solaris: # -I/usr/openwin/include \ # *** Uncomment and edit for Tix extension only: # -DWITH_TIX -ltix8.1.8.2 \ # *** Uncomment and edit for BLT extension only: # -DWITH_BLT -I/usr/local/blt/blt8.0-unoff/include -lBLT8.0 \ # *** Uncomment and edit for PIL (TkImaging) extension only: # (See http://www.pythonware.com/products/pil/ for more info) # -DWITH_PIL -I../Extensions/Imaging/libImaging tkImaging.c \ # *** Uncomment and edit for TOGL extension only: # -DWITH_TOGL togl.c \ # *** Uncomment and edit to reflect your Tcl/Tk versions: # -ltk8.2 -ltcl8.2 \ # *** Uncomment and edit to reflect where your X11 libraries are: # -L/usr/X11R6/lib \ # *** Or uncomment this for Solaris: # -L/usr/openwin/lib \ # *** Uncomment these for TOGL extension only: # -lGL -lGLU -lXext -lXmu \ # *** Uncomment for AIX: # -lld \ # *** Always uncomment this; X11 libraries to link with: # -lX11 # Lance Ellinghaus's syslog module #syslog syslogmodule.c # syslog daemon interface # Curses support, requiring the System V version of curses, often # provided by the ncurses library (e.g. on Linux, link with -lncurses # instead of -lcurses). # # First, look at Setup.config; configure may have set this for you. #_curses _cursesmodule.c -lcurses -ltermcap # Wrapper for the panel library that's part of ncurses and SYSV curses.
#_curses_panel _curses_panel.c -lpanel -lncurses # Generic (SunOS / SVR4) dynamic loading module. # This is not needed for dynamic loading of Python modules -- # it is a highly experimental and dangerous device for calling # *arbitrary* C functions in *arbitrary* shared libraries: #dl dlmodule.c # Modules that provide persistent dictionary-like semantics. You will # probably want to arrange for at least one of them to be available on # your machine, though none are defined by default because of library # dependencies. The Python module anydbm.py provides an # implementation independent wrapper for these; dumbdbm.py provides # similar functionality (but slower of course) implemented in Python. # The standard Unix dbm module has been moved to Setup.config so that # it will be compiled as a shared library by default. Compiling it as # a built-in module causes conflicts with the pybsddb3 module since it # creates a static dependency on an out-of-date version of db.so. # # First, look at Setup.config; configure may have set this for you. #dbm dbmmodule.c # dbm(3) may require -lndbm or similar # Anthony Baxter's gdbm module. GNU dbm(3) will require -lgdbm: # # First, look at Setup.config; configure may have set this for you. #gdbm gdbmmodule.c -I/usr/local/include -L/usr/local/lib -lgdbm # Sleepycat Berkeley DB interface. # # This requires the Sleepycat DB code, see http://www.sleepycat.com/ # The earliest supported version of that library is 3.0, the latest # supported version is 4.0 (4.1 is specifically not supported, as that # changes the semantics of transactional databases). A list of available # releases can be found at # # http://www.sleepycat.com/update/index.html # # Edit the variables DB and DBLIBVERto point to the db top directory # and the subdirectory of PORT where you built it. 
#DB=/usr/local/BerkeleyDB.4.0 #DBLIBVER=4.0 #DBINC=$(DB)/include #DBLIB=$(DB)/lib #_bsddb _bsddb.c -I$(DBINC) -L$(DBLIB) -ldb-$(DBLIBVER) # Historical Berkeley DB 1.85 # # This module is deprecated; the 1.85 version of the Berkeley DB library has # bugs that can cause data corruption. If you can, use later versions of the # library instead, available from . #DB=/depot/sundry/src/berkeley-db/db.1.85 #DBPORT=$(DB)/PORT/irix.5.3 #bsddb185 bsddbmodule.c -I$(DBPORT)/include -I$(DBPORT) $(DBPORT)/libdb.a # Helper module for various ascii-encoders binascii binascii.c # Fred Drake's interface to the Python parser #parser parsermodule.c # cStringIO and cPickle cStringIO cStringIO.c cPickle cPickle.c # Lee Busby's SIGFPE modules. # The library to link fpectl with is platform specific. # Choose *one* of the options below for fpectl: # For SGI IRIX (tested on 5.3): #fpectl fpectlmodule.c -lfpe # For Solaris with SunPro compiler (tested on Solaris 2.5 with SunPro C 4.2): # (Without the compiler you don't have -lsunmath.) #fpectl fpectlmodule.c -R/opt/SUNWspro/lib -lsunmath -lm # For other systems: see instructions in fpectlmodule.c. #fpectl fpectlmodule.c ... # Test module for fpectl. No extra libraries needed. #fpetest fpetestmodule.c # Andrew Kuchling's zlib module. # This require zlib 1.1.3 (or later). # See http://www.gzip.org/zlib/ zlib zlibmodule.c -I$(ZLIB_DIR)/include -L$(ZLIB_DIR)/lib -lz # Interface to the Expat XML parser # # Expat was written by James Clark and is now maintained by a group of # developers on SourceForge; see www.libexpat.org for more # information. The pyexpat module was written by Paul Prescod after a # prototype by Jack Jansen. Source of Expat 1.95.2 is included in # Modules/expat/. Usage of a system shared libexpat.so/expat.dll is # not advised. # # More information on Expat can be found at www.libexpat.org. 
# #EXPAT_DIR=/usr/local/src/expat-1.95.2 pyexpat pyexpat.c -DHAVE_EXPAT_H -I$(EXPAT_DIR)/lib -L$(EXPAT_DIR) -lexpat # Hye-Shik Chang's CJKCodecs # multibytecodec is required for all the other CJK codec modules #_multibytecodec cjkcodecs/multibytecodec.c #_codecs_cn cjkcodecs/_codecs_cn.c #_codecs_hk cjkcodecs/_codecs_hk.c #_codecs_iso2022 cjkcodecs/_codecs_iso2022.c #_codecs_jp cjkcodecs/_codecs_jp.c #_codecs_kr cjkcodecs/_codecs_kr.c #_codecs_tw cjkcodecs/_codecs_tw.c # Example -- included for reference only: # xx xxmodule.c # Another example -- the 'xxsubtype' module shows C-level subtyping in action xxsubtype xxsubtype.c #Numpy: gpaw-24.1.0/doc/platforms/Cray/Setup_louhi000066400000000000000000000442431454550013000204350ustar00rootroot00000000000000# -*- makefile -*- # The file Setup is used by the makesetup script to construct the files # Makefile and config.c, from Makefile.pre and config.c.in, # respectively. The file Setup itself is initially copied from # Setup.dist; once it exists it will not be overwritten, so you can edit # Setup to your heart's content. Note that Makefile.pre is created # from Makefile.pre.in by the toplevel configure script. # (VPATH notes: Setup and Makefile.pre are in the build directory, as # are Makefile and config.c; the *.in and *.dist files are in the source # directory.) # Each line in this file describes one or more optional modules. # Modules enabled here will not be compiled by the setup.py script, # so the file can be used to override setup.py's behavior. # Lines have the following structure: # # ... [ ...] [ ...] [ ...] # # is anything ending in .c (.C, .cc, .c++ are C++ files) # is anything starting with -I, -D, -U or -C # is anything ending in .a or beginning with -l or -L # is anything else but should be a valid Python # identifier (letters, digits, underscores, beginning with non-digit) # # (As the makesetup script changes, it may recognize some other # arguments as well, e.g. *.so and *.sl as libraries. 
See the big # case statement in the makesetup script.) # # Lines can also have the form # # = # # which defines a Make variable definition inserted into Makefile.in # # Finally, if a line contains just the word "*shared*" (without the # quotes but with the stars), then the following modules will not be # built statically. The build process works like this: # # 1. Build all modules that are declared as static in Modules/Setup, # combine them into libpythonxy.a, combine that into python. # 2. Build all modules that are listed as shared in Modules/Setup. # 3. Invoke setup.py. That builds all modules that # a) are not builtin, and # b) are not listed in Modules/Setup, and # c) can be build on the target # # Therefore, modules declared to be shared will not be # included in the config.c file, nor in the list of objects to be # added to the library archive, and their linker options won't be # added to the linker options. Rules to create their .o files and # their shared libraries will still be added to the Makefile, and # their names will be collected in the Make variable SHAREDMODS. This # is used to build modules as shared libraries. (They can be # installed using "make sharedinstall", which is implied by the # toplevel "make install" target.) (For compatibility, # *noconfig* has the same effect as *shared*.) # # In addition, *static* explicitly declares the following modules to # be static. Lines containing "*static*" and "*shared*" may thus # alternate throughout this file. # NOTE: As a standard policy, as many modules as can be supported by a # platform should be present. The distribution comes with all modules # enabled that are supported by most platforms and don't require you # to ftp sources from elsewhere. # Some special rules to define PYTHONPATH. # Edit the definitions below to indicate which options you are using. # Don't add any whitespace or comments! # Directories where library files get installed. 
# DESTLIB is for Python modules; MACHDESTLIB for shared libraries. DESTLIB=$(LIBDEST) MACHDESTLIB=$(BINLIBDEST) # NOTE: all the paths are now relative to the prefix that is computed # at run time! # Standard path -- don't edit. # No leading colon since this is the first entry. # Empty since this is now just the runtime prefix. DESTPATH= # Site specific path components -- should begin with : if non-empty SITEPATH= # Standard path components for test modules TESTPATH= # Path components for machine- or system-dependent modules and shared libraries MACHDEPPATH=:plat-$(MACHDEP) EXTRAMACHDEPPATH= # Path component for the Tkinter-related modules # The TKPATH variable is always enabled, to save you the effort. TKPATH=:lib-tk COREPYTHONPATH=$(DESTPATH)$(SITEPATH)$(TESTPATH)$(MACHDEPPATH)$(EXTRAMACHDEPPATH)$(TKPATH) PYTHONPATH=$(COREPYTHONPATH) # The modules listed here can't be built as shared libraries for # various reasons; therefore they are listed here instead of in the # normal order. # This only contains the minimal set of modules required to run the # setup.py script in the root of the Python source tree. posix posixmodule.c # posix (UNIX) system calls errno errnomodule.c # posix (UNIX) errno values pwd pwdmodule.c # this is needed to find out the user's home dir # if $HOME is not set _sre _sre.c # Fredrik Lundh's new regular expressions _codecs _codecsmodule.c # access to the builtin codecs and codec registry # The zipimport module is always imported at startup. Having it as a # builtin module avoids some bootstrapping problems and reduces overhead. zipimport zipimport.c # The rest of the modules listed in this file are all commented out by # default. Usually they can be detected and built as dynamically # loaded modules by the new setup.py script added in Python 2.1. 
If # you're on a platform that doesn't support dynamic loading, want to # compile modules statically into the Python binary, or need to # specify some odd set of compiler switches, you can uncomment the # appropriate lines below. # ====================================================================== # The Python symtable module depends on .h files that setup.py doesn't track _symtable symtablemodule.c # The SGI specific GL module: GLHACK=-Dclear=__GLclear #gl glmodule.c cgensupport.c -I$(srcdir) $(GLHACK) -lgl -lX11 # Pure module. Cannot be linked dynamically. # -DWITH_QUANTIFY, -DWITH_PURIFY, or -DWITH_ALL_PURE #WHICH_PURE_PRODUCTS=-DWITH_ALL_PURE #PURE_INCLS=-I/usr/local/include #PURE_STUBLIBS=-L/usr/local/lib -lpurify_stubs -lquantify_stubs #pure puremodule.c $(WHICH_PURE_PRODUCTS) $(PURE_INCLS) $(PURE_STUBLIBS) # Uncommenting the following line tells makesetup that all following # modules are to be built as shared libraries (see above for more # detail; also note that *static* reverses this effect): #*shared* # GNU readline. Unlike previous Python incarnations, GNU readline is # now incorporated in an optional module, configured in the Setup file # instead of by a configure script switch. You may have to insert a # -L option pointing to the directory where libreadline.* lives, # and you may have to change -ltermcap to -ltermlib or perhaps remove # it, depending on your system -- see the GNU readline instructions. # It's okay for this to be a shared library, too. # readline readline.c -lreadline -L/usr/lib64/termcap -ltermcap # Modules that should always be present (non UNIX dependent): array arraymodule.c # array objects cmath cmathmodule.c # -lm # complex math library functions math mathmodule.c # -lm # math library functions, e.g. 
sin() _struct _struct.c # binary structure packing/unpacking time timemodule.c # -lm # time operations and variables operator operator.c # operator.add() and similar goodies _weakref _weakref.c # basic weak reference support _testcapi _testcapimodule.c # Python C API test module _random _randommodule.c # Random number generator collections collectionsmodule.c # Container types itertools itertoolsmodule.c # Functions creating iterators for efficient looping strop stropmodule.c # String manipulations unicodedata unicodedata.c # static Unicode character database # access to ISO C locale support #_locale _localemodule.c # -lintl # Modules with some UNIX dependencies -- on by default: # (If you have a really backward UNIX, select and socket may not be # supported...) #fcntl fcntlmodule.c # fcntl(2) and ioctl(2) #spwd spwdmodule.c # spwd(3) #grp grpmodule.c # grp(3) #select selectmodule.c # select(2); not on ancient System V # Memory-mapped files (also works on Win32). mmap mmapmodule.c # CSV file helper #_csv _csv.c # Socket module helper for socket(2) #_socket socketmodule.c # Socket module helper for SSL support; you must comment out the other # socket line above, and possibly edit the SSL variable: #SSL=/usr/local/ssl #_ssl _ssl.c \ # -DUSE_SSL -I$(SSL)/include -I$(SSL)/include/openssl \ # -L$(SSL)/lib -lssl -lcrypto # The crypt module is now disabled by default because it breaks builds # on many systems (where -lcrypt is needed), e.g. Linux (I believe). # # First, look at Setup.config; configure may have set this for you. #crypt cryptmodule.c # -lcrypt # crypt(3); needs -lcrypt on some systems # Some more UNIX dependent modules -- off by default, since these # are not supported by all UNIX systems: #nis nismodule.c -lnsl # Sun yellow pages -- not everywhere #termios termios.c # Steen Lumholt's termios module resource resource.c # Jeremy Hylton's rlimit interface # Multimedia modules -- off by default. # These don't work for 64-bit platforms!!! 
# #993173 says audioop works on 64-bit platforms, though. # These represent audio samples or images as strings: #audioop audioop.c # Operations on audio samples #imageop imageop.c # Operations on images #rgbimg rgbimgmodule.c # Read SGI RGB image files (but coded portably) # Note that the _md5 and _sha modules are normally only built if the # system does not have the OpenSSL libs containing an optimized version. # The _md5 module implements the RSA Data Security, Inc. MD5 # Message-Digest Algorithm, described in RFC 1321. The necessary files # md5.c and md5.h are included here. _md5 md5module.c md5.c # The _sha module implements the SHA checksum algorithm. # (NIST's Secure Hash Algorithm.) _sha shamodule.c _sha256 sha256module.c _sha512 sha512module.c # SGI IRIX specific modules -- off by default. # These module work on any SGI machine: # *** gl must be enabled higher up in this file *** #fm fmmodule.c $(GLHACK) -lfm -lgl # Font Manager #sgi sgimodule.c # sgi.nap() and a few more # This module requires the header file # /usr/people/4Dgifts/iristools/include/izoom.h: #imgfile imgfile.c -limage -lgutil -lgl -lm # Image Processing Utilities # These modules require the Multimedia Development Option (I think): #al almodule.c -laudio # Audio Library #cd cdmodule.c -lcdaudio -lds -lmediad # CD Audio Library #cl clmodule.c -lcl -lawareaudio # Compression Library #sv svmodule.c yuvconvert.c -lsvideo -lXext -lX11 # Starter Video # The FORMS library, by Mark Overmars, implements user interface # components such as dialogs and buttons using SGI's GL and FM # libraries. You must ftp the FORMS library separately from # ftp://ftp.cs.ruu.nl/pub/SGI/FORMS. It was tested with FORMS 2.2a. # NOTE: if you want to be able to use FORMS and curses simultaneously # (or both link them statically into the same binary), you must # compile all of FORMS with the cc option "-Dclear=__GLclear". 
# The FORMS variable must point to the FORMS subdirectory of the forms # toplevel directory: #FORMS=/ufs/guido/src/forms/FORMS #fl flmodule.c -I$(FORMS) $(GLHACK) $(FORMS)/libforms.a -lfm -lgl # SunOS specific modules -- off by default: #sunaudiodev sunaudiodev.c # A Linux specific module -- off by default; this may also work on # some *BSDs. #linuxaudiodev linuxaudiodev.c # George Neville-Neil's timing module: #timing timingmodule.c # The _tkinter module. # # The command for _tkinter is long and site specific. Please # uncomment and/or edit those parts as indicated. If you don't have a # specific extension (e.g. Tix or BLT), leave the corresponding line # commented out. (Leave the trailing backslashes in! If you # experience strange errors, you may want to join all uncommented # lines and remove the backslashes -- the backslash interpretation is # done by the shell's "read" command and it may not be implemented on # every system. # *** Always uncomment this (leave the leading underscore in!): # _tkinter _tkinter.c tkappinit.c -DWITH_APPINIT \ # *** Uncomment and edit to reflect where your Tcl/Tk libraries are: # -L/usr/local/lib \ # *** Uncomment and edit to reflect where your Tcl/Tk headers are: # -I/usr/local/include \ # *** Uncomment and edit to reflect where your X11 header files are: # -I/usr/X11R6/include \ # *** Or uncomment this for Solaris: # -I/usr/openwin/include \ # *** Uncomment and edit for Tix extension only: # -DWITH_TIX -ltix8.1.8.2 \ # *** Uncomment and edit for BLT extension only: # -DWITH_BLT -I/usr/local/blt/blt8.0-unoff/include -lBLT8.0 \ # *** Uncomment and edit for PIL (TkImaging) extension only: # (See http://www.pythonware.com/products/pil/ for more info) # -DWITH_PIL -I../Extensions/Imaging/libImaging tkImaging.c \ # *** Uncomment and edit for TOGL extension only: # -DWITH_TOGL togl.c \ # *** Uncomment and edit to reflect your Tcl/Tk versions: # -ltk8.2 -ltcl8.2 \ # *** Uncomment and edit to reflect where your X11 libraries are: # 
-L/usr/X11R6/lib \ # *** Or uncomment this for Solaris: # -L/usr/openwin/lib \ # *** Uncomment these for TOGL extension only: # -lGL -lGLU -lXext -lXmu \ # *** Uncomment for AIX: # -lld \ # *** Always uncomment this; X11 libraries to link with: # -lX11 # Lance Ellinghaus's syslog module #syslog syslogmodule.c # syslog daemon interface # Curses support, requring the System V version of curses, often # provided by the ncurses library. e.g. on Linux, link with -lncurses # instead of -lcurses). # # First, look at Setup.config; configure may have set this for you. #_curses _cursesmodule.c -lcurses -ltermcap # Wrapper for the panel library that's part of ncurses and SYSV curses. #_curses_panel _curses_panel.c -lpanel -lncurses # Generic (SunOS / SVR4) dynamic loading module. # This is not needed for dynamic loading of Python modules -- # it is a highly experimental and dangerous device for calling # *arbitrary* C functions in *arbitrary* shared libraries: #dl dlmodule.c # Modules that provide persistent dictionary-like semantics. You will # probably want to arrange for at least one of them to be available on # your machine, though none are defined by default because of library # dependencies. The Python module anydbm.py provides an # implementation independent wrapper for these; dumbdbm.py provides # similar functionality (but slower of course) implemented in Python. # The standard Unix dbm module has been moved to Setup.config so that # it will be compiled as a shared library by default. Compiling it as # a built-in module causes conflicts with the pybsddb3 module since it # creates a static dependency on an out-of-date version of db.so. # # First, look at Setup.config; configure may have set this for you. #dbm dbmmodule.c # dbm(3) may require -lndbm or similar # Anthony Baxter's gdbm module. GNU dbm(3) will require -lgdbm: # # First, look at Setup.config; configure may have set this for you. 
#gdbm gdbmmodule.c -I/usr/local/include -L/usr/local/lib -lgdbm # Sleepycat Berkeley DB interface. # # This requires the Sleepycat DB code, see http://www.sleepycat.com/ # The earliest supported version of that library is 3.0, the latest # supported version is 4.0 (4.1 is specifically not supported, as that # changes the semantics of transactional databases). A list of available # releases can be found at # # http://www.sleepycat.com/update/index.html # # Edit the variables DB and DBLIBVERto point to the db top directory # and the subdirectory of PORT where you built it. #DB=/usr/local/BerkeleyDB.4.0 #DBLIBVER=4.0 #DBINC=$(DB)/include #DBLIB=$(DB)/lib #_bsddb _bsddb.c -I$(DBINC) -L$(DBLIB) -ldb-$(DBLIBVER) # Historical Berkeley DB 1.85 # # This module is deprecated; the 1.85 version of the Berkeley DB library has # bugs that can cause data corruption. If you can, use later versions of the # library instead, available from . #DB=/depot/sundry/src/berkeley-db/db.1.85 #DBPORT=$(DB)/PORT/irix.5.3 #bsddb185 bsddbmodule.c -I$(DBPORT)/include -I$(DBPORT) $(DBPORT)/libdb.a # Helper module for various ascii-encoders binascii binascii.c # Fred Drake's interface to the Python parser #parser parsermodule.c # cStringIO and cPickle cStringIO cStringIO.c cPickle cPickle.c # Lee Busby's SIGFPE modules. # The library to link fpectl with is platform specific. # Choose *one* of the options below for fpectl: # For SGI IRIX (tested on 5.3): #fpectl fpectlmodule.c -lfpe # For Solaris with SunPro compiler (tested on Solaris 2.5 with SunPro C 4.2): # (Without the compiler you don't have -lsunmath.) #fpectl fpectlmodule.c -R/opt/SUNWspro/lib -lsunmath -lm # For other systems: see instructions in fpectlmodule.c. #fpectl fpectlmodule.c ... # Test module for fpectl. No extra libraries needed. #fpetest fpetestmodule.c # Andrew Kuchling's zlib module. # This require zlib 1.1.3 (or later). 
# See http://www.gzip.org/zlib/ ZLIB=$/HOME}/mika_proj/zlib-1.2.3 zlib zlibmodule.c -I$(ZLIB) -L$(ZLIB) -lz # Interface to the Expat XML parser # # Expat was written by James Clark and is now maintained by a group of # developers on SourceForge; see www.libexpat.org for more # information. The pyexpat module was written by Paul Prescod after a # prototype by Jack Jansen. Source of Expat 1.95.2 is included in # Modules/expat/. Usage of a system shared libexpat.so/expat.dll is # not advised. # # More information on Expat can be found at www.libexpat.org. # EXPAT_DIR=${HOME}/mika_proj/lib/expat pyexpat pyexpat.c -DHAVE_EXPAT_H -I$(EXPAT_DIR)/include -L$(EXPAT_DIR)/lib -lexpat # Hye-Shik Chang's CJKCodecs # multibytecodec is required for all the other CJK codec modules #_multibytecodec cjkcodecs/multibytecodec.c #_codecs_cn cjkcodecs/_codecs_cn.c #_codecs_hk cjkcodecs/_codecs_hk.c #_codecs_iso2022 cjkcodecs/_codecs_iso2022.c #_codecs_jp cjkcodecs/_codecs_jp.c #_codecs_kr cjkcodecs/_codecs_kr.c #_codecs_tw cjkcodecs/_codecs_tw.c # Example -- included for reference only: # xx xxmodule.c # Another example -- the 'xxsubtype' module shows C-level subtyping in action xxsubtype xxsubtype.c #Numpy: NUMPY=${HOME}/mika_proj/numpy-1.0.4/ multiarray $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/core/multiarray.a umath $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/core/umath.a _sort $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/core/_sort.a scalarmath $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/core/scalarmath.a _compiled_base $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/lib/_compiled_base.a _capi $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/numarray/_capi.a fftpack_lite $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/fft/fftpack_lite.a lapack_lite $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/linalg/lapack_lite.a mtrand $(NUMPY)/build/lib.linux-x86_64-2.5/numpy/random/mtrand.a gpaw-24.1.0/doc/platforms/Cray/build_gcc.sh000066400000000000000000000011021454550013000204440ustar00rootroot00000000000000#!/bin/bash 
-x export CC=cc export CXX=g++ export MPICC=cc export LINKFORSHARED='-Wl,-export-dynamic -dynamic' export MPI_LINKFORSHARED='-Wl,-export-dynamic -dynamic' install_dir='/some_path/scalable-python-gcc' ./configure --prefix=$install_dir --enable-mpi --disable-ipv6 2>&1 | tee loki-conf module swap craype-interlagos craype-istanbul module list make 2>&1 | tee log-make make install 2>&1 | tee log-inst make clean module swap craype-istanbul craype-interlagos make mpi 2>&1 | tee log-make-mpi cp ${install_dir}/bin/python . make install-mpi 2>&1 | tee log-inst-mpi gpaw-24.1.0/doc/platforms/Cray/build_scalable_python.sh000066400000000000000000000011321454550013000230620ustar00rootroot00000000000000#!/bin/bash export CC=cc export CXX=g++ export MPICC=cc # export XTPE_LINK_TYPE=dynamic export LINKFORSHARED='-Wl,-export-dynamic -dynamic' export MPI_LINKFORSHARED='-Wl,-export-dynamic -dynamic' install_prefix=/appl/opt/python/scalable-gnu # Make zlib built-in to the interpreter sed -i -e 's/^#zlib.*/zlib zlibmodule.c -I\/usr\/include -L\/usr\/lib64 -lz/' Modules/Setup.dist ./configure --prefix=$install_prefix --enable-mpi --disable-ipv6 2>&1 | tee log-conf make 2>&1 | tee log-make make install 2>&1 | tee log-inst make mpi 2>&1 | tee log-make-mpi make install-mpi 2>&1 | tee log-inst-mpi gpaw-24.1.0/doc/platforms/Cray/config.py000066400000000000000000000006571454550013000200320ustar00rootroot00000000000000config = { 'scheduler': 'slurm', 'extra_args': ['--account=project_4xxxxxxxx', '--mem=0'], 'mpiexec': 'srun', 'parallel_python': 'gpaw python', 'nodes': [ ('standard', {'cores': 128}), ('small', {'cores': 128}), ('debug', {'cores': 128}), ('largemem', {'cores': 128}), ('standard-g', {'cores': 63}), ('small-g', {'cores': 63}), ('dev-g', {'cores': 63})]} gpaw-24.1.0/doc/platforms/Cray/customize_hermit.py000066400000000000000000000010211454550013000221410ustar00rootroot00000000000000extra_compile_args = ['-std=c99', '-O3'] #, '-O0'] compiler = 'cc' mpicompiler = 'cc' mpilinker= 'cc' 
libraries = ['acml'] extra_link_args += ['-dynamic'] include_dirs += ['/usr/lib64/python2.6/site-packages/numpy/core/include'] # include_dirs += ['/path_in_workspace/lib/python/numpy/core/include/'] scalapack = True hdf5 = True define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] define_macros += [("GPAW_PERFORMANCE_REPORT",1)] gpaw-24.1.0/doc/platforms/Cray/customize_jaguar.py000066400000000000000000000017421454550013000221340ustar00rootroot00000000000000scalapack = True compiler = 'cc' mpicompiler = compiler mpilinker = mpicompiler extra_compile_args += [ '-O3', '-funroll-all-loops', '-fPIC', ] libraries= [] dir_base = '/autofs/na1_home/farberow/sw/xt5/' acml_base = '/opt/acml/4.1.0/gfortran64/lib/' numpy_base = dir_base+'numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/' extra_link_args = [ '-L/usr/lib64 -lreadline -lncurses', numpy_base+'core/multiarray.a', numpy_base+'core/_sort.a', numpy_base+'core/scalarmath.a', numpy_base+'core/umath.a', numpy_base+'lib/_compiled_base.a', numpy_base+'numarray/_capi.a', numpy_base+'fft/fftpack_lite.a', numpy_base+'linalg/lapack_lite.a', numpy_base+'random/mtrand.a', '-L'+dir_base+'zlib-1.2.3-1/lib -lz', '-L'+dir_base+'expat-2.0.1-1/lib -lexpat', '-L'+acml_base+' -lacml', ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [('NO_SOCKET', '1')] gpaw-24.1.0/doc/platforms/Cray/customize_louhi.py000066400000000000000000000005121454550013000217750ustar00rootroot00000000000000#User provided customizations for the gpaw setup compiler = 'cc' mpicompiler = 'cc' mpilinker= 'cc' extra_compile_args = ['-std=c99'] libraries = [] scalapack = True hdf5 = True define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] 
gpaw-24.1.0/doc/platforms/Cray/customize_nersc_cori.py000066400000000000000000000011741454550013000230100ustar00rootroot00000000000000parallel_python_interpreter = True compiler = 'cc' mpicompiler = 'cc' mpilinker = 'cc' scalapack = True libxc = '/usr/common/software/libxc/4.2.3/gnu/haswell' include_dirs += [libxc + '/include'] library_dirs += [libxc + '/lib'] extra_link_args += [f'-Wl,-rpath={libxc}/lib'] extra_compile_args += ['-O2'] if 'xc' not in libraries: libraries.append('xc') # these are in the cray wrapper if 'blas' in libraries: libraries.remove('blas') if 'lapack' in libraries: libraries.remove('lapack') if scalapack: define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Cray/customize_sisu.py000066400000000000000000000007251454550013000216460ustar00rootroot00000000000000extra_compile_args = ['-std=c99', '-O3'] compiler = 'cc' mpicompiler = 'cc' mpilinker= 'cc' # edit library and include paths for libxc include_dirs += ['/homeappl/home/jenkovaa/libxc/sisu/gcc/include'] library_dirs = ['/homeappl/home/jenkovaa/libxc/sisu/gcc/lib'] libraries = ['z', 'xc'] scalapack = True hdf5 = True define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Cray/dynload_redstorm.c000066400000000000000000000010331454550013000217150ustar00rootroot00000000000000 /* This module provides the simulation of dynamic loading in Red Storm */ #include "Python.h" #include "importdl.h" const struct filedescr _PyImport_DynLoadFiletab[] = { {".a", "rb", C_EXTENSION}, {0, 0} }; extern struct _inittab _PyImport_Inittab[]; dl_funcptr _PyImport_GetDynLoadFunc(const char *fqname, const char *shortname, const char *pathname, FILE *fp) { struct _inittab *tab = _PyImport_Inittab; while (tab->name && strcmp(shortname, tab->name)) tab++; return tab->initfunc; } 
gpaw-24.1.0/doc/platforms/Cray/hermit.rst000066400000000000000000000072611454550013000202330ustar00rootroot00000000000000.. _hermit: ============================ hermit.hww.de (Cray XE6) ============================ Here you find information about the system ``_. .. note:: These instructions are up-to-date as of November 18th 2013. Scalable Python =============== As the Hermit system is intended for simulations with thousands of CPU cores, a special Python interpreter is used here. The scalable Python reduces the import time by performing all import related I/O operations with a single CPU core and uses MPI for broadcasting the data. As a limitation, all the MPI tasks have to perform the same **import** statements. As HLRS does not allow general internet access on compute system, e.g. version control repositories cannot be accessed directly (it is possible to setup ssh tunnel for some services). Here, we download the scalable Python first to a local machine and then use scp for copying it to Hermit:: git clone git@gitorious.org:scalable-python/scalable-python.git scalable-python-src scp -r scalable-python-src username@hermit1.hww.de: We will build scalable Python with GNU compilers (other compilers can be used for actual GPAW build), so start by changing the default programming environment on Hermit:: module swap PrgEnv-cray PrgEnv-gnu Due to cross-compile environment in Cray XE6, a normal Python interpreter is built for the front-end nodes and the MPI-enabled one for the compute nodes. The build can be accomplished by the following ``build_gcc`` script .. literalinclude:: build_gcc.sh Python packages can now be built on the front-end node with ``/some_path/scalable-python-gcc/bin/python``. NumPy ===== As the performance of the HOME filesystem is not very good, we install all the other components than the pure Python to a disk within the workspace mechanism of HLRS (with disadvantage that the workspaces expire and have to be manually reallocated). 
Otherwise, no special tricks are needed for installing NumPy:: /some_path/scalable-python-gcc/bin/python setup.py install --home=/path_in_workspace GPAW ==== On Hermit, Intel compiler together with ACML library seemed to give best performance for GPAW, in addition HDF5 will be used for parallel I/O. Thus, load the following modules:: module swap PrgEnv-gnu PrgEnv-intel module load acml module load hdf5-parallel The compilation is relatively straightforward, however, as we build NumPy for compute nodes it does not work in front-end, and one has to specify NumPy include dirs in ``customize.py`` and provide ``--ignore-numpy`` flag when building. The system NumPy headers seem to work fine, but safer option is to use headers of own NumPy installation .. literalinclude:: customize_hermit.py Build with:: /some_path/scalable-python-gcc/bin/python setup.py install --home=/path_in_workspace --ignore-numpy Creating a module ================= Users can define their own modules for making it easier to setup environment variables. First, create (or edit) a file ``.modulerc`` in the home directory:: #%Module1.0 ## append-path MODULEPATH $HOME/modules Now, custom modules can be put to the ``modules`` directory, e.g. GPAW module in file ``modules/gpaw``:: #%Module1.0 set prog_root "/root_folder_of_gpaw_installation" # Change PrgEnv to intel set confl_prgenvs [list "PrgEnv-cray" "PrgEnv-gnu"] foreach prgenv $confl_prgenvs { if { [ is-loaded $prgenv] } { module swap $prgenv PrgEnv-intel } } setenv GPAW_SETUP_PATH "$prog_root/gpaw-setups-0.8.7929" prepend-path PYTHONPATH "$prog_root/lib/python" prepend-path PATH "$prog_root/bin/" Now, GPAW paths can be set as:: module load gpaw gpaw-24.1.0/doc/platforms/Cray/jaguar.rst000066400000000000000000000205261454550013000202130ustar00rootroot00000000000000.. _jaguar: ================== jaguar (Cray XT5) ================== Here you find information about the system http://www.nccs.gov/computing-resources/jaguar/. 
The current operating system in Cray XT4/XT5 compute nodes, Compute Linux Environment (CLE), has some limitations, most notably it does not support shared libraries. In order to use python in CLE some modifications to the standard python are needed. Instructions below assume **tcsh**. The installation process of python and numpy can be performed with the script :download:`make_python_numpy`:: ./make_python_numpy |& tee all.log whose details are given below. **Note**: One may want to change the installation paths in the beginning of *make_python_numpy*. Set the correct C compiler and flags, e.g.:: module swap PrgEnv-pgi PrgEnv-gnu setenv CC cc setenv CXX CC setenv OPT '-O3 -funroll-all-loops' The following modules are loaded:: module list Currently Loaded Modulefiles: 1) modules/3.1.6 2) DefApps 3) torque/2.2.0-snap.200707311754 4) moab/5.2.4 5) xtpe-quadcore 6) MySQL/5.0.45 7) xt-service/2.1.50HD 8) xt-libc/2.1.50HD 9) xt-os/2.1.50HD 10) xt-boot/2.1.50HD 11) xt-lustre-ss/2.1.50HD.PS04.lus.1.6.5.steve.8103_1.6.5 12) xtpe-target-cnl 13) Base-opts/2.1.50HD 14) PrgEnv-gnu/2.1.50HD 15) xt-asyncpe/2.3 16) xt-pe/2.1.50HD 17) xt-mpt/3.1.0 18) xt-libsci/10.3.1 19) fftw/3.1.1 20) gcc/4.2.0.quadcore The recommended place for user's applications is under ``$HOME``:: cd mkdir -p sw/xt5 cd sw/xt5 set sw_home=~/sw/xt5 wget http://www.python.org/ftp/python/2.5.4/Python-2.5.4.tar.bz2 wget http://sunet.dl.sourceforge.net/sourceforge/expat/expat-2.0.1.tar.gz wget http://www.zlib.net/zlib-1.2.3.tar.bz2 tar jxf Python-2.5.4.tar.bz2 tar zxf expat-2.0.1.tar.gz tar jxf zlib-1.2.3.tar.bz2 wget http://python-nose.googlecode.com/files/nose-0.11.0.tar.gz tar zxf nose-0.11.0.tar.gz wget http://dfn.dl.sourceforge.net/sourceforge/numpy/numpy-1.2.1.tar.gz tar zxf numpy-1.2.1.tar.gz Before installing a special python, install expat_ and zlib_, which are needed by GPAW but are not included in the python distribution.
The installation is based on instructions from http://yt.enzotools.org/wiki/CrayXT5Installation. .. _expat: http://expat.sourceforge.net/ .. _zlib: http://www.zlib.net/ Install expat:: cd ${sw_home} setenv EXPAT_DIR ${sw_home}/expat-2.0.1-1 cd expat-2.0.1 ./configure --disable-shared --prefix=${EXPAT_DIR} make make install Install zlib:: cd ${sw_home} setenv ZLIB_DIR ${sw_home}/zlib-1.2.3-1 cd zlib-1.2.3 ./configure --prefix=${ZLIB_DIR} make # ignore error: /usr/lib/../lib64/libc.a: could not read symbols: Bad value make install Next, one can proceed with the actual python installation. The following instructions are tested with python 2.5.4: - enter the python source directory:: cd ${sw_home} setenv PYTHON_DIR ${sw_home}/Python-2.5.4-1 cd Python-2.5.4 - create a special dynamic loader for correct resolution of namespaces:: wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Cray/dynload_redstorm.c -O Python/dynload_jaguar.c - run :file:`configure`:: ./configure --prefix=${PYTHON_DIR} SO=.a DYNLOADFILE=dynload_jaguar.o MACHDEP=jaguar --host=x86_64-unknown-linux-gnu --disable-sockets --disable-ssl --enable-static --disable-shared | tee config.log - in order to use ``distutils`` append the :file:`Lib/distutils/unixccompiler.py` file, so that static libraries are created instead of shared ones:: wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Cray/linkforshared.py cat Lib/distutils/unixccompiler.py linkforshared.py > unixccompiler.py mv unixccompiler.py Lib/distutils - specify which modules will be statically linked in to the python interpreter by editing :file:`Modules/Setup`:: mv Modules/Setup Modules/Setup.orig wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Cray/Setup_jaguar -O Modules/Setup touch Modules/Setup **Note**: sha modules are required by numpy, so the following lines should be present in Modules/Setup:: _sha shamodule.c _sha256 sha256module.c 
_sha512 sha512module.c - modify :file:`Lib/locale.py` as described at ``_ (is it really needed?), - build and install:: make | tee make.log # ignore errors like: # *** WARNING: renaming "_ctypes" since importing it failed: dynamic module does not define init function (init_ctypes) make install | tee make_install.log - build numpy:: cd ${sw_home} cd numpy-1.2.1 ${PYTHON_DIR}/bin/python setup.py install | tee install.log **Note**: numpy 1.3.0 gives:: # ImportError: No module named select - append numpy to python's :file:`Modules/Setup`:: cd ${sw_home}/Python-2.5.4 cat ../numpy-1.2.1/install.log | grep Append | cut -d ":" -f 2 | sed -n 's/ *//p' > append cat Modules/Setup append > Setup mv Setup Modules example output:: cat append multiarray /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/core/multiarray.a umath /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/core/umath.a _sort /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/core/_sort.a scalarmath /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/core/scalarmath.a _compiled_base /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/lib/_compiled_base.a _capi /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/numarray/_capi.a fftpack_lite /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/fft/fftpack_lite.a lapack_lite /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/linalg/lapack_lite.a mtrand /autofs/na1_home/farberow/sw/xt5/numpy-1.2.1/build/lib.linux-x86_64-2.5/numpy/random/mtrand.a - rebuild python:: make | tee make2.log make install | tee make_install2.log On jaguar only the */tmp/work/$USER* filesystem is available for batch jobs. **Note**: this space is cleaned periodically, see http://www.nccs.gov/computing-resources/jaguar/file-systems/.
Test python/numpy:: cp -r ${PYTHON_DIR} /tmp/work/$USER cp -r ${sw_home}/nose-0.11.0 /tmp/work/$USER cd /tmp/work/$USER cat << 'EOF' > ./numpyTest.py import numpy from numpy.core.multiarray import dot b = numpy.ones(13, numpy.complex) d = dot(b, b) print('Hello') numpy.test() EOF cat << 'EOF' > ./numpyTest.pbs #!/bin/bash #PBS -l walltime=00:10:00,size=8 #PBS -N numpyTest #PBS -A XXXXXX #PBS -j oe export PYTHONHOME=/tmp/work/$USER/Python-2.5.4-1 export PYTHONPATH=/tmp/work/$USER/nose-0.11.0 cd /tmp/work/$USER env | grep PYTHON env | grep LD_LIBRARY_PATH aprun -n1 ${PYTHONHOME}/bin/python -v ./numpyTest.py EOF qsub numpyTest.pbs Install ase/gpaw-setups (**Note**: use the latest releases):: cd ${sw_home} wget --no-check-certificate https://wiki.fysik.dtu.dk/ase-files/python-ase-3.1.0.846.tar.gz tar zxf python-ase-3.1.0.846.tar.gz wget --no-check-certificate http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.5.3574.tar.gz tar zxf gpaw-setups-0.5.3574.tar.gz cp -r python-ase-3.1.0.846 gpaw-setups-0.5.3574 /tmp/work/$USER cd /tmp/work/$USER ln -s python-ase-3.1.0.846 ase Install gpaw (**Note**: instructions valid from the **5232** release):: cd ${sw_home} wget --no-check-certificate https://wiki.fysik.dtu.dk/gpaw/gpaw-0.7.5232.tar.gz tar zxf gpaw-0.7.5232.tar.gz cd gpaw-0.7.5232 wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Cray/customize_jaguar.py -O customize.py ${PYTHON_DIR}/bin/python setup.py build_ext | tee build_ext.log cp -r ${sw_home}/gpaw-0.7.5232 /tmp/work/$USER cd /tmp/work/$USER ln -s gpaw-0.7.5232 gpaw Test gpaw:: cd /tmp/work/$USER cat << 'EOF' > ./gpawTest.pbs #!/bin/bash #PBS -l walltime=00:40:00,size=8 #PBS -N gpawTest #PBS -A XXXXXX #PBS -j oe export PYTHONHOME=/tmp/work/$USER/Python-2.5.4-1 export GPAW_SETUP_PATH=/tmp/work/$USER/gpaw-setups-0.5.3574 export PYTHONPATH=/tmp/work/$USER/gpaw:/tmp/work/$USER/ase cd /tmp/work/$USER/gpaw/gpaw/test env | grep PYTHON env | grep LD_LIBRARY_PATH aprun -n4
/tmp/work/$USER/gpaw/build/bin.linux-x86_64-2.5/gpaw-python -v ./test.py EOF qsub gpawTest.pbs gpaw-24.1.0/doc/platforms/Cray/linkforshared.py000066400000000000000000000023151454550013000214110ustar00rootroot00000000000000def link_shared_object (self, objects, output_filename, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None): if output_dir is None: (output_dir, output_filename) = os.path.split(output_filename) output_fullname = os.path.join(output_dir, output_filename) output_fullname = os.path.abspath(output_fullname) linkline = f"{output_filename[:-2]} {output_fullname}" for l in library_dirs: linkline += " -L" + l for l in libraries: linkline += " -l" + l old_fmt = self.static_lib_format self.static_lib_format = "%s%.0s" self.create_static_lib(objects, output_filename, output_dir, debug, target_lang) self.static_lib_format = old_fmt print("Append to Setup: ", linkline) gpaw-24.1.0/doc/platforms/Cray/louhi.rst000066400000000000000000000070111454550013000200540ustar00rootroot00000000000000.. _louhi: ============================ louhi.csc.fi (Cray XT4/XT5) ============================ Here you find information about the system ``_. .. note:: These instructions are up-to-date as of August 28th 2012. GPAW ==== The recent operating system releases for Cray XT4/5 (CLE 2.2 UP01 and later) supports dynamic libraries which simplifies GPAW installation significantly. These instructions for GPAW installation use Python 2.6.5 compiled with GNU compiler suite, see the end of this page for instructions for compiling Python. First, load the Python module and set ``XTPE_LINK_TYPE`` environment variable for dynamic linking:: module load python module load hdf5-parallel setenv XTPE_LINK_TYPE dynamic GPAW can now be build with a minimal ``customize.py`` .. 
literalinclude:: customize_louhi.py Currently, there is a small bug in the Cray libraries which results in failure when trying to build the ``_gpaw.so`` library. As the library is not really needed in parallel calculations, the problem can be circumvented by creating the file after running the setup.py script:: python setup.py build_ext ... touch build/lib.linux-x86_64-2.6/_gpaw.so python setup.py install --home=... Python and Numpy ================ Python can be compiled with PGI compiler as follows:: setenv XTPE_LINK_TYPE dynamic ./configure --prefix=path_to_install CC=cc CXX=cc OPT=-fastsse LINKFORSHARED=-Wl,--export-dynamic make install In order to use optimized BLAS with Numpy one has to first build a CBLAS which is linked with Cray's optimized BLAS routines. First, download the CBLAS source from netlib:: wget http://www.netlib.org/blas/blast-forum/cblas.tgz tar -xzf cblas.tgz Change to the CBLAS directory and copy ``Makefile.LINUX`` to ``Makefile.in``. Add correct compiler commands and paths to ``Makefile.in``:: ... PLAT = louhi #----------------------------------------------------------------------------- # Libraries and includs #----------------------------------------------------------------------------- BLLIB = CBDIR = $(HOME)/CBLAS CBLIBDIR = $(CBDIR)/lib/$(PLAT) CBLIB = $(CBLIBDIR)/libcblas.a #----------------------------------------------------------------------------- # Compilers #----------------------------------------------------------------------------- CC = cc FC = ftn LOADER = $(FC) #----------------------------------------------------------------------------- # Flags for Compilers #----------------------------------------------------------------------------- CFLAGS = -O3 -DADD_ -fPIC FFLAGS = -O3 -fPIC ... Finally, build CBLAS:: make alllib You are now ready to build Numpy with the newly created CBLAS library. 
The standard Numpy tries to use only the ATLAS BLAS, and in order to use different BLAS one has to manually edit the file ``numpy/core/setup.py``. Comment out an if statement as follows:: def get_dotblas_sources(ext, build_dir): if blas_info: # if ('NO_ATLAS_INFO',1) in blas_info.get('define_macros',[]): # return None # dotblas needs ATLAS, Fortran compiled blas will not be sufficient. return ext.depends[:1] Then, add the correct libraries and paths to the file ``site.cfg``:: [blas] blas_libs = cblas library_dirs = /home/csc/jenkovaa/CBLAS/lib/louhi [lapack] lapack_libs = sci library_dirs = /opt/xt-libsci/10.3.8/pgi/lib Now, one should be able to build Numpy as usual:: python setup.py install gpaw-24.1.0/doc/platforms/Cray/lumi.rst000066400000000000000000000040031454550013000177000ustar00rootroot00000000000000.. _lumi: ================================= The ``lumi.csc.fi`` supercomputer ================================= .. note:: These instructions are up-to-date as of September 2023. It is recommended to perform installation under the ``/projappl/project_...`` directory (see `LUMI user documentation `_). A separate installation is needed for LUMI-C and LUMI-G. GPAW for LUMI-G =============== Load the following modules: .. 
code-block:: bash export EBU_USER_PREFIX=/scratch/project_465000538/GPAW/EasyBuild module load LUMI/22.12 partition/G module load cpeGNU/22.12 module load craype-accel-amd-gfx90a module load rocm/5.2.3 module load cray-python/3.9.13.1 module load cray-fftw/3.3.10.1 module load ASE/3.22.1-cpeGNU-22.12 module load CuPy/12.2.0-cpeGNU-22.12 module load ELPA/2023.05.001-cpeGNU-22.12-GPU module load libxc/6.2.2-cpeGNU-22.12 Create a virtual environment and activate it:: python3 -m venv venv source venv/bin/activate Clone the GPAW source code:: git clone git@gitlab.com:gpaw/gpaw Copy this :git:`~doc/platforms/Cray/siteconfig-lumi-gpu.py` to ``gpaw/siteconfig.py`` and compile the C-code and the GPU kernels with:: pip install -v -e gpaw/ Now insert the ``export EBU_USER_PREFIX=...`` line and all the ``module load`` lines from above into the start of your ``venv/bin/activate`` script so that the modules are always loaded when you activate your new environment. Interactive jobs can be run like this:: srun -A project_465000538 -p small-g --nodes=1 --ntasks-per-node=2 --gpus-per-node=1 -t 0:30:00 --pty bash To use Omnitrace, source this file???:: source /scratch/project_465000538/GPAW/omnitrace-1.10.2-opensuse-15.4-ROCm-50200-PAPI-OMPT-Python3/share/omnitrace/setup-env.sh Configuring MyQueue =================== Use the following MyQueue_ :file:`config.py` file: .. literalinclude:: config.py and submit jobs like this:: mq submit job.py -R 128:standard:2h .. 
_MyQueue: https://myqueue.readthedocs.io/en/latest/ gpaw-24.1.0/doc/platforms/Cray/make_python_numpy000077500000000000000000000045651454550013000217110ustar00rootroot00000000000000#!/bin/csh module swap PrgEnv-pgi PrgEnv-gnu setenv CC cc setenv CXX CC setenv OPT '-O3 -funroll-all-loops' # edit the installation paths if necessary mkdir -p sw/xt5 cd sw/xt5 set sw_home=`pwd` setenv EXPAT_DIR ${sw_home}/expat-2.0.1-1 setenv ZLIB_DIR ${sw_home}/zlib-1.2.3-1 setenv PYTHON_DIR ${sw_home}/Python-2.5.4-1 wget http://www.python.org/ftp/python/2.5.4/Python-2.5.4.tar.bz2 wget http://sunet.dl.sourceforge.net/sourceforge/expat/expat-2.0.1.tar.gz wget http://www.zlib.net/zlib-1.2.3.tar.bz2 tar jxf Python-2.5.4.tar.bz2 tar zxf expat-2.0.1.tar.gz tar jxf zlib-1.2.3.tar.bz2 wget http://python-nose.googlecode.com/files/nose-0.11.0.tar.gz tar zxf nose-0.11.0.tar.gz wget http://dfn.dl.sourceforge.net/sourceforge/numpy/numpy-1.2.1.tar.gz tar zxf numpy-1.2.1.tar.gz cd ${sw_home} echo "Making Expat" cd expat-2.0.1 ./configure --disable-shared --prefix=${EXPAT_DIR} make make install cd ${sw_home} echo "Making Zlib" cd zlib-1.2.3 ./configure --prefix=${ZLIB_DIR} make # ignore error: /usr/lib/../lib64/libc.a: could not read symbols: Bad value make install cd ${sw_home} echo "Making Python" cd Python-2.5.4 wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/install/Cray/dynload_redstorm.c -O Python/dynload_jaguar.c ./configure --prefix=${PYTHON_DIR} SO=.a DYNLOADFILE=dynload_jaguar.o MACHDEP=jaguar --host=x86_64-unknown-linux-gnu --disable-sockets --disable-ssl --enable-static --disable-shared | tee config.log wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/install/Cray/linkforshared.py cat Lib/distutils/unixccompiler.py linkforshared.py > unixccompiler.py mv -f unixccompiler.py Lib/distutils mv -f Modules/Setup Modules/Setup.orig wget --no-check-certificate http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/install/Cray/Setup_jaguar -O 
Modules/Setup touch Modules/Setup make | tee make.log # ignore errors like: # *** WARNING: renaming "_ctypes" since importing it failed: dynamic module does not define init function (init_ctypes) make install | tee make_install.log cd ${sw_home} echo "Making Numpy" cd numpy-1.2.1 ${PYTHON_DIR}/bin/python setup.py install | tee install.log cd ${sw_home}/Python-2.5.4 echo "Remaking Python" cat ../numpy-1.2.1/install.log | grep Append | cut -d ":" -f 2 | sed -n 's/ *//p' > append cat Modules/Setup append > Setup mv -f Setup Modules make | tee make2.log make install | tee make_install2.log gpaw-24.1.0/doc/platforms/Cray/nersc_cori.rst000066400000000000000000000030031454550013000210570ustar00rootroot00000000000000.. _nersc_cori: ===================== cori.nersc.gov (XC40) ===================== .. note:: These instructions are up-to-date as of April 2021. GPAW ==== At NERSC it is recommened to install GPAW on Cori with Anaconda python. For massivly parallel applications it is recommened to use `Shifter `_. GPAW can be built with a minimal ``siteconfig.py`` .. literalinclude:: customize_nersc_cori.py Load the GNU programming environment and set Cray environment for dynamic linking:: export CRAYPE_LINK_TYPE=dynamic module swap PrgEnv-${PE_ENV,,} PrgEnv-gnu module load python Create a conda environment for gpaw:: conda create --name gpaw python=3.8 pip numpy scipy matplotlib Install ASE with pip while the Anaconda python module is loaded:: source activate gpaw pip install ase Build and install GPAW:: python setup.py build_ext python setup.py install To setup the environment:: module swap PrgEnv-${PE_ENV,,} PrgEnv-gnu module load python source activate gpaw export OMP_NUM_THREADS=1 Then the test suite can be run from a batch script or interactive session with:: export MKL_CBWR="AVX" srun -n 8 -c 2 --cpu_bind=cores gpaw-python -m gpaw test .. note:: For all tests to pass enable MKL's conditional Numerical Reproducibility mode with the `MKL_CBWR` environment variable. .. 
note:: To run the test suite you will first need the GPAW setups installed, as described at :ref:`installation of paw datasets`. gpaw-24.1.0/doc/platforms/Cray/sisu.rst000066400000000000000000000046421454550013000177260ustar00rootroot00000000000000.. _sisu: ============================ sisu.csc.fi (Cray XC30) ============================ .. note:: These instructions are up-to-date as of July 2014. GPAW ==== These instructions for GPAW installation use the Scalable Python interpreter which reduces drastically the import time in massively parallel calculations. See the end of this document for installation instructions for Scalable Python. First, the serial version of code is built with serial HDF5 library, e.g. for analysis purposes in the front-end:: module load scalable-python module load cray-hdf5 GPAW can be build with a minimal ``customize.py`` (edit the correct paths for libxc) .. literalinclude:: customize_sisu.py Then build the code (no installation at this point yet) with the setup.py script:: python setup.py build_ext The build of parallel gpaw-python interpreter fails at this point with an error like:: build/temp.linux-x86_64-2.6/c/hdf5.o: In function 'h5p_set_fapl_mpio': hdf5.c:(.text+0x1bad): undefined reference to 'H5Pset_fapl_mpio' Next, switch to the parallel version of HDF5 library and do build and install:: module switch cray-hdf5 cray-hdf5-parallel python setup.py install --home=path_to_install_prefix LibXC ===== Download libxc:: wget http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.2.0.tar.gz Configure and make (use GNU environment):: ./configure --prefix=install_prefix CC=cc CFLAGS=-fPIC make make install Scalable Python =============== Standard Python interpreter has serious bottleneck in large scale parallel calculations when many MPI tasks perform the same I/O operations during the import statetements. 
Scalable Python ``_ reduces the import time by having only single MPI task to perform import related I/O operations and using then MPI for broadcasting the data. First, download the source code and switch to GNU environment:: git clone git@gitorious.org:scalable-python/scalable-python.git module switch PrgEnv-cray PrgEnv-gnu Use the following build script (change the installation prefix to a proper one): .. literalinclude:: build_scalable_python.sh Add then ``install_prefix/bin`` to your PATH, and download and install NumPy:: export PATH=install_prefix/bin:$PATH wget http://sourceforge.net/projects/numpy/files/NumPy/1.8.1/numpy-1.8.1.tar.gz tar xzf numpy-1.8.1.tar.gz cd numpy-1.8.1 python setup.py install gpaw-24.1.0/doc/platforms/Cray/siteconfig-lumi-gpu.py000066400000000000000000000015311454550013000224440ustar00rootroot00000000000000"""Custom GPAW siteconfig for LUMI-G.""" mpi = True compiler = 'cc' libraries = [] library_dirs = [] include_dirs = [] extra_compile_args = [ '-O3', '-march=native', '-mtune=native', '-mavx2', '-fopenmp', '-fPIC', '-Wall', '-Wno-stringop-overflow', # suppress warnings from MPI_STATUSES_IGNORE '-DNDEBUG', '-g'] extra_link_args = ['-fopenmp'] # hip gpu = True gpu_target = 'hip-amd' gpu_compiler = 'hipcc' gpu_include_dirs = [] gpu_compile_args = ['--offload-arch=gfx90a', '-O3', '-g'] libraries += ['amdhip64', 'hipblas'] # define_macros += [('GPAW_GPU_AWARE_MPI', None)] # ELPA elpa = True libraries += ['elpa'] # FFTW fftw = True libraries += ['fftw3'] # ScaLAPACK # Note: required libraries are linked by compiler wrappers scalapack = True # Libxc libraries += ['xc'] define_macros += [('GPAW_ASYNC', 1)] gpaw-24.1.0/doc/platforms/Cray/unixccompiler.py000066400000000000000000000344601454550013000214450ustar00rootroot00000000000000"""distutils.unixccompiler Contains the UnixCCompiler class, a subclass of CCompiler that handles the "typical" Unix-style command-line C compiler: * macros defined with -Dname[=value] * macros undefined with -Uname 
* include search directories specified with -Idir * libraries specified with -lllib * library search directories specified with -Ldir * compile handled by 'cc' (or similar) executable with -c option: compiles .c to .o * link static library handled by 'ar' command (possibly with 'ranlib') * link shared library handled by 'cc -shared' """ __revision__ = "$Id: unixccompiler.py 54954 2007-04-25 06:42:41Z neal.norwitz $" import os, sys from types import StringType, NoneType from copy import copy from distutils import sysconfig from distutils.dep_util import newer from distutils.ccompiler import \ CCompiler, gen_preprocess_options, gen_lib_options from distutils.errors import \ DistutilsExecError, CompileError, LibError, LinkError from distutils import log # XXX Things not currently handled: # * optimization/debug/warning flags; we just use whatever's in Python's # Makefile and live with it. Is this adequate? If not, we might # have to have a bunch of subclasses GNUCCompiler, SGICCompiler, # SunCCompiler, and I suspect down that road lies madness. # * even if we don't know a warning flag from an optimization flag, # we need some way for outsiders to feed preprocessor/compiler/linker # flags in to us -- eg. a sysadmin might want to mandate certain flags # via a site config file, or a user might want to set something for # compiling this module distribution only via the setup.py command # line, whatever. As long as these options come from something on the # current system, they can be as system-dependent as they like, and we # should just happily stuff them into the preprocessor/compiler/linker # options and carry on. def _darwin_compiler_fixup(compiler_so, cc_args): """ This function will strip '-isysroot PATH' and '-arch ARCH' from the compile flags if the user has specified one them in extra_compile_flags. This is needed because '-arch ARCH' adds another architecture to the build, without a way to remove an architecture. 
Furthermore GCC will barf if multiple '-isysroot' arguments are present. """ stripArch = stripSysroot = 0 compiler_so = list(compiler_so) kernel_version = os.uname()[2] # 8.4.3 major_version = int(kernel_version.split('.')[0]) if major_version < 8: # OSX before 10.4.0, these don't support -arch and -isysroot at # all. stripArch = stripSysroot = True else: stripArch = '-arch' in cc_args stripSysroot = '-isysroot' in cc_args if stripArch: while True: try: index = compiler_so.index('-arch') # Strip this argument and the next one: del compiler_so[index:index+2] except ValueError: break if stripSysroot: try: index = compiler_so.index('-isysroot') # Strip this argument and the next one: del compiler_so[index:index+2] except ValueError: pass # Check if the SDK that is used during compilation actually exists, # the universal build requires the usage of a universal SDK and not all # users have that installed by default. sysroot = None if '-isysroot' in cc_args: idx = cc_args.index('-isysroot') sysroot = cc_args[idx+1] elif '-isysroot' in compiler_so: idx = compiler_so.index('-isysroot') sysroot = compiler_so[idx+1] if sysroot and not os.path.isdir(sysroot): log.warn("Compiling with an SDK that doesn't seem to exist: %s", sysroot) log.warn("Please check your Xcode installation") return compiler_so class UnixCCompiler(CCompiler): compiler_type = 'unix' # These are used by CCompiler in two places: the constructor sets # instance attributes 'preprocessor', 'compiler', etc. from them, and # 'set_executable()' allows any of these to be set. The defaults here # are pretty generic; they will probably have to be set by an outsider # (eg. using information discovered by the sysconfig about building # Python extensions). 
executables = {'preprocessor' : None, 'compiler' : ["cc"], 'compiler_so' : ["cc"], 'compiler_cxx' : ["cc"], 'linker_so' : ["cc", "-shared"], 'linker_exe' : ["cc"], 'archiver' : ["ar", "-cr"], 'ranlib' : None, } if sys.platform[:6] == "darwin": executables['ranlib'] = ["ranlib"] # Needed for the filename generation methods provided by the base # class, CCompiler. NB. whoever instantiates/uses a particular # UnixCCompiler instance should set 'shared_lib_ext' -- we set a # reasonable common default here, but it's not necessarily used on all # Unices! src_extensions = [".c",".C",".cc",".cxx",".cpp",".m"] obj_extension = ".o" static_lib_extension = ".a" shared_lib_extension = ".so" dylib_lib_extension = ".dylib" static_lib_format = shared_lib_format = dylib_lib_format = "lib%s%s" if sys.platform == "cygwin": exe_extension = ".exe" def preprocess(self, source, output_file=None, macros=None, include_dirs=None, extra_preargs=None, extra_postargs=None): ignore, macros, include_dirs = \ self._fix_compile_args(None, macros, include_dirs) pp_opts = gen_preprocess_options(macros, include_dirs) pp_args = self.preprocessor + pp_opts if output_file: pp_args.extend(['-o', output_file]) if extra_preargs: pp_args[:0] = extra_preargs if extra_postargs: pp_args.extend(extra_postargs) pp_args.append(source) # We need to preprocess: either we're being forced to, or we're # generating output to stdout, or there's a target output file and # the source file is newer than the target (or the target doesn't # exist). 
if self.force or output_file is None or newer(source, output_file): if output_file: self.mkpath(os.path.dirname(output_file)) try: self.spawn(pp_args) except DistutilsExecError as msg: raise CompileError(msg) def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): compiler_so = self.compiler_so if sys.platform == 'darwin': compiler_so = _darwin_compiler_fixup(compiler_so, cc_args + extra_postargs) try: self.spawn(compiler_so + cc_args + [src, '-o', obj] + extra_postargs) except DistutilsExecError as msg: raise CompileError(msg) def create_static_lib(self, objects, output_libname, output_dir=None, debug=0, target_lang=None): objects, output_dir = self._fix_object_args(objects, output_dir) output_filename = \ self.library_filename(output_libname, output_dir=output_dir) if self._need_link(objects, output_filename): self.mkpath(os.path.dirname(output_filename)) self.spawn(self.archiver + [output_filename] + objects + self.objects) # Not many Unices required ranlib anymore -- SunOS 4.x is, I # think the only major Unix that does. Maybe we need some # platform intelligence here to skip ranlib if it's not # needed -- or maybe Python's configure script took care of # it for us, hence the check for leading colon. 
if self.ranlib: try: self.spawn(self.ranlib + [output_filename]) except DistutilsExecError as msg: raise LibError(msg) else: log.debug("skipping %s (up-to-date)", output_filename) def link(self, target_desc, objects, output_filename, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None): objects, output_dir = self._fix_object_args(objects, output_dir) libraries, library_dirs, runtime_library_dirs = \ self._fix_lib_args(libraries, library_dirs, runtime_library_dirs) lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs, libraries) if type(output_dir) not in (StringType, NoneType): raise TypeError("'output_dir' must be a string or None") if output_dir is not None: output_filename = os.path.join(output_dir, output_filename) if self._need_link(objects, output_filename): ld_args = (objects + self.objects + lib_opts + ['-o', output_filename]) if debug: ld_args[:0] = ['-g'] if extra_preargs: ld_args[:0] = extra_preargs if extra_postargs: ld_args.extend(extra_postargs) self.mkpath(os.path.dirname(output_filename)) try: if target_desc == CCompiler.EXECUTABLE: linker = self.linker_exe[:] else: linker = self.linker_so[:] if target_lang == "c++" and self.compiler_cxx: # skip over environment variable settings if /usr/bin/env # is used to set up the linker's environment. # This is needed on OSX. Note: this assumes that the # normal and C++ compiler have the same environment # settings. 
i = 0 if os.path.basename(linker[0]) == "env": i = 1 while '=' in linker[i]: i = i + 1 linker[i] = self.compiler_cxx[i] if sys.platform == 'darwin': linker = _darwin_compiler_fixup(linker, ld_args) self.spawn(linker + ld_args) except DistutilsExecError as msg: raise LinkError(msg) else: log.debug("skipping %s (up-to-date)", output_filename) # -- Miscellaneous methods ----------------------------------------- # These are all used by the 'gen_lib_options() function, in # ccompiler.py. def library_dir_option(self, dir): return "-L" + dir def runtime_library_dir_option(self, dir): # XXX Hackish, at the very least. See Python bug #445902: # sourceforge.net/tracker/index.php # ?func=detail&aid=445902&group_id=5470&atid=105470 # Linkers on different platforms need different options to # specify that directories need to be added to the list of # directories searched for dependencies when a dynamic library # is sought. GCC has to be told to pass the -R option through # to the linker, whereas other compilers just know this. # Other compilers may need something slightly different. At # this time, there's no way to determine this information from # the configuration data stored in the Python installation, so # we use this hack. 
compiler = os.path.basename(sysconfig.get_config_var("CC")) if sys.platform[:6] == "darwin": # MacOSX's linker doesn't understand the -R flag at all return "-L" + dir elif sys.platform[:5] == "hp-ux": return "+s -L" + dir elif sys.platform[:7] == "irix646" or sys.platform[:6] == "osf1V5": return ["-rpath", dir] elif compiler[:3] == "gcc" or compiler[:3] == "g++": return "-Wl,-R" + dir else: return "-R" + dir def library_option(self, lib): return "-l" + lib def find_library_file(self, dirs, lib, debug=0): shared_f = self.library_filename(lib, lib_type='shared') dylib_f = self.library_filename(lib, lib_type='dylib') static_f = self.library_filename(lib, lib_type='static') for dir in dirs: shared = os.path.join(dir, shared_f) dylib = os.path.join(dir, dylib_f) static = os.path.join(dir, static_f) # We're second-guessing the linker here, with not much hard # data to go on: GCC seems to prefer the shared library, so I'm # assuming that *all* Unix C compilers do. And of course I'm # ignoring even GCC's "-static" option. So sue me. 
if os.path.exists(dylib): return dylib elif os.path.exists(shared): return shared elif os.path.exists(static): return static # Oops, didn't find it in *any* of 'dirs' return None def link_shared_object (self, objects, output_filename, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None): if output_dir is None: (output_dir, output_filename) = os.path.split(output_filename) output_fullname = os.path.join(output_dir, output_filename) output_fullname = os.path.abspath(output_fullname) linkline = f"{output_filename[:-2]} {output_fullname}" for l in library_dirs: linkline += " -L" + l for l in libraries: linkline += " -l" + l old_fmt = self.static_lib_format self.static_lib_format = "%s%.0s" self.create_static_lib(objects, output_filename, output_dir, debug, target_lang) self.static_lib_format = old_fmt print("Append to Setup: ", linkline) gpaw-24.1.0/doc/platforms/HP/000077500000000000000000000000001454550013000156145ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/HP/xc.rst000066400000000000000000000006701454550013000167630ustar00rootroot00000000000000======================= xc2.rz.uni-karlsruhe.de ======================= Here you find information about the system ``_. The installation works using ``gcc``:: module load gcc/4.1.2/default and numpy-1.0.4 (site.cfg was not used). 
The installation of gpaw requires to modify customize.py to:: libraries = ['acml', 'gfortran'] library_dirs = ['/software/all/acml/acml4.0/gfortran64/lib'] gpaw-24.1.0/doc/platforms/Linux/000077500000000000000000000000001454550013000164045ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Linux/EasyBuild/000077500000000000000000000000001454550013000202655ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Linux/EasyBuild/config_foss.py000066400000000000000000000030271454550013000231400ustar00rootroot00000000000000# Convert static library specs from EasyBuild to GPAW def static_eblibs_to_gpawlibs(lib_specs): return [libfile[3:-2] for libfile in os.getenv(lib_specs).split(',')] # Clean out any autodetected things, we only want the EasyBuild # definitions to be used. libraries = [] include_dirs = [] # Use EasyBuild fftw from the active toolchain fftw = os.getenv('FFT_STATIC_LIBS') if fftw: libraries += static_eblibs_to_gpawlibs('FFT_STATIC_LIBS') # Use ScaLAPACK from the active toolchain scalapack = os.getenv('SCALAPACK_STATIC_LIBS') if scalapack: libraries += static_eblibs_to_gpawlibs('SCALAPACK_STATIC_LIBS') # Add EasyBuild LAPACK/BLAS libs libraries += static_eblibs_to_gpawlibs('LAPACK_STATIC_LIBS') libraries += static_eblibs_to_gpawlibs('BLAS_STATIC_LIBS') # LibXC: # Use EasyBuild libxc libxc = os.getenv('EBROOTLIBXC') if libxc: include_dirs.append(os.path.join(libxc, 'include')) libraries.append('xc') # libvdwxc: # Use EasyBuild libvdwxc libvdwxc = os.getenv('EBROOTLIBVDWXC') if libvdwxc: include_dirs.append(os.path.join(libvdwxc, 'include')) libraries.append('vdwxc') # ELPA: # Use EasyBuild ELPA if loaded elpa = os.getenv('EBROOTELPA') if elpa: libraries += ['elpa'] elpaversion = os.path.basename(elpa).split('-')[0] library_dirs.append(os.path.join(elpa, 'lib')) extra_link_args += [f'-Wl,-rpath={elpa}/lib'] include_dirs.append(os.path.join(elpa, 'include', 'elpa-'+elpaversion)) # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = 
os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/EasyBuild/config_intel.py000066400000000000000000000023331454550013000233000ustar00rootroot00000000000000mpicompiler = 'mpiicc' # FFTW should be configured from environment variables, but they do # not report the correct names for a dynamically loaded library. fftw = True # Use Intel MKL libraries += ['mkl_sequential','mkl_core', 'fftw3xc_intel_pic', 'mkl_rt', ] # Use EasyBuild scalapack from the active toolchain scalapack = True libraries += ['mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64'] # Use EasyBuild libxc libxc = os.getenv('EBROOTLIBXC') include_dirs.append(os.path.join(libxc, 'include')) # libvdwxc: # Use EasyBuild libvdwxc # NOTE: This currenlty does not work together with the Intel MKL, so # the easyconfig files does not load libvdwxc libvdwxc = os.getenv('EBROOTLIBVDWXC') if libvdwxc: include_dirs.append(os.path.join(libvdwxc, 'include')) libraries.append('vdwxc') # ELPA: # Use EasyBuild ELPA if loaded elpa = os.getenv('EBROOTELPA') if elpa: libraries += ['elpa'] elpaversion = os.path.basename(elpa).split('-')[0] library_dirs.append(os.path.join(elpa, 'lib')) extra_link_args += [f'-Wl,-rpath={elpa}/lib'] include_dirs.append(os.path.join(elpa, 'include', 'elpa-'+elpaversion)) # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/Fedora.rst000066400000000000000000000014131454550013000203350ustar00rootroot00000000000000.. _Fedora: ====== Fedora ====== Here you find information about the system ``_. System wide installation with yum ================================= **Warning**: this section is outdated! 
The steps described below require root access and assume bash shell: - `configure fys yum repository `_ - on Fedora 12 or newer i386 or x86_64, as root: - install gpaw and dependencies:: yum -y install --enablerepo=fys_fc campos-gpaw - install optional packages:: yum -y install scipy ScientificPython .. note:: There are no new releases of fys packages for "Old Unsupported Releases" of Fedora: see http://fedoraproject.org/wiki/Releases gpaw-24.1.0/doc/platforms/Linux/Juwels/000077500000000000000000000000001454550013000176555ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Linux/Juwels/gpaw-venv.sh000077500000000000000000000034571454550013000221370ustar00rootroot00000000000000#!/usr/bin/bash # Install gpaw, ase, ase-ext on Juwels in a virtual environment set -e # stop if there are errors NAME=$1 USAGE="Usage: $0 foldername" FOLDER=$PWD ASE_REPO=https://gitlab.com/ase/ase.git GPAW_REPO=https://gitlab.com/gpaw/gpaw.git if [[ $# -ne 1 ]]; then echo "Wrong number of arguments, expected 1 (install dir), got $#" echo $USAGE exit 1 fi echo " module purge module load StdEnv module load Python module load SciPy-Stack module load intel-para/2021 module load FFTW module load libxc module load ELPA " > modules.sh . modules.sh # Create venv: echo "Creating virtual environment $NAME" python3 -m venv --system-site-packages $NAME cd $NAME VENV=$PWD . bin/activate PIP="python3 -m pip" $PIP install --upgrade pip -qq # Load modules in activate script: mv bin/activate old mv $FOLDER/modules.sh bin/activate cat old >> bin/activate rm old # Install ASE from git: git clone $ASE_REPO $PIP install -e ase/ $PIP install myqueue graphviz qeh CMD="cd $VENV && . bin/activate && pip install ase-ext" echo $CMD # Install GPAW: git clone $GPAW_REPO cd gpaw cp ./doc/platforms/Linux/Juwels/siteconfig_juwels.py siteconfig.py cd $VENV . bin/activate pip install -e gpaw -v > compilation.out # Install extra basis-functions: cd $VENV gpaw install-data . gpaw install-data --basis --version=20000 . 
--no-register export GPAW_SETUP_PATH=$GPAW_SETUP_PATH:$VENV/gpaw-basis-pvalence-0.9.20000 echo "export GPAW_SETUP_PATH=$GPAW_SETUP_PATH" >> bin/activate # Tab completion: ase completion >> bin/activate gpaw completion >> bin/activate mq completion >> bin/activate $PIP completion --bash >> bin/activate # Set matplotlib backend: echo ' if [[ $SLURM_SUBMIT_DIR ]]; then export MPLBACKEND=Agg else export MPLBACKEND=TkAgg fi ' >> bin/activate # Run tests: mq --version ase info gpaw test gpaw-24.1.0/doc/platforms/Linux/Juwels/juwels.rst000066400000000000000000000021331454550013000217170ustar00rootroot00000000000000.. _juwels: ================== juwels @ FZ-Jülich ================== This installation tutorial was written for GPAW version 21.6.1b1. Building in a virtual environment ================================= GPAW (and ASE and others) can be installed in a self contained virtual environment. In order to install the latest version download :download:`gpaw-venv.sh` and run it like this:: bash gpaw-venv.sh after the installation is complete you can load the virtual environment via:: source /bin/activate Running GPAW ============ The easiest way to submit a job is to write a shell script containing the following:: #!/bin/bash # (Don't forget the account key!) # if GPAW has been compiled in venv: source /bin/activate srun gpaw -P python [OPTIONS] The script can then be submitted via ``sbatch ``. ```` can also be replaced with ``$1``, which enables you to give the scriptname from the command-line and makes the submission script more general. 
gpaw-24.1.0/doc/platforms/Linux/Juwels/siteconfig_juwels.py000066400000000000000000000030201454550013000237450ustar00rootroot00000000000000import os parallel_python_interpreter=False #compiler = 'gcc' mpicompiler = 'mpicc' scalapack = True fftw = True libraries = ['mkl_intel_lp64', 'mkl_sequential', 'mkl_lapack', 'mkl_core', 'pthread', 'readline', 'termcap', 'xc', 'mkl_blacs_intelmpi_lp64'] # FFTW should be configured from environment variables, but they do # not report the correct names for a dynamically loaded library. # Use Intel MKL if fftw: libraries += ['fftw3xc_intel_pic', 'mkl_rt'] # Use EasyBuild scalapack from the active toolchain if scalapack: libraries += ['mkl_scalapack_lp64','mkl_lapack95_lp64'] # Use EasyBuild libxc libxc = os.getenv('EBROOTLIBXC') include_dirs.append(os.path.join(libxc, 'include')) # libvdwxc: # Use EasyBuild libvdwxc # NOTE: This currenlty does not work together with the Intel MKL, so # the easyconfig files does not load libvdwxc libvdwxc = os.getenv('EBROOTLIBVDWXC') if libvdwxc: include_dirs.append(os.path.join(libvdwxc, 'include')) libraries.append('vdwxc') # ELPA: # Use EasyBuild ELPA if loaded elpa = os.getenv('EBROOTELPA') if elpa: libraries += ['elpa'] elpaversion = os.path.basename(elpa).split('-')[0] library_dirs.append(os.path.join(elpa, 'lib')) extra_link_args += ['-Wl,-rpath={}/lib'.format(elpa)] include_dirs.append(os.path.join(elpa, 'include', 'elpa-'+elpaversion)) # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/Niflheim/000077500000000000000000000000001454550013000201375ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Linux/Niflheim/Niflheim.rst000066400000000000000000000010161454550013000224220ustar00rootroot00000000000000.. _Niflheim: ======== Niflheim ======== Information about the Niflheim cluster can be found at ``_. There are two ways to use GPAW on Niflheim. 
You can use a preinstalled GPAW, or you can compile it yourself (the latter is mainly for GPAW developers). * If you compile it yourself, please refer to the guide :ref:`Compiling GPAW on Niflheim `. * If you want to run a precompiled module, please read :ref:`Loading GPAW modules on Niflheim `. gpaw-24.1.0/doc/platforms/Linux/Niflheim/build.rst000066400000000000000000000066061454550013000220000ustar00rootroot00000000000000.. _build on niflheim: ========================================== Building GPAW in a Python venv on Niflheim ========================================== This document explains how to compile a developer version of GPAW on Niflheim. If you just want to run the pre-installed version, please read the guide :ref:`Using a pre-installed GPAW on Niflheim `. .. seealso:: * :mod:`Creation of Python virtual environments `. * Information about the Niflheim cluster can be found at ``_. * `MyQueue `__. .. contents:: .. highlight:: bash Creating the venv ================= Download the :download:`gpaw_venv.py` script (use ``wget ``) and run it like this:: $ python3 gpaw_venv.py ... .. tip:: You will need Python 3.8 or later. You can install that with:: $ module load Python/3.8.6-GCCcore-10.2.0 Type ``python3 gpaw_venv.py --help`` for help. After a few minutes, you will have a ```` folder with a GPAW installation inside. In the following, we will assume that your venv folder is ``~/venv1/``. 
The ``gpaw_venv.py`` script does the following: * load relevant modules from the foss toolchain * create the venv * clone and install ASE and GPAW from gitlab * install some other Python packages from PyPI: sklearn, graphviz, matplotlib, pytest-xdist, myqueue, ase-ext, spglib * enable tab-completion for command-line tools: `ase `__, `gpaw `__, `mq `__ Using the venv ============== The venv needs to be activated like this:: $ source venv1/bin/activate and you can deactivate it when you no longer need to use it:: $ deactivate You will want the activation to happen automatically for the jobs you submit to Niflheim. Here are three ways to do it (pick one, and only one): 1) If you always want to use one venv then just put the activation command in your ``~/.bashrc``. 2) If you only want jobs running inside a certain folder to use the venv, then add this to your ``~/.bashrc``:: if [[ $SLURM_SUBMIT_DIR/ = $HOME/project-1* ]]; then source ~/venv1/bin/activate fi Now, SLURM-jobs submitted inside your ``~/project-1/`` folder will use the venv. 3) Use MyQueue. Make sure you have MyQueue version 22.7.0 or later (``mq --version``). The venv will automatically be activated if it was activated at submit time. If you haven't configured MyQueue then you can do that with this command:: $ mq config slurm | grep -v sm3090 > ~/.myqueue/config.py (skips the *sm3090* GPU-enabled nodes). Adding additional packages ========================== In order to add more Python packages to your venv, you need to activate it and then you can ``pip install`` packages. Here is how to install ASR_:: $ git clone https://gitlab.com/asr-dev/asr.git $ cd asr $ git checkout old-master $ pip install . .. warning:: Pip may need to compile some code. It is therefore safest to use the ``thul`` login node to pip install software as it is the oldest CPU architecture and the other login nodes will understand its code. .. _ASR: https://asr.readthedocs.io/en/latest/ Full script =========== .. 
literalinclude:: gpaw_venv.py gpaw-24.1.0/doc/platforms/Linux/Niflheim/gpaw-venv.sh000077500000000000000000000002111454550013000224020ustar00rootroot00000000000000#!/usr/bin/bash echo "Please use ./gpaw_venv.py instead! See: https://wiki.fysik.dtu.dk/gpaw/platforms/Linux/Niflheim/build.html " gpaw-24.1.0/doc/platforms/Linux/Niflheim/gpaw_venv.py000077500000000000000000000244061454550013000225160ustar00rootroot00000000000000#!/usr/bin/env python3 """Install gpaw on Niflheim in a virtual environment. Also installs ase, ase-ext, spglib, sklearn and myqueue. """ import argparse import os import shutil import subprocess from pathlib import Path from sys import version_info if version_info < (3, 8): raise ValueError('Please use Python-3.8 or later') # Python version in the venv that we are creating version = '3.11' fversion = 'cpython-311' # Niflheim login hosts, with the oldest architecture as the first nifllogin = ['thul', 'sylg', 'svol', 'surt'] # Easybuild uses a hierarchy of toolchains for the main foss and intel # chains. The order in the tuples before are # fullchain: Full chain. # mathchain: Chain with math libraries but no MPI # compchain: Chain with full compiler suite (but no fancy libs) # corechain: Core compiler # The subchain complementary to 'mathchain', with MPI but no math libs, is # not used here. 
_gcccore = 'GCCcore-12.3.0' toolchains = { 'foss': dict( fullchain='foss-2023a', mathchain='gfbf-2023a', compchain='GCC-12.3.0', corechain=_gcccore, ), 'intel': dict( fullchain='intel-2023a', mathchain='iimkl-2023a', compchain='intel-compilers-2023.1.0', corechain=_gcccore, ) } # These modules are always loaded module_cmds_all = """\ module purge unset PYTHONPATH module load GPAW-setups/0.9.20000 module load ELPA/2023.05.001-{fullchain} module load Wannier90/3.1.0-{fullchain} module load Python-bundle-PyPI/2023.06-{corechain} module load Tkinter/3.11.3-{corechain} module load libxc/6.2.2-{compchain} """ # These modules are not loaded if --piponly is specified module_cmds_easybuild = """\ module load matplotlib/3.7.2-{mathchain} module load scikit-learn/1.3.1-{mathchain} module load spglib-python/2.1.0-{mathchain} """ # These modules are loaded depending on the toolchain module_cmds_tc = { 'foss': """\ module load libvdwxc/0.4.0-{fullchain} """, 'intel': "" } activate_extra = """ export GPAW_SETUP_PATH=$GPAW_SETUP_PATH:{venv}/gpaw-basis-pvalence-0.9.20000 # Set matplotlib backend: if [[ $SLURM_SUBMIT_DIR ]]; then export MPLBACKEND=Agg export PYTHONWARNINGS="ignore:Matplotlib is currently using agg" else export MPLBACKEND=TkAgg fi """ dftd3 = """\ mkdir {venv}/DFTD3 cd {venv}/DFTD3 URL=https://www.chemiebn.uni-bonn.de/pctc/mulliken-center/software/dft-d3 wget $URL/dftd3.tgz tar -xf dftd3.tgz ssh {nifllogin[0]} ". {venv}/bin/activate && cd {venv}/DFTD3 && make >& d3.log" ln -s {venv}/DFTD3/dftd3 {venv}/bin """ def run(cmd: str, **kwargs) -> subprocess.CompletedProcess: print(cmd) return subprocess.run(cmd, shell=True, check=True, **kwargs) def compile_gpaw_c_code(gpaw: Path, activate: Path) -> None: """Compile for all architectures: xeon16, xeon24, xeon40, ...""" # Remove targets: for path in gpaw.glob('build/lib.linux-x86_64-*/_gpaw.*.so'): path.unlink() # Compile: for host in nifllogin: run(f'ssh {host} ". 
{activate} && pip install -q -e {gpaw}"') # Clean up: for path in gpaw.glob('_gpaw.*.so'): path.unlink() for path in gpaw.glob('build/temp.linux-x86_64-*'): shutil.rmtree(path) def fix_installed_scripts(venvdir: Path, rootdir: str, pythonroot: str) -> None: """Fix command line tools so they work in the virtual environment. Command line tools (pytest, sphinx-build etc) fail in virtual enviroments created with --system-site-packages, as the scripts are not copied into the virtual environment. The scripts have the original Python interpreter hardcoded in the hash-bang line. This function copies all scripts into the virtual environment, and changes the hash-bang so it works. Starting with the 2023a toolchains, the scripts are distributed over more than one EasyBuild module. Arguments: venvdir: Path to the virtual environment rootdir: string holding folder of the EasyBuild package being processed pythondir: string holding folder of the Python package. """ assert rootdir is not None assert pythonroot is not None bindir = rootdir / Path('bin') print(f'Patching executable scripts from {bindir} to {venvdir}/bin') assert '+' not in str(pythonroot) and '+' not in str(venvdir), ( 'Script will fail with "+" in folder names!') sedscript = f's+{pythonroot}+{venvdir}+g' # Loop over potential executables for exe in bindir.iterdir(): target = venvdir / 'bin' / exe.name # Skip files that already exist, are part of Python itself, # or are not a regular file or symlink to a file. 
if (not target.exists() and not exe.name.lower().startswith('python') and exe.is_file()): # Check if it is a script file referring the original # Python executable in the hash-bang with open(exe) as f: firstline = f.readline() if pythonroot in firstline: shutil.copy2(exe, target, follow_symlinks=False) # Now patch the file (if not a symlink) if not exe.is_symlink(): assert not target.is_symlink() subprocess.run( f"sed -e '{sedscript}' --in-place '{target}'", shell=True, check=True) def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('venv', help='Name of venv.') parser.add_argument('--toolchain', default='foss', choices=['foss', 'intel'], help='Default is foss.') parser.add_argument('--dftd3', action='store_true', help='Also build DFTD3.') parser.add_argument('--recompile', action='store_true', help='Recompile the GPAW C-extensions in an ' 'exising venv.') parser.add_argument('--piponly', action='store_true', help='Do not use EasyBuild python modules, ' 'install from pip (may affect performance).') args = parser.parse_args() # if args.toolchain == 'intel': # raise ValueError('See: https://gitlab.com/gpaw/gpaw/-/issues/241') venv = Path(args.venv).absolute() activate = venv / 'bin/activate' gpaw = venv / 'gpaw' if args.recompile: compile_gpaw_c_code(gpaw, activate) return 0 # Sanity checks if args.toolchain not in ('foss', 'intel'): raise ValueError(f'Unsupported toolchain "{args.toolchain}"') module_cmds = module_cmds_all.format(**toolchains[args.toolchain]) if not args.piponly: module_cmds += module_cmds_easybuild.format( **toolchains[args.toolchain]) module_cmds += module_cmds_tc[args.toolchain].format( **toolchains[args.toolchain]) cmds = (' && '.join(module_cmds.splitlines()) + f' && python3 -m venv --system-site-packages {args.venv}') run(cmds) os.chdir(venv) activate.write_text(module_cmds + activate.read_text()) run(f'. 
{activate} && pip install --upgrade pip -q') # Fix venv so pytest etc work pythonroot = None for ebrootvar in ('EBROOTPYTHON', 'EBROOTPYTHONMINBUNDLEMINPYPI'): # Note that we need the environment variable from the newly # created venv, NOT from this process! comm = run(f'. {activate} && echo ${ebrootvar}', capture_output=True, text=True) ebrootdir = comm.stdout.strip() if pythonroot is None: # The first module is the actual Python module. pythonroot = ebrootdir assert ebrootdir, f'Env variable {ebrootvar} appears to be unset.' fix_installed_scripts(venvdir=venv, rootdir=ebrootdir, pythonroot=pythonroot) packages = ['myqueue', 'graphviz', 'qeh', 'sphinx_rtd_theme'] if args.piponly: packages += ['matplotlib', 'scipy', 'pandas', 'pytest', 'pytest-xdist', 'pytest-mock', 'scikit-learn'] run(f'. {activate} && pip install -q -U ' + ' '.join(packages)) for name in ['ase', 'gpaw']: run(f'git clone -q https://gitlab.com/{name}/{name}.git') run(f'. {activate} && pip install -q -e ase/') if args.dftd3: run(' && '.join(dftd3.format(venv=venv, nifllogin=nifllogin).splitlines())) # Compile ase-ext C-extension on old thul so that it works on # newer architectures run(f'ssh {nifllogin[0]} ". {activate} && pip install -q ase-ext"') if args.piponly: run('git clone -q https://github.com/spglib/spglib.git') run(f'ssh {nifllogin[0]} ". 
{activate} && pip install {venv}/spglib"') # Install GPAW: siteconfig = Path( f'gpaw/doc/platforms/Linux/Niflheim/siteconfig-{args.toolchain}.py') Path('gpaw/siteconfig.py').write_text(siteconfig.read_text()) compile_gpaw_c_code(gpaw, activate) for fro, to in [('ivybridge', 'sandybridge'), ('nahelem', 'icelake')]: f = gpaw / f'build/lib.linux-x86_64-{fro}-{fversion}' t = gpaw / f'build/lib.linux-x86_64-{to}-{fversion}' f.symlink_to(t) # Create .pth file to load correct .so file: pth = ( 'import sys, os; ' 'arch = os.environ["CPU_ARCH"]; ' f"path = f'{venv}/gpaw/build/lib.linux-x86_64-{{arch}}-{fversion}'; " 'sys.path.append(path)\n') Path(f'lib/python{version}/site-packages/niflheim.pth').write_text(pth) # Install extra basis-functions: run(f'. {activate} && gpaw install-data --basis --version=20000 ' f'{venv} --no-register') extra = activate_extra.format(venv=venv) # Tab completion: for cmd in ['ase', 'gpaw', 'mq', 'pip']: txt = run(f'. {activate} && {cmd} completion' + (' --bash' if cmd == 'pip' else ''), capture_output=True).stdout.decode() extra += txt activate.write_text(activate.read_text() + extra) # Run tests: run(f'. {activate} && ase info && gpaw test') return 0 if __name__ == '__main__': raise SystemExit(main()) gpaw-24.1.0/doc/platforms/Linux/Niflheim/load.rst000066400000000000000000000155511454550013000216170ustar00rootroot00000000000000.. _load on niflheim: ====================================== Using a pre-installed GPAW at Niflheim ====================================== This is the guide for using the pre-installed GPAW modules on Niflheim. 
Modules on Niflheim =================== You can see which modules are available with the ``module avail [package]`` command, for example:: $ module avail GPAW -------------------------- /home/modules/modules/all -------------------------- GPAW-setups/0.8.7929 GPAW-setups/0.9.9672 GPAW-setups/0.9.11271 GPAW-setups/0.9.20000 (D) GPAW/1.4.0-foss-2018a-Python-3.6.4 GPAW/1.4.0-foss-2018b-Python-3.6.6 GPAW/1.4.0-intel-2018b-Python-3.6.6 GPAW/1.5.1-foss-2018b-Python-3.6.6 GPAW/1.5.1-intel-2018b-Python-3.6.6 GPAW/1.5.2-foss-2018b-Python-3.6.6 GPAW/1.5.2-intel-2018b-Python-3.6.6 GPAW/19.8.1-foss-2018b-ASE-3.18.0-Python-3.6.6 GPAW/19.8.1-intel-2018b-ASE-3.18.0-Python-3.6.6 GPAW/20.1.0-foss-2019b-Python-3.7.4 GPAW/20.1.0-intel-2019b-Python-3.7.4 GPAW/20.10.0-foss-2019b-ASE-3.20.1-Python-3.7.4 GPAW/20.10.0-foss-2020b GPAW/20.10.0-intel-2019b-ASE-3.20.1-Python-3.7.4 GPAW/20.10.0-intel-2020b GPAW/21.1.0-foss-2020b-ASE-3.21.1 GPAW/21.1.0-intel-2020b-ASE-3.21.1 (D) Where: D: Default Module You can see which modules you have loaded with ``module list``. You can unload all modules to start from scratch with ``module purge``. Choose the right version of GPAW ================================ This is a brief guide to which version of GPAW you should use. It reflects the situation in December 2020 and will be updated as the situation changes. I have an ongoing project You should probably continue to use the version you are using in that project, unless you want to change. See the section below on using different versions for different projects. I am a normal user You should load ``GPAW/23.9.1-intel-2023a``. This will give the newest version of GPAW, as recommended by the developers. It has new features and is significantly faster, in particular on the new Xeon40 nodes. For ongoing projects that have been using an older version, you may find that some values have changed slightly - check for consistency, or be sure to always use the same version for ongoing projects. 
See below for a description on how to do that. I am slightly conservative or need ``libvdwxc``. The version of GPAW compiled with the FOSS toolchain is somewhat slower in many situations, but is better tested and may use less memory. You may also have to use this version if you want the functionality from the ``libvdwxc`` library, but be aware that many van der Waals potentials do not use ``libvdwxc``. **IMPORTANT:** You do *not* need to load Python, ASE, matplotlib etc. Loading GPAW pulls all that stuff in, in versions consistent with the chosen GPAW version. If you want to generate Wannier functions with the Wannier90 module, you need to explicitly load ``Wannier90/3.1.0-foss-2023a`` or ``Wannier90/3.1.0-intel-2023a``. Intel or foss versions? ======================= The versions built with the Intel compilers and the Intel Math Kernel Library (MKL) are on average faster than the ones built with the Open Source (GNU) compilers (FOSS = Free and Open Source Software). On newer hardware this difference can be very significant, and we recommend using the Intel versions unless you have a good reason not to. The ``libvdwxc`` library of van der Waals exchange-correlation potentials is incompatible with the MKL, so if you need these methods you have to use the foss versions. However, most van der Waals calculations use the native van der Waals support in GPAW, and work fine with the Intel versions. Module consistency is important: check it. ========================================== For a reliable computational experience, you need to make sure that all modules come from the same toolchain (i.e. that the software is compiled with a consistent set of tools). **All modules you load should belong to the same toolchain.** Use ``module list`` to list your modules. 
Check for consistency: ================== ================================== Toolchain Module suffixes ================== ================================== foss/2023a foss-2023a gfbf-2023a gompi-2023a GCC-12.3.0 GCCcore-12.3.0 ------------------ ---------------------------------- intel/2023a intel-2023a iimkl-2023a iimpi-2023a intel-compilers-2023.1.0 GCCcore-12.3.0 ------------------ ---------------------------------- foss/2022a foss-2022a gompi-2022a GCC-11.3.0 GCCcore-11.3.0 ------------------ ---------------------------------- intel/2022a intel-2022a iimpi-2022a intel-compilers-2022.1.0 GCCcore-11.3.0 ------------------ ---------------------------------- foss/2020b foss-2020b gompi-2020b GCC-10.2.0 GCCcore-10.2.0 ------------------ ---------------------------------- intel/2020b intel-2020b iimpi-2020b iccifort-2020.4.304 GCCcore-10.2.0 ------------------ ---------------------------------- fosscuda-2020b (*) fosscuda-2020b gompic-2020b gcccuda-2020b GCC-10.2.0 GCCcore-10.2.0 ================== ================================== (*) For use on the GPU nodes, so only available on the sm3090 and xeon40 partitions. Newer toolchains do not use a special toolchain for this. If your ``module load XXX`` commands give warnings about reloaded modules, you are almost certainly mixing incompatible toolchains. Using different versions for different projects. ================================================ You do not have to use the same modules for all your projects. If you want all jobs submitted from the folder ``~/ProjectAlpha`` to run with one version of GPAW, but everything else with another version, you can put this in your .bashrc:: if [[ $SLURM_SUBMIT_DIR/ = $HOME/ProjectAlpha* ]]; then # Extreme consistency is important for this old project module purge module load GPAW/1.4.0-foss-2018a-Python-3.6.4 else # Performance is important for everything else. 
module load GPAW/20.10.0-intel-2020b module load scikit-learn/0.23.2-intel-2020b fi The ``module purge`` command in the special branch is because SLURM will remember which modules you have loaded when you submit the job, and that will typically be the default version, which must then be unloaded. gpaw-24.1.0/doc/platforms/Linux/Niflheim/siteconfig-foss.py000066400000000000000000000021451454550013000236150ustar00rootroot00000000000000import os scalapack = True fftw = True # Clean out any autodetected things, we only want the EasyBuild # definitions to be used. libraries = ['openblas', 'fftw3', 'readline', 'gfortran'] mpi_libraries = [] include_dirs = [] # Use EasyBuild scalapack from the active toolchain libraries += ['scalapack'] # Use EasyBuild libxc libxc = os.getenv('EBROOTLIBXC') if libxc: include_dirs.append(os.path.join(libxc, 'include')) libraries.append('xc') # libvdwxc: # Use EasyBuild libvdwxc # This will only work with the foss toolchain. libvdwxc = os.getenv('EBROOTLIBVDWXC') if libvdwxc: include_dirs.append(os.path.join(libvdwxc, 'include')) libraries.append('vdwxc') # ELPA: # Use EasyBuild ELPA if loaded elpa = os.getenv('EBROOTELPA') if elpa: libraries += ['elpa'] elpaversion = os.path.basename(elpa).split('-')[0] library_dirs = [os.path.join(elpa, 'lib')] extra_link_args = [f'-Wl,-rpath={elpa}/lib'] include_dirs.append(os.path.join(elpa, 'include', 'elpa-' + elpaversion)) # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/Niflheim/siteconfig-intel.py000066400000000000000000000020321454550013000237510ustar00rootroot00000000000000import os scalapack = True fftw = True mpicompiler = 'mpiicc' # Use Intel MKL libraries = ['xc', 'mkl_sequential', 'mkl_core', 'fftw3xc_intel_pic', 'mkl_rt'] # Use EasyBuild scalapack from the active toolchain libraries += ['mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64'] # Use EasyBuild libxc libxc = os.getenv('EBROOTLIBXC') include_dirs 
= [os.path.join(libxc, 'include')] # libvdwxc: # Use EasyBuild libvdwxc # This will only work with the foss toolchain. libvdwxc = os.getenv('EBROOTLIBVDWXC') if libvdwxc: include_dirs.append(os.path.join(libvdwxc, 'include')) libraries.append('vdwxc') # ELPA: # Use EasyBuild ELPA if loaded elpa = os.getenv('EBROOTELPA') if elpa: libraries += ['elpa'] elpaversion = os.path.basename(elpa).split('-')[0] library_dirs = [os.path.join(elpa, 'lib')] extra_link_args = [f'-Wl,-rpath={elpa}/lib'] include_dirs.append(os.path.join(elpa, 'include', 'elpa-' + elpaversion)) # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/SUNCAT/000077500000000000000000000000001454550013000174015ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Linux/SUNCAT/SUNCAT.rst000066400000000000000000000037611454550013000211370ustar00rootroot00000000000000.. _SUNCAT: ====== SUNCAT ====== (NOTE: With MKL 10.3 we have seen hangs in the early mpireduce calls for a small number of calculations. Until I have understood this I am backing out to MKL 10.2.) At SLAC we compiled GPAW for RHEL5 x86_64, on intel Xeon 5650 with intel compilers and mkl. This improved the 8-core performance benchmark by 13% compared to the opencc/ACML approach. ================ ================== Package Version ================ ================== python 2.4 gpaw 0.8.0.7419 ase 3.5.0.1919 numpy 1.4.1 openmpi 1.4.3 mkl 10.3 intel compilers 11.1 (includes mkl 10.2 by default) ================ ================== openmpi ======= openmpi was built with the intel compilers as follows:: $ ./configure --prefix=/nfs/slac/g/suncatfs/sw/gpawv15/install CC=icc CXX=icpc F77=ifort FC=ifort $ make $ make install numpy ===== Build in usual fashion. At the moment we use default gnu compilers for numpy, since gpaw performance benchmark drops by 3% when it is built with icc/mkl/dotblas, for reasons that are not understood. 
Also, some gpaw self-tests start to fail. gpaw ==== For this we use :download:`customize_mkl10.3.py`: .. literalinclude:: customize_mkl10.3.py Note that this customize.py works only with MKL version 10.3 which has simplified linking. The environment settings (valid at SUNCAT) to be able to link and run: .. literalinclude:: setupenv MKL 10.2 Notes ============== For historical reasons, we also include the customize.py for MKL 10.2: .. literalinclude:: customize_mkl10.2.py This older version requires a fairly bad hack to make it work in all cases:: $ setenv LD_PRELOAD libmkl_core.so:libmkl_sequential.so I believe this is because python uses "dlopen" for shared libraries, which has troubles with the circular dependencies present in MKL 10.2. This hack can cause (ignorable) errors from unrelated commands like "ping" which prevents the use of LD_PRELOAD for security reasons.gpaw-24.1.0/doc/platforms/Linux/SUNCAT/customize_mkl10.2.py000066400000000000000000000013131454550013000231370ustar00rootroot00000000000000scalapack = False compiler = 'icc' libraries =['mkl_intel_lp64','mkl_sequential','mkl_cdft_core','mkl_core','pthread','m'] library_dirs = ['/nfs/slac/g/suncatfs/sw/external/intel11.1/openmpi/1.4.3/install/lib','/afs/slac/package/intel_tools/compiler11.1/mkl/lib/em64t/'] include_dirs += ['/nfs/slac/g/suncatfs/sw/external/numpy/1.4.1/install/lib64/python2.4/site-packages/numpy/core/include'] extra_link_args += ['-fPIC'] extra_compile_args = ['-I/afs/slac/package/intel_tools/compiler11.1/mkl/include','-xHOST','-O1','-ipo','-no-prec-div','-static','-std=c99','-fPIC'] define_macros =[('GPAW_NO_UNDERSCORE_CBLACS', '1'), ('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] mpicompiler = 'mpicc' mpilinker = mpicompiler gpaw-24.1.0/doc/platforms/Linux/SUNCAT/customize_mkl10.3.py000066400000000000000000000010601454550013000231370ustar00rootroot00000000000000scalapack = False compiler = 'icc' libraries =['mkl_rt','pthread','m'] library_dirs = 
['/nfs/slac/g/suncatfs/sw/external/intel11.1/openmpi/1.4.3/install/lib','/afs/slac/package/intel_tools/2011u8/mkl/lib/intel64/'] include_dirs += ['/nfs/slac/g/suncatfs/sw/external/numpy/1.4.1/install/lib64/python2.4/site-packages/numpy/core/include'] extra_link_args += ['-fPIC'] extra_compile_args = ['-I/afs/slac/package/intel_tools/2011u8/mkl/include','-xHOST','-O1','-ipo','-no-prec-div','-static','-std=c99','-fPIC'] mpicompiler = 'mpicc' mpilinker = mpicompiler gpaw-24.1.0/doc/platforms/Linux/SUNCAT/setupenv000077500000000000000000000016661454550013000212110ustar00rootroot00000000000000#!/bin/bash export EXTERNALDIR=/nfs/slac/g/suncatfs/sw/external export NUMPYDIR=${EXTERNALDIR}/numpy/1.4.1/install/lib64/python2.4/site-packages export SCIPYDIR=${EXTERNALDIR}/scipy/0.7.0/install/lib64/python2.4/site-packages export ASEBASE=${EXTERNALDIR}/ase/3.5.0.1919/install export ASEDIR=${ASEBASE}/lib/python2.4/site-packages export INTELDIR=/afs/slac/package/intel_tools/2011u8 export MKLDIR=${INTELDIR}/mkl/lib/intel64 export OPENMPIDIR=${EXTERNALDIR}/intel11.1/openmpi/1.4.3/install export MKL_THREADING_LAYER=MKL_THREADING_SEQUENTIAL export OMP_NUM_THREADS=1 export INSTALLDIR=${GPAW_HOME}/install export PYTHONPATH=${ASEDIR}:${SCIPYDIR}:${NUMPYDIR}:${INSTALLDIR}/lib64/python export PATH=/bin:/usr/bin:${OPENMPIDIR}/bin:${INTELDIR}/bin:${INSTALLDIR}/bin:${ASEBASE}/bin export LD_LIBRARY_PATH=${INSTALLDIR}/lib:${MKLDIR}:${INTELDIR}/lib/intel64:${OPENMPIDIR}/lib:${MKLDIR}/../32 export GPAW_SETUP_PATH=${EXTERNALDIR}/gpaw-setups-0.6.6300 gpaw-24.1.0/doc/platforms/Linux/akka.rst000066400000000000000000000053521454550013000200520ustar00rootroot00000000000000.. _akka: ================= akka.hpc2n.umu.se ================= The Akka machine (http://www.hpc2n.umu.se/resources/Akka/) is a cluster of Intel Xeon dual-socket, quad-core L5420 CPUs, 2.5 GHz processors with 2 GB of memory per core. 
On Akka, you need to use the filesystem located under */pfs/* to be able to write files when running in the queue. Enable it with (http://www.hpc2n.umu.se/support/userguide/common/filesystems.html):: ln -s /pfs/nobackup$HOME $HOME/pfs Due to problems with intel mkl (version *10.0.2.018* gives errors when running on the compute nodes: *cannot allocate memory for thread-local data: ABORT*) build numpy using its internal blas/lapack:: python setup.py install --home=~/pfs/numpy-1.0.4-1 Set these environment variables in the :file:`.bashrc` file:: export home=~/pfs module add openmpi/1.2.6/gcc export PYTHONPATH=${home}/gpaw:${home}/ase3k:${home}/numpy-1.0.4-1/lib64/python: export GPAW_SETUP_PATH=${home}/gpaw-setups-0.4.2039 export LD_LIBRARY_PATH=/usr/local/lib export PATH=${home}/gpaw/tools:${home}/ase3k/tools:${PATH} if [ $PBS_ENVIRONMENT ]; then cd $PBS_O_WORKDIR export PYTHONPATH=${PBS_O_WORKDIR}:${PYTHONPATH} return fi and build GPAW (``python setup.py build_ext``) with this :file:`customize.py` file (static linking fixes *cannot allocate memory for thread-local data: ABORT*):: scalapack = True libraries = [] extra_compile_args += [ '-O3' ] mkl_lib_path = '/usr/local/lib/' extra_link_args = [ mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_scalapack.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] A gpaw script :file:`gpaw-script.py` can be submitted like this:: qsub -l nodes=1:ppn=8 -l walltime=02:00:00 -m abe pbs_submitfile with the following `pbs_submitfile `_:: #!/bin/bash ###PBS -A SNICXXX-YY-ZZ ###PBS -N Parallel ###PBS -o test.out 
###PBS -e test.err ###PBS -m ae ###PBS -l nodes=2:ppn=8 ###PBS -l walltime=00:10:00 ###PBS -l pmem=1900mb # default ###PBS -l pvmem=2000mb # default cd $PBS_O_WORKDIR module add openmpi/1.2.6/gcc mpiexec ${HOME}/build/bin.linux-x86_64-2.4/gpaw-python gpaw-script.py It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/Linux/armageddon.rst000066400000000000000000000017311454550013000212410ustar00rootroot00000000000000.. _armageddon: =========================== armageddon.chimfar.unimo.it =========================== The installation of user's packages described below assumes the *bash* shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - download the :download:`customize_armageddon.py` and :download:`set_env_armageddon.sh` files:: wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_armageddon.py wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/set_env_armageddon.sh .. literalinclude:: customize_armageddon.py - download packages with :download:`download_armageddon.sh`, by running ``sh download_armageddon.sh``: .. literalinclude:: download_armageddon.sh - install packages and test with :download:`install_armageddon.sh`, by running ``sh install_armageddon.sh``. - enable packages with :download:`set_env_armageddon.sh`, by running ``. set_env_armageddon.sh`` (put this in *~/.bashrc*). gpaw-24.1.0/doc/platforms/Linux/batman.rst000066400000000000000000000011121454550013000203730ustar00rootroot00000000000000..
_batman: ================== batman.chem.jyu.fi ================== To prepare the compilation, we need to load the required modules and clean the environment:: > module purge # remove all modules > module add mpt > module add mkl > unset CC CFLAGS LDFLAGS We have to change :file:`customize.py` to get the libs and the right compiler:: # uncomment and change in customize.py libraries += ['mpi','mkl'] mpicompiler = 'gcc' custom_interpreter = True Then compile as usual (``python setup.py build``). This will build the custom python interpreter for parallel use also. gpaw-24.1.0/doc/platforms/Linux/brown.rst000066400000000000000000000011671454550013000202720ustar00rootroot00000000000000.. _brown: ================================ Brown University (ccv.brown.edu) ================================ GPAW is centrally installed and can be installed per-user on 'Oscar', Brown University's Supercomputer run by the Center for Computation and Visualization (CCV_). We keep detailed instructions, as well as customization files and submit scripts in a dedicated repository called brown-gpaw_. In this repository, you should be able to find instructions to use and/or install GPAW for the versions currently available on CCV. .. _brown-gpaw: https://bitbucket.org/andrewpeterson/brown-gpaw .. _CCV: https://ccv.brown.edu/ gpaw-24.1.0/doc/platforms/Linux/bwgrid.rst000066400000000000000000000050201454550013000204110ustar00rootroot00000000000000====== bwgrid ====== The `BWgrid `__ is a grid of machines located in Baden-Württemberg, Germany. The installation in Freiburg is a cluster containing 139 dual socket, quad-core Intel Xeon E5440 CPUs, 2.83GHz processors with 2 GB of memory per core, 16 dual socket, quad-core Intel Xeon X5550 CPUs, 2.67GHz processors with 3 GB of memory per core and eight dual socket, six-core Intel Xeon X5650 CPUs, 2.66GHz processors with 2 GB of memory per core. For more information visit ``_.
Building GPAW with Intel compiler ================================= Use the compiler wrapper file :download:`icc.py` .. literalinclude:: icc.py Instructions assume **bash**, installation under $HOME/opt. Load the necessary modules:: module load devel/python/2.7.2 module load compiler/intel/12.0 module load mpi/impi/4.0.2-intel-12.0 module load numlib/mkl/10.3.5 module load numlib/python_numpy/1.6.1-python-2.7.2 Internal libxc -------------- Before revision 10429 libxc was internal. The :file:`customize.py` had to be changed to :download:`customize_bwgrid_icc.py` .. literalinclude:: customize_bwgrid_icc.py External libxc -------------- After svn revision 10429 libxc has to be included as external library (see also the `libxc web site `__). To install libxc we assume that MYLIBXCDIR is set to the directory where you want to install:: $ module load compiler/intel/12.0 $ cd $MYLIBXCDIR $ wget http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.0.2.tar.gz $ tar -xzvf libxc-2.0.2.tar.gz $ cd libxc-2.0.2/ $ mkdir install $ ./configure CFLAGS="-fPIC" --prefix=$PWD/install -enable-shared $ make |tee make.log $ make install This will have installed the libs $MYLIBXCDIR/libxc-2.0.2/install/lib and the C header files to $MYLIBXCDIR/libxc-2.0.2/install/include. We have to modify the file :file:`customize.py` to :download:`customize_bwgrid_icc_libxc.py` .. literalinclude:: customize_bwgrid_icc_libxc.py Note that the location of the external libxc on runtime has to be enabled by setting:: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MYLIBXCDIR/libxc-2.0.2/install/lib Building and running GPAW ------------------------- To build GPAW use:: python setup.py build_ext 2>&1 | tee build_ext.log and ignore some intermediate warnings. 
A gpaw script :file:`test.py` can be submitted to run on 8 cpus like this:: > gpaw-runscript test.py 8 using bwg run.bwg written > qsub run.bwg gpaw-24.1.0/doc/platforms/Linux/bwuni.rst000066400000000000000000000105111454550013000202600ustar00rootroot00000000000000===== bwuni ===== Information about `bwUniCluster `__. Building GPAW ============= We assume that the installation will be located in ``$HOME/source``. Setups ------ The setups must be installed first:: cd GPAW_SETUP_SOURCE=$PWD/source/gpaw-setups mkdir -p $GPAW_SETUP_SOURCE cd $GPAW_SETUP_SOURCE wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.11271.tar.gz tar xzf gpaw-setups-0.9.11271.tar.gz Let gpaw know about the setups:: export GPAW_SETUP_PATH=$GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271 Using the module environment ---------------------------- It is very handy to add our installation to the module environment:: cd mkdir -p modulefiles/gpaw-setups cd modulefiles/gpaw-setups echo -e "#%Module1.0\nprepend-path GPAW_SETUP_PATH $GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271" > 0.9.11271 We need to let the system know about our modules:: module use $HOME/modulefiles such that we also see them with:: module avail libxc ----- GPAW relies on libxc (see the `libxc web site `__). To install libxc we assume that ``MYLIBXCDIR`` is set to the directory where you want to install (e.g. ``MYLIBXCDIR=$HOME/source/libxc``):: $ cd $MYLIBXCDIR $ wget http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.2.0.tar.gz $ tar -xzvf libxc-2.2.0.tar.gz $ cd libxc-2.2.0/ $ mkdir install $ ./configure CFLAGS="-fPIC" --prefix=$PWD/install -enable-shared $ make |tee make.log $ make install This will have installed the libraries to ``$MYLIBXCDIR/libxc-2.2.0/install/lib`` and the C header files to ``$MYLIBXCDIR/libxc-2.2.0/install/include``.
We create a module for libxc:: cd mkdir modulefiles/libxc cd modulefiles/libxc and edit the module file :file:`2.2.0` that should read:: #%Module1.0 # change this to your path set libxchome /home/fr/fr_fr/fr_mw767/source/libxc/libxc-2.2.0/install prepend-path C_INCLUDE_PATH $libxchome/include prepend-path LIBRARY_PATH $libxchome/lib prepend-path LD_LIBRARY_PATH $libxchome/lib ASE trunk --------- We get ASE trunk:: cd ASE_SOURCE=$PWD/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone https://gitlab.com/ase/ase.git trunk which can be updated using:: cd $ASE_SOURCE/trunk git pull We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase and edit the module file :file:`trunk` that should read:: #%Module1.0 if {![is-loaded numlib/python_scipy]} {module load numlib/python_scipy} set asehome /home/fr/fr_fr/fr_mw767/source/ase/trunk prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/tools Building GPAW ------------- We create a place for gpaw and get the trunk version:: GPAW_SOURCE=$PWD/source/gpaw mkdir -p $GPAW_SOURCE cd $GPAW_SOURCE svn checkout https://svn.fysik.dtu.dk/projects/gpaw/trunk trunk The current trunk version can then be updated by:: cd $GPAW_SOURCE/trunk svn up We have to modify the file :file:`customize.py` to :download:`customize_bwuni.py` .. literalinclude:: customize_bwuni.py To build GPAW use:: module purge module load libxc module load ase module load mpi/openmpi cd $GPAW_SOURCE/trunk mkdir install python setup.py install --prefix=$PWD/install which installs GPAW to ``$GPAW_SOURCE/trunk/install``. 
We create a module that does the necessary things:: cd mkdir -p modulefiles/gpaw cd modulefiles/gpaw the file :file:`trunk` that should read:: #%Module1.0 if {![is-loaded ase]} {module load ase} if {![is-loaded libxc]} {module load libxc} if {![is-loaded mpi]} {module load mpi/openmpi} if {![is-loaded gpaw-setups]} {module load gpaw-setups} # change this to your needs set gpawhome /home/fr/fr_fr/fr_mw767/source/gpaw/trunk/install prepend-path PATH $gpawhome/bin prepend-path PYTHONPATH $gpawhome/lib/python2.7/site-packages/ setenv GPAW_PYTHON $gpawhome/bin/gpaw-python Running GPAW ------------ A gpaw script :file:`test.py` can be submitted to run on 8 cpus like this:: > module load gpaw > gpaw-runscript test.py 8 hostname uc1n997 found, using bwUniCluster run.bwUniCluster written > msub run.bwUniCluster gpaw-24.1.0/doc/platforms/Linux/carbon_cnm.rst000066400000000000000000000060111454550013000212350ustar00rootroot00000000000000.. _carbon_cnm: ====== carbon ====== Here you find information about the system ``_. The Carbon machine is a cluster of dual socket, quad-core Intel Xeon 5355 CPUs, 2.66 GHz processors with 2 GB of memory per core. **Warning**: numpy build instructions have not been tested recently and may not work. Please use the (unoptimized) system default numpy. 
To build (``python setup.py install --home=~/numpy-1.0.4-1``) numpy-1.0.4 add these lines to :file:`site.cfg`:: [DEFAULT] library_dirs = /usr/local/lib:/opt/intel/mkl/10.0.2.018/lib/em64t include_dirs = /usr/local/include:/opt/intel/mkl/10.0.2.018/include and, in :file:`numpy/distutils/system_info.py` change the line:: _lib_mkl = ['mkl','vml','guide'] into:: _lib_mkl = ['mkl','guide'] and the line:: lapack_libs = self.get_libs('lapack_libs',['mkl_lapack32','mkl_lapack64']) into:: lapack_libs = self.get_libs('lapack_libs',['mkl_lapack']) Set these environment variables in the :file:`.bashrc` file:: export OMPI_CC=gcc export OMP_NUM_THREADS=1 export PYTHONPATH=${HOME}/gpaw:${HOME}/ase3k:${HOME}/numpy-1.0.4-1/lib64/python: export GPAW_SETUP_PATH=${HOME}/gpaw-setups-0.4.2039 export LD_LIBRARY_PATH=/usr/lib64/openmpi:/opt/intel/mkl/10.0.2.018/lib/em64t export PATH=${HOME}/gpaw/tools:${HOME}/ase3k/tools:/usr/share/openmpi/bin64:${PATH} if [ $PBS_ENVIRONMENT ]; then cd $PBS_O_WORKDIR export PYTHONPATH=${PBS_O_WORKDIR}:${PYTHONPATH} return fi and build GPAW (``python setup.py build_ext``) with this :file:`customize.py` file:: scalapack = True extra_compile_args += [ '-O3' ] libraries= [] mkl_lib_path = '/opt/intel/mkl/10.0.2.018/lib/em64t/' extra_link_args = [ mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_scalapack.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] **Note**: due to linking problems similar to those found on :ref:`akka` static linking is required. 
A gpaw script :file:`gpaw-script.py` can be submitted like this:: qsub -l nodes=1:ppn=8 -l walltime=02:00:00 \ -m abe run.sh where :file:`run.sh` looks like this:: cd $PBS_O_WORKDIR mpirun -machinefile $PBS_NODEFILE -np 8 -x OMP_NUM_THREADS \ $HOME/gpaw/build/bin.linux-x86_64-2.4/gpaw-python gpaw-script.py Please make sure that your jobs do not run multi-threaded, e.g. for a job running on ``n090`` do from a login node:: ssh n090 ps -fL you should see **1** in the **NLWP** column. Numbers higher than **1** mean a multi-threaded job. It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/Linux/centos.rst000066400000000000000000000016601454550013000204340ustar00rootroot00000000000000====== CentOS ====== Install these CentOS_ packages:: $ yum install libxc-devel openblas-devel openmpi-devel fftw-devel $ yum install blacs-openmpi-devel scalapack-openmpi-devel Add this to your ``~/.bashrc``:: $ OPENMPI=/usr/lib64/openmpi $ export PATH=$OPENMPI/bin/:$PATH $ export LD_LIBRARY_PATH=$OPENMPI/lib:$LD_LIBRARY_PATH Make sure you have the latest pip:: $ python3 -m ensurepip --user $ python3 -m pip install pip --user Then install ASE_, Numpy and SciPy:: $ python3 -m pip install ase --user And finally, GPAW with ScaLAPACK and FFTW:: $ git clone git@gitlab.com:gpaw/gpaw.git $ cd gpaw $ cat > siteconfig.py fftw = True scalapack = True libraries = ['xc', 'fftw3', 'scalapack', 'mpiblacs'] library_dirs = ['/usr/lib64/openmpi/lib/'] ^D $ python3 -m pip install -v gpaw --user .. _CentOS: http://www.centos.org/ ..
_ASE: https://wiki.fysik.dtu.dk/ase/ gpaw-24.1.0/doc/platforms/Linux/customize-ubuntu-sl-blacs-lam.py000066400000000000000000000004221454550013000245630ustar00rootroot00000000000000scalapack = True libraries = ['scalapack-lam', 'blacsCinit-lam', 'blacsF77init-lam', 'blacs-lam', 'lapack'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize-ubuntu-sl-blacs-openmpi.py000066400000000000000000000005121454550013000254610ustar00rootroot00000000000000scalapack = True libraries = ['scalapack-openmpi', 'blacsCinit-openmpi', 'blacsF77init-openmpi', 'blacs-openmpi', 'xc', 'blas', 'lapack'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_armageddon.py000066400000000000000000000011341454550013000231600ustar00rootroot00000000000000scalapack = False extra_compile_args = ['-O3', '-std=c99', '-fpic'] compiler = 'gcc' mpicompiler = '/home/firegam/CAMd/openmpi-1.4.3-1/bin/mpicc' mpilinker = mpicompiler libraries = ['mkl_lapack', 'mkl_core', 'mkl_sequential', 'mkl_gf_lp64', 'iomp5'] mkl_lib_path = '/opt/intel/Compiler/11.1/072/mkl/lib/em64t/' ompi_lib_path = '/home/firegam/CAMd/openmpi-1.4.3-1/lib' library_dirs = [mkl_lib_path, ompi_lib_path] extra_link_args =['-Wl,-rpath='+mkl_lib_path+',-rpath='+ompi_lib_path] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_bwgrid_icc.py000066400000000000000000000006501454550013000231550ustar00rootroot00000000000000compiler = './icc.py' mpicompiler = './icc.py' mpilinker = 'MPICH_CC=gcc mpicc' scalapack = True define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] libraries += ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack95_lp64', 
'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread' ] gpaw-24.1.0/doc/platforms/Linux/customize_bwgrid_icc_libxc.py000066400000000000000000000011561454550013000243400ustar00rootroot00000000000000compiler = './icc.py' mpicompiler = './icc.py' mpilinker = 'MPICH_CC=gcc mpicc' scalapack = True define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] libraries += ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack95_lp64', 'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread', 'xc' ] libraries += ['xc'] # change this to your installation directory LIBXCDIR='/home/mw767/gridpaw/libxc-2.0.2/install/' library_dirs += [LIBXCDIR + 'lib'] include_dirs += [LIBXCDIR + 'include'] gpaw-24.1.0/doc/platforms/Linux/customize_bwuni.py000066400000000000000000000004221454550013000222020ustar00rootroot00000000000000compiler='mpicc' extra_link_args += [ '-Wl,--no-as-needed', '-L/opt/bwhpc/common/compiler/intel/compxe.2013.sp1.4.211/mkl/lib/intel64', '-lmkl_scalapack_lp64', '-lmkl_intel_lp64', '-lmkl_core', '-lmkl_sequential', '-lmkl_blacs_intelmpi_lp64', ] gpaw-24.1.0/doc/platforms/Linux/customize_gardar.py000066400000000000000000000016231454550013000223220ustar00rootroot00000000000000scalapack = True mklpath ='/global/apps/intel/2013.1/mkl' omppath ='/global/apps/openmpi/1.6.5/intel/13.1' lxcpath ='/home/pcje/global/apps/libxc-2.2.1-1' compiler = 'icc' libraries = ['xc', 'mpi', 'mkl_scalapack_lp64', 'mkl_lapack95_lp64', 'mkl_intel_lp64', 'mkl_sequential', 'mkl_mc', 'mkl_core', 'mkl_def', 'mkl_intel_thread', 'iomp5'] library_dirs += [f'{omppath}/lib'] library_dirs += [f'{mklpath}/lib/intel64'] library_dirs += [f'{lxcpath}/lib'] include_dirs += [f'{omppath}/include'] include_dirs += [f'{mklpath}/include'] include_dirs += [f'{lxcpath}/include'] extra_link_args += [f'{mklpath}/lib/intel64/libmkl_blacs_openmpi_lp64.a', f'{mklpath}/lib/intel64/libmkl_blas95_lp64.a'] 
extra_compile_args += ['-O3', '-std=c99', '-fPIC', '-Wall'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] mpicompiler = 'mpicc' mpilinker = mpicompiler gpaw-24.1.0/doc/platforms/Linux/customize_jureca.py000066400000000000000000000041141454550013000223310ustar00rootroot00000000000000#User provided customizations for the gpaw setup #Here, one can override the default arguments, or append own #arguments to default ones #To override use the form # libraries = ['somelib','otherlib'] #To append use the form # libraries += ['somelib','otherlib'] compiler = 'mpicc' libraries = ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack', 'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread', 'readline', 'termcap', 'xc' ] #libraries = [] #libraries += [] #library_dirs = [] #library_dirs += [] #include_dirs = [] #include_dirs += [] #extra_link_args = [] #extra_link_args += [] #extra_compile_args = [] #extra_compile_args += [] #runtime_library_dirs = [] #runtime_library_dirs += [] #extra_objects = [] #extra_objects += [] #define_macros = [] #define_macros += [] #mpicompiler = None #mpilinker = None #mpi_libraries = [] #mpi_libraries += [] #mpi_library_dirs = [] #mpi_library_dirs += [] #mpi_include_dirs = [] #mpi_include_dirs += [] #mpi_runtime_library_dirs = [] #mpi_runtime_library_dirs += [] #mpi_define_macros = [] #mpi_define_macros += [] #platform_id = '' #hdf5 = True # Valid values for scalapack are False, or True: # False (the default) - no ScaLapack compiled in # True - ScaLapack compiled in # Warning! At least scalapack 2.0.1 is required! 
scalapack = False if scalapack: libraries += ['scalapack'] library_dirs += [] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] # In order to link libxc installed in a non-standard location # (e.g.: configure --prefix=/home/user/libxc-2.0.1-1), use: # - static linking: #include_dirs += ['/home/user/libxc-2.0.1-1/include'] #extra_link_args += ['/home/user/libxc-2.0.1-1/lib/libxc.a'] #if 'xc' in libraries: libraries.remove('xc') # - dynamic linking (requires also setting LD_LIBRARY_PATH at runtime): #include_dirs += ['/home/user/libxc-2.0.1-1/include'] #library_dirs += ['/home/user/libxc-2.0.1-1/lib'] #if 'xc' not in libraries: libraries.append('xc') gpaw-24.1.0/doc/platforms/Linux/customize_juropa_gcc.py000066400000000000000000000006431454550013000231770ustar00rootroot00000000000000scalapack = True library_dirs += ['/opt/intel/Compiler/11.0/074/mkl/lib/em64t'] libraries = ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack', 'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread' ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Linux/customize_juropa_icc.py000066400000000000000000000007641454550013000232050ustar00rootroot00000000000000compiler = './icc.py' mpicompiler = './icc.py' mpilinker = 'MPICH_CC=gcc mpicc' scalapack = True library_dirs += ['/opt/intel/Compiler/11.0/074/mkl/lib/em64t'] libraries = ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack', 'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread' ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Linux/customize_juropa_icc_libxc.py000066400000000000000000000013041454550013000243550ustar00rootroot00000000000000compiler = './icc.py' mpicompiler = './icc.py' mpilinker = 
'MPICH_CC=gcc mpicc' scalapack = True library_dirs += ['/opt/intel/Compiler/11.0/074/mkl/lib/em64t'] libraries = ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack', 'mkl_scalapack_lp64', 'mkl_blacs_intelmpi_lp64', 'pthread' ] libraries += ['xc'] # change this to your installation directory LIBXCDIR='/lustre/jhome5/hfr04/hfr047/gridpaw/libxc-2.0.2/install/' library_dirs += [LIBXCDIR + 'lib'] include_dirs += [LIBXCDIR + 'include'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] define_macros += [("GPAW_ASYNC",1)] gpaw-24.1.0/doc/platforms/Linux/customize_justus.py000066400000000000000000000003251454550013000224150ustar00rootroot00000000000000compiler='mpicc' libraries = ['xc'] extra_link_args += [ '-Wl,--no-as-needed', '-lmkl_scalapack_lp64', '-lmkl_intel_lp64', '-lmkl_core', '-lmkl_sequential', '-lmkl_blacs_intelmpi_lp64', ] gpaw-24.1.0/doc/platforms/Linux/customize_nanolab_EL4.py000066400000000000000000000010701454550013000231340ustar00rootroot00000000000000scalapack = False extra_compile_args = ['-O3', '-std=c99', '-fpic'] compiler = 'gcc' mpicompiler = '/usr/local/openmpi-1.3.3/bin/mpicc' mpilinker = mpicompiler libraries = ['mkl_lapack', 'mkl_core', 'mkl_sequential', 'mkl_gf', 'iomp5'] mkl_lib_path = '/opt/intel/mkl/10.2.1.017/lib/32' ompi_lib_path = '/usr/local/openmpi-1.3.3/lib' library_dirs = [mkl_lib_path, ompi_lib_path] extra_link_args =['-Wl,-rpath='+mkl_lib_path+',-rpath='+ompi_lib_path] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_neolith.py000066400000000000000000000010661454550013000225250ustar00rootroot00000000000000# 19 Nov 2009: problems with scalapack #scalapack = True extra_compile_args = ['-fast', '-std=c99', '-fPIC'] compiler = 'icc' libraries = ['mkl_core', 'mkl_sequential', 'mkl_gf_lp64', 'iomp5'] #libraries = ['mkl_core', 'mkl_intel_thread', 'mkl_gf_lp64', 
'mkl_blacs_intelmpi_lp64', 'mkl_scalapack_lp64', 'iomp5'] mkl_lib_path = '/software/intel/mkl/10.2.1.017/lib/em64t/' library_dirs = [mkl_lib_path] extra_link_args =['-Wl,-rpath='+mkl_lib_path] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_newcell.py000066400000000000000000000011221454550013000225050ustar00rootroot00000000000000scalapack = False extra_compile_args = ['-O3', '-std=c99', '-fpic'] compiler = '/afs/crc.nd.edu/x86_64_linux/openmpi/1.3.2/gnu/bin/mpicc' libraries = ['mkl_def', 'mkl_lapack', 'mkl_core', 'mkl_sequential', 'mkl_gf_lp64', 'iomp5'] mkl_lib_path = '/opt/crc/scilib/mkl/10.1.0.015/lib/em64t/' ompi_lib_path = '/afs/crc.nd.edu/x86_64_linux/openmpi/1.3.2/gnu/lib/' library_dirs = [mkl_lib_path, ompi_lib_path] extra_link_args =['-Wl,-rpath='+mkl_lib_path+',-rpath='+ompi_lib_path] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_odyssey.py000066400000000000000000000014261454550013000225620ustar00rootroot00000000000000scalapack = True mklpath ='/n/sw/intel/mkl/10.3.1.107/composerxe-2011.3.174/mkl' omppath ='/n/sw/openmpi-1.5.3_intel-12.3.174ib' compiler = 'icc' libraries = ['mpi', 'mpi_f77', 'mkl_scalapack_lp64', 'mkl_lapack95_lp64', 'mkl_intel_lp64', 'mkl_sequential', 'mkl_mc', 'mkl_core', 'mkl_def', 'mkl_intel_thread', 'iomp5'] library_dirs += [f'{omppath}/lib', f'{mklpath}/lib/intel64'] include_dirs += ['/usr/include', f'{omppath}/include', f'{mklpath}/include'] extra_link_args += [f'{mklpath}/lib/intel64/libmkl_blacs_openmpi_lp64.a', f'{mklpath}/lib/intel64/libmkl_blas95_lp64.a'] extra_compile_args += ['-O3', '-std=c99', '-w'] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] mpicompiler = 'mpicc' mpilinker = mpicompiler 
gpaw-24.1.0/doc/platforms/Linux/customize_r410_psmn.py000066400000000000000000000011431454550013000226020ustar00rootroot00000000000000scalapack = False compiler = 'gcc' extra_compile_args += [ '-O3', '-funroll-all-loops', '-fPIC', ] libraries = ['gfortran'] mpi_prefix = '/softs/openmpi-gnu/' blas_lib_path = '/home/tjiang/softs/acml-4.0.1/gfortran64/lib/' lapack_lib_path = blas_lib_path library_dirs = [mpi_prefix + 'lib'] include_dirs += [mpi_prefix + 'include'] # includes alreay numpy's include extra_link_args = [ blas_lib_path+'libacml.a', lapack_lib_path+'libacml.a', '-Wl,-rpath=' + mpi_prefix + 'lib,' '-rpath=' + blas_lib_path ] mpicompiler= mpi_prefix + 'bin/mpicc' mpilinker = mpicompiler gpaw-24.1.0/doc/platforms/Linux/customize_ranger_EL4.py000066400000000000000000000017501454550013000230050ustar00rootroot00000000000000scalapack = True extra_compile_args = ['-O3', '-std=c99', '-fpic'] compiler = 'gcc' mpicompiler = '/opt/apps/gcc4_4/openmpi/1.3b/bin/mpicc' mpilinker = mpicompiler mkl_lib_path = '/opt/apps/intel/mkl/10.0.1.014/lib/em64t/' ompi_lib_path = '/opt/apps/gcc4_4/openmpi/1.3b/lib' libraries = [] # use static linking to avoid # "cannot allocate memory for thread-local data: ABORT" extra_link_args = [ mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_scalapack_lp64.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', ] extra_link_args += ['-Wl,-rpath='+ompi_lib_path] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] gpaw-24.1.0/doc/platforms/Linux/customize_sun_chpc_SUSE10.py000066400000000000000000000006661454550013000236720ustar00rootroot00000000000000compiler = 'gcc' mpicompiler 
= '/opt/SUNWhpc/HPC8.2/gnu/bin/mpicc' mpilinker = 'MPICH_CC=gcc mpicc -Xlinker --export-dynamic' extra_compile_args = ['-O3', '-std=c99', '-fpic'] scalapack = False mkl_dir = '/opt/gridware/intel/Compiler/11.1/056/mkl/lib/em64t/' library_dirs += [mkl_dir] libraries = ['mkl_intel_lp64' ,'mkl_sequential' ,'mkl_core', 'mkl_lapack', ] extra_link_args = ['-Wl,-rpath=' + mkl_dir] gpaw-24.1.0/doc/platforms/Linux/customize_vsc_univie.py000066400000000000000000000005271454550013000232360ustar00rootroot00000000000000scalapack = False compiler = 'gcc' extra_compile_args += [ '-O3', '-funroll-all-loops', '-fPIC', ] libraries = ['gfortran', 'util'] blas_lib_path = '/home/lv70174/gpaw/opt/acml-4.0.1/gfortran64/lib/' lapack_lib_path = blas_lib_path extra_link_args = [ blas_lib_path+'libacml.a', lapack_lib_path+'libacml.a', ] gpaw-24.1.0/doc/platforms/Linux/davinci.rst000066400000000000000000000074061454550013000205620ustar00rootroot00000000000000.. _davinci: ====================== davinci.ssci.liv.ac.uk ====================== The machine is a cluster of dual-core Intel Xeon CPUs, 3.2 GHz processors with 2 GB of memory per core. 
To build (``python setup.py install --home=~/numpy-1.1.0-1``) numpy-1.1.0 add this line to :file:`site.cfg`:: [DEFAULT] library_dirs = /usr/local/Cluster-Apps/intel_mkl_7.0.1.006/mkl701/lib/32 and build GPAW (``PYTHONPATH=${HOME}/dulak/numpy-1.1.0-1/usr/local/lib/python2.5/site-packages python setup.py build_ext``) with this ``customize.py`` file:: home='/home/haiping' extra_compile_args += [ '-O3' ] libraries = [ 'mkl', 'mkl_lapack', 'guide' ] library_dirs = [ '/usr/local/Cluster-Apps/intel_mkl_7.0.1.006/mkl701/lib/32' ] include_dirs += [ home+'numpy-1.1.0-1/usr/local/lib/python2.5/site-packages/numpy/core/include' ] A gpaw script :file:`test/CH4.py` can be submitted like this:: qsub submit.sh where :file:`submit.sh` looks like this:: #!/bin/bash # # Script to submit an mpi job # ---------------------------- # Replace these with the name of the executable # and the parameters it needs export home=/home/haiping export MYAPP=${home}/gpaw-0.4.2063/build/bin.linux-i686-2.5/gpaw-python export MYAPP_FLAGS=${home}/gpaw-0.4.2063/test/CH4.py export PYTHONPATH="${home}/numpy-1.1.0-1/usr/local/lib/python2.5/site-packages" export PYTHONPATH="${PYTHONPATH}:${home}/gpaw-0.4.2063:${home}/python-ase-3.0.0.358" # --------------------------- # set the name of the job #$ -N CH4 # request 2 slots #$ -pe fatmpi 2 ################################################################# ################################################################# # there shouldn't be a need to change anything below this line export MPICH_PROCESS_GROUP=no # --------------------------- # set up the mpich version to use # --------------------------- # load the module if [ -f /usr/local/Cluster-Apps/Modules/init/bash ] then . 
/usr/local/Cluster-Apps/Modules/init/bash module load default-ethernet fi #---------------------------- # set up the parameters for qsub # --------------------------- # Mail to user at beginning/end/abort/on suspension ####$ -m beas # By default, mail is sent to the submitting user # Use $ -M username to direct mail to another userid # Execute the job from the current working directory # Job output will appear in this directory #$ -cwd # can use -o dirname to redirect stdout # can use -e dirname to redirect stderr # Export these environment variables #$ -v PATH #$ -v MPI_HOME #$ -v LD_LIBRARY_PATH #$ -v GPAW_SETUP_PATH #$ -v PYTHONPATH # Gridengine allocates the max number of free slots and sets the # variable $NSLOTS. echo "Got $NSLOTS slots." # Gridengine sets also $TMPDIR and writes to $TMPDIR/machines the # corresponding list of nodes. It also generates some special scripts in # $TMPDIR. Therefore, the next two lines are practically canonical: # # export PATH=$TMPDIR:$PATH # echo "Stack size is "`ulimit -S -s` # --------------------------- # run the job # --------------------------- date_start=`date +%s` $MPI_HOME/bin/mpirun -np $NSLOTS -machinefile $TMPDIR/machines $MYAPP $MYAPP_FLAGS date_end=`date +%s` seconds=$((date_end-date_start)) minutes=$((seconds/60)) seconds=$((seconds-60*minutes)) hours=$((minutes/60)) minutes=$((minutes-60*hours)) echo ========================================================= echo SGE job: finished date = `date` echo Total run time : $hours Hours $minutes Minutes $seconds Seconds echo ========================================================= It's convenient to customize as described on the :ref:`parallel_runs` page. 
gpaw-24.1.0/doc/platforms/Linux/download_armageddon.sh000077500000000000000000000024521454550013000227360ustar00rootroot00000000000000#!/bin/sh export APPS="/home/firegam/CAMd" export MODULEFILES="${APPS}/modulefiles" INSTALL_DACAPO=False # download packages openmpi_version=1.4.3 wget http://www.open-mpi.org/software/ompi/v1.4/downloads/openmpi-${openmpi_version}.tar.bz2 nose_version=0.11.3 wget http://python-nose.googlecode.com/files/nose-${nose_version}.tar.gz numpy_version=1.1.1 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz numpy_version=1.5.0 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz ase_version=3.4.1.1765 wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz gpaw_version=0.7.2.6974 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.6.6300 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz if [ "${INSTALL_DACAPO}" = "True" ]; then # dacapo dacapo_version=2.7.16 wget "https://wiki.fysik.dtu.dk/dacapo/Installation?action=AttachFile&do=get&target=campos-dacapo-${dacapo_version}.tar.gz" -O campos-dacapo-${dacapo_version}.tar.gz dacapo_psp_version=1 wget "https://wiki.fysik.dtu.dk/dacapo/Installation?action=AttachFile&do=get&target=campos-dacapo-pseudopotentials-${dacapo_psp_version}.tar.gz" -O campos-dacapo-pseudopotentials-${dacapo_psp_version}.tar.gz fi gpaw-24.1.0/doc/platforms/Linux/download_nanolab.sh000077500000000000000000000012471454550013000222500ustar00rootroot00000000000000export APPS="/home/karsten/CAMd" export MODULEFILES="${APPS}/modulefiles" # download packages nose_version=0.11.3 wget http://python-nose.googlecode.com/files/nose-${nose_version}.tar.gz numpy_version=1.1.1 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz numpy_version=1.5.0 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz ase_version=3.4.1.1765 wget 
https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz gpaw_version=0.7.2.6974 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.6.6300 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz gpaw-24.1.0/doc/platforms/Linux/download_ranger.sh000077500000000000000000000011521454550013000221070ustar00rootroot00000000000000setenv APPS "/share/home/01067/tg803307/CAMd" setenv MODULEFILES "${APPS}/modulefiles" # download packages set nose_version=0.11.3 wget http://python-nose.googlecode.com/files/nose-${nose_version}.tar.gz set numpy_version=1.5.0 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz set ase_version=3.4.1.1765 wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz set gpaw_version=0.7.2.6974 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz set gpaw_setups_version=0.6.6300 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz gpaw-24.1.0/doc/platforms/Linux/download_sun_chpc.sh000077500000000000000000000011101454550013000224250ustar00rootroot00000000000000export APPS="~/CAMd" export MODULEFILES="${APPS}/modulefiles" cd ${APPS} # download packages nose_version=1.1.2 http://pypi.python.org/packages/source/n/nose/nose-${nose_version}.tar.gz numpy_version=1.5.1 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz ase_version=3.6.0.2515 wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz gpaw_version=0.9.0.8965 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.8.7929 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz gpaw-24.1.0/doc/platforms/Linux/gardar.nhpc.hi.is.rst000066400000000000000000000075171454550013000223500ustar00rootroot00000000000000.. 
_gardar.nhpc.hi.is: ================= gardar.nhpc.hi.is ================= Information about the machine http://nhpc.hi.is/content/nhpc-system Instructions assume **bash**, installation under *${HOME}/global/apps*. Make sure the following modules are loaded:: $ module list Currently Loaded Modules: 1) intel/13.1 3) openmpi/1.6.5 5) gold/2.2.0.5 2) mkl/11.0.0 4) python/2.7.5 Setup the root directory:: export APPHOME=${HOME}/global/apps mkdir -p ${APPHOME} cd ${APPHOME} export GPAW_PLATFORM=`python -c "from distutils import util, sysconfig; print(util.get_platform()+'-'+sysconfig.get_python_version())"` Download software:: svn co -r 3906 https://svn.fysik.dtu.dk/projects/ase/trunk ase.3906 svn co -r 12224 https://svn.fysik.dtu.dk/projects/gpaw/trunk gpaw.12224 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.11271.tar.gz wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.9672.tar.gz tar zxf gpaw-setups-0.9.11271.tar.gz tar zxf gpaw-setups-0.9.9672.tar.gz wget "http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.2.1.tar.gz" -O libxc-2.2.1.tar.gz tar zxf libxc-2.2.1.tar.gz and create the modules:: mkdir -p $APPHOME/modulefiles/{ase,gpaw,gpaw-setups,libxc} cat < modulefiles/gpaw/0.11.0.12224 set modulefile [lrange [split [module-info name] {/}] 0 0] set release [lrange [split [module-info name] {/}] 1 1] set apphome $APPHOME/gpaw.12224 set appname "gpaw" prepend-path PATH \$apphome/build/bin.$GPAW_PLATFORM prepend-path PATH \$apphome/tools prepend-path PYTHONPATH \$apphome prepend-path PYTHONPATH \$apphome/build/lib.$GPAW_PLATFORM EOF cat < modulefiles/ase/3.9.0.3906 set modulefile [lrange [split [module-info name] {/}] 0 0] set release [lrange [split [module-info name] {/}] 1 1] set apphome $APPHOME/ase.3906 set appname "ase" prepend-path PATH \$apphome/tools prepend-path PYTHONPATH \$apphome EOF cat < modulefiles/gpaw-setups/0.9.9672 set modulefile [lrange [split [module-info name] {/}] 0 0] set release [lrange [split [module-info name] 
{/}] 1 1] set apphome $APPHOME/gpaw-setups-0.9.9672 set appname "gpaw-setups" prepend-path GPAW_SETUP_PATH \$apphome EOF cat < modulefiles/gpaw-setups/0.9.11271 set modulefile [lrange [split [module-info name] {/}] 0 0] set release [lrange [split [module-info name] {/}] 1 1] set apphome $APPHOME/gpaw-setups-0.9.11271 set appname "gpaw-setups" prepend-path GPAW_SETUP_PATH \$apphome EOF cat < modulefiles/libxc/2.2.1-1 set modulefile [lrange [split [module-info name] {/}] 0 0] set release [lrange [split [module-info name] {/}] 1 1] set apphome $APPHOME/libxc-2.2.1-1 set appname "libxc" prepend-path LD_LIBRARY_PATH \$apphome/lib prepend-path PATH \$apphome/bin prepend-path C_INCLUDE_PATH \$apphome/include prepend-path PKG_CONFIG_PATH \$apphome/lib/pkgconfig EOF Build libxc:: cd $APPHOME/libxc-2.2.1 ./configure --prefix $APPHOME/libxc-2.2.1-1 --enable-shared make make install and GPAW:: cd $APPHOME wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_gardar.py cd $APPHOME/gpaw.12224 python setup.py --remove-default-flags --customize=../customize_gardar.py build_ext 2>&1 | tee build_ext.log The :file:`customize_gardar.py` looks like: .. literalinclude:: customize_gardar.py GPAW tests :file:`gpaw-test` can be submitted like this:: qsub -l nodes=1:ppn=8 run.sh where :file:`run.sh` looks like this:: #!/bin/sh . /opt/lmod/lmod/init/sh module use --append ~/global/apps/modulefiles module load ase/3.9.0.3906 module load gpaw-setups/0.9.11271 module load libxc/2.2.1-1 module load gpaw/0.11.0.12224 export OMP_NUM_THREADS=1 mpiexec `which gpaw-python` `which gpaw-test` gpaw-24.1.0/doc/platforms/Linux/huygens.rst000066400000000000000000000034131454550013000206210ustar00rootroot00000000000000.. _huygens: =============================================== huygens.sara.nl (IBM Power6, Infiniband, ESSL) =============================================== Here you find information about the system ``_. 
One should not use the systems defaul python, but load the python module:: $ module load python Now, numpy and ASE can be installed in the standard way:: $ python setup.py install ... In order to use gcc for parallel compilation of GPAW, set the environment variable MP_COMPILER:: $ export MP_COMPILER=gcc Use the following customize.py:: libraries += ['xlf90_r', 'xlsmp', 'xlfmath', 'lapack', 'essl', 'xl'] library_dirs += ['/sara/sw/lapack/3.1.1/lib', '/opt/ibmcmp/xlf/12.1/lib64/', '/opt/ibmcmp/xlsmp/1.8/lib64/', ] define_macros += [('GPAW_AIX', '1')] define_macros += [('GPAW_NO_UNDERSCORE_BLAS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_LAPACK', '1')] extra_compile_args += ['-std=c99'] mpicompiler = 'mpcc' mpilinker = 'mpcc Here is an example batch job script :: # @ node = 1 # @ tasks_per_node = 4 # # Loadleveler can send email, but for this job, we ask Loadleveler not # to send any email: # # @ notification = never # # Define the standard input, output and error for the job: # # @ input = /dev/null # @ output = out.$(jobid) # @ error = err.$(jobid) # # @ wall_clock_limit = 0:30:00 # # @ job_type = parallel # # @ network.MPI = sn_all,not_shared,US # @ queue # cd $HOME/gpaw-benchmarks/ export PYTHONPATH=$HOME/python/lib/python export GPAW_SETUP_PATH=$HOME/gpaw-setups-0.4.2039 export GPAW_PYTHON=$HOME/python/bin/gpaw-python $GPAW_PYTHON input.py The batch jobs are submitted with ``llsubmit``:: $ llsubmit job_file gpaw-24.1.0/doc/platforms/Linux/icc.py000077500000000000000000000021141454550013000175150ustar00rootroot00000000000000#!/usr/bin/python """icc.py is a wrapper for the Intel compiler, converting/removing incompatible gcc args. 
""" import sys from subprocess import call args2change = {"-fno-strict-aliasing":"", "-fmessage-length=0":"", "-Wall":"", "-std=c99":"-qlanglvl=extc99", "-fPIC":"", "-g":"", "-D_FORTIFY_SOURCE=2":"", "-DNDEBUG":"", "-UNDEBUG":"", "-pthread":"", "-shared":"-qmkshrobj", "-Xlinker":"", "-export-dynamic":"", "-Wstrict-prototypes":"", "-dynamic":"", "-O3":"", "-O3":"", "-O2":"", "-O1":""} fragile_files = ["test.c"] cmd = "" fragile = False for arg in sys.argv[1:]: cmd += " " t = arg.strip() if t in fragile_files: fragile = True if t in args2change: cmd += args2change[t] else: cmd += arg flags = "-w -O3 -std=c99" cmd = f"mpicc {flags} {cmd}" call(cmd, shell=True) gpaw-24.1.0/doc/platforms/Linux/install_armageddon.sh000077500000000000000000000071031454550013000225730ustar00rootroot00000000000000#!/bin/sh export APPS="/home/firegam/CAMd" export MODULEFILES="${APPS}/modulefiles" INSTALL_DACAPO=False # the following packages are needed #sudo apt-get install python-all-dev #sudo apt-get install python-numpy #sudo apt-get install python-scientific #sudo apt-get install python-matplotlib #sudo apt-get install netcdf-dev # for dacapo #sudo apt-get install netcdf-bin # for dacapo #sudo apt-get install fftw-dev # for dacapo #sudo apt-get install gfortran # for dacapo # build packages openmpi_version=1.4.3 tar jxf openmpi-${openmpi_version}.tar.bz2 cd openmpi-${openmpi_version} ./configure --prefix=${APPS}/openmpi-${openmpi_version}-1 make && make install cd .. nose_version=0.11.3 tar zxf nose-${nose_version}.tar.gz cd nose-${nose_version} python setup.py install --root=${APPS}/nose-${nose_version}-1 cd .. numpy_version=1.5.0 tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} # disable compiling with atlas sed -i "s/_lib_atlas =.*/_lib_atlas = ['ignore_atlas']/g" numpy/distutils/system_info.py python setup.py install --root=${APPS}/numpy-${numpy_version}-1 cd .. 
ase_version=3.4.1.1765 tar zxf python-ase-${ase_version}.tar.gz gpaw_version=0.7.2.6974 tar zxf gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.6.6300 tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz if [ "${INSTALL_DACAPO}" = "True" ]; then dacapo_version=2.7.16 tar zxf campos-dacapo-${dacapo_version}.tar.gz cd campos-dacapo-${dacapo_version} export FFTW=/usr/lib64 export NETCDF=/usr/lib64 export BLASLAPACK='${NETCDF}/libnetcdff.a ${NETCDF}/libnetcdf.a -L/opt/intel/Compiler/11.1/072/mkl/lib/em64t/ -lmkl_lapack -lmkl_core -lmkl_sequential -lmkl_gf_lp64' export MPIDIR=/home/firegam/CAMd/openmpi-1.4.3-1 export MPI_LIBDIR=${MPIDIR}/lib export MPI_BINDIR=${MPIDIR}/bin export MPI_INCLUDEDIR=${MPIDIR}/include cd src # patch -f90: f951: error: unrecognized command line option "-f90=gfortran" sed -i 's#-f90=$(GFORTRAN_FNOSECOND_UNDERSCORE_FC90)##g' Makefile make gfortran_fnosecond_underscore make gfortran_fnosecond_underscore MP=mpi cd gfortran_fnosecond_underscore_serial ln -s dacapo.run dacapo_${dacapo_version}-1_serial.run cd .. cd gfortran_fnosecond_underscore_mpi ln -s dacapo.run dacapo_${dacapo_version}-1_mpi.run cd .. cd .. wget https://svn.fysik.dtu.dk/projects/ase/trunk/ase/calculators/jacapo/tools/dacapo.run chmod ugo+rx dacapo.run cd .. dacapo_psp_version=1 tar zxf campos-dacapo-pseudopotentials-${dacapo_psp_version}.tar.gz mkdir psp-1 cp -pr campos-dacapo-pseudopotentials-1/psp/*/*/*.pseudo psp-1 fi . ./set_env_armageddon.sh # test numpy python -c "import numpy; numpy.test()" # test ase mkdir -p testase cd testase testase.py --no-display 2>&1 | tee testase.log cd .. # build gpaw cd gpaw-${gpaw_version} python setup.py build_ext --customize=../customize_armageddon.py --remove-default-flags cd .. 
mkdir -p testgpaw cd testgpaw mpiexec -np 4 gpaw-python `which gpaw-test` 2>&1 | tee testgpaw.log if [ "${INSTALL_DACAPO}" == "True" ]; then mkdir -p testjacapo cd testjacapo # from https://wiki.fysik.dtu.dk/ase/ase/calculators/jacapo.html cat < CO.py #!/usr/bin/env python from ase import * from ase.structure import molecule from ase.calculators.jacapo import * CO = molecule('CO') CO.set_cell([6,6,6]) CO.center() calc = Jacapo(nc='CO.nc', atoms=CO, pw=300, nbands=8) print CO.get_potential_energy() EOF # test on 4 cores python -c "for i in range(4): print 'localhost'" > pbs_nodefile && PBS_NODEFILE=pbs_nodefile python CO.py fi gpaw-24.1.0/doc/platforms/Linux/install_nanolab_EL4.sh000077500000000000000000000052731454550013000225560ustar00rootroot00000000000000export APPS="/home/karsten/CAMd" export MODULEFILES="${APPS}/modulefiles" # build packages numpy_version=1.1.1 tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} # disable compiling with atlas sed -i "s/_lib_atlas =.*/_lib_atlas = ['ignore_atlas']/g" numpy/distutils/system_info.py python setup.py install --root=${APPS}/numpy-${numpy_version}-1 cd .. 
ase_version=3.4.1.1765 tar zxf python-ase-${ase_version}.tar.gz # patch cPickle sed -i "s/cPickle as //g" python-ase-${ase_version}/ase/io/trajectory.py gpaw_version=0.7.2.6974 tar zxf gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.6.6300 tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz mkdir -p ${MODULEFILES}/numpy cat < ${MODULEFILES}/numpy/${numpy_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path PATH \$apps_path/numpy-${numpy_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/numpy-${numpy_version}-1/usr/lib/python2.3/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-ase3 cat < ${MODULEFILES}/campos-ase3/${ase_version}-1 #%Module1.0 set apps_path ${APPS} prereq numpy prepend-path PATH \$apps_path/python-ase-${ase_version}/tools prepend-path PYTHONPATH \$apps_path/python-ase-${ase_version}/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw-setups cat < ${MODULEFILES}/campos-gpaw-setups/${gpaw_setups_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path GPAW_SETUP_PATH \$apps_path/gpaw-setups-${gpaw_setups_version} unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw cat < ${MODULEFILES}/campos-gpaw/${gpaw_version}-1 #%Module1.0 set apps_path ${APPS} prereq numpy prereq campos-ase3 prereq campos-gpaw-setups prereq intel_compilers/11.1 prereq openmpi/1.3.3 prepend-path PATH \$apps_path/gpaw-${gpaw_version}/tools prepend-path PATH \$apps_path/gpaw-${gpaw_version}/build/bin.linux-i686-2.3 prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/ prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/build/lib.linux-i686-2.3 setenv OMP_NUM_THREADS 1 unset apps_path EOF module avail module use --append ${MODULEFILES} module load numpy module load campos-ase3 # test numpy python -c "import numpy; numpy.test()" # test ase mkdir -p testase cd testase testase.py --no-display 2>&1 | tee testase.log cd .. 
# build gpaw cd gpaw-${gpaw_version} python setup.py build_ext --customize=../customize_nanolab_EL4.py --remove-default-flags cd .. module load campos-gpaw-setups module load intel_compilers/11.1 module load openmpi/1.3.3 module load campos-gpaw mkdir -p testgpaw cd testgpaw mpiexec -np 4 gpaw-python `which gpaw-test` 2>&1 | tee testgpaw.log gpaw-24.1.0/doc/platforms/Linux/install_newcell.csh000066400000000000000000000062721454550013000222710ustar00rootroot00000000000000setenv APPS "/afs/crc.nd.edu/user/j/jbray2/CAMd" setenv MODULEFILES "${APPS}/modulefiles" set nose_version=0.11.3 wget http://python-nose.googlecode.com/files/nose-${nose_version}.tar.gz tar zxf nose-${nose_version}.tar.gz cd nose-${nose_version} python setup.py install --root=${APPS}/nose-${nose_version}-1 cd .. set numpy_version=1.4.1 wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} python setup.py install --root=${APPS}/numpy-${numpy_version}-1 cd .. 
set ase_version=3.4.1.1765 wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz tar zxf python-ase-${ase_version}.tar.gz set gpaw_version=0.7.2.6974 wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz tar zxf gpaw-${gpaw_version}.tar.gz set gpaw_setups_version=0.6.6300 wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz mkdir -p ${MODULEFILES}/nose cat < ${MODULEFILES}/nose/${nose_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path PATH \$apps_path/nose-${nose_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/nose-${nose_version}-1/usr/lib/python2.4/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/numpy cat < ${MODULEFILES}/numpy/${numpy_version}-1 #%Module1.0 set apps_path ${APPS} prereq nose # nose prepend-path PATH \$apps_path/numpy-${numpy_version}-1/usr/bin prepend-path PYTHONPATH \$apps_path/numpy-${numpy_version}-1/usr/lib64/python2.4/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-ase3 cat < ${MODULEFILES}/campos-ase3/${ase_version}-1 #%Module1.0 set apps_path ${APPS} prereq nose prereq numpy prepend-path PATH \$apps_path/python-ase-${ase_version}/tools prepend-path PYTHONPATH \$apps_path/python-ase-${ase_version}/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw-setups cat < ${MODULEFILES}/campos-gpaw-setups/${gpaw_setups_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path GPAW_SETUP_PATH \$apps_path/gpaw-setups-${gpaw_setups_version} unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw cat < ${MODULEFILES}/campos-gpaw/${gpaw_version}-1 #%Module1.0 set apps_path ${APPS} prereq ompi/1.3.2-gnu prereq numpy prereq campos-ase3 prereq campos-gpaw-setups prepend-path PATH \$apps_path/gpaw-${gpaw_version}/tools prepend-path PATH \$apps_path/gpaw-${gpaw_version}/build/bin.linux-x86_64-2.4 prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/ prepend-path PYTHONPATH 
\$apps_path/gpaw-${gpaw_version}/build/lib.linux-x86_64-2.4 setenv OMP_NUM_THREADS 1 unset apps_path EOF module avail module use --append ${MODULEFILES} module load nose module load numpy module load campos-ase3 # test numpy python -c "import numpy; numpy.test()" # test ase mkdir -p tmp cd tmp testase.py --no-display cd .. # build gpaw cd gpaw-${gpaw_version} module load ompi/1.3.2-gnu python setup.py build_ext --customize=../customize_newcell.py --remove-default-flags cd .. module load campos-gpaw-setups module load campos-gpaw gpaw-24.1.0/doc/platforms/Linux/install_ranger_EL4.sh000077500000000000000000000064621454550013000224230ustar00rootroot00000000000000setenv APPS "/share/home/01067/tg803307/CAMd" setenv MODULEFILES "${APPS}/modulefiles" # build packages set nose_version=0.11.3 tar zxf nose-${nose_version}.tar.gz cd nose-${nose_version} python setup.py install --root=${APPS}/nose-${nose_version}-1 cd .. set numpy_version=1.5.0 tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} python setup.py install --root=${APPS}/numpy-${numpy_version}-1 cd .. 
set ase_version=3.4.1.1765 tar zxf python-ase-${ase_version}.tar.gz set gpaw_version=0.7.2.6974 tar zxf gpaw-${gpaw_version}.tar.gz set gpaw_setups_version=0.6.6300 tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz mkdir -p ${MODULEFILES}/nose cat < ${MODULEFILES}/nose/${nose_version}-1 #%Module1.0 set apps_path ${APPS} prereq python/2.5.2 prepend-path PATH \$apps_path/nose-${nose_version}-1/opt/apps/python/python-2.5.2/bin/ prepend-path PYTHONPATH \$apps_path/nose-${nose_version}-1/opt/apps/python/python-2.5.2/lib/python2.5/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/numpy cat < ${MODULEFILES}/numpy/${numpy_version}-1 #%Module1.0 set apps_path ${APPS} prereq python/2.5.2 prereq nose/0.11.3-1 prepend-path PATH \$apps_path/numpy-${numpy_version}-1/opt/apps/python/python-2.5.2/bin prepend-path PYTHONPATH \$apps_path/numpy-${numpy_version}-1/opt/apps/python/python-2.5.2/lib/python2.5/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-ase3 cat < ${MODULEFILES}/campos-ase3/${ase_version}-1 #%Module1.0 set apps_path ${APPS} prereq python/2.5.2 prereq nose/0.11.3-1 prereq numpy/1.5.0-1 prepend-path PATH \$apps_path/python-ase-${ase_version}/tools prepend-path PYTHONPATH \$apps_path/python-ase-${ase_version}/ unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw-setups cat < ${MODULEFILES}/campos-gpaw-setups/${gpaw_setups_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path GPAW_SETUP_PATH \$apps_path/gpaw-setups-${gpaw_setups_version} unset apps_path EOF mkdir -p ${MODULEFILES}/campos-gpaw cat < ${MODULEFILES}/campos-gpaw/${gpaw_version}-1 #%Module1.0 set apps_path ${APPS} prereq python/2.5.2 prereq nose/0.11.3-1 prereq numpy/1.5.0-1 prereq campos-ase3 prereq campos-gpaw-setups prereq gcc/4.4.5 prereq openmpi/1.3b prereq mkl/10.0 prepend-path PATH \$apps_path/gpaw-${gpaw_version}/tools prepend-path PATH \$apps_path/gpaw-${gpaw_version}/build/bin.linux-x86_64-2.5/ prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/ prepend-path 
PYTHONPATH \$apps_path/gpaw-${gpaw_version}/build/lib.linux-x86_64-2.5/ setenv OMP_NUM_THREADS 1 unset apps_path EOF module avail module use --append ${MODULEFILES} module load python/2.5.2 module load nose/0.11.3-1 module load numpy/1.5.0-1 module load campos-ase3 # test numpy python -c "import numpy; numpy.test()" # test ase mkdir -p testase cd testase testase.py --no-display >& testase.log cd .. # build gpaw cd gpaw-${gpaw_version} python setup.py build_ext --customize=../customize_ranger_EL4.py --remove-default-flags >& build_ext.log cd .. module load campos-gpaw-setups module load gcc/4.4.5 module load openmpi/1.3b module load mkl/10.0 module load campos-gpaw mkdir -p testgpaw cd testgpaw mpiexec -np 4 gpaw-python `which gpaw-test` >& tee testgpaw.log gpaw-24.1.0/doc/platforms/Linux/install_sun_chpc_SUSE10.sh000077500000000000000000000104511454550013000232740ustar00rootroot00000000000000export APPS=`echo ~/CAMd` export MODULEFILES="${APPS}/modulefiles" # build packages python_version=2.7.3 wget http://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz tar zxf Python-${python_version}.tgz cd Python-${python_version} ./configure --prefix=${APPS}/Python-${python_version}-1 make 2>&1 | tee make.log make install 2>&1 | tee make_install.log cd .. mkdir -p ${MODULEFILES}/python cat < ${MODULEFILES}/python/${python_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path PATH \$apps_path/Python-${python_version}-1/bin unset apps_path EOF module use --append ${MODULEFILES} module load python export GPAW_PLATFORM=`python -c "from distutils import util, sysconfig; print util.get_platform()+'-'+sysconfig.get_python_version()"` export PYTHONVERSION=`python -c "from distutils import sysconfig; print sysconfig.get_python_version()"` nose_version=1.1.2 tar zxf nose-${nose_version}.tar.gz cd nose-${nose_version} python setup.py install --root=${APPS}/nose-${nose_version}-1 cd .. 
numpy_version=1.6.1 tar zxf numpy-${numpy_version}.tar.gz cd numpy-${numpy_version} # Use /usr/bin/gfortran and numpy's internal blas/lapack sed -i "s/_lib_names = \['blas'\]/_lib_names = ['']/g" numpy/distutils/system_info.py sed -i "s/_lib_names = \['lapack'\]/_lib_names = ['']/g" numpy/distutils/system_info.py # avoid "Both g77 and gfortran runtimes linked in lapack_lite !" setting --fcompiler=gnu95 # note that this forces /usr/bin/gfortran to be used python setup.py build --fcompiler=gnu95 2>&1 | tee build.log python setup.py install --root=${APPS}/numpy-${numpy_version}-1 2>&1 | tee install.log cd .. ase_version=3.6.0.2515 tar zxf python-ase-${ase_version}.tar.gz gpaw_version=0.9.0.8965 tar zxf gpaw-${gpaw_version}.tar.gz gpaw_setups_version=0.8.7929 tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz mkdir -p ${MODULEFILES}/nose cat < ${MODULEFILES}/nose/${nose_version}-1 #%Module1.0 set apps_path ${APPS} prereq python prepend-path PATH \$apps_path/nose-${nose_version}-1${APPS}/Python-${python_version}-1/bin prepend-path PYTHONPATH \$apps_path/nose-${nose_version}-1${APPS}/Python-${python_version}-1/lib/python${PYTHONVERSION}/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/numpy cat < ${MODULEFILES}/numpy/${numpy_version}-1 #%Module1.0 set apps_path ${APPS} prereq nose prepend-path PATH \$apps_path/numpy-${numpy_version}-1${APPS}/Python-${python_version}-1/bin prepend-path PYTHONPATH \$apps_path/numpy-${numpy_version}-1${APPS}/Python-${python_version}-1/lib/python${PYTHONVERSION}/site-packages/ unset apps_path EOF mkdir -p ${MODULEFILES}/python-ase cat < ${MODULEFILES}/python-ase/${ase_version}-1 #%Module1.0 set apps_path ${APPS} prereq numpy prepend-path PATH \$apps_path/python-ase-${ase_version}/tools prepend-path PYTHONPATH \$apps_path/python-ase-${ase_version}/ unset apps_path EOF mkdir -p ${MODULEFILES}/gpaw-setups cat < ${MODULEFILES}/gpaw-setups/${gpaw_setups_version}-1 #%Module1.0 set apps_path ${APPS} prepend-path GPAW_SETUP_PATH 
\$apps_path/gpaw-setups-${gpaw_setups_version} unset apps_path EOF mkdir -p ${MODULEFILES}/gpaw cat < ${MODULEFILES}/gpaw/${gpaw_version}-1 #%Module1.0 set apps_path ${APPS} prereq python-ase prereq gpaw-setups prepend-path PATH \$apps_path/gpaw-${gpaw_version}/tools prepend-path PATH \$apps_path/gpaw-${gpaw_version}/build/bin.${GPAW_PLATFORM} prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/ prepend-path PYTHONPATH \$apps_path/gpaw-${gpaw_version}/build/lib.${GPAW_PLATFORM} setenv OMP_NUM_THREADS 1 unset apps_path EOF module load nose module load numpy # test numpy python -c "import numpy; numpy.test()" module load python-ase # test ase mkdir -p testase cd testase testase.py --no-display 2>&1 | tee testase.log cd .. # build gpaw cd gpaw-${gpaw_version} # disable fftw (something is wrong with /usr/local/lib/libfftw3.a) sed -i 's/libfftw3\.so/libfftw3_not.so/' gpaw/fftw.py python setup.py build_ext --customize=../customize_sun_chpc_SUSE10.py --remove-default-flags 2>&1 | tee build_ext.log cd .. module load gpaw-setups module load gpaw mkdir -p testgpaw cd testgpaw mpiexec -np 4 gpaw-python `which gpaw-test` 2>&1 | tee testgpaw.log gpaw-24.1.0/doc/platforms/Linux/jureca.rst000066400000000000000000000072201454550013000204100ustar00rootroot00000000000000.. _jureca: ================== jureca @ FZ-Jülich ================== Find information about the `Jureca system here`_. .. _Jureca system here: http://www.fz-juelich.de/ias/jsc/jureca Pre-installed versions ====================== You may use the pre-installed versions:: module load intel-para module load GPAW In case you are happy with these versions, you need to install the setups (next point) and you are done. 
Setups ====== The setups are not defined in the pre-installed version, so we need to install them ourselves:: cd GPAW_SETUP_SOURCE=$PWD/source/gpaw-setups mkdir -p $GPAW_SETUP_SOURCE cd $GPAW_SETUP_SOURCE wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.11271.tar.gz tar xzf gpaw-setups-0.9.11271.tar.gz Let gpaw know about the setups:: export GPAW_SETUP_PATH=$GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271 Using the module environment ============================ It is very handy to add our installation to the module environment:: cd mkdir -p modulefiles/gpaw-setups cd modulefiles/gpaw-setups echo -e "#%Module1.0\nprepend-path GPAW_SETUP_PATH $GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271" > 0.9.11271 We need to let the system know about our modules (add this command to ``~/.profile`` or ``~/.bashrc`` to execute automatically):: module use $HOME/modulefiles such that we also see them with:: module avail Building from trunk =================== In case that you need a newer version than is installed, you might want to install gpaw yourself. 
We first get ASE trunk:: ASE_SOURCE=$HOME/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone https://gitlab.com/ase/ase.git trunk which can be updated using:: cd $ASE_SOURCE/trunk git pull We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase and edit the module file :file:`trunk` that should read:: #%Module1.0 if {![is-loaded intel-para]} {module load intel-para} if {![is-loaded Python]} {module load Python} if {![is-loaded SciPy-Stack]} {module load SciPy-Stack} # customize the following according to your taste set HOME $env(HOME) set asehome $HOME/source/ase/trunk prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/bin:$asehome/tools We create a place for gpaw and get the trunk version:: # customize the following according to your taste GPAW_SOURCE=$HOME/source/gpaw mkdir -p $GPAW_SOURCE cd $GPAW_SOURCE git clone https://gitlab.com/gpaw/gpaw.git trunk The current trunk version can then be updated by:: cd $GPAW_SOURCE/trunk git pull We use the installed version of libxc and our ase/trunk:: module purge module load intel-para module load Libxc module load ase/trunk and install using :download:`customize_jureca.py`:: cd $GPAW_SOURCE/trunk mkdir install cp customize_jureca.py customize.py python setup.py install --prefix=$PWD/install We add this also to the module environment:: cd mkdir -p modulefiles/gpaw cd modulefiles/gpaw and the module file :file:`trunk` should read:: #%Module1.0 if {![is-loaded ase/trunk]} {module load ase} if {![is-loaded Libxc]} {module load Libxc} if {![is-loaded gpaw-setups]} {module load gpaw-setups} # customize the following according to your taste set HOME $env(HOME) set gpawhome $HOME/source/gpaw/trunk/install prepend-path PATH $gpawhome/bin prepend-path PYTHONPATH $gpawhome/lib/python3.6/site-packages setenv GPAW_PYTHON $gpawhome/bin/gpaw-python Execution ========= Job scripts can be written using:: gpaw-runscript -h and the jobs submitted as:: sbatch run.jureca 
gpaw-24.1.0/doc/platforms/Linux/juropa.rst000066400000000000000000000070321454550013000204400ustar00rootroot00000000000000.. _juropa: ==================================================== juropa.fz-juelich.de (Intel Xeon, Infiniband, MKL) ==================================================== Here you find information about the system http://www.fz-juelich.de/jsc/juropa. Numpy is installed system wide, so separate installation is not needed. Building GPAW with gcc ====================== Build GPAW using **gcc** with the configuration file :download:`customize_juropa_gcc.py`. .. literalinclude:: customize_juropa_gcc.py and by executing:: module unload parastation/intel module load parastation/gcc python setup.py install --prefix='' --home=MY_INSTALLATION_DIR Building GPAW with Intel compiler ================================= Use the compiler wrapper file :download:`icc.py` .. literalinclude:: icc.py Internal libxc -------------- Before revision 10429 libxc was internal, the corresponding configuration file is :download:`customize_juropa_icc.py`. .. literalinclude:: customize_juropa_icc.py External libxc -------------- After svn revision 10429 libxc has to be included as external library (see also the `libxc web site `__). To install libxc we assume that MYLIBXCDIR is set to the directory where you want to install:: $ wget http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.0.2.tar.gz $ tar -xzvf libxc-2.0.2.tar.gz $ cd libxc-2.0.2/ $ mkdir install $ ./configure CFLAGS="-fPIC" --prefix=$PWD/install -enable-shared $ make |tee make.log $ make install This will have installed the libs $MYLIBXCDIR/libxc-2.0.2/install/lib and the C header files to $MYLIBXCDIR/libxc-2.0.2/install/include. We have to modify the file :file:`customize.py` to :download:`customize_juropa_icc_libxc.py` .. 
literalinclude:: customize_juropa_icc_libxc.py Note that the location of the external libxc on runtime has to be enabled by setting:: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MYLIBXCDIR/libxc-2.0.2/install/lib Compiling --------- Now, default parastation/intel module is used so execute only:: python setup.py install --prefix='' --home=MY_INSTALLATION_DIR Execution ========= General execution instructions can be found at http://www.fz-juelich.de/jsc/juropa/usage/quick-intro. Example batch job script for GPAW (512 cores, 30 minutes):: #!/bin/bash -x #MSUB -l nodes=64:ppn=8 #MSUB -l walltime=0:30:00 cd $PBS_O_WORKDIR export PYTHONPATH="MY_INSTALLATION_DIR/ase/lib64/python" export PYTHONPATH="$PYTHONPATH":"MY_INSTALLATION_DIR/gpaw/svn/lib64/python" export GPAW_SETUP_PATH=SETUP_DIR/gpaw-setups-0.5.3574 export GPAW_PYTHON=MY_INSTALLATION_DIR/bin/gpaw-python export PSP_ONDEMAND=1 mpiexec -np 512 -x $GPAW_PYTHON my_input.py Note that **-x** flag for *mpiexec* is needed for exporting the environment variables to MPI tasks. The environment variable ``PSP_ONDEMAND`` can decrease the running time with almost a factor of two with large process counts! Job scripts can be written also using:: gpaw-runscript -h Simultaneous Multi-Threading ============================ SMT_ can be used to virtually double the number of nodes. A test case did not show an improvement in performance though. .. _SMT: http://www2.fz-juelich.de/jsc/juropa/usage/smt ====== ===== === ========= #cores t[s] SMT date ====== ===== === ========= 64 2484 no 9.5.2011 64 2438 no 16.5.2011 128 1081 no 16.5.2011 64 4812 yes 16.5.2011 128 2077 yes 16.5.2011 ====== ===== === ========= SMT can be switched on in *gpaw-runscript* via:: gpaw-runscript -s gpaw-24.1.0/doc/platforms/Linux/justus.rst000066400000000000000000000130461454550013000204770ustar00rootroot00000000000000======= justus2 ======= Information about `justus2 `__. 
Building GPAW ============= We assume that the installation will be located in ``$SOURCEDIR``, which might be set to:: export SOURCEDIR=$HOME/source for example. Setups ------ The setups of your choice must be installed (see also :ref:`installation of paw datasets`):: cd GPAW_SETUP_SOURCE=$SOURCEDIR/gpaw-setups mkdir -p $GPAW_SETUP_SOURCE cd $GPAW_SETUP_SOURCE wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.11271.tar.gz tar xzf gpaw-setups-0.9.11271.tar.gz Let gpaw know about the setups:: export GPAW_SETUP_PATH=$GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271 Using the module environment ---------------------------- It is very handy to add our installation to the module environment:: cd mkdir -p modulefiles/gpaw-setups cd modulefiles/gpaw-setups echo -e "#%Module1.0\nprepend-path GPAW_SETUP_PATH $GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271" > 0.9.11271 We need to let the system know about our modules (add this command to ``~/.profile`` or ``~/.bashrc`` to execute automatically):: module use $HOME/modulefiles such that we also see them with:: module avail libxc ----- GPAW relies on libxc (see the `libxc web site `__). To install libxc we assume that ``MYLIBXCDIR`` is set to the directory where you want to install (e.g. ``MYLIBXCDIR=$SOURCEDIR/libxc``):: mkdir -p $MYLIBXCDIR cd $MYLIBXCDIR wget -O libxc-5.0.0.tar.gz http://www.tddft.org/programs/libxc/down.php?file=5.0.0/libxc-5.0.0.tar.gz tar xzf libxc-5.0.0.tar.gz cd libxc-5.0.0 mkdir install ./configure CFLAGS="-fPIC" --prefix=$PWD/install -enable-shared make |tee make.log make install This will have installed the libs ``$MYLIBXCDIR/libxc-5.0.0/install/lib`` and the C header files to ``$MYLIBXCDIR/libxc-5.0.0/install/include``. 
We create a module for libxc:: cd mkdir modulefiles/libxc cd modulefiles/libxc and edit the module file :file:`5.0.0` that should read:: #%Module1.0 # change this to your path set libxchome /home/fr/fr_fr/fr_mw767/source/libxc/libxc-5.0.0/install prepend-path C_INCLUDE_PATH $libxchome/include prepend-path LIBRARY_PATH $libxchome/lib prepend-path LD_LIBRARY_PATH $libxchome/lib ASE release ----------- You might want to install a stable version of ASE:: cd ASE_SOURCE=$PWD/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone -b 3.18.1 https://gitlab.com/ase/ase.git 3.18.1 We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase Edit the module file :file:`3.18.1` that should read:: #%Module1.0 if {![is-loaded numlib/python_scipy]} {module load numlib/python_scipy/1.1.0-python_numpy-1.14.0-python-3.5.0} # change this to your path set asehome /home/fr/fr_fr/fr_mw767/source/ase/3.18.1 prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/tools ASE trunk --------- We get ASE trunk:: cd ASE_SOURCE=$PWD/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone https://gitlab.com/ase/ase.git trunk which can be updated using:: cd $ASE_SOURCE/trunk git pull We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase and edit the module file :file:`trunk` that should read:: #%Module1.0 if {![is-loaded numlib/python_scipy]} {module load numlib/python_scipy} # change this to your path set asehome /home/fr/fr_fr/fr_mw767/source/ase/trunk prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/tools matplotlib ---------- In order to use `ase gui` in its full strength it is useful to install `matplotlib` via pip:: python3 -m pip install matplotlib Building GPAW ------------- We create a place for gpaw and get it:: cd $SOURCEDIR git clone https://gitlab.com/gpaw/gpaw.git The current version can then be updated by:: cd $SOURCEDIR/gpaw git pull A specific tag can be loaded by:: cd 
$GPAW_SOURCE/trunk # list tags git tag # load version 1.2.0 git checkout 1.2.0 To build the current trunk version of GPAW we need to create a file :file:`siteconfig.py` that reads .. literalinclude:: nemo_siteconfig.py To build GPAW use:: module purge module load libxc module load ase module load compiler/intel module load mpi/impi cd $GPAW_SOURCE/trunk CC=mpicc; python3 setup.py build which builds GPAW to ``$GPAW_SOURCE/trunk/build``. We create a module that creates the necessary definitions:: cd mkdir -p modulefiles/gpaw cd modulefiles/gpaw The file :file:`master` that should read:: #%Module1.0 if {![is-loaded ase]} {module load ase} if {![is-loaded libxc]} {module load libxc} if {![is-loaded mpi]} {module load mpi/impi} if {![is-loaded compiler/intel]} {module load compiler/intel} if {![is-loaded gpaw-setups]} {module load gpaw-setups} # change the following directory definition to your needs set gpawhome /home/fr/fr_fr/fr_mw767/source/gpaw # this can stay as is prepend-path PATH $gpawhome/tools:$gpawhome/build/scripts-3.8 prepend-path PYTHONPATH $gpawhome:$gpawhome/build/lib.linux-x86_64-3.8 Running GPAW ------------ A gpaw script :file:`test.py` can be submitted with the help of :file:`gpaw-runscript` to run on 48 cores like this:: > module load gpaw > gpaw-runscript test.py 48 using justus2 run.justus2 written > sbatch run.justus gpaw-24.1.0/doc/platforms/Linux/monolith.rst000066400000000000000000000026371454550013000207770ustar00rootroot00000000000000.. _monolith: ======== monolith ======== Here you find information about the system ``_. The Monolith machine is a cluster of 2.4Ghz Xeon processors with 2GB of memory. The ScaMPI implementation of MPI has a problem, but we can use MPICH. 
Add these two line to the :file:`.modules` file:: python/2.3.3 mkl/9.0p18 The Numeric Python module on the system is way too old, so we build our own version with this :file:`customize.py` file:: use_system_lapack = 1 mkl = '/usr/local/intel/ict/l_ict_p_3.0.023/cmkl/9.0' lapack_library_dirs = [mkl + '/lib/32'] lapack_libraries = ['mkl', 'mkl_lapack', 'g2c'] use_dotblas = 1 dotblas_include_dirs = [mkl + '/include'] dotblas_cblas_header = '' Set these environment variables in the :file:`.bashrc` file:: export PYTHONPATH=$HOME/campos-ase-2.3.4:$HOME/gpaw:$HOME/lib/python/Numeric export GPAW_SETUP_PATH=$HOME/setups export LD_LIBRARY_PATH=$MKL_ROOT and build GPAW (``python setup.py build_ext``) with this :file:`customize.py` file:: extra_compile_args += ['-w1'] mpicompiler = 'icc -Nmpich' custom_interpreter = True compiler = 'icc' Jobs can be submitted like this:: qsub -l nodes=2:ppn=2 -A -l walltime=2:00:00 \ -m abe run.sh where :file:`run.sh` looks like this:: cd $PBS_O_WORKDIR mpirun $HOME/gpaw/build/bin.linux-i686-2.3/gpaw-python gpaw-script.py gpaw-24.1.0/doc/platforms/Linux/msub_sun_chpc.sh000077500000000000000000000011561454550013000215760ustar00rootroot00000000000000#!/bin/sh ###These lines are for Moab #MSUB -l nodes=1:ppn=8 #MSUB -l walltime=02:00:00 #MSUB -l partition=nehalem|harpertown|westmere . /opt/gridware/modules-3.2.7/modules.sh export APPS=`echo ~/CAMd` export MODULEFILES="${APPS}/modulefiles" module use --append ${MODULEFILES} module load python module load nose module load numpy module load python-ase module load gpaw-setups module load gpaw/0.9.0.8965-1 export OMP_NUM_THREADS=1 ##### Running commands cat $PBS_NODEFILE NP=`cat $PBS_NODEFILE | wc -l` mpirun -x PYTHONPATH -x GPAW_SETUP_PATH -x PATH -np $NP -machinefile $PBS_NODEFILE gpaw-python `which gpaw-test` gpaw-24.1.0/doc/platforms/Linux/murska.rst000066400000000000000000000035171454550013000204460ustar00rootroot00000000000000.. 
_murska: ========================================================= murska.csc.fi (AMD Opteron, Infiniband, ACML) ========================================================= Here you find information about the system ``_. Installation of user's packages on murska is recommended under /v/users/$USER/appl/. We want to use python2.4 and gcc compiler:: > module load python > module swap PrgEnv-pgi PrgEnv-gnu and use this :file:`customize.py`:: scalapack = True extra_compile_args =['-O3', '-std=c99'] libraries =['gfortran','acml','scalapack','mpiblacsF77init','mpiblacs','scalapack'] library_dirs =[ '/v/linux26_x86_64/opt/blacs/1.1gnu/hpmpi/lib64', '/v/linux26_x86_64/opt/scalapack/1.8.0gnu/scalapack-1.8.0' ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] Then, compile GPAW ``python setup.py build_ext``. A sample job script:: #!/bin/csh #BSUB -n 4 #BSUB -W 0:10 #BSUB -J jobname_%J #BSUB -e jobname_err_%J #BSUB -o jobname_out_%J # If you install you personal version of gpaw under /v/users/$USER/appl/ # load the required modules and set the environment variables PYTHONPATH, etc. module load ASE/svn module swap PrgEnv-pgi PrgEnv-gnu module load gpaw-setups setenv PYTHONPATH /v/users/$USER/appl/gpaw:$PYTHONPATH setenv PATH /v/users/$USER/appl/gpaw/build/bin.linux-x86_64-2.4:$PATH # Alternatively, use a preinstalled version of gpaw load just gpaw/svn module # which sets all the correct environment variables # PYTHONPATH, PATH, GPAW_SETUP_PATH, etc. # module load gpaw/svn mpirun -srun gpaw-python input.py Murska uses LSF-HPC batch system where jobs are submitted as (note the stdin redirection):: > bsub < input.py gpaw-24.1.0/doc/platforms/Linux/nanolab_EL4.rst000066400000000000000000000032001454550013000212070ustar00rootroot00000000000000.. 
_nanolab: ======================= nanolab.cnf.cornell.edu ======================= Here you find information about the system http://www.cnf.cornell.edu/cnf5_tool.taf?_function=detail&eq_id=111. The installation of user's packages on nanolab EL4, 32-bit described below uses `modules `_, and assumes *bash* shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - module files are located under ``~/CAMd/modulefiles`` - download the :download:`customize_nanolab_EL4.py` file:: wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_nanolab_EL4.py .. literalinclude:: customize_nanolab_EL4.py - download packages with :download:`download_nanolab.sh`, buy running ``sh download_nanolab.sh``: .. literalinclude:: download_nanolab.sh - from *nanolab.cnf.cornell.edu* login to one of c-nodes (Red Hat 4, 32-bit):: ssh c7.cnf.cornell.edu - install packages, deploy modules and test with :download:`install_nanolab_EL4.sh`, buy running ``sh install_nanolab_EL4.sh``: .. literalinclude:: install_nanolab_EL4.sh **Note** that every time you wish to install a new version of a package, and deploy new module file, better keep the old module file. - submit the test job:: qsub submit.sh using the following :file:`submit.sh`:: TODO - to enable the installation permanently add the following to *~/.bashrc*:: module use --append /home/karsten/CAMd/modulefiles module load numpy module load campos-ase3 module load campos-gpaw-setups module load intel_compilers/11.1 module load openmpi/1.3.3 module load campos-gpaw gpaw-24.1.0/doc/platforms/Linux/nemo.rst000066400000000000000000000126131454550013000200770ustar00rootroot00000000000000==== nemo ==== Information about `nemo `__. Building GPAW ============= We assume that the installation will be located in ``$HOME/source``. 
Setups ------ The setups of your choice must be installed (see also :ref:`installation of paw datasets`):: cd GPAW_SETUP_SOURCE=$PWD/source/gpaw-setups mkdir -p $GPAW_SETUP_SOURCE cd $GPAW_SETUP_SOURCE wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.11271.tar.gz tar xzf gpaw-setups-0.9.11271.tar.gz Let gpaw know about the setups:: export GPAW_SETUP_PATH=$GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271 Using the module environment ---------------------------- It is very handy to add our installation to the module environment:: cd mkdir -p modulefiles/gpaw-setups cd modulefiles/gpaw-setups echo -e "#%Module1.0\nprepend-path GPAW_SETUP_PATH $GPAW_SETUP_SOURCE/gpaw-setups-0.9.11271" > 0.9.11271 We need to let the system know about our modules (add this command to ``~/.profile`` or ``~/.bashrc`` to execute automatically):: module use $HOME/modulefiles such that we also see them with:: module avail libxc ----- GPAW relies on libxc (see the `libxc web site `__). To install libxc we assume that ``MYLIBXCDIR`` is set to the directory where you want to install (e.g. ``MYLIBXCDIR=$HOME/source/libxc``):: mkdir -p $MYLIBXCDIR cd $MYLIBXCDIR wget http://www.tddft.org/programs/libxc/down.php?file=5.2.2/libxc-5.2.2.tar.gz -O libxc-5.2.2.tar.gz tar xvzf libxc-5.2.2.tar.gz cd libxc-5.2.2 mkdir install module purge module load compiler/gnu ./configure CFLAGS="-fPIC" --prefix=$PWD/install -enable-shared make |tee make.log make install This will have installed the libs ``$MYLIBXCDIR/libxc-5.2.2/install/lib`` and the C header files to ``$MYLIBXCDIR/libxc-5.2.2/install/include``. 
We create a module for libxc:: cd mkdir modulefiles/libxc cd modulefiles/libxc and edit the module file :file:`5.2.2` that should read:: #%Module1.0 # change this to your path set source $::env(HOME)/source set libxchome $source/libxc/libxc-5.2.2/install prepend-path C_INCLUDE_PATH $libxchome/include prepend-path LIBRARY_PATH $libxchome/lib prepend-path LD_LIBRARY_PATH $libxchome/lib ASE release ----------- You might want to install a stable version of ASE:: cd ASE_SOURCE=$PWD/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone -b 3.18.1 https://gitlab.com/ase/ase.git 3.18.1 We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase Edit the module file :file:`3.18.1` that should read:: #%Module1.0 devel/python/3.7.10 # change this to your path set source $::env(HOME)/source set asehome $source/ase/3.18.1 prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/tools ASE origin ---------- We get ASE origin:: cd ASE_SOURCE=$PWD/source/ase mkdir -p $ASE_SOURCE cd $ASE_SOURCE git clone https://gitlab.com/ase/ase.git origin which can be updated using:: cd $ASE_SOURCE/origin git pull We add our installation to the module environment:: cd mkdir -p modulefiles/ase cd modulefiles/ase and edit the module file :file:`origin` that should read:: #%Module1.0 devel/python/3.7.10 # change this to your path set source $::env(HOME)/source set asehome $source/ase/origin prepend-path PYTHONPATH $asehome prepend-path PATH $asehome/tools Building GPAW ------------- We create a place for gpaw and get the origin version:: cd GPAW_SOURCE=$PWD/source/gpaw mkdir -p $GPAW_SOURCE cd $GPAW_SOURCE git clone https://gitlab.com/gpaw/gpaw.git origin The current origin version can then be updated by:: cd $GPAW_SOURCE/origin git pull A specific tag can be loaded by:: cd $GPAW_SOURCE/origin # list tags git tag # load version 1.2.0 git checkout 1.2.0 To build the current origin version of GPAW we need to create a file :file:`siteconfig.py` that reads .. 
literalinclude:: nemo_siteconfig.py Then we build the executable:: module purge module load libxc module load compiler/intel module load mpi/impi module load numlib/mkl module load ase cd $GPAW_SOURCE/origin unset CC python3 setup.py build which builds GPAW to ``$GPAW_SOURCE/origin/build``. We create a module that creates the necessary definitions:: cd mkdir -p modulefiles/gpaw cd modulefiles/gpaw The file :file:`origin` that should read:: #%Module1.0 if {![is-loaded ase]} {module load ase} if {![is-loaded libxc]} {module load libxc} if {![is-loaded mpi]} {module load mpi/impi} if {![is-loaded gpaw-setups]} {module load gpaw-setups} # change the following directory definition to your needs set source $::env(HOME)/source set gpawhome $source/gpaw/origin # this can stay as is prepend-path PATH $gpawhome/tools:$gpawhome/build/scripts-3.7 prepend-path PYTHONPATH $gpawhome:$gpawhome/build/lib.linux-x86_64-3.7 Running GPAW ------------ A gpaw script :file:`test.py` can be submitted with the help of :file:`gpaw-runscript` to run on 20 cpus like this:: > module load gpaw > gpaw-runscript test.py 20 using nemo run.nemo written > msub run.nemo See options of :file:`gpaw-runscript` with:: > gpaw-runscript -h gpaw-24.1.0/doc/platforms/Linux/nemo_siteconfig.py000066400000000000000000000003071454550013000221260ustar00rootroot00000000000000# necessary MKL libs libraries += ['mkl_intel_lp64', 'mkl_sequential', 'mkl_core', 'svml'] # Now add a EasyBuild "cover-all-bases" library_dirs library_dirs = os.getenv('LD_LIBRARY_PATH').split(':') gpaw-24.1.0/doc/platforms/Linux/neolith.rst000066400000000000000000000126771454550013000206150ustar00rootroot00000000000000.. _neolith: ================== neolith.nsc.liu.se ================== Here you find information about the system http://www.nsc.liu.se/systems/neolith. 
The installation of user's packages on neolith described below uses `cmod `_ modules: - packages are installed under ``~/apps``:: mkdir ~/apps - module files are located under ``~/modulefiles``:: mkdir ~/modulefiles - build `nose `_:: cd ~/apps wget http://python-nose.googlecode.com/files/nose-0.10.1.tar.gz tar zxf nose-0.10.1.tar.gz cd nose-0.10.1 python setup.py install --root=~/apps/nose-0.10.1-1 - build `numpy `_:: cd ~/apps wget http://downloads.sourceforge.net/numpy/numpy-1.3.0.tar.gz tar zxf numpy-1.3.0.tar.gz cd numpy-1.3.0 python setup.py install --root=~/apps/numpy-1.3.0-1 - build `ase `_:: cd ~/apps svn co https://svn.fysik.dtu.dk/projects/ase/trunk ase cd ase python setup.py sdist; cp dist/python-ase-*.tar.gz .. cd .. tar zxf python-ase-3.2.0.962.tar.gz - build gpaw-setups:: cd ~/apps wget --no-check-certificate "http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.5.3574.tar.gz" tar zxf gpaw-setups-0.5.3574.tar.gz - deploy modules:: export MODULEFILES="/home/x_andke/modulefiles" export APPS="/home/x_andke/apps" mkdir ${MODULEFILES}/numpy cat < ${MODULEFILES}/numpy/1.3.0-1.el5.gfortran.python2.4.default.blas.lapack \$(/etc/cmod/modulegroups numpy numpy/1.3.0-1.el5.gfortran.python2.4.default.blas.lapack) #prereq python-nose # nose prepend-path PATH ${APPS}/numpy-1.3.0-1/usr/bin prepend-path PYTHONPATH ${APPS}/numpy-1.3.0-1/usr/lib64/python2.4/site-packages EOF ln -s ${MODULEFILES}/numpy/1.3.0-1.el5.gfortran.python2.4.default.blas.lapack ${MODULEFILES}/numpy/default mkdir ${MODULEFILES}/nose cat < ${MODULEFILES}/nose/0.10.1-1.el5.gfortran.python2.4 \$(/etc/cmod/modulegroups nose nose/0.10.1-1.el5.gfortran.python2.4) prepend-path PATH ${APPS}/nose-0.10.1-1/usr/bin prepend-path PYTHONPATH ${APPS}/nose-0.10.1-1/usr/lib/python2.4/site-packages EOF ln -s ${MODULEFILES}/nose/0.10.1-1.el5.gfortran.python2.4 ${MODULEFILES}/nose/default mkdir ${MODULEFILES}/campos-ase3 cat < ${MODULEFILES}/campos-ase3/3.2.0.962-1.el5.python2.4 \$(/etc/cmod/modulegroups campos-ase3 
campos-ase3/3.2.0.962-1.el5.python2.4) #prereq numpy # numpy prepend-path PATH ${APPS}/python-ase-3.2.0.962/tools prepend-path PYTHONPATH ${APPS}/python-ase-3.2.0.962 EOF ln -s ${MODULEFILES}/campos-ase3/3.2.0.962-1.el5.python2.4 ${MODULEFILES}/campos-ase3/default mkdir ${MODULEFILES}/campos-gpaw-setups cat < ${MODULEFILES}/campos-gpaw-setups/0.5.3574-1.el5 \$(/etc/cmod/modulegroups campos-gpaw-setups campos-gpaw-setups/0.5.3574-1.el5) prepend-path GPAW_SETUP_PATH ${APPS}/gpaw-setups-0.5.3574 EOF ln -s ${MODULEFILES}/campos-gpaw-setups/0.5.3574-1.el5 ${MODULEFILES}/campos-gpaw-setups/default mkdir ${MODULEFILES}/campos-gpaw cat < ${MODULEFILES}/campos-gpaw/0.6.3934-1.el5.gfortran.python2.4.openmpi.mkl.10.0.4.023.mkl_lapack \$(/etc/cmod/modulegroups campos-gpaw campos-gpaw/0.6.3934-1.el5.gfortran.python2.5.openmpi.mkl.10.0.4.023.mkl_lapack) #prereq numpy # numpy #prereq campos-ase3 # ase #prereq campos-gpaw-setups # gpaw-setups prepend-path PATH ${APPS}/gpaw-0.6.3934/tools prepend-path PATH ${APPS}/gpaw-0.6.3934/build/bin.linux-x86_64-2.4 prepend-path PYTHONPATH ${APPS}/gpaw-0.6.3934 EOF ln -s ${MODULEFILES}/campos-gpaw/0.6.3934-1.el5.gfortran.python2.4.openmpi.mkl.10.0.4.023.mkl_lapack ${MODULEFILES}/campos-gpaw/default **Note** that every time you wish to install a new version of a package, you (``svn up`` is necessary), create new tarball, and deploy new module file, keeping the old module file. - test numpy installation:: module use ${MODULEFILES} module load nose module load numpy python -c "import numpy; numpy.test()" - use :download:`customize_neolith.py`: .. literalinclude:: customize_neolith.py to build `gpaw `_:: cd ~/apps svn co https://svn.fysik.dtu.dk/projects/gpaw/trunk gpaw cd gpaw python setup.py sdist; cp dist/gpaw-*.tar.gz .. cd .. 
tar zxf gpaw-0.6.3934.tar.gz cd gpaw-0.6.3934 wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_neolith.py -O customize.py module load openmpi/1.2.7-i101017 python setup.py build_ext --remove-default-flags - test gpaw installation by loading the modules:: module load campos-ase3 module load campos-gpaw-setups module load campos-gpaw export OMP_NUM_THREADS=1 and :ref:`run the tests `. - **logout**, and login again. - submit a test job:: cp ~/apps/gpaw-0.6.3934/test/CH4.py ~/ cd sbatch -N 1 --tasks-per-node 4 submit.sh using the following :file:`submit.sh`:: #!/bin/bash #SBATCH -N 1 #SBATCH -t 00:10:00 export OMP_NUM_THREADS=1 . /etc/cmod/path.sh module use /home/x_andke/modulefiles module load openmpi/1.2.7-i101017 module load nose module load numpy module load campos-ase3 module load campos-gpaw-setups module load campos-gpaw mpprun --force-mpi="openmpi/1.2.7-i101017" `which gpaw-python` ./CH4.py gpaw-24.1.0/doc/platforms/Linux/newcell.rst000066400000000000000000000031031454550013000205640ustar00rootroot00000000000000.. _newcell: ================== newcell.crc.nd.edu ================== Here you find information about the system ``_. The installation of user's packages on newcell described below uses `modules `_, and assumes csh shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - module files are located under ``~/CAMd/modulefiles`` - download the :download:`customize_newcell.py` file:: wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_newcell.py .. literalinclude:: customize_newcell.py - install packages, deploy modules and test with :download:`install_newcell.csh`, buy running ``csh install_newcell.csh``: .. literalinclude:: install_newcell.csh **Note** that every time you wish to install a new version of a package, and deploy new module file, better keep the old module file. 
- submit the test job:: qsub submit.csh using the following :file:`submit.csh`:: #!/bin/csh #$ -q short #$ -pe ompi-8 8 #$ -l arch=lx24-amd64 ##$ -M jbray2@nd.edu ##$ -m abe module load ompi/1.3.2-gnu module use --append /afs/crc.nd.edu/user/j/jbray2/CAMd/modulefiles module load nose module load numpy module load campos-ase3 module load campos-gpaw-setups module load campos-gpaw setenv P4_GLOBMEMSIZE 268435456 setenv P4_SOCKBUFSIZE 262144 echo job was accepted on: date mpirun -np $NSLOTS `which gpaw-python` `which gpaw-test` echo Job has completed. date exit 0 gpaw-24.1.0/doc/platforms/Linux/odyssey.rst000066400000000000000000000007411454550013000206370ustar00rootroot00000000000000.. _odyssey: ======================= odyssey.fas.harvard.edu ======================= Information about the Harvard Odyssey cluster can be found at ``_. Intel C++ compiler, MKL, and OpenMPI are required:: > module load hpc/numpy-1.6.0_python-2.7.1 > module load hpc/intel-and-mkl-12.3.174 > module load hpc/openmpi-1.5.3_intel-12.3.174ib The following :file:`customize.py` file can be used to build GPAW: .. literalinclude:: customize_odyssey.py gpaw-24.1.0/doc/platforms/Linux/openSUSE.rst000066400000000000000000000014711454550013000206020ustar00rootroot00000000000000.. _openSUSE: ======== openSUSE ======== Here you find information about the system ``_. System wide installation with yast ================================== **Warning**: this section is outdated! The steps described below require root access and assume bash shell: - `configure fys yum repository `_ - on openSUSE 11.2 or newer i386 or x86_64, as root: - list available packages:: zypper packages -r fys_openSUSE - install gpaw and dependencies:: yast -i campos-gpaw - install optional packages:: yast -i scipy ScientificPython .. 
note:: There are no new releases of fys packages for "Discontinued distributions" of openSUSE: see http://en.opensuse.org/Lifetime gpaw-24.1.0/doc/platforms/Linux/r410_psmn.ens-lyon.rst000066400000000000000000000150101454550013000224210ustar00rootroot00000000000000.. _r410_psmn.ens_lyon: ================== r410 psmn.ens-lyon ================== Information about the machine https://www.psmn.ens-lyon.fr. Instructions assume **cshrc**, installation under *${HOME}/softs*. Enable the basic settings:: source /usr/local/modeles/Cshrc # The next line causes gcc 4.2.1 to be loaded from /usr/local/bin: crazy! # source /usr/local/modeles/openmpi-1.4.1-intel-11.1.069-mkl # You must NOT do that: # when building GPAW, and other applications using /usr/bin/gcc # remove it temporarily from ~/.tcshrc and logout/login again! Set up the root directory:: setenv HOMESOFTS ${HOME}/softs mkdir -p ${HOMESOFTS} cd ${HOMESOFTS} setenv GPAW_PLATFORM `python -c "from distutils import util, sysconfig; print(util.get_platform()+'-'+sysconfig.get_python_version())"` Set the versions:: set nose=0.11.3 # Warning: version 1.6.0 seems inconsistent about C-, Fortran-contiguous # http://mail.scipy.org/pipermail/numpy-discussion/2011-July/057557.html set numpy=1.5.1 set scipy=0.9.0 set acml=4.0.1 set ase=3.5.1.2175 set gpaw=0.8.0.8092 set setups=0.8.7929 and create startup scripts:: cat < ${HOMESOFTS}/acml-${acml}-1.cshrc setenv LD_LIBRARY_PATH \${HOMESOFTS}/acml-${acml}/gfortran64/lib:\${LD_LIBRARY_PATH} EOF #cat < ${HOMESOFTS}/acml-${acml}-1.cshrc #setenv LD_LIBRARY_PATH /softs/acml/acml${acml}/gfortran/gfortran64/lib:\${LD_LIBRARY_PATH} #EOF cat < ${HOMESOFTS}/nose-${nose}-1.cshrc setenv PYTHONPATH \${HOMESOFTS}/nose-${nose}-1/usr/lib/python2.4/site-packages:\${PYTHONPATH} setenv PATH \${HOMESOFTS}/nose-${nose}-1/usr/bin:\${PATH} EOF cat < ${HOMESOFTS}/numpy-${numpy}-1.cshrc setenv PYTHONPATH \${HOMESOFTS}/numpy-${numpy}-1/usr/lib64/python2.4/site-packages:\${PYTHONPATH} setenv PATH
\${HOMESOFTS}/numpy-${numpy}-1/usr/bin:\${PATH} EOF cat < ${HOMESOFTS}/scipy-${scipy}-1.cshrc setenv PYTHONPATH \${HOMESOFTS}/scipy-${scipy}-1/usr/lib64/python2.4/site-packages:\${PYTHONPATH} setenv PATH \${HOMESOFTS}/scipy-${scipy}-1/usr/bin:\${PATH} EOF cat < ${HOMESOFTS}/python-ase-${ase}.cshrc setenv PYTHONPATH \${HOMESOFTS}/python-ase-${ase}:\${PYTHONPATH} setenv PATH \${HOMESOFTS}/python-ase-${ase}/tools:\${PATH} EOF cat < ${HOMESOFTS}/gpaw-setups-${setups}.cshrc setenv GPAW_SETUP_PATH \${HOMESOFTS}/gpaw-setups-${setups} EOF cat < ${HOMESOFTS}/gpaw-${gpaw}.cshrc setenv GPAW_PLATFORM `python -c "from distutils import util, sysconfig; print util.get_platform()+'-'+sysconfig.get_python_version()"` setenv GPAW_HOME \${HOMESOFTS}/gpaw-${gpaw} setenv PYTHONPATH \${GPAW_HOME}:\${PYTHONPATH} setenv PYTHONPATH \${GPAW_HOME}/build/lib.${GPAW_PLATFORM}:\${PYTHONPATH} setenv PATH \${GPAW_HOME}/build/bin.${GPAW_PLATFORM}:\${PATH} setenv PATH \${GPAW_HOME}/tools:\${PATH} EOF cat < ${HOMESOFTS}/campos.cshrc #!/bin/tcsh # source ${HOMESOFTS}/acml-${acml}-1.cshrc # if ! 
($?PYTHONPATH) then setenv PYTHONPATH "" endif # setenv HOMESOFTS \${HOME}/softs # source \${HOMESOFTS}/nose-${nose}-1.cshrc source \${HOMESOFTS}/numpy-${numpy}-1.cshrc source \${HOMESOFTS}/scipy-${scipy}-1.cshrc # source \${HOMESOFTS}/python-ase-${ase}.cshrc # source \${HOMESOFTS}/gpaw-setups-${setups}.cshrc source \${HOMESOFTS}/gpaw-${gpaw}.cshrc EOF Make sure that you have the right compiler:: % which gfortran # gfortran 4.1.2 /usr/bin/gfortran Build nose/numpy/scipy:: wget --no-check-certificate https://downloads.sourceforge.net/project/numpy/NumPy/${numpy}/numpy-${numpy}.tar.gz wget --no-check-certificate https://downloads.sourceforge.net/project/scipy/scipy/${scipy}/scipy-${scipy}.tar.gz wget http://python-nose.googlecode.com/files/nose-${nose}.tar.gz tar zxf nose-${nose}.tar.gz tar zxf numpy-${numpy}.tar.gz tar zxf scipy-${scipy}.tar.gz cd nose-${nose} python setup.py install --root=${HOMESOFTS}/nose-${nose}-1 >& install.log cd .. continue with:: cd numpy-${numpy} # Warning: numpy with gfortran44 fails under gpaw-python # ImportError: numpy.core.multiarray failed to import # Use /usr/bin/gfortran and numpy's internal blas/lapack sed -i "s/_lib_names = \['blas'\]/_lib_names = ['']/g" numpy/distutils/system_info.py sed -i "s/_lib_names = \['lapack'\]/_lib_names = ['']/g" numpy/distutils/system_info.py # avoid "Both g77 and gfortran runtimes linked in lapack_lite !" setting --fcompiler=gnu95 # note that this forces /usr/bin/gfortran to be used python setup.py build --fcompiler=gnu95 >& build.log python setup.py install --root=${HOMESOFTS}/numpy-${numpy}-1 >& install.log cd .. source ${HOMESOFTS}/campos.cshrc python -c "import numpy; numpy.test()" # scipy cannot be installed due to missing suitesparse-devel and suitesparse #cd scipy-${scipy} #python setup.py config_fc --fcompiler=gfortran install --root=${HOMESOFTS}/scipy-${scipy}-1 2>&1 | tee install.log #cd .. 
#python -c "import scipy; scipy.test()" Install ASE/GPAW:: wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase}.tar.gz wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw}.tar.gz wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${setups}.tar.gz tar zxf python-ase-${ase}.tar.gz tar zxf gpaw-${gpaw}.tar.gz tar zxf gpaw-setups-${setups}.tar.gz mkdir testase && cd testase && testase.py >& ../testase.log wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_r410_psmn.py cd ../gpaw-${gpaw} python setup.py --remove-default-flags --customize=../customize_r410_psmn.py build_ext >& build_ext.log The :file:`customize_r410_psmn.py` looks like: .. literalinclude:: customize_r410_psmn.py GPAW tests :file:`gpaw-test` can be submitted like this:: qsub -cwd -pe mpi8 8 run.tcsh where :file:`run.tcsh` looks like this:: #!/bin/tcsh setenv HOSTFILE $TMPDIR/machines setenv MPIPREFIX /softs/openmpi-gnu source ${HOME}/softs/campos.cshrc setenv OMP_NUM_THREADS 1 ${MPIPREFIX}/bin/mpirun -v -prefix ${MPIPREFIX} -mca pls_rsh_agent "ssh" -mca btl openib,tcp,self -mca btl_tcp_if_include eth1,eth0 -np $NSLOTS `which gpaw-python` `which gpaw-test` Only some nodes have InfiniBand; on those without it, ignore errors like:: OpenIB on host r410lin???.ens-lyon.fr was unable to find any HCAs Please make sure that your jobs do not run multi-threaded, e.g. for a job running on ``dl165lin7`` do from a login node:: ssh dl165lin7 ps -fL you should see **1** in the **NLWP** column. Numbers higher than **1** mean a multi-threaded job. It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/Linux/ranger_EL4.rst000066400000000000000000000067041454550013000210670ustar00rootroot00000000000000.. _ranger: ====================== ranger.tacc.utexas.edu ====================== Here you find information about the system http://services.tacc.utexas.edu/index.php/ranger-user-guide.
The installation of user's packages on ranger EL4, 64-bit described below uses `modules `_, and assumes *csh* shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - module files are located under ``~/CAMd/modulefiles`` - download the :download:`customize_ranger_EL4.py` file:: wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_ranger_EL4.py .. literalinclude:: customize_ranger_EL4.py - download packages with :download:`download_ranger.sh`, by running ``sh download_ranger.sh``: .. literalinclude:: download_ranger.sh - start with the following modules loaded:: login3% module list Currently Loaded Modules: 1) TACC-paths 9) srb-client/3.4.1 17) globus/4.0.8 2) Linux 10) tg-policy/0.2 18) GLOBUS-4.0 3) cluster-paths 11) tgproxy/0.9.1 19) TERAGRID-DEV 4) pgi/7.2-5 12) tgresid/2.3.4 20) CTSSV4 5) mvapich/1.0.1 13) tgusage/3.0 21) gzip/1.3.12 6) binutils-amd/070220 14) uberftp/2.4 22) tar/1.22 7) TERAGRID-paths 15) tginfo/1.0.1 23) cluster 8) gx-map/0.5.3.3 16) TERAGRID-BASIC 24) TACC - unload/load the modules:: module switch pgi/7.2-5 gcc/4.4.5 module swap mvapich openmpi/1.3b module load python/2.5.2 module load mkl/10.0 - install packages, deploy modules and test with :download:`install_ranger_EL4.sh`, by running ``sh install_ranger_EL4.sh``: .. literalinclude:: install_ranger_EL4.sh **Note** that every time you install a new version of a package and deploy a new module file, it is better to keep the old module file. - submit the test job:: qsub submit.sh using the following :file:`submit.sh`:: #!/bin/bash #$ -V # Inherit the submission environment #$ -cwd # Start job in submission directory ##$ -N myMPI # Job Name ##$ -j y # Combine stderr and stdout ##$ -o $JOB_NAME.o$JOB_ID # Name of the output file (eg.
myMPI.oJobID) #$ -pe 16way 32 # Requests 16 tasks/node, 32 cores total #$ -q development # OR Queue name "normal" #$ -l h_rt=00:40:00 # Run time (hh:mm:ss) - 40 mins ##$ -M # Use email notification address ##$ -m be # Email at Begin and End of job #set -x # Echo commands, use "set echo" with csh module use --append /share/home/01067/tg803307/CAMd/modulefiles module load python/2.5.2 module load nose/0.11.3-1 module load numpy/1.5.0-1 module load campos-ase3 module load campos-gpaw-setups module unload pgi module load gcc/4.4.5 module unload mvapich module load openmpi/1.3b module load mkl/10.0 module load campos-gpaw # wget http://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/devel/256H2O/b256H2O.py ibrun `which gpaw-python` b256H2O.py - to enable the installation permanently add the following to *~/.bashrc*:: module use --append /share/home/01067/tg803307/CAMd/modulefiles module load python/2.5.2 module load nose/0.11.3-1 module load numpy/1.5.0-1 module load campos-ase3 module load campos-gpaw-setups module unload pgi module load gcc/4.4.5 module unload mvapich module load openmpi/1.3b module load mkl/10.0 module load campos-gpaw gpaw-24.1.0/doc/platforms/Linux/sepeli.rst000066400000000000000000000030521454550013000204170ustar00rootroot00000000000000.. _sepeli: ============= sepeli.csc.fi ============= Here you find information about the system ``_. The installed subversion in sepeli does not support https-protocol, so one should use a tar file. 
Compile like this:: # use the following modules and define the right python interpreter sepeli ~/gpaw/trunk> use mvapich-gnu64 mvapich-gnu64 is now in use MVAPICH environment set MPIHOME=/opt/mvapich//gnu64/ sepeli ~/gpaw/trunk> use ASE Atomic Simulation Environment in use [ASE is now in use] sepeli ~/gpaw/trunk> alias python 'python-pathscale64' sepeli ~/gpaw/trunk> unsetenv CC; unsetenv CFLAGS; unsetenv LDFLAGS At runtime you need the following:: # make sure that the right acml library is found sepeli> setenv LD_LIBRARY_PATH "/opt/acml/gnu64/lib:${LD_LIBRARY_PATH}" .. Note:: The compute nodes have a different filesystem than the front-end node. In particular, ``$HOME`` and ``$METAWRK`` are mounted only on the frontend, so one should place gpaw on ``$WRKDIR``. A sample job script with mvapich (Infiniband) MPI:: #$ -cwd #$ -pe mvapich-gnu64-4 8 #$ -S /bin/csh setenv PYTHONPATH /path_to_ase/:/path_to_gpaw/ setenv GPAW_SETUP_PATH /path_to_setups/ setenv PATH "$PATH":/path_to_gpaw-python/ mpirun -np 8 gpaw-python input.py In order to use a preinstalled version of gpaw one can give the command ``use gpaw``, which sets all the correct environment variables (:envvar:`PYTHONPATH`, :envvar:`GPAW_SETUP_PATH`, ...)
gpaw-24.1.0/doc/platforms/Linux/set_env_armageddon.sh000077500000000000000000000072071454550013000225750ustar00rootroot00000000000000#!/bin/sh export APPS="/home/firegam/CAMd" INSTALL_DACAPO=False # openmpi openmpi_version=1.4.3 PA=${APPS}/openmpi-${openmpi_version}-1/bin case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH PY=${APPS}/openmpi-${openmpi_version}-1/lib case $LD_LIBRARY_PATH in *${PY}*) ;; *?*) LD_LIBRARY_PATH=${PY}:${LD_LIBRARY_PATH} ;; *) LD_LIBRARY_PATH=${PY} ;; esac export LD_LIBRARY_PATH # nose nose_version=0.11.3 PA=${APPS}/nose-${nose_version}-1/usr/local/bin case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH PY=${APPS}/nose-${nose_version}-1/usr/local/lib/python2.6/dist-packages case $PYTHONPATH in *${PY}*) ;; *?*) PYTHONPATH=${PY}:${PYTHONPATH} ;; *) PYTHONPATH=${PY} ;; esac export PYTHONPATH # numpy numpy_version=1.5.0 PA=${APPS}/numpy-${numpy_version}-1/usr/local/bin case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH PY=${APPS}/numpy-${numpy_version}-1/usr/local/lib/python2.6/dist-packages case $PYTHONPATH in *${PY}*) ;; *?*) PYTHONPATH=${PY}:${PYTHONPATH} ;; *) PYTHONPATH=${PY} ;; esac export PYTHONPATH # campos-ase3 ase_version=3.4.1.1765 PA=${APPS}/python-ase-${ase_version}/tools case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH PY=${APPS}/python-ase-${ase_version}/ case $PYTHONPATH in *${PY}*) ;; *?*) PYTHONPATH=${PY}:${PYTHONPATH} ;; *) PYTHONPATH=${PY} ;; esac export PYTHONPATH # campos-gpaw-setups gpaw_setups_version=0.6.6300 PA=${APPS}/gpaw-setups-${gpaw_setups_version} case $GPAW_SETUP_PATH in *${PA}*) ;; *?*) GPAW_SETUP_PATH=${PA}:${GPAW_SETUP_PATH} ;; *) GPAW_SETUP_PATH=${PA} ;; esac export GPAW_SETUP_PATH # campos-gpaw gpaw_version=0.7.2.6974 PA=${APPS}/gpaw-${gpaw_version}/tools case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH 
PA=${APPS}/gpaw-${gpaw_version}/build/bin.linux-x86_64-2.6/ case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH PY=${APPS}/gpaw-${gpaw_version}/ case $PYTHONPATH in *${PY}*) ;; *?*) PYTHONPATH=${PY}:${PYTHONPATH} ;; *) PYTHONPATH=${PY} ;; esac export PYTHONPATH PY=${APPS}/gpaw-${gpaw_version}/build/lib.linux-x86_64-2.6/ case $PYTHONPATH in *${PY}*) ;; *?*) PYTHONPATH=${PY}:${PYTHONPATH} ;; *) PYTHONPATH=${PY} ;; esac export PYTHONPATH if [ "${INSTALL_DACAPO}" = "True" ]; then # dacapo-pseudopotentials dacapo_psp_version=1 PA=${APPS}/psp-${dacapo_psp_version} case $DACAPOPATH in *${PA}*) ;; *?*) DACAPOPATH=${PA}:${DACAPOPATH} ;; *) DACAPOPATH=${PA} ;; esac export DACAPOPATH # dacapo dacapo_version=2.7.16 PA=${APPS}/campos-dacapo-${dacapo_version}/src/gfortran_fnosecond_underscore_serial/dacapo_${dacapo_version}-1_serial.run case $DACAPOEXE_SERIAL in *${PA}*) ;; *?*) DACAPOEXE_SERIAL=${PA}:${DACAPOEXE_SERIAL} ;; *) DACAPOEXE_SERIAL=${PA} ;; esac export DACAPOEXE_SERIAL PA=${APPS}/campos-dacapo-${dacapo_version}/src/gfortran_fnosecond_underscore_mpi/dacapo_${dacapo_version}-1_mpi.run case $DACAPOEXE_PARALLEL in *${PA}*) ;; *?*) DACAPOEXE_PARALLEL=${PA}:${DACAPOEXE_PARALLEL} ;; *) DACAPOEXE_PARALLEL=${PA} ;; esac export DACAPOEXE_PARALLEL PA=${APPS}/campos-dacapo-${dacapo_version}/ case $PATH in *${PA}*) ;; *?*) PATH=${PA}:${PATH} ;; *) PATH=${PA} ;; esac export PATH fi gpaw-24.1.0/doc/platforms/Linux/siteconfig.py000066400000000000000000000001261454550013000211070ustar00rootroot00000000000000fftw = True scalapack = True libraries = ['xc', 'blas', 'fftw3', 'scalapack-openmpi'] gpaw-24.1.0/doc/platforms/Linux/sophia-venv.sh000077500000000000000000000051721454550013000212070ustar00rootroot00000000000000#!/usr/bin/bash set -e # stop if there are errors NAME=$1 FOLDER=$PWD echo ' export EASYBUILD_PREFIX=/groups/physics/modules module use $EASYBUILD_PREFIX/modules/all module purge unset PYTHONPATH module load 
GPAW/21.6.0-foss-2020b-libxc-5.1.5-ASE-3.22.0 ' > modules.sh . modules.sh python3 -m venv $NAME cd $NAME VENV=$PWD . bin/activate PIP="python3 -m pip" $PIP install --upgrade pip -qq mv bin/activate old mv ../modules.sh bin/activate cat old >> bin/activate rm old $PIP install pytz git clone https://gitlab.com/ase/ase.git $PIP install -U ase/ git clone https://gitlab.com/asr-dev/asr.git cd asr git checkout old-master #git checkout om_spinspiral cd .. $PIP install -e asr git clone https://gitlab.com/gpaw/gpaw.git cd gpaw #git checkout old-spinspiral cd .. echo " from os import environ from pathlib import Path scalapack = True fftw = True libraries = ['openblas', 'fftw3', 'readline', 'gfortran', 'scalapack', 'xc', 'vdwxc'] libxc = Path(environ['EBROOTLIBXC']) include_dirs.append(libxc / 'include') libvdwxc = Path(environ['EBROOTLIBVDWXC']) include_dirs.append(libvdwxc / 'include') library_dirs = environ['LD_LIBRARY_PATH'].split(':') " > gpaw/siteconfig.py pip install -e gpaw $PIP install myqueue graphviz qeh ase-ext gpaw install-data --basis --version=20000 . 
--no-register export GPAW_SETUP_PATH=$GPAW_SETUP_PATH:$VENV/gpaw-basis-pvalence-0.9.20000 echo "export GPAW_SETUP_PATH=$GPAW_SETUP_PATH" >> bin/activate ase completion >> bin/activate gpaw completion >> bin/activate mq completion >> bin/activate $PIP completion --bash >> bin/activate echo ' if [[ $SLURM_SUBMIT_DIR ]]; then export MPLBACKEND=Agg else export MPLBACKEND=TkAgg fi' >> bin/activate pip install flask==2.1.0 # Fix to remove the GPAW and ASE module installed in the "GPAW/21.6.0-foss-2020b" toolchain from path echo ' export PYTHONPATH=/groups/physics/modules/software/spglib-python/1.16.0-foss-2020b/lib/python3.8/site-packages:/groups/physics/modules/software/matplotlib/3.3.3-foss-2020b/lib/python3.8/site-packages:/g\ roups/physics/modules/software/Pillow/8.0.1-GCCcore-10.2.0/lib/python3.8/site-packages:/groups/physics/modules/software/Tkinter/3.8.6-GCCcore-10.2.0/lib/python3.8:/groups/physics/modules/software/Tkinte\ r/3.8.6-GCCcore-10.2.0/easybuild/python:/groups/physics/modules/software/SciPy-bundle/2020.11-foss-2020b/lib/python3.8/site-packages:/groups/physics/modules/software/pybind11/2.6.0-GCCcore-10.2.0/lib/py\ thon3.8/site-packages:/groups/physics/modules/software/Python/3.8.6-GCCcore-10.2.0/easybuild/python ' >> bin/activate mq --version #ase info #gpaw test source bin/activate pip uninstall ase pip install ase/ ase info gpaw test gpaw-24.1.0/doc/platforms/Linux/sophia.rst000066400000000000000000000036441454550013000204300ustar00rootroot00000000000000========================== Sophia cluster at HPC Risø ========================== This document explains how to compile a developer version of GPAW on Sophia. .. seealso:: * :mod:`Creation of Python virtual environments `. * Information about the Sophia cluster can be found at ``_. Please note that the Sophia web-page is only accessible from the DTU network. * `MyQueue `__. .. 
highlight:: bash Creating the venv ================= Download the :download:`sophia-venv.sh` script and run it like this:: $ ./sophia-venv.sh <venv-name> ... After a few minutes, you will have a ``<venv-name>`` folder with a GPAW installation inside. Using the venv ============== In the following, we will assume that your venv folder is ``~/venv1/``. The venv needs to be activated like this:: $ source ~/venv1/bin/activate and you can deactivate it when you no longer need to use it:: $ deactivate You will want the activation to happen automatically for the jobs you submit to Sophia. Here are three ways to do it: 1) If you always want to use one venv then just put the activation command in your ``~/.bashrc``. 2) If you only want jobs running inside a certain folder to use the venv, then add this to your ``~/.bashrc``:: if [[ $SLURM_SUBMIT_DIR/ = $HOME/project-1* ]]; then source ~/venv1/bin/activate fi Now, SLURM-jobs submitted inside your ``~/project-1/`` folder will use the venv. 3) Use the "automatic discovery of venv's" feature of MyQueue:: $ cd ~/project-1 $ ln -s ~/venv1 venv $ mq submit job.py MyQueue will look for ``venv/`` folders (or soft-links as in the example) in one of the parent folders and activate the venv automatically when your job starts running. Full script =========== .. literalinclude:: sophia-venv.sh gpaw-24.1.0/doc/platforms/Linux/sun_chpc.rst000066400000000000000000000021671454550013000207460ustar00rootroot00000000000000.. _sun_chpc: ============== sun.chpc.ac.za ============== Here you find information about the system http://www.chpc.ac.za/sun/. The installation of user's packages on SUSE 10 **login01**, 64-bit described below uses `modules `_, and assumes ``sh`` shell: - packages are installed under ``~/CAMd``:: mkdir ~/CAMd cd ~/CAMd - module files are located under ``~/CAMd/modulefiles``:: mkdir ~/CAMd/modulefiles - download the :download:`customize_sun_chpc_SUSE10.py` file: ..
literalinclude:: customize_sun_chpc_SUSE10.py - download packages with :download:`download_sun_chpc.sh`: .. literalinclude:: download_sun_chpc.sh - install packages, deploy modules and test with :download:`install_sun_chpc_SUSE10.sh`: .. literalinclude:: install_sun_chpc_SUSE10.sh **Note** that every time you wish to install a new version of a package, and deploy new module file, better keep the old module file. - submit the test job (jobs must be submitted from under *~/scratch*):: mqsub msub_sun_chpc.sh using the following :file:`msub_sun_chpc.sh`: .. literalinclude:: msub_sun_chpc.sh gpaw-24.1.0/doc/platforms/Linux/supernova.rst000066400000000000000000000071011454550013000211570ustar00rootroot00000000000000.. _supernova: ========= supernova ========= The supernova machine is a cluster of dual-core AMD Athlon 64 X2 5000+ CPUs, 2.6 GHz processors with 1 GB of memory per core. Instructions assume **tcsh**, installation under *${HOME}/opt*. Build the unoptimized numpy/scipy:: mkdir ${HOME}/opt cd ${HOME}/opt mkdir -p ${HOME}/opt/python/lib/python2.4/site-packages setenv PYTHONPATH ${HOME}/opt/python/lib/python2.4/site-packages wget http://dfn.dl.sourceforge.net/sourceforge/numpy/numpy-1.3.0.tar.gz wget http://dfn.dl.sourceforge.net/sourceforge/scipy/scipy-0.7.0.tar.gz wget http://python-nose.googlecode.com/files/nose-0.11.0.tar.gz tar zxf nose-0.11.0.tar.gz tar zxf numpy-1.3.0.tar.gz tar zxf scipy-0.7.0.tar.gz cd nose-0.11.0 python setup.py install --prefix=${HOME}/opt/python | tee install.log cd ../numpy-1.3.0 python setup.py install --prefix=${HOME}/opt/python | tee install.log cd .. python -c "import numpy; numpy.test()" wget http://www.netlib.org/blas/blas.tgz tar zxf blas.tgz export BLAS_SRC=${HOME}/opt/BLAS wget http://www.netlib.org/lapack/lapack.tgz tar zxf lapack.tgz export LAPACK_SRC=${HOME}/opt/lapack-3.2.1 cd scipy-0.7.0 python setup.py config_fc --fcompiler=gfortran install --prefix=${HOME}/opt/python | tee install.log cd .. 
python -c "import scipy; scipy.test()" Make sure that you have the right mpicc:: which mpicc /usr/local/ompi-1.2.5-pgi/bin/mpicc and build GPAW (``python setup.py build_ext | tee build_ext.log``) with this :file:`customize.py` file (**Note**: instructions valid from the **5232** release):: scalapack = True compiler = 'gcc' extra_compile_args += [ '-O3', '-funroll-all-loops', '-fPIC', ] libraries= [] mkl_lib_path = '/usr/local/intel/mkl/10.0.011/lib/em64t/' library_dirs = [mkl_lib_path] extra_link_args = [ mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_scalapack.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] **Note**: is case of problems similar to those found on :ref:`akka` static linking is required. A gpaw script :file:`test.py` can be submitted like this:: qsub -l nodes=1:ppn=8 -l walltime=00:30:00 -m abe run.sh where :file:`run.sh` looks like this:: #!/bin/sh #PBS -m ae #PBS -M email@email.com #PBS -q long #PBS -r n #PBS -l nodes=1:ppn=2 cd $PBS_O_WORKDIR echo Running on host `hostname` in directory `pwd` NPROCS=`wc -l < $PBS_NODEFILE` echo This jobs runs on the following $NPROCS processors: cat $PBS_NODEFILE export PYTHONPATH=~/opt/gpaw-0.7.5232:~/opt/python-ase-3.1.0.846:${PYTHONPATH} export PYTHONPATH=~/opt/python/lib/python2.4/site-packages:${PYTHONPATH} export PATH=~/opt/gpaw-0.7.5232/build/bin.linux-x86_64-2.4:${PATH} export GPAW_SETUP_PATH=~/opt/gpaw-setups-0.5.3574 export OMP_NUM_THREADS=1 mpiexec gpaw-python test.py Please make sure that your jobs do not run multi-threaded, e.g. 
for a job running on ``star237`` do from a login node:: ssh star237 ps -fL you should see **1** in the **NLWP** column. Numbers higher than **1** mean a multi-threaded job. It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/Linux/ubuntu.rst000066400000000000000000000012471454550013000204640ustar00rootroot00000000000000============= Ubuntu 18.04+ ============= .. highlight:: bash Install these Ubuntu_ packages:: $ sudo apt install python3-dev libopenblas-dev libxc-dev libscalapack-mpi-dev libfftw3-dev Use this :download:`siteconfig.py` file: .. literalinclude:: siteconfig.py Put the file in your ``~/.gpaw/`` folder. See also :ref:`siteconfig`. Then install GPAW (and dependencies: ASE_, NumPy, SciPy): * Latest stable version from PyPI:: $ pip install gpaw * Development version: Clone the source code and install it:: $ git clone https://gitlab.com/gpaw/gpaw.git $ pip install gpaw/ .. _Ubuntu: http://www.ubuntu.com/ .. _ASE: https://wiki.fysik.dtu.dk/ase/ gpaw-24.1.0/doc/platforms/Linux/uranus.rst000066400000000000000000000103231454550013000204520ustar00rootroot00000000000000.. _uranus: ====== uranus ====== The uranus machine is a cluster of dual socket, quad-core AMD Opteron 2354 CPUs, 2.2 GHz processors with 2 GB of memory per core. Instructions assume **bash**, installation under *${HOME}/opt*.
Build the unoptimized numpy/scipy:: mkdir ${HOME}/opt cd ${HOME}/opt wget http://dfn.dl.sourceforge.net/sourceforge/numpy/numpy-1.3.0.tar.gz wget http://dfn.dl.sourceforge.net/sourceforge/scipy/scipy-0.7.0.tar.gz wget http://python-nose.googlecode.com/files/nose-0.11.0.tar.gz tar zxf nose-0.11.0.tar.gz tar zxf numpy-1.3.0.tar.gz tar zxf scipy-0.7.0.tar.gz cd nose-0.11.0 python setup.py install --prefix=${HOME}/opt/python 2>&1 | tee install.log cd ../numpy-1.3.0 python setup.py install --prefix=${HOME}/opt/python 2>&1 | tee install.log export PYTHONPATH=${HOME}/opt/python/lib64/python2.4/site-packages:${HOME}/opt/python/lib/python2.4/site-packages cd .. python -c "import numpy; numpy.test()" wget http://www.netlib.org/blas/blas.tgz tar zxf blas.tgz export BLAS_SRC=${HOME}/opt/BLAS wget http://www.netlib.org/lapack/lapack.tgz tar zxf lapack.tgz export LAPACK_SRC=${HOME}/opt/lapack-3.2.1 cd scipy-0.7.0 python setup.py config_fc --fcompiler=gfortran install --prefix=${HOME}/opt/python 2>&1 | tee install.log cd .. 
python -c "import scipy; scipy.test()" Set these environment variables in the :file:`.bashrc` file:: if [ -z "${PYTHONPATH}" ] then export PYTHONPATH="" fi export PYTHONPATH=${HOME}/opt/python/lib64/python2.4/site-packages:${HOME}/opt/python/lib/python2.4/site-packages:${PYTHONPATH} export OMPI=/usr/local/ompi-ifort export OPAL_PREFIX=${OMPI} export OMP_NUM_THREADS=1 if [ -z "${PATH}" ] then export PATH=${OMPI}/bin else export PATH=${OMPI}/bin:${PATH} fi if [ -z "${LD_LIBRARY_PATH}" ] then export LD_LIBRARY_PATH=${OMPI}/lib else export LD_LIBRARY_PATH=${OMPI}/lib:${LD_LIBRARY_PATH} fi Make sure that you have the right mpicc:: which mpicc /usr/local/ompi-ifort/bin/mpicc and build GPAW (``python setup.py build_ext 2>&1 | tee build_ext.log``) with this :file:`customize.py` file (**Note**: instructions valid from the **5232** release):: scalapack = True compiler = 'gcc' extra_compile_args += [ '-O3', '-funroll-all-loops', '-fPIC', ] libraries= [] mkl_lib_path = '/opt/intel/mkl/10.0.4.023/lib/em64t/' extra_link_args = [ mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_scalapack.a', mkl_lib_path+'libmkl_blacs_openmpi_lp64.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', mkl_lib_path+'libmkl_intel_lp64.a', mkl_lib_path+'libmkl_sequential.a', mkl_lib_path+'libmkl_core.a', ] define_macros += [('GPAW_NO_UNDERSCORE_CBLACS', '1')] define_macros += [('GPAW_NO_UNDERSCORE_CSCALAPACK', '1')] **Note**: is case of problems similar to those found on :ref:`akka` static linking is required. 
A gpaw script :file:`test.py` can be submitted like this:: qsub -l nodes=1:ppn=8 -l walltime=00:30:00 -m abe run.sh where :file:`run.sh` looks like this:: #!/bin/sh #PBS -m ae #PBS -M email@email.com #PBS -q long #PBS -r n #PBS -l nodes=1:ppn=8 cd $PBS_O_WORKDIR echo Running on host `hostname` in directory `pwd` NPROCS=`wc -l < $PBS_NODEFILE` echo This jobs runs on the following $NPROCS processors: cat $PBS_NODEFILE export PYTHONPATH=${HOME}/opt/gpaw-0.7.5232:${HOME}/opt/python-ase-3.1.0.846:${PYTHONPATH} export PYTHONPATH=${HOME}/opt/python/lib64/python2.4/site-packages:${HOME}/opt/python/lib/python2.4/site-packages:${PYTHONPATH} export PATH=${HOME}/opt/gpaw-0.7.5232/build/bin.linux-x86_64-2.4:${PATH} export GPAW_SETUP_PATH=${HOME}/opt/gpaw-setups-0.5.3574 export OMP_NUM_THREADS=1 mpiexec gpaw-python test.py Please make sure that your jobs do not run multi-threaded, e.g. for a job running on ``node02`` do from a login node:: ssh node02 ps -fL you should see **1** in the **NLWP** column. Numbers higher than **1** mean a multi-threaded job. It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/Linux/vsc.univie.rst000066400000000000000000000207721454550013000212370ustar00rootroot00000000000000.. _vsc.univie: ========== vsc.univie ========== The vsc.univie machine is a cluster of dual socket, hexa-core Intel Xeon X5650 2.67 GHz processors with 2 GB of memory per core. Instructions assume **bash**, installation under ``${HOME}/opt``.
Setup the root directory:: mkdir -p ${HOME}/opt cd ${HOME}/opt Set the versions:: export nose=0.11.3 # Warning: version 1.6.0 seems inconsistent about C-, Fortran-contiguous # http://mail.scipy.org/pipermail/numpy-discussion/2011-July/057557.html export numpy=1.5.1 export scipy=0.9.0 export acml=4.0.1 export ase=3.5.1.2175 export gpaw=0.8.0.8092 export setups=0.8.7929 and create sh startup script:: cat < ${HOME}/opt/campos.sh #!/bin/sh # export GPAW_PLATFORM=`python -c "from distutils import util, sysconfig; print(util.get_platform()+'-'+sysconfig.get_python_version())"` # export LD_LIBRARY_PATH=\${HOME}/opt/acml-${acml}/gfortran64/lib:\${LD_LIBRARY_PATH} export LD_LIBRARY_PATH=\${HOME}/opt/CBLAS.acml-${acml}/lib:\${LD_LIBRARY_PATH} # if cblas used # export PYTHONPATH=\${HOME}/opt/nose-${nose}-1/usr/lib/python2.4/site-packages:\${PYTHONPATH} export PATH=\${HOME}/opt/nose-${nose}-1/usr/bin:\${PATH} # export PYTHONPATH=\${HOME}/opt/numpy-${numpy}-1/usr/lib64/python2.4/site-packages:\${PYTHONPATH} export PATH=\${HOME}/opt/numpy-${numpy}-1/usr/bin:\${PATH} # export PYTHONPATH=\${HOME}/opt/scipy-${scipy}-1/usr/lib64/python2.4/site-packages:\${PYTHONPATH} export PATH=\${HOME}/opt/scipy-${scipy}-1/usr/bin:\${PATH} # export PYTHONPATH=\${HOME}/opt/python-ase-${ase}:\${PYTHONPATH} export PATH=\${HOME}/opt/python-ase-${ase}/tools:\${PATH} # export GPAW_SETUP_PATH=\${HOME}/opt/gpaw-setups-${setups} # export GPAW_HOME=\${HOME}/opt/gpaw-${gpaw} export PYTHONPATH=\${GPAW_HOME}:\${PYTHONPATH} export PYTHONPATH=\${GPAW_HOME}/build/lib.${GPAW_PLATFORM}:\${PYTHONPATH} export PATH=\${GPAW_HOME}/build/bin.${GPAW_PLATFORM}:\${PATH} export PATH=\${GPAW_HOME}/tools:\${PATH} EOF Download and install acml:: acml-${acml} # download cd acml-${acml} tar zxf acml-*.tgz && tar zxf contents-acml-*.tgz **Note**: numpy with acml dotblas Segmentation Faults (well, for some versions on numpy, etc?) for :file:`gpaw/test/numpy_core_multiarray_dot.py` or :file:`gpaw/test/gemm.py`. 
Still there is no performance improvement for :file:`gpaw/test/gemm.py` (if it works), even in the case of dynamic linking of cblas/acml - check with ldd that _dotblas.so is linked to both acml and cblas. This is how you can download and install cblas:: wget http://www.netlib.org/blas/blast-forum/cblas.tgz tar zxf cblas.tgz && mv -f CBLAS CBLAS.acml-${acml} && cd CBLAS.acml-${acml} cp -p Makefile.LINUX Makefile.in # fix Makefile.in export PLAT=LINUX export BLLIB=${HOME}/opt/acml-${acml}/gfortran64/lib/libacml.a export CC=gcc export FC=gfortran export CFLAGS='-O3 -funroll-all-loops -DADD_ -fPIC' export FFLAGS='-O3 -funroll-all-loops -DADD_ -fPIC' # sed -i "s<^PLAT =.*&1 | tee make_all.log make shared 2>&1 | tee make_shared.log # create link: numpy needs all the libraries in one directory # separate directories in site.cfg do not work cd lib && ln -s cblas_${PLAT}.a libcblas.a ln -s ${HOME}/opt/acml-${acml}/gfortran64/lib/libacml.a . ln -s ${HOME}/opt/acml-${acml}/gfortran64/lib/libacml_mv.a . # if dynamic library needed ln -s ${HOME}/opt/acml-${acml}/gfortran64/lib/libacml.so . ln -s ${HOME}/opt/acml-${acml}/gfortran64/lib/libacml_mv.so . cd ../.. 
Build nose/numpy/scipy:: wget --no-check-certificate https://downloads.sourceforge.net/project/numpy/NumPy/${numpy}/numpy-${numpy}.tar.gz wget --no-check-certificate https://downloads.sourceforge.net/project/scipy/scipy/${scipy}/scipy-${scipy}.tar.gz wget http://python-nose.googlecode.com/files/nose-${nose}.tar.gz tar zxf nose-${nose}.tar.gz tar zxf numpy-${numpy}.tar.gz tar zxf scipy-${scipy}.tar.gz cd nose-${nose} python setup.py install --root=${HOME}/opt/nose-${nose}-1 2>&1 | tee install.log use the following ``site.cfg`` to build numpy without cblas (that's safer):: cat <<EOF > ${HOME}/opt/numpy-${numpy}/site.cfg [DEFAULT] library_dirs = /usr/lib64:${HOME}/opt/acml-${acml}/gfortran64/lib include_dirs = ${HOME}/opt/acml-${acml}/gfortran64/lib/../include:/usr/include/suitesparse [blas] libraries = acml library_dirs = ${HOME}/opt/acml-${acml}/gfortran64/lib [lapack] libraries = acml, gfortran library_dirs = ${HOME}/opt/acml-${acml}/gfortran64/lib EOF and this one with cblas based on acml:: cat <<EOF > ${HOME}/opt/numpy-${numpy}/site.cfg [DEFAULT] library_dirs = /usr/lib64:${HOME}/opt/CBLAS.acml-${acml}/lib include_dirs = ${HOME}/opt/acml-${acml}/gfortran64/lib/../include:/usr/include/suitesparse:${HOME}/opt/CBLAS.acml-${acml}/include [blas] libraries = acml library_dirs = ${HOME}/opt/CBLAS.acml-${acml}/lib [lapack] libraries = acml, gfortran library_dirs = ${HOME}/opt/CBLAS.acml-${acml}/lib EOF **Note**: the ``site.cfg`` file is used only to specify directories, libraries from ``site.cfg`` are ignored by numpy. Moreover numpy needs all the libraries in one directory, separate directories in site.cfg do not work. continue with:: cd ../numpy-${numpy} # force numpy to use internal blas for dotblas + acml, note the double quotes! 
sed -i "s/_lib_names = \['blas'\]/_lib_names = ['acml']/g" numpy/distutils/system_info.py sed -i "s/_lib_names = \['lapack'\]/_lib_names = ['acml']/g" numpy/distutils/system_info.py # or with dotblas acml (through cblas) - seems not working or Segmentation Faults sed -i "s<_lib_mkl = .*<_lib_mkl = ['acml','cblas']<" numpy/distutils/system_info.py sed -i "s<\['mkl_lapack32','mkl_lapack64'\]<['acml','gfortran']<" numpy/distutils/system_info.py sed -i "s&1 | tee build.log python setup.py install --root=${HOME}/opt/numpy-${numpy}-1 2>&1 | tee install.log cd .. source ${HOME}/opt/campos.sh python -c "import numpy; numpy.test()" cd scipy-${scipy} python setup.py config_fc --fcompiler=gfortran install --root=${HOME}/opt/scipy-${scipy}-1 2>&1 | tee install.log cd .. python -c "import scipy; scipy.test()" Make sure that you have the right mpicc:: which mpicc /usr/mpi/qlogic/bin/mpicc Install ASE/GPAW:: wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase}.tar.gz wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw}.tar.gz wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${setups}.tar.gz tar zxf python-ase-${ase}.tar.gz tar zxf gpaw-${gpaw}.tar.gz tar zxf gpaw-setups-${setups}.tar.gz mkdir testase && cd testase && testase.py 2>&1 | tee ../testase.log wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/doc/platforms/Linux/customize_vsc_univie.py cd ../gpaw-${gpaw} python setup.py --remove-default-flags --customize=../customize_vsc_univie.py build_ext 2>&1 | tee build_ext.log The :file:`customize_vsc_univie.py` looks like: .. 
literalinclude:: customize_vsc_univie.py GPAW tests :file:`gpaw-test` can be submitted like this:: qsub run.sh where :file:`run.sh` looks like this:: #!/bin/sh #$ -pe mpich 8 #$ -V #$ -M my.name@example.at #$ -m be #$ -l h_rt=00:50:00 if [ -z "${PYTHONPATH}" ] then export PYTHONPATH="" fi source ${HOME}/opt/campos.sh export OMP_NUM_THREADS=1 mpirun -m $TMPDIR/machines -np $NSLOTS gpaw-python `which gpaw-test` Please make sure that your jobs do not run multi-threaded, e.g. for a job running on ``node02`` do from a login node:: ssh node02 ps -fL you should see **1** in the **NLWP** column. Numbers higher than **1** mean a multi-threaded job. It's convenient to customize as described on the :ref:`parallel_runs` page. gpaw-24.1.0/doc/platforms/MacOSX/000077500000000000000000000000001454550013000163775ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/MacOSX/anaconda.rst000066400000000000000000000031331454550013000206750ustar00rootroot00000000000000.. _anaconda: ================= Anaconda on MacOS ================= We recommend using Python from :ref:`homebrew` on macOS, but if you need to use the Anaconda python that is also possible. Both ASE and GPAW work with Anaconda python. Install Anaconda ================ * Install the Python 3 version. * We strongly recommend installing Anaconda for a single user. The permission handling in Anaconda is broken on macOS, and a multi-user installation of Anaconda will break as soon as another user installs a package. Install Homebrew ================ You need it for some GPAW prerequisites! 
Get the Xcode Command Line Tools with the command:: $ xcode-select --install (if it fails, you may have to download Xcode from the App Store) Install Homebrew:: $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" $ echo 'export PATH=/usr/local/bin:$PATH' >> ~/.bash_profile Install ASE and GPAW dependencies ================================= :: $ brew install libxc $ brew install open-mpi $ brew install fftw Check your installation ======================= You should still get python and pip from the anaconda installation:: $ python --version $ pip --version Python should list an anaconda version, and pip should be loaded from ``/Users/xxxx/anaconda3/....`` Install and test ASE and GPAW ============================= Install and test ASE:: $ pip install --upgrade --user ase $ python -m ase test Install GPAW:: $ pip install --upgrade --user gpaw Install GPAW setups:: $ gpaw --verbose install-data gpaw-24.1.0/doc/platforms/MacOSX/homebrew.rst000066400000000000000000000024061454550013000207430ustar00rootroot00000000000000.. _homebrew: ======== Homebrew ======== .. highlight:: bash Get Xcode from the App Store and install it. You also need to install the command line tools, do this with the command:: $ xcode-select --install Install Homebrew:: $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" $ echo 'export PATH=/usr/local/bin:$PATH' >> ~/.bash_profile Install ASE and GPAW dependencies:: $ brew install python $ brew install gcc $ brew install libxc $ brew install open-mpi $ brew install fftw Install pip:: $ sudo easy_install pip Install required Python packages:: $ pip install numpy scipy matplotlib Install and test ASE:: $ pip install --upgrade --user ase $ python -m ase test Use this :ref:`siteconfig.py ` file: .. literalinclude:: siteconfig.py Install GPAW:: $ pip install --upgrade --user gpaw Install GPAW setups:: $ gpaw install-data .. 
note:: Alternative solution if the above command fails:: $ curl -O https://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-0.9.20000.tar.gz $ tar -xf gpaw-setups-0.9.20000.tar.gz $ echo 'export GPAW_SETUP_PATH=~/gpaw-setups-0.9.20000' >> ~/.bash_profile Test GPAW:: $ gpaw test $ gpaw -P 4 test gpaw-24.1.0/doc/platforms/MacOSX/siteconfig.py000066400000000000000000000000501454550013000210760ustar00rootroot00000000000000fftw = True libraries = ['xc', 'fftw3'] gpaw-24.1.0/doc/platforms/Solaris/000077500000000000000000000000001454550013000167215ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Solaris/corona.rst000066400000000000000000000004071454550013000207350ustar00rootroot00000000000000.. _corona: ============= corona.csc.fi ============= Here you find information about the system ``_. Submit jobs like this:: qsub -pe cre 8 -cwd -V -S /p/bin/python job.py gpaw-24.1.0/doc/platforms/Windows/000077500000000000000000000000001454550013000167375ustar00rootroot00000000000000gpaw-24.1.0/doc/platforms/Windows/Vagrantfile000066400000000000000000000160061454550013000211270ustar00rootroot00000000000000# -*- mode: ruby -*- # vi: set ft=ruby : # http://stackoverflow.com/questions/23926945/specify-headless-or-gui-from-command-line def gui_enabled? !ENV.fetch('GUI', '').empty? end Vagrant.configure(2) do |config| # windows config.vm.define "windows" do |windows| # the http://aka.ms/vagrant-win boxes hang for me ... windows.vm.box = "opentable/win-2012r2-standard-amd64-nocm" windows.vm.box_url = "opentable/win-2012r2-standard-amd64-nocm" windows.vm.provider 'virtualbox' do |v| v.gui = gui_enabled? end windows.vm.provider "virtualbox" do |v| v.memory = 512 # Windows is greedy v.cpus = 1 end windows.ssh.shell = "powershell" end # compilation of openblas takes ~1h $install_openblas = <