gpaw-1.3.0 source tree (commit 82cebebc037510d876f90d9f8d533fd021f751f5)

==> .flake8 <==
[flake8]
ignore = E129,W293,W503
==> .gitignore <==
*.pyc
build/
/dist
MANIFEST
.DS_Store
*/.DS_Store
# If you build the documentation in the source tree, you will create a
# lot of files that we don't want to see with "git status":
*.png
*.gif
*.pov
*.jpg
*.odg
*.dat
*.ini
*.traj
*.pdf
*.csv
*.txt
*.log
*.xyz
*.db
*.pckl
*.gz
N.LDA
*.gpw
*.npy
theme.css
*.svg
*.json
doc/setups/[A-Z]*.rst
!doc/devel/gpaw-logo.svg
# If you run the tests, these may write out files
*.gpw
# Stuff from LaTeX:
*.aux
*.bbl
*.blg
*.toc
# Editor backup files:
*~
\#*
# Vim swap files:
.*.sw?
# Emacs lock files:
.\#*
# This one is OK:
!requirements.txt
==> .gitlab-ci.yml <==
master:
  script:
    - apt-get update -qy
    - apt-get install -qy python-dev python-numpy python-scipy libopenblas-dev liblapack-dev libxc-dev python-pip > apt-get.output
    - pip install flake8
    - pip install git+https://gitlab.com/ase/ase.git@master > ase-install.output
    - python setup.py install --user > gpaw-install.output
    - export PATH=~/.local/bin:$PATH
    - gpaw install-data --register gpaw-datasets
    - gpaw info
    - ase build H2 -V1 | gpaw run -p mode=pw
    - python -m gpaw.test.pyflakes_check
==> CHANGELOG.rst <==
Changelog
=========
See what's new in GPAW here:
https://wiki.fysik.dtu.dk/gpaw/releasenotes.html
==> CONTRIBUTING.rst <==
Contributing
============
The source code for GPAW is handled the same way as the source code for the
ASE project. Read here about how to get started:
https://wiki.fysik.dtu.dk/ase/development/contribute.html
==> COPYING <==
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year>  <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program>  Copyright (C) <year>  <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
==> LICENSE <==
GPAW is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GPAW is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GPAW.  If not, see <http://www.gnu.org/licenses/>.
==> MANIFEST.in <==
include MANIFEST.in
include COPYING LICENSE CONTRIBUTING.rst CHANGELOG.rst config.py customize.py
include c/*.c
include c/*.h
include c/xc/*.h
include c/xc/*.c
include c/bmgs/*.h
include c/bmgs/*.c
==> README.rst <==
GPAW
====
GPAW is a density-functional theory (DFT) Python_ code based on the
projector-augmented wave (PAW) method and the atomic simulation environment
(ASE_). It uses plane-waves, atom-centered basis-functions or real-space
uniform grids combined with multigrid methods.
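
The calculation mode is chosen when the calculator is created.  A minimal
sketch (parameter values are illustrative only)::

    from gpaw import GPAW, PW

    calc_fd = GPAW(mode='fd', h=0.2)            # real-space grid, 0.2 Å spacing
    calc_pw = GPAW(mode=PW(400))                # plane waves, 400 eV cutoff
    calc_lcao = GPAW(mode='lcao', basis='dzp')  # atom-centered LCAO basis
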
Webpage: http://wiki.fysik.dtu.dk/gpaw
Requirements
------------
* Python_ 2.6-3.5
* ASE_ (atomic simulation environment)
* NumPy_ (base N-dimensional array package)
* LibXC
* BLAS
* LAPACK
Optional:
* MPI
* ScaLAPACK
* SciPy_ (library for scientific computing)
Installation
------------
Do this::

    $ python setup.py install --user

and make sure you have ``~/.local/bin`` in your ``$PATH``.
For more details, please see:
https://wiki.fysik.dtu.dk/gpaw/install.html
Testing
-------
Please run the tests::

    $ gpaw test -j 4  # takes 1 hour!
and send us the output if there are failing tests.
Contact
-------
* Mailing lists: gpaw-users_ and gpaw-developers_
* IRC_: #gpaw on freenode.net
Please send us bug-reports, patches, code, ideas and questions.
Example
-------
Geometry optimization of hydrogen molecule:
>>> from ase import Atoms
>>> from ase.optimize import BFGS
>>> from ase.io import write
>>> from gpaw import GPAW, PW
>>> h2 = Atoms('H2',
...            positions=[[0, 0, 0],
...                       [0, 0, 0.7]])
>>> h2.center(vacuum=2.5)
>>> h2.set_calculator(GPAW(xc='PBE',
...                        mode=PW(300),
...                        txt='h2.txt'))
>>> opt = BFGS(h2, trajectory='h2.traj')
>>> opt.run(fmax=0.02)
BFGS: 0 09:08:09 -6.566505 2.2970
BFGS: 1 09:08:11 -6.629859 0.1871
BFGS: 2 09:08:12 -6.630410 0.0350
BFGS: 3 09:08:13 -6.630429 0.0003
>>> write('H2.xyz', h2)
>>> h2.get_potential_energy() # ASE's units are eV and Å
-6.6304292169392784
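
If GPAW was compiled with MPI support, the same calculation can be run in
parallel with the MPI-enabled interpreter (assuming the example above is
saved as ``h2.py``; the exact command may depend on your installation)::

    $ mpiexec -np 4 gpaw-python h2.py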
Getting started
---------------
Once you have familiarized yourself with ASE_ and NumPy_, you should take a
look at the GPAW exercises_ and tutorials_.
.. _Python: http://www.python.org/
.. _ASE: http://wiki.fysik.dtu.dk/ase
.. _NumPy: http://docs.scipy.org/doc/numpy/reference/
.. _SciPy: http://docs.scipy.org/doc/scipy/reference/
.. _Matplotlib: http://matplotlib.org/
.. _pygtk: http://www.pygtk.org/
.. _gpaw-users: https://listserv.fysik.dtu.dk/mailman/listinfo/gpaw-users
.. _gpaw-developers: https://listserv.fysik.dtu.dk/mailman/listinfo/gpaw-developers
.. _IRC: http://webchat.freenode.net/?randomnick=0&channels=gpaw
.. _exercises: https://wiki.fysik.dtu.dk/gpaw/exercises/exercises.html
.. _tutorials: https://wiki.fysik.dtu.dk/gpaw/tutorials/tutorials.html
==> c/_gpaw.c <==
/* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2007-2010 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#include <numpy/arrayobject.h>
#ifdef GPAW_HPM
PyObject* ibm_hpm_start(PyObject *self, PyObject *args);
PyObject* ibm_hpm_stop(PyObject *self, PyObject *args);
PyObject* ibm_mpi_start(PyObject *self);
PyObject* ibm_mpi_stop(PyObject *self);
#endif
#ifdef CRAYPAT
#include
PyObject* craypat_region_begin(PyObject *self, PyObject *args);
PyObject* craypat_region_end(PyObject *self, PyObject *args);
#endif
PyObject* symmetrize(PyObject *self, PyObject *args);
PyObject* symmetrize_ft(PyObject *self, PyObject *args);
PyObject* symmetrize_wavefunction(PyObject *self, PyObject *args);
PyObject* symmetrize_return_index(PyObject *self, PyObject *args);
PyObject* symmetrize_with_index(PyObject *self, PyObject *args);
PyObject* map_k_points(PyObject *self, PyObject *args);
PyObject* scal(PyObject *self, PyObject *args);
PyObject* mmm(PyObject *self, PyObject *args);
PyObject* tetrahedron_weight(PyObject *self, PyObject *args);
PyObject* gemm(PyObject *self, PyObject *args);
PyObject* gemv(PyObject *self, PyObject *args);
PyObject* axpy(PyObject *self, PyObject *args);
PyObject* czher(PyObject *self, PyObject *args);
PyObject* rk(PyObject *self, PyObject *args);
PyObject* r2k(PyObject *self, PyObject *args);
PyObject* dotc(PyObject *self, PyObject *args);
PyObject* dotu(PyObject *self, PyObject *args);
PyObject* multi_dotu(PyObject *self, PyObject *args);
PyObject* multi_axpy(PyObject *self, PyObject *args);
PyObject* diagonalize(PyObject *self, PyObject *args);
PyObject* diagonalize_mr3(PyObject *self, PyObject *args);
PyObject* general_diagonalize(PyObject *self, PyObject *args);
PyObject* inverse_cholesky(PyObject *self, PyObject *args);
PyObject* inverse_symmetric(PyObject *self, PyObject *args);
PyObject* inverse_general(PyObject *self, PyObject *args);
PyObject* linear_solve_band(PyObject *self, PyObject *args);
PyObject* linear_solve_tridiag(PyObject *self, PyObject *args);
PyObject* right_eigenvectors(PyObject *self, PyObject *args);
PyObject* NewLocalizedFunctionsObject(PyObject *self, PyObject *args);
PyObject* NewOperatorObject(PyObject *self, PyObject *args);
PyObject* NewWOperatorObject(PyObject *self, PyObject *args);
PyObject* NewSplineObject(PyObject *self, PyObject *args);
PyObject* NewTransformerObject(PyObject *self, PyObject *args);
PyObject* pc_potential(PyObject *self, PyObject *args);
PyObject* heap_mallinfo(PyObject *self);
PyObject* elementwise_multiply_add(PyObject *self, PyObject *args);
PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args);
PyObject* utilities_vdot(PyObject *self, PyObject *args);
PyObject* utilities_vdot_self(PyObject *self, PyObject *args);
PyObject* errorfunction(PyObject *self, PyObject *args);
PyObject* cerf(PyObject *self, PyObject *args);
PyObject* pack(PyObject *self, PyObject *args);
PyObject* unpack(PyObject *self, PyObject *args);
PyObject* unpack_complex(PyObject *self, PyObject *args);
PyObject* hartree(PyObject *self, PyObject *args);
PyObject* localize(PyObject *self, PyObject *args);
PyObject* NewXCFunctionalObject(PyObject *self, PyObject *args);
PyObject* NewlxcXCFunctionalObject(PyObject *self, PyObject *args);
PyObject* lxcXCFuncNum(PyObject *self, PyObject *args);
PyObject* exterior_electron_density_region(PyObject *self, PyObject *args);
PyObject* plane_wave_grid(PyObject *self, PyObject *args);
PyObject* overlap(PyObject *self, PyObject *args);
PyObject* vdw(PyObject *self, PyObject *args);
PyObject* vdw2(PyObject *self, PyObject *args);
PyObject* spherical_harmonics(PyObject *self, PyObject *args);
PyObject* spline_to_grid(PyObject *self, PyObject *args);
PyObject* NewLFCObject(PyObject *self, PyObject *args);
#if defined(GPAW_WITH_SL) && defined(PARALLEL)
PyObject* new_blacs_context(PyObject *self, PyObject *args);
PyObject* get_blacs_gridinfo(PyObject* self, PyObject *args);
PyObject* get_blacs_local_shape(PyObject* self, PyObject *args);
PyObject* blacs_destroy(PyObject *self, PyObject *args);
PyObject* scalapack_set(PyObject *self, PyObject *args);
PyObject* scalapack_redist(PyObject *self, PyObject *args);
PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args);
PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args);
#ifdef GPAW_MR3
PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args);
#endif
PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args);
PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args);
#ifdef GPAW_MR3
PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args);
#endif
PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args);
PyObject* scalapack_inverse(PyObject *self, PyObject *args);
PyObject* scalapack_solve(PyObject *self, PyObject *args);
PyObject* pblas_tran(PyObject *self, PyObject *args);
PyObject* pblas_gemm(PyObject *self, PyObject *args);
PyObject* pblas_hemm(PyObject *self, PyObject *args);
PyObject* pblas_gemv(PyObject *self, PyObject *args);
PyObject* pblas_r2k(PyObject *self, PyObject *args);
PyObject* pblas_rk(PyObject *self, PyObject *args);
#endif // GPAW_WITH_SL and PARALLEL
#ifdef GPAW_PAPI
PyObject* papi_mem_info(PyObject *self, PyObject *args);
#endif
#ifdef GPAW_WITH_LIBVDWXC
PyObject* libvdwxc_create(PyObject *self, PyObject *args);
PyObject* libvdwxc_has(PyObject* self, PyObject *args);
PyObject* libvdwxc_init_serial(PyObject *self, PyObject *args);
PyObject* libvdwxc_calculate(PyObject *self, PyObject *args);
PyObject* libvdwxc_tostring(PyObject *self, PyObject *args);
PyObject* libvdwxc_free(PyObject* self, PyObject* args);
PyObject* libvdwxc_init_mpi(PyObject* self, PyObject* args);
PyObject* libvdwxc_init_pfft(PyObject* self, PyObject* args);
#endif // GPAW_WITH_LIBVDWXC
// Moving least squares interpolation
PyObject* mlsqr(PyObject *self, PyObject *args);
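// Method table mapping the Python-visible names of the _gpaw extension
// module to the C functions declared above.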
static PyMethodDef functions[] = {
{"symmetrize", symmetrize, METH_VARARGS, 0},
{"symmetrize_ft", symmetrize_ft, METH_VARARGS, 0},
{"symmetrize_wavefunction", symmetrize_wavefunction, METH_VARARGS, 0},
{"symmetrize_return_index", symmetrize_return_index, METH_VARARGS, 0},
{"symmetrize_with_index", symmetrize_with_index, METH_VARARGS, 0},
{"map_k_points", map_k_points, METH_VARARGS, 0},
{"scal", scal, METH_VARARGS, 0},
{"mmm", mmm, METH_VARARGS, 0},
{"tetrahedron_weight", tetrahedron_weight, METH_VARARGS, 0},
{"gemm", gemm, METH_VARARGS, 0},
{"gemv", gemv, METH_VARARGS, 0},
{"axpy", axpy, METH_VARARGS, 0},
{"czher", czher, METH_VARARGS, 0},
{"rk", rk, METH_VARARGS, 0},
{"r2k", r2k, METH_VARARGS, 0},
{"dotc", dotc, METH_VARARGS, 0},
{"dotu", dotu, METH_VARARGS, 0},
{"multi_dotu", multi_dotu, METH_VARARGS, 0},
{"multi_axpy", multi_axpy, METH_VARARGS, 0},
{"diagonalize", diagonalize, METH_VARARGS, 0},
{"diagonalize_mr3", diagonalize_mr3, METH_VARARGS, 0},
{"general_diagonalize", general_diagonalize, METH_VARARGS, 0},
{"inverse_cholesky", inverse_cholesky, METH_VARARGS, 0},
{"inverse_symmetric", inverse_symmetric, METH_VARARGS, 0},
{"inverse_general", inverse_general, METH_VARARGS, 0},
{"linear_solve_band", linear_solve_band, METH_VARARGS, 0},
{"linear_solve_tridiag", linear_solve_tridiag, METH_VARARGS, 0},
{"right_eigenvectors", right_eigenvectors, METH_VARARGS, 0},
{"LocalizedFunctions", NewLocalizedFunctionsObject, METH_VARARGS, 0},
{"Operator", NewOperatorObject, METH_VARARGS, 0},
{"WOperator", NewWOperatorObject, METH_VARARGS, 0},
{"Spline", NewSplineObject, METH_VARARGS, 0},
{"Transformer", NewTransformerObject, METH_VARARGS, 0},
{"heap_mallinfo", (PyCFunction) heap_mallinfo, METH_NOARGS, 0},
{"elementwise_multiply_add", elementwise_multiply_add, METH_VARARGS, 0},
{"utilities_gaussian_wave", utilities_gaussian_wave, METH_VARARGS, 0},
{"utilities_vdot", utilities_vdot, METH_VARARGS, 0},
{"utilities_vdot_self", utilities_vdot_self, METH_VARARGS, 0},
{"eed_region", exterior_electron_density_region, METH_VARARGS, 0},
{"plane_wave_grid", plane_wave_grid, METH_VARARGS, 0},
{"erf", errorfunction, METH_VARARGS, 0},
{"cerf", cerf, METH_VARARGS, 0},
{"pack", pack, METH_VARARGS, 0},
{"unpack", unpack, METH_VARARGS, 0},
{"unpack_complex", unpack_complex, METH_VARARGS, 0},
{"hartree", hartree, METH_VARARGS, 0},
{"localize", localize, METH_VARARGS, 0},
{"XCFunctional", NewXCFunctionalObject, METH_VARARGS, 0},
{"lxcXCFunctional", NewlxcXCFunctionalObject, METH_VARARGS, 0},
{"lxcXCFuncNum", lxcXCFuncNum, METH_VARARGS, 0},
{"overlap", overlap, METH_VARARGS, 0},
{"vdw", vdw, METH_VARARGS, 0},
{"vdw2", vdw2, METH_VARARGS, 0},
{"spherical_harmonics", spherical_harmonics, METH_VARARGS, 0},
{"pc_potential", pc_potential, METH_VARARGS, 0},
{"spline_to_grid", spline_to_grid, METH_VARARGS, 0},
{"LFC", NewLFCObject, METH_VARARGS, 0},
#if defined(GPAW_WITH_SL) && defined(PARALLEL)
{"new_blacs_context", new_blacs_context, METH_VARARGS, NULL},
{"get_blacs_gridinfo", get_blacs_gridinfo, METH_VARARGS, NULL},
{"get_blacs_local_shape", get_blacs_local_shape, METH_VARARGS, NULL},
{"blacs_destroy", blacs_destroy, METH_VARARGS, 0},
{"scalapack_set", scalapack_set, METH_VARARGS, 0},
{"scalapack_redist", scalapack_redist, METH_VARARGS, 0},
{"scalapack_diagonalize_dc", scalapack_diagonalize_dc, METH_VARARGS, 0},
{"scalapack_diagonalize_ex", scalapack_diagonalize_ex, METH_VARARGS, 0},
#ifdef GPAW_MR3
{"scalapack_diagonalize_mr3", scalapack_diagonalize_mr3, METH_VARARGS, 0},
#endif // GPAW_MR3
{"scalapack_general_diagonalize_dc",
scalapack_general_diagonalize_dc, METH_VARARGS, 0},
{"scalapack_general_diagonalize_ex",
scalapack_general_diagonalize_ex, METH_VARARGS, 0},
#ifdef GPAW_MR3
{"scalapack_general_diagonalize_mr3",
scalapack_general_diagonalize_mr3, METH_VARARGS, 0},
#endif // GPAW_MR3
{"scalapack_inverse_cholesky", scalapack_inverse_cholesky,
METH_VARARGS, 0},
{"scalapack_inverse", scalapack_inverse, METH_VARARGS, 0},
{"scalapack_solve", scalapack_solve, METH_VARARGS, 0},
{"pblas_tran", pblas_tran, METH_VARARGS, 0},
{"pblas_gemm", pblas_gemm, METH_VARARGS, 0},
{"pblas_hemm", pblas_hemm, METH_VARARGS, 0},
{"pblas_gemv", pblas_gemv, METH_VARARGS, 0},
{"pblas_r2k", pblas_r2k, METH_VARARGS, 0},
{"pblas_rk", pblas_rk, METH_VARARGS, 0},
#endif // GPAW_WITH_SL && PARALLEL
#ifdef GPAW_HPM
{"hpm_start", ibm_hpm_start, METH_VARARGS, 0},
{"hpm_stop", ibm_hpm_stop, METH_VARARGS, 0},
{"mpi_start", (PyCFunction) ibm_mpi_start, METH_NOARGS, 0},
{"mpi_stop", (PyCFunction) ibm_mpi_stop, METH_NOARGS, 0},
#endif // GPAW_HPM
#ifdef CRAYPAT
{"craypat_region_begin", craypat_region_begin, METH_VARARGS, 0},
{"craypat_region_end", craypat_region_end, METH_VARARGS, 0},
#endif // CRAYPAT
#ifdef GPAW_PAPI
{"papi_mem_info", papi_mem_info, METH_VARARGS, 0},
#endif // GPAW_PAPI
#ifdef GPAW_WITH_LIBVDWXC
{"libvdwxc_create", libvdwxc_create, METH_VARARGS, 0},
{"libvdwxc_has", libvdwxc_has, METH_VARARGS, 0},
{"libvdwxc_init_serial", libvdwxc_init_serial, METH_VARARGS, 0},
{"libvdwxc_calculate", libvdwxc_calculate, METH_VARARGS, 0},
{"libvdwxc_tostring", libvdwxc_tostring, METH_VARARGS, 0},
{"libvdwxc_free", libvdwxc_free, METH_VARARGS, 0},
{"libvdwxc_init_mpi", libvdwxc_init_mpi, METH_VARARGS, 0},
{"libvdwxc_init_pfft", libvdwxc_init_pfft, METH_VARARGS, 0},
#endif // GPAW_WITH_LIBVDWXC
{"mlsqr", mlsqr, METH_VARARGS, 0},
{0, 0, 0, 0}
};
#ifdef PARALLEL
extern PyTypeObject MPIType;
extern PyTypeObject GPAW_MPI_Request_type;
#endif
extern PyTypeObject LFCType;
extern PyTypeObject LocalizedFunctionsType;
extern PyTypeObject OperatorType;
extern PyTypeObject WOperatorType;
extern PyTypeObject SplineType;
extern PyTypeObject TransformerType;
extern PyTypeObject XCFunctionalType;
extern PyTypeObject lxcXCFunctionalType;
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_gpaw",
"C-extension for GPAW",
-1,
functions,
NULL,
NULL,
NULL,
NULL
};
#endif
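/* Create the extension module: ensure all static extension types are
   ready, build the module object (PyModule_Create on Python 3,
   Py_InitModule3 on Python 2), attach the types and initialize NumPy. */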
static PyObject* moduleinit(void)
{
#ifdef PARALLEL
if (PyType_Ready(&MPIType) < 0)
return NULL;
if (PyType_Ready(&GPAW_MPI_Request_type) < 0)
return NULL;
#endif
if (PyType_Ready(&LFCType) < 0)
return NULL;
if (PyType_Ready(&LocalizedFunctionsType) < 0)
return NULL;
if (PyType_Ready(&OperatorType) < 0)
return NULL;
if (PyType_Ready(&WOperatorType) < 0)
return NULL;
if (PyType_Ready(&SplineType) < 0)
return NULL;
if (PyType_Ready(&TransformerType) < 0)
return NULL;
if (PyType_Ready(&XCFunctionalType) < 0)
return NULL;
if (PyType_Ready(&lxcXCFunctionalType) < 0)
return NULL;
#if PY_MAJOR_VERSION >= 3
PyObject* m = PyModule_Create(&moduledef);
#else
PyObject* m = Py_InitModule3("_gpaw", functions,
"C-extension for GPAW\n\n...\n");
#endif
if (m == NULL)
return NULL;
#ifdef PARALLEL
Py_INCREF(&MPIType);
Py_INCREF(&GPAW_MPI_Request_type);
PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType);
#endif
Py_INCREF(&LFCType);
Py_INCREF(&LocalizedFunctionsType);
Py_INCREF(&OperatorType);
Py_INCREF(&WOperatorType);
Py_INCREF(&SplineType);
Py_INCREF(&TransformerType);
Py_INCREF(&XCFunctionalType);
Py_INCREF(&lxcXCFunctionalType);
import_array1(NULL);
return m;
}
#ifndef GPAW_INTERPRETER
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC PyInit__gpaw(void)
{
return moduleinit();
}
#else
PyMODINIT_FUNC init_gpaw(void)
{
moduleinit();
}
#endif
#else // ifndef GPAW_INTERPRETER
#if PY_MAJOR_VERSION >= 3
#define moduleinit0 moduleinit
#else
void moduleinit0(void) { moduleinit(); }
#endif
#include <mpi.h>
int
main(int argc, char **argv)
{
#ifndef GPAW_OMP
MPI_Init(&argc, &argv);
#else
int granted;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted);
if (granted != MPI_THREAD_MULTIPLE)
exit(1);
#endif // GPAW_OMP
#if PY_MAJOR_VERSION >= 3
wchar_t* wargv[argc];
wchar_t* wargv2[argc];
for (int i = 0; i < argc; i++) {
int n = 1 + mbstowcs(NULL, argv[i], 0);
wargv[i] = (wchar_t*)malloc(n * sizeof(wchar_t));
wargv2[i] = wargv[i];
mbstowcs(wargv[i], argv[i], n);
}
#else
char** wargv = argv;
#endif
Py_SetProgramName(wargv[0]);
PyImport_AppendInittab("_gpaw", &moduleinit0);
Py_Initialize();
int status = Py_Main(argc, wargv);
Py_Finalize();
MPI_Finalize();
#if PY_MAJOR_VERSION >= 3
for (int i = 0; i < argc; i++)
free(wargv2[i]);
#endif
return status;
}
#endif // GPAW_INTERPRETER
==> c/bc.c <==
/* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
// Copyright (C) 2003 CAMP
// Please see the accompanying LICENSE file for further information.
#include "extensions.h"
#include "bc.h"
#include
#include
#include
#include
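/* Build the boundary-exchange description for a 3D grid block of size
   size1 with the given padding: for each axis and direction, record the
   send/receive box geometry, the neighbour rank (or COPY_DATA for an
   on-rank periodic copy, DO_NOTHING for an open boundary) and the
   send/receive element counts. */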
boundary_conditions* bc_init(const long size1[3],
const long padding[3][2],
const long npadding[3][2],
const long neighbors[3][2],
MPI_Comm comm, bool real, bool cfd)
{
boundary_conditions* bc = GPAW_MALLOC(boundary_conditions, 1);
for (int i = 0; i < 3; i++)
{
bc->size1[i] = size1[i];
bc->size2[i] = size1[i] + padding[i][0] + padding[i][1];
bc->padding[i] = padding[i][0];
}
bc->comm = comm;
bc->ndouble = (real ? 1 : 2);
bc->cfd = cfd;
int rank = 0;
if (comm != MPI_COMM_NULL)
MPI_Comm_rank(comm, &rank);
int start[3];
int size[3];
for (int i = 0; i < 3; i++)
{
start[i] = padding[i][0];
size[i] = size1[i];
}
for (int i = 0; i < 3; i++)
{
int n = bc->ndouble;
for (int j = 0; j < 3; j++)
if (j != i)
n *= size[j];
for (int d = 0; d < 2; d++)
{
int ds = npadding[i][d];
int dr = padding[i][d];
for (int j = 0; j < 3; j++)
{
bc->sendstart[i][d][j] = start[j];
bc->sendsize[i][d][j] = size[j];
bc->recvstart[i][d][j] = start[j];
bc->recvsize[i][d][j] = size[j];
}
if (d == 0)
{
bc->sendstart[i][d][i] = dr;
bc->recvstart[i][d][i] = 0;
}
else
{
bc->sendstart[i][d][i] = padding[i][0] + size1[i] - ds;
bc->recvstart[i][d][i] = padding[i][0] + size1[i];
}
bc->sendsize[i][d][i] = ds;
bc->recvsize[i][d][i] = dr;
bc->sendproc[i][d] = DO_NOTHING;
bc->recvproc[i][d] = DO_NOTHING;
bc->nsend[i][d] = 0;
bc->nrecv[i][d] = 0;
int p = neighbors[i][d];
if (p == rank)
{
if (ds > 0)
bc->sendproc[i][d] = COPY_DATA;
if (dr > 0)
bc->recvproc[i][d] = COPY_DATA;
}
else if (p >= 0)
{
// Communication required:
if (ds > 0)
{
bc->sendproc[i][d] = p;
bc->nsend[i][d] = n * ds;
}
if (dr > 0)
{
bc->recvproc[i][d] = p;
bc->nrecv[i][d] = n * dr;
}
}
}
if (cfd == 0)
{
start[i] = 0;
size[i] = bc->size2[i];
}
// If the two neighboring processors along the
// i'th axis are the same, then we join the two communications
// into one:
bc->rjoin[i] = ((bc->recvproc[i][0] == bc->recvproc[i][1]) &&
bc->recvproc[i][0] >= 0);
bc->sjoin[i] = ((bc->sendproc[i][0] == bc->sendproc[i][1]) &&
bc->sendproc[i][0] >= 0);
}
bc->maxsend = 0;
bc->maxrecv = 0;
for (int i = 0; i < 3; i++)
{
int n = bc->nsend[i][0] + bc->nsend[i][1];
if (n > bc->maxsend)
bc->maxsend = n;
n = bc->nrecv[i][0] + bc->nrecv[i][1];
if (n > bc->maxrecv)
bc->maxrecv = n;
}
return bc;
}
void bc_unpack1(const boundary_conditions* bc,
const double* aa1, double* aa2, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuff, double* sbuff,
const double_complex phases[2], int thd, int nin)
{
int ng = bc->ndouble * bc->size1[0] * bc->size1[1] * bc->size1[2];
int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2];
bool real = (bc->ndouble == 1);
for (int m = 0; m < nin; m++)
// Copy data:
if (i == 0)
{
// Zero all of the a2 array. We should only zero the boundaries
// that are not periodic, but it's simpler to zero everything!
// XXX
memset(aa2 + m * ng2, 0, ng2 * sizeof(double));
// Copy data from a1 to central part of a2:
if (real)
bmgs_paste(aa1 + m * ng, bc->size1, aa2 + m * ng2,
bc->size2, bc->sendstart[0][0]);
else
bmgs_pastez((const double_complex*)(aa1 + m * ng), bc->size1,
(double_complex*)(aa2 + m * ng2),
bc->size2, bc->sendstart[0][0]);
}
#ifdef PARALLEL
// Start receiving.
for (int d = 0; d < 2; d++)
{
int p = bc->recvproc[i][d];
if (p >= 0)
{
if (bc->rjoin[i])
{
if (d == 0)
MPI_Irecv(rbuff, (bc->nrecv[i][0] + bc->nrecv[i][1]) * nin,
MPI_DOUBLE, p,
10 * thd + 1000 * i + 100000,
bc->comm, &recvreq[0]);
}
else
{
MPI_Irecv(rbuff, bc->nrecv[i][d] * nin, MPI_DOUBLE, p,
d + 10 * thd + 1000 * i,
bc->comm, &recvreq[d]);
rbuff += bc->nrecv[i][d] * nin;
}
}
}
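// Note on tags: the receive posted for direction d above must match
// the send that the neighbour issues in its opposite direction, hence
// the "1 - d" in the send tags below; the thd and i terms keep
// different threads and axes apart, and the joined case uses a
// separate tag offset (100000).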
// Prepare send-buffers and start sending:
double* sbuf = sbuff;
double* sbuf0 = sbuff;
for (int d = 0; d < 2; d++)
{
sendreq[d] = 0;
int p = bc->sendproc[i][d];
if (p >= 0)
{
const int* start = bc->sendstart[i][d];
const int* size = bc->sendsize[i][d];
for (int m = 0; m < nin; m++)
if (real)
bmgs_cut(aa2 + m * ng2, bc->size2, start,
sbuf + m * bc->nsend[i][d],
size);
else
bmgs_cutmz((const double_complex*)(aa2 + m * ng2),
bc->size2, start,
(double_complex*)(sbuf + m * bc->nsend[i][d]),
size, phases[d]);
if (bc->sjoin[i])
{
if (d == 1)
{
MPI_Isend(sbuf0, (bc->nsend[i][0] + bc->nsend[i][1]) * nin,
MPI_DOUBLE, p,
10 * thd + 1000 * i + 100000,
bc->comm, &sendreq[0]);
}
}
else
{
MPI_Isend(sbuf, bc->nsend[i][d] * nin, MPI_DOUBLE, p,
1 - d + 10 * thd + 1000 * i, bc->comm, &sendreq[d]);
}
sbuf += bc->nsend[i][d] * nin;
}
}
#endif // PARALLEL
for (int m = 0; m < nin; m++)
{
// Copy data for periodic boundary conditions:
for (int d = 0; d < 2; d++)
if (bc->sendproc[i][d] == COPY_DATA)
{
if (real)
bmgs_translate(aa2 + m * ng2, bc->size2, bc->sendsize[i][d],
bc->sendstart[i][d], bc->recvstart[i][1 - d]);
else
bmgs_translatemz((double_complex*)(aa2 + m * ng2), bc->size2,
bc->sendsize[i][d],
bc->sendstart[i][d], bc->recvstart[i][1 - d],
phases[d]);
}
}
}
void bc_unpack2(const boundary_conditions* bc,
double* a2, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuf, int nin)
{
#ifdef PARALLEL
int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2];
// Store data from receive-buffer:
bool real = (bc->ndouble == 1);
double* rbuf0 = rbuf;
for (int d = 0; d < 2; d++)
if (bc->recvproc[i][d] >= 0)
{
if (bc->rjoin[i])
{
if (d == 0)
{
MPI_Wait(&recvreq[0], MPI_STATUS_IGNORE);
rbuf += bc->nrecv[i][1] * nin;
}
else
rbuf = rbuf0;
}
else
MPI_Wait(&recvreq[d], MPI_STATUS_IGNORE);
for (int m = 0; m < nin; m++)
if (real)
bmgs_paste(rbuf + m * bc->nrecv[i][d], bc->recvsize[i][d],
a2 + m * ng2, bc->size2, bc->recvstart[i][d]);
else
bmgs_pastez((const double_complex*)(rbuf +
m * bc->nrecv[i][d]),
bc->recvsize[i][d],
(double_complex*)(a2 + m * ng2),
bc->size2, bc->recvstart[i][d]);
rbuf += bc->nrecv[i][d] * nin;
}
// This does not work on the ibm with gcc! We do a blocking send instead.
for (int d = 0; d < 2; d++)
if (sendreq[d] != 0)
MPI_Wait(&sendreq[d], MPI_STATUS_IGNORE);
#endif // PARALLEL
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bc.h 0000664 0000000 0000000 00000002653 13164413722 0021111 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "bmgs/bmgs.h"
#ifdef PARALLEL
#include <mpi.h>
#else
typedef int* MPI_Request; // dummy type for serial (non-MPI) builds
typedef int* MPI_Comm;
#define MPI_COMM_NULL 0
#define MPI_Comm_rank(comm, rank) *(rank) = 0
#endif
typedef struct
{
int size1[3];
int size2[3];
int sendstart[3][2][3];
int sendsize[3][2][3];
int recvstart[3][2][3];
int recvsize[3][2][3];
int sendproc[3][2];
int recvproc[3][2];
int nsend[3][2];
int nrecv[3][2];
int maxsend;
int maxrecv;
int padding[3];
bool sjoin[3];
bool rjoin[3];
int ndouble;
bool cfd;
MPI_Comm comm;
} boundary_conditions;
static const int COPY_DATA = -2;
static const int DO_NOTHING = -3; // free boundary: no copy, no communication
boundary_conditions* bc_init(const long size1[3],
const long padding[3][2],
const long npadding[3][2],
const long neighbors[3][2],
MPI_Comm comm, bool real, bool cfd);
void bc_unpack1(const boundary_conditions* bc,
const double* input, double* output, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuf, double* sbuf,
const double_complex phases[2], int thd, int nin);
void bc_unpack2(const boundary_conditions* bc,
double* a2, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuf, int nin);
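/* Typical usage, per axis i = 0, 1, 2 (see bc.c):
   - bc_unpack1() copies the input into the interior of the padded
     output array (for i == 0 only), posts the non-blocking sends and
     receives for this axis and performs the purely local periodic
     copies;
   - bc_unpack2() then waits for the receives, pastes the halo data
     into the output array and finally waits for the sends.
   The rbuf and sbuf buffers must hold at least bc->maxrecv and
   bc->maxsend doubles, respectively, per input array (nin). */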
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/blacs.c 0000664 0000000 0000000 00000155233 13164413722 0021607 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2010 Argonne National Laboratory
* Please see the accompanying LICENSE file for further information. */
#ifdef PARALLEL
#include <mpi.h>
#ifdef GPAW_WITH_SL
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <Python.h>
#include <assert.h>
#include <stdlib.h>
#include <numpy/arrayobject.h>
#include "extensions.h"
#include "mympi.h"
// BLACS
#define BLOCK_CYCLIC_2D 1
#ifdef GPAW_NO_UNDERSCORE_CBLACS
#define Cblacs_barrier_ Cblacs_barrier
#define Cblacs_gridexit_ Cblacs_gridexit
#define Cblacs_gridinfo_ Cblacs_gridinfo
#define Cblacs_gridinit_ Cblacs_gridinit
#define Cblacs_pinfo_ Cblacs_pinfo
#define Csys2blacs_handle_ Csys2blacs_handle
#endif
void Cblacs_barrier_(int ConTxt, char *scope);
void Cblacs_gridexit_(int ConTxt);
void Cblacs_gridinfo_(int ConTxt, int* nprow, int* npcol,
int* myrow, int* mycol);
void Cblacs_gridinit_(int* ConTxt, char* order, int nprow, int npcol);
void Cblacs_pinfo_(int* mypnum, int* nprocs);
int Csys2blacs_handle_(MPI_Comm SysCtxt);
// End of BLACS
// ScaLAPACK
#ifdef GPAW_NO_UNDERSCORE_SCALAPACK
#define numroc_ numroc
#define pdlamch_ pdlamch
#define pdlaset_ pdlaset
#define pzlaset_ pzlaset
#define pdpotrf_ pdpotrf
#define pzpotrf_ pzpotrf
#define pzpotri_ pzpotri
#define pdtrtri_ pdtrtri
#define pztrtri_ pztrtri
#define pzgesv_ pzgesv
#define pdgesv_ pdgesv
#define pdsyevd_ pdsyevd
#define pzheevd_ pzheevd
#define pdsyevx_ pdsyevx
#define pzheevx_ pzheevx
#define pdsygvx_ pdsygvx
#define pzhegvx_ pzhegvx
#define pdsyngst_ pdsyngst
#define pzhengst_ pzhengst
#ifdef GPAW_MR3
#define pdsyevr_ pdsyevr
#define pzheevr_ pzheevr
#endif // GPAW_MR3
#define pdtran_ pdtran
#define pztranc_ pztranc
#define pdgemm_ pdgemm
#define pzgemm_ pzgemm
#define pdgemv_ pdgemv
#define pzgemv_ pzgemv
#define pdsyr2k_ pdsyr2k
#define pzher2k_ pzher2k
#define pdsyrk_ pdsyrk
#define pzherk_ pzherk
#define pdtrsm_ pdtrsm
#define pztrsm_ pztrsm
#define pzhemm_ pzhemm
#define pdsymm_ pdsymm
#endif
#ifdef GPAW_NO_UNDERSCORE_CSCALAPACK
#define Cpdgemr2d_ Cpdgemr2d
#define Cpzgemr2d_ Cpzgemr2d
#define Cpdtrmr2d_ Cpdtrmr2d
#define Cpztrmr2d_ Cpztrmr2d
#endif
// tools
int numroc_(int* n, int* nb, int* iproc, int* isrcproc, int* nprocs);
void Cpdgemr2d_(int m, int n,
double* a, int ia, int ja, int* desca,
double* b, int ib, int jb, int* descb,
int gcontext);
void Cpzgemr2d_(int m, int n,
void* a, int ia, int ja, int* desca,
void* b, int ib, int jb, int* descb,
int gcontext);
void Cpdtrmr2d_(char* uplo, char* diag, int m, int n,
double* a, int ia, int ja, int* desca,
double* b, int ib, int jb, int* descb,
int gcontext);
void Cpztrmr2d_(char* uplo, char* diag, int m, int n,
void* a, int ia, int ja, int* desca,
void* b, int ib, int jb, int* descb,
int gcontext);
double pdlamch_(int* ictxt, char* cmach);
void pzpotri_(char* uplo, int* n, void* a, int *ia, int* ja, int* desca, int* info);
void pzgetri_(int* n, void* a,
int *ia, int* ja, int* desca, int* info);
void pdlaset_(char* uplo, int* m, int* n, double* alpha, double* beta,
double* a, int* ia, int* ja, int* desca);
void pzlaset_(char* uplo, int* m, int* n, void* alpha, void* beta,
void* a, int* ia, int* ja, int* desca);
// cholesky
void pdpotrf_(char* uplo, int* n, double* a,
int* ia, int* ja, int* desca, int* info);
void pzpotrf_(char* uplo, int* n, void* a,
int* ia, int* ja, int* desca, int* info);
void pzgesv_(int* n, int* nrhs, void* a,
int* ia, int* ja, int* desca, int* ipiv,
void* b, int* ib, int* jb, int* descb, int* info);
void pdgesv_(int *n, int *nrhs, void *a,
int *ia, int *ja, int* desca, int *ipiv,
void* b, int* ib, int* jb, int* descb, int* info);
void pdtrtri_(char* uplo, char* diag, int* n, double* a,
int *ia, int* ja, int* desca, int* info);
void pztrtri_(char* uplo, char* diag, int* n, void* a,
int *ia, int* ja, int* desca, int* info);
// diagonalization
void pdsyevd_(char* jobz, char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* w, double* z, int* iz, int* jz,
int* descz, double* work, int* lwork, int* iwork,
int* liwork, int* info);
void pzheevd_(char* jobz, char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* w, void* z, int* iz, int* jz,
int* descz, void* work, int* lwork, double* rwork,
int* lrwork, int* iwork, int* liwork, int* info);
void pdsyevx_(char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pzheevx_(char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pdsygvx_(int* ibtype, char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* b, int *ib, int* jb, int* descb,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pzhegvx_(int* ibtype, char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
void* b, int *ib, int* jb, int* descb,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pdsyngst_(int* ibtype, char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* scale, double* work, int* lwork, int* info);
void pzhengst_(int* ibtype, char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
double* scale, void* work, int* lwork, int* info);
#ifdef GPAW_MR3
void pdsyevr_(char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu,
int* m, int* nz, double* w,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* info);
void pzheevr_(char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu,
int* m, int* nz, double* w,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* info);
#endif // GPAW_MR3
// pblas
void pdtran_(int* m, int* n,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pztranc_(int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdgemm_(char* transa, char* transb, int* m, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pzgemm_(char* transa, char* transb, int* m, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pzhemm_(char* side, char* uplo, int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdsymm_(char* side, char* uplo, int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdgemv_(char* transa, int* m, int* n, double* alpha,
double* a, int* ia, int* ja, int* desca,
double* x, int* ix, int* jx, int* descx, int* incx,
double* beta,
double* y, int* iy, int* jy, int* descy, int* incy);
void pzgemv_(char* transa, int* m, int* n, void* alpha,
void* a, int* ia, int* ja, int* desca,
void* x, int* ix, int* jx, int* descx, int* incx,
void* beta,
void* y, int* iy, int* jy, int* descy, int* incy);
void pdsyr2k_(char* uplo, char* trans, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* beta,
double* c, int* ic, int *jc, int* descc);
void pzher2k_(char* uplo, char* trans, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdsyrk_(char* uplo, char* trans, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pzherk_(char* uplo, char* trans, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdtrsm_(char* side, char* uplo, char* trans, char* diag,
int* m, int *n, double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb);
void pztrsm_(char* side, char* uplo, char* trans, char* diag,
int* m, int *n, void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb);
PyObject* pblas_tran(PyObject *self, PyObject *args)
{
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *c;
PyArrayObject *desca, *descc;
if (!PyArg_ParseTuple(args, "iiDODOOO", &m, &n, &alpha,
&a, &beta, &c,
&desca, &descc))
return NULL;
int one = 1;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdtran_(&m, &n,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pztranc_(&m, &n,
&alpha,
(void*)PyArray_DATA(a), &one, &one, INTP(desca),
&beta,
(void*)PyArray_DATA(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_gemm(PyObject *self, PyObject *args)
{
char* transa;
char* transb;
int m, n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiiDOODOOOOss", &m, &n, &k, &alpha,
&a, &b, &beta, &c,
&desca, &descb, &descc,
&transa, &transb)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdgemm_(transa, transb, &m, &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(descb),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzgemm_(transa, transb, &m, &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_hemm(PyObject *self, PyObject *args)
{
char* side;
char* uplo;
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "ssiiDOOdOOOO",
&side, &uplo, &n, &m,
&alpha, &a, &b, &beta,
&c, &desca, &descb, &descc)) {
return NULL;
}
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE) {
pdsymm_(side, uplo, &n, &m, &alpha,
(void*)DOUBLEP(a), &one, &one, INTP(desca),
(void*)DOUBLEP(b), &one, &one, INTP(descb),
&beta,
(void*)DOUBLEP(c), &one, &one, INTP(descc));
} else {
pzhemm_(side, uplo, &n, &m, &alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
}
Py_RETURN_NONE;
}
PyObject* pblas_gemv(PyObject *self, PyObject *args)
{
char* transa;
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *x, *y;
int incx = 1, incy = 1; // what should these be?
PyArrayObject *desca, *descx, *descy;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDOODOOOOs",
&m, &n, &alpha,
&a, &x, &beta, &y,
&desca, &descx,
&descy, &transa)) {
return NULL;
}
// ydesc
// int y_ConTxt = INTP(descy)[1];
// If process not on BLACS grid, then return.
// if (y_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(y)->type_num == NPY_DOUBLE)
pdgemv_(transa, &m, &n,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(x), &one, &one, INTP(descx), &incx,
&(beta.real),
DOUBLEP(y), &one, &one, INTP(descy), &incy);
else
pzgemv_(transa, &m, &n,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(x), &one, &one, INTP(descx), &incx,
&beta,
(void*)COMPLEXP(y), &one, &one, INTP(descy), &incy);
Py_RETURN_NONE;
}
PyObject* pblas_r2k(PyObject *self, PyObject *args)
{
char* uplo;
int n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDOODOOOOs", &n, &k, &alpha,
&a, &b, &beta, &c,
&desca, &descb, &descc,
&uplo)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdsyr2k_(uplo, "T", &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(descb),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzher2k_(uplo, "C", &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_rk(PyObject *self, PyObject *args)
{
char* uplo;
int n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *c;
PyArrayObject *desca, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDODOOOs", &n, &k, &alpha,
&a, &beta, &c,
&desca, &descc,
&uplo)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdsyrk_(uplo, "T", &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzherk_(uplo, "C", &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* new_blacs_context(PyObject *self, PyObject *args)
{
PyObject* comm_obj;
int nprow, npcol;
int iam, nprocs;
int ConTxt;
char* order;
if (!PyArg_ParseTuple(args, "Oiis", &comm_obj, &nprow, &npcol, &order)){
return NULL;
}
// Create blacs grid on this communicator
MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
// Get my id and nprocs. This is for debugging purposes only
Cblacs_pinfo_(&iam, &nprocs);
MPI_Comm_size(comm, &nprocs);
// Create blacs grid on this communicator continued
ConTxt = Csys2blacs_handle_(comm);
Cblacs_gridinit_(&ConTxt, order, nprow, npcol);
PyObject* returnvalue = Py_BuildValue("i", ConTxt);
return returnvalue;
}
PyObject* get_blacs_gridinfo(PyObject *self, PyObject *args)
{
int ConTxt, nprow, npcol;
int myrow, mycol;
if (!PyArg_ParseTuple(args, "iii", &ConTxt, &nprow, &npcol)) {
return NULL;
}
Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
return Py_BuildValue("(ii)", myrow, mycol);
}
PyObject* get_blacs_local_shape(PyObject *self, PyObject *args)
{
int ConTxt;
int m, n, mb, nb, rsrc, csrc;
int nprow, npcol, myrow, mycol;
int locM, locN;
if (!PyArg_ParseTuple(args, "iiiiiii", &ConTxt, &m, &n, &mb,
&nb, &rsrc, &csrc)){
return NULL;
}
Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
return Py_BuildValue("(ii)", locM, locN);
}
PyObject* blacs_destroy(PyObject *self, PyObject *args)
{
int ConTxt;
if (!PyArg_ParseTuple(args, "i", &ConTxt))
return NULL;
Cblacs_gridexit_(ConTxt);
Py_RETURN_NONE;
}
PyObject* scalapack_set(PyObject *self, PyObject *args)
{
PyArrayObject* a; // matrix;
PyArrayObject* desca; // descriptor
Py_complex alpha;
Py_complex beta;
int m, n;
int ia, ja;
char* uplo;
if (!PyArg_ParseTuple(args, "OODDsiiii", &a, &desca,
&alpha, &beta, &uplo,
&m, &n, &ia, &ja))
return NULL;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdlaset_(uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a),
&ia, &ja, INTP(desca));
else
pzlaset_(uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a),
&ia, &ja, INTP(desca));
Py_RETURN_NONE;
}
PyObject* scalapack_redist(PyObject *self, PyObject *args)
{
PyArrayObject* a; // source matrix
PyArrayObject* b; // destination matrix
PyArrayObject* desca; // source descriptor
PyArrayObject* descb; // destination descriptor
char* uplo;
char diag='N'; // copy the diagonal
int c_ConTxt;
int m;
int n;
int ia, ja, ib, jb;
if (!PyArg_ParseTuple(args, "OOOOiiiiiiis",
&desca, &descb,
&a, &b,
&m, &n,
&ia, &ja,
&ib, &jb,
&c_ConTxt,
&uplo))
return NULL;
if (*uplo == 'G') // General matrix
{
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
Cpdgemr2d_(m, n,
DOUBLEP(a), ia, ja, INTP(desca),
DOUBLEP(b), ib, jb, INTP(descb),
c_ConTxt);
else
Cpzgemr2d_(m, n,
(void*)COMPLEXP(a), ia, ja, INTP(desca),
(void*)COMPLEXP(b), ib, jb, INTP(descb),
c_ConTxt);
}
else // Trapezoidal matrix
{
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
Cpdtrmr2d_(uplo, &diag, m, n,
DOUBLEP(a), ia, ja, INTP(desca),
DOUBLEP(b), ib, jb, INTP(descb),
c_ConTxt);
else
Cpztrmr2d_(uplo, &diag, m, n,
(void*)COMPLEXP(a), ia, ja, INTP(desca),
(void*)COMPLEXP(b), ib, jb, INTP(descb),
c_ConTxt);
}
Py_RETURN_NONE;
}
PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args)
{
// Standard driver for divide and conquer algorithm
// Computes all eigenvalues and eigenvectors
PyArrayObject* a; // symmetric matrix
PyArrayObject* desca; // symmetric matrix description vector
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int one = 1;
char jobz = 'V'; // eigenvectors also
char* uplo;
if (!PyArg_ParseTuple(args, "OOsOO", &a, &desca, &uplo, &z, &w))
return NULL;
// adesc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// zdesc = adesc; this can be relaxed a bit according to pdsyevd.f
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Query part, need to find the optimal size of a number of work arrays
int info;
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work;
double_complex c_work;
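// Calling the driver with the work sizes set to querywork (-1)
// performs a workspace query only: the optimal lwork/lrwork/liwork
// values are returned in the first element of the corresponding
// work arguments.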
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevd_(&jobz, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
&d_work, &querywork, &i_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzheevd_(&jobz, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, &d_work, &querywork,
&i_work, &querywork, &info);
lwork = (int)(c_work);
lrwork = (int)(d_work);
}
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_dc error in query.");
return NULL;
}
// Computation part
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevd_(&jobz, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork, &info);
free(work);
}
else
{
double_complex *work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevd_(&jobz, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork, &info);
free(rwork);
free(work);
}
free(iwork);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args)
{
// Standard driver for bisection and inverse iteration algorithm
// Computes 'iu' eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int a_mycol = -1;
int a_myrow = -1;
int a_nprow, a_npcol;
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th through iu-th
char* uplo;
if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
&z, &w))
return NULL;
// a desc
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsyevx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
// Convergence tolerance
double abstol = 1.0e-8;
// char cmach = 'U'; // most orthogonal eigenvectors
// char cmach = 'S'; // most accurate eigenvalues
// double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
// double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
double orfac = -1.0;
// Query part, need to find the optimal size of a number of work arrays
int info;
int *ifail;
ifail = GPAW_MALLOC(int, n);
int *iclustr;
iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
double *gap;
gap = GPAW_MALLOC(double, a_nprow*a_npcol);
int querywork = -1;
int* iwork;
int liwork;
int lwork; // workspace size must be at least 3
int lrwork; // workspace size must be at least 3
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevx_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(d_work[0]));
}
else
{
pzheevx_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(c_work));
lrwork = MAX(3, (int)(d_work[0]));
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_ex error in query.");
return NULL;
}
// Computation part
// lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevx_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
ifail, iclustr, gap, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevx_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
ifail, iclustr, gap, &info);
free(rwork);
free(work);
}
free(iwork);
free(gap);
free(iclustr);
free(ifail);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#ifdef GPAW_MR3
PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args)
{
// Standard driver for MRRR algorithm
// Computes 'iu' eigenvalues and eigenvectors
// http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th through iu-th
char* uplo;
if (!PyArg_ParseTuple(args, "OOsiOO", &a, &desca, &uplo, &iu,
&z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsyevx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Query part, need to find the optimal size of a number of work arrays
int info;
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevr_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
&info);
lwork = (int)(d_work[0]);
}
else
{
pzheevr_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
&info);
lwork = (int)(c_work);
lrwork = (int)(d_work[0]);
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_mr3 error in query.");
return NULL;
}
// Computation part
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevr_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
&info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevr_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
&info);
free(rwork);
free(work);
}
free(iwork);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args)
{
// General driver for divide and conquer algorithm
// Computes *all* eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int one = 1;
char jobz = 'V'; // eigenvectors also
char* uplo;
double scale;
if (!PyArg_ParseTuple(args, "OOsOOO", &a, &desca, &uplo,
&b, &z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Cholesky Decomposition
int info;
if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
else
pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in Cholesky.");
return NULL;
}
// Query variables
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work;
double_complex c_work;
// NGST Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyngst_(&ibtype, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, &d_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzhengst_(&ibtype, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)&c_work, &querywork, &info);
lwork = (int)(c_work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in NGST query.");
return NULL;
}
// NGST Compute
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyngst_(&ibtype, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, work, &lwork, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
pzhengst_(&ibtype, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)work, &lwork, &info);
free(work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in NGST compute.");
return NULL;
}
// NOTE: Scale is always equal to 1.0 above. In future versions of ScaLAPACK, we
// may need to rescale eigenvalues by scale. This can be accomplished by using
// the BLAS1 d/zscal. See pdsygvx.f
// EVD Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevd_(&jobz, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
&d_work, &querywork, &i_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzheevd_(&jobz, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, &d_work, &querywork,
&i_work, &querywork, &info);
lwork = (int)(c_work);
lrwork = (int)(d_work);
}
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in EVD query.");
return NULL;
}
// EVD Computation
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevd_(&jobz, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork, &info);
free(work);
}
else
{
double_complex *work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevd_(&jobz, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork, &info);
free(rwork);
free(work);
}
free(iwork);
// Backtransformation to the original problem
char trans;
double d_one = 1.0;
double_complex c_one = 1.0;
if (*uplo == 'U')
trans = 'N';
else
trans = 'T';
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
DOUBLEP(b), &one, &one, INTP(desca),
DOUBLEP(z), &one, &one, INTP(desca));
else
pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
(void*)COMPLEXP(b), &one, &one, INTP(desca),
(void*)COMPLEXP(z), &one, &one, INTP(desca));
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args)
{
// General driver for bisection and inverse iteration algorithm
// Computes 'iu' eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int a_mycol = -1;
int a_myrow = -1;
int a_nprow, a_npcol;
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th through iu-th
char* uplo;
if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
&b, &z, &w))
return NULL;
// a desc
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsygvx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
// Convergence tolerance
double abstol = 1.0e-8;
// char cmach = 'U'; // most orthogonal eigenvectors
// char cmach = 'S'; // most accurate eigenvalues
// double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
// double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
double orfac = -1.0;
// Query part, need to find the optimal size of a number of work arrays
int info;
int *ifail;
ifail = GPAW_MALLOC(int, n);
int *iclustr;
iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
double *gap;
gap = GPAW_MALLOC(double, a_nprow*a_npcol);
int querywork = -1;
int* iwork;
int liwork;
int lwork; // workspace size must be at least 3
int lrwork; // workspace size must be at least 3
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(d_work[0]));
}
else
{
pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(c_work));
lrwork = MAX(3, (int)(d_work[0]));
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_ex error in query.");
return NULL;
}
// Computation part
// lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsygvx_(&ibtype, &jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
ifail, iclustr, gap, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzhegvx_(&ibtype, &jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
ifail, iclustr, gap, &info);
free(rwork);
free(work);
}
free(iwork);
free(gap);
free(iclustr);
free(ifail);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#ifdef GPAW_MR3
PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args)
{
// General driver for MRRR algorithm
// Computes 'iu' eigenvalues and eigenvectors
// http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th through iu-th
char* uplo;
double scale;
if (!PyArg_ParseTuple(args, "OOsiOOO", &a, &desca, &uplo, &iu,
&b, &z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Cholesky Decomposition
int info;
if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
pdpotrf_(uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
else
pzpotrf_(uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in Cholesky.");
return NULL;
}
// Query variables
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work[3];
double_complex c_work;
// NGST Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyngst_(&ibtype, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, d_work, &querywork, &info);
lwork = (int)(d_work[0]);
}
else
{
pzhengst_(&ibtype, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)&c_work, &querywork, &info);
lwork = (int)(c_work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in NGST query.");
return NULL;
}
// NGST Compute
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyngst_(&ibtype, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, work, &lwork, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
pzhengst_(&ibtype, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)work, &lwork, &info);
free(work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in NGST compute.");
return NULL;
}
// NOTE: Scale is always equal to 1.0 above. In future versions of ScaLAPACK, we
// may need to rescale eigenvalues by scale. This can be accomplished by using
// the BLAS1 d/zscal. See pdsygvx.f
// EVR Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevr_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
&info);
lwork = (int)(d_work[0]);
}
else
{
pzheevr_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
&info);
lwork = (int)(c_work);
lrwork = (int)(d_work[0]);
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in EVR query.");
return NULL;
}
// EVR Computation
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevr_(&jobz, &range, uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
&info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevr_(&jobz, &range, uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
&info);
free(rwork);
free(work);
}
free(iwork);
// Backtransformation to the original problem
char trans;
double d_one = 1.0;
double_complex c_one = 1.0;
if (*uplo == 'U')
trans = 'N';
else
trans = 'T';
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdtrsm_("L", uplo, &trans, "N", &n, &n, &d_one,
DOUBLEP(b), &one, &one, INTP(desca),
DOUBLEP(z), &one, &one, INTP(desca));
else
pztrsm_("L", uplo, &trans, "N", &n, &n, (void*)&c_one,
(void*)COMPLEXP(b), &one, &one, INTP(desca),
(void*)COMPLEXP(z), &one, &one, INTP(desca));
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args)
{
// Cholesky plus inverse of triangular matrix
PyArrayObject* a; // overlap matrix
PyArrayObject* desca; // symmetric matrix description vector
int info;
double d_zero = 0.0;
double_complex c_zero = 0.0;
int one = 1;
int two = 2;
char diag = 'N'; // non-unit triangular
char* uplo;
if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
return NULL;
// adesc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
int p = a_n - 1;
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdpotrf_(uplo, &n, DOUBLEP(a), &one, &one,
INTP(desca), &info);
if (info == 0)
{
pdtrtri_(uplo, &diag, &n, DOUBLEP(a), &one, &one,
INTP(desca), &info);
if (*uplo == 'L')
pdlaset_("U", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
&one, &two, INTP(desca));
else
pdlaset_("L", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
&two, &one, INTP(desca));
}
}
else
{
pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one,
INTP(desca), &info);
if (info == 0)
{
pztrtri_(uplo, &diag, &n, (void*)COMPLEXP(a), &one, &one,
INTP(desca), &info);
if (*uplo == 'L')
pzlaset_("U", &p, &p, (void*)&c_zero, (void*)&c_zero,
(void*)COMPLEXP(a), &one, &two, INTP(desca));
else
pzlaset_("L", &p, &p, (void*)&c_zero, (void*)&c_zero,
(void*)COMPLEXP(a), &two, &one, INTP(desca));
}
}
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_inverse(PyObject *self, PyObject *args)
{
// Inverse of a Hermitian matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
char* uplo;
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOs", &a, &desca, &uplo))
return NULL;
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
assert(1==-1); // No double version implemented
}
else
{
pzpotrf_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
if (info == 0)
{
pzpotri_(uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
}
}
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
/*
PyObject* scalapack_solve(PyObject *self, PyObject *args)
{
// Solves equation Ax = B, where A is a general matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
PyArrayObject* b; // Matrix
PyArrayObject* descb; // Matrix description vector
char uplo;
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
return NULL;
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int b_m = INTP(descb)[2];
int b_n = INTP(descb)[3];
// Equation valid
assert (a_n == b_m);
int n = a_n;
int nrhs = b_n;
int* pivot = GPAW_MALLOC(int, a_m+2000); // TODO: How long should this exactly be?
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
assert(1==-1); // No double version implemented
}
else
{
pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
(void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
}
free(pivot);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
*/
PyObject* scalapack_solve(PyObject *self, PyObject *args) {
// Solves equation Ax = B, where A is a general matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
PyArrayObject* b; // Matrix
PyArrayObject* descb; // Matrix description vector
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
return NULL;
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
int a_mb = INTP(desca)[4];
// Only square matrices
assert (a_m == a_n);
int b_m = INTP(descb)[2];
int b_n = INTP(descb)[3];
// Equation valid
assert (a_n == b_m);
int n = a_n;
int nrhs = b_n;
int nprow, npcol, myrow, mycol, locM;
Cblacs_gridinfo_(a_ConTxt, &nprow, &npcol, &myrow, &mycol);
// LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
locM = (((a_m/a_mb) + 1)/nprow + 1) * a_mb;
/*
* IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A )
* This array contains the pivoting information.
* IPIV(i) -> The global row local row i was swapped with.
* This array is tied to the distributed matrix A.
* An upper bound for these quantities may be computed by:
* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
* M_A (global) DESCA( M_ ) The number of rows in the global
* array A.
* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
* the rows of the array.
* NPROW (global input) INTEGER
* NPROW specifies the number of process rows in the grid
* to be created.
*/
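/* Worked example (hypothetical sizes): for a_m = 1000, a_mb = 64 and
   nprow = 4 the exact bound is ceil(ceil(1000/64)/4)*64 = 256, while
   the integer arithmetic above gives ((15 + 1)/4 + 1)*64 = 320, a safe
   overestimate of the required pivot-array length. */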
int* pivot = GPAW_MALLOC(int, locM + a_mb);
//if (a->descr->type_num == PyArray_DOUBLE)
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdgesv_(&n, &nrhs,(double*)DOUBLEP(a), &one, &one, INTP(desca), pivot,
(double*)DOUBLEP(b), &one, &one, INTP(descb), &info);
}
else
{
pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
(void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
}
free(pivot);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
#endif // PARALLEL
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/blas.c 0000664 0000000 0000000 00000032216 13164413722 0021437 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2007 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define dscal_ dscal
# define zscal_ zscal
# define daxpy_ daxpy
# define zaxpy_ zaxpy
# define dsyrk_ dsyrk
# define zher_ zher
# define zherk_ zherk
# define dsyr2k_ dsyr2k
# define zher2k_ zher2k
# define dgemm_ dgemm
# define zgemm_ zgemm
# define dgemv_ dgemv
# define zgemv_ zgemv
# define ddot_ ddot
#endif
void dscal_(int*n, double* alpha, double* x, int* incx);
void zscal_(int*n, void* alpha, void* x, int* incx);
void daxpy_(int* n, double* alpha,
double* x, int *incx,
double* y, int *incy);
void zaxpy_(int* n, void* alpha,
void* x, int *incx,
void* y, int *incy);
void dsyrk_(char *uplo, char *trans, int *n, int *k,
double *alpha, double *a, int *lda, double *beta,
double *c, int *ldc);
void zher_(char *uplo, int *n,
double *alpha, void *x, int *incx,
void *a, int *lda);
void zherk_(char *uplo, char *trans, int *n, int *k,
double *alpha, void *a, int *lda,
double *beta,
void *c, int *ldc);
void dsyr2k_(char *uplo, char *trans, int *n, int *k,
double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
void zher2k_(char *uplo, char *trans, int *n, int *k,
void *alpha, void *a, int *lda,
void *b, int *ldb, double *beta,
void *c, int *ldc);
void dgemm_(char *transa, char *transb, int *m, int * n,
int *k, double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
void zgemm_(char *transa, char *transb, int *m, int * n,
int *k, void *alpha, void *a, int *lda,
void *b, int *ldb, void *beta,
void *c, int *ldc);
void dgemv_(char *trans, int *m, int * n,
double *alpha, double *a, int *lda,
double *x, int *incx, double *beta,
double *y, int *incy);
void zgemv_(char *trans, int *m, int * n,
void *alpha, void *a, int *lda,
void *x, int *incx, void *beta,
void *y, int *incy);
double ddot_(int *n, void *dx, int *incx, void *dy, int *incy);
PyObject* scal(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* x;
if (!PyArg_ParseTuple(args, "DO", &alpha, &x))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
if (PyArray_DESCR(x)->type_num == NPY_DOUBLE)
dscal_(&n, &(alpha.real), DOUBLEP(x), &incx);
else
zscal_(&n, &alpha, (void*)COMPLEXP(x), &incx);
Py_RETURN_NONE;
}
PyObject* gemm(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* b;
Py_complex beta;
PyArrayObject* c;
char t = 'n';
char* transa = &t;
if (!PyArg_ParseTuple(args, "DOODO|s", &alpha, &a, &b, &beta, &c, &transa))
return NULL;
int m, k, lda, ldb, ldc;
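// Leading dimensions are derived from the NumPy strides, measured in
// elements by dividing with the last-axis stride, so arrays whose
// first axis is padded are handled correctly; the trailing axes are
// assumed to be contiguous.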
if (*transa == 'n')
{
m = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
m *= PyArray_DIMS(a)[i];
k = PyArray_DIMS(a)[0];
lda = MAX(1, PyArray_STRIDES(a)[0] / PyArray_STRIDES(a)[PyArray_NDIM(a) - 1]);
ldb = MAX(1, PyArray_STRIDES(b)[0] / PyArray_STRIDES(b)[1]);
ldc = MAX(1, PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[PyArray_NDIM(c) - 1]);
}
else
{
k = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
k *= PyArray_DIMS(a)[i];
m = PyArray_DIMS(a)[0];
lda = MAX(1, k);
ldb = MAX(1, PyArray_STRIDES(b)[0] / PyArray_STRIDES(b)[PyArray_NDIM(b) - 1]);
ldc = MAX(1, PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1]);
}
int n = PyArray_DIMS(b)[0];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dgemm_(transa, "n", &m, &n, &k,
&(alpha.real),
DOUBLEP(a), &lda,
DOUBLEP(b), &ldb,
&(beta.real),
DOUBLEP(c), &ldc);
else
zgemm_(transa, "n", &m, &n, &k,
&alpha,
(void*)COMPLEXP(a), &lda,
(void*)COMPLEXP(b), &ldb,
&beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* mmm(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* M1;
char* trans1;
PyArrayObject* M2;
char* trans2;
Py_complex beta;
PyArrayObject* M3;
if (!PyArg_ParseTuple(args, "DOsOsDO",
&alpha, &M1, &trans1, &M2, &trans2, &beta, &M3))
return NULL;
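// NumPy arrays are row major while Fortran BLAS expects column major
// storage, so M3 = alpha * op(M1) * op(M2) + beta * M3 is evaluated
// through its transpose: the operands are passed in reverse order
// (M2 first) and the dimensions of M3 are read as (columns, rows).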
int m = PyArray_DIM(M3, 1);
int n = PyArray_DIM(M3, 0);
int k;
int bytes = PyArray_ITEMSIZE(M3);
int lda = MAX(1, PyArray_STRIDE(M2, 0) / bytes);
int ldb = MAX(1, PyArray_STRIDE(M1, 0) / bytes);
int ldc = MAX(1, PyArray_STRIDE(M3, 0) / bytes);
void* a = PyArray_DATA(M2);
void* b = PyArray_DATA(M1);
void* c = PyArray_DATA(M3);
if (*trans2 == 'n')
k = PyArray_DIM(M2, 0);
else
k = PyArray_DIM(M2, 1);
if (bytes == 8)
dgemm_(trans2, trans1, &m, &n, &k,
&(alpha.real), a, &lda, b, &ldb, &(beta.real), c, &ldc);
else
zgemm_(trans2, trans1, &m, &n, &k,
&alpha, a, &lda, b, &ldb, &beta, c, &ldc);
Py_RETURN_NONE;
}
PyObject* gemv(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* x;
Py_complex beta;
PyArrayObject* y;
char t = 't';
char* trans = &t;
if (!PyArg_ParseTuple(args, "DOODO|s", &alpha, &a, &x, &beta, &y, &trans))
return NULL;
int m, n, lda, itemsize, incx, incy;
if (*trans == 'n')
{
m = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
m *= PyArray_DIMS(a)[i];
n = PyArray_DIMS(a)[0];
lda = MAX(1, m);
}
else
{
n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a)-1; i++)
n *= PyArray_DIMS(a)[i];
m = PyArray_DIMS(a)[PyArray_NDIM(a)-1];
lda = MAX(1, m);
}
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
itemsize = sizeof(double);
else
itemsize = sizeof(double_complex);
incx = PyArray_STRIDES(x)[0]/itemsize;
incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dgemv_(trans, &m, &n,
&(alpha.real),
DOUBLEP(a), &lda,
DOUBLEP(x), &incx,
&(beta.real),
DOUBLEP(y), &incy);
else
zgemv_(trans, &m, &n,
&alpha,
(void*)COMPLEXP(a), &lda,
(void*)COMPLEXP(x), &incx,
&beta,
(void*)COMPLEXP(y), &incy);
Py_RETURN_NONE;
}
PyObject* axpy(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* x;
PyArrayObject* y;
if (!PyArg_ParseTuple(args, "DOO", &alpha, &x, &y))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(x)->type_num == NPY_DOUBLE)
daxpy_(&n, &(alpha.real),
DOUBLEP(x), &incx,
DOUBLEP(y), &incy);
else
zaxpy_(&n, &alpha,
(void*)COMPLEXP(x), &incx,
(void*)COMPLEXP(y), &incy);
Py_RETURN_NONE;
}
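/* czher: Hermitian rank-1 update a <- alpha * x * x^H + a with real alpha;
   only the lower triangle is written, as selected by the "l" passed to
   zher_. */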
PyObject* czher(PyObject *self, PyObject *args)
{
double alpha;
PyArrayObject* x;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "dOO", &alpha, &x, &a))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int lda = MAX(1, n);
zher_("l", &n, &(alpha),
(void*)COMPLEXP(x), &incx,
(void*)COMPLEXP(a), &lda);
Py_RETURN_NONE;
}
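/* rk: rank-k update of the symmetric/Hermitian matrix c.  With the default
   trans='c' this computes c <- alpha * a^H a + beta * c (a^T a in the real
   case) through dsyrk_/zherk_; only the upper ("u") triangle of c is
   referenced. */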
PyObject* rk(PyObject *self, PyObject *args)
{
double alpha;
PyArrayObject* a;
double beta;
PyArrayObject* c;
char t = 'c';
char* trans = &t;
if (!PyArg_ParseTuple(args, "dOdO|s", &alpha, &a, &beta, &c, &trans))
return NULL;
int n = PyArray_DIMS(c)[0];
int k, lda;
if (*trans == 'c') {
k = PyArray_DIMS(a)[1];
for (int d = 2; d < PyArray_NDIM(a); d++)
k *= PyArray_DIMS(a)[d];
lda = k;
}
else {
k = PyArray_DIMS(a)[0];
lda = n;
}
int ldc = PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dsyrk_("u", trans, &n, &k,
&alpha, DOUBLEP(a), &lda, &beta,
DOUBLEP(c), &ldc);
else
zherk_("u", trans, &n, &k,
&alpha, (void*)COMPLEXP(a), &lda, &beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* r2k(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* b;
double beta;
PyArrayObject* c;
if (!PyArg_ParseTuple(args, "DOOdO", &alpha, &a, &b, &beta, &c))
return NULL;
int n = PyArray_DIMS(a)[0];
int k = PyArray_DIMS(a)[1];
for (int d = 2; d < PyArray_NDIM(a); d++)
k *= PyArray_DIMS(a)[d];
int ldc = PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dsyr2k_("u", "t", &n, &k,
(double*)(&alpha), DOUBLEP(a), &k,
DOUBLEP(b), &k, &beta,
DOUBLEP(c), &ldc);
else
zher2k_("u", "c", &n, &k,
(void*)(&alpha), (void*)COMPLEXP(a), &k,
(void*)COMPLEXP(b), &k, &beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* dotc(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
if (!PyArg_ParseTuple(args, "OO", &a, &b))
return NULL;
int n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double result;
result = ddot_(&n, (void*)DOUBLEP(a),
&incx, (void*)DOUBLEP(b), &incy);
return PyFloat_FromDouble(result);
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex z = 0.0;
for (int i = 0; i < n; i++)
z += conj(ap[i]) * bp[i];
return PyComplex_FromDoubles(creal(z), cimag(z));
}
}
PyObject* dotu(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
if (!PyArg_ParseTuple(args, "OO", &a, &b))
return NULL;
int n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double result;
result = ddot_(&n, (void*)DOUBLEP(a),
&incx, (void*)DOUBLEP(b), &incy);
return PyFloat_FromDouble(result);
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex z = 0.0;
for (int i = 0; i < n; i++)
z += ap[i] * bp[i];
return PyComplex_FromDoubles(creal(z), cimag(z));
}
}
PyObject* multi_dotu(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
PyArrayObject* c;
if (!PyArg_ParseTuple(args, "OOO", &a, &b, &c))
return NULL;
int n0 = PyArray_DIMS(a)[0];
int n = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double *ap = DOUBLEP(a);
double *bp = DOUBLEP(b);
double *cp = DOUBLEP(c);
for (int i = 0; i < n0; i++)
{
cp[i] = ddot_(&n, (void*)ap,
&incx, (void*)bp, &incy);
ap += n;
bp += n;
}
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex* cp = COMPLEXP(c);
for (int i = 0; i < n0; i++)
{
cp[i] = 0.0;
for (int j = 0; j < n; j++)
cp[i] += ap[j] * bp[j];
ap += n;
bp += n;
}
}
Py_RETURN_NONE;
}
PyObject* multi_axpy(PyObject *self, PyObject *args)
{
PyArrayObject* alpha;
PyArrayObject* x;
PyArrayObject* y;
if (!PyArg_ParseTuple(args, "OOO", &alpha, &x, &y))
return NULL;
int n0 = PyArray_DIMS(x)[0];
int n = PyArray_DIMS(x)[1];
for (int d = 2; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int incy = 1;
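/* If the coefficients are real but x and y are complex, the complex data are
   reinterpreted below as 2*n interleaved doubles, so the same real daxpy_
   call scales both the real and imaginary parts. */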
if (PyArray_DESCR(alpha)->type_num == NPY_DOUBLE)
{
if (PyArray_DESCR(x)->type_num == NPY_CDOUBLE)
n *= 2;
double *ap = DOUBLEP(alpha);
double *xp = DOUBLEP(x);
double *yp = DOUBLEP(y);
for (int i = 0; i < n0; i++)
{
daxpy_(&n, &ap[i],
(void*)xp, &incx,
(void*)yp, &incy);
xp += n;
yp += n;
}
}
else
{
double_complex *ap = COMPLEXP(alpha);
double_complex *xp = COMPLEXP(x);
double_complex *yp = COMPLEXP(y);
for (int i = 0; i < n0; i++)
{
zaxpy_(&n, (void*)(&ap[i]),
(void*)xp, &incx,
(void*)yp, &incy);
xp += n;
yp += n;
}
}
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/ 0000775 0000000 0000000 00000000000 13164413722 0021276 5 ustar 00root root 0000000 0000000 gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/bmgs.c 0000664 0000000 0000000 00000001063 13164413722 0022372 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "fd.c"
#include "wfd.c"
#include "relax.c"
#include "wrelax.c"
#include "cut.c"
#include "zero.c"
#include "paste.c"
#include "spline.c"
#include "stencils.c"
#include "restrict.c"
#include "translate.c"
#include "interpolate.c"
#define BMGSCOMPLEX
#include "fd.c"
#include "wfd.c"
#include "cut.c"
#include "zero.c"
#include "paste.c"
#include "restrict.c"
#include "interpolate.c"
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/bmgs.h 0000664 0000000 0000000 00000011533 13164413722 0022402 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#ifndef DOUBLECOMPLEXDEFINED
# define DOUBLECOMPLEXDEFINED 1
# include <complex.h>
typedef double complex double_complex;
#endif
#undef T
#undef Z
#ifndef BMGSCOMPLEX
# define T double
# define Z(f) f
#else
# define T double_complex
# define Z(f) f ## z
#endif
#ifndef BMGS_H
#define BMGS_H
//#ifdef NO_C99_COMPLEX
typedef int bool;
#define true 1
#define false 0
//#else
//#include <stdbool.h>
//#endif
typedef struct
{
int ncoefs;
double* coefs;
long* offsets;
long n[3];
long j[3];
} bmgsstencil;
typedef struct
{
int l;
double dr;
int nbins;
double* data;
} bmgsspline;
bmgsstencil bmgs_stencil(int ncoefs, const double* coefs, const long* offsets,
int range, const long size[3]);
bmgsstencil bmgs_laplace(int k, double scale, const double h[3], const long n[3]);
bmgsstencil bmgs_mslaplaceA(double scale,
const double h[3],
const long n[3]);
bmgsstencil bmgs_mslaplaceB(const long n[3]);
bmgsstencil bmgs_gradient(int k, int i, double h,
const long n[3]);
void bmgs_deletestencil(bmgsstencil* spline);
bmgsspline bmgs_spline(int l, double dr, int nbins, double* f);
double bmgs_splinevalue(const bmgsspline* spline, double r);
void bmgs_get_value_and_derivative(const bmgsspline* spline, double r,
double *f, double *dfdr);
void bmgs_deletespline(bmgsspline* spline);
void bmgs_radial1(const bmgsspline* spline,
const int n[3], const double C[3],
const double h[3],
int* b, double* d);
void bmgs_radial2(const bmgsspline* spline, const int n[3],
const int* b, const double* d,
double* f, double* g);
void bmgs_radial3(const bmgsspline* spline, int m,
const int n[3],
const double C[3],
const double h[3],
const double* f, double* a);
void bmgs_radiald3(const bmgsspline* spline, int m, int c,
const int n[3],
const double C[3],
const double h[3],
const double* f, const double* g, double* a);
void bmgs_fd(const bmgsstencil* s, const double* a, double* b);
void bmgs_wfd(int nweights, const bmgsstencil* stencils, const double** weights, const double* a, double* b);
void bmgs_relax(const int relax_method, const bmgsstencil* s, double* a, double* b,
const double* src, const double w);
void bmgs_wrelax(const int relax_method, const int nweights, const bmgsstencil* stencils, const double** weights, double* a, double* b,
const double* src, const double w);
void bmgs_cut(const double* a, const int n[3], const int c[3],
double* b, const int m[3]);
void bmgs_zero(double* a, const int n[3], const int c[3],
const int s[3]);
void bmgs_paste(const double* a, const int n[3],
double* b, const int m[3], const int c[3]);
void bmgs_pastep(const double* a, const int n[3],
double* b, const int m[3], const int c[3]);
void bmgs_rotate(const double* a, const int size[3], double* b, double angle,
int d, long c, double*, long*, long*, double*, long*, long*,
int exact);
void bmgs_translate(double* a, const int sizea[3], const int size[3],
const int start1[3], const int start2[3]);
void bmgs_restrict(int k, double* a, const int n[3], double* b, double* w);
void bmgs_interpolate(int k, int skip[3][2],
const double* a, const int n[3],
double* b, double* w);
// complex routines:
void bmgs_fdz(const bmgsstencil* s, const double_complex* a,
double_complex* b);
void bmgs_wfdz(int nweights, const bmgsstencil* stencils, const double** weights, const double_complex* a, double_complex* b);
void bmgs_cutz(const double_complex* a, const int n[3],
const int c[3],
double_complex* b, const int m[3]);
void bmgs_cutmz(const double_complex* a, const int n[3],
const int c[3],
double_complex* b, const int m[3], double_complex phase);
void bmgs_zeroz(double_complex* a, const int n[3],
const int c[3],
const int s[3]);
void bmgs_pastez(const double_complex* a, const int n[3],
double_complex* b, const int m[3],
const int c[3]);
void bmgs_pastepz(const double_complex* a, const int n[3],
double_complex* b, const int m[3],
const int c[3]);
void bmgs_rotatez(const double_complex* a, const int size[3],
double_complex* b, double angle, int d,
long c, double*, long*, long*, double*, long*, long*,
int exact);
void bmgs_translatemz(double_complex* a, const int sizea[3], const int size[3],
const int start1[3], const int start2[3],
double_complex phase);
void bmgs_restrictz(int k, double_complex* a,
const int n[3], double_complex* b, double_complex* w);
void bmgs_interpolatez(int k, int skip[3][2],
const double_complex* a, const int n[3],
double_complex* b, double_complex* w);
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/cut.c 0000664 0000000 0000000 00000001753 13164413722 0022243 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <string.h>
#include "bmgs.h"
void Z(bmgs_cut)(const T* a, const int n[3], const int c[3],
T* b, const int m[3])
{
a += c[2] + (c[1] + c[0] * n[1]) * n[2];
for (int i0 = 0; i0 < m[0]; i0++)
{
for (int i1 = 0; i1 < m[1]; i1++)
{
memcpy(b, a, m[2] * sizeof(T));
a += n[2];
b += m[2];
}
a += n[2] * (n[1] - m[1]);
}
}
#ifdef BMGSCOMPLEX
void bmgs_cutmz(const double_complex* a, const int sizea[3],
const int start[3],
double_complex* b, const int sizeb[3], double_complex p)
{
a += start[2] + (start[1] + start[0] * sizea[1]) * sizea[2];
for (int i0 = 0; i0 < sizeb[0]; i0++)
{
for (int i1 = 0; i1 < sizeb[1]; i1++)
{
for (int i2 = 0; i2 < sizeb[2]; i2++)
b[i2] = p * a[i2];
a += sizea[2];
b += sizeb[2];
}
a += sizea[2] * (sizea[1] - sizeb[1]);
}
}
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/fd.c 0000664 0000000 0000000 00000003711 13164413722 0022035 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "../extensions.h"
#include "bmgs.h"
#include <pthread.h>
struct Z(fds){
int thread_id;
int nthds;
const bmgsstencil* s;
const T* a;
T* b;
};
void *Z(bmgs_fd_worker)(void *threadarg)
{
struct Z(fds) *args = (struct Z(fds) *) threadarg;
const T* a = args->a;
T* b = args->b;
const bmgsstencil* s = args->s;
int chunksize = s->n[0] / args->nthds + 1;
int nstart = args->thread_id * chunksize;
if (nstart >= s->n[0])
return NULL;
int nend = nstart + chunksize;
if (nend > s->n[0])
nend = s->n[0];
for (int i0 = nstart; i0 < nend; i0++)
{
const T* aa = a + i0 * (s->j[1] + s->n[1] * (s->j[2] + s->n[2]));
T* bb = b + i0 * s->n[1] * s->n[2];
for (int i1 = 0; i1 < s->n[1]; i1++)
{
#pragma omp simd
for (int i2 = 0; i2 < s->n[2]; i2++)
{
T x = 0.0;
for (int c = 0; c < s->ncoefs; c++)
x += aa[s->offsets[c]+i2] * s->coefs[c];
bb[i2] = x;
}
bb += s->n[2];
aa += s->j[2] + s->n[2];
}
}
return NULL;
}
void Z(bmgs_fd)(const bmgsstencil* s, const T* a, T* b)
{
a += (s->j[0] + s->j[1] + s->j[2]) / 2;
int nthds = 1;
#ifdef GPAW_OMP_MONLY
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct Z(fds) *wargs = GPAW_MALLOC(struct Z(fds), nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->s = s;
(wargs+i)->a = a;
(wargs+i)->b = b;
}
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, Z(bmgs_fd_worker), (void*) (wargs+i));
#endif
Z(bmgs_fd_worker)(wargs);
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/interpolate.c 0000664 0000000 0000000 00000010111 13164413722 0023762 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "bmgs.h"
#include <pthread.h>
#include "../extensions.h"
#ifdef K
struct IP1DA{
int thread_id;
int nthds;
const T* a;
int n;
int m;
T* b;
int *skip;
};
void *IP1DW(void *threadarg)
{
struct IP1DA *args = (struct IP1DA *) threadarg;
int m = args->m;
int chunksize = m / args->nthds + 1;
int nstart = args->thread_id * chunksize;
if (nstart >= m)
return NULL;
int nend = nstart + chunksize;
if (nend > m)
nend = m;
for (int j = nstart; j < nend; j++)
{
const T* aa = args->a + j * (K - 1 - args->skip[1] + args->n);
T* bb = args->b + j;
for (int i = 0; i < args->n; i++)
{
if (i == 0 && args->skip[0])
bb -= m;
else
bb[0] = aa[0];
if (i == args->n - 1 && args->skip[1])
bb -= m;
else
{
if (K == 2)
bb[m] = 0.5 * (aa[0] + aa[1]);
else if (K == 4)
bb[m] = ( 0.5625 * (aa[ 0] + aa[1]) +
-0.0625 * (aa[-1] + aa[2]));
else if (K == 6)
bb[m] = ( 0.58593750 * (aa[ 0] + aa[1]) +
-0.09765625 * (aa[-1] + aa[2]) +
0.01171875 * (aa[-2] + aa[3]));
else
bb[m] = ( 0.59814453125 * (aa[ 0] + aa[1]) +
-0.11962890625 * (aa[-1] + aa[2]) +
0.02392578125 * (aa[-2] + aa[3]) +
-0.00244140625 * (aa[-3] + aa[4]));
}
aa++;
bb += 2 * m;
}
}
return NULL;
}
void IP1D(const T* a, int n, int m, T* b, int skip[2])
{
a += K / 2 - 1;
int nthds = 1;
#ifdef GPAW_OMP_MONLY
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct IP1DA *wargs = GPAW_MALLOC(struct IP1DA, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->a = a;
(wargs+i)->n = n;
(wargs+i)->m = m;
(wargs+i)->b = b;
(wargs+i)->skip = skip;
}
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, IP1DW, (void*) (wargs+i));
#endif
IP1DW(wargs);
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
}
#else
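/* Note: this file includes itself (above) once per interpolation order.
   Each pass defines K = 2, 4, 6 or 8 together with order-specific names for
   the 1D kernel, its argument struct and its worker, so the block guarded by
   "#ifdef K" is compiled four times. */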
# define K 2
# define IP1D Z(bmgs_interpolate1D2)
# define IP1DA Z(bmgs_interpolate1D2_args)
# define IP1DW Z(bmgs_interpolate1D2_worker)
# include "interpolate.c"
# undef IP1D
# undef IP1DA
# undef IP1DW
# undef K
# define K 4
# define IP1D Z(bmgs_interpolate1D4)
# define IP1DA Z(bmgs_interpolate1D4_args)
# define IP1DW Z(bmgs_interpolate1D4_worker)
# include "interpolate.c"
# undef IP1D
# undef IP1DA
# undef IP1DW
# undef K
# define K 6
# define IP1D Z(bmgs_interpolate1D6)
# define IP1DA Z(bmgs_interpolate1D6_args)
# define IP1DW Z(bmgs_interpolate1D6_worker)
# include "interpolate.c"
# undef IP1D
# undef IP1DA
# undef IP1DW
# undef K
# define K 8
# define IP1D Z(bmgs_interpolate1D8)
# define IP1DA Z(bmgs_interpolate1D8_args)
# define IP1DW Z(bmgs_interpolate1D8_worker)
# include "interpolate.c"
# undef IP1D
# undef IP1DA
# undef IP1DW
# undef K
void Z(bmgs_interpolate)(int k, int skip[3][2],
const T* a, const int size[3], T* b, T* w)
{
void (*ip)(const T*, int, int, T*, int[2]);
if (k == 2)
ip = Z(bmgs_interpolate1D2);
else if (k == 4)
ip = Z(bmgs_interpolate1D4);
else if (k == 6)
ip = Z(bmgs_interpolate1D6);
else
ip = Z(bmgs_interpolate1D8);
int e = k - 1;
ip(a, size[2] - e + skip[2][1],
size[0] *
size[1],
b, skip[2]);
ip(b, size[1] - e + skip[1][1],
size[0] *
((size[2] - e) * 2 - skip[2][0] + skip[2][1]),
w, skip[1]);
ip(w, size[0] - e + skip[0][1],
((size[1] - e) * 2 - skip[1][0] + skip[1][1]) *
((size[2] - e) * 2 - skip[2][0] + skip[2][1]),
b, skip[0]);
}
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/paste.c 0000664 0000000 0000000 00000001634 13164413722 0022562 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "bmgs.h"
void Z(bmgs_paste)(const T* a, const int sizea[3],
T* b, const int sizeb[3], const int startb[3])
{
b += startb[2] + (startb[1] + startb[0] * sizeb[1]) * sizeb[2];
for (int i0 = 0; i0 < sizea[0]; i0++)
{
for (int i1 = 0; i1 < sizea[1]; i1++)
{
memcpy(b, a, sizea[2] * sizeof(T));
a += sizea[2];
b += sizeb[2];
}
b += sizeb[2] * (sizeb[1] - sizea[1]);
}
}
void Z(bmgs_pastep)(const T* a, const int sizea[3],
T* b, const int sizeb[3], const int startb[3])
{
b += startb[2] + (startb[1] + startb[0] * sizeb[1]) * sizeb[2];
for (int i0 = 0; i0 < sizea[0]; i0++)
{
for (int i1 = 0; i1 < sizea[1]; i1++)
{
for (int i2 = 0; i2 < sizea[2]; i2++)
b[i2] += *a++;
b += sizeb[2];
}
b += sizeb[2] * (sizeb[1] - sizea[1]);
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/relax.c 0000664 0000000 0000000 00000003701 13164413722 0022556 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "bmgs.h"
void bmgs_relax(const int relax_method, const bmgsstencil* s, double* a, double* b,
const double* src, const double w)
{
if (relax_method == 1)
{
/* Weighted Gauss-Seidel relaxation for the equation "operator" b = src
a contains the temporary array, which also holds the boundary values. */
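/* For each grid point i the update performed below is
       b[i] = (src[i] - sum_{c>=1} coefs[c] * a[i + offsets[c]]) / coefs[0],
   and a[i] is overwritten with the same value, so later points in the sweep
   already see the relaxed values (Gauss-Seidel ordering). */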
// Coefficient needed multiple times later
const double coef = 1.0/s->coefs[0];
// The number of steps in each direction
long nstep[3] = {s->n[0], s->n[1], s->n[2]};
a += (s->j[0] + s->j[1] + s->j[2]) / 2;
for (int i0 = 0; i0 < nstep[0]; i0++)
{
for (int i1 = 0; i1 < nstep[1]; i1++)
{
#pragma omp simd
for (int i2 = 0; i2 < nstep[2]; i2++)
{
double x = 0.0;
for (int c = 1; c < s->ncoefs; c++)
x += a[s->offsets[c] + i2] * s->coefs[c];
x = (src[i2] - x) * coef;
b[i2] = x;
a[i2] = x;
}
src += nstep[2];
b += nstep[2];
a += s->j[2] + nstep[2];
}
a += s->j[1];
}
}
else
{
/* Weighted Jacobi relaxation for the equation "operator" b = src
a contains the temporary array, which also holds the boundary values. */
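/* Each point is updated below as
       b[i] = (1 - w) * b[i]
              + w * (src[i] - sum_{c>=1} coefs[c] * a[i + offsets[c]]) / coefs[0],
   using only values of a from the previous sweep (Jacobi ordering). */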
a += (s->j[0] + s->j[1] + s->j[2]) / 2;
for (int i0 = 0; i0 < s->n[0]; i0++)
{
for (int i1 = 0; i1 < s->n[1]; i1++)
{
#pragma omp simd
for (int i2 = 0; i2 < s->n[2]; i2++)
{
double x = 0.0;
for (int c = 1; c < s->ncoefs; c++)
x += a[s->offsets[c] + i2] * s->coefs[c];
b[i2] = (1.0 - w) * b[i2] + w * (src[i2] - x)/s->coefs[0];
}
src += s->n[2];
b += s->n[2];
a += s->j[2] + s->n[2];
}
a += s->j[1];
}
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/restrict.c 0000664 0000000 0000000 00000007065 13164413722 0023311 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "bmgs.h"
#include <pthread.h>
#include "../extensions.h"
#ifdef K
struct RST1DA{
int thread_id;
int nthds;
const T* a;
int n;
int m;
T* b;
};
void *RST1DW(void *threadarg)
{
struct RST1DA *args = (struct RST1DA *) threadarg;
int m = args->m;
int chunksize = m / args->nthds + 1;
int nstart = args->thread_id * chunksize;
if (nstart >= m)
return NULL;
int nend = nstart + chunksize;
if (nend > m)
nend = m;
for (int j = nstart; j < nend; j++)
{
const T* aa = args->a + j * (args->n * 2 + K * 2 - 3);
T* bb = args->b + j;
for (int i = 0; i < args->n; i++)
{
if (K == 2)
bb[0] = 0.5 * (aa[0] +
0.5 * (aa[1] + aa[-1]));
else if (K == 4)
bb[0] = 0.5 * (aa[0] +
0.5625 * (aa[1] + aa[-1]) +
-0.0625 * (aa[3] + aa[-3]));
else if (K == 6)
bb[0] = 0.5 * (aa[0] +
0.58593750 * (aa[1] + aa[-1]) +
-0.09765625 * (aa[3] + aa[-3]) +
0.01171875 * (aa[5] + aa[-5]));
else
bb[0] = 0.5 * (aa[0] +
0.59814453125 * (aa[1] + aa[-1]) +
-0.11962890625 * (aa[3] + aa[-3]) +
0.02392578125 * (aa[5] + aa[-5]) +
-0.00244140625 * (aa[7] + aa[-7]));
aa += 2;
bb += m;
}
}
return NULL;
}
void RST1D(const T* a, int n, int m, T* b)
{
a += K - 1;
int nthds = 1;
#ifdef GPAW_OMP_MONLY
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct RST1DA *wargs = GPAW_MALLOC(struct RST1DA, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->a = a;
(wargs+i)->n = n;
(wargs+i)->m = m;
(wargs+i)->b = b;
}
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, RST1DW, (void*) (wargs+i));
#endif
RST1DW(wargs);
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
}
#else
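/* As in interpolate.c, the "#ifdef K" block above is compiled once for each
   restriction order K = 2, 4, 6, 8 by letting this file include itself. */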
# define K 2
# define RST1D Z(bmgs_restrict1D2)
# define RST1DA Z(bmgs_restrict1D2_args)
# define RST1DW Z(bmgs_restrict1D2_worker)
# include "restrict.c"
# undef RST1D
# undef RST1DA
# undef RST1DW
# undef K
# define K 4
# define RST1D Z(bmgs_restrict1D4)
# define RST1DA Z(bmgs_restrict1D4_args)
# define RST1DW Z(bmgs_restrict1D4_worker)
# include "restrict.c"
# undef RST1D
# undef RST1DA
# undef RST1DW
# undef K
# define K 6
# define RST1D Z(bmgs_restrict1D6)
# define RST1DA Z(bmgs_restrict1D6_args)
# define RST1DW Z(bmgs_restrict1D6_worker)
# include "restrict.c"
# undef RST1D
# undef RST1DA
# undef RST1DW
# undef K
# define K 8
# define RST1D Z(bmgs_restrict1D8)
# define RST1DA Z(bmgs_restrict1D8_args)
# define RST1DW Z(bmgs_restrict1D8_worker)
# include "restrict.c"
# undef RST1D
# undef RST1DA
# undef RST1DW
# undef K
void Z(bmgs_restrict)(int k, T* a, const int n[3], T* b, T* w)
{
void (*plg)(const T*, int, int, T*);
if (k == 2)
plg = Z(bmgs_restrict1D2);
else if (k == 4)
plg = Z(bmgs_restrict1D4);
else if (k == 6)
plg = Z(bmgs_restrict1D6);
else
plg = Z(bmgs_restrict1D8);
int e = k * 2 - 3;
plg(a, (n[2] - e) / 2, n[0] * n[1], w);
plg(w, (n[1] - e) / 2, n[0] * (n[2] - e) / 2, a);
plg(a, (n[0] - e) / 2, (n[1] - e) * (n[2] - e) / 4, b);
}
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/sharmonic.py 0000664 0000000 0000000 00000062536 13164413722 0023647 0 ustar 00root root 0000000 0000000 import numpy as np
from Numeric import pi, sqrt
from tools import factorial
from tools import Rational as Q
"""
This is a script designed for construction of the real solid spherical
harmonics (RSSH) in cartesian form. These can be written as::
    Y_L = Y_l^m = C_l^|m| * r^l * P_l^|m|(cos theta) * Phi_m(phi)

where C_l^|m| is a normalization constant,
P_l^|m| is the associated Legendre polynomial,
and:
                     /  cos(m phi),   m > 0
        Phi_m(phi) = |  1,            m = 0
                     \  sin(-m phi),  m < 0

The first few harmonics are listed below::
+----+---------------------+-__--------------------------+
| L  | l | m  | r^l * Y    | \/ (r^l * Y)                |
+----+---s----+------------+-----------------------------+
| 0  | 0 | 0  | 1          | (0, 0, 0)                   |
+----+---p----+------------+-----------------------------+
| 1  | 1 | -1 | y          | (0, 1, 0)                   |
| 2  | 1 | 0  | z          | (0, 0, 1)                   |
| 3  | 1 | 1  | x          | (1, 0, 0)                   |
+----+---d----+------------+-----------------------------+
| 4  | 2 | -2 | xy         | ( y, x, 0)                  |
| 5  | 2 | -1 | yz         | ( 0, z, y)                  |
| 6  | 2 | 0  | 3z^2-r^2   | (-x, -y, 2z)                |
| 7  | 2 | 1  | xz         | ( z, 0, x)                  |
| 8  | 2 | 2  | x^2-y^2    | ( x, -y, 0)                 |
+----+---f----+------------+-----------------------------+
| 9  | 3 | -3 | 3x^2y-y^3  | ( 2xy, x^2-y^2, 0)          |
| 10 | 3 | -2 | xyz        | ( yz, xz, xy)               |
| 11 | 3 | -1 | 5yz^2-yr^2 | ( -2xy, 4z^2-x^2-3y^2, 8yz) |
| 12 | 3 | 0  | 5z^3-3zr^2 | ( -2xz, -2yz, 3z^2-r^2)     |
| 13 | 3 | 1  | 5xz^2-xr^2 | (4z^2-3x^2-y^2, -2xy, 8xz)  |
| 14 | 3 | 2  | x^2z-y^2z  | ( 2xz, -2yz, x^2-y^2)       |
| 15 | 3 | 3  | x^3-3xy^2  | ( x^2-y^2, -2xy, 0)         |
+----+--------+------------+-----------------------------+
Y_lm is represented as a polynomial in x, y, and z
The function consists of three parts: a normalization constant accessed by
class 'Normalization(l, m)', a polynomial in z accessed with method
'legendre(l, m)', and a polynomial in x and y accessed with method 'Phi(l, m)'
The normalization and the z-polynomial are both invariant of the sign of m
The z-polynomial has powers l-|m|, l-|m|-2, l-|m|-4, l-..., i.e. it is strictly odd (even) if l-|m| is odd (even)
The combined power of x and y is |m| in all terms of Phi
"""
Y_lp = [{}, {}] # Global list of dictionaries for storing calculated
# Legendre polynomials, and Phi functions
#--------------------------- RELEVANT USER METHODS ---------------------------
def L_to_lm(L):
"""convert L index to (l, m) index"""
l = int(sqrt(L))
m = L - l**2 - l
return l, m
def lm_to_L(l,m):
"""convert (l, m) index to L index"""
return l**2 + l + m
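# A small worked example (values checked by hand): the composite index L = 6
# maps to the d-type harmonic (l, m) = (2, 0), and lm_to_L inverts the mapping:
#     L_to_lm(6)    ->  (2, 0)
#     lm_to_L(2, 0) ->  6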
def Y_to_string(l, m, deriv=None, multiply=None, numeric=False):
# for L in range(40): print L, Y_to_string(*L_to_lm(L))
""" l m
If deriv is None, return string representation of r * Y (x, y, z)
l
If deriv == q, return string is the derivative of above with respect
to x, y or z if q is 0, 1 or 2 respectively.
multiply=q indicates that the entire expression should be multiplied by
x, y or z if q is 0, 1 or 2 respectively.
numeric=True/False indicates whether the normalization constant should
be written as a numeric or an algebraic expression.
"""
assert deriv is None or deriv in range(3)
assert multiply is None or multiply in range(3)
if deriv is None:
norm, xyzs = Y_collect(l, m)
else:
norm, xyzs = dYdq(l, m, deriv)
if multiply is not None:
xyzs = q_times_xyzs(xyzs, multiply)
string = to_string(l, xyzs, deriv is not None, multiply is not None)
if string == '0': return '0'
else: return norm.tostring(numeric) + (' * ' + string) * (string != '1')
def gauss_to_string(l, m, numeric=False):
"""Return string representation of the generalized gaussian::
_____ 2
m / 1 l! l+3/2 -a r l m
g (x,y,z) = / ----- --------- (4 a) e r Y (x,y,z)
l \/ 4 pi (2l + 1)! l
numeric=True/False indicates whether the normalization constant should
be written as a number or an algebraic expression.
"""
norm, xyzs = Y_collect(l, m)
ng = Q(2**(2*l+3) * factorial(l), 2 * factorial(2 * l + 1))
norm.multiply(ng)
string = to_string(l, xyzs)
string = (' * ' + string) * (string != '1')
if numeric:
snorm = repr(eval(repr(norm.norm)))
else:
snorm = repr(norm.norm)
string = 'sqrt(a**%s*%s)/pi'%(2*l+3, snorm) + string
string += ' * exp(-a*r2)'
return string
def gauss_potential_to_string(l, m, numeric=False):
"""Return string representation of the potential of a generalized
gaussian.
The potential is determined by::

    v_l[ g_l^m(r) Y_l^m(r_hat) ](r) = v_l(r) Y_l^m(r_hat)

where::

    v_l(r) = 4 pi / (2l+1) * ( r^(-l-1) * Int_0^r  dx x^(l+2) g_l(x)
                               + r^l    * Int_r^oo dx x^(1-l) g_l(x) )
"""
v_l = [[Q(4,1), 1],
[Q(4,3), 1, 2],
[Q(4,15), 3, 6, 4],
[Q(4,105), 15, 30, 20, 8],
[Q(4,945), 105, 210, 140, 56, 16],
[Q(4,10395), 945, 1890, 1260, 504, 144, 32],
]
norm, xyzs = Y_collect(l, m)
norm.multiply(v_l[l][0])
string = txt_sqrt(norm.norm, numeric) + '*' + (l!=0)*'('
if numeric:
string += repr(v_l[l][1] * sqrt(pi))
else:
string += str(v_l[l][1]) + '*sqrt(pi)'
string += '*erf(sqrt(a)*r)'
if len(v_l[l]) > 2:
string += '-('
for n, coeff in enumerate(v_l[l][2:]):
if n == 0:
string += str(coeff)
else:
string += '+' + str(coeff) + '*(sqrt(a)*r)**%d'%(2*n)
string += ')*sqrt(a)*r*exp(-a*r2)'
if l == 0:
string += '/r'
elif l == 1:
string += ')/r/r2*' + to_string(l, xyzs)
else:
string += ')/r/r2**%d*'%l + to_string(l, xyzs)
return string
#----------------------------- TECHNICAL METHODS -----------------------------
def to_string(l, xyzs, deriv=False, multiply=False):
"""Return string representation of an xyz dictionary"""
if xyzs == {}: return '0'
out = ''
for xyz, coef in xyzs.items():
x, y, z = xyz
r = l - x - y - z - deriv + multiply
one = abs(coef) != 1 or (x == 0 and y == 0 and z == 0 and r == 0)
out += sign(coef) + str(abs(coef)) * one
out += ('*x'*x + '*y'*y + '*z'*z + '*r2'*(r/2))[1 - one:]
if out[0] == '+': out = out[1:]
if len(xyzs) > 1: out = '(' + out + ')'
return out
def sign(x):
"""Return string representation of the sign of x"""
if x >= 0: return '+'
else: return '-'
def txt_sqrt(norm, numeric=False):
if numeric:
return repr(sqrt(norm))
else:
if sqrt(norm) % 1 == 0:
return str(sqrt(norm))
else:
return 'sqrt(' + str(norm.nom) + \
('./' + str(norm.denom)) * (norm.denom != 1) + ')'
class Normalization:
"""Determine normalization factor of spherical harmonic
______________
/ / 2l+1 (l-m)!
| / ---- * ------ , m != 0
| \/ 2 pi (l+m)!
C = < _____
L | / 2l+1
| / ---- , m = 0
\ \/ 4 pi
"""
def __init__(self, l, m):
m = abs(m)
if m == 0:
self.norm = Q(2 * l + 1, 4)
else:
self.norm = Q((2 * l + 1) * factorial(l - m), 2 * factorial(l + m))
def __str__(self):
n = self.norm
sn = sqrt(n)
if int(sn) == sn:
string = repr(sn) + '/sqrt(pi)'
else:
string = 'sqrt(' + repr(n.nom) + \
('./' + repr(n.denom)) * (n.denom != 1) + '/pi)'
return string
def __repr__(self):
return repr(self.__float__())
def __float__(self):
return sqrt(self.norm / pi)
def multiply(self, x):
self.norm *= x**2
def tostring(self, numeric=False):
if numeric:
return self.__repr__()
else:
return self.__str__()
def legendre(l, m):
"""Determine z dependence of spherical harmonic.
Returns vector, where the p'th element is the coefficient of
z^p r^(l-|m|-p).
"""
# Check if requested has already been calculated
if (l, m) in Y_lp[0]:
return Y_lp[0][(l, m)]
m = abs(m)
assert l >= 0 and 0 <= m <=l
result = np.zeros(l - m + 1, 'O')
if l == m == 0:
"""Use that
0
P (z) = 1
0
"""
result[0] = Q(1)
elif l == m:
"""Use the recursion relation
m m-1
P (z) = (2m-1) P (z)
m m-1
"""
result[:] += (2 * m - 1) * legendre(l - 1, m - 1)
elif l == m + 1:
"""Use the recursion relation
l-1 l-1
P (z) = (2l-1)z P (z)
l l-1
"""
result[1:] += (2 * l - 1) * legendre(l-1, l-1)
else:
"""Use the recursion relation
m 2l-1 m l+m-1 2 m
P (z)= ---- z P (z) - ----- r P (z)
l l-m l-1 l-m l-2
"""
result[1:] += np.multiply(legendre(l - 1, m), Q(2 * l - 1, l - m))
result[:(l - 2) - m + 1] -= np.multiply(legendre(l - 2, m),
Q(l + m - 1, l - m))
# Store result in global dictionary
Y_lp[0][(l, m)] = result
return result
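# Example (coefficients worked out by hand): legendre(2, 0) returns the vector
# [-1/2, 0, 3/2], i.e. the coefficients of z^0 r^2 and z^2, reproducing
# r^2 * P_2(cos theta) = (3 z^2 - r^2) / 2.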
def Phi(m):
"""Determine the x and y dependence of the spherical harmonics from
|m| |m|
/ r sin (theta) cos(|m| phi), m >= 0
Phi (phi) = |
m | |m| |m|
\ r sin (theta) sin(|m| phi), m < 0
Returns dictionary of format {(i, j): c} where c is the coefficient
of x^i y^j
"""
# Check if requested has already been calculated
if m in Y_lp[1]:
return Y_lp[1][m]
if m == 0:
xys = {(0, 0): 1} # use that Phi_0 = 1
elif m == 1:
xys = {(1, 0): 1} # use that Phi_1 = x
elif m == -1:
xys = {(0, 1): 1} # use that Phi_-1 = y
else:
"""Use the recurrence formula
m > 0: Phi (x,y) = x Phi (x,y) - y Phi (x,y)
|m| |m|-1 1-|m|
m < 0: Phi (x,y) = y Phi (x,y) + x Phi (x,y)
|m| |m|-1 1-|m|
"""
xys = {}
phi1 = Phi(abs(m) - 1)
phi2 = Phi(1 - abs(m))
for x, y in phi1:
new = (x + (m > 0), y + (m < 0))
xys[new] = xys.get(new, 0) + phi1[(x, y)]
for x,y in phi2:
new = (x + (m < 0), y + (m > 0))
sign = 2 * (m < 0) - 1
xys[new] = xys.get(new, 0) + sign * phi2[(x, y)]
# Store result in global dictionary
Y_lp[1][m] = xys
return xys
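# Example (worked out by hand): Phi(2) returns {(2, 0): 1, (0, 2): -1},
# i.e. x^2 - y^2, and Phi(-2) returns {(1, 1): 2}, i.e. 2*x*y.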
def Y_collect(l, m):
"""Collect all necessary parts of spherical harmonic and return in
simplified format.
Return dictionary xyzs has format {(i, j, k): c} where c is the
coefficient of x^i y^j z^k r^(l-|m|-k), or (since i+j = |m|) the
coefficient of x^i y^j z^k r^(l-i-j-k), from which it is clear that all
terms are of power l in x, y and z collectively.
"""
zs = legendre(l, m)
xys = Phi(m)
xyzs = {}
for xy in xys:
if xys[xy] != 0:
for p in range(len(zs)):
if zs[p] != 0:
xyzs[xy + (p,)] = xys[xy] * zs[p]
# get normalization constant and simplify
norm = Normalization(l, m)
norm.multiply(simplify(xyzs))
return norm, xyzs
def Y_collect2(l, m):
"""Same as Y_collect, but collective power of x, y, and z are
adjusted, such the it is always equal to l (thus avoiding
multiplication by r)
"""
norm, p = Y_collect(l, m)
done = False
while not done:
p2 = {}
done = True
for (nx, ny, nz), c in p.items():
n = nx + ny + nz
if n < l:
p2[(nx + 2, ny, nz)] = p2.get((nx + 2, ny, nz), 0) + c
p2[(nx, ny + 2, nz)] = p2.get((nx, ny + 2, nz), 0) + c
p2[(nx, ny, nz + 2)] = p2.get((nx, ny, nz + 2), 0) + c
if n + 2 < l:
done = False
else:
assert n == l
p2[(nx, ny, nz)] = p2.get((nx, ny, nz), 0) + c
p = p2
p2 = p.copy()
for n, c in p.items():
if c == 0:
del p2[n]
return norm, p2
def dYdq(l, m, q):
"""Returns a normalization constant, and a dictionary describing
the functional form of the derivative of r^l Y_l^m(x,y,z) with
respect to x, y or z if q is either 0, 1 or 2 respectively. The
format of the output dictionary is {(i, j, k): c}, where c is the
coefficient of x^i y^j z^k r^(l-i-j-k-1).
"""
norm, xyzs = Y_collect(l, m)
dxyzs = {}
for xyz, coef in xyzs.items():
x, y, z = xyz
r = l - x - y - z
# chain rule: diff coordinate q only
if xyz[q] != 0:
dxyz = list(xyz)
dxyz[q] -= 1
dxyz = tuple(dxyz)
dxyzs[dxyz] = dxyzs.get(dxyz, 0) + xyz[q] * coef
# chain rule: diff coordinate r only
if r != 0:
dxyz = list(xyz)
dxyz[q] += 1
dxyz = tuple(dxyz)
dxyzs[dxyz] = dxyzs.get(dxyz, 0) + r * coef
# remove zeros from list
for dxyz in dxyzs.keys():
if dxyzs[dxyz] == 0: dxyzs.pop(dxyz)
# simplify
if dxyzs != {}: norm.multiply(simplify(dxyzs))
return norm, dxyzs
def simplify(xyzs):
"""Rescale coefficients to smallest integer value"""
norm = Q(1)
numxyz = np.array(xyzs.values())
# up-scale all 'xyz' coefficients to integers
for xyz in numxyz:
numxyz *= xyz.denom
norm /= xyz.denom
# determine least common divisor for 'xyz' coefficients
dmax = 1
num_max = max(abs(np.floor(numxyz)))
for d in range(2, num_max + 1):
test = numxyz / d
if np.alltrue(test == np.floor(test)): dmax = d
# Update simplified dictionary
norm *= dmax
for i, xyz in enumerate(xyzs):
xyzs[xyz] = numxyz[i] / dmax
return norm
def q_times_xyzs(xyzs, q):
"""multiply xyz dictionary by x, y, or z according to q = 0, 1, or 2"""
qxyzs = {}
for xyz, c in xyzs.items():
qxyz = list(xyz)
qxyz[q] += 1
qxyz = tuple(qxyz)
qxyzs[qxyz] = c
return qxyzs
#--------------------- TEST AND CODE CONSTRUCTING METHODS ---------------------
def orthogonal(L1, L2):
"""Perform the integral
2pi pi
/ /
I = | |sin(theta) d(theta) d(phi) Y (theta, phi) * Y (theta, phi)
/ / L1 L2
0 0
which should be a kronecker delta in L1 and L2
"""
I = 0.0
N = 40
for theta in np.arange(0, pi, pi / N):
for phi in np.arange(0, 2 * pi, 2 * pi / N):
x = np.cos(phi) * np.sin(theta)
y = np.sin(phi) * np.sin(theta)
z = np.cos(theta)
r2 = x*x + y*y + z*z
Y1 = eval(Y_to_string(*L_to_lm(L1)))
Y2 = eval(Y_to_string(*L_to_lm(L2)))
I += np.sin(theta) * Y1 * Y2
I *= 2 * (pi / N)**2
return I
def check_orthogonality(Lmax=10):
"""Check orthogonality for all combinations of the first few harmonics"""
all_passed = True
for L1 in range(Lmax+1):
for L2 in range(L1, Lmax+1):
I = orthogonal(L1, L2)
passed = abs(I - (L1 == L2)) < 3e-3
all_passed *= passed
print('L1 = %s, L2 = %s, passed = %s, I = %s' %(L1, L2, passed, I))
if all_passed: print('All tests passed')
else: print('Some tests failed')
def symmetry1(lmax, display=True):
"""Make dictionary of format
diff = {(l1, m1, q1): (nrel, l2, m2, q2)}
indicating that
m1 m2
d Y d Y
l1 l2
------ = nrel * ------
d q1 d q2
"""
diff = {} # diff[(l1, m1, q1)] = (nrel, l2, m2, q2)
unique_L = [] # unique_L[L] = (l, m, q, norm, dxyzs)
for L in range((lmax + 1)**2):
l, m = L_to_lm(L)
for q in range(3):
identical = False
name = (l, m, 'xyz'[q])
norm, dxyzs = dYdq(l, m, q)
for unique in unique_L:
if dxyzs == unique[4]:
diff[name] = (norm.eval() / unique[3],) + unique[0:3]
identical = True
break
if identical == False:
unique_L.append(name + (norm.eval(), dxyzs))
if display:
for key, value in diff.items():
print(str(key) + ' = ' + str(value[0]) + ' * ' + str(value[1:]))
else: return diff
def symmetry2(l, display=True):
"""Make dictionary of format
diff = {(l1, m1, q1): (nrel, l2, m2, q2)}
indicating that
m1 m2
d Y d Y
l1 l2
------ = nrel * ------
d q1 d q2
and
m1 m2
q1 * Y = nrel * q2 * Y
l1 l2
"""
diff = {} # diff[(l1, m1, q1)] = (nrel, l2, m2, q2)
unique_L = [] # unique_L[L] = (l, m, q, dnorm, dxyzs, qnorm, qxyzs)
for m in range(-l, l+1):
for q in range(3):
identical = False
name = (l, m, q)
qnorm, xyzs = Y_collect(l, m)
qxyzs = q_times_xyzs(xyzs, q)
dnorm, dxyzs = dYdq(l, m, q)
for unique in unique_L:
if dxyzs == unique[4] and qxyzs == unique[6]:
dnrel = dnorm.eval() / unique[3]
qnrel = qnorm.eval() / unique[5]
print(dnrel == qnrel)
if dnrel == qnrel:
diff[name] = (dnrel,) + unique[0:3]
identical = True
break
if identical == False:
unique_L.append(name + (dnorm.eval(), dxyzs,
qnorm.eval(), qxyzs))
if display:
for key, value in diff.items():
print(str(key) + ' = ' + str(value[0]) + ' * ' + str(value[1:]))
else: return diff
def construct_spherical_harmonic_c_function(file, lmax, funcname,
multiply=None, deriv=None):
"""Construct a macro for evaluating values of spherical harmonics,
or the derivative of any spherical harmonic with respect to some axis.
The deriv keyword corresponds to that of the Y_to_string function."""
w = file.write
indent = 0
def wn(string=''):
w(2 * indent * ' ')
w(string)
w('\\\n')
wn('#define %s(l, f, x, y, z, r2, p) (' % funcname)
indent = 2
wn('{')
wn(' switch(l)')
wn(' {')
switchindent = 3
indent += switchindent
for l in range(lmax + 1):
wn('case %d:' % l)
indent += 1
for M, m in enumerate(range(-l, l + 1)):
Ystr = Y_to_string(l, m, numeric=True, deriv=deriv)
wn('p[%d] = f * %s;' % (M, Ystr))
wn('break;')
indent -= 1
wn('default:')
wn(' assert(0 == 1);')
indent -= switchindent
wn(' }')
wn('}')
indent = 0
wn(')')
w('\n')
def construct_spherical_harmonic_c_code(filename='spherical_harmonics.h',
lmax=4):
"""Construct macros for evaluating spherical harmonics as well as their
derivatives."""
file = open(filename, 'w')
construct = construct_spherical_harmonic_c_function
construct(file, lmax, 'spherical_harmonics')
for c in range(3):
construct(file, lmax, 'spherical_harmonics_derivative_%s' % 'xyz'[c],
multiply=c, deriv=c)
file.close()
def construct_c_code(file='temp.c', lmax=3):
"""Method for generating the code in c/spline.c"""
txt = '//Computer generated code! Hands off!'
start_func = """
// inserts values of f(r) r^l Y_lm(theta, phi) in elements of input array 'a'
void bmgs_radial3(const bmgsspline* spline, int m,
const int n[3],
const double C[3],
const double h[3],
const double* f, double* a)
{
int l = spline->l;
if (l == 0)
for (int q = 0; q < n[0] * n[1] * n[2]; q++)
a[q] = 0.28209479177387814 * f[q];
"""
start_deriv = """
// insert values of
//  d( f(r) * r^l Y_l^m )                           d( r^l Y_l^m )
//  --------------------- = g(r) q r^l Y_l^m + f(r) --------------
//            dq                                          dq
// where q={x, y, z} and g(r) = 1/r*(df/dr)
void bmgs_radiald3(const bmgsspline* spline, int m, int c,
const int n[3],
const double C[3],
const double h[3],
const double* f, const double* g, double* a)
{
int l = spline->l;
"""
start_case = """
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
"""
end_case = """
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
"""
# insert code for evaluating the function
txt += start_func
for l in range(1, lmax + 1):
txt += ' else if (l == %s)' %l
txt += start_case
case = ''
for m in range(-l, l+1):
if m == -l: case += ' ' * 18 + 'if (m == %s)\n' %m
elif m == l: case += '\n' + ' ' * 18 +'else\n'
else: case += '\n' + ' ' * 18 + 'else if (m == %s)\n' %m
case += ' ' * 20 + 'a[q] = f[q] * '
case += Y_to_string(l,m, numeric=True) + ';'
if 'r2' in case: txt += ' ' * 18 + 'double r2 = x*x+y*y+z*z;\n'
txt += case
txt += end_case
txt += """ else
assert(0 == 1);
}
"""
# insert code for evaluating the derivative
txt += start_deriv
for q in range(3):
txt += ' // ' + 'xyz'[q] + '\n'
for l in range(0, lmax + 1):
if l == 0 and q == 0:
txt += ' if (c == 0 && l == 0)'
else: txt += ' else if (c == %s && l == %s)' %(q, l)
txt += start_case
case = ''
for m in range(-l, l+1):
if m == -l: case += ' ' * 18 + 'if (m == %s)\n' %m
elif m == l: case += '\n' + ' ' * 18 + 'else\n'
else: case += '\n' + ' ' * 18 + 'else if (m == %s)\n' %m
case += ' ' * 20 + 'a[q] = g[q] * '
case += Y_to_string(l, m, multiply=q, numeric=True)
diff = Y_to_string(l, m, deriv=q, numeric=True)
if diff != '0':
case += ' + f[q] * ' + diff
case += ';'
if 'r2' in case: txt += ' ' * 18 + 'double r2 = x*x+y*y+z*z;\n'
txt += case
txt += end_case
txt += """ else
assert(0 == 1);
}
"""
f = open(file, 'w')
print(txt, file=f)
f.close()
def construct_gauss_code(lmax=2):
"""Method for generating the code in gpaw/utilities/gauss.py"""
Lmax = (lmax + 1)**2
out= 'Y_L = [\n'
for L in range(Lmax):
l, m = L_to_lm(L)
out+= ' \'' + Y_to_string(l, m, numeric=True) + '\',\n'
out += ']'
out += '\ngauss_L = [\n'
for L in range(Lmax):
l, m = L_to_lm(L)
out += ' \'' + gauss_to_string(l, m, numeric=True) + '\',\n'
out += ']'
out += '\ngausspot_L = [\n'
for L in range(Lmax):
l, m = L_to_lm(L)
out += ' \'' + gauss_potential_to_string(l, m, numeric=True) + '\',\n'
out += ']'
print(out)
def construct_spherical_code(lmax=3):
"""Method for generating the code in gpaw/spherical_harmonics.py"""
YL = []
norms = []
for L in range((lmax+1)**2):
#norm, xyzs = Y_collect(*L_to_lm(L))
norm, xyzs = Y_collect2(*L_to_lm(L))
norms.append(str(norm))
YL.append(zip(xyzs.values(), xyzs.keys()))
print('Y_L = [')
for L, Y in enumerate(YL):
l = sqrt(L)
if l % 1 == 0:
print(' #' + 'spdfghijklmn'[int(l)] + ':')
print(' %s,' % Y)
print(']')
print('norms =', norms)
if __name__ == '__main__':
construct_spherical_harmonic_c_code()
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/spherical_harmonics.h 0000664 0000000 0000000 00000015261 13164413722 0025471 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#define spherical_harmonics(l, f, x, y, z, r2, p) (\
{\
switch(l)\
{\
case 0:\
p[0] = f * 0.28209479177387814;\
break;\
case 1:\
p[0] = f * 0.48860251190291992 * y;\
p[1] = f * 0.48860251190291992 * z;\
p[2] = f * 0.48860251190291992 * x;\
break;\
case 2:\
p[0] = f * 1.0925484305920792 * x*y;\
p[1] = f * 1.0925484305920792 * y*z;\
p[2] = f * 0.31539156525252005 * (-r2+3*z*z);\
p[3] = f * 1.0925484305920792 * x*z;\
p[4] = f * 0.54627421529603959 * (-y*y+x*x);\
break;\
case 3:\
p[0] = f * 0.59004358992664352 * (-y*y*y+3*x*x*y);\
p[1] = f * 2.8906114426405538 * x*y*z;\
p[2] = f * 0.45704579946446577 * (5*y*z*z-y*r2);\
p[3] = f * 0.3731763325901154 * (-3*z*r2+5*z*z*z);\
p[4] = f * 0.45704579946446577 * (-x*r2+5*x*z*z);\
p[5] = f * 1.4453057213202769 * (-y*y*z+x*x*z);\
p[6] = f * 0.59004358992664352 * (x*x*x-3*x*y*y);\
break;\
case 4:\
p[0] = f * 2.5033429417967046 * (x*x*x*y-x*y*y*y);\
p[1] = f * 1.7701307697799307 * (3*x*x*y*z-y*y*y*z);\
p[2] = f * 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);\
p[3] = f * 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);\
p[4] = f * 0.10578554691520431 * (3*r2*r2-30*z*z*r2+35*z*z*z*z);\
p[5] = f * 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);\
p[6] = f * 0.47308734787878004 * (y*y*r2+7*x*x*z*z-x*x*r2-7*y*y*z*z);\
p[7] = f * 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);\
p[8] = f * 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);\
break;\
default:\
assert(0 == 1);\
}\
}\
)\
#define spherical_harmonics_derivative_x(l, f, x, y, z, r2, p) (\
{\
switch(l)\
{\
case 0:\
p[0] = f * 0;\
break;\
case 1:\
p[0] = f * 0;\
p[1] = f * 0;\
p[2] = f * 0.48860251190291992;\
break;\
case 2:\
p[0] = f * 1.0925484305920792 * y;\
p[1] = f * 0;\
p[2] = f * 0.63078313050504009 * -x;\
p[3] = f * 1.0925484305920792 * z;\
p[4] = f * 1.0925484305920792 * x;\
break;\
case 3:\
p[0] = f * 3.5402615395598613 * x*y;\
p[1] = f * 2.8906114426405538 * y*z;\
p[2] = f * 0.91409159892893155 * -x*y;\
p[3] = f * 2.2390579955406924 * -x*z;\
p[4] = f * 0.45704579946446577 * (-r2-2*x*x+5*z*z);\
p[5] = f * 2.8906114426405538 * x*z;\
p[6] = f * 1.7701307697799307 * (-y*y+x*x);\
break;\
case 4:\
p[0] = f * 2.5033429417967046 * (-y*y*y+3*x*x*y);\
p[1] = f * 10.620784618679583 * x*y*z;\
p[2] = f * 0.94617469575756008 * (7*y*z*z-y*r2-2*x*x*y);\
p[3] = f * 4.0142792613437353 * -x*y*z;\
p[4] = f * 1.2694265629824517 * (x*r2-5*x*z*z);\
p[5] = f * 0.66904654355728921 * (-3*z*r2-6*x*x*z+7*z*z*z);\
p[6] = f * 0.94617469575756008 * (-x*r2-x*x*x+x*y*y+7*x*z*z);\
p[7] = f * 5.3103923093397913 * (-y*y*z+x*x*z);\
p[8] = f * 2.5033429417967046 * (-3*x*y*y+x*x*x);\
break;\
default:\
assert(0 == 1);\
}\
}\
)\
#define spherical_harmonics_derivative_y(l, f, x, y, z, r2, p) (\
{\
switch(l)\
{\
case 0:\
p[0] = f * 0;\
break;\
case 1:\
p[0] = f * 0.48860251190291992;\
p[1] = f * 0;\
p[2] = f * 0;\
break;\
case 2:\
p[0] = f * 1.0925484305920792 * x;\
p[1] = f * 1.0925484305920792 * z;\
p[2] = f * 0.63078313050504009 * -y;\
p[3] = f * 0;\
p[4] = f * 1.0925484305920792 * -y;\
break;\
case 3:\
p[0] = f * 1.7701307697799307 * (-y*y+x*x);\
p[1] = f * 2.8906114426405538 * x*z;\
p[2] = f * 0.45704579946446577 * (-2*y*y-r2+5*z*z);\
p[3] = f * 2.2390579955406924 * -y*z;\
p[4] = f * 0.91409159892893155 * -x*y;\
p[5] = f * 2.8906114426405538 * -y*z;\
p[6] = f * 3.5402615395598613 * -x*y;\
break;\
case 4:\
p[0] = f * 2.5033429417967046 * (x*x*x-3*x*y*y);\
p[1] = f * 5.3103923093397913 * (-y*y*z+x*x*z);\
p[2] = f * 0.94617469575756008 * (-x*r2-2*x*y*y+7*x*z*z);\
p[3] = f * 0.66904654355728921 * (-6*y*y*z-3*z*r2+7*z*z*z);\
p[4] = f * 1.2694265629824517 * (-5*y*z*z+y*r2);\
p[5] = f * 4.0142792613437353 * -x*y*z;\
p[6] = f * 0.94617469575756008 * (y*y*y-7*y*z*z+y*r2-x*x*y);\
p[7] = f * 10.620784618679583 * -x*y*z;\
p[8] = f * 2.5033429417967046 * (y*y*y-3*x*x*y);\
break;\
default:\
assert(0 == 1);\
}\
}\
)\
#define spherical_harmonics_derivative_z(l, f, x, y, z, r2, p) (\
{\
switch(l)\
{\
case 0:\
p[0] = f * 0;\
break;\
case 1:\
p[0] = f * 0;\
p[1] = f * 0.48860251190291992;\
p[2] = f * 0;\
break;\
case 2:\
p[0] = f * 0;\
p[1] = f * 1.0925484305920792 * y;\
p[2] = f * 1.2615662610100802 * z;\
p[3] = f * 1.0925484305920792 * x;\
p[4] = f * 0;\
break;\
case 3:\
p[0] = f * 0;\
p[1] = f * 2.8906114426405538 * x*y;\
p[2] = f * 3.6563663957157262 * y*z;\
p[3] = f * 1.1195289977703462 * (-r2+3*z*z);\
p[4] = f * 3.6563663957157262 * x*z;\
p[5] = f * 1.4453057213202769 * (-y*y+x*x);\
p[6] = f * 0;\
break;\
case 4:\
p[0] = f * 0;\
p[1] = f * 1.7701307697799307 * (-y*y*y+3*x*x*y);\
p[2] = f * 11.354096349090721 * x*y*z;\
p[3] = f * 2.0071396306718676 * (5*y*z*z-y*r2);\
p[4] = f * 1.6925687506432689 * (-3*z*r2+5*z*z*z);\
p[5] = f * 2.0071396306718676 * (-x*r2+5*x*z*z);\
p[6] = f * 5.6770481745453605 * (-y*y*z+x*x*z);\
p[7] = f * 1.7701307697799307 * (x*x*x-3*x*y*y);\
p[8] = f * 0;\
break;\
default:\
assert(0 == 1);\
}\
}\
)\
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/spline.c 0000664 0000000 0000000 00000060042 13164413722 0022736 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include "bmgs.h"
bmgsspline bmgs_spline(int l, double dr, int nbins, double* f)
{
double c = 3.0 / (dr * dr);
double* f2 = (double*)malloc((nbins + 1) * sizeof(double));
assert(f2 != NULL);
double* u = (double*)malloc(nbins * sizeof(double));
assert(u != NULL);
f2[0] = -0.5;
u[0] = (f[1] - f[0]) * c;
for (int b = 1; b < nbins; b++)
{
double p = 0.5 * f2[b - 1] + 2.0;
f2[b] = -0.5 / p;
u[b] = ((f[b + 1] - 2.0 * f[b] + f[b - 1]) * c - 0.5 * u[b - 1]) / p;
}
f2[nbins] = ((f[nbins - 1] * c - 0.5 * u[nbins - 1]) /
(0.5 * f2[nbins - 1] + 1.0));
for (int b = nbins - 1; b >= 0; b--)
f2[b] = f2[b] * f2[b + 1] + u[b];
double* data = (double*)malloc(4 * (nbins + 1) * sizeof(double));
assert(data != NULL);
bmgsspline spline = {l, dr, nbins, data};
for (int b = 0; b < nbins; b++)
{
*data++ = f[b];
*data++ = (f[b + 1] - f[b]) / dr - (f2[b] / 3 + f2[b + 1] / 6) * dr;
*data++ = 0.5 * f2[b];
*data++ = (f2[b + 1] - f2[b]) / (6 * dr);
}
data[0] = 0.0;
data[1] = 0.0;
data[2] = 0.0;
data[3] = 0.0;
free(u);
free(f2);
return spline;
}
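/* Note: bmgs_spline stores, for each of the nbins bins of width dr, the four
   coefficients of a cubic polynomial in u = r - b * dr, so that
   bmgs_splinevalue below evaluates s[0] + u*(s[1] + u*(s[2] + u*s[3])).
   The tridiagonal solve above determines the second derivatives f2; the
   boundary handling appears to impose a vanishing first derivative at r = 0,
   a common choice for splined radial functions. */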
double bmgs_splinevalue(const bmgsspline* spline, double r)
{
int b = r / spline->dr;
if (b >= spline->nbins)
return 0.0;
double u = r - b * spline->dr;
double* s = spline->data + 4 * b;
return s[0] + u * (s[1] + u * (s[2] + u * s[3]));
}
void bmgs_get_value_and_derivative(const bmgsspline* spline, double r,
double *f, double *dfdr)
{
int b = r / spline->dr;
if (b >= spline->nbins)
{
*f = 0.0;
*dfdr = 0.0;
return;
}
double u = r - b * spline->dr;
double* s = spline->data + 4 * b;
*f = s[0] + u * (s[1] + u * (s[2] + u * s[3]));
*dfdr = s[1] + u * (2.0 * s[2] + u * 3.0 * s[3]);
}
void bmgs_deletespline(bmgsspline* spline)
{
free(spline->data);
}
void bmgs_radial1(const bmgsspline* spline,
const int n[3], const double C[3],
const double h[3],
int* b, double* d)
{
int nbins = spline->nbins;
double dr = spline->dr;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double xx = x * x;
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double xxpyy = xx + y * y;
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++)
{
double r = sqrt(xxpyy + z * z);
int j = r / dr;
if (j < nbins)
{
*b++ = j;
*d++ = r - j * dr;
}
else
{
*b++ = nbins;
*d++ = 0.0;
}
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
void bmgs_radial2(const bmgsspline* spline, const int n[3],
const int* b, const double* d,
double* f, double* g)
{
double dr = spline->dr;
for (int q = 0; q < n[0] * n[1] * n[2]; q++)
{
int j = b[q];
const double* s = spline->data + 4 * j;
double u = d[q];
f[q] = s[0] + u * (s[1] + u * (s[2] + u * s[3]));
if (g != 0)
{
if (j == 0)
g[q] = 2.0 * s[2] + u * 3.0 * s[3];
else
g[q] = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / (j * dr + u);
}
}
}
//Computer generated code! Hands off!
// inserts values of f(r) r^l Y_lm(theta, phi) in elements of input array 'a'
void bmgs_radial3(const bmgsspline* spline, int m,
const int n[3],
const double C[3],
const double h[3],
const double* f, double* a)
{
int l = spline->l;
if (l == 0)
for (int q = 0; q < n[0] * n[1] * n[2]; q++)
a[q] = 0.28209479177387814 * f[q];
else if (l == 1)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == -1)
a[q] = f[q] * 0.48860251190291992 * y;
else if (m == 0)
a[q] = f[q] * 0.48860251190291992 * z;
else
a[q] = f[q] * 0.48860251190291992 * x;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (l == 2)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -2)
a[q] = f[q] * 1.0925484305920792 * x*y;
else if (m == -1)
a[q] = f[q] * 1.0925484305920792 * y*z;
else if (m == 0)
a[q] = f[q] * 0.31539156525252005 * (3*z*z-r2);
else if (m == 1)
a[q] = f[q] * 1.0925484305920792 * x*z;
else
a[q] = f[q] * 0.54627421529603959 * (x*x-y*y);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (l == 3)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -3)
a[q] = f[q] * 0.59004358992664352 * (-y*y*y+3*x*x*y);
else if (m == -2)
a[q] = f[q] * 2.8906114426405538 * x*y*z;
else if (m == -1)
a[q] = f[q] * 0.45704579946446577 * (-y*r2+5*y*z*z);
else if (m == 0)
a[q] = f[q] * 0.3731763325901154 * (5*z*z*z-3*z*r2);
else if (m == 1)
a[q] = f[q] * 0.45704579946446577 * (5*x*z*z-x*r2);
else if (m == 2)
a[q] = f[q] * 1.4453057213202769 * (x*x*z-y*y*z);
else
a[q] = f[q] * 0.59004358992664352 * (x*x*x-3*x*y*y);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (l == 4)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -4)
a[q] = f[q] * 2.5033429417967046 * (x*x*x*y-x*y*y*y);
else if (m == -3)
a[q] = f[q] * 1.7701307697799307 * (-y*y*y*z+3*x*x*y*z);
else if (m == -2)
a[q] = f[q] * 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);
else if (m == -1)
a[q] = f[q] * 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);
else if (m == 0)
a[q] = f[q] * 0.10578554691520431 * (-30*z*z*r2+3*r2*r2+35*z*z*z*z);
else if (m == 1)
a[q] = f[q] * 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);
else if (m == 2)
a[q] = f[q] * 0.47308734787878004 * (-x*x*r2+7*x*x*z*z+y*y*r2-7*y*y*z*z);
else if (m == 3)
a[q] = f[q] * 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);
else
a[q] = f[q] * 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else
assert(0 == 1);
}
// insert values of
//  d( f(r) * r^l Y_l^m )                           d( r^l Y_l^m )
//  --------------------- = g(r) q r^l Y_l^m + f(r) --------------
//            dq                                          dq
// where q={x, y, z} and g(r) = 1/r*(df/dr)
void bmgs_radiald3(const bmgsspline* spline, int m, int c,
const int n[3],
const double C[3],
const double h[3],
const double* f, const double* g, double* a)
{
int l = spline->l;
// x
if (c == 0 && l == 0)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == 0)
a[q] = g[q] * 0.28209479177387814 * x;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 0 && l == 1)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == -1)
a[q] = g[q] * 0.48860251190291992 * x*y;
else if (m == 0)
a[q] = g[q] * 0.48860251190291992 * x*z;
else
a[q] = g[q] * 0.48860251190291992 * x*x + f[q] * 0.48860251190291992;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 0 && l == 2)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -2)
a[q] = g[q] * 1.0925484305920792 * x*x*y + f[q] * 1.0925484305920792 * y;
else if (m == -1)
a[q] = g[q] * 1.0925484305920792 * x*y*z;
else if (m == 0)
a[q] = g[q] * 0.31539156525252005 * (3*x*z*z-x*r2) + f[q] * 0.63078313050504009 * -x;
else if (m == 1)
a[q] = g[q] * 1.0925484305920792 * x*x*z + f[q] * 1.0925484305920792 * z;
else
a[q] = g[q] * 0.54627421529603959 * (x*x*x-x*y*y) + f[q] * 1.0925484305920792 * x;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 0 && l == 3)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -3)
a[q] = g[q] * 0.59004358992664352 * (3*x*x*x*y-x*y*y*y) + f[q] * 3.5402615395598613 * x*y;
else if (m == -2)
a[q] = g[q] * 2.8906114426405538 * x*x*y*z + f[q] * 2.8906114426405538 * y*z;
else if (m == -1)
a[q] = g[q] * 0.45704579946446577 * (-x*y*r2+5*x*y*z*z) + f[q] * 0.91409159892893155 * -x*y;
else if (m == 0)
a[q] = g[q] * 0.3731763325901154 * (5*x*z*z*z-3*x*z*r2) + f[q] * 2.2390579955406924 * -x*z;
else if (m == 1)
a[q] = g[q] * 0.45704579946446577 * (-x*x*r2+5*x*x*z*z) + f[q] * 0.45704579946446577 * (5*z*z-r2-2*x*x);
else if (m == 2)
a[q] = g[q] * 1.4453057213202769 * (x*x*x*z-x*y*y*z) + f[q] * 2.8906114426405538 * x*z;
else
a[q] = g[q] * 0.59004358992664352 * (-3*x*x*y*y+x*x*x*x) + f[q] * 1.7701307697799307 * (x*x-y*y);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 0 && l == 4)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -4)
a[q] = g[q] * 2.5033429417967046 * (-x*x*y*y*y+x*x*x*x*y) + f[q] * 2.5033429417967046 * (-y*y*y+3*x*x*y);
else if (m == -3)
a[q] = g[q] * 1.7701307697799307 * (-x*y*y*y*z+3*x*x*x*y*z) + f[q] * 10.620784618679583 * x*y*z;
else if (m == -2)
a[q] = g[q] * 0.94617469575756008 * (7*x*x*y*z*z-x*x*y*r2) + f[q] * 0.94617469575756008 * (-y*r2+7*y*z*z-2*x*x*y);
else if (m == -1)
a[q] = g[q] * 0.66904654355728921 * (-3*x*y*z*r2+7*x*y*z*z*z) + f[q] * 4.0142792613437353 * -x*y*z;
else if (m == 0)
a[q] = g[q] * 0.10578554691520431 * (-30*x*z*z*r2+3*x*r2*r2+35*x*z*z*z*z) + f[q] * 1.2694265629824517 * (-5*x*z*z+x*r2);
else if (m == 1)
a[q] = g[q] * 0.66904654355728921 * (-3*x*x*z*r2+7*x*x*z*z*z) + f[q] * 0.66904654355728921 * (7*z*z*z-6*x*x*z-3*z*r2);
else if (m == 2)
a[q] = g[q] * 0.47308734787878004 * (-x*x*x*r2+x*y*y*r2+7*x*x*x*z*z-7*x*y*y*z*z) + f[q] * 0.94617469575756008 * (-x*x*x+7*x*z*z-x*r2+x*y*y);
else if (m == 3)
a[q] = g[q] * 1.7701307697799307 * (-3*x*x*y*y*z+x*x*x*x*z) + f[q] * 5.3103923093397913 * (x*x*z-y*y*z);
else
a[q] = g[q] * 0.62583573544917614 * (-6*x*x*x*y*y+x*y*y*y*y+x*x*x*x*x) + f[q] * 2.5033429417967046 * (-3*x*y*y+x*x*x);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
// y
else if (c == 1 && l == 0)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == 0)
a[q] = g[q] * 0.28209479177387814 * y;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 1 && l == 1)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == -1)
a[q] = g[q] * 0.48860251190291992 * y*y + f[q] * 0.48860251190291992;
else if (m == 0)
a[q] = g[q] * 0.48860251190291992 * y*z;
else
a[q] = g[q] * 0.48860251190291992 * x*y;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 1 && l == 2)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -2)
a[q] = g[q] * 1.0925484305920792 * x*y*y + f[q] * 1.0925484305920792 * x;
else if (m == -1)
a[q] = g[q] * 1.0925484305920792 * y*y*z + f[q] * 1.0925484305920792 * z;
else if (m == 0)
a[q] = g[q] * 0.31539156525252005 * (-y*r2+3*y*z*z) + f[q] * 0.63078313050504009 * -y;
else if (m == 1)
a[q] = g[q] * 1.0925484305920792 * x*y*z;
else
a[q] = g[q] * 0.54627421529603959 * (-y*y*y+x*x*y) + f[q] * 1.0925484305920792 * -y;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 1 && l == 3)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -3)
a[q] = g[q] * 0.59004358992664352 * (3*x*x*y*y-y*y*y*y) + f[q] * 1.7701307697799307 * (x*x-y*y);
else if (m == -2)
a[q] = g[q] * 2.8906114426405538 * x*y*y*z + f[q] * 2.8906114426405538 * x*z;
else if (m == -1)
a[q] = g[q] * 0.45704579946446577 * (-y*y*r2+5*y*y*z*z) + f[q] * 0.45704579946446577 * (5*z*z-r2-2*y*y);
else if (m == 0)
a[q] = g[q] * 0.3731763325901154 * (-3*y*z*r2+5*y*z*z*z) + f[q] * 2.2390579955406924 * -y*z;
else if (m == 1)
a[q] = g[q] * 0.45704579946446577 * (-x*y*r2+5*x*y*z*z) + f[q] * 0.91409159892893155 * -x*y;
else if (m == 2)
a[q] = g[q] * 1.4453057213202769 * (-y*y*y*z+x*x*y*z) + f[q] * 2.8906114426405538 * -y*z;
else
a[q] = g[q] * 0.59004358992664352 * (x*x*x*y-3*x*y*y*y) + f[q] * 3.5402615395598613 * -x*y;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 1 && l == 4)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -4)
a[q] = g[q] * 2.5033429417967046 * (x*x*x*y*y-x*y*y*y*y) + f[q] * 2.5033429417967046 * (x*x*x-3*x*y*y);
else if (m == -3)
a[q] = g[q] * 1.7701307697799307 * (3*x*x*y*y*z-y*y*y*y*z) + f[q] * 5.3103923093397913 * (x*x*z-y*y*z);
else if (m == -2)
a[q] = g[q] * 0.94617469575756008 * (-x*y*y*r2+7*x*y*y*z*z) + f[q] * 0.94617469575756008 * (-2*x*y*y+7*x*z*z-x*r2);
else if (m == -1)
a[q] = g[q] * 0.66904654355728921 * (-3*y*y*z*r2+7*y*y*z*z*z) + f[q] * 0.66904654355728921 * (7*z*z*z-3*z*r2-6*y*y*z);
else if (m == 0)
a[q] = g[q] * 0.10578554691520431 * (3*y*r2*r2-30*y*z*z*r2+35*y*z*z*z*z) + f[q] * 1.2694265629824517 * (y*r2-5*y*z*z);
else if (m == 1)
a[q] = g[q] * 0.66904654355728921 * (-3*x*y*z*r2+7*x*y*z*z*z) + f[q] * 4.0142792613437353 * -x*y*z;
else if (m == 2)
a[q] = g[q] * 0.47308734787878004 * (-7*y*y*y*z*z+y*y*y*r2+7*x*x*y*z*z-x*x*y*r2) + f[q] * 0.94617469575756008 * (y*r2+y*y*y-7*y*z*z-x*x*y);
else if (m == 3)
a[q] = g[q] * 1.7701307697799307 * (x*x*x*y*z-3*x*y*y*y*z) + f[q] * 10.620784618679583 * -x*y*z;
else
a[q] = g[q] * 0.62583573544917614 * (x*x*x*x*y-6*x*x*y*y*y+y*y*y*y*y) + f[q] * 2.5033429417967046 * (y*y*y-3*x*x*y);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
// z
else if (c == 2 && l == 0)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == 0)
a[q] = g[q] * 0.28209479177387814 * z;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 2 && l == 1)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
if (m == -1)
a[q] = g[q] * 0.48860251190291992 * y*z;
else if (m == 0)
a[q] = g[q] * 0.48860251190291992 * z*z + f[q] * 0.48860251190291992;
else
a[q] = g[q] * 0.48860251190291992 * x*z;
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 2 && l == 2)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -2)
a[q] = g[q] * 1.0925484305920792 * x*y*z;
else if (m == -1)
a[q] = g[q] * 1.0925484305920792 * y*z*z + f[q] * 1.0925484305920792 * y;
else if (m == 0)
a[q] = g[q] * 0.31539156525252005 * (3*z*z*z-z*r2) + f[q] * 1.2615662610100802 * z;
else if (m == 1)
a[q] = g[q] * 1.0925484305920792 * x*z*z + f[q] * 1.0925484305920792 * x;
else
a[q] = g[q] * 0.54627421529603959 * (x*x*z-y*y*z);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 2 && l == 3)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -3)
a[q] = g[q] * 0.59004358992664352 * (-y*y*y*z+3*x*x*y*z);
else if (m == -2)
a[q] = g[q] * 2.8906114426405538 * x*y*z*z + f[q] * 2.8906114426405538 * x*y;
else if (m == -1)
a[q] = g[q] * 0.45704579946446577 * (-y*z*r2+5*y*z*z*z) + f[q] * 3.6563663957157262 * y*z;
else if (m == 0)
a[q] = g[q] * 0.3731763325901154 * (-3*z*z*r2+5*z*z*z*z) + f[q] * 1.1195289977703462 * (3*z*z-r2);
else if (m == 1)
a[q] = g[q] * 0.45704579946446577 * (5*x*z*z*z-x*z*r2) + f[q] * 3.6563663957157262 * x*z;
else if (m == 2)
a[q] = g[q] * 1.4453057213202769 * (x*x*z*z-y*y*z*z) + f[q] * 1.4453057213202769 * (x*x-y*y);
else
a[q] = g[q] * 0.59004358992664352 * (x*x*x*z-3*x*y*y*z);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else if (c == 2 && l == 4)
{
int q = 0;
double x = C[0];
for (int i0 = 0; i0 < n[0]; i0++)
{
double y = C[1];
for (int i1 = 0; i1 < n[1]; i1++)
{
double z = C[2];
for (int i2 = 0; i2 < n[2]; i2++, q++)
{
double r2 = x*x+y*y+z*z;
if (m == -4)
a[q] = g[q] * 2.5033429417967046 * (x*x*x*y*z-x*y*y*y*z);
else if (m == -3)
a[q] = g[q] * 1.7701307697799307 * (-y*y*y*z*z+3*x*x*y*z*z) + f[q] * 1.7701307697799307 * (-y*y*y+3*x*x*y);
else if (m == -2)
a[q] = g[q] * 0.94617469575756008 * (-x*y*z*r2+7*x*y*z*z*z) + f[q] * 11.354096349090721 * x*y*z;
else if (m == -1)
a[q] = g[q] * 0.66904654355728921 * (-3*y*z*z*r2+7*y*z*z*z*z) + f[q] * 2.0071396306718676 * (-y*r2+5*y*z*z);
else if (m == 0)
a[q] = g[q] * 0.10578554691520431 * (-30*z*z*z*r2+3*z*r2*r2+35*z*z*z*z*z) + f[q] * 1.6925687506432689 * (5*z*z*z-3*z*r2);
else if (m == 1)
a[q] = g[q] * 0.66904654355728921 * (-3*x*z*z*r2+7*x*z*z*z*z) + f[q] * 2.0071396306718676 * (5*x*z*z-x*r2);
else if (m == 2)
a[q] = g[q] * 0.47308734787878004 * (-x*x*z*r2+7*x*x*z*z*z+y*y*z*r2-7*y*y*z*z*z) + f[q] * 5.6770481745453605 * (x*x*z-y*y*z);
else if (m == 3)
a[q] = g[q] * 1.7701307697799307 * (x*x*x*z*z-3*x*y*y*z*z) + f[q] * 1.7701307697799307 * (x*x*x-3*x*y*y);
else
a[q] = g[q] * 0.62583573544917614 * (x*x*x*x*z-6*x*x*y*y*z+y*y*y*y*z);
z += h[2];
}
y += h[1];
}
x += h[0];
}
}
else
assert(0 == 1);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/stencils.c 0000664 0000000 0000000 00000011555 13164413722 0023275 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "bmgs.h"
// Expansion coefficients for finite difference Laplacian. The numbers are
// from J. R. Chelikowsky et al., Phys. Rev. B 50, 11355 (1994):
bmgsstencil bmgs_stencil(int ncoefs, const double* coefs, const long* offsets,
int r, const long n[3])
{
bmgsstencil stencil =
{ncoefs,
(double*)malloc(ncoefs * sizeof(double)),
(long*)malloc(ncoefs * sizeof(long)),
{n[0], n[1], n[2]},
{2 * r * (n[2] + 2 * r) * (n[1] + 2 * r),
2 * r * (n[2] + 2 * r),
2 * r}};
assert((stencil.coefs != NULL) && (stencil.offsets != NULL));
memcpy(stencil.coefs, coefs, ncoefs * sizeof(double));
memcpy(stencil.offsets, offsets, ncoefs * sizeof(long));
return stencil;
}
static const double laplace[4][5] =
{{-2.0, 1.0, 0.0, 0.0, 0.0},
{-5.0/2.0, 4.0/3.0, -1.0/12.0, 0.0, 0.0},
{-49.0/18.0, 3.0/2.0, -3.0/20.0, 1.0/90.0, 0.0},
{-205.0/72.0, 8.0/5.0, -1.0/5.0, 8.0/315.0, -1.0/560.0}};
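// Example: with k = 3 (so r = (k - 1) / 2 = 1) bmgs_laplace below uses the
// first row above, laplace[0] = {-2, 1}, i.e. the standard 7-point stencil
//     scale * (u[i-1] - 2*u[i] + u[i+1]) / h_c^2
// summed over the three axes c, giving ncoefs = 3*3 - 2 = 7 coefficients.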
bmgsstencil bmgs_laplace(int k, double scale,
const double h[3],
const long n[3])
{
int ncoefs = 3 * k - 2;
double* coefs = (double*)malloc(ncoefs * sizeof(double));
long* offsets = (long*)malloc(ncoefs * sizeof(long));
assert((coefs != NULL) && (offsets != NULL));
double f1 = 1.0 / (h[0] * h[0]);
double f2 = 1.0 / (h[1] * h[1]);
double f3 = 1.0 / (h[2] * h[2]);
int r = (k - 1) / 2; // range
double s[3] = {(n[2] + 2 * r) * (n[1] + 2 * r), n[2] + 2 * r, 1};
int m = 0;
for (int j = 1; j <= r; j++)
{
double c = scale * laplace[r - 1][j];
coefs[m] = c * f1; offsets[m++] = -j * s[0];
coefs[m] = c * f1; offsets[m++] = +j * s[0];
coefs[m] = c * f2; offsets[m++] = -j * s[1];
coefs[m] = c * f2; offsets[m++] = +j * s[1];
coefs[m] = c * f3; offsets[m++] = -j;
coefs[m] = c * f3; offsets[m++] = +j;
}
double c = scale * laplace[r - 1][0];
coefs[m] = c * (f1 + f2 + f3); offsets[m] = 0;
bmgsstencil stencil =
{ncoefs, coefs, offsets,
{n[0], n[1], n[2]},
{2 * r * (n[2] + 2 * r) * (n[1] + 2 * r),
2 * r * (n[2] + 2 * r),
2 * r}};
return stencil;
}
bmgsstencil bmgs_mslaplaceA(double scale,
const double h[3],
const long n[3])
{
int ncoefs = 19;
double* coefs = (double*)malloc(ncoefs * sizeof(double));
long* offsets = (long*)malloc(ncoefs * sizeof(long));
assert((coefs != NULL) && (offsets != NULL));
double e[3] = {-scale / (12.0 * h[0] * h[0]),
-scale / (12.0 * h[1] * h[1]),
-scale / (12.0 * h[2] * h[2])};
double f = -16.0 * (e[0] + e[1] + e[2]);
double g[3] = {10.0 * e[0] + 0.125 * f,
10.0 * e[1] + 0.125 * f,
10.0 * e[2] + 0.125 * f};
double s[3] = {(n[2] + 2) * (n[1] + 2), n[2] + 2, 1};
int m = 0;
coefs[m] = f;
offsets[m++] = 0;
for (int j = -1; j <= 1; j += 2)
{
coefs[m] = g[0];
offsets[m++] = j * s[0];
coefs[m] = g[1];
offsets[m++] = j * s[1];
coefs[m] = g[2];
offsets[m++] = j * s[2];
}
for (int j = -1; j <= 1; j += 2)
    for (int k = -1; k <= 1; k += 2)
{
coefs[m] = e[1] + e[2];
offsets[m++] = -j * s[1] - k * s[2];
coefs[m] = e[0] + e[2];
offsets[m++] = -j * s[0] - k * s[2];
coefs[m] = e[0] + e[1];
offsets[m++] = -j * s[0] - k * s[1];
}
bmgsstencil stencil =
{ncoefs, coefs, offsets,
{n[0], n[1], n[2]},
{2 * s[0], 2 * s[1], 2}};
return stencil;
}
bmgsstencil bmgs_mslaplaceB(const long n[3])
{
int ncoefs = 7;
double* coefs = (double*)malloc(ncoefs * sizeof(double));
long* offsets = (long*)malloc(ncoefs * sizeof(long));
assert((coefs != NULL) && (offsets != NULL));
double s[3] = {(n[2] + 2) * (n[1] + 2), n[2] + 2, 1};
int k = 0;
coefs[k] = 0.5;
offsets[k++] = 0;
for (int j = -1; j <= 1; j += 2)
{
coefs[k] = 1.0 / 12.0;
offsets[k++] = j * s[0];
coefs[k] = 1.0 / 12.0;
offsets[k++] = j * s[1];
coefs[k] = 1.0 / 12.0;
offsets[k++] = j * s[2];
}
bmgsstencil stencil =
{ncoefs, coefs, offsets,
{n[0], n[1], n[2]},
{2 * s[0], 2 * s[1], 2}};
return stencil;
}
bmgsstencil bmgs_gradient(int k, int i, double h,
const long n[3])
{
int ncoefs = k - 1;
double* coefs = (double*)malloc(ncoefs * sizeof(double));
long* offsets = (long*)malloc(ncoefs * sizeof(long));
assert((coefs != NULL) && (offsets != NULL));
int r = 1;
double s[3] = {(n[2] + 2 * r) * (n[1] + 2 * r), n[2] + 2 * r, 1};
double c = 0.5 / h;
coefs[0] = +c; offsets[0] = +s[i];
coefs[1] = -c; offsets[1] = -s[i];
bmgsstencil stencil =
{ncoefs, coefs, offsets,
{n[0], n[1], n[2]},
{2 * r * s[0], 2 * r * s[1], 2 * r}};
return stencil;
}
void bmgs_deletestencil(bmgsstencil* stencil)
{
free(stencil->coefs);
free(stencil->offsets);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/tools.py 0000664 0000000 0000000 00000007514 13164413722 0023017 0 ustar 00root root 0000000 0000000 def factorial(x):
"""Return x!, where x is a non-negative integer"""
if x < 2: return 1
else: return x * factorial(x - 1)
def gcd(a, b):
"""Return greatest common divisor of a and b.
    Uses the Euclidean algorithm.
"""
if b == 0: return a
else: return gcd(b, a % b)
class Rational:
"""Class used to represent rational numbers as fractions, such that
no precision is lost during calculation operations.
Example usage with Numeric:
import numpy as np
from tools import Rational as Q
    n = np.zeros(4, 'O')           # array([0 , 0 , 0 , 0 ],'O')
    n[2:4] = [Q(35,12), Q(36,12)]  # array([0 , 0 , 35./12 , 3 ],'O')
    24 * n                         # array([0 , 0 , 70 , 72 ],'O')
    np.multiply(n, Q(3,9))         # array([0 , 0 , 35./36 , 1 ],'O')
"""
def __init__(self, nom=0, denom=1):
## assert type(nom) == type(denom) == int
    # ensure that the sign is in the numerator
nom = cmp(denom, 0) * nom
denom = abs(denom)
# reduce fraction
q = gcd(nom, denom)
self.nom = nom / q
self.denom = denom / q
def __add__(self, x):
if type(x) == float:
return float(self) + x
elif type(x) == int:
x = Rational(x)
nom = self.nom * x.denom + x.nom * self.denom
denom = self.denom * x.denom
return Rational(nom, denom)
def __radd__(self, x):
return self.__add__(x)
def __mul__(self, x):
if type(x) == float:
return float(self) * x
elif type(x) == int:
x = Rational(x)
return Rational(self.nom * x.nom, self.denom * x.denom)
def __rmul__(self, x):
return self.__mul__(x)
def __neg__(self):
return Rational(-self.nom, self.denom)
def __pos__(self):
return self.copy()
def __sub__(self, x):
return self.__add__(-x)
def __rsub__(self, x):
return -self.__sub__(x)
def __div__(self, x):
if type(x) == float:
return float(self) / x
elif type(x) == int:
x = Rational(x)
return self.__mul__(Rational(x.denom, x.nom))
def __rdiv__(self, x):
if type(x) == float:
return x / float(self)
elif type(x) == int:
x = Rational(x)
return x.__mul__(Rational(self.denom, self.nom))
def __pow__(self, p):
if p == 0: return Rational(1)
if p >= 0 and type(p) == int:
return Rational(self.nom**p, self.denom**p)
else:
return float(self)**p
def __mod__(self, x):
if type(x) == float:
return float(self) % x
return Rational(self.nom % (x * self.denom), self.denom)
def __rmod__(self, x):
if type(x) == int:
x = Rational(x)
i = self.__int__()
return x.__mod__(i)
def __abs__(self):
return Rational(abs(self.nom), self.denom)
def __nonzero__(self):
return self.nom.__nonzero__()
def __cmp__(self, x):
return cmp(float(self), float(x))
def __str__(self):
out = str(self.nom)
if self.denom != 1:
out += './' + str(self.denom)
return out
def __int__(self):
assert self.denom == 1
return self.nom
def __float__(self):
return float(self.nom) / self.denom
def __repr__(self):
out = repr(self.nom)
if self.denom != 1:
out += './' + repr(self.denom)
return out
def __copy__(self):
return Rational(self.nom, self.denom)
def floor(self):
return int(float(self))
def sqrt(self):
return self**.5
def abs(self):
return Rational(abs(self.nom), self.denom)
def copy(self):
return Rational(self.nom, self.denom)
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/translate.c 0000664 0000000 0000000 00000002650 13164413722 0023442 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <string.h>
#include "bmgs.h"
void bmgs_translate(double* a, const int sizea[3], const int size[3],
const int start1[3], const int start2[3])
{
const double* restrict s =
a + start1[2] + (start1[1] + start1[0] * sizea[1]) * sizea[2];
double* restrict d =
a + start2[2] + (start2[1] + start2[0] * sizea[1]) * sizea[2];
for (int i0 = 0; i0 < size[0]; i0++)
{
for (int i1 = 0; i1 < size[1]; i1++)
{
memcpy(d, s, size[2] * sizeof(double));
s += sizea[2];
d += sizea[2];
}
s += sizea[2] * (sizea[1] - size[1]);
d += sizea[2] * (sizea[1] - size[1]);
}
}
void bmgs_translatemz(double_complex* a, const int sizea[3], const int size[3],
const int start1[3], const int start2[3],
double_complex phase)
{
const double_complex* restrict s =
a + start1[2] + (start1[1] + start1[0] * sizea[1]) * sizea[2];
double_complex* restrict d =
a + start2[2] + (start2[1] + start2[0] * sizea[1]) * sizea[2];
for (int i0 = 0; i0 < size[0]; i0++)
{
for (int i1 = 0; i1 < size[1]; i1++)
{
for (int i2 = 0; i2 < size[2]; i2++)
d[i2] = phase * s[i2];
s += sizea[2];
d += sizea[2];
}
s += sizea[2] * (sizea[1] - size[1]);
d += sizea[2] * (sizea[1] - size[1]);
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/wfd.c 0000664 0000000 0000000 00000005577 13164413722 0022240 0 ustar 00root root 0000000 0000000 /* This file (wfd.c) is a modified copy of fd.c
* with added support for nonlocal operator weights.
* The original copyright note of fd.c follows:
* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
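/* Sketch of the operation implemented in this file: given nweights stencils
   S_iw and matching weight fields w_iw (all on the same grid, as assumed by
   the use of stencils[0] for the grid dimensions), the worker computes
       b(r) = sum_iw  w_iw(r) * (S_iw a)(r),
   i.e. each stencil is applied to a and scaled pointwise by its weight
   before being accumulated into b. */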
#include "bmgs.h"
#include <stdlib.h>
#include <pthread.h>
#include "../extensions.h"
struct Z(wfds){
int thread_id;
int nthds;
int nweights;
const bmgsstencil* s;
const double** w;
const T* a;
T* b;
};
void *Z(bmgs_wfd_worker)(void *threadarg)
{
struct Z(wfds) *args = (struct Z(wfds) *) threadarg;
const T* a = args->a;
T* b = args->b;
const bmgsstencil* stencils = args->s;
const int n0 = stencils[0].n[0];
const int n1 = stencils[0].n[1];
const int n2 = stencils[0].n[2];
const int j1 = stencils[0].j[1];
const int j2 = stencils[0].j[2];
const double** weights = (const double**) GPAW_MALLOC(double*, args->nweights);
int chunksize = n0 / args->nthds + 1;
int nstart = args->thread_id * chunksize;
if (nstart >= n0)
return NULL;
int nend = nstart + chunksize;
if (nend > n0)
nend = n0;
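  /* Example of the slab partitioning above: n0 = 10 planes and nthds = 4
     give chunksize = 3, so the threads work on the ranges [0,3), [3,6),
     [6,9) and [9,10). */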
for (int i0 = nstart; i0 < nend; i0++)
{
const T* aa = a + i0 * (j1 + n1 * (j2 + n2));
T* bb = b + i0 * n1 * n2;
for (int iw = 0; iw < args->nweights; iw++)
weights[iw] = args->w[iw] + i0 * n1 * n2;
for (int i1 = 0; i1 < n1; i1++)
{
for (int i2 = 0; i2 < n2; i2++)
{
T x = 0.0;
for (int iw = 0; iw < args->nweights; iw++)
{
const bmgsstencil* s = &(stencils[iw]);
T tmp = 0.0;
for (int c = 0; c < s->ncoefs; c++)
tmp += aa[s->offsets[c]] * s->coefs[c];
tmp *= weights[iw][0];
x += tmp;
weights[iw]++;
}
*bb++ = x;
aa++;
}
aa += j2;
}
}
free(weights);
return NULL;
}
void Z(bmgs_wfd)(int nweights, const bmgsstencil* stencils, const double** weights, const T* a, T* b)
{
a += (stencils[0].j[0] + stencils[0].j[1] + stencils[0].j[2]) / 2;
int nthds = 1;
#ifdef GPAW_OMP_MONLY
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct Z(wfds) *wargs = GPAW_MALLOC(struct Z(wfds), nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->nweights = nweights;
(wargs+i)->s = stencils;
(wargs+i)->w = weights;
(wargs+i)->a = a;
(wargs+i)->b = b;
}
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, Z(bmgs_wfd_worker), (void*) (wargs+i));
#endif
Z(bmgs_wfd_worker)(wargs);
#ifdef GPAW_OMP_MONLY
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/wrelax.c 0000664 0000000 0000000 00000005535 13164413722 0022754 0 ustar 00root root 0000000 0000000 /* This file (wrelax.c) is a modified copy of relax.c
* with added support for nonlocal operator weights.
* The original copyright note of relax.c follows:
* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "bmgs.h"
void bmgs_wrelax(const int relax_method, const int nweights,
const bmgsstencil* stencils, const double** weights,
double* a, double* b,
const double* src, const double w)
{
const int n0 = stencils[0].n[0];
const int n1 = stencils[0].n[1];
const int n2 = stencils[0].n[2];
const int j0 = stencils[0].j[0];
const int j1 = stencils[0].j[1];
const int j2 = stencils[0].j[2];
a += (j0 + j1 + j2) / 2;
if (relax_method == 1)
{
/* Weighted Gauss-Seidel relaxation for the equation "operator" b = src
       a contains the temporary array, which also holds the boundary values. */
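    /* In formulas, each grid point is updated as
           b = ( src - sum_iw w_iw * sum_{c>=1} coefs_c * a[offsets_c] )
               / ( sum_iw w_iw * coefs_0 ),
       where coefs_0 is the central coefficient of stencil iw; the new value
       is also written back into a, so later points of the sweep already see
       it (this is what makes it Gauss-Seidel). */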
for (int i0 = 0; i0 < n0; i0++)
{
for (int i1 = 0; i1 < n1; i1++)
{
for (int i2 = 0; i2 < n2; i2++)
{
double x = 0.0;
double coef = 0.0;
for (int iw = 0; iw < nweights; iw++)
{
double weight = weights[iw][0];
double tmp = 0.0;
const bmgsstencil* s = &(stencils[iw]);
for (int c = 1; c < s->ncoefs; c++)
tmp += a[s->offsets[c]] * s->coefs[c];
tmp *= weight;
x += tmp;
coef += weight * s->coefs[0];
weights[iw]++;
}
x = (*src - x) / coef;
*b++ = x;
*a++ = x;
src++;
}
a += j2;
}
a += j1;
}
}
else
{
/* Weighted Jacobi relaxation for the equation "operator" b = src
       a contains the temporary array, which also holds the boundary values. */
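    /* The damped Jacobi update computed below is
           b_new = (1 - w) * b_old + w * (src - offdiag) / diag,
       with offdiag and diag accumulated over the weighted stencils exactly
       as in the Gauss-Seidel branch; here a is only read during the sweep,
       never updated. */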
double temp;
for (int i0 = 0; i0 < n0; i0++)
{
for (int i1 = 0; i1 < n1; i1++)
{
for (int i2 = 0; i2 < n2; i2++)
{
double x = 0.0;
double coef = 0.0;
for (int iw = 0; iw < nweights; iw++)
{
double weight = weights[iw][0];
double tmp = 0.0;
const bmgsstencil* s = &(stencils[iw]);
for (int c = 1; c < s->ncoefs; c++)
tmp += a[s->offsets[c]] * s->coefs[c];
tmp *= weight;
x += tmp;
coef += weight * s->coefs[0];
weights[iw]++;
}
temp = (1.0 - w) * *b + w * (*src - x) / coef;
*b++ = temp;
a++;
src++;
}
a += j2;
}
a += j1;
}
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/bmgs/zero.c 0000664 0000000 0000000 00000000676 13164413722 0022432 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <string.h>
#include "bmgs.h"
void Z(bmgs_zero)(T* a, const int n[3], const int c[3],
const int s[3])
{
a += c[2] + (c[1] + c[0] * n[1]) * n[2];
for (int i0 = 0; i0 < s[0]; i0++)
{
for (int i1 = 0; i1 < s[1]; i1++)
{
memset(a, 0, s[2] * sizeof(T));
a += n[2];
}
a += n[2] * (n[1] - s[1]);
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/cerf.c 0000664 0000000 0000000 00000013201 13164413722 0021426 0 ustar 00root root 0000000 0000000 #include
#include "extensions.h"
#include <float.h>
#include <math.h>
#include <stdio.h>
#define eps 1.e-15
double_complex itpp_erf(double_complex z);
PyObject* cerf(PyObject *self, PyObject *args)
{
double complex z, res;
if (!PyArg_ParseTuple(args, "D", &z))
return NULL;
res = itpp_erf(z);
return Py_BuildValue("D", &res);
}
/* taken from
http://prdownloads.sourceforge.net/itpp/itpp-3.10.7.tar.bz2
and transformed to C */
/*!
* \file
* \brief Implementation of scalar functions
* \author Tony Ottosson, Pal Frenger and Adam Piatyszek
*
* $Date: 2006-08-19 10:53:33 +0200 (sob, 19 sie 2006) $
* $Revision: 643 $
*
* -------------------------------------------------------------------------
*
* IT++ - C++ library of mathematical, signal processing, speech processing,
* and communications classes and functions
*
* Copyright (C) 1995-2006 (see AUTHORS file for a list of contributors)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* -------------------------------------------------------------------------
*/
double_complex itpp_cerf_series(double_complex z);
double_complex itpp_cerfc_continued_fraction(double_complex z);
double_complex itpp_cerf_continued_fraction(double_complex z);
double_complex itpp_cerf_rybicki(double_complex z);
double cabs(double_complex z) {
return sqrt(creal(z) * creal(z) + cimag(z) * cimag(z));
}
/*
* This function calculates a well known error function erf(z) for
* complex z. Three methods are implemented. Which one is used
* depends on z.
*/
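/* Dispatch used in itpp_erf below:
     |z| < 2                     -> power series (itpp_cerf_series);
     |z| >= 2 and |Re z| < 0.5   -> also the power series (the Rybicki and
                                    continued-fraction routines are disabled
                                    for this range, see the comment inside);
     |z| >= 2 and |Re z| >= 0.5  -> continued fraction. */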
double_complex itpp_erf(double_complex z)
{
// Use the method appropriate to size of z -
// there probably ought to be an extra option for NaN z, or infinite z
if (cabs(z) < 2.0)
return itpp_cerf_series(z);
else {
if (fabs(creal(z)) < 0.5)
// XXX neither rybicki nor continued_fraction seem to work here
// return itpp_cerf_rybicki(z);
//return itpp_cerf_continued_fraction(z);
return itpp_cerf_series(z);
else
return itpp_cerf_continued_fraction(z);
}
}
/*
 * Abramowitz and Stegun: Eq. (7.1.5) gives a series for erf(z) good
* for all z, but converges faster for smallish abs(z), say abs(z) < 2.
*/
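/* The series summed below is
       erf(z) = (2/sqrt(pi)) * sum_{n>=0} (-1)^n z^(2n+1) / (n! * (2n+1)),
   accumulated term by term until the relative change of the partial sum
   drops below eps. */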
double_complex itpp_cerf_series(double_complex z)
{
double_complex sum, term, z2, oldsum;
double error;
sum = 0.0;
term = z;
z2 = z * z;
oldsum = 1.e32;
for (int n = 0; 1; n++) {
sum += term / (2. * n + 1);
term *= -z2 / (1. * n + 1);
error = cabs(sum / oldsum - 1.);
if (error < eps) {
return sum * (2.0 / sqrt(M_PI)); }
oldsum = sum;
}
}
/*
* Abramowitz and Stegun: Eq. (7.1.14) gives this continued fraction
* for erfc(z)
*
* erfc(z) = sqrt(pi).exp(-z^2). 1 1/2 1 3/2 2 5/2
* --- --- --- --- --- --- ...
* z + z + z + z + z + z +
*
* This is evaluated using Lentz's method, as described in the
 * narrative of Numerical Recipes in C.
*
 * The continued fraction holds provided real(z) > 0. In practice
* we like real(z) to be significantly greater than 0, say greater
* than 0.5.
*/
double_complex itpp_cerfc_continued_fraction(double_complex z)
{
// first calculate z+ 1/2 1
// --- --- ...
// z + z +
double_complex f, C, D, delta;
double a;
// printf("itpp_cerfc_continued_fraction\n");
f = z;
C = f;
D = 0.0;
a = 0.0;
do {
a += 0.5;
D = z + a * D;
C = z + a / C;
if ((creal(D) == 0.0) && (cimag(D) == 0.0))
D = DBL_MIN;
D = 1.0 / D;
delta = C * D;
f = f * delta;
} while (cabs(1.0 - delta) > eps);
// Do the first term of the continued fraction
f = 1.0 / f;
// and do the final scaling
f = f * exp(-z * z) / sqrt(M_PI);
return f;
}
double_complex itpp_cerf_continued_fraction(double_complex z)
{
if (creal(z) > 0)
return 1.0 - itpp_cerfc_continued_fraction(z);
else
return -1.0 + itpp_cerfc_continued_fraction(-z);
}
/*
* Numerical Recipes quotes a formula due to Rybicki for evaluating
* Dawson's Integral:
*
* exp(-x^2) integral exp(t^2).dt = 1/sqrt(pi) lim sum exp(-(z-n.h)^2) / n
* 0 to x h->0 n odd
*
* This can be adapted to erf(z).
*/
double_complex itpp_cerf_rybicki(double_complex z)
{
double h = 0.2; // numerical experiment suggests this is small enough
printf("itpp_cerf_rybicki");
// choose an even n0, and then shift z->z-n0.h and n->n-h.
// n0 is chosen so that real((z-n0.h)^2) is as small as possible.
int n0 = 2 * ((int)(cimag(z) / (2 * h) + 0.5));
double_complex z0 = I * n0 * h;
double_complex zp = z - z0;
double_complex sum = 0.0;
// limits of sum chosen so that the end sums of the sum are
// fairly small. In this case exp(-(35.h)^2)=5e-22
for (int np = -35; np <= 35; np += 2) {
double_complex t = creal(zp) + I * (cimag(zp) - np * h);
double_complex b = (exp(t * t) / ((double)(np + n0)));
sum += b;
}
sum *= 2.0 * exp(-z * z) / M_PI;
sum = - cimag(sum) + creal(sum) * I;
return sum;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/extensions.h 0000664 0000000 0000000 00000002674 13164413722 0022727 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#ifndef H_EXTENSIONS
#define H_EXTENSIONS
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <assert.h>
/* If strict ANSI, then some useful macros are not defined */
#if defined(__STRICT_ANSI__) && !defined(__DARWIN_UNIX03)
# define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef DOUBLECOMPLEXDEFINED
# define DOUBLECOMPLEXDEFINED 1
#  include <complex.h>
typedef double complex double_complex;
#endif
#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 4
# define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#define INLINE inline
static INLINE void* gpaw_malloc(size_t n)
{
void* p = malloc(n);
assert(p != NULL);
return p;
}
#ifdef GPAW_BGP
#define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T)))
#else
#ifdef GPAW_AIX
#define GPAW_MALLOC(T, n) (malloc((n) * sizeof(T)))
#else
#define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T)))
#endif
#endif
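/* Typical use of the allocation macro above (see e.g. lapack.c):
       double* work = GPAW_MALLOC(double, lwork);
       ...
       free(work);
   On most builds this goes through gpaw_malloc(), which asserts that the
   allocation succeeded; with GPAW_AIX defined it is a plain malloc(). */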
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define INTP(a) ((int*)PyArray_DATA(a))
#define LONGP(a) ((long*)PyArray_DATA(a))
#define DOUBLEP(a) ((double*)PyArray_DATA(a))
#define COMPLEXP(a) ((double_complex*)PyArray_DATA(a))
#endif //H_EXTENSIONS
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/f2c.h 0000664 0000000 0000000 00000000442 13164413722 0021171 0 ustar 00root root 0000000 0000000 /* Definitions needed by code transferred with f2c */
#include <math.h>
typedef int integer;
typedef double doublereal;
typedef struct { doublereal r, i; } doublecomplex;
#ifndef STATIC_NUMERIC
inline double pow_dd(double *x, double *y) {
return pow(*x,*y);
}
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/fftw.c 0000664 0000000 0000000 00000003025 13164413722 0021460 0 ustar 00root root 0000000 0000000 #ifdef GPAW_WITH_FFTW
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <fftw3.h>
/* Create plan and return pointer to plan as a string */
PyObject * FFTWPlan(PyObject *self, PyObject *args)
{
PyArrayObject* in;
PyArrayObject* out;
int sign;
unsigned int flags;
if (!PyArg_ParseTuple(args, "OOiI",
&in, &out, &sign, &flags))
return NULL;
fftw_plan* plan = (fftw_plan*)malloc(sizeof(fftw_plan));
if (in->descr->type_num == PyArray_DOUBLE)
*plan = fftw_plan_dft_r2c(in->nd, in->dimensions,
(double*)in->data,
(double (*)[2])out->data,
flags);
else if (out->descr->type_num == PyArray_DOUBLE)
*plan = fftw_plan_dft_c2r(in->nd, out->dimensions,
(double (*)[2])in->data,
(double*)out->data,
flags);
else
*plan = fftw_plan_dft(in->nd, out->dimensions,
(double (*)[2])in->data,
(double (*)[2])out->data,
sign, flags);
return Py_BuildValue("s#", plan, sizeof(fftw_plan*));
}
PyObject * FFTWExecute(PyObject *self, PyObject *args)
{
fftw_plan* plan;
int n;
if (!PyArg_ParseTuple(args, "s#", &plan, &n))
return NULL;
fftw_execute(*plan);
Py_RETURN_NONE;
}
PyObject * FFTWDestroy(PyObject *self, PyObject *args)
{
fftw_plan* plan;
int n;
if (!PyArg_ParseTuple(args, "s#", &plan, &n))
return NULL;
fftw_destroy_plan(*plan);
free(plan);
Py_RETURN_NONE;
}
#endif // GPAW_WITH_FFTW
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/lapack.c 0000664 0000000 0000000 00000041164 13164413722 0021753 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005-2007 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#ifdef GPAW_NO_UNDERSCORE_LAPACK
# define dlamch_ dlamch
# define dsyev_ dsyev
# define zheev_ zheev
# define dsyevr_ dsyevr
# define zheevr_ zheevr
# define dsygv_ dsygv
# define dsygvx_ dsygvx
# define dhegv_ dhegv
# define zhegv_ zhegv
# define zhegvx_ zhegvx
# define dgeev_ dgeev
# define dpotrf_ dpotrf
# define dpotri_ dpotri
# define zpotrf_ zpotrf
# define zpotri_ zpotri
# define dtrtri_ dtrtri
# define ztrtri_ ztrtri
# define dsytrf_ dsytrf
# define zsytrf_ zsytrf
# define dgetrf_ dgetrf
# define zgetrf_ zgetrf
# define dsytri_ dsytri
# define zsytri_ zsytri
# define dgetri_ dgetri
# define zgetri_ zgetri
# define zgbsv_ zgbsv
# define zgttrf_ zgttrf
# define zgttrs_ zgttrs
# define ilaenv_ ilaenv
#endif
double dlamch_(char* cmach);
void dsyev_(char *jobz, char *uplo, int *n,
double *a, int *lda, double *w, double *work, int *lwork,
int *info);
void zheev_(char *jobz, char *uplo, int *n,
void *a, int *lda, double *w, void *work,
int *lwork, double *rwork, int *lrwork, int *info);
void dsyevr_(char *jobz, char *range, char *uplo, int *n,
double *a, int *lda,
double *vl, double *vu, int *il, int*iu, double *abstol,
int *m, double *w, double *z, int *ldz, int *isuppz,
double *work, int *lwork, int *iwork, int *liwork,
int *info);
void zheevr_(char *jobz, char *range, char *uplo, int *n,
void *a, int *lda,
double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, void *z, int *ldz, int *isuppz,
void *work, int *lwork, double *rwork, int *lrwork,
int *iwork, int *liwork,
int *info);
void dsygv_(int *itype, char *jobz, char *uplo, int *
n, double *a, int *lda, double *b, int *ldb,
double *w, double *work, int *lwork, int *info);
void dsygvx_(int *itype, char *jobz, char *range, char *uplo,
int *n, void *a, int *lda, void *b, int *ldb,
double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, void *z, int *ldz, void *work,
int *lwork, int *iwork, int *ifail,
int *info);
void zhegv_(int *itype, char *jobz, char *uplo, int *
n, void *a, int *lda, void *b, int *ldb,
double *w, void *work, int *lwork,
double *rwork,
int *lrwork, int *info);
void zhegvx_(int *itype, char *jobz, char *range, char *uplo,
int *n, void *a, int *lda, void *b, int *ldb,
double *vl, double *vu, int *il, int *iu, double *abstol,
int *m, double *w, void *z, int *ldz, void *work,
int *lwork, double *rwork, int *iwork, int *ifail,
int *info);
void dpotrf_(char *uplo, int *n, double *a, int *
lda, int *info);
void dpotri_(char *uplo, int *n, double *a, int *
lda, int *info);
void zpotrf_(char *uplo, int *n, void *a,
int *lda, int *info);
void zpotri_(char *uplo, int *n, void *a,
int *lda, int *info);
void dgeev_(char *jovl, char *jobvr, int *n, double *a, int *lda,
double *wr, double *wl,
double *vl, int *ldvl, double *vr, int *ldvr,
double *work, int *lwork, int *info);
void dtrtri_(char *uplo,char *diag, int *n, void *a,
int *lda, int *info );
void ztrtri_(char *uplo,char *diag, int *n, void *a,
int *lda, int *info );
void dsytrf_(char *uplo, int *n, double *a, int *lda, int *ipiv,
double *work, int *lwork, int *info);
void zsytrf_(char *uplo, int *n, void *a, int *lda, int *ipiv,
void *work, int *lwork, int *info);
void dgetrf_(int *n, int *m, double *a, int *lda, int *ipiv, int *info);
void zgetrf_(int *n, int *m, void *a, int *lda, int *ipiv, int *info);
void dsytri_(char *uplo, int *n, double *a, int *lda, int *ipiv,
double *work, int *info);
void zsytri_(char *uplo, int *n, void *a, int *lda, int *ipiv,
void *work, int *info);
void dgetri_(int *n, double *a, int *lda, int *ipiv,
double *work, int *lwork, int *info);
void zgetri_(int *n, void *a, int *lda, int *ipiv,
void *work, int *lwork, int *info);
void zgbsv_(int*n, int* kl, int* ku, int* nrhs, void* ab, int*ldab,
int*ipiv, void* b, int*ldb, int*info);
void zgttrf_(int* n, void* dl, void* d, void* du,
void* du2, int* ipiv, int* info);
void zgttrs_(char* tran, int* n, int* nrhs, void* dl,
void* d, void* du, void* du2,
int* ipiv, void* b, int* ldb, int* info);
int ilaenv_(int* ispec, char* name, char* opts, int* n1,
int* n2, int* n3, int* n4, short name_len, short opts_len);
PyObject* diagonalize(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* w;
if (!PyArg_ParseTuple(args, "OO", &a, &w))
return NULL;
int n = PyArray_DIMS(a)[0];
int lda = n;
int info = 0;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
int lwork = 3 * n + 1;
double* work = GPAW_MALLOC(double, lwork);
dsyev_("V", "U", &n, DOUBLEP(a), &lda,
DOUBLEP(w), work, &lwork, &info);
free(work);
}
else
{
int lwork = 2 * n + 1;
int lrwork = 3 * n + 1;
void* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
zheev_("V", "U", &n, (void*)COMPLEXP(a), &lda,
DOUBLEP(w),
work, &lwork, rwork, &lrwork, &info);
free(work);
free(rwork);
}
return Py_BuildValue("i", info);
}
PyObject* diagonalize_mr3(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* w;
PyArrayObject* z;
if (!PyArg_ParseTuple(args, "OOO", &a, &w, &z))
return NULL;
char jobz = 'V';
char range = 'A';
char uplo = 'U';
int n = PyArray_DIMS(a)[0];
int lda = MAX(1, n);
double vl, vu;
int il, iu;
double abstol = dlamch_("Safe minimum");
int m = n; /* assume we find all eigenvalues */
int ldz = lda;
int info = 0;
int* isuppz = GPAW_MALLOC(int, 2*m);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
/* Minimum workspace plus a little extra */
int lwork = 26 * n + 1;
int liwork = 10 * n + 1;
double* work = GPAW_MALLOC(double, lwork);
int* iwork = GPAW_MALLOC(int, liwork);
dsyevr_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &lda,
&vl, &vu, &il, &iu, &abstol,
&m, DOUBLEP(w), DOUBLEP(z), &ldz, isuppz,
work, &lwork, iwork, &liwork,
&info);
free(work);
free(iwork);
}
else
{
/* Minimum workspace plus a little extra */
int lwork = 2 * n + 1;
int lrwork = 24 * n + 1;
int liwork = 10 * n + 1;
void* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
int* iwork = GPAW_MALLOC(int, liwork);
zheevr_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &lda,
&vl, &vu, &il, &iu, &abstol,
&m, DOUBLEP(w), (void*)COMPLEXP(z), &ldz, isuppz,
work, &lwork, rwork, &lrwork,
iwork, &liwork,
&info);
free(work);
free(rwork);
free(iwork);
}
free(isuppz);
  // If this fails, fewer eigenvalues than requested were computed
assert (m == n);
return Py_BuildValue("i", info);
}
PyObject* general_diagonalize(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* w;
PyArrayObject* b;
PyArrayObject* z;
int iu = -1;
if (!PyArg_ParseTuple(args, "OOO|Oi", &a, &w, &b, &z, &iu))
return NULL;
int itype = 1;
char jobz = 'V';
char range = 'I';
char uplo = 'U';
int n = PyArray_DIMS(a)[0];
int lda = MAX(1, n);
int ldb = lda;
double vl, vu;
int il = 1;
double abstol = dlamch_("Safe minimum");
int m;
int ldz = lda;
int info = 0;
int ispec = 1;
int dummy = -1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
// Optimal number of blocks for dsygv(x)_
int NB = ilaenv_(&ispec, "dsytrd", &uplo, &n, &dummy, &dummy, &dummy,
6, 1);
if (iu == -1)
{
int lwork = MAX((NB + 2) * n, 3 * n + 1);
double* work = GPAW_MALLOC(double, lwork);
dsygv_(&itype, &jobz, &uplo, &n, DOUBLEP(a), &lda,
DOUBLEP(b), &ldb, DOUBLEP(w),
work, &lwork, &info);
free(work);
}
else
{
int lwork = MAX((NB + 3) * n, 8 * n);
int liwork = 5 * n;
double* work = GPAW_MALLOC(double, lwork);
int* iwork = GPAW_MALLOC(int, liwork);
int* ifail = GPAW_MALLOC(int, n);
dsygvx_(&itype, &jobz, &range, &uplo, &n,
DOUBLEP(a), &lda, DOUBLEP(b), &ldb,
&vl, &vu, &il, &iu, &abstol,
&m, DOUBLEP(w), DOUBLEP(z), &ldz,
work, &lwork, iwork, ifail,
&info);
free(iwork);
free(work);
free(ifail);
assert (m == iu);
}
}
else
{
// Optimal number of blocks for zhegv(x)_
int NB = ilaenv_(&ispec, "zhetrd", &uplo, &n, &dummy, &dummy, &dummy,
6, 1);
if (iu == -1)
{
int lwork = MAX((NB + 1) * n, 2 * n + 1);
int lrwork = MAX(1, 3 * n + 1);
void* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
zhegv_(&itype, &jobz, &uplo, &n, (void*)COMPLEXP(a), &lda,
(void*)COMPLEXP(b), &lda,
DOUBLEP(w),
work, &lwork, rwork, &lrwork, &info);
free(work);
free(rwork);
}
else
{
int lwork = MAX((NB + 1) * n, 2 * n);
int lrwork = 7 * n;
int liwork = 5 * n;
void* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
int* iwork = GPAW_MALLOC(int, liwork);
int* ifail = GPAW_MALLOC(int, n);
zhegvx_(&itype, &jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &lda, (void*)COMPLEXP(b), &ldb,
&vl, &vu, &il, &iu, &abstol,
&m, DOUBLEP(w), (void*)COMPLEXP(z), &ldz,
work, &lwork, rwork, iwork, ifail, &info);
free(work);
free(rwork);
free(iwork);
free(ifail);
assert (m == iu);
}
}
return Py_BuildValue("i", info);
}
PyObject* inverse_cholesky(PyObject *self, PyObject *args)
{
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "O", &a))
return NULL;
int n = PyArray_DIMS(a)[0];
int lda = MAX(1, n);
int info = 0;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
dpotrf_("U", &n, (void*)DOUBLEP(a), &lda, &info);
if (info == 0)
{
dtrtri_("U", "N", &n, (void*)DOUBLEP(a), &lda, &info);
if (info == 0)
{
/* Make sure that the other diagonal is zero */
double* ap = DOUBLEP(a);
ap++;
for (int i = 0; i < n - 1; i++)
{
memset(ap, 0, (n-1-i) * sizeof(double));
ap += n + 1;
}
}
}
}
else
{
zpotrf_("U", &n, (void*)COMPLEXP(a), &lda, &info);
if (info == 0)
{
ztrtri_("U", "N", &n, (void*)DOUBLEP(a), &lda, &info);
if (info == 0)
{
/* Make sure that lower diagonal is zero */
double_complex* ap = COMPLEXP(a);
ap++;
for (int i = 0; i < n - 1; i++)
{
memset(ap, 0, (n-1-i) * sizeof(double_complex));
ap += n + 1;
}
}
}
}
return Py_BuildValue("i", info);
}
void swap(double *a, double *b) {
double tmp=*b;
*b = *a;
*a = tmp;
}
void transpose(double *A, int n) {
int i, j;
int in=0;
  for(i=0;i<n;i++) {
    for(j=0;j<i;j++)
      swap(A+in+j, A+j*n+i);
    in+=n;
  }
}

/* Solve the standard non-symmetric real eigenvalue problem with dgeev:
   right eigenvectors are returned in v and the real parts of the
   eigenvalues in w.  The entry-point name, the argument order and the
   local declarations below are assumptions inferred from how A, w and v
   are used in the rest of the function body. */
PyObject* diagonalize_nonsymmetric(PyObject *self, PyObject *args)
{
  PyArrayObject* A;
  PyArrayObject* w;
  PyArrayObject* v;
  if (!PyArg_ParseTuple(args, "OOO", &A, &w, &v))
    return NULL;
  int n = PyArray_DIMS(A)[0];
  int lda = n;
  int info = 0;
  if (PyArray_DESCR(A)->type_num == NPY_DOUBLE)
{
int lwork = -1;
double* work = GPAW_MALLOC(double, 1);
double* wr = GPAW_MALLOC(double, n);
double* wi = GPAW_MALLOC(double, n);
int ldvl = 1;
int ldvr = n;
double* vl = 0;
int i;
/* get size of work needed */
dgeev_("No eigenvectors left", "Vectors right",
&n, DOUBLEP(A), &lda, wr, wi,
vl, &ldvl, DOUBLEP(v), &ldvr, work, &lwork, &info);
lwork = (int) work[0];
free(work);
work = GPAW_MALLOC(double, lwork);
transpose(DOUBLEP(A),n); /* transform to Fortran form */
dgeev_("No eigenvectors left", "Vectors right",
&n, DOUBLEP(A), &lda, wr, wi,
vl, &ldvl, DOUBLEP(v), &ldvr, work, &lwork, &info);
    for(i=0;i<n;i++) {
      /* warn about eigenvalues with a nonzero imaginary part; only the
         real parts are returned in w */
      if (wi[i] != 0.0)
        printf("dgeev i=%d,wi[i]=%g\n",
i,wi[i]);
DOUBLEP(w)[i]=wr[i];
}
free(wr);
free(wi);
free(work);
}
return Py_BuildValue("i", info);
}
PyObject* inverse_general(PyObject *self, PyObject *args)
{
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "O", &a))
return NULL;
int n = PyArray_DIMS(a)[0];
int m = n;
int lda = n;
int lwork = n;
int* ipiv = GPAW_MALLOC(int, n);
int info = 0;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
dgetrf_(&n, &m, DOUBLEP(a), &lda, ipiv, &info);
dgetri_(&n, DOUBLEP(a), &lda, ipiv, work, &lwork, &info);
free(work);
}
else
{
void *work = GPAW_MALLOC(double_complex, lwork);
zgetrf_(&n, &m, (void*)COMPLEXP(a), &lda, ipiv, &info);
zgetri_(&n, (void*)COMPLEXP(a), &lda, ipiv, work, &lwork, &info);
free(work);
}
free(ipiv);
return Py_BuildValue("i", info);
}
PyObject* inverse_symmetric(PyObject *self, PyObject *args)
{
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "O", &a))
return NULL;
int n = PyArray_DIMS(a)[0];
int lda = n;
int lwork =n;
int* ipiv = GPAW_MALLOC(int, n);
int info = 0;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
dsytrf_("U", &n, DOUBLEP(a), &lda, ipiv, work, &lwork, &info);
dsytri_("U", &n, DOUBLEP(a), &lda, ipiv, work, &info);
free(work);
}
else
{
void *work = GPAW_MALLOC(double_complex, lwork);
zsytrf_("U", &n, (void*)COMPLEXP(a), &lda, ipiv, work, &lwork, &info);
zsytri_("U", &n, (void*)COMPLEXP(a), &lda, ipiv, work, &info);
free(work);
}
free(ipiv);
return Py_BuildValue("i", info);
}
PyObject* linear_solve_band(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
int kl, ku, info=0, *ipiv;
if(!PyArg_ParseTuple(args,"OOii",&a, &b,&kl,&ku))
return NULL;
int n=PyArray_DIMS(a)[0];
int ldab=PyArray_DIMS(a)[1];
int ldb=PyArray_DIMS(b)[0];
int nrhs=PyArray_DIMS(b)[1];
ipiv = GPAW_MALLOC(int, n);
zgbsv_(&n, &kl,&ku, &nrhs, (void*)COMPLEXP(a), &ldab, ipiv, (void*)COMPLEXP(b), &ldb, &info);
free(ipiv);
return Py_BuildValue("i",info);
}
PyObject* linear_solve_tridiag(PyObject *self, PyObject *args)
{
PyArrayObject* A;
PyArrayObject* du;
PyArrayObject* du2;
PyArrayObject* dl;
PyArrayObject* phi;
int dim=0, one=1, info=0;
if(!PyArg_ParseTuple(args,"iOOOOO", &dim, &A, &du, &dl, &du2, &phi))
return NULL;
int ldb = dim;
int *ipiv = GPAW_MALLOC(int, dim);
zgttrf_(&dim, (void*)COMPLEXP(dl), (void*)COMPLEXP(A), (void*)COMPLEXP(du), (void*)COMPLEXP(du2), ipiv, &info);
zgttrs_("N", &dim, &one, (void*)COMPLEXP(dl), (void*)COMPLEXP(A), (void*)COMPLEXP(du),
(void*)COMPLEXP(du2), ipiv, (void*)COMPLEXP(phi), &ldb, &info);
free(ipiv);
return Py_BuildValue("i",info);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/lcao.c 0000664 0000000 0000000 00000013236 13164413722 0021435 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
#include "localized_functions.h"
#include "bmgs/bmgs.h"
#include <stdbool.h>
#include <complex.h>
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define dgemv_ dgemv
# define dgemm_ dgemm
#endif
int dgemv_(char *trans, int *m, int * n,
double *alpha, double *a, int *lda,
double *x, int *incx, double *beta,
double *y, int *incy);
int dgemm_(char *transa, char *transb, int *m, int * n,
int *k, const double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
// +-----------n
// +----m +----m | +----c+m |
// | | | | | | | |
// | b | = | v | * | | a | |
// | | | | | | | |
// 0----+ 0----+ | c----+ |
// | |
// 0-----------+
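// Concrete reading of the sketch above: b and v have shape m, a has shape n,
// and the cut starts at corner c of a, so
//     b[i0][i1][i2] = v[i0][i1][i2] * a[c[0]+i0][c[1]+i1][c[2]+i2]
// for 0 <= i0 < m[0], 0 <= i1 < m[1], 0 <= i2 < m[2].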
void cut(const double* a, const int n[3], const int c[3],
const double* v,
double* b, const int m[3])
{
a += c[2] + (c[1] + c[0] * n[1]) * n[2];
for (int i0 = 0; i0 < m[0]; i0++)
{
for (int i1 = 0; i1 < m[1]; i1++)
{
for (int i2 = 0; i2 < m[2]; i2++)
b[i2] = v[i2] * a[i2];
a += n[2];
b += m[2];
v += m[2];
}
a += n[2] * (n[1] - m[1]);
}
}
PyObject * overlap(PyObject* self, PyObject *args)
{
PyObject* lfs_b_obj;
PyArrayObject* m_b_obj;
PyArrayObject* phase_bk_obj;
PyArrayObject* vt_sG_obj;
PyArrayObject* Vt_skmm_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &lfs_b_obj, &m_b_obj, &phase_bk_obj,
&vt_sG_obj, &Vt_skmm_obj))
return NULL;
int nk = PyArray_DIMS(phase_bk_obj)[1];
int nm = PyArray_DIMS(Vt_skmm_obj)[2];
int nspins = PyArray_DIMS(vt_sG_obj)[0];
const long *m_b = LONGP(m_b_obj);
const double complex *phase_bk = COMPLEXP(phase_bk_obj);
const double *vt_sG = DOUBLEP(vt_sG_obj);
double *Vt_smm = 0;
double complex *Vt_skmm = 0;
if (nk == 0)
Vt_smm = DOUBLEP(Vt_skmm_obj);
else
Vt_skmm = COMPLEXP(Vt_skmm_obj);
int nb = PyList_Size(lfs_b_obj);
int nmem = 0;
double* a1 = 0;
for (int b1 = 0; b1 < nb; b1++)
{
const LocalizedFunctionsObject* lf1 =
(const LocalizedFunctionsObject*)PyList_GetItem(lfs_b_obj, b1);
int m1 = m_b[b1];
int nao1 = lf1->nf;
double* f1 = lf1->f;
double* vt1 = GPAW_MALLOC(double, lf1->ng0 * nspins);
for (int s = 0; s < nspins; s++)
bmgs_cut(vt_sG + s * lf1->ng, lf1->size, lf1->start,
vt1 + s * lf1->ng0, lf1->size0);
for (int b2 = b1; b2 < nb; b2++)
{
const LocalizedFunctionsObject* lf2 =
(const LocalizedFunctionsObject*)PyList_GetItem(lfs_b_obj, b2);
int beg[3];
int end[3];
int size[3];
int beg1[3];
int beg2[3];
bool overlap = true;
for (int c = 0; c < 3; c++)
{
beg[c] = MAX(lf1->start[c], lf2->start[c]);
end[c] = MIN(lf1->start[c] + lf1->size0[c],
lf2->start[c] + lf2->size0[c]);
size[c] = end[c] - beg[c];
if (size[c] <= 0)
{
overlap = false;
continue;
}
beg1[c] = beg[c] - lf1->start[c];
beg2[c] = beg[c] - lf2->start[c];
}
int nao2 = lf2->nf;
if (overlap)
{
int ng = size[0] * size[1] * size[2];
int n = ng * (nao1 + nao2) + nao1 * nao2;
if (n > nmem)
{
if (nmem != 0)
free(a1);
nmem = n;
a1 = GPAW_MALLOC(double, nmem);
}
double* a2 = a1 + ng * nao1;
double* H = a2 + ng * nao2;
double* f2 = lf2->f;
double* vt2 = lf2->w;
double dv = lf1->dv;
int m2 = m_b[b2];
if (b2 > b1)
for (int i = 0; i < nao2; i++)
bmgs_cut(f2 + i * lf2->ng0, lf2->size0, beg2,
a2 + i * ng, size);
else
a2 = f2;
for (int s = 0; s < nspins; s++)
{
if (b2 > b1)
{
bmgs_cut(vt1 + s * lf1->ng0, lf1->size0, beg1, vt2, size);
for (int i = 0; i < nao1; i++)
cut(f1 + i * lf1->ng0, lf1->size0, beg1, vt2,
a1 + i * ng, size);
}
else
{
for (int i1 = 0; i1 < nao1; i1++)
for (int g = 0; g < ng; g++)
a1[i1 * ng + g] = (vt1[g + s * lf1->ng0] *
f1[i1 * ng + g]);
}
double zero = 0.0;
dgemm_("t", "n", &nao2, &nao1, &ng, &dv,
a2, &ng, a1, &ng, &zero, H, &nao2);
if (nk == 0)
{
double* Vt_mm = (Vt_smm + s * nm * nm + m1 + m2 * nm);
if (b2 == b1)
for (int i1 = 0; i1 < nao1; i1++)
for (int i2 = i1; i2 < nao2; i2++)
Vt_mm[i1 + i2 * nm] += H[i2 + i1 * nao2];
else if (m1 == m2)
for (int i1 = 0; i1 < nao1; i1++)
for (int i2 = i1; i2 < nao2; i2++)
Vt_mm[i1 + i2 * nm] += (H[i2 + i1 * nao2] +
H[i1 + i2 * nao2]);
else
for (int ii = 0, i1 = 0; i1 < nao1; i1++)
for (int i2 = 0; i2 < nao2; i2++, ii++)
Vt_mm[i1 + i2 * nm] += H[ii];
}
else
for (int k = 0; k < nk; k++)
{
double complex* Vt_mm = (Vt_skmm +
(s * nk + k) * nm * nm +
m1 + m2 * nm);
if (b2 == b1)
for (int i1 = 0; i1 < nao1; i1++)
for (int i2 = i1; i2 < nao2; i2++)
Vt_mm[i1 + i2 * nm] += H[i2 + i1 * nao2];
else
{
double complex phase = \
(phase_bk[b1 * nk + k] *
conj(phase_bk[b2 * nk + k]));
if (m1 == m2)
for (int i1 = 0; i1 < nao1; i1++)
for (int i2 = i1; i2 < nao2; i2++)
Vt_mm[i1 + i2 * nm] += \
(phase * H[i2 + i1 * nao2] +
conj(phase) * H[i1 + i2 * nao2]);
else
for (int ii = 0, i1 = 0; i1 < nao1; i1++)
for (int i2 = 0; i2 < nao2; i2++, ii++)
Vt_mm[i1 + i2 * nm] += phase * H[ii];
}
}
}
}
}
free(vt1);
}
if (nmem != 0)
free(a1);
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/lfc.c 0000664 0000000 0000000 00000156502 13164413722 0021267 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "spline.h"
#include "lfc.h"
#include "bmgs/spherical_harmonics.h"
#include "bmgs/bmgs.h"
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define zgemm_ zgemm
#endif
void zgemm_(char *transa, char *transb, int *m, int * n,
int *k, void *alpha, void *a, int *lda,
const void *b, int *ldb, void *beta,
void *c, int *ldc);
static void lfc_dealloc(LFCObject *self)
{
if (self->bloch_boundary_conditions)
free(self->phase_i);
free(self->volume_i);
free(self->work_gm);
free(self->ngm_W);
free(self->i_W);
free(self->volume_W);
PyObject_DEL(self);
}
PyObject* calculate_potential_matrix(LFCObject *self, PyObject *args);
PyObject* calculate_potential_matrices(LFCObject *self, PyObject *args);
PyObject* lfcintegrate(LFCObject *self, PyObject *args);
PyObject* derivative(LFCObject *self, PyObject *args);
PyObject* normalized_derivative(LFCObject *self, PyObject *args);
PyObject* construct_density(LFCObject *self, PyObject *args);
PyObject* construct_density1(LFCObject *self, PyObject *args);
PyObject* ae_valence_density_correction(LFCObject *self, PyObject *args);
PyObject* ae_core_density_correction(LFCObject *self, PyObject *args);
PyObject* lcao_to_grid(LFCObject *self, PyObject *args);
PyObject* lcao_to_grid_k(LFCObject *self, PyObject *args);
PyObject* add(LFCObject *self, PyObject *args);
PyObject* calculate_potential_matrix_derivative(LFCObject *self,
PyObject *args);
PyObject* calculate_potential_matrix_force_contribution(LFCObject *self,
PyObject *args);
PyObject* second_derivative(LFCObject *self, PyObject *args);
PyObject* add_derivative(LFCObject *self, PyObject *args);
static PyMethodDef lfc_methods[] = {
{"calculate_potential_matrix",
(PyCFunction)calculate_potential_matrix, METH_VARARGS, 0},
{"calculate_potential_matrices",
(PyCFunction)calculate_potential_matrices, METH_VARARGS, 0},
{"integrate",
(PyCFunction)lfcintegrate, METH_VARARGS, 0},
{"derivative",
(PyCFunction)derivative, METH_VARARGS, 0},
{"normalized_derivative",
(PyCFunction)normalized_derivative, METH_VARARGS, 0},
{"construct_density",
(PyCFunction)construct_density, METH_VARARGS, 0},
{"construct_density1",
(PyCFunction)construct_density1, METH_VARARGS, 0},
{"ae_valence_density_correction",
(PyCFunction)ae_valence_density_correction, METH_VARARGS, 0},
{"ae_core_density_correction",
(PyCFunction)ae_core_density_correction, METH_VARARGS, 0},
{"lcao_to_grid",
(PyCFunction)lcao_to_grid, METH_VARARGS, 0},
{"lcao_to_grid_k",
(PyCFunction)lcao_to_grid_k, METH_VARARGS, 0},
{"add",
(PyCFunction)add, METH_VARARGS, 0},
{"calculate_potential_matrix_derivative",
(PyCFunction)calculate_potential_matrix_derivative, METH_VARARGS, 0},
{"calculate_potential_matrix_force_contribution",
(PyCFunction)calculate_potential_matrix_force_contribution, METH_VARARGS, 0},
{"second_derivative",
(PyCFunction)second_derivative, METH_VARARGS, 0},
{"add_derivative",
(PyCFunction)add_derivative, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
PyTypeObject LFCType = {
PyVarObject_HEAD_INIT(NULL, 0)
"LocalizedFunctionsCollection",
sizeof(LFCObject),
0,
(destructor)lfc_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"LFC object",
0, 0, 0, 0, 0, 0,
lfc_methods
};
PyObject * NewLFCObject(PyObject *obj, PyObject *args)
{
PyObject* A_Wgm_obj;
PyArrayObject* M_W_obj;
PyArrayObject* G_B_obj;
PyArrayObject* W_B_obj;
double dv;
PyArrayObject* phase_kW_obj;
if (!PyArg_ParseTuple(args, "OOOOdO",
&A_Wgm_obj, &M_W_obj, &G_B_obj, &W_B_obj, &dv,
&phase_kW_obj))
return NULL;
LFCObject *self = PyObject_NEW(LFCObject, &LFCType);
if (self == NULL)
return NULL;
self->dv = dv;
const int* M_W = (const int*)PyArray_DATA(M_W_obj);
self->G_B = (int*)PyArray_DATA(G_B_obj);
self->W_B = (int*)PyArray_DATA(W_B_obj);
if (PyArray_DIMS(phase_kW_obj)[0] > 0) {
self->bloch_boundary_conditions = true;
self->phase_kW = (double complex*)PyArray_DATA(phase_kW_obj);
}
else {
self->bloch_boundary_conditions = false;
}
int nB = PyArray_DIMS(G_B_obj)[0];
int nW = PyList_Size(A_Wgm_obj);
self->nW = nW;
self->nB = nB;
int nimax = 0;
int ngmax = 0;
int ni = 0;
int Ga = 0;
for (int B = 0; B < nB; B++) {
int Gb = self->G_B[B];
int nG = Gb - Ga;
if (ni > 0 && nG > ngmax)
ngmax = nG;
if (self->W_B[B] >= 0)
ni += 1;
else {
if (ni > nimax)
nimax = ni;
ni--;
}
Ga = Gb;
}
assert(ni == 0);
self->volume_W = GPAW_MALLOC(LFVolume, nW);
self->i_W = GPAW_MALLOC(int, nW);
self->ngm_W = GPAW_MALLOC(int, nW);
int nmmax = 0;
for (int W = 0; W < nW; W++) {
PyArrayObject* A_gm_obj = (PyArrayObject*)PyList_GetItem(A_Wgm_obj, W);
LFVolume* volume = &self->volume_W[W];
volume->A_gm = (const double*)PyArray_DATA(A_gm_obj);
self->ngm_W[W] = PyArray_DIMS(A_gm_obj)[0] * PyArray_DIMS(A_gm_obj)[1];
volume->nm = PyArray_DIMS(A_gm_obj)[1];
volume->M = M_W[W];
volume->W = W;
if (volume->nm > nmmax)
nmmax = volume->nm;
}
self->work_gm = GPAW_MALLOC(double, ngmax * nmmax);
self->volume_i = GPAW_MALLOC(LFVolume, nimax);
if (self->bloch_boundary_conditions)
self->phase_i = GPAW_MALLOC(complex double, nimax);
return (PyObject*)self;
}
PyObject* calculate_potential_matrix(LFCObject *lfc, PyObject *args)
{
PyArrayObject* vt_G_obj;
PyArrayObject* Vt_MM_obj;
int k;
int Mstart;
int Mstop;
if (!PyArg_ParseTuple(args, "OOiii", &vt_G_obj, &Vt_MM_obj, &k,
&Mstart, &Mstop))
return NULL;
const double* vt_G = (const double*)PyArray_DATA(vt_G_obj);
int nM = PyArray_DIMS(Vt_MM_obj)[1];
double dv = lfc->dv;
double* work_gm = lfc->work_gm;
if (!lfc->bloch_boundary_conditions) {
double* Vt_MM = (double*)PyArray_DATA(Vt_MM_obj);
GRID_LOOP_START(lfc, -1) { // ORDINARY/GAMMA-POINT
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
int gm = M1p - M1;
int gm1 = 0;
const double* A1_gm = v1->A_gm;
for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) {
double vtdv = vt_G[G] * dv;
for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++)
work_gm[gm1] = vtdv * A1_gm[gm];
}
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
if (M1 >= M2) {
int nm2 = v2->nm;
const double* A2_gm = v2->A_gm;
double* Vt_mm = Vt_MM + (M1p - Mstart) * nM + M2;
for (int g = 0; g < nG; g++){
int gnm1 = g * nm1p;
int gnm2 = g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
int m1nM = m1 * nM;
for (int m2 = 0; m2 < nm2; m2++)
Vt_mm[m2 + m1nM] += A2_gm[gnm2 + m2] * work_gm[gnm1 + m1];
}
}
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
}
else {
complex double* Vt_MM = (complex double*)PyArray_DATA(Vt_MM_obj);
GRID_LOOP_START(lfc, k) { // KPOINT CALC POT MATRIX
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
double complex conjphase1 = conj(phase_i[i1]);
int M1 = v1->M;
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
int gm = M1p - M1;
int gm1 = 0;
const double* A1_gm = v1->A_gm;
for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) {
double vtdv = vt_G[G] * dv;
for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++)
work_gm[gm1] = vtdv * A1_gm[gm];
}
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
const double* A2_gm = v2->A_gm;
int M2 = v2->M;
if (M1 >= M2) {
int nm2 = v2->nm;
double complex phase = conjphase1 * phase_i[i2];
double complex* Vt_mm = Vt_MM + (M1p - Mstart) * nM + M2;
for (int g = 0; g < nG; g++) {
int gnm1 = g * nm1p;
int gnm2 = g * nm2;
int m1nM = 0;
for (int m1 = 0; m1 < nm1p; m1++, m1nM += nM) {
complex double wphase = work_gm[gnm1 + m1] * phase;
for (int m2 = 0; m2 < nm2; m2++) {
Vt_mm[m1nM + m2] += A2_gm[gnm2 + m2] * wphase;
}
}
}
}
}
}
}
GRID_LOOP_STOP(lfc, k);
}
Py_RETURN_NONE;
}
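// calculate_potential_matrices(): same integral as calculate_potential_matrix(),
// but each pair of overlapping volumes is stored in slice x = x_W[W2] - x_W[W1]
// of Vt_xMM (only pairs with x >= 0 are kept); x_W presumably distinguishes
// periodic images of the same basis function.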
PyObject* calculate_potential_matrices(LFCObject *lfc, PyObject *args)
{
PyArrayObject* vt_G_obj;
PyArrayObject* Vt_xMM_obj;
PyArrayObject* x_W_obj;
int Mstart;
int Mstop;
if (!PyArg_ParseTuple(args, "OOOii", &vt_G_obj, &Vt_xMM_obj, &x_W_obj,
&Mstart, &Mstop))
return NULL;
const double* vt_G = (const double*)PyArray_DATA(vt_G_obj);
int nM = PyArray_DIMS(Vt_xMM_obj)[2];
double dv = lfc->dv;
double* work_gm = lfc->work_gm;
double* Vt_xMM = (double*)PyArray_DATA(Vt_xMM_obj);
int* x_W = (int*)PyArray_DATA(x_W_obj);
GRID_LOOP_START(lfc, -1) {
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
int x1 = x_W[v1->W];
int gm = M1p - M1;
int gm1 = 0;
const double* A1_gm = v1->A_gm;
for (int G = Ga; G < Gb; G++, gm += nm1 - nm1p) {
double vtdv = vt_G[G] * dv;
for (int m1 = 0; m1 < nm1p; m1++, gm1++, gm++)
work_gm[gm1] = vtdv * A1_gm[gm];
}
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int x = x_W[v2->W] - x1;
if (x >= 0) {
int M2 = v2->M;
int nm2 = v2->nm;
const double* A2_gm = v2->A_gm;
double* Vt_mm = (Vt_xMM +
(M1p - Mstart) * nM + M2 +
x * (Mstop - Mstart) * nM);
for (int g = 0; g < nG; g++) {
int gnm1 = g * nm1p;
int gnm2 = g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
int m1nM = m1 * nM;
for (int m2 = 0; m2 < nm2; m2++)
Vt_mm[m2 + m1nM] += (A2_gm[gnm2 + m2] *
work_gm[gnm1 + m1]);
}
}
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
Py_RETURN_NONE;
}
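// lfcintegrate(): for each of the nx grid arrays in a_xG, accumulates
//   c_xM[M] += dv * sum_G a(G) * Phi_M(G),
// multiplied by the Bloch phase of the volume in the k-point branch.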
PyObject* lfcintegrate(LFCObject *lfc, PyObject *args)
{
PyArrayObject* a_xG_obj;
PyArrayObject* c_xM_obj;
int q;
if (!PyArg_ParseTuple(args, "OOi", &a_xG_obj, &c_xM_obj, &q))
return NULL;
int nd = PyArray_NDIM(a_xG_obj);
npy_intp* dims = PyArray_DIMS(a_xG_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
int nM = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1];
double dv = lfc->dv;
if (!lfc->bloch_boundary_conditions) {
const double* a_G = (const double*)PyArray_DATA(a_xG_obj);
double* c_M = (double*)PyArray_DATA(c_xM_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
const double* A_gm = v->A_gm;
int nm = v->nm;
double* c_M1 = c_M + v->M;
for (int gm = 0, G = Ga; G < Gb; G++){
double av = a_G[G] * dv;
for (int m = 0; m < nm; m++, gm++){
c_M1[m] += av * A_gm[gm];
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
c_M += nM;
a_G += nG;
}
}
else {
const complex double* a_G = (const complex double*)PyArray_DATA(a_xG_obj);
complex double* c_M = (complex double*)PyArray_DATA(c_xM_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
int nm = v->nm;
complex double* c_M1 = c_M + v->M;
const double* A_gm = v->A_gm;
double complex vphase = phase_i[i] * dv;
for (int gm = 0, G = Ga; G < Gb; G++){
double complex avphase = a_G[G] * vphase;
for (int m = 0; m < nm; m++, gm++){
c_M1[m] += avphase * A_gm[gm];
}
}
}
}
GRID_LOOP_STOP(lfc, q);
c_M += nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
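// construct_density(): adds the LCAO density
//   nt_G[G] += sum_{M1, M2} Phi_M1(G) * rho_MM[M1, M2] * Phi_M2(G)
// for the rows Mstart <= M1 < Mstop.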
PyObject* construct_density(LFCObject *lfc, PyObject *args)
{
PyArrayObject* rho_MM_obj;
PyArrayObject* nt_G_obj;
int k;
int Mstart, Mstop;
if (!PyArg_ParseTuple(args, "OOiii", &rho_MM_obj, &nt_G_obj, &k,
&Mstart, &Mstop))
return NULL;
double* nt_G = (double*)PyArray_DATA(nt_G_obj);
int nM = PyArray_DIMS(rho_MM_obj)[1];
double* work_gm = lfc->work_gm;
if (!lfc->bloch_boundary_conditions) {
const double* rho_MM = (const double*)PyArray_DATA(rho_MM_obj);
GRID_LOOP_START(lfc, -1) {
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
memset(work_gm, 0, nG * nm1 * sizeof(double));
double factor = 1.0;
int m1end = MIN(nm1, Mstop - M1);
int m1start = MAX(0, Mstart - M1);
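// The i2 loop starts at i1, so each pair of overlapping volumes is visited
// only once; off-diagonal pairs therefore get a factor of 2, which assumes
// a symmetric (real) or Hermitian (complex) rho_MM.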
for (int i2 = i1; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
int nm2 = v2->nm;
const double* rho_mm = rho_MM + (M1p - Mstart) * nM + M2;
//assert(M1 - Mstart + m1start >= 0);
for (int g = 0; g < nG; g++) {
for (int m1 = m1start, m1p = 0; m1 < m1end; m1++, m1p++) {
for (int m2 = 0; m2 < nm2; m2++) {
work_gm[g * nm1 + m1] += (v2->A_gm[g * nm2 + m2] *
rho_mm[m1p * nM + m2] *
factor);
}
}
}
factor = 2.0;
}
int gm1 = 0;
for (int G = Ga; G < Gb; G++) {
double nt = 0.0;
for (int m1 = 0; m1 < nm1; m1++, gm1++) {
nt += v1->A_gm[gm1] * work_gm[gm1];
}
nt_G[G] += nt;
}
}
}
GRID_LOOP_STOP(lfc, -1);
}
else {
const double complex* rho_MM = (const double complex*)PyArray_DATA(rho_MM_obj);
GRID_LOOP_START(lfc, k) {
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
memset(work_gm, 0, nG * nm1 * sizeof(double));
double complex factor = 1.0;
int m1end = MIN(nm1, Mstop - M1);
int m1start = MAX(0, Mstart - M1);
for (int i2 = i1; i2 < ni; i2++) {
if (i2 > i1)
factor = 2.0 * phase_i[i1] * conj(phase_i[i2]);
double rfactor = creal(factor);
double ifactor = cimag(factor);
LFVolume* v2 = volume_i + i2;
const double* A2_gm = v2->A_gm;
int M2 = v2->M;
int nm2 = v2->nm;
const double complex* rho_mm = rho_MM + (M1p - Mstart) * nM + M2;
double rrho, irho, rwork, iwork;
complex double rho;
for (int g = 0; g < nG; g++) {
int gnm1 = g * nm1;
int gnm2 = g * nm2;
int m1pnM = 0;
for (int m1 = m1start, m1p=0; m1 < m1end; m1++, m1p++) {
m1pnM = m1p * nM;
iwork = 0;
rwork = 0;
for (int m2 = 0; m2 < nm2; m2++) {
rho = rho_mm[m1pnM + m2];
rrho = creal(rho);
irho = cimag(rho);
rwork += A2_gm[gnm2 + m2] * rrho;
iwork += A2_gm[gnm2 + m2] * irho;
// We could save one of those multiplications if the buffer
// were twice as large
//work += A2_gm[gnm2 + m2] * (rfactor * rrho - ifactor * irho);
}
//work_gm[m1 + gnm1] += work;
work_gm[m1 + gnm1] += rwork * rfactor - iwork * ifactor;
}
}
}
int gm1 = 0;
const double* A1_gm = v1->A_gm;
for (int G = Ga; G < Gb; G++) {
double nt = 0.0;
for (int m1 = 0; m1 < nm1; m1++, gm1++) {
nt += A1_gm[gm1] * work_gm[gm1];
}
nt_G[G] += nt;
}
}
}
GRID_LOOP_STOP(lfc, k);
}
Py_RETURN_NONE;
}
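// construct_density1(): adds the diagonal part of the density,
//   nt_G[G] += sum_M f_M * Phi_M(G)^2 .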
PyObject* construct_density1(LFCObject *lfc, PyObject *args)
{
PyArrayObject* f_M_obj;
PyArrayObject* nt_G_obj;
if (!PyArg_ParseTuple(args, "OO", &f_M_obj, &nt_G_obj))
return NULL;
const double* f_M = (const double*)PyArray_DATA(f_M_obj);
double* nt_G = (double*)PyArray_DATA(nt_G_obj);
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
for (int gm = 0, G = Ga; G < Gb; G++) {
for (int m = 0; m < v->nm; m++, gm++) {
nt_G[G] += v->A_gm[gm] * v->A_gm[gm] * f_M[v->M + m];
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
Py_RETURN_NONE;
}
PyObject* lcao_to_grid(LFCObject *lfc, PyObject *args)
{
PyArrayObject* c_M_obj;
PyArrayObject* psit_G_obj;
int k;
if (!PyArg_ParseTuple(args, "OOi", &c_M_obj, &psit_G_obj, &k))
return NULL;
if (!lfc->bloch_boundary_conditions) {
if (PyArray_DESCR(c_M_obj)->type_num == NPY_DOUBLE) {
const double* c_M = (const double*)PyArray_DATA(c_M_obj);
double* psit_G = (double*)PyArray_DATA(psit_G_obj);
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
for (int gm = 0, G = Ga; G < Gb; G++) {
for (int m = 0; m < v->nm; m++, gm++) {
psit_G[G] += v->A_gm[gm] * c_M[v->M + m];
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
}
else {
const double complex* c_M = (const double complex*)PyArray_DATA(c_M_obj);
double complex* psit_G = (double complex*)PyArray_DATA(psit_G_obj);
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
for (int gm = 0, G = Ga; G < Gb; G++) {
for (int m = 0; m < v->nm; m++, gm++) {
psit_G[G] += v->A_gm[gm] * c_M[v->M + m];
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
}
}
else {
const double complex* c_M = (const double complex*)PyArray_DATA(c_M_obj);
double complex* psit_G = (double complex*)PyArray_DATA(psit_G_obj);
GRID_LOOP_START(lfc, k) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
double complex conjphase = conj(phase_i[i]);
const double* A_gm = v->A_gm;
const double complex* c_M1 = c_M + v->M;
for (int gm = 0, G = Ga; G < Gb; G++) {
double complex psit = 0.0;
for (int m = 0; m < v->nm; m++, gm++) {
psit += A_gm[gm] * c_M1[m];
}
psit_G[G] += psit * conjphase;
}
}
}
GRID_LOOP_STOP(lfc, k);
}
Py_RETURN_NONE;
}
// Faster implementation of lcao_to_grid() function specialized
// for k-points
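// Strategy (sketch): for each block of at most Mblock basis functions, the
// phased function values are gathered into the dense buffer tmp_GM (Mblock
// rows, Gmax columns, leading dimension Mblock); a single zgemm with
// op(A) = A^H then applies the conjugate phases and contracts over M for all
// nx wave functions at once.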
PyObject* lcao_to_grid_k(LFCObject *lfc, PyObject *args)
{
PyArrayObject* c_xM_obj;
PyArrayObject* psit_xG_obj;
int k;
int Mblock;
if (!PyArg_ParseTuple(args, "OOii", &c_xM_obj, &psit_xG_obj, &k,
&Mblock))
return NULL;
const double complex* c_xM = (const double complex*)PyArray_DATA(c_xM_obj);
double complex* psit_xG = (double complex*)PyArray_DATA(psit_xG_obj);
int nd = PyArray_NDIM(psit_xG_obj);
npy_intp* dims = PyArray_DIMS(psit_xG_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int Gmax = PyArray_MultiplyList(dims + nd - 3, 3);
int Mmax = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1];
double complex* tmp_GM = 0;
for (int Mstart = 0; Mstart < Mmax; Mstart += Mblock) {
int Mstop = Mstart + Mblock;
if (Mstop > Mmax) {
Mstop = Mmax;
Mblock = Mstop - Mstart;
}
if (tmp_GM == 0)
tmp_GM = GPAW_MALLOC(double complex, Mblock * Gmax);
for (int GM = 0; GM < Gmax * Mblock; GM++)
tmp_GM[GM] = 0.0;
GRID_LOOP_START(lfc, k) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
int M1 = v->M;
if (M1 >= Mstop)
continue;
int nm = v->nm;
int M2 = M1 + nm;
if (M2 <= Mstart)
continue;
int M1p = MAX(M1, Mstart);
int M2p = MIN(M2, Mstop);
if (M1p == M2p)
continue;
double complex phase = phase_i[i];
const double* A_gm = v->A_gm;
for (int G = Ga; G < Gb; G++)
for (int M = M1p; M < M2p; M++)
tmp_GM[G * Mblock + M - Mstart] += \
A_gm[(G - Ga) * nm + M - M1] * phase;
}
}
GRID_LOOP_STOP(lfc, k);
double complex one = 1.0;
zgemm_("C", "N", &Gmax, &nx, &Mblock, &one, tmp_GM, &Mblock,
c_xM + Mstart, &Mmax, &one, psit_xG, &Gmax);
}
free(tmp_GM);
Py_RETURN_NONE;
}
PyObject* add(LFCObject *lfc, PyObject *args)
{
PyArrayObject* c_xM_obj;
PyArrayObject* a_xG_obj;
int q;
if (!PyArg_ParseTuple(args, "OOi", &c_xM_obj, &a_xG_obj, &q))
return NULL;
int nd = PyArray_NDIM(a_xG_obj);
npy_intp* dims = PyArray_DIMS(a_xG_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
int nM = PyArray_DIMS(c_xM_obj)[PyArray_NDIM(c_xM_obj) - 1];
if (!lfc->bloch_boundary_conditions) {
const double* c_M = (const double*)PyArray_DATA(c_xM_obj);
double* a_G = (double*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
for (int gm = 0, G = Ga; G < Gb; G++) {
for (int m = 0; m < v->nm; m++, gm++) {
a_G[G] += v->A_gm[gm] * c_M[v->M + m];
}
}
}
}
GRID_LOOP_STOP(lfc, -1);
c_M += nM;
a_G += nG;
}
}
else {
const double complex* c_M = (const double complex*)PyArray_DATA(c_xM_obj);
double complex* a_G = (double complex*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
for (int i = 0; i < ni; i++) {
double complex conjphase = conj(phase_i[i]);
LFVolume* v = volume_i + i;
const double complex* c_M1 = c_M + v->M;
const double* A_gm = v->A_gm;
for (int gm = 0, G = Ga; G < Gb; G++) {
double complex a = 0.0;
for (int m = 0; m < v->nm; m++, gm++) {
a += A_gm[gm] * c_M1[m];
}
a_G[G] += a * conjphase;
}
}
}
GRID_LOOP_STOP(lfc, q);
c_M += nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
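// spline_to_grid(): tabulates one radial spline times its 2l+1 spherical
// harmonics on the grid points inside the cutoff radius.  Returns A_gm
// (shape [ngm / (2l+1), 2l+1]) with the function values and G_B, a flat list
// of (start, stop) grid-index pairs marking the contiguous z-runs on which
// the function is nonzero.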
PyObject* spline_to_grid(PyObject *self, PyObject *args)
{
SplineObject* spline_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* end_c_obj;
PyArrayObject* pos_v_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyArrayObject* gdcorner_c_obj;
if (!PyArg_ParseTuple(args, "OOOOOOO", &spline_obj,
&beg_c_obj, &end_c_obj, &pos_v_obj, &h_cv_obj,
&n_c_obj, &gdcorner_c_obj))
return NULL;
const bmgsspline* spline = (const bmgsspline*)(&(spline_obj->spline));
long* beg_c = LONGP(beg_c_obj);
long* end_c = LONGP(end_c_obj);
double* pos_v = DOUBLEP(pos_v_obj);
double* h_cv = DOUBLEP(h_cv_obj);
long* n_c = LONGP(n_c_obj);
long* gdcorner_c = LONGP(gdcorner_c_obj);
int l = spline_obj->spline.l;
int nm = 2 * l + 1;
double rcut = spline->dr * spline->nbins;
int ngmax = ((end_c[0] - beg_c[0]) *
(end_c[1] - beg_c[1]) *
(end_c[2] - beg_c[2]));
double* A_gm = GPAW_MALLOC(double, ngmax * nm);
int nBmax = ((end_c[0] - beg_c[0]) *
(end_c[1] - beg_c[1]));
int* G_B = GPAW_MALLOC(int, 2 * nBmax);
int nB = 0;
int ngm = 0;
int G = -gdcorner_c[2] + n_c[2] * (beg_c[1] - gdcorner_c[1] + n_c[1]
* (beg_c[0] - gdcorner_c[0]));
for (int g0 = beg_c[0]; g0 < end_c[0]; g0++) {
for (int g1 = beg_c[1]; g1 < end_c[1]; g1++) {
int g2_beg = -1; // function boundary coordinates
int g2_end = -1;
for (int g2 = beg_c[2]; g2 < end_c[2]; g2++) {
double x = h_cv[0] * g0 + h_cv[3] * g1 + h_cv[6] * g2 - pos_v[0];
double y = h_cv[1] * g0 + h_cv[4] * g1 + h_cv[7] * g2 - pos_v[1];
double z = h_cv[2] * g0 + h_cv[5] * g1 + h_cv[8] * g2 - pos_v[2];
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
if (r < rcut) {
if (g2_beg < 0)
g2_beg = g2; // found boundary
g2_end = g2;
double A = bmgs_splinevalue(spline, r);
double* p = A_gm + ngm;
spherical_harmonics(l, A, x, y, z, r2, p);
ngm += nm;
}
}
if (g2_end >= 0) {
g2_end++;
G_B[nB++] = G + g2_beg;
G_B[nB++] = G + g2_end;
}
G += n_c[2];
}
G += n_c[2] * (n_c[1] - end_c[1] + beg_c[1]);
}
npy_intp gm_dims[2] = {ngm / (2 * l + 1), 2 * l + 1};
PyArrayObject* A_gm_obj = (PyArrayObject*)PyArray_SimpleNew(2, gm_dims,
NPY_DOUBLE);
memcpy(PyArray_DATA(A_gm_obj), A_gm, ngm * sizeof(double));
free(A_gm);
npy_intp B_dims[1] = {nB};
PyArrayObject* G_B_obj = (PyArrayObject*)PyArray_SimpleNew(1, B_dims,
NPY_INT);
memcpy(PyArray_DATA(G_B_obj), G_B, nB * sizeof(int));
free(G_B);
// PyObjects created in the C code will be initialized with a refcount
// of 1, for which reason we'll have to decref them when done here
PyObject* values = Py_BuildValue("(OO)", A_gm_obj, G_B_obj);
Py_DECREF(A_gm_obj);
Py_DECREF(G_B_obj);
return values;
}
// TODO: this is largely a copy of calculate_potential_matrix(); the shared
// grid-loop body ought to be factored out, e.g. with a preprocessor macro.
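// calculate_potential_matrix_derivative(): like calculate_potential_matrix(),
// but with the row (M1) function replaced by its derivative along Cartesian
// direction c.  With Phi_M(r) = f(r) * r^l * Y_lm, the product rule gives
//   dPhi_M/dc = f * d(r^l Y_lm)/dc + (df/dr) * (R_c / r) * r^l * Y_lm,
// which are the fdYdc_m and rlYdfdr_m terms evaluated in the grid loop below.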
PyObject* calculate_potential_matrix_derivative(LFCObject *lfc, PyObject *args)
{
PyArrayObject* vt_G_obj;
PyArrayObject* DVt_MM_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
int k, c;
PyArrayObject* spline_obj_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
int Mstart, Mstop;
if (!PyArg_ParseTuple(args, "OOOOiiOOOii", &vt_G_obj, &DVt_MM_obj,
&h_cv_obj, &n_c_obj, &k, &c,
&spline_obj_M_obj, &beg_c_obj,
&pos_Wc_obj, &Mstart, &Mstop))
return NULL;
const double* vt_G = (const double*)PyArray_DATA(vt_G_obj);
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const SplineObject** spline_obj_M = \
(const SplineObject**)PyArray_DATA(spline_obj_M_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
int nM = PyArray_DIMS(DVt_MM_obj)[1];
double* work_gm = lfc->work_gm;
double dv = lfc->dv;
if (!lfc->bloch_boundary_conditions) {
double* DVt_MM = (double*)PyArray_DATA(DVt_MM_obj);
{
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only z changes.
int iza = Ga % n_c[2] + beg_c[2];
int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0];
int iz = iza;
//assert(Ga == ((ix - beg_c[0]) * n_c[1] + (iy - beg_c[1]))
// * n_c[2] + iza - beg_c[2]);
for (int i1 = 0; i1 < ni; i1++) {
iz = iza;
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
const SplineObject* spline_obj = spline_obj_M[M1];
const bmgsspline* spline = \
(const bmgsspline*)(&(spline_obj->spline));
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
double fdYdc_m[nm1];
double rlYdfdr_m[nm1];
double f, dfdr;
int l = (nm1 - 1) / 2;
const double* pos_c = pos_Wc[v1->W];
//assert(2 * l + 1 == nm1);
//assert(spline_obj->spline.l == l);
int gm1 = 0;
for (int G = Ga; G < Gb; G++, iz++) {
double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0];
double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1];
double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2];
double vtdv = vt_G[G] * dv;
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double Rcinvr = r > 1e-15 ? R_c[c] / r : 0.0;
//assert(G == ((ix - beg_c[0]) * n_c[1] +
// (iy - beg_c[1])) * n_c[2] + iz - beg_c[2]);
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
//assert (r <= spline->dr * spline->nbins); // important
switch(c) {
case 0:
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m);
break;
case 1:
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m);
break;
case 2:
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m);
break;
}
spherical_harmonics(l, dfdr * Rcinvr, x, y, z, r2, rlYdfdr_m);
int m1start = M1 < Mstart ? nm1 - nm1p : 0;
for (int m1 = 0; m1 < nm1p; m1++, gm1++) {
work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start]
+ rlYdfdr_m[m1 + m1start]);
}
} // end loop over G
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
const double* A2_start_gm = v2->A_gm;
const double* A2_gm;
int nm2 = v2->nm;
double* DVt_start_mm = DVt_MM + (M1p - Mstart) * nM + M2;
double* DVt_mm;
double work;
for (int g = 0; g < nG; g++) {
A2_gm = A2_start_gm + g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
work = work_gm[g * nm1p + m1];
DVt_mm = DVt_start_mm + m1 * nM;
for (int m2 = 0; m2 < nm2; m2++) {
DVt_mm[m2] += A2_gm[m2] * work;
}
}
}
} // i2 loop
} // i1 loop
} // end of grid-loop body
GRID_LOOP_STOP(lfc, -1);
} // end of block
}
else {
complex double* DVt_MM = (complex double*)PyArray_DATA(DVt_MM_obj);
{
GRID_LOOP_START(lfc, k) {
// In one grid loop iteration, only z changes.
int iza = Ga % n_c[2] + beg_c[2];
int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0];
int iz = iza;
for (int i1 = 0; i1 < ni; i1++) {
iz = iza;
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
const SplineObject* spline_obj = spline_obj_M[M1];
const bmgsspline* spline = \
(const bmgsspline*)(&(spline_obj->spline));
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
double fdYdc_m[nm1];
double rlYdfdr_m[nm1];
double f, dfdr;
int l = (nm1 - 1) / 2;
//assert(2 * l + 1 == nm1);
//assert(spline_obj->spline.l == l);
const double* pos_c = pos_Wc[v1->W];
int gm1 = 0;
for (int G = Ga; G < Gb; G++, iz++) {
double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0];
double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1];
double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2];
double vtdv = vt_G[G] * dv;
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double Rc_over_r = r > 1e-15 ? R_c[c] / r : 0.0;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
//assert (r <= spline->dr * spline->nbins);
switch(c) {
case 0:
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m);
break;
case 1:
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m);
break;
case 2:
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m);
break;
}
spherical_harmonics(l, dfdr * Rc_over_r, x, y, z, r2, rlYdfdr_m);
int m1start = M1 < Mstart ? nm1 - nm1p : 0;
for (int m1 = 0; m1 < nm1p; m1++, gm1++) {
work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start]
+ rlYdfdr_m[m1 + m1start]);
}
} // end loop over G
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
const double* A2_start_gm = v2->A_gm;
const double* A2_gm;
double complex* DVt_start_mm = DVt_MM + (M1p - Mstart) * nM + M2;
double complex* DVt_mm;
double complex work;
int nm2 = v2->nm;
double complex phase = conj(phase_i[i1]) * phase_i[i2];
for (int g = 0; g < nG; g++) {
A2_gm = A2_start_gm + g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
work = work_gm[g * nm1p + m1] * phase;
DVt_mm = DVt_start_mm + m1 * nM;
for (int m2 = 0; m2 < nm2; m2++) {
DVt_mm[m2] += A2_gm[m2] * work;
}
}
}
} // i2 loop
} // i1 loop
} // end of grid-loop body
GRID_LOOP_STOP(lfc, k);
} // end of block
}
Py_RETURN_NONE;
}
// TODO: this is largely a copy of calculate_potential_matrix(); the shared
// grid-loop body ought to be factored out, e.g. with a preprocessor macro.
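// calculate_potential_matrix_force_contribution(): same grid loop as above,
// but contracted with the density matrix on the fly,
//   F_M1 += sum_G sum_M2 rho[M1, M2] * Phi_M2(G) * vt(G) * dv * dPhi_M1/dc(G),
// accumulating one number per row instead of a full matrix.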
PyObject* calculate_potential_matrix_force_contribution(LFCObject *lfc, PyObject *args)
{
PyArrayObject* vt_G_obj;
PyArrayObject* rho_MM_obj;
PyArrayObject* F_M_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
int k, c;
PyArrayObject* spline_obj_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
int Mstart, Mstop;
if (!PyArg_ParseTuple(args, "OOOOOiiOOOii", &vt_G_obj, &rho_MM_obj,
&F_M_obj,
&h_cv_obj, &n_c_obj, &k, &c,
&spline_obj_M_obj, &beg_c_obj,
&pos_Wc_obj, &Mstart, &Mstop))
return NULL;
const double* vt_G = (const double*)PyArray_DATA(vt_G_obj);
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const SplineObject** spline_obj_M = \
(const SplineObject**)PyArray_DATA(spline_obj_M_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
double* F_M = (double*)PyArray_DATA(F_M_obj);
long* beg_c = LONGP(beg_c_obj);
int nM = PyArray_DIMS(rho_MM_obj)[1];
double* work_gm = lfc->work_gm;
double dv = lfc->dv;
if (!lfc->bloch_boundary_conditions) {
double* rho_MM = (double*)PyArray_DATA(rho_MM_obj);
{
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only z changes.
int iza = Ga % n_c[2] + beg_c[2];
int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0];
int iz = iza;
//assert(Ga == ((ix - beg_c[0]) * n_c[1] + (iy - beg_c[1]))
// * n_c[2] + iza - beg_c[2]);
for (int i1 = 0; i1 < ni; i1++) {
iz = iza;
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
const SplineObject* spline_obj = spline_obj_M[M1];
const bmgsspline* spline = \
(const bmgsspline*)(&(spline_obj->spline));
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
int m1start = M1 < Mstart ? nm1 - nm1p : 0;
double fdYdc_m[nm1];
double rlYdfdr_m[nm1];
double f, dfdr;
int l = (nm1 - 1) / 2;
const double* pos_c = pos_Wc[v1->W];
//assert(2 * l + 1 == nm1);
//assert(spline_obj->spline.l == l);
int gm1 = 0;
for (int G = Ga; G < Gb; G++, iz++) {
double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0];
double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1];
double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2];
double vtdv = vt_G[G] * dv;
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double Rcinvr = r > 1e-15 ? R_c[c] / r : 0.0;
//assert(G == ((ix - beg_c[0]) * n_c[1] +
// (iy - beg_c[1])) * n_c[2] + iz - beg_c[2]);
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
//assert (r <= spline->dr * spline->nbins); // important
switch(c) {
case 0:
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m);
break;
case 1:
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m);
break;
case 2:
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m);
break;
}
spherical_harmonics(l, dfdr * Rcinvr, x, y, z, r2, rlYdfdr_m);
for (int m1 = 0; m1 < nm1p; m1++, gm1++) {
work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start]
+ rlYdfdr_m[m1 + m1start]);
}
} // end loop over G
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
const double* A2_start_gm = v2->A_gm;
const double* A2_gm;
int nm2 = v2->nm;
double* rho_start_mm = rho_MM + (M1p - Mstart) * nM + M2;
double* rho_mm;
double work;
for (int g = 0; g < nG; g++) {
A2_gm = A2_start_gm + g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
rho_mm = rho_start_mm + m1 * nM;
work = 0.0;
for (int m2 = 0; m2 < nm2; m2++) {
work += A2_gm[m2] * rho_mm[m2];
}
F_M[M1p - Mstart + m1] += work * work_gm[g * nm1p + m1];
}
}
} // i2 loop
} // i1 loop
} // end of grid-loop body
GRID_LOOP_STOP(lfc, -1);
} // end of block
}
else {
complex double* rho_MM = (complex double*)PyArray_DATA(rho_MM_obj);
{
GRID_LOOP_START(lfc, k) {
// In one grid loop iteration, only z changes.
int iza = Ga % n_c[2] + beg_c[2];
int iy = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int ix = Ga / (n_c[2] * n_c[1]) + beg_c[0];
int iz = iza;
for (int i1 = 0; i1 < ni; i1++) {
iz = iza;
LFVolume* v1 = volume_i + i1;
int M1 = v1->M;
const SplineObject* spline_obj = spline_obj_M[M1];
const bmgsspline* spline = \
(const bmgsspline*)(&(spline_obj->spline));
int nm1 = v1->nm;
int M1p = MAX(M1, Mstart);
int nm1p = MIN(M1 + nm1, Mstop) - M1p;
if (nm1p <= 0)
continue;
int m1start = M1 < Mstart ? nm1 - nm1p : 0;
double fdYdc_m[nm1];
double rlYdfdr_m[nm1];
double f, dfdr;
int l = (nm1 - 1) / 2;
//assert(2 * l + 1 == nm1);
//assert(spline_obj->spline.l == l);
const double* pos_c = pos_Wc[v1->W];
int gm1 = 0;
for (int G = Ga; G < Gb; G++, iz++) {
double x = h_cv[0] * ix + h_cv[3] * iy + h_cv[6] * iz - pos_c[0];
double y = h_cv[1] * ix + h_cv[4] * iy + h_cv[7] * iz - pos_c[1];
double z = h_cv[2] * ix + h_cv[5] * iy + h_cv[8] * iz - pos_c[2];
double vtdv = vt_G[G] * dv;
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double Rc_over_r = r > 1e-15 ? R_c[c] / r : 0.0;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
//assert (r <= spline->dr * spline->nbins);
switch(c) {
case 0:
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdYdc_m);
break;
case 1:
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdYdc_m);
break;
case 2:
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdYdc_m);
break;
}
spherical_harmonics(l, dfdr * Rc_over_r, x, y, z, r2, rlYdfdr_m);
for (int m1 = 0; m1 < nm1p; m1++, gm1++) {
work_gm[gm1] = vtdv * (fdYdc_m[m1 + m1start]
+ rlYdfdr_m[m1 + m1start]);
}
} // end loop over G
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int M2 = v2->M;
const double* A2_start_gm = v2->A_gm;
const double* A2_gm;
int nm2 = v2->nm;
double complex* rho_start_mm = rho_MM + (M1p - Mstart) * nM + M2;
double complex* rho_mm;
double complex phase = conj(phase_i[i1]) * phase_i[i2];
double complex work;
for (int g = 0; g < nG; g++) {
A2_gm = A2_start_gm + g * nm2;
for (int m1 = 0; m1 < nm1p; m1++) {
rho_mm = rho_start_mm + m1 * nM;
work = 0.0;
for (int m2 = 0; m2 < nm2; m2++) {
work += A2_gm[m2] * rho_mm[m2];
}
F_M[M1p - Mstart + m1] += creal(work * work_gm[g * nm1p + m1]
* phase);
}
}
} // i2 loop
} // i1 loop
} // end of grid-loop body
GRID_LOOP_STOP(lfc, k);
} // end of block
}
Py_RETURN_NONE;
}
PyObject* derivative(LFCObject *lfc, PyObject *args)
{
PyArrayObject* a_xG_obj;
PyArrayObject* c_xMv_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
int q;
if (!PyArg_ParseTuple(args, "OOOOOOOi", &a_xG_obj, &c_xMv_obj,
&h_cv_obj, &n_c_obj,
&spline_M_obj, &beg_c_obj,
&pos_Wc_obj, &q))
return NULL;
int nd = PyArray_NDIM(a_xG_obj);
npy_intp* dims = PyArray_DIMS(a_xG_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
int nM = PyArray_DIMS(c_xMv_obj)[PyArray_NDIM(c_xMv_obj) - 2];
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
if (!lfc->bloch_boundary_conditions) {
const double* a_G = (const double*)PyArray_DATA(a_xG_obj);
double* c_Mv = (double*)PyArray_DATA(c_xMv_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
double* c_mv = c_Mv + 3 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
int nm = vol->nm;
int l = (nm - 1) / 2;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double af;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &af, &dfdr);
af *= a_G[G] * lfc->dv;
double afdrlYdx_m[nm]; // a * f * d(r^l * Y)/dx
spherical_harmonics_derivative_x(l, af, x, y, z, r2, afdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m] += afdrlYdx_m[m];
spherical_harmonics_derivative_y(l, af, x, y, z, r2, afdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m + 1] += afdrlYdx_m[m];
spherical_harmonics_derivative_z(l, af, x, y, z, r2, afdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m + 2] += afdrlYdx_m[m];
if (r > 1e-15) {
double arlm1Ydfdr_m[nm]; // a * r^(l-1) * Y * df/dr
double arm1dfdr = a_G[G] / r * dfdr * lfc->dv;
spherical_harmonics(l, arm1dfdr, x, y, z, r2, arlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
for (int v = 0; v < 3; v++)
c_mv[m * 3 + v] += arlm1Ydfdr_m[m] * R_c[v];
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
c_Mv += 3 * nM;
a_G += nG;
}
}
else {
const complex double* a_G = (const complex double*)PyArray_DATA(a_xG_obj);
complex double* c_Mv = (complex double*)PyArray_DATA(c_xMv_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
complex double* c_mv = c_Mv + 3 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
int nm = vol->nm;
int l = (nm - 1) / 2;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
double fdrlYdx_m[nm]; // a * f * d(r^l * Y)/dx
complex double ap = a_G[G] * phase_i[i] * lfc->dv;
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m ] += ap * fdrlYdx_m[m];
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m + 1] += ap * fdrlYdx_m[m];
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
c_mv[3 * m + 2] += ap * fdrlYdx_m[m];
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = dfdr / r;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
for (int v = 0; v < 3; v++)
c_mv[m * 3 + v] += ap * rlm1Ydfdr_m[m] * R_c[v];
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, q);
c_Mv += 3 * nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
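// normalized_derivative(): per localized function it accumulates seven
// numbers in c_Mv,
//   [0:3] : integral of a_G times dPhi_M/dv (v = x, y, z),
//   [3:6] : integral of dPhi_M/dv alone,
//   [6]   : integral of a_G times Phi_M (filled for l = 0 functions only).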
PyObject* normalized_derivative(LFCObject *lfc, PyObject *args)
{
PyArrayObject* a_G_obj;
PyArrayObject* c_Mv_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
if (!PyArg_ParseTuple(args, "OOOOOOO", &a_G_obj, &c_Mv_obj,
&h_cv_obj, &n_c_obj,
&spline_M_obj, &beg_c_obj,
&pos_Wc_obj))
return NULL;
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
const double* a_G = (const double*)PyArray_DATA(a_G_obj);
double* c_Mv = (double*)PyArray_DATA(c_Mv_obj);
GRID_LOOP_START(lfc, -1) {
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
double* c_mv = c_Mv + 7 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
int nm = vol->nm;
int l = (nm - 1) / 2;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
f *= lfc->dv;
double a = a_G[G];
if (l == 0)
c_mv[6] += 0.28209479177387814 * a * f;
double fdrlYdx_m[nm]; // f * d(r^l * Y)/dx
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++) {
c_mv[7 * m ] += a * fdrlYdx_m[m];
c_mv[7 * m + 3] += fdrlYdx_m[m];
}
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++) {
c_mv[7 * m + 1] += a * fdrlYdx_m[m];
c_mv[7 * m + 4] += fdrlYdx_m[m];
}
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++) {
c_mv[7 * m + 2] += a * fdrlYdx_m[m];
c_mv[7 * m + 5] += fdrlYdx_m[m];
}
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = dfdr * lfc->dv / r;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
for (int v = 0; v < 3; v++) {
c_mv[m * 7 + v] += a * rlm1Ydfdr_m[m] * R_c[v];
c_mv[m * 7 + v + 3] += rlm1Ydfdr_m[m] * R_c[v];
}
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
Py_RETURN_NONE;
}
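// ae_valence_density_correction(): adds the all-electron valence density
//   n_G[G] += sum_{m1, m2} rho[M1 + m1, M2 + m2] * phi_m1(G) * phi_m2(G)
// restricted to pairs of functions on the same atom (a_W) and with the same
// x_W index, and records the integrated charge per atom in I_a.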
PyObject* ae_valence_density_correction(LFCObject *lfc, PyObject *args)
{
PyArrayObject* rho_MM_obj;
PyArrayObject* n_G_obj;
PyArrayObject* a_W_obj;
PyArrayObject* I_a_obj;
PyArrayObject* x_W_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &rho_MM_obj, &n_G_obj,
&a_W_obj, &I_a_obj, &x_W_obj))
return NULL;
double* n_G = (double*)PyArray_DATA(n_G_obj);
int* a_W = (int*)PyArray_DATA(a_W_obj);
double* I_a = (double*)PyArray_DATA(I_a_obj);
const double* rho_MM = (const double*)PyArray_DATA(rho_MM_obj);
int* x_W = (int*)PyArray_DATA(x_W_obj);
int nM = PyArray_DIMS(rho_MM_obj)[0];
GRID_LOOP_START(lfc, -1) {
for (int i1 = 0; i1 < ni; i1++) {
LFVolume* v1 = volume_i + i1;
int x1 = x_W[v1->W];
int a1 = a_W[v1->W];
int M1 = v1->M;
int nm1 = v1->nm;
double Ia = 0.0;
for (int i2 = 0; i2 < ni; i2++) {
LFVolume* v2 = volume_i + i2;
int x2 = x_W[v2->W];
if (x1 != x2)
continue;
int a2 = a_W[v2->W];
if (a1 != a2)
continue;
int M2 = v2->M;
int nm2 = v2->nm;
const double* rho_mm = rho_MM + M1 * nM + M2;
for (int g = 0; g < nG; g++) {
double density = 0.0;
for (int m2 = 0; m2 < nm2; m2++)
for (int m1 = 0; m1 < nm1; m1++)
density += (rho_mm[m2 + m1 * nM] *
v1->A_gm[g * nm1 + m1] *
v2->A_gm[g * nm2 + m2]);
n_G[Ga + g] += density;
Ia += density;
}
}
I_a[a1] += Ia * lfc->dv;
}
}
GRID_LOOP_STOP(lfc, -1);
Py_RETURN_NONE;
}
PyObject* ae_core_density_correction(LFCObject *lfc, PyObject *args)
{
double scale;
PyArrayObject* n_G_obj;
PyArrayObject* a_W_obj;
PyArrayObject* I_a_obj;
if (!PyArg_ParseTuple(args, "dOOO", &scale, &n_G_obj,
&a_W_obj, &I_a_obj))
return NULL;
double* n_G = (double*)PyArray_DATA(n_G_obj);
int* a_W = (int*)PyArray_DATA(a_W_obj);
double* I_a = (double*)PyArray_DATA(I_a_obj);
GRID_LOOP_START(lfc, -1) {
for (int i = 0; i < ni; i++) {
LFVolume* v = volume_i + i;
double Ia = 0.0;
for (int g = 0; g < nG; g++) {
double density = scale * v->A_gm[g];
n_G[Ga + g] += density;
Ia += density;
}
I_a[a_W[v->W]] += Ia * lfc->dv;
}
}
GRID_LOOP_STOP(lfc, -1);
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/lfc.h 0000664 0000000 0000000 00000010610 13164413722 0021261 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#ifndef LFC_H
#define LFC_H
#include <Python.h>
typedef struct
{
const double* A_gm; // function values
int nm; // number of functions (2*l+1)
int M; // global number of first function
int W; // volume number
} LFVolume;
typedef struct
{
PyObject_HEAD
double dv; // volume per grid point
int nW; // number of volumes
int nB; // number of boundary points
double* work_gm; // work space
LFVolume* volume_W; // pointers to volumes
LFVolume* volume_i; // pointers to volumes at current grid point
int* G_B; // boundary grid points
int* W_B; // volume numbers
int* i_W; // mapping from all volumes to current volumes
int* ngm_W; // number of grid points per volume
bool bloch_boundary_conditions; // true if Bloch phases (k-points) are used; false for gamma-point only
complex double* phase_kW; // phase factors: exp(ik.R)
complex double* phase_i; // phase factors for current volumes
} LFCObject;
#define GRID_LOOP_START(lfc, k) \
{ \
int* G_B = lfc->G_B; \
int* W_B = lfc->W_B; \
int* i_W = lfc->i_W; \
complex double* phase_i = lfc->phase_i; \
LFVolume* volume_i = lfc->volume_i; \
LFVolume* volume_W = lfc->volume_W; \
double complex* phase_W = lfc->phase_kW + k * lfc->nW; \
int Ga = 0; \
int ni = 0; \
for (int B = 0; B < lfc->nB; B++) \
{ \
int Gb = G_B[B]; \
int nG = Gb - Ga; \
if (nG > 0) \
{
#define GRID_LOOP_STOP(lfc, k) \
for (int i = 0; i < ni; i++) \
volume_i[i].A_gm += nG * volume_i[i].nm; \
} \
int Wnew = W_B[B]; \
if (Wnew >= 0) \
{ \
/* Entering new sphere: */ \
volume_i[ni] = volume_W[Wnew]; \
if (k >= 0) \
phase_i[ni] = phase_W[Wnew]; \
i_W[Wnew] = ni; \
ni++; \
} \
else \
{ \
/* Leaving sphere: */ \
int Wold = -1 - Wnew; \
int iold = i_W[Wold]; \
volume_W[Wold].A_gm = volume_i[iold].A_gm; \
ni--; \
volume_i[iold] = volume_i[ni]; \
if (k >= 0) \
phase_i[iold] = phase_i[ni]; \
int Wlast = volume_i[iold].W; \
i_W[Wlast] = iold; \
} \
Ga = Gb; \
} \
for (int W = 0; W < lfc->nW; W++) \
volume_W[W].A_gm -= lfc->ngm_W[W]; \
}
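/* Usage sketch (mirrors the calls in lfc.c): the macro pair expands to a loop
 * over the boundary points in G_B that keeps volume_i/phase_i up to date and
 * exposes Ga, Gb, nG and ni to the enclosed block, e.g.
 *
 *     GRID_LOOP_START(lfc, -1) {        // -1 selects the gamma-point branch
 *       for (int i = 0; i < ni; i++) {
 *         LFVolume* v = volume_i + i;
 *         for (int gm = 0, G = Ga; G < Gb; G++)
 *           for (int m = 0; m < v->nm; m++, gm++)
 *             a_G[G] += v->A_gm[gm] * c_M[v->M + m];
 *       }
 *     }
 *     GRID_LOOP_STOP(lfc, -1);
 *
 * Passing a k-point index k >= 0 instead of -1 makes phase_i[i] valid inside
 * the loop.
 */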
#endif
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/lfc2.c 0000664 0000000 0000000 00000031435 13164413722 0021346 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2010 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
#include "spline.h"
#include "lfc.h"
#include "bmgs/spherical_harmonics.h"
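/* second_derivative(): accumulates the Cartesian Hessian of the localized
 * functions integrated against a_G.  For an s-type function
 * Phi(r) = f(r) * Y00 the chain rule gives
 *   d2Phi/dx_v dx_w = Y00 * [ (f'/r) * delta_vw + (f'' - f'/r) * x_v x_w / r2 ],
 * where f'/r is "dfdror" below and the second bracket yields "b".  Only the
 * Y00 normalization is applied, so this presumably targets l = 0 splines. */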
PyObject* second_derivative(LFCObject *lfc, PyObject *args)
{
PyArrayObject* a_G_obj;
PyArrayObject* c_Mvv_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
int q;
if (!PyArg_ParseTuple(args, "OOOOOOOi", &a_G_obj, &c_Mvv_obj,
&h_cv_obj, &n_c_obj,
&spline_M_obj, &beg_c_obj,
&pos_Wc_obj, &q))
return NULL;
// Copied from derivative member function
int nd = PyArray_NDIM(a_G_obj);
npy_intp* dims = PyArray_DIMS(a_G_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
int nM = PyArray_DIM(c_Mvv_obj, PyArray_NDIM(c_Mvv_obj) - 2);
// These were already present
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
///////////////////////////////////////////////
const double Y00dv = lfc->dv / sqrt(4.0 * M_PI);
if (!lfc->bloch_boundary_conditions) {
const double* a_G = (const double*)PyArray_DATA(a_G_obj);
double* c_Mvv = (double*)PyArray_DATA(c_Mvv_obj);
// Loop over the extra x-dimensions of a_xG (not used yet)
for (int x = 0; x < nx; x++) {
// JJs old stuff
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
double* c_mvv = c_Mvv + 9 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
int bin = r / spline->dr;
assert(bin <= spline->nbins);
double* s = spline->data + 4 * bin;
double u = r - bin * spline->dr;
double dfdror;
if (bin == 0)
dfdror = 2.0 * s[2] + 3.0 * s[3] * r;
else
dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r;
double a = a_G[G] * Y00dv;
dfdror *= a;
c_mvv[0] += dfdror;
c_mvv[4] += dfdror;
c_mvv[8] += dfdror;
if (r > 1e-15) {
double b = ((2.0 * s[2] + 6.0 * s[3] * u) * a - dfdror) / r2;
c_mvv[0] += b * x * x;
c_mvv[1] += b * x * y;
c_mvv[2] += b * x * z;
c_mvv[3] += b * y * x;
c_mvv[4] += b * y * y;
c_mvv[5] += b * y * z;
c_mvv[6] += b * z * x;
c_mvv[7] += b * z * y;
c_mvv[8] += b * z * z;
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
c_Mvv += 9 * nM;
a_G += nG;
}
}
else {
const complex double* a_G = (const complex double*)PyArray_DATA(a_G_obj);
complex double* c_Mvv = (complex double*)PyArray_DATA(c_Mvv_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
complex double* c_mvv = c_Mvv + 9 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double dfdror;
// TODO: consider using bmgs_get_value_and_derivative() here instead
int bin = r / spline->dr;
assert(bin <= spline->nbins);
double u = r - bin * spline->dr;
double* s = spline->data + 4 * bin;
if (bin == 0)
dfdror = 2.0 * s[2] + 3.0 * s[3] * r;
else
dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r;
// phase added here
complex double a = a_G[G] * phase_i[i] * Y00dv;
// dfdror *= a;
c_mvv[0] += a * dfdror;
c_mvv[4] += a * dfdror;
c_mvv[8] += a * dfdror;
if (r > 1e-15) {
double b = (2.0 * s[2] + 6.0 * s[3] * u - dfdror) / r2;
c_mvv[0] += a * b * x * x;
c_mvv[1] += a * b * x * y;
c_mvv[2] += a * b * x * z;
c_mvv[3] += a * b * y * x;
c_mvv[4] += a * b * y * y;
c_mvv[5] += a * b * y * z;
c_mvv[6] += a * b * z * x;
c_mvv[7] += a * b * z * y;
c_mvv[8] += a * b * z * z;
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, q);
c_Mvv += 9 * nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
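/* add_derivative(): adds sum_M c_M * dPhi_M/dv to the grid array a_xG for one
 * Cartesian direction v.  With Phi_M(r) = f(r) * r^l * Y_lm, the product rule
 *   dPhi_M/dv = f * d(r^l Y_lm)/dv + (df/dr) * (R_v / r) * r^l * Y_lm
 * gives the "first" and "second" contributions computed in the grid loop. */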
PyObject* add_derivative(LFCObject *lfc, PyObject *args)
{
// Coefficients for the lfc's
PyArrayObject* c_xM_obj;
// Array
PyArrayObject* a_xG_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
// Atom index
int a;
// Cartesian coordinate
int v;
// k-point index
int q;
if (!PyArg_ParseTuple(args, "OOOOOOOiii", &c_xM_obj, &a_xG_obj,
&h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj,
&pos_Wc_obj, &a, &v, &q))
return NULL;
// Number of dimensions
int nd = PyArray_NDIM(a_xG_obj);
// Array with lengths of array dimensions
npy_intp* dims = PyArray_DIMS(a_xG_obj);
// Number of extra dimensions
int nx = PyArray_MultiplyList(dims, nd - 3);
// Number of grid points
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
// Number of lfc's
int nM = PyArray_DIM(c_xM_obj, PyArray_NDIM(c_xM_obj) - 1);
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
if (!lfc->bloch_boundary_conditions) {
const double* c_M = (const double*)PyArray_DATA(c_xM_obj);
double* a_G = (double*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
// Grid point position
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
// Loop over grid points in current stride
for (int G = Ga; G < Gb; G++) {
// Loop over volumes at current grid point
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
// TODO: check that the volume belongs to the atom under consideration
int W = vol->W;
int nm = vol->nm;
int l = (nm - 1) / 2;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[W][0];
double y = yG - pos_Wc[W][1];
double z = zG - pos_Wc[W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
// First contribution: f * d(r^l * Y)/dv
double fdrlYdx_m[nm];
if (v == 0)
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
else if (v == 1)
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
else
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
a_G[G] += fdrlYdx_m[m] * c_M[M + m];
// Second contribution: r^(l-1) * Y * df/dr * R_v
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = 1. / r * dfdr;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m];
}
}
// Update coordinates of current grid point
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
c_M += nM;
a_G += nG;
}
}
else {
const double complex* c_M = (const double complex*)PyArray_DATA(c_xM_obj);
double complex* a_G = (double complex*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
// Grid point position
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
// Loop over grid points in current stride
for (int G = Ga; G < Gb; G++) {
// Loop over volumes at current grid point
for (int i = 0; i < ni; i++) {
// Phase of volume
double complex conjphase = conj(phase_i[i]);
LFVolume* vol = volume_i + i;
int M = vol->M;
// TODO: check that the volume belongs to the atom under consideration
int W = vol->W;
int nm = vol->nm;
int l = (nm - 1) / 2;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[W][0];
double y = yG - pos_Wc[W][1];
double z = zG - pos_Wc[W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
// First contribution: f * d(r^l * Y)/dv
double fdrlYdx_m[nm];
if (v == 0)
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
else if (v == 1)
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
else
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
a_G[G] += fdrlYdx_m[m] * c_M[M + m] * conjphase;
// Second contribution: r^(l-1) * Y * df/dr * R_v
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = 1. / r * dfdr;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m] * conjphase;
}
}
// Update coordinates of current grid point
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, q);
c_M += nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/localized_functions.c 0000664 0000000 0000000 00000034666 13164413722 0024567 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005-2008 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "spline.h"
#include <string.h>  // memcpy/memset
#ifdef PARALLEL
# include <mpi.h>
#else
typedef int* MPI_Request; // dummy types so the serial (non-MPI) build compiles
typedef int* MPI_Comm;
# define MPI_COMM_NULL 0
# define MPI_Comm_rank(comm, rank) *(rank) = 0
# define MPI_Bcast(buff, count, datatype, root, comm) 0
#endif
#include "mympi.h"
#include "localized_functions.h"
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define dgemm_ dgemm
# define dgemv_ dgemv
#endif
int dgemm_(char *transa, char *transb, int *m, int * n,
int *k, double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
int dgemv_(char *trans, int *m, int * n,
double *alpha, double *a, int *lda,
double *x, int *incx, double *beta,
double *y, int *incy);
static void localized_functions_dealloc(LocalizedFunctionsObject *self)
{
free(self->f);
free(self->w);
PyObject_DEL(self);
}
static PyObject * localized_functions_integrate(LocalizedFunctionsObject *self,
PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* a = DOUBLEP(aa);
double* b = DOUBLEP(bb);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int nf = self->nf;
double* f = self->f;
double* w = self->w;
int ng = self->ng;
int ng0 = self->ng0;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
bmgs_cut(a, self->size, self->start, w, self->size0);
double zero = 0.0;
int inc = 1;
dgemv_("t", &ng0, &nf, &self->dv, f, &ng0, w, &inc, &zero, b, &inc);
a += ng;
b += nf;
}
else
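// Complex data: apply the real function values to the real and imaginary
// parts at once by viewing the cut-out array as a 2 x ng0 real matrix
// (hence dgemm with a row count of 2).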
for (int n = 0; n < na; n++)
{
bmgs_cutz((const double_complex*)a, self->size, self->start,
(double_complex*)w, self->size0);
double zero = 0.0;
int inc = 2;
dgemm_("n", "n", &inc, &nf, &ng0, &self->dv, w, &inc, f, &ng0,
&zero, b, &inc);
a += 2 * ng;
b += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_derivative(
LocalizedFunctionsObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* a = DOUBLEP(aa);
double* b = DOUBLEP(bb);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int nf = self->nfd;
double* f = self->fd;
double* w = self->w;
int ng = self->ng;
int ng0 = self->ng0;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
bmgs_cut(a, self->size, self->start, w, self->size0);
double zero = 0.0;
int inc = 1;
dgemv_("t", &ng0, &nf, &self->dv, f, &ng0, w, &inc, &zero, b, &inc);
a += ng;
b += nf;
}
else
for (int n = 0; n < na; n++)
{
bmgs_cutz((const double_complex*)a, self->size, self->start,
(double_complex*)w, self->size0);
double zero = 0.0;
int inc = 2;
dgemm_("n", "n", &inc, &nf, &ng0, &self->dv, w, &inc, f, &ng0,
&zero, b, &inc);
a += 2 * ng;
b += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_add(LocalizedFunctionsObject *self,
PyObject *args)
{
PyArrayObject* cc;
PyArrayObject* aa;
if (!PyArg_ParseTuple(args, "OO", &cc, &aa))
return NULL;
double* c = DOUBLEP(cc);
double* a = DOUBLEP(aa);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int ng = self->ng;
int ng0 = self->ng0;
int nf = self->nf;
double* f = self->f;
double* w = self->w;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
double zero = 0.0;
double one = 1.0;
int inc = 1;
dgemv_("n", &ng0, &nf, &one, f, &ng0, c, &inc, &zero, w, &inc);
bmgs_pastep(w, self->size0, a, self->size, self->start);
a += ng;
c += nf;
}
else
for (int n = 0; n < na; n++)
{
double zero = 0.0;
double one = 1.0;
int inc = 2;
dgemm_("n", "t", &inc, &ng0, &nf, &one, c, &inc, f, &ng0,
&zero, w, &inc);
bmgs_pastepz((const double_complex*)w, self->size0,
(double_complex*)a, self->size, self->start);
a += 2 * ng;
c += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_add_density(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyArrayObject* dd;
PyArrayObject* oo;
if (!PyArg_ParseTuple(args, "OO", &dd, &oo))
return NULL;
const double* o = DOUBLEP(oo);
double* d = DOUBLEP(dd);
int nf = self->nf;
int ng0 = self->ng0;
const double* f = self->f;
double* w = self->w;
memset(w, 0, ng0 * sizeof(double));
for (int i = 0; i < nf; i++)
for (int n = 0; n < ng0; n++)
{
double g = *f++;
w[n] += o[i] * g * g;
}
bmgs_pastep(w, self->size0, d, self->size, self->start);
Py_RETURN_NONE;
}
static PyObject * localized_functions_add_density2(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyArrayObject* dd; // density array to be added to
PyArrayObject* oo; // density matrix
if (!PyArg_ParseTuple(args, "OO", &dd, &oo))
return NULL;
const double* o = DOUBLEP(oo);
double* d = DOUBLEP(dd);
int nf = self->nf;
int ng0 = self->ng0;
const double* f = self->f;
double* w = self->w;
memset(w, 0, ng0 * sizeof(double));
int p = 0; // compressed ii index
double F = 0.0; // integrated value
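// The density matrix o is packed as its upper triangle (j >= i), traversed
// row by row; p indexes the packed elements.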
for (int i = 0; i < nf; i++)
{
for (int j = i; j < nf; j++)
{
for (int n = 0; n < ng0; n++)
{
double tmp = o[p] * f[n + i * ng0] * f[n + j * ng0];
F += tmp;
w[n] += tmp;
}
p++;
}
}
bmgs_pastep(w, self->size0, d, self->size, self->start);
//Py_RETURN_NONE;
return Py_BuildValue("d", F * self->dv);
}
static PyObject * localized_functions_norm(LocalizedFunctionsObject* self,
PyObject *args)
{
PyArrayObject* I_obj;
if (!PyArg_ParseTuple(args, "O", &I_obj))
return NULL;
double (*II)[4] = (double (*)[4])DOUBLEP(I_obj);
const double* f = self->f;
for (int i = 0; i < self->nf; i++)
{
double F = 0.0;
for (int n = 0; n < self->ng0; n++)
F += f[n];
II[i][0] += F * self->dv;
f += self->ng0;
}
if (self->nfd > 0)
{
const double* fd = self->fd;
for (int i = 0; i < self->nf; i++)
for (int c = 0; c < 3; c++)
{
double F = 0.0;
for (int n = 0; n < self->ng0; n++)
F += fd[n];
II[i][c + 1] += F * self->dv;
fd += self->ng0;
}
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_normalize(LocalizedFunctionsObject* self,
PyObject *args)
{
double I0;
PyArrayObject* I_obj;
if (!PyArg_ParseTuple(args, "dO", &I0, &I_obj))
return NULL;
double (*II)[4] = (double (*)[4])DOUBLEP(I_obj);
double* f = self->f;
double s = I0 / II[0][0];
// Scale spherically symmetric function so that the integral
// becomes exactly I0:
for (int n = 0; n < self->ng0; n++)
f[n] *= s;
// Adjust all other functions (l > 0) so that they integrate to zero:
for (int i = 1; i < self->nf; i++)
{
double *g = f + i * self->ng0;
double a = -II[i][0] / I0;
for (int n = 0; n < self->ng0; n++)
g[n] += a * f[n];
}
if (self->nfd > 0)
{
// Adjust derivatives:
double* fd = self->fd;
for (int n = 0; n < 3 * self->ng0; n++)
fd[n] *= s;
for (int c = 0; c < 3; c++)
{
double sd = II[0][c + 1] / II[0][0];
for (int n = 0; n < self->ng0; n++)
fd[n + c * self->ng0] -= f[n] * sd ;
}
for (int i = 1; i < self->nf; i++)
{
double *gd = fd + 3 * i * self->ng0;
double a = -II[i][0] / I0;
for (int n = 0; n < 3 * self->ng0; n++)
gd[n] += a * fd[n];
for (int c = 0; c < 3; c++)
{
double sd = II[i][c + 1] / I0;
for (int n = 0; n < self->ng0; n++)
gd[n + c * self->ng0] -= f[n] * sd ;
}
}
}
Py_RETURN_NONE;
}
static PyObject * get_functions(LocalizedFunctionsObject* self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
npy_intp dims[4] = {self->nf,
self->size0[0], self->size0[1], self->size0[2]};
PyArrayObject* functions = (PyArrayObject*)PyArray_SimpleNew(4, dims,
NPY_DOUBLE);
memcpy(PyArray_DATA(functions), self->f,
self->nf * self->ng0 * sizeof(double));
return (PyObject*)functions;
}
static PyObject * set_corner(LocalizedFunctionsObject* self,
PyObject *args)
{
PyArrayObject* start_c_obj;
if (!PyArg_ParseTuple(args, "O", &start_c_obj))
return NULL;
double *start_c = DOUBLEP(start_c_obj);
for (int c = 0; c < 3; c++)
self->start[c] = start_c[c];
Py_RETURN_NONE;
}
#ifdef PARALLEL
static PyObject * localized_functions_broadcast(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyObject* comm_obj;
int root;
if (!PyArg_ParseTuple(args, "Oi", &comm_obj, &root))
return NULL;
MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
MPI_Bcast(self->f, self->ng0 * (self->nf + self->nfd),
MPI_DOUBLE, root, comm);
Py_RETURN_NONE;
}
#endif
static PyMethodDef localized_functions_methods[] = {
{"integrate",
(PyCFunction)localized_functions_integrate, METH_VARARGS, 0},
{"derivative",
(PyCFunction)localized_functions_derivative, METH_VARARGS, 0},
{"add",
(PyCFunction)localized_functions_add, METH_VARARGS, 0},
{"add_density",
(PyCFunction)localized_functions_add_density, METH_VARARGS, 0},
{"add_density2",
(PyCFunction)localized_functions_add_density2, METH_VARARGS, 0},
{"norm",
(PyCFunction)localized_functions_norm, METH_VARARGS, 0},
{"normalize",
(PyCFunction)localized_functions_normalize, METH_VARARGS, 0},
{"get_functions",
(PyCFunction)get_functions, METH_VARARGS, 0},
{"set_corner",
(PyCFunction)set_corner, METH_VARARGS, 0},
#ifdef PARALLEL
{"broadcast",
(PyCFunction)localized_functions_broadcast, METH_VARARGS, 0},
#endif
{NULL, NULL, 0, NULL}
};
PyTypeObject LocalizedFunctionsType = {
PyVarObject_HEAD_INIT(NULL, 0)
"LocalizedFunctions",
sizeof(LocalizedFunctionsObject),
0,
(destructor)localized_functions_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"LF object",
0, 0, 0, 0, 0, 0,
localized_functions_methods
};
PyObject * NewLocalizedFunctionsObject(PyObject *obj, PyObject *args)
{
PyObject* radials;
PyArrayObject* size0_array;
PyArrayObject* size_array;
PyArrayObject* start_array;
PyArrayObject* h_array;
PyArrayObject* C_array;
int real;
int forces;
int compute;
if (!PyArg_ParseTuple(args, "OOOOOOiii", &radials,
&size0_array, &size_array,
&start_array, &h_array, &C_array,
&real, &forces, &compute))
return NULL;
LocalizedFunctionsObject *self = PyObject_NEW(LocalizedFunctionsObject,
&LocalizedFunctionsType);
if (self == NULL)
return NULL;
const long* size0 = LONGP(size0_array);
const long* size = LONGP(size_array);
const long* start = LONGP(start_array);
const double* h = DOUBLEP(h_array);
const double* C = DOUBLEP(C_array);
self->dv = h[0] * h[1] * h[2];
int ng = size[0] * size[1] * size[2];
int ng0 = size0[0] * size0[1] * size0[2];
self->ng = ng;
self->ng0 = ng0;
for (int i = 0; i < 3; i++)
{
self->size0[i] = size0[i];
self->size[i] = size[i];
self->start[i] = start[i];
}
int nf = 0;
int nfd = 0;
int nbins = 0;
double dr = 0.0;
for (int j = 0; j < PyList_Size(radials); j++)
{
const bmgsspline* spline =
&(((SplineObject*)PyList_GetItem(radials, j))->spline);
int l = spline->l;
assert(l <= 4);
if (j == 0)
{
nbins = spline->nbins;
dr = spline->dr;
}
else
{
assert(spline->nbins == nbins);
assert(spline->dr == dr);
}
nf += (2 * l + 1);
}
if (forces)
nfd = 3 * nf;
self->nf = nf;
self->nfd = nfd;
self->f = GPAW_MALLOC(double, (nf + nfd) * ng0);
if (forces)
self->fd = self->f + nf * ng0;
else
self->fd = 0;
int ndouble = (real ? 1 : 2);
self->w = GPAW_MALLOC(double, ng0 * ndouble);
if (compute)
{
int* bin = GPAW_MALLOC(int, ng0);
double* d = GPAW_MALLOC(double, ng0);
double* f0 = GPAW_MALLOC(double, ng0);
double* fd0 = 0;
if (forces)
fd0 = GPAW_MALLOC(double, ng0);
double* a = self->f;
double* ad = self->fd;
for (int j = 0; j < PyList_Size(radials); j++)
{
const bmgsspline* spline =
&(((SplineObject*)PyList_GetItem(radials, j))->spline);
if (j == 0)
bmgs_radial1(spline, self->size0, C, h, bin, d);
bmgs_radial2(spline, self->size0, bin, d, f0, fd0);
int l = spline->l;
for (int m = -l; m <= l; m++)
{
bmgs_radial3(spline, m, self->size0, C, h, f0, a);
a += ng0;
}
if (forces)
for (int m = -l; m <= l; m++)
for (int c = 0; c < 3; c++)
{
bmgs_radiald3(spline, m, c, self->size0, C, h, f0, fd0, ad);
ad += ng0;
}
}
if (forces)
free(fd0);
free(f0);
free(d);
free(bin);
}
return (PyObject*)self;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/localized_functions.h 0000664 0000000 0000000 00000001470 13164413722 0024557 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
typedef struct
{
PyObject_HEAD
double dv; // volume per grid point
int size[3]; // dimensions of big box
int start[3]; // corner of small box
int size0[3]; // dimensions of small box
int ng; // number of grid points in big box
int ng0; // number of grid points in small box
int nf; // number of localized functions
int nfd; // number of derivatives: zero or 3*nf
// pointers to size0 arrays:
double* f; // localized functions
double* fd; // xyz-derivatives of localized functions
double* w; // work array for one double or double complex array
} LocalizedFunctionsObject;
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/mlsqr.c 0000664 0000000 0000000 00000013033 13164413722 0021650 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#ifdef GPAW_NO_UNDERSCORE_LAPACK
# define dgels_ dgels
#endif
// Prototype for LAPACK's dgels (linear least-squares solver)
int dgels_(char* trans, int *m, int *n, int *nrhs, double* a, int *lda, double* b, int *ldb, double* work, int* lwork, int *info);
int safemod(int x, int m)
{
return (x%m + m)%m;
}
// Perform a moving least-squares (MLS) interpolation of a 3D array at the
// given points.
// Input arguments:
// order: order of the polynomial basis (1, 2 or 3)
// cutoff: cutoff radius of the weight function (in grid points)
// coords: scaled coordinates in [0, 1] of the interpolation points
// N_c: number of grid points along each axis
// beg_c: first grid point of this domain
// data: the 3D array to interpolate from
// target: the interpolated values are stored in this array
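// Outline of the algorithm (per interpolation point):
//   1. find the nearest grid point and collect the (2*cutoff+1)^3
//      surrounding grid values (with periodic wrap-around via safemod),
//   2. build the weighted polynomial design matrix X and right-hand side b,
//      using a compactly supported weight that vanishes beyond the cutoff,
//   3. solve the weighted least-squares problem with LAPACK dgels,
//   4. since the coordinates are centered on the interpolation point, the
//      interpolated value is simply the constant coefficient b[0].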
PyObject* mlsqr(PyObject *self, PyObject *args)
{
// The order of interpolation
unsigned char order = -1;
// The cutoff for moving least squares
double cutoff = -1;
// The coordinates for interpolation: array of size (3, N)
PyArrayObject* coords = 0;
// Number of grid points
PyArrayObject* N_c = 0;
// Beginning of grid
PyArrayObject* beg_c = 0;
// The 3d-data to be interpolated: array of size (X, Y, Z)
PyArrayObject* data;
// The interpolation target: array of size (N,)
PyArrayObject* target = 0;
if (!PyArg_ParseTuple(args, "BdOOOOO", &order, &cutoff, &coords, &N_c, &beg_c, &data, &target))
{
return NULL;
}
int coeffs = -1;
if (order == 1)
{
coeffs = 4;
}
if (order == 2)
{
coeffs = 10;
// 1 x y z xy yz zx xx yy zz
}
if (order == 3)
{
// 1 x y z xy yz zx xx yy zz
// xxy xxz yyx yyz zzx zzy
// xxx yyy zzz xyz
coeffs = 20;
}
int points = PyArray_DIM(coords, 0);
double* coord_nc = DOUBLEP(coords);
double* grid_points = DOUBLEP(N_c);
double* grid_start = DOUBLEP(beg_c);
double* target_n = DOUBLEP(target);
double* data_g = DOUBLEP(data);
// TODO: Calculate fit
const int sizex = (int) ceil(cutoff);
const int sizey = (int) ceil(cutoff);
const int sizez = (int) ceil(cutoff);
// Allocate X-matrix and b-vector
int source_points = (2*sizex+1)*(2*sizey+1)*(2*sizez+1);
double* X = GPAW_MALLOC(double, coeffs*source_points);
double* b = GPAW_MALLOC(double, source_points);
double* work = GPAW_MALLOC(double, coeffs*source_points);
// The multipliers for each dimension
int ldx = PyArray_DIM(data, 1) * PyArray_DIM(data, 2);
int ldy = PyArray_DIM(data, 2);
int ldz = 1;
// For each point to be interpolated
for (int p=0; p< points; p++)
{
double x = (*coord_nc++)*grid_points[0] - grid_start[0];
double y = (*coord_nc++)*grid_points[1] - grid_start[1];
double z = (*coord_nc++)*grid_points[2] - grid_start[2];
// The grid center point
int cx2 = (int) round(x);
int cy2 = (int) round(y);
int cz2 = (int) round(z);
// Scaled to grid
int cx = safemod(cx2, PyArray_DIM(data, 0));
int cy = safemod(cy2, PyArray_DIM(data, 1));
int cz = safemod(cz2, PyArray_DIM(data, 2));
double* i_X = X;
double* i_b = b;
// For each point to take into account
for (int dx=-sizex;dx<=sizex;dx++)
for (int dy=-sizey;dy<=sizey;dy++)
for (int dz=-sizez;dz<=sizez;dz++)
{
// Coordinates centered on x,y,z
double sx = (cx2 + dx) - x;
double sy = (cy2 + dy) - y;
double sz = (cz2 + dz) - z;
// Normalized distance from center
double d = sqrt(sx*sx+sy*sy+sz*sz) / cutoff;
double w = 0.0;
if (d < 1)
{
w = (1-d)*(1-d);
w*=w;
w*=(4*d+1);
}
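	    // w(d) = (1-d)^4 (4d+1) for d < 1 and 0 otherwise: a smooth,
	    // compactly supported weight of Wendland type.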
//double w = exp(-d*d);
*i_X++ = w*1.0;
*i_X++ = w*sx;
*i_X++ = w*sy;
*i_X++ = w*sz;
if (order > 1)
{
*i_X++ = w*sx*sy;
*i_X++ = w*sy*sz;
*i_X++ = w*sz*sx;
*i_X++ = w*sx*sx;
*i_X++ = w*sy*sy;
*i_X++ = w*sz*sz;
}
if (order > 2)
{
*i_X++ = w*sx*sy*sz; // xyz
*i_X++ = w*sx*sx*sx; // xxx
*i_X++ = w*sy*sy*sy; // yyy
*i_X++ = w*sz*sz*sz; // zzz
*i_X++ = w*sx*sx*sy; // xxy
*i_X++ = w*sx*sx*sz; // xxz
*i_X++ = w*sy*sy*sx; // yyx
*i_X++ = w*sy*sy*sz; // yyz
*i_X++ = w*sz*sz*sx; // zzx
*i_X++ = w*sz*sz*sy; // zzy
}
*i_b++ = w*data_g[ safemod(cx+dx, PyArray_DIM(data, 0)) * ldx +
safemod(cy+dy, PyArray_DIM(data, 1)) * ldy +
safemod(cz+dz, PyArray_DIM(data, 2)) * ldz ];
}
int info = 0;
int rhs = 1;
int worksize = coeffs*source_points;
int ldb = source_points;
dgels_("T",
&coeffs, // ...times 4.
&source_points, // lhs is of size sourcepoints...
&rhs, // one rhs.
X, // provide lhs
&coeffs, // Leading dimension of X
b, // provide rhs
&ldb, // Leading dimension of b
work, // work array (and output)
&worksize, // the size of work array
&info); // info
if (info != 0)
printf("WARNING: dgels returned %d!", info);
// Evaluate the polynomial
// Due to centered coordinates, it's just the constant term
double value = b[0];
*target_n++ = value;
//Nearest neighbour
//double value = data_g[ cx*data->dimensions[1]*data->dimensions[2] + cy*data->dimensions[2] + cz ];
//printf("%.5f" , value);
}
free(work);
free(b);
free(X);
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/mpi.c 0000664 0000000 0000000 00000114266 13164413722 0021311 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005-2009 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#ifdef PARALLEL
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <structmember.h>
#include "extensions.h"
#include <mpi.h>
#include "mympi.h"
#ifdef __bgp__
#include <mpix.h>
#endif
// Check that array is well-behaved and contains data that can be sent.
#define CHK_ARRAY(a) if ((a) == NULL || !PyArray_Check(a) \
|| !PyArray_ISCARRAY(a) || !PyArray_ISNUMBER(a)) { \
PyErr_SetString(PyExc_TypeError, \
"Not a proper NumPy array for MPI communication."); \
return NULL; }
// Check that two arrays have the same type, and the size of the
// second is a given multiple of the size of the first
#define CHK_ARRAYS(a,b,n) \
if ((PyArray_TYPE(a) != PyArray_TYPE(b)) \
|| (PyArray_SIZE(b) != PyArray_SIZE(a) * n)) { \
PyErr_SetString(PyExc_ValueError, \
"Incompatible array types or sizes."); \
return NULL; }
// Check that a processor number is valid
#define CHK_PROC(n) if (n < 0 || n >= self->size) {\
PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \
return NULL; }
// Check that a processor number is valid or is -1
#define CHK_PROC_DEF(n) if (n < -1 || n >= self->size) {\
PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \
return NULL; }
// Check that a processor number is valid and is not this processor
#define CHK_OTHER_PROC(n) if (n < 0 || n >= self->size || n == self->rank) { \
PyErr_SetString(PyExc_ValueError, "Invalid processor number."); \
return NULL; }
// MPI request object, so we can store a reference to the buffer,
// preventing its early deallocation.
typedef struct {
PyObject_HEAD
MPI_Request rq;
PyObject *buffer;
int status;
} GPAW_MPI_Request;
static PyObject *mpi_request_wait(GPAW_MPI_Request *self, PyObject *noargs)
{
if (self->status == 0)
{
// Calling wait multiple times is allowed but meaningless (as in the MPI standard)
Py_RETURN_NONE;
}
#ifndef GPAW_MPI_DEBUG
MPI_Wait(&(self->rq), MPI_STATUS_IGNORE);
#else
int ret = MPI_Wait(&(self->rq), MPI_STATUS_IGNORE);
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Wait error occurred.");
return NULL;
}
#endif
Py_DECREF(self->buffer);
self->status = 0;
Py_RETURN_NONE;
}
static PyObject *mpi_request_test(GPAW_MPI_Request *self, PyObject *noargs)
{
if (self->status == 0)
{
Py_RETURN_TRUE; // Already completed
}
int flag;
#ifndef GPAW_MPI_DEBUG
MPI_Test(&(self->rq), &flag, MPI_STATUS_IGNORE); // Can this change the Python string?
#else
int ret = MPI_Test(&(self->rq), &flag, MPI_STATUS_IGNORE); // Can this change the Python string?
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Test error occurred.");
return NULL;
}
#endif
if (flag)
{
Py_DECREF(self->buffer);
self->status = 0;
Py_RETURN_TRUE;
}
else
{
Py_RETURN_FALSE;
}
}
static void mpi_request_dealloc(GPAW_MPI_Request *self)
{
if (self->status)
{
PyObject *none = mpi_request_wait(self, NULL);
Py_DECREF(none);
}
PyObject_Del(self);
}
static PyMemberDef mpi_request_members[] = {
{"status", T_INT, offsetof(GPAW_MPI_Request, status), READONLY,
"status of the request, non-zero if communication is pending."},
{NULL}
};
static PyMethodDef mpi_request_methods[] = {
{"wait", (PyCFunction) mpi_request_wait, METH_NOARGS,
"Wait for the communication to complete."
},
{"test", (PyCFunction) mpi_request_test, METH_NOARGS,
"Test if the communication has completed."
},
{NULL}
};
PyTypeObject GPAW_MPI_Request_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"MPI_Request", /*tp_name*/
sizeof(GPAW_MPI_Request), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)mpi_request_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"MPI request object", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
mpi_request_methods, /* tp_methods */
mpi_request_members,
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
};
static GPAW_MPI_Request *NewMPIRequest(void)
{
GPAW_MPI_Request *self;
self = PyObject_NEW(GPAW_MPI_Request, &GPAW_MPI_Request_type);
if (self == NULL) return NULL;
memset(&(self->rq), 0, sizeof(MPI_Request));
self->buffer = NULL;
self->status = 1; // Active
return self;
}
static void mpi_dealloc(MPIObject *obj)
{
if (obj->comm == MPI_COMM_WORLD) {
# ifndef GPAW_INTERPRETER
MPI_Finalize();
# endif
} else
MPI_Comm_free(&(obj->comm));
Py_XDECREF(obj->parent);
free(obj->members);
PyObject_DEL(obj);
}
static PyObject * mpi_sendreceive(MPIObject *self, PyObject *args,
PyObject *kwargs)
{
PyArrayObject* a;
PyArrayObject* b;
int dest, src;
int sendtag = 123;
int recvtag = 123;
static char *kwlist[] = {"a", "dest", "b", "src", "sendtag", "recvtag",
NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OiOi|ii:sendreceive",
kwlist,
&a, &dest, &b, &src, &sendtag, &recvtag))
return NULL;
CHK_ARRAY(a);
CHK_OTHER_PROC(dest);
CHK_ARRAY(b);
CHK_OTHER_PROC(src);
int nsend = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
nsend *= PyArray_DIM(a,d);
int nrecv = PyArray_DESCR(b)->elsize;
for (int d = 0; d < PyArray_NDIM(b); d++)
nrecv *= PyArray_DIM(b,d);
#ifndef GPAW_MPI_DEBUG
MPI_Sendrecv(PyArray_BYTES(a), nsend, MPI_BYTE, dest, sendtag,
PyArray_BYTES(b), nrecv, MPI_BYTE, src, recvtag,
self->comm, MPI_STATUS_IGNORE);
#else
int ret = MPI_Sendrecv(PyArray_BYTES(a), nsend, MPI_BYTE, dest, sendtag,
PyArray_BYTES(b), nrecv, MPI_BYTE, src, recvtag,
self->comm, MPI_STATUS_IGNORE);
if (ret != MPI_SUCCESS) {
PyErr_SetString(PyExc_RuntimeError, "MPI_Sendrecv error occurred.");
return NULL;
}
#endif
Py_RETURN_NONE;
}
static PyObject * mpi_receive(MPIObject *self, PyObject *args, PyObject *kwargs)
{
PyArrayObject* a;
int src;
int tag = 123;
int block = 1;
static char *kwlist[] = {"a", "src", "tag", "block", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii:receive", kwlist,
&a, &src, &tag, &block))
return NULL;
CHK_ARRAY(a);
CHK_OTHER_PROC(src);
int n = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
n *= PyArray_DIM(a, d);
if (block)
{
#ifndef GPAW_MPI_DEBUG
MPI_Recv(PyArray_BYTES(a), n, MPI_BYTE, src, tag, self->comm,
MPI_STATUS_IGNORE);
#else
int ret = MPI_Recv(PyArray_BYTES(a), n, MPI_BYTE, src, tag, self->comm,
MPI_STATUS_IGNORE);
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Recv error occurred.");
return NULL;
}
#endif
Py_RETURN_NONE;
}
else
{
GPAW_MPI_Request *req = NewMPIRequest();
if (req == NULL) return NULL;
req->buffer = (PyObject*)a;
Py_INCREF(req->buffer);
#ifndef GPAW_MPI_DEBUG
MPI_Irecv(PyArray_BYTES(a), n, MPI_BYTE, src, tag, self->comm, &(req->rq));
#else
int ret = MPI_Irecv(PyArray_BYTES(a), n, MPI_BYTE, src, tag, self->comm,
&(req->rq));
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Irecv error occurred.");
return NULL;
}
#endif
return (PyObject *) req;
}
}
static PyObject * mpi_send(MPIObject *self, PyObject *args, PyObject *kwargs)
{
PyArrayObject* a;
int dest;
int tag = 123;
int block = 1;
static char *kwlist[] = {"a", "dest", "tag", "block", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|ii:send", kwlist,
&a, &dest, &tag, &block))
return NULL;
CHK_ARRAY(a);
CHK_OTHER_PROC(dest);
int n = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
n *= PyArray_DIM(a,d);
if (block)
{
#ifndef GPAW_MPI_DEBUG
MPI_Send(PyArray_BYTES(a), n, MPI_BYTE, dest, tag, self->comm);
#else
int ret = MPI_Send(PyArray_BYTES(a), n, MPI_BYTE, dest, tag, self->comm);
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Send error occurred.");
return NULL;
}
#endif
Py_RETURN_NONE;
}
else
{
      GPAW_MPI_Request *req = NewMPIRequest();
      if (req == NULL) return NULL;
      req->buffer = (PyObject*)a;
Py_INCREF(a);
#ifndef GPAW_MPI_DEBUG
MPI_Isend(PyArray_BYTES(a), n, MPI_BYTE, dest, tag, self->comm,
&(req->rq));
#else
int ret = MPI_Isend(PyArray_BYTES(a), n, MPI_BYTE, dest, tag, self->comm,
&(req->rq));
if (ret != MPI_SUCCESS)
{
PyErr_SetString(PyExc_RuntimeError, "MPI_Isend error occurred.");
return NULL;
}
#endif
return (PyObject *)req;
}
}
static PyObject * mpi_ssend(MPIObject *self, PyObject *args, PyObject *kwargs)
{
PyArrayObject* a;
int dest;
int tag = 123;
static char *kwlist[] = {"a", "dest", "tag", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|i:send", kwlist,
&a, &dest, &tag))
return NULL;
CHK_ARRAY(a);
CHK_OTHER_PROC(dest);
int n = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
n *= PyArray_DIM(a,d);
MPI_Ssend(PyArray_BYTES(a), n, MPI_BYTE, dest, tag, self->comm);
Py_RETURN_NONE;
}
static PyObject * mpi_name(MPIObject *self, PyObject *noargs)
{
char name[MPI_MAX_PROCESSOR_NAME];
int resultlen;
MPI_Get_processor_name(name, &resultlen);
return Py_BuildValue("s#", name, resultlen);
}
static PyObject * mpi_abort(MPIObject *self, PyObject *args)
{
int errcode;
if (!PyArg_ParseTuple(args, "i:abort", &errcode))
return NULL;
MPI_Abort(self->comm, errcode);
Py_RETURN_NONE;
}
static PyObject * mpi_barrier(MPIObject *self)
{
MPI_Barrier(self->comm);
Py_RETURN_NONE;
}
static PyObject * mpi_test(MPIObject *self, PyObject *args)
{
GPAW_MPI_Request* s;
if (!PyArg_ParseTuple(args, "O!:wait", &GPAW_MPI_Request_type, &s))
return NULL;
return mpi_request_test(s, NULL);
}
static PyObject * mpi_testall(MPIObject *self, PyObject *requests)
{
int n; // Number of requests
MPI_Request *rqs = NULL;
int flag = 0;
if (!PySequence_Check(requests))
{
PyErr_SetString(PyExc_TypeError, "mpi.testall: argument must be a sequence");
return NULL;
}
// Extract the request objects
n = PySequence_Size(requests);
assert(n >= 0); // This cannot fail.
rqs = GPAW_MALLOC(MPI_Request, n);
assert(rqs != NULL);
for (int i = 0; i < n; i++)
{
PyObject *o = PySequence_GetItem(requests, i);
if (o == NULL)
return NULL;
if (Py_TYPE(o) != &GPAW_MPI_Request_type)
{
Py_DECREF(o);
free(rqs);
PyErr_SetString(PyExc_TypeError, "mpi.testall: argument must be a sequence of MPI requests");
return NULL;
}
GPAW_MPI_Request *s = (GPAW_MPI_Request *)o;
rqs[i] = s->rq;
Py_DECREF(o);
}
// Do the actual test.
#ifndef GPAW_MPI_DEBUG
MPI_Testall(n, rqs, &flag, MPI_STATUSES_IGNORE);
#else
int ret = MPI_Testall(n, rqs, &flag, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS)
{
// We do not dare to release the buffers now!
PyErr_SetString(PyExc_RuntimeError, "MPI_Testall error occurred.");
return NULL;
}
#endif
// Unlike MPI_Test, if flag outcome is non-zero, MPI_Testall will deallocate
// all requests which were allocated by nonblocking communication calls, so
// we must free these buffers. Otherwise, none of the requests are modified.
if (flag != 0)
{
// Release the buffers used by the MPI communication
for (int i = 0; i < n; i++)
{
GPAW_MPI_Request *o = (GPAW_MPI_Request *) PySequence_GetItem(requests, i);
if (o->status)
{
assert(o->buffer != NULL);
Py_DECREF(o->buffer);
}
o->status = 0;
Py_DECREF(o);
}
}
// Release internal data and return.
free(rqs);
return Py_BuildValue("i", flag);
}
static PyObject * mpi_wait(MPIObject *self, PyObject *args)
{
GPAW_MPI_Request* s;
if (!PyArg_ParseTuple(args, "O!:wait", &GPAW_MPI_Request_type, &s))
return NULL;
return mpi_request_wait(s, NULL);
}
static PyObject * mpi_waitall(MPIObject *self, PyObject *requests)
{
int n; // Number of requests
MPI_Request *rqs = NULL;
if (!PySequence_Check(requests))
{
PyErr_SetString(PyExc_TypeError, "mpi.waitall: argument must be a sequence");
return NULL;
}
// Extract the request objects
n = PySequence_Size(requests);
assert(n >= 0); // This cannot fail.
rqs = GPAW_MALLOC(MPI_Request, n);
for (int i = 0; i < n; i++)
{
PyObject *o = PySequence_GetItem(requests, i);
if (o == NULL)
return NULL;
if (Py_TYPE(o) != &GPAW_MPI_Request_type)
{
Py_DECREF(o);
free(rqs);
PyErr_SetString(PyExc_TypeError, "mpi.waitall: argument must be a sequence of MPI requests");
return NULL;
}
GPAW_MPI_Request *s = (GPAW_MPI_Request *)o;
rqs[i] = s->rq;
Py_DECREF(o);
}
// Do the actual wait.
#ifndef GPAW_MPI_DEBUG
MPI_Waitall(n, rqs, MPI_STATUSES_IGNORE);
#else
int ret = MPI_Waitall(n, rqs, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS)
{
// We do not dare to release the buffers now!
PyErr_SetString(PyExc_RuntimeError, "MPI_Waitall error occurred.");
return NULL;
}
#endif
// Release the buffers used by the MPI communication
for (int i = 0; i < n; i++)
{
GPAW_MPI_Request *o = (GPAW_MPI_Request *) PySequence_GetItem(requests, i);
if (o->status)
{
assert(o->buffer != NULL);
Py_DECREF(o->buffer);
}
o->status = 0;
Py_DECREF(o);
}
// Release internal data and return.
free(rqs);
Py_RETURN_NONE;
}
static MPI_Datatype get_mpi_datatype(PyArrayObject *a)
{
int n = PyArray_DESCR(a)->elsize;
if (PyArray_ISCOMPLEX(a))
n = n / 2;
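  // Complex arrays are communicated as pairs of real numbers, so the
  // element size is halved here and the caller doubles the element count.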
switch(PyArray_TYPE(a))
{
// Floating point numbers including complex numbers
case NPY_DOUBLE:
case NPY_CDOUBLE:
assert(sizeof(double) == n);
return MPI_DOUBLE;
case NPY_FLOAT:
case NPY_CFLOAT:
assert(sizeof(float) == n);
return MPI_FLOAT;
case NPY_LONGDOUBLE:
case NPY_CLONGDOUBLE:
assert(sizeof(long double) == n);
return MPI_LONG_DOUBLE;
// Signed integer types
case NPY_BYTE:
assert(sizeof(char) == n);
return MPI_CHAR;
case NPY_SHORT:
assert(sizeof(short) == n);
return MPI_SHORT;
case NPY_INT:
assert(sizeof(int) == n);
return MPI_INT;
case NPY_LONG:
assert(sizeof(long) == n);
return MPI_LONG;
// Unsigned integer types
case NPY_BOOL:
case NPY_UBYTE:
assert(sizeof(unsigned char) == n);
return MPI_UNSIGNED_CHAR;
case NPY_USHORT:
assert(sizeof(unsigned short) == n);
return MPI_UNSIGNED_SHORT;
case NPY_UINT:
assert(sizeof(unsigned) == n);
return MPI_UNSIGNED;
case NPY_ULONG:
assert(sizeof(unsigned long) == n);
return MPI_UNSIGNED_LONG;
}
// If we reach this point none of the cases worked out.
PyErr_SetString(PyExc_ValueError, "Cannot communicate data of this type.");
return 0;
}
#if PY_MAJOR_VERSION >= 3
#define PyInt_FromLong PyLong_FromLong
#define PyInt_Check PyLong_Check
#define PyInt_AS_LONG PyLong_AS_LONG
#endif
static PyObject * mpi_reduce(MPIObject *self, PyObject *args, PyObject *kwargs,
MPI_Op operation, int allowcomplex)
{
#ifdef GPAW_MPI_DEBUG
MPI_Barrier(self->comm);
#endif
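  // Generic reduction helper: Python floats, ints and (optionally) complex
  // numbers are reduced as scalars and the result is returned; NumPy arrays
  // are reduced in place.  root == -1 means allreduce (result on all ranks),
  // otherwise the result ends up on the given root only.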
PyObject* obj;
int root = -1;
static char *kwlist[] = {"a", "root", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:reduce", kwlist,
&obj, &root))
return NULL;
CHK_PROC_DEF(root);
if (PyFloat_Check(obj))
{
double din = PyFloat_AS_DOUBLE(obj);
double dout;
if (root == -1)
MPI_Allreduce(&din, &dout, 1, MPI_DOUBLE, operation, self->comm);
else
MPI_Reduce(&din, &dout, 1, MPI_DOUBLE, operation, root, self->comm);
return PyFloat_FromDouble(dout);
}
if (PyInt_Check(obj))
{
long din = PyInt_AS_LONG(obj);
long dout;
if (root == -1)
MPI_Allreduce(&din, &dout, 1, MPI_LONG, operation, self->comm);
else
MPI_Reduce(&din, &dout, 1, MPI_LONG, operation, root, self->comm);
return PyInt_FromLong(dout);
}
else if (PyComplex_Check(obj) && allowcomplex)
{
double din[2];
double dout[2];
din[0] = PyComplex_RealAsDouble(obj);
din[1] = PyComplex_ImagAsDouble(obj);
if (root == -1)
MPI_Allreduce(&din, &dout, 2, MPI_DOUBLE, MPI_SUM, self->comm);
else
MPI_Reduce(&din, &dout, 2, MPI_DOUBLE, MPI_SUM, root, self->comm);
return PyComplex_FromDoubles(dout[0], dout[1]);
}
else if (PyComplex_Check(obj))
{
PyErr_SetString(PyExc_ValueError,
"Operation not allowed on complex numbers");
return NULL;
}
else // It should be an array
{
int n;
int elemsize;
MPI_Datatype datatype;
PyArrayObject* aobj = (PyArrayObject*)obj;
CHK_ARRAY(aobj);
datatype = get_mpi_datatype(aobj);
if (datatype == 0)
return NULL;
n = PyArray_SIZE(aobj);
elemsize = PyArray_DESCR(aobj)->elsize;
if (PyArray_ISCOMPLEX(aobj))
{
if (allowcomplex)
{
n *= 2;
elemsize /= 2;
}
else
{
PyErr_SetString(PyExc_ValueError,
"Operation not allowed on complex numbers");
return NULL;
}
}
if (root == -1)
{
#ifdef GPAW_MPI2
MPI_Allreduce(MPI_IN_PLACE, PyArray_BYTES(aobj), n, datatype,
operation, self->comm);
#else
char* b = GPAW_MALLOC(char, n * elemsize);
MPI_Allreduce(PyArray_BYTES(aobj), b, n, datatype, operation,
self->comm);
assert(PyArray_NBYTES(aobj) == n * elemsize);
memcpy(PyArray_BYTES(aobj), b, n * elemsize);
free(b);
#endif
}
else
{
int rank;
MPI_Comm_rank(self->comm, &rank);
char* b = 0;
if (rank == root)
{
#ifdef GPAW_MPI2
MPI_Reduce(MPI_IN_PLACE, PyArray_BYTES(aobj), n,
datatype, operation, root, self->comm);
#else
b = GPAW_MALLOC(char, n * elemsize);
MPI_Reduce(PyArray_BYTES(aobj), b, n, datatype,
operation, root, self->comm);
assert(PyArray_NBYTES(aobj) == n * elemsize);
memcpy(PyArray_BYTES(aobj), b, n * elemsize);
free(b);
#endif
}
else
{
MPI_Reduce(PyArray_BYTES(aobj), b, n, datatype,
operation, root, self->comm);
}
}
Py_RETURN_NONE;
}
}
static PyObject * mpi_sum(MPIObject *self, PyObject *args, PyObject *kwargs)
{
return mpi_reduce(self, args, kwargs, MPI_SUM, 1);
}
static PyObject * mpi_product(MPIObject *self, PyObject *args, PyObject *kwargs)
{
// No complex numbers as that would give separate products of
// real and imaginary parts.
return mpi_reduce(self, args, kwargs, MPI_PROD, 0);
}
static PyObject * mpi_max(MPIObject *self, PyObject *args, PyObject *kwargs)
{
return mpi_reduce(self, args, kwargs, MPI_MAX, 0);
}
static PyObject * mpi_min(MPIObject *self, PyObject *args, PyObject *kwargs)
{
return mpi_reduce(self, args, kwargs, MPI_MIN, 0);
}
static PyObject * mpi_scatter(MPIObject *self, PyObject *args)
{
PyArrayObject* sendobj;
PyArrayObject* recvobj;
int root;
if (!PyArg_ParseTuple(args, "OOi:scatter", &sendobj, &recvobj, &root))
return NULL;
CHK_ARRAY(recvobj);
CHK_PROC(root);
char* source = 0;
if (self->rank == root) {
CHK_ARRAY(sendobj);
CHK_ARRAYS(recvobj, sendobj, self->size); // size(send) = size(recv)*Ncpu
source = PyArray_BYTES(sendobj);
}
int n = PyArray_DESCR(recvobj)->elsize;
for (int d = 0; d < PyArray_NDIM(recvobj); d++)
n *= PyArray_DIM(recvobj,d);
MPI_Scatter(source, n, MPI_BYTE, PyArray_BYTES(recvobj),
n, MPI_BYTE, root, self->comm);
Py_RETURN_NONE;
}
static PyObject * mpi_allgather(MPIObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
if (!PyArg_ParseTuple(args, "OO:allgather", &a, &b))
return NULL;
CHK_ARRAY(a);
CHK_ARRAY(b);
CHK_ARRAYS(a, b, self->size);
int n = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
n *= PyArray_DIM(a,d);
// What about endianness????
MPI_Allgather(PyArray_BYTES(a), n, MPI_BYTE, PyArray_BYTES(b), n,
MPI_BYTE, self->comm);
Py_RETURN_NONE;
}
static PyObject * mpi_gather(MPIObject *self, PyObject *args)
{
PyArrayObject* a;
int root;
PyArrayObject* b = 0;
if (!PyArg_ParseTuple(args, "Oi|O", &a, &root, &b))
return NULL;
CHK_ARRAY(a);
CHK_PROC(root);
if (root == self->rank)
{
CHK_ARRAY(b);
CHK_ARRAYS(a, b, self->size);
}
else if ((PyObject*)b != Py_None && b != NULL)
{
fprintf(stderr, "******** Root=%d\n", root);
PyErr_SetString(PyExc_ValueError,
"mpi_gather: b array should not be given on non-root processors.");
return NULL;
}
int n = PyArray_DESCR(a)->elsize;
for (int d = 0; d < PyArray_NDIM(a); d++)
n *= PyArray_DIM(a,d);
if (root != self->rank)
MPI_Gather(PyArray_BYTES(a), n, MPI_BYTE, 0, n, MPI_BYTE, root, self->comm);
else
MPI_Gather(PyArray_BYTES(a), n, MPI_BYTE, PyArray_BYTES(b), n, MPI_BYTE, root, self->comm);
Py_RETURN_NONE;
}
static PyObject * mpi_broadcast(MPIObject *self, PyObject *args)
{
#ifdef GPAW_MPI_DEBUG
MPI_Barrier(self->comm);
#endif
PyArrayObject* buf;
int root;
if (!PyArg_ParseTuple(args, "Oi:broadcast", &buf, &root))
return NULL;
CHK_ARRAY(buf);
CHK_PROC(root);
int n = PyArray_DESCR(buf)->elsize;
for (int d = 0; d < PyArray_NDIM(buf); d++)
n *= PyArray_DIM(buf,d);
MPI_Bcast(PyArray_BYTES(buf), n, MPI_BYTE, root, self->comm);
Py_RETURN_NONE;
}
static PyObject *mpi_compare(MPIObject *self, PyObject *args)
{
MPIObject* other;
int result;
char* pyresult;
if (!PyArg_ParseTuple(args, "O", &other))
return NULL;
MPI_Comm_compare(self->comm, other->comm, &result);
if(result == MPI_IDENT) pyresult = "ident";
else if (result == MPI_CONGRUENT) pyresult = "congruent";
else if (result == MPI_SIMILAR) pyresult = "similar";
else if (result == MPI_UNEQUAL) pyresult = "unequal";
else return NULL;
return Py_BuildValue("s", pyresult);
}
static PyObject *mpi_translate_ranks(MPIObject *self, PyObject *args)
{
PyObject* myranks_anytype; // Conversion to numpy array below
MPIObject* other;
if (!PyArg_ParseTuple(args, "OO", &other, &myranks_anytype))
return NULL;
// XXXXXX This uses NPY_LONG and NPY_INT. On some computers the
// returned array is int32 while np.array(..., dtype=int) returns
// int64. This should very probably be changed so it always
// corresponds to the default int of numpy.
// This handling of arrays of ranks is taken from the MPICommunicator
// creation method. See that method for explanation of casting, datatypes
// etc.
PyArrayObject *myranks_long = (PyArrayObject*)PyArray_ContiguousFromAny(
myranks_anytype, NPY_LONG, 1, 1);
if(myranks_long == NULL)
return NULL;
int nranks = PyArray_DIM(myranks_long, 0);
PyArrayObject *myranks;
myranks = (PyArrayObject*)PyArray_Cast(myranks_long, NPY_INT);
npy_intp rankshape[1];
rankshape[0] = PyArray_SIZE(myranks);
PyArrayObject* other_ranks = (PyArrayObject*)PyArray_SimpleNew(1, rankshape,
NPY_INT);
MPI_Group mygroup, othergroup;
MPI_Comm_group(self->comm, &mygroup);
MPI_Comm_group(other->comm, &othergroup);
int* rankdata = (int*)PyArray_BYTES(myranks);
int* otherrankdata = (int*)PyArray_BYTES(other_ranks);
MPI_Group_translate_ranks(mygroup, nranks, rankdata, othergroup,
otherrankdata);
// Return something with a definite value to Python.
for(int i=0; i < nranks; i++) {
if(otherrankdata[i] == MPI_UNDEFINED) {
otherrankdata[i] = -1;
}
}
PyObject* other_ranks_anytype = PyArray_Cast(other_ranks,
PyArray_TYPE((PyArrayObject*)myranks_anytype));
Py_DECREF(myranks_long);
Py_DECREF(myranks);
Py_DECREF(other_ranks);
return (PyObject*)other_ranks_anytype;
}
static PyObject * mpi_alltoallv(MPIObject *self, PyObject *args)
{
PyArrayObject* send_obj;
PyArrayObject* send_cnts;
PyArrayObject* send_displs;
PyArrayObject* recv_obj;
PyArrayObject* recv_cnts;
PyArrayObject* recv_displs;
if (!PyArg_ParseTuple(args, "OOOOOO:alltoallv", &send_obj, &send_cnts,
&send_displs, &recv_obj,
&recv_cnts, &recv_displs))
return NULL;
CHK_ARRAY(send_obj);
CHK_ARRAY(send_cnts);
CHK_ARRAY(send_displs);
CHK_ARRAY(recv_obj);
CHK_ARRAY(recv_cnts);
CHK_ARRAY(recv_displs);
int *s_cnts = GPAW_MALLOC(int, self->size);
int *s_displs = GPAW_MALLOC(int, self->size);
int *r_cnts = GPAW_MALLOC(int, self->size);
int *r_displs = GPAW_MALLOC(int, self->size);
/* Create count and displacement arrays in units of bytes */
int elem_size = PyArray_ITEMSIZE(send_obj);
long* tmp1 = PyArray_DATA(send_cnts);
long* tmp2 = PyArray_DATA(send_displs);
long* tmp3 = PyArray_DATA(recv_cnts);
long* tmp4 = PyArray_DATA(recv_displs);
for (int i=0; i < self->size; i++) {
s_cnts[i] = tmp1[i] * elem_size;
s_displs[i] = tmp2[i] * elem_size;
r_cnts[i] = tmp3[i] * elem_size;
r_displs[i] = tmp4[i] * elem_size;
}
MPI_Alltoallv(PyArray_BYTES(send_obj),
s_cnts, s_displs,
MPI_BYTE, PyArray_BYTES(recv_obj), r_cnts,
r_displs, MPI_BYTE, self->comm);
free(s_cnts);
free(s_displs);
free(r_cnts);
free(r_displs);
Py_RETURN_NONE;
}
static PyObject * get_members(MPIObject *self, PyObject *args)
{
PyArrayObject *ranks;
npy_intp ranks_dims[1] = {self->size};
ranks = (PyArrayObject *) PyArray_SimpleNew(1, ranks_dims, NPY_INT);
if (ranks == NULL)
return NULL;
memcpy(INTP(ranks), self->members, self->size*sizeof(int));
PyObject* values = Py_BuildValue("O", ranks);
Py_DECREF(ranks);
return values;
}
// See the documentation for corresponding function in debug wrapper
// for the purpose of this function (gpaw/mpi/__init__.py)
static PyObject * get_c_object(MPIObject *self, PyObject *args)
{
return Py_BuildValue("O", self);
}
// Forward declaration of MPI_Communicator because it needs MPIType
// that needs MPI_getattr that needs MPI_Methods that need
// MPI_Communicator that need ...
static PyObject * MPICommunicator(MPIObject *self, PyObject *args);
static PyMethodDef mpi_methods[] = {
{"sendreceive", (PyCFunction)mpi_sendreceive,
METH_VARARGS|METH_KEYWORDS,
"sendreceive(a, dest, b, src, desttag=123, srctag=123) sends an array a to dest and receives an array b from src."},
{"receive", (PyCFunction)mpi_receive,
METH_VARARGS|METH_KEYWORDS,
"receive(a, src, tag=123, block=1) receives array a from src."},
{"send", (PyCFunction)mpi_send,
METH_VARARGS|METH_KEYWORDS,
"send(a, dest, tag=123, block=1) sends array a to dest."},
{"ssend", (PyCFunction)mpi_ssend,
METH_VARARGS|METH_KEYWORDS,
"ssend(a, dest, tag=123) synchronously sends array a to dest."},
{"abort", (PyCFunction)mpi_abort, METH_VARARGS,
"abort(errcode) aborts all MPI tasks."},
{"name", (PyCFunction)mpi_name, METH_NOARGS,
"name() returns the name of the processor node."},
{"barrier", (PyCFunction)mpi_barrier, METH_VARARGS,
"barrier() synchronizes all MPI tasks"},
{"test", (PyCFunction)mpi_test, METH_VARARGS,
"test(request) tests if a nonblocking communication is complete."},
{"testall", (PyCFunction)mpi_testall, METH_O,
"testall(list_of_rqs) tests if multiple nonblocking communications are complete."},
{"wait", (PyCFunction)mpi_wait, METH_VARARGS,
"wait(request) waits for a nonblocking communication to complete."},
{"waitall", (PyCFunction)mpi_waitall, METH_O,
"waitall(list_of_rqs) waits for multiple nonblocking communications to complete."},
{"sum", (PyCFunction)mpi_sum,
METH_VARARGS|METH_KEYWORDS,
"sum(a, root=-1) sums arrays, result on all tasks unless root is given."},
{"product", (PyCFunction)mpi_product,
METH_VARARGS|METH_KEYWORDS,
"product(a, root=-1) multiplies arrays, result on all tasks unless root is given."},
{"max", (PyCFunction)mpi_max,
METH_VARARGS|METH_KEYWORDS,
"max(a, root=-1) maximum of arrays, result on all tasks unless root is given."},
{"min", (PyCFunction)mpi_min,
METH_VARARGS|METH_KEYWORDS,
"min(a, root=-1) minimum of arrays, result on all tasks unless root is given."},
{"scatter", (PyCFunction)mpi_scatter, METH_VARARGS,
"scatter(src, target, root) distributes array from root task."},
{"gather", (PyCFunction)mpi_gather, METH_VARARGS,
"gather(src, root, target=None) gathers data from all tasks on root task."},
{"all_gather", (PyCFunction)mpi_allgather, METH_VARARGS,
"all_gather(src, target) gathers data from all tasks on all tasks."},
{"alltoallv", (PyCFunction)mpi_alltoallv, METH_VARARGS,
"alltoallv(sbuf, scnt, sdispl, rbuf, ...) send data from all tasks to all tasks."},
{"broadcast", (PyCFunction)mpi_broadcast, METH_VARARGS,
"broadcast(buffer, root) Broadcast data in-place from root task."},
{"compare", (PyCFunction)mpi_compare, METH_VARARGS,
"compare two communicators for identity using MPI_Comm_compare."},
{"translate_ranks", (PyCFunction)mpi_translate_ranks, METH_VARARGS,
"figure out correspondence between ranks on two communicators."},
{"get_members", (PyCFunction)get_members, METH_VARARGS, 0},
{"get_c_object", (PyCFunction)get_c_object, METH_VARARGS, 0},
{"new_communicator", (PyCFunction)MPICommunicator, METH_VARARGS,
"new_communicator(ranks) creates a new communicator."},
{0, 0, 0, 0}
};
static PyMemberDef mpi_members[] = {
{"size", T_INT, offsetof(MPIObject, size), 0, "Number of processors"},
{"rank", T_INT, offsetof(MPIObject, rank), 0, "Number of this processor"},
{"parent", T_OBJECT_EX, offsetof(MPIObject, parent), 0, "Parent communicator"},
{0, 0, 0, 0, 0} /* Sentinel */
};
// __new__
static PyObject *NewMPIObject(PyTypeObject* type, PyObject *args,
PyObject *kwds)
{
static char *kwlist[] = {NULL};
MPIObject* self;
if (! PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
return NULL;
self = (MPIObject *) type->tp_alloc(type, 0);
if (self == NULL)
return NULL;
# ifndef GPAW_INTERPRETER
MPI_Init(NULL, NULL);
# endif
MPI_Comm_size(MPI_COMM_WORLD, &(self->size));
MPI_Comm_rank(MPI_COMM_WORLD, &(self->rank));
self->comm = MPI_COMM_WORLD;
Py_INCREF(Py_None);
self->parent = Py_None;
self->members = (int*) malloc(self->size*sizeof(int));
if (self->members == NULL)
return NULL;
  for (int i = 0; i < self->size; i++)
self->members[i] = i;
return (PyObject *) self;
}
// __init__ does nothing.
static int InitMPIObject(MPIObject* self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {NULL};
if (! PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist))
return -1;
return 0;
}
PyTypeObject MPIType = {
PyVarObject_HEAD_INIT(NULL, 0)
"MPI", /*tp_name*/
sizeof(MPIObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)mpi_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash*/
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"MPI object", /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
0, /*tp_iternext*/
mpi_methods, /*tp_methods*/
mpi_members, /*tp_members*/
0, /*tp_getset*/
0, /*tp_base*/
0, /*tp_dict*/
0, /*tp_descr_get*/
0, /*tp_descr_set*/
0, /*tp_dictoffset*/
(initproc)InitMPIObject, /*tp_init*/
0, /*tp_alloc*/
NewMPIObject, /*tp_new*/
};
static PyObject * MPICommunicator(MPIObject *self, PyObject *args)
{
PyObject* orig_ranks;
if (!PyArg_ParseTuple(args, "O", &orig_ranks))
return NULL;
// NB: int32 is NPY_LONG on 32-bit Linux and NPY_INT on 64-bit Linux!
// First convert to NumPy array of NPY_LONG, then cast to NPY_INT, to
// allow both 32 and 64 bit integers in the argument (except 64 on 32).
PyArrayObject *ranks = (PyArrayObject*)PyArray_ContiguousFromAny(
orig_ranks, NPY_LONG, 1, 1);
if (ranks == NULL)
return NULL;
PyArrayObject *iranks;
int n = PyArray_DIM(ranks, 0);
iranks = (PyArrayObject*)PyArray_Cast((PyArrayObject*) ranks, NPY_INT);
Py_DECREF(ranks);
if (iranks == NULL)
return NULL;
// Check that all ranks make sense
for (int i = 0; i < n; i++)
{
int *x = PyArray_GETPTR1(iranks, i);
if (*x < 0 || *x >= self->size)
{
Py_DECREF(iranks);
PyErr_SetString(PyExc_ValueError, "invalid rank");
return NULL;
}
for (int j = 0; j < i; j++)
{
int *y = PyArray_GETPTR1(iranks, j);
if (*y == *x)
{
Py_DECREF(iranks);
PyErr_SetString(PyExc_ValueError, "duplicate rank");
return NULL;
}
}
}
MPI_Group group;
MPI_Comm_group(self->comm, &group);
MPI_Group newgroup;
MPI_Group_incl(group, n, (int *) PyArray_BYTES(iranks), &newgroup);
MPI_Comm comm;
MPI_Comm_create(self->comm, newgroup, &comm); // has a memory leak!
#ifdef GPAW_MPI_DEBUG
if (comm != MPI_COMM_NULL)
{
// Default Errhandler is MPI_ERRORS_ARE_FATAL
MPI_Errhandler_set(comm, MPI_ERRORS_RETURN);
#ifdef __bgp__
int result;
int rank;
MPI_Comm_rank(comm, &rank);
MPIX_Get_property(comm, MPIDO_RECT_COMM, &result);
if (rank == 0) {
if(result) fprintf(stderr, "Get_property: comm is rectangular. \n");
}
#endif
}
#endif // GPAW_MPI_DEBUG
MPI_Group_free(&newgroup);
MPI_Group_free(&group);
if (comm == MPI_COMM_NULL)
{
Py_DECREF(iranks);
Py_RETURN_NONE;
}
else
{
MPIObject *obj = PyObject_NEW(MPIObject, &MPIType);
if (obj == NULL)
return NULL;
MPI_Comm_size(comm, &(obj->size));
MPI_Comm_rank(comm, &(obj->rank));
obj->comm = comm;
if (obj->parent == Py_None)
Py_DECREF(obj->parent);
obj->members = (int*) malloc(obj->size*sizeof(int));
if (obj->members == NULL)
return NULL;
memcpy(obj->members, (int *) PyArray_BYTES(iranks), obj->size*sizeof(int));
Py_DECREF(iranks);
// Make sure that MPI_COMM_WORLD is kept alive till the end (we
// don't want MPI_Finalize to be called before MPI_Comm_free):
Py_INCREF(self);
obj->parent = (PyObject*)self;
return (PyObject*)obj;
}
}
#endif // PARALLEL
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/mympi.h 0000664 0000000 0000000 00000000350 13164413722 0021650 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
typedef struct
{
PyObject_HEAD
  int size;          // number of processes in the communicator
  int rank;          // rank of this process within the communicator
  MPI_Comm comm;     // the underlying MPI communicator
  PyObject* parent;  // parent communicator object (kept alive until dealloc)
  int* members;      // ranks of the members within the parent communicator
} MPIObject;
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/operators.c 0000664 0000000 0000000 00000035521 13164413722 0022536 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
//*** The apply operator and some associated structures are        ***//
//*** implemented in two versions: an original version and a       ***//
//*** special OpenMP version.  By default the original version is  ***//
//*** used, but the OpenMP version can be selected by compiling    ***//
//*** gpaw with the macro GPAW_OMP defined and the compile/link    ***//
//*** option "-fopenmp".                                            ***//
//*** Author of the optimized OpenMP code:                          ***//
//*** Mads R. B. Kristensen - madsbk@diku.dk                        ***//
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <pthread.h>
#include <stdlib.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC3 3
#define GPAW_ASYNC2 2
#else
#define GPAW_ASYNC3 1
#define GPAW_ASYNC2 1
#endif
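// GPAW_ASYNC3 covers the three Cartesian directions of the boundary
// exchange and GPAW_ASYNC2 the two halves of the double buffer used by the
// asynchronous workers below; both collapse to 1 when GPAW_ASYNC is unset.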
typedef struct
{
PyObject_HEAD
  bmgsstencil stencil;      // finite-difference stencil (coefficients/offsets)
  boundary_conditions* bc;  // boundary conditions and halo-exchange setup
  MPI_Request recvreq[2];   // receive requests used by the boundary exchange
  MPI_Request sendreq[2];   // send requests used by the boundary exchange
} OperatorObject;
static void Operator_dealloc(OperatorObject *self)
{
free(self->bc);
PyObject_DEL(self);
}
static PyObject * Operator_relax(OperatorObject *self,
PyObject *args)
{
int relax_method;
PyArrayObject* func;
PyArrayObject* source;
int nrelax;
double w = 1.0;
if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source,
&nrelax, &w))
return NULL;
const boundary_conditions* bc = self->bc;
double* fun = DOUBLEP(func);
const double* src = DOUBLEP(source);
const double_complex* ph;
const int* size2 = bc->size2;
double* buf = GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] *
bc->ndouble);
double* sendbuf = GPAW_MALLOC(double, bc->maxsend);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv);
ph = 0;
for (int n = 0; n < nrelax; n++ )
{
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, fun, buf, i,
self->recvreq, self->sendreq,
recvbuf, sendbuf, ph + 2 * i, 0, 1);
bc_unpack2(bc, buf, i,
self->recvreq, self->sendreq, recvbuf, 1);
}
bmgs_relax(relax_method, &self->stencil, buf, fun, src, w);
}
free(recvbuf);
free(sendbuf);
free(buf);
Py_RETURN_NONE;
}
struct apply_args{
int thread_id;
OperatorObject *self;
int ng;
int ng2;
int nin;
int nthds;
int chunksize;
int chunkinc;
const double* in;
double* out;
int real;
const double_complex* ph;
};
//Plain worker
void *apply_worker(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * args->chunksize);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * args->chunksize);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, chunksize);
bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, chunksize);
}
for (int m = 0; m < chunksize; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2, out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Async worker
void *apply_worker_cfd_async(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3];
MPI_Request sendreq[2 * GPAW_ASYNC3];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC3 *
args->chunksize);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC3 *
args->chunksize);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize,
sendbuf + i * bc->maxsend * chunksize, args->ph + 2 * i,
args->thread_id, chunksize);
}
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize, chunksize);
}
for (int m = 0; m < chunksize; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2, out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Double buffering async worker
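// The double-buffered worker overlaps communication and computation: the
// boundary exchange for the next chunk is started (bc_unpack1) before the
// exchange for the current chunk is completed (bc_unpack2) and its stencil
// is applied; `odd` toggles between the two halves of buf/sendbuf/recvbuf.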
void *apply_worker_cfd(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
int chunk = args->chunkinc;
if (chunk > chunksize)
chunk = chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize * GPAW_ASYNC2);
int odd = 0;
const double* in = args->in + nstart * args->ng;
double* out;
for (int i = 0; i < 3; i++)
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, chunk);
odd = odd ^ 1;
int last_chunk = chunk;
for (int n = nstart+chunk; n < nend; n += chunk)
{
last_chunk += args->chunkinc;
if (last_chunk > chunksize)
last_chunk = chunksize;
if (n + last_chunk >= nend && last_chunk > 1)
last_chunk = nend - n;
in = args->in + n * args->ng;
out = args->out + (n-chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, last_chunk);
}
odd = odd ^ 1;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, chunk);
}
for (int m = 0; m < chunk; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
chunk = last_chunk;
}
odd = odd ^ 1;
out = args->out + (nend-last_chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, last_chunk);
}
for (int m = 0; m < last_chunk; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
static PyObject * Operator_apply(OperatorObject *self,
PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
const double_complex* ph;
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
if (real)
ph = 0;
else
ph = COMPLEXP(phases);
int chunksize = 1;
if (getenv("GPAW_CHUNK_SIZE") != NULL)
chunksize = atoi(getenv("GPAW_CHUNK_SIZE"));
int chunkinc = chunksize;
if (getenv("GPAW_CHUNK_INC") != NULL)
chunkinc = atoi(getenv("GPAW_CHUNK_INC"));
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct apply_args *wargs = GPAW_MALLOC(struct apply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->chunksize = chunksize;
(wargs+i)->chunkinc = chunkinc;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifndef GPAW_ASYNC
if (1)
#else
if (bc->cfd == 0)
#endif
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, apply_worker, (void*) (wargs+i));
#endif
apply_worker(wargs);
}
else
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, apply_worker_cfd, (void*) (wargs+i));
#endif
apply_worker_cfd(wargs);
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
static PyObject * Operator_get_diagonal_element(OperatorObject *self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
const bmgsstencil* s = &self->stencil;
double d = 0.0;
for (int n = 0; n < s->ncoefs; n++)
if (s->offsets[n] == 0)
d = s->coefs[n];
return Py_BuildValue("d", d);
}
static PyObject * Operator_get_async_sizes(OperatorObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3);
#else
return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3);
#endif
}
static PyMethodDef Operator_Methods[] = {
{"apply",
(PyCFunction)Operator_apply, METH_VARARGS, NULL},
{"relax",
(PyCFunction)Operator_relax, METH_VARARGS, NULL},
{"get_diagonal_element",
(PyCFunction)Operator_get_diagonal_element, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)Operator_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject OperatorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Operator",
sizeof(OperatorObject),
0,
(destructor)Operator_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"FD-operator object",
0, 0, 0, 0, 0, 0,
Operator_Methods
};
PyObject * NewOperatorObject(PyObject *obj, PyObject *args)
{
PyArrayObject* coefs;
PyArrayObject* offsets;
PyArrayObject* size;
int range;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int cfd;
if (!PyArg_ParseTuple(args, "OOOiOiOi",
&coefs, &offsets, &size, &range, &neighbors,
&real, &comm_obj, &cfd))
return NULL;
OperatorObject *self = PyObject_NEW(OperatorObject, &OperatorType);
if (self == NULL)
return NULL;
self->stencil = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs),
LONGP(offsets), range, LONGP(size));
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long padding[3][2] = {{range, range},
{range, range},
{range, range}};
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
self->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd);
return (PyObject*)self;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/plane_wave.c 0000664 0000000 0000000 00000002037 13164413722 0022635 0 ustar 00root root 0000000 0000000 #include "extensions.h"
#include <complex.h>
PyObject *plane_wave_grid(PyObject *self, PyObject *args)
{
PyArrayObject* beg_c;
PyArrayObject* end_c;
PyArrayObject* h_c;
PyArrayObject* k_c;
PyArrayObject* r0_c;
PyArrayObject* pw_g;
if (!PyArg_ParseTuple(args, "OOOOOO", &beg_c, &end_c, &h_c,
&k_c, &r0_c, &pw_g))
return NULL;
long *beg = LONGP(beg_c);
long *end = LONGP(end_c);
double *h = DOUBLEP(h_c);
double *vk = DOUBLEP(k_c);
double *vr0 = DOUBLEP(r0_c);
double_complex *pw = COMPLEXP(pw_g);
double kr[3], kr0[3];
int n[3], ij;
for (int c = 0; c < 3; c++) {
n[c] = end[c] - beg[c];
kr0[c] = vk[c] * vr0[c];
}
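  // Fill pw_g with exp(i k.(r - r0)) on the grid slice [beg, end); the dot
  // product k.r is accumulated one Cartesian component at a time in kr[].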
for (int i = 0; i < n[0]; i++) {
kr[0] = vk[0] * h[0] * (beg[0] + i) - kr0[0];
for (int j = 0; j < n[1]; j++) {
kr[1] = kr[0] + vk[1] * h[1] * (beg[1] + j) - kr0[1];
ij = (i*n[1] + j)*n[2];
for (int k = 0; k < n[2]; k++) {
kr[2] = kr[1] + vk[2] * h[2] * (beg[2] + k) - kr0[2];
pw[ij + k] = cos(kr[2]) + I * sin(kr[2]);
}
}
}
Py_RETURN_NONE;
}
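/* Illustrative sketch, not part of the original plane_wave.c: the nested
 * loops above accumulate k.(r - r0) one Cartesian component per nesting
 * level. The same value for a single grid point (i, j, k) can be written
 * directly as below (assumes <complex.h> and <math.h>). */
static double complex plane_wave_value(const long beg[3], const double h[3],
                                       const double kvec[3], const double r0[3],
                                       int i, int j, int k)
{
  int idx[3] = {i, j, k};
  double kr = 0.0;
  for (int c = 0; c < 3; c++)
    kr += kvec[c] * (h[c] * (beg[c] + idx[c]) - r0[c]);
  return cos(kr) + I * sin(kr);  /* equal to cexp(I * kr) */
}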
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/plt.c 0000664 0000000 0000000 00000006014 13164413722 0021312 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
int write_plt_file(char *fname,
int nx, int ny, int nz,
double x0, double y0, double z0,
double dx, double dy, double dz,
double *grid);
/* write grid to binary plt (gOpenMol) plot file */
PyObject* WritePLT(PyObject *self, PyObject *args)
{
char* fname; /* file name */
PyArrayObject* ho; /* grid spacings */
PyArrayObject* go; /* grid to write */
if (!PyArg_ParseTuple(args, "sOO", &fname, &ho, &go))
return NULL;
/* must be 3D */
if(PyArray_NDIM(go) != 3) return NULL;
double* g = DOUBLEP(go);
double* h = DOUBLEP(ho);
write_plt_file(fname,
PyArray_DIM(go, 0),
PyArray_DIM(go, 1),
PyArray_DIM(go, 2),
0.,0.,0.,
h[0],h[1],h[2],
g);
Py_RETURN_NONE;
}
/* -----------------------------------------------------------------
* write grid to binary plt (gOpenMol) plot file
*
* x0, dx etc are assumed to be atomic units
* the grid is assumed to be in the format:
* grid(ix,iy,iz) = grid[ ix + ( iy + iz*ny )*nx ];
* where ix=0..nx-1 etc
*/
/* stolen from pltfile.c */
#define FWRITE(value , size) { \
Items = fwrite(&value, size , 1L , Output_p);\
if(Items < 1) {\
printf("?ERROR - in writing contour file (*)\n");\
return(1);}}
int write_plt_file(char *fname,
int nx, int ny, int nz,
double x0, double y0, double z0,
double dx, double dy, double dz,
double *grid) {
FILE *Output_p;
static int Items;
float scale,zmin,zmax,ymin,ymax,xmin,xmax,val;
int rank,TypeOfSurface;
int ix,iy,iz,indx;
double norm,sum,dV;
Output_p = fopen(fname,"wb");
/* see http://www.csc.fi/gopenmol/developers/plt_format.phtml */
#define au_A 0.52917725
scale = au_A; /* atomic length in Angstroem */
rank=3; /* always 3 */
FWRITE(rank , sizeof(int));
TypeOfSurface=4; /* arbitrary */
FWRITE(TypeOfSurface , sizeof(int));
FWRITE(nz , sizeof(int));
FWRITE(ny , sizeof(int));
FWRITE(nx , sizeof(int));
zmin= scale * ((float) z0);
zmax= scale * ((float) z0+(nz-1)*dz);
/* float zmax=(float) z0+nz*dz; */
FWRITE(zmin , sizeof(float));
FWRITE(zmax , sizeof(float));
ymin= scale * ((float) y0);
ymax= scale * ((float) y0+(ny-1)*dy);
/* float ymax=(float) y0+ny*dy; */
FWRITE(ymin , sizeof(float));
FWRITE(ymax , sizeof(float));
xmin= scale * ((float) x0);
xmax= scale * ((float) x0+(nx-1)*dx);
/* float xmax=(float) x0+nx*dx; */
FWRITE(xmin , sizeof(float));
FWRITE(xmax , sizeof(float));
indx=0;
norm = 0;
sum=0;
dV=dx*dy*dz;
for(iz=0;iz<nz;iz++)
  for(iy=0;iy<ny;iy++)
    for(ix=0;ix<nx;ix++) {
      indx = ix + ( iy + iz*ny )*nx;
      val = (float) grid[indx];
      FWRITE(val , sizeof(float));
      sum += val;
      norm += val*val;
    }
fclose(Output_p);
printf("write_plt_file: %s written (sum=%g,norm=%g)\n",
       fname,sum*dV,norm*dV);
return 0;
}
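/* Illustrative sketch, not part of the original plt.c: the x-fastest index
 * convention documented above, grid(ix,iy,iz) = grid[ix + (iy + iz*ny)*nx],
 * written out as a helper. write_plt_file() therefore loops with iz
 * outermost and ix innermost so the data are streamed in memory order. */
static int plt_index(int ix, int iy, int iz, int nx, int ny)
{
  return ix + (iy + iz * ny) * nx;
}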
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/point_charges.c 0000664 0000000 0000000 00000013040 13164413722 0023335 0 ustar 00root root 0000000 0000000 #include "extensions.h"
//#include
PyObject *pc_potential(PyObject *self, PyObject *args)
{
PyArrayObject* beg_v_obj;
PyArrayObject* h_v_obj;
PyArrayObject* q_p_obj;
PyArrayObject* R_pv_obj;
double rc;
double rc2;
double width;
PyArrayObject* vext_G_obj;
PyArrayObject* dcom_pv_obj;
PyArrayObject* rhot_G_obj = 0;
PyArrayObject* F_pv_obj = 0;
if (!PyArg_ParseTuple(args, "OOOOdddOO|OO", &beg_v_obj, &h_v_obj, &q_p_obj,
&R_pv_obj, &rc, &rc2, &width,
&vext_G_obj, &dcom_pv_obj, &rhot_G_obj, &F_pv_obj))
return NULL;
const long *beg_v = PyArray_DATA(beg_v_obj);
const double *h_v = PyArray_DATA(h_v_obj);
const double *q_p = PyArray_DATA(q_p_obj);
const double *R_pv = PyArray_DATA(R_pv_obj);
const double *dcom_pv = 0;
if ((PyObject*)dcom_pv_obj != Py_None)
dcom_pv = PyArray_DATA(dcom_pv_obj);
double *vext_G = PyArray_DATA(vext_G_obj);
int np = PyArray_DIM(R_pv_obj, 0);
npy_intp* n = PyArray_DIMS(vext_G_obj);
const double* rhot_G = 0;
double* F_pv = 0;
double dV = 0.0;
if (F_pv_obj != 0) {
// Handle the two extra arguments for the force calculation:
rhot_G = PyArray_DATA(rhot_G_obj);
F_pv = PyArray_DATA(F_pv_obj);
dV = h_v[0] * h_v[1] * h_v[2];
}
double rc12 = rc2 - width;
for (int i = 0; i < n[0]; i++) {
double x = (beg_v[0] + i) * h_v[0];
for (int j = 0; j < n[1]; j++) {
double y = (beg_v[1] + j) * h_v[1];
int ij = (i * n[1] + j) * n[2];
for (int k = 0; k < n[2]; k++) {
double z = (beg_v[2] + k) * h_v[2];
for (int p = 0; p < np; p++) {
const double* R_v = R_pv + 3 * p;
double dx = R_v[0] - x;
double dy = R_v[1] - y;
double dz = R_v[2] - z;
double d = sqrt(dx * dx + dy * dy + dz * dz);
double dc, dxc, dyc, dzc;
if (dcom_pv == 0) {
dc = d;
dxc = dx;
dyc = dy;
dzc = dz;
} else {
const double* dcom_v = dcom_pv + 3 * p;
dxc = dcom_v[0];
dyc = dcom_v[1];
dzc = dcom_v[2];
dc = sqrt(dxc * dxc + dyc * dyc + dzc * dzc);
}
int G = ij + k;
if (F_pv == 0) {
// Calculate potential:
double v;
if (rc < 0.0)
v = (q_p[p] * (d * d * d * d - rc * rc * rc * rc) /
(d * d * d * d * d + rc * rc * rc * rc * rc));
else
if (dc > rc2)
v = 0.0;
else if (dc > rc12) {
double x = (dc - rc12) / width;
v = q_p[p] * (1 - x * x * (3 - 2 * x)) / d;
}
else if (d > rc)
v = q_p[p] / d;
else {
double s = d / rc;
double s2 = s * s;
v = q_p[p] * (3.28125 +
s2 * (-5.46875 +
s2 * (4.59375 +
s2 * -1.40625))) / rc;
}
vext_G[G] -= v;
}
else {
// Calculate forces:
double w; // -(dv/dr)/r
double o = 0.0;
if (rc < 0.0) {
double x = (d * d * d * d * d +
rc * rc * rc * rc * rc);
w = ((d * d * d * d - rc * rc * rc * rc) /
(x * x) * 5 * d * d * d -
4 * d * d / x);
}
else
if (dc > rc2)
w = 0.0;
else if (dc > rc12) {
double x = (dc - rc12) / width;
w = (1 - x * x * (3 - 2 * x)) / (d * d * d);
o = 6 * x * (1 - x) / (width * dc * d);
}
else if (d > rc)
w = 1 / (d * d * d);
else {
double s = d / rc;
double s2 = s * s;
w = (-2 * (-5.46875 +
s2 * (2 * 4.59375 +
s2 * 3 * -1.40625)) /
(rc * rc * rc));
}
w *= q_p[p] * rhot_G[G] * dV;
o *= q_p[p] * rhot_G[G] * dV;
double* F_v = F_pv + 3 * p;
F_v[0] -= w * dx + o * dxc;
F_v[1] -= w * dy + o * dyc;
F_v[2] -= w * dz + o * dzc;
}
}
}
}
}
Py_RETURN_NONE;
}
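/* Illustrative sketch, not part of the original point_charges.c: the
 * smoothed point-charge potential used above for d < rc, factored out as a
 * helper. At s = d/rc = 1 the polynomial 3.28125 - 5.46875 + 4.59375
 * - 1.40625 sums to 1, so the smoothed value joins the bare Coulomb form
 * q/d continuously at the matching radius. */
static double smoothed_pc_potential(double q, double d, double rc)
{
  if (d > rc)
    return q / d;
  double s2 = (d / rc) * (d / rc);
  return q * (3.28125 + s2 * (-5.46875 + s2 * (4.59375 + s2 * -1.40625))) / rc;
}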
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/spline.c 0000664 0000000 0000000 00000007511 13164413722 0022010 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "spline.h"
static void spline_dealloc(SplineObject *xp)
{
bmgs_deletespline(&xp->spline);
PyObject_DEL(xp);
}
static PyObject * spline_get_cutoff(SplineObject *self, PyObject *args)
{
return Py_BuildValue("d", self->spline.dr * self->spline.nbins);
}
static PyObject * spline_get_angular_momentum_number(SplineObject *self,
PyObject *args)
{
return Py_BuildValue("i", self->spline.l);
}
static PyObject * spline_get_value_and_derivative(SplineObject *obj,
PyObject *args,
PyObject *kwargs)
{
double r;
if (!PyArg_ParseTuple(args, "d", &r))
return NULL;
double f;
double dfdr;
bmgs_get_value_and_derivative(&obj->spline, r, &f, &dfdr);
return Py_BuildValue("(dd)", f, dfdr);
}
// Convert boundary point z-ranges to grid indices for the 2*l+1 boxes
static PyObject * spline_get_indices_from_zranges(SplineObject *self,
PyObject *args)
{
PyArrayObject* beg_c_obj;
PyArrayObject* end_c_obj;
PyArrayObject* G_b_obj;
int nm = 2 * self->spline.l + 1;
if (!PyArg_ParseTuple(args, "OOO", &beg_c_obj, &end_c_obj, &G_b_obj))
return NULL;
long* beg_c = LONGP(beg_c_obj);
long* end_c = LONGP(end_c_obj);
int ngmax = ((end_c[0] - beg_c[0]) *
(end_c[1] - beg_c[1]) *
(end_c[2] - beg_c[2]));
int* G_B = INTP(G_b_obj);
int nB = PyArray_DIMS(G_b_obj)[0];
int ng = 0;
for (int b = 0; b < nB; b+=2)
ng += G_B[b+1]-G_B[b];
npy_intp gm_dims[2] = {ng, nm};
PyArrayObject* indices_gm_obj = (PyArrayObject*)PyArray_SimpleNew(2, gm_dims,
NPY_INT);
int* p = INTP(indices_gm_obj);
for (int b = 0; b < nB; b += 2) {
int Ga = G_B[b], Gb = G_B[b+1];
for (int G = Ga; G < Gb; G++)
for (int m = 0; m < nm; m++)
*p++ = m * ngmax + G;
}
// PyObjects created in the C code will be initialized with a refcount
// of 1, for which reason we'll have to decref them when done here
PyObject* values = Py_BuildValue("(Oii)", indices_gm_obj, ng, nm);
Py_DECREF(indices_gm_obj);
return values;
}
static PyMethodDef spline_methods[] = {
{"get_cutoff",
(PyCFunction)spline_get_cutoff, METH_VARARGS, 0},
{"get_angular_momentum_number",
(PyCFunction)spline_get_angular_momentum_number, METH_VARARGS, 0},
{"get_value_and_derivative",
(PyCFunction)spline_get_value_and_derivative, METH_VARARGS, 0},
{"get_indices_from_zranges",
(PyCFunction)spline_get_indices_from_zranges, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
static PyObject * spline_call(SplineObject *obj, PyObject *args,
PyObject *kwargs)
{
double r;
if (!PyArg_ParseTuple(args, "d", &r))
return NULL;
return Py_BuildValue("d", bmgs_splinevalue(&obj->spline, r));
}
PyTypeObject SplineType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Spline",
sizeof(SplineObject), 0,
(destructor)spline_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0,
(ternaryfunc)spline_call,
0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"Spline object",
0, 0, 0, 0, 0, 0,
spline_methods
};
PyObject * NewSplineObject(PyObject *self, PyObject *args)
{
int l;
double rcut;
PyArrayObject* farray;
if (!PyArg_ParseTuple(args, "idO", &l, &rcut, &farray))
return NULL;
SplineObject *spline = PyObject_NEW(SplineObject, &SplineType);
if (spline == NULL)
return NULL;
int nbins = PyArray_DIMS(farray)[0] - 1;
double dr = rcut / nbins;
spline->spline = bmgs_spline(l, dr, nbins, DOUBLEP(farray));
return (PyObject*)spline;
}
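/* Illustrative sketch, not part of the original spline.c: the relation
 * between the constructor arguments parsed in NewSplineObject() and the
 * value returned by spline_get_cutoff(). With nbins = npoints - 1 and
 * dr = rcut / nbins, the reported cutoff dr * nbins simply reproduces rcut
 * (up to floating-point rounding). */
static double spline_cutoff_from_args(double rcut, int npoints)
{
  int nbins = npoints - 1;   /* as in NewSplineObject */
  double dr = rcut / nbins;
  return dr * nbins;         /* what get_cutoff() reports */
}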
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/spline.h 0000664 0000000 0000000 00000000343 13164413722 0022011 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
#include "bmgs/bmgs.h"
typedef struct
{
PyObject_HEAD
bmgsspline spline;
} SplineObject;
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/symmetry.c 0000664 0000000 0000000 00000017205 13164413722 0022410 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2010-2011 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
//
// Apply symmetry operation op_cc to a and add result to b:
//
//   b(U^T g) += a(g),
//
// where
//
//   U = op_cc  (so U[c1,c2] = op_cc[c1, c2])  and  g = (g0, g1, g2)^T.
//
PyObject* symmetrize(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
if (!PyArg_ParseTuple(args, "OOO", &a_g_obj, &b_g_obj, &op_cc_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const double* a_g = (const double*)PyArray_DATA(a_g_obj);
double* b_g = (double*)PyArray_DATA(b_g_obj);
#pragma omp simd
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
b_g[(p0 * ng1 + p1) * ng2 + p2] += a_g[(g0 * ng1 + g1) * ng2 + g2];
}
Py_RETURN_NONE;
}
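/* Illustrative sketch, not part of the original symmetry.c: the index
 * wrapping used in the loops above. C's % operator may return a negative
 * remainder for negative arguments, so the rotated grid index is folded
 * back into [0, n) with a double modulo. */
static int wrap_index(long x, int n)
{
  return (int)((x % n + n) % n);
}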
PyObject* symmetrize_ft(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* ft_c_obj;
if (!PyArg_ParseTuple(args, "OOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &ft_c_obj))
return NULL;
const double* ft = (const double*)PyArray_DATA(ft_c_obj);
const long* C = (const long*)PyArray_DATA(op_cc_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
int ft0 = (int)(ft[0]*ng0);
int ft1 = (int)(ft[1]*ng1);
int ft2 = (int)(ft[2]*ng2);
const double* a_g = (const double*)PyArray_DATA(a_g_obj);
double* b_g = (double*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2 - ft0) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2 - ft1) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2 - ft2) % ng2 + ng2) % ng2;
b_g[(p0 * ng1 + p1) * ng2 + p2] += *a_g++;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_wavefunction(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* kpt0_obj;
PyArrayObject* kpt1_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &kpt0_obj, &kpt1_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
const double* kpt0 = (const double*) PyArray_DATA(kpt0_obj);
const double* kpt1 = (const double*) PyArray_DATA(kpt1_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const double complex* a_g = (const double complex*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
double complex phase = cexp(I * 2. * M_PI *
(kpt1[0]/ng0*p0 +
kpt1[1]/ng1*p1 +
kpt1[2]/ng2*p2 -
kpt0[0]/ng0*g0 -
kpt0[1]/ng1*g1 -
kpt0[2]/ng2*g2));
b_g[(p0 * ng1 + p1) * ng2 + p2] += (*a_g * phase);
a_g++;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_return_index(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* kpt0_obj;
PyArrayObject* kpt1_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &kpt0_obj, &kpt1_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
const double* kpt0 = (const double*) PyArray_DATA(kpt0_obj);
const double* kpt1 = (const double*) PyArray_DATA(kpt1_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
unsigned long* a_g = (unsigned long*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
double complex phase = cexp(I * 2. * M_PI *
(kpt1[0]/ng0*p0 +
kpt1[1]/ng1*p1 +
kpt1[2]/ng2*p2 -
kpt0[0]/ng0*g0 -
kpt0[1]/ng1*g1 -
kpt0[2]/ng2*g2));
*a_g++ = (p0 * ng1 + p1) * ng2 + p2;
*b_g++ = phase;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_with_index(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* index_g_obj;
PyArrayObject* phase_g_obj;
if (!PyArg_ParseTuple(args, "OOOO", &a_g_obj, &b_g_obj, &index_g_obj, &phase_g_obj))
return NULL;
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const unsigned long* index_g = (const unsigned long*)PyArray_DATA(index_g_obj);
const double complex* phase_g = (const double complex*)PyArray_DATA(phase_g_obj);
const double complex* a_g = (const double complex*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
b_g[*index_g] += (*a_g * *phase_g);
a_g++;
phase_g++;
index_g++;
}
Py_RETURN_NONE;
}
PyObject* map_k_points(PyObject *self, PyObject *args)
{
PyArrayObject* bzk_kc_obj;
PyArrayObject* U_scc_obj;
double tol;
PyArrayObject* bz2bz_ks_obj;
int ka, kb;
if (!PyArg_ParseTuple(args, "OOdOii", &bzk_kc_obj, &U_scc_obj,
&tol, &bz2bz_ks_obj, &ka, &kb))
return NULL;
const long* U_scc = (const long*)PyArray_DATA(U_scc_obj);
const double* bzk_kc = (const double*)PyArray_DATA(bzk_kc_obj);
long* bz2bz_ks = (long*)PyArray_DATA(bz2bz_ks_obj);
int nbzkpts = PyArray_DIMS(bzk_kc_obj)[0];
int nsym = PyArray_DIMS(U_scc_obj)[0];
for (int k1 = ka; k1 < kb; k1++) {
const double* q = bzk_kc + k1 * 3;
for (int s = 0; s < nsym; s++) {
const long* U = U_scc + s * 9;
double q0 = U[0] * q[0] + U[1] * q[1] + U[2] * q[2];
double q1 = U[3] * q[0] + U[4] * q[1] + U[5] * q[2];
double q2 = U[6] * q[0] + U[7] * q[1] + U[8] * q[2];
for (int k2 = 0; k2 < nbzkpts; k2++) {
double p0 = q0 - bzk_kc[k2 * 3];
if (fabs(p0 - round(p0)) > tol)
continue;
double p1 = q1 - bzk_kc[k2 * 3 + 1];
if (fabs(p1 - round(p1)) > tol)
continue;
double p2 = q2 - bzk_kc[k2 * 3 + 2];
if (fabs(p2 - round(p2)) > tol)
continue;
bz2bz_ks[k1 * nsym + s] = k2;
break;
}
}
}
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/tetra.c 0000664 0000000 0000000 00000007412 13164413722 0021635 0 ustar 00root root 0000000 0000000 #include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
int compare_doubles (const void *a, const void *b)
{
const double *da = (const double *) a;
const double *db = (const double *) b;
return (*da > *db) - (*da < *db);
}
PyObject* tetrahedron_weight(PyObject *self, PyObject *args)
{
PyArrayObject* epsilon_k;
int K;
PyArrayObject* allsimplices_sk;
PyArrayObject* simplices_s;
PyArrayObject* Win_w;
PyArrayObject* omega_w;
PyArrayObject* vol_s;
double f10, f20, f21, f30, f31, f32;
double f01, f02, f12, f03, f13, f23;
double omega;
if (!PyArg_ParseTuple(args, "OOiOOOO",
&epsilon_k, &allsimplices_sk, &K,
&simplices_s, &Win_w, &omega_w,
&vol_s))
return NULL;
int nsimplex = PyArray_DIMS(simplices_s)[0];
int nw = PyArray_DIMS(omega_w)[0];
double* e_k = (double*)PyArray_DATA(epsilon_k);
double* o_w = (double*)PyArray_DATA(omega_w);
double* W_w = (double*)PyArray_DATA(Win_w);
long* s_s = (long*)PyArray_DATA(simplices_s);
int* alls_sk = (int*)PyArray_DATA(allsimplices_sk);
double* v_s = (double*)PyArray_DATA(vol_s);
double* et_k = GPAW_MALLOC(double, 4);
double gw = 0;
double Iw = 0;
double delta = 0;
int relk = 0;
double ek = 0;
for (int s = 0; s < nsimplex; s++) {
relk = 0;
for (int k = 0; k < 4; k++) {
et_k[k] = e_k[alls_sk[s_s[s] * 4 + k]];
}
ek = e_k[K];
for (int k = 0; k < 4; k++) {
if (et_k[k] < ek) {
relk += 1;
}
}
qsort(et_k, 4, sizeof (double), compare_doubles);
delta = et_k[3] - et_k[0];
for (int w = 0; w < nw; w++) {
Iw = 0;
gw = 0;
omega = o_w[w];
f10 = (omega - et_k[0]) / (et_k[1] - et_k[0]);
f20 = (omega - et_k[0]) / (et_k[2] - et_k[0]);
f21 = (omega - et_k[1]) / (et_k[2] - et_k[1]);
f30 = (omega - et_k[0]) / (et_k[3] - et_k[0]);
f31 = (omega - et_k[1]) / (et_k[3] - et_k[1]);
f32 = (omega - et_k[2]) / (et_k[3] - et_k[2]);
f01 = 1 - f10;
f02 = 1 - f20;
f03 = 1 - f30;
f12 = 1 - f21;
f13 = 1 - f31;
f23 = 1 - f32;
if (et_k[1] != et_k[0] && et_k[0] <= omega && omega <= et_k[1])
{
gw = 3 * f20 * f30 / (et_k[1] - et_k[0]);
switch (relk) {
case 0:
Iw = (f01 + f02 + f03) / 3;
break;
case 1:
Iw = f10 / 3;
break;
case 2:
Iw = f20 / 3;
break;
case 3:
Iw = f30 / 3;
break;
}
}
else if (et_k[1] != et_k[2] && et_k[1] < omega && omega < et_k[2])
{
gw = 3 / delta * (f12 * f20 + f21 * f13);
switch (relk) {
case 0:
Iw = f03 / 3 + f02 * f20 * f12 / (gw * delta);
break;
case 1:
Iw = f12 / 3 + f13 * f13 * f21 / (gw * delta);
break;
case 2:
Iw = f21 / 3 + f20 * f20 * f12 / (gw * delta);
break;
case 3:
Iw = f30 / 3 + f31 * f13 * f21 / (gw * delta);
break;
}
}
else if (et_k[2] != et_k[3] && et_k[2] <= omega && omega <= et_k[3])
{
gw = 3 * f03 * f13 / (et_k[3] - et_k[2]);
switch (relk) {
case 0:
Iw = f03 / 3;
break;
case 1:
Iw = f13 / 3;
break;
case 2:
Iw = f23 / 3;
break;
case 3:
Iw = (f30 + f31 + f32) / 3;
break;
}
}
else {
continue;
}
W_w[w] += v_s[s] * Iw * gw;
}
}
free(et_k);
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/transformers.c 0000664 0000000 0000000 00000015607 13164413722 0023250 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005-2009 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#include <pthread.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#include "bmgs/bmgs.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC_D 3
#else
#define GPAW_ASYNC_D 1
#endif
typedef struct
{
PyObject_HEAD
boundary_conditions* bc;
int p;
int k;
bool interpolate;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int skip[3][2];
int size_out[3]; /* Size of the output grid */
} TransformerObject;
static void Transformer_dealloc(TransformerObject *self)
{
free(self->bc);
PyObject_DEL(self);
}
struct transapply_args{
int thread_id;
TransformerObject *self;
int ng;
int ng2;
int nin;
int nthds;
const double* in;
double* out;
int real;
const double_complex* ph;
};
void *transapply_worker(void *threadarg)
{
struct transapply_args *args = (struct transapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
TransformerObject *self = args->self;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC_D);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC_D);
double* buf = GPAW_MALLOC(double, args->ng2);
int buf2size = args->ng2;
if (self->interpolate)
buf2size *= 16;
else
buf2size /= 2;
double* buf2 = GPAW_MALLOC(double, buf2size);
MPI_Request recvreq[2 * GPAW_ASYNC_D];
MPI_Request sendreq[2 * GPAW_ASYNC_D];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
int out_ng = bc->ndouble * self->size_out[0] * self->size_out[1]
* self->size_out[2];
for (int n = nstart; n < nend; n++)
{
const double* in = args->in + n * args->ng;
double* out = args->out + n * out_ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, 1);
bc_unpack2(bc, buf, i,
recvreq, sendreq, recvbuf, 1);
}
if (args->real)
{
if (self->interpolate)
bmgs_interpolate(self->k, self->skip, buf, bc->size2,
out, buf2);
else
bmgs_restrict(self->k, buf, bc->size2,
out, buf2);
}
else
{
if (self->interpolate)
bmgs_interpolatez(self->k, self->skip, (double_complex*)buf,
bc->size2, (double_complex*)out,
(double_complex*) buf2);
else
bmgs_restrictz(self->k, (double_complex*) buf,
bc->size2, (double_complex*)out,
(double_complex*) buf2);
}
}
free(buf2);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
static PyObject* Transformer_apply(TransformerObject *self, PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
const double_complex* ph = (real ? 0 : COMPLEXP(phases));
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct transapply_args *wargs = GPAW_MALLOC(struct transapply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, transapply_worker, (void*) (wargs+i));
#endif
transapply_worker(wargs);
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
static PyObject * Transformer_get_async_sizes(TransformerObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(ii)", 1, GPAW_ASYNC_D);
#else
return Py_BuildValue("(ii)", 0, GPAW_ASYNC_D);
#endif
}
static PyMethodDef Transformer_Methods[] = {
{"apply", (PyCFunction)Transformer_apply, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)Transformer_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject TransformerType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Transformer",
sizeof(TransformerObject),
0,
(destructor)Transformer_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"Transformer object",
0, 0, 0, 0, 0, 0,
Transformer_Methods
};
PyObject * NewTransformerObject(PyObject *obj, PyObject *args)
{
PyArrayObject* size_in;
PyArrayObject* size_out;
int k;
PyArrayObject* paddings;
PyArrayObject* npaddings;
PyArrayObject* skip;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int interpolate;
if (!PyArg_ParseTuple(args, "OOiOOOOiOi",
&size_in, &size_out, &k, &paddings, &npaddings, &skip,
&neighbors, &real, &comm_obj,
&interpolate))
return NULL;
TransformerObject* self = PyObject_NEW(TransformerObject, &TransformerType);
if (self == NULL)
return NULL;
self->k = k;
self->interpolate = interpolate;
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long (*pad)[2] = (const long (*)[2])LONGP(paddings);
const long (*npad)[2] = (const long (*)[2])LONGP(npaddings);
const long (*skp)[2] = (const long (*)[2])LONGP(skip);
self->bc = bc_init(LONGP(size_in), pad, npad, nb, comm, real, 0);
for (int c = 0; c < 3; c++)
self->size_out[c] = LONGP(size_out)[c];
for (int c = 0; c < 3; c++)
for (int d = 0; d < 2; d++)
self->skip[c][d] = (int)skp[c][d];
return (PyObject*)self;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/utilities.c 0000664 0000000 0000000 00000054064 13164413722 0022536 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2008-2010 CSC - IT Center for Science Ltd.
* Copyright (C) 2011 Argonne National Laboratory
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#include
#include
#ifdef __DARWIN_UNIX03
/* Allows for special Mac OS magic */
#include <malloc/malloc.h>
#endif
#ifdef __linux__
/* stdlib.h does not define mallinfo (it should!) */
#include <malloc.h>
#endif
#ifdef GPAW_HPM
void HPM_Start(char *);
void HPM_Stop(char *);
void summary_start(void);
void summary_stop(void);
PyObject* ibm_hpm_start(PyObject *self, PyObject *args)
{
char* s;
if (!PyArg_ParseTuple(args, "s", &s))
return NULL;
HPM_Start(s);
Py_RETURN_NONE;
}
PyObject* ibm_hpm_stop(PyObject *self, PyObject *args)
{
char* s;
if (!PyArg_ParseTuple(args, "s", &s))
return NULL;
HPM_Stop(s);
Py_RETURN_NONE;
}
PyObject* ibm_mpi_start(PyObject *self)
{
summary_start();
Py_RETURN_NONE;
}
PyObject* ibm_mpi_stop(PyObject *self)
{
summary_stop();
Py_RETURN_NONE;
}
#endif
#ifdef CRAYPAT
#include <pat_api.h>
PyObject* craypat_region_begin(PyObject *self, PyObject *args)
{
int n;
char* s;
if (!PyArg_ParseTuple(args, "is", &n, &s))
return NULL;
PAT_region_begin(n, s);
Py_RETURN_NONE;
}
PyObject* craypat_region_end(PyObject *self, PyObject *args)
{
int n;
if (!PyArg_ParseTuple(args, "i", &n))
return NULL;
PAT_region_end(n);
Py_RETURN_NONE;
}
#endif
#ifdef PARALLEL
#include <mpi.h>
struct eval {
double val;
int rank;
};
static void coll_print(FILE *fp, const char *label, double val,
int print_aggregate, MPI_Comm Comm){
double sum;
struct eval in;
struct eval out;
int rank, numranks;
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
in.val=val;
in.rank=rank;
MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm);
if(rank==0) {
if(print_aggregate)
fprintf(fp,"#%19s %14.3f %10.3f ",label,sum,sum/numranks);
else
fprintf(fp,"#%19s %10.3f ",label,sum/numranks);
}
MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm);
if(rank==0){
fprintf(fp,"%4d %10.3f ", out.rank, out.val);
}
MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm);
if(rank==0){
fprintf(fp,"%4d %10.3f\n",out.rank, out.val);
}
}
// Utilities for performance measurement with PAPI
#ifdef GPAW_PAPI
#include <papi.h>
#define NUM_PAPI_EV 1
static long_long papi_start_usec_p;
static long_long papi_start_usec_r;
// Returns PAPI_dmem_info structure in Python dictionary
// Units used by PAPI are kB
PyObject* papi_mem_info(PyObject *self, PyObject *args)
{
PAPI_dmem_info_t dmem;
PyObject* py_dmem;
PAPI_get_dmem_info(&dmem);
py_dmem = PyDict_New();
PyDict_SetItemString(py_dmem, "peak", PyLong_FromLongLong(dmem.peak));
PyDict_SetItemString(py_dmem, "size", PyLong_FromLongLong(dmem.size));
PyDict_SetItemString(py_dmem, "resident", PyLong_FromLongLong(dmem.resident));
PyDict_SetItemString(py_dmem, "high_water_mark",
PyLong_FromLongLong(dmem.high_water_mark));
PyDict_SetItemString(py_dmem, "shared", PyLong_FromLongLong(dmem.shared));
PyDict_SetItemString(py_dmem, "text", PyLong_FromLongLong(dmem.text));
PyDict_SetItemString(py_dmem, "library", PyLong_FromLongLong(dmem.library));
PyDict_SetItemString(py_dmem, "heap", PyLong_FromLongLong(dmem.heap));
PyDict_SetItemString(py_dmem, "stack", PyLong_FromLongLong(dmem.stack));
PyDict_SetItemString(py_dmem, "pagesize", PyLong_FromLongLong(dmem.pagesize));
PyDict_SetItemString(py_dmem, "pte", PyLong_FromLongLong(dmem.pte));
return py_dmem;
}
int gpaw_perf_init()
{
int events[NUM_PAPI_EV];
events[0] = PAPI_FP_OPS;
// events[1] = PAPI_L1_DCM;
// events[2] = PAPI_L1_DCH;
// events[3] = PAPI_TOT_INS;
PAPI_start_counters(events, NUM_PAPI_EV);
papi_start_usec_r = PAPI_get_real_usec();
papi_start_usec_p = PAPI_get_virt_usec();
return 0;
}
void gpaw_perf_finalize()
{
long long papi_values[NUM_PAPI_EV];
double rtime,ptime;
double avegflops;
double gflop_opers;
PAPI_dmem_info_t dmem;
int error = 0;
double l1hitratio;
long_long papi_end_usec_p;
long_long papi_end_usec_r;
int rank, numranks;
MPI_Comm Comm = MPI_COMM_WORLD;
// Get PAPI info; the first call initializes the PAPI counters
papi_end_usec_r = PAPI_get_real_usec();
papi_end_usec_p = PAPI_get_virt_usec();
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
FILE *fp;
if (rank == 0)
fp = fopen("gpaw_perf.log", "w");
else
fp = NULL;
if(PAPI_read_counters(papi_values, NUM_PAPI_EV) != PAPI_OK)
error++;
if(PAPI_get_dmem_info(&dmem) != PAPI_OK)
error++;
rtime=(double)(papi_end_usec_r - papi_start_usec_r)/1e6;
ptime=(double)(papi_end_usec_p - papi_start_usec_p)/1e6;
avegflops=(double)papi_values[0]/rtime/1e9;
gflop_opers = (double)papi_values[0]/1e9;
// l1hitratio=100.0*(double)papi_values[1]/(papi_values[0] + papi_values[1]);
if (rank==0 ) {
fprintf(fp,"######## GPAW PERFORMANCE REPORT (PAPI) ########\n");
fprintf(fp,"# MPI tasks %d\n", numranks);
fprintf(fp,"# aggregated average min(rank/val) max(rank/val) \n");
}
coll_print(fp, "Real time (s)", rtime, 1, Comm);
coll_print(fp, "Process time (s)", ptime, 1, Comm);
coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm);
coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm);
// coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm);
coll_print(fp, "Peak mem size (MB)", (double)dmem.peak/1.0e3, 0, Comm );
coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark/1.0e3 ,
0, Comm);
if(rank==0) {
fflush(fp);
fclose(fp);
}
}
#elif GPAW_HPM
void HPM_Start(char *);
int gpaw_perf_init()
{
HPM_Start("GPAW");
return 0;
}
void gpaw_perf_finalize()
{
HPM_Stop("GPAW");
}
#else // Use just MPI_Wtime
static double t0;
int gpaw_perf_init(void)
{
t0 = MPI_Wtime();
return 0;
}
void gpaw_perf_finalize(void)
{
double rtime;
int rank, numranks;
MPI_Comm Comm = MPI_COMM_WORLD;
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
double t1 = MPI_Wtime();
rtime = t1 - t0;
FILE *fp;
if (rank == 0)
fp = fopen("gpaw_perf.log", "w");
else
fp = NULL;
if (rank==0 ) {
fprintf(fp,"######## GPAW PERFORMANCE REPORT (MPI_Wtime) ########\n");
fprintf(fp,"# MPI tasks %d\n", numranks);
fprintf(fp,"# aggregated average min(rank/val) max(rank/val) \n");
}
coll_print(fp, "Real time (s)", rtime, 1, Comm);
if(rank==0) {
fflush(fp);
fclose(fp);
}
}
#endif
#endif
// returns the distance between two 3d double vectors
double distance(double *a, double *b)
{
double sum = 0;
double diff;
for (int c = 0; c < 3; c++) {
diff = a[c] - b[c];
sum += diff*diff;
}
return sqrt(sum);
}
/* Get heap memory usage using mallinfo.
   There is a UNIX version and a Mac OS X version; the latter is not well
   tested but seems to give credible values in simple tests. */
PyObject* heap_mallinfo(PyObject *self)
{
double heap;
#ifdef __linux__
unsigned int mmap, arena, small;
struct mallinfo mi; /* structure in bytes */
mi = mallinfo();
mmap = mi.hblkhd;
arena = mi.uordblks;
small = mi.usmblks;
heap = ((double)(mmap + arena + small))/1024.0; /* convert to KB */
#elif defined(__DARWIN_UNIX03)
/* Mac OS X specific hack */
struct malloc_statistics_t mi; /* structure in bytes */
malloc_zone_statistics(NULL, &mi);
heap = ((double)(mi.size_in_use))/1024.0; /* convert to KB */
#else
heap = -1;
#endif
return Py_BuildValue("d",heap);
}
/* elementwise multiply and add result to another vector
*
* c[i] += a[i] * b[i] , for i = every element in the vectors
*/
PyObject* elementwise_multiply_add(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
PyArrayObject* cc;
if (!PyArg_ParseTuple(args, "OOO", &aa, &bb, &cc))
return NULL;
const double* const a = DOUBLEP(aa);
const double* const b = DOUBLEP(bb);
double* const c = DOUBLEP(cc);
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
c[i] += a[i] * b[i];
}
Py_RETURN_NONE;
}
PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args)
{
Py_complex A_obj;
PyArrayObject* r_cG_obj;
PyArrayObject* r0_c_obj;
Py_complex sigma_obj; // imaginary part ignored
PyArrayObject* k_c_obj;
PyArrayObject* gs_G_obj;
if (!PyArg_ParseTuple(args, "DOODOO", &A_obj, &r_cG_obj, &r0_c_obj, &sigma_obj, &k_c_obj, &gs_G_obj))
return NULL;
int C, G;
C = PyArray_DIMS(r_cG_obj)[0];
G = PyArray_DIMS(r_cG_obj)[1];
for (int i = 2; i < PyArray_NDIM(r_cG_obj); i++)
G *= PyArray_DIMS(r_cG_obj)[i];
double* r_cG = DOUBLEP(r_cG_obj); // XXX not ideally strided
double* r0_c = DOUBLEP(r0_c_obj);
double dr2, kr, alpha = -0.5/pow(sigma_obj.real, 2);
int gammapoint = 1;
double* k_c = DOUBLEP(k_c_obj);
for (int c=0; c<C; c++)
  if (k_c[c] != 0.0)
    gammapoint = 0;
if (PyArray_DESCR(gs_G_obj)->type_num == NPY_DOUBLE)
{
double* gs_G = DOUBLEP(gs_G_obj);
if(gammapoint)
for(int g=0; g0)
for(int g=0; g 1 ?
}
}
else
{
double_complex* gs_G = COMPLEXP(gs_G_obj);
double_complex A = A_obj.real+I*A_obj.imag;
if(gammapoint)
for(int g=0; g0)
for(int g=0; g 1 ?
}
}
Py_RETURN_NONE;
}
/* vdot
*
* If a and b are input vectors,
* a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + ...
* is returned.
*/
PyObject* utilities_vdot(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* const a = DOUBLEP(aa);
const double* const b = DOUBLEP(bb);
double sum = 0.0;
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
sum += a[i] * b[i];
}
return PyFloat_FromDouble(sum);
}
/* vdot_self
*
* If a is the input vector,
* a[0]*a[0] + a[1]*a[1] + a[2]*a[2] + ...
* is returned.
*/
PyObject* utilities_vdot_self(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
if (!PyArg_ParseTuple(args, "O", &aa))
return NULL;
const double* const a = DOUBLEP(aa);
double sum = 0.0;
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
sum += a[i] * a[i];
}
return PyFloat_FromDouble(sum);
}
PyObject* errorfunction(PyObject *self, PyObject *args)
{
double x;
if (!PyArg_ParseTuple(args, "d", &x))
return NULL;
return Py_BuildValue("d", erf(x));
}
PyObject* pack(PyObject *self, PyObject *args)
{
PyArrayObject* a_obj;
if (!PyArg_ParseTuple(args, "O", &a_obj))
return NULL;
a_obj = PyArray_GETCONTIGUOUS(a_obj);
int n = PyArray_DIMS(a_obj)[0];
npy_intp dims[1] = {n * (n + 1) / 2};
int typenum = PyArray_DESCR(a_obj)->type_num;
PyArrayObject* b_obj = (PyArrayObject*) PyArray_SimpleNew(1, dims,
typenum);
if (b_obj == NULL)
return NULL;
if (typenum == NPY_DOUBLE) {
double* a = (double*)PyArray_DATA(a_obj);
double* b = (double*)PyArray_DATA(b_obj);
for (int r = 0; r < n; r++) {
*b++ = a[r + n * r];
for (int c = r + 1; c < n; c++)
*b++ = a[r + n * c] + a[c + n * r];
}
} else {
double complex* a = (double complex*)PyArray_DATA(a_obj);
double complex* b = (double complex*)PyArray_DATA(b_obj);
for (int r = 0; r < n; r++) {
*b++ = a[r + n * r];
for (int c = r + 1; c < n; c++)
*b++ = a[r + n * c] + a[c + n * r];
}
}
Py_DECREF(a_obj);
PyObject* value = Py_BuildValue("O", b_obj);
Py_DECREF(b_obj);
return value;
}
PyObject* unpack(PyObject *self, PyObject *args)
{
PyArrayObject* ap;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "OO", &ap, &a))
return NULL;
int n = PyArray_DIMS(a)[0];
double* datap = DOUBLEP(ap);
double* data = DOUBLEP(a);
for (int r = 0; r < n; r++)
for (int c = r; c < n; c++)
{
double d = *datap++;
data[c + r * n] = d;
data[r + c * n] = d;
}
Py_RETURN_NONE;
}
PyObject* unpack_complex(PyObject *self, PyObject *args)
{
PyArrayObject* ap;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "OO", &ap, &a))
return NULL;
int n = PyArray_DIMS(a)[0];
double_complex* datap = COMPLEXP(ap);
double_complex* data = COMPLEXP(a);
for (int r = 0; r < n; r++)
for (int c = r; c < n; c++)
{
double_complex d = *datap++;
data[c + r * n] = d;
data[r + c * n] = conj(d);
}
Py_RETURN_NONE;
}
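/* Illustrative note, not part of the original utilities.c: for n = 2,
 * pack() above turns the matrix
 *     [[a00, a01],
 *      [a10, a11]]
 * into the length-3 vector {a00, a01 + a10, a11} (diagonal element, then
 * the sum of each off-diagonal pair), while unpack() of a packed vector
 * {p0, p1, p2} yields the symmetric matrix [[p0, p1], [p1, p2]]. The
 * packed length is n*(n+1)/2. */
static int packed_length(int n)
{
  return n * (n + 1) / 2;
}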
PyObject* hartree(PyObject *self, PyObject *args)
{
int l;
PyArrayObject* nrdr_obj;
PyArrayObject* r_obj;
PyArrayObject* vr_obj;
if (!PyArg_ParseTuple(args, "iOOO", &l, &nrdr_obj, &r_obj, &vr_obj))
return NULL;
const int M = PyArray_DIM(nrdr_obj, 0);
const double* nrdr = DOUBLEP(nrdr_obj);
const double* r = DOUBLEP(r_obj);
double* vr = DOUBLEP(vr_obj);
double p = 0.0;
double q = 0.0;
for (int g = M - 1; g > 0; g--)
{
double R = r[g];
double rl = pow(R, l);
double dp = nrdr[g] / rl;
double rlp1 = rl * R;
double dq = nrdr[g] * rlp1;
vr[g] = (p + 0.5 * dp) * rlp1 - (q + 0.5 * dq) / rl;
p += dp;
q += dq;
}
vr[0] = 0.0;
double f = 4.0 * M_PI / (2 * l + 1);
for (int g = 1; g < M; g++)
{
double R = r[g];
vr[g] = f * (vr[g] + q / pow(R, l));
}
Py_RETURN_NONE;
}
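/* Illustrative note, not part of the original utilities.c: assuming nrdr[g]
 * holds n(r_g) * r_g * dr_g and vr[g] returns r_g * v_l(r_g), the two
 * sweeps above evaluate the radial Poisson integral
 *
 *   r * v_l(r) = 4*pi/(2l+1) * [ r^(-l)  * \int_0^r   n(r') r'^(l+2) dr'
 *                              + r^(l+1) * \int_r^inf n(r') r'^(1-l) dr' ],
 *
 * with the 0.5*dp and 0.5*dq terms supplying the trapezoidal half-weight
 * of the current shell. */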
PyObject* localize(PyObject *self, PyObject *args)
{
PyArrayObject* Z_nnc;
PyArrayObject* U_nn;
if (!PyArg_ParseTuple(args, "OO", &Z_nnc, &U_nn))
return NULL;
int n = PyArray_DIMS(U_nn)[0];
double complex (*Z)[n][3] = (double complex (*)[n][3])COMPLEXP(Z_nnc);
double (*U)[n] = (double (*)[n])DOUBLEP(U_nn);
double value = 0.0;
for (int a = 0; a < n; a++)
{
for (int b = a + 1; b < n; b++)
{
double complex* Zaa = Z[a][a];
double complex* Zab = Z[a][b];
double complex* Zbb = Z[b][b];
double x = 0.0;
double y = 0.0;
for (int c = 0; c < 3; c++)
{
x += (0.25 * creal(Zbb[c] * conj(Zbb[c])) +
0.25 * creal(Zaa[c] * conj(Zaa[c])) -
0.5 * creal(Zaa[c] * conj(Zbb[c])) -
creal(Zab[c] * conj(Zab[c])));
y += creal((Zaa[c] - Zbb[c]) * conj(Zab[c]));
}
double t = 0.25 * atan2(y, x);
double C = cos(t);
double S = sin(t);
for (int i = 0; i < a; i++)
for (int c = 0; c < 3; c++)
{
double complex Ziac = Z[i][a][c];
Z[i][a][c] = C * Ziac + S * Z[i][b][c];
Z[i][b][c] = C * Z[i][b][c] - S * Ziac;
}
for (int c = 0; c < 3; c++)
{
double complex Zaac = Zaa[c];
double complex Zabc = Zab[c];
double complex Zbbc = Zbb[c];
Zaa[c] = C * C * Zaac + 2 * C * S * Zabc + S * S * Zbbc;
Zbb[c] = C * C * Zbbc - 2 * C * S * Zabc + S * S * Zaac;
Zab[c] = S * C * (Zbbc - Zaac) + (C * C - S * S) * Zabc;
}
for (int i = a + 1; i < b; i++)
for (int c = 0; c < 3; c++)
{
double complex Zaic = Z[a][i][c];
Z[a][i][c] = C * Zaic + S * Z[i][b][c];
Z[i][b][c] = C * Z[i][b][c] - S * Zaic;
}
for (int i = b + 1; i < n; i++)
for (int c = 0; c < 3; c++)
{
double complex Zaic = Z[a][i][c];
Z[a][i][c] = C * Zaic + S * Z[b][i][c];
Z[b][i][c] = C * Z[b][i][c] - S * Zaic;
}
for (int i = 0; i < n; i++)
{
double Uia = U[i][a];
U[i][a] = C * Uia + S * U[i][b];
U[i][b] = C * U[i][b] - S * Uia;
}
}
double complex* Zaa = Z[a][a];
for (int c = 0; c < 3; c++)
value += creal(Zaa[c] * conj(Zaa[c]));
}
return Py_BuildValue("d", value);
}
PyObject* spherical_harmonics(PyObject *self, PyObject *args)
{
int l;
PyArrayObject* R_obj_c;
PyArrayObject* Y_obj_m;
if (!PyArg_ParseTuple(args, "iOO", &l, &R_obj_c, &Y_obj_m))
return NULL;
double* R_c = DOUBLEP(R_obj_c);
double* Y_m = DOUBLEP(Y_obj_m);
if (l == 0)
Y_m[0] = 0.28209479177387814;
else
{
double x = R_c[0];
double y = R_c[1];
double z = R_c[2];
if (l == 1)
{
Y_m[0] = 0.48860251190291992 * y;
Y_m[1] = 0.48860251190291992 * z;
Y_m[2] = 0.48860251190291992 * x;
}
else
{
double r2 = x*x+y*y+z*z;
if (l == 2)
{
Y_m[0] = 1.0925484305920792 * x*y;
Y_m[1] = 1.0925484305920792 * y*z;
Y_m[2] = 0.31539156525252005 * (3*z*z-r2);
Y_m[3] = 1.0925484305920792 * x*z;
Y_m[4] = 0.54627421529603959 * (x*x-y*y);
}
else if (l == 3)
{
Y_m[0] = 0.59004358992664352 * (-y*y*y+3*x*x*y);
Y_m[1] = 2.8906114426405538 * x*y*z;
Y_m[2] = 0.45704579946446577 * (-y*r2+5*y*z*z);
Y_m[3] = 0.3731763325901154 * (5*z*z*z-3*z*r2);
Y_m[4] = 0.45704579946446577 * (5*x*z*z-x*r2);
Y_m[5] = 1.4453057213202769 * (x*x*z-y*y*z);
Y_m[6] = 0.59004358992664352 * (x*x*x-3*x*y*y);
}
else if (l == 4)
{
Y_m[0] = 2.5033429417967046 * (x*x*x*y-x*y*y*y);
Y_m[1] = 1.7701307697799307 * (-y*y*y*z+3*x*x*y*z);
Y_m[2] = 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);
Y_m[3] = 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);
Y_m[4] = 0.10578554691520431 * (-30*z*z*r2+3*r2*r2+35*z*z*z*z);
Y_m[5] = 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);
Y_m[6] = 0.47308734787878004 * (-x*x*r2+7*x*x*z*z+y*y*r2-7*y*y*z*z);
Y_m[7] = 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);
Y_m[8] = 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);
}
else if (l == 5)
{
Y_m[0] = 0.65638205684017015 * (y*y*y*y*y+5*x*x*x*x*y-10*x*x*y*y*y);
Y_m[1] = 8.3026492595241645 * (x*x*x*y*z-x*y*y*y*z);
Y_m[2] = 0.48923829943525038 * (y*y*y*r2-9*y*y*y*z*z-3*x*x*y*r2+27*x*x*y*z*z);
Y_m[3] = 4.7935367849733241 * (3*x*y*z*z*z-x*y*z*r2);
Y_m[4] = 0.45294665119569694 * (-14*y*z*z*r2+y*r2*r2+21*y*z*z*z*z);
Y_m[5] = 0.1169503224534236 * (63*z*z*z*z*z+15*z*r2*r2-70*z*z*z*r2);
Y_m[6] = 0.45294665119569694 * (x*r2*r2-14*x*z*z*r2+21*x*z*z*z*z);
Y_m[7] = 2.3967683924866621 * (-3*y*y*z*z*z+y*y*z*r2+3*x*x*z*z*z-x*x*z*r2);
Y_m[8] = 0.48923829943525038 * (9*x*x*x*z*z-27*x*y*y*z*z-x*x*x*r2+3*x*y*y*r2);
Y_m[9] = 2.0756623148810411 * (y*y*y*y*z-6*x*x*y*y*z+x*x*x*x*z);
Y_m[10] = 0.65638205684017015 * (-10*x*x*x*y*y+5*x*y*y*y*y+x*x*x*x*x);
}
else if (l == 6)
{
Y_m[0] = 1.3663682103838286 * (-10*x*x*x*y*y*y+3*x*x*x*x*x*y+3*x*y*y*y*y*y);
Y_m[1] = 2.3666191622317521 * (y*y*y*y*y*z-10*x*x*y*y*y*z+5*x*x*x*x*y*z);
Y_m[2] = 2.0182596029148967 * (-x*x*x*y*r2+x*y*y*y*r2-11*x*y*y*y*z*z+11*x*x*x*y*z*z);
Y_m[3] = 0.92120525951492349 * (-11*y*y*y*z*z*z-9*x*x*y*z*r2+33*x*x*y*z*z*z+3*y*y*y*z*r2);
Y_m[4] = 0.92120525951492349 * (x*y*r2*r2+33*x*y*z*z*z*z-18*x*y*z*z*r2);
Y_m[5] = 0.58262136251873142 * (5*y*z*r2*r2+33*y*z*z*z*z*z-30*y*z*z*z*r2);
Y_m[6] = 0.063569202267628425 * (231*z*z*z*z*z*z-5*r2*r2*r2+105*z*z*r2*r2-315*z*z*z*z*r2);
Y_m[7] = 0.58262136251873142 * (-30*x*z*z*z*r2+33*x*z*z*z*z*z+5*x*z*r2*r2);
Y_m[8] = 0.46060262975746175 * (33*x*x*z*z*z*z+x*x*r2*r2-y*y*r2*r2-18*x*x*z*z*r2+18*y*y*z*z*r2-33*y*y*z*z*z*z);
Y_m[9] = 0.92120525951492349 * (-3*x*x*x*z*r2-33*x*y*y*z*z*z+9*x*y*y*z*r2+11*x*x*x*z*z*z);
Y_m[10] = 0.50456490072872417 * (11*y*y*y*y*z*z-66*x*x*y*y*z*z-x*x*x*x*r2+6*x*x*y*y*r2+11*x*x*x*x*z*z-y*y*y*y*r2);
Y_m[11] = 2.3666191622317521 * (5*x*y*y*y*y*z+x*x*x*x*x*z-10*x*x*x*y*y*z);
Y_m[12] = 0.6831841051919143 * (x*x*x*x*x*x+15*x*x*y*y*y*y-15*x*x*x*x*y*y-y*y*y*y*y*y);
}
}
}
Py_RETURN_NONE;
}
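/* Illustrative sketch, not part of the original utilities.c: the literal
 * constants above are real spherical-harmonic normalization factors, e.g.
 * 0.28209479177387814 = sqrt(1/(4*pi)) for Y_00 and
 * 0.48860251190291992 = sqrt(3/(4*pi)) for the three l = 1 functions
 * (assumes <math.h> for sqrt and M_PI). */
static double y00_normalization(void)
{
  return sqrt(1.0 / (4.0 * M_PI));   /* ~0.28209479177387814 */
}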
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/wigner_seitz.c 0000664 0000000 0000000 00000003063 13164413722 0023225 0 ustar 00root root 0000000 0000000 #include "extensions.h"
#include
double distance(double *a, double *b);
// returns the squared distance between a 3d double vector
// and a 3d int vector
double distance3d2_di(double *a, int *b)
{
double sum = 0;
double diff;
for (int c = 0; c < 3; c++) {
diff = a[c] - (double)b[c];
sum += diff*diff;
}
return sum;
}
PyObject *exterior_electron_density_region(PyObject *self, PyObject *args)
{
PyArrayObject* ai;
PyArrayObject* aatom_c;
PyArrayObject* beg_c;
PyArrayObject* end_c;
PyArrayObject* hh_c;
PyArrayObject* vdWrad;
if (!PyArg_ParseTuple(args, "OOOOOO", &ai, &aatom_c,
&beg_c, &end_c, &hh_c, &vdWrad))
return NULL;
long *aindex = LONGP(ai);
int natoms = PyArray_DIM(aatom_c, 0);
double *atom_c = DOUBLEP(aatom_c);
long *beg = LONGP(beg_c);
long *end = LONGP(end_c);
double *h_c = DOUBLEP(hh_c);
double *vdWradius = DOUBLEP(vdWrad);
int n[3], ij;
double pos[3];
for (int c = 0; c < 3; c++) { n[c] = end[c] - beg[c]; }
// loop over all points
for (int i = 0; i < n[0]; i++) {
pos[0] = (beg[0] + i) * h_c[0];
for (int j = 0; j < n[1]; j++) {
pos[1] = (beg[1] + j) * h_c[1];
ij = (i*n[1] + j)*n[2];
for (int k = 0; k < n[2]; k++) {
pos[2] = (beg[2] + k) * h_c[2];
aindex[ij + k] = (long) 1; /* assume outside the structure */
// loop over all atoms
for (int a=0; a < natoms; a++) {
double d = distance(atom_c + a*3, pos);
if (d < vdWradius[a]) {
aindex[ij + k] = (long) 0; /* this is inside */
a = natoms;
}
}
}
}
}
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/woperators.c 0000664 0000000 0000000 00000044534 13164413722 0022731 0 ustar 00root root 0000000 0000000 /* This file (woperators.c) is a modified copy of operators.c
* with added support for nonlocal operator weights.
* The original copyright note of operators.c follows:
* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
//*** The apply operator and some associated structures are        ***//
//*** implemented in two versions: an original version and a       ***//
//*** special OpenMP version. By default the original version      ***//
//*** will be used, but it is possible to use the OpenMP version   ***//
//*** by compiling gpaw with the macro GPAW_OMP defined and the    ***//
//*** compile/link option "-fopenmp".                              ***//
//*** Author of the optimized OpenMP code: ***//
//*** Mads R. B. Kristensen - madsbk@diku.dk ***//
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <pthread.h>
#include <stdlib.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC3 3
#define GPAW_ASYNC2 2
#else
#define GPAW_ASYNC3 1
#define GPAW_ASYNC2 1
#endif
typedef struct
{
PyObject_HEAD
int nweights;
const double** weights;
bmgsstencil* stencils;
boundary_conditions* bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
} WOperatorObject;
static void WOperator_dealloc(WOperatorObject *self)
{
free(self->bc);
for (int i = 0; i < self->nweights; i++)
{
free(self->stencils[i].coefs);
free(self->stencils[i].offsets);
}
free(self->stencils);
free(self->weights);
PyObject_DEL(self);
}
static PyObject * WOperator_relax(WOperatorObject *self,
PyObject *args)
{
int relax_method;
PyArrayObject* func;
PyArrayObject* source;
int nrelax;
double w = 1.0;
if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source,
&nrelax, &w))
return NULL;
const boundary_conditions* bc = self->bc;
double* fun = DOUBLEP(func);
const double* src = DOUBLEP(source);
const double_complex* ph;
const int* size2 = bc->size2;
double* buf = (double*) GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] *
bc->ndouble);
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv);
const double** weights = (const double**) GPAW_MALLOC(double*, self->nweights);
ph = 0;
for (int n = 0; n < nrelax; n++ )
{
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, fun, buf, i,
self->recvreq, self->sendreq,
recvbuf, sendbuf, ph + 2 * i, 0, 1);
bc_unpack2(bc, buf, i,
self->recvreq, self->sendreq, recvbuf, 1);
}
for (int iw = 0; iw < self->nweights; iw++)
weights[iw] = self->weights[iw];
bmgs_wrelax(relax_method, self->nweights, self->stencils, weights, buf, fun, src, w);
}
free(weights);
free(recvbuf);
free(sendbuf);
free(buf);
Py_RETURN_NONE;
}
struct wapply_args{
int thread_id;
WOperatorObject *self;
int ng;
int ng2;
int nin;
int nthds;
int chunksize;
int chunkinc;
const double* in;
double* out;
int real;
const double_complex* ph;
};
//Plain worker
void *wapply_worker(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * args->chunksize);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * args->chunksize);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, chunksize);
bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, chunksize);
}
for (int m = 0; m < chunksize; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2, out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Async worker
void *wapply_worker_cfd_async(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3];
MPI_Request sendreq[2 * GPAW_ASYNC3];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC3 *
args->chunksize);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC3 *
args->chunksize);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize,
sendbuf + i * bc->maxsend * chunksize, args->ph + 2 * i,
args->thread_id, chunksize);
}
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize, chunksize);
}
for (int m = 0; m < chunksize; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2, out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Double buffering async worker
void *wapply_worker_cfd(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
int chunk = args->chunkinc;
if (chunk > chunksize)
chunk = chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize * GPAW_ASYNC2);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
int odd = 0;
const double* in = args->in + nstart * args->ng;
double* out;
for (int i = 0; i < 3; i++)
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, chunk);
odd = odd ^ 1;
int last_chunk = chunk;
for (int n = nstart+chunk; n < nend; n += chunk)
{
last_chunk += args->chunkinc;
if (last_chunk > chunksize)
last_chunk = chunksize;
if (n + last_chunk >= nend && last_chunk > 1)
last_chunk = nend - n;
in = args->in + n * args->ng;
out = args->out + (n-chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, last_chunk);
}
odd = odd ^ 1;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, chunk);
}
for (int m = 0; m < chunk; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2 + odd * args->ng2 * chunksize;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
}
chunk = last_chunk;
}
odd = odd ^ 1;
out = args->out + (nend-last_chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, last_chunk);
}
for (int m = 0; m < last_chunk; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2 + odd * args->ng2 * chunksize;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
static PyObject * WOperator_apply(WOperatorObject *self,
PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
const double_complex* ph;
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
if (real)
ph = 0;
else
ph = COMPLEXP(phases);
int chunksize = 1;
if (getenv("GPAW_CHUNK_SIZE") != NULL)
chunksize = atoi(getenv("GPAW_CHUNK_SIZE"));
int chunkinc = chunksize;
if (getenv("GPAW_CHUNK_INC") != NULL)
chunkinc = atoi(getenv("GPAW_CHUNK_INC"));
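/* Example (an illustration, not from the original source): both knobs can be
   set from the shell before launching, e.g. GPAW_CHUNK_SIZE=8 GPAW_CHUNK_INC=4;
   when neither environment variable is set, both values stay at 1. */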
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct wapply_args *wargs = GPAW_MALLOC(struct wapply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->chunksize = chunksize;
(wargs+i)->chunkinc = chunkinc;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifndef GPAW_ASYNC
if (1)
#else
if (bc->cfd == 0)
#endif
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, wapply_worker, (void*) (wargs+i));
#endif
wapply_worker(wargs);
}
else
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, wapply_worker_cfd, (void*) (wargs+i));
#endif
wapply_worker_cfd(wargs);
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
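/* The routine below returns max_g | sum_w weights_w(g) * coefs_w[0] |, i.e. the
   largest absolute value over the grid of the diagonal (zero-offset) stencil
   element summed over all weight functions. */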
static PyObject * WOperator_get_diagonal_element(WOperatorObject *self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
const double** weights = (const double**) GPAW_MALLOC(double*, self->nweights);
for (int iw = 0; iw < self->nweights; iw++)
weights[iw] = self->weights[iw];
const int n0 = self->stencils[0].n[0];
const int n1 = self->stencils[0].n[1];
const int n2 = self->stencils[0].n[2];
double d = 0.0;
for (int i0 = 0; i0 < n0; i0++)
{
for (int i1 = 0; i1 < n1; i1++)
{
for (int i2 = 0; i2 < n2; i2++)
{
double coef = 0.0;
for (int iw = 0; iw < self->nweights; iw++)
{
coef += weights[iw][0] * self->stencils[iw].coefs[0];
weights[iw]++;
}
if (coef < 0)
coef = -coef;
if (coef > d)
d = coef;
}
}
}
free(weights);
return Py_BuildValue("d", d);
}
static PyObject * WOperator_get_async_sizes(WOperatorObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3);
#else
return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3);
#endif
}
static PyMethodDef WOperator_Methods[] = {
{"apply",
(PyCFunction)WOperator_apply, METH_VARARGS, NULL},
{"relax",
(PyCFunction)WOperator_relax, METH_VARARGS, NULL},
{"get_diagonal_element",
(PyCFunction)WOperator_get_diagonal_element, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)WOperator_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject WOperatorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"WOperator",
sizeof(WOperatorObject),
0,
(destructor)WOperator_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"FDW-operator object",
0, 0, 0, 0, 0, 0,
WOperator_Methods
};
PyObject* NewWOperatorObject(PyObject *obj, PyObject *args)
{
PyObject* coefs_list;
PyArrayObject* coefs;
PyObject* offsets_list;
PyArrayObject* offsets;
PyObject* weights_list;
PyArrayObject* weights;
PyArrayObject* size;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int cfd;
int range;
int nweights;
if (!PyArg_ParseTuple(args, "iO!O!O!OiOiOi",
&nweights,
&PyList_Type, &weights_list,
&PyList_Type, &coefs_list,
&PyList_Type, &offsets_list,
&size,
&range,
&neighbors, &real, &comm_obj, &cfd))
return NULL;
WOperatorObject *self = PyObject_NEW(WOperatorObject, &WOperatorType);
if (self == NULL)
return NULL;
self->stencils = (bmgsstencil*) GPAW_MALLOC(bmgsstencil, nweights);
self->weights = (const double**) GPAW_MALLOC(double*, nweights);
self->nweights = nweights;
for (int iw = 0; iw < nweights; iw++)
{
coefs = (PyArrayObject*) PyList_GetItem(coefs_list, iw);
offsets = (PyArrayObject*) PyList_GetItem(offsets_list, iw);
weights = (PyArrayObject*) PyList_GetItem(weights_list, iw);
self->stencils[iw] = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs),
LONGP(offsets), range, LONGP(size));
self->weights[iw] = DOUBLEP(weights);
}
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long padding[3][2] = {{range, range},
{range, range},
{range, range}};
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
self->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd);
return (PyObject*) self;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/ 0000775 0000000 0000000 00000000000 13164413722 0020760 5 ustar 00root root 0000000 0000000 gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/ensemble_gga.c 0000664 0000000 0000000 00000003041 13164413722 0023532 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include "xc_gpaw.h"
double beefvdw_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
/* Legendre polynomial basis expansion */
int parlen = par->nparameters-1;
double p = par->parameters[0];
double tmp = p + s2;
double x = 2.0 * s2 / tmp - 1.0;
double dxds2 = 2.0 * p / pow(tmp,2);
double Fx = 0.0;
double dFxds2 = 0.0;
int max_order = par->parameters[parlen+1];
double L[max_order+1];
double dL[max_order+1];
double coef;
int m;
int order;
/* initializing */
L[0] = 1.0;
L[1] = x;
dL[0] = 0.0;
dL[1] = 1.0;
/* recursively building the polynomials and their derivatives */
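/* The update below is algebraically Bonnet's recursion,
   L[i] = ((2i-1)*x*L[i-1] - (i-1)*L[i-2]) / i,
   and the derivative recursion uses the identity
   dL[i]/dx = i*L[i-1] + x*dL[i-1]/dx. */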
for(int i = 2; i < max_order+1; i++)
{
L[i] = 2.0 * x * L[i-1] - L[i-2] - (x * L[i-1] - L[i-2])/i;
dL[i] = i * L[i-1] + x * dL[i-1];
}
/* building enhancement factor Fx and derivative dFxds2 */
m = 0;
for(int i = 0; i < max_order+1; i++)
{
order = par->parameters[2+m];
if(order == i)
{
coef = par->parameters[2+parlen+m];
Fx += coef * L[i];
dFxds2 += coef * dL[i] * dxds2;
m += 1;
}
}
double ds2drs = 8.0 * c * a2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
return e;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/libvdwxc.c 0000664 0000000 0000000 00000015531 13164413722 0022753 0 ustar 00root root 0000000 0000000 #ifdef GPAW_WITH_LIBVDWXC
#include "../extensions.h"
#ifdef PARALLEL
#include <mpi.h>
#include "../mympi.h"
#include <vdwxc_mpi.h>
#else
#include <vdwxc.h>
#endif
// Our heinous plan is to abuse a numpy array so that it will contain a pointer to the vdwxc_data.
// This is because PyCapsules are not there until Python 3.1/2.7.
// This function takes an array and returns the pointer it so outrageously contains.
vdwxc_data* unpack_vdwxc_pointer(PyObject* vdwxc_obj)
{
vdwxc_data* vdw = (vdwxc_data *)PyArray_DATA((PyArrayObject *)vdwxc_obj);
return vdw;
}
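/* Hedged illustration (not part of the module, and not called anywhere):
   a minimal sketch of how a carrier array big enough to hold one vdwxc_data
   pointer could be created from C with the numpy C API.  The helper name and
   the NPY_BYTE dtype are assumptions made for this example only; the actual
   carrier array is presumably created on the Python side. */
static PyObject* make_vdwxc_carrier_sketch(void)
{
    npy_intp nbytes = (npy_intp)sizeof(vdwxc_data); /* room for one vdwxc_data */
    /* the caller would then store the vdwxc_data value in PyArray_DATA() */
    return PyArray_SimpleNew(1, &nbytes, NPY_BYTE);
}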
PyObject* libvdwxc_has(PyObject* self, PyObject* args)
{
char* name;
if(!PyArg_ParseTuple(args, "s", &name)) {
return NULL;
}
int val;
if(strcmp("mpi", name) == 0) {
val = vdwxc_has_mpi();
} else if(strcmp("pfft", name) == 0) {
val = vdwxc_has_pfft();
} else {
return NULL;
}
PyObject* pyval = val ? Py_True : Py_False;
Py_INCREF(pyval);
return pyval;
}
PyObject* libvdwxc_create(PyObject* self, PyObject* args, PyObject* kwargs)
{
PyObject* vdwxc_obj;
int vdwxc_code;
int nspins;
int Nx, Ny, Nz;
double C00, C10, C20, C01, C11, C21, C02, C12, C22;
if(!PyArg_ParseTuple(args, "Oii(iii)(ddddddddd)",
&vdwxc_obj,
&vdwxc_code, // functional identifier
&nspins,
&Nx, &Ny, &Nz, // number of grid points
&C00, &C10, &C20, // 3x3 cell
&C01, &C11,&C21,
&C02, &C12, &C22)) {
return NULL;
}
vdwxc_data vdw;
if(nspins == 1) {
vdw = vdwxc_new(vdwxc_code);
} else if(nspins == 2) {
#ifdef VDWXC_HAS_SPIN
vdw = vdwxc_new_spin(vdwxc_code);
#else
PyErr_SetString(PyExc_ImportError, "this version of libvdwxc has no spin support");
return NULL;
#endif
} else {
PyErr_SetString(PyExc_ValueError, "nspins must be 1 or 2");
return NULL;
}
vdwxc_data* vdwxc_ptr = unpack_vdwxc_pointer(vdwxc_obj);
vdwxc_ptr[0] = vdw;
vdwxc_set_unit_cell(vdw, Nx, Ny, Nz, C00, C10, C20, C01, C11, C21, C02, C12, C22);
Py_RETURN_NONE;
}
PyObject* libvdwxc_init_serial(PyObject* self, PyObject* args)
{
PyObject* vdwxc_obj;
if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) {
return NULL;
}
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
vdwxc_init_serial(*vdw);
Py_RETURN_NONE;
}
PyObject* libvdwxc_calculate(PyObject* self, PyObject* args)
{
PyObject *vdwxc_obj;
PyArrayObject *rho_obj, *sigma_obj, *dedn_obj, *dedsigma_obj;
if(!PyArg_ParseTuple(args, "OOOOO",
&vdwxc_obj, &rho_obj, &sigma_obj,
&dedn_obj, &dedsigma_obj)) {
return NULL;
}
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
int nspins = PyArray_DIM(rho_obj, 0);
double energy;
if (nspins == 1) {
double* rho_g = (double*)PyArray_DATA(rho_obj);
double* sigma_g = (double*)PyArray_DATA(sigma_obj);
double* dedn_g = (double*)PyArray_DATA(dedn_obj);
double* dedsigma_g = (double*)PyArray_DATA(dedsigma_obj);
energy = vdwxc_calculate(*vdw, rho_g, sigma_g, dedn_g, dedsigma_g);
} else if (nspins == 2) {
// We actually only need two sigmas/dedsigmas.
// The third one came along because that's what usually happens,
// but we could save it entirely.
assert(PyArray_DIM(sigma_obj, 0) == 3);
assert(PyArray_DIM(dedn_obj, 0) == 2);
assert(PyArray_DIM(dedsigma_obj, 0) == 3);
#ifdef VDWXC_HAS_SPIN
energy = vdwxc_calculate_spin(*vdw,
(double*)PyArray_GETPTR1(rho_obj, 0),
(double*)PyArray_GETPTR1(rho_obj, 1),
(double*)PyArray_GETPTR1(sigma_obj, 0),
(double*)PyArray_GETPTR1(sigma_obj, 2),
(double*)PyArray_GETPTR1(dedn_obj, 0),
(double*)PyArray_GETPTR1(dedn_obj, 1),
(double*)PyArray_GETPTR1(dedsigma_obj, 0),
(double*)PyArray_GETPTR1(dedsigma_obj, 2));
#else
return NULL;
#endif
} else {
PyErr_SetString(PyExc_ValueError, "Expected 1 or 2 spins");
return NULL;
}
return Py_BuildValue("d", energy);
}
PyObject* libvdwxc_tostring(PyObject* self, PyObject* args)
{
PyObject *vdwxc_obj;
if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) {
return NULL;
}
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
int maxlen = 80 * 200; // up to a few hundred lines
char str[maxlen];
vdwxc_tostring(*vdw, maxlen, str);
return Py_BuildValue("s", str);
}
PyObject* libvdwxc_free(PyObject* self, PyObject* args)
{
PyObject* vdwxc_obj;
if(!PyArg_ParseTuple(args, "O", &vdwxc_obj)) {
return NULL;
}
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
vdwxc_finalize(vdw);
Py_RETURN_NONE;
}
#ifdef PARALLEL
MPI_Comm unpack_gpaw_comm(PyObject* gpaw_mpi_obj)
{
MPIObject* gpaw_comm = (MPIObject *)gpaw_mpi_obj;
return gpaw_comm->comm;
}
#endif
PyObject* error_parallel_support(void)
{
// Not a true import error, but pretty close.
#ifndef PARALLEL
PyErr_SetString(PyExc_ImportError,
"GPAW not compiled in parallel");
#endif
#ifndef VDWXC_HAS_MPI
PyErr_SetString(PyExc_ImportError,
"libvdwxc not compiled in parallel. Recompile libvdwxc with --with-mpi");
#endif
return NULL;
}
PyObject* libvdwxc_init_mpi(PyObject* self, PyObject* args)
{
PyObject* vdwxc_obj;
PyObject* gpaw_comm_obj;
if(!PyArg_ParseTuple(args, "OO", &vdwxc_obj, &gpaw_comm_obj)) {
return NULL;
}
if(!vdwxc_has_mpi()) {
PyErr_SetString(PyExc_ImportError, "libvdwxc not compiled with MPI.");
return NULL;
}
#if defined(PARALLEL) && defined(VDWXC_HAS_MPI)
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
MPI_Comm comm = unpack_gpaw_comm(gpaw_comm_obj);
vdwxc_init_mpi(*vdw, comm);
Py_RETURN_NONE;
#else
return error_parallel_support();
#endif
}
PyObject* libvdwxc_init_pfft(PyObject* self, PyObject* args)
{
PyObject* vdwxc_obj;
PyObject* gpaw_comm_obj;
int nproc1, nproc2;
if(!PyArg_ParseTuple(args, "OOii", &vdwxc_obj, &gpaw_comm_obj, &nproc1, &nproc2)) {
return NULL;
}
if(!vdwxc_has_pfft()) {
PyErr_SetString(PyExc_ImportError, "libvdwxc not compiled with PFFT.");
return NULL;
}
#if defined(PARALLEL)
vdwxc_data* vdw = unpack_vdwxc_pointer(vdwxc_obj);
MPI_Comm comm = unpack_gpaw_comm(gpaw_comm_obj);
vdwxc_init_pfft(*vdw, comm, nproc1, nproc2);
Py_RETURN_NONE;
#else
return error_parallel_support();
#endif
}
#endif // gpaw_with_libvdwxc
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/libxc.c 0000664 0000000 0000000 00000073335 13164413722 0022240 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <xc.h>
#include <xc_funcs.h>
#include "xc_gpaw.h"
#include "../extensions.h"
typedef struct
{
PyObject_HEAD
/* exchange-correlation energy second derivatives */
void (*get_fxc)(XC(func_type) *func, double point[7], double der[5][5]);
XC(func_type) xc_functional;
XC(func_type) x_functional;
XC(func_type) c_functional;
XC(func_type) *functional[2]; /* store either x&c, or just xc */
int nspin; /* must be common to x and c, so declared redundantly here */
} lxcXCFunctionalObject;
void XC(lda_fxc_fd)(const XC(func_type) *p, const double *rho, double *fxc);
/* a general call for an LDA functional - finite difference */
void get_fxc_fd_lda(XC(func_type) *func, double point[7], double der[5][5])
{
double v2rho2[3], v2rhosigma[6], v2sigma2[6];
for(int i=0; i<3; i++) v2rho2[i] = 0.0;
for(int i=0; i<6; i++){
v2rhosigma[i] = 0.0;
v2sigma2[i] = 0.0;
}
XC(lda_fxc_fd)(func, point, v2rho2);
der[0][0] = v2rho2[0];
der[0][1] = der[1][0] = v2rho2[1];
der[1][1] = v2rho2[2];
der[0][2] = der[2][0] = v2rhosigma[0];
der[0][3] = der[3][0] = v2rhosigma[1];
der[0][4] = der[4][0] = v2rhosigma[2];
der[1][2] = der[2][1] = v2rhosigma[3];
der[1][3] = der[3][1] = v2rhosigma[4];
der[1][4] = der[4][1] = v2rhosigma[5];
der[2][2] = v2sigma2[0];
der[2][3] = der[3][2] = v2sigma2[1];
der[2][4] = der[4][2] = v2sigma2[2];
der[3][3] = v2sigma2[3];
der[3][4] = der[4][3] = v2sigma2[4];
der[4][4] = v2sigma2[5];
}
// finite difference calculation of second functional derivative
// stolen from libxc/testsuite/xc-consistency.c
double get_point(XC(func_type) *func, double point[7], double *e, double der[5], int which)
{
const int np = 1;
switch(func->info->family)
{
case XC_FAMILY_LDA:
XC(lda_exc_vxc)(func, np, &(point[0]), e, &(der[0]));
break;
case XC_FAMILY_GGA:
case XC_FAMILY_HYB_GGA:
XC(gga_exc_vxc)(func, np, &(point[0]), &(point[2]),
e, &(der[0]), &(der[2]));
break;
}
if(which == 0)
return (*e)*(point[0] + point[1]);
else
return der[which-1];
}
void first_derivative(XC(func_type) *func, double point[7], double der[5], int which,
int nspin)
{
int i;
for(i=0; i<5; i++){
const double delta = 5e-10;
double dd, p[5], v[5];
int j;
if(nspin==1 && (i!=0 && i!=2)){
der[i] = 0.0;
continue;
}
dd = point[i]*delta;
if(dd < delta) dd = delta;
for(j=0; j<5; j++) p[j] = point[j];
if(point[i]>=3.0*dd){ /* centered difference */
double e, em1, em2, ep1, ep2;
p[i] = point[i] + dd;
ep1 = get_point(func, p, &e, v, which);
p[i] = point[i] + 2*dd;
ep2 = get_point(func, p, &e, v, which);
p[i] = point[i] - dd; /* backward point */
em1 = get_point(func, p, &e, v, which);
p[i] = point[i] - 2*dd; /* backward point */
em2 = get_point(func, p, &e, v, which);
der[i] = 1.0/2.0*(ep1 - em1);
der[i] += 1.0/12.0*(em2 - 2*em1 + 2*ep1 - ep2);
der[i] /= dd;
}else{ /* we use a 5 point forward difference */
double e, e1, e2, e3, e4, e5;
p[i] = point[i];
e1 = get_point(func, p, &e, v, which);
p[i] = point[i] + dd;
e2 = get_point(func, p, &e, v, which);
p[i] = point[i] + 2.0*dd;
e3 = get_point(func, p, &e, v, which);
p[i] = point[i] + 3.0*dd;
e4 = get_point(func, p, &e, v, which);
p[i] = point[i] + 4.0*dd;
e5 = get_point(func, p, &e, v, which);
der[i] = (-e1 + e2);
der[i] -= 1.0/2.0*( e1 - 2*e2 + e3);
der[i] += 1.0/3.0*(-e1 + 3*e2 - 3*e3 + e4);
der[i] -= 1.0/4.0*( e1 - 4*e2 + 6*e3 - 4*e4 + e5);
der[i] /= dd;
}
}
}
void first_derivative_spinpaired(XC(func_type) *func, double point[7], double der[5],
int which)
{
first_derivative(func, point, der, which, XC_UNPOLARIZED);
}
void first_derivative_spinpolarized(XC(func_type) *func, double point[7], double der[5],
int which)
{
first_derivative(func, point, der, which, XC_POLARIZED);
}
void second_derivatives_spinpaired(XC(func_type) *func, double point[7], double der[5][5])
{
int i;
for(i=0; i<5; i++){
first_derivative_spinpaired(func, point, der[i], i+1);
}
}
void second_derivatives_spinpolarized(XC(func_type) *func, double point[7], double der[5][5])
{
int i;
for(i=0; i<5; i++){
first_derivative_spinpolarized(func, point, der[i], i+1);
}
}
/* a general call for a functional - finite difference */
void get_fxc_fd_spinpaired(XC(func_type) *func, double point[7], double der[5][5])
{
second_derivatives_spinpaired(func, point, der);
}
/* a general call for a functional - finite difference */
void get_fxc_fd_spinpolarized(XC(func_type) *func, double point[7], double der[5][5])
{
second_derivatives_spinpolarized(func, point, der);
}
static void lxcXCFunctional_dealloc(lxcXCFunctionalObject *self)
{
for (int i=0; i<2; i++)
if (self->functional[i] != NULL) xc_func_end(self->functional[i]);
PyObject_DEL(self);
}
static PyObject*
lxcXCFunctional_is_gga(lxcXCFunctionalObject *self, PyObject *args)
{
int success = 0; /* assume functional is not GGA */
// check family of most-complex functional
if (self->functional[0]->info->family == XC_FAMILY_GGA ||
self->functional[0]->info->family == XC_FAMILY_HYB_GGA) success = XC_FAMILY_GGA;
return Py_BuildValue("i", success);
}
static PyObject*
lxcXCFunctional_is_mgga(lxcXCFunctionalObject *self, PyObject *args)
{
int success = 0; /* assume functional is not MGGA */
// check family of most-complex functional
if (self->functional[0]->info->family == XC_FAMILY_MGGA) success = XC_FAMILY_MGGA;
return Py_BuildValue("i", success);
}
static PyObject*
lxcXCFunctional_CalculateFXC_FD_SpinPaired(lxcXCFunctionalObject *self, PyObject *args)
{
PyArrayObject* n_array; /* rho */
PyArrayObject* v2rho2_array; /* d2E/drho2 */
PyArrayObject* a2_array = 0; /* |nabla rho|^2*/
PyArrayObject* v2rhosigma_array = 0; /* d2E/drhod|nabla rho|^2 */
PyArrayObject* v2sigma2_array = 0; /* d2E/drhod|nabla rho|^2 */
if (!PyArg_ParseTuple(args, "OO|OOO", &n_array, &v2rho2_array, /* object | optional objects*/
&a2_array, &v2rhosigma_array, &v2sigma2_array))
return NULL;
/* find nspin */
int nspin = self->nspin;
assert(nspin == XC_UNPOLARIZED); /* we are spinpaired */
assert (self->functional[0]->info->family != XC_FAMILY_MGGA);
int ng = PyArray_DIMS(n_array)[0]; /* number of grid points */
const double* n_g = DOUBLEP(n_array); /* density on the grid */
double* v2rho2_g = DOUBLEP(v2rho2_array); /* v on the grid */
const double* a2_g = 0; /* a2 on the grid */
double* v2rhosigma_g = 0; /* d2Ednda2 on the grid */
double* v2sigma2_g = 0; /* d2Eda2da2 on the grid */
if ((self->functional[0]->info->family == XC_FAMILY_GGA) ||
(self->functional[0]->info->family == XC_FAMILY_HYB_GGA))
{
a2_g = DOUBLEP(a2_array);
v2rhosigma_g = DOUBLEP(v2rhosigma_array);
v2sigma2_g = DOUBLEP(v2sigma2_array);
}
self->get_fxc = get_fxc_fd_spinpaired;
/* ################################################################ */
for (int g = 0; g < ng; g++)
{
double n = n_g[g];
if (n < NMIN)
n = NMIN;
double a2 = 0.0; /* initialize for lda */
if ((self->functional[0]->info->family == XC_FAMILY_GGA) ||
(self->functional[0]->info->family == XC_FAMILY_HYB_GGA))
{
a2 = a2_g[g];
}
double point[7]; /* generalized point */
// from http://www.tddft.org/programs/octopus/wiki/index.php/Libxc:manual
// rhoa rhob sigmaaa sigmaab sigmabb taua taub
// \sigma[0] = \nabla n_\uparrow \cdot \nabla n_\uparrow \qquad
// \sigma[1] = \nabla n_\uparrow \cdot \nabla n_\downarrow \qquad
// \sigma[2] = \nabla n_\downarrow \cdot \nabla n_\downarrow \qquad
double derivative[5][5]; /* generalized derivative */
double v2rho2[3];
double v2rhosigma[6];
double v2sigma2[6];
// one that uses this: please add description of spin derivative order notation
// (see c/libxc/src/gga_perdew.c) MDTMP
for(int i=0; i<3; i++) v2rho2[i] = 0.0;
for(int i=0; i<6; i++){
v2rhosigma[i] = 0.0;
v2sigma2[i] = 0.0;
}
for(int j=0; j<7; j++)
{
point[j] = 0.0;
}
for(int i=0; i<5; i++)
{
for(int j=0; j<5; j++)
{
derivative[i][j] = 0.0;
}
}
point[0] = n; /* -> rho */
point[2] = a2; /* -> sigma */
for (int i=0; i<2; i++) {
XC(func_type) *func = self->functional[i];
if (func == NULL) continue;
self->get_fxc(func, point, derivative);
v2rho2[0] = derivative[0][0];
v2rho2[1] = derivative[0][1]; // XC_POLARIZED
v2rho2[2] = derivative[1][1]; // XC_POLARIZED
v2rhosigma[0] = derivative[0][2];
v2rhosigma[1] = derivative[0][3]; // XC_POLARIZED
v2rhosigma[2] = derivative[0][4]; // XC_POLARIZED
v2rhosigma[3] = derivative[1][2]; // XC_POLARIZED
v2rhosigma[4] = derivative[1][3]; // XC_POLARIZED
v2rhosigma[5] = derivative[1][4]; // XC_POLARIZED
v2sigma2[0] = derivative[2][2]; /* aa_aa */
v2sigma2[1] = derivative[2][3]; // XC_POLARIZED /* aa_ab */
v2sigma2[2] = derivative[2][4]; // XC_POLARIZED /* aa_bb */
v2sigma2[3] = derivative[3][3]; // XC_POLARIZED /* ab_ab */
v2sigma2[4] = derivative[3][4]; // XC_POLARIZED /* ab_bb */
v2sigma2[5] = derivative[4][4]; // XC_POLARIZED /* bb_bb */
switch(func->info->family)
{
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
v2rhosigma_g[g] += v2rhosigma[0];
v2sigma2_g[g] += v2sigma2[0];
// don't break here since we need LDA values as well
case XC_FAMILY_LDA:
v2rho2_g[g] += v2rho2[0];
}
}
}
Py_RETURN_NONE;
}
// Below are changes made by cpo@slac.stanford.edu for libxc 1.2.0
// which allows passing of arrays of points to libxc routines.
// The fundamental design idea (to try to minimize code-duplication) is that
// all libxc routines have input/output arrays that get processed in
// common ways with three special exceptions: n_sg, e_g, dedn_sg. The
// struct "xcptrlist" is used to keep track of these pointers.
// Two libxc features prevent us from using a straightforward
// interface:
// 1) libxc calls memset(0) on output arrays, preventing us
// from adding x/c contributions "in place" without scratch arrays
// 2) for spin-polarized calculations libxc wants spin indices to be
// dense in memory, whereas GPAW probably loops over grid indices
// more often, so we want to keep those dense in memory.
// I asked Miguel Marques to remove the memset, and to add a "stride"
// argument to libxc routines to address the above. He says he will
// consider it in the future. In the meantime we have to "block"
// over gridpoints using some scratch memory.
// What is supported:
// - combined xc-functional mode
// - separate x,c functionals.
// - separate x,c can have differing complexities (e.g. one GGA, one LDA)
// - "exc_vxc" style routines for LDA/GGA/MGGA both unpolarized/polarized
// - "fxc" style routines for LDA/GGA both unpolarized/polarized
// To support a libxc routine other than exc_vxc/fxc one needs to
// copy a "Calculate" routine and change the pointer list setup, and
// associated libxc function calls.
// number of gridpoints we will "block" over when doing xc calculation
#define BLOCKSIZE 1024
// this is the maximum number of BLOCKSIZE arrays that will be put
// into scratch (depends on the "spinsize" values for the various
// arrays. currently determined by fxc, which has input spinsizes
// of 2+3 and output spinsizes of 3+6+6 (totalling 20).
#define MAXARRAYS 20
#define LIBXCSCRATCHSIZE (BLOCKSIZE*MAXARRAYS)
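/* Hedged sketch of the blocking scheme described above (illustration only,
   nothing in this file calls it): the ng grid points are processed in chunks
   of at most BLOCKSIZE so the per-chunk scratch arrays stay bounded; the loop
   structure mirrors the do/while blocks in the Calculate routines below. */
static void blocking_pattern_sketch(int ng)
{
    int remaining = ng;
    int done = 0;
    do {
        int blocksize = (remaining < BLOCKSIZE) ? remaining : BLOCKSIZE;
        /* ... gather the inputs for points [done, done+blocksize) into the
           scratch blocks, call the libxc routine once on the block, and
           scatter/add the outputs back ... */
        done += blocksize;
        remaining -= blocksize;
    } while (remaining > 0);
    (void)done; /* only advanced for illustration in this sketch */
}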
static double *scratch=NULL;
// we don't use lapl, but libxc needs space for them.
static double *scratch_lapl=NULL;
static double *scratch_vlapl=NULL;
// special cases for array behaviors:
// flag to indicate we need to add to existing values for dedn_sg
#define DEDN_SG 1
// flag to indicate we need to apply NMIN cutoff to n_sg
#define N_SG 2
// flag to indicate we need to multiply by density for e_g
#define E_G 4
typedef struct xcptr {
double *p;
int special;
int spinsize;
} xcptr;
#define MAXPTR 10
typedef struct xcptrlist {
int num;
xcptr p[MAXPTR];
} xcptrlist;
typedef struct xcinfo {
int nspin;
bool spinpolarized;
int ng;
} xcinfo;
// these 3 functions make the spin index closest in memory ("gather") or the
// farthest apart in memory ("scatter"). "scatteradd" adds to previous results.
static void gather(const double* src, double* dst, int np, int stride, int nspins) {
const double *dstend = dst+np*nspins;
const double *srcend = src+nspins*stride;
do {
const double *s = src;
do {
*dst++ = *s; s+=stride;
} while (snum; i++) {
inblocklist[i] = next;
next+=blocksize*inlist->p[i].spinsize;
}
for (int i=0; i<outlist->num; i++) {
outblocklist[i] = next;
next+=blocksize*outlist->p[i].spinsize;
}
// check that we fit in the scratch space
// if we don't, then we need to increase MAXARRAY
assert((next - scratch) <= LIBXCSCRATCHSIZE);
}
// copy a piece of the full data into the block for processing by libxc
static void data2block(const xcinfo *info,
const xcptrlist *inlist, double *inblocklist[],
int blocksize) {
// copy data into the block, taking into account special cases
for (int i=0; i<inlist->num; i++) {
double *ptr = inlist->p[i].p; double* block = inblocklist[i];
if (info->spinpolarized) {
gather(ptr,block,blocksize,info->ng,inlist->p[i].spinsize);
if (inlist->p[i].special&N_SG)
for (int i=0; ip[i].special&N_SG) for (int i=0; inum; i++) {
double *ptr = outlist->p[i].p; double* block = outblocklist[i];
if (outlist->p[i].special&E_G) {
if (info->spinpolarized) {
for (int i=0; ip[i].special&DEDN_SG) {
if (info->spinpolarized) {
scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize); // need to add to pre-existing values
} else {
for (int i=0; ispinpolarized) {
scatter(block,ptr,blocksize,info->ng,outlist->p[i].spinsize);
} else {
memcpy(ptr,block,blocksize*sizeof(double));
}
}
}
}
// copy the data from the block back into its final resting place, but add to previous results
static void block2dataadd(const xcinfo *info, double *outblocklist[], const xcptrlist *outlist,
const double *n_sg, int blocksize, int noutcopy) {
for (int i=0; ip[i].p; double* block = outblocklist[i];
if (outlist->p[i].special&E_G) {
if (info->spinpolarized) {
for (int i=0; ispinpolarized) {
scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize);
} else {
for (int i=0; inspin;
info.spinpolarized = (info.nspin==2);
info.ng = PyArray_DIMS(py_e_g)[0];
xcptrlist inlist,outlist;
inlist.num=0;
outlist.num=0;
int blocksize = BLOCKSIZE;
int remaining = info.ng;
// setup pointers using most complex functional
switch(self->functional[0]->info->family)
{
case XC_FAMILY_MGGA:
inlist.p[2].p = DOUBLEP(py_tau_sg);
inlist.p[2].special = 0;
inlist.p[2].spinsize = 2;
inlist.num++;
outlist.p[3].p = DOUBLEP(py_dedtau_sg);
outlist.p[3].special = 0;
outlist.p[3].spinsize = 2;
outlist.num++;
// don't break here since MGGA also needs GGA ptrs
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
inlist.p[1].p = DOUBLEP(py_sigma_xg);
inlist.p[1].special = 0;
inlist.p[1].spinsize = 3;
inlist.num++;
outlist.p[2].p = DOUBLEP(py_dedsigma_xg);
outlist.p[2].special = 0;
outlist.p[2].spinsize = 3;
outlist.num++;
// don't break here since GGA also needs LDA ptrs
case XC_FAMILY_LDA:
inlist.p[0].p = DOUBLEP(py_n_sg);
inlist.p[0].special = N_SG;
inlist.p[0].spinsize = 2;
inlist.num += 1;
outlist.p[0].p = DOUBLEP(py_e_g);
outlist.p[0].special = E_G;
outlist.p[0].spinsize = 1;
outlist.p[1].p = DOUBLEP(py_dedn_sg);
outlist.p[1].special = DEDN_SG;
outlist.p[1].spinsize = 2;
outlist.num += 2;
}
assert(inlist.num < MAXPTR);
assert(outlist.num < MAXPTR);
double *inblock[MAXPTR];
double *outblock[MAXPTR];
setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize);
do {
blocksize = blocksizefunctional[i] == NULL) continue;
XC(func_type) *func = self->functional[i];
int noutcopy=0;
switch(func->info->family)
{
case XC_FAMILY_LDA:
xc_lda_exc_vxc(func, blocksize, n_sg, e_g, dedn_sg);
noutcopy = 2; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
xc_gga_exc_vxc(func, blocksize,
n_sg, sigma_xg, e_g,
dedn_sg, dedsigma_xg);
noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_MGGA:
xc_mgga_exc_vxc(func, blocksize, n_sg, sigma_xg, scratch_lapl,
tau_sg, e_g, dedn_sg, dedsigma_xg, scratch_vlapl,
dedtau_sg);
noutcopy = 4; // potentially decrease the size for block2dataadd if second functional less complex.
break;
}
// if we have more than 1 functional, add results
// canonical example: adding "x" results to "c"
if (i==0)
block2data(&info, &outblock[0], &outlist, n_sg, blocksize);
else
block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy);
}
for (int i=0; i0);
Py_RETURN_NONE;
}
static PyObject*
lxcXCFunctional_CalculateFXC(lxcXCFunctionalObject *self, PyObject *args)
{
PyArrayObject* py_n_sg=NULL;
PyArrayObject* py_v2rho2_xg=NULL;
PyArrayObject* py_sigma_xg=NULL;
PyArrayObject* py_v2rhosigma_yg=NULL;
PyArrayObject* py_v2sigma2_yg=NULL;
if (!PyArg_ParseTuple(args, "OO|OOO", &py_n_sg, &py_v2rho2_xg,
&py_sigma_xg, &py_v2rhosigma_yg, &py_v2sigma2_yg))
return NULL;
xcinfo info;
info.nspin = self->nspin;
info.spinpolarized = (info.nspin==2);
info.ng = (info.spinpolarized) ? PyArray_DIMS(py_n_sg)[0]/2 : PyArray_DIMS(py_n_sg)[0];
xcptrlist inlist,outlist;
inlist.num=0;
outlist.num=0;
int blocksize = BLOCKSIZE;
int remaining = info.ng;
// setup pointers using most complex functional
switch(self->functional[0]->info->family)
{
case XC_FAMILY_MGGA:
// not supported
assert(self->functional[0]->info->family != XC_FAMILY_MGGA);
// don't break here since MGGA also needs GGA ptrs
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
inlist.p[1].p = DOUBLEP(py_sigma_xg);
inlist.p[1].special = 0;
inlist.p[1].spinsize = 3;
inlist.num++;
outlist.p[1].p = DOUBLEP(py_v2rhosigma_yg);
outlist.p[1].special = 0;
outlist.p[1].spinsize = 6;
outlist.p[2].p = DOUBLEP(py_v2sigma2_yg);
outlist.p[2].special = 0;
outlist.p[2].spinsize = 6;
outlist.num+=2;
// don't break here since GGA also needs LDA ptrs
case XC_FAMILY_LDA:
inlist.p[0].p = DOUBLEP(py_n_sg);
inlist.p[0].special = N_SG;
inlist.p[0].spinsize = 2;
inlist.num += 1;
outlist.p[0].p = DOUBLEP(py_v2rho2_xg);
outlist.p[0].special = 0;
outlist.p[0].spinsize = 3;
outlist.num++;
}
assert(inlist.num < MAXPTR);
assert(outlist.num < MAXPTR);
double *inblock[MAXPTR];
double *outblock[MAXPTR];
setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize);
do {
blocksize = blocksizefunctional[i] == NULL) continue;
XC(func_type) *func = self->functional[i];
int noutcopy=0;
switch(func->info->family)
{
case XC_FAMILY_LDA:
xc_lda_fxc(func, blocksize, n_sg, v2rho2);
noutcopy = 1; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
xc_gga_fxc(func, blocksize, n_sg, sigma_xg,
v2rho2, v2rhosigma, v2sigma2);
noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_MGGA:
// not supported by GPAW yet, so crash
assert (func->info->family!=XC_FAMILY_MGGA);
break;
}
// if we have more than 1 functional, add results
// canonical example: adding "x" results to "c"
if (i==0)
block2data(&info, &outblock[0], &outlist, n_sg, blocksize);
else
block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy);
}
for (int i=0; i0);
Py_RETURN_NONE;
}
static PyObject*
lxcXCFunctional_tb09(lxcXCFunctionalObject *self, PyObject *args)
{
double c;
PyArrayObject* n_g;
PyArrayObject* sigma_g;
PyArrayObject* lapl_g;
PyArrayObject* tau_g;
PyArrayObject* v_g;
PyArrayObject* vx_g; // for vsigma, vtau, vlapl
if (!PyArg_ParseTuple(args, "dOOOOOO",
&c, &n_g, &sigma_g, &lapl_g, &tau_g, &v_g, &vx_g))
return NULL;
xc_mgga_x_tb09_set_params(self->functional[0], c);
xc_mgga_vxc(self->functional[0], PyArray_DIM(n_g, 0),
PyArray_DATA(n_g),
PyArray_DATA(sigma_g),
PyArray_DATA(lapl_g),
PyArray_DATA(tau_g),
PyArray_DATA(v_g),
PyArray_DATA(vx_g),
PyArray_DATA(vx_g),
PyArray_DATA(vx_g));
Py_RETURN_NONE;
}
static PyMethodDef lxcXCFunctional_Methods[] = {
{"is_gga",
(PyCFunction)lxcXCFunctional_is_gga, METH_VARARGS, 0},
{"is_mgga",
(PyCFunction)lxcXCFunctional_is_mgga, METH_VARARGS, 0},
{"calculate_fxc_fd_spinpaired",
(PyCFunction)lxcXCFunctional_CalculateFXC_FD_SpinPaired, METH_VARARGS, 0},
{"calculate",
(PyCFunction)lxcXCFunctional_Calculate, METH_VARARGS, 0},
{"calculate_fxc_spinpaired",
(PyCFunction)lxcXCFunctional_CalculateFXC, METH_VARARGS, 0},
{"tb09",
(PyCFunction)lxcXCFunctional_tb09, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
PyTypeObject lxcXCFunctionalType = {
PyVarObject_HEAD_INIT(NULL, 0)
"lxcXCFunctional",
sizeof(lxcXCFunctionalObject),
0,
(destructor)lxcXCFunctional_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"LibXCFunctional object",
0, 0, 0, 0, 0, 0,
lxcXCFunctional_Methods
};
PyObject * NewlxcXCFunctionalObject(PyObject *obj, PyObject *args)
{
int xc, x, c; /* functionals identifier number */
int nspin; /* XC_UNPOLARIZED or XC_POLARIZED */
if (!scratch) {
scratch = (double*)malloc(LIBXCSCRATCHSIZE*sizeof(double));
const int laplsize = BLOCKSIZE*sizeof(double)*2;
scratch_lapl = (double*)malloc(laplsize);
memset(scratch_lapl,0,laplsize);
scratch_vlapl = (double*)malloc(laplsize);
}
if (!PyArg_ParseTuple(args, "iiii", &xc, &x, &c, &nspin)) {
return NULL;
}
/* checking if the numbers xc x c are valid is done at python level */
lxcXCFunctionalObject *self = PyObject_NEW(lxcXCFunctionalObject,
&lxcXCFunctionalType);
if (self == NULL){
return NULL;
}
assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED);
self->nspin = nspin; /* must be common to x and c, so declared redundantly */
int number,family,familyx,familyc;
if (xc != -1) {
xc_family_from_id(xc,&family,&number);
assert (family != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->xc_functional, xc, nspin);
self->functional[0]=&self->xc_functional;
self->functional[1]=NULL;
} else {
assert (x!=-1 || c!=-1);
if (x!=-1) {
xc_family_from_id(x,&familyx,&number);
assert (familyx != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->x_functional, x, nspin);
}
if (c!=-1) {
xc_family_from_id(c,&familyc,&number);
assert (familyc != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->c_functional, c, nspin);
}
if (x!=-1 && c!=-1) {
/* put most complex functional first */
/* important for later loops over functionals */
if (familyx == XC_FAMILY_MGGA) {
self->functional[0]=&self->x_functional;
self->functional[1]=&self->c_functional;
} else if (familyc == XC_FAMILY_MGGA) {
self->functional[0]=&self->c_functional;
self->functional[1]=&self->x_functional;
} else if (familyx == XC_FAMILY_GGA || familyx == XC_FAMILY_HYB_GGA) {
self->functional[0]=&self->x_functional;
self->functional[1]=&self->c_functional;
} else {
// either c is GGA, or both are LDA (so don't care)
self->functional[0]=&self->c_functional;
self->functional[1]=&self->x_functional;
}
} else if (x!=-1) {
self->functional[0]=&self->x_functional;
self->functional[1]=NULL;
} else if (c!=-1) {
self->functional[0]=&self->c_functional;
self->functional[1]=NULL;
}
}
return (PyObject*)self;
}
PyObject * lxcXCFuncNum(PyObject *obj, PyObject *args)
{
char *funcname;
if (!PyArg_ParseTuple(args, "s", &funcname)) {
return NULL;
}
int num = XC(functional_get_number)(funcname);
if (num != -1)
return Py_BuildValue("i",num);
else
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/m06l.c 0000664 0000000 0000000 00000056363 13164413722 0021717 0 ustar 00root root 0000000 0000000 /************************************************************************
Implements Zhao, Truhlar
Meta-gga M06-Local
Correlation part
************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "xc_mgga.h"
typedef struct m06l_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *c_aux;
XC(func_type) *x_aux;
} m06l_params;
/* derivatives of x and z with respect to rho, grho and tau*/
static void
c_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau)
{
*dxdd = -8./3. * x * 1/rho;
*dxdgd = 1./pow(rho,8./3.);
*dzdd = -5./3. * 2 * tau/pow(rho, 8./3.);
*dzdtau = 2./pow(rho, 5./3.);
}
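/* For reference (consistent with how x and z are built further below):
   x_sigma = |nabla rho_sigma|^2 / rho_sigma^(8/3)  and
   z_sigma = 2 tau_sigma / rho_sigma^(5/3) - C_F,  C_F = (3/5)(6 pi^2)^(2/3),
   which is what the four partial derivatives above correspond to. */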
/* Get g for Eq. (13)*/
static void
c_m06_13(double *x, double *rho, double *g_ab, double *dg_abdd, double *dg_abdgd)
{
/*define the C_ab,i */
static double c_ab0= 0.6042374, c_ab1= 177.6783, c_ab2= -251.3252, c_ab3=76.35173, c_ab4=-12.55699;
double gammaCab = 0.0031 ;
double x_ab, a;
double dg_abdx, dxdd_a, dxdgd_a, dzdd_a, dzdtau_a;
double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b;
/*x = x_ba^2 = x_a^2+x_b^2*/
x_ab = x[0] + x[1];
a= (gammaCab*x_ab/(1+gammaCab*x_ab));
*g_ab = c_ab0*pow(a,0)+ c_ab1*pow(a,1)+ c_ab2*pow(a,2)+c_ab3*pow(a,3)+c_ab4*pow(a,4);
double dadx = gammaCab/pow(1+gammaCab*x_ab, 2.);
dg_abdx = (0.0*c_ab0*pow(a,-1)+ 1.*c_ab1*pow(a,0)+ 2.*c_ab2*pow(a,1)+3.*c_ab3*pow(a,2)+4.*c_ab4*pow(a,3))*dadx;
c_m06l_zx(x[0], 0.0, rho[0], 0.0, &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a);
c_m06l_zx(x[1], 0.0, rho[1], 0.0, &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b);
dg_abdd[0] = dg_abdx*dxdd_a;
dg_abdd[1] = dg_abdx*dxdd_b;
dg_abdgd[0] = dg_abdx*dxdgd_a;
dg_abdgd[1] = 0.0;
dg_abdgd[2] = dg_abdx*dxdgd_b;
}
/* Get g for Eq. (15)*/
static void
c_m06_15(double x, double rho, double *g_ss, double *dg_ssdd, double *dg_ssdgd)
{
/*define the C_ss,i */
static double c_ss0=0.5349466, c_ss1=0.5396620, c_ss2=-31.61217, c_ss3= 51.49592, c_ss4=-29.19613;
double gammaCss = 0.06 ;
double a;
double dg_ssdx, dxdd, dxdgd, dzdd, dzdtau;
/*x = x_a^2 */
a= (gammaCss*x/(1+gammaCss*x));
*g_ss = c_ss0*pow(a,0)+ c_ss1*pow(a,1)+ c_ss2*pow(a,2)+c_ss3*pow(a,3)+c_ss4*pow(a,4);
double dadx = gammaCss/pow(1+gammaCss*x, 2.);
dg_ssdx = (0.0*c_ss0*pow(a,-1)+ 1.*c_ss1*pow(a,0)+ 2.*c_ss2*pow(a,1)+3.*c_ss3*pow(a,2)+4.*c_ss4*pow(a,3))*dadx;
c_m06l_zx(x, 0.0, rho, 0.0, &dxdd, &dxdgd, &dzdd, &dzdtau);
*dg_ssdd = dg_ssdx*dxdd;
*dg_ssdgd = dg_ssdx*dxdgd;
/*printf("g_ss %19.12f\n", *g_ss);*/
}
/* Get h_ab for Eq. (12)*/
static
void c_m06l_hab(double *x, double *z, double *rho, double *tau, double *h_ab, double *dh_abdd, double *dh_abdgd, double *dh_abdtau)
{
/* define the d_ab,i for Eq. (12)*/
static double d_ab0= 0.3957626, d_ab1= -0.5614546, d_ab2= 0.01403963, d_ab3= 0.0009831442, d_ab4= -0.003577176;
double alpha_ab = 0.00304966;
double hab1, dhabdd1[2], dhabdgd1[3], dhabdtau1[2];
double x_ab, z_ab, gamma, xgamma, zgamma;
double dgammadx, dgammadz;
double dgammadd_a, dgammadgd_a, dgammadtau_a;
double dgammadd_b, dgammadgd_b, dgammadtau_b;
double dxdd_a, dxdgd_a, dzdd_a, dzdtau_a;
double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b;
x_ab = x[0] + x[1];
z_ab = z[0] + z[1];
gamma = 1 + alpha_ab*(x_ab + z_ab);
{ /* derivatives of gamma with respect to x and z*/
dgammadx = alpha_ab;
dgammadz = alpha_ab;
}
c_m06l_zx(x[0], z[0], rho[0], tau[0], &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a);
c_m06l_zx(x[1], z[1], rho[1], tau[1], &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b);
{ /* derivatives of gamma with respect to density, gradient and kinetic energy */
dgammadd_a = dgammadx * dxdd_a + dgammadz * dzdd_a;
dgammadd_b = dgammadx * dxdd_b + dgammadz * dzdd_b;
dgammadgd_a = dgammadx * dxdgd_a;
dgammadgd_b = dgammadx * dxdgd_b;
dgammadtau_a = dgammadz * dzdtau_a;
dgammadtau_b = dgammadz * dzdtau_b;
}
xgamma = x_ab/gamma;
zgamma = z_ab/gamma;
/* we initialize h and collect the terms*/
hab1 = 0.0;
dhabdd1[0] = dhabdd1[1] = 0.0;
dhabdgd1[0] = dhabdgd1[1] = dhabdgd1[2] = 0.0;
dhabdtau1[0] = dhabdtau1[1] = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
hab1 += d_ab0/gamma;
dhabdd1[0] += -d_ab0*dgammadd_a/g2;
dhabdd1[1] += -d_ab0*dgammadd_b/g2;
dhabdgd1[0] += -d_ab0*dgammadgd_a/g2;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += -d_ab0*dgammadgd_b/g2;
dhabdtau1[0] += -d_ab0*dgammadtau_a/g2 ;
dhabdtau1[1] += -d_ab0*dgammadtau_b/g2 ;
}
{ /* second term */
double g3=pow(gamma,3.);
hab1 += (d_ab1*xgamma + d_ab2*zgamma)/gamma;
dhabdd1[0] += (gamma*(d_ab1*dxdd_a+d_ab2*dzdd_a)-2*dgammadd_a*(d_ab1*x_ab+d_ab2*z_ab))/g3;
dhabdd1[1] += (gamma*(d_ab1*dxdd_b+d_ab2*dzdd_b)-2*dgammadd_b*(d_ab1*x_ab+d_ab2*z_ab))/g3;
dhabdgd1[0] += (d_ab1*dxdgd_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_a)/g3;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += (d_ab1*dxdgd_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_b)/g3;
dhabdtau1[0] += (d_ab2*dzdtau_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_a)/g3;
dhabdtau1[1] += (d_ab2*dzdtau_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_b)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
hab1 += (d_ab3*xgamma*xgamma+d_ab4*xgamma*zgamma)/gamma;
dhabdd1[0] += (-3*dgammadd_a*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_a*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_a*gamma)/g4;
dhabdd1[1] += (-3*dgammadd_b*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_b*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_b*gamma)/g4;
dhabdgd1[0] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_a+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_a)/g4;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_b+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_b)/g4;
dhabdtau1[0] += (d_ab4*x_ab*dzdtau_a*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_a)/g4;
dhabdtau1[1] += (d_ab4*x_ab*dzdtau_b*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_b)/g4;
}
*h_ab = hab1;
//derivatives
dh_abdd[0] = dhabdd1[0];
dh_abdd[1] = dhabdd1[1];
dh_abdgd[0] = dhabdgd1[0];
dh_abdgd[1] = dhabdgd1[1];
dh_abdgd[2] = dhabdgd1[2];
dh_abdtau[0] = dhabdtau1[0];
dh_abdtau[1] = dhabdtau1[1];
}
/* Get h_ss for Eq. (14)*/
static
void c_m06l_hss(double x, double z, double rho, double tau, double *h_ss, double *dh_ssdd, double *dh_ssdgd, double *dh_ssdtau)
{
/* define the d_ab,i for Eq. (12)*/
static double d_ss0= 0.4650534, d_ss1= 0.1617589, d_ss2= 0.1833657, d_ss3= 0.0004692100, d_ss4= -0.004990573;
double alpha_ss = 0.00515088;
double hss1, dhssdd1, dhssdgd1, dhssdtau1;
double gamma, xgamma, zgamma;
double dgammadx, dgammadz;
double dgammadd, dgammadgd, dgammadtau;
double dxdd, dxdgd, dzdd, dzdtau;
gamma = 1 + alpha_ss*(x + z);
{ /* derivatives of gamma with respect to x and z*/
dgammadx = alpha_ss;
dgammadz = alpha_ss;
}
c_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{ /* derivatives of gamma with respect to density, gradient and kinetic energy */
dgammadd = dgammadx * dxdd + dgammadz * dzdd;
dgammadgd = dgammadx * dxdgd;
dgammadtau = dgammadz * dzdtau;
}
xgamma = x/gamma;
zgamma = z/gamma;
/* we initialize h and collect the terms*/
hss1 = 0.0;
dhssdd1 = 0.0;
dhssdgd1 = 0.0;
dhssdtau1 = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
hss1 += d_ss0/gamma;
dhssdd1 += -d_ss0*dgammadd/g2;
dhssdgd1 += -d_ss0*dgammadgd/g2;
dhssdtau1 += -d_ss0*dgammadtau/g2 ;
}
{ /* second term */
double g3=pow(gamma,3.);
hss1 += (d_ss1*xgamma + d_ss2*zgamma)/gamma;
dhssdd1 += (gamma*(d_ss1*dxdd+d_ss2*dzdd)-2*dgammadd*(d_ss1*x+d_ss2*z))/g3;
dhssdgd1 += (d_ss1*dxdgd*gamma -2*(d_ss1*x+d_ss2*z)*dgammadgd)/g3;
dhssdtau1 += (d_ss2*dzdtau*gamma -2*(d_ss1*x+d_ss2*z)*dgammadtau)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
hss1 += (d_ss3*xgamma*xgamma+d_ss4*xgamma*zgamma)/gamma;
dhssdd1 += (-3*dgammadd*(d_ss3*pow(x,2.)+d_ss4*x*z)+dxdd*gamma*(2*d_ss3*x+d_ss4*z)+d_ss4*x*dzdd*gamma)/g4;
dhssdgd1 += (-3*x*(d_ss3*x+d_ss4*z)*dgammadgd+gamma*(2*d_ss3*x+d_ss4*z)*dxdgd)/g4;
dhssdtau1 += (d_ss4*x*dzdtau*gamma-3*x*(d_ss3*x+d_ss4*z)*dgammadtau)/g4;
}
*h_ss = hss1;
//derivatives
*dh_ssdd = dhssdd1;
*dh_ssdgd = dhssdgd1;
*dh_ssdtau = dhssdtau1;
}
static void
c_m06l_para(m06l_params *p, const double *rho, const double *sigmatmp, const double *tautmp,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double rho2[2], rho2s[2], x[2], z[2], zc_ss[2];
double tau2[2], tauw[2], dens, dens1, sigma[3];
double g_ss[2], h_ss[2], Ec_ss[2], D_ss[2];
double g_ab=0.0, h_ab=0.0, Ec_ab=0.0;
double exunif_ss[2], vxunif_up[2], vxunif_dn[2], vxunif_ss[2];
double exunif =0.0, exunif_ab=0.0, vxunif[2];
//derivatives
double dh_ssdd[2], dh_ssdgd[3], dh_ssdtau[2];
double dg_ssdd[2], dg_ssdgd[3] ;
double dh_abdd[2], dh_abdgd[3], dh_abdtau[2];
double dg_abdd[2], dg_abdgd[3];
double dEc_ssdd[2], dEc_ssdgd[3], dEc_ssdtau[2];
double dEc_abdd[2], dEc_abdgd[3], dEc_abdtau[2];
double dD_ssdd[2], dD_ssdgd[3], dD_ssdtau[2], dD_ssdx[2], dD_ssdz[2];
double dxdd[2], dxdgd[2], dzdd[2], dzdtau[2];
const double Cfermi= (3./5.)*pow(6*M_PI*M_PI,2./3.);
/* put in by cpo for const reasons */
double sigma_[3],tau[2];
sigma_[0] = sigmatmp[0];
sigma_[1] = sigmatmp[1];
sigma_[2] = sigmatmp[2];
tau[0] = tautmp[0];
tau[1] = tautmp[1];
/*calculate |nabla rho|^2 */
sigma_[0] = max(MIN_GRAD*MIN_GRAD, sigma_[0]);
tauw[0] = max(sigma_[0]/(8.0*rho[0]), 1.0e-12);
tau[0] = max(tauw[0], tau[0]);
dens1 = rho[0]+rho[1];
if(p->common.nspin== XC_UNPOLARIZED)
{
tau[1] = 0.0;
rho2[0] = rho[0]/2.;
rho2[1] = rho[0]/2.;
sigma[0] = sigma_[0]/4.;
sigma[1] = sigma_[0]/4.;
sigma[2] = sigma_[0]/4.;
dens = rho[0];
tau2[0] = tau[0]/2.;
tau2[1] = tau[0]/2.;
}else{
sigma_[2] = max(MIN_GRAD*MIN_GRAD, sigma_[2]);
tauw[1] = max(sigma_[2]/(8.0*rho[1]), 1.0e-12);
tau[1] = max(tauw[1], tau[1]);
rho2[0]=rho[0];
rho2[1]=rho[1];
sigma[0] = sigma_[0];
sigma[1] = sigma_[1];
sigma[2] = sigma_[2];
dens = rho[0]+rho[1];
tau2[0] =tau[0];
tau2[1] =tau[1];
}
//get the e_LDA(rho_a,b)
const int np = 1;
XC(lda_exc_vxc)(p->c_aux, np, rho2, &exunif, vxunif);
exunif = exunif*dens;
/*==============get the E_sigma part================*/
/*============ spin up =============*/
rho2s[0]=rho2[0];
rho2s[1]=0.;
//get the e_LDA(rho_up,0)
XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[0]), vxunif_up);
exunif_ss[0] = exunif_ss[0] * rho2s[0];
vxunif_ss[0] = vxunif_up[0];
/*define variables for rho_up and zc in order to avoid x/0 -> D_ss = -inf */
x[0] = sigma[0]/(pow(rho2s[0], 8./3.));
z[0] = 2*tau2[0]/pow(rho2s[0],5./3.) - Cfermi;
zc_ss[0] = 2*tau2[0]/pow(rho2s[0],5./3.);
/*D_ss = 1 -x/4*(z + Cf), z+Cf = 2*tau2/pow(rho2s[0],5./3.) = zc */
D_ss[0] = 1 - x[0]/(4. * zc_ss[0]);
//derivatives for D_up
dD_ssdx[0] = -1/(4 * zc_ss[0]);
dD_ssdz[0] = 4 * x[0]/pow(4.*zc_ss[0],2.);
c_m06l_zx(x[0], z[0], rho2s[0], tau2[0], &(dxdd[0]), &(dxdgd[0]), &(dzdd[0]), &(dzdtau[0]));
dD_ssdd[0] = dD_ssdx[0] * dxdd[0] + dD_ssdz[0] * dzdd[0];
dD_ssdgd[0] = dD_ssdx[0] * dxdgd[0];
dD_ssdtau[0] = dD_ssdz[0] * dzdtau[0];
/*build up Eq. (14): Ec_sigmasigma*/
c_m06_15(x[0], rho2s[0], &(g_ss[0]), &(dg_ssdd[0]), &(dg_ssdgd[0]));
c_m06l_hss(x[0], z[0], rho2s[0], tau2[0], &(h_ss[0]), &(dh_ssdd[0]), &(dh_ssdgd[0]), &(dh_ssdtau[0]));
Ec_ss[0] = (exunif_ss[0] * (g_ss[0]+h_ss[0]) * D_ss[0]);
//printf("Ec_up %.9e\n", Ec_ss[0]);
/*============== spin down =============*/
rho2s[0]=rho2[1];
rho2s[1]=0.;
//get the e_LDA(0,rho_dn)
XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[1]), vxunif_dn);
exunif_ss[1] = exunif_ss[1] * rho2s[0];
vxunif_ss[1] = vxunif_dn[0];
/*define variables for rho_beta*/
x[1] = sigma[2]/(pow(rho2s[0], 8./3.));
z[1] = 2*tau2[1]/pow(rho2s[0],5./3.) - Cfermi;
zc_ss[1] = 2*tau2[1]/pow(rho2s[0],5./3.);
//printf("x1 %.9e, zc_ss%.9e\n", x[1], zc_ss[1]);
D_ss[1] = 1 - x[1]/(4.*zc_ss[1]);
//derivatives for D_dn
dD_ssdx[1] = - 1/(4*zc_ss[1]);
dD_ssdz[1] = 4*x[1]/pow(4.*zc_ss[1],2.);
c_m06l_zx(x[1], z[1], rho2s[0], tau2[1], &(dxdd[1]), &(dxdgd[1]), &(dzdd[1]), &(dzdtau[1]));
dD_ssdd[1] = dD_ssdx[1] * dxdd[1] + dD_ssdz[1] * dzdd[1];
dD_ssdgd[2] = dD_ssdx[1] * dxdgd[1];
dD_ssdtau[1] = dD_ssdz[1] * dzdtau[1];
c_m06_15(x[1], rho2s[0], &(g_ss[1]), &(dg_ssdd[1]), &(dg_ssdgd[2]));
c_m06l_hss(x[1], z[1], rho2s[0], tau2[1], &(h_ss[1]), &(dh_ssdd[1]), &(dh_ssdgd[2]), &(dh_ssdtau[1]));
//printf("exunif_ss %.9e, (g_ss[1]+h_ss[1])%.9e, D_ss %.9e\n", exunif_ss[1],(g_ss[1]+h_ss[1]),D_ss[1]);
Ec_ss[1] = (exunif_ss[1] * (g_ss[1]+h_ss[1]) * D_ss[1]);
//printf("Ec_dn %.9e\n", Ec_ss[1]);
// Derivatives for Ec_up and Ec_dn with respect to density and kinetic energy
int i;
for(i=0; i<2; i++){
dEc_ssdd[i] = exunif_ss[i] * dh_ssdd[i] * D_ss[i] + vxunif_ss[i] * h_ss[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdd[i] +
exunif_ss[i] * dg_ssdd[i] * D_ss[i] + vxunif_ss[i] * g_ss[i] * D_ss[i] + exunif_ss[i] * g_ss[i] * dD_ssdd[i];
dEc_ssdtau[i] = exunif_ss[i] * dh_ssdtau[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdtau[i] + exunif_ss[i] * g_ss[i] * dD_ssdtau[i];
}
// Derivatives for Ec_up and Ec_dn with respect to gradient
dEc_ssdgd[0] = exunif_ss[0] * dh_ssdgd[0] * D_ss[0] + exunif_ss[0] * h_ss[0] * dD_ssdgd[0] +
exunif_ss[0] * dg_ssdgd[0] * D_ss[0] + exunif_ss[0] * g_ss[0] * dD_ssdgd[0];
dEc_ssdgd[2] = exunif_ss[1] * dh_ssdgd[2] * D_ss[1] + exunif_ss[1] * h_ss[1] * dD_ssdgd[2] +
exunif_ss[1] * dg_ssdgd[2] * D_ss[1] + exunif_ss[1] * g_ss[1] * dD_ssdgd[2];
/*==============get the E_ab part========================*/
exunif_ab = exunif - exunif_ss[0] - exunif_ss[1];
//x_ab = sigmatot[0] /(pow(rho2[0], 8./3.)) + sigmatot[2] /(pow(rho2[1], 8./3.));
//z_ab = 2*tau2[0]/pow(rho2[0],5./3.) + 2*tau2[1]/pow(rho2[1],5./3.) - 2*Cfermi;
/*build up Eq. (12): Ec_alphabeta*/
c_m06_13(x, rho2, &g_ab, dg_abdd, dg_abdgd);
c_m06l_hab(x, z, rho2, tau2, &h_ab, dh_abdd, dh_abdgd, dh_abdtau);
Ec_ab = exunif_ab * (g_ab+h_ab);
// Derivatives for Ec_ab with respect to density and kinetic energy
for(i=0; i<2; i++){
dEc_abdd[i] = exunif_ab * (dh_abdd[i]+ dg_abdd[i]) + (vxunif[i]- vxunif_ss[i]) * (g_ab+h_ab);
dEc_abdtau[i] = exunif_ab * dh_abdtau[i];
}
// Derivatives for Ec_ab with respect to gradient
for(i=0; i<3; i++){
dEc_abdgd[i] = exunif_ab * (dh_abdgd[i] + dg_abdgd[i]);
}
/*==============get the total energy E_c= E_up + E_dn + E_ab========================*/
/*==============================and derivatives=====================================*/
*energy = (Ec_ss[0] + Ec_ss[1] + Ec_ab)/dens1;
//printf("Ec_ss %.9e, Ec_ss %.9e, Ec_ab %.9e\n", Ec_ss[0], Ec_ss[1], Ec_ab);
//derivative for the total correlation energy
if(p->common.nspin== XC_UNPOLARIZED)
{
dedd[0]=dEc_ssdd[0] + dEc_abdd[0];
dedd[1]=0.0;
vsigma[0]= (dEc_ssdgd[0] + dEc_abdgd[0])/2.;
vsigma[1]= 0.0;
vsigma[2]= 0.0;
dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0];
dedtau[1]= 0.0;
}else{
dedd[0]=dEc_ssdd[0] + dEc_abdd[0];
dedd[1]=dEc_ssdd[1] + dEc_abdd[1];
vsigma[0]= dEc_ssdgd[0] + dEc_abdgd[0];
vsigma[1]= 0.0;
vsigma[2]= dEc_ssdgd[2] + dEc_abdgd[2];
dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0];
dedtau[1]= dEc_ssdtau[1] + dEc_abdtau[1];
}
}
static void
XC(mgga_c_m06l)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
c_m06l_para(p, rho, sigma, tau, e, dedd, vsigma, dedtau);
}
/* derivatives of x and z with respect to rho, grho and tau: Eq.(1) and Eq.(3)*/
static void
x_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau)
{
*dxdd = -8./3. * x * 1/rho;
*dxdgd = 1./pow(rho,8./3.);
*dzdd = -5./3. * 2* tau/pow(rho, 8./3.);
*dzdtau = 2./pow(rho, 5./3.);
}
/* Build gamma and its derivatives with respect to rho, grho and tau: Eq. (4)*/
static void
x_m06l_gamma(double x, double z, double rho, double tau, double *gamma, double *dgammadd, double *dgammadgd, double *dgammadtau)
{
static double alpha = 0.00186726; /*set alpha of Eq. (4)*/
double dgammadx, dgammadz;
double dxdd, dxdgd, dzdd, dzdtau;
*gamma = 1 + alpha*(x + z);
/*printf("gamma %19.12f\n", *gamma);*/
{ /* derivatives */
dgammadx = alpha;
dgammadz = alpha;
}
x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{
*dgammadd = dgammadx*dxdd + dgammadz*dzdd;
*dgammadgd = dgammadx*dxdgd;
*dgammadtau = dgammadz*dzdtau;
}
}
/************************************************************************
Implements Zhao, Truhlar
Meta-gga M06-Local
Exchange part
************************************************************************/
/* calculate h and h derivatives with respect to rho, grho and tau: Equation (5) */
static
void x_m06l_h(double x, double z, double rho, double tau, double *h, double *dhdd, double *dhdgd, double *dhdtau)
{
/* parameters for h(x_sigma,z_sigma) of Eq. (5)*/
static double d0=0.6012244, d1=0.004748822, d2=-0.008635108, d3=-0.000009308062, d4=0.00004482811;
double h1, dhdd1, dhdgd1, dhdtau1;
double gamma, dgammadd, dgammadgd, dgammadtau;
double xgamma, zgamma;
double dxdd, dxdgd, dzdd, dzdtau;
x_m06l_gamma(x, z, rho, tau, &gamma, &dgammadd, &dgammadgd, &dgammadtau);
xgamma = x/gamma;
zgamma = z/gamma;
/* we initialize h and its derivatives and collect the terms*/
h1 = 0.0;
dhdd1 = 0.0;
dhdgd1 = 0.0;
dhdtau1 = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
h1 += d0/gamma;
dhdd1 += -d0*dgammadd/g2;
dhdgd1 += -d0*dgammadgd/g2;
dhdtau1 += -d0*dgammadtau/g2 ;
}
x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{ /* second term */
double g3=pow(gamma,3.);
h1 += (d1*xgamma + d2*zgamma)/gamma;
dhdd1 += (gamma*(d1*dxdd+d2*dzdd)-2*dgammadd*(d1*x+d2*z))/g3;
dhdgd1 += (d1*dxdgd*gamma -2*(d1*x+d2*z)*dgammadgd)/g3;
dhdtau1 += (d2*dzdtau*gamma -2*(d1*x+d2*z)*dgammadtau)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
h1 += (d3*xgamma*xgamma+d4*xgamma*zgamma)/gamma;
dhdd1 += (-3*dgammadd*(d3*pow(x,2.)+d4*x*z)+dxdd*gamma*(2*d3*x+d4*z)+d4*x*dzdd*gamma)/g4;
dhdgd1 += (-3*x*(d3*x+d4*z)*dgammadgd+gamma*(2*d3*x+d4*z)*dxdgd)/g4;
dhdtau1 += (d4*x*dzdtau*gamma-3*x*(d3*x+d4*z)*dgammadtau)/g4;
}
*h = h1;
/*printf(" h %19.12f\n", *h);*/
*dhdd = dhdd1;
*dhdgd =dhdgd1;
*dhdtau = dhdtau1;
}
/* f(w) and its derivatives with respect to rho and tau*/
static void
x_m06l_fw(double rho, double tau, double *fw, double *dfwdd, double *dfwdtau)
{
/*define the parameters for fw of Eq. (8) as in the reference paper*/
static double a0= 0.3987756, a1= 0.2548219, a2= 0.3923994, a3= -2.103655, a4= -6.302147, a5= 10.97615,
a6= 30.97273, a7=-23.18489, a8=-56.73480, a9=21.60364, a10= 34.21814, a11= -9.049762;
double tau_lsda, t, w;
double dtdd, dtdtau;
double dfwdw, dwdt, dtau_lsdadd;
double aux = (3./10.) * pow((6*M_PI*M_PI),2./3.); /*3->6 for nspin=2 */
tau_lsda = aux * pow(rho,5./3.);
t = tau_lsda/tau;
dtdtau = -t/tau;
w = (t - 1)/(t + 1);
*fw = a0*pow(w,0.)+a1*pow(w,1.)+a2*pow(w,2.)+a3*pow(w,3.)+a4*pow(w,4.)+
+ a5*pow(w,5.)+a6*pow(w,6.)+a7*pow(w,7.)+a8*pow(w,8.)+a9*pow(w,9.)+a10*pow(w,10.)+a11*pow(w,11.);
dfwdw = 0.0*a0*pow(w,-1)+1.0*a1*pow(w,0.)+2.0*a2*pow(w,1.)+3.0*a3*pow(w,2.)+4.0*a4*pow(w,3.)+
+ 5.0*a5*pow(w,4.)+6.0*a6*pow(w,5.)+7.0*a7*pow(w,6.)+8.0*a8*pow(w,7.)+9.0*a9*pow(w,8.)+
+ 10*a10*pow(w,9.)+11*a11*pow(w,10.);
dwdt = 2/pow((t + 1),2.);
dtau_lsdadd = aux * 5./3.* pow(rho,2./3.);
dtdd = dtau_lsdadd/tau;
*dfwdd = dfwdw * dwdt * dtdd;
*dfwdtau = dfwdw * dwdt * dtdtau;
}
static void
x_m06l_para(m06l_params *pt, double rho, double sigma, double tau, double *energy, double *dedd, double *vsigma, double *dedtau)
{
/*Build Eq. (6) collecting the terms Fx_PBE, fw, e_lsda and h*/
double grad, tauw, tau2, x, z;
double rho2[2],sigmatot[3];
double F_PBE, de_PBEdd[2], de_PBEdgd[3];
double h, dhdd, dhdgd, dhdtau;
double fw, dfwdd, dfwdtau;
double epsx_lsda, depsx_lsdadd;
const double Cfermi = (3./5.) * pow(6*M_PI*M_PI,2./3.);
/* calculate |nabla rho|^2 */
grad = sigma;
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*rho),1.0e-12); /* tau^W = |nabla rho|^2/ 8rho */
tau = max(tau, tauw);
rho2[0]=rho/2.;
rho2[1]=0.0;
sigmatot[0] = grad/4.;
sigmatot[1] = 0.0;
sigmatot[2] = 0.0;
tau2 =tau/2.;
/* get the uniform gas energy and potential; note that a MINUS sign was missing in the paper */
epsx_lsda = -(3./2.)*pow(3./(4*M_PI),1./3.)*pow(rho2[0],4./3.);
depsx_lsdadd = -2*pow(3./(4*M_PI),1./3.)*pow(rho2[0],1./3.);
/*get Fx for PBE*/
const int np = 1;
XC(gga_exc_vxc)(pt->x_aux, np, rho2, sigmatot, &F_PBE, de_PBEdd, de_PBEdgd);
/* define x and z from Eq. (1) and Eq. (3) NOTE: we build directly x^2 */
x = grad/(4*pow(rho2[0], 8./3.));
z = 2*tau2/pow(rho2[0],5./3.) - Cfermi; /*THERE IS A 2 IN FRONT AS IN THEOR. CHEM. ACCOUNT 120 215 (2008)*/
/*get h and fw*/
x_m06l_h(x, z, rho2[0], tau2, &h, &dhdd, &dhdgd, &dhdtau);
x_m06l_fw(rho2[0], tau2, &fw, &dfwdd, &dfwdtau);
{ /* Eq. (6) E_x = Int F_PBE*fw + exunif*h, the factor 2 accounts for spin. */
*energy = 2*(F_PBE*rho2[0] *fw + epsx_lsda *h);
*dedd = (de_PBEdd[0] *fw + F_PBE*rho2[0] * dfwdd+ depsx_lsdadd *h + epsx_lsda * dhdd);
*dedtau = (F_PBE * dfwdtau *rho2[0] + epsx_lsda * dhdtau);
*vsigma = (de_PBEdgd[0] *fw + epsx_lsda*dhdgd)/2.;
}
}
void
XC(mgga_x_m06l)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
m06l_params *par = (m06l_params*)p;
if(par->common.nspin == XC_UNPOLARIZED){
double en;
x_m06l_para(p, rho[0], sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_m06l_para(p, rhoa[0], 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_m06l_para(p, rhob[0], 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
static void m06l_init(void *p)
{
m06l_params *par = (m06l_params*)p;
par->c_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->c_aux, XC_LDA_C_PW, XC_POLARIZED);
par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_GGA_X_PBE, XC_POLARIZED);
}
static void m06l_end(void *p)
{
m06l_params *par = (m06l_params*)p;
XC(func_end)(par->c_aux);
free(par->c_aux);
XC(func_end)(par->x_aux);
free(par->x_aux);
}
const mgga_func_info m06l_info = {
sizeof(m06l_params),
&m06l_init,
&m06l_end,
&XC(mgga_x_m06l),
&XC(mgga_c_m06l),
};
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/pbe.c 0000664 0000000 0000000 00000011642 13164413722 0021676 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include "xc_gpaw.h"
double pbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
if (par->gga)
{
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
double x = 1.0 + MU * s2 / par->kappa;
double Fx = 1.0 + par->kappa - par->kappa / x;
double dFxds2 = MU / (x * x);
double ds2drs = 8.0 * c * a2 / rs;
//double ds2drs = 8.0 * s2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
}
return e;
}
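/* Perdew-Wang 1992 parametrization of the local correlation energy,
   evaluated below as a function of rtrs = sqrt(rs):
     G(rs) = -2 A (1 + alpha1 rs)
             * ln[1 + 1/(2 A (beta1 rs^(1/2) + beta2 rs + beta3 rs^(3/2) + beta4 rs^2))] */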
/* inline */ double G(double rtrs, double A, double alpha1,
double beta1, double beta2, double beta3, double beta4,
double* dGdrs)
{
double Q0 = -2.0 * A * (1.0 + alpha1 * rtrs * rtrs);
double Q1 = 2.0 * A * rtrs * (beta1 +
rtrs * (beta2 +
rtrs * (beta3 +
rtrs * beta4)));
double G1 = Q0 * log(1.0 + 1.0 / Q1);
double dQ1drs = A * (beta1 / rtrs + 2.0 * beta2 +
rtrs * (3.0 * beta3 + 4.0 * beta4 * rtrs));
*dGdrs = -2.0 * A * alpha1 * G1 / Q0 - Q0 * dQ1drs / (Q1 * (Q1 + 1.0));
return G1;
}
/*
* In[1]:= H=g Log[1+b/g t^2(1+a t^2)/(1+a t^2 + a^2 t^4)]
*
* 2 2
* b t (1 + a t )
* Out[1]= g Log[1 + --------------------]
* 2 2 4
* g (1 + a t + a t )
*
* In[4]:= Simplify[D[H,t]]
*
* 2
* 2 b g t (1 + 2 a t )
* Out[4]= ---------------------------------------------------------
* 2 2 4 2 2 4 2 4
* (1 + a t + a t ) (g + b t + a g t + a b t + a g t )
*
*/
double pbe_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2)
{
double rtrs = sqrt(rs);
double de0drs;
double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294,
&de0drs);
double e;
double xp = 117.0;
double xm = 117.0;
if (spinpol)
{
double de1drs;
double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662,
0.62517, &de1drs);
double dalphadrs;
double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026,
0.49671, &dalphadrs);
dalphadrs = -dalphadrs;
double zp = 1.0 + zeta;
double zm = 1.0 - zeta;
xp = pow(zp, THIRD);
xm = pow(zm, THIRD);
double f = CC1 * (zp * xp + zm * xm - 2.0);
double f1 = CC2 * (xp - xm);
double zeta2 = zeta * zeta;
double zeta3 = zeta2 * zeta;
double zeta4 = zeta2 * zeta2;
double x = 1.0 - zeta4;
*dedrs = (de0drs * (1.0 - f * zeta4) +
de1drs * f * zeta4 +
dalphadrs * f * x * IF2);
*dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) +
f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2));
e = e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4;
}
else
{
*dedrs = de0drs;
e = e0;
}
if (gga)
{
double n2 = n * n;
double t2;
double y;
double phi = 117.0;
double phi2 = 117.0;
double phi3 = 117.0;
if (spinpol)
{
phi = 0.5 * (xp * xp + xm * xm);
phi2 = phi * phi;
phi3 = phi * phi2;
t2 = C3 * a2 * rs / (n2 * phi2);
y = -e / (GAMMA * phi3);
}
else
{
t2 = C3 * a2 * rs / n2;
y = -e / GAMMA;
}
double x = exp(y);
double A;
if (x != 1.0)
A = BETA / (GAMMA * (x - 1.0));
else
A = BETA / (GAMMA * y);
double At2 = A * t2;
double nom = 1.0 + At2;
double denom = nom + At2 * At2;
double H = GAMMA * log( 1.0 + BETA * t2 * nom / (denom * GAMMA));
double tmp = (GAMMA * BETA /
(denom * (BETA * t2 * nom + GAMMA * denom)));
double tmp2 = A * A * x / BETA;
double dAdrs = tmp2 * *dedrs;
if (spinpol)
{
H *= phi3;
tmp *= phi3;
dAdrs /= phi3;
}
double dHdt2 = (1.0 + 2.0 * At2) * tmp;
double dHdA = -At2 * t2 * t2 * (2.0 + At2) * tmp;
*dedrs += dHdt2 * 7 * t2 / rs + dHdA * dAdrs;
*deda2 = dHdt2 * C3 * rs / n2;
if (spinpol)
{
double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0;
double dAdzeta = tmp2 * (*dedzeta -
3.0 * e * dphidzeta / phi) / phi3;
*dedzeta += ((3.0 * H / phi - dHdt2 * 2.0 * t2 / phi ) * dphidzeta +
dHdA * dAdzeta);
*deda2 /= phi2;
}
e += H;
}
return e;
}
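The Mathematica derivation quoted in the comment block of pbe.c above gives a closed form for dH/dt. The standalone snippet below is an illustrative check and is not part of the GPAW sources: it compares that closed form against a central finite difference of H(t) for sample values of a, b and g (b and g are chosen to mirror BETA and GAMMA from xc_gpaw.h; a is arbitrary).
#include <stdio.h>
#include <math.h>

/* H(t) = g*log(1 + (b/g)*t^2*(1 + a*t^2)/(1 + a*t^2 + a^2*t^4)) */
static double H(double a, double b, double g, double t)
{
  double t2 = t * t;
  double f1 = t2 * (1.0 + a * t2);
  double f3 = 1.0 + a * t2 + a * a * t2 * t2;
  return g * log(1.0 + b * f1 / (g * f3));
}

/* Out[4] from the comment above */
static double dHdt_closed(double a, double b, double g, double t)
{
  double t2 = t * t, t4 = t2 * t2;
  double num = 2.0 * b * g * t * (1.0 + 2.0 * a * t2);
  double den = (1.0 + a * t2 + a * a * t4) *
               (g + b * t2 + a * g * t2 + a * b * t4 + a * a * g * t4);
  return num / den;
}

int main(void)
{
  double a = 0.3, b = 0.066725, g = 0.031091, t = 1.7, h = 1e-6;
  double fd = (H(a, b, g, t + h) - H(a, b, g, t - h)) / (2.0 * h);
  printf("closed form: %.10f  finite difference: %.10f\n",
         dHdt_closed(a, b, g, t), fd);
  return 0;
}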
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/pw91.c 0000664 0000000 0000000 00000012245 13164413722 0021730 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include "xc_gpaw.h"
double G(double rtrs, double A, double alpha1,
double beta1, double beta2, double beta3, double beta4,
double* dGdrs);
double pw91_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
if (par->gga)
{
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
double s = sqrt(s2);
double f1 = 7.7956 * s;
double f2 = 0.19645 * asinh(f1);
double f3 = 0.1508 * exp(-100.0 * s2);
double f4 = 0.004 * s2 * s2;
double f5 = 1.0 + s * f2;
double f6 = f5 + f4;
double f7 = 0.2743 - f3;
double f8 = f5 + f7 * s2;
double Fx = f8 / f6;
double f9 = 0.5 * 7.7956 * 0.19645 / sqrt(1.0 + f1 * f1);
if (s < 0.00001)
f9 += 0.5 * 7.7956 * 0.19645;
else
f9 += 0.5 * f2 / s;
double dFxds2 = ((f9 + f7 + 100.0 * f3 * s2) * f6 -
f8 * (f9 + 0.008 * s2)) / (f6 * f6);
double ds2drs = 8.0 * s2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
}
return e;
}
double pw91_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2)
{
double rtrs = sqrt(rs);
double de0drs;
double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294,
&de0drs);
double e;
double xp = 117.0;
double xm = 117.0;
if (spinpol)
{
double de1drs;
double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662,
0.62517, &de1drs);
double dalphadrs;
double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026,
0.49671, &dalphadrs);
dalphadrs = -dalphadrs;
double zp = 1.0 + zeta;
double zm = 1.0 - zeta;
xp = pow(zp, THIRD);
xm = pow(zm, THIRD);
double f = CC1 * (zp * xp + zm * xm - 2.0);
double f1 = CC2 * (xp - xm);
double zeta2 = zeta * zeta;
double zeta3 = zeta2 * zeta;
double zeta4 = zeta2 * zeta2;
double x = 1.0 - zeta4;
*dedrs = (de0drs * (1.0 - f * zeta4) +
de1drs * f * zeta4 +
dalphadrs * f * x * IF2);
*dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) +
f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2));
e = e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4;
}
else
{
*dedrs = de0drs;
e = e0;
}
if (gga)
{
double n2 = n * n;
double t2;
double y;
double phi;
double phi2;
double phi3;
double phi4;
double GAMMAPW91 = BETA * BETA / 0.18;
if (spinpol)
{
phi = 0.5 * (xp * xp + xm * xm);
phi2 = phi * phi;
phi3 = phi * phi2;
phi4 = phi * phi3;
}
else
{
phi = 1.0;
phi2 = 1.0;
phi3 = 1.0;
phi4 = 1.0;
}
t2 = C3 * a2 * rs / (n2 * phi2);
y = -e / (GAMMAPW91 * phi3);
double x = exp(y);
double A = BETA / (GAMMAPW91 * (x - 1.0));
double At2 = A * t2;
double nom = 1.0 + At2;
double denom = nom + At2 * At2;
double H0 = (phi3 * GAMMAPW91 *
log(1.0 + BETA * t2 * nom / (denom * GAMMAPW91)));
double tmp = (phi3 * GAMMAPW91 * BETA /
(denom * (BETA * t2 * nom + GAMMAPW91 * denom)));
double tmp2 = A * A * x / BETA;
double dAdrs = tmp2 * *dedrs / phi3;
const double KK = 66.343643960645011; // 100*4/pi*(4/pi/9)**(1/3.)
const double XNU = 15.75592;
const double Cc0 = 0.004235;
const double Cx = -0.001667212;
const double K1 = 0.002568;
const double K2 = 0.023266;
const double K3 = 7.389e-6;
const double K4 = 8.723;
const double K5 = 0.472;
const double K6 = 7.389e-2;
double f0 = XNU * exp(-KK * rs * phi4 * t2);
double rs2 = rs * rs;
double f1 = K1 + K2 * rs + K3 * rs2;
double f2 = 1.0 + K4 * rs + K5 * rs2 + K6 * rs2 * rs;
double f3 = -10.0 * Cx / 7.0 - Cc0 + f1 / f2;
double H1 = f0 * phi3 * f3 * t2;
double dH1drs = (-KK * phi4 * t2 * H1 + f0 * phi3 * t2 *
((K2 + 2.0 * K3 * rs) * f2 -
(K4 + 2.0 * K5 * rs + 3.0 * K6 * rs2) * f1) / (f2 * f2));
double dH1dt2 = -KK * rs * phi4 * H1 + f0 * phi3 * f3;
double dH1dphi = (-4.0 * KK * rs * phi3 * H1 + 3.0 * f0 * phi2 * f3) * t2;
double dH0dt2 = (1.0 + 2.0 * At2) * tmp;
double dH0dA = -At2 * t2 * t2 * (2.0 + At2) * tmp;
*dedrs += (dH0dt2 + dH1dt2) * 7 * t2 / rs + dH0dA * dAdrs + dH1drs;
*deda2 = (dH0dt2 + dH1dt2) * C3 * rs / n2;
if (spinpol)
{
double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0;
double dAdzeta = tmp2 * (*dedzeta -
3.0 * e * dphidzeta / phi) / phi3;
*dedzeta += ((3.0 * H0 / phi - dH0dt2 * 2.0 * t2 / phi ) * dphidzeta +
dH0dA * dAdzeta);
*dedzeta += (dH1dphi - dH1dt2 * 2.0 * t2 / phi ) * dphidzeta;
*deda2 /= phi2;
}
e += H0 + H1;
}
return e;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/revtpss.c 0000664 0000000 0000000 00000040226 13164413722 0022636 0 ustar 00root root 0000000 0000000
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include "xc_mgga.h"
typedef struct revtpss_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *x_aux;
XC(func_type) c_aux1;
XC(func_type) c_aux2;
} revtpss_params;
void gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma,
double *e, double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2);
/************************************************************************
Implements the meta-Generalized Gradient Approximation (revTPSS) of
John P. Perdew, Adrienn Ruzsinszky, Gabor I. Csonka, Lucian A. Constantin, and Jianwei Sun.
Correlation part
************************************************************************/
/* some parameters */
static double d = 2.8;
/* Equation (14) */
static void
c_revtpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta)
{
double fz, C0, dC0dz, dfzdz;
double z2 = zeta*zeta;
/* Equation (13) */
C0 = 0.59 + z2*(0.9269 + z2*(0.6225 + z2*2.1540));
dC0dz = zeta*(2.0*0.9269 + z2*(4.0*0.6225 + z2*6.0*2.1540)); /*OK*/
fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0));
dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/
{ /* Equation (14) */
double csi2 = csi*csi;
double a = 1.0 + csi2*fz, a4 = pow(a, 4);
*C = C0 / a4;
*dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/
*dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/
}
}
/* Equation (12) */
static void c_revtpss_12(revtpss_params *p, const double *rho, const double *sigma,
double dens, double zeta, double z,
double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz)
{
/*some incoming variables:
dens = rho[0] + rho[1]
z = tau_w/tau
zeta = (rho[0] - rho[1])/dens*/
double e_PBE, e_PBEup, e_PBEdn;
double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ;
double aux, zsq;
double dzetadd[2], dcsidd[2], dcsidsigma[3];
double C, dCdcsi, dCdzeta;
double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3];
int i;
/*initialize dCdcsi and dCdzeta and the energy*/
dCdcsi = dCdzeta = 0.0;
e_PBE = 0.0;
e_PBEup = 0.0;
e_PBEdn = 0.0;
/* get the PBE stuff */
if(p->common.nspin== XC_UNPOLARIZED)
{ densp[0]=rho[0]/2.;
densp[1]=rho[0]/2.;
sigmatot[0] = sigma[0]/4.;
sigmatot[1] = sigma[0]/4.;
sigmatot[2] = sigma[0]/4.;
}else{
densp[0] = rho[0];
densp[1] = rho[1];
sigmatot[0] = sigma[0];
sigmatot[1] = sigma[1];
sigmatot[2] = sigma[2];
}
/* e_PBE */
XC(func_type) *aux2 = (p->common.nspin == XC_UNPOLARIZED) ? &p->c_aux2 : &p->c_aux1;
gga_c_pbe_revtpss(aux2, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma, NULL, NULL, NULL);
densp2[0]=densp[0];
densp2[1]=0.0;
if(p->common.nspin== XC_UNPOLARIZED)
{
sigmaup[0] = sigma[0]/4.;
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}else{
sigmaup[0] = sigma[0];
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}
/* e_PBE spin up */
gga_c_pbe_revtpss(aux2, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup, NULL, NULL, NULL);
densp2[0]=densp[1];
densp2[1]=0.0;
if(p->common.nspin== XC_UNPOLARIZED)
{
sigmadn[0] = sigma[0]/4.;
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}else{
sigmadn[0] = sigma[2];
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}
/* e_PBE spin down */
gga_c_pbe_revtpss(aux2, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn, NULL, NULL, NULL);
/*get Eq. (13) and (14) for the polarized case*/
if(p->common.nspin == XC_UNPOLARIZED){
C = 0.59;
dzetadd[0] = 0.0;
dcsidd [0] = 0.0;
dzetadd[1] = 0.0;
dcsidd [1] = 0.0;
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
}else{
// initialize derivatives
for(i=0; i<2; i++){
dzetadd[i] = 0.0;
dcsidd [i] = 0.0;}
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
double num, gzeta, csi, a;
/*numerator of csi: derive as grho all components and then square the 3 parts
[2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2
-> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_b^2)/(n_a+n_b)^2 */
num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2);
num = max(num, 1e-20);
gzeta = sqrt(4*(num))/(dens*dens);
gzeta = max(gzeta, MIN_GRAD);
/*denominator of csi*/
a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0);
csi = gzeta/a;
c_revtpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta);
dzetadd[0] = (1.0 - zeta)/dens; /*OK*/
dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/
dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/
dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/
dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/
dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/
dcsidsigma[2]= csi*pow(rho[0],2)/(2*num); /*OK*/
}
aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens;
double dauxdd[2], dauxdsigma[3];
if(e_PBEup > e_PBE)
{
//case densp[0] * e_PBEup
dauxdd[0] = de_PBEddup[0];
dauxdd[1] = 0.0;
dauxdsigma[0] = de_PBEdsigmaup[0];
dauxdsigma[1] = 0.0;
dauxdsigma[2] = 0.0;
}else{
//case densp[0] * e_PBE
dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE;
dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE);
dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0];
dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1];
dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2];
}
if(e_PBEdn > e_PBE)
{//case densp[1] * e_PBEdn
dauxdd[0] += 0.0;
dauxdd[1] += de_PBEdddn[0];
dauxdsigma[0] += 0.0;
dauxdsigma[1] += 0.0;
dauxdsigma[2] += de_PBEdsigmadn[0];
}else{//case densp[1] * e_PBE
dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE);
dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE;
dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0];
dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1];
dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2];
}
zsq=z*z;
*e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux);
*de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/
double dCdd[2];
dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/
dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/
/* partial derivatives*/
de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq
- zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]);
de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq
- zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]);
int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3;
 for(i=0; i<nder; i++){
   if(p->common.nspin==XC_UNPOLARIZED) dauxdsigma[i] /= 2.;
   double dCdsigma[3];   /* one slot per sigma component; indexed by i below */
dCdsigma[i]= dCdcsi*dcsidsigma[i];
/* partial derivatives*/
de_PKZBdsigma[i] = de_PBEdsigma[i] * (1.0 + C * zsq) + dens * e_PBE * dCdsigma[i] * zsq
- zsq * (dens * dCdsigma[i] * aux + (1.0 + C) * dauxdsigma[i]);
}
}
static void
XC(mgga_c_revtpss)(void *par, const double *rho, const double *sigmatmp, const double *tau,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double sigma[3];
revtpss_params *p = (revtpss_params*)par;
double dens, zeta, grad;
double tautr, taut, tauw, z;
double e_PKZB, de_PKZBdd[2], de_PKZBdsigma[3], de_PKZBdz;
int i, is;
sigma[0] = sigmatmp[0];
sigma[1] = 0.0;
sigma[2] = 0.0;
zeta = (rho[0]-rho[1])/(rho[0]+rho[1]);
dens = rho[0];
tautr = tau[0];
grad = sigma[0];
if(p->common.nspin == XC_POLARIZED) {
dens += rho[1];
tautr += tau[1];
sigma[1] = sigmatmp[1];
sigma[2] = sigmatmp[2];
grad += (2*sigma[1] + sigma[2]);
}
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*dens), 1.0e-12);
taut = max(tautr, tauw);
z = tauw/taut;
sigma[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]);
if(p->common.nspin == XC_POLARIZED)
{
//sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]);
sigma[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]);
}
/* Equation (12) */
c_revtpss_12(p, rho, sigma, dens, zeta, z,
&e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz);
/* Equation (11) */
{
double z2 = z*z, z3 = z2*z;
double dedz;
double dzdd[2], dzdsigma[3], dzdtau;
if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){
dzdtau = 0.0;
dzdd[0] = 0.0;
dzdd[1] = 0.0;
dzdsigma[0] = 0.0;
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
}else{
dzdtau = -z/taut;
dzdd[0] = - z/dens;
dzdd[1] = 0.0;
if (p->common.nspin == XC_POLARIZED) dzdd[1] = - z/dens;
dzdsigma[0] = 1.0/(8*dens*taut);
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
if (p->common.nspin == XC_POLARIZED) {
dzdsigma[1] = 2.0/(8*dens*taut);
dzdsigma[2] = 1.0/(8*dens*taut);
}
}
*energy = e_PKZB * (1.0 + d*e_PKZB*z3);
/* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the
* same energy for polarized and unpolarized calculation with the same total density */
if(p->common.nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]);
dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2;
 for(is=0; is<p->common.nspin; is++){
dedd[is] = de_PKZBdd[is] * (1.0 + 2.0*d*e_PKZB*z3) + dedz*dzdd[is] - e_PKZB*e_PKZB * d * z3; /*OK*/
dedtau[is] = dedz * dzdtau; /*OK*/
}
int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3;
for(i=0; ix_aux, np, rho, &exunif, &vxunif);
/* calculate |nabla rho|^2 */
gdms = max(MIN_GRAD*MIN_GRAD, sigma);
/* Eq. (4) */
p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
dpdd = -(8.0/3.0)*p/rho[0];
dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
 /* von Weizsaecker kinetic energy density */
tauw = max(gdms/(8.0*rho[0]), 1.0e-12);
tau = max(tau_, tauw);
tau_lsda = aux * pow(rho[0],5./3.);
dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.);
alpha = (tau - tauw)/tau_lsda;
if(fabs(tauw-tau_)< 1.0e-10){
dalphadsigma = 0.0;
dalphadtau = 0.0;
dalphadd = 0.0;
}else{
dalphadtau = 1./tau_lsda;
dalphadsigma = -1./(tau_lsda*8.0*rho[0]);
dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.);
}
/* get Eq. (10) */
x_revtpss_10(p, alpha, &x, &dxdp, &dxdalpha);
{ /* Eq. (5) */
double a = kappa/(kappa + x);
Fx = 1.0 + kappa*(1.0 - a);
dFxdx = a*a;
}
{ /* Eq. (3) */
*energy = exunif*Fx*rho[0];
//printf("Ex %.9e\n", *energy);
 /* exunif is energy per particle already, so we multiply the terms containing exunif by n */
*dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd);
*vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma);
*dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau);
}
}
void
XC(mgga_x_revtpss)(void *par, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
revtpss_params *p = (revtpss_params*)par;
if(p->common.nspin == XC_UNPOLARIZED){
double en;
x_revtpss_para(p, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
 /* The spin polarized version is handled using the exact spin scaling
Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2
*/
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_revtpss_para(p, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_revtpss_para(p, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
static void revtpss_init(void *p) {
revtpss_params *par = (revtpss_params*)p;
par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED);
XC(func_init)(&par->c_aux1, XC_LDA_C_PW_MOD, par->common.nspin);
XC(func_init)(&par->c_aux2, XC_LDA_C_PW_MOD, XC_POLARIZED);
}
static void revtpss_end(void *p) {
revtpss_params *par = (revtpss_params*)p;
XC(func_end)(par->x_aux);
free(par->x_aux);
XC(func_end)(&par->c_aux1);
XC(func_end)(&par->c_aux2);
}
const mgga_func_info revtpss_info = {
sizeof(revtpss_params),
&revtpss_init,
&revtpss_end,
&XC(mgga_x_revtpss),
&XC(mgga_c_revtpss)
};
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/revtpss_c_pbe.c 0000664 0000000 0000000 00000034063 13164413722 0023770 0 ustar 00root root 0000000 0000000
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "xc_mgga.h"
/************************************************************************
Implements Perdew, Burke & Ernzerhof Generalized Gradient Approximation
correlation functional.
I based this implementation on a routine from L.C. Balbas and J.M. Soler
************************************************************************/
// from old libxc util.h
#define RS(x) (pow((3.0/(4*M_PI*x)), 1.0/3.0))
typedef struct XC(perdew_t) {
int nspin;
double dens, zeta, gdmt;
double ecunif, vcunif[2], fcunif[3];
double rs, kf, ks, phi, t;
double drs, dkf, dks, dphi, dt, decunif;
double d2rs2, d2rskf, d2rsks, d2rsphi, d2rst, d2rsecunif;
double d2kf2, d2kfks, d2kfphi, d2kft, d2kfecunif;
double d2ks2, d2ksphi, d2kst, d2ksecunif;
double d2phi2, d2phit, d2phiecunif;
double d2t2, d2tecunif;
double d2ecunif2;
} XC(perdew_t);
// from old libxc util.c
/* this function converts the spin-density into total density and
relative magnetization */
inline void
XC(rho2dzeta)(int nspin, const double *rho, double *d, double *zeta)
{
assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED);
if(nspin==XC_UNPOLARIZED){
*d = max(MIN_DENS, rho[0]);
*zeta = 0.0;
}else{
*d = max(MIN_DENS, rho[0]+rho[1]);
*zeta = (*d > MIN_DENS) ? (rho[0]-rho[1])/(*d) : 0.0;
}
}
// from old libxc gga_perdew.c
static void
XC(perdew_params)(const XC(func_type) *gga_p, const double *rho, const double *sigma, int order, XC(perdew_t) *pt)
{
pt->nspin = gga_p->nspin;
XC(rho2dzeta)(pt->nspin, rho, &(pt->dens), &(pt->zeta));
const int np = 1;
switch (order){
case 0:
XC(lda_exc) (gga_p, np, rho, &(pt->ecunif));
break;
case 1:
XC(lda_exc_vxc)(gga_p, np, rho, &(pt->ecunif), pt->vcunif);
break;
case 2:
XC(lda)(gga_p, np, rho, &(pt->ecunif), pt->vcunif, pt->fcunif, NULL);
break;
}
pt->rs = RS(pt->dens);
pt->kf = pow(3.0*M_PI*M_PI*pt->dens, 1.0/3.0);
pt->ks = sqrt(4.0*pt->kf/M_PI);
/* phi is bounded between 2^(-1/3) and 1 */
pt->phi = 0.5*(pow(1.0 + pt->zeta, 2.0/3.0) + pow(1.0 - pt->zeta, 2.0/3.0));
/* get gdmt = |nabla n| */
pt->gdmt = sigma[0];
if(pt->nspin == XC_POLARIZED) pt->gdmt += 2.0*sigma[1] + sigma[2];
if(pt->gdmt < MIN_GRAD*MIN_GRAD) pt->gdmt = MIN_GRAD*MIN_GRAD;
pt->gdmt = sqrt(pt->gdmt);
pt->t = pt->gdmt/(2.0 * pt->phi * pt->ks * pt->dens);
if(order > 0)
pt->drs = pt->dkf = pt->dks = pt->dphi = pt->dt = pt->decunif = 0.0;
if(order > 1){
pt->d2rs2 = pt->d2rskf = pt->d2rsks = pt->d2rsphi = pt->d2rst = pt->d2rsecunif = 0.0;
pt->d2kf2 = pt->d2kfks = pt->d2kfphi = pt->d2kft = pt->d2kfecunif = 0.0;
pt->d2ks2 = pt->d2ksphi = pt->d2kst = pt->d2ksecunif = 0.0;
pt->d2phi2 = pt->d2phit = pt->d2phiecunif = 0.0;
pt->d2t2 = pt->d2tecunif = 0.0;
pt->d2ecunif2 = 0.0;
}
}
static void
XC(perdew_potentials)(XC(perdew_t) *pt, const double *rho, double e_gga, int order,
double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2)
{
/* alpha = {0->rs, 1->kf, 2->ks, 3->phi, 4->t, 5->ec */
double dalphadd[6][2], dFdalpha[6];
double d2alphadd2[6][3], d2Fdalpha2[6][6];
double dzdd[2], dpdz, d2zdd2[3], d2pdz2;
double dtdsig, d2tdsig2;
int is, js, ks, ns;
if(order < 1) return;
if(pt->nspin == XC_POLARIZED){
dpdz = 0.0;
if(fabs(1.0 + pt->zeta) >= MIN_DENS)
dpdz += 1.0/(3.0*pow(1.0 + pt->zeta, 1.0/3.0));
if(fabs(1.0 - pt->zeta) >= MIN_DENS)
dpdz -= 1.0/(3.0*pow(1.0 - pt->zeta, 1.0/3.0));
dzdd[0] = (1.0 - pt->zeta)/pt->dens;
dzdd[1] = -(1.0 + pt->zeta)/pt->dens;
}else{
dpdz = 0.0;
dzdd[0] = 0.0;
}
dFdalpha[0] = pt->drs;
dFdalpha[1] = pt->dkf;
dFdalpha[2] = pt->dks;
dFdalpha[3] = pt->dphi;
dFdalpha[4] = pt->dt;
dFdalpha[5] = pt->decunif;
 for(is=0; is<pt->nspin; is++){
dalphadd[0][is] = -pt->rs/(3.0*pt->dens);
dalphadd[1][is] = pt->kf/(3.0*pt->dens);
dalphadd[2][is] = pt->ks*dalphadd[1][is]/(2.0*pt->kf);
dalphadd[3][is] = dpdz*dzdd[is];
 dalphadd[4][is] = -pt->t*(1.0/pt->dens + dalphadd[2][is]/pt->ks + dalphadd[3][is]/pt->phi);
dalphadd[5][is] = (pt->vcunif[is] - pt->ecunif)/pt->dens;
}
/* calculate vrho */
if(vrho != NULL)
 for(is=0; is<pt->nspin; is++){
if(rho[is] > MIN_DENS){
int k;
vrho[is] = e_gga;
for(k=0; k<6; k++)
vrho[is] += pt->dens * dFdalpha[k]*dalphadd[k][is];
}else{
vrho[is] = 0.0;
}
}
dtdsig = pt->t/(2.0*pt->gdmt*pt->gdmt);
if(vrho != NULL){ /* calculate now vsigma */
vsigma[0] = pt->dens*pt->dt*dtdsig;
if(pt->nspin == XC_POLARIZED){
vsigma[1] = 2.0*vsigma[0];
vsigma[2] = vsigma[0];
}
}
if(order < 2) return;
/* first let us sort d2Fdalpha2 in a matrix format */
d2Fdalpha2[0][0] = pt->d2rs2;
d2Fdalpha2[0][1] = pt->d2rskf;
d2Fdalpha2[0][2] = pt->d2rsks;
d2Fdalpha2[0][3] = pt->d2rst;
d2Fdalpha2[0][4] = pt->d2rsphi;
d2Fdalpha2[0][5] = pt->d2rsecunif;
d2Fdalpha2[1][0] = d2Fdalpha2[0][1];
d2Fdalpha2[1][1] = pt->d2kf2;
d2Fdalpha2[1][2] = pt->d2kfks;
d2Fdalpha2[1][3] = pt->d2kft;
d2Fdalpha2[1][4] = pt->d2kfphi;
d2Fdalpha2[1][5] = pt->d2kfecunif;
d2Fdalpha2[2][0] = d2Fdalpha2[0][2];
d2Fdalpha2[2][1] = d2Fdalpha2[1][2];
d2Fdalpha2[2][2] = pt->d2ks2;
d2Fdalpha2[2][3] = pt->d2kst;
d2Fdalpha2[2][4] = pt->d2ksphi;
d2Fdalpha2[2][5] = pt->d2ksecunif;
d2Fdalpha2[3][0] = d2Fdalpha2[0][3];
d2Fdalpha2[3][1] = d2Fdalpha2[1][3];
d2Fdalpha2[3][2] = d2Fdalpha2[2][3];
d2Fdalpha2[3][3] = pt->d2phi2;
d2Fdalpha2[3][4] = pt->d2phit;
d2Fdalpha2[3][5] = pt->d2phiecunif;
d2Fdalpha2[4][0] = d2Fdalpha2[0][4];
d2Fdalpha2[4][1] = d2Fdalpha2[1][4];
d2Fdalpha2[4][2] = d2Fdalpha2[2][4];
d2Fdalpha2[4][3] = d2Fdalpha2[3][4];
d2Fdalpha2[4][4] = pt->d2t2;
d2Fdalpha2[4][5] = pt->d2tecunif;
d2Fdalpha2[5][0] = d2Fdalpha2[0][5];
d2Fdalpha2[5][1] = d2Fdalpha2[1][5];
d2Fdalpha2[5][2] = d2Fdalpha2[2][5];
d2Fdalpha2[5][3] = d2Fdalpha2[3][5];
d2Fdalpha2[5][4] = d2Fdalpha2[4][5];
d2Fdalpha2[5][5] = pt->d2ecunif2;
/* now we sort d2alphadd2 */
if(pt->nspin == XC_POLARIZED){
d2pdz2 = 0.0;
if(fabs(1.0 + pt->zeta) >= MIN_DENS)
d2pdz2 += -(1.0/9.0)*pow(1.0 + pt->zeta, -4.0/3.0);
if(fabs(1.0 - pt->zeta) >= MIN_DENS)
d2pdz2 += -(1.0/9.0)*pow(1.0 - pt->zeta, -4.0/3.0);
d2zdd2[0] = -2.0*dzdd[0]/pt->dens;
d2zdd2[1] = 2.0*pt->zeta/(pt->dens*pt->dens);
d2zdd2[2] = -2.0*dzdd[1]/pt->dens;
}else{
d2pdz2 = 0.0;
d2zdd2[0] = 0.0;
}
ns = (pt->nspin == XC_UNPOLARIZED) ? 0 : 2;
for(ks=0; ks<=ns; ks++){
is = (ks == 0 || ks == 1) ? 0 : 1;
js = (ks == 0 ) ? 0 : 1;
d2alphadd2[0][ks] = 4.0/9.0*pt->rs/(pt->dens*pt->dens);
d2alphadd2[1][ks] = -2.0/9.0*pt->kf/(pt->dens*pt->dens);
d2alphadd2[2][ks] = pt->ks/(2.0*pt->kf)*
(d2alphadd2[1][ks] - dalphadd[1][is]*dalphadd[1][js]/(2.0*pt->kf));
d2alphadd2[3][ks] = d2pdz2*dzdd[is]*dzdd[js] + dpdz*d2zdd2[ks];
d2alphadd2[4][ks] = pt->t *
(+2.0/(pt->dens*pt->dens)
+2.0/(pt->ks*pt->ks) *(dalphadd[2][is] * dalphadd[2][js])
+2.0/(pt->phi*pt->phi) *(dalphadd[3][is] * dalphadd[3][js])
+1.0/(pt->dens*pt->ks) *(dalphadd[2][is] + dalphadd[2][js])
+1.0/(pt->dens*pt->phi)*(dalphadd[3][is] + dalphadd[3][js])
+1.0/(pt->ks*pt->phi) *(dalphadd[2][is]*dalphadd[3][js] + dalphadd[2][js]*dalphadd[3][is])
-1.0/(pt->ks)*d2alphadd2[2][ks] -1.0/(pt->phi)*d2alphadd2[3][ks]);
d2alphadd2[5][ks] = pt->fcunif[ks]/pt->dens -
(pt->vcunif[is] + pt->vcunif[js] - 2.0*pt->ecunif)/(pt->dens*pt->dens);
}
for(ks=0; ks<=ns; ks++){
int j, k;
is = (ks == 0 || ks == 1) ? 0 : 1;
js = (ks == 0 ) ? 0 : 1;
v2rho2[ks] = 0.0;
for(j=0; j<6; j++){
v2rho2[ks] += dFdalpha[j]*(dalphadd[j][is] + dalphadd[j][js]);
v2rho2[ks] += pt->dens * dFdalpha[j]*d2alphadd2[j][ks];
for(k=0; k<6; k++)
v2rho2[ks] += pt->dens * d2Fdalpha2[j][k]*dalphadd[j][is]*dalphadd[k][js];
}
}
/* now we handle v2rhosigma */
 for(is=0; is<pt->nspin; is++){
int j;
ks = (is == 0) ? 0 : 5;
v2rhosigma[ks] = dFdalpha[4]*dtdsig;
for(j=0; j<6; j++)
v2rhosigma[ks] += pt->dens * d2Fdalpha2[4][j]*dalphadd[j][is]*dtdsig;
v2rhosigma[ks] += pt->dens * dFdalpha[4]*dalphadd[4][is]/(2.0*pt->gdmt*pt->gdmt);
}
if(pt->nspin == XC_POLARIZED){
v2rhosigma[1] = 2.0*v2rhosigma[0];
v2rhosigma[2] = v2rhosigma[0];
v2rhosigma[3] = v2rhosigma[5];
v2rhosigma[4] = 2.0*v2rhosigma[5];
}
 /* now we take care of v2sigma2 */
d2tdsig2 = -dtdsig/(2.0*pt->gdmt*pt->gdmt);
v2sigma2[0] = pt->dens*(pt->d2t2*dtdsig*dtdsig + pt->dt*d2tdsig2);
if(pt->nspin == XC_POLARIZED){
v2sigma2[1] = 2.0*v2sigma2[0]; /* aa_ab */
v2sigma2[2] = v2sigma2[0]; /* aa_bb */
v2sigma2[3] = 4.0*v2sigma2[0]; /* ab_ab */
v2sigma2[4] = 2.0*v2sigma2[0]; /* ab_bb */
v2sigma2[5] = v2sigma2[0]; /* bb_bb */
}
}
// from old libxc gga_c_pbe.c
static const double beta[4] = {
0.06672455060314922, /* original PBE */
0.046, /* PBE sol */
0.089809,
0.06672455060314922 /* PBE for revTPSS */
};
static double gamm[4];
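/* pbe_eq8() below evaluates Eq. (8) of PBE correlation,
     A = (beta/gamma) / (exp(-ec_unif/(gamma*phi^3)) - 1),
   with the additional revTPSS factor (1 + 0.1*rs)/(1 + 0.1778*rs) for func == 3. */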
static inline void
pbe_eq8(int func, int order, double rs, double ecunif, double phi,
double *A, double *dec, double *dphi, double *drs,
double *dec2, double *decphi, double *dphi2)
{
double phi3, f1, df1dphi, d2f1dphi2, f2, f3, dx, d2x;
phi3 = pow(phi, 3);
f1 = ecunif/(gamm[func]*phi3);
f2 = exp(-f1);
f3 = f2 - 1.0;
*A = beta[func]/(gamm[func]*f3);
if(func == 3) *A *= (1. + 0.1*rs)/(1. + 0.1778*rs);
if(order < 1) return;
df1dphi = -3.0*f1/phi;
dx = (*A)*f2/f3;
*dec = dx/(gamm[func]*phi3);
*dphi = dx*df1dphi;
*drs = 0.0;
if(func == 3) *drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))/(gamm[func]*f3);
if(func ==3) return;
if(order < 2) return;
d2f1dphi2 = -4.0*df1dphi/phi;
d2x = dx*(2.0*f2 - f3)/f3;
*dphi2 = d2x*df1dphi*df1dphi + dx*d2f1dphi2;
*decphi = (d2x*df1dphi*f1 + dx*df1dphi)/ecunif;
*dec2 = d2x/(gamm[func]*gamm[func]*phi3*phi3);
}
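/* pbe_eq7() below evaluates Eq. (7) of PBE correlation,
     H = gamma*phi^3 * ln[1 + (beta/gamma) t^2 (1 + A t^2)/(1 + A t^2 + A^2 t^4)],
   again with the revTPSS rs-dependent rescaling of beta for func == 3. */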
static void
pbe_eq7(int func, int order, double rs, double phi, double t, double A,
double *H, double *dphi, double *drs, double *dt, double *dA,
double *d2phi, double *d2phit, double *d2phiA, double *d2t2, double *d2tA, double *d2A2)
{
double t2, phi3, f1, f2, f3;
double df1dt, df2drs, df2dt, df1dA, df2dA;
double d2f1dt2, d2f2dt2, d2f2dA2, d2f1dtA, d2f2dtA;
t2 = t*t;
phi3 = pow(phi, 3);
f1 = t2 + A*t2*t2;
f3 = 1.0 + A*f1;
f2 = beta[func]*f1/(gamm[func]*f3);
if(func == 3) f2 *= (1. + 0.1*rs)/(1. + 0.1778*rs);
*H = gamm[func]*phi3*log(1.0 + f2);
if(order < 1) return;
*dphi = 3.0*(*H)/phi;
df1dt = t*(2.0 + 4.0*A*t2);
df2dt = beta[func]/(gamm[func]*f3*f3) * df1dt;
if(func == 3) df2dt*=(1. + 0.1*rs)/(1. + 0.1778*rs);
*dt = gamm[func]*phi3*df2dt/(1.0 + f2);
df1dA = t2*t2;
df2dA = beta[func]/(gamm[func]*f3*f3) * (df1dA - f1*f1);
if(func == 3) df2dA *= (1. + 0.1*rs)/(1. + 0.1778*rs);
*dA = gamm[func]*phi3*df2dA/(1.0 + f2);
df2drs = 0.0;
*drs = 0.0;
if(func == 3){
df2drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))*f1/(gamm[func]*f3);
*drs = gamm[func]*phi3*df2drs/(1.0 + f2);
}
if(func ==3) return;
if(order < 2) return;
*d2phi = 2.0*(*dphi)/phi;
*d2phit = 3.0*(*dt)/phi;
*d2phiA = 3.0*(*dA)/phi;
d2f1dt2 = 2.0 + 4.0*3.0*A*t2;
d2f2dt2 = beta[func]/(gamm[func]*f3*f3) * (d2f1dt2 - 2.0*A/f3*df1dt*df1dt);
*d2t2 = gamm[func]*phi3*(d2f2dt2*(1.0 + f2) - df2dt*df2dt)/((1.0 + f2)*(1.0 + f2));
d2f1dtA = 4.0*t*t2;
d2f2dtA = beta[func]/(gamm[func]*f3*f3) *
(d2f1dtA - 2.0*df1dt*(f1 + A*df1dA)/f3);
*d2tA = gamm[func]*phi3*(d2f2dtA*(1.0 + f2) - df2dt*df2dA)/((1.0 + f2)*(1.0 + f2));
d2f2dA2 = beta[func]/(gamm[func]*f3*f3*f3) *(-2.0)*(2.0*f1*df1dA - f1*f1*f1 + A*df1dA*df1dA);
*d2A2 = gamm[func]*phi3*(d2f2dA2*(1.0 + f2) - df2dA*df2dA)/((1.0 + f2)*(1.0 + f2));
}
void
gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma,
double *e, double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2)
{
gamm[0] = gamm[1] = gamm[3] = (1.0 - log(2.0))/(M_PI*M_PI);
XC(perdew_t) pt;
int func, order;
double me;
double A, dAdec, dAdphi, dAdrs, d2Adec2, d2Adecphi, d2Adphi2;
double H, dHdphi, dHdrs, dHdt, dHdA, d2Hdphi2, d2Hdphit, d2HdphiA, d2Hdt2, d2HdtA, d2HdA2;
d2HdphiA = 0.0;
d2Hdphi2 = 0.0;
d2Adphi2 = 0.0;
d2HdA2 = 0.0;
d2HdtA = 0.0;
d2Hdphit = 0.0;
d2Adecphi = 0.0;
d2Hdt2 = 0.0;
d2Adec2 = 0.0;
dAdrs = 0.0;
dAdphi = 0.0;
dAdec = 0.0;
dHdA = 0.0;
dHdt = 0.0;
dHdrs = 0.0;
dHdphi = 0.0;
func = 3; // for revTPSS
order = 0;
if(vrho != NULL) order = 1;
if(v2rho2 != NULL) order = 2;
XC(perdew_params)(p, rho, sigma, order, &pt);
pbe_eq8(func, order, pt.rs, pt.ecunif, pt.phi,
&A, &dAdec, &dAdphi, &dAdrs, &d2Adec2, &d2Adecphi, &d2Adphi2);
pbe_eq7(func, order, pt.rs, pt.phi, pt.t, A,
&H, &dHdphi, &dHdrs, &dHdt, &dHdA, &d2Hdphi2, &d2Hdphit, &d2HdphiA, &d2Hdt2, &d2HdtA, &d2HdA2);
me = pt.ecunif + H;
if(e != NULL) *e = me;
if(order >= 1){
pt.dphi = dHdphi + dHdA*dAdphi;
pt.drs = dHdrs + dHdA*dAdrs;
pt.dt = dHdt;
pt.decunif = 1.0 + dHdA*dAdec;
}
if(order >= 2){
pt.d2phi2 = d2Hdphi2 + 2.0*d2HdphiA*dAdphi + dHdA*d2Adphi2 + d2HdA2*dAdphi*dAdphi;
pt.d2phit = d2Hdphit + d2HdtA*dAdphi;
pt.d2phiecunif = d2HdphiA*dAdec + d2HdA2*dAdphi*dAdec + dHdA*d2Adecphi;
pt.d2t2 = d2Hdt2;
pt.d2tecunif = d2HdtA*dAdec;
pt.d2ecunif2 = d2HdA2*dAdec*dAdec + dHdA*d2Adec2;
}
XC(perdew_potentials)(&pt, rho, me, order, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2);
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/rpbe.c 0000664 0000000 0000000 00000001314 13164413722 0022053 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include "xc_gpaw.h"
double rpbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
if (par->gga) // not really needed? XXX
{
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
double x = exp(-MU * s2 / 0.804);
double Fx = 1.0 + 0.804 * (1 - x);
double dFxds2 = MU * x;
double ds2drs = 8.0 * c * a2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
}
return e;
}
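As an illustrative aside, not part of the GPAW sources, the standalone snippet below tabulates the PBE and RPBE exchange enhancement factors implemented in pbe_exchange() and rpbe_exchange() above; both reduce to 1 + MU*s2 for small s2 and approach the same limit 1 + kappa for large s2, RPBE doing so exponentially.
#include <stdio.h>
#include <math.h>

int main(void)
{
  const double kappa = 0.804;             /* same kappa as the PBE/RPBE code above */
  const double mu = 0.2195164512208958;   /* MU from xc_gpaw.h */
  for (double s2 = 0.0; s2 <= 9.0; s2 += 1.0)
    {
      double fx_pbe  = 1.0 + kappa - kappa / (1.0 + mu * s2 / kappa);
      double fx_rpbe = 1.0 + kappa * (1.0 - exp(-mu * s2 / kappa));
      printf("s2 = %4.1f  Fx_PBE = %.6f  Fx_RPBE = %.6f\n", s2, fx_pbe, fx_rpbe);
    }
  return 0;
}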
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/tpss.c 0000664 0000000 0000000 00000037236 13164413722 0022130 0 ustar 00root root 0000000 0000000 /************************************************************************
Implements Perdew, Tao, Staroverov & Scuseria
meta-Generalized Gradient Approximation.
Exchange part
************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "xc_mgga.h"
typedef struct tpss_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *x_aux;
XC(func_type) *c_aux1;
XC(func_type) *c_aux2;
} tpss_params;
/* some parameters */
static double b=0.40, c=1.59096, e=1.537, kappa=0.804, mu=0.21951;
/* This is Equation (7) from the paper and its derivatives */
static void
x_tpss_7(double p, double alpha,
double *qb, double *dqbdp, double *dqbdalpha)
{
/* Eq. (7) */
double a = sqrt(1.0 + b*alpha*(alpha-1.0)), h = 9.0/20.0;
*qb = h*(alpha - 1.0)/a + 2.0*p/3.0;
*dqbdp = 2.0/3.0;
*dqbdalpha = h*(1.0 + 0.5*b*(alpha-1.0))/pow(a, 3);
}
/* Equation (10) in all its glory */
static
void x_tpss_10(double p, double alpha,
double *x, double *dxdp, double *dxdalpha)
{
double x1, dxdp1, dxdalpha1;
double aux1, ap, apsr, p2;
double qb, dqbdp, dqbdalpha;
/* Equation 7 */
x_tpss_7(p, alpha, &qb, &dqbdp, &dqbdalpha);
p2 = p*p;
aux1 = 10.0/81.0;
ap = (3*alpha + 5*p)*(3*alpha + 5*p);
apsr = (3*alpha + 5*p);
/* first we handle the numerator */
x1 = 0.0;
dxdp1 = 0.0;
dxdalpha1 = 0.0;
{ /* first term */
double a = (9*alpha*alpha+30*alpha*p+50*p2), a2 = a*a;
x1 += aux1*p + 25*c*p2*p*ap/a2;
dxdp1 += aux1 + ((3*225*c*p2*alpha*alpha+ 4*750*c*p*p2*alpha + 5*625*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(30*alpha+50*2*p))/(a2*a2);
dxdalpha1 += ((225*c*p*p2*2*alpha + 750*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(9*2*alpha+30*p))/(a2*a2);
}
{ /* second term */
double a = 146.0/2025.0*qb;
x1 += a*qb;
dxdp1 += 2.0*a*dqbdp;
dxdalpha1 += 2.0*a*dqbdalpha;
}
{ /* third term */
double h = 73.0/(405*sqrt(2.0));
x1 += -h*qb*p/apsr * sqrt(ap+9);
dxdp1 += -h * qb *((3*alpha)/ap * sqrt(ap+9) + p/apsr * 1./2. * pow(ap+9,-1./2.)* 2*apsr*5) - h*p/apsr*sqrt(ap+9)*dqbdp;
dxdalpha1 += -h*qb*( (-1)*p*3/ap * sqrt(ap+9) + p/apsr * 1./2. * pow(ap+9,-1./2.)* 2*apsr*3) - h*p/apsr*sqrt(ap+9)*dqbdalpha;
}
{ /* fourth term */
double a = aux1*aux1/kappa;
x1 += a*p2;
dxdp1 += a*2.0*p;
dxdalpha1 += 0.0;
}
{ /* fifth term */
x1 += 20*sqrt(e)*p2/(9*ap);
dxdp1 += 20*sqrt(e)/9*(2*p*ap-p2*2*(3*alpha + 5*p)*5)/(ap*ap);
dxdalpha1 +=-20*2*sqrt(e)/3*p2/(ap*(3*alpha + 5*p));
}
{ /* sixth term */
double a = e*mu;
x1 += a*p*p2;
dxdp1 += a*3.0*p2;
dxdalpha1 += 0.0;
}
/* and now the denominator */
{
double a = 1.0+sqrt(e)*p, a2 = a*a;
*x = x1/a2;
*dxdp = (dxdp1*a - 2.0*sqrt(e)*x1)/(a2*a);
*dxdalpha = dxdalpha1/a2;
}
}
static void
x_tpss_para(XC(func_type) *lda_aux, const double *rho, const double sigma, const double tau_,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double gdms, p, tau, tauw;
double x, dxdp, dxdalpha, Fx, dFxdx;
double tau_lsda, exunif, vxunif, dtau_lsdadd;
double dpdd, dpdsigma;
double alpha, dalphadd, dalphadsigma, dalphadtau;
double aux = (3./10.) * pow((3*M_PI*M_PI),2./3.);
/* get the uniform gas energy and potential */
const int np = 1;
XC(lda_exc_vxc)(lda_aux, np, rho, &exunif, &vxunif);
/* calculate |nabla rho|^2 */
gdms = max(MIN_GRAD*MIN_GRAD, sigma);
/* Eq. (4) */
p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
dpdd = -(8.0/3.0)*p/rho[0];
dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
 /* von Weizsaecker kinetic energy density */
tauw = max(gdms/(8.0*rho[0]), 1.0e-12);
tau = max(tau_, tauw);
tau_lsda = aux * pow(rho[0],5./3.);
dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.);
alpha = (tau - tauw)/tau_lsda;
if(fabs(tauw-tau_)< 1.0e-10){
dalphadsigma = 0.0;
dalphadtau = 0.0;
dalphadd = 0.0;
}else{
dalphadtau = 1./tau_lsda;
dalphadsigma = -1./(tau_lsda*8.0*rho[0]);
dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.);
}
/* get Eq. (10) */
x_tpss_10(p, alpha, &x, &dxdp, &dxdalpha);
{ /* Eq. (5) */
double a = kappa/(kappa + x);
Fx = 1.0 + kappa*(1.0 - a);
dFxdx = a*a;
}
{ /* Eq. (3) */
*energy = exunif*Fx*rho[0];
 /* exunif is energy per particle already, so we multiply the terms containing exunif by n */
*dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd);
*vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma);
*dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau);
}
}
static void
XC(mgga_x_tpss)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
tpss_params *par = (tpss_params*)p;
if(par->common.nspin == XC_UNPOLARIZED){
double en;
x_tpss_para(par->x_aux, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
 /* The spin polarized version is handled using the exact spin scaling
Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2
*/
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_tpss_para(par->x_aux, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_tpss_para(par->x_aux, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
/************************************************************************
Implements Perdew, Tao, Staroverov & Scuseria
meta-Generalized Gradient Approximation.
J. Chem. Phys. 120, 6898 (2004)
http://dx.doi.org/10.1063/1.1665298
Correlation part
************************************************************************/
/* some parameters */
static double d = 2.8;
/* Equation (14) */
static void
c_tpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta)
{
double fz, C0, dC0dz, dfzdz;
double z2 = zeta*zeta;
/* Equation (13) */
C0 = 0.53 + z2*(0.87 + z2*(0.50 + z2*2.26));
dC0dz = zeta*(2.0*0.87 + z2*(4.0*0.5 + z2*6.0*2.26)); /*OK*/
fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0));
dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/
{ /* Equation (14) */
double csi2 = csi*csi;
double a = 1.0 + csi2*fz, a4 = pow(a, 4);
*C = C0 / a4;
*dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/
*dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/
}
}
/* Equation (12) */
static void c_tpss_12(XC(func_type) *aux1, XC(func_type) *aux2, int nspin, const double *rho, const double *sigma,
double dens, double zeta, double z,
double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz)
{
/*some incoming variables:
dens = rho[0] + rho[1]
z = tau_w/tau
zeta = (rho[0] - rho[1])/dens*/
double e_PBE, e_PBEup, e_PBEdn;
double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ;
double aux, zsq;
double dzetadd[2], dcsidd[2], dcsidsigma[3];
double C, dCdcsi, dCdzeta;
double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3];
int i;
/*initialize dCdcsi and dCdzeta and the energy*/
dCdcsi = dCdzeta = 0.0;
e_PBE = 0.0;
e_PBEup = 0.0;
e_PBEdn = 0.0;
/* get the PBE stuff */
if(nspin== XC_UNPOLARIZED)
{ densp[0]=rho[0]/2.;
densp[1]=rho[0]/2.;
sigmatot[0] = sigma[0]/4.;
sigmatot[1] = sigma[0]/4.;
sigmatot[2] = sigma[0]/4.;
}else{
densp[0] = rho[0];
densp[1] = rho[1];
sigmatot[0] = sigma[0];
sigmatot[1] = sigma[1];
sigmatot[2] = sigma[2];
}
/* e_PBE */
XC(func_type) *auxfunc = (nspin == XC_UNPOLARIZED) ? aux2 : aux1;
const int np = 1;
XC(gga_exc_vxc)(auxfunc, np, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma);
densp2[0]=densp[0];
densp2[1]=0.0;
if(nspin== XC_UNPOLARIZED)
{
sigmaup[0] = sigma[0]/4.;
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}else{
sigmaup[0] = sigma[0];
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}
/* e_PBE spin up */
XC(gga_exc_vxc)(auxfunc, np, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup);
densp2[0]=densp[1];
densp2[1]=0.0;
if(nspin== XC_UNPOLARIZED)
{
sigmadn[0] = sigma[0]/4.;
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}else{
sigmadn[0] = sigma[2];
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}
/* e_PBE spin down */
XC(gga_exc_vxc)(auxfunc, np, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn);
/*get Eq. (13) and (14) for the polarized case*/
if(nspin == XC_UNPOLARIZED){
C = 0.53;
dzetadd[0] = 0.0;
dcsidd [0] = 0.0;
dzetadd[1] = 0.0;
dcsidd [1] = 0.0;
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
}else{
// initialize derivatives
for(i=0; i<2; i++){
dzetadd[i] = 0.0;
dcsidd [i] = 0.0;}
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
double num, gzeta, csi, a;
/*numerator of csi: derive as grho all components and then square the 3 parts
[2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2
-> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_b^2)/(n_a+n_b)^2 */
num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2);
num = max(num, 1e-20);
gzeta = sqrt(4*(num))/(dens*dens);
gzeta = max(gzeta, MIN_GRAD);
/*denominator of csi*/
a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0);
csi = gzeta/a;
c_tpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta);
dzetadd[0] = (1.0 - zeta)/dens; /*OK*/
dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/
dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/
dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/
dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/
dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/
dcsidsigma[2]= csi*pow(rho[0],2)/(2*num); /*OK*/
}
aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens;
double dauxdd[2], dauxdsigma[3];
if(e_PBEup > e_PBE)
{
//case densp[0] * e_PBEup
dauxdd[0] = de_PBEddup[0];
dauxdd[1] = 0.0;
dauxdsigma[0] = de_PBEdsigmaup[0];
dauxdsigma[1] = 0.0;
dauxdsigma[2] = 0.0;
}else{
//case densp[0] * e_PBE
dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE;
dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE);
dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0];
dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1];
dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2];
}
if(e_PBEdn > e_PBE)
{//case densp[1] * e_PBEdn
dauxdd[0] += 0.0;
dauxdd[1] += de_PBEdddn[0];
dauxdsigma[0] += 0.0;
dauxdsigma[1] += 0.0;
dauxdsigma[2] += de_PBEdsigmadn[0];
}else{//case densp[1] * e_PBE
dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE);
dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE;
dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0];
dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1];
dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2];
}
zsq=z*z;
*e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux);
*de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/
double dCdd[2];
dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/
dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/
/* partial derivatives*/
de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq
- zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]);
de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq
- zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]);
int nder = (nspin==XC_UNPOLARIZED) ? 1 : 3;
for(i=0; icommon.nspin;
dens = rho[0];
tautr = tau[0];
grad = sigma[0];
if(nspin == XC_POLARIZED) {
zeta = (rho[0]-rho[1])/(rho[0]+rho[1]);
dens += rho[1];
tautr += tau[1];
grad += (2*sigma[1] + sigma[2]);
}
else
zeta = 0.0;
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*dens), 1.0e-12);
taut = max(tautr, tauw);
z = tauw/taut;
double sigmatmp[3];
sigmatmp[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]);
sigmatmp[1] = 0.0;
sigmatmp[2] = 0.0;
if(nspin == XC_POLARIZED)
{
//sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]);
sigmatmp[1] = sigma[1];
sigmatmp[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]);
}
/* Equation (12) */
c_tpss_12(par->c_aux1, par->c_aux2, nspin, rho, sigmatmp, dens, zeta, z,
&e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz);
/* Equation (11) */
{
double z2 = z*z, z3 = z2*z;
double dedz;
double dzdd[2], dzdsigma[3], dzdtau;
if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){
dzdtau = 0.0;
dzdd[0] = 0.0;
dzdd[1] = 0.0;
dzdsigma[0] = 0.0;
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
}else{
dzdtau = -z/taut;
dzdd[0] = - z/dens;
dzdd[1] = 0.0;
if (nspin == XC_POLARIZED) dzdd[1] = - z/dens;
dzdsigma[0] = 1.0/(8*dens*taut);
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
if (nspin == XC_POLARIZED) {
dzdsigma[1] = 2.0/(8*dens*taut);
dzdsigma[2] = 1.0/(8*dens*taut);
}
}
*energy = e_PKZB * (1.0 + d*e_PKZB*z3);
/* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the
* same energy for polarized and unpolarized calculation with the same total density */
if(nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]);
dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2;
for(is=0; isx_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED);
par->c_aux1 = (XC(func_type) *) malloc(sizeof(XC(func_type)));
par->c_aux2 = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->c_aux1, XC_GGA_C_PBE, par->common.nspin);
XC(func_init)(par->c_aux2, XC_GGA_C_PBE, XC_POLARIZED);
}
static void tpss_end(void *p) {
tpss_params *par = (tpss_params*)p;
XC(func_end)(par->x_aux);
free(par->x_aux);
XC(func_end)(par->c_aux1);
XC(func_end)(par->c_aux2);
free(par->c_aux1);
free(par->c_aux2);
}
const mgga_func_info tpss_info = {
sizeof(tpss_params),
&tpss_init,
&tpss_end,
&XC(mgga_x_tpss),
&XC(mgga_c_tpss)
};
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/vdw.c 0000664 0000000 0000000 00000014316 13164413722 0021731 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "../extensions.h"
double vdwkernel(double D, double d1, double d2, int nD, int ndelta,
double dD, double ddelta,
const double (*phi)[nD])
{
if (D < 1e-10)
return phi[0][0];
double y = D / dD;
int j = (int)y;
double e12;
if (j >= nD - 1)
{
double d12 = d1 * d1;
double d22 = d2 * d2;
const double C = -1024.0 / 243.0 * M_PI * M_PI * M_PI * M_PI;
e12 = C / (d12 * d22 * (d12 + d22));
}
else
{
double x = fabs(0.5 * (d1 - d2) / D) / ddelta;
int i = (int)x;
if (i >= ndelta - 1)
{
i = ndelta - 2;
x = 1.0;
}
else
x -= i;
y -= j;
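      /* bilinear interpolation of the tabulated kernel in (delta, D);
         x and y are now the fractional positions inside table cell (i, j) */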
e12 = ((x * y * phi[i + 1][j + 1] +
x * (1.0 - y) * phi[i + 1][j ] +
(1.0 - x) * y * phi[i ][j + 1] +
(1.0 - x) * (1.0 - y) * phi[i ][j ]));
}
return e12;
}
PyObject * vdw(PyObject* self, PyObject *args)
{
PyArrayObject* n_obj;
PyArrayObject* q0_obj;
PyArrayObject* R_obj;
PyArrayObject* cell_obj;
PyArrayObject* pbc_obj;
PyArrayObject* repeat_obj;
PyArrayObject* phi_obj;
double ddelta;
double dD;
int iA;
int iB;
PyArrayObject* rhistogram_obj;
double drhist;
PyArrayObject* Dhistogram_obj;
double dDhist;
if (!PyArg_ParseTuple(args, "OOOOOOOddiiOdOd", &n_obj, &q0_obj, &R_obj,
&cell_obj, &pbc_obj, &repeat_obj,
&phi_obj, &ddelta, &dD, &iA, &iB,
&rhistogram_obj, &drhist,
&Dhistogram_obj, &dDhist))
return NULL;
int ndelta = PyArray_DIMS(phi_obj)[0];
int nD = PyArray_DIMS(phi_obj)[1];
const double* n = (const double*)DOUBLEP(n_obj);
const int ni = PyArray_SIZE(n_obj);
const double* q0 = (const double*)DOUBLEP(q0_obj);
const double (*R)[3] = (const double (*)[3])DOUBLEP(R_obj);
const double* cell = (const double*)DOUBLEP(cell_obj);
const char* pbc = (const char*)(PyArray_DATA(pbc_obj));
const long* repeat = (const long*)(PyArray_DATA(repeat_obj));
const double (*phi)[nD] = (const double (*)[nD])DOUBLEP(phi_obj);
double* rhistogram = (double*)DOUBLEP(rhistogram_obj);
double* Dhistogram = (double*)DOUBLEP(Dhistogram_obj);
int nbinsr = PyArray_DIMS(rhistogram_obj)[0];
int nbinsD = PyArray_DIMS(Dhistogram_obj)[0];
double energy = 0.0;
if (repeat[0] == 0 && repeat[1] == 0 && repeat[2] == 0)
for (int i1 = iA; i1 < iB; i1++)
{
const double* R1 = R[i1];
double q01 = q0[i1];
for (int i2 = 0; i2 <= i1; i2++)
{
double rr = 0.0;
for (int c = 0; c < 3; c++)
{
double f = R[i2][c] - R1[c];
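          /* wrap the separation to the nearest periodic image along axis c
             (the cell is treated here as three orthogonal lengths) */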
if (pbc[c])
f = fmod(f + 1.5 * cell[c], cell[c]) - 0.5 * cell[c];
rr += f * f;
}
double r = sqrt(rr);
double d1 = r * q01;
double d2 = r * q0[i2];
double D = 0.5 * (d1 + d2);
double e12 = (vdwkernel(D, d1, d2, nD, ndelta, dD, ddelta, phi) *
n[i1] * n[i2]);
if (i1 == i2)
e12 /= 2.0;
int bin = (int)(r / drhist);
if (bin < nbinsr)
rhistogram[bin] += e12;
bin = (int)(D / dDhist);
if (bin < nbinsD)
Dhistogram[bin] += e12;
energy += e12;
}
}
else
for (int i1 = iA; i1 < iB; i1++)
{
const double* R1 = R[i1];
double q01 = q0[i1];
for (int a1 = -repeat[0]; a1 <= repeat[0]; a1++)
for (int a2 = -repeat[1]; a2 <= repeat[1]; a2++)
for (int a3 = -repeat[2]; a3 <= repeat[2]; a3++)
{
double x = 0.5;
int i2max = ni-1;
if (a1 == 0 && a2 == 0 && a3 == 0)
{
i2max = i1;
x = 1.0;
}
double R1a[3] = {R1[0] + a1 * cell[0],
R1[1] + a2 * cell[1],
R1[2] + a3 * cell[2]};
for (int i2 = 0; i2 <= i2max; i2++)
{
double rr = 0.0;
for (int c = 0; c < 3; c++)
{
double f = R[i2][c] - R1a[c];
rr += f * f;
}
double r = sqrt(rr);
double d1 = r * q01;
double d2 = r * q0[i2];
double D = 0.5 * (d1 + d2);
double e12 = (vdwkernel(D, d1, d2,
nD, ndelta, dD, ddelta, phi) *
n[i1] * n[i2] * x);
int bin = (int)(r / drhist);
if (bin < nbinsr)
rhistogram[bin] += e12;
bin = (int)(D / dDhist);
if (bin < nbinsD)
Dhistogram[bin] += e12;
energy += e12;
}
}
}
return PyFloat_FromDouble(energy);
}
PyObject * vdw2(PyObject* self, PyObject *args)
{
PyArrayObject* phi_jp_obj;
PyArrayObject* j_k_obj;
PyArrayObject* dk_k_obj;
PyArrayObject* theta_k_obj;
PyArrayObject* F_k_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &phi_jp_obj, &j_k_obj, &dk_k_obj,
&theta_k_obj, &F_k_obj))
return NULL;
const double* phi_jp = (const double*)PyArray_DATA(phi_jp_obj);
const long* j_k = (const long*)PyArray_DATA(j_k_obj);
const double* dk_k = (const double*)PyArray_DATA(dk_k_obj);
const complex double* theta_k = (const complex double*)PyArray_DATA(theta_k_obj);
complex double* F_k = (complex double*)PyArray_DATA(F_k_obj);
int nk = PyArray_SIZE(j_k_obj);
for (int k = 0; k < nk; k++)
{
const double* phi_p = phi_jp + 4 * j_k[k];
double a = phi_p[0];
double b = phi_p[1];
double c = phi_p[2];
double d = phi_p[3];
double x = dk_k[k];
F_k[k] += theta_k[k] * (a + x * (b + x * (c + x * d)));
}
Py_RETURN_NONE;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/xc.c 0000664 0000000 0000000 00000021350 13164413722 0021537 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "xc_gpaw.h"
#include "../extensions.h"
//
// __ 2
// a2 = |\/n|
//
// dE
// dedrs = ---
// dr
// s
//
// dE
// deda2 = ---------
// __ 2
// d(|\/n| )
//
void init_mgga(void** params, int code, int nspin);
void calc_mgga(void** params, int nspin, int ng,
const double* n_g, const double* sigma_g, const double* tau_g,
double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g);
double pbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double pbe_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
double pw91_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double pw91_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
double rpbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double beefvdw_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
//
typedef struct
{
PyObject_HEAD
double (*exchange)(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double (*correlation)(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
xc_parameters par;
// below added by cpo for mgga functionals outside of libxc (TPSS, M06L, etc.)
void* mgga;
} XCFunctionalObject;
static void XCFunctional_dealloc(XCFunctionalObject *self)
{
PyObject_DEL(self);
}
static PyObject*
XCFunctional_calculate(XCFunctionalObject *self, PyObject *args)
{
PyArrayObject* e_array;
PyArrayObject* n_array;
PyArrayObject* v_array;
PyArrayObject* sigma_array = 0;
PyArrayObject* dedsigma_array = 0;
PyArrayObject* tau_array = 0;
PyArrayObject* dedtau_array = 0;
if (!PyArg_ParseTuple(args, "OOO|OOOO", &e_array, &n_array, &v_array,
&sigma_array, &dedsigma_array, &tau_array, &dedtau_array))
return NULL;
int ng = 1;
for (int d = 0; d < PyArray_NDIM(e_array); d++)
ng *= PyArray_DIM(e_array, d);
xc_parameters* par = &self->par;
double* e_g = DOUBLEP(e_array);
const double* n_g = DOUBLEP(n_array);
double* v_g = DOUBLEP(v_array);
const double* sigma_g = 0;
double* dedsigma_g = 0;
if (par->gga)
{
sigma_g = DOUBLEP(sigma_array);
dedsigma_g = DOUBLEP(dedsigma_array);
}
const double* tau_g = 0;
double* dedtau_g = 0;
if (self->mgga)
{
tau_g = DOUBLEP(tau_array);
dedtau_g = DOUBLEP(dedtau_array);
}
if (self->mgga) {
int nspin = PyArray_DIM(n_array, 0) == 1 ? 1 : 2;
calc_mgga(&self->mgga, nspin, ng, n_g, sigma_g, tau_g, e_g, v_g, dedsigma_g, dedtau_g);
Py_RETURN_NONE;
}
if (PyArray_DIM(n_array, 0) == 1)
for (int g = 0; g < ng; g++)
{
double n = n_g[g];
if (n < NMIN)
n = NMIN;
double rs = pow(C0I / n, THIRD);
double dexdrs;
double dexda2;
double ex;
double decdrs;
double decda2;
double ec;
if (par->gga)
{
double a2 = sigma_g[g];
ex = self->exchange(par, n, rs, a2, &dexdrs, &dexda2);
ec = self->correlation(n, rs, 0.0, a2, 1, 0, &decdrs, 0, &decda2);
dedsigma_g[g] = n * (dexda2 + decda2);
}
else
{
ex = self->exchange(par, n, rs, 0.0, &dexdrs, 0);
ec = self->correlation(n, rs, 0.0, 0.0, 0, 0, &decdrs, 0, 0);
}
e_g[g] = n * (ex + ec);
v_g[g] += ex + ec - rs * (dexdrs + decdrs) / 3.0;
}
else
{
const double* na_g = n_g;
double* va_g = v_g;
const double* nb_g = na_g + ng;
double* vb_g = va_g + ng;
const double* sigma0_g = 0;
const double* sigma1_g = 0;
const double* sigma2_g = 0;
double* dedsigma0_g = 0;
double* dedsigma1_g = 0;
double* dedsigma2_g = 0;
const xc_parameters* par = &self->par;
if (par->gga)
{
sigma0_g = sigma_g;
sigma1_g = sigma0_g + ng;
sigma2_g = sigma1_g + ng;
dedsigma0_g = dedsigma_g;
dedsigma1_g = dedsigma0_g + ng;
dedsigma2_g = dedsigma1_g + ng;
}
for (int g = 0; g < ng; g++)
{
double na = 2.0 * na_g[g];
if (na < NMIN)
na = NMIN;
double rsa = pow(C0I / na, THIRD);
double nb = 2.0 * nb_g[g];
if (nb < NMIN)
nb = NMIN;
double rsb = pow(C0I / nb, THIRD);
double n = 0.5 * (na + nb);
double rs = pow(C0I / n, THIRD);
double zeta = 0.5 * (na - nb) / n;
double dexadrs;
double dexada2;
double exa;
double dexbdrs;
double dexbda2;
double exb;
double decdrs;
double decdzeta;
double decda2;
double ec;
if (par->gga)
{
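        /* Exchange at the doubled density 2*n_up sees the gradient
           |grad(2*n_up)|**2 = 4*sigma0 (and likewise 4*sigma2 for the down
           channel), while correlation uses the total
           |grad n|**2 = sigma0 + 2*sigma1 + sigma2; the dedsigma
           assignments below follow from the chain rule applied to these
           arguments. */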
exa = self->exchange(par, na, rsa, 4.0 * sigma0_g[g],
&dexadrs, &dexada2);
exb = self->exchange(par, nb, rsb, 4.0 * sigma2_g[g],
&dexbdrs, &dexbda2);
double a2 = sigma0_g[g] + 2 * sigma1_g[g] + sigma2_g[g];
ec = self->correlation(n, rs, zeta, a2, 1, 1,
&decdrs, &decdzeta, &decda2);
dedsigma0_g[g] = 2 * na * dexada2 + n * decda2;
dedsigma1_g[g] = 2 * n * decda2;
dedsigma2_g[g] = 2 * nb * dexbda2 + n * decda2;
}
else
{
exa = self->exchange(par, na, rsa, 0.0, &dexadrs, 0);
exb = self->exchange(par, nb, rsb, 0.0, &dexbdrs, 0);
ec = self->correlation(n, rs, zeta, 0.0, 0, 1,
&decdrs, &decdzeta, 0);
}
e_g[g] = 0.5 * (na * exa + nb * exb) + n * ec;
va_g[g] += (exa + ec -
(rsa * dexadrs + rs * decdrs) / 3.0 -
(zeta - 1.0) * decdzeta);
vb_g[g] += (exb + ec -
(rsb * dexbdrs + rs * decdrs) / 3.0 -
(zeta + 1.0) * decdzeta);
}
}
Py_RETURN_NONE;
}
static PyMethodDef XCFunctional_Methods[] = {
{"calculate",
(PyCFunction)XCFunctional_calculate, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
PyTypeObject XCFunctionalType = {
PyVarObject_HEAD_INIT(NULL, 0)
"XCFunctional",
sizeof(XCFunctionalObject),
0,
(destructor)XCFunctional_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"XC object",
0, 0, 0, 0, 0, 0,
XCFunctional_Methods
};
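/* Constructor for XCFunctionalObject.  The integer code selects the
   built-in functional (-1 LDA, 0 PBE, 1 revPBE, 2 RPBE, 14 PW91,
   17 BEEF-vdW, 20-22 MGGAs handled in xc_mgga.c); BEEF-vdW additionally
   takes an array of expansion coefficients.  Rough sketch of the call from
   the Python side (the attribute name on the _gpaw extension module is
   assumed here, not defined in this file):

       xc = _gpaw.XCFunctional(code, parameters)
       xc.calculate(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)
*/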
PyObject * NewXCFunctionalObject(PyObject *obj, PyObject *args)
{
int code;
PyArrayObject* parameters = 0;
  if (!PyArg_ParseTuple(args, "i|O", &code, &parameters))
return NULL;
XCFunctionalObject *self = PyObject_NEW(XCFunctionalObject,
&XCFunctionalType);
if (self == NULL)
return NULL;
self->mgga = NULL;
self->par.gga = 1;
self->correlation = pbe_correlation;
self->exchange = pbe_exchange;
if (code == -1) {
// LDA
self->par.gga = 0;
}
else if (code == 0) {
// PBE
self->par.kappa = 0.804;
}
else if (code == 1) {
// revPBE
self->par.kappa = 1.245;
}
else if (code == 2) {
// RPBE
self->exchange = rpbe_exchange;
}
else if (code == 14) {
// PW91
self->exchange = pw91_exchange;
}
else if (code == 20 || code == 21 || code == 22) {
// MGGA
const int nspin = 1; // a guess, perhaps corrected later in calc_mgga
init_mgga(&self->mgga,code,nspin);
}
else {
assert (code == 17);
// BEEF-vdW
self->exchange = beefvdw_exchange;
int n = PyArray_DIM(parameters, 0);
assert(n <= 110);
double* p = (double*)PyArray_BYTES(parameters);
for (int i = 0; i < n; i++)
self->par.parameters[i] = p[i];
self->par.nparameters = n / 2;
}
return (PyObject*)self;
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/xc_gpaw.h 0000664 0000000 0000000 00000001760 13164413722 0022565 0 ustar 00root root 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#ifndef _XC_GPAW_H
#define _XC_GPAW_H
/*
BETA = 0.066725
MU = BETA * pi * pi / 3
C2 = (1 / (18 * pi)**(1 / 3))
C0I = 3 / (4 * pi)
C1 = -9 / (8 * pi) * (2 * pi / 3)**(1 / 3)
CC1 = 1 / (2**(4 / 3) - 2)
CC2 = 4 * CC1 / 3
IF2 = 3 / (2 * CC2)
C3 = pi * (4 / (9 * pi))**(1 / 3) / 16
C0 = 4 * pi / 3
*/
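/* Not derived above: GAMMA = (1 - ln(2)) / pi**2 (the PBE correlation
   constant), THIRD = 1/3, and NMIN is the density floor used to avoid
   dividing by a vanishing density. */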
#define BETA 0.066725
#define GAMMA 0.031091
#define MU 0.2195164512208958
#define C2 0.26053088059892404
#define C0I 0.238732414637843
#define C1 -0.45816529328314287
#define CC1 1.9236610509315362
#define CC2 2.5648814012420482
#define IF2 0.58482236226346462
#define C3 0.10231023756535741
#define C0 4.1887902047863905
#define THIRD 0.33333333333333333
#define NMIN 1.0E-10
typedef int bool;
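/* Parameters shared by the exchange kernels (pbe_exchange, rpbe_exchange,
   beefvdw_exchange, ...): 'gga' selects the LDA or GGA code path, 'kappa'
   is the exchange enhancement cutoff used by PBE/revPBE, and
   'parameters'/'nparameters' hold the BEEF-vdW expansion coefficients. */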
typedef struct
{
bool gga;
double kappa;
int nparameters;
double parameters[110];
} xc_parameters;
#endif /* _XC_GPAW_H */
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/xc_mgga.c 0000664 0000000 0000000 00000007424 13164413722 0022540 0 ustar 00root root 0000000 0000000
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "xc_mgga.h"
#include "xc_gpaw.h"
extern const mgga_func_info m06l_info;
extern const mgga_func_info tpss_info;
extern const mgga_func_info revtpss_info;
static void init_common(common_params* params, int code, int nspin, const mgga_func_info *finfo) {
params->code = code;
params->nspin = nspin;
params->funcinfo = finfo;
}
void init_mgga(void** params, int code, int nspin) {
const mgga_func_info *finfo;
if (code==20) {
finfo = &tpss_info;
} else if (code==21) {
finfo = &m06l_info;
} else if (code==22) {
finfo = &revtpss_info;
} else {
    // This should never happen; the assert below forces a crash.
assert(code>=20 && code <=22);
finfo = NULL;
}
*params = malloc(finfo->size);
init_common(*params, code, nspin, finfo);
finfo->init(*params);
}
void end_mgga(common_params *common) {
common->funcinfo->end(common);
free(common);
}
void calc_mgga(void** params, int nspin, int ng,
const double* n_g, const double* sigma_g, const double* tau_g,
double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g) {
common_params *common = (common_params*)*params;
// check for a changed spin (similar to a line in gpaw/libxc.py)
if (nspin!=common->nspin) {
int code = common->code; // save this, since we're about to destroy common
end_mgga(common);
init_mgga(params, code, nspin);
common = (common_params*)*params; // init_mgga changes this
}
if (nspin == 1) {
for (int g = 0; g < ng; g++) {
// kludge n[1] because of the way TPSS was written (requires n[1]=0.0 even for unpolarized)
double n[2];
n[0] = n_g[g];
n[1] = 0.0;
if (n[0] < NMIN) n[0] = NMIN;
      // m06l assumes there is space for spin-polarized output even for
      // non-spin-polarized calculations.
double etmp, vtmp[2], dedsigmatmp[3], dedtautmp[2];
common->funcinfo->exch(*params, n, sigma_g+g, tau_g+g,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] = etmp;
v_g[g] += vtmp[0];
dedsigma_g[g] = dedsigmatmp[0];
dedtau_g[g] = dedtautmp[0];
common->funcinfo->corr(*params, n, sigma_g+g, tau_g+g,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] += etmp;
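      // exch/corr return energies per electron; scale by the density so
      // that e_g holds an energy density, mirroring the spin-paired GGA
      // path (e_g = n * (ex + ec)).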
e_g[g] *= n[0];
v_g[g] += vtmp[0];
dedsigma_g[g] += dedsigmatmp[0];
dedtau_g[g] += dedtautmp[0];
}
} else {
double etmp, ntmp[2], vtmp[2], sigmatmp[3], dedsigmatmp[3],
tautmp[2], dedtautmp[2];
for (int g = 0; g < ng; g++) {
ntmp[0] = n_g[g];
if (ntmp[0] < NMIN) ntmp[0] = NMIN;
ntmp[1] = n_g[g+ng];
if (ntmp[1] < NMIN) ntmp[1] = NMIN;
sigmatmp[0] = sigma_g[g];
sigmatmp[1] = sigma_g[g+ng];
sigmatmp[2] = sigma_g[g+ng+ng];
tautmp[0] = tau_g[g];
tautmp[1] = tau_g[g+ng];
// kludge: mgga_x_tpss requires dedsigma[1] set to 0, since it doesn't calculate it.
dedsigmatmp[1]=0.0;
common->funcinfo->exch(*params, ntmp, sigmatmp, tautmp,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] = etmp;
v_g[g] += vtmp[0];
v_g[g+ng] += vtmp[1];
dedsigma_g[g] = dedsigmatmp[0];
dedsigma_g[g+ng] = dedsigmatmp[1];
dedsigma_g[g+ng+ng] = dedsigmatmp[2];
dedtau_g[g] = dedtautmp[0];
dedtau_g[g+ng] = dedtautmp[1];
common->funcinfo->corr(*params, ntmp, sigmatmp, tautmp,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] += etmp;
e_g[g] *= ntmp[0]+ntmp[1];
v_g[g] += vtmp[0];
v_g[g+ng] += vtmp[1];
dedsigma_g[g] += dedsigmatmp[0];
dedsigma_g[g+ng] += dedsigmatmp[1];
dedsigma_g[g+ng+ng] += dedsigmatmp[2];
dedtau_g[g] += dedtautmp[0];
dedtau_g[g+ng] += dedtautmp[1];
}
}
}
gpaw-1.3.0-82cebebc037510d876f90d9f8d533fd021f751f5/c/xc/xc_mgga.h 0000664 0000000 0000000 00000002131 13164413722 0022533 0 ustar 00root root 0000000 0000000
#ifndef GPAW_XC_MGGA_H
#define GPAW_XC_MGGA_H
#define M_PI 3.14159265358979323846
#define MIN_DENS 1.0e-20
#define MIN_GRAD 1.0e-20
#define max(x,y) ((x<y) ? y : x)