gpaw-0.11.0.13004/LICENSE:

GPAW is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GPAW is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GPAW. If not, see <http://www.gnu.org/licenses/>.
gpaw-0.11.0.13004/c/utilities.c:

/* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2008-2010 CSC - IT Center for Science Ltd.
* Copyright (C) 2011 Argonne National Laboratory
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __DARWIN_UNIX03
/* Allows for special Mac OS magic */
#include <malloc/malloc.h>
#endif
#ifdef __linux__
/* stdlib.h does not define mallinfo (it should!) */
#include <malloc.h>
#endif
#ifdef GPAW_HPM
void HPM_Start(char *);
void HPM_Stop(char *);
void summary_start(void);
void summary_stop(void);
PyObject* ibm_hpm_start(PyObject *self, PyObject *args)
{
char* s;
if (!PyArg_ParseTuple(args, "s", &s))
return NULL;
HPM_Start(s);
Py_RETURN_NONE;
}
PyObject* ibm_hpm_stop(PyObject *self, PyObject *args)
{
char* s;
if (!PyArg_ParseTuple(args, "s", &s))
return NULL;
HPM_Stop(s);
Py_RETURN_NONE;
}
PyObject* ibm_mpi_start(PyObject *self)
{
summary_start();
Py_RETURN_NONE;
}
PyObject* ibm_mpi_stop(PyObject *self)
{
summary_stop();
Py_RETURN_NONE;
}
#endif
#ifdef CRAYPAT
#include <pat_api.h>
PyObject* craypat_region_begin(PyObject *self, PyObject *args)
{
int n;
char* s;
if (!PyArg_ParseTuple(args, "is", &n, &s))
return NULL;
PAT_region_begin(n, s);
Py_RETURN_NONE;
}
PyObject* craypat_region_end(PyObject *self, PyObject *args)
{
int n;
if (!PyArg_ParseTuple(args, "i", &n))
return NULL;
PAT_region_end(n);
Py_RETURN_NONE;
}
#endif
#ifdef PARALLEL
#include <mpi.h>
struct eval {
double val;
int rank;
};
static void coll_print(FILE *fp, const char *label, double val,
int print_aggregate, MPI_Comm Comm){
double sum;
struct eval in;
struct eval out;
int rank, numranks;
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
in.val=val;
in.rank=rank;
MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm);
if(rank==0) {
if(print_aggregate)
fprintf(fp,"#%19s %14.3f %10.3f ",label,sum,sum/numranks);
else
fprintf(fp,"#%19s %10.3f ",label,sum/numranks);
}
MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm);
if(rank==0){
fprintf(fp,"%4d %10.3f ", out.rank, out.val);
}
MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm);
if(rank==0){
fprintf(fp,"%4d %10.3f\n",out.rank, out.val);
}
}
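/* Each call to coll_print() above emits one line of the performance report:
 * the label, the value summed over all ranks (only when print_aggregate is
 * set), the per-rank average, and then the rank and value of the minimum
 * and the maximum, obtained with MPI_MINLOC/MPI_MAXLOC reductions.  A line
 * of gpaw_perf.log therefore looks roughly like
 *
 *   #     Real time (s)      123.456     30.864    2     29.1    0     32.7
 *
 * (the numbers here are illustrative only, not taken from an actual run). */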
// Utilities for performance measurement with PAPI
#ifdef GPAW_PAPI
#include <papi.h>
#define NUM_PAPI_EV 1
static long_long papi_start_usec_p;
static long_long papi_start_usec_r;
// Returns PAPI_dmem_info structure in Python dictionary
// Units used by PAPI are kB
PyObject* papi_mem_info(PyObject *self, PyObject *args)
{
PAPI_dmem_info_t dmem;
PyObject* py_dmem;
PAPI_get_dmem_info(&dmem);
py_dmem = PyDict_New();
PyDict_SetItemString(py_dmem, "peak", PyLong_FromLongLong(dmem.peak));
PyDict_SetItemString(py_dmem, "size", PyLong_FromLongLong(dmem.size));
PyDict_SetItemString(py_dmem, "resident", PyLong_FromLongLong(dmem.resident));
PyDict_SetItemString(py_dmem, "high_water_mark",
PyLong_FromLongLong(dmem.high_water_mark));
PyDict_SetItemString(py_dmem, "shared", PyLong_FromLongLong(dmem.shared));
PyDict_SetItemString(py_dmem, "text", PyLong_FromLongLong(dmem.text));
PyDict_SetItemString(py_dmem, "library", PyLong_FromLongLong(dmem.library));
PyDict_SetItemString(py_dmem, "heap", PyLong_FromLongLong(dmem.heap));
PyDict_SetItemString(py_dmem, "stack", PyLong_FromLongLong(dmem.stack));
PyDict_SetItemString(py_dmem, "pagesize", PyLong_FromLongLong(dmem.pagesize));
PyDict_SetItemString(py_dmem, "pte", PyLong_FromLongLong(dmem.pte));
return py_dmem;
}
int gpaw_perf_init()
{
int events[NUM_PAPI_EV];
events[0] = PAPI_FP_OPS;
// events[1] = PAPI_L1_DCM;
// events[2] = PAPI_L1_DCH;
// events[3] = PAPI_TOT_INS;
PAPI_start_counters(events, NUM_PAPI_EV);
papi_start_usec_r = PAPI_get_real_usec();
papi_start_usec_p = PAPI_get_virt_usec();
return 0;
}
void gpaw_perf_finalize()
{
long long papi_values[NUM_PAPI_EV];
double rtime,ptime;
double avegflops;
double gflop_opers;
PAPI_dmem_info_t dmem;
int error = 0;
double l1hitratio;
long_long papi_end_usec_p;
long_long papi_end_usec_r;
int rank, numranks;
MPI_Comm Comm = MPI_COMM_WORLD;
// Get PAPI info; the first call initializes the PAPI counters
papi_end_usec_r = PAPI_get_real_usec();
papi_end_usec_p = PAPI_get_virt_usec();
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
FILE *fp;
if (rank == 0)
fp = fopen("gpaw_perf.log", "w");
else
fp = NULL;
if(PAPI_read_counters(papi_values, NUM_PAPI_EV) != PAPI_OK)
error++;
if(PAPI_get_dmem_info(&dmem) != PAPI_OK)
error++;
rtime=(double)(papi_end_usec_r - papi_start_usec_r)/1e6;
ptime=(double)(papi_end_usec_p - papi_start_usec_p)/1e6;
avegflops=(double)papi_values[0]/rtime/1e9;
gflop_opers = (double)papi_values[0]/1e9;
// l1hitratio=100.0*(double)papi_values[1]/(papi_values[0] + papi_values[1]);
if (rank==0 ) {
fprintf(fp,"######## GPAW PERFORMANCE REPORT (PAPI) ########\n");
fprintf(fp,"# MPI tasks %d\n", numranks);
fprintf(fp,"# aggregated average min(rank/val) max(rank/val) \n");
}
coll_print(fp, "Real time (s)", rtime, 1, Comm);
coll_print(fp, "Process time (s)", ptime, 1, Comm);
coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm);
coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm);
// coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm);
coll_print(fp, "Peak mem size (MB)", (double)dmem.peak/1.0e3, 0, Comm );
coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark/1.0e3 ,
0, Comm);
if(rank==0) {
fflush(fp);
fclose(fp);
}
}
#elif GPAW_HPM
void HPM_Start(char *);
int gpaw_perf_init()
{
HPM_Start("GPAW");
return 0;
}
void gpaw_perf_finalize()
{
HPM_Stop("GPAW");
}
#else // Use just MPI_Wtime
static double t0;
int gpaw_perf_init(void)
{
t0 = MPI_Wtime();
return 0;
}
void gpaw_perf_finalize(void)
{
double rtime;
int rank, numranks;
MPI_Comm Comm = MPI_COMM_WORLD;
MPI_Comm_size(Comm, &numranks);
MPI_Comm_rank(Comm, &rank);
double t1 = MPI_Wtime();
rtime = t1 - t0;
FILE *fp;
if (rank == 0)
fp = fopen("gpaw_perf.log", "w");
else
fp = NULL;
if (rank==0 ) {
fprintf(fp,"######## GPAW PERFORMANCE REPORT (MPI_Wtime) ########\n");
fprintf(fp,"# MPI tasks %d\n", numranks);
fprintf(fp,"# aggregated average min(rank/val) max(rank/val) \n");
}
coll_print(fp, "Real time (s)", rtime, 1, Comm);
if(rank==0) {
fflush(fp);
fclose(fp);
}
}
#endif
#endif
// returns the distance between two 3d double vectors
double distance(double *a, double *b)
{
double sum = 0;
double diff;
for (int c = 0; c < 3; c++) {
diff = a[c] - b[c];
sum += diff*diff;
}
return sqrt(sum);
}
/* Get heap memory usage using mallinfo.
   There is a UNIX version and a Mac OS X version; the latter is not well
   tested but seems to give credible values in simple tests. */
PyObject* heap_mallinfo(PyObject *self)
{
double heap;
#ifdef __linux__
unsigned int mmap, arena, small;
struct mallinfo mi; /* structure in bytes */
mi = mallinfo();
mmap = mi.hblkhd;
arena = mi.uordblks;
small = mi.usmblks;
heap = ((double)(mmap + arena + small))/1024.0; /* convert to KB */
#elif defined(__DARWIN_UNIX03)
/* Mac OS X specific hack */
struct malloc_statistics_t mi; /* structure in bytes */
malloc_zone_statistics(NULL, &mi);
heap = ((double)(mi.size_in_use))/1024.0; /* convert to KB */
#else
heap = -1;
#endif
return Py_BuildValue("d",heap);
}
/* elementwise multiply and add result to another vector
*
* c[i] += a[i] * b[i] , for i = every element in the vectors
*/
PyObject* elementwise_multiply_add(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
PyArrayObject* cc;
if (!PyArg_ParseTuple(args, "OOO", &aa, &bb, &cc))
return NULL;
const double* const a = DOUBLEP(aa);
const double* const b = DOUBLEP(bb);
double* const c = DOUBLEP(cc);
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
c[i] += a[i] * b[i];
}
Py_RETURN_NONE;
}
PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args)
{
Py_complex A_obj;
PyArrayObject* r_cG_obj;
PyArrayObject* r0_c_obj;
Py_complex sigma_obj; // imaginary part ignored
PyArrayObject* k_c_obj;
PyArrayObject* gs_G_obj;
if (!PyArg_ParseTuple(args, "DOODOO", &A_obj, &r_cG_obj, &r0_c_obj, &sigma_obj, &k_c_obj, &gs_G_obj))
return NULL;
int C, G;
C = PyArray_DIMS(r_cG_obj)[0];
G = PyArray_DIMS(r_cG_obj)[1];
for (int i = 2; i < PyArray_NDIM(r_cG_obj); i++)
G *= PyArray_DIMS(r_cG_obj)[i];
double* r_cG = DOUBLEP(r_cG_obj); // XXX not ideally strided
double* r0_c = DOUBLEP(r0_c_obj);
double dr2, kr, alpha = -0.5/pow(sigma_obj.real, 2);
int gammapoint = 1;
double* k_c = DOUBLEP(k_c_obj);
  for (int c = 0; c < C; c++)
    gammapoint &= (k_c[c] == 0.0);

  if (PyArray_DESCR(gs_G_obj)->type_num == NPY_DOUBLE)
    {
      double* gs_G = DOUBLEP(gs_G_obj);
      for (int g = 0; g < G; g++)
        {
          dr2 = kr = 0.0;
          for (int c = 0; c < C; c++)
            {
              dr2 += pow(r_cG[c * G + g] - r0_c[c], 2);
              kr += k_c[c] * r_cG[c * G + g];
            }
          if (gammapoint)
            gs_G[g] = A_obj.real * exp(alpha * dr2);
          else /* real output array: take the real part of A*exp(i*k.r) */
            gs_G[g] = (A_obj.real * cos(kr) - A_obj.imag * sin(kr))
                      * exp(alpha * dr2);
        }
    }
  else
    {
      double_complex* gs_G = COMPLEXP(gs_G_obj);
      double_complex A = A_obj.real + I * A_obj.imag;
      for (int g = 0; g < G; g++)
        {
          dr2 = kr = 0.0;
          for (int c = 0; c < C; c++)
            {
              dr2 += pow(r_cG[c * G + g] - r0_c[c], 2);
              kr += k_c[c] * r_cG[c * G + g];
            }
          gs_G[g] = gammapoint ? A * exp(alpha * dr2)
                               : A * cexp(alpha * dr2 + I * kr);
        }
    }
Py_RETURN_NONE;
}
/* vdot
*
* If a and b are input vectors,
* a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + ...
* is returned.
*/
PyObject* utilities_vdot(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* const a = DOUBLEP(aa);
const double* const b = DOUBLEP(bb);
double sum = 0.0;
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
sum += a[i] * b[i];
}
return PyFloat_FromDouble(sum);
}
/* vdot
*
* If a is the input vector,
* a[0]*a[0] + a[1]*a[1] + a[2]*a[2] + ...
* is returned.
*/
PyObject* utilities_vdot_self(PyObject *self, PyObject *args)
{
PyArrayObject* aa;
if (!PyArg_ParseTuple(args, "O", &aa))
return NULL;
const double* const a = DOUBLEP(aa);
double sum = 0.0;
int n = 1;
for (int d = 0; d < PyArray_NDIM(aa); d++)
n *= PyArray_DIMS(aa)[d];
for (int i = 0; i < n; i++)
{
sum += a[i] * a[i];
}
return PyFloat_FromDouble(sum);
}
PyObject* errorfunction(PyObject *self, PyObject *args)
{
double x;
if (!PyArg_ParseTuple(args, "d", &x))
return NULL;
return Py_BuildValue("d", erf(x));
}
PyObject* pack(PyObject *self, PyObject *args)
{
PyArrayObject* a_obj;
if (!PyArg_ParseTuple(args, "O", &a_obj))
return NULL;
a_obj = PyArray_GETCONTIGUOUS(a_obj);
int n = PyArray_DIMS(a_obj)[0];
npy_intp dims[1] = {n * (n + 1) / 2};
int typenum = PyArray_DESCR(a_obj)->type_num;
PyArrayObject* b_obj = (PyArrayObject*) PyArray_SimpleNew(1, dims,
typenum);
if (b_obj == NULL)
return NULL;
if (typenum == NPY_DOUBLE) {
double* a = (double*)PyArray_DATA(a_obj);
double* b = (double*)PyArray_DATA(b_obj);
for (int r = 0; r < n; r++) {
*b++ = a[r + n * r];
for (int c = r + 1; c < n; c++)
*b++ = a[r + n * c] + a[c + n * r];
}
} else {
double complex* a = (double complex*)PyArray_DATA(a_obj);
double complex* b = (double complex*)PyArray_DATA(b_obj);
for (int r = 0; r < n; r++) {
*b++ = a[r + n * r];
for (int c = r + 1; c < n; c++)
*b++ = a[r + n * c] + a[c + n * r];
}
}
Py_DECREF(a_obj);
PyObject* value = Py_BuildValue("O", b_obj);
Py_DECREF(b_obj);
return value;
}
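/* Packing convention used by pack() above: for an n x n matrix the output
 * vector of length n*(n+1)/2 holds, row by row, the diagonal element
 * followed by the symmetrized off-diagonal pairs a[r][c] + a[c][r] (c > r).
 * As a small illustration (not from the original sources), for n = 3:
 *
 *   b = [ a00,  a01+a10,  a02+a20,  a11,  a12+a21,  a22 ]
 */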
PyObject* unpack(PyObject *self, PyObject *args)
{
PyArrayObject* ap;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "OO", &ap, &a))
return NULL;
int n = PyArray_DIMS(a)[0];
double* datap = DOUBLEP(ap);
double* data = DOUBLEP(a);
for (int r = 0; r < n; r++)
for (int c = r; c < n; c++)
{
double d = *datap++;
data[c + r * n] = d;
data[r + c * n] = d;
}
Py_RETURN_NONE;
}
PyObject* unpack_complex(PyObject *self, PyObject *args)
{
PyArrayObject* ap;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "OO", &ap, &a))
return NULL;
int n = PyArray_DIMS(a)[0];
double_complex* datap = COMPLEXP(ap);
double_complex* data = COMPLEXP(a);
for (int r = 0; r < n; r++)
for (int c = r; c < n; c++)
{
double_complex d = *datap++;
data[c + r * n] = d;
data[r + c * n] = conj(d);
}
Py_RETURN_NONE;
}
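/* unpack() and unpack_complex() expand a packed upper triangle (one value
 * per index pair (r, c) with c >= r, stored row by row) into a full n x n
 * matrix: each packed value is written to both (r, c) and (c, r), with the
 * complex conjugate at (r, c) in the Hermitian case. */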
PyObject* hartree(PyObject *self, PyObject *args)
{
int l;
PyArrayObject* nrdr_obj;
PyArrayObject* r_obj;
PyArrayObject* vr_obj;
if (!PyArg_ParseTuple(args, "iOOO", &l, &nrdr_obj, &r_obj, &vr_obj))
return NULL;
const int M = PyArray_DIM(nrdr_obj, 0);
const double* nrdr = DOUBLEP(nrdr_obj);
const double* r = DOUBLEP(r_obj);
double* vr = DOUBLEP(vr_obj);
double p = 0.0;
double q = 0.0;
for (int g = M - 1; g > 0; g--)
{
double R = r[g];
double rl = pow(R, l);
double dp = nrdr[g] / rl;
double rlp1 = rl * R;
double dq = nrdr[g] * rlp1;
vr[g] = (p + 0.5 * dp) * rlp1 - (q + 0.5 * dq) / rl;
p += dp;
q += dq;
}
vr[0] = 0.0;
double f = 4.0 * M_PI / (2 * l + 1);
for (int g = 1; g < M; g++)
{
double R = r[g];
vr[g] = f * (vr[g] + q / pow(R, l));
}
Py_RETURN_NONE;
}
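/* The loop above is an inward trapezoidal integration of the radial Poisson
 * equation for multipole l.  With nrdr[g] ~ n_l(r_g) * r_g * dr_g, hartree()
 * returns vr[g] = r_g * v_l(r_g), where
 *
 *   v_l(r) = 4 pi / (2l + 1) *
 *            [ r^-(l+1) * int_0^r   n_l(r') r'^(l+2) dr'
 *              + r^l    * int_r^inf n_l(r') r'^(1-l) dr' ]
 */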
PyObject* localize(PyObject *self, PyObject *args)
{
PyArrayObject* Z_nnc;
PyArrayObject* U_nn;
if (!PyArg_ParseTuple(args, "OO", &Z_nnc, &U_nn))
return NULL;
int n = PyArray_DIMS(U_nn)[0];
double complex (*Z)[n][3] = (double complex (*)[n][3])COMPLEXP(Z_nnc);
double (*U)[n] = (double (*)[n])DOUBLEP(U_nn);
double value = 0.0;
for (int a = 0; a < n; a++)
{
for (int b = a + 1; b < n; b++)
{
double complex* Zaa = Z[a][a];
double complex* Zab = Z[a][b];
double complex* Zbb = Z[b][b];
double x = 0.0;
double y = 0.0;
for (int c = 0; c < 3; c++)
{
x += (0.25 * creal(Zbb[c] * conj(Zbb[c])) +
0.25 * creal(Zaa[c] * conj(Zaa[c])) -
0.5 * creal(Zaa[c] * conj(Zbb[c])) -
creal(Zab[c] * conj(Zab[c])));
y += creal((Zaa[c] - Zbb[c]) * conj(Zab[c]));
}
double t = 0.25 * atan2(y, x);
double C = cos(t);
double S = sin(t);
for (int i = 0; i < a; i++)
for (int c = 0; c < 3; c++)
{
double complex Ziac = Z[i][a][c];
Z[i][a][c] = C * Ziac + S * Z[i][b][c];
Z[i][b][c] = C * Z[i][b][c] - S * Ziac;
}
for (int c = 0; c < 3; c++)
{
double complex Zaac = Zaa[c];
double complex Zabc = Zab[c];
double complex Zbbc = Zbb[c];
Zaa[c] = C * C * Zaac + 2 * C * S * Zabc + S * S * Zbbc;
Zbb[c] = C * C * Zbbc - 2 * C * S * Zabc + S * S * Zaac;
Zab[c] = S * C * (Zbbc - Zaac) + (C * C - S * S) * Zabc;
}
for (int i = a + 1; i < b; i++)
for (int c = 0; c < 3; c++)
{
double complex Zaic = Z[a][i][c];
Z[a][i][c] = C * Zaic + S * Z[i][b][c];
Z[i][b][c] = C * Z[i][b][c] - S * Zaic;
}
for (int i = b + 1; i < n; i++)
for (int c = 0; c < 3; c++)
{
double complex Zaic = Z[a][i][c];
Z[a][i][c] = C * Zaic + S * Z[b][i][c];
Z[b][i][c] = C * Z[b][i][c] - S * Zaic;
}
for (int i = 0; i < n; i++)
{
double Uia = U[i][a];
U[i][a] = C * Uia + S * U[i][b];
U[i][b] = C * U[i][b] - S * Uia;
}
}
double complex* Zaa = Z[a][a];
for (int c = 0; c < 3; c++)
value += creal(Zaa[c] * conj(Zaa[c]));
}
return Py_BuildValue("d", value);
}
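/* localize() above performs one sweep of 2 x 2 Jacobi-like rotations over
 * all orbital pairs (a, b): for each pair a rotation angle t = atan2(y, x)/4
 * is chosen from the matrix elements Z_aa, Z_bb and Z_ab, the corresponding
 * Givens rotation (C = cos t, S = sin t) is applied to Z, and the same
 * rotation is accumulated in the matrix U.  The returned number is the
 * localization measure  sum_a sum_c |Z[a][a][c]|^2,  which a caller would
 * typically monitor for convergence over repeated sweeps. */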
PyObject* spherical_harmonics(PyObject *self, PyObject *args)
{
int l;
PyArrayObject* R_obj_c;
PyArrayObject* Y_obj_m;
if (!PyArg_ParseTuple(args, "iOO", &l, &R_obj_c, &Y_obj_m))
return NULL;
double* R_c = DOUBLEP(R_obj_c);
double* Y_m = DOUBLEP(Y_obj_m);
if (l == 0)
Y_m[0] = 0.28209479177387814;
else
{
double x = R_c[0];
double y = R_c[1];
double z = R_c[2];
if (l == 1)
{
Y_m[0] = 0.48860251190291992 * y;
Y_m[1] = 0.48860251190291992 * z;
Y_m[2] = 0.48860251190291992 * x;
}
else
{
double r2 = x*x+y*y+z*z;
if (l == 2)
{
Y_m[0] = 1.0925484305920792 * x*y;
Y_m[1] = 1.0925484305920792 * y*z;
Y_m[2] = 0.31539156525252005 * (3*z*z-r2);
Y_m[3] = 1.0925484305920792 * x*z;
Y_m[4] = 0.54627421529603959 * (x*x-y*y);
}
else if (l == 3)
{
Y_m[0] = 0.59004358992664352 * (-y*y*y+3*x*x*y);
Y_m[1] = 2.8906114426405538 * x*y*z;
Y_m[2] = 0.45704579946446577 * (-y*r2+5*y*z*z);
Y_m[3] = 0.3731763325901154 * (5*z*z*z-3*z*r2);
Y_m[4] = 0.45704579946446577 * (5*x*z*z-x*r2);
Y_m[5] = 1.4453057213202769 * (x*x*z-y*y*z);
Y_m[6] = 0.59004358992664352 * (x*x*x-3*x*y*y);
}
else if (l == 4)
{
Y_m[0] = 2.5033429417967046 * (x*x*x*y-x*y*y*y);
Y_m[1] = 1.7701307697799307 * (-y*y*y*z+3*x*x*y*z);
Y_m[2] = 0.94617469575756008 * (-x*y*r2+7*x*y*z*z);
Y_m[3] = 0.66904654355728921 * (-3*y*z*r2+7*y*z*z*z);
Y_m[4] = 0.10578554691520431 * (-30*z*z*r2+3*r2*r2+35*z*z*z*z);
Y_m[5] = 0.66904654355728921 * (7*x*z*z*z-3*x*z*r2);
Y_m[6] = 0.47308734787878004 * (-x*x*r2+7*x*x*z*z+y*y*r2-7*y*y*z*z);
Y_m[7] = 1.7701307697799307 * (x*x*x*z-3*x*y*y*z);
Y_m[8] = 0.62583573544917614 * (-6*x*x*y*y+x*x*x*x+y*y*y*y);
}
else if (l == 5)
{
Y_m[0] = 0.65638205684017015 * (y*y*y*y*y+5*x*x*x*x*y-10*x*x*y*y*y);
Y_m[1] = 8.3026492595241645 * (x*x*x*y*z-x*y*y*y*z);
Y_m[2] = 0.48923829943525038 * (y*y*y*r2-9*y*y*y*z*z-3*x*x*y*r2+27*x*x*y*z*z);
Y_m[3] = 4.7935367849733241 * (3*x*y*z*z*z-x*y*z*r2);
Y_m[4] = 0.45294665119569694 * (-14*y*z*z*r2+y*r2*r2+21*y*z*z*z*z);
Y_m[5] = 0.1169503224534236 * (63*z*z*z*z*z+15*z*r2*r2-70*z*z*z*r2);
Y_m[6] = 0.45294665119569694 * (x*r2*r2-14*x*z*z*r2+21*x*z*z*z*z);
Y_m[7] = 2.3967683924866621 * (-3*y*y*z*z*z+y*y*z*r2+3*x*x*z*z*z-x*x*z*r2);
Y_m[8] = 0.48923829943525038 * (9*x*x*x*z*z-27*x*y*y*z*z-x*x*x*r2+3*x*y*y*r2);
Y_m[9] = 2.0756623148810411 * (y*y*y*y*z-6*x*x*y*y*z+x*x*x*x*z);
Y_m[10] = 0.65638205684017015 * (-10*x*x*x*y*y+5*x*y*y*y*y+x*x*x*x*x);
}
else if (l == 6)
{
Y_m[0] = 1.3663682103838286 * (-10*x*x*x*y*y*y+3*x*x*x*x*x*y+3*x*y*y*y*y*y);
Y_m[1] = 2.3666191622317521 * (y*y*y*y*y*z-10*x*x*y*y*y*z+5*x*x*x*x*y*z);
Y_m[2] = 2.0182596029148967 * (-x*x*x*y*r2+x*y*y*y*r2-11*x*y*y*y*z*z+11*x*x*x*y*z*z);
Y_m[3] = 0.92120525951492349 * (-11*y*y*y*z*z*z-9*x*x*y*z*r2+33*x*x*y*z*z*z+3*y*y*y*z*r2);
Y_m[4] =0.92120525951492349 * (x*y*r2*r2+33*x*y*z*z*z*z-18*x*y*z*z*r2);
Y_m[5] = 0.58262136251873142 * (5*y*z*r2*r2+33*y*z*z*z*z*z-30*y*z*z*z*r2);
Y_m[6] = 0.063569202267628425 * (231*z*z*z*z*z*z-5*r2*r2*r2+105*z*z*r2*r2-315*z*z*z*z*r2);
Y_m[7] = 0.58262136251873142 * (-30*x*z*z*z*r2+33*x*z*z*z*z*z+5*x*z*r2*r2);
Y_m[8] = 0.46060262975746175 * (33*x*x*z*z*z*z+x*x*r2*r2-y*y*r2*r2-18*x*x*z*z*r2+18*y*y*z*z*r2-33*y*y*z*z*z*z);
Y_m[9] = 0.92120525951492349 * (-3*x*x*x*z*r2-33*x*y*y*z*z*z+9*x*y*y*z*r2+11*x*x*x*z*z*z);
Y_m[10] = 0.50456490072872417 * (11*y*y*y*y*z*z-66*x*x*y*y*z*z-x*x*x*x*r2+6*x*x*y*y*r2+11*x*x*x*x*z*z-y*y*y*y*r2);
Y_m[11] = 2.3666191622317521 * (5*x*y*y*y*y*z+x*x*x*x*x*z-10*x*x*x*y*y*z);
Y_m[12] = 0.6831841051919143 * (x*x*x*x*x*x+15*x*x*y*y*y*y-15*x*x*x*x*y*y-y*y*y*y*y*y);
}
}
}
Py_RETURN_NONE;
}
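/* The constants above are the normalization factors of the real spherical
 * harmonics, e.g. 0.28209479... = sqrt(1/(4 pi)) and 0.48860251... =
 * sqrt(3/(4 pi)).  Since the input vector R_c is not normalized, the values
 * stored in Y_m are |R|^l * Y_lm(R/|R|), i.e. real solid harmonics, ordered
 * by increasing m from -l to +l. */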
gpaw-0.11.0.13004/c/blas.c:

/* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2007 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define dscal_ dscal
# define zscal_ zscal
# define daxpy_ daxpy
# define zaxpy_ zaxpy
# define dsyrk_ dsyrk
# define zher_ zher
# define zherk_ zherk
# define dsyr2k_ dsyr2k
# define zher2k_ zher2k
# define dgemm_ dgemm
# define zgemm_ zgemm
# define dgemv_ dgemv
# define zgemv_ zgemv
# define ddot_ ddot
#endif
void dscal_(int*n, double* alpha, double* x, int* incx);
void zscal_(int*n, void* alpha, void* x, int* incx);
void daxpy_(int* n, double* alpha,
double* x, int *incx,
double* y, int *incy);
void zaxpy_(int* n, void* alpha,
void* x, int *incx,
void* y, int *incy);
void dsyrk_(char *uplo, char *trans, int *n, int *k,
double *alpha, double *a, int *lda, double *beta,
double *c, int *ldc);
void zher_(char *uplo, int *n,
double *alpha, void *x, int *incx,
void *a, int *lda);
void zherk_(char *uplo, char *trans, int *n, int *k,
double *alpha, void *a, int *lda,
double *beta,
void *c, int *ldc);
void dsyr2k_(char *uplo, char *trans, int *n, int *k,
double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
void zher2k_(char *uplo, char *trans, int *n, int *k,
void *alpha, void *a, int *lda,
void *b, int *ldb, double *beta,
void *c, int *ldc);
void dgemm_(char *transa, char *transb, int *m, int * n,
int *k, double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
void zgemm_(char *transa, char *transb, int *m, int * n,
int *k, void *alpha, void *a, int *lda,
void *b, int *ldb, void *beta,
void *c, int *ldc);
void dgemv_(char *trans, int *m, int * n,
double *alpha, double *a, int *lda,
double *x, int *incx, double *beta,
double *y, int *incy);
void zgemv_(char *trans, int *m, int * n,
void *alpha, void *a, int *lda,
void *x, int *incx, void *beta,
void *y, int *incy);
double ddot_(int *n, void *dx, int *incx, void *dy, int *incy);
PyObject* scal(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* x;
if (!PyArg_ParseTuple(args, "DO", &alpha, &x))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
if (PyArray_DESCR(x)->type_num == NPY_DOUBLE)
dscal_(&n, &(alpha.real), DOUBLEP(x), &incx);
else
zscal_(&n, &alpha, (void*)COMPLEXP(x), &incx);
Py_RETURN_NONE;
}
PyObject* gemm(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* b;
Py_complex beta;
PyArrayObject* c;
char t = 'n';
char* transa = &t;
if (!PyArg_ParseTuple(args, "DOODO|s", &alpha, &a, &b, &beta, &c, &transa))
return NULL;
int m, k, lda, ldb, ldc;
if (*transa == 'n')
{
m = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
m *= PyArray_DIMS(a)[i];
k = PyArray_DIMS(a)[0];
lda = MAX(1, PyArray_STRIDES(a)[0] / PyArray_STRIDES(a)[PyArray_NDIM(a) - 1]);
ldb = MAX(1, PyArray_STRIDES(b)[0] / PyArray_STRIDES(b)[1]);
ldc = MAX(1, PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[PyArray_NDIM(c) - 1]);
}
else
{
k = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
k *= PyArray_DIMS(a)[i];
m = PyArray_DIMS(a)[0];
lda = MAX(1, k);
ldb = MAX(1, PyArray_STRIDES(b)[0] / PyArray_STRIDES(b)[PyArray_NDIM(b) - 1]);
ldc = MAX(1, PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1]);
}
int n = PyArray_DIMS(b)[0];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dgemm_(transa, "n", &m, &n, &k,
&(alpha.real),
DOUBLEP(a), &lda,
DOUBLEP(b), &ldb,
&(beta.real),
DOUBLEP(c), &ldc);
else
zgemm_(transa, "n", &m, &n, &k,
&alpha,
(void*)COMPLEXP(a), &lda,
(void*)COMPLEXP(b), &ldb,
&beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* mmm(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* M1;
char* trans1;
PyArrayObject* M2;
char* trans2;
Py_complex beta;
PyArrayObject* M3;
if (!PyArg_ParseTuple(args, "DOsOsDO",
&alpha, &M1, &trans1, &M2, &trans2, &beta, &M3))
return NULL;
int m = PyArray_DIM(M3, 1);
int n = PyArray_DIM(M3, 0);
int k;
int bytes = PyArray_ITEMSIZE(M3);
int lda = MAX(1, PyArray_STRIDE(M2, 0) / bytes);
int ldb = MAX(1, PyArray_STRIDE(M1, 0) / bytes);
int ldc = MAX(1, PyArray_STRIDE(M3, 0) / bytes);
void* a = PyArray_DATA(M2);
void* b = PyArray_DATA(M1);
void* c = PyArray_DATA(M3);
if (*trans2 == 'n')
k = PyArray_DIM(M2, 0);
else
k = PyArray_DIM(M2, 1);
if (bytes == 8)
dgemm_(trans2, trans1, &m, &n, &k,
&(alpha.real), a, &lda, b, &ldb, &(beta.real), c, &ldc);
else
zgemm_(trans2, trans1, &m, &n, &k,
&alpha, a, &lda, b, &ldb, &beta, c, &ldc);
Py_RETURN_NONE;
}
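/* mmm() computes M3 <- alpha * op(M1) * op(M2) + beta * M3 for row-major
 * (C-ordered) numpy arrays by exploiting the identity (A*B)^T = B^T * A^T:
 * the column-major Fortran gemm is called with the operand order swapped
 * (M2 passed as 'a', M1 as 'b') and with leading dimensions taken from the
 * row strides, so no explicit transposition or copying of data is needed. */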
PyObject* gemv(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* x;
Py_complex beta;
PyArrayObject* y;
char t = 't';
char* trans = &t;
if (!PyArg_ParseTuple(args, "DOODO|s", &alpha, &a, &x, &beta, &y, &trans))
return NULL;
int m, n, lda, itemsize, incx, incy;
if (*trans == 'n')
{
m = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
m *= PyArray_DIMS(a)[i];
n = PyArray_DIMS(a)[0];
lda = MAX(1, m);
}
else
{
n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a)-1; i++)
n *= PyArray_DIMS(a)[i];
m = PyArray_DIMS(a)[PyArray_NDIM(a)-1];
lda = MAX(1, m);
}
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
itemsize = sizeof(double);
else
itemsize = sizeof(double_complex);
incx = PyArray_STRIDES(x)[0]/itemsize;
incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dgemv_(trans, &m, &n,
&(alpha.real),
DOUBLEP(a), &lda,
DOUBLEP(x), &incx,
&(beta.real),
DOUBLEP(y), &incy);
else
zgemv_(trans, &m, &n,
&alpha,
(void*)COMPLEXP(a), &lda,
(void*)COMPLEXP(x), &incx,
&beta,
(void*)COMPLEXP(y), &incy);
Py_RETURN_NONE;
}
PyObject* axpy(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* x;
PyArrayObject* y;
if (!PyArg_ParseTuple(args, "DOO", &alpha, &x, &y))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(x)->type_num == NPY_DOUBLE)
daxpy_(&n, &(alpha.real),
DOUBLEP(x), &incx,
DOUBLEP(y), &incy);
else
zaxpy_(&n, &alpha,
(void*)COMPLEXP(x), &incx,
(void*)COMPLEXP(y), &incy);
Py_RETURN_NONE;
}
PyObject* czher(PyObject *self, PyObject *args)
{
double alpha;
PyArrayObject* x;
PyArrayObject* a;
if (!PyArg_ParseTuple(args, "dOO", &alpha, &x, &a))
return NULL;
int n = PyArray_DIMS(x)[0];
for (int d = 1; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int lda = MAX(1, n);
zher_("l", &n, &(alpha),
(void*)COMPLEXP(x), &incx,
(void*)COMPLEXP(a), &lda);
Py_RETURN_NONE;
}
PyObject* rk(PyObject *self, PyObject *args)
{
double alpha;
PyArrayObject* a;
double beta;
PyArrayObject* c;
char t = 'c';
char* trans = &t;
if (!PyArg_ParseTuple(args, "dOdO|s", &alpha, &a, &beta, &c, &trans))
return NULL;
int n = PyArray_DIMS(c)[0];
int k, lda;
if (*trans == 'c') {
k = PyArray_DIMS(a)[1];
for (int d = 2; d < PyArray_NDIM(a); d++)
k *= PyArray_DIMS(a)[d];
lda = k;
}
else {
k = PyArray_DIMS(a)[0];
lda = n;
}
int ldc = PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dsyrk_("u", trans, &n, &k,
&alpha, DOUBLEP(a), &lda, &beta,
DOUBLEP(c), &ldc);
else
zherk_("u", trans, &n, &k,
&alpha, (void*)COMPLEXP(a), &lda, &beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* r2k(PyObject *self, PyObject *args)
{
Py_complex alpha;
PyArrayObject* a;
PyArrayObject* b;
double beta;
PyArrayObject* c;
if (!PyArg_ParseTuple(args, "DOOdO", &alpha, &a, &b, &beta, &c))
return NULL;
int n = PyArray_DIMS(a)[0];
int k = PyArray_DIMS(a)[1];
for (int d = 2; d < PyArray_NDIM(a); d++)
k *= PyArray_DIMS(a)[d];
int ldc = PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1];
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
dsyr2k_("u", "t", &n, &k,
(double*)(&alpha), DOUBLEP(a), &k,
DOUBLEP(b), &k, &beta,
DOUBLEP(c), &ldc);
else
zher2k_("u", "c", &n, &k,
(void*)(&alpha), (void*)COMPLEXP(a), &k,
(void*)COMPLEXP(b), &k, &beta,
(void*)COMPLEXP(c), &ldc);
Py_RETURN_NONE;
}
PyObject* dotc(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
if (!PyArg_ParseTuple(args, "OO", &a, &b))
return NULL;
int n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double result;
result = ddot_(&n, (void*)DOUBLEP(a),
&incx, (void*)DOUBLEP(b), &incy);
return PyFloat_FromDouble(result);
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex z = 0.0;
for (int i = 0; i < n; i++)
z += conj(ap[i]) * bp[i];
return PyComplex_FromDoubles(creal(z), cimag(z));
}
}
PyObject* dotu(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
if (!PyArg_ParseTuple(args, "OO", &a, &b))
return NULL;
int n = PyArray_DIMS(a)[0];
for (int i = 1; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double result;
result = ddot_(&n, (void*)DOUBLEP(a),
&incx, (void*)DOUBLEP(b), &incy);
return PyFloat_FromDouble(result);
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex z = 0.0;
for (int i = 0; i < n; i++)
z += ap[i] * bp[i];
return PyComplex_FromDoubles(creal(z), cimag(z));
}
}
PyObject* multi_dotu(PyObject *self, PyObject *args)
{
PyArrayObject* a;
PyArrayObject* b;
PyArrayObject* c;
if (!PyArg_ParseTuple(args, "OOO", &a, &b, &c))
return NULL;
int n0 = PyArray_DIMS(a)[0];
int n = PyArray_DIMS(a)[1];
for (int i = 2; i < PyArray_NDIM(a); i++)
n *= PyArray_DIMS(a)[i];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double *ap = DOUBLEP(a);
double *bp = DOUBLEP(b);
double *cp = DOUBLEP(c);
for (int i = 0; i < n0; i++)
{
cp[i] = ddot_(&n, (void*)ap,
&incx, (void*)bp, &incy);
ap += n;
bp += n;
}
}
else
{
double_complex* ap = COMPLEXP(a);
double_complex* bp = COMPLEXP(b);
double_complex* cp = COMPLEXP(c);
for (int i = 0; i < n0; i++)
{
cp[i] = 0.0;
for (int j = 0; j < n; j++)
cp[i] += ap[j] * bp[j];
ap += n;
bp += n;
}
}
Py_RETURN_NONE;
}
PyObject* multi_axpy(PyObject *self, PyObject *args)
{
PyArrayObject* alpha;
PyArrayObject* x;
PyArrayObject* y;
if (!PyArg_ParseTuple(args, "OOO", &alpha, &x, &y))
return NULL;
int n0 = PyArray_DIMS(x)[0];
int n = PyArray_DIMS(x)[1];
for (int d = 2; d < PyArray_NDIM(x); d++)
n *= PyArray_DIMS(x)[d];
int incx = 1;
int incy = 1;
if (PyArray_DESCR(alpha)->type_num == NPY_DOUBLE)
{
if (PyArray_DESCR(x)->type_num == NPY_CDOUBLE)
n *= 2;
double *ap = DOUBLEP(alpha);
double *xp = DOUBLEP(x);
double *yp = DOUBLEP(y);
for (int i = 0; i < n0; i++)
{
daxpy_(&n, &ap[i],
(void*)xp, &incx,
(void*)yp, &incy);
xp += n;
yp += n;
}
}
else
{
double_complex *ap = COMPLEXP(alpha);
double_complex *xp = COMPLEXP(x);
double_complex *yp = COMPLEXP(y);
for (int i = 0; i < n0; i++)
{
zaxpy_(&n, (void*)(&ap[i]),
(void*)xp, &incx,
(void*)yp, &incy);
xp += n;
yp += n;
}
}
Py_RETURN_NONE;
}
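/* Note on multi_axpy() above: when the coefficients are real but the data
 * arrays are complex, each complex vector is treated as a real vector of
 * twice the length (n *= 2), so a single daxpy_ call scales both the real
 * and the imaginary parts by the same real factor. */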
gpaw-0.11.0.13004/c/plane_wave.c:

#include "extensions.h"
#include <complex.h>
PyObject *plane_wave_grid(PyObject *self, PyObject *args)
{
PyArrayObject* beg_c;
PyArrayObject* end_c;
PyArrayObject* h_c;
PyArrayObject* k_c;
PyArrayObject* r0_c;
PyArrayObject* pw_g;
if (!PyArg_ParseTuple(args, "OOOOOO", &beg_c, &end_c, &h_c,
&k_c, &r0_c, &pw_g))
return NULL;
long *beg = LONGP(beg_c);
long *end = LONGP(end_c);
double *h = DOUBLEP(h_c);
double *vk = DOUBLEP(k_c);
double *vr0 = DOUBLEP(r0_c);
double_complex *pw = COMPLEXP(pw_g);
double kr[3], kr0[3];
int n[3], ij;
for (int c = 0; c < 3; c++) {
n[c] = end[c] - beg[c];
kr0[c] = vk[c] * vr0[c];
}
for (int i = 0; i < n[0]; i++) {
kr[0] = vk[0] * h[0] * (beg[0] + i) - kr0[0];
for (int j = 0; j < n[1]; j++) {
kr[1] = kr[0] + vk[1] * h[1] * (beg[1] + j) - kr0[1];
ij = (i*n[1] + j)*n[2];
for (int k = 0; k < n[2]; k++) {
kr[2] = kr[1] + vk[2] * h[2] * (beg[2] + k) - kr0[2];
pw[ij + k] = cos(kr[2]) + I * sin(kr[2]);
}
}
}
Py_RETURN_NONE;
}
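/* plane_wave_grid() fills pw_g with exp(i * k . (r - r0)) on the uniform
 * grid points r = h * (beg + (i, j, k)), evaluated as cos + i*sin; the
 * scalar product k.(r - r0) is accumulated one Cartesian component at a
 * time in the kr[] array. */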
gpaw-0.11.0.13004/c/blacs.c:

/* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2010 Argonne National Laboratory
* Please see the accompanying LICENSE file for further information. */
#ifdef PARALLEL
#include <Python.h>
#ifdef GPAW_WITH_SL
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <stdlib.h>
#include <mpi.h>
#include <assert.h>
#include "extensions.h"
#include "mympi.h"
// BLACS
#define BLOCK_CYCLIC_2D 1
#ifdef GPAW_NO_UNDERSCORE_CBLACS
#define Cblacs_barrier_ Cblacs_barrier
#define Cblacs_gridexit_ Cblacs_gridexit
#define Cblacs_gridinfo_ Cblacs_gridinfo
#define Cblacs_gridinit_ Cblacs_gridinit
#define Cblacs_pinfo_ Cblacs_pinfo
#define Csys2blacs_handle_ Csys2blacs_handle
#endif
void Cblacs_barrier_(int ConTxt, char *scope);
void Cblacs_gridexit_(int ConTxt);
void Cblacs_gridinfo_(int ConTxt, int* nprow, int* npcol,
int* myrow, int* mycol);
void Cblacs_gridinit_(int* ConTxt, char* order, int nprow, int npcol);
void Cblacs_pinfo_(int* mypnum, int* nprocs);
int Csys2blacs_handle_(MPI_Comm SysCtxt);
// End of BLACS
// ScaLAPACK
#ifdef GPAW_NO_UNDERSCORE_SCALAPACK
#define numroc_ numroc
#define pdlamch_ pdlamch
#define pdlaset_ pdlaset
#define pzlaset_ pzlaset
#define pdpotrf_ pdpotrf
#define pzpotrf_ pzpotrf
#define pzpotri_ pzpotri
#define pdtrtri_ pdtrtri
#define pztrtri_ pztrtri
#define pzgesv_ pzgesv
#define pdgesv_ pdgesv
#define pdsyevd_ pdsyevd
#define pzheevd_ pzheevd
#define pdsyevx_ pdsyevx
#define pzheevx_ pzheevx
#define pdsygvx_ pdsygvx
#define pzhegvx_ pzhegvx
#define pdsyngst_ pdsyngst
#define pzhengst_ pzhengst
#ifdef GPAW_MR3
#define pdsyevr_ pdsyevr
#define pzheevr_ pzheevr
#endif // GPAW_MR3
#define pdtran_ pdtran
#define pztranc_ pztranc
#define pdgemm_ pdgemm
#define pzgemm_ pzgemm
#define pdgemv_ pdgemv
#define pzgemv_ pzgemv
#define pdsyr2k_ pdsyr2k
#define pzher2k_ pzher2k
#define pdsyrk_ pdsyrk
#define pzherk_ pzherk
#define pdtrsm_ pdtrsm
#define pztrsm_ pztrsm
#define pzhemm_ pzhemm
#define pdsymm_ pdsymm
#endif
#ifdef GPAW_NO_UNDERSCORE_CSCALAPACK
#define Cpdgemr2d_ Cpdgemr2d
#define Cpzgemr2d_ Cpzgemr2d
#define Cpdtrmr2d_ Cpdtrmr2d
#define Cpztrmr2d_ Cpztrmr2d
#endif
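/* The desca/descb/descc arguments used throughout this file are standard
 * ScaLAPACK array descriptors (9 integers):
 *   desc[0] = dtype, desc[1] = BLACS context, desc[2] = m, desc[3] = n,
 *   desc[4] = mb, desc[5] = nb, desc[6] = rsrc, desc[7] = csrc,
 *   desc[8] = lld.
 * This is why the routines below read the global matrix size from
 * INTP(desca)[2] and INTP(desca)[3] and the context from INTP(desca)[1]. */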
// tools
int numroc_(int* n, int* nb, int* iproc, int* isrcproc, int* nprocs);
void Cpdgemr2d_(int m, int n,
double* a, int ia, int ja, int* desca,
double* b, int ib, int jb, int* descb,
int gcontext);
void Cpzgemr2d_(int m, int n,
void* a, int ia, int ja, int* desca,
void* b, int ib, int jb, int* descb,
int gcontext);
void Cpdtrmr2d_(char* uplo, char* diag, int m, int n,
double* a, int ia, int ja, int* desca,
double* b, int ib, int jb, int* descb,
int gcontext);
void Cpztrmr2d_(char* uplo, char* diag, int m, int n,
void* a, int ia, int ja, int* desca,
void* b, int ib, int jb, int* descb,
int gcontext);
double pdlamch_(int* ictxt, char* cmach);
void pzpotri_(char* uplo, int* n, void* a, int *ia, int* ja, int* desca, int* info);
void pzgetri_(int* n, void* a,
int *ia, int* ja, int* desca, int* info);
void pdlaset_(char* uplo, int* m, int* n, double* alpha, double* beta,
double* a, int* ia, int* ja, int* desca);
void pzlaset_(char* uplo, int* m, int* n, void* alpha, void* beta,
void* a, int* ia, int* ja, int* desca);
// cholesky
void pdpotrf_(char* uplo, int* n, double* a,
int* ia, int* ja, int* desca, int* info);
void pzpotrf_(char* uplo, int* n, void* a,
int* ia, int* ja, int* desca, int* info);
void pzgesv_(int* n, int* nrhs, void* a,
int* ia, int* ja, int* desca, int* ipiv,
void* b, int* ib, int* jb, int* descb, int* info);
void pdgesv_(int *n, int *nrhs, void *a,
int *ia, int *ja, int* desca, int *ipiv,
void* b, int* ib, int* jb, int* descb, int* info);
void pdtrtri_(char* uplo, char* diag, int* n, double* a,
int *ia, int* ja, int* desca, int* info);
void pztrtri_(char* uplo, char* diag, int* n, void* a,
int *ia, int* ja, int* desca, int* info);
// diagonalization
void pdsyevd_(char* jobz, char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* w, double* z, int* iz, int* jz,
int* descz, double* work, int* lwork, int* iwork,
int* liwork, int* info);
void pzheevd_(char* jobz, char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* w, void* z, int* iz, int* jz,
int* descz, void* work, int* lwork, double* rwork,
int* lrwork, int* iwork, int* liwork, int* info);
void pdsyevx_(char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pzheevx_(char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pdsygvx_(int* ibtype, char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* b, int *ib, int* jb, int* descb,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pzhegvx_(int* ibtype, char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
void* b, int *ib, int* jb, int* descb,
double* vl, double* vu,
int* il, int* iu, double* abstol,
int* m, int* nz, double* w, double* orfac,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* ifail, int* iclustr, double* gap, int* info);
void pdsyngst_(int* ibtype, char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* scale, double* work, int* lwork, int* info);
void pzhengst_(int* ibtype, char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
double* scale, void* work, int* lwork, int* info);
#ifdef GPAW_MR3
void pdsyevr_(char* jobz, char* range,
char* uplo, int* n,
double* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu,
int* m, int* nz, double* w,
double* z, int* iz, int* jz, int* descz,
double* work, int* lwork, int* iwork, int* liwork,
int* info);
void pzheevr_(char* jobz, char* range,
char* uplo, int* n,
void* a, int* ia, int* ja, int* desca,
double* vl, double* vu,
int* il, int* iu,
int* m, int* nz, double* w,
void* z, int* iz, int* jz, int* descz,
void* work, int* lwork, double* rwork, int* lrwork,
int* iwork, int* liwork,
int* info);
#endif // GPAW_MR3
// pblas
void pdtran_(int* m, int* n,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pztranc_(int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdgemm_(char* transa, char* transb, int* m, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pzgemm_(char* transa, char* transb, int* m, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pzhemm_(char* side, char* uplo, int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdsymm_(char* side, char* uplo, int* m, int* n,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdgemv_(char* transa, int* m, int* n, double* alpha,
double* a, int* ia, int* ja, int* desca,
double* x, int* ix, int* jx, int* descx, int* incx,
double* beta,
double* y, int* iy, int* jy, int* descy, int* incy);
void pzgemv_(char* transa, int* m, int* n, void* alpha,
void* a, int* ia, int* ja, int* desca,
void* x, int* ix, int* jx, int* descx, int* incx,
void* beta,
void* y, int* iy, int* jy, int* descy, int* incy);
void pdsyr2k_(char* uplo, char* trans, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb,
double* beta,
double* c, int* ic, int *jc, int* descc);
void pzher2k_(char* uplo, char* trans, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdsyrk_(char* uplo, char* trans, int* n, int* k,
double* alpha,
double* a, int* ia, int* ja, int* desca,
double* beta,
double* c, int* ic, int* jc, int* descc);
void pzherk_(char* uplo, char* trans, int* n, int* k,
void* alpha,
void* a, int* ia, int* ja, int* desca,
void* beta,
void* c, int* ic, int* jc, int* descc);
void pdtrsm_(char* side, char* uplo, char* trans, char* diag,
int* m, int *n, double* alpha,
double* a, int* ia, int* ja, int* desca,
double* b, int* ib, int* jb, int* descb);
void pztrsm_(char* side, char* uplo, char* trans, char* diag,
int* m, int *n, void* alpha,
void* a, int* ia, int* ja, int* desca,
void* b, int* ib, int* jb, int* descb);
PyObject* pblas_tran(PyObject *self, PyObject *args)
{
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *c;
PyArrayObject *desca, *descc;
if (!PyArg_ParseTuple(args, "iiDODOOO", &m, &n, &alpha,
&a, &beta, &c,
&desca, &descc))
return NULL;
int one = 1;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdtran_(&m, &n,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pztranc_(&m, &n,
&alpha,
(void*)PyArray_DATA(a), &one, &one, INTP(desca),
&beta,
(void*)PyArray_DATA(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_gemm(PyObject *self, PyObject *args)
{
char transa;
char transb;
int m, n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiiDOODOOOOcc", &m, &n, &k, &alpha,
&a, &b, &beta, &c,
&desca, &descb, &descc,
&transa, &transb)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdgemm_(&transa, &transb, &m, &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(descb),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzgemm_(&transa, &transb, &m, &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_hemm(PyObject *self, PyObject *args)
{
char side;
char uplo;
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "cciiDOOdOOOO",
&side, &uplo, &n, &m,
&alpha, &a, &b, &beta,
&c, &desca, &descb, &descc)) {
return NULL;
}
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE) {
pdsymm_(&side, &uplo, &n, &m, &alpha,
(void*)DOUBLEP(a), &one, &one, INTP(desca),
(void*)DOUBLEP(b), &one, &one, INTP(descb),
&beta,
(void*)DOUBLEP(c), &one, &one, INTP(descc));
} else {
pzhemm_(&side, &uplo, &n, &m, &alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
}
Py_RETURN_NONE;
}
PyObject* pblas_gemv(PyObject *self, PyObject *args)
{
char transa;
int m, n;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *x, *y;
int incx = 1, incy = 1; // what should these be?
PyArrayObject *desca, *descx, *descy;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDOODOOOOc",
&m, &n, &alpha,
&a, &x, &beta, &y,
&desca, &descx,
&descy, &transa)) {
return NULL;
}
// ydesc
// int y_ConTxt = INTP(descy)[1];
// If process not on BLACS grid, then return.
// if (y_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(y)->type_num == NPY_DOUBLE)
pdgemv_(&transa, &m, &n,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(x), &one, &one, INTP(descx), &incx,
&(beta.real),
DOUBLEP(y), &one, &one, INTP(descy), &incy);
else
pzgemv_(&transa, &m, &n,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(x), &one, &one, INTP(descx), &incx,
&beta,
(void*)COMPLEXP(y), &one, &one, INTP(descy), &incy);
Py_RETURN_NONE;
}
PyObject* pblas_r2k(PyObject *self, PyObject *args)
{
char uplo;
int n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *b, *c;
PyArrayObject *desca, *descb, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDOODOOOOc", &n, &k, &alpha,
&a, &b, &beta, &c,
&desca, &descb, &descc,
&uplo)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdsyr2k_(&uplo, "T", &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(descb),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzher2k_(&uplo, "C", &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(descb),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* pblas_rk(PyObject *self, PyObject *args)
{
char uplo;
int n, k;
Py_complex alpha;
Py_complex beta;
PyArrayObject *a, *c;
PyArrayObject *desca, *descc;
int one = 1;
if (!PyArg_ParseTuple(args, "iiDODOOOc", &n, &k, &alpha,
&a, &beta, &c,
&desca, &descc,
&uplo)) {
return NULL;
}
// cdesc
// int c_ConTxt = INTP(descc)[1];
// If process not on BLACS grid, then return.
// if (c_ConTxt == -1) Py_RETURN_NONE;
if (PyArray_DESCR(c)->type_num == NPY_DOUBLE)
pdsyrk_(&uplo, "T", &n, &k,
&(alpha.real),
DOUBLEP(a), &one, &one, INTP(desca),
&(beta.real),
DOUBLEP(c), &one, &one, INTP(descc));
else
pzherk_(&uplo, "C", &n, &k,
&alpha,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&beta,
(void*)COMPLEXP(c), &one, &one, INTP(descc));
Py_RETURN_NONE;
}
PyObject* new_blacs_context(PyObject *self, PyObject *args)
{
PyObject* comm_obj;
int nprow, npcol;
int iam, nprocs;
int ConTxt;
char order;
if (!PyArg_ParseTuple(args, "Oiic", &comm_obj, &nprow, &npcol, &order)){
return NULL;
}
// Create blacs grid on this communicator
MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
// Get my id and nprocs. This is for debugging purposes only
Cblacs_pinfo_(&iam, &nprocs);
MPI_Comm_size(comm, &nprocs);
// Create blacs grid on this communicator continued
ConTxt = Csys2blacs_handle_(comm);
Cblacs_gridinit_(&ConTxt, &order, nprow, npcol);
PyObject* returnvalue = Py_BuildValue("i", ConTxt);
return returnvalue;
}
PyObject* get_blacs_gridinfo(PyObject *self, PyObject *args)
{
int ConTxt, nprow, npcol;
int myrow, mycol;
if (!PyArg_ParseTuple(args, "iii", &ConTxt, &nprow, &npcol)) {
return NULL;
}
Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
return Py_BuildValue("(ii)", myrow, mycol);
}
PyObject* get_blacs_local_shape(PyObject *self, PyObject *args)
{
int ConTxt;
int m, n, mb, nb, rsrc, csrc;
int nprow, npcol, myrow, mycol;
int locM, locN;
if (!PyArg_ParseTuple(args, "iiiiiii", &ConTxt, &m, &n, &mb,
&nb, &rsrc, &csrc)){
return NULL;
}
Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);
locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
return Py_BuildValue("(ii)", locM, locN);
}
PyObject* blacs_destroy(PyObject *self, PyObject *args)
{
int ConTxt;
if (!PyArg_ParseTuple(args, "i", &ConTxt))
return NULL;
Cblacs_gridexit_(ConTxt);
Py_RETURN_NONE;
}
PyObject* scalapack_set(PyObject *self, PyObject *args)
{
PyArrayObject* a; // matrix;
PyArrayObject* desca; // descriptor
Py_complex alpha;
Py_complex beta;
int m, n;
int ia, ja;
char uplo;
if (!PyArg_ParseTuple(args, "OODDciiii", &a, &desca,
&alpha, &beta, &uplo,
&m, &n, &ia, &ja))
return NULL;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdlaset_(&uplo, &m, &n, &(alpha.real), &(beta.real), DOUBLEP(a),
&ia, &ja, INTP(desca));
else
pzlaset_(&uplo, &m, &n, &alpha, &beta, (void*)COMPLEXP(a),
&ia, &ja, INTP(desca));
Py_RETURN_NONE;
}
PyObject* scalapack_redist(PyObject *self, PyObject *args)
{
PyArrayObject* a; // source matrix
PyArrayObject* b; // destination matrix
PyArrayObject* desca; // source descriptor
PyArrayObject* descb; // destination descriptor
char uplo;
char diag='N'; // copy the diagonal
int c_ConTxt;
int m;
int n;
int ia, ja, ib, jb;
if (!PyArg_ParseTuple(args, "OOOOiiiiiiic",
&desca, &descb,
&a, &b,
&m, &n,
&ia, &ja,
&ib, &jb,
&c_ConTxt,
&uplo))
return NULL;
if (uplo == 'G') // General matrix
{
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
Cpdgemr2d_(m, n,
DOUBLEP(a), ia, ja, INTP(desca),
DOUBLEP(b), ib, jb, INTP(descb),
c_ConTxt);
else
Cpzgemr2d_(m, n,
(void*)COMPLEXP(a), ia, ja, INTP(desca),
(void*)COMPLEXP(b), ib, jb, INTP(descb),
c_ConTxt);
}
else // Trapezoidal matrix
{
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
Cpdtrmr2d_(&uplo, &diag, m, n,
DOUBLEP(a), ia, ja, INTP(desca),
DOUBLEP(b), ib, jb, INTP(descb),
c_ConTxt);
else
Cpztrmr2d_(&uplo, &diag, m, n,
(void*)COMPLEXP(a), ia, ja, INTP(desca),
(void*)COMPLEXP(b), ib, jb, INTP(descb),
c_ConTxt);
}
Py_RETURN_NONE;
}
PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args)
{
// Standard driver for divide and conquer algorithm
// Computes all eigenvalues and eigenvectors
PyArrayObject* a; // symmetric matrix
PyArrayObject* desca; // symmetric matrix description vector
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int one = 1;
char jobz = 'V'; // eigenvectors also
char uplo;
if (!PyArg_ParseTuple(args, "OOcOO", &a, &desca, &uplo, &z, &w))
return NULL;
// adesc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// zdesc = adesc; this can be relaxed a bit according to pdsyevd.f
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Query part, need to find the optimal size of a number of work arrays
int info;
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work;
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevd_(&jobz, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
&d_work, &querywork, &i_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzheevd_(&jobz, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, &d_work, &querywork,
&i_work, &querywork, &info);
lwork = (int)(c_work);
lrwork = (int)(d_work);
}
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_dc error in query.");
return NULL;
}
// Computation part
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevd_(&jobz, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork, &info);
free(work);
}
else
{
double_complex *work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevd_(&jobz, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork, &info);
free(rwork);
free(work);
}
free(iwork);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args)
{
// Standard driver for bisection and inverse iteration algorithm
// Computes 'iu' eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int a_mycol = -1;
int a_myrow = -1;
int a_nprow, a_npcol;
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th thru iu-th
char uplo;
if (!PyArg_ParseTuple(args, "OOciOO", &a, &desca, &uplo, &iu,
&z, &w))
return NULL;
// a desc
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsyevx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
// Convergence tolerance
double abstol = 1.0e-8;
// char cmach = 'U'; // most orthogonal eigenvectors
// char cmach = 'S'; // most accurate eigenvalues
// double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
// double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
double orfac = -1.0;
// Query part, need to find the optimal size of a number of work arrays
int info;
int *ifail;
ifail = GPAW_MALLOC(int, n);
int *iclustr;
iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
double *gap;
gap = GPAW_MALLOC(double, a_nprow*a_npcol);
int querywork = -1;
int* iwork;
int liwork;
int lwork; // workspace size must be at least 3
int lrwork; // workspace size must be at least 3
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevx_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(d_work[0]));
}
else
{
pzheevx_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(c_work));
lrwork = MAX(3, (int)(d_work[0]));
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_ex error in query.");
return NULL;
}
// Computation part
// lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevx_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
ifail, iclustr, gap, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevx_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
ifail, iclustr, gap, &info);
free(rwork);
free(work);
}
free(iwork);
free(gap);
free(iclustr);
free(ifail);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#ifdef GPAW_MR3
PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args)
{
// Standard driver for MRRR algorithm
// Computes 'iu' eigenvalues and eigenvectors
// http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th thru iu-th
char uplo;
if (!PyArg_ParseTuple(args, "OOciOO", &a, &desca, &uplo, &iu,
&z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsyevx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Query part, need to find the optimal size of a number of work arrays
int info;
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevr_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
&info);
lwork = (int)(d_work[0]);
}
else
{
pzheevr_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
&info);
lwork = (int)(c_work);
lrwork = (int)(d_work[0]);
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_diagonalize_evr error in query.");
return NULL;
}
// Computation part
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevr_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
&info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevr_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
&info);
free(rwork);
free(work);
}
free(iwork);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args)
{
// General driver for divide and conquer algorithm
// Computes *all* eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int one = 1;
char jobz = 'V'; // eigenvectors also
char uplo;
double scale;
if (!PyArg_ParseTuple(args, "OOcOOO", &a, &desca, &uplo,
&b, &z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Cholesky Decomposition
int info;
if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
pdpotrf_(&uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
else
pzpotrf_(&uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in Cholesky.");
return NULL;
}
// Query variables
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work;
double_complex c_work;
// NGST Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyngst_(&ibtype, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, &d_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzhengst_(&ibtype, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)&c_work, &querywork, &info);
lwork = (int)(c_work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in NGST query.");
return NULL;
}
// NGST Compute
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyngst_(&ibtype, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, work, &lwork, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
pzhengst_(&ibtype, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)work, &lwork, &info);
free(work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in NGST compute.");
return NULL;
}
// NOTE: Scale is always equal to 1.0 above. In future versions of ScaLAPACK, we
// may need to rescale eigenvalues by scale. This can be accomplished by using
// the BLAS1 d/zscal. See pdsygvx.f
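// A minimal sketch of that rescaling, kept as a comment because scale is
// currently always 1.0 (the dscal_ prototype and underscore convention are
// assumed to follow the usual BLAS setup used elsewhere in GPAW):
//
//   if (scale != 1.0) {
//       int incx = 1;
//       dscal_(&n, &scale, DOUBLEP(w), &incx);  // w holds the n eigenvalues
//   }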
// EVD Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevd_(&jobz, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
&d_work, &querywork, &i_work, &querywork, &info);
lwork = (int)(d_work);
}
else
{
pzheevd_(&jobz, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, &d_work, &querywork,
&i_work, &querywork, &info);
lwork = (int)(c_work);
lrwork = (int)(d_work);
}
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_dc error in EVD query.");
return NULL;
}
// EVD Computation
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevd_(&jobz, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork, &info);
free(work);
}
else
{
double_complex *work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevd_(&jobz, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork, &info);
free(rwork);
free(work);
}
free(iwork);
// Backtransformation to the original problem
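// For ibtype = 1 the reduction used B = U^T*U (uplo = 'U') or B = L*L^T
// (uplo = 'L'); the standard-problem eigenvectors y map back to the
// generalized ones via x = inv(U)*y or x = inv(L)^T*y, i.e. the triangular
// solve below with trans = 'N' or 'T' (the complex branch uses the same
// trans flag).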
char trans;
double d_one = 1.0;
double_complex c_one = 1.0;
if (uplo == 'U')
trans = 'N';
else
trans = 'T';
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdtrsm_("L", &uplo, &trans, "N", &n, &n, &d_one,
DOUBLEP(b), &one, &one, INTP(desca),
DOUBLEP(z), &one, &one, INTP(desca));
else
pztrsm_("L", &uplo, &trans, "N", &n, &n, (void*)&c_one,
(void*)COMPLEXP(b), &one, &one, INTP(desca),
(void*)COMPLEXP(z), &one, &one, INTP(desca));
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args)
{
// General driver for bisection and inverse iteration algorithm
// Computes 'iu' eigenvalues and eigenvectors
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int a_mycol = -1;
int a_myrow = -1;
int a_nprow, a_npcol;
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th thru iu-th
char uplo;
if (!PyArg_ParseTuple(args, "OOciOOO", &a, &desca, &uplo, &iu,
&b, &z, &w))
return NULL;
// a desc
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc; required by pdsygvx.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
Cblacs_gridinfo_(a_ConTxt, &a_nprow, &a_npcol, &a_myrow, &a_mycol);
// Convergence tolerance
double abstol = 1.0e-8;
// char cmach = 'U'; // most orthogonal eigenvectors
// char cmach = 'S'; // most accurate eigenvalues
// double abstol = pdlamch_(&a_ConTxt, &cmach); // most orthogonal eigenvectors
// double abstol = 2.0*pdlamch_(&a_ConTxt, &cmach); // most accurate eigenvalues
double orfac = -1.0;
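// A negative orfac selects the ScaLAPACK default (1.0e-3 according to the
// p?sygvx documentation) for deciding which eigenvectors belonging to
// clustered eigenvalues are reorthogonalized.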
// Query part, need to find the optimal size of a number of work arrays
int info;
int *ifail;
ifail = GPAW_MALLOC(int, n);
int *iclustr;
iclustr = GPAW_MALLOC(int, 2*a_nprow*a_npcol);
double *gap;
gap = GPAW_MALLOC(double, a_nprow*a_npcol);
int querywork = -1;
int* iwork;
int liwork;
int lwork; // workspace size must be at least 3
int lrwork; // workspace size must be at least 3
int i_work;
double d_work[3];
double_complex c_work;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsygvx_(&ibtype, &jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(d_work[0]));
}
else
{
pzhegvx_(&ibtype, &jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
ifail, iclustr, gap, &info);
lwork = MAX(3, (int)(c_work));
lrwork = MAX(3, (int)(d_work[0]));
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_ex error in query.");
return NULL;
}
// Computation part
// lwork = lwork + (n-1)*n; // this is a ridiculous amount of workspace
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsygvx_(&ibtype, &jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
ifail, iclustr, gap, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzhegvx_(&ibtype, &jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &abstol, &eigvalm,
&nz, DOUBLEP(w), &orfac,
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
ifail, iclustr, gap, &info);
free(rwork);
free(work);
}
free(iwork);
free(gap);
free(iclustr);
free(ifail);
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#ifdef GPAW_MR3
PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args)
{
// General driver for MRRR algorithm
// Computes 'iu' eigenvalues and eigenvectors
// http://icl.cs.utk.edu/lapack-forum/archives/scalapack/msg00159.html
PyArrayObject* a; // Hamiltonian matrix
PyArrayObject* b; // overlap matrix
PyArrayObject* desca; // Hamiltonian matrix descriptor
PyArrayObject* z; // eigenvector matrix
PyArrayObject* w; // eigenvalue array
int ibtype = 1; // Solve H*psi = lambda*S*psi
int il = 1; // not used when range = 'A' or 'V'
int iu;
int eigvalm, nz;
int one = 1;
double vl, vu; // not used when range = 'A' or 'I'
char jobz = 'V'; // eigenvectors also
char range = 'I'; // eigenvalues il-th thru iu-th
char uplo;
double scale;
if (!PyArg_ParseTuple(args, "OOciOOO", &a, &desca, &uplo, &iu,
&b, &z, &w))
return NULL;
// a desc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
// zdesc = adesc = bdesc can be relaxed a bit according to pdsyevd.f
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
// Cholesky Decomposition
int info;
if (PyArray_DESCR(b)->type_num == NPY_DOUBLE)
pdpotrf_(&uplo, &n, DOUBLEP(b), &one, &one, INTP(desca), &info);
else
pzpotrf_(&uplo, &n, (void*)COMPLEXP(b), &one, &one, INTP(desca), &info);
if (info != 0)
{
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in Cholesky.");
return NULL;
}
// Query variables
int querywork = -1;
int* iwork;
int liwork;
int lwork;
int lrwork;
int i_work;
double d_work[3];
double_complex c_work;
// NGST Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyngst_(&ibtype, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, d_work, &querywork, &info);
lwork = (int)(d_work[0]);
}
else
{
pzhengst_(&ibtype, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)&c_work, &querywork, &info);
lwork = (int)(c_work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in NGST query.");
return NULL;
}
// NGST Compute
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyngst_(&ibtype, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
DOUBLEP(b), &one, &one, INTP(desca),
&scale, work, &lwork, &info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
pzhengst_(&ibtype, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
(void*)COMPLEXP(b), &one, &one, INTP(desca),
&scale, (void*)work, &lwork, &info);
free(work);
}
if (info != 0) {
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_mr3 error in NGST compute.");
return NULL;
}
// NOTE: Scale is always equal to 1.0 above. In future versions of ScaLAPACK, we
// may need to rescale eigenvalues by scale. This can be accomplished by using
// the BLAS1 d/zscal. See pdsygvx.f
// EVR Query
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdsyevr_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
d_work, &querywork, &i_work, &querywork,
&info);
lwork = (int)(d_work[0]);
}
else
{
pzheevr_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)&c_work, &querywork, d_work, &querywork,
&i_work, &querywork,
&info);
lwork = (int)(c_work);
lrwork = (int)(d_work[0]);
}
if (info != 0) {
printf ("info = %d", info);
PyErr_SetString(PyExc_RuntimeError,
"scalapack_general_diagonalize_evr error in query.");
return NULL;
}
// EVR Computation
liwork = i_work;
iwork = GPAW_MALLOC(int, liwork);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
double* work = GPAW_MALLOC(double, lwork);
pdsyevr_(&jobz, &range, &uplo, &n,
DOUBLEP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
DOUBLEP(z), &one, &one, INTP(desca),
work, &lwork, iwork, &liwork,
&info);
free(work);
}
else
{
double_complex* work = GPAW_MALLOC(double_complex, lwork);
double* rwork = GPAW_MALLOC(double, lrwork);
pzheevr_(&jobz, &range, &uplo, &n,
(void*)COMPLEXP(a), &one, &one, INTP(desca),
&vl, &vu, &il, &iu, &eigvalm,
&nz, DOUBLEP(w),
(void*)COMPLEXP(z), &one, &one, INTP(desca),
(void*)work, &lwork, rwork, &lrwork,
iwork, &liwork,
&info);
free(rwork);
free(work);
}
free(iwork);
// Backtransformation to the original problem
char trans;
double d_one = 1.0;
double_complex c_one = 1.0;
if (uplo == 'U')
trans = 'N';
else
trans = 'T';
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
pdtrsm_("L", &uplo, &trans, "N", &n, &n, &d_one,
DOUBLEP(b), &one, &one, INTP(desca),
DOUBLEP(z), &one, &one, INTP(desca));
else
pztrsm_("L", &uplo, &trans, "N", &n, &n, (void*)&c_one,
(void*)COMPLEXP(b), &one, &one, INTP(desca),
(void*)COMPLEXP(z), &one, &one, INTP(desca));
// If this fails, fewer eigenvalues than requested were computed.
assert (eigvalm == iu);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args)
{
// Cholesky plus inverse of triangular matrix
PyArrayObject* a; // overlap matrix
PyArrayObject* desca; // symmetric matrix description vector
int info;
double d_zero = 0.0;
double_complex c_zero = 0.0;
int one = 1;
int two = 2;
char diag = 'N'; // non-unit triangular
char uplo;
if (!PyArg_ParseTuple(args, "OOc", &a, &desca, &uplo))
return NULL;
// adesc
// int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
int p = a_n - 1;
// If process not on BLACS grid, then return.
// if (a_ConTxt == -1) Py_RETURN_NONE;
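// After the in-place triangular inversion below, p?laset zeroes the
// opposite strict triangle, which p?trtri leaves untouched: the p x p
// submatrix starting at (1,2) or (2,1) is exactly the strictly upper or
// strictly lower part of the n x n matrix.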
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdpotrf_(&uplo, &n, DOUBLEP(a), &one, &one,
INTP(desca), &info);
if (info == 0)
{
pdtrtri_(&uplo, &diag, &n, DOUBLEP(a), &one, &one,
INTP(desca), &info);
if (uplo == 'L')
pdlaset_("U", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
&one, &two, INTP(desca));
else
pdlaset_("L", &p, &p, &d_zero, &d_zero, DOUBLEP(a),
&two, &one, INTP(desca));
}
}
else
{
pzpotrf_(&uplo, &n, (void*)COMPLEXP(a), &one, &one,
INTP(desca), &info);
if (info == 0)
{
pztrtri_(&uplo, &diag, &n, (void*)COMPLEXP(a), &one, &one,
INTP(desca), &info);
if (uplo == 'L')
pzlaset_("U", &p, &p, (void*)&c_zero, (void*)&c_zero,
(void*)COMPLEXP(a), &one, &two, INTP(desca));
else
pzlaset_("L", &p, &p, (void*)&c_zero, (void*)&c_zero,
(void*)COMPLEXP(a), &two, &one, INTP(desca));
}
}
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
PyObject* scalapack_inverse(PyObject *self, PyObject *args)
{
// Inverse of a Hermitian matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
char uplo;
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOc", &a, &desca, &uplo))
return NULL;
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int n = a_n;
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
assert(1==-1); // No double version implemented
}
else
{
pzpotrf_(&uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
if (info == 0)
{
pzpotri_(&uplo, &n, (void*)COMPLEXP(a), &one, &one, INTP(desca), &info);
}
}
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
/*
PyObject* scalapack_solve(PyObject *self, PyObject *args)
{
// Solves equation Ax = B, where A is a general matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
PyArrayObject* b; // Matrix
PyArrayObject* descb; // Matrix description vector
char uplo;
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
return NULL;
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
// Only square matrices
assert (a_m == a_n);
int b_m = INTP(descb)[2];
int b_n = INTP(descb)[3];
// Equation valid
assert (a_n == b_m);
int n = a_n;
int nrhs = b_n;
int* pivot = GPAW_MALLOC(int, a_m+2000); // TODO: How long should this exactly be?
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
assert(1==-1); // No double version implemented
}
else
{
pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
(void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
}
free(pivot);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
*/
PyObject* scalapack_solve(PyObject *self, PyObject *args) {
// Solves equation Ax = B, where A is a general matrix
PyArrayObject* a; // Matrix
PyArrayObject* desca; // Matrix description vector
PyArrayObject* b; // Matrix
PyArrayObject* descb; // Matrix description vector
char uplo;
int info;
int one = 1;
if (!PyArg_ParseTuple(args, "OOOO", &a, &desca, &b, &descb))
return NULL;
int a_ConTxt = INTP(desca)[1];
int a_m = INTP(desca)[2];
int a_n = INTP(desca)[3];
int a_mb = INTP(desca)[4];
int a_nb = INTP(desca)[5];
// Only square matrices
assert (a_m == a_n);
int b_ConTxt = INTP(descb)[1];
int b_m = INTP(descb)[2];
int b_n = INTP(descb)[3];
// Equation valid
assert (a_n == b_m);
int n = a_n;
int nrhs = b_n;
int nprow, npcol, myrow, mycol, locM;
Cblacs_gridinfo_(a_ConTxt, &nprow, &npcol, &myrow, &mycol);
// LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
locM = (((a_m/a_mb) + 1)/nprow + 1) * a_mb;
/*
* IPIV (local output) INTEGER array, dimension ( LOCr(M_A)+MB_A )
* This array contains the pivoting information.
* IPIV(i) -> The global row local row i was swapped with.
* This array is tied to the distributed matrix A.
* An upper bound for these quantities may be computed by:
* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
* M_A (global) DESCA( M_ ) The number of rows in the global
* array A.
* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
* the rows of the array.
* NPROW (global input) INTEGER
* NPROW specifies the number of process rows in the grid
* to be created.
*/
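 /* Illustrative numbers (not taken from this code): with a_m = 1000,
  * a_mb = 64 and nprow = 4 the bound above gives
  * ceil(ceil(1000/64)/4)*64 = 4*64 = 256, while the integer expression
  * below yields locM = 320, so locM + a_mb = 384 pivot entries are
  * allocated -- a slight, harmless over-allocation. */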
int* pivot = GPAW_MALLOC(int, locM + a_mb);
if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
{
pdgesv_(&n, &nrhs,(double*)DOUBLEP(a), &one, &one, INTP(desca), pivot,
(double*)DOUBLEP(b), &one, &one, INTP(descb), &info);
}
else
{
pzgesv_(&n, &nrhs,(void*)COMPLEXP(a), &one, &one, INTP(desca), pivot,
(void*)COMPLEXP(b), &one, &one, INTP(descb), &info);
}
free(pivot);
PyObject* returnvalue = Py_BuildValue("i", info);
return returnvalue;
}
#endif
#endif // PARALLEL
gpaw-0.11.0.13004/c/bc.c 0000664 0001750 0001750 00000020432 12553643466 014434 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include
#include
#include "bc.h"
#include "extensions.h"
#include
#include
boundary_conditions* bc_init(const long size1[3],
const long padding[3][2],
const long npadding[3][2],
const long neighbors[3][2],
MPI_Comm comm, bool real, bool cfd)
{
boundary_conditions* bc = GPAW_MALLOC(boundary_conditions, 1);
for (int i = 0; i < 3; i++)
{
bc->size1[i] = size1[i];
bc->size2[i] = size1[i] + padding[i][0] + padding[i][1];
bc->padding[i] = padding[i][0];
}
bc->comm = comm;
bc->ndouble = (real ? 1 : 2);
bc->cfd = cfd;
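  // size1 is the interior grid size, size2 adds the ghost-point padding on
  // both sides of each axis, and ndouble is 1 for real arrays and 2 for
  // complex arrays (stored as pairs of doubles).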
int rank = 0;
if (comm != MPI_COMM_NULL)
MPI_Comm_rank(comm, &rank);
int start[3];
int size[3];
for (int i = 0; i < 3; i++)
{
start[i] = padding[i][0];
size[i] = size1[i];
}
for (int i = 0; i < 3; i++)
{
int n = bc->ndouble;
for (int j = 0; j < 3; j++)
if (j != i)
n *= size[j];
for (int d = 0; d < 2; d++)
{
int ds = npadding[i][d];
int dr = padding[i][d];
for (int j = 0; j < 3; j++)
{
bc->sendstart[i][d][j] = start[j];
bc->sendsize[i][d][j] = size[j];
bc->recvstart[i][d][j] = start[j];
bc->recvsize[i][d][j] = size[j];
}
if (d == 0)
{
bc->sendstart[i][d][i] = dr;
bc->recvstart[i][d][i] = 0;
}
else
{
bc->sendstart[i][d][i] = padding[i][0] + size1[i] - ds;
bc->recvstart[i][d][i] = padding[i][0] + size1[i];
}
bc->sendsize[i][d][i] = ds;
bc->recvsize[i][d][i] = dr;
bc->sendproc[i][d] = DO_NOTHING;
bc->recvproc[i][d] = DO_NOTHING;
bc->nsend[i][d] = 0;
bc->nrecv[i][d] = 0;
int p = neighbors[i][d];
if (p == rank)
{
if (ds > 0)
bc->sendproc[i][d] = COPY_DATA;
if (dr > 0)
bc->recvproc[i][d] = COPY_DATA;
}
else if (p >= 0)
{
// Communication required:
if (ds > 0)
{
bc->sendproc[i][d] = p;
bc->nsend[i][d] = n * ds;
}
if (dr > 0)
{
bc->recvproc[i][d] = p;
bc->nrecv[i][d] = n * dr;
}
}
}
if (cfd == 0)
{
start[i] = 0;
size[i] = bc->size2[i];
}
// If the two neighboring processors along the
// i'th axis are the same, then we join the two communications
// into one:
bc->rjoin[i] = ((bc->recvproc[i][0] == bc->recvproc[i][1]) &&
bc->recvproc[i][0] >= 0);
bc->sjoin[i] = ((bc->sendproc[i][0] == bc->sendproc[i][1]) &&
bc->sendproc[i][0] >= 0);
}
bc->maxsend = 0;
bc->maxrecv = 0;
for (int i = 0; i < 3; i++)
{
int n = bc->nsend[i][0] + bc->nsend[i][1];
if (n > bc->maxsend)
bc->maxsend = n;
n = bc->nrecv[i][0] + bc->nrecv[i][1];
if (n > bc->maxrecv)
bc->maxrecv = n;
}
return bc;
}
void bc_unpack1(const boundary_conditions* bc,
const double* aa1, double* aa2, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuff, double* sbuff,
const double_complex phases[2], int thd, int nin)
{
int ng = bc->ndouble * bc->size1[0] * bc->size1[1] * bc->size1[2];
int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2];
bool real = (bc->ndouble == 1);
for (int m = 0; m < nin; m++)
// Copy data:
if (i == 0)
{
// Zero all of a2 array. We should only zero the boundaries
// that are not periodic, but it's simpler to zero everything!
// XXX
memset(aa2 + m * ng2, 0, ng2 * sizeof(double));
// Copy data from a1 to central part of a2:
if (real)
bmgs_paste(aa1 + m * ng, bc->size1, aa2 + m * ng2,
bc->size2, bc->sendstart[0][0]);
else
bmgs_pastez((const double_complex*)(aa1 + m * ng), bc->size1,
(double_complex*)(aa2 + m * ng2),
bc->size2, bc->sendstart[0][0]);
}
#ifdef PARALLEL
// Start receiving.
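  // Message tags encode the transfer: d + 10*thd + 1000*i for separate
  // receives (the matching send uses 1 - d so the tags agree), and
  // 10*thd + 1000*i + 100000 when both directions along axis i are joined
  // into a single message (rjoin/sjoin).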
for (int d = 0; d < 2; d++)
{
int p = bc->recvproc[i][d];
if (p >= 0)
{
if (bc->rjoin[i])
{
if (d == 0)
MPI_Irecv(rbuff, (bc->nrecv[i][0] + bc->nrecv[i][1]) * nin,
MPI_DOUBLE, p,
10 * thd + 1000 * i + 100000,
bc->comm, &recvreq[0]);
}
else
{
MPI_Irecv(rbuff, bc->nrecv[i][d] * nin, MPI_DOUBLE, p,
d + 10 * thd + 1000 * i,
bc->comm, &recvreq[d]);
rbuff += bc->nrecv[i][d] * nin;
}
}
}
// Prepare send-buffers and start sending:
double* sbuf = sbuff;
double* sbuf0 = sbuff;
for (int d = 0; d < 2; d++)
{
sendreq[d] = 0;
int p = bc->sendproc[i][d];
if (p >= 0)
{
const int* start = bc->sendstart[i][d];
const int* size = bc->sendsize[i][d];
for (int m = 0; m < nin; m++)
if (real)
bmgs_cut(aa2 + m * ng2, bc->size2, start,
sbuf + m * bc->nsend[i][d],
size);
else
bmgs_cutmz((const double_complex*)(aa2 + m * ng2),
bc->size2, start,
(double_complex*)(sbuf + m * bc->nsend[i][d]),
size, phases[d]);
if (bc->sjoin[i])
{
if (d == 1)
{
MPI_Isend(sbuf0, (bc->nsend[i][0] + bc->nsend[i][1]) * nin,
MPI_DOUBLE, p,
10 * thd + 1000 * i + 100000,
bc->comm, &sendreq[0]);
}
}
else
{
MPI_Isend(sbuf, bc->nsend[i][d] * nin, MPI_DOUBLE, p,
1 - d + 10 * thd + 1000 * i, bc->comm, &sendreq[d]);
}
sbuf += bc->nsend[i][d] * nin;
}
}
#endif // PARALLEL
for (int m = 0; m < nin; m++)
{
// Copy data for periodic boundary conditions:
for (int d = 0; d < 2; d++)
if (bc->sendproc[i][d] == COPY_DATA)
{
if (real)
bmgs_translate(aa2 + m * ng2, bc->size2, bc->sendsize[i][d],
bc->sendstart[i][d], bc->recvstart[i][1 - d]);
else
bmgs_translatemz((double_complex*)(aa2 + m * ng2), bc->size2,
bc->sendsize[i][d],
bc->sendstart[i][d], bc->recvstart[i][1 - d],
phases[d]);
}
}
}
void bc_unpack2(const boundary_conditions* bc,
double* a2, int i,
MPI_Request recvreq[2],
MPI_Request sendreq[2],
double* rbuf, int nin)
{
#ifdef PARALLEL
int ng2 = bc->ndouble * bc->size2[0] * bc->size2[1] * bc->size2[2];
// Store data from receive-buffer:
bool real = (bc->ndouble == 1);
double* rbuf0 = rbuf;
for (int d = 0; d < 2; d++)
if (bc->recvproc[i][d] >= 0)
{
if (bc->rjoin[i])
{
if (d == 0)
{
MPI_Wait(&recvreq[0], MPI_STATUS_IGNORE);
rbuf += bc->nrecv[i][1] * nin;
}
else
rbuf = rbuf0;
}
else
MPI_Wait(&recvreq[d], MPI_STATUS_IGNORE);
for (int m = 0; m < nin; m++)
if (real)
bmgs_paste(rbuf + m * bc->nrecv[i][d], bc->recvsize[i][d],
a2 + m * ng2, bc->size2, bc->recvstart[i][d]);
else
bmgs_pastez((const double_complex*)(rbuf +
m * bc->nrecv[i][d]),
bc->recvsize[i][d],
(double_complex*)(a2 + m * ng2),
bc->size2, bc->recvstart[i][d]);
rbuf += bc->nrecv[i][d] * nin;
}
// This does not work on the ibm with gcc! We do a blocking send instead.
for (int d = 0; d < 2; d++)
if (sendreq[d] != 0)
MPI_Wait(&sendreq[d], MPI_STATUS_IGNORE);
#endif // PARALLEL
}
gpaw-0.11.0.13004/c/extensions.h 0000664 0001750 0001750 00000002674 12553643466 016264 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#ifndef H_EXTENSIONS
#define H_EXTENSIONS
#include
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include
#include
/* If strict ANSI, then some useful macros are not defined */
#if defined(__STRICT_ANSI__) && !defined(__DARWIN_UNIX03)
# define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef DOUBLECOMPLEXDEFINED
# define DOUBLECOMPLEXDEFINED 1
# include
typedef double complex double_complex;
#endif
#if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 4
# define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
#endif
#define INLINE inline
static INLINE void* gpaw_malloc(size_t n)
{
void* p = malloc(n);
assert(p != NULL);
return p;
}
#ifdef GPAW_BGP
#define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T)))
#else
#ifdef GPAW_AIX
#define GPAW_MALLOC(T, n) (malloc((n) * sizeof(T)))
#else
#define GPAW_MALLOC(T, n) (gpaw_malloc((n) * sizeof(T)))
#endif
#endif
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define INTP(a) ((int*)PyArray_DATA(a))
#define LONGP(a) ((long*)PyArray_DATA(a))
#define DOUBLEP(a) ((double*)PyArray_DATA(a))
#define COMPLEXP(a) ((double_complex*)PyArray_DATA(a))
#endif //H_EXTENSIONS
gpaw-0.11.0.13004/c/mympi.h 0000664 0001750 0001750 00000000350 12553643466 015205 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
typedef struct
{
PyObject_HEAD
int size;
int rank;
MPI_Comm comm;
PyObject* parent;
int* members;
} MPIObject;
gpaw-0.11.0.13004/c/mlsqr.c 0000664 0001750 0001750 00000013033 12553643466 015205 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include
#include "extensions.h"
#ifdef GPAW_NO_UNDERSCORE_LAPACK
# define dgels_ dgels
#endif
// Prototype for the LAPACK least-squares solver dgels
int dgels_(char* trans, int *m, int *n, int *nrhs, double* a, int *lda, double* b, int *ldb, double* work, int* lwork, int *info);
int safemod(int x, int m)
{
return (x%m + m)%m;
}
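// safemod returns the non-negative remainder, e.g. safemod(-1, 10) == 9;
// this is needed for periodic wrapping of grid indices below, since C's %
// operator may return negative values for negative arguments.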
// Perform a moving least squares interpolation of a 3D array at given points
// Input arguments:
// order: order of the polynomial used (1, 2 or 3)
// cutoff: the cutoff of weight (in grid points)
// coords: scaled coords [0,1] for interpolation
// N_c: number of grid points
// beg_c: first grid point
// data: the array used
// target: the results are stored in this array
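// The weight used below is the compact-support polynomial
// w(d) = (1 - d)^4 * (4*d + 1) for d < 1, with d = r/cutoff, which resembles
// a Wendland-type kernel. For every target point a small weighted
// least-squares system is assembled and solved with LAPACK dgels; because the
// coordinates are centered on the target point, the interpolated value is
// simply the constant term b[0] of the fit.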
PyObject* mlsqr(PyObject *self, PyObject *args)
{
// The order of interpolation
unsigned char order = -1;
// The cutoff for moving least squares
double cutoff = -1;
// The coordinates for interpolation: array of size (3, N)
PyArrayObject* coords = 0;
// Number of grid points
PyArrayObject* N_c = 0;
// Beginning of grid
PyArrayObject* beg_c = 0;
// The 3d-data to be interpolated: array of size (X, Y, Z)
PyArrayObject* data;
// The interpolation target: array of size (N,)
PyArrayObject* target = 0;
if (!PyArg_ParseTuple(args, "BdOOOOO", &order, &cutoff, &coords, &N_c, &beg_c, &data, &target))
{
return NULL;
}
int coeffs = -1;
if (order == 1)
{
coeffs = 4;
}
if (order == 2)
{
coeffs = 10;
// 1 x y z xy yz zx xx yy zz
}
if (order == 3)
{
// 1 x y z xy yz zx xx yy zz
// xxy xxz yyx yyz zzx zzy
// xxx yyy zzz xyz
coeffs = 20;
}
int points = PyArray_DIM(coords, 0);
double* coord_nc = DOUBLEP(coords);
double* grid_points = DOUBLEP(N_c);
double* grid_start = DOUBLEP(beg_c);
double* target_n = DOUBLEP(target);
double* data_g = DOUBLEP(data);
// TODO: Calculate fit
const int sizex = (int) ceil(cutoff);
const int sizey = (int) ceil(cutoff);
const int sizez = (int) ceil(cutoff);
// Allocate X-matrix and b-vector
int source_points = (2*sizex+1)*(2*sizey+1)*(2*sizez+1);
double* X = GPAW_MALLOC(double, coeffs*source_points);
double* b = GPAW_MALLOC(double, source_points);
double* work = GPAW_MALLOC(double, coeffs*source_points);
// The multipliers for each dimension
int ldx = PyArray_DIM(data, 1) * PyArray_DIM(data, 2);
int ldy = PyArray_DIM(data, 2);
int ldz = 1;
// For each point to be interpolated
for (int p=0; p< points; p++)
{
double x = (*coord_nc++)*grid_points[0] - grid_start[0];
double y = (*coord_nc++)*grid_points[1] - grid_start[1];
double z = (*coord_nc++)*grid_points[2] - grid_start[2];
// The grid center point
int cx2 = (int) round(x);
int cy2 = (int) round(y);
int cz2 = (int) round(z);
// Scaled to grid
int cx = safemod(cx2, PyArray_DIM(data, 0));
int cy = safemod(cy2, PyArray_DIM(data, 1));
int cz = safemod(cz2, PyArray_DIM(data, 2));
double* i_X = X;
double* i_b = b;
// For each point to take into account
for (int dx=-sizex;dx<=sizex;dx++)
for (int dy=-sizey;dy<=sizey;dy++)
for (int dz=-sizez;dz<=sizez;dz++)
{
// Coordinates centered on x,y,z
double sx = (cx2 + dx) - x;
double sy = (cy2 + dy) - y;
double sz = (cz2 + dz) - z;
// Normalized distance from center
double d = sqrt(sx*sx+sy*sy+sz*sz) / cutoff;
double w = 0.0;
if (d < 1)
{
w = (1-d)*(1-d);
w*=w;
w*=(4*d+1);
}
//double w = exp(-d*d);
*i_X++ = w*1.0;
*i_X++ = w*sx;
*i_X++ = w*sy;
*i_X++ = w*sz;
if (order > 1)
{
*i_X++ = w*sx*sy;
*i_X++ = w*sy*sz;
*i_X++ = w*sz*sx;
*i_X++ = w*sx*sx;
*i_X++ = w*sy*sy;
*i_X++ = w*sz*sz;
}
if (order > 2)
{
*i_X++ = w*sx*sy*sz; // xyz
*i_X++ = w*sx*sx*sx; // xxx
*i_X++ = w*sy*sy*sy; // yyy
*i_X++ = w*sz*sz*sz; // zzz
*i_X++ = w*sx*sx*sy; // xxy
*i_X++ = w*sx*sx*sz; // xxz
*i_X++ = w*sy*sy*sx; // yyx
*i_X++ = w*sy*sy*sz; // yyz
*i_X++ = w*sz*sz*sx; // zzx
*i_X++ = w*sz*sz*sy; // zzy
}
*i_b++ = w*data_g[ safemod(cx+dx, PyArray_DIM(data, 0)) * ldx +
safemod(cy+dy, PyArray_DIM(data, 1)) * ldy +
safemod(cz+dz, PyArray_DIM(data, 2)) * ldz ];
}
int info = 0;
int rhs = 1;
int worksize = coeffs*source_points;
int ldb = source_points;
dgels_("T",
	 &coeffs,         // m: number of fit coefficients (rows of X as stored)
	 &source_points,  // n: number of source points (columns of X as stored)
&rhs, // one rhs.
X, // provide lhs
&coeffs, // Leading dimension of X
b, // provide rhs
&ldb, // Leading dimension of b
work, // work array (and output)
&worksize, // the size of work array
&info); // info
if (info != 0)
printf("WARNING: dgels returned %d!", info);
// Evaluate the polynomial
// Due to centered coordinates, it's just the constant term
double value = b[0];
*target_n++ = value;
//Nearest neighbour
//double value = data_g[ cx*data->dimensions[1]*data->dimensions[2] + cy*data->dimensions[2] + cz ];
//printf("%.5f" , value);
}
free(work);
free(b);
free(X);
Py_RETURN_NONE;
}
gpaw-0.11.0.13004/c/f2c.h 0000664 0001750 0001750 00000000441 12553643466 014525 0 ustar jensj jensj 0000000 0000000 /* Definitions needed by code transfered with f2c */
#include
#include
typedef int integer;
typedef double doublereal;
typedef struct { doublereal r, i; } doublecomplex;
#ifndef STATIC_NUMERIC
inline double pow_dd(double *x, double *y) {
return pow(*x,*y);
}
#endif
gpaw-0.11.0.13004/c/hdf5.c 0000664 0001750 0001750 00000042310 12553643466 014675 0 ustar jensj jensj 0000000 0000000 /*
* Copyright (C) 2010-2011 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
/* Light weight Python interface to HDF5 functions needed by GPAW
Generally, HDF5 object identifiers are integers and they are
passed as such between Python and C. All other data to
   HDF5 functions is passed in NumPy arrays. At the moment no checks are
   made to ensure that the data actually is a NumPy array.
   The Python interface functions return either an object identifier (int)
   or None, with two exceptions:
      a tuple from h5s_get_shape
      a string from h5l_get_name_by_idx
*/
#ifdef GPAW_WITH_HDF5
#include
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include
#include
#include "extensions.h"
#ifdef PARALLEL
#include
#include "mympi.h"
#endif
// File functions
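// A typical write sequence from the Python side (illustrative only, using the
// wrappers defined in this file): h5f_create -> h5s_create ->
// h5_type_from_numpy -> h5d_create -> h5d_write, followed by the matching
// close calls. All identifiers are plain ints, as noted in the header
// comment above.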
PyObject* h5f_open(PyObject *self, PyObject *args)
{
int pid = H5P_DEFAULT;
const char* name;
const char mode = 'r';
if (!PyArg_ParseTuple(args, "s|ci", &name, &mode, &pid))
return NULL;
unsigned flag;
if (mode == 'r')
flag = H5F_ACC_RDONLY;
else
flag = H5F_ACC_RDWR;
int fid = H5Fopen(name, flag, pid);
return Py_BuildValue("i", fid);
}
PyObject* h5f_create(PyObject *self, PyObject *args)
{
int pid = H5P_DEFAULT; // Property list id
const char* name;
if (!PyArg_ParseTuple(args, "s|i", &name, &pid))
return NULL;
unsigned flag = H5F_ACC_TRUNC; // Always truncate the file
int fid = H5Fcreate(name, flag, H5P_DEFAULT, pid);
return Py_BuildValue("i", fid);
}
PyObject* h5f_close(PyObject *self, PyObject *args)
{
int fid;
if (!PyArg_ParseTuple(args, "i", &fid))
return NULL;
H5Fclose(fid);
Py_RETURN_NONE;
}
// Group functions
PyObject* h5g_open(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &loc_id, &name))
return NULL;
int gid = H5Gopen(loc_id, name, H5P_DEFAULT);
return Py_BuildValue("i", gid);
}
PyObject* h5g_create(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &loc_id, &name))
return NULL;
int gid = H5Gcreate2(loc_id, name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
return Py_BuildValue("i", gid);
}
PyObject* h5g_get_num_objs(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5G_info_t group_info;
H5Gget_info(id, &group_info);
int nobjs = group_info.nlinks;
return Py_BuildValue("i", nobjs);
}
PyObject* h5g_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Gclose(id);
Py_RETURN_NONE;
}
// Attribute functions
PyObject* h5a_open(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &loc_id, &name))
return NULL;
// Check first for the existence
if (H5Aexists(loc_id, name) == 0 )
return PyErr_Format(PyExc_KeyError, "HDF5 Attribute %s does not exist",
name);
int aid = H5Aopen(loc_id, name, H5P_DEFAULT);
return Py_BuildValue("i", aid);
}
PyObject* h5a_create(PyObject *self, PyObject *args)
{
int loc_id;
int datatype;
int dataspace;
const char* name;
if (!PyArg_ParseTuple(args, "isii", &loc_id, &name, &datatype, &dataspace))
return NULL;
hid_t aid = H5Acreate2(loc_id, name, datatype, dataspace,
H5P_DEFAULT, H5P_DEFAULT);
return Py_BuildValue("i", aid);
}
PyObject* h5a_write(PyObject *self, PyObject *args)
{
int aid;
int datatype;
PyArrayObject* data;
if (!PyArg_ParseTuple(args, "iiO", &aid, &datatype, &data))
return NULL;
char* buf = PyArray_DATA(data);
H5Awrite(aid, datatype, buf);
Py_RETURN_NONE;
}
PyObject* h5a_read(PyObject *self, PyObject *args)
{
int aid;
int datatype;
PyArrayObject* data;
if (!PyArg_ParseTuple(args, "iiO", &aid, &datatype, &data))
return NULL;
char* buf = PyArray_DATA(data);
H5Aread(aid, datatype, buf);
Py_RETURN_NONE;
}
PyObject* h5a_get_space(PyObject *self, PyObject *args)
{
// Returns the attribute's dataspace identifier as int
int aid;
if (!PyArg_ParseTuple(args, "i", &aid))
return NULL;
hid_t dataspace = H5Aget_space(aid);
return Py_BuildValue("i", dataspace);
}
PyObject* h5a_get_type(PyObject *self, PyObject *args)
{
// Returns the attribute's native datatype identifier as int
int aid;
if (!PyArg_ParseTuple(args, "i", &aid))
return NULL;
hid_t filetype = H5Aget_type(aid);
hid_t datatype = H5Tget_native_type(filetype, H5T_DIR_ASCEND);
H5Tclose(filetype);
return Py_BuildValue("i", datatype);
}
PyObject* h5a_exists_by_name(PyObject *self, PyObject *args)
{
int id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &id, &name))
return NULL;
int exists = H5Aexists_by_name(id, ".", name, H5P_DEFAULT);
return Py_BuildValue("b", exists);
}
PyObject* h5a_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Aclose(id);
Py_RETURN_NONE;
}
// Datatype functions
PyObject* h5t_get_class(PyObject *self, PyObject *args)
{
int tid;
if (!PyArg_ParseTuple(args, "i", &tid))
return NULL;
hid_t class = H5Tget_class(tid);
return Py_BuildValue("i", class);
}
PyObject* h5t_get_size(PyObject *self, PyObject *args)
{
int tid;
if (!PyArg_ParseTuple(args, "i", &tid))
return NULL;
hid_t size = H5Tget_size(tid);
return Py_BuildValue("i", size);
}
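// Maps a NumPy array's dtype to an HDF5 datatype id: strings become a
// fixed-length H5T_C_S1 type, bools an int8 enum with FALSE/TRUE members,
// and complex doubles a compound type with "re"/"im" double fields.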
PyObject* h5_type_from_numpy(PyObject *self, PyObject *args)
{
PyArrayObject *array;
if (!PyArg_ParseTuple(args, "O", &array))
return NULL;
int type = PyArray_TYPE(array);
hid_t datatype;
if (type == NPY_STRING ) {
datatype = H5Tcopy(H5T_C_S1);
H5Tset_size(datatype, PyArray_ITEMSIZE(array));
} else if (type == NPY_DOUBLE) {
datatype = H5Tcopy(H5T_NATIVE_DOUBLE);
} else if (type == NPY_LONG) {
datatype = H5Tcopy(H5T_NATIVE_LONG);
} else if (type == NPY_INT) {
datatype = H5Tcopy(H5T_NATIVE_INT);
} else if (type == NPY_BOOL) {
datatype = H5Tenum_create(H5T_NATIVE_INT8);
int value;
value = 0;
// Convert the int value to int8 with HDF5
H5Tconvert(H5T_NATIVE_INT, H5T_NATIVE_INT8, 1, &value, NULL, H5P_DEFAULT);
H5Tenum_insert(datatype, "FALSE", &value);
value = 1;
H5Tconvert(H5T_NATIVE_INT, H5T_NATIVE_INT8, 1, &value, NULL, H5P_DEFAULT);
H5Tenum_insert(datatype, "TRUE", &value);
} else if (type == NPY_CDOUBLE) {
datatype = H5Tcreate(H5T_COMPOUND, sizeof(double complex));
H5Tinsert(datatype, "re", 0, H5T_NATIVE_DOUBLE);
H5Tinsert(datatype, "im", sizeof(double), H5T_NATIVE_DOUBLE);
} else {
return PyErr_Format(PyExc_RuntimeError, "Unsupported datatype");
}
return Py_BuildValue("i", datatype);
}
PyObject* h5t_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Tclose(id);
Py_RETURN_NONE;
}
// Dataspace functions
PyObject* h5s_create(PyObject *self, PyObject *args)
{
PyArrayObject *shape;
if (!PyArg_ParseTuple(args, "O", &shape))
return NULL;
int rank = PyArray_DIM(shape, 0);
long* dims_i = PyArray_DATA(shape);
// hsize_t may be larger than long so we need to copy
hsize_t* dims = (hsize_t *) malloc(rank * sizeof(hsize_t));
for (int i=0; i < rank; i++)
dims[i] = dims_i[i];
int sid = H5Screate_simple(rank, dims, NULL);
free(dims);
return Py_BuildValue("i", sid);
}
PyObject* h5s_select_hyperslab(PyObject *self, PyObject *args)
{
int dataspace;
PyArrayObject* np_offset;
PyArrayObject* np_stride;
PyArrayObject* np_count;
PyArrayObject* np_block;
if (!PyArg_ParseTuple(args, "iOOOO", &dataspace, &np_offset, &np_stride,
&np_count, &np_block))
return NULL;
// None can be passed to indicate use of default values e.g. NULL for
// stride and block
long* temp = PyArray_DATA(np_offset);
int rank = PyArray_DIMS(np_offset)[0];
hsize_t* offset = (hsize_t *) malloc(rank * sizeof(hsize_t));
for (int i=0; i < rank; i++)
offset[i] = temp[i];
hsize_t* stride = NULL;
if ((PyObject *)np_stride != Py_None)
{
temp = PyArray_DATA(np_stride);
stride = (hsize_t *) malloc(rank * sizeof(hsize_t));
for (int i=0; i < rank; i++)
stride[i] = temp[i];
}
temp = PyArray_DATA(np_count);
hsize_t* count = (hsize_t *) malloc(rank * sizeof(hsize_t));
for (int i=0; i < rank; i++)
count[i] = temp[i];
hsize_t* block = NULL;
if ((PyObject *)np_block != Py_None)
return PyErr_Format(PyExc_NotImplementedError, "Block parameter");
H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, stride, count, block);
free(offset);
if (stride != NULL)
free(stride);
free(count);
Py_RETURN_NONE;
}
PyObject* h5s_select_none(PyObject *self, PyObject *args)
{
int dataspace;
if (!PyArg_ParseTuple(args, "i", &dataspace))
return NULL;
H5Sselect_none(dataspace);
Py_RETURN_NONE;
}
PyObject* h5s_get_shape(PyObject *self, PyObject *args)
{
// Returns the dataspace shape as a tuple
int dataspace;
if (!PyArg_ParseTuple(args, "i", &dataspace))
return NULL;
int rank = H5Sget_simple_extent_ndims(dataspace);
hsize_t* dims = (hsize_t *) malloc(rank * sizeof(hsize_t));
rank = H5Sget_simple_extent_dims(dataspace, dims, NULL);
PyObject* shape = PyTuple_New(rank);
int i;
for (i=0; i < rank; i++)
{
PyTuple_SetItem(shape, i, PyInt_FromLong(dims[i]));
}
free(dims);
return shape;
}
PyObject* h5s_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Sclose(id);
Py_RETURN_NONE;
}
// Dataset functions
PyObject* h5d_open(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &loc_id, &name))
return NULL;
int did = H5Dopen2(loc_id, name, H5P_DEFAULT);
return Py_BuildValue("i", did);
}
PyObject* h5d_create(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
int dtype_id;
int space_id;
if (!PyArg_ParseTuple(args, "isii", &loc_id, &name, &dtype_id, &space_id))
return NULL;
int did = H5Dcreate2(loc_id, name, dtype_id, space_id,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
return Py_BuildValue("i", did);
}
PyObject* h5d_write(PyObject *self, PyObject *args)
{
int did;
int memtype;
int memspace;
int filespace;
int pid = H5P_DEFAULT;
PyArrayObject* data;
if (!PyArg_ParseTuple(args, "iiiiO|i", &did, &memtype, &memspace,
&filespace, &data, &pid))
return NULL;
char* buf = PyArray_DATA(data);
H5Dwrite(did, memtype, memspace, filespace, pid, buf);
Py_RETURN_NONE;
}
PyObject* h5d_read(PyObject *self, PyObject *args)
{
int did;
int memtype;
int memspace;
int filespace;
int pid = H5P_DEFAULT;
PyArrayObject* data;
if (!PyArg_ParseTuple(args, "iiiiO|i", &did, &memtype, &memspace,
&filespace, &data, &pid))
return NULL;
char* buf = PyArray_DATA(data);
H5Dread(did, memtype, memspace, filespace, pid, buf);
Py_RETURN_NONE;
}
PyObject* h5d_get_space(PyObject *self, PyObject *args)
{
int did;
if (!PyArg_ParseTuple(args, "i", &did))
return NULL;
hid_t dataspace = H5Dget_space(did);
return Py_BuildValue("i", dataspace);
}
PyObject* h5d_get_type(PyObject *self, PyObject *args)
{
int did;
if (!PyArg_ParseTuple(args, "i", &did))
return NULL;
hid_t filetype = H5Dget_type(did);
hid_t datatype = H5Tget_native_type(filetype, H5T_DIR_ASCEND);
H5Tclose(filetype);
return Py_BuildValue("i", datatype);
}
PyObject* h5d_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Dclose(id);
Py_RETURN_NONE;
}
// Property list related functions
PyObject* h5p_create(PyObject *self, PyObject *args)
{
int cls_id;
if (!PyArg_ParseTuple(args, "i", &cls_id))
return NULL;
int pid = H5Pcreate(cls_id);
return Py_BuildValue("i", pid);
}
#ifdef PARALLEL
PyObject* h5p_set_fapl_mpio(PyObject *self, PyObject *args)
{
PyObject *comm_obj;
int plist_id;
if (!PyArg_ParseTuple(args, "iO", &plist_id, &comm_obj))
return NULL;
MPI_Comm comm = MPI_COMM_NULL;
MPI_Info info = MPI_INFO_NULL;
if (comm_obj != Py_None)
{
comm = ((MPIObject*)comm_obj)->comm;
// The following was needed at some point due to bug in Cray MPI
/* int nprocs;
MPI_Comm_size(comm, &nprocs);
char tmp[20];
MPI_Info_create(&info);
sprintf(tmp,"%d", nprocs);
MPI_Info_set(info,"cb_nodes",tmp); */
#ifdef __bgp__
// Wavefunction write requires large amounts of memory.
// Appears to be a deficiency in the ROMIO driver.
// bgl_nodes_pset controls the number of aggregator
// tasks per pset. The default value is 8.
// In many cases, it will also be necessary to set
// DCMF_ALLTOALL_PREMALLOC = N
char tmp[20];
int info_param = 32;
MPI_Info_create(&info);
sprintf(tmp,"%d", info_param);
MPI_Info_set(info,"bgl_nodes_pset",tmp);
#endif
}
H5Pset_fapl_mpio(plist_id, comm, info);
Py_RETURN_NONE;
}
PyObject* h5p_set_dxpl_mpio(PyObject *self, PyObject *args)
{
int plist_id;
if (!PyArg_ParseTuple(args, "i", &plist_id))
return NULL;
H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
Py_RETURN_NONE;
}
#endif
PyObject* h5p_close(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
H5Pclose(id);
Py_RETURN_NONE;
}
// Info functions
PyObject* h5i_get_type(PyObject *self, PyObject *args)
{
int id;
if (!PyArg_ParseTuple(args, "i", &id))
return NULL;
int type = H5Iget_type(id);
return Py_BuildValue("i", type);
}
// Object functions
PyObject* h5o_open(PyObject *self, PyObject *args)
{
int loc_id;
const char* name;
if (!PyArg_ParseTuple(args, "is", &loc_id, &name))
return NULL;
int oid = H5Oopen(loc_id, name, H5P_DEFAULT);
return Py_BuildValue("i", oid);
}
PyObject* h5o_close(PyObject *self, PyObject *args)
{
int oid;
if (!PyArg_ParseTuple(args, "i", &oid))
return NULL;
H5Oclose(oid);
Py_RETURN_NONE;
}
// List functions
PyObject* h5l_get_name_by_idx(PyObject *self, PyObject *args)
{
int id;
int idx;
if (!PyArg_ParseTuple(args, "ii", &id, &idx))
return NULL;
ssize_t size;
char *name;
// Get size of the name, add 1 for NULL terminator
size = 1 + H5Lget_name_by_idx(id, ".", H5_INDEX_NAME, H5_ITER_INC,
idx, NULL, 0, H5P_DEFAULT);
name = (char *) malloc(size);
H5Lget_name_by_idx(id, ".", H5_INDEX_NAME, H5_ITER_INC, idx, name,
(size_t) size, H5P_DEFAULT);
PyObject* retval = Py_BuildValue("s", name);
free(name);
return retval;
}
static PyMethodDef functions[] = {
{"h5f_open", h5f_open, METH_VARARGS, 0},
{"h5f_create", h5f_create, METH_VARARGS, 0},
{"h5f_close", h5f_close, METH_VARARGS, 0},
{"h5g_open", h5g_open, METH_VARARGS, 0},
{"h5g_get_num_objs", h5g_get_num_objs, METH_VARARGS, 0},
{"h5g_create", h5g_create, METH_VARARGS, 0},
{"h5g_close", h5g_close, METH_VARARGS, 0},
{"h5a_open", h5a_open, METH_VARARGS, 0},
{"h5a_create", h5a_create, METH_VARARGS, 0},
{"h5a_write", h5a_write, METH_VARARGS, 0},
{"h5a_read", h5a_read, METH_VARARGS, 0},
{"h5a_get_space", h5a_get_space, METH_VARARGS, 0},
{"h5a_get_type", h5a_get_type, METH_VARARGS, 0},
{"h5a_exists_by_name", h5a_exists_by_name, METH_VARARGS, 0},
{"h5a_close", h5a_close, METH_VARARGS, 0},
{"h5_type_from_numpy", h5_type_from_numpy, METH_VARARGS, 0},
{"h5t_get_class", h5t_get_class, METH_VARARGS, 0},
{"h5t_get_size", h5t_get_size, METH_VARARGS, 0},
{"h5t_close", h5t_close, METH_VARARGS, 0},
{"h5s_create", h5s_create, METH_VARARGS, 0},
{"h5s_select_hyperslab", h5s_select_hyperslab, METH_VARARGS, 0},
{"h5s_select_none", h5s_select_none, METH_VARARGS, 0},
{"h5s_get_shape", h5s_get_shape, METH_VARARGS, 0},
{"h5s_close", h5s_close, METH_VARARGS, 0},
{"h5d_open", h5d_open, METH_VARARGS, 0},
{"h5d_create", h5d_create, METH_VARARGS, 0},
{"h5d_write", h5d_write, METH_VARARGS, 0},
{"h5d_read", h5d_read, METH_VARARGS, 0},
{"h5d_get_space", h5d_get_space, METH_VARARGS, 0},
{"h5d_get_type", h5d_get_type, METH_VARARGS, 0},
{"h5d_close", h5d_close, METH_VARARGS, 0},
{"h5p_create", h5p_create, METH_VARARGS, 0},
#ifdef PARALLEL
{"h5p_set_fapl_mpio", h5p_set_fapl_mpio, METH_VARARGS, 0},
{"h5p_set_dxpl_mpio", h5p_set_dxpl_mpio, METH_VARARGS, 0},
#endif
{"h5p_close", h5p_close, METH_VARARGS, 0},
{"h5o_open", h5o_open, METH_VARARGS, 0},
{"h5o_close", h5o_close, METH_VARARGS, 0},
{"h5i_get_type", h5i_get_type, METH_VARARGS, 0},
{"h5l_get_name_by_idx", h5l_get_name_by_idx, METH_VARARGS, 0},
{0, 0, 0, 0}
};
PyMODINIT_FUNC init_gpaw_hdf5(void)
{
PyObject *m = Py_InitModule("_gpaw_hdf5",functions);
// Set some hdf5 constants as attributes
PyModule_AddIntConstant(m, "H5T_FLOAT", H5T_FLOAT);
PyModule_AddIntConstant(m, "H5T_INTEGER", H5T_INTEGER);
PyModule_AddIntConstant(m, "H5T_COMPOUND", H5T_COMPOUND);
PyModule_AddIntConstant(m, "H5T_STRING", H5T_STRING);
PyModule_AddIntConstant(m, "H5T_ENUM", H5T_ENUM);
PyModule_AddIntConstant(m, "H5P_DATASET_XFER", H5P_DATASET_XFER);
PyModule_AddIntConstant(m, "H5P_FILE_ACCESS", H5P_FILE_ACCESS);
PyModule_AddIntConstant(m, "H5P_DEFAULT", H5P_DEFAULT);
PyModule_AddIntConstant(m, "H5I_GROUP", H5I_GROUP);
PyModule_AddIntConstant(m, "H5I_DATASET", H5I_DATASET);
}
#endif
gpaw-0.11.0.13004/c/lfc2.c 0000664 0001750 0001750 00000031435 12553643466 014703 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2010 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
#include "spline.h"
#include "lfc.h"
#include "bmgs/spherical_harmonics.h"
PyObject* second_derivative(LFCObject *lfc, PyObject *args)
{
PyArrayObject* a_G_obj;
PyArrayObject* c_Mvv_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
int q;
if (!PyArg_ParseTuple(args, "OOOOOOOi", &a_G_obj, &c_Mvv_obj,
&h_cv_obj, &n_c_obj,
&spline_M_obj, &beg_c_obj,
&pos_Wc_obj, &q))
return NULL;
// Copied from derivative member function
int nd = PyArray_NDIM(a_G_obj);
npy_intp* dims = PyArray_DIMS(a_G_obj);
int nx = PyArray_MultiplyList(dims, nd - 3);
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
int nM = PyArray_DIM(c_Mvv_obj, PyArray_NDIM(c_Mvv_obj) - 2);
// These were already present
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
///////////////////////////////////////////////
const double Y00dv = lfc->dv / sqrt(4.0 * M_PI);
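  // Y00dv combines the grid-point volume dv with the constant s-type
  // spherical harmonic Y_00 = 1/sqrt(4*pi); the splines below provide only
  // the radial part.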
if (!lfc->bloch_boundary_conditions) {
const double* a_G = (const double*)PyArray_DATA(a_G_obj);
double* c_Mvv = (double*)PyArray_DATA(c_Mvv_obj);
// Loop over number of x-dimension in a_xG (not relevant yet)
for (int x = 0; x < nx; x++) {
// JJs old stuff
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
double* c_mvv = c_Mvv + 9 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
int bin = r / spline->dr;
assert(bin <= spline->nbins);
double* s = spline->data + 4 * bin;
double u = r - bin * spline->dr;
double dfdror;
if (bin == 0)
dfdror = 2.0 * s[2] + 3.0 * s[3] * r;
else
dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r;
double a = a_G[G] * Y00dv;
dfdror *= a;
c_mvv[0] += dfdror;
c_mvv[4] += dfdror;
c_mvv[8] += dfdror;
if (r > 1e-15) {
double b = ((2.0 * s[2] + 6.0 * s[3] * u) * a - dfdror) / r2;
c_mvv[0] += b * x * x;
c_mvv[1] += b * x * y;
c_mvv[2] += b * x * z;
c_mvv[3] += b * y * x;
c_mvv[4] += b * y * y;
c_mvv[5] += b * y * z;
c_mvv[6] += b * z * x;
c_mvv[7] += b * z * y;
c_mvv[8] += b * z * z;
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
c_Mvv += 9 * nM;
a_G += nG;
}
}
else {
const complex double* a_G = (const complex double*)PyArray_DATA(a_G_obj);
complex double* c_Mvv = (complex double*)PyArray_DATA(c_Mvv_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
for (int G = Ga; G < Gb; G++) {
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
complex double* c_mvv = c_Mvv + 9 * M;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[vol->W][0];
double y = yG - pos_Wc[vol->W][1];
double z = zG - pos_Wc[vol->W][2];
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double dfdror;
// use bmgs_get_value_and_derivative instead ??!!
int bin = r / spline->dr;
assert(bin <= spline->nbins);
double u = r - bin * spline->dr;
double* s = spline->data + 4 * bin;
if (bin == 0)
dfdror = 2.0 * s[2] + 3.0 * s[3] * r;
else
dfdror = (s[1] + u * (2.0 * s[2] + u * 3.0 * s[3])) / r;
// phase added here
complex double a = a_G[G] * phase_i[i] * Y00dv;
// dfdror *= a;
c_mvv[0] += a * dfdror;
c_mvv[4] += a * dfdror;
c_mvv[8] += a * dfdror;
if (r > 1e-15) {
double b = (2.0 * s[2] + 6.0 * s[3] * u - dfdror) / r2;
c_mvv[0] += a * b * x * x;
c_mvv[1] += a * b * x * y;
c_mvv[2] += a * b * x * z;
c_mvv[3] += a * b * y * x;
c_mvv[4] += a * b * y * y;
c_mvv[5] += a * b * y * z;
c_mvv[6] += a * b * z * x;
c_mvv[7] += a * b * z * y;
c_mvv[8] += a * b * z * z;
}
}
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, q);
c_Mvv += 9 * nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
PyObject* add_derivative(LFCObject *lfc, PyObject *args)
{
// Coefficients for the lfc's
PyArrayObject* c_xM_obj;
// Array
PyArrayObject* a_xG_obj;
PyArrayObject* h_cv_obj;
PyArrayObject* n_c_obj;
PyObject* spline_M_obj;
PyArrayObject* beg_c_obj;
PyArrayObject* pos_Wc_obj;
// Atom index
int a;
// Cartesian coordinate
int v;
// k-point index
int q;
if (!PyArg_ParseTuple(args, "OOOOOOOiii", &c_xM_obj, &a_xG_obj,
&h_cv_obj, &n_c_obj, &spline_M_obj, &beg_c_obj,
&pos_Wc_obj, &a, &v, &q))
return NULL;
// Number of dimensions
int nd = PyArray_NDIM(a_xG_obj);
// Array with lengths of array dimensions
npy_intp* dims = PyArray_DIMS(a_xG_obj);
// Number of extra dimensions
int nx = PyArray_MultiplyList(dims, nd - 3);
// Number of grid points
int nG = PyArray_MultiplyList(dims + nd - 3, 3);
// Number of lfc's
int nM = PyArray_DIM(c_xM_obj, PyArray_NDIM(c_xM_obj) - 1);
const double* h_cv = (const double*)PyArray_DATA(h_cv_obj);
const long* n_c = (const long*)PyArray_DATA(n_c_obj);
const double (*pos_Wc)[3] = (const double (*)[3])PyArray_DATA(pos_Wc_obj);
long* beg_c = LONGP(beg_c_obj);
if (!lfc->bloch_boundary_conditions) {
const double* c_M = (const double*)PyArray_DATA(c_xM_obj);
double* a_G = (double*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, -1) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
// Grid point position
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
// Loop over grid points in current stride
for (int G = Ga; G < Gb; G++) {
// Loop over volumes at current grid point
for (int i = 0; i < ni; i++) {
LFVolume* vol = volume_i + i;
int M = vol->M;
// Check that the volume belongs to the atom in consideration later
int W = vol->W;
int nm = vol->nm;
int l = (nm - 1) / 2;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[W][0];
double y = yG - pos_Wc[W][1];
double z = zG - pos_Wc[W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
// First contribution: f * d(r^l * Y)/dv
double fdrlYdx_m[nm];
if (v == 0)
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
else if (v == 1)
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
else
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
a_G[G] += fdrlYdx_m[m] * c_M[M + m];
// Second contribution: r^(l-1) * Y * df/dr * R_v
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = 1. / r * dfdr;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m];
}
}
// Update coordinates of current grid point
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, -1);
c_M += nM;
a_G += nG;
}
}
else {
const double complex* c_M = (const double complex*)PyArray_DATA(c_xM_obj);
double complex* a_G = (double complex*)PyArray_DATA(a_xG_obj);
for (int x = 0; x < nx; x++) {
GRID_LOOP_START(lfc, q) {
// In one grid loop iteration, only i2 changes.
int i2 = Ga % n_c[2] + beg_c[2];
int i1 = (Ga / n_c[2]) % n_c[1] + beg_c[1];
int i0 = Ga / (n_c[2] * n_c[1]) + beg_c[0];
// Grid point position
double xG = h_cv[0] * i0 + h_cv[3] * i1 + h_cv[6] * i2;
double yG = h_cv[1] * i0 + h_cv[4] * i1 + h_cv[7] * i2;
double zG = h_cv[2] * i0 + h_cv[5] * i1 + h_cv[8] * i2;
// Loop over grid points in current stride
for (int G = Ga; G < Gb; G++) {
// Loop over volumes at current grid point
for (int i = 0; i < ni; i++) {
// Phase of volume
double complex conjphase = conj(phase_i[i]);
LFVolume* vol = volume_i + i;
int M = vol->M;
// Check that the volume belongs to the atom in consideration later
int W = vol->W;
int nm = vol->nm;
int l = (nm - 1) / 2;
const bmgsspline* spline = (const bmgsspline*) \
&((const SplineObject*)PyList_GetItem(spline_M_obj, M))->spline;
double x = xG - pos_Wc[W][0];
double y = yG - pos_Wc[W][1];
double z = zG - pos_Wc[W][2];
double R_c[] = {x, y, z};
double r2 = x * x + y * y + z * z;
double r = sqrt(r2);
double f;
double dfdr;
bmgs_get_value_and_derivative(spline, r, &f, &dfdr);
// First contribution: f * d(r^l * Y)/dv
double fdrlYdx_m[nm];
if (v == 0)
spherical_harmonics_derivative_x(l, f, x, y, z, r2, fdrlYdx_m);
else if (v == 1)
spherical_harmonics_derivative_y(l, f, x, y, z, r2, fdrlYdx_m);
else
spherical_harmonics_derivative_z(l, f, x, y, z, r2, fdrlYdx_m);
for (int m = 0; m < nm; m++)
a_G[G] += fdrlYdx_m[m] * c_M[M + m] * conjphase;
// Second contribution: r^(l-1) * Y * df/dr * R_v
if (r > 1e-15) {
double rlm1Ydfdr_m[nm]; // r^(l-1) * Y * df/dr
double rm1dfdr = 1. / r * dfdr;
spherical_harmonics(l, rm1dfdr, x, y, z, r2, rlm1Ydfdr_m);
for (int m = 0; m < nm; m++)
a_G[G] += rlm1Ydfdr_m[m] * R_c[v] * c_M[M + m] * conjphase;
}
}
// Update coordinates of current grid point
xG += h_cv[6];
yG += h_cv[7];
zG += h_cv[8];
}
}
GRID_LOOP_STOP(lfc, q);
c_M += nM;
a_G += nG;
}
}
Py_RETURN_NONE;
}
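/* Note on add_derivative above: with x, y, z measured from the volume
 * centre and Phi_M(r) = f(r) * r^l * Y_lm(r^), the chain rule gives
 *
 *   dPhi_M/dx_v = f(r) * d(r^l Y_lm)/dx_v + (df/dr) * (x_v / r) * r^l Y_lm,
 *
 * which correspond to the "first" and "second" contributions accumulated in
 * the loops: the angular part via spherical_harmonics_derivative_{x,y,z} and
 * the radial part via spherical_harmonics scaled by dfdr / r and R_c[v]. */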
gpaw-0.11.0.13004/c/localized_functions.h 0000664 0001750 0001750 00000001470 12553643466 020114 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
typedef struct
{
PyObject_HEAD
double dv; // volume per grid point
int size[3]; // dimensions of big box
int start[3]; // corner of small box
int size0[3]; // dimensions of small box
int ng; // number of grid points in big box
int ng0; // number of grid points in small box
int nf; // number of localized functions
int nfd; // number of derivatives: zero or 3*nf
// pointers to size0 arrays:
double* f; // localized functions
double* fd; // xyz-derivatives of localized functions
double* w; // work array for one double or double complex array
} LocalizedFunctionsObject;
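/* Storage layout used by localized_functions.c: f holds the nf functions
 * contiguously, so function i at point n is f[i*ng0 + n]; when derivatives
 * are kept, nfd = 3*nf and fd points into the same allocation at
 * f + nf*ng0, with component c of function i stored at fd[(3*i + c)*ng0 + n]. */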
gpaw-0.11.0.13004/c/plt.c 0000664 0001750 0001750 00000006014 12553643466 014647 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
int write_plt_file(char *fname,
int nx, int ny, int nz,
double x0, double y0, double z0,
double dx, double dy, double dz,
double *grid);
/* write grid to binary plt (gOpenMol) plot file */
PyObject* WritePLT(PyObject *self, PyObject *args)
{
char* fname; /* file name */
PyArrayObject* ho; /* grid spacings */
PyArrayObject* go; /* grid to write */
if (!PyArg_ParseTuple(args, "sOO", &fname, &ho, &go))
return NULL;
/* must be 3D */
if(PyArray_NDIM(go) != 3) return NULL;
double* g = DOUBLEP(go);
double* h = DOUBLEP(ho);
write_plt_file(fname,
PyArray_DIM(go, 0),
PyArray_DIM(go, 1),
PyArray_DIM(go, 2),
0.,0.,0.,
h[0],h[1],h[2],
g);
Py_RETURN_NONE;
}
/* -----------------------------------------------------------------
* write grid to binary plt (gOpenMol) plot file
*
* x0, dx etc are assumed to be atomic units
* the grid is assumed to be in the format:
* grid(ix,iy,iz) = grid[ ix + ( iy + iz*ny )*nx ];
* where ix=0..nx-1 etc
*/
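/* Worked example: for nx = ny = nz = 2 the point (ix, iy, iz) = (1, 0, 1)
   is stored at grid[1 + (0 + 1*2)*2] = grid[5]. */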
/* stolen from pltfile.c */
#define FWRITE(value , size) { \
Items = fwrite(&value, size , 1L , Output_p);\
if(Items < 1) {\
printf("?ERROR - in writing contour file (*)\n");\
return(1);}}
int write_plt_file(char *fname,
int nx, int ny, int nz,
double x0, double y0, double z0,
double dx, double dy, double dz,
double *grid) {
FILE *Output_p;
static int Items;
float scale,zmin,zmax,ymin,ymax,xmin,xmax,val;
int rank,TypeOfSurface;
int ix,iy,iz,indx;
double norm,sum,dV;
Output_p = fopen(fname,"wb");
/* see http://www.csc.fi/gopenmol/developers/plt_format.phtml */
#define au_A 0.52917725
scale = au_A; /* atomic length in Angstroem */
rank=3; /* always 3 */
FWRITE(rank , sizeof(int));
TypeOfSurface=4; /* arbitrary */
FWRITE(TypeOfSurface , sizeof(int));
FWRITE(nz , sizeof(int));
FWRITE(ny , sizeof(int));
FWRITE(nx , sizeof(int));
zmin= scale * ((float) z0);
zmax= scale * ((float) z0+(nz-1)*dz);
/* float zmax=(float) z0+nz*dz; */
FWRITE(zmin , sizeof(float));
FWRITE(zmax , sizeof(float));
ymin= scale * ((float) y0);
ymax= scale * ((float) y0+(ny-1)*dy);
/* float ymax=(float) y0+ny*dy; */
FWRITE(ymin , sizeof(float));
FWRITE(ymax , sizeof(float));
xmin= scale * ((float) x0);
xmax= scale * ((float) x0+(nx-1)*dx);
/* float xmax=(float) x0+nx*dx; */
FWRITE(xmin , sizeof(float));
FWRITE(xmax , sizeof(float));
indx=0;
norm = 0;
sum=0;
dV=dx*dy*dz;
for(iz=0;iz<nz;iz++)
  for(iy=0;iy<ny;iy++)
    for(ix=0;ix<nx;ix++) {
      indx = ix + (iy + iz*ny)*nx;
      val = (float) grid[indx];
      norm += val*val;
      sum += val;
      FWRITE(val , sizeof(float));
    }
fclose(Output_p);
printf("#<plt> %s written (sum=%g,norm=%g)\n",
       fname,sum*dV,norm*dV);
return 0;
}
gpaw-0.11.0.13004/c/spline.c 0000664 0001750 0001750 00000007511 12553643466 015345 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "spline.h"
static void spline_dealloc(SplineObject *xp)
{
bmgs_deletespline(&xp->spline);
PyObject_DEL(xp);
}
static PyObject * spline_get_cutoff(SplineObject *self, PyObject *args)
{
return Py_BuildValue("d", self->spline.dr * self->spline.nbins);
}
static PyObject * spline_get_angular_momentum_number(SplineObject *self,
PyObject *args)
{
return Py_BuildValue("i", self->spline.l);
}
static PyObject * spline_get_value_and_derivative(SplineObject *obj,
PyObject *args,
PyObject *kwargs)
{
double r;
if (!PyArg_ParseTuple(args, "d", &r))
return NULL;
double f;
double dfdr;
bmgs_get_value_and_derivative(&obj->spline, r, &f, &dfdr);
return Py_BuildValue("(dd)", f, dfdr);
}
// Convert boundary point z-ranges to grid indices for the 2*l+1 boxes
static PyObject * spline_get_indices_from_zranges(SplineObject *self,
PyObject *args)
{
PyArrayObject* beg_c_obj;
PyArrayObject* end_c_obj;
PyArrayObject* G_b_obj;
int nm = 2 * self->spline.l + 1;
if (!PyArg_ParseTuple(args, "OOO", &beg_c_obj, &end_c_obj, &G_b_obj))
return NULL;
long* beg_c = LONGP(beg_c_obj);
long* end_c = LONGP(end_c_obj);
int ngmax = ((end_c[0] - beg_c[0]) *
(end_c[1] - beg_c[1]) *
(end_c[2] - beg_c[2]));
int* G_B = INTP(G_b_obj);
int nB = PyArray_DIMS(G_b_obj)[0];
int ng = 0;
for (int b = 0; b < nB; b+=2)
ng += G_B[b+1]-G_B[b];
npy_intp gm_dims[2] = {ng, nm};
PyArrayObject* indices_gm_obj = (PyArrayObject*)PyArray_SimpleNew(2, gm_dims,
NPY_INT);
int* p = INTP(indices_gm_obj);
for (int b = 0; b < nB; b += 2) {
int Ga = G_B[b], Gb = G_B[b+1];
for (int G = Ga; G < Gb; G++)
for (int m = 0; m < nm; m++)
*p++ = m * ngmax + G;
}
// PyObjects created in the C code will be initialized with a refcount
// of 1, for which reason we'll have to decref them when done here
PyObject* values = Py_BuildValue("(Oii)", indices_gm_obj, ng, nm);
Py_DECREF(indices_gm_obj);
return values;
}
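/* The returned (ng, nm) index array stores, for each selected grid point G
 * and each of the nm = 2*l + 1 components m, the flat offset m*ngmax + G,
 * i.e. the position of element [m][G] in a C-ordered (nm, ngmax) array that
 * spans the beg_c..end_c box. */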
static PyMethodDef spline_methods[] = {
{"get_cutoff",
(PyCFunction)spline_get_cutoff, METH_VARARGS, 0},
{"get_angular_momentum_number",
(PyCFunction)spline_get_angular_momentum_number, METH_VARARGS, 0},
{"get_value_and_derivative",
(PyCFunction)spline_get_value_and_derivative, METH_VARARGS, 0},
{"get_indices_from_zranges",
(PyCFunction)spline_get_indices_from_zranges, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
static PyObject * spline_call(SplineObject *obj, PyObject *args,
PyObject *kwargs)
{
double r;
if (!PyArg_ParseTuple(args, "d", &r))
return NULL;
return Py_BuildValue("d", bmgs_splinevalue(&obj->spline, r));
}
PyTypeObject SplineType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Spline",
sizeof(SplineObject), 0,
(destructor)spline_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0,
(ternaryfunc)spline_call,
0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"Spline object",
0, 0, 0, 0, 0, 0,
spline_methods
};
PyObject * NewSplineObject(PyObject *self, PyObject *args)
{
int l;
double rcut;
PyArrayObject* farray;
if (!PyArg_ParseTuple(args, "idO", &l, &rcut, &farray))
return NULL;
SplineObject *spline = PyObject_NEW(SplineObject, &SplineType);
if (spline == NULL)
return NULL;
int nbins = PyArray_DIMS(farray)[0] - 1;
double dr = rcut / nbins;
spline->spline = bmgs_spline(l, dr, nbins, DOUBLEP(farray));
return (PyObject*)spline;
}
gpaw-0.11.0.13004/c/symmetry.c 0000664 0001750 0001750 00000017126 12553643466 015747 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2010-2011 CAMd
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
//
// Apply symmetry operation op_cc to a and add result to b:
//
//   b(U^T g) += a(g),
//
// where U[c1,c2] = op_cc[c1,c2] and g = (g0, g1, g2)^T.
//
PyObject* symmetrize(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
if (!PyArg_ParseTuple(args, "OOO", &a_g_obj, &b_g_obj, &op_cc_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const double* a_g = (const double*)PyArray_DATA(a_g_obj);
double* b_g = (double*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
b_g[(p0 * ng1 + p1) * ng2 + p2] += *a_g++;
}
Py_RETURN_NONE;
}
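/* The wrapping ((i % n) + n) % n maps a possibly negative index back into
 * [0, n): in C the remainder keeps the sign of the dividend, so for example
 * with i = -1 and n = 4 one gets ((-1 % 4) + 4) % 4 = 3.  The same idiom is
 * used in the other symmetrize_* routines below. */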
PyObject* symmetrize_ft(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* ft_c_obj;
if (!PyArg_ParseTuple(args, "OOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &ft_c_obj))
return NULL;
const double* ft = (const double*)PyArray_DATA(ft_c_obj);
const long* C = (const long*)PyArray_DATA(op_cc_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
int ft0 = (int)(ft[0]*ng0);
int ft1 = (int)(ft[1]*ng1);
int ft2 = (int)(ft[2]*ng2);
const double* a_g = (const double*)PyArray_DATA(a_g_obj);
double* b_g = (double*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2 - ft0) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2 - ft1) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2 - ft2) % ng2 + ng2) % ng2;
b_g[(p0 * ng1 + p1) * ng2 + p2] += *a_g++;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_wavefunction(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* kpt0_obj;
PyArrayObject* kpt1_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &kpt0_obj, &kpt1_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
const double* kpt0 = (const double*) PyArray_DATA(kpt0_obj);
const double* kpt1 = (const double*) PyArray_DATA(kpt1_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const double complex* a_g = (const double complex*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
double complex phase = cexp(I * 2. * M_PI *
(kpt1[0]/ng0*p0 +
kpt1[1]/ng1*p1 +
kpt1[2]/ng2*p2 -
kpt0[0]/ng0*g0 -
kpt0[1]/ng1*g1 -
kpt0[2]/ng2*g2));
b_g[(p0 * ng1 + p1) * ng2 + p2] += (*a_g * phase);
a_g++;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_return_index(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* op_cc_obj;
PyArrayObject* kpt0_obj;
PyArrayObject* kpt1_obj;
if (!PyArg_ParseTuple(args, "OOOOO", &a_g_obj, &b_g_obj, &op_cc_obj, &kpt0_obj, &kpt1_obj))
return NULL;
const long* C = (const long*)PyArray_DATA(op_cc_obj);
const double* kpt0 = (const double*) PyArray_DATA(kpt0_obj);
const double* kpt1 = (const double*) PyArray_DATA(kpt1_obj);
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
unsigned long* a_g = (unsigned long*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
int p0 = ((C[0] * g0 + C[3] * g1 + C[6] * g2) % ng0 + ng0) % ng0;
int p1 = ((C[1] * g0 + C[4] * g1 + C[7] * g2) % ng1 + ng1) % ng1;
int p2 = ((C[2] * g0 + C[5] * g1 + C[8] * g2) % ng2 + ng2) % ng2;
double complex phase = cexp(I * 2. * M_PI *
(kpt1[0]/ng0*p0 +
kpt1[1]/ng1*p1 +
kpt1[2]/ng2*p2 -
kpt0[0]/ng0*g0 -
kpt0[1]/ng1*g1 -
kpt0[2]/ng2*g2));
*a_g++ = (p0 * ng1 + p1) * ng2 + p2;
*b_g++ = phase;
}
Py_RETURN_NONE;
}
PyObject* symmetrize_with_index(PyObject *self, PyObject *args)
{
PyArrayObject* a_g_obj;
PyArrayObject* b_g_obj;
PyArrayObject* index_g_obj;
PyArrayObject* phase_g_obj;
if (!PyArg_ParseTuple(args, "OOOO", &a_g_obj, &b_g_obj, &index_g_obj, &phase_g_obj))
return NULL;
int ng0 = PyArray_DIMS(a_g_obj)[0];
int ng1 = PyArray_DIMS(a_g_obj)[1];
int ng2 = PyArray_DIMS(a_g_obj)[2];
const unsigned long* index_g = (const unsigned long*)PyArray_DATA(index_g_obj);
const double complex* phase_g = (const double complex*)PyArray_DATA(phase_g_obj);
const double complex* a_g = (const double complex*)PyArray_DATA(a_g_obj);
double complex* b_g = (double complex*)PyArray_DATA(b_g_obj);
for (int g0 = 0; g0 < ng0; g0++)
for (int g1 = 0; g1 < ng1; g1++)
for (int g2 = 0; g2 < ng2; g2++) {
b_g[*index_g] += (*a_g * *phase_g);
a_g++;
phase_g++;
index_g++;
}
Py_RETURN_NONE;
}
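/* symmetrize_return_index and symmetrize_with_index form a precompute/apply
 * pair: the first records, for every grid point, the target index and phase
 * factor of the symmetry operation; the second only replays a stored
 * index/phase map, so the integer arithmetic and complex exponentials need
 * not be recomputed when many arrays share the same operation. */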
PyObject* map_k_points(PyObject *self, PyObject *args)
{
PyArrayObject* bzk_kc_obj;
PyArrayObject* U_scc_obj;
double tol;
PyArrayObject* bz2bz_ks_obj;
int ka, kb;
if (!PyArg_ParseTuple(args, "OOdOii", &bzk_kc_obj, &U_scc_obj,
&tol, &bz2bz_ks_obj, &ka, &kb))
return NULL;
const long* U_scc = (const long*)PyArray_DATA(U_scc_obj);
const double* bzk_kc = (const double*)PyArray_DATA(bzk_kc_obj);
long* bz2bz_ks = (long*)PyArray_DATA(bz2bz_ks_obj);
int nbzkpts = PyArray_DIMS(bzk_kc_obj)[0];
int nsym = PyArray_DIMS(U_scc_obj)[0];
for (int k1 = ka; k1 < kb; k1++) {
const double* q = bzk_kc + k1 * 3;
for (int s = 0; s < nsym; s++) {
const long* U = U_scc + s * 9;
double q0 = U[0] * q[0] + U[1] * q[1] + U[2] * q[2];
double q1 = U[3] * q[0] + U[4] * q[1] + U[5] * q[2];
double q2 = U[6] * q[0] + U[7] * q[1] + U[8] * q[2];
for (int k2 = 0; k2 < nbzkpts; k2++) {
double p0 = q0 - bzk_kc[k2 * 3];
if (fabs(p0 - round(p0)) > tol)
continue;
double p1 = q1 - bzk_kc[k2 * 3 + 1];
if (fabs(p1 - round(p1)) > tol)
continue;
double p2 = q2 - bzk_kc[k2 * 3 + 2];
if (fabs(p2 - round(p2)) > tol)
continue;
bz2bz_ks[k1 * nsym + s] = k2;
break;
}
}
}
Py_RETURN_NONE;
}
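/* map_k_points sets bz2bz_ks[k1*nsym + s] = k2 whenever the rotated k-point
 * U_s k1 differs from bzk_kc[k2] by an integer (reciprocal lattice) vector
 * to within tol.  Entries with no match are left untouched, so the caller
 * presumably initializes bz2bz_ks (e.g. to -1) beforehand. */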
gpaw-0.11.0.13004/c/localized_functions.c 0000664 0001750 0001750 00000034666 12553643466 020124 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2005-2008 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include "spline.h"
#include <assert.h>
#include <string.h>
#ifdef PARALLEL
# include <mpi.h>
#else
typedef int* MPI_Request; // !!!!!!!???????????
typedef int* MPI_Comm;
# define MPI_COMM_NULL 0
# define MPI_Comm_rank(comm, rank) *(rank) = 0
# define MPI_Bcast(buff, count, datatype, root, comm) 0
#endif
#include "mympi.h"
#include "localized_functions.h"
#ifdef GPAW_NO_UNDERSCORE_BLAS
# define dgemm_ dgemm
# define dgemv_ dgemv
#endif
int dgemm_(char *transa, char *transb, int *m, int * n,
int *k, double *alpha, double *a, int *lda,
double *b, int *ldb, double *beta,
double *c, int *ldc);
int dgemv_(char *trans, int *m, int * n,
double *alpha, double *a, int *lda,
double *x, int *incx, double *beta,
double *y, int *incy);
static void localized_functions_dealloc(LocalizedFunctionsObject *self)
{
free(self->f);
free(self->w);
PyObject_DEL(self);
}
static PyObject * localized_functions_integrate(LocalizedFunctionsObject *self,
PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* a = DOUBLEP(aa);
double* b = DOUBLEP(bb);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int nf = self->nf;
double* f = self->f;
double* w = self->w;
int ng = self->ng;
int ng0 = self->ng0;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
bmgs_cut(a, self->size, self->start, w, self->size0);
double zero = 0.0;
int inc = 1;
dgemv_("t", &ng0, &nf, &self->dv, f, &ng0, w, &inc, &zero, b, &inc);
a += ng;
b += nf;
}
else
for (int n = 0; n < na; n++)
{
bmgs_cutz((const double_complex*)a, self->size, self->start,
(double_complex*)w, self->size0);
double zero = 0.0;
int inc = 2;
dgemm_("n", "n", &inc, &nf, &ng0, &self->dv, w, &inc, f, &ng0,
&zero, b, &inc);
a += 2 * ng;
b += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_derivative(
LocalizedFunctionsObject *self, PyObject *args)
{
PyArrayObject* aa;
PyArrayObject* bb;
if (!PyArg_ParseTuple(args, "OO", &aa, &bb))
return NULL;
const double* a = DOUBLEP(aa);
double* b = DOUBLEP(bb);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int nf = self->nfd;
double* f = self->fd;
double* w = self->w;
int ng = self->ng;
int ng0 = self->ng0;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
bmgs_cut(a, self->size, self->start, w, self->size0);
double zero = 0.0;
int inc = 1;
dgemv_("t", &ng0, &nf, &self->dv, f, &ng0, w, &inc, &zero, b, &inc);
a += ng;
b += nf;
}
else
for (int n = 0; n < na; n++)
{
bmgs_cutz((const double_complex*)a, self->size, self->start,
(double_complex*)w, self->size0);
double zero = 0.0;
int inc = 2;
dgemm_("n", "n", &inc, &nf, &ng0, &self->dv, w, &inc, f, &ng0,
&zero, b, &inc);
a += 2 * ng;
b += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_add(LocalizedFunctionsObject *self,
PyObject *args)
{
PyArrayObject* cc;
PyArrayObject* aa;
if (!PyArg_ParseTuple(args, "OO", &cc, &aa))
return NULL;
double* c = DOUBLEP(cc);
double* a = DOUBLEP(aa);
int na = 1;
for (int d = 0; d < PyArray_NDIM(aa) - 3; d++)
na *= PyArray_DIM(aa, d);
int ng = self->ng;
int ng0 = self->ng0;
int nf = self->nf;
double* f = self->f;
double* w = self->w;
if (PyArray_DESCR(aa)->type_num == NPY_DOUBLE)
for (int n = 0; n < na; n++)
{
double zero = 0.0;
double one = 1.0;
int inc = 1;
dgemv_("n", &ng0, &nf, &one, f, &ng0, c, &inc, &zero, w, &inc);
bmgs_pastep(w, self->size0, a, self->size, self->start);
a += ng;
c += nf;
}
else
for (int n = 0; n < na; n++)
{
double zero = 0.0;
double one = 1.0;
int inc = 2;
dgemm_("n", "t", &inc, &ng0, &nf, &one, c, &inc, f, &ng0,
&zero, w, &inc);
bmgs_pastepz((const double_complex*)w, self->size0,
(double_complex*)a, self->size, self->start);
a += 2 * ng;
c += 2 * nf;
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_add_density(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyArrayObject* dd;
PyArrayObject* oo;
if (!PyArg_ParseTuple(args, "OO", &dd, &oo))
return NULL;
const double* o = DOUBLEP(oo);
double* d = DOUBLEP(dd);
int nf = self->nf;
int ng0 = self->ng0;
const double* f = self->f;
double* w = self->w;
memset(w, 0, ng0 * sizeof(double));
for (int i = 0; i < nf; i++)
for (int n = 0; n < ng0; n++)
{
double g = *f++;
w[n] += o[i] * g * g;
}
bmgs_pastep(w, self->size0, d, self->size, self->start);
Py_RETURN_NONE;
}
static PyObject * localized_functions_add_density2(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyArrayObject* dd; // density array to be added to
PyArrayObject* oo; // density matrix
if (!PyArg_ParseTuple(args, "OO", &dd, &oo))
return NULL;
const double* o = DOUBLEP(oo);
double* d = DOUBLEP(dd);
int nf = self->nf;
int ng0 = self->ng0;
const double* f = self->f;
double* w = self->w;
memset(w, 0, ng0 * sizeof(double));
int p = 0; // compressed ii index
double F = 0.0; // integrated value
for (int i = 0; i < nf; i++)
{
for (int j = i; j < nf; j++)
{
for (int n = 0; n < ng0; n++)
{
double tmp = o[p] * f[n + i * ng0] * f[n + j * ng0];
F += tmp;
w[n] += tmp;
}
p++;
}
}
bmgs_pastep(w, self->size0, d, self->size, self->start);
//Py_RETURN_NONE;
return Py_BuildValue("d", F * self->dv);
}
static PyObject * localized_functions_norm(LocalizedFunctionsObject* self,
PyObject *args)
{
PyArrayObject* I_obj;
if (!PyArg_ParseTuple(args, "O", &I_obj))
return NULL;
double (*II)[4] = (double (*)[4])DOUBLEP(I_obj);
const double* f = self->f;
for (int i = 0; i < self->nf; i++)
{
double F = 0.0;
for (int n = 0; n < self->ng0; n++)
F += f[n];
II[i][0] += F * self->dv;
f += self->ng0;
}
if (self->nfd > 0)
{
const double* fd = self->fd;
for (int i = 0; i < self->nf; i++)
for (int c = 0; c < 3; c++)
{
double F = 0.0;
for (int n = 0; n < self->ng0; n++)
F += fd[n];
II[i][c + 1] += F * self->dv;
fd += self->ng0;
}
}
Py_RETURN_NONE;
}
static PyObject * localized_functions_normalize(LocalizedFunctionsObject* self,
PyObject *args)
{
double I0;
PyArrayObject* I_obj;
if (!PyArg_ParseTuple(args, "dO", &I0, &I_obj))
return NULL;
double (*II)[4] = (double (*)[4])DOUBLEP(I_obj);
double* f = self->f;
double s = I0 / II[0][0];
// Scale spherically symmetric function so that the integral
// becomes exactly I0:
for (int n = 0; n < self->ng0; n++)
f[n] *= s;
// Adjust all other functions (l > 0) so that they integrate to zero:
for (int i = 1; i < self->nf; i++)
{
double *g = f + i * self->ng0;
double a = -II[i][0] / I0;
for (int n = 0; n < self->ng0; n++)
g[n] += a * f[n];
}
if (self->nfd > 0)
{
// Adjust derivatives:
double* fd = self->fd;
for (int n = 0; n < 3 * self->ng0; n++)
fd[n] *= s;
for (int c = 0; c < 3; c++)
{
double sd = II[0][c + 1] / II[0][0];
for (int n = 0; n < self->ng0; n++)
fd[n + c * self->ng0] -= f[n] * sd ;
}
for (int i = 1; i < self->nf; i++)
{
double *gd = fd + 3 * i * self->ng0;
double a = -II[i][0] / I0;
for (int n = 0; n < 3 * self->ng0; n++)
gd[n] += a * fd[n];
for (int c = 0; c < 3; c++)
{
double sd = II[i][c + 1] / I0;
for (int n = 0; n < self->ng0; n++)
gd[n + c * self->ng0] -= f[n] * sd ;
}
}
}
Py_RETURN_NONE;
}
static PyObject * get_functions(LocalizedFunctionsObject* self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
npy_intp dims[4] = {self->nf,
self->size0[0], self->size0[1], self->size0[2]};
PyArrayObject* functions = (PyArrayObject*)PyArray_SimpleNew(4, dims,
NPY_DOUBLE);
memcpy(PyArray_DATA(functions), self->f,
self->nf * self->ng0 * sizeof(double));
return (PyObject*)functions;
}
static PyObject * set_corner(LocalizedFunctionsObject* self,
PyObject *args)
{
PyArrayObject* start_c_obj;
if (!PyArg_ParseTuple(args, "O", &start_c_obj))
return NULL;
double *start_c = DOUBLEP(start_c_obj);
for (int c = 0; c < 3; c++)
self->start[c] = start_c[c];
Py_RETURN_NONE;
}
#ifdef PARALLEL
static PyObject * localized_functions_broadcast(LocalizedFunctionsObject*
self,
PyObject *args)
{
PyObject* comm_obj;
int root;
if (!PyArg_ParseTuple(args, "Oi", &comm_obj, &root))
return NULL;
MPI_Comm comm = ((MPIObject*)comm_obj)->comm;
MPI_Bcast(self->f, self->ng0 * (self->nf + self->nfd),
MPI_DOUBLE, root, comm);
Py_RETURN_NONE;
}
#endif
static PyMethodDef localized_functions_methods[] = {
{"integrate",
(PyCFunction)localized_functions_integrate, METH_VARARGS, 0},
{"derivative",
(PyCFunction)localized_functions_derivative, METH_VARARGS, 0},
{"add",
(PyCFunction)localized_functions_add, METH_VARARGS, 0},
{"add_density",
(PyCFunction)localized_functions_add_density, METH_VARARGS, 0},
{"add_density2",
(PyCFunction)localized_functions_add_density2, METH_VARARGS, 0},
{"norm",
(PyCFunction)localized_functions_norm, METH_VARARGS, 0},
{"normalize",
(PyCFunction)localized_functions_normalize, METH_VARARGS, 0},
{"get_functions",
(PyCFunction)get_functions, METH_VARARGS, 0},
{"set_corner",
(PyCFunction)set_corner, METH_VARARGS, 0},
#ifdef PARALLEL
{"broadcast",
(PyCFunction)localized_functions_broadcast, METH_VARARGS, 0},
#endif
{NULL, NULL, 0, NULL}
};
PyTypeObject LocalizedFunctionsType = {
PyVarObject_HEAD_INIT(NULL, 0)
"LocalizedFunctions",
sizeof(LocalizedFunctionsObject),
0,
(destructor)localized_functions_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"LF object",
0, 0, 0, 0, 0, 0,
localized_functions_methods
};
PyObject * NewLocalizedFunctionsObject(PyObject *obj, PyObject *args)
{
PyObject* radials;
PyArrayObject* size0_array;
PyArrayObject* size_array;
PyArrayObject* start_array;
PyArrayObject* h_array;
PyArrayObject* C_array;
int real;
int forces;
int compute;
if (!PyArg_ParseTuple(args, "OOOOOOiii", &radials,
&size0_array, &size_array,
&start_array, &h_array, &C_array,
&real, &forces, &compute))
return NULL;
LocalizedFunctionsObject *self = PyObject_NEW(LocalizedFunctionsObject,
&LocalizedFunctionsType);
if (self == NULL)
return NULL;
const long* size0 = LONGP(size0_array);
const long* size = LONGP(size_array);
const long* start = LONGP(start_array);
const double* h = DOUBLEP(h_array);
const double* C = DOUBLEP(C_array);
self->dv = h[0] * h[1] * h[2];
int ng = size[0] * size[1] * size[2];
int ng0 = size0[0] * size0[1] * size0[2];
self->ng = ng;
self->ng0 = ng0;
for (int i = 0; i < 3; i++)
{
self->size0[i] = size0[i];
self->size[i] = size[i];
self->start[i] = start[i];
}
int nf = 0;
int nfd = 0;
int nbins = 0;
double dr = 0.0;
for (int j = 0; j < PyList_Size(radials); j++)
{
const bmgsspline* spline =
&(((SplineObject*)PyList_GetItem(radials, j))->spline);
int l = spline->l;
assert(l <= 4);
if (j == 0)
{
nbins = spline->nbins;
dr = spline->dr;
}
else
{
assert(spline->nbins == nbins);
assert(spline->dr == dr);
}
nf += (2 * l + 1);
}
if (forces)
nfd = 3 * nf;
self->nf = nf;
self->nfd = nfd;
self->f = GPAW_MALLOC(double, (nf + nfd) * ng0);
if (forces)
self->fd = self->f + nf * ng0;
else
self->fd = 0;
int ndouble = (real ? 1 : 2);
self->w = GPAW_MALLOC(double, ng0 * ndouble);
if (compute)
{
int* bin = GPAW_MALLOC(int, ng0);
double* d = GPAW_MALLOC(double, ng0);
double* f0 = GPAW_MALLOC(double, ng0);
double* fd0 = 0;
if (forces)
fd0 = GPAW_MALLOC(double, ng0);
double* a = self->f;
double* ad = self->fd;
for (int j = 0; j < PyList_Size(radials); j++)
{
const bmgsspline* spline =
&(((SplineObject*)PyList_GetItem(radials, j))->spline);
if (j == 0)
bmgs_radial1(spline, self->size0, C, h, bin, d);
bmgs_radial2(spline, self->size0, bin, d, f0, fd0);
int l = spline->l;
for (int m = -l; m <= l; m++)
{
bmgs_radial3(spline, m, self->size0, C, h, f0, a);
a += ng0;
}
if (forces)
for (int m = -l; m <= l; m++)
for (int c = 0; c < 3; c++)
{
bmgs_radiald3(spline, m, c, self->size0, C, h, f0, fd0, ad);
ad += ng0;
}
}
if (forces)
free(fd0);
free(f0);
free(d);
free(bin);
}
return (PyObject*)self;
}
gpaw-0.11.0.13004/c/spline.h 0000664 0001750 0001750 00000000343 12553643466 015346 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include "extensions.h"
#include "bmgs/bmgs.h"
typedef struct
{
PyObject_HEAD
bmgsspline spline;
} SplineObject;
gpaw-0.11.0.13004/c/fftw.c 0000664 0001750 0001750 00000003025 12553643466 015015 0 ustar jensj jensj 0000000 0000000 #ifdef GPAW_WITH_FFTW
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <fftw3.h>
/* Create plan and return pointer to plan as a string */
PyObject * FFTWPlan(PyObject *self, PyObject *args)
{
PyArrayObject* in;
PyArrayObject* out;
int sign;
unsigned int flags;
if (!PyArg_ParseTuple(args, "OOiI",
&in, &out, &sign, &flags))
return NULL;
fftw_plan* plan = (fftw_plan*)malloc(sizeof(fftw_plan));
if (in->descr->type_num == PyArray_DOUBLE)
*plan = fftw_plan_dft_r2c(in->nd, in->dimensions,
(double*)in->data,
(double (*)[2])out->data,
flags);
else if (out->descr->type_num == PyArray_DOUBLE)
*plan = fftw_plan_dft_c2r(in->nd, out->dimensions,
(double (*)[2])in->data,
(double*)out->data,
flags);
else
*plan = fftw_plan_dft(in->nd, out->dimensions,
(double (*)[2])in->data,
(double (*)[2])out->data,
sign, flags);
return Py_BuildValue("s#", plan, sizeof(fftw_plan*));
}
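/* The heap-allocated fftw_plan is returned to Python as a raw byte string
 * ("s#"); FFTWExecute and FFTWDestroy below unpack that string back into the
 * pointer, and FFTWDestroy releases both the plan and the wrapper allocation. */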
PyObject * FFTWExecute(PyObject *self, PyObject *args)
{
fftw_plan* plan;
int n;
if (!PyArg_ParseTuple(args, "s#", &plan, &n))
return NULL;
fftw_execute(*plan);
Py_RETURN_NONE;
}
PyObject * FFTWDestroy(PyObject *self, PyObject *args)
{
fftw_plan* plan;
int n;
if (!PyArg_ParseTuple(args, "s#", &plan, &n))
return NULL;
fftw_destroy_plan(*plan);
free(plan);
Py_RETURN_NONE;
}
#endif // GPAW_WITH_FFTW
gpaw-0.11.0.13004/c/_gpaw.c 0000664 0001750 0001750 00000037707 12553643466 015162 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Copyright (C) 2007-2010 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#include <numpy/arrayobject.h>
#ifdef GPAW_WITH_HDF5
PyMODINIT_FUNC init_gpaw_hdf5(void);
#endif
#ifdef GPAW_HPM
PyObject* ibm_hpm_start(PyObject *self, PyObject *args);
PyObject* ibm_hpm_stop(PyObject *self, PyObject *args);
PyObject* ibm_mpi_start(PyObject *self);
PyObject* ibm_mpi_stop(PyObject *self);
#endif
#ifdef CRAYPAT
#include <pat_api.h>
PyObject* craypat_region_begin(PyObject *self, PyObject *args);
PyObject* craypat_region_end(PyObject *self, PyObject *args);
#endif
PyObject* symmetrize(PyObject *self, PyObject *args);
PyObject* symmetrize_ft(PyObject *self, PyObject *args);
PyObject* symmetrize_wavefunction(PyObject *self, PyObject *args);
PyObject* symmetrize_return_index(PyObject *self, PyObject *args);
PyObject* symmetrize_with_index(PyObject *self, PyObject *args);
PyObject* map_k_points(PyObject *self, PyObject *args);
PyObject* scal(PyObject *self, PyObject *args);
PyObject* mmm(PyObject *self, PyObject *args);
PyObject* gemm(PyObject *self, PyObject *args);
PyObject* gemv(PyObject *self, PyObject *args);
PyObject* axpy(PyObject *self, PyObject *args);
PyObject* czher(PyObject *self, PyObject *args);
PyObject* rk(PyObject *self, PyObject *args);
PyObject* r2k(PyObject *self, PyObject *args);
PyObject* dotc(PyObject *self, PyObject *args);
PyObject* dotu(PyObject *self, PyObject *args);
PyObject* multi_dotu(PyObject *self, PyObject *args);
PyObject* multi_axpy(PyObject *self, PyObject *args);
PyObject* diagonalize(PyObject *self, PyObject *args);
PyObject* diagonalize_mr3(PyObject *self, PyObject *args);
PyObject* general_diagonalize(PyObject *self, PyObject *args);
PyObject* inverse_cholesky(PyObject *self, PyObject *args);
PyObject* inverse_symmetric(PyObject *self, PyObject *args);
PyObject* inverse_general(PyObject *self, PyObject *args);
PyObject* linear_solve_band(PyObject *self, PyObject *args);
PyObject* linear_solve_tridiag(PyObject *self, PyObject *args);
PyObject* right_eigenvectors(PyObject *self, PyObject *args);
PyObject* NewLocalizedFunctionsObject(PyObject *self, PyObject *args);
PyObject* NewOperatorObject(PyObject *self, PyObject *args);
PyObject* NewWOperatorObject(PyObject *self, PyObject *args);
PyObject* NewSplineObject(PyObject *self, PyObject *args);
PyObject* NewTransformerObject(PyObject *self, PyObject *args);
PyObject* pc_potential(PyObject *self, PyObject *args);
PyObject* pc_potential_value(PyObject *self, PyObject *args);
PyObject* heap_mallinfo(PyObject *self);
PyObject* elementwise_multiply_add(PyObject *self, PyObject *args);
PyObject* utilities_gaussian_wave(PyObject *self, PyObject *args);
PyObject* utilities_vdot(PyObject *self, PyObject *args);
PyObject* utilities_vdot_self(PyObject *self, PyObject *args);
PyObject* errorfunction(PyObject *self, PyObject *args);
PyObject* cerf(PyObject *self, PyObject *args);
PyObject* pack(PyObject *self, PyObject *args);
PyObject* unpack(PyObject *self, PyObject *args);
PyObject* unpack_complex(PyObject *self, PyObject *args);
PyObject* hartree(PyObject *self, PyObject *args);
PyObject* localize(PyObject *self, PyObject *args);
PyObject* NewXCFunctionalObject(PyObject *self, PyObject *args);
PyObject* NewlxcXCFunctionalObject(PyObject *self, PyObject *args);
PyObject* lxcXCFuncNum(PyObject *self, PyObject *args);
PyObject* exterior_electron_density_region(PyObject *self, PyObject *args);
PyObject* plane_wave_grid(PyObject *self, PyObject *args);
PyObject* overlap(PyObject *self, PyObject *args);
PyObject* vdw(PyObject *self, PyObject *args);
PyObject* vdw2(PyObject *self, PyObject *args);
PyObject* spherical_harmonics(PyObject *self, PyObject *args);
PyObject* spline_to_grid(PyObject *self, PyObject *args);
PyObject* NewLFCObject(PyObject *self, PyObject *args);
#if defined(GPAW_WITH_SL) && defined(PARALLEL)
PyObject* new_blacs_context(PyObject *self, PyObject *args);
PyObject* get_blacs_gridinfo(PyObject* self, PyObject *args);
PyObject* get_blacs_local_shape(PyObject* self, PyObject *args);
PyObject* blacs_destroy(PyObject *self, PyObject *args);
PyObject* scalapack_set(PyObject *self, PyObject *args);
PyObject* scalapack_redist(PyObject *self, PyObject *args);
PyObject* scalapack_diagonalize_dc(PyObject *self, PyObject *args);
PyObject* scalapack_diagonalize_ex(PyObject *self, PyObject *args);
#ifdef GPAW_MR3
PyObject* scalapack_diagonalize_mr3(PyObject *self, PyObject *args);
#endif
PyObject* scalapack_general_diagonalize_dc(PyObject *self, PyObject *args);
PyObject* scalapack_general_diagonalize_ex(PyObject *self, PyObject *args);
#ifdef GPAW_MR3
PyObject* scalapack_general_diagonalize_mr3(PyObject *self, PyObject *args);
#endif
PyObject* scalapack_inverse_cholesky(PyObject *self, PyObject *args);
PyObject* scalapack_inverse(PyObject *self, PyObject *args);
PyObject* scalapack_solve(PyObject *self, PyObject *args);
PyObject* pblas_tran(PyObject *self, PyObject *args);
PyObject* pblas_gemm(PyObject *self, PyObject *args);
PyObject* pblas_hemm(PyObject *self, PyObject *args);
PyObject* pblas_gemv(PyObject *self, PyObject *args);
PyObject* pblas_r2k(PyObject *self, PyObject *args);
PyObject* pblas_rk(PyObject *self, PyObject *args);
#endif
#ifdef GPAW_PAPI
PyObject* papi_mem_info(PyObject *self, PyObject *args);
#endif
// Moving least squares interpolation
PyObject* mlsqr(PyObject *self, PyObject *args);
static PyMethodDef functions[] = {
{"symmetrize", symmetrize, METH_VARARGS, 0},
{"symmetrize_ft", symmetrize_ft, METH_VARARGS, 0},
{"symmetrize_wavefunction", symmetrize_wavefunction, METH_VARARGS, 0},
{"symmetrize_return_index", symmetrize_return_index, METH_VARARGS, 0},
{"symmetrize_with_index", symmetrize_with_index, METH_VARARGS, 0},
{"map_k_points", map_k_points, METH_VARARGS, 0},
{"scal", scal, METH_VARARGS, 0},
{"mmm", mmm, METH_VARARGS, 0},
{"gemm", gemm, METH_VARARGS, 0},
{"gemv", gemv, METH_VARARGS, 0},
{"axpy", axpy, METH_VARARGS, 0},
{"czher", czher, METH_VARARGS, 0},
{"rk", rk, METH_VARARGS, 0},
{"r2k", r2k, METH_VARARGS, 0},
{"dotc", dotc, METH_VARARGS, 0},
{"dotu", dotu, METH_VARARGS, 0},
{"multi_dotu", multi_dotu, METH_VARARGS, 0},
{"multi_axpy", multi_axpy, METH_VARARGS, 0},
{"diagonalize", diagonalize, METH_VARARGS, 0},
{"diagonalize_mr3", diagonalize_mr3, METH_VARARGS, 0},
{"general_diagonalize", general_diagonalize, METH_VARARGS, 0},
{"inverse_cholesky", inverse_cholesky, METH_VARARGS, 0},
{"inverse_symmetric", inverse_symmetric, METH_VARARGS, 0},
{"inverse_general", inverse_general, METH_VARARGS, 0},
{"linear_solve_band", linear_solve_band, METH_VARARGS, 0},
{"linear_solve_tridiag", linear_solve_tridiag, METH_VARARGS, 0},
{"right_eigenvectors", right_eigenvectors, METH_VARARGS, 0},
{"LocalizedFunctions", NewLocalizedFunctionsObject, METH_VARARGS, 0},
{"Operator", NewOperatorObject, METH_VARARGS, 0},
{"WOperator", NewWOperatorObject, METH_VARARGS, 0},
{"Spline", NewSplineObject, METH_VARARGS, 0},
{"Transformer", NewTransformerObject, METH_VARARGS, 0},
{"heap_mallinfo", (PyCFunction) heap_mallinfo, METH_NOARGS, 0},
{"elementwise_multiply_add", elementwise_multiply_add, METH_VARARGS, 0},
{"utilities_gaussian_wave", utilities_gaussian_wave, METH_VARARGS, 0},
{"utilities_vdot", utilities_vdot, METH_VARARGS, 0},
{"utilities_vdot_self", utilities_vdot_self, METH_VARARGS, 0},
{"eed_region", exterior_electron_density_region, METH_VARARGS, 0},
{"plane_wave_grid", plane_wave_grid, METH_VARARGS, 0},
{"erf", errorfunction, METH_VARARGS, 0},
{"cerf", cerf, METH_VARARGS, 0},
{"pack", pack, METH_VARARGS, 0},
{"unpack", unpack, METH_VARARGS, 0},
{"unpack_complex", unpack_complex, METH_VARARGS, 0},
{"hartree", hartree, METH_VARARGS, 0},
{"localize", localize, METH_VARARGS, 0},
{"XCFunctional", NewXCFunctionalObject, METH_VARARGS, 0},
/* {"MGGAFunctional", NewMGGAFunctionalObject, METH_VARARGS, 0},*/
{"lxcXCFunctional", NewlxcXCFunctionalObject, METH_VARARGS, 0},
{"lxcXCFuncNum", lxcXCFuncNum, METH_VARARGS, 0},
{"overlap", overlap, METH_VARARGS, 0},
{"vdw", vdw, METH_VARARGS, 0},
{"vdw2", vdw2, METH_VARARGS, 0},
{"spherical_harmonics", spherical_harmonics, METH_VARARGS, 0},
{"pc_potential", pc_potential, METH_VARARGS, 0},
{"pc_potential_value", pc_potential_value, METH_VARARGS, 0},
{"spline_to_grid", spline_to_grid, METH_VARARGS, 0},
{"LFC", NewLFCObject, METH_VARARGS, 0},
/*
{"calculate_potential_matrix", calculate_potential_matrix, METH_VARARGS, 0},
{"construct_density", construct_density, METH_VARARGS, 0},
{"construct_density1", construct_density1, METH_VARARGS, 0},
*/
#if defined(GPAW_WITH_SL) && defined(PARALLEL)
{"new_blacs_context", new_blacs_context, METH_VARARGS, NULL},
{"get_blacs_gridinfo", get_blacs_gridinfo, METH_VARARGS, NULL},
{"get_blacs_local_shape", get_blacs_local_shape, METH_VARARGS, NULL},
{"blacs_destroy", blacs_destroy, METH_VARARGS, 0},
{"scalapack_set", scalapack_set, METH_VARARGS, 0},
{"scalapack_redist", scalapack_redist, METH_VARARGS, 0},
{"scalapack_diagonalize_dc", scalapack_diagonalize_dc, METH_VARARGS, 0},
{"scalapack_diagonalize_ex", scalapack_diagonalize_ex, METH_VARARGS, 0},
#ifdef GPAW_MR3
{"scalapack_diagonalize_mr3", scalapack_diagonalize_mr3, METH_VARARGS, 0},
#endif // GPAW_MR3
{"scalapack_general_diagonalize_dc",
scalapack_general_diagonalize_dc, METH_VARARGS, 0},
{"scalapack_general_diagonalize_ex",
scalapack_general_diagonalize_ex, METH_VARARGS, 0},
#ifdef GPAW_MR3
{"scalapack_general_diagonalize_mr3",
scalapack_general_diagonalize_mr3, METH_VARARGS, 0},
#endif // GPAW_MR3
{"scalapack_inverse_cholesky", scalapack_inverse_cholesky, METH_VARARGS, 0},
{"scalapack_inverse", scalapack_inverse, METH_VARARGS, 0},
{"scalapack_solve", scalapack_solve, METH_VARARGS, 0},
{"pblas_tran", pblas_tran, METH_VARARGS, 0},
{"pblas_gemm", pblas_gemm, METH_VARARGS, 0},
{"pblas_hemm", pblas_hemm, METH_VARARGS, 0},
{"pblas_gemv", pblas_gemv, METH_VARARGS, 0},
{"pblas_r2k", pblas_r2k, METH_VARARGS, 0},
{"pblas_rk", pblas_rk, METH_VARARGS, 0},
#endif // GPAW_WITH_SL && PARALLEL
#ifdef GPAW_HPM
{"hpm_start", ibm_hpm_start, METH_VARARGS, 0},
{"hpm_stop", ibm_hpm_stop, METH_VARARGS, 0},
{"mpi_start", (PyCFunction) ibm_mpi_start, METH_NOARGS, 0},
{"mpi_stop", (PyCFunction) ibm_mpi_stop, METH_NOARGS, 0},
#endif // GPAW_HPM
#ifdef CRAYPAT
{"craypat_region_begin", craypat_region_begin, METH_VARARGS, 0},
{"craypat_region_end", craypat_region_end, METH_VARARGS, 0},
#endif // CRAYPAT
#ifdef GPAW_PAPI
{"papi_mem_info", papi_mem_info, METH_VARARGS, 0},
#endif // GPAW_PAPI
{"mlsqr", mlsqr, METH_VARARGS, 0},
{0, 0, 0, 0}
};
#ifdef PARALLEL
extern PyTypeObject MPIType;
extern PyTypeObject GPAW_MPI_Request_type;
#endif
extern PyTypeObject LFCType;
extern PyTypeObject LocalizedFunctionsType;
extern PyTypeObject OperatorType;
extern PyTypeObject WOperatorType;
extern PyTypeObject SplineType;
extern PyTypeObject TransformerType;
extern PyTypeObject XCFunctionalType;
extern PyTypeObject lxcXCFunctionalType;
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_gpaw",
"C-extension for GPAW",
-1,
functions,
NULL,
NULL,
NULL,
NULL
};
#endif
#ifndef GPAW_INTERPRETER
static PyObject* moduleinit(void)
{
#ifdef PARALLEL
if (PyType_Ready(&MPIType) < 0)
return NULL;
if (PyType_Ready(&GPAW_MPI_Request_type) < 0)
return NULL;
#endif
if (PyType_Ready(&LFCType) < 0)
return NULL;
if (PyType_Ready(&LocalizedFunctionsType) < 0)
return NULL;
if (PyType_Ready(&OperatorType) < 0)
return NULL;
if (PyType_Ready(&WOperatorType) < 0)
return NULL;
if (PyType_Ready(&SplineType) < 0)
return NULL;
if (PyType_Ready(&TransformerType) < 0)
return NULL;
if (PyType_Ready(&XCFunctionalType) < 0)
return NULL;
if (PyType_Ready(&lxcXCFunctionalType) < 0)
return NULL;
#if PY_MAJOR_VERSION >= 3
PyObject* m = PyModule_Create(&moduledef);
#else
PyObject* m = Py_InitModule3("_gpaw", functions,
"C-extension for GPAW\n\n...\n");
#endif
if (m == NULL)
return NULL;
#ifdef PARALLEL
Py_INCREF(&MPIType);
Py_INCREF(&GPAW_MPI_Request_type);
PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType);
#endif
Py_INCREF(&LFCType);
Py_INCREF(&LocalizedFunctionsType);
Py_INCREF(&OperatorType);
Py_INCREF(&WOperatorType);
Py_INCREF(&SplineType);
Py_INCREF(&TransformerType);
Py_INCREF(&XCFunctionalType);
Py_INCREF(&lxcXCFunctionalType);
import_array1(NULL);
return m;
}
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC PyInit__gpaw(void)
{
return moduleinit();
}
#else
PyMODINIT_FUNC init_gpaw(void)
{
moduleinit();
}
#endif
#else // ifndef GPAW_INTERPRETER
extern DL_EXPORT(int) Py_Main(int, char **);
// Performance measurement
int gpaw_perf_init();
void gpaw_perf_finalize();
#include <mpi.h>
int
main(int argc, char **argv)
{
int status;
#ifdef CRAYPAT
PAT_region_begin(1, "C-Initializations");
#endif
#ifndef GPAW_OMP
MPI_Init(&argc, &argv);
#else
int granted;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &granted);
if(granted != MPI_THREAD_MULTIPLE) exit(1);
#endif // GPAW_OMP
// Get initial timing
double t0 = MPI_Wtime();
#ifdef GPAW_PERFORMANCE_REPORT
gpaw_perf_init();
#endif
#ifdef GPAW_MPI_MAP
int tag = 99;
int myid, numprocs, i, procnamesize;
char procname[MPI_MAX_PROCESSOR_NAME];
MPI_Comm_size(MPI_COMM_WORLD, &numprocs );
MPI_Comm_rank(MPI_COMM_WORLD, &myid );
MPI_Get_processor_name(procname, &procnamesize);
if (myid > 0) {
MPI_Send(&procnamesize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
MPI_Send(procname, procnamesize, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
}
else {
printf("MPI_COMM_SIZE is %d \n", numprocs);
printf("%s \n", procname);
for (i = 1; i < numprocs; ++i) {
MPI_Recv(&procnamesize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(procname, procnamesize, MPI_CHAR, i, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
printf("%s \n", procname);
}
}
#endif // GPAW_MPI_MAP
#ifdef GPAW_MPI_DEBUG
// Default Errhandler is MPI_ERRORS_ARE_FATAL
MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
#endif
// Progname seems to be needed in some circumstances to resolve
// correct default sys.path
Py_SetProgramName(argv[0]);
Py_Initialize();
if (PyType_Ready(&MPIType) < 0)
return -1;
if (PyType_Ready(&LFCType) < 0)
return -1;
if (PyType_Ready(&LocalizedFunctionsType) < 0)
return -1;
if (PyType_Ready(&OperatorType) < 0)
return -1;
if (PyType_Ready(&WOperatorType) < 0)
return -1;
if (PyType_Ready(&SplineType) < 0)
return -1;
if (PyType_Ready(&TransformerType) < 0)
return -1;
if (PyType_Ready(&XCFunctionalType) < 0)
return -1;
if (PyType_Ready(&lxcXCFunctionalType) < 0)
return -1;
PyObject* m = Py_InitModule3("_gpaw", functions,
"C-extension for GPAW\n\n...\n");
if (m == NULL)
return -1;
Py_INCREF(&MPIType);
PyModule_AddObject(m, "Communicator", (PyObject *)&MPIType);
// Add initial time to _gpaw object
PyModule_AddObject(m, "time0", PyFloat_FromDouble(t0));
Py_INCREF(&LFCType);
Py_INCREF(&LocalizedFunctionsType);
Py_INCREF(&OperatorType);
Py_INCREF(&WOperatorType);
Py_INCREF(&SplineType);
Py_INCREF(&TransformerType);
Py_INCREF(&XCFunctionalType);
Py_INCREF(&lxcXCFunctionalType);
#ifdef GPAW_WITH_HDF5
init_gpaw_hdf5();
#endif
import_array1(-1);
MPI_Barrier(MPI_COMM_WORLD);
#ifdef CRAYPAT
PAT_region_end(1);
PAT_region_begin(2, "all other");
#endif
status = Py_Main(argc, argv);
#ifdef CRAYPAT
PAT_region_end(2);
#endif
#ifdef GPAW_PERFORMANCE_REPORT
gpaw_perf_finalize();
#endif
MPI_Finalize();
return status;
}
#endif // GPAW_INTERPRETER
gpaw-0.11.0.13004/c/transformers.c 0000664 0001750 0001750 00000015607 12553643466 016605 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005-2009 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#include <pthread.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#include "bmgs/bmgs.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC_D 3
#else
#define GPAW_ASYNC_D 1
#endif
typedef struct
{
PyObject_HEAD
boundary_conditions* bc;
int p;
int k;
bool interpolate;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int skip[3][2];
int size_out[3]; /* Size of the output grid */
} TransformerObject;
static void Transformer_dealloc(TransformerObject *self)
{
free(self->bc);
PyObject_DEL(self);
}
struct transapply_args{
int thread_id;
TransformerObject *self;
int ng;
int ng2;
int nin;
int nthds;
const double* in;
double* out;
int real;
const double_complex* ph;
};
void *transapply_worker(void *threadarg)
{
struct transapply_args *args = (struct transapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
TransformerObject *self = args->self;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC_D);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC_D);
double* buf = GPAW_MALLOC(double, args->ng2);
int buf2size = args->ng2;
if (self->interpolate)
buf2size *= 16;
else
buf2size /= 2;
double* buf2 = GPAW_MALLOC(double, buf2size);
MPI_Request recvreq[2 * GPAW_ASYNC_D];
MPI_Request sendreq[2 * GPAW_ASYNC_D];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
int out_ng = bc->ndouble * self->size_out[0] * self->size_out[1]
* self->size_out[2];
for (int n = nstart; n < nend; n++)
{
const double* in = args->in + n * args->ng;
double* out = args->out + n * out_ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, 1);
bc_unpack2(bc, buf, i,
recvreq, sendreq, recvbuf, 1);
}
if (args->real)
{
if (self->interpolate)
bmgs_interpolate(self->k, self->skip, buf, bc->size2,
out, buf2);
else
bmgs_restrict(self->k, buf, bc->size2,
out, buf2);
}
else
{
if (self->interpolate)
bmgs_interpolatez(self->k, self->skip, (double_complex*)buf,
bc->size2, (double_complex*)out,
(double_complex*) buf2);
else
bmgs_restrictz(self->k, (double_complex*) buf,
bc->size2, (double_complex*)out,
(double_complex*) buf2);
}
}
free(buf2);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
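/* Work distribution: the nin input arrays are split into chunks of
 * nin/nthds (at least 1); thread t handles arrays
 * [t*chunksize, min((t+1)*chunksize, nin)) and returns immediately if its
 * start index already lies beyond nin. */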
static PyObject* Transformer_apply(TransformerObject *self, PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
const double_complex* ph = (real ? 0 : COMPLEXP(phases));
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct transapply_args *wargs = GPAW_MALLOC(struct transapply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, transapply_worker, (void*) (wargs+i));
#endif
transapply_worker(wargs);
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
static PyObject * Transformer_get_async_sizes(TransformerObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(ii)", 1, GPAW_ASYNC_D);
#else
return Py_BuildValue("(ii)", 0, GPAW_ASYNC_D);
#endif
}
static PyMethodDef Transformer_Methods[] = {
{"apply", (PyCFunction)Transformer_apply, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)Transformer_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject TransformerType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Transformer",
sizeof(TransformerObject),
0,
(destructor)Transformer_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"Transformer object",
0, 0, 0, 0, 0, 0,
Transformer_Methods
};
PyObject * NewTransformerObject(PyObject *obj, PyObject *args)
{
PyArrayObject* size_in;
PyArrayObject* size_out;
int k;
PyArrayObject* paddings;
PyArrayObject* npaddings;
PyArrayObject* skip;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int interpolate;
if (!PyArg_ParseTuple(args, "OOiOOOOiOi",
&size_in, &size_out, &k, &paddings, &npaddings, &skip,
&neighbors, &real, &comm_obj,
&interpolate))
return NULL;
TransformerObject* self = PyObject_NEW(TransformerObject, &TransformerType);
if (self == NULL)
return NULL;
self->k = k;
self->interpolate = interpolate;
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long (*pad)[2] = (const long (*)[2])LONGP(paddings);
const long (*npad)[2] = (const long (*)[2])LONGP(npaddings);
const long (*skp)[2] = (const long (*)[2])LONGP(skip);
self->bc = bc_init(LONGP(size_in), pad, npad, nb, comm, real, 0);
for (int c = 0; c < 3; c++)
self->size_out[c] = LONGP(size_out)[c];
for (int c = 0; c < 3; c++)
for (int d = 0; d < 2; d++)
self->skip[c][d] = (int)skp[c][d];
return (PyObject*)self;
}
gpaw-0.11.0.13004/c/wigner_seitz.c 0000664 0001750 0001750 00000003063 12553643466 016562 0 ustar jensj jensj 0000000 0000000 #include "extensions.h"
#include <math.h>
double distance(double *a, double *b);
// returns the squared distance between a 3d double vector
// and a 3d int vector
double distance3d2_di(double *a, int *b)
{
double sum = 0;
double diff;
for (int c = 0; c < 3; c++) {
diff = a[c] - (double)b[c];
sum += diff*diff;
}
return sum;
}
PyObject *exterior_electron_density_region(PyObject *self, PyObject *args)
{
PyArrayObject* ai;
PyArrayObject* aatom_c;
PyArrayObject* beg_c;
PyArrayObject* end_c;
PyArrayObject* hh_c;
PyArrayObject* vdWrad;
if (!PyArg_ParseTuple(args, "OOOOOO", &ai, &aatom_c,
&beg_c, &end_c, &hh_c, &vdWrad))
return NULL;
long *aindex = LONGP(ai);
int natoms = PyArray_DIM(aatom_c, 0);
double *atom_c = DOUBLEP(aatom_c);
long *beg = LONGP(beg_c);
long *end = LONGP(end_c);
double *h_c = DOUBLEP(hh_c);
double *vdWradius = DOUBLEP(vdWrad);
int n[3], ij;
double pos[3];
for (int c = 0; c < 3; c++) { n[c] = end[c] - beg[c]; }
// loop over all points
for (int i = 0; i < n[0]; i++) {
pos[0] = (beg[0] + i) * h_c[0];
for (int j = 0; j < n[1]; j++) {
pos[1] = (beg[1] + j) * h_c[1];
ij = (i*n[1] + j)*n[2];
for (int k = 0; k < n[2]; k++) {
pos[2] = (beg[2] + k) * h_c[2];
aindex[ij + k] = (long) 1; /* assume outside the structure */
// loop over all atoms
for (int a=0; a < natoms; a++) {
double d = distance(atom_c + a*3, pos);
if (d < vdWradius[a]) {
aindex[ij + k] = (long) 0; /* this is inside */
a = natoms;
}
}
}
}
}
Py_RETURN_NONE;
}
gpaw-0.11.0.13004/c/point_charges.c 0000664 0001750 0001750 00000004267 12553643466 016705 0 ustar jensj jensj 0000000 0000000 #include "extensions.h"
#include
double distance(double *a, double *b);
double pc_pot_value(double *pos, // position [Bohr]
double *pc_pos, // PC positions [Bohr]
double *pc_q, // PC charges [atomic units]
int npc, // # of PCs
double v_max) // max. potential
// loop over all point charges and add their potentials
{
double V = 0.0;
for (int a=0; a < npc; a++) {
double d = distance(pc_pos + a*3, pos);
double v = v_max;
if(d != 0.0) { v = MIN(v_max, 1. / d); }
V -= pc_q[a] * v;
}
return V;
}
PyObject *pc_potential_value(PyObject *self, PyObject *args)
{
PyArrayObject* posi_c;
PyArrayObject* pci_nc;
PyArrayObject* qi_n;
if (!PyArg_ParseTuple(args, "OOO", &posi_c, &pci_nc, &qi_n))
return NULL;
double *pos_c = DOUBLEP(posi_c);
int npc = PyArray_DIMS(pci_nc)[0];
double *pc_nc = DOUBLEP(pci_nc);
double *q_n = DOUBLEP(qi_n);
return Py_BuildValue("d", pc_pot_value(pos_c, pc_nc, q_n, npc, 1.e+99));
}
PyObject *pc_potential(PyObject *self, PyObject *args)
{
PyArrayObject* poti;
PyArrayObject* pci_nc;
PyArrayObject* beg_c;
PyArrayObject* end_c;
PyArrayObject* hh_c;
PyArrayObject* qi_n;
if (!PyArg_ParseTuple(args, "OOOOOO", &poti, &pci_nc, &qi_n,
&beg_c, &end_c, &hh_c))
return NULL;
double *pot = DOUBLEP(poti);
int npc = PyArray_DIMS(pci_nc)[0];
double *pc_nc = DOUBLEP(pci_nc);
long *beg = LONGP(beg_c);
long *end = LONGP(end_c);
double *h_c = DOUBLEP(hh_c);
double *q_n = DOUBLEP(qi_n);
// cutoff to avoid singularities
// = Coulomb integral over a ball of the same volume
// as the volume element of the grid
double dV = h_c[0] * h_c[1] * h_c[2];
double v_max = 2.417988 / cbrt(dV);
// double v_max = .5;
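/* The constant 2.417988 is (3/2)*(4*pi/3)^(1/3): averaging 1/r over a ball
   of volume dV, with (4/3)*pi*R^3 = dV, gives <1/r> = 3/(2R)
   = (3/2)*(4*pi/(3*dV))^(1/3) = 2.417988 / cbrt(dV). */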
int n[3], ij;
double pos[3];
for (int c = 0; c < 3; c++) { n[c] = end[c] - beg[c]; }
// loop over all points
for (int i = 0; i < n[0]; i++) {
pos[0] = (beg[0] + i) * h_c[0];
for (int j = 0; j < n[1]; j++) {
pos[1] = (beg[1] + j) * h_c[1];
ij = (i*n[1] + j)*n[2];
for (int k = 0; k < n[2]; k++) {
pos[2] = (beg[2] + k) * h_c[2];
pot[ij + k] = pc_pot_value(pos, pc_nc, q_n, npc, v_max);
}
}
}
Py_RETURN_NONE;
}
gpaw-0.11.0.13004/c/lfc.h 0000664 0001750 0001750 00000010610 12553643466 014616 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#ifndef LFC_H
#define LFC_H
#include <Python.h>
#include <stdbool.h>
#include <complex.h>
typedef struct
{
const double* A_gm; // function values
int nm; // number of functions (2*l+1)
int M; // global number of first function
int W; // volume number
} LFVolume;
typedef struct
{
PyObject_HEAD
double dv; // volume per grid point
int nW; // number of volumes
int nB; // number of boundary points
double* work_gm; // work space
LFVolume* volume_W; // pointers to volumes
LFVolume* volume_i; // pointers to volumes at current grid point
int* G_B; // boundary grid points
int* W_B; // volume numbers
int* i_W; // mapping from all volumes to current volumes
int* ngm_W; // number of grid points per volume
bool bloch_boundary_conditions; // Gamma-point calculation?
complex double* phase_kW; // phase factors: exp(ik.R)
complex double* phase_i; // phase factors for current volumes
} LFCObject;
#define GRID_LOOP_START(lfc, k) \
{ \
int* G_B = lfc->G_B; \
int* W_B = lfc->W_B; \
int* i_W = lfc->i_W; \
complex double* phase_i = lfc->phase_i; \
LFVolume* volume_i = lfc->volume_i; \
LFVolume* volume_W = lfc->volume_W; \
double complex* phase_W = lfc->phase_kW + k * lfc->nW; \
int Ga = 0; \
int ni = 0; \
for (int B = 0; B < lfc->nB; B++) \
{ \
int Gb = G_B[B]; \
int nG = Gb - Ga; \
if (nG > 0) \
{
#define GRID_LOOP_STOP(lfc, k) \
for (int i = 0; i < ni; i++) \
volume_i[i].A_gm += nG * volume_i[i].nm; \
} \
int Wnew = W_B[B]; \
if (Wnew >= 0) \
{ \
/* Entering new sphere: */ \
volume_i[ni] = volume_W[Wnew]; \
if (k >= 0) \
phase_i[ni] = phase_W[Wnew]; \
i_W[Wnew] = ni; \
ni++; \
} \
else \
{ \
/* Leaving sphere: */ \
int Wold = -1 - Wnew; \
int iold = i_W[Wold]; \
volume_W[Wold].A_gm = volume_i[iold].A_gm; \
ni--; \
volume_i[iold] = volume_i[ni]; \
if (k >= 0) \
phase_i[iold] = phase_i[ni]; \
int Wlast = volume_i[iold].W; \
i_W[Wlast] = iold; \
} \
Ga = Gb; \
} \
for (int W = 0; W < lfc->nW; W++) \
volume_W[W].A_gm -= lfc->ngm_W[W]; \
}
#endif
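/* Hedged usage sketch (not from the GPAW sources) showing how the two macros
 * above are normally paired.  It assumes `lfc` points at a fully initialized
 * LFCObject; `a_G` (a grid array) and `c_M` (expansion coefficients) are
 * caller-provided arrays whose names are invented for this sketch.  For a
 * Gamma-point calculation k is passed as -1, so the phase factors are never
 * touched. */
static void lfc_add_sketch(LFCObject* lfc, double* a_G, const double* c_M)
{
  GRID_LOOP_START(lfc, -1)
  {
    /* Inside the body the macros provide Ga and Gb (the current run of grid
     * points), ni (number of localized-function volumes covering the run) and
     * volume_i (their function values A_gm, sizes nm and offsets M). */
    for (int i = 0; i < ni; i++) {
      LFVolume* v = volume_i + i;
      int gm = 0;
      for (int G = Ga; G < Gb; G++)
        for (int m = 0; m < v->nm; m++, gm++)
          a_G[G] += v->A_gm[gm] * c_M[v->M + m];
    }
  }
  GRID_LOOP_STOP(lfc, -1)
}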
gpaw-0.11.0.13004/c/woperators.c 0000664 0001750 0001750 00000044536 12553643466 016270 0 ustar jensj jensj 0000000 0000000 /* This file (woperators.c) is a modified copy of operators.c
* with added support for nonlocal operator weights.
* The original copyright note of operators.c follows:
* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
//*** The apply operator and some associated structures are      ***//
//*** implemented in two versions: an original version and a     ***//
//*** special OpenMP version. By default the original version    ***//
//*** is used, but the OpenMP version can be selected by         ***//
//*** compiling gpaw with the macro GPAW_OMP defined and the     ***//
//*** compile/link option "-fopenmp".                             ***//
//*** Author of the optimized OpenMP code: ***//
//*** Mads R. B. Kristensen - madsbk@diku.dk ***//
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <pthread.h>
#include <stdlib.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC3 3
#define GPAW_ASYNC2 2
#else
#define GPAW_ASYNC3 1
#define GPAW_ASYNC2 1
#endif
typedef struct
{
PyObject_HEAD
int nweights;
const double** weights;
bmgsstencil* stencils;
boundary_conditions* bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
} WOperatorObject;
static void WOperator_dealloc(WOperatorObject *self)
{
free(self->bc);
for (int i = 0; i < self->nweights; i++)
{
free(self->stencils[i].coefs);
free(self->stencils[i].offsets);
}
free(self->stencils);
free(self->weights);
PyObject_DEL(self);
}
static PyObject * WOperator_relax(WOperatorObject *self,
PyObject *args)
{
int relax_method;
PyArrayObject* func;
PyArrayObject* source;
int nrelax;
double w = 1.0;
if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source,
&nrelax, &w))
return NULL;
const boundary_conditions* bc = self->bc;
double* fun = DOUBLEP(func);
const double* src = DOUBLEP(source);
const double_complex* ph;
const int* size2 = bc->size2;
double* buf = (double*) GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] *
bc->ndouble);
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv);
const double** weights = (const double**) GPAW_MALLOC(double*, self->nweights);
ph = 0;
for (int n = 0; n < nrelax; n++ )
{
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, fun, buf, i,
self->recvreq, self->sendreq,
recvbuf, sendbuf, ph + 2 * i, 0, 1);
bc_unpack2(bc, buf, i,
self->recvreq, self->sendreq, recvbuf, 1);
}
for (int iw = 0; iw < self->nweights; iw++)
weights[iw] = self->weights[iw];
bmgs_wrelax(relax_method, self->nweights, self->stencils, weights, buf, fun, src, w);
}
free(weights);
free(recvbuf);
free(sendbuf);
free(buf);
Py_RETURN_NONE;
}
struct wapply_args{
int thread_id;
WOperatorObject *self;
int ng;
int ng2;
int nin;
int nthds;
int chunksize;
int chunkinc;
const double* in;
double* out;
int real;
const double_complex* ph;
};
//Plain worker
void *wapply_worker(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * args->chunksize);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * args->chunksize);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, chunksize);
bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, chunksize);
}
for (int m = 0; m < chunksize; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2, out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Async worker
void *wapply_worker_cfd_async(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3];
MPI_Request sendreq[2 * GPAW_ASYNC3];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC3 *
args->chunksize);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC3 *
args->chunksize);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize,
sendbuf + i * bc->maxsend * chunksize, args->ph + 2 * i,
args->thread_id, chunksize);
}
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize, chunksize);
}
for (int m = 0; m < chunksize; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2, out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Double buffering async worker
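/* The worker below overlaps communication with computation: two halves of
 * buf/sendbuf/recvbuf are kept and `odd` selects the active half, so the halo
 * exchange (bc_unpack1) for the next chunk can be posted before the previous
 * chunk is completed (bc_unpack2) and run through the weighted
 * finite-difference kernels. */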
void *wapply_worker_cfd(void *threadarg)
{
struct wapply_args *args = (struct wapply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
int chunk = args->chunkinc;
  if (chunk > chunksize)
    chunk = chunksize;
double* sendbuf = (double*) GPAW_MALLOC(double, bc->maxsend * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* recvbuf = (double*) GPAW_MALLOC(double, bc->maxrecv * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* buf = (double*) GPAW_MALLOC(double, args->ng2 * args->chunksize * GPAW_ASYNC2);
const double** weights = (const double**) GPAW_MALLOC(double*, args->self->nweights);
int odd = 0;
const double* in = args->in + nstart * args->ng;
double* out;
for (int i = 0; i < 3; i++)
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, chunk);
odd = odd ^ 1;
int last_chunk = chunk;
for (int n = nstart+chunk; n < nend; n += chunk)
{
last_chunk += args->chunkinc;
    if (last_chunk > chunksize)
      last_chunk = chunksize;
if (n + last_chunk >= nend && last_chunk > 1)
last_chunk = nend - n;
in = args->in + n * args->ng;
out = args->out + (n-chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, last_chunk);
}
odd = odd ^ 1;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, chunk);
}
for (int m = 0; m < chunk; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2 + odd * args->ng2 * chunksize;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
}
chunk = last_chunk;
}
odd = odd ^ 1;
out = args->out + (nend-last_chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, last_chunk);
}
for (int m = 0; m < last_chunk; m++)
{
for (int iw = 0; iw < args->self->nweights; iw++)
weights[iw] = args->self->weights[iw] + m * args->ng2 + odd * args->ng2 * chunksize;
if (args->real)
bmgs_wfd(args->self->nweights, args->self->stencils, weights,
buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_wfdz(args->self->nweights, args->self->stencils, weights,
(const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
}
free(weights);
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
static PyObject * WOperator_apply(WOperatorObject *self,
PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
const double_complex* ph;
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
if (real)
ph = 0;
else
ph = COMPLEXP(phases);
int chunksize = 1;
if (getenv("GPAW_CHUNK_SIZE") != NULL)
chunksize = atoi(getenv("GPAW_CHUNK_SIZE"));
int chunkinc = chunksize;
if (getenv("GPAW_CHUNK_INC") != NULL)
chunkinc = atoi(getenv("GPAW_CHUNK_INC"));
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct wapply_args *wargs = GPAW_MALLOC(struct wapply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->chunksize = chunksize;
(wargs+i)->chunkinc = chunkinc;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifndef GPAW_ASYNC
if (1)
#else
if (bc->cfd == 0)
#endif
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, wapply_worker, (void*) (wargs+i));
#endif
wapply_worker(wargs);
}
else
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, wapply_worker_cfd, (void*) (wargs+i));
#endif
wapply_worker_cfd(wargs);
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
static PyObject * WOperator_get_diagonal_element(WOperatorObject *self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
const double** weights = (const double**) GPAW_MALLOC(double*, self->nweights);
for (int iw = 0; iw < self->nweights; iw++)
weights[iw] = self->weights[iw];
const int n0 = self->stencils[0].n[0];
const int n1 = self->stencils[0].n[1];
const int n2 = self->stencils[0].n[2];
double d = 0.0;
for (int i0 = 0; i0 < n0; i0++)
{
for (int i1 = 0; i1 < n1; i1++)
{
for (int i2 = 0; i2 < n2; i2++)
{
double coef = 0.0;
for (int iw = 0; iw < self->nweights; iw++)
{
coef += weights[iw][0] * self->stencils[iw].coefs[0];
weights[iw]++;
}
if (coef < 0)
coef = -coef;
if (coef > d)
d = coef;
}
}
}
free(weights);
return Py_BuildValue("d", d);
}
static PyObject * WOperator_get_async_sizes(WOperatorObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3);
#else
return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3);
#endif
}
static PyMethodDef WOperator_Methods[] = {
{"apply",
(PyCFunction)WOperator_apply, METH_VARARGS, NULL},
{"relax",
(PyCFunction)WOperator_relax, METH_VARARGS, NULL},
{"get_diagonal_element",
(PyCFunction)WOperator_get_diagonal_element, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)WOperator_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject WOperatorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"WOperator",
sizeof(WOperatorObject),
0,
(destructor)WOperator_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"FDW-operator object",
0, 0, 0, 0, 0, 0,
WOperator_Methods
};
PyObject* NewWOperatorObject(PyObject *obj, PyObject *args)
{
PyObject* coefs_list;
PyArrayObject* coefs;
PyObject* offsets_list;
PyArrayObject* offsets;
PyObject* weights_list;
PyArrayObject* weights;
PyArrayObject* size;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int cfd;
int range;
int nweights;
if (!PyArg_ParseTuple(args, "iO!O!O!OiOiOi",
&nweights,
&PyList_Type, &weights_list,
&PyList_Type, &coefs_list,
&PyList_Type, &offsets_list,
&size,
&range,
&neighbors, &real, &comm_obj, &cfd))
return NULL;
WOperatorObject *self = PyObject_NEW(WOperatorObject, &WOperatorType);
if (self == NULL)
return NULL;
self->stencils = (bmgsstencil*) GPAW_MALLOC(bmgsstencil, nweights);
self->weights = (const double**) GPAW_MALLOC(double*, nweights);
self->nweights = nweights;
for (int iw = 0; iw < nweights; iw++)
{
coefs = (PyArrayObject*) PyList_GetItem(coefs_list, iw);
offsets = (PyArrayObject*) PyList_GetItem(offsets_list, iw);
weights = (PyArrayObject*) PyList_GetItem(weights_list, iw);
self->stencils[iw] = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs),
LONGP(offsets), range, LONGP(size));
self->weights[iw] = DOUBLEP(weights);
}
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long padding[3][2] = {{range, range},
{range, range},
{range, range}};
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
self->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd);
return (PyObject*) self;
}
gpaw-0.11.0.13004/c/operators.c 0000664 0001750 0001750 00000035523 12553643466 016075 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Copyright (C) 2005 CSC - IT Center for Science Ltd.
* Please see the accompanying LICENSE file for further information. */
//*** The apply operator and some associated structures are      ***//
//*** implemented in two versions: an original version and a     ***//
//*** special OpenMP version. By default the original version    ***//
//*** is used, but the OpenMP version can be selected by         ***//
//*** compiling gpaw with the macro GPAW_OMP defined and the     ***//
//*** compile/link option "-fopenmp".                             ***//
//*** Author of the optimized OpenMP code: ***//
//*** Mads R. B. Kristensen - madsbk@diku.dk ***//
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <pthread.h>
#include <stdlib.h>
#include "extensions.h"
#include "bc.h"
#include "mympi.h"
#ifdef GPAW_ASYNC
#define GPAW_ASYNC3 3
#define GPAW_ASYNC2 2
#else
#define GPAW_ASYNC3 1
#define GPAW_ASYNC2 1
#endif
typedef struct
{
PyObject_HEAD
bmgsstencil stencil;
boundary_conditions* bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
} OperatorObject;
static void Operator_dealloc(OperatorObject *self)
{
free(self->bc);
PyObject_DEL(self);
}
static PyObject * Operator_relax(OperatorObject *self,
PyObject *args)
{
int relax_method;
PyArrayObject* func;
PyArrayObject* source;
int nrelax;
double w = 1.0;
if (!PyArg_ParseTuple(args, "iOOi|d", &relax_method, &func, &source,
&nrelax, &w))
return NULL;
const boundary_conditions* bc = self->bc;
double* fun = DOUBLEP(func);
const double* src = DOUBLEP(source);
const double_complex* ph;
const int* size2 = bc->size2;
double* buf = GPAW_MALLOC(double, size2[0] * size2[1] * size2[2] *
bc->ndouble);
double* sendbuf = GPAW_MALLOC(double, bc->maxsend);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv);
ph = 0;
for (int n = 0; n < nrelax; n++ )
{
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, fun, buf, i,
self->recvreq, self->sendreq,
recvbuf, sendbuf, ph + 2 * i, 0, 1);
bc_unpack2(bc, buf, i,
self->recvreq, self->sendreq, recvbuf, 1);
}
bmgs_relax(relax_method, &self->stencil, buf, fun, src, w);
}
free(recvbuf);
free(sendbuf);
free(buf);
Py_RETURN_NONE;
}
struct apply_args{
int thread_id;
OperatorObject *self;
int ng;
int ng2;
int nin;
int nthds;
int chunksize;
int chunkinc;
const double* in;
double* out;
int real;
const double_complex* ph;
};
//Plain worker
void *apply_worker(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2];
MPI_Request sendreq[2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * args->chunksize);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * args->chunksize);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq, sendreq,
recvbuf, sendbuf, args->ph + 2 * i,
args->thread_id, chunksize);
bc_unpack2(bc, buf, i, recvreq, sendreq, recvbuf, chunksize);
}
for (int m = 0; m < chunksize; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2, out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Async worker
void *apply_worker_cfd_async(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3];
MPI_Request sendreq[2 * GPAW_ASYNC3];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * GPAW_ASYNC3 *
args->chunksize);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * GPAW_ASYNC3 *
args->chunksize);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize);
for (int n = nstart; n < nend; n += chunksize)
{
if (n + chunksize >= nend && chunksize > 1)
chunksize = nend - n;
const double* in = args->in + n * args->ng;
double* out = args->out + n * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize,
sendbuf + i * bc->maxsend * chunksize, args->ph + 2 * i,
args->thread_id, chunksize);
}
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf, i,
recvreq + i * 2, sendreq + i * 2,
recvbuf + i * bc->maxrecv * chunksize, chunksize);
}
for (int m = 0; m < chunksize; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2, out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2),
(double_complex*) (out + m * args->ng));
}
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
//Double buffering async worker
void *apply_worker_cfd(void *threadarg)
{
struct apply_args *args = (struct apply_args *) threadarg;
boundary_conditions* bc = args->self->bc;
MPI_Request recvreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
MPI_Request sendreq[2 * GPAW_ASYNC3 * GPAW_ASYNC2];
int chunksize = args->nin / args->nthds;
if (!chunksize)
chunksize = 1;
int nstart = args->thread_id * chunksize;
if (nstart >= args->nin)
return NULL;
int nend = nstart + chunksize;
if (nend > args->nin)
nend = args->nin;
if (chunksize > args->chunksize)
chunksize = args->chunksize;
int chunk = args->chunkinc;
  if (chunk > chunksize)
    chunk = chunksize;
double* sendbuf = GPAW_MALLOC(double, bc->maxsend * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* recvbuf = GPAW_MALLOC(double, bc->maxrecv * args->chunksize
* GPAW_ASYNC3 * GPAW_ASYNC2);
double* buf = GPAW_MALLOC(double, args->ng2 * args->chunksize * GPAW_ASYNC2);
int odd = 0;
const double* in = args->in + nstart * args->ng;
double* out;
for (int i = 0; i < 3; i++)
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, chunk);
odd = odd ^ 1;
int last_chunk = chunk;
for (int n = nstart+chunk; n < nend; n += chunk)
{
last_chunk += args->chunkinc;
    if (last_chunk > chunksize)
      last_chunk = chunksize;
if (n + last_chunk >= nend && last_chunk > 1)
last_chunk = nend - n;
in = args->in + n * args->ng;
out = args->out + (n-chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack1(bc, in, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2,
sendbuf + odd * bc->maxsend * chunksize + i * bc->maxsend * chunksize * GPAW_ASYNC2, args->ph + 2 * i,
args->thread_id, last_chunk);
}
odd = odd ^ 1;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, chunk);
}
for (int m = 0; m < chunk; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
chunk = last_chunk;
}
odd = odd ^ 1;
out = args->out + (nend-last_chunk) * args->ng;
for (int i = 0; i < 3; i++)
{
bc_unpack2(bc, buf + odd * args->ng2 * chunksize, i,
recvreq + odd * 2 + i * 4, sendreq + odd * 2 + i * 4,
recvbuf + odd * bc->maxrecv * chunksize + i * bc->maxrecv * chunksize * GPAW_ASYNC2, last_chunk);
}
for (int m = 0; m < last_chunk; m++)
if (args->real)
bmgs_fd(&args->self->stencil, buf + m * args->ng2 + odd * args->ng2 * chunksize,
out + m * args->ng);
else
bmgs_fdz(&args->self->stencil, (const double_complex*) (buf + m * args->ng2 + odd * args->ng2 * chunksize),
(double_complex*) (out + m * args->ng));
free(buf);
free(recvbuf);
free(sendbuf);
return NULL;
}
static PyObject * Operator_apply(OperatorObject *self,
PyObject *args)
{
PyArrayObject* input;
PyArrayObject* output;
PyArrayObject* phases = 0;
if (!PyArg_ParseTuple(args, "OO|O", &input, &output, &phases))
return NULL;
int nin = 1;
if (PyArray_NDIM(input) == 4)
nin = PyArray_DIMS(input)[0];
boundary_conditions* bc = self->bc;
const int* size1 = bc->size1;
const int* size2 = bc->size2;
int ng = bc->ndouble * size1[0] * size1[1] * size1[2];
int ng2 = bc->ndouble * size2[0] * size2[1] * size2[2];
const double* in = DOUBLEP(input);
double* out = DOUBLEP(output);
const double_complex* ph;
bool real = (PyArray_DESCR(input)->type_num == NPY_DOUBLE);
if (real)
ph = 0;
else
ph = COMPLEXP(phases);
int chunksize = 1;
if (getenv("GPAW_CHUNK_SIZE") != NULL)
chunksize = atoi(getenv("GPAW_CHUNK_SIZE"));
int chunkinc = chunksize;
if (getenv("GPAW_CHUNK_INC") != NULL)
chunkinc = atoi(getenv("GPAW_CHUNK_INC"));
int nthds = 1;
#ifdef GPAW_OMP
if (getenv("OMP_NUM_THREADS") != NULL)
nthds = atoi(getenv("OMP_NUM_THREADS"));
#endif
struct apply_args *wargs = GPAW_MALLOC(struct apply_args, nthds);
pthread_t *thds = GPAW_MALLOC(pthread_t, nthds);
for(int i=0; i < nthds; i++)
{
(wargs+i)->thread_id = i;
(wargs+i)->nthds = nthds;
(wargs+i)->chunksize = chunksize;
(wargs+i)->chunkinc = chunkinc;
(wargs+i)->self = self;
(wargs+i)->ng = ng;
(wargs+i)->ng2 = ng2;
(wargs+i)->nin = nin;
(wargs+i)->in = in;
(wargs+i)->out = out;
(wargs+i)->real = real;
(wargs+i)->ph = ph;
}
#ifndef GPAW_ASYNC
if (1)
#else
if (bc->cfd == 0)
#endif
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, apply_worker, (void*) (wargs+i));
#endif
apply_worker(wargs);
}
else
{
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_create(thds + i, NULL, apply_worker_cfd, (void*) (wargs+i));
#endif
apply_worker_cfd(wargs);
}
#ifdef GPAW_OMP
for(int i=1; i < nthds; i++)
pthread_join(*(thds+i), NULL);
#endif
free(wargs);
free(thds);
Py_RETURN_NONE;
}
static PyObject * Operator_get_diagonal_element(OperatorObject *self,
PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
const bmgsstencil* s = &self->stencil;
double d = 0.0;
for (int n = 0; n < s->ncoefs; n++)
if (s->offsets[n] == 0)
d = s->coefs[n];
return Py_BuildValue("d", d);
}
static PyObject * Operator_get_async_sizes(OperatorObject *self, PyObject *args)
{
if (!PyArg_ParseTuple(args, ""))
return NULL;
#ifdef GPAW_ASYNC
return Py_BuildValue("(iii)", 1, GPAW_ASYNC2, GPAW_ASYNC3);
#else
return Py_BuildValue("(iii)", 0, GPAW_ASYNC2, GPAW_ASYNC3);
#endif
}
static PyMethodDef Operator_Methods[] = {
{"apply",
(PyCFunction)Operator_apply, METH_VARARGS, NULL},
{"relax",
(PyCFunction)Operator_relax, METH_VARARGS, NULL},
{"get_diagonal_element",
(PyCFunction)Operator_get_diagonal_element, METH_VARARGS, NULL},
{"get_async_sizes",
(PyCFunction)Operator_get_async_sizes, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};
PyTypeObject OperatorType = {
PyVarObject_HEAD_INIT(NULL, 0)
"Operator",
sizeof(OperatorObject),
0,
(destructor)Operator_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"FD-operator object",
0, 0, 0, 0, 0, 0,
Operator_Methods
};
PyObject * NewOperatorObject(PyObject *obj, PyObject *args)
{
PyArrayObject* coefs;
PyArrayObject* offsets;
PyArrayObject* size;
int range;
PyArrayObject* neighbors;
int real;
PyObject* comm_obj;
int cfd;
if (!PyArg_ParseTuple(args, "OOOiOiOi",
&coefs, &offsets, &size, &range, &neighbors,
&real, &comm_obj, &cfd))
return NULL;
OperatorObject *self = PyObject_NEW(OperatorObject, &OperatorType);
if (self == NULL)
return NULL;
self->stencil = bmgs_stencil(PyArray_DIMS(coefs)[0], DOUBLEP(coefs),
LONGP(offsets), range, LONGP(size));
const long (*nb)[2] = (const long (*)[2])LONGP(neighbors);
const long padding[3][2] = {{range, range},
{range, range},
{range, range}};
MPI_Comm comm = MPI_COMM_NULL;
if (comm_obj != Py_None)
comm = ((MPIObject*)comm_obj)->comm;
self->bc = bc_init(LONGP(size), padding, padding, nb, comm, real, cfd);
return (PyObject*)self;
}
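/* Hedged stand-alone illustration (not part of GPAW): the band partitioning
 * used by apply_worker() and its variants above.  Every thread takes a block
 * of nin/nthds bands (at least one) and walks its block in chunks of at most
 * GPAW_CHUNK_SIZE bands; threads whose block would start beyond nin return
 * immediately.  The sizes below are made-up example values. */
#include <stdio.h>
static void chunk_partition_example(void)
{
  int nin = 8, nthds = 4, max_chunk = 2;   /* e.g. 8 bands, 4 threads, GPAW_CHUNK_SIZE=2 */
  for (int thread_id = 0; thread_id < nthds; thread_id++) {
    int chunksize = nin / nthds;
    if (!chunksize)
      chunksize = 1;
    int nstart = thread_id * chunksize;
    if (nstart >= nin)
      continue;                            /* the worker returns NULL here */
    int nend = nstart + chunksize;
    if (nend > nin)
      nend = nin;
    if (chunksize > max_chunk)
      chunksize = max_chunk;
    printf("thread %d: bands [%d, %d) in chunks of %d\n",
           thread_id, nstart, nend, chunksize);
  }
}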
gpaw-0.11.0.13004/c/xc/ 0000775 0001750 0001750 00000000000 12553644063 014307 5 ustar jensj jensj 0000000 0000000 gpaw-0.11.0.13004/c/xc/xc_mgga.c 0000664 0001750 0001750 00000007424 12553643466 016075 0 ustar jensj jensj 0000000 0000000
#include <stdlib.h>   /* malloc, free */
#include <assert.h>   /* assert */
#include "xc_mgga.h"
#include "xc_gpaw.h"
extern const mgga_func_info m06l_info;
extern const mgga_func_info tpss_info;
extern const mgga_func_info revtpss_info;
static void init_common(common_params* params, int code, int nspin, const mgga_func_info *finfo) {
params->code = code;
params->nspin = nspin;
params->funcinfo = finfo;
}
void init_mgga(void** params, int code, int nspin) {
const mgga_func_info *finfo;
if (code==20) {
finfo = &tpss_info;
} else if (code==21) {
finfo = &m06l_info;
} else if (code==22) {
finfo = &revtpss_info;
} else {
// this should never happen. forces a crash.
assert(code>=20 && code <=22);
finfo = NULL;
}
*params = malloc(finfo->size);
init_common(*params, code, nspin, finfo);
finfo->init(*params);
}
void end_mgga(common_params *common) {
common->funcinfo->end(common);
free(common);
}
void calc_mgga(void** params, int nspin, int ng,
const double* n_g, const double* sigma_g, const double* tau_g,
double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g) {
common_params *common = (common_params*)*params;
// check for a changed spin (similar to a line in gpaw/libxc.py)
if (nspin!=common->nspin) {
int code = common->code; // save this, since we're about to destroy common
end_mgga(common);
init_mgga(params, code, nspin);
common = (common_params*)*params; // init_mgga changes this
}
if (nspin == 1) {
for (int g = 0; g < ng; g++) {
// kludge n[1] because of the way TPSS was written (requires n[1]=0.0 even for unpolarized)
double n[2];
n[0] = n_g[g];
n[1] = 0.0;
if (n[0] < NMIN) n[0] = NMIN;
// m06l is assuming that there is space for spinpolarized calculation output
// even for non-spin-polarized.
double etmp, vtmp[2], dedsigmatmp[3], dedtautmp[2];
common->funcinfo->exch(*params, n, sigma_g+g, tau_g+g,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] = etmp;
v_g[g] += vtmp[0];
dedsigma_g[g] = dedsigmatmp[0];
dedtau_g[g] = dedtautmp[0];
common->funcinfo->corr(*params, n, sigma_g+g, tau_g+g,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] += etmp;
e_g[g] *= n[0];
v_g[g] += vtmp[0];
dedsigma_g[g] += dedsigmatmp[0];
dedtau_g[g] += dedtautmp[0];
}
} else {
double etmp, ntmp[2], vtmp[2], sigmatmp[3], dedsigmatmp[3],
tautmp[2], dedtautmp[2];
for (int g = 0; g < ng; g++) {
ntmp[0] = n_g[g];
if (ntmp[0] < NMIN) ntmp[0] = NMIN;
ntmp[1] = n_g[g+ng];
if (ntmp[1] < NMIN) ntmp[1] = NMIN;
sigmatmp[0] = sigma_g[g];
sigmatmp[1] = sigma_g[g+ng];
sigmatmp[2] = sigma_g[g+ng+ng];
tautmp[0] = tau_g[g];
tautmp[1] = tau_g[g+ng];
// kludge: mgga_x_tpss requires dedsigma[1] set to 0, since it doesn't calculate it.
dedsigmatmp[1]=0.0;
common->funcinfo->exch(*params, ntmp, sigmatmp, tautmp,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] = etmp;
v_g[g] += vtmp[0];
v_g[g+ng] += vtmp[1];
dedsigma_g[g] = dedsigmatmp[0];
dedsigma_g[g+ng] = dedsigmatmp[1];
dedsigma_g[g+ng+ng] = dedsigmatmp[2];
dedtau_g[g] = dedtautmp[0];
dedtau_g[g+ng] = dedtautmp[1];
common->funcinfo->corr(*params, ntmp, sigmatmp, tautmp,
&etmp, vtmp, dedsigmatmp, dedtautmp);
e_g[g] += etmp;
e_g[g] *= ntmp[0]+ntmp[1];
v_g[g] += vtmp[0];
v_g[g+ng] += vtmp[1];
dedsigma_g[g] += dedsigmatmp[0];
dedsigma_g[g+ng] += dedsigmatmp[1];
dedsigma_g[g+ng+ng] += dedsigmatmp[2];
dedtau_g[g] += dedtautmp[0];
dedtau_g[g+ng] += dedtautmp[1];
}
}
}
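/* Hedged calling sketch (illustration only, not from the GPAW sources) for the
 * three entry points above: init_mgga() selects the functional from its
 * integer code (20 = TPSS, 21 = M06-L, 22 = revTPSS), calc_mgga() evaluates a
 * batch of grid points, and end_mgga() releases the auxiliary functionals.
 * The density, gradient and kinetic-energy-density values are made-up numbers;
 * note that v_g is accumulated into, so it must be zeroed by the caller. */
static void mgga_call_sketch(void)
{
  void* params;
  init_mgga(&params, 20, 1);            /* TPSS, spin-paired */
  double n_g[1] = {0.1};                /* density */
  double sigma_g[1] = {0.01};           /* |grad n|^2 */
  double tau_g[1] = {0.05};             /* kinetic energy density */
  double e_g[1], v_g[1] = {0.0}, dedsigma_g[1], dedtau_g[1];
  calc_mgga(&params, 1, 1, n_g, sigma_g, tau_g,
            e_g, v_g, dedsigma_g, dedtau_g);
  end_mgga((common_params*) params);
}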
gpaw-0.11.0.13004/c/xc/pw91.c 0000664 0001750 0001750 00000012245 12553643466 015265 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <math.h>   /* sqrt, exp, log, pow, asinh */
#include "xc_gpaw.h"
double G(double rtrs, double A, double alpha1,
double beta1, double beta2, double beta3, double beta4,
double* dGdrs);
double pw91_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
if (par->gga)
{
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
double s = sqrt(s2);
double f1 = 7.7956 * s;
double f2 = 0.19645 * asinh(f1);
double f3 = 0.1508 * exp(-100.0 * s2);
double f4 = 0.004 * s2 * s2;
double f5 = 1.0 + s * f2;
double f6 = f5 + f4;
double f7 = 0.2743 - f3;
double f8 = f5 + f7 * s2;
double Fx = f8 / f6;
double f9 = 0.5 * 7.7956 * 0.19645 / sqrt(1.0 + f1 * f1);
if (s < 0.00001)
f9 += 0.5 * 7.7956 * 0.19645;
else
f9 += 0.5 * f2 / s;
double dFxds2 = ((f9 + f7 + 100.0 * f3 * s2) * f6 -
f8 * (f9 + 0.008 * s2)) / (f6 * f6);
double ds2drs = 8.0 * s2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
}
return e;
}
double pw91_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2)
{
double rtrs = sqrt(rs);
double de0drs;
double e0 = G(rtrs, GAMMA, 0.21370, 7.5957, 3.5876, 1.6382, 0.49294,
&de0drs);
double e;
double xp = 117.0;
double xm = 117.0;
if (spinpol)
{
double de1drs;
double e1 = G(rtrs, 0.015545, 0.20548, 14.1189, 6.1977, 3.3662,
0.62517, &de1drs);
double dalphadrs;
double alpha = -G(rtrs, 0.016887, 0.11125, 10.357, 3.6231, 0.88026,
0.49671, &dalphadrs);
dalphadrs = -dalphadrs;
double zp = 1.0 + zeta;
double zm = 1.0 - zeta;
xp = pow(zp, THIRD);
xm = pow(zm, THIRD);
double f = CC1 * (zp * xp + zm * xm - 2.0);
double f1 = CC2 * (xp - xm);
double zeta2 = zeta * zeta;
double zeta3 = zeta2 * zeta;
double zeta4 = zeta2 * zeta2;
double x = 1.0 - zeta4;
*dedrs = (de0drs * (1.0 - f * zeta4) +
de1drs * f * zeta4 +
dalphadrs * f * x * IF2);
*dedzeta = (4.0 * zeta3 * f * (e1 - e0 - alpha * IF2) +
f1 * (zeta4 * e1 - zeta4 * e0 + x * alpha * IF2));
e = e0 + alpha * IF2 * f * x + (e1 - e0) * f * zeta4;
}
else
{
*dedrs = de0drs;
e = e0;
}
if (gga)
{
double n2 = n * n;
double t2;
double y;
double phi;
double phi2;
double phi3;
double phi4;
double GAMMAPW91 = BETA * BETA / 0.18;
if (spinpol)
{
phi = 0.5 * (xp * xp + xm * xm);
phi2 = phi * phi;
phi3 = phi * phi2;
phi4 = phi * phi3;
}
else
{
phi = 1.0;
phi2 = 1.0;
phi3 = 1.0;
phi4 = 1.0;
}
t2 = C3 * a2 * rs / (n2 * phi2);
y = -e / (GAMMAPW91 * phi3);
double x = exp(y);
double A = BETA / (GAMMAPW91 * (x - 1.0));
double At2 = A * t2;
double nom = 1.0 + At2;
double denom = nom + At2 * At2;
double H0 = (phi3 * GAMMAPW91 *
log(1.0 + BETA * t2 * nom / (denom * GAMMAPW91)));
double tmp = (phi3 * GAMMAPW91 * BETA /
(denom * (BETA * t2 * nom + GAMMAPW91 * denom)));
double tmp2 = A * A * x / BETA;
double dAdrs = tmp2 * *dedrs / phi3;
const double KK = 66.343643960645011; // 100*4/pi*(4/pi/9)**(1/3.)
const double XNU = 15.75592;
const double Cc0 = 0.004235;
const double Cx = -0.001667212;
const double K1 = 0.002568;
const double K2 = 0.023266;
const double K3 = 7.389e-6;
const double K4 = 8.723;
const double K5 = 0.472;
const double K6 = 7.389e-2;
double f0 = XNU * exp(-KK * rs * phi4 * t2);
double rs2 = rs * rs;
double f1 = K1 + K2 * rs + K3 * rs2;
double f2 = 1.0 + K4 * rs + K5 * rs2 + K6 * rs2 * rs;
double f3 = -10.0 * Cx / 7.0 - Cc0 + f1 / f2;
double H1 = f0 * phi3 * f3 * t2;
double dH1drs = (-KK * phi4 * t2 * H1 + f0 * phi3 * t2 *
((K2 + 2.0 * K3 * rs) * f2 -
(K4 + 2.0 * K5 * rs + 3.0 * K6 * rs2) * f1) / (f2 * f2));
double dH1dt2 = -KK * rs * phi4 * H1 + f0 * phi3 * f3;
double dH1dphi = (-4.0 * KK * rs * phi3 * H1 + 3.0 * f0 * phi2 * f3) * t2;
double dH0dt2 = (1.0 + 2.0 * At2) * tmp;
double dH0dA = -At2 * t2 * t2 * (2.0 + At2) * tmp;
*dedrs += (dH0dt2 + dH1dt2) * 7 * t2 / rs + dH0dA * dAdrs + dH1drs;
*deda2 = (dH0dt2 + dH1dt2) * C3 * rs / n2;
if (spinpol)
{
double dphidzeta = (1.0 / xp - 1.0 / xm) / 3.0;
double dAdzeta = tmp2 * (*dedzeta -
3.0 * e * dphidzeta / phi) / phi3;
*dedzeta += ((3.0 * H0 / phi - dH0dt2 * 2.0 * t2 / phi ) * dphidzeta +
dH0dA * dAdzeta);
*dedzeta += (dH1dphi - dH1dt2 * 2.0 * t2 / phi ) * dphidzeta;
*deda2 /= phi2;
}
e += H0 + H1;
}
return e;
}
gpaw-0.11.0.13004/c/xc/ensemble_gga.c 0000664 0001750 0001750 00000003041 12553643466 017067 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <math.h>   /* pow */
#include "xc_gpaw.h"
double beefvdw_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
double e = C1 / rs;
*dedrs = -e / rs;
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
/* Legendre polynomial basis expansion */
int parlen = par->nparameters-1;
double p = par->parameters[0];
double tmp = p + s2;
double x = 2.0 * s2 / tmp - 1.0;
double dxds2 = 2.0 * p / pow(tmp,2);
double Fx = 0.0;
double dFxds2 = 0.0;
int max_order = par->parameters[parlen+1];
double L[max_order+1];
double dL[max_order+1];
double coef;
int m;
int order;
/* initializing */
L[0] = 1.0;
L[1] = x;
dL[0] = 0.0;
dL[1] = 1.0;
/* recursively building polynomia and their derivatives */
for(int i = 2; i < max_order+1; i++)
{
L[i] = 2.0 * x * L[i-1] - L[i-2] - (x * L[i-1] - L[i-2])/i;
dL[i] = i * L[i-1] + x * dL[i-1];
}
/* building enhancement factor Fx and derivative dFxds2 */
m = 0;
for(int i = 0; i < max_order+1; i++)
{
order = par->parameters[2+m];
if(order == i)
{
coef = par->parameters[2+parlen+m];
Fx += coef * L[i];
dFxds2 += coef * dL[i] * dxds2;
m += 1;
}
}
double ds2drs = 8.0 * c * a2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
return e;
}
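/* Hedged numerical check (illustration only, not part of GPAW): the recursion
 * used above, L[i] = 2x L[i-1] - L[i-2] - (x L[i-1] - L[i-2])/i, is Bonnet's
 * Legendre recurrence i P_i = (2i-1) x P_{i-1} - (i-1) P_{i-2} rearranged, and
 * dL[i] = i L[i-1] + x dL[i-1] is the matching derivative identity.  The
 * helper below returns L_3(x) - (5x^3 - 3x)/2, which should vanish. */
static double legendre_recursion_check(double x)
{
  double L[4];
  L[0] = 1.0;
  L[1] = x;
  for (int i = 2; i < 4; i++)
    L[i] = 2.0 * x * L[i-1] - L[i-2] - (x * L[i-1] - L[i-2]) / i;
  return L[3] - 0.5 * (5.0 * x * x * x - 3.0 * x);
}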
gpaw-0.11.0.13004/c/xc/tpss.c 0000664 0001750 0001750 00000037466 12553643466 015472 0 ustar jensj jensj 0000000 0000000 /************************************************************************
Implements Perdew, Tao, Staroverov & Scuseria
meta-Generalized Gradient Approximation.
Exchange part
************************************************************************/
#include <stdlib.h>   /* malloc, free */
#include <math.h>     /* sqrt, pow, fabs, M_PI */
#include "xc_mgga.h"
typedef struct tpss_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *x_aux;
XC(func_type) *c_aux1;
XC(func_type) *c_aux2;
} tpss_params;
/* some parameters */
static double b=0.40, c=1.59096, e=1.537, kappa=0.804, mu=0.21951;
/* This is Equation (7) from the paper and its derivatives */
static void
x_tpss_7(double p, double alpha,
double *qb, double *dqbdp, double *dqbdalpha)
{
/* Eq. (7) */
double a = sqrt(1.0 + b*alpha*(alpha-1.0)), h = 9.0/20.0;
*qb = h*(alpha - 1.0)/a + 2.0*p/3.0;
*dqbdp = 2.0/3.0;
*dqbdalpha = h*(1.0 + 0.5*b*(alpha-1.0))/pow(a, 3);
}
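/* Worked form of the expressions above (restated for clarity, following the
 * notation of the TPSS paper): with a = sqrt(1 + b*alpha*(alpha - 1)),
 *     q_b = (9/20) (alpha - 1)/a + 2p/3,
 * so  dq_b/dp     = 2/3  and
 *     dq_b/dalpha = (9/20) [1 + b(alpha - 1)/2] / a^3,
 * which is what x_tpss_7() returns. */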
/* Equation (10) in all its glory */
static
void x_tpss_10(double p, double alpha,
double *x, double *dxdp, double *dxdalpha)
{
double x1, dxdp1, dxdalpha1;
double aux1, ap, apsr, p2;
double qb, dqbdp, dqbdalpha;
/* Equation 7 */
x_tpss_7(p, alpha, &qb, &dqbdp, &dqbdalpha);
p2 = p*p;
aux1 = 10.0/81.0;
ap = (3*alpha + 5*p)*(3*alpha + 5*p);
apsr = (3*alpha + 5*p);
/* first we handle the numerator */
x1 = 0.0;
dxdp1 = 0.0;
dxdalpha1 = 0.0;
{ /* first term */
double a = (9*alpha*alpha+30*alpha*p+50*p2), a2 = a*a;
x1 += aux1*p + 25*c*p2*p*ap/a2;
dxdp1 += aux1 + ((3*225*c*p2*alpha*alpha+ 4*750*c*p*p2*alpha + 5*625*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(30*alpha+50*2*p))/(a2*a2);
dxdalpha1 += ((225*c*p*p2*2*alpha + 750*c*p2*p2)*a2 - 25*c*p2*p*ap*2*a*(9*2*alpha+30*p))/(a2*a2);
}
{ /* second term */
double a = 146.0/2025.0*qb;
x1 += a*qb;
dxdp1 += 2.0*a*dqbdp;
dxdalpha1 += 2.0*a*dqbdalpha;
}
{ /* third term */
double h = 73.0/(405*sqrt(2.0));
x1 += -h*qb*p/apsr * sqrt(ap+9);
dxdp1 += -h * qb *((3*alpha)/ap * sqrt(ap+9) + p/apsr * 1./2. * pow(ap+9,-1./2.)* 2*apsr*5) - h*p/apsr*sqrt(ap+9)*dqbdp;
dxdalpha1 += -h*qb*( (-1)*p*3/ap * sqrt(ap+9) + p/apsr * 1./2. * pow(ap+9,-1./2.)* 2*apsr*3) - h*p/apsr*sqrt(ap+9)*dqbdalpha;
}
{ /* forth term */
double a = aux1*aux1/kappa;
x1 += a*p2;
dxdp1 += a*2.0*p;
dxdalpha1 += 0.0;
}
{ /* fifth term */
x1 += 20*sqrt(e)*p2/(9*ap);
dxdp1 += 20*sqrt(e)/9*(2*p*ap-p2*2*(3*alpha + 5*p)*5)/(ap*ap);
dxdalpha1 +=-20*2*sqrt(e)/3*p2/(ap*(3*alpha + 5*p));
}
{ /* sixth term */
double a = e*mu;
x1 += a*p*p2;
dxdp1 += a*3.0*p2;
dxdalpha1 += 0.0;
}
/* and now the denominator */
{
double a = 1.0+sqrt(e)*p, a2 = a*a;
*x = x1/a2;
*dxdp = (dxdp1*a - 2.0*sqrt(e)*x1)/(a2*a);
*dxdalpha = dxdalpha1/a2;
}
}
static void
x_tpss_para(XC(func_type) *lda_aux, const double *rho, const double sigma, const double tau_,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double gdms, p, tau, tauw;
double x, dxdp, dxdalpha, Fx, dFxdx;
double tau_lsda, exunif, vxunif, dtau_lsdadd;
double dpdd, dpdsigma;
double alpha, dalphadd, dalphadsigma, dalphadtau;
double aux = (3./10.) * pow((3*M_PI*M_PI),2./3.);
/* get the uniform gas energy and potential */
const int np = 1;
XC(lda_exc_vxc)(lda_aux, np, rho, &exunif, &vxunif);
/* calculate |nabla rho|^2 */
gdms = max(MIN_GRAD*MIN_GRAD, sigma);
/* Eq. (4) */
p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
dpdd = -(8.0/3.0)*p/rho[0];
dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
/* von Weizsäcker kinetic energy density */
tauw = max(gdms/(8.0*rho[0]), 1.0e-12);
tau = max(tau_, tauw);
tau_lsda = aux * pow(rho[0],5./3.);
dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.);
alpha = (tau - tauw)/tau_lsda;
if(fabs(tauw-tau_)< 1.0e-10){
dalphadsigma = 0.0;
dalphadtau = 0.0;
dalphadd = 0.0;
}else{
dalphadtau = 1./tau_lsda;
dalphadsigma = -1./(tau_lsda*8.0*rho[0]);
dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.);
}
/* get Eq. (10) */
x_tpss_10(p, alpha, &x, &dxdp, &dxdalpha);
{ /* Eq. (5) */
double a = kappa/(kappa + x);
Fx = 1.0 + kappa*(1.0 - a);
dFxdx = a*a;
}
{ /* Eq. (3) */
*energy = exunif*Fx*rho[0];
/* exunif is en per particle already so we multiply by n the terms with exunif*/
*dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd);
*vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma);
*dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau);
}
}
static void
XC(mgga_x_tpss)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
tpss_params *par = (tpss_params*)p;
if(par->common.nspin == XC_UNPOLARIZED){
double en;
x_tpss_para(par->x_aux, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
/* The spin-polarized version is handled using the exact spin scaling
Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2
*/
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_tpss_para(par->x_aux, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_tpss_para(par->x_aux, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
/************************************************************************
Implements Perdew, Tao, Staroverov & Scuseria
meta-Generalized Gradient Approximation.
J. Chem. Phys. 120, 6898 (2004)
http://dx.doi.org/10.1063/1.1665298
Correlation part
************************************************************************/
/* some parameters */
static double d = 2.8;
/* Equation (14) */
static void
c_tpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta)
{
double fz, C0, dC0dz, dfzdz;
double z2 = zeta*zeta;
/* Equation (13) */
C0 = 0.53 + z2*(0.87 + z2*(0.50 + z2*2.26));
dC0dz = zeta*(2.0*0.87 + z2*(4.0*0.5 + z2*6.0*2.26)); /*OK*/
fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0));
dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/
{ /* Equation (14) */
double csi2 = csi*csi;
double a = 1.0 + csi2*fz, a4 = pow(a, 4);
*C = C0 / a4;
*dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/
*dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/
}
}
/* Equation (12) */
static void c_tpss_12(XC(func_type) *aux1, XC(func_type) *aux2, int nspin, const double *rho, const double *sigma,
double dens, double zeta, double z,
double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz)
{
/*some incoming variables:
dens = rho[0] + rho[1]
z = tau_w/tau
zeta = (rho[0] - rho[1])/dens*/
double e_PBE, e_PBEup, e_PBEdn;
double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ;
double aux, zsq;
double dzetadd[2], dcsidd[2], dcsidsigma[3];
double C, dCdcsi, dCdzeta;
double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3];
int i;
/*initialize dCdcsi and dCdzeta and the energy*/
dCdcsi = dCdzeta = 0.0;
e_PBE = 0.0;
e_PBEup = 0.0;
e_PBEdn = 0.0;
/* get the PBE stuff */
if(nspin== XC_UNPOLARIZED)
{ densp[0]=rho[0]/2.;
densp[1]=rho[0]/2.;
sigmatot[0] = sigma[0]/4.;
sigmatot[1] = sigma[0]/4.;
sigmatot[2] = sigma[0]/4.;
}else{
densp[0] = rho[0];
densp[1] = rho[1];
sigmatot[0] = sigma[0];
sigmatot[1] = sigma[1];
sigmatot[2] = sigma[2];
}
/* e_PBE */
XC(func_type) *auxfunc = (nspin == XC_UNPOLARIZED) ? aux2 : aux1;
const int np = 1;
XC(gga_exc_vxc)(auxfunc, np, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma);
densp2[0]=densp[0];
densp2[1]=0.0;
if(nspin== XC_UNPOLARIZED)
{
sigmaup[0] = sigma[0]/4.;
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}else{
sigmaup[0] = sigma[0];
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}
/* e_PBE spin up */
XC(gga_exc_vxc)(auxfunc, np, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup);
densp2[0]=densp[1];
densp2[1]=0.0;
if(nspin== XC_UNPOLARIZED)
{
sigmadn[0] = sigma[0]/4.;
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}else{
sigmadn[0] = sigma[2];
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}
/* e_PBE spin down */
XC(gga_exc_vxc)(auxfunc, np, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn);
/*get Eq. (13) and (14) for the polarized case*/
if(nspin == XC_UNPOLARIZED){
C = 0.53;
dzetadd[0] = 0.0;
dcsidd [0] = 0.0;
dzetadd[1] = 0.0;
dcsidd [1] = 0.0;
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
}else{
// initialize derivatives
for(i=0; i<2; i++){
dzetadd[i] = 0.0;
dcsidd [i] = 0.0;}
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
double num, gzeta, csi, a;
/*numerator of csi: derive as grho all components and then square the 3 parts
[2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2
-> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_b^2)/(n_a+n_b)^2 */
num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2);
num = max(num, 1e-20);
gzeta = sqrt(4*(num))/(dens*dens);
gzeta = max(gzeta, MIN_GRAD);
/*denominator of csi*/
a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0);
csi = gzeta/a;
c_tpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta);
dzetadd[0] = (1.0 - zeta)/dens; /*OK*/
dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/
dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/
dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/
dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/
dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/
dcsidsigma[2]= csi*pow(rho[0],2)/(2*num); /*OK*/
}
aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens;
double dauxdd[2], dauxdsigma[3];
if(e_PBEup > e_PBE)
{
//case densp[0] * e_PBEup
dauxdd[0] = de_PBEddup[0];
dauxdd[1] = 0.0;
dauxdsigma[0] = de_PBEdsigmaup[0];
dauxdsigma[1] = 0.0;
dauxdsigma[2] = 0.0;
}else{
//case densp[0] * e_PBE
dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE;
dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE);
dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0];
dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1];
dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2];
}
if(e_PBEdn > e_PBE)
{//case densp[1] * e_PBEdn
dauxdd[0] += 0.0;
dauxdd[1] += de_PBEdddn[0];
dauxdsigma[0] += 0.0;
dauxdsigma[1] += 0.0;
dauxdsigma[2] += de_PBEdsigmadn[0];
}else{//case densp[1] * e_PBE
dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE);
dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE;
dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0];
dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1];
dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2];
}
zsq=z*z;
*e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux);
*de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/
double dCdd[2];
dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/
dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/
/* partial derivatives*/
de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq
- zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]);
de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq
- zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]);
int nder = (nspin==XC_UNPOLARIZED) ? 1 : 3;
for(i=0; icommon.nspin;
zeta = (rho[0]-rho[1])/(rho[0]+rho[1]);
dens = rho[0];
tautr = tau[0];
grad = sigma[0];
if(nspin == XC_POLARIZED) {
dens += rho[1];
tautr += tau[1];
grad += (2*sigma[1] + sigma[2]);
}
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*dens), 1.0e-12);
taut = max(tautr, tauw);
z = tauw/taut;
double sigmatmp[3];
sigmatmp[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]);
sigmatmp[1] = 0.0;
sigmatmp[2] = 0.0;
if(nspin == XC_POLARIZED)
{
//sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]);
sigmatmp[1] = sigma[1];
sigmatmp[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]);
}
/* Equation (12) */
c_tpss_12(par->c_aux1, par->c_aux2, nspin, rho, sigmatmp, dens, zeta, z,
&e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz);
/* Equation (11) */
{
double z2 = z*z, z3 = z2*z;
double dedz;
double dzdd[2], dzdsigma[3], dzdtau;
if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){
dzdtau = 0.0;
dzdd[0] = 0.0;
dzdd[1] = 0.0;
dzdsigma[0] = 0.0;
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
}else{
dzdtau = -z/taut;
dzdd[0] = - z/dens;
dzdd[1] = 0.0;
if (nspin == XC_POLARIZED) dzdd[1] = - z/dens;
dzdsigma[0] = 1.0/(8*dens*taut);
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
if (nspin == XC_POLARIZED) {
dzdsigma[1] = 2.0/(8*dens*taut);
dzdsigma[2] = 1.0/(8*dens*taut);
}
}
*energy = e_PKZB * (1.0 + d*e_PKZB*z3);
/* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the
* same energy for polarized and unpolarized calculation with the same total density */
if(nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]);
dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2;
for(is=0; isx_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED);
par->c_aux1 = (XC(func_type) *) malloc(sizeof(XC(func_type)));
par->c_aux2 = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->c_aux1, XC_GGA_C_PBE, par->common.nspin);
XC(func_init)(par->c_aux2, XC_GGA_C_PBE, XC_POLARIZED);
}
static void tpss_end(void *p) {
tpss_params *par = (tpss_params*)p;
XC(func_end)(par->x_aux);
free(par->x_aux);
XC(func_end)(par->c_aux1);
XC(func_end)(par->c_aux2);
free(par->c_aux1);
free(par->c_aux2);
}
const mgga_func_info tpss_info = {
sizeof(tpss_params),
&tpss_init,
&tpss_end,
&XC(mgga_x_tpss),
&XC(mgga_c_tpss)
};
gpaw-0.11.0.13004/c/xc/xc_gpaw.h 0000664 0001750 0001750 00000001760 12553643466 016122 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#ifndef _XC_GPAW_H
#define _XC_GPAW_H
/*
BETA = 0.066725
MU = BETA * pi * pi / 3
C2 = (1 / (18 * pi)**(1 / 3))
C0I = 3 / (4 * pi)
C1 = -9 / (8 * pi) * (2 * pi / 3)**(1 / 3)
CC1 = 1 / (2**(4 / 3) - 2)
CC2 = 4 * CC1 / 3
IF2 = 3 / (2 * CC2);
C3 = pi * (4 / (9 * pi))**(1 / 3) / 16
C0 = 4 * pi / 3
*/
#define BETA 0.066725
#define GAMMA 0.031091
#define MU 0.2195164512208958
#define C2 0.26053088059892404
#define C0I 0.238732414637843
#define C1 -0.45816529328314287
#define CC1 1.9236610509315362
#define CC2 2.5648814012420482
#define IF2 0.58482236226346462
#define C3 0.10231023756535741
#define C0 4.1887902047863905
#define THIRD 0.33333333333333333
#define NMIN 1.0E-10
typedef int bool;
typedef struct
{
bool gga;
double kappa;
int nparameters;
double parameters[110];
} xc_parameters;
#endif /* _XC_GPAW_H */
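/* Hedged consistency sketch (illustration only, not part of GPAW): the numeric
 * macros above follow from the formulas quoted in the comment block at the top
 * of this header.  The helper fills `c` with recomputed values for comparison. */
#ifndef _XC_GPAW_CHECK_SKETCH
#define _XC_GPAW_CHECK_SKETCH
#include <math.h>
static inline void xc_gpaw_check_constants(double c[7])
{
  c[0] = BETA * M_PI * M_PI / 3.0;                               /* MU  */
  c[1] = pow(1.0 / (18.0 * M_PI), 1.0 / 3.0);                    /* C2  */
  c[2] = 3.0 / (4.0 * M_PI);                                     /* C0I */
  c[3] = -9.0 / (8.0 * M_PI) * pow(2.0 * M_PI / 3.0, 1.0 / 3.0); /* C1  */
  c[4] = 1.0 / (pow(2.0, 4.0 / 3.0) - 2.0);                      /* CC1 */
  c[5] = M_PI * pow(4.0 / (9.0 * M_PI), 1.0 / 3.0) / 16.0;       /* C3  */
  c[6] = 4.0 * M_PI / 3.0;                                       /* C0  */
}
#endif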
gpaw-0.11.0.13004/c/xc/m06l.c 0000664 0001750 0001750 00000056363 12553643466 015254 0 ustar jensj jensj 0000000 0000000 /************************************************************************
Implements Zhao, Truhlar
Meta-gga M06-Local
Correlation part
************************************************************************/
#include <stdlib.h>   /* malloc, free */
#include <math.h>     /* pow */
#include "xc_mgga.h"
typedef struct m06l_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *c_aux;
XC(func_type) *x_aux;
} m06l_params;
/* derivatives of x and z with respect to rho, grho and tau*/
static void
c_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau)
{
*dxdd = -8./3. * x * 1/rho;
*dxdgd = 1./pow(rho,8./3.);
*dzdd = -5./3. * 2 * tau/pow(rho, 8./3.);
*dzdtau = 2./pow(rho, 5./3.);
}
/* Get g for Eq. (13)*/
static void
c_m06_13(double *x, double *rho, double *g_ab, double *dg_abdd, double *dg_abdgd)
{
/*define the C_ab,i */
static double c_ab0= 0.6042374, c_ab1= 177.6783, c_ab2= -251.3252, c_ab3=76.35173, c_ab4=-12.55699;
double gammaCab = 0.0031 ;
double x_ab, a;
double dg_abdx, dxdd_a, dxdgd_a, dzdd_a, dzdtau_a;
double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b;
/*x = x_ba^2 = x_a^2+x_b^2*/
x_ab = x[0] + x[1];
a= (gammaCab*x_ab/(1+gammaCab*x_ab));
*g_ab = c_ab0*pow(a,0)+ c_ab1*pow(a,1)+ c_ab2*pow(a,2)+c_ab3*pow(a,3)+c_ab4*pow(a,4);
double dadx = gammaCab/pow(1+gammaCab*x_ab, 2.);
dg_abdx = (0.0*c_ab0*pow(a,-1)+ 1.*c_ab1*pow(a,0)+ 2.*c_ab2*pow(a,1)+3.*c_ab3*pow(a,2)+4.*c_ab4*pow(a,3))*dadx;
c_m06l_zx(x[0], 0.0, rho[0], 0.0, &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a);
c_m06l_zx(x[1], 0.0, rho[1], 0.0, &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b);
dg_abdd[0] = dg_abdx*dxdd_a;
dg_abdd[1] = dg_abdx*dxdd_b;
dg_abdgd[0] = dg_abdx*dxdgd_a;
dg_abdgd[1] = 0.0;
dg_abdgd[2] = dg_abdx*dxdgd_b;
}
/* Get g for Eq. (15)*/
static void
c_m06_15(double x, double rho, double *g_ss, double *dg_ssdd, double *dg_ssdgd)
{
/*define the C_ss,i */
static double c_ss0=0.5349466, c_ss1=0.5396620, c_ss2=-31.61217, c_ss3= 51.49592, c_ss4=-29.19613;
double gammaCss = 0.06 ;
double a;
double dg_ssdx, dxdd, dxdgd, dzdd, dzdtau;
/*x = x_a^2 */
a= (gammaCss*x/(1+gammaCss*x));
*g_ss = c_ss0*pow(a,0)+ c_ss1*pow(a,1)+ c_ss2*pow(a,2)+c_ss3*pow(a,3)+c_ss4*pow(a,4);
double dadx = gammaCss/pow(1+gammaCss*x, 2.);
dg_ssdx = (0.0*c_ss0*pow(a,-1)+ 1.*c_ss1*pow(a,0)+ 2.*c_ss2*pow(a,1)+3.*c_ss3*pow(a,2)+4.*c_ss4*pow(a,3))*dadx;
c_m06l_zx(x, 0.0, rho, 0.0, &dxdd, &dxdgd, &dzdd, &dzdtau);
*dg_ssdd = dg_ssdx*dxdd;
*dg_ssdgd = dg_ssdx*dxdgd;
/*printf("g_ss %19.12f\n", *g_ss);*/
}
/* Get h_ab for Eq. (12)*/
static
void c_m06l_hab(double *x, double *z, double *rho, double *tau, double *h_ab, double *dh_abdd, double *dh_abdgd, double *dh_abdtau)
{
/* define the d_ab,i for Eq. (12)*/
static double d_ab0= 0.3957626, d_ab1= -0.5614546, d_ab2= 0.01403963, d_ab3= 0.0009831442, d_ab4= -0.003577176;
double alpha_ab = 0.00304966;
double hab1, dhabdd1[2], dhabdgd1[3], dhabdtau1[2];
double x_ab, z_ab, gamma, xgamma, zgamma;
double dgammadx, dgammadz;
double dgammadd_a, dgammadgd_a, dgammadtau_a;
double dgammadd_b, dgammadgd_b, dgammadtau_b;
double dxdd_a, dxdgd_a, dzdd_a, dzdtau_a;
double dxdd_b, dxdgd_b, dzdd_b, dzdtau_b;
x_ab = x[0] + x[1];
z_ab = z[0] + z[1];
gamma = 1 + alpha_ab*(x_ab + z_ab);
{ /* derivatives of gamma with respect to x and z*/
dgammadx = alpha_ab;
dgammadz = alpha_ab;
}
c_m06l_zx(x[0], z[0], rho[0], tau[0], &dxdd_a, &dxdgd_a, &dzdd_a, &dzdtau_a);
c_m06l_zx(x[1], z[1], rho[1], tau[1], &dxdd_b, &dxdgd_b, &dzdd_b, &dzdtau_b);
{ /* derivatives of gamma with respect to density, gradient and kinetic energy */
dgammadd_a = dgammadx * dxdd_a + dgammadz * dzdd_a;
dgammadd_b = dgammadx * dxdd_b + dgammadz * dzdd_b;
dgammadgd_a = dgammadx * dxdgd_a;
dgammadgd_b = dgammadx * dxdgd_b;
dgammadtau_a = dgammadz * dzdtau_a;
dgammadtau_b = dgammadz * dzdtau_b;
}
xgamma = x_ab/gamma;
zgamma = z_ab/gamma;
/* we initialize h and collect the terms*/
hab1 = 0.0;
dhabdd1[0] = dhabdd1[1] = 0.0;
dhabdgd1[0] = dhabdgd1[1] = dhabdgd1[2] = 0.0;
dhabdtau1[0] = dhabdtau1[1] = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
hab1 += d_ab0/gamma;
dhabdd1[0] += -d_ab0*dgammadd_a/g2;
dhabdd1[1] += -d_ab0*dgammadd_b/g2;
dhabdgd1[0] += -d_ab0*dgammadgd_a/g2;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += -d_ab0*dgammadgd_b/g2;
dhabdtau1[0] += -d_ab0*dgammadtau_a/g2 ;
dhabdtau1[1] += -d_ab0*dgammadtau_b/g2 ;
}
{ /* second term */
double g3=pow(gamma,3.);
hab1 += (d_ab1*xgamma + d_ab2*zgamma)/gamma;
dhabdd1[0] += (gamma*(d_ab1*dxdd_a+d_ab2*dzdd_a)-2*dgammadd_a*(d_ab1*x_ab+d_ab2*z_ab))/g3;
dhabdd1[1] += (gamma*(d_ab1*dxdd_b+d_ab2*dzdd_b)-2*dgammadd_b*(d_ab1*x_ab+d_ab2*z_ab))/g3;
dhabdgd1[0] += (d_ab1*dxdgd_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_a)/g3;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += (d_ab1*dxdgd_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadgd_b)/g3;
dhabdtau1[0] += (d_ab2*dzdtau_a*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_a)/g3;
dhabdtau1[1] += (d_ab2*dzdtau_b*gamma -2*(d_ab1*x_ab+d_ab2*z_ab)*dgammadtau_b)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
hab1 += (d_ab3*xgamma*xgamma+d_ab4*xgamma*zgamma)/gamma;
dhabdd1[0] += (-3*dgammadd_a*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_a*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_a*gamma)/g4;
dhabdd1[1] += (-3*dgammadd_b*(d_ab3*pow(x_ab,2.)+d_ab4*x_ab*z_ab)+dxdd_b*gamma*(2*d_ab3*x_ab+d_ab4*z_ab)+d_ab4*x_ab*dzdd_b*gamma)/g4;
dhabdgd1[0] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_a+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_a)/g4;
dhabdgd1[1] += 0.0;
dhabdgd1[2] += (-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadgd_b+gamma*(2*d_ab3*x_ab+d_ab4*z_ab)*dxdgd_b)/g4;
dhabdtau1[0] += (d_ab4*x_ab*dzdtau_a*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_a)/g4;
dhabdtau1[1] += (d_ab4*x_ab*dzdtau_b*gamma-3*x_ab*(d_ab3*x_ab+d_ab4*z_ab)*dgammadtau_b)/g4;
}
*h_ab = hab1;
//derivatives
dh_abdd[0] = dhabdd1[0];
dh_abdd[1] = dhabdd1[1];
dh_abdgd[0] = dhabdgd1[0];
dh_abdgd[1] = dhabdgd1[1];
dh_abdgd[2] = dhabdgd1[2];
dh_abdtau[0] = dhabdtau1[0];
dh_abdtau[1] = dhabdtau1[1];
}
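/* Collecting the three terms above, h_ab of Eq. (12) has the closed form
       gamma = 1 + alpha_ab*(x_ab + z_ab),
       h_ab  = d_ab0/gamma + (d_ab1*x_ab + d_ab2*z_ab)/gamma^2
               + (d_ab3*x_ab^2 + d_ab4*x_ab*z_ab)/gamma^3,
   and each derivative block above is just the quotient rule applied to one
   of these three terms. */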
/* Get h_ss for Eq. (14)*/
static
void c_m06l_hss(double x, double z, double rho, double tau, double *h_ss, double *dh_ssdd, double *dh_ssdgd, double *dh_ssdtau)
{
/* define the d_ss,i for Eq. (14)*/
static double d_ss0= 0.4650534, d_ss1= 0.1617589, d_ss2= 0.1833657, d_ss3= 0.0004692100, d_ss4= -0.004990573;
double alpha_ss = 0.00515088;
double hss1, dhssdd1, dhssdgd1, dhssdtau1;
double gamma, xgamma, zgamma;
double dgammadx, dgammadz;
double dgammadd, dgammadgd, dgammadtau;
double dxdd, dxdgd, dzdd, dzdtau;
gamma = 1 + alpha_ss*(x + z);
{ /* derivatives of gamma with respect to x and z*/
dgammadx = alpha_ss;
dgammadz = alpha_ss;
}
c_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{ /* derivatives of gamma with respect to density, gradient and kinetic energy */
dgammadd = dgammadx * dxdd + dgammadz * dzdd;
dgammadgd = dgammadx * dxdgd;
dgammadtau = dgammadz * dzdtau;
}
xgamma = x/gamma;
zgamma = z/gamma;
/* we initialize h and collect the terms*/
hss1 = 0.0;
dhssdd1 = 0.0;
dhssdgd1 = 0.0;
dhssdtau1 = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
hss1 += d_ss0/gamma;
dhssdd1 += -d_ss0*dgammadd/g2;
dhssdgd1 += -d_ss0*dgammadgd/g2;
dhssdtau1 += -d_ss0*dgammadtau/g2 ;
}
{ /* second term */
double g3=pow(gamma,3.);
hss1 += (d_ss1*xgamma + d_ss2*zgamma)/gamma;
dhssdd1 += (gamma*(d_ss1*dxdd+d_ss2*dzdd)-2*dgammadd*(d_ss1*x+d_ss2*z))/g3;
dhssdgd1 += (d_ss1*dxdgd*gamma -2*(d_ss1*x+d_ss2*z)*dgammadgd)/g3;
dhssdtau1 += (d_ss2*dzdtau*gamma -2*(d_ss1*x+d_ss2*z)*dgammadtau)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
hss1 += (d_ss3*xgamma*xgamma+d_ss4*xgamma*zgamma)/gamma;
dhssdd1 += (-3*dgammadd*(d_ss3*pow(x,2.)+d_ss4*x*z)+dxdd*gamma*(2*d_ss3*x+d_ss4*z)+d_ss4*x*dzdd*gamma)/g4;
dhssdgd1 += (-3*x*(d_ss3*x+d_ss4*z)*dgammadgd+gamma*(2*d_ss3*x+d_ss4*z)*dxdgd)/g4;
dhssdtau1 += (d_ss4*x*dzdtau*gamma-3*x*(d_ss3*x+d_ss4*z)*dgammadtau)/g4;
}
*h_ss = hss1;
//derivatives
*dh_ssdd = dhssdd1;
*dh_ssdgd = dhssdgd1;
*dh_ssdtau = dhssdtau1;
}
static void
c_m06l_para(m06l_params *p, const double *rho, const double *sigmatmp, const double *tautmp,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double rho2[2], rho2s[2], x[2], z[2], zc_ss[2];
double tau2[2], tauw[2], dens, dens1, sigma[3];
double g_ss[2], h_ss[2], Ec_ss[2], D_ss[2];
double g_ab=0.0, h_ab=0.0, Ec_ab=0.0;
double exunif_ss[2], vxunif_up[2], vxunif_dn[2], vxunif_ss[2];
double exunif =0.0, exunif_ab=0.0, vxunif[2];
//derivatives
double dh_ssdd[2], dh_ssdgd[3], dh_ssdtau[2];
double dg_ssdd[2], dg_ssdgd[3] ;
double dh_abdd[2], dh_abdgd[3], dh_abdtau[2];
double dg_abdd[2], dg_abdgd[3];
double dEc_ssdd[2], dEc_ssdgd[3], dEc_ssdtau[2];
double dEc_abdd[2], dEc_abdgd[3], dEc_abdtau[2];
double dD_ssdd[2], dD_ssdgd[3], dD_ssdtau[2], dD_ssdx[2], dD_ssdz[2];
double dxdd[2], dxdgd[2], dzdd[2], dzdtau[2];
const double Cfermi= (3./5.)*pow(6*M_PI*M_PI,2./3.);
/* put in by cpo for const reasons */
double sigma_[3],tau[2];
sigma_[0] = sigmatmp[0];
sigma_[1] = sigmatmp[1];
sigma_[2] = sigmatmp[2];
tau[0] = tautmp[0];
tau[1] = tautmp[1];
/*calculate |nabla rho|^2 */
sigma_[0] = max(MIN_GRAD*MIN_GRAD, sigma_[0]);
tauw[0] = max(sigma_[0]/(8.0*rho[0]), 1.0e-12);
tau[0] = max(tauw[0], tau[0]);
dens1 = rho[0]+rho[1];
if(p->common.nspin== XC_UNPOLARIZED)
{
tau[1] = 0.0;
rho2[0] = rho[0]/2.;
rho2[1] = rho[0]/2.;
sigma[0] = sigma_[0]/4.;
sigma[1] = sigma_[0]/4.;
sigma[2] = sigma_[0]/4.;
dens = rho[0];
tau2[0] = tau[0]/2.;
tau2[1] = tau[0]/2.;
}else{
sigma_[2] = max(MIN_GRAD*MIN_GRAD, sigma_[2]);
tauw[1] = max(sigma_[2]/(8.0*rho[1]), 1.0e-12);
tau[1] = max(tauw[1], tau[1]);
rho2[0]=rho[0];
rho2[1]=rho[1];
sigma[0] = sigma_[0];
sigma[1] = sigma_[1];
sigma[2] = sigma_[2];
dens = rho[0]+rho[1];
tau2[0] =tau[0];
tau2[1] =tau[1];
}
//get the e_LDA(rho_a,b)
const int np = 1;
XC(lda_exc_vxc)(p->c_aux, np, rho2, &exunif, vxunif);
exunif = exunif*dens;
/*==============get the E_sigma part================*/
/*============ spin up =============*/
rho2s[0]=rho2[0];
rho2s[1]=0.;
//get the e_LDA(rho_up,0)
XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[0]), vxunif_up);
exunif_ss[0] = exunif_ss[0] * rho2s[0];
vxunif_ss[0] = vxunif_up[0];
/*define variables for rho_up and zc in order to avoid x/0 -> D_ss = -inf */
x[0] = sigma[0]/(pow(rho2s[0], 8./3.));
z[0] = 2*tau2[0]/pow(rho2s[0],5./3.) - Cfermi;
zc_ss[0] = 2*tau2[0]/pow(rho2s[0],5./3.);
/*D_ss = 1 -x/4*(z + Cf), z+Cf = 2*tau2/pow(rho2s[0],5./3.) = zc */
D_ss[0] = 1 - x[0]/(4. * zc_ss[0]);
//derivatives for D_up
dD_ssdx[0] = -1/(4 * zc_ss[0]);
dD_ssdz[0] = 4 * x[0]/pow(4.*zc_ss[0],2.);
c_m06l_zx(x[0], z[0], rho2s[0], tau2[0], &(dxdd[0]), &(dxdgd[0]), &(dzdd[0]), &(dzdtau[0]));
dD_ssdd[0] = dD_ssdx[0] * dxdd[0] + dD_ssdz[0] * dzdd[0];
dD_ssdgd[0] = dD_ssdx[0] * dxdgd[0];
dD_ssdtau[0] = dD_ssdz[0] * dzdtau[0];
/*build up Eq. (14): Ec_sigmasigma*/
c_m06_15(x[0], rho2s[0], &(g_ss[0]), &(dg_ssdd[0]), &(dg_ssdgd[0]));
c_m06l_hss(x[0], z[0], rho2s[0], tau2[0], &(h_ss[0]), &(dh_ssdd[0]), &(dh_ssdgd[0]), &(dh_ssdtau[0]));
Ec_ss[0] = (exunif_ss[0] * (g_ss[0]+h_ss[0]) * D_ss[0]);
//printf("Ec_up %.9e\n", Ec_ss[0]);
/*============== spin down =============*/
rho2s[0]=rho2[1];
rho2s[1]=0.;
//get the e_LDA(0,rho_dn)
XC(lda_exc_vxc)(p->c_aux, np, rho2s, &(exunif_ss[1]), vxunif_dn);
exunif_ss[1] = exunif_ss[1] * rho2s[0];
vxunif_ss[1] = vxunif_dn[0];
/*define variables for rho_beta*/
x[1] = sigma[2]/(pow(rho2s[0], 8./3.));
z[1] = 2*tau2[1]/pow(rho2s[0],5./3.) - Cfermi;
zc_ss[1] = 2*tau2[1]/pow(rho2s[0],5./3.);
//printf("x1 %.9e, zc_ss%.9e\n", x[1], zc_ss[1]);
D_ss[1] = 1 - x[1]/(4.*zc_ss[1]);
//derivatives for D_dn
dD_ssdx[1] = - 1/(4*zc_ss[1]);
dD_ssdz[1] = 4*x[1]/pow(4.*zc_ss[1],2.);
c_m06l_zx(x[1], z[1], rho2s[0], tau2[1], &(dxdd[1]), &(dxdgd[1]), &(dzdd[1]), &(dzdtau[1]));
dD_ssdd[1] = dD_ssdx[1] * dxdd[1] + dD_ssdz[1] * dzdd[1];
dD_ssdgd[2] = dD_ssdx[1] * dxdgd[1];
dD_ssdtau[1] = dD_ssdz[1] * dzdtau[1];
c_m06_15(x[1], rho2s[0], &(g_ss[1]), &(dg_ssdd[1]), &(dg_ssdgd[2]));
c_m06l_hss(x[1], z[1], rho2s[0], tau2[1], &(h_ss[1]), &(dh_ssdd[1]), &(dh_ssdgd[2]), &(dh_ssdtau[1]));
//printf("exunif_ss %.9e, (g_ss[1]+h_ss[1])%.9e, D_ss %.9e\n", exunif_ss[1],(g_ss[1]+h_ss[1]),D_ss[1]);
Ec_ss[1] = (exunif_ss[1] * (g_ss[1]+h_ss[1]) * D_ss[1]);
//printf("Ec_dn %.9e\n", Ec_ss[1]);
// Derivatives for Ec_up and Ec_dn with respect to density and kinetic energy
int i;
for(i=0; i<2; i++){
dEc_ssdd[i] = exunif_ss[i] * dh_ssdd[i] * D_ss[i] + vxunif_ss[i] * h_ss[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdd[i] +
exunif_ss[i] * dg_ssdd[i] * D_ss[i] + vxunif_ss[i] * g_ss[i] * D_ss[i] + exunif_ss[i] * g_ss[i] * dD_ssdd[i];
dEc_ssdtau[i] = exunif_ss[i] * dh_ssdtau[i] * D_ss[i] + exunif_ss[i] * h_ss[i] * dD_ssdtau[i] + exunif_ss[i] * g_ss[i] * dD_ssdtau[i];
}
// Derivatives for Ec_up and Ec_dn with respect to gradient
dEc_ssdgd[0] = exunif_ss[0] * dh_ssdgd[0] * D_ss[0] + exunif_ss[0] * h_ss[0] * dD_ssdgd[0] +
exunif_ss[0] * dg_ssdgd[0] * D_ss[0] + exunif_ss[0] * g_ss[0] * dD_ssdgd[0];
dEc_ssdgd[2] = exunif_ss[1] * dh_ssdgd[2] * D_ss[1] + exunif_ss[1] * h_ss[1] * dD_ssdgd[2] +
exunif_ss[1] * dg_ssdgd[2] * D_ss[1] + exunif_ss[1] * g_ss[1] * dD_ssdgd[2];
/*==============get the E_ab part========================*/
exunif_ab = exunif - exunif_ss[0] - exunif_ss[1];
//x_ab = sigmatot[0] /(pow(rho2[0], 8./3.)) + sigmatot[2] /(pow(rho2[1], 8./3.));
//z_ab = 2*tau2[0]/pow(rho2[0],5./3.) + 2*tau2[1]/pow(rho2[1],5./3.) - 2*Cfermi;
/*build up Eq. (12): Ec_alphabeta*/
c_m06_13(x, rho2, &g_ab, dg_abdd, dg_abdgd);
c_m06l_hab(x, z, rho2, tau2, &h_ab, dh_abdd, dh_abdgd, dh_abdtau);
Ec_ab = exunif_ab * (g_ab+h_ab);
// Derivatives for Ec_ab with respect to density and kinetic energy
for(i=0; i<2; i++){
dEc_abdd[i] = exunif_ab * (dh_abdd[i]+ dg_abdd[i]) + (vxunif[i]- vxunif_ss[i]) * (g_ab+h_ab);
dEc_abdtau[i] = exunif_ab * dh_abdtau[i];
}
// Derivatives for Ec_ab with respect to gradient
for(i=0; i<3; i++){
dEc_abdgd[i] = exunif_ab * (dh_abdgd[i] + dg_abdgd[i]);
}
/*==============get the total energy E_c= E_up + E_dn + E_ab========================*/
/*==============================and derivatives=====================================*/
*energy = (Ec_ss[0] + Ec_ss[1] + Ec_ab)/dens1;
//printf("Ec_ss %.9e, Ec_ss %.9e, Ec_ab %.9e\n", Ec_ss[0], Ec_ss[1], Ec_ab);
//derivative for the total correlation energy
if(p->common.nspin== XC_UNPOLARIZED)
{
dedd[0]=dEc_ssdd[0] + dEc_abdd[0];
dedd[1]=0.0;
vsigma[0]= (dEc_ssdgd[0] + dEc_abdgd[0])/2.;
vsigma[1]= 0.0;
vsigma[2]= 0.0;
dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0];
dedtau[1]= 0.0;
}else{
dedd[0]=dEc_ssdd[0] + dEc_abdd[0];
dedd[1]=dEc_ssdd[1] + dEc_abdd[1];
vsigma[0]= dEc_ssdgd[0] + dEc_abdgd[0];
vsigma[1]= 0.0;
vsigma[2]= dEc_ssdgd[2] + dEc_abdgd[2];
dedtau[0]= dEc_ssdtau[0] + dEc_abdtau[0];
dedtau[1]= dEc_ssdtau[1] + dEc_abdtau[1];
}
}
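/* Summary of what c_m06l_para assembles (all quantities as defined above):
       D_s  = 1 - x_s / (4*(z_s + C_F)),            C_F = (3/5)*(6*pi^2)^(2/3),
       E_ss = e_LDA(rho_s, 0) * [g_ss(x_s) + h_ss(x_s, z_s)] * D_s,
       E_ab = [e_LDA(rho_a, rho_b) - e_LDA(rho_a, 0) - e_LDA(0, rho_b)]
              * [g_ab(x_ab) + h_ab(x_ab, z_ab)],
   where the e_LDA are energy densities. The returned *energy is
   (E_aa + E_bb + E_ab) divided by the total density, i.e. a correlation
   energy per electron. */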
static void
XC(mgga_c_m06l)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
c_m06l_para(p, rho, sigma, tau, e, dedd, vsigma, dedtau);
}
/* derivatives of x and z with respect to rho, grho and tau: Eq.(1) and Eq.(3)*/
static void
x_m06l_zx(double x, double z, double rho, double tau, double *dxdd, double *dxdgd, double *dzdd, double *dzdtau)
{
*dxdd = -8./3. * x * 1/rho;
*dxdgd = 1./pow(rho,8./3.);
*dzdd = -5./3. * 2* tau/pow(rho, 8./3.);
*dzdtau = 2./pow(rho, 5./3.);
}
/* Build gamma and its derivatives with respect to rho, grho and tau: Eq. (4)*/
static void
x_m06l_gamma(double x, double z, double rho, double tau, double *gamma, double *dgammadd, double *dgammadgd, double *dgammadtau)
{
static double alpha = 0.00186726; /*set alpha of Eq. (4)*/
double dgammadx, dgammadz;
double dxdd, dxdgd, dzdd, dzdtau;
*gamma = 1 + alpha*(x + z);
/*printf("gamma %19.12f\n", *gamma);*/
{ /* derivatives */
dgammadx = alpha;
dgammadz = alpha;
}
x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{
*dgammadd = dgammadx*dxdd + dgammadz*dzdd;
*dgammadgd = dgammadx*dxdgd;
*dgammadtau = dgammadz*dzdtau;
}
}
/************************************************************************
Implements Zhao, Truhlar
Meta-gga M06-Local
Exchange part
************************************************************************/
/* calculate h and h derivatives with respect to rho, grho and tau: Equation (5) */
static
void x_m06l_h(double x, double z, double rho, double tau, double *h, double *dhdd, double *dhdgd, double *dhdtau)
{
/* parameters for h(x_sigma,z_sigma) of Eq. (5)*/
static double d0=0.6012244, d1=0.004748822, d2=-0.008635108, d3=-0.000009308062, d4=0.00004482811;
double h1, dhdd1, dhdgd1, dhdtau1;
double gamma, dgammadd, dgammadgd, dgammadtau;
double xgamma, zgamma;
double dxdd, dxdgd, dzdd, dzdtau;
x_m06l_gamma(x, z, rho, tau, &gamma, &dgammadd, &dgammadgd, &dgammadtau);
xgamma = x/gamma;
zgamma = z/gamma;
/* we initialize h and its derivatives and collect the terms*/
h1 = 0.0;
dhdd1 = 0.0;
dhdgd1 = 0.0;
dhdtau1 = 0.0;
{ /* first term */
double g2=pow(gamma,2.);
h1 += d0/gamma;
dhdd1 += -d0*dgammadd/g2;
dhdgd1 += -d0*dgammadgd/g2;
dhdtau1 += -d0*dgammadtau/g2 ;
}
x_m06l_zx(x, z, rho, tau, &dxdd, &dxdgd, &dzdd, &dzdtau);
{ /* second term */
double g3=pow(gamma,3.);
h1 += (d1*xgamma + d2*zgamma)/gamma;
dhdd1 += (gamma*(d1*dxdd+d2*dzdd)-2*dgammadd*(d1*x+d2*z))/g3;
dhdgd1 += (d1*dxdgd*gamma -2*(d1*x+d2*z)*dgammadgd)/g3;
dhdtau1 += (d2*dzdtau*gamma -2*(d1*x+d2*z)*dgammadtau)/g3;
}
{ /* third term */
double g4= pow(gamma,4);
h1 += (d3*xgamma*xgamma+d4*xgamma*zgamma)/gamma;
dhdd1 += (-3*dgammadd*(d3*pow(x,2.)+d4*x*z)+dxdd*gamma*(2*d3*x+d4*z)+d4*x*dzdd*gamma)/g4;
dhdgd1 += (-3*x*(d3*x+d4*z)*dgammadgd+gamma*(2*d3*x+d4*z)*dxdgd)/g4;
dhdtau1 += (d4*x*dzdtau*gamma-3*x*(d3*x+d4*z)*dgammadtau)/g4;
}
*h = h1;
/*printf(" h %19.12f\n", *h);*/
*dhdd = dhdd1;
*dhdgd =dhdgd1;
*dhdtau = dhdtau1;
}
/* f(w) and its derivatives with respect to rho and tau*/
static void
x_m06l_fw(double rho, double tau, double *fw, double *dfwdd, double *dfwdtau)
{
/*define the parameters for fw of Eq. (8) as in the reference paper*/
static double a0= 0.3987756, a1= 0.2548219, a2= 0.3923994, a3= -2.103655, a4= -6.302147, a5= 10.97615,
a6= 30.97273, a7=-23.18489, a8=-56.73480, a9=21.60364, a10= 34.21814, a11= -9.049762;
double tau_lsda, t, w;
double dtdd, dtdtau;
double dfwdw, dwdt, dtau_lsdadd;
double aux = (3./10.) * pow((6*M_PI*M_PI),2./3.); /*3->6 for nspin=2 */
tau_lsda = aux * pow(rho,5./3.);
t = tau_lsda/tau;
dtdtau = -t/tau;
w = (t - 1)/(t + 1);
*fw = a0*pow(w,0.)+a1*pow(w,1.)+a2*pow(w,2.)+a3*pow(w,3.)+a4*pow(w,4.)+
+ a5*pow(w,5.)+a6*pow(w,6.)+a7*pow(w,7.)+a8*pow(w,8.)+a9*pow(w,9.)+a10*pow(w,10.)+a11*pow(w,11.);
dfwdw = 0.0*a0*pow(w,-1)+1.0*a1*pow(w,0.)+2.0*a2*pow(w,1.)+3.0*a3*pow(w,2.)+4.0*a4*pow(w,3.)+
+ 5.0*a5*pow(w,4.)+6.0*a6*pow(w,5.)+7.0*a7*pow(w,6.)+8.0*a8*pow(w,7.)+9.0*a9*pow(w,8.)+
+ 10*a10*pow(w,9.)+11*a11*pow(w,10.);
dwdt = 2/pow((t + 1),2.);
dtau_lsdadd = aux * 5./3.* pow(rho,2./3.);
dtdd = dtau_lsdadd/tau;
*dfwdd = dfwdw * dwdt * dtdd;
*dfwdtau = dfwdw * dwdt * dtdtau;
}
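/* x_m06l_fw evaluates the kinetic-energy-density enhancement of Eq. (8):
       tau_LSDA = (3/10)*(6*pi^2)^(2/3) * rho_s^(5/3),
       t = tau_LSDA/tau_s,   w = (t - 1)/(t + 1),
       f(w) = sum_{i=0..11} a_i * w^i,
   with df/drho and df/dtau obtained through dw/dt and dt/drho, dt/dtau
   exactly as coded above. */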
static void
x_m06l_para(m06l_params *pt, double rho, double sigma, double tau, double *energy, double *dedd, double *vsigma, double *dedtau)
{
/*Build Eq. (6) collecting the terms Fx_PBE, fw, e_lsda and h*/
double grad, tauw, tau2, x, z;
double rho2[2],sigmatot[3];
double F_PBE, de_PBEdd[2], de_PBEdgd[3];
double h, dhdd, dhdgd, dhdtau;
double fw, dfwdd, dfwdtau;
double epsx_lsda, depsx_lsdadd;
const double Cfermi = (3./5.) * pow(6*M_PI*M_PI,2./3.);
/* calculate |nabla rho|^2 */
grad = sigma;
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*rho),1.0e-12); /* tau^W = |nabla rho|^2/ 8rho */
tau = max(tau, tauw);
rho2[0]=rho/2.;
rho2[1]=0.0;
sigmatot[0] = grad/4.;
sigmatot[1] = 0.0;
sigmatot[2] = 0.0;
tau2 =tau/2.;
/* get the uniform electron gas energy and potential; note that a MINUS sign was missing in the paper */
epsx_lsda = -(3./2.)*pow(3./(4*M_PI),1./3.)*pow(rho2[0],4./3.);
depsx_lsdadd = -2*pow(3./(4*M_PI),1./3.)*pow(rho2[0],1./3.);
/*get Fx for PBE*/
const int np = 1;
XC(gga_exc_vxc)(pt->x_aux, np, rho2, sigmatot, &F_PBE, de_PBEdd, de_PBEdgd);
/* define x and z from Eq. (1) and Eq. (3) NOTE: we build directly x^2 */
x = grad/(4*pow(rho2[0], 8./3.));
z = 2*tau2/pow(rho2[0],5./3.) - Cfermi; /*THERE IS A 2 IN FRONT AS IN THEOR. CHEM. ACCOUNT 120 215 (2008)*/
/*get h and fw*/
x_m06l_h(x, z, rho2[0], tau2, &h, &dhdd, &dhdgd, &dhdtau);
x_m06l_fw(rho2[0], tau2, &fw, &dfwdd, &dfwdtau);
{ /* Eq. (6) E_x = Int F_PBE*fw + exunif*h, the factor 2 accounts for spin. */
*energy = 2*(F_PBE*rho2[0] *fw + epsx_lsda *h);
*dedd = (de_PBEdd[0] *fw + F_PBE*rho2[0] * dfwdd+ depsx_lsdadd *h + epsx_lsda * dhdd);
*dedtau = (F_PBE * dfwdtau *rho2[0] + epsx_lsda * dhdtau);
*vsigma = (de_PBEdgd[0] *fw + epsx_lsda*dhdgd)/2.;
}
}
void
XC(mgga_x_m06l)(void *p, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
m06l_params *par = (m06l_params*)p;
if(par->common.nspin == XC_UNPOLARIZED){
double en;
x_m06l_para(p, rho[0], sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_m06l_para(p, rhoa[0], 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_m06l_para(p, rhob[0], 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
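/* The spin-polarized branch above relies on the exact spin-scaling relation
   Ex[n_a, n_b] = (Ex[2*n_a] + Ex[2*n_b])/2 (the same trick used in revtpss.c):
   doubling one spin density quadruples its |nabla n|^2 (hence 4*sigma) and
   doubles its kinetic-energy density (hence 2*tau). */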
static void m06l_init(void *p)
{
m06l_params *par = (m06l_params*)p;
par->c_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->c_aux, XC_LDA_C_PW, XC_POLARIZED);
par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_GGA_X_PBE, XC_POLARIZED);
}
static void m06l_end(void *p)
{
m06l_params *par = (m06l_params*)p;
XC(func_end)(par->c_aux);
free(par->c_aux);
XC(func_end)(par->x_aux);
free(par->x_aux);
}
const mgga_func_info m06l_info = {
sizeof(m06l_params),
&m06l_init,
&m06l_end,
&XC(mgga_x_m06l),
&XC(mgga_c_m06l),
};
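/* Nothing in this file calls m06l_info itself; the driver (xc_mgga.c, not
   shown here) owns it. A purely illustrative sketch of such a driver, with
   made-up field names size/init/end/exchange/correlation that simply mirror
   the initializer order above, could look like:

       void *params = malloc(m06l_info.size);         // room for m06l_params
       ((common_params*)params)->nspin = nspin;       // common comes first
       m06l_info.init(params);
       // for each grid point:
       //   m06l_info.exchange(params, rho, sigma, tau, &ex, dedd, vsigma, dedtau);
       //   m06l_info.correlation(params, rho, sigma, tau, &ec, dedd, vsigma, dedtau);
       m06l_info.end(params);
       free(params);

   The real driver additionally accumulates the exchange and correlation
   contributions per grid point. */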
gpaw-0.11.0.13004/c/xc/libxc.c 0000664 0001750 0001750 00000073335 12553643466 015575 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2008 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include <xc.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "xc_gpaw.h"
#include "../extensions.h"
typedef struct
{
PyObject_HEAD
/* exchange-correlation energy second derivatives */
void (*get_fxc)(XC(func_type) *func, double point[7], double der[5][5]);
XC(func_type) xc_functional;
XC(func_type) x_functional;
XC(func_type) c_functional;
XC(func_type) *functional[2]; /* store either x&c, or just xc */
int nspin; /* must be common to x and c, so declared redundantly here */
} lxcXCFunctionalObject;
void XC(lda_fxc_fd)(const XC(func_type) *p, const double *rho, double *fxc);
/* a general call for an LDA functional - finite difference */
void get_fxc_fd_lda(XC(func_type) *func, double point[7], double der[5][5])
{
double v2rho2[3], v2rhosigma[6], v2sigma2[6];
for(int i=0; i<3; i++) v2rho2[i] = 0.0;
for(int i=0; i<6; i++){
v2rhosigma[i] = 0.0;
v2sigma2[i] = 0.0;
}
XC(lda_fxc_fd)(func, point, v2rho2);
der[0][0] = v2rho2[0];
der[0][1] = der[1][0] = v2rho2[1];
der[1][1] = v2rho2[2];
der[0][2] = der[2][0] = v2rhosigma[0];
der[0][3] = der[3][0] = v2rhosigma[1];
der[0][4] = der[4][0] = v2rhosigma[2];
der[1][2] = der[2][1] = v2rhosigma[3];
der[1][3] = der[3][1] = v2rhosigma[4];
der[1][4] = der[4][1] = v2rhosigma[5];
der[2][2] = v2sigma2[0];
der[2][3] = der[3][2] = v2sigma2[1];
der[2][4] = der[4][2] = v2sigma2[2];
der[3][3] = v2sigma2[3];
der[3][4] = der[4][3] = v2sigma2[4];
der[4][4] = v2sigma2[5];
}
// finite difference calculation of second functional derivative
// stolen from libxc/testsuite/xc-consistency.c
double get_point(XC(func_type) *func, double point[7], double *e, double der[5], int which)
{
const int np = 1;
switch(func->info->family)
{
case XC_FAMILY_LDA:
XC(lda_exc_vxc)(func, np, &(point[0]), e, &(der[0]));
break;
case XC_FAMILY_GGA:
case XC_FAMILY_HYB_GGA:
XC(gga_exc_vxc)(func, np, &(point[0]), &(point[2]),
e, &(der[0]), &(der[2]));
break;
}
if(which == 0)
return (*e)*(point[0] + point[1]);
else
return der[which-1];
}
void first_derivative(XC(func_type) *func, double point[7], double der[5], int which,
int nspin)
{
int i;
for(i=0; i<5; i++){
const double delta = 5e-10;
double dd, p[5], v[5];
int j;
if(nspin==1 && (i!=0 && i!=2)){
der[i] = 0.0;
continue;
}
dd = point[i]*delta;
if(dd < delta) dd = delta;
for(j=0; j<5; j++) p[j] = point[j];
if(point[i]>=3.0*dd){ /* centered difference */
double e, em1, em2, ep1, ep2;
p[i] = point[i] + dd;
ep1 = get_point(func, p, &e, v, which);
p[i] = point[i] + 2*dd;
ep2 = get_point(func, p, &e, v, which);
p[i] = point[i] - dd; /* backward point */
em1 = get_point(func, p, &e, v, which);
p[i] = point[i] - 2*dd; /* backward point */
em2 = get_point(func, p, &e, v, which);
der[i] = 1.0/2.0*(ep1 - em1);
der[i] += 1.0/12.0*(em2 - 2*em1 + 2*ep1 - ep2);
der[i] /= dd;
}else{ /* we use a 5 point forward difference */
double e, e1, e2, e3, e4, e5;
p[i] = point[i];
e1 = get_point(func, p, &e, v, which);
p[i] = point[i] + dd;
e2 = get_point(func, p, &e, v, which);
p[i] = point[i] + 2.0*dd;
e3 = get_point(func, p, &e, v, which);
p[i] = point[i] + 3.0*dd;
e4 = get_point(func, p, &e, v, which);
p[i] = point[i] + 4.0*dd;
e5 = get_point(func, p, &e, v, which);
der[i] = (-e1 + e2);
der[i] -= 1.0/2.0*( e1 - 2*e2 + e3);
der[i] += 1.0/3.0*(-e1 + 3*e2 - 3*e3 + e4);
der[i] -= 1.0/4.0*( e1 - 4*e2 + 6*e3 - 4*e4 + e5);
der[i] /= dd;
}
}
}
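/* Both branches above reduce to the standard fourth-order finite-difference
   stencils (writing f for the value returned by get_point at the shifted
   argument and d for the step dd):
     centered:  f'(x) ~ ( f(x-2d) - 8 f(x-d) + 8 f(x+d) - f(x+2d) ) / (12 d)
     forward:   f'(x) ~ ( -25 f(x) + 48 f(x+d) - 36 f(x+2d)
                          + 16 f(x+3d) - 3 f(x+4d) ) / (12 d)
   the forward form being used whenever the point is too close to zero for a
   symmetric step. */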
void first_derivative_spinpaired(XC(func_type) *func, double point[7], double der[5],
int which)
{
first_derivative(func, point, der, which, XC_UNPOLARIZED);
}
void first_derivative_spinpolarized(XC(func_type) *func, double point[7], double der[5],
int which)
{
first_derivative(func, point, der, which, XC_POLARIZED);
}
void second_derivatives_spinpaired(XC(func_type) *func, double point[7], double der[5][5])
{
int i;
for(i=0; i<5; i++){
first_derivative_spinpaired(func, point, der[i], i+1);
}
}
void second_derivatives_spinpolarized(XC(func_type) *func, double point[7], double der[5][5])
{
int i;
for(i=0; i<5; i++){
first_derivative_spinpolarized(func, point, der[i], i+1);
}
}
/* a general call for a functional - finite difference */
void get_fxc_fd_spinpaired(XC(func_type) *func, double point[7], double der[5][5])
{
second_derivatives_spinpaired(func, point, der);
}
/* a general call for a functional - finite difference */
void get_fxc_fd_spinpolarized(XC(func_type) *func, double point[7], double der[5][5])
{
second_derivatives_spinpolarized(func, point, der);
}
static void lxcXCFunctional_dealloc(lxcXCFunctionalObject *self)
{
for (int i=0; i<2; i++)
if (self->functional[i] != NULL) xc_func_end(self->functional[i]);
PyObject_DEL(self);
}
static PyObject*
lxcXCFunctional_is_gga(lxcXCFunctionalObject *self, PyObject *args)
{
int success = 0; /* assume functional is not GGA */
// check family of most-complex functional
if (self->functional[0]->info->family == XC_FAMILY_GGA ||
self->functional[0]->info->family == XC_FAMILY_HYB_GGA) success = XC_FAMILY_GGA;
return Py_BuildValue("i", success);
}
static PyObject*
lxcXCFunctional_is_mgga(lxcXCFunctionalObject *self, PyObject *args)
{
int success = 0; /* assume functional is not MGGA */
// check family of most-complex functional
if (self->functional[0]->info->family == XC_FAMILY_MGGA) success = XC_FAMILY_MGGA;
return Py_BuildValue("i", success);
}
static PyObject*
lxcXCFunctional_CalculateFXC_FD_SpinPaired(lxcXCFunctionalObject *self, PyObject *args)
{
PyArrayObject* n_array; /* rho */
PyArrayObject* v2rho2_array; /* d2E/drho2 */
PyArrayObject* a2_array = 0; /* |nabla rho|^2*/
PyArrayObject* v2rhosigma_array = 0; /* d2E/drhod|nabla rho|^2 */
PyArrayObject* v2sigma2_array = 0; /* d2E/d(|nabla rho|^2)^2 */
if (!PyArg_ParseTuple(args, "OO|OOO", &n_array, &v2rho2_array, /* object | optional objects*/
&a2_array, &v2rhosigma_array, &v2sigma2_array))
return NULL;
/* find nspin */
int nspin = self->nspin;
assert(nspin == XC_UNPOLARIZED); /* we are spinpaired */
assert (self->functional[0]->info->family != XC_FAMILY_MGGA);
int ng = PyArray_DIMS(n_array)[0]; /* number of grid points */
const double* n_g = DOUBLEP(n_array); /* density on the grid */
double* v2rho2_g = DOUBLEP(v2rho2_array); /* v on the grid */
const double* a2_g = 0; /* a2 on the grid */
double* v2rhosigma_g = 0; /* d2Ednda2 on the grid */
double* v2sigma2_g = 0; /* d2Eda2da2 on the grid */
if ((self->functional[0]->info->family == XC_FAMILY_GGA) ||
(self->functional[0]->info->family == XC_FAMILY_HYB_GGA))
{
a2_g = DOUBLEP(a2_array);
v2rhosigma_g = DOUBLEP(v2rhosigma_array);
v2sigma2_g = DOUBLEP(v2sigma2_array);
}
self->get_fxc = get_fxc_fd_spinpaired;
/* ################################################################ */
for (int g = 0; g < ng; g++)
{
double n = n_g[g];
if (n < NMIN)
n = NMIN;
double a2 = 0.0; /* initialize for lda */
if ((self->functional[0]->info->family == XC_FAMILY_GGA) ||
(self->functional[0]->info->family == XC_FAMILY_HYB_GGA))
{
a2 = a2_g[g];
}
double point[7]; /* generalized point */
// from http://www.tddft.org/programs/octopus/wiki/index.php/Libxc:manual
// rhoa rhob sigmaaa sigmaab sigmabb taua taub
// \sigma[0] = \nabla n_\uparrow \cdot \nabla n_\uparrow \qquad
// \sigma[1] = \nabla n_\uparrow \cdot \nabla n_\downarrow \qquad
// \sigma[2] = \nabla n_\downarrow \cdot \nabla n_\downarrow \qquad
double derivative[5][5]; /* generalized derivative */
double v2rho2[3];
double v2rhosigma[6];
double v2sigma2[6];
// one that uses this: please add description of spin derivative order notation
// (see c/libxc/src/gga_perdew.c) MDTMP
for(int i=0; i<3; i++) v2rho2[i] = 0.0;
for(int i=0; i<6; i++){
v2rhosigma[i] = 0.0;
v2sigma2[i] = 0.0;
}
for(int j=0; j<7; j++)
{
point[j] = 0.0;
}
for(int i=0; i<5; i++)
{
for(int j=0; j<5; j++)
{
derivative[i][j] = 0.0;
}
}
point[0] = n; /* -> rho */
point[2] = a2; /* -> sigma */
for (int i=0; i<2; i++) {
XC(func_type) *func = self->functional[i];
if (func == NULL) continue;
self->get_fxc(func, point, derivative);
v2rho2[0] = derivative[0][0];
v2rho2[1] = derivative[0][1]; // XC_POLARIZED
v2rho2[2] = derivative[1][1]; // XC_POLARIZED
v2rhosigma[0] = derivative[0][2];
v2rhosigma[1] = derivative[0][3]; // XC_POLARIZED
v2rhosigma[2] = derivative[0][4]; // XC_POLARIZED
v2rhosigma[3] = derivative[1][2]; // XC_POLARIZED
v2rhosigma[4] = derivative[1][3]; // XC_POLARIZED
v2rhosigma[5] = derivative[1][4]; // XC_POLARIZED
v2sigma2[0] = derivative[2][2]; /* aa_aa */
v2sigma2[1] = derivative[2][3]; // XC_POLARIZED /* aa_ab */
v2sigma2[2] = derivative[2][4]; // XC_POLARIZED /* aa_bb */
v2sigma2[3] = derivative[3][3]; // XC_POLARIZED /* ab_ab */
v2sigma2[4] = derivative[3][4]; // XC_POLARIZED /* ab_bb */
v2sigma2[5] = derivative[4][4]; // XC_POLARIZED /* bb_bb */
switch(func->info->family)
{
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
v2rhosigma_g[g] += v2rhosigma[0];
v2sigma2_g[g] += v2sigma2[0];
// don't break here since we need LDA values as well
case XC_FAMILY_LDA:
v2rho2_g[g] += v2rho2[0];
}
}
}
Py_RETURN_NONE;
}
// Below are changes made by cpo@slac.stanford.edu for libxc 1.2.0
// which allows passing of arrays of points to libxc routines.
// The fundamental design idea (to try to minimize code-duplication) is that
// all libxc routines have input/output arrays that get processed in
// common ways with three special exceptions: n_sg, e_g, dedn_sg. The
// struct "xcptrlist" is used to keep track of these pointers.
// Two libxc features prevent us from using a straightforward
// interface:
// 1) libxc calls memset(0) on output arrays, preventing us
// from adding x/c contributions "in place" without scratch arrays
// 2) for spin-polarized calculations libxc wants spin indices to be
// dense in memory, whereas GPAW probably loops over grid indices
// more often, so we want to keep those dense in memory.
// I asked Miguel Marques to remove the memset, and to add a "stride"
// argument to libxc routines to address the above. He says he will
// consider it in the future. In the meantime we have to "block"
// over gridpoints using some scratch memory.
// What is supported:
// - combined xc-functional mode
// - separate x,c functionals.
// - separate x,c can have differing complexities (e.g. one GGA, one LDA)
// - "exc_vxc" style routines for LDA/GGA/MGGA both unpolarized/polarized
// - "fxc" style routines for LDA/GGA both unpolarized/polarized
// To support a libxc routine other than exc_vxc/fxc one needs to
// copy a "Calculate" routine and change the pointer list setup, and
// associated libxc function calls.
// number of gridpoints we will "block" over when doing xc calculation
#define BLOCKSIZE 1024
// this is the maximum number of BLOCKSIZE arrays that will be put
// into scratch (depends on the "spinsize" values for the various
// arrays. currently determined by fxc, which has input spinsizes
// of 2+3 and output spinsizes of 3+6+6 (totalling 20).
#define MAXARRAYS 20
#define LIBXCSCRATCHSIZE (BLOCKSIZE*MAXARRAYS)
static double *scratch=NULL;
// we don't use lapl, but libxc needs space for them.
static double *scratch_lapl=NULL;
static double *scratch_vlapl=NULL;
// special cases for array behaviors:
// flag to indicate we need to add to existing values for dedn_sg
#define DEDN_SG 1
// flag to indicate we need to apply NMIN cutoff to n_sg
#define N_SG 2
// flag to indicate we need to multiply by density for e_g
#define E_G 4
typedef struct xcptr {
double *p;
int special;
int spinsize;
} xcptr;
#define MAXPTR 10
typedef struct xcptrlist {
int num;
xcptr p[MAXPTR];
} xcptrlist;
typedef struct xcinfo {
int nspin;
bool spinpolarized;
int ng;
} xcinfo;
// these 3 functions make the spin index closest in memory ("gather") or the
// farthest apart in memory ("scatter"). "scatteradd" adds to previous results.
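// A concrete example with nspins=2 and np=3 grid points: GPAW stores the data
// spin-blocked, with a stride of ng between the spin components,
//     src = [ a0 a1 a2 ... | b0 b1 b2 ... ]
// while libxc expects the spin index fastest within each grid point,
//     dst = [ a0 b0  a1 b1  a2 b2 ].
// gather() produces the second layout from the first, scatter() goes back,
// and scatteradd() is a scatter() that accumulates into the destination.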
static void gather(const double* src, double* dst, int np, int stride, int nspins) {
const double *dstend = dst+np*nspins;
const double *srcend = src+nspins*stride;
do {
const double *s = src;
do {
*dst++ = *s; s+=stride;
} while (snum; i++) {
inblocklist[i] = next;
next+=blocksize*inlist->p[i].spinsize;
}
for (int i=0; inum; i++) {
outblocklist[i] = next;
next+=blocksize*outlist->p[i].spinsize;
}
// check that we fit in the scratch space
// if we don't, then we need to increase MAXARRAY
assert((next - scratch) <= LIBXCSCRATCHSIZE);
}
// copy a piece of the full data into the block for processing by libxc
static void data2block(const xcinfo *info,
const xcptrlist *inlist, double *inblocklist[],
int blocksize) {
// copy data into the block, taking into account special cases
for (int i=0; inum; i++) {
double *ptr = inlist->p[i].p; double* block = inblocklist[i];
if (info->spinpolarized) {
gather(ptr,block,blocksize,info->ng,inlist->p[i].spinsize);
if (inlist->p[i].special&N_SG)
for (int i=0; ip[i].special&N_SG) for (int i=0; inum; i++) {
double *ptr = outlist->p[i].p; double* block = outblocklist[i];
if (outlist->p[i].special&E_G) {
if (info->spinpolarized) {
for (int i=0; ip[i].special&DEDN_SG) {
if (info->spinpolarized) {
scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize); // need to add to pre-existing values
} else {
for (int i=0; ispinpolarized) {
scatter(block,ptr,blocksize,info->ng,outlist->p[i].spinsize);
} else {
memcpy(ptr,block,blocksize*sizeof(double));
}
}
}
}
// copy the data from the block back into its final resting place, but add to previous results
static void block2dataadd(const xcinfo *info, double *outblocklist[], const xcptrlist *outlist,
const double *n_sg, int blocksize, int noutcopy) {
for (int i=0; ip[i].p; double* block = outblocklist[i];
if (outlist->p[i].special&E_G) {
if (info->spinpolarized) {
for (int i=0; ispinpolarized) {
scatteradd(block,ptr,blocksize,info->ng,outlist->p[i].spinsize);
} else {
for (int i=0; inspin;
info.spinpolarized = (info.nspin==2);
info.ng = PyArray_DIMS(py_e_g)[0];
xcptrlist inlist,outlist;
inlist.num=0;
outlist.num=0;
int blocksize = BLOCKSIZE;
int remaining = info.ng;
// setup pointers using most complex functional
switch(self->functional[0]->info->family)
{
case XC_FAMILY_MGGA:
inlist.p[2].p = DOUBLEP(py_tau_sg);
inlist.p[2].special = 0;
inlist.p[2].spinsize = 2;
inlist.num++;
outlist.p[3].p = DOUBLEP(py_dedtau_sg);
outlist.p[3].special = 0;
outlist.p[3].spinsize = 2;
outlist.num++;
// don't break here since MGGA also needs GGA ptrs
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
inlist.p[1].p = DOUBLEP(py_sigma_xg);
inlist.p[1].special = 0;
inlist.p[1].spinsize = 3;
inlist.num++;
outlist.p[2].p = DOUBLEP(py_dedsigma_xg);
outlist.p[2].special = 0;
outlist.p[2].spinsize = 3;
outlist.num++;
// don't break here since GGA also needs LDA ptrs
case XC_FAMILY_LDA:
inlist.p[0].p = DOUBLEP(py_n_sg);
inlist.p[0].special = N_SG;
inlist.p[0].spinsize = 2;
inlist.num += 1;
outlist.p[0].p = DOUBLEP(py_e_g);
outlist.p[0].special = E_G;
outlist.p[0].spinsize = 1;
outlist.p[1].p = DOUBLEP(py_dedn_sg);
outlist.p[1].special = DEDN_SG;
outlist.p[1].spinsize = 2;
outlist.num += 2;
}
assert(inlist.num < MAXPTR);
assert(outlist.num < MAXPTR);
double *inblock[MAXPTR];
double *outblock[MAXPTR];
setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize);
do {
blocksize = blocksizefunctional[i] == NULL) continue;
XC(func_type) *func = self->functional[i];
int noutcopy=0;
switch(func->info->family)
{
case XC_FAMILY_LDA:
xc_lda_exc_vxc(func, blocksize, n_sg, e_g, dedn_sg);
noutcopy = 2; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
xc_gga_exc_vxc(func, blocksize,
n_sg, sigma_xg, e_g,
dedn_sg, dedsigma_xg);
noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_MGGA:
xc_mgga_exc_vxc(func, blocksize, n_sg, sigma_xg, scratch_lapl,
tau_sg, e_g, dedn_sg, dedsigma_xg, scratch_vlapl,
dedtau_sg);
noutcopy = 4; // potentially decrease the size for block2dataadd if second functional less complex.
break;
}
// if we have more than 1 functional, add results
// canonical example: adding "x" results to "c"
if (i==0)
block2data(&info, &outblock[0], &outlist, n_sg, blocksize);
else
block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy);
}
for (int i=0; i0);
Py_RETURN_NONE;
}
static PyObject*
lxcXCFunctional_CalculateFXC(lxcXCFunctionalObject *self, PyObject *args)
{
PyArrayObject* py_n_sg=NULL;
PyArrayObject* py_v2rho2_xg=NULL;
PyArrayObject* py_sigma_xg=NULL;
PyArrayObject* py_v2rhosigma_yg=NULL;
PyArrayObject* py_v2sigma2_yg=NULL;
if (!PyArg_ParseTuple(args, "OO|OOO", &py_n_sg, &py_v2rho2_xg,
&py_sigma_xg, &py_v2rhosigma_yg, &py_v2sigma2_yg))
return NULL;
xcinfo info;
info.nspin = self->nspin;
info.spinpolarized = (info.nspin==2);
info.ng = (info.spinpolarized) ? PyArray_DIMS(py_n_sg)[0]/2 : PyArray_DIMS(py_n_sg)[0];
xcptrlist inlist,outlist;
inlist.num=0;
outlist.num=0;
int blocksize = BLOCKSIZE;
int remaining = info.ng;
// setup pointers using most complex functional
switch(self->functional[0]->info->family)
{
case XC_FAMILY_MGGA:
// not supported
assert(self->functional[0]->info->family != XC_FAMILY_MGGA);
// don't break here since MGGA also needs GGA ptrs
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
inlist.p[1].p = DOUBLEP(py_sigma_xg);
inlist.p[1].special = 0;
inlist.p[1].spinsize = 3;
inlist.num++;
outlist.p[1].p = DOUBLEP(py_v2rhosigma_yg);
outlist.p[1].special = 0;
outlist.p[1].spinsize = 6;
outlist.p[2].p = DOUBLEP(py_v2sigma2_yg);
outlist.p[2].special = 0;
outlist.p[2].spinsize = 6;
outlist.num+=2;
// don't break here since GGA also needs LDA ptrs
case XC_FAMILY_LDA:
inlist.p[0].p = DOUBLEP(py_n_sg);
inlist.p[0].special = N_SG;
inlist.p[0].spinsize = 2;
inlist.num += 1;
outlist.p[0].p = DOUBLEP(py_v2rho2_xg);
outlist.p[0].special = 0;
outlist.p[0].spinsize = 3;
outlist.num++;
}
assert(inlist.num < MAXPTR);
assert(outlist.num < MAXPTR);
double *inblock[MAXPTR];
double *outblock[MAXPTR];
setupblockptrs(&info, &inlist, &outlist, &inblock[0], &outblock[0], blocksize);
do {
blocksize = blocksizefunctional[i] == NULL) continue;
XC(func_type) *func = self->functional[i];
int noutcopy=0;
switch(func->info->family)
{
case XC_FAMILY_LDA:
xc_lda_fxc(func, blocksize, n_sg, v2rho2);
noutcopy = 1; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_HYB_GGA:
case XC_FAMILY_GGA:
xc_gga_fxc(func, blocksize, n_sg, sigma_xg,
v2rho2, v2rhosigma, v2sigma2);
noutcopy = 3; // potentially decrease the size for block2dataadd if second functional less complex.
break;
case XC_FAMILY_MGGA:
// not supported by GPAW yet, so crash
assert (func->info->family!=XC_FAMILY_MGGA);
break;
}
// if we have more than 1 functional, add results
// canonical example: adding "x" results to "c"
if (i==0)
block2data(&info, &outblock[0], &outlist, n_sg, blocksize);
else
block2dataadd(&info, &outblock[0], &outlist, n_sg, blocksize, noutcopy);
}
for (int i=0; i0);
Py_RETURN_NONE;
}
static PyObject*
lxcXCFunctional_tb09(lxcXCFunctionalObject *self, PyObject *args)
{
double c;
PyArrayObject* n_g;
PyArrayObject* sigma_g;
PyArrayObject* lapl_g;
PyArrayObject* tau_g;
PyArrayObject* v_g;
PyArrayObject* vx_g; // for vsigma, vtau, vlapl
if (!PyArg_ParseTuple(args, "dOOOOOO",
&c, &n_g, &sigma_g, &lapl_g, &tau_g, &v_g, &vx_g))
return NULL;
xc_mgga_x_tb09_set_params(self->functional[0], c);
xc_mgga_vxc(self->functional[0], PyArray_DIM(n_g, 0),
PyArray_DATA(n_g),
PyArray_DATA(sigma_g),
PyArray_DATA(lapl_g),
PyArray_DATA(tau_g),
PyArray_DATA(v_g),
PyArray_DATA(vx_g),
PyArray_DATA(vx_g),
PyArray_DATA(vx_g));
Py_RETURN_NONE;
}
static PyMethodDef lxcXCFunctional_Methods[] = {
{"is_gga",
(PyCFunction)lxcXCFunctional_is_gga, METH_VARARGS, 0},
{"is_mgga",
(PyCFunction)lxcXCFunctional_is_mgga, METH_VARARGS, 0},
{"calculate_fxc_fd_spinpaired",
(PyCFunction)lxcXCFunctional_CalculateFXC_FD_SpinPaired, METH_VARARGS, 0},
{"calculate",
(PyCFunction)lxcXCFunctional_Calculate, METH_VARARGS, 0},
{"calculate_fxc_spinpaired",
(PyCFunction)lxcXCFunctional_CalculateFXC, METH_VARARGS, 0},
{"tb09",
(PyCFunction)lxcXCFunctional_tb09, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
PyTypeObject lxcXCFunctionalType = {
PyVarObject_HEAD_INIT(NULL, 0)
"lxcXCFunctional",
sizeof(lxcXCFunctionalObject),
0,
(destructor)lxcXCFunctional_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"LibXCFunctional object",
0, 0, 0, 0, 0, 0,
lxcXCFunctional_Methods
};
PyObject * NewlxcXCFunctionalObject(PyObject *obj, PyObject *args)
{
int xc, x, c; /* functionals identifier number */
int nspin; /* XC_UNPOLARIZED or XC_POLARIZED */
if (!scratch) {
scratch = (double*)malloc(LIBXCSCRATCHSIZE*sizeof(double));
const int laplsize = BLOCKSIZE*sizeof(double)*2;
scratch_lapl = (double*)malloc(laplsize);
memset(scratch_lapl,0,laplsize);
scratch_vlapl = (double*)malloc(laplsize);
}
if (!PyArg_ParseTuple(args, "iiii", &xc, &x, &c, &nspin)) {
return NULL;
}
/* checking if the numbers xc x c are valid is done at python level */
lxcXCFunctionalObject *self = PyObject_NEW(lxcXCFunctionalObject,
&lxcXCFunctionalType);
if (self == NULL){
return NULL;
}
assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED);
self->nspin = nspin; /* must be common to x and c, so declared redundantly */
int number,family,familyx,familyc;
if (xc != -1) {
xc_family_from_id(xc,&family,&number);
assert (family != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->xc_functional, xc, nspin);
self->functional[0]=&self->xc_functional;
self->functional[1]=NULL;
} else {
assert (x!=-1 || c!=-1);
if (x!=-1) {
xc_family_from_id(x,&familyx,&number);
assert (familyx != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->x_functional, x, nspin);
}
if (c!=-1) {
xc_family_from_id(c,&familyc,&number);
assert (familyc != XC_FAMILY_UNKNOWN);
XC(func_init)(&self->c_functional, c, nspin);
}
if (x!=-1 && c!=-1) {
/* put most complex functional first */
/* important for later loops over functionals */
if (familyx == XC_FAMILY_MGGA) {
self->functional[0]=&self->x_functional;
self->functional[1]=&self->c_functional;
} else if (familyc == XC_FAMILY_MGGA) {
self->functional[0]=&self->c_functional;
self->functional[1]=&self->x_functional;
} else if (familyx == XC_FAMILY_GGA || familyx == XC_FAMILY_HYB_GGA) {
self->functional[0]=&self->x_functional;
self->functional[1]=&self->c_functional;
} else {
// either c is GGA, or both are LDA (so don't care)
self->functional[0]=&self->c_functional;
self->functional[1]=&self->x_functional;
}
} else if (x!=-1) {
self->functional[0]=&self->x_functional;
self->functional[1]=NULL;
} else if (c!=-1) {
self->functional[0]=&self->c_functional;
self->functional[1]=NULL;
}
}
return (PyObject*)self;
}
PyObject * lxcXCFuncNum(PyObject *obj, PyObject *args)
{
char *funcname;
if (!PyArg_ParseTuple(args, "s", &funcname)) {
return NULL;
}
int num = XC(functional_get_number)(funcname);
if (num != -1)
return Py_BuildValue("i",num);
else
Py_RETURN_NONE;
}
gpaw-0.11.0.13004/c/xc/revtpss.c 0000664 0001750 0001750 00000040155 12553643466 016174 0 ustar jensj jensj 0000000 0000000
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "xc_mgga.h"
typedef struct revtpss_params {
common_params common; // needs to be at the beginning of every functional_params
XC(func_type) *x_aux;
XC(func_type) c_aux1;
XC(func_type) c_aux2;
} revtpss_params;
void gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma,
double *e, double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2);
/************************************************************************
Implements the revTPSS meta-Generalized Gradient Approximation of
John P. Perdew, Adrienn Ruzsinszky, Gabor I. Csonka, Lucian A. Constantin, and Jianwei Sun.
Correlation part
************************************************************************/
/* some parameters */
static double d = 2.8;
/* Equation (14) */
static void
c_revtpss_14(double csi, double zeta, double *C, double *dCdcsi, double *dCdzeta)
{
double fz, C0, dC0dz, dfzdz;
double z2 = zeta*zeta;
/* Equation (13) */
C0 = 0.59 + z2*(0.9269 + z2*(0.6225 + z2*2.1540));
dC0dz = zeta*(2.0*0.9269 + z2*(4.0*0.6225 + z2*6.0*2.1540)); /*OK*/
fz = 0.5*(pow(1.0 + zeta, -4.0/3.0) + pow(1.0 - zeta, -4.0/3.0));
dfzdz = 0.5*(-4.0/3.0)*(pow(1.0 + zeta, -7.0/3.0) - pow(1.0 - zeta, -7.0/3.0)); /*OK*/
{ /* Equation (14) */
double csi2 = csi*csi;
double a = 1.0 + csi2*fz, a4 = pow(a, 4);
*C = C0 / a4;
*dCdcsi = -8.0*C0*csi*fz/(a*a4); /*added C OK*/
*dCdzeta = (dC0dz*a - C0*4.0*csi2*dfzdz)/(a*a4); /*OK*/
}
}
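/* Written out, Eqs. (13)-(14) as coded above are
       C0(zeta)     = 0.59 + 0.9269 zeta^2 + 0.6225 zeta^4 + 2.1540 zeta^6,
       fz(zeta)     = [ (1+zeta)^(-4/3) + (1-zeta)^(-4/3) ] / 2,
       C(csi, zeta) = C0(zeta) / ( 1 + csi^2 * fz(zeta) )^4,
   with dC/dcsi and dC/dzeta returned for the chain rule in c_revtpss_12. */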
/* Equation (12) */
static void c_revtpss_12(revtpss_params *p, const double *rho, const double *sigma,
double dens, double zeta, double z,
double *e_PKZB, double *de_PKZBdd, double *de_PKZBdsigma, double *de_PKZBdz)
{
/*some incoming variables:
dens = rho[0] + rho[1]
z = tau_w/tau
zeta = (rho[0] - rho[1])/dens*/
double e_PBE, e_PBEup, e_PBEdn;
double de_PBEdd[2], de_PBEdsigma[3], de_PBEddup[2], de_PBEdsigmaup[3], de_PBEdddn[2], de_PBEdsigmadn[3] ;
double aux, zsq;
double dzetadd[2], dcsidd[2], dcsidsigma[3];
double C, dCdcsi, dCdzeta;
double densp[2], densp2[2], sigmatot[3], sigmaup[3], sigmadn[3];
int i;
/*initialize dCdcsi and dCdzeta and the energy*/
dCdcsi = dCdzeta = 0.0;
e_PBE = 0.0;
e_PBEup = 0.0;
e_PBEdn = 0.0;
/* get the PBE stuff */
if(p->common.nspin== XC_UNPOLARIZED)
{ densp[0]=rho[0]/2.;
densp[1]=rho[0]/2.;
sigmatot[0] = sigma[0]/4.;
sigmatot[1] = sigma[0]/4.;
sigmatot[2] = sigma[0]/4.;
}else{
densp[0] = rho[0];
densp[1] = rho[1];
sigmatot[0] = sigma[0];
sigmatot[1] = sigma[1];
sigmatot[2] = sigma[2];
}
/* e_PBE */
XC(func_type) *aux2 = (p->common.nspin == XC_UNPOLARIZED) ? &p->c_aux2 : &p->c_aux1;
gga_c_pbe_revtpss(aux2, densp, sigmatot, &e_PBE, de_PBEdd, de_PBEdsigma, NULL, NULL, NULL);
densp2[0]=densp[0];
densp2[1]=0.0;
if(p->common.nspin== XC_UNPOLARIZED)
{
sigmaup[0] = sigma[0]/4.;
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}else{
sigmaup[0] = sigma[0];
sigmaup[1] = 0.;
sigmaup[2] = 0.;
}
/* e_PBE spin up */
gga_c_pbe_revtpss(aux2, densp2, sigmaup, &e_PBEup, de_PBEddup, de_PBEdsigmaup, NULL, NULL, NULL);
densp2[0]=densp[1];
densp2[1]=0.0;
if(p->common.nspin== XC_UNPOLARIZED)
{
sigmadn[0] = sigma[0]/4.;
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}else{
sigmadn[0] = sigma[2];
sigmadn[1] = 0.;
sigmadn[2] = 0.;
}
/* e_PBE spin down */
gga_c_pbe_revtpss(aux2, densp2, sigmadn, &e_PBEdn, de_PBEdddn, de_PBEdsigmadn, NULL, NULL, NULL);
/*get Eq. (13) and (14) for the polarized case*/
if(p->common.nspin == XC_UNPOLARIZED){
C = 0.59;
dzetadd[0] = 0.0;
dcsidd [0] = 0.0;
dzetadd[1] = 0.0;
dcsidd [1] = 0.0;
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
}else{
// initialize derivatives
for(i=0; i<2; i++){
dzetadd[i] = 0.0;
dcsidd [i] = 0.0;}
for(i=0; i<3; i++) dcsidsigma[i] = 0.0;
double num, gzeta, csi, a;
/*numerator of csi: derive as grho all components and then square the 3 parts
[2 (grho_a[0]n_b - grho_b[0]n_a) +2 (grho_a[1]n_b - grho_b[1]n_a) + 2 (grho_a[2]n_b - grho_b[2]n_a)]/(n_a+n_b)^2
-> 4 (sigma_aa n_b^2 - 2 sigma_ab n_a n_b + sigma_bb n_a^2)/(n_a+n_b)^2 */
num = sigma[0] * pow(rho[1],2) - 2.* sigma[1]*rho[0]*rho[1]+ sigma[2]*pow(rho[0],2);
num = max(num, 1e-20);
gzeta = sqrt(4*(num))/(dens*dens);
gzeta = max(gzeta, MIN_GRAD);
/*denominator of csi*/
a = 2*pow(3.0*M_PI*M_PI*dens, 1.0/3.0);
csi = gzeta/a;
c_revtpss_14(csi, zeta, &C, &dCdcsi, &dCdzeta);
dzetadd[0] = (1.0 - zeta)/dens; /*OK*/
dzetadd[1] = -(1.0 + zeta)/dens; /*OK*/
dcsidd [0] = 0.5*csi*(-2*sigma[1]*rho[1]+2*sigma[2]*rho[0])/num - 7./3.*csi/dens; /*OK*/
dcsidd [1] = 0.5*csi*(-2*sigma[1]*rho[0]+2*sigma[0]*rho[1])/num - 7./3.*csi/dens; /*OK*/
dcsidsigma[0]= csi*pow(rho[1],2)/(2*num); /*OK*/
dcsidsigma[1]= -csi*rho[0]*rho[1]/num; /*OK*/
dcsidsigma[2]= csi*pow(rho[0],2)/(2*num); /*OK*/
}
aux = (densp[0] * max(e_PBEup, e_PBE) + densp[1] * max(e_PBEdn, e_PBE)) / dens;
double dauxdd[2], dauxdsigma[3];
if(e_PBEup > e_PBE)
{
//case densp[0] * e_PBEup
dauxdd[0] = de_PBEddup[0];
dauxdd[1] = 0.0;
dauxdsigma[0] = de_PBEdsigmaup[0];
dauxdsigma[1] = 0.0;
dauxdsigma[2] = 0.0;
}else{
//case densp[0] * e_PBE
dauxdd[0] = densp[0] / dens * (de_PBEdd[0] - e_PBE) + e_PBE;
dauxdd[1] = densp[0] / dens * (de_PBEdd[1] - e_PBE);
dauxdsigma[0] = densp[0] / dens * de_PBEdsigma[0];
dauxdsigma[1] = densp[0] / dens * de_PBEdsigma[1];
dauxdsigma[2] = densp[0] / dens * de_PBEdsigma[2];
}
if(e_PBEdn > e_PBE)
{//case densp[1] * e_PBEdn
dauxdd[0] += 0.0;
dauxdd[1] += de_PBEdddn[0];
dauxdsigma[0] += 0.0;
dauxdsigma[1] += 0.0;
dauxdsigma[2] += de_PBEdsigmadn[0];
}else{//case densp[1] * e_PBE
dauxdd[0] += densp[1] / dens * (de_PBEdd[0] - e_PBE);
dauxdd[1] += densp[1] / dens * (de_PBEdd[1] - e_PBE) + e_PBE;
dauxdsigma[0] += densp[1] / dens * de_PBEdsigma[0];
dauxdsigma[1] += densp[1] / dens * de_PBEdsigma[1];
dauxdsigma[2] += densp[1] / dens * de_PBEdsigma[2];
}
zsq=z*z;
*e_PKZB = (e_PBE*(1.0 + C * zsq) - (1.0 + C) * zsq * aux);
*de_PKZBdz = dens * e_PBE * C * 2*z - dens * (1.0 + C) * 2*z * aux; /*? think ok*/
double dCdd[2];
dCdd[0] = dCdzeta*dzetadd[0] + dCdcsi*dcsidd[0]; /*OK*/
dCdd[1] = dCdzeta*dzetadd[1] + dCdcsi*dcsidd[1]; /*OK*/
/* partial derivatives*/
de_PKZBdd[0] = de_PBEdd[0] * (1.0 + C*zsq) + dens * e_PBE * dCdd[0] * zsq
- zsq * (dens*dCdd[0] * aux + (1.0 + C) * dauxdd[0]);
de_PKZBdd[1] = de_PBEdd[1] * (1.0 + C*zsq) + dens * e_PBE * dCdd[1] * zsq
- zsq * (dens*dCdd[1] * aux + (1.0 + C) * dauxdd[1]);
int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3;
for(i=0; i<nder; i++){
if(p->common.nspin==XC_UNPOLARIZED) dauxdsigma[i] /= 2.;
double dCdsigma = dCdcsi*dcsidsigma[i];
/* partial derivatives*/
de_PKZBdsigma[i] = de_PBEdsigma[i] * (1.0 + C * zsq) + dens * e_PBE * dCdsigma * zsq
- zsq * (dens * dCdsigma * aux + (1.0 + C) * dauxdsigma[i]);
}
}
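/* Eq. (12) as assembled above:
       z      = tau_W / tau,
       e_PKZB = e_PBE(n_up, n_dn) * (1 + C*z^2)
                - (1 + C) * z^2 * [ n_up*max(e_up, e_PBE) + n_dn*max(e_dn, e_PBE) ] / n,
   where e_up (e_dn) is the PBE correlation evaluated with only the spin-up
   (spin-down) density present, and C = C(csi, zeta) from Eq. (14) above. */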
static void
XC(mgga_c_revtpss)(void *par, const double *rho, const double *sigmatmp, const double *tau,
double *energy, double *dedd, double *vsigma, double *dedtau)
{
double sigma[3];
revtpss_params *p = (revtpss_params*)par;
double dens, zeta, grad;
double tautr, taut, tauw, z;
double e_PKZB, de_PKZBdd[2], de_PKZBdsigma[3], de_PKZBdz;
int i, is;
sigma[0] = sigmatmp[0];
sigma[1] = sigmatmp[1];
sigma[2] = sigmatmp[2];
zeta = (rho[0]-rho[1])/(rho[0]+rho[1]);
dens = rho[0];
tautr = tau[0];
grad = sigma[0];
if(p->common.nspin == XC_POLARIZED) {
dens += rho[1];
tautr += tau[1];
grad += (2*sigma[1] + sigma[2]);
}
grad = max(MIN_GRAD*MIN_GRAD, grad);
tauw = max(grad/(8.0*dens), 1.0e-12);
taut = max(tautr, tauw);
z = tauw/taut;
sigma[0] = max(MIN_GRAD*MIN_GRAD, sigma[0]);
if(p->common.nspin == XC_POLARIZED)
{
//sigma[1] = max(MIN_GRAD*MIN_GRAD, sigma[1]);
sigma[2] = max(MIN_GRAD*MIN_GRAD, sigma[2]);
}
/* Equation (12) */
c_revtpss_12(p, rho, sigma, dens, zeta, z,
&e_PKZB, de_PKZBdd, de_PKZBdsigma, &de_PKZBdz);
/* Equation (11) */
{
double z2 = z*z, z3 = z2*z;
double dedz;
double dzdd[2], dzdsigma[3], dzdtau;
if(tauw >= tautr || fabs(tauw- tautr)< 1.0e-10){
dzdtau = 0.0;
dzdd[0] = 0.0;
dzdd[1] = 0.0;
dzdsigma[0] = 0.0;
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
}else{
dzdtau = -z/taut;
dzdd[0] = - z/dens;
dzdd[1] = 0.0;
if (p->common.nspin == XC_POLARIZED) dzdd[1] = - z/dens;
dzdsigma[0] = 1.0/(8*dens*taut);
dzdsigma[1] = 0.0;
dzdsigma[2] = 0.0;
if (p->common.nspin == XC_POLARIZED) {
dzdsigma[1] = 2.0/(8*dens*taut);
dzdsigma[2] = 1.0/(8*dens*taut);
}
}
*energy = e_PKZB * (1.0 + d*e_PKZB*z3);
/* due to the definition of na and nb in libxc.c we need to divide by (na+nb) to recover the
* same energy for polarized and unpolarized calculation with the same total density */
if(p->common.nspin == XC_UNPOLARIZED) *energy *= dens/(rho[0]+rho[1]);
dedz = de_PKZBdz*(1.0 + 2.0*d*e_PKZB*z3) + dens*e_PKZB * e_PKZB * d * 3.0*z2;
for(is=0; is<p->common.nspin; is++){
dedd[is] = de_PKZBdd[is] * (1.0 + 2.0*d*e_PKZB*z3) + dedz*dzdd[is] - e_PKZB*e_PKZB * d * z3; /*OK*/
dedtau[is] = dedz * dzdtau; /*OK*/
}
int nder = (p->common.nspin==XC_UNPOLARIZED) ? 1 : 3;
for(i=0; ix_aux, np, rho, &exunif, &vxunif);
/* calculate |nabla rho|^2 */
gdms = max(MIN_GRAD*MIN_GRAD, sigma);
/* Eq. (4) */
p = gdms/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
dpdd = -(8.0/3.0)*p/rho[0];
dpdsigma= 1/(4.0*pow(3*M_PI*M_PI, 2.0/3.0)*pow(rho[0], 8.0/3.0));
/* von Weisaecker kinetic energy density */
tauw = max(gdms/(8.0*rho[0]), 1.0e-12);
tau = max(tau_, tauw);
tau_lsda = aux * pow(rho[0],5./3.);
dtau_lsdadd = aux * 5./3.* pow(rho[0],2./3.);
alpha = (tau - tauw)/tau_lsda;
if(fabs(tauw-tau_)< 1.0e-10){
dalphadsigma = 0.0;
dalphadtau = 0.0;
dalphadd = 0.0;
}else{
dalphadtau = 1./tau_lsda;
dalphadsigma = -1./(tau_lsda*8.0*rho[0]);
dalphadd = (tauw/rho[0]* tau_lsda - (tau - tauw) * dtau_lsdadd)/ pow(tau_lsda,2.);
}
/* get Eq. (10) */
x_revtpss_10(p, alpha, &x, &dxdp, &dxdalpha);
{ /* Eq. (5) */
double a = kappa/(kappa + x);
Fx = 1.0 + kappa*(1.0 - a);
dFxdx = a*a;
}
{ /* Eq. (3) */
*energy = exunif*Fx*rho[0];
//printf("Ex %.9e\n", *energy);
/* exunif is en per particle already so we multiply by n the terms with exunif*/
*dedd = vxunif*Fx + exunif*dFxdx*rho[0]*(dxdp*dpdd + dxdalpha*dalphadd);
*vsigma = exunif*dFxdx*rho[0]*(dxdp*dpdsigma + dxdalpha*dalphadsigma);
*dedtau = exunif*dFxdx*rho[0]*(dxdalpha*dalphadtau);
}
}
void
XC(mgga_x_revtpss)(void *par, const double *rho, const double *sigma, const double *tau,
double *e, double *dedd, double *vsigma, double *dedtau)
{
revtpss_params *p = (revtpss_params*)par;
if(p->common.nspin == XC_UNPOLARIZED){
double en;
x_revtpss_para(p, rho, sigma[0], tau[0], &en, dedd, vsigma, dedtau);
*e = en/(rho[0]+rho[1]);
}else{
/* The spin polarized version is handled using the exact spin scaling
Ex[n1, n2] = (Ex[2*n1] + Ex[2*n2])/2
*/
*e = 0.0;
double e2na, e2nb, rhoa[2], rhob[2];
double vsigmapart[3];
rhoa[0]=2*rho[0];
rhoa[1]=0.0;
rhob[0]=2*rho[1];
rhob[1]=0.0;
x_revtpss_para(p, rhoa, 4*sigma[0], 2.0*tau[0], &e2na, &(dedd[0]), &(vsigmapart[0]), &(dedtau[0]));
x_revtpss_para(p, rhob, 4*sigma[2], 2.0*tau[1], &e2nb, &(dedd[1]), &(vsigmapart[2]), &(dedtau[1]));
*e = (e2na + e2nb )/(2.*(rho[0]+rho[1]));
vsigma[0] = 2*vsigmapart[0];
vsigma[2] = 2*vsigmapart[2];
}
}
static void revtpss_init(void *p) {
revtpss_params *par = (revtpss_params*)p;
par->x_aux = (XC(func_type) *) malloc(sizeof(XC(func_type)));
XC(func_init)(par->x_aux, XC_LDA_X, XC_UNPOLARIZED);
XC(func_init)(&par->c_aux1, XC_LDA_C_PW_MOD, par->common.nspin);
XC(func_init)(&par->c_aux2, XC_LDA_C_PW_MOD, XC_POLARIZED);
}
static void revtpss_end(void *p) {
revtpss_params *par = (revtpss_params*)p;
XC(func_end)(par->x_aux);
free(par->x_aux);
XC(func_end)(&par->c_aux1);
XC(func_end)(&par->c_aux2);
}
const mgga_func_info revtpss_info = {
sizeof(revtpss_params),
&revtpss_init,
&revtpss_end,
&XC(mgga_x_revtpss),
&XC(mgga_c_revtpss)
};
gpaw-0.11.0.13004/c/xc/xc.c 0000664 0001750 0001750 00000021350 12553643466 015074 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Copyright (C) 2007-2009 CAMd
* Please see the accompanying LICENSE file for further information. */
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>
#include "xc_gpaw.h"
#include "../extensions.h"
//
//            __  2
//   a2     = |\/n|
//
//            dE
//   dedrs  = ---
//            dr
//              s
//
//            dE
//   deda2  = ---------
//               __  2
//            d(|\/n| )
//
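//
// With this notation the updates below follow from the chain rule: writing
// the energy density as n*eps(rs, a2) with rs proportional to n^(-1/3), so
// that drs/dn = -rs/(3n),
//
//   e_g        = n * eps
//   v_g       += d(n*eps)/dn  = eps - (rs/3) * deps/drs
//   dedsigma_g = d(n*eps)/da2 = n * deps/da2
//
// which is exactly what XCFunctional_calculate accumulates per grid point.
//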
void init_mgga(void** params, int code, int nspin);
void calc_mgga(void** params, int nspin, int ng,
const double* n_g, const double* sigma_g, const double* tau_g,
double *e_g, double *v_g, double *dedsigma_g, double *dedtau_g);
double pbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double pbe_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
double pw91_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double pw91_correlation(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
double rpbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double beefvdw_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
//
typedef struct
{
PyObject_HEAD
double (*exchange)(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2);
double (*correlation)(double n, double rs, double zeta, double a2,
bool gga, bool spinpol,
double* dedrs, double* dedzeta, double* deda2);
xc_parameters par;
// below added by cpo for mgga functionals outside of libxc (TPSS, M06L, etc.)
void* mgga;
} XCFunctionalObject;
static void XCFunctional_dealloc(XCFunctionalObject *self)
{
PyObject_DEL(self);
}
static PyObject*
XCFunctional_calculate(XCFunctionalObject *self, PyObject *args)
{
PyArrayObject* e_array;
PyArrayObject* n_array;
PyArrayObject* v_array;
PyArrayObject* sigma_array = 0;
PyArrayObject* dedsigma_array = 0;
PyArrayObject* tau_array = 0;
PyArrayObject* dedtau_array = 0;
if (!PyArg_ParseTuple(args, "OOO|OOOO", &e_array, &n_array, &v_array,
&sigma_array, &dedsigma_array, &tau_array, &dedtau_array))
return NULL;
int ng = 1;
for (int d = 0; d < PyArray_NDIM(e_array); d++)
ng *= PyArray_DIM(e_array, d);
xc_parameters* par = &self->par;
double* e_g = DOUBLEP(e_array);
const double* n_g = DOUBLEP(n_array);
double* v_g = DOUBLEP(v_array);
const double* sigma_g = 0;
double* dedsigma_g = 0;
if (par->gga)
{
sigma_g = DOUBLEP(sigma_array);
dedsigma_g = DOUBLEP(dedsigma_array);
}
const double* tau_g = 0;
double* dedtau_g = 0;
if (self->mgga)
{
tau_g = DOUBLEP(tau_array);
dedtau_g = DOUBLEP(dedtau_array);
}
if (self->mgga) {
int nspin = PyArray_DIM(n_array, 0) == 1 ? 1 : 2;
calc_mgga(&self->mgga, nspin, ng, n_g, sigma_g, tau_g, e_g, v_g, dedsigma_g, dedtau_g);
Py_RETURN_NONE;
}
if (PyArray_DIM(n_array, 0) == 1)
for (int g = 0; g < ng; g++)
{
double n = n_g[g];
if (n < NMIN)
n = NMIN;
double rs = pow(C0I / n, THIRD);
double dexdrs;
double dexda2;
double ex;
double decdrs;
double decda2;
double ec;
if (par->gga)
{
double a2 = sigma_g[g];
ex = self->exchange(par, n, rs, a2, &dexdrs, &dexda2);
ec = self->correlation(n, rs, 0.0, a2, 1, 0, &decdrs, 0, &decda2);
dedsigma_g[g] = n * (dexda2 + decda2);
}
else
{
ex = self->exchange(par, n, rs, 0.0, &dexdrs, 0);
ec = self->correlation(n, rs, 0.0, 0.0, 0, 0, &decdrs, 0, 0);
}
e_g[g] = n * (ex + ec);
v_g[g] += ex + ec - rs * (dexdrs + decdrs) / 3.0;
}
else
{
const double* na_g = n_g;
double* va_g = v_g;
const double* nb_g = na_g + ng;
double* vb_g = va_g + ng;
const double* sigma0_g = 0;
const double* sigma1_g = 0;
const double* sigma2_g = 0;
double* dedsigma0_g = 0;
double* dedsigma1_g = 0;
double* dedsigma2_g = 0;
const xc_parameters* par = &self->par;
if (par->gga)
{
sigma0_g = sigma_g;
sigma1_g = sigma0_g + ng;
sigma2_g = sigma1_g + ng;
dedsigma0_g = dedsigma_g;
dedsigma1_g = dedsigma0_g + ng;
dedsigma2_g = dedsigma1_g + ng;
}
for (int g = 0; g < ng; g++)
{
double na = 2.0 * na_g[g];
if (na < NMIN)
na = NMIN;
double rsa = pow(C0I / na, THIRD);
double nb = 2.0 * nb_g[g];
if (nb < NMIN)
nb = NMIN;
double rsb = pow(C0I / nb, THIRD);
double n = 0.5 * (na + nb);
double rs = pow(C0I / n, THIRD);
double zeta = 0.5 * (na - nb) / n;
double dexadrs;
double dexada2;
double exa;
double dexbdrs;
double dexbda2;
double exb;
double decdrs;
double decdzeta;
double decda2;
double ec;
if (par->gga)
{
exa = self->exchange(par, na, rsa, 4.0 * sigma0_g[g],
&dexadrs, &dexada2);
exb = self->exchange(par, nb, rsb, 4.0 * sigma2_g[g],
&dexbdrs, &dexbda2);
double a2 = sigma0_g[g] + 2 * sigma1_g[g] + sigma2_g[g];
ec = self->correlation(n, rs, zeta, a2, 1, 1,
&decdrs, &decdzeta, &decda2);
dedsigma0_g[g] = 2 * na * dexada2 + n * decda2;
dedsigma1_g[g] = 2 * n * decda2;
dedsigma2_g[g] = 2 * nb * dexbda2 + n * decda2;
}
else
{
exa = self->exchange(par, na, rsa, 0.0, &dexadrs, 0);
exb = self->exchange(par, nb, rsb, 0.0, &dexbdrs, 0);
ec = self->correlation(n, rs, zeta, 0.0, 0, 1,
&decdrs, &decdzeta, 0);
}
e_g[g] = 0.5 * (na * exa + nb * exb) + n * ec;
va_g[g] += (exa + ec -
(rsa * dexadrs + rs * decdrs) / 3.0 -
(zeta - 1.0) * decdzeta);
vb_g[g] += (exb + ec -
(rsb * dexbdrs + rs * decdrs) / 3.0 -
(zeta + 1.0) * decdzeta);
}
}
Py_RETURN_NONE;
}
static PyMethodDef XCFunctional_Methods[] = {
{"calculate",
(PyCFunction)XCFunctional_calculate, METH_VARARGS, 0},
{NULL, NULL, 0, NULL}
};
PyTypeObject XCFunctionalType = {
PyVarObject_HEAD_INIT(NULL, 0)
"XCFunctional",
sizeof(XCFunctionalObject),
0,
(destructor)XCFunctional_dealloc,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
"XC object",
0, 0, 0, 0, 0, 0,
XCFunctional_Methods
};
PyObject * NewXCFunctionalObject(PyObject *obj, PyObject *args)
{
int code;
PyArrayObject* parameters = 0;
if (!PyArg_ParseTuple(args, "i|O", &code, &parameters))
return NULL;
XCFunctionalObject *self = PyObject_NEW(XCFunctionalObject,
&XCFunctionalType);
if (self == NULL)
return NULL;
self->mgga = NULL;
self->par.gga = 1;
self->correlation = pbe_correlation;
self->exchange = pbe_exchange;
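// The integer code selects the functional: -1 LDA, 0 PBE, 1 revPBE,
// 2 RPBE, 14 PW91, 17 BEEF-vdW and 20-22 MGGAs handled by init_mgga().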
if (code == -1) {
// LDA
self->par.gga = 0;
}
else if (code == 0) {
// PBE
self->par.kappa = 0.804;
}
else if (code == 1) {
// revPBE
self->par.kappa = 1.245;
}
else if (code == 2) {
// RPBE
self->exchange = rpbe_exchange;
}
else if (code == 14) {
// PW91
self->exchange = pw91_exchange;
}
else if (code == 20 || code == 21 || code == 22) {
// MGGA
const int nspin = 1; // a guess, perhaps corrected later in calc_mgga
init_mgga(&self->mgga,code,nspin);
}
else {
assert (code == 17);
// BEEF-vdW
self->exchange = beefvdw_exchange;
int n = PyArray_DIM(parameters, 0);
assert(n <= 110);
double* p = (double*)PyArray_BYTES(parameters);
for (int i = 0; i < n; i++)
self->par.parameters[i] = p[i];
self->par.nparameters = n / 2;
}
return (PyObject*)self;
}
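/* Illustrative only -- a sketch of how this constructor is reached from
 * Python, assuming the extension module is importable as _gpaw and exposes
 * NewXCFunctionalObject under the name "XCFunctional"; the array argument
 * names below are placeholders, not part of this file:
 *
 *     xc = _gpaw.XCFunctional(0)                  # code 0 -> PBE
 *     xc.calculate(e_g, n_sg, v_sg, sigma_xg, dedsigma_xg)
 *
 * For LDA (code -1) only e, n and v are needed; MGGA codes additionally
 * pass tau and dedtau arrays. */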
gpaw-0.11.0.13004/c/xc/revtpss_c_pbe.c 0000664 0001750 0001750 00000034063 12553643466 017325 0 ustar jensj jensj 0000000 0000000
#include <math.h>    /* pow, sqrt, exp, log */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "xc_mgga.h"
/************************************************************************
Implements Perdew, Burke & Ernzerhof Generalized Gradient Approximation
correlation functional.
I based this implementation on a routine from L.C. Balbas and J.M. Soler
************************************************************************/
// from old libxc util.h
#define RS(x) (pow((3.0/(4*M_PI*x)), 1.0/3.0))
typedef struct XC(perdew_t) {
int nspin;
double dens, zeta, gdmt;
double ecunif, vcunif[2], fcunif[3];
double rs, kf, ks, phi, t;
double drs, dkf, dks, dphi, dt, decunif;
double d2rs2, d2rskf, d2rsks, d2rsphi, d2rst, d2rsecunif;
double d2kf2, d2kfks, d2kfphi, d2kft, d2kfecunif;
double d2ks2, d2ksphi, d2kst, d2ksecunif;
double d2phi2, d2phit, d2phiecunif;
double d2t2, d2tecunif;
double d2ecunif2;
} XC(perdew_t);
// from old libxc util.c
/* this function converts the spin-density into total density and
relative magnetization */
inline void
XC(rho2dzeta)(int nspin, const double *rho, double *d, double *zeta)
{
assert(nspin==XC_UNPOLARIZED || nspin==XC_POLARIZED);
if(nspin==XC_UNPOLARIZED){
*d = max(MIN_DENS, rho[0]);
*zeta = 0.0;
}else{
*d = max(MIN_DENS, rho[0]+rho[1]);
*zeta = (*d > MIN_DENS) ? (rho[0]-rho[1])/(*d) : 0.0;
}
}
// from old libxc gga_perdew.c
static void
XC(perdew_params)(const XC(func_type) *gga_p, const double *rho, const double *sigma, int order, XC(perdew_t) *pt)
{
pt->nspin = gga_p->nspin;
XC(rho2dzeta)(pt->nspin, rho, &(pt->dens), &(pt->zeta));
const int np = 1;
switch (order){
case 0:
XC(lda_exc) (gga_p, np, rho, &(pt->ecunif));
break;
case 1:
XC(lda_exc_vxc)(gga_p, np, rho, &(pt->ecunif), pt->vcunif);
break;
case 2:
XC(lda)(gga_p, np, rho, &(pt->ecunif), pt->vcunif, pt->fcunif, NULL);
break;
}
pt->rs = RS(pt->dens);
pt->kf = pow(3.0*M_PI*M_PI*pt->dens, 1.0/3.0);
pt->ks = sqrt(4.0*pt->kf/M_PI);
/* phi is bounded between 2^(-1/3) and 1 */
pt->phi = 0.5*(pow(1.0 + pt->zeta, 2.0/3.0) + pow(1.0 - pt->zeta, 2.0/3.0));
/* get gdmt = |nabla n| */
pt->gdmt = sigma[0];
if(pt->nspin == XC_POLARIZED) pt->gdmt += 2.0*sigma[1] + sigma[2];
if(pt->gdmt < MIN_GRAD*MIN_GRAD) pt->gdmt = MIN_GRAD*MIN_GRAD;
pt->gdmt = sqrt(pt->gdmt);
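/* dimensionless PBE gradient: t = |nabla n| / (2 phi ks n) */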
pt->t = pt->gdmt/(2.0 * pt->phi * pt->ks * pt->dens);
if(order > 0)
pt->drs = pt->dkf = pt->dks = pt->dphi = pt->dt = pt->decunif = 0.0;
if(order > 1){
pt->d2rs2 = pt->d2rskf = pt->d2rsks = pt->d2rsphi = pt->d2rst = pt->d2rsecunif = 0.0;
pt->d2kf2 = pt->d2kfks = pt->d2kfphi = pt->d2kft = pt->d2kfecunif = 0.0;
pt->d2ks2 = pt->d2ksphi = pt->d2kst = pt->d2ksecunif = 0.0;
pt->d2phi2 = pt->d2phit = pt->d2phiecunif = 0.0;
pt->d2t2 = pt->d2tecunif = 0.0;
pt->d2ecunif2 = 0.0;
}
}
static void
XC(perdew_potentials)(XC(perdew_t) *pt, const double *rho, double e_gga, int order,
double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2)
{
/* alpha = {0->rs, 1->kf, 2->ks, 3->phi, 4->t, 5->ec} */
double dalphadd[6][2], dFdalpha[6];
double d2alphadd2[6][3], d2Fdalpha2[6][6];
double dzdd[2], dpdz, d2zdd2[3], d2pdz2;
double dtdsig, d2tdsig2;
int is, js, ks, ns;
if(order < 1) return;
if(pt->nspin == XC_POLARIZED){
dpdz = 0.0;
if(fabs(1.0 + pt->zeta) >= MIN_DENS)
dpdz += 1.0/(3.0*pow(1.0 + pt->zeta, 1.0/3.0));
if(fabs(1.0 - pt->zeta) >= MIN_DENS)
dpdz -= 1.0/(3.0*pow(1.0 - pt->zeta, 1.0/3.0));
dzdd[0] = (1.0 - pt->zeta)/pt->dens;
dzdd[1] = -(1.0 + pt->zeta)/pt->dens;
}else{
dpdz = 0.0;
dzdd[0] = 0.0;
}
dFdalpha[0] = pt->drs;
dFdalpha[1] = pt->dkf;
dFdalpha[2] = pt->dks;
dFdalpha[3] = pt->dphi;
dFdalpha[4] = pt->dt;
dFdalpha[5] = pt->decunif;
for(is=0; is<pt->nspin; is++){
dalphadd[0][is] = -pt->rs/(3.0*pt->dens);
dalphadd[1][is] = pt->kf/(3.0*pt->dens);
dalphadd[2][is] = pt->ks*dalphadd[1][is]/(2.0*pt->kf);
dalphadd[3][is] = dpdz*dzdd[is];
dalphadd[4][is] = -pt->t*(1.0/pt->dens + dalphadd[2][is]/pt->ks + dalphadd[3][is]/pt->phi);
dalphadd[5][is] = (pt->vcunif[is] - pt->ecunif)/pt->dens;
}
/* calculate vrho */
if(vrho != NULL)
for(is=0; is<pt->nspin; is++){
if(rho[is] > MIN_DENS){
int k;
vrho[is] = e_gga;
for(k=0; k<6; k++)
vrho[is] += pt->dens * dFdalpha[k]*dalphadd[k][is];
}else{
vrho[is] = 0.0;
}
}
dtdsig = pt->t/(2.0*pt->gdmt*pt->gdmt);
if(vrho != NULL){ /* calculate now vsigma */
vsigma[0] = pt->dens*pt->dt*dtdsig;
if(pt->nspin == XC_POLARIZED){
vsigma[1] = 2.0*vsigma[0];
vsigma[2] = vsigma[0];
}
}
if(order < 2) return;
/* first let us sort d2Fdalpha2 in a matrix format */
d2Fdalpha2[0][0] = pt->d2rs2;
d2Fdalpha2[0][1] = pt->d2rskf;
d2Fdalpha2[0][2] = pt->d2rsks;
d2Fdalpha2[0][3] = pt->d2rst;
d2Fdalpha2[0][4] = pt->d2rsphi;
d2Fdalpha2[0][5] = pt->d2rsecunif;
d2Fdalpha2[1][0] = d2Fdalpha2[0][1];
d2Fdalpha2[1][1] = pt->d2kf2;
d2Fdalpha2[1][2] = pt->d2kfks;
d2Fdalpha2[1][3] = pt->d2kft;
d2Fdalpha2[1][4] = pt->d2kfphi;
d2Fdalpha2[1][5] = pt->d2kfecunif;
d2Fdalpha2[2][0] = d2Fdalpha2[0][2];
d2Fdalpha2[2][1] = d2Fdalpha2[1][2];
d2Fdalpha2[2][2] = pt->d2ks2;
d2Fdalpha2[2][3] = pt->d2kst;
d2Fdalpha2[2][4] = pt->d2ksphi;
d2Fdalpha2[2][5] = pt->d2ksecunif;
d2Fdalpha2[3][0] = d2Fdalpha2[0][3];
d2Fdalpha2[3][1] = d2Fdalpha2[1][3];
d2Fdalpha2[3][2] = d2Fdalpha2[2][3];
d2Fdalpha2[3][3] = pt->d2phi2;
d2Fdalpha2[3][4] = pt->d2phit;
d2Fdalpha2[3][5] = pt->d2phiecunif;
d2Fdalpha2[4][0] = d2Fdalpha2[0][4];
d2Fdalpha2[4][1] = d2Fdalpha2[1][4];
d2Fdalpha2[4][2] = d2Fdalpha2[2][4];
d2Fdalpha2[4][3] = d2Fdalpha2[3][4];
d2Fdalpha2[4][4] = pt->d2t2;
d2Fdalpha2[4][5] = pt->d2tecunif;
d2Fdalpha2[5][0] = d2Fdalpha2[0][5];
d2Fdalpha2[5][1] = d2Fdalpha2[1][5];
d2Fdalpha2[5][2] = d2Fdalpha2[2][5];
d2Fdalpha2[5][3] = d2Fdalpha2[3][5];
d2Fdalpha2[5][4] = d2Fdalpha2[4][5];
d2Fdalpha2[5][5] = pt->d2ecunif2;
/* now we sort d2alphadd2 */
if(pt->nspin == XC_POLARIZED){
d2pdz2 = 0.0;
if(fabs(1.0 + pt->zeta) >= MIN_DENS)
d2pdz2 += -(1.0/9.0)*pow(1.0 + pt->zeta, -4.0/3.0);
if(fabs(1.0 - pt->zeta) >= MIN_DENS)
d2pdz2 += -(1.0/9.0)*pow(1.0 - pt->zeta, -4.0/3.0);
d2zdd2[0] = -2.0*dzdd[0]/pt->dens;
d2zdd2[1] = 2.0*pt->zeta/(pt->dens*pt->dens);
d2zdd2[2] = -2.0*dzdd[1]/pt->dens;
}else{
d2pdz2 = 0.0;
d2zdd2[0] = 0.0;
}
ns = (pt->nspin == XC_UNPOLARIZED) ? 0 : 2;
for(ks=0; ks<=ns; ks++){
is = (ks == 0 || ks == 1) ? 0 : 1;
js = (ks == 0 ) ? 0 : 1;
d2alphadd2[0][ks] = 4.0/9.0*pt->rs/(pt->dens*pt->dens);
d2alphadd2[1][ks] = -2.0/9.0*pt->kf/(pt->dens*pt->dens);
d2alphadd2[2][ks] = pt->ks/(2.0*pt->kf)*
(d2alphadd2[1][ks] - dalphadd[1][is]*dalphadd[1][js]/(2.0*pt->kf));
d2alphadd2[3][ks] = d2pdz2*dzdd[is]*dzdd[js] + dpdz*d2zdd2[ks];
d2alphadd2[4][ks] = pt->t *
(+2.0/(pt->dens*pt->dens)
+2.0/(pt->ks*pt->ks) *(dalphadd[2][is] * dalphadd[2][js])
+2.0/(pt->phi*pt->phi) *(dalphadd[3][is] * dalphadd[3][js])
+1.0/(pt->dens*pt->ks) *(dalphadd[2][is] + dalphadd[2][js])
+1.0/(pt->dens*pt->phi)*(dalphadd[3][is] + dalphadd[3][js])
+1.0/(pt->ks*pt->phi) *(dalphadd[2][is]*dalphadd[3][js] + dalphadd[2][js]*dalphadd[3][is])
-1.0/(pt->ks)*d2alphadd2[2][ks] -1.0/(pt->phi)*d2alphadd2[3][ks]);
d2alphadd2[5][ks] = pt->fcunif[ks]/pt->dens -
(pt->vcunif[is] + pt->vcunif[js] - 2.0*pt->ecunif)/(pt->dens*pt->dens);
}
for(ks=0; ks<=ns; ks++){
int j, k;
is = (ks == 0 || ks == 1) ? 0 : 1;
js = (ks == 0 ) ? 0 : 1;
v2rho2[ks] = 0.0;
for(j=0; j<6; j++){
v2rho2[ks] += dFdalpha[j]*(dalphadd[j][is] + dalphadd[j][js]);
v2rho2[ks] += pt->dens * dFdalpha[j]*d2alphadd2[j][ks];
for(k=0; k<6; k++)
v2rho2[ks] += pt->dens * d2Fdalpha2[j][k]*dalphadd[j][is]*dalphadd[k][js];
}
}
/* now we handle v2rhosigma */
for(is=0; is<pt->nspin; is++){
int j;
ks = (is == 0) ? 0 : 5;
v2rhosigma[ks] = dFdalpha[4]*dtdsig;
for(j=0; j<6; j++)
v2rhosigma[ks] += pt->dens * d2Fdalpha2[4][j]*dalphadd[j][is]*dtdsig;
v2rhosigma[ks] += pt->dens * dFdalpha[4]*dalphadd[4][is]/(2.0*pt->gdmt*pt->gdmt);
}
if(pt->nspin == XC_POLARIZED){
v2rhosigma[1] = 2.0*v2rhosigma[0];
v2rhosigma[2] = v2rhosigma[0];
v2rhosigma[3] = v2rhosigma[5];
v2rhosigma[4] = 2.0*v2rhosigma[5];
}
/* now we take care of v2sigma2 */
d2tdsig2 = -dtdsig/(2.0*pt->gdmt*pt->gdmt);
v2sigma2[0] = pt->dens*(pt->d2t2*dtdsig*dtdsig + pt->dt*d2tdsig2);
if(pt->nspin == XC_POLARIZED){
v2sigma2[1] = 2.0*v2sigma2[0]; /* aa_ab */
v2sigma2[2] = v2sigma2[0]; /* aa_bb */
v2sigma2[3] = 4.0*v2sigma2[0]; /* ab_ab */
v2sigma2[4] = 2.0*v2sigma2[0]; /* ab_bb */
v2sigma2[5] = v2sigma2[0]; /* bb_bb */
}
}
// from old libxc gga_c_pbe.c
static const double beta[4] = {
0.06672455060314922, /* original PBE */
0.046, /* PBE sol */
0.089809,
0.06672455060314922 /* PBE for revTPSS */
};
static double gamm[4];
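/* pbe_eq8: the coefficient A of the PBE correlation gradient correction
   (Eq. 8 of the PBE paper) and its derivatives; func == 3 applies the
   rs-dependent beta of revTPSS. */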
static inline void
pbe_eq8(int func, int order, double rs, double ecunif, double phi,
double *A, double *dec, double *dphi, double *drs,
double *dec2, double *decphi, double *dphi2)
{
double phi3, f1, df1dphi, d2f1dphi2, f2, f3, dx, d2x;
phi3 = pow(phi, 3);
f1 = ecunif/(gamm[func]*phi3);
f2 = exp(-f1);
f3 = f2 - 1.0;
*A = beta[func]/(gamm[func]*f3);
if(func == 3) *A *= (1. + 0.1*rs)/(1. + 0.1778*rs);
if(order < 1) return;
df1dphi = -3.0*f1/phi;
dx = (*A)*f2/f3;
*dec = dx/(gamm[func]*phi3);
*dphi = dx*df1dphi;
*drs = 0.0;
if(func == 3) *drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))/(gamm[func]*f3);
if(func == 3) return;
if(order < 2) return;
d2f1dphi2 = -4.0*df1dphi/phi;
d2x = dx*(2.0*f2 - f3)/f3;
*dphi2 = d2x*df1dphi*df1dphi + dx*d2f1dphi2;
*decphi = (d2x*df1dphi*f1 + dx*df1dphi)/ecunif;
*dec2 = d2x/(gamm[func]*gamm[func]*phi3*phi3);
}
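/* pbe_eq7: the gradient contribution H(rs, phi, t, A) to the correlation
   energy per particle (Eq. 7 of the PBE paper) and its first and second
   derivatives. */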
static void
pbe_eq7(int func, int order, double rs, double phi, double t, double A,
double *H, double *dphi, double *drs, double *dt, double *dA,
double *d2phi, double *d2phit, double *d2phiA, double *d2t2, double *d2tA, double *d2A2)
{
double t2, phi3, f1, f2, f3;
double df1dt, df2drs, df2dt, df1dA, df2dA;
double d2f1dt2, d2f2dt2, d2f2dA2, d2f1dtA, d2f2dtA;
t2 = t*t;
phi3 = pow(phi, 3);
f1 = t2 + A*t2*t2;
f3 = 1.0 + A*f1;
f2 = beta[func]*f1/(gamm[func]*f3);
if(func == 3) f2 *= (1. + 0.1*rs)/(1. + 0.1778*rs);
*H = gamm[func]*phi3*log(1.0 + f2);
if(order < 1) return;
*dphi = 3.0*(*H)/phi;
df1dt = t*(2.0 + 4.0*A*t2);
df2dt = beta[func]/(gamm[func]*f3*f3) * df1dt;
if(func == 3) df2dt*=(1. + 0.1*rs)/(1. + 0.1778*rs);
*dt = gamm[func]*phi3*df2dt/(1.0 + f2);
df1dA = t2*t2;
df2dA = beta[func]/(gamm[func]*f3*f3) * (df1dA - f1*f1);
if(func == 3) df2dA *= (1. + 0.1*rs)/(1. + 0.1778*rs);
*dA = gamm[func]*phi3*df2dA/(1.0 + f2);
df2drs = 0.0;
*drs = 0.0;
if(func == 3){
df2drs = beta[func]*((0.1-0.1778)/pow(1+0.1778*rs,2))*f1/(gamm[func]*f3);
*drs = gamm[func]*phi3*df2drs/(1.0 + f2);
}
if(func == 3) return;
if(order < 2) return;
*d2phi = 2.0*(*dphi)/phi;
*d2phit = 3.0*(*dt)/phi;
*d2phiA = 3.0*(*dA)/phi;
d2f1dt2 = 2.0 + 4.0*3.0*A*t2;
d2f2dt2 = beta[func]/(gamm[func]*f3*f3) * (d2f1dt2 - 2.0*A/f3*df1dt*df1dt);
*d2t2 = gamm[func]*phi3*(d2f2dt2*(1.0 + f2) - df2dt*df2dt)/((1.0 + f2)*(1.0 + f2));
d2f1dtA = 4.0*t*t2;
d2f2dtA = beta[func]/(gamm[func]*f3*f3) *
(d2f1dtA - 2.0*df1dt*(f1 + A*df1dA)/f3);
*d2tA = gamm[func]*phi3*(d2f2dtA*(1.0 + f2) - df2dt*df2dA)/((1.0 + f2)*(1.0 + f2));
d2f2dA2 = beta[func]/(gamm[func]*f3*f3*f3) *(-2.0)*(2.0*f1*df1dA - f1*f1*f1 + A*df1dA*df1dA);
*d2A2 = gamm[func]*phi3*(d2f2dA2*(1.0 + f2) - df2dA*df2dA)/((1.0 + f2)*(1.0 + f2));
}
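/* PBE correlation evaluated with the revTPSS choice of beta(rs); used as
   the base correlation inside the revTPSS meta-GGA. */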
void
gga_c_pbe_revtpss(XC(func_type) *p, const double *rho, const double *sigma,
double *e, double *vrho, double *vsigma,
double *v2rho2, double *v2rhosigma, double *v2sigma2)
{
gamm[0] = gamm[1] = gamm[3] = (1.0 - log(2.0))/(M_PI*M_PI);
XC(perdew_t) pt;
int func, order;
double me;
double A, dAdec, dAdphi, dAdrs, d2Adec2, d2Adecphi, d2Adphi2;
double H, dHdphi, dHdrs, dHdt, dHdA, d2Hdphi2, d2Hdphit, d2HdphiA, d2Hdt2, d2HdtA, d2HdA2;
d2HdphiA = 0.0;
d2Hdphi2 = 0.0;
d2Adphi2 = 0.0;
d2HdA2 = 0.0;
d2HdtA = 0.0;
d2Hdphit = 0.0;
d2Adecphi = 0.0;
d2Hdt2 = 0.0;
d2Adec2 = 0.0;
dAdrs = 0.0;
dAdphi = 0.0;
dAdec = 0.0;
dHdA = 0.0;
dHdt = 0.0;
dHdrs = 0.0;
dHdphi = 0.0;
func = 3; // for revTPSS
order = 0;
if(vrho != NULL) order = 1;
if(v2rho2 != NULL) order = 2;
XC(perdew_params)(p, rho, sigma, order, &pt);
pbe_eq8(func, order, pt.rs, pt.ecunif, pt.phi,
&A, &dAdec, &dAdphi, &dAdrs, &d2Adec2, &d2Adecphi, &d2Adphi2);
pbe_eq7(func, order, pt.rs, pt.phi, pt.t, A,
&H, &dHdphi, &dHdrs, &dHdt, &dHdA, &d2Hdphi2, &d2Hdphit, &d2HdphiA, &d2Hdt2, &d2HdtA, &d2HdA2);
me = pt.ecunif + H;
if(e != NULL) *e = me;
if(order >= 1){
pt.dphi = dHdphi + dHdA*dAdphi;
pt.drs = dHdrs + dHdA*dAdrs;
pt.dt = dHdt;
pt.decunif = 1.0 + dHdA*dAdec;
}
if(order >= 2){
pt.d2phi2 = d2Hdphi2 + 2.0*d2HdphiA*dAdphi + dHdA*d2Adphi2 + d2HdA2*dAdphi*dAdphi;
pt.d2phit = d2Hdphit + d2HdtA*dAdphi;
pt.d2phiecunif = d2HdphiA*dAdec + d2HdA2*dAdphi*dAdec + dHdA*d2Adecphi;
pt.d2t2 = d2Hdt2;
pt.d2tecunif = d2HdtA*dAdec;
pt.d2ecunif2 = d2HdA2*dAdec*dAdec + dHdA*d2Adec2;
}
XC(perdew_potentials)(&pt, rho, me, order, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2);
}
gpaw-0.11.0.13004/c/xc/rpbe.c 0000664 0001750 0001750 00000001314 12553643466 015410 0 ustar jensj jensj 0000000 0000000 /* Copyright (C) 2003-2007 CAMP
* Please see the accompanying LICENSE file for further information. */
#include <math.h>
#include "xc_gpaw.h"
double rpbe_exchange(const xc_parameters* par,
double n, double rs, double a2,
double* dedrs, double* deda2)
{
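// RPBE exchange: LDA exchange times the enhancement factor
//   Fx(s) = 1 + kappa * (1 - exp(-mu s^2 / kappa)),  kappa = 0.804,
// which reduces to plain LDA exchange when par->gga is false.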
double e = C1 / rs;
*dedrs = -e / rs;
if (par->gga) // not really needed? XXX
{
double c = C2 * rs / n;
c *= c;
double s2 = a2 * c;
double x = exp(-MU * s2 / 0.804);
double Fx = 1.0 + 0.804 * (1 - x);
double dFxds2 = MU * x;
double ds2drs = 8.0 * c * a2 / rs;
*dedrs = *dedrs * Fx + e * dFxds2 * ds2drs;
*deda2 = e * dFxds2 * c;
e *= Fx;
}
return e;
}
gpaw-0.11.0.13004/c/xc/xc_mgga.h 0000664 0001750 0001750 00000002131 12553643466 016070 0 ustar jensj jensj 0000000 0000000
#ifndef GPAW_XC_MGGA_H
#define GPAW_XC_MGGA_H
#define M_PI 3.14159265358979323846
#define MIN_DENS 1.0e-20
#define MIN_GRAD 1.0e-20
#define max(x,y) ((x