diff --git a/include/evolution.h b/include/evolution.h index 2a86be3a..a9e02e0c 100644 --- a/include/evolution.h +++ b/include/evolution.h @@ -56,4 +56,16 @@ void evolve(Grid &par, unsigned int gstate, std::string buffer); +void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay, + double2 *Az, double renorm_factor_x, + double renorm_factor_y, double renorm_factor_z, bool flip, + cufftHandle plan_1d, cufftHandle plan_dim2, + cufftHandle plan_dim3, double dx, double dy, double dz, + double time, int yDim, int size); + +void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay, + double renorm_factor_x, double renorm_factor_y, bool flip, + cufftHandle plan_1d, cufftHandle plan_dim2, double dx, + double dy, double dz, double time); + #endif diff --git a/include/kernels.h b/include/kernels.h index 19deb0fc..2185eed3 100644 --- a/include/kernels.h +++ b/include/kernels.h @@ -17,6 +17,32 @@ #define KERNELS_H #include +/** +* @brief derivative of data +* @param input data +* @param output data +* @param stride of derivative, for (xDim, yDim, zDim) derivative, + use stride (1, xDim, xDim*yDim) +* @param grid size for simulation +* @param dx value for derivative +* @ingroup gpu +*/ +__global__ void derive(double *data, double *out, int stride, int gsize, + double dx); + +/** +* @brief derivative of data +* @param input data +* @param output data +* @param stride of derivative, for (xDim, yDim, zDim) derivative, + use stride (1, xDim, xDim*yDim) +* @param grid size for simulation +* @param dx value for derivative +* @ingroup gpu +*/ +__global__ void derive(double2 *data, double2 *out, int stride, int gsize, + double dx); + /** * @brief subtraction operation for 2 double2 values * @ingroup gpu @@ -120,6 +146,15 @@ __host__ __device__ double2 complexMultiply(double2 in1, double2 in2); */ __device__ double2 make_complex(double in, int evolution_type); +/** +* @brief copies a double2 value +* @ingroup gpu +* @param complex input +* @return complex output +*/ + +__global__ void copy(double2 *in, double2 *out); + /** * @brief Sums the absolute value of two complex arrays * @ingroup gpu @@ -129,6 +164,44 @@ __device__ double2 make_complex(double in, int evolution_type); */ __global__ void complexAbsSum(double2 *in1, double2 *in2, double *out); +/** +* @brief Sums double2* and double2* energies +* @ingroup gpu +* @param Array 1 +* @param Array 2 +* @param Output +*/ +__global__ void energy_sum(double2 *in1, double2 *in2, double *out); + +/** +* @brief Sums double* and double2* energies for angular momentum +* @ingroup gpu +* @param Array 1 +* @param Array 2 +* @param Output +*/ +__global__ void energy_lsum(double *in1, double2 *in2, double *out); + +/** +* @brief Sums double2* and double2* to an output double2 +* @ingroup gpu +* @param Array 1 +* @param Array 2 +* @param Output +*/ +__global__ void sum(double2 *in1, double2 *in2, double2 *out); + +/** +* @brief Sums the absolute value of two complex arrays +* @ingroup gpu +* @param Array 1 +* @param Array 2 +* @param Array 3 +* @param Output +*/ +__global__ void complexAbsSum(double2 *in1, double2 *in2, double2 *in3, + double *out); + /** * @brief Complex magnitude of a double2 array * @ingroup gpu @@ -203,11 +276,10 @@ __global__ void cMultPhi(double2* in1, double* in2, double2* out); * @param in2 Evolution operator input * @param out Pass by reference output for multiplication result * @param dt Timestep for evolution -* @param mass Atomic species mass * @param gState If performing real (1) or imaginary (0) time evolution * @param gDenConst a constant for evolution */ -__global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt, double mass, int gstate, double gDenConst); +__global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt, int gstate, double gDenConst); /** * @brief Kernel for complex multiplication with nonlinear density term @@ -221,14 +293,13 @@ __global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt * @param time * @param element number in AST * @param dt Timestep for evolution -* @param mass Atomic species mass * @param gState If performing real (1) or imaginary (0) time evolution * @param gDenConst a constant for evolution */ __global__ void cMultDensity_ast(EqnNode_gpu *eqn, double2* in, double2* out, double dx, double dy, double dz, double time, - int e_num, double dt, double mass, int gstate, + int e_num, double dt, int gstate, double gDenConst); @@ -251,6 +322,33 @@ __global__ void pinVortex(double2* in1, double2* in2, double2* out); */ __global__ void vecMult(double2 *in, double *factor, double2 *out); +/** +* @brief Complex field Summation +* @ingroup gpu +* @param in Complex field to be scaled (divided, not multiplied) +* @param factor Scaling vector to be used +* @param out Pass by reference output for result +*/ +__global__ void vecSum(double2 *in, double *factor, double2 *out); + +/** +* @brief field scaling +* @ingroup gpu +* @param in field to be scaled (divided, not multiplied) +* @param factor Scaling vector to be used +* @param out Pass by reference output for result +*/ +__global__ void vecMult(double *in, double *factor, double *out); + +/** +* @brief field Summation +* @ingroup gpu +* @param in field to be scaled (divided, not multiplied) +* @param factor Scaling vector to be used +* @param out Pass by reference output for result +*/ +__global__ void vecSum(double *in, double *factor, double *out); + /** * @brief performs the l2 normalization of the provided terms * @ingroup gpu @@ -297,11 +395,30 @@ __global__ void scalarDiv(double* in, double factor, double* out); * @brief Complex field scaling and renormalisation. Used mainly post-FFT. * @ingroup gpu * @param in Complex field to be scaled (multiplied, not divided) -* @param factor Scaling factor to be used +* @param scaling factor to be used * @param out Pass by reference output for result */ __global__ void scalarMult(double2* in, double factor, double2* out); +/** +* @brief field scaling and renormalisation. Used mainly post-FFT. +* @ingroup gpu +* @param in field to be scaled (multiplied, not divided) +* @param scalaing factor to be used +* @param out Pass by reference output for result +*/ +__global__ void scalarMult(double* in, double factor, double* out); + +/** +* @brief Complex field scaling and renormalisation. Used mainly post-FFT. +* @ingroup gpu +* @param in Complex field to be scaled (multiplied, not divided) +* @param complex scaling factor to be used +* @param out Pass by reference output for result +*/ +__global__ void scalarMult(double2* in, double2 factor, double2* out); + + /** * @brief Complex field raised to a power * @ingroup gpu @@ -428,7 +545,11 @@ __device__ double2 im_ast(double val, double dt); * @brief Sets boolean array to 0 * @ingroup gpu */ -__global__ void zeros(bool *in, bool *out); +__global__ void zeros(bool *out); + +__global__ void zeros(double *out); + +__global__ void zeros(double2 *out); /** * @brief Sets in2 to be equal to in1 diff --git a/include/operators.h b/include/operators.h index d28c318f..cb8fdad4 100644 --- a/include/operators.h +++ b/include/operators.h @@ -21,6 +21,15 @@ #include //#include +// Laplacian functions +void laplacian(Grid &par, double2 *data, double2* out, int xDim, int yDim, + int zDim, double dx, double dy, double dz); + +void laplacian(Grid &par, double2 *data, double2* out, int xDim, int yDim, + double dx, double dy); + +void laplacian(Grid &par, double2 *data, double2* out, int xDim, double dx); + // function to find Bz, the curl of the gauge field /** * @brief Finds Bz, the curl of the gauge field @@ -245,4 +254,5 @@ __global__ void aux_fields(double *V, double *K, double gdt, double dt, double2* EpAx, double2* EpAy, double2* EpAz); // Function to generate grid and treads void generate_grid(Grid &par); + #endif diff --git a/julia/calc_energy.jl b/julia/calc_energy.jl new file mode 100644 index 00000000..b8dce4a0 --- /dev/null +++ b/julia/calc_energy.jl @@ -0,0 +1,133 @@ +using DelimitedFiles +using FFTW + +function derive(data, dim, dx) + out = Array{Complex,2}(undef,size(data,1), size(data,2)) + dim2 = dim+1 + if dim2 == 3 + dim2 = 1 + end + for i = 1:size(data,dim) + for j = 1:size(data,dim2) + #println(j,'\t',i) + if (dim == 1) + if (i == size(data,dim)) + out[i,j] = (data[1, j] - data[i, j])/dx + else + out[i,j] = (data[i + 1, j] - data[i, j])/dx + end + else + if (i == size(data,dim)) + out[j,i] = (data[j,1] - data[j,i])/dx + else + out[j,i] = (data[j,i + 1] - data[j,i])/dx + end + end + end + end + return out +end + +# We are calculating the energy to check +function calculate_energy(wfc, H_k, H_r, Ax, Ay, g, xDim, yDim, dx, dy) + hbar = 1.05457148e-34 + omega = 0.6 + omegaX = 6.283 + + # Creating momentum and conjugate wavefunctions + wfc_k = fft(wfc) + wfc_c = conj(wfc) + + # Finding the momentum and real-space energy terms + energy_k = wfc_c.*ifft((H_k) .* wfc_k) + energy_r = wfc_c.* (H_r).* wfc + energy_i = wfc_c.* (0.5*g*abs2.(wfc)).* wfc + + energy_l = wfc_c.*(im*hbar*(Ax.*derive(wfc,1,dx) + Ay.*derive(wfc,2,dy))) + + # Integrating over all space + energy_if = 0 + energy_lf = 0 + energy_kf = 0 + energy_rf = 0 + for i = 1:xDim*yDim + energy_if += real(energy_i[i])*dx*dy + energy_rf += real(energy_r[i])*dx*dy + energy_lf += real(energy_l[i])*dx*dy + energy_kf += real(energy_k[i])*dx*dy + end + + println("Kinetic energy:", "\t\t\t", energy_kf) + println("Potential energy:", "\t\t", energy_rf) + println("Internal energy:", "\t\t", energy_if) + println("Angular Momentum energy:", '\t', energy_lf) + println("Total energy:", "\t\t\t", energy_kf+energy_if+energy_rf+energy_lf) + +end + +# Read in param.cfg file +function calculate(param_file::String, data_dir::String) + parameters = Dict() + + for line in readlines(data_dir*param_file) + if line != "[Params]" + tmp = split(line,"=") + parameters[tmp[1]] = tmp[2] + end + end + + xDim = parse(Int64, parameters["xDim"]) + yDim = parse(Int64, parameters["yDim"]) + dx = parse(Float64, parameters["dx"]) + dy = parse(Float64, parameters["dy"]) + + omega = parse(Float64, parameters["omega"]) + omegaX = parse(Float64, parameters["omegaX"]) + + g = parse(Float64, parameters["gDenConst"]) + + Ax = readdlm(data_dir*"Ax_0") + Ay = readdlm(data_dir*"Ay_0") + K = readdlm(data_dir*"K_0") + V = readdlm(data_dir*"V_0") + + Ax = reshape(Ax, xDim, yDim) + Ay = reshape(Ay, xDim, yDim) + K = reshape(K, xDim, yDim) + V = reshape(V, xDim, yDim) + + start = 0 + ende = parse(Int64, parameters["esteps"]) + endg = parse(Int64, parameters["gsteps"]) + incr = parse(Int64, parameters["printSteps"]) + + # Ground State Evolution + println("Starting imaginary time energy calculation") + for i = start:incr:endg + wfc = readdlm(data_dir*"wfc_0_const_"*string(i)) + + readdlm(data_dir*"wfc_0_consti_"*string(i))*im + println(data_dir*"wfc_0_const_"*string(i), '\t', + data_dir*"wfc_0_consti_"*string(i)) + wfc = reshape(wfc, xDim, yDim) + calculate_energy(wfc, K, V, Ax, Ay, g, xDim, yDim, dx, dy) + println() + end + + println() + + # Ground State Evolution + println("Starting real time energy calculation") + for i = start:incr:ende + wfc = readdlm(data_dir*"wfc_ev_"*string(i)) + + readdlm(data_dir*"wfc_evi_"*string(i))*im + println(data_dir*"wfc_0_const_"*string(i), '\t', + data_dir*"wfc_0_consti_"*string(i)) + wfc = reshape(wfc, xDim, yDim) + calculate_energy(wfc, K, V, Ax, Ay, g, xDim, yDim, dx, dy) + println() + end + + +end + +calculate("Params.dat", "../data/") diff --git a/src/ds.cu b/src/ds.cu index 3663da6c..81d3d50f 100644 --- a/src/ds.cu +++ b/src/ds.cu @@ -46,7 +46,7 @@ void generate_plan_other3d(cufftHandle *plan_fft1d, Grid &par, int axis){ cufftResult result; - // Along first dimension (z) + // Along first dimension (x) if (axis == 0){ result = cufftPlan1d(plan_fft1d, xDim, CUFFT_Z2Z, yDim*zDim); } @@ -71,7 +71,7 @@ void generate_plan_other3d(cufftHandle *plan_fft1d, Grid &par, int axis){ } - // Along third dimension (x) + // Along third dimension (z) else if (axis == 2){ int batch = xDim*yDim; diff --git a/src/evolution.cu b/src/evolution.cu index c88a3acc..1b121dff 100644 --- a/src/evolution.cu +++ b/src/evolution.cu @@ -1,6 +1,223 @@ #include "../include/evolution.h" #include "../include/vortex_3d.h" +// 3D +void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay, + double2 *Az, double renorm_factor_x, + double renorm_factor_y, double renorm_factor_z, bool flip, + cufftHandle plan_1d, cufftHandle plan_dim2, + cufftHandle plan_dim3, double dx, double dy, double dz, + double time, int yDim, int size){ + + cufftResult result; + dim3 grid = par.grid; + dim3 threads = par.threads; + + if (flip){ + + // 1d forward / mult by Az + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_z, wfc); + if(par.bval("Az_time")){ + EqnNode_gpu* Az_eqn = par.astval("Az"); + int e_num = par.ival("Az_num"); + ast_cmult<<>>(wfc, wfc, Az_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Az, wfc); + } + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_z, wfc); + + // loop to multiply by Ay + for (int i = 0; i < yDim; i++){ + result = cufftExecZ2Z(plan_dim2, &wfc[i*size], + &wfc[i*size], CUFFT_FORWARD); + } + + scalarMult<<>>(wfc, renorm_factor_y, wfc); + if(par.bval("Ay_time")){ + EqnNode_gpu* Ay_eqn = par.astval("Ay"); + int e_num = par.ival("Ay_num"); + ast_cmult<<>>(wfc, wfc, Ay_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ay, wfc); + } + + for (int i = 0; i < yDim; i++){ + //size = xDim * zDim; + result = cufftExecZ2Z(plan_dim2, &wfc[i*size], + &wfc[i*size], CUFFT_INVERSE); + } + scalarMult<<>>(wfc, renorm_factor_y, wfc); + + // 1D FFT to Ax + result = cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + + if(par.bval("Ax_time")){ + EqnNode_gpu* Ax_eqn = par.astval("Ax"); + int e_num = par.ival("Ax_num"); + ast_cmult<<>>(wfc, wfc, Ax_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ax, wfc); + } + + result = cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + + } + else{ + + // 1D FFT to Ax + result = cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + + if(par.bval("Ax_time")){ + EqnNode_gpu* Ax_eqn = par.astval("Ax"); + int e_num = par.ival("Ax_num"); + ast_cmult<<>>(wfc, wfc, Ax_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ax, wfc); + } + + result = cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + + + // loop to multiply by Ay + for (int i = 0; i < yDim; i++){ + result = cufftExecZ2Z(plan_dim2, &wfc[i*size], + &wfc[i*size], CUFFT_FORWARD); + } + + scalarMult<<>>(wfc, renorm_factor_y, wfc); + if(par.bval("Ay_time")){ + EqnNode_gpu* Ay_eqn = par.astval("Ay"); + int e_num = par.ival("Ay_num"); + ast_cmult<<>>(wfc, wfc, Ay_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ay, wfc); + } + + for (int i = 0; i < yDim; i++){ + //size = xDim * zDim; + result = cufftExecZ2Z(plan_dim2, &wfc[i*size], + &wfc[i*size], CUFFT_INVERSE); + } + scalarMult<<>>(wfc, renorm_factor_y, wfc); + + // 1d forward / mult by Az + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_z, wfc); + if(par.bval("Az_time")){ + EqnNode_gpu* Az_eqn = par.astval("Az"); + int e_num = par.ival("Az_num"); + ast_cmult<<>>(wfc, wfc, Az_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Az, wfc); + } + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_z, wfc); + + } + +} + +// 2D +void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay, + double renorm_factor_x, double renorm_factor_y, bool flip, + cufftHandle plan_1d, cufftHandle plan_dim2, double dx, + double dy, double dz, double time){ + + cufftResult result; + dim3 grid = par.grid; + dim3 threads = par.threads; + + if (flip){ + + // 1d forward / mult by Ay + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_y ,wfc); + if(par.bval("Ay_time")){ + EqnNode_gpu* Ay_eqn = par.astval("Ay"); + int e_num = par.ival("Ay_num"); + ast_cmult<<>>(wfc, wfc, Ay_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ay, wfc); + } + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_y, wfc); + + + // 1D FFT to wfc_pAx + result = cufftExecZ2Z(plan_dim2, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + if(par.bval("Ax_time")){ + EqnNode_gpu* Ax_eqn = par.astval("Ax"); + int e_num = par.ival("Ax_num"); + ast_cmult<<>>(wfc, wfc, Ax_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ax, wfc); + } + + result = cufftExecZ2Z(plan_dim2, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + } + else{ + + // 1D FFT to wfc_pAx + result = cufftExecZ2Z(plan_dim2, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + if(par.bval("Ax_time")){ + EqnNode_gpu* Ax_eqn = par.astval("Ax"); + int e_num = par.ival("Ax_num"); + ast_cmult<<>>(wfc, wfc, Ax_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ax, wfc); + } + + result = cufftExecZ2Z(plan_dim2, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_x, wfc); + + + // 1d forward / mult by Ay + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD); + scalarMult<<>>(wfc, renorm_factor_y ,wfc); + if(par.bval("Ay_time")){ + EqnNode_gpu* Ay_eqn = par.astval("Ay"); + int e_num = par.ival("Ay_num"); + ast_cmult<<>>(wfc, wfc, Ay_eqn, dx, dy, dz, + time, e_num); + } + else{ + cMult<<>>(wfc, (cufftDoubleComplex*) Ay, wfc); + } + result = cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE); + scalarMult<<>>(wfc, renorm_factor_y, wfc); + + } + +} + + void evolve(Grid &par, int numSteps, unsigned int gstate, @@ -77,7 +294,9 @@ void evolve(Grid &par, // Because no two operations are created equally. // Multiplication is faster than divisions. double renorm_factor_nd=1.0/pow(gridSize,0.5); - double renorm_factor_1d=1.0/pow(xDim,0.5); + double renorm_factor_x=1.0/pow(xDim,0.5); + double renorm_factor_y=1.0/pow(yDim,0.5); + double renorm_factor_z=1.0/pow(zDim,0.5); clock_t begin, end; double time_spent; @@ -464,6 +683,7 @@ void evolve(Grid &par, } //std::cout << "written" << '\n'; if (par.bval("energy_calc")){ + par.store("time", time); double energy = energy_calc(par,gpuWfc); // Now opening and closing file for writing. std::ofstream energy_out; @@ -499,11 +719,11 @@ void evolve(Grid &par, int e_num = par.ival("V_num"); cMultDensity_ast<<>>(V_eqn,gpuWfc,gpuWfc, dx, dy, dz, time, e_num, 0.5*Dt, - mass,gstate,interaction*gDenConst); + gstate,interaction*gDenConst); } else{ cMultDensity<<>>(V_gpu,gpuWfc,gpuWfc,0.5*Dt, - mass,gstate,interaction*gDenConst); + gstate,interaction*gDenConst); } } else { @@ -544,11 +764,11 @@ void evolve(Grid &par, int e_num = par.ival("V_num"); cMultDensity_ast<<>>(V_eqn,gpuWfc,gpuWfc, dx, dy, dz, time, e_num, 0.5*Dt, - mass,gstate,interaction*gDenConst); + gstate,interaction*gDenConst); } else{ cMultDensity<<>>(V_gpu,gpuWfc,gpuWfc,0.5*Dt, - mass,gstate,interaction*gDenConst); + gstate,interaction*gDenConst); } } else { @@ -585,255 +805,19 @@ void evolve(Grid &par, } int size = xDim*zDim; if (dimnum == 3){ - switch(i%2){ - case 0: //Groundstate solver, even step - - // 1d forward / mult by Az - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ay_time")){ - EqnNode_gpu* Ay_eqn = par.astval("Ay"); - int e_num = par.ival("Ay_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ay_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAy, gpuWfc); - } - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - // loop to multiply by Ay - for (int i = 0; i < yDim; i++){ - //size = xDim * zDim; - result = cufftExecZ2Z(plan_dim2, - &gpuWfc[i*size], - &gpuWfc[i*size],CUFFT_FORWARD); - } - - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ax_time")){ - EqnNode_gpu* Ax_eqn = par.astval("Ax"); - int e_num = par.ival("Ax_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ax_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAx, gpuWfc); - } - - for (int i = 0; i < yDim; i++){ - //size = xDim * zDim; - result = cufftExecZ2Z(plan_dim2, - &gpuWfc[i*size], - &gpuWfc[i*size],CUFFT_INVERSE); - } - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - - // 1D FFT to Ax - result = cufftExecZ2Z(plan_dim3,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Az_time")){ - EqnNode_gpu* Az_eqn = par.astval("Az"); - int e_num = par.ival("Az_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Az_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAz, gpuWfc); - } - - result = cufftExecZ2Z(plan_dim3,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - break; - - case 1: //Groundstate solver, odd step - - // 1D FFT to Ax - result = cufftExecZ2Z(plan_dim3,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Az_time")){ - EqnNode_gpu* Az_eqn = par.astval("Az"); - int e_num = par.ival("Az_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Az_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAz, gpuWfc); - } - - result = cufftExecZ2Z(plan_dim3,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - // loop to multiply by Ay - for (int i = 0; i < yDim; i++){ - //size = xDim * zDim; - result = cufftExecZ2Z(plan_dim2, - &gpuWfc[i*size], - &gpuWfc[i*size],CUFFT_FORWARD); - } - - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ax_time")){ - EqnNode_gpu* Ax_eqn = par.astval("Ax"); - int e_num = par.ival("Ax_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ax_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAx, gpuWfc); - } - - for (int i = 0; i < yDim; i++){ - //size = xDim * zDim; - result = cufftExecZ2Z(plan_dim2, - &gpuWfc[i*size], - &gpuWfc[i*size],CUFFT_INVERSE); - } - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - - // 1d forward / mult by Az - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ay_time")){ - EqnNode_gpu* Ay_eqn = par.astval("Ay"); - int e_num = par.ival("Ay_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ay_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAy, gpuWfc); - } - - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - break; - } + apply_gauge(par, gpuWfc, gpu1dpAx, gpu1dpAy, gpu1dpAz, + renorm_factor_x, renorm_factor_y, renorm_factor_z, + i%2, plan_1d, plan_dim2, plan_dim3, + dx, dy, dz, time, yDim, size); } else if (dimnum == 2){ - switch(i%2){ - case 0: //Groundstate solver, even step - - // 1d forward / mult by Ay - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ay_time")){ - EqnNode_gpu* Ay_eqn = par.astval("Ay"); - int e_num = par.ival("Ay_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ay_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAy, gpuWfc); - } - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - - // 1D FFT to wfc_pAx - result = cufftExecZ2Z(plan_other2d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ax_time")){ - EqnNode_gpu* Ax_eqn = par.astval("Ax"); - int e_num = par.ival("Ax_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ax_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAx, gpuWfc); - } - - result = cufftExecZ2Z(plan_other2d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - break; - - case 1: //Groundstate solver, odd step - // 1D FFT to wfc_pAx - result = cufftExecZ2Z(plan_other2d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ax_time")){ - EqnNode_gpu* Ax_eqn = par.astval("Ax"); - int e_num = par.ival("Ax_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ax_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAx, gpuWfc); - } - - result = cufftExecZ2Z(plan_other2d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - - // wfc_pAy - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_FORWARD); - scalarMult<<>>(gpuWfc, - renorm_factor_1d,gpuWfc); - if(par.bval("Ay_time")){ - EqnNode_gpu* Ay_eqn = par.astval("Ay"); - int e_num = par.ival("Ay_num"); - ast_cmult<<>>(gpuWfc, gpuWfc, - Ay_eqn, dx, dy, dz, time, e_num); - } - else{ - cMult<<>>(gpuWfc, - (cufftDoubleComplex*) gpu1dpAy, gpuWfc); - } - - result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, - CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, - renorm_factor_1d, gpuWfc); - break; - - } + apply_gauge(par, gpuWfc, gpu1dpAx, gpu1dpAy, + renorm_factor_x, renorm_factor_y, i%2, plan_1d, + plan_other2d, dx, dy, dz, time); } else if (dimnum == 1){ result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc,CUFFT_FORWARD); - scalarMult<<>>(gpuWfc,renorm_factor_1d,gpuWfc); + scalarMult<<>>(gpuWfc,renorm_factor_x,gpuWfc); if(par.bval("Ax_time")){ EqnNode_gpu* Ax_eqn = par.astval("Ax"); int e_num = par.ival("Ax_num"); @@ -846,7 +830,7 @@ void evolve(Grid &par, } result = cufftExecZ2Z(plan_1d,gpuWfc,gpuWfc, CUFFT_INVERSE); - scalarMult<<>>(gpuWfc, renorm_factor_1d, gpuWfc); + scalarMult<<>>(gpuWfc, renorm_factor_x, gpuWfc); } } diff --git a/src/init.cu b/src/init.cu index 9f24c1a0..dbe558d2 100644 --- a/src/init.cu +++ b/src/init.cu @@ -6,6 +6,8 @@ void check_memory(Grid &par){ int yDim = par.ival("yDim"); int zDim = par.ival("zDim"); + bool energy_calc = par.bval("energy_calc"); + int gSize = xDim*yDim*zDim; size_t free = 0; size_t total = 0; @@ -16,10 +18,17 @@ void check_memory(Grid &par){ // 8 double2* values on the GPU. This is not the case for dynamic fields // and the test should be updated accordingly as these are used more. size_t req_memory = 16*8*(size_t)gSize; + if (energy_calc){ + req_memory += 4*16*(size_t)gSize; + } if (free < req_memory){ std::cout << "Not enough GPU memory for gridsize!\n"; std::cout << "Free memory is: " << free << '\n'; - std::cout << "Required memory memory is: " << req_memory << '\n'; + std::cout << "Required memory is: " << req_memory << '\n'; + if (energy_calc){ + std::cout << "Required memory for energy calc is: " + << 4*16*(size_t)gSize << '\n'; + } std::cout << "xDim is: " << xDim << '\n'; std::cout << "yDim is: " << yDim << '\n'; std::cout << "zDim is: " << zDim << '\n'; @@ -52,14 +61,13 @@ int init(Grid &par){ if (dimnum > 2){ gSize *= zDim; } - double omega = par.dval("omega"); double gdt = par.dval("gdt"); double dt = par.dval("dt"); double omegaX = par.dval("omegaX"); double omegaY = par.dval("omegaY"); double omegaZ = par.dval("omegaZ"); double gammaY = par.dval("gammaY"); //Aspect ratio of trapping geometry. - double l = par.dval("winding"); + double winding = par.dval("winding"); double box_size = par.dval("box_size"); double *Energy; double *r; diff --git a/src/kernels.cu b/src/kernels.cu index 781eb8f8..66f1c76e 100644 --- a/src/kernels.cu +++ b/src/kernels.cu @@ -34,6 +34,42 @@ __device__ unsigned int getGid3d3d(){ return threadId; } +// global kernel to perform derivative +// For xDim derivative, stride = 1 +// For yDim derivative, stride = xDim +// For zDim derivative, stride = xDim*yDim +__global__ void derive(double *data, double *out, int stride, int gsize, + double dx){ + int gid = getGid3d3d(); + if (gid < gsize){ + if (gid + stride < gsize){ + out[gid] = (data[gid+stride] - data[gid])/dx; + } + else{ + out[gid] = data[gid]/dx; + } + } +} + +// global kernel to perform derivative +// For xDim derivative, stride = 1 +// For yDim derivative, stride = xDim +// For zDim derivative, stride = xDim*yDim +__global__ void derive(double2 *data, double2 *out, int stride, int gsize, + double dx){ + int gid = getGid3d3d(); + if (gid < gsize){ + if (gid + stride < gsize){ + out[gid].x = (data[gid+stride].x - data[gid].x)/dx; + out[gid].y = (data[gid+stride].y - data[gid].y)/dx; + } + else{ + out[gid].x = data[gid].x/dx; + out[gid].y = data[gid].y/dx; + } + } +} + __global__ void is_eq(bool *a, bool *b, bool *ans){ int gid = getGid3d3d(); ans[0] = true; @@ -50,6 +86,13 @@ __global__ void make_cufftDoubleComplex(double *in, double2 *out){ out[gid].y = 0; } +// Function to copy double2* values +__global__ void copy(double2 *in, double2 *out){ + int gid = getGid3d3d(); + out[gid] = in[gid]; +} + + // function to perform a transposition (2d) or permutation (3d) // Note: The 3 ints represent the final placement of that data direction // after transposition @@ -160,6 +203,22 @@ __device__ double complexMagnitude(double2 in){ return sqrt(in.x*in.x + in.y*in.y); } +__global__ void energy_sum(double2 *in1, double2 *in2, double *out){ + int gid = getGid3d3d(); + out[gid] = in1[gid].x + in2[gid].x; +} + +__global__ void energy_lsum(double *in1, double2 *in2, double *out){ + int gid = getGid3d3d(); + out[gid] = in1[gid] + in2[gid].x; +} + +__global__ void sum(double2 *in1, double2 *in2, double2 *out){ + int gid = getGid3d3d(); + out[gid].x = in1[gid].x + in2[gid].x; + out[gid].y = in1[gid].y + in2[gid].y; +} + __global__ void complexAbsSum(double2 *in1, double2 *in2, double *out){ int gid = getGid3d3d(); double2 temp; @@ -168,6 +227,15 @@ __global__ void complexAbsSum(double2 *in1, double2 *in2, double *out){ out[gid] = sqrt(temp.x*temp.x + temp.y*temp.y); } +__global__ void complexAbsSum(double2 *in1, double2 *in2, double2 *in3, + double *out){ + int gid = getGid3d3d(); + double2 temp; + temp.x = in1[gid].x + in2[gid].x + in3[gid].x; + temp.y = in1[gid].y + in2[gid].y + in3[gid].y; + out[gid] = sqrt(temp.x*temp.x + temp.y*temp.y); +} + __global__ void complexMagnitude(double2 *in, double *out){ int gid = getGid3d3d(); out[gid] = sqrt(in[gid].x*in[gid].x + in[gid].y*in[gid].y); @@ -240,6 +308,30 @@ __global__ void vecMult(double2 *in, double *factor, double2 *out){ out[gid] = result; } +__global__ void vecMult(double *in, double *factor, double *out){ + double result; + unsigned int gid = getGid3d3d(); + result = in[gid] * factor[gid]; + out[gid] = result; +} + + +__global__ void vecSum(double2 *in, double *factor, double2 *out){ + double2 result; + unsigned int gid = getGid3d3d(); + result.x = in[gid].x + factor[gid]; + result.y = in[gid].y + factor[gid]; + out[gid] = result; +} + +__global__ void vecSum(double *in, double *factor, double *out){ + double result; + unsigned int gid = getGid3d3d(); + result = in[gid] + factor[gid]; + out[gid] = result; +} + + __global__ void l2_norm(double *in1, double *in2, double *in3, double *out){ int gid = getGid3d3d(); @@ -270,7 +362,7 @@ __global__ void l2_norm(double2 *in1, double2 *in2, double *out){ /** * Performs the non-linear evolution term of Gross--Pitaevskii equation. */ -__global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt, double mass, int gstate, double gDenConst){ +__global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt, int gstate, double gDenConst){ double2 result; double gDensity; @@ -288,7 +380,6 @@ __global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt double2 tmp; tmp.x = tin1.x*cos(-gDensity) - tin1.y*sin(-gDensity); tmp.y = tin1.y*cos(-gDensity) + tin1.x*sin(-gDensity); - //printf("%d\t%f\t%f\t%f\t%f\t%f\n", gid, tin1.x, tin1.y, tmp.x, tmp.y, gDensity); result.x = (tmp.x)*tin2.x - (tmp.y)*tin2.y; result.y = (tmp.x)*tin2.y + (tmp.y)*tin2.x; @@ -299,7 +390,7 @@ __global__ void cMultDensity(double2* in1, double2* in2, double2* out, double dt //cMultDensity for ast V __global__ void cMultDensity_ast(EqnNode_gpu *eqn, double2* in, double2* out, double dx, double dy, double dz, double time, - int e_num, double dt, double mass, int gstate, + int e_num, double dt, int gstate, double gDenConst){ double2 result; double gDensity; @@ -367,6 +458,21 @@ __global__ void scalarMult(double2* in, double factor, double2* out){ out[gid] = result; } +__global__ void scalarMult(double* in, double factor, double* out){ + double result; + unsigned int gid = getGid3d3d(); + result = (in[gid] * factor); + out[gid] = result; +} + +__global__ void scalarMult(double2* in, double2 factor, double2* out){ + double2 result; + unsigned int gid = getGid3d3d(); + result.x = (in[gid].x * factor.x - in[gid].y*factor.y); + result.y = (in[gid].x * factor.y + in[gid].y*factor.x); + out[gid] = result; +} + /** * As above, but normalises for wfc */ @@ -571,11 +677,22 @@ __device__ double2 im_ast(double val, double dt){ return {exp(-val*dt), 0}; } -__global__ void zeros(bool *in, bool *out){ +__global__ void zeros( bool *out){ int gid = getGid3d3d(); out[gid] = 0; } +__global__ void zeros(double *out){ + int gid = getGid3d3d(); + out[gid] = 0; +} + +__global__ void zeros(double2 *out){ + int gid = getGid3d3d(); + out[gid].x = 0; + out[gid].y = 0; +} + __global__ void set_eq(double *in1, double *in2){ int gid = getGid3d3d(); in2[gid] = in1[gid]; diff --git a/src/operators.cu b/src/operators.cu index db764f3e..6fa18991 100644 --- a/src/operators.cu +++ b/src/operators.cu @@ -2,6 +2,71 @@ #include "../include/kernels.h" #include "../include/dynamic.h" +void laplacian(Grid &par, double2 *data, double2* out, int xDim, int yDim, + int zDim, double dx, double dy, double dz){ + + dim3 grid = par.grid; + dim3 threads = par.threads; + int gsize = xDim * yDim * zDim; + + double2 *temp_derivative; + cudaMalloc((void **) &temp_derivative, sizeof(double2)*gsize); + derive<<>>(data, temp_derivative, 1, gsize, dx); + derive<<>>(temp_derivative, temp_derivative, 1, gsize, dx); + + copy<<>>(temp_derivative, out); + + derive<<>>(data, temp_derivative, xDim, gsize, dy); + derive<<>>(temp_derivative, temp_derivative, + xDim, gsize, dy); + + sum<<>>(temp_derivative, out, out); + + derive<<>>(data, temp_derivative, xDim*yDim, gsize, dz); + derive<<>>(temp_derivative, temp_derivative, + xDim*yDim, gsize, dz); + + sum<<>>(temp_derivative, out, out); + + cudaFree(temp_derivative); + +} + +void laplacian(Grid &par, double2 *data, double2* out, int xDim, int yDim, + double dx, double dy){ + + + dim3 grid = par.grid; + dim3 threads = par.threads; + int gsize = xDim * yDim; + + double2 *temp_derivative; + cudaMalloc((void **) &temp_derivative, sizeof(double2)*gsize); + derive<<>>(data, temp_derivative, 1, gsize, dx); + derive<<>>(temp_derivative, temp_derivative, 1, gsize, dx); + + copy<<>>(temp_derivative, out); + + derive<<>>(data, temp_derivative, xDim, gsize, dy); + derive<<>>(temp_derivative, temp_derivative, + xDim, gsize, dy); + + sum<<>>(temp_derivative, out, out); + + cudaFree(temp_derivative); +} + +void laplacian(Grid &par, double2 *data, double2* out, int xDim, double dx){ + + dim3 grid = par.grid; + dim3 threads = par.threads; + int gsize = xDim; + + derive<<>>(data, out, 1, gsize, dx); + derive<<>>(out, out, 1, gsize, dx); + +} + double sign(double x){ if (x < 0){ return -1.0; @@ -542,7 +607,7 @@ __global__ void krotation_Ax(double *x, double *y, double *z, double omega, double fudge, double *A){ int gid = getGid3d3d(); int yid = blockDim.y*blockIdx.y + threadIdx.y; - A[gid] = -y[yid] * omega * omegaX; + A[gid] = y[yid] * omega * omegaX; } // Kernel for simple rotational case, Ay @@ -552,7 +617,7 @@ __global__ void krotation_Ay(double *x, double *y, double *z, double omega, double fudge, double *A){ int gid = getGid3d3d(); int xid = blockDim.x*blockIdx.x + threadIdx.x; - A[gid] = x[xid] * omega * omegaY; + A[gid] = -x[xid] * omega * omegaY; } // Kernel for simple rotational case, Ax @@ -844,10 +909,6 @@ void generate_fields(Grid &par){ cudaFree(EpAy_gpu); cudaFree(EpAz_gpu); - cudaFree(Ax_gpu); - cudaFree(Ay_gpu); - cudaFree(Az_gpu); - cudaFree(x_gpu); cudaFree(y_gpu); cudaFree(z_gpu); @@ -859,6 +920,9 @@ void generate_fields(Grid &par){ if (!energy_calc){ cudaFree(K_gpu); cudaFree(V_gpu); + cudaFree(Ax_gpu); + cudaFree(Ay_gpu); + cudaFree(Az_gpu); } else{ par.store("V_gpu",V_gpu); @@ -942,7 +1006,7 @@ __global__ void kstd_wfc(double *x, double *y, double *z, double *items, int yid = blockDim.y*blockIdx.y + threadIdx.y; int zid = blockDim.z*blockIdx.z + threadIdx.z; - phi[gid] = fmod(winding*atan2(y[yid], x[xid]),2*PI); + phi[gid] = -fmod(winding*atan2(y[yid], x[xid]),2*PI); wfc[gid].x = exp(-(x[xid]*x[xid]/(items[14]*items[14]*items[15]*items[15]) + y[yid]*y[yid]/(items[14]*items[14]*items[16]*items[16]) diff --git a/src/parser.cu b/src/parser.cu index 9eb48475..88f3c5aa 100644 --- a/src/parser.cu +++ b/src/parser.cu @@ -48,7 +48,7 @@ Grid parseArgs(int argc, char** argv){ par.store("write_file", true); par.store("fudge", 0.0); par.store("kill_idx", -1); - par.store("mask_2d", 1.5e-4); + par.store("mask_2d", 0.0); par.store("box_size", -0.01); par.store("found_sobel", false); par.store("energy_calc", false); diff --git a/src/split_op.cu b/src/split_op.cu index e351c9ff..0155572b 100644 --- a/src/split_op.cu +++ b/src/split_op.cu @@ -1,4 +1,3 @@ - #include "../include/split_op.h" #include "../include/kernels.h" #include "../include/constants.h" @@ -12,6 +11,7 @@ #include "../include/edge.h" #include "../include/manip.h" #include "../include/vort.h" +#include "../include/evolution.h" #include #include @@ -251,27 +251,32 @@ double energy_calc(Grid &par, double2* wfc){ int zDim = par.ival("zDim"); int gsize = xDim*yDim*zDim; + int dimnum = par.ival("dimnum"); + double dx = par.dval("dx"); double dy = par.dval("dy"); double dz = par.dval("dz"); double dg = dx*dy*dz; + bool corotating = par.bval("corotating"); + bool gpe = par.bval("gpe"); + cufftHandle plan; - if (par.ival("dimnum") == 1){ + if (dimnum == 1){ plan = par.ival("plan_1d"); } - if (par.ival("dimnum") == 2){ + if (dimnum == 2){ plan = par.ival("plan_2d"); } - if (par.ival("dimnum") == 3){ + if (dimnum == 3){ plan = par.ival("plan_3d"); } double renorm_factor = 1.0/pow(gsize,0.5); double2 *wfc_c, *wfc_k; - double2 *energy_r, *energy_k; + double2 *energy_r, *energy_k, *energy_l; double *energy; cudaMalloc((void **) &wfc_c, sizeof(double2)*gsize); @@ -289,6 +294,7 @@ double energy_calc(Grid &par, double2* wfc){ scalarMult<<>>(wfc_k, renorm_factor, wfc_k); vecMult<<>>(wfc_k, K, energy_k); + cudaFree(wfc_k); cufftExecZ2Z(plan, energy_k, energy_k, CUFFT_INVERSE); scalarMult<<>>(energy_k, renorm_factor, energy_k); @@ -296,10 +302,76 @@ double energy_calc(Grid &par, double2* wfc){ cMult<<>>(wfc_c, energy_k, energy_k); // Position-space energy - vecMult<<>>(wfc, V, energy_r); + // Adding in the nonlinear step for GPE (related to cMultDensity) + if (gpe){ + double interaction = par.dval("interaction"); + double gDenConst = par.dval("gDenConst"); + + double *real_comp; + cudaMalloc((void**) &real_comp, sizeof(double)*gsize); + complexMagnitudeSquared<<>>(wfc, real_comp); + scalarMult<<>>(real_comp, + 0.5*gDenConst*interaction, + real_comp); + vecSum<<>>(real_comp, V, real_comp); + vecMult<<>>(wfc, real_comp, energy_r); + + cudaFree(real_comp); + } + else{ + vecMult<<>>(wfc, V, energy_r); + } + cMult<<>>(wfc_c, energy_r, energy_r); - complexAbsSum<<>>(energy_r, energy_k, energy); + energy_sum<<>>(energy_r, energy_k, energy); + //zeros<<>>(energy); + + cudaFree(energy_r); + cudaFree(energy_k); + + // Adding in angular momementum energy if -l flag is on + if (corotating && dimnum > 1){ + + double2 *energy_l, *dwfc; + double *A; + double *check; + check = (double *)malloc(sizeof(double)*10); + + cudaMalloc((void **) &energy_l, sizeof(double2)*gsize); + cudaMalloc((void **) &dwfc, sizeof(double2)*gsize); + + A = par.dsval("Ax_gpu"); + + derive<<>>(wfc, energy_l, 1, gsize, dx); + + vecMult<<>>(energy_l, A, energy_l); + + A = par.dsval("Ay_gpu"); + derive<<>>(wfc, dwfc, xDim, gsize, dy); + + vecMult<<>>(dwfc, A, dwfc); + sum<<>>(dwfc,energy_l, energy_l); + + if (dimnum == 3){ + A = par.dsval("Az_gpu"); + + derive<<>>(wfc, dwfc, xDim*yDim, gsize, dz); + vecMult<<>>(dwfc, A, dwfc); + + sum<<>>(dwfc,energy_l, energy_l); + + } + + cudaFree(dwfc); + + double2 scale = {0, HBAR}; + scalarMult<<>>(energy_l, scale, energy_l); + cMult<<>>(wfc_c, energy_l, energy_l); + + energy_lsum<<>>(energy, energy_l, energy); + cudaFree(energy_l); + } double *energy_cpu; energy_cpu = (double *)malloc(sizeof(double)*gsize); @@ -313,11 +385,8 @@ double energy_calc(Grid &par, double2* wfc){ } free(energy_cpu); - cudaFree(energy_r); - cudaFree(energy_k); cudaFree(energy); cudaFree(wfc_c); - cudaFree(wfc_k); return sum; } diff --git a/src/unit_test.cu b/src/unit_test.cu index f80b8969..bfe3f2cb 100644 --- a/src/unit_test.cu +++ b/src/unit_test.cu @@ -186,6 +186,77 @@ void math_operator_test(){ } std::cout << "Complex addition, subtraction, multiplication, and powers have been tested\n"; + + cudaFree(da); + cudaFree(db); + cudaFree(dc); + + + std::cout << "Now testing the derive() kernels...\n"; + // Now testing the derive function + int dim = 4; + double2 *darray, *darray_gpu, *darray_out; + darray = (double2 *)malloc(sizeof(double2)*dim*dim*dim); + cudaMalloc((void**) &darray_gpu, sizeof(double2)*dim*dim*dim); + cudaMalloc((void**) &darray_out, sizeof(double2)*dim*dim*dim); + + for (int i = 0; i < dim; ++i){ + for (int j = 0; j < dim; ++j){ + for (int k = 0; k < dim; ++k){ + int index = k + j*dim + i*dim*dim; + darray[index].x = i + j + k; + darray[index].y = i + j + k; + } + } + } + + cudaMemcpy(darray_gpu, darray, sizeof(double2)*dim*dim*dim, + cudaMemcpyHostToDevice); + + grid = {1, dim, dim}; + threads = {dim, 1, 1}; + + derive<<>>(darray_gpu, darray_out, 1, dim*dim*dim,1); + cudaMemcpy(darray, darray_out, sizeof(double2)*dim*dim*dim, + cudaMemcpyDeviceToHost); + for (int i = 0; i < dim-1; ++i){ + for (int j = 0; j < dim-1; ++j){ + for (int k = 0; k < dim-1; ++k){ + int index = k + j*dim + i*dim*dim; + assert(darray[index].x == 1); + assert(darray[index].y == 1); + } + } + } + + derive<<>>(darray_gpu, darray_out, dim, dim*dim*dim,1); + cudaMemcpy(darray, darray_out, sizeof(double2)*dim*dim*dim, + cudaMemcpyDeviceToHost); + for (int i = 0; i < dim-1; ++i){ + for (int j = 0; j < dim-1; ++j){ + for (int k = 0; k < dim-1; ++k){ + int index = k + j*dim + i*dim*dim; + assert(darray[index].x == 1); + assert(darray[index].y == 1); + } + } + } + + derive<<>>(darray_gpu, darray_out, dim*dim, dim*dim*dim,1); + cudaMemcpy(darray, darray_out, sizeof(double2)*dim*dim*dim, + cudaMemcpyDeviceToHost); + for (int i = 0; i < dim-1; ++i){ + for (int j = 0; j < dim-1; ++j){ + for (int k = 0; k < dim-1; ++k){ + int index = k + j*dim + i*dim*dim; + assert(darray[index].x == 1); + assert(darray[index].y == 1); + } + } + } + + + std::cout << "derive functions passed!\n"; } __global__ void add_test(double2 *a, double2 *b, double2 *c){ @@ -1127,6 +1198,7 @@ void evolve_test(){ // Setting default values Grid par; + int res = 32; par.store("omega", 0.0); par.store("gammaY", 1.0); par.store("device", 0); @@ -1182,8 +1254,10 @@ void evolve_test(){ par.store("write_file", false); par.store("write_it", false); par.store("energy_calc", true); + par.store("corotating", true); + par.store("omega",0.0); par.store("box_size", 0.00007); - par.store("xDim", 64); + par.store("xDim", res); par.store("yDim", 1); par.store("zDim", 1); @@ -1192,10 +1266,10 @@ void evolve_test(){ // Running through all the dimensions to check the energy for (int i = 1; i <= 3; ++i){ if (i == 2){ - par.store("yDim", 64); + par.store("yDim", res); } if (i == 3){ - par.store("zDim", 64); + par.store("zDim", res); } par.store("dimnum",i); init(par); @@ -1364,9 +1438,11 @@ void cMultPhi_test(){ // Test for available amount of GPU memory void check_memory_test(){ Grid par; - par.store("xDim",100); - par.store("yDim",100); - par.store("zDim",100); + par.store("xDim",10); + par.store("yDim",10); + par.store("zDim",10); + + par.store("energy_calc",true); check_memory(par);