48 int I = (
BSZ-2)*bx + i,
51 if (I >= nx-1 || J >= ny) {
56 Gidx_x = J*(nx-1) + I,
57 Gidx_y = (J-1)*nx + I + N_u;
59 real cTerm, dTerm, Hxn;
71 u[j][i] = q[Gidx_x]/Dy[j][i];
72 v[j][i] = q[Gidx_y]/Dx[j][i];
76 int global_check = ( I==0 || I==(nx-2) || J==0 || J==(ny-1) ),
77 block_check = ( i==0 || i==(
BSZ-1) || j==0 || j==(
BSZ-1) );
80 if( !(global_check || block_check) )
86 Hx[j][i] = - ( (u[j][i+1]+u[j][i])*(u[j][i+1]+u[j][i]) - (u[j][i]+u[j][i-1])*(u[j][i]+u[j][i-1]) )/( 2.0 * (Dx[j][i]+Dx[j][i+1]) ) \
87 - ( (u[j+1][i]+u[j][i])*(v[j+1][i]+v[j+1][i+1]) - (u[j][i]+u[j-1][i])*(v[j][i]+v[j][i+1]) )/( 4.0 * Dy[j][i] );
92 cTerm = gamma*Hx[j][i] + zeta*Hxn;
94 dTerm = alpha*nu*2.0*( \
95 ( Dx[j][i]*u[j][i+1] - (Dx[j][i]+Dx[j][i+1])*u[j][i] + Dx[j][i+1]*u[j][i-1] )/( Dx[j][i]*Dx[j][i+1]*(Dx[j][i]+Dx[j][i+1]) ) \
96 + 4.0*( (Dy[j][i]+Dy[j-1][i])*u[j+1][i] - (Dy[j-1][i] + 2.0*Dy[j][i] + Dy[j+1][i])*u[j][i] + (Dy[j][i]+Dy[j+1][i])*u[j-1][i] ) \
97 /( (Dy[j][i]+Dy[j-1][i]) * (Dy[j-1][i] + 2.0*Dy[j][i] + Dy[j+1][i]) * (Dy[j][i]+Dy[j+1][i]) ) \
99 rn[Gidx_x] = (u[j][i]/dt + cTerm + dTerm) * 0.5*(Dx[j][i]+Dx[j][i+1]);
132 int I = (
BSZ-2)*bx + i,
135 if (I >= nx || J >= ny-1) {
140 Gidx_x = J*(nx-1) + I,
141 Gidx_y = J*nx + I + N_u;
143 real cTerm, dTerm, Hyn;
155 u[j][i] = q[Gidx_x]/Dy[j][i];
156 v[j][i] = q[Gidx_y]/Dx[j][i];
160 int global_check = ( I==0 || I==(nx-1) || J==0 || J==(ny-2) ),
161 block_check = ( i==0 || i==(
BSZ-1) || j==0 || j==(
BSZ-1) );
164 if( !(global_check || block_check) )
171 Hy[j][i] = - ( (u[j+1][i]+u[j][i])*(v[j][i]+v[j][i+1]) - (u[j+1][i-1]+u[j][i-1])*(v[j][i]+v[j][i-1]) )/(4.0 * Dx[j][i]) \
172 - ( (v[j+1][i]+v[j][i])*(v[j+1][i]+v[j][i]) - (v[j-1][i]+v[j][i])*(v[j-1][i]+v[j][i]) )/( 2.0 * (Dy[j+1][i]+Dy[j][i]) );
175 H[Gidx_y] = Hy[j][i];
178 cTerm = gamma*Hy[j][i] + zeta*Hyn;
180 dTerm = alpha*nu*2.0*( \
181 4.0*( (Dx[j][i-1]+Dx[j][i])*v[j][i+1] - (Dx[j][i-1]+2.0*Dx[j][i]+Dx[j][i+1])*v[j][i] + (Dx[j][i]+Dx[j][i+1])*v[j][i-1] ) \
182 /( (Dx[j][i-1]+Dx[j][i]) * (Dx[j][i]+Dx[j][i+1]) * (Dx[j][i-1]+2.0*Dx[j][i]+Dx[j][i+1]) ) \
183 + ( Dy[j][i]*v[j+1][i] - (Dy[j][i]+Dy[j+1][i])*v[j][i] + Dy[j+1][i]*v[j-1][i] )/( Dy[j][i]*Dy[j+1][i]*(Dy[j][i]+Dy[j+1][i]) ) \
185 rn[Gidx_y] = (v[j][i]/dt + cTerm + dTerm) * 0.5*(Dy[j][i]+Dy[j+1][i]);
216 int I = blockIdx.x*blockDim.x + threadIdx.x;
218 if(I==0 || I >= nx-1)
return;
220 int Iu, Iv = I + (nx-1)*ny;
221 real Hn, cTerm, dTerm;
225 ( 0.5*(q[Iv]/dx[I]+q[Iv+1]/dx[I+1]) ) * ( 0.5*(q[I]/dy[0]+q[I+(nx-1)]/dy[1]) ) \
226 - ( 0.5*(q[Iv-1]/dx[I-1]+q[Iv]/dx[I]) ) * ( 0.5*(q[I-1]/dy[0]+q[I-1+(nx-1)]/dy[1]) ) \
229 (0.5*(q[Iv] + q[Iv+nx])/dx[I]) * (0.5*(q[Iv] + q[Iv+nx])/dx[I]) \
230 - (0.5*(bcBottom[I+nx-1] + q[Iv]/dx[I])) * (0.5*(bcBottom[I+nx-1] + q[Iv]/dx[I])) \
231 )/(0.5*(dy[0] + dy[1]));
232 cTerm = gamma*H[Iv] + zeta*Hn;
235 dTerm = alpha*nu*2.0*( \
236 4.0 * ( (dx[I-1]+dx[I])*q[Iv+1]/dx[I+1] - (dx[I-1]+2.0*dx[I]+dx[I+1])*q[Iv]/dx[I] + (dx[I]+dx[I+1])*q[Iv-1]/dx[I-1] ) \
237 /( (dx[I-1]+dx[I]) * (dx[I]+dx[I+1]) * (dx[I-1]+2.0*dx[I]+dx[I+1]) ) \
238 + ( dy[0]*q[Iv+nx]/dx[I] - (dy[0]+dy[1])*q[Iv]/dx[I] + dy[1]*bcBottom[I+nx-1] )/( dy[0]*dy[1]*(dy[0]+dy[1]) ) \
241 rn[Iv] = ( q[Iv]/(dx[I]*dt) + cTerm + dTerm ) * 0.5*(dy[0] + dy[1]);
243 Iu = I + (ny-2)*(nx-1);
244 Iv = I + (nx-1)*ny + (ny-2)*nx;
248 ( 0.5*(q[Iv]/dx[I] + q[Iv+1]/dx[I+1]) )*( 0.5*(q[Iu]/dy[ny-2] + q[Iu+(nx-1)]/dy[ny-1]) ) \
249 - ( 0.5*(q[Iv-1]/dx[I-1] + q[Iv]/dx[I]) )*( 0.5*(q[Iu-1]/dy[ny-2] + q[Iu-1+(nx-1)]/dy[ny-1]) )
252 ( 0.5*(q[Iv]/dx[I] + bcTop[I+nx-1]) )*( 0.5*(q[Iv]/dx[I] + bcTop[I+nx-1]) ) \
253 - ( 0.5*(q[Iv-nx] + q[Iv])/dx[I] )*( 0.5*(q[Iv-nx] + q[Iv])/dx[I] )
254 )/(0.5*(dy[ny-2] + dy[ny-1]));
255 cTerm = gamma*H[Iv] + zeta*Hn;
258 dTerm = alpha*nu*2.0*( \
259 4.0*( (dx[I-1]+dx[I])*q[Iv+1]/dx[I+1] - (dx[I-1]+2.0*dx[I]+dx[I+1])*q[Iv]/dx[I] + (dx[I]+dx[I+1])*q[Iv-1]/dx[I-1] ) \
260 /( (dx[I-1]+dx[I]) * (dx[I]+dx[I+1]) * (dx[I-1]+2.0*dx[I]+dx[I+1]) ) \
261 + ( dy[ny-2]*bcTop[I+nx-1] - (dy[ny-1]+dy[ny-2])*q[Iv]/dx[I] + dy[ny-1]*q[Iv-nx]/dx[I] )/( dy[ny-2]*dy[ny-1]*(dy[ny-2]+dy[ny-1]) ) \
264 rn[Iv] = ( q[Iv]/(dx[I]*dt) + cTerm + dTerm ) * 0.5*(dy[ny-2] + dy[ny-1]);
296 int I = blockIdx.x*blockDim.x + threadIdx.x;
298 if(I >= nx-1)
return;
308 u0x = 0.5*(bcLeft[0] + u);
309 u1x = 0.5*(u + q[I+1]/dy[0]);
314 u0x = 0.5*(q[I-1]/dy[0] + u);
315 u1x = 0.5*(u + bcRight[0]);
320 u0x = 0.5*(q[I-1]/dy[0] + u);
321 u1x = 0.5*(u + q[I+1]/dy[0]);
327 H[I] = - ( u1x*u1x - u0x*u0x )/( 0.5*(dx[I]+dx[I+1]) ) \
328 - ( 0.5*(q[Iv]/dx[I] + q[Iv+1]/dx[I+1]) * 0.5*(u + q[I+(nx-1)]/dy[1]) - 0.5*(bcBottom[I+(nx-1)] + bcBottom[I+1+(nx-1)])*bcBottom[I] )/(dy[0]);
329 cTerm = gamma*H[I] + zeta*Hn;
332 dTerm = alpha*nu*2.0*( \
333 ( dx[I]*ur - (dx[I]+dx[I+1])*u + dx[I+1]*ul )/( dx[I] * (dx[I]+dx[I+1]) * dx[I+1] ) \
334 + 4.0*( dy[0]*q[I+(nx-1)]/dy[1] - (2.0*dy[0]+dy[1])*u + (dy[0]+dy[1])*bcBottom[I] ) \
335 /( dy[0] * (2.0*dy[0]+dy[1]) * (dy[0]+dy[1]) )
338 rn[I] = ( u/dt + cTerm + dTerm ) * 0.5*(dx[I] + dx[I+1]);
340 Iu = I + (ny-1)*(nx-1);
341 Iv = I + (ny-2)*nx + (nx-1)*ny;
345 u0x = 0.5*(bcLeft[ny-1] + u);
346 u1x = 0.5*(u + q[Iu+1]/dy[ny-1]);
348 ur = q[Iu+1]/dy[ny-1];
351 u0x = 0.5*(q[Iu-1]/dy[ny-1] + u);
352 u1x = 0.5*(u + bcRight[ny-1]);
353 ul = q[Iu-1]/dy[ny-1];
357 u0x = 0.5*(q[Iu-1]/dy[ny-1] + u);
358 u1x = 0.5*(u + q[Iu+1]/dy[ny-1]);
359 ul = q[Iu-1]/dy[ny-1];
360 ur = q[Iu+1]/dy[ny-1];
364 H[Iu] = - ( u1x*u1x - u0x*u0x )/( 0.5*(dx[I]+dx[I+1]) ) \
365 - ( bcTop[I]*0.5*(bcTop[I+(nx-1)]+bcTop[I+1+(nx-1)]) - 0.5*(q[Iv]/dx[I] + q[Iv+1]/dx[I+1])*0.5*(u + q[Iu-(nx-1)]/dy[ny-2]) )/(dy[ny-1]);
366 cTerm = gamma*H[Iu] + zeta*Hn;
369 dTerm = alpha*nu*2.0*( \
370 ( dx[I]*ur - (dx[I]+dx[I+1])*u + dx[I+1]*ul )/( dx[I] * (dx[I]+dx[I+1]) * dx[I+1] ) \
371 + 4.0*( (dy[ny-2]+dy[ny-1])*bcTop[I] - (dy[ny-2]+2.0*dy[ny-1])*u + (dy[ny-1])*q[Iu-(nx-1)]/dy[ny-2] ) \
372 /( (dy[ny-2]+dy[ny-1])*(dy[ny-1])*(dy[ny-2]+2.0*dy[ny-1]) )
375 rn[Iu] = ( u/dt + cTerm + dTerm) * 0.5*(dx[I] + dx[I+1]);
405 int I = blockIdx.x*blockDim.x + threadIdx.x;
407 if(I==0 || I >= ny-1)
return;
410 Iv = I*(nx) + (nx-1)*ny;
411 real Hn, cTerm, dTerm;
415 ( 0.5*(q[Iu] + q[Iu+1])/dy[I] ) * ( 0.5*(q[Iu] + q[Iu+1])/dy[I] ) \
416 - ( 0.5*(bcLeft[I] + q[Iu]/dy[I]) ) * ( 0.5*(bcLeft[I] + q[Iu]/dy[I]) ) \
417 )/(0.5*(dx[0]+dx[1])) \
419 ( 0.5*(q[Iv]/dx[0] + q[Iv+1]/dx[1]) ) * ( 0.5*(q[Iu]/dy[I] + q[Iu+(nx-1)]/dy[I+1]) ) \
420 - ( 0.5*(q[Iv-nx]/dx[0] + q[Iv+1-nx]/dx[1]) ) * ( 0.5*(q[Iu-(nx-1)]/dy[I-1] + q[Iu]/dy[I]) ) \
422 cTerm = gamma*H[Iu] + zeta*Hn;
425 dTerm = alpha*nu*2.0*( \
426 ( dx[0]*q[Iu+1]/dy[I] - (dx[0]+dx[1])*q[Iu]/dy[I] + dx[1]*bcLeft[I] )/( dx[0]*dx[1]*(dx[0]+dx[1]) ) \
427 + 4.0 * ( (dy[I-1]+dy[I])*q[Iu+(nx-1)]/dy[I+1] - (dy[I-1]+2.0*dy[I]+dy[I+1])*q[Iu]/dy[I] + (dy[I]+dy[I+1])*q[Iu-(nx-1)]/dy[I-1] ) \
428 /( (dy[I-1]+dy[I]) * (dy[I]+dy[I+1]) * (dy[I-1]+2.0*dy[I]+dy[I+1]) ) \
431 rn[Iu] = ( q[Iu]/(dy[I]*dt) + cTerm + dTerm ) * 0.5*(dx[0] + dx[1]);
433 Iu = I*(nx-1) + (nx-2);
434 Iv = I*(nx) + (nx-1)*ny + (nx-2);
437 ( 0.5*(q[Iu]/dy[I] + bcRight[I]) ) * ( 0.5*(q[Iu]/dy[I] + bcRight[I]) ) \
438 - ( 0.5*(q[Iu-1] + q[Iu])/dy[I] ) * ( 0.5*(q[Iu-1] + q[Iu])/dy[I] ) \
439 )/(0.5*(dx[nx-2]+dx[nx-1])) \
441 ( 0.5*(q[Iv]/dx[nx-2] + q[Iv+1]/dx[nx-1]) ) * ( 0.5*(q[Iu]/dy[I] + q[Iu+(nx-1)]/dy[I+1]) ) \
442 - ( 0.5*(q[Iv-nx]/dx[nx-2] + q[Iv+1-nx]/dx[nx-1]) ) * ( 0.5*(q[Iu-(nx-1)]/dy[I-1] + q[Iu]/dy[I]) ) \
444 cTerm = gamma*H[Iu] + zeta*Hn;
447 dTerm = alpha*nu*2.0*( \
448 ( dx[nx-2]*bcRight[I] - (dx[nx-2]+dx[nx-1])*q[Iu]/dy[I] + dx[nx-1]*q[Iu-1]/dy[I] )/( dx[nx-2]*dx[nx-1]*(dx[nx-2]+dx[nx-1]) ) \
449 + 4.0 * ( (dy[I-1]+dy[I])*q[Iu+(nx-1)]/dy[I+1] - (dy[I-1]+2.0*dy[I]+dy[I+1])*q[Iu]/dy[I] + (dy[I]+dy[I+1])*q[Iu-(nx-1)]/dy[I-1] ) \
450 /( (dy[I-1]+dy[I]) * (dy[I]+dy[I+1]) * (dy[I-1]+2.0*dy[I]+dy[I+1]) ) \
453 rn[Iu] = ( q[Iu]/(dy[I]*dt) + cTerm + dTerm ) * 0.5*(dx[nx-2] + dx[nx-1]);
485 int I = blockIdx.x*blockDim.x + threadIdx.x;
490 Iv = I*nx + (nx-1)*ny;
491 real vb, vt, v0y, v1y, v,
495 v0y = 0.5*(bcBottom[nx-1] + v);
496 v1y = 0.5*(v + q[Iv+nx]/dx[0]);
501 v0y = 0.5*(q[Iv-nx]/dx[0] + v);
502 v1y = 0.5*(q[Iv]/dx[0] + bcTop[nx-1]);
507 v0y = 0.5*(q[Iv-nx]/dx[0] + v);
508 v1y = 0.5*(v + q[Iv+nx]/dx[0]);
515 H[Iv] = -( 0.5*(v + q[Iv+1]/dx[1])*0.5*(q[Iu]/dy[I] + q[Iu+(nx-1)]/dy[I+1]) - bcLeft[I+ny]*0.5*(bcLeft[I] + bcLeft[I+1]) )/dx[0] \
516 -( v1y*v1y - v0y*v0y )/(0.5*(dy[I] + dy[I+1]));
517 cTerm = gamma*H[Iv] + zeta*Hn;
520 dTerm = alpha*nu*2.0*( \
521 4.0 * ( (dx[0])*q[Iv+1]/dx[1] - (2.0*dx[0]+dx[1])*v + (dx[0]+dx[1])*bcLeft[I+ny] ) \
522 /( (dx[0]) * (dx[0]+dx[1]) * (2.0*dx[0]+dx[1]) ) \
523 + ( dy[I]*vt - (dy[I]+dy[I+1])*v + dy[I+1]*vb )/( dy[I]*dy[I+1]*(dy[I]+dy[I+1]) ) \
526 rn[Iv] = ( v/dt + cTerm + dTerm ) * 0.5*(dy[I] + dy[I+1]);
528 Iu = I*(nx-1) + (nx-1);
529 Iv = I*nx + (nx-1)*ny + (nx-1);
532 v0y = 0.5*(bcBottom[nx-1+(nx-1)] + v);
533 v1y = 0.5*(v + q[Iv+nx]/dx[nx-1]);
534 vb = bcBottom[nx-1+(nx-1)];
535 vt = q[Iv+nx]/dx[nx-1];
538 v0y = 0.5*(q[Iv-nx]/dx[nx-1] + v);
539 v1y = 0.5*(q[Iv]/dx[nx-1] + bcTop[nx-1+(nx-1)]);
540 vb = q[Iv-nx]/dx[nx-1];
541 vt = bcTop[nx-1+(nx-1)];
544 v0y = 0.5*(q[Iv-nx]/dx[nx-1] + v);
545 v1y = 0.5*(v + q[Iv+nx]/dx[nx-1]);
546 vb = q[Iv-nx]/dx[nx-1];
547 vt = q[Iv+nx]/dx[nx-1];
552 H[Iv] = -( bcRight[I+ny]*0.5*(bcRight[I]+bcRight[I+1]) - 0.5*(q[Iv-1]/dx[nx-2] + v)*0.5*(q[Iu-1]/dy[I] + q[Iu-1+(nx-1)]/dy[I+1]) )/dx[nx-1] \
553 -( v1y*v1y - v0y*v0y )/(0.5*(dy[I] + dy[I+1]));
554 cTerm = gamma*H[Iv] + zeta*Hn;
557 dTerm = alpha*nu*2.0*( \
558 4.0 * ( (dx[nx-1]+dx[nx-2])*bcRight[I+ny] - (2.0*dx[nx-1]+dx[nx-2])*v + (dx[nx-1])*q[Iv-1]/dx[nx-2] ) \
559 /( (dx[nx-1]) * (dx[nx-1]+dx[nx-2]) * (2.0*dx[nx-1]+dx[nx-2]) ) \
560 + ( dy[I]*vt - (dy[I]+dy[I+1])*v + dy[I+1]*vb )/( dy[I]*dy[I+1]*(dy[I]+dy[I+1]) ) \
563 rn[Iv] = ( v/dt + cTerm + dTerm ) * 0.5*(dy[I] + dy[I+1]);
Declaration of the kernels to generate the explicit terms of the momentum equation.
double real
Is a float or a double depending on the machine precision.
Contains all the custom-written CUDA kernels.
__global__ void convectionTermVLeftRight(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu, real *bcBottom, real *bcTop, real *bcLeft, real *bcRight)
Computes v-component of the vector resulting from the explicit terms of the discretized momentum equa...
__global__ void convectionTermUBottomTop(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu, real *bcBottom, real *bcTop, real *bcLeft, real *bcRight)
Computes u-component of the vector resulting from the explicit terms of the discretized momentum equa...
__global__ void convectionTermVBottomTop(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu, real *bcBottom, real *bcTop)
Computes v-component of the vector resulting from the explicit terms of the discretized momentum equa...
__global__ void convectionTermU(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu)
Computes u-component of the vector resulting from the explicit terms of the discretized momentum equa...
__global__ void convectionTermV(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu)
Computes v-component of the vector resulting from the explicit terms of the discretized momentum equa...
__global__ void convectionTermULeftRight(real *rn, real *H, real *q, int nx, int ny, real *dx, real *dy, real dt, real gamma, real zeta, real alpha, real nu, real *bcLeft, real *bcRight)
Computes u-component of the vector resulting from the explicit terms of the discretized momentum equa...