cuIBM
A GPU-based Immersed Boundary Method code
calculateForce.cu
Go to the documentation of this file.
1 
9 #include "calculateForce.h"
10 
11 
12 #define BSZ 16
13 
14 
19 namespace kernels
20 {
21 
40 __global__
41 void dragLeftRight(real *FxX, real *q, real *lambda, real nu, real *dx, real *dy,
42  int nx, int ny, int I, int J, int ncx, int ncy)
43 {
44  int idx = threadIdx.x + blockIdx.x*blockDim.x;
45  if(idx >= ncy)
46  return;
47  int Ip = (J+idx)*nx + I,
48  Iu = (J+idx)*(nx-1) + (I-1);
49  FxX[idx] = -(
50  // multiply the pressure with the surface area to get p dy
51  (lambda[Ip+ncx]-lambda[Ip-1])*dy[J+idx]
52  +
53  // divide q^2 by dy, so that just u^2 dy is obtained
54  (
55  0.25*(q[Iu+ncx+1] + q[Iu+ncx])*(q[Iu+ncx+1] + q[Iu+ncx])
56  - 0.25*(q[Iu] + q[Iu-1])*(q[Iu] + q[Iu-1])
57  )/dy[J+idx]
58  -
59  // no multiplication or division since du/dx dy = dq/dx
60  nu*
61  (
62  (q[Iu+ncx+1] - q[Iu+ncx])/dx[I+ncx]
63  - (q[Iu] - q[Iu-1])/dx[I-1]
64  )
65  );
66 } // dragLeftRight
67 
68 
86 __global__
87 void dragBottomTop(real *FxY, real *q, real nu, real *dx, real *dy,
88  int nx, int ny, int I, int J, int ncx, int ncy)
89 {
90  int idx = threadIdx.x + blockIdx.x*blockDim.x;
91  if(idx > ncx)
92  return;
93  int Iu = J*(nx-1) + (I-1+idx),
94  Iv = (nx-1)*ny + (J-1)*nx + I+idx;
95  FxY[idx] = -(
96  // multiply by dS
97  (
98  0.25 * ( q[Iu+ncy*(nx-1)]/dy[J+ncy] + q[Iu+(ncy-1)*(nx-1)]/dy[J+ncy-1] )
99  * ( q[Iv+ncy*nx]/dx[I+idx] + q[Iv+ncy*nx-1]/dx[I+idx-1] )
100  -
101  0.25 * ( q[Iu]/dy[J] + q[Iu-(nx-1)]/dy[J-1] )
102  * ( q[Iv]/dx[I+idx] + q[Iv-1]/dx[I+idx-1] )
103  )
104  -
105  // multiply by dS (cannot use the leftRight trick in this case)
106  nu*
107  (
108  (
109  (q[Iu+ncy*(nx-1)]/dy[J+ncy] - q[Iu+(ncy-1)*(nx-1)]/dy[J+ncy-1])/2.0/(dy[J+ncy]+dy[J+ncy-1]) +
110  (q[Iv+ncy*nx]/dx[I+idx] - q[Iv+ncy*nx-1]/dx[I+idx-1])/2.0/(dx[I+idx]+dx[I+idx-1])
111  )
112  -
113  (
114  (q[Iu]/dy[J] - q[Iu-(nx-1)]/dy[J-1])/2.0/(dy[J]+dy[J-1]) +
115  (q[Iv]/dx[I+idx] - q[Iv-1]/dx[I+idx-1])/2.0/(dx[I+idx]+dx[I+idx-1])
116  )
117  )
118  )*0.5*(dx[I+idx]+dx[I+idx-1]);
119 
120 } // dragBottomTop
121 
122 
141 __global__
142 void dragUnsteady(real *FxU, real *q, real *qOld, real *dx, real *dy, real dt,
143  int nx, int ny, int I, int J, int ncx, int ncy)
144 {
145  int idx = threadIdx.x + blockIdx.x*blockDim.x;
146 
147  if(idx >= (ncx+1)*ncy)
148  return;
149 
150  int i = idx%(ncx+1),
151  j = idx/(ncx+1);
152 
153  int Iu = (J+j)*(nx-1) + (I-1+i);
154 
155  FxU[idx] = - (q[Iu] - qOld[Iu])/dt * 0.5*(dx[I+i]+dx[I-1+i]);
156 } // dragUnsteady
157 
158 
176 __global__
177 void liftLeftRight(real *FyX, real *q, real nu, real *dx, real *dy,
178  int nx, int ny, int I, int J, int ncx, int ncy)
179 {
180  int idx = threadIdx.x + blockIdx.x*blockDim.x;
181  if(idx > ncy)
182  return;
183  int Iu = (J+idx)*(nx-1) + (I-1),
184  Iv = (nx-1)*ny + (J-1+idx)*nx + I;
185  FyX[idx] = -(
186  // multiply by dS
187  (
188  0.25 * ( q[Iu+ncx]/dy[J+idx] + q[Iu+ncx-(nx-1)]/dy[J-1+idx] )
189  * ( q[Iv+ncx]/dx[I+ncx] + q[Iv+ncx-1]/dx[I+ncx-1] )
190  -
191  0.25 * ( q[Iu]/dy[J+idx] + q[Iu-(nx-1)]/dy[J-1+idx] )
192  * ( q[Iv]/dx[I] + q[Iv-1]/dx[I-1] )
193  )
194  -
195  // multiply by dS (cannot use the leftRight trick in this case)
196  nu*
197  (
198  (
199  (q[Iu+ncx]/dy[J+idx] - q[Iu+ncx-(nx-1)]/dy[J-1+idx])/2.0/(dy[J+idx]+dy[J-1+idx]) +
200  (q[Iv+ncx]/dx[I+ncx] - q[Iv+ncx-1]/dx[I+ncx-1])/2.0/(dx[I+ncx]+dx[I+ncx-1])
201  )
202  -
203  (
204  (q[Iu]/dy[J+idx] - q[Iu-(nx-1)]/dy[J-1+idx])/2.0/(dy[J+idx]+dy[J-1+idx]) +
205  (q[Iv]/dx[I] - q[Iv-1]/dx[I-1])/2.0/(dx[I]+dx[I-1])
206  )
207  )
208  )*0.5*(dy[J+idx]+dy[J-1+idx]);
209 } // liftLeftRight
210 
211 
230 __global__
231 void liftBottomTop(real *FyY, real *q, real *lambda, real nu, real *dx, real *dy,
232  int nx, int ny, int I, int J, int ncx, int ncy)
233 {
234  int idx = threadIdx.x + blockIdx.x*blockDim.x;
235  if(idx >= ncx)
236  return;
237  int Ip = J*nx + I+idx,
238  Iv = (nx-1)*ny + (J-1)*nx + I+idx;
239  FyY[idx] = -(
240  // multiply the pressure with the surface area to get p dx
241  (lambda[Ip+ncy*nx]-lambda[Ip-nx])*dx[I+idx]
242  +
243  // divide q^2 by dx, so that just v^2 dx is obtained
244  (
245  0.25*(q[Iv+(ncy+1)*nx] + q[Iv+ncy*nx])*(q[Iv+(ncy+1)*nx] + q[Iv+ncy*nx])
246  - 0.25*(q[Iv] + q[Iv-nx])*(q[Iv] + q[Iv-nx])
247  )/dx[I+idx]
248  -
249  // no multiplication or division since dv/dy dx = dq/dy
250  nu*
251  (
252  (q[Iv+(ncy+1)*nx] - q[Iv+ncy*nx])/dy[J+ncy]
253  - (q[Iv] - q[Iv-nx])/dy[J-1]
254  )
255  );
256 } // liftBottomTop
257 
258 
277 __global__
278 void liftUnsteady(real *FyU, real *q, real *qOld, real *dx, real *dy, real dt,
279  int nx, int ny, int I, int J, int ncx, int ncy)
280 {
281  int idx = threadIdx.x + blockIdx.x*blockDim.x;
282 
283  if( idx >= ncx*(ncy+1) )
284  return;
285 
286  int i = idx%ncx,
287  j = idx/ncx;
288 
289  int Iv = (J-1+j)*nx + (I+i) + (nx-1)*ny;
290 
291  FyU[idx] = - (q[Iv] - qOld[Iv])/dt * 0.5*(dy[J+j]+dy[J-1+j]);
292 } // liftUnsteady
293 
294 
298 __global__
299 void forceX(real *f, real *q, real *rn, int *tags,
300  int nx, int ny, real *dx, real *dy,
301  real dt, real alpha, real nu)
302 {
303  int bx = blockIdx.x,
304  by = blockIdx.y,
305  i = threadIdx.x,
306  j = threadIdx.y;
307 
308  // work out global index of first point in block
309  int I = (BSZ-2)*bx + i,
310  J = (BSZ-2)*by + j;
311 
312  if (I >= nx-1 || J >= ny) {
313  return;
314  }
315 
316  int Gidx_x = J*(nx-1) + I;
317 
318  real dTerm;
319 
320  __shared__ real u[BSZ][BSZ];
321 
322  __shared__ real Dx[BSZ][BSZ], Dy[BSZ][BSZ];
323 
324  Dy[j][i] = dy[J];
325  Dx[j][i] = dx[I];
326 
328  u[j][i] = q[Gidx_x]/Dy[j][i];
329  __syncthreads();
330 
332  int global_check = ( I==0 || I==(nx-2) || J==0 || J==(ny-1) ),
333  block_check = ( i==0 || i==(BSZ-1) || j==0 || j==(BSZ-1) );
334 
336  if( !(global_check || block_check) )
337  {
338  dTerm = alpha*nu*2.0*( \
339  ( Dx[j][i]*u[j][i+1] - (Dx[j][i]+Dx[j][i+1])*u[j][i] + Dx[j][i+1]*u[j][i-1] ) / ( Dx[j][i]*Dx[j][i+1]*(Dx[j][i]+Dx[j][i+1]) ) \
340 
341  + 4.0*( (Dy[j][i]+Dy[j-1][i])*u[j+1][i] - (Dy[j-1][i] + 2.0*Dy[j][i] + Dy[j+1][i])*u[j][i] + (Dy[j][i]+Dy[j+1][i])*u[j-1][i] ) \
342  /( (Dy[j][i]+Dy[j-1][i]) * (Dy[j-1][i] + 2.0*Dy[j][i] + Dy[j+1][i]) * (Dy[j][i]+Dy[j+1][i]) ) \
343  );
344 
345  f[Gidx_x] = ( u[j][i]/dt - dTerm - rn[Gidx_x]/(0.5*(Dx[j][i]+Dx[j][i+1])) ) * (!(tags[Gidx_x]==-1));
346  }
347 } // forceX
348 
349 
353 __global__
354 void forceY()
355 {
356 } // forceY
357 
358 } // End of namespace kernels
__global__ void liftBottomTop(real *FyY, real *q, real *lambda, real nu, real *dx, real *dy, int nx, int ny, int I, int J, int ncx, int ncy)
Calculate lift using a control-volume approach (bottom-top).
double real
Is a float or a double depending on the machine precision.
Definition: types.h:116
Declaration of the kernels to calculate the forces acting on a body The method is described in Lai & ...
Contains all the custom-written CUDA kernels.
__global__ void forceY()
Kernel not usable.
__global__ void liftUnsteady(real *FyU, real *q, real *qOld, real *dx, real *dy, real dt, int nx, int ny, int I, int J, int ncx, int ncy)
Calculate lift using a control-volume approach (unsteady).
__global__ void dragLeftRight(real *FxX, real *q, real *lambda, real nu, real *dx, real *dy, int nx, int ny, int I, int J, int ncx, int ncy)
Calculates drag using a control-volume approach (left-right).
#define BSZ
__global__ void forceX(real *f, real *q, real *rn, int *tags, int nx, int ny, real *dx, real *dy, real dt, real alpha, real nu)
Kernel not usable.
__global__ void dragBottomTop(real *FxY, real *q, real nu, real *dx, real *dy, int nx, int ny, int I, int J, int ncx, int ncy)
Calculate drag using a control-volume approach (bottom-top).
__global__ void dragUnsteady(real *FxU, real *q, real *qOld, real *dx, real *dy, real dt, int nx, int ny, int I, int J, int ncx, int ncy)
Calculate drag using a control-volume approach (unsteady).
__global__ void liftLeftRight(real *FyX, real *q, real nu, real *dx, real *dy, int nx, int ny, int I, int J, int ncx, int ncy)
Calculate lift using a control-volume approach (left-right).