cuIBM
A GPU-based Immersed Boundary Method code
generateQT.cu
Go to the documentation of this file.
1 
7 #include "generateQT.h"
8 
9 
18 __device__
20 {
21  real r = fabs(x)/h;
22 
23  if(r>1.5)
24  return 0.0;
25  else if(r>0.5 && r<=1.5)
26  return 1.0/(6*h)*( 5.0 - 3.0*r - sqrt(-3.0*(1-r)*(1-r) + 1.0) );
27  else
28  return 1.0/(3*h)*( 1.0 + sqrt(-3.0*r*r + 1.0) );
29 } // dhRomaDeviceQT
30 
31 
41 __device__
43 {
44  return dhRomaDeviceQT(x, h) * dhRomaDeviceQT(y, h);
45 } // deltaDeviceQT
46 
47 
52 namespace kernels
53 {
54 
64 __global__
65 void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tags)
66 {
67  int I = threadIdx.x + blockIdx.x*blockDim.x;
68 
69  if(I < QSize)
70  {
71  QVals[I] *= (tags[QRows[I]] == -1);
72  }
73 } // updateQ
74 
75 
86 __global__
87 void updateQT(int *QTRows, int *QTCols, real *QTVals, int QTSize, int *tags, real *coeffs)
88 {
89  int I = threadIdx.x + blockIdx.x*blockDim.x;
90 
91  if(I < QTSize)
92  {
93  int col = QTCols[I];
94  real val = QTVals[I];
95  QTCols[I] = (tags[col]==-1)*col + (tags[col]!=-1)*tags[col];
96  QTVals[I] = (tags[col]==-1)*val + (tags[col]!=-1)*coeffs[col]*val;
97  }
98 } // updateQT
99 
100 
110 void generateQT(int *QTRows, int *QTCols, real *QTVals, int nx, int ny)
111 {
112  int numU = (nx-1)*ny;
113 
114  int Iu, Iv;
115  int row = 0;
116  int num_elements = 0;
117 
119 
121  for(int j=0; j<ny; j++)
122  {
123  for(int i=0; i<nx; i++)
124  {
125  Iu = j*(nx-1) + i;
126  Iv = j*nx + i + numU;
127 
128  if(i>0)
129  {
130  QTRows[num_elements] = row;
131  QTCols[num_elements] = Iu - 1;
132  QTVals[num_elements] = 1;
133  num_elements++;
134  }
135  if(i<nx-1)
136  {
137  QTRows[num_elements] = row;
138  QTCols[num_elements] = Iu;
139  QTVals[num_elements] = -1;
140  num_elements++;
141  }
142  if(j>0)
143  {
144  QTRows[num_elements] = row;
145  QTCols[num_elements] = Iv - nx;
146  QTVals[num_elements] = 1;
147  num_elements++;
148  }
149  if(j<ny-1)
150  {
151  QTRows[num_elements] = row;
152  QTCols[num_elements] = Iv;
153  QTVals[num_elements] = -1;
154  num_elements++;
155  }
156  row++;
157  }
158  }
159 } // generateQT
160 
161 
183 __global__
184 void updateQT(int *QTRows, int *QTCols, real *QTVals,
185  int *ERows, int *ECols, real *EVals,
186  int nx, int ny, real *x, real *y, real *dx,
187  int totalPoints, real *xB, real *yB, int *I, int *J)
188 {
189  int bodyIdx = threadIdx.x + blockIdx.x*blockDim.x;
190 
191  if(bodyIdx >= totalPoints)
192  return;
193 
194  int Ib=I[bodyIdx],
195  Jb=J[bodyIdx],
196  QTIdx = 4*nx*ny - 2*(nx+ny) + bodyIdx*12,
197  EIdx = bodyIdx*12,
198  i, j;
199 
200  real Dx = dx[Ib];
201 
202  // populate x-components
203  for(j=Jb-1; j<=Jb+1; j++)
204  {
205  for(i=Ib-2; i<=Ib+1; i++)
206  {
207  QTRows[QTIdx] = bodyIdx + nx*ny;
208  ERows[EIdx] = bodyIdx;
209 
210  QTCols[QTIdx] = j*(nx-1) + i;
211  ECols[EIdx] = QTCols[QTIdx];
212 
213  QTVals[QTIdx] = Dx*deltaDeviceQT(x[i+1]-xB[bodyIdx], 0.5*(y[j]+y[j+1])-yB[bodyIdx], Dx);
214  EVals[EIdx] = QTVals[QTIdx];
215 
216  QTIdx++;
217  EIdx++;
218  }
219  }
220 
221  // populate y-components
222  for(j=Jb-2; j<=Jb+1; j++)
223  {
224  for(i=Ib-1; i<=Ib+1; i++)
225  {
226  QTRows[QTIdx+12*totalPoints-12] = bodyIdx + nx*ny + totalPoints;
227  ERows[EIdx+12*totalPoints-12] = bodyIdx + totalPoints;
228 
229  QTCols[QTIdx+12*totalPoints-12] = j*nx + i + (nx-1)*ny;
230  ECols[EIdx+12*totalPoints-12] = QTCols[QTIdx+12*totalPoints-12];
231 
232  QTVals[QTIdx+12*totalPoints-12] = Dx*deltaDeviceQT(0.5*(x[i]+x[i+1])-xB[bodyIdx], y[j+1]-yB[bodyIdx], Dx);
233  EVals[EIdx+12*totalPoints-12] = QTVals[QTIdx+12*totalPoints-12];
234 
235  QTIdx++;
236  EIdx++;
237  }
238  }
239 } // updateQT
240 
241 } // End of namespace kernels
__global__ void updateQT(int *QTRows, int *QTCols, real *QTVals, int QTSize, int *tags, real *coeffs)
Update the divergence operator at forcing nodes.
Definition: generateQT.cu:87
__device__ real dhRomaDeviceQT(real x, real h)
Discrete delta function defined by Roma et al. (1999).
Definition: generateQT.cu:19
double real
Is a float or a double depending on the machine precision.
Definition: types.h:116
Contains all the custom-written CUDA kernels.
void generateQT(int *QTRows, int *QTCols, real *QTVals, int nx, int ny)
Generates the divergence matrix (on the host).
Definition: generateQT.cu:110
__device__ real deltaDeviceQT(real x, real y, real h)
Two-dimensional discrete delta function.
Definition: generateQT.cu:42
Declaration of the kernels to generate gradient matrix and interpolation matrix.
__global__ void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tags)
Update the gradient operator at forcing nodes.
Definition: generateQT.cu:65