cuIBM/generateA_8cu_source.html

 #include "generateA.h"


 namespace kernels
 {

 /**
  * \brief Generates a block of the matrix resulting from implicit terms
  *        in the momentum equation.
  *
  * For every entry I in [0, ASize), the row and column indices of L are copied
  * to A and the value is set to -alpha*LVals[I], plus MVals[row] when the entry
  * lies on the diagonal (row index equal to column index).
  *
  * Only the x dimension of the grid and of the block is used for indexing.
  * Each thread starts at its global x index and advances by the total number
  * of threads launched along x, so any 1D launch size covers all ASize entries.
  *
  * \param ARows output row indices of A (one per entry)
  * \param ACols output column indices of A (one per entry)
  * \param AVals output values of A (one per entry)
  * \param MVals values added on the diagonal, indexed by row index
  * \param LRows input row indices of L
  * \param LCols input column indices of L
  * \param LVals input values of L
  * \param ASize number of entries to process
  * \param alpha factor applied (with a minus sign) to the values of L
  */
 __global__
 void generateA(int *ARows, int *ACols, real *AVals,
                real *MVals,
                int *LRows, int *LCols, real *LVals,
                int ASize, real alpha)
 {
     // total number of threads along x: distance between two entries
     // handled by the same thread
     const unsigned int stride = blockDim.x*gridDim.x;

     int I = threadIdx.x + blockIdx.x*blockDim.x;
     while (I < ASize)
     {
         const int row = LRows[I];
         const int col = LCols[I];

         // A has the same sparsity pattern as L
         ARows[I] = row;
         ACols[I] = col;

         // the comparison evaluates to 0 or 1, so MVals only contributes on the diagonal
         AVals[I] = -alpha*LVals[I] + (row==col)*MVals[row];

         I += stride;
     }
 } // generateA


 /**
  * \brief Generates a block of the matrix resulting from implicit terms
  *        in the momentum equation, taking into account the tags of the rows.
  *
  * For every entry I in [0, ASize), the row and column indices of L are copied
  * to A. The value is built from three contributions:
  *   - -alpha*LVals[I] when the row is untagged (tags[row] == -1),
  *   - -LVals[I] when the row is tagged (tags[row] != -1),
  *   - MVals[row] when the entry lies on the diagonal.
  *
  * Only the x dimension of the grid and of the block is used for indexing.
  * Each thread starts at its global x index and advances by the total number
  * of threads launched along x, so any 1D launch size covers all ASize entries.
  *
  * \param ARows output row indices of A (one per entry)
  * \param ACols output column indices of A (one per entry)
  * \param AVals output values of A (one per entry)
  * \param MVals values added on the diagonal, indexed by row index
  * \param LRows input row indices of L
  * \param LCols input column indices of L
  * \param LVals input values of L
  * \param ASize number of entries to process
  * \param alpha factor applied (with a minus sign) to the values of L on untagged rows
  * \param tags tag of each row, indexed by row index; -1 marks an untagged row
  */
 __global__
 void generateADirectForcing(int *ARows, int *ACols, real *AVals,
                             real *MVals,
                             int *LRows, int *LCols, real *LVals,
                             int ASize, real alpha, int *tags)
 {
     // total number of threads along x: distance between two entries
     // handled by the same thread
     const unsigned int stride = blockDim.x*gridDim.x;

     int I = threadIdx.x + blockIdx.x*blockDim.x;
     while (I < ASize)
     {
         const int row = LRows[I];
         const int col = LCols[I];
         const int tag = tags[row];
         const real lVal = LVals[I];

         // A has the same sparsity pattern as L
         ARows[I] = row;
         ACols[I] = col;

         // each comparison evaluates to 0 or 1 and switches its term on or off
         AVals[I] =   (tag == -1)*(-alpha*lVal) // untagged row: add -alpha*L
                    + (tag != -1)*(-lVal)       // tagged row: add -L
                    + (row==col)*MVals[row];    // diagonal entry: add M

         I += stride;
     }
 } // generateADirectForcing

 } // End of namespace kernels
real
double real
Is a float or a double depending on the machine precision.
Definition: types.h:116

kernels::generateADirectForcing
__global__ void generateADirectForcing(int *ARows, int *ACols, real *AVals, real *MVals, int *LRows, int *LCols, real *LVals, int ASize, real alpha, int *tags)
Generates a block of the matrix resulting from implicit terms in the momentum equation for the direct forcing method.
Definition: generateA.cu:77

kernels
Contains all the custom-written CUDA kernels.

kernels::generateA
__global__ void generateA(int *ARows, int *ACols, real *AVals, real *MVals, int *LRows, int *LCols, real *LVals, int ASize, real alpha)
Generates a block of the matrix resulting from implicit terms in the momentum equation.
Definition: generateA.cu:38

generateA.h
Declaration of the kernels to generate the matrix resulting from the implicit terms in the momentum equation.