XT-neighbor
Macros | Functions | Variables
kernel.cu File Reference

A collection of most of the GPU parallel primitives, implemented as CUDA kernels (most map and expand operations). More...

#include "codec.cu"
#include <limits.h>

Macros

#define MIN3(a, b, c)   ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
 

Functions

template<typename T >
T transfer_last_element (T *deviceArr, int n)
 transfer last element of the GPU array to main memory. More...
 
__global__ void cal_combination_len (Int3 *input, int distance, int *output, int n)
 precalculate the number of positions required in the output array of generate combination operation. More...
 
__global__ void cal_pair_len (int *inputRange, int *outputRange, int n)
 precalculate the output range of generate pair operation. More...
 
__global__ void cal_pair_len_diag (int *inputRange, int *outputRange, int n)
 precalculate the output range of pair generation in diagonal position for overlap mode. More...
 
__global__ void cal_pair_len_nondiag (Int2 *pairs, int *seqOffset, int *outputRange, int n)
 precalculate the output range of pair generation in non-diagonal position for overlap mode. More...
 
__global__ void cal_pair_len_lowerbound (int *indexes, int *inputOffsets, int *outputLengths, int lowerbound, int n)
 precalculate the number of positions required in the output array of generate pair operation with lower bound constraint. More...
 
__global__ void generate_pairs (int *indexes, Int2 *outputs, int *inputOffsets, int *outputOffsets, int *lesserIndex, int lowerbound, int carry, int n)
 combinatorially generate pairs of indexes within the same group. More...
 
__global__ void generate_smaller_index (int *indexes, int *outputs, int *inputOffsets, int *outputOffsets, int carry, int n)
 combinatorially generate pairs of indexes within the same group but record only the partial output. More...
 
__device__ char levenshtein_static (char *allStr, unsigned int start1, unsigned int start2, int len1, int len2)
 calculate Levenshtein distance of 2 strings in GPU where len1,len2<=18. More...
 
__device__ char levenshtein (char *allStr, unsigned int start1, unsigned int start2, int len1, int len2)
 calculate Levenshtein distance of 2 strings in GPU where len1>18 or len2>18. More...
 
__device__ char hamming (char *allStr, unsigned int start1, unsigned int start2, int len1, int len2)
 calculate Hamming distance of 2 strings in GPU. More...
 
__global__ void cal_distance (char *allStr, unsigned int *offsets, Int2 *index, SeqInfo *seqInfo, int *seqOffset, int distance, char measure, char *distanceOutput, char *flagOutput, int n, int seqLen)
 calculate distances of strings from given pairs and flag ones exceeding the threshold. More...
 
__global__ void cal_distance (char *allStr, unsigned int *offsets, Int2 *index, int distance, char measure, char *distanceOutput, char *flagOutput, int n, int seqLen)
 
__global__ void pair2rep (Int2 *pairs, Int2 *indexOut, size_t *freqOut, SeqInfo *seqInfo, int *inputOffsets, int *outputOffsets, int n)
 turning pairs and frequencies from sequence format to repertoire format. More...
 
__global__ void gen_assignment (int *matrix, int *output, int nBit, int nRow, int nColumn)
 expand operation part of solving bin packing for 2D buffer. More...
 
__global__ void gen_bounds (size_t *matrix, int *keyOut, int *valueOut, int nBit, int valueMax, int nRow, int nColumn)
 expand operation part of solving bin packing for lower bound. More...
 
__global__ void flag_lowerbound (int *valueInput, int *valueOffsets, char *output, int lowerbound, int n)
 flag data to be removed after the lower bound has been processed. More...
 
__global__ void make_row_index (int *output, int n, int nRepeat)
 utility to generate keys for matrix processing. More...
 
__global__ void toSizeT (int *input, size_t *output, int n)
 utility to cast types. More...
 
__global__ void init_overlap_output (SeqInfo *info, Int2 *indexOut, size_t *freqOut, int *inputOffsets, int *outputOffsets, int n)
 generate initial output of overlap mode. More...
 
__global__ void toInt3 (char *inputs, unsigned int *offsets, Int3 *output, int n)
 generate Int3 sequence representation from allStr More...
 
__global__ void toInt3 (char *inputs, unsigned int *offsets, SeqInfo *seqInfo, Int3 *output, int n)
 generate Int3 sequence representation from allStr, filtered by seqInfo More...
 

Variables

const size_t MAX = INT_MAX
 

Detailed Description

A collection of most of the GPU parallel primitives, implemented as CUDA kernels (most map and expand operations).

Follows the Facade design pattern.

Macro Definition Documentation

◆ MIN3

#define MIN3 (   a,
  b,
  c 
)    ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

Function Documentation

◆ cal_combination_len()

__global__ void cal_combination_len ( Int3 *  input,
int  distance,
int *  output,
int  n 
)

precalculate the number of positions required in the output array of generate combination operation.

Parameters
input: sequences to generate combination
distance: Levenshtein threshold
output: position output for each sequence
n: array length of input and output

◆ cal_distance() [1/2]

__global__ void cal_distance ( char *  allStr,
unsigned int *  offsets,
Int2 *  index,
SeqInfo *  seqInfo,
int *  seqOffset,
int  distance,
char  measure,
char *  distanceOutput,
char *  flagOutput,
int  n,
int  seqLen 
)

calculate distances of strings from given pairs and flag ones exceeding the threshold.

Parameters
allStr: container of all sequences
offsets: start/end position of each sequence
index: pairs of sequences to calculate
seqInfo: info to look up originalIndex
seqOffset: offset to look up seqInfo
distance: Levenshtein/Hamming distance threshold
measure: enum representing Levenshtein/Hamming
distanceOutput: output distance, may be null
flagOutput: array output flag
n: array length of index
seqLen: array length of seq

◆ cal_distance() [2/2]

__global__ void cal_distance ( char *  allStr,
unsigned int *  offsets,
Int2 *  index,
int  distance,
char  measure,
char *  distanceOutput,
char *  flagOutput,
int  n,
int  seqLen 
)

◆ cal_pair_len()

__global__ void cal_pair_len ( int *  inputRange,
int *  outputRange,
int  n 
)

precalculate the output range of generate pair operation.

Parameters
inputRange: range of each input group
outputRange: range of each output group
n: array length of inputRange and outputRange

◆ cal_pair_len_diag()

__global__ void cal_pair_len_diag ( int *  inputRange,
int *  outputRange,
int  n 
)

precalculate the output range of pair generation in diagonal position for overlap mode.

Parameters
inputRange: range of each input group
outputRange: range of each output group
n: array length of inputRange and outputRange

◆ cal_pair_len_lowerbound()

__global__ void cal_pair_len_lowerbound ( int *  indexes,
int *  inputOffsets,
int *  outputLengths,
int  lowerbound,
int  n 
)

precalculate the number of positions required in the output array of generate pair operation with lower bound constraint.

Parameters
indexes: value of seqIndexes to generate pair
inputOffsets: group offsets
outputLengths: output position requirement
lowerbound: the processing limit for the indexes
n: array length

◆ cal_pair_len_nondiag()

__global__ void cal_pair_len_nondiag ( Int2 *  pairs,
int *  seqOffset,
int *  outputRange,
int  n 
)

precalculate the output range of pair generation in non-diagonal position for overlap mode.

Parameters
pairs: pairs of sequences
seqOffset: range of each input group
outputRange: range of each output group
n: array length of inputRange and outputRange

◆ flag_lowerbound()

__global__ void flag_lowerbound ( int *  valueInput,
int *  valueOffsets,
char *  output,
int  lowerbound,
int  n 
)

flag data to be removed after the lower bound has been processed.

This includes both useless groups and processed rows.

Parameters
valueInput: seqIndex input
valueOffsets: group offset
output: flag output
lowerbound: the lowerbound used
n: array length of valueOffsets

◆ gen_assignment()

__global__ void gen_assignment ( int *  matrix,
int *  output,
int  nBit,
int  nRow,
int  nColumn 
)

expand operation part of solving bin packing for 2D buffer.

Parameters
matrix: statistics of all chunks where each row records the histogram count of each chunk and nRow=nChunk
output: assignment of each chunk to the bins
nBit: bin capacity expressed in log2 form
nRow: number of rows of the matrix
nColumn: number of columns of the matrix

◆ gen_bounds()

__global__ void gen_bounds ( size_t *  matrix,
int *  keyOut,
int *  valueOut,
int  nBit,
int  valueMax,
int  nRow,
int  nColumn 
)

expand operation part of solving bin packing for lower bound.

Parameters
matrix: statistics of all chunks where each row records the histogram count of each chunk and nRow=nChunk
keyOut: the regrouping of each bin
valueOut: the upper bound of each grouped bin
nBit: bin capacity expressed in log2 form
valueMax: last sequence index
nRow: number of rows of the matrix
nColumn: number of columns of the matrix

◆ generate_pairs()

__global__ void generate_pairs ( int *  indexes,
Int2 *  outputs,
int *  inputOffsets,
int *  outputOffsets,
int *  lesserIndex,
int  lowerbound,
int  carry,
int  n 
)

combinatorially generate pairs of indexes within the same group.

Parameters
indexes: value of seqIndexes to generate pair
outputs: pairs output
inputOffsets: precalculated group offsets
outputOffsets: precalculated output position requirement
lesserIndex: by partial output for histogram
lowerbound: the processing limit for the indexes
carry: latest offset from previous chunk in the stream
n: array length of inputOffsets and outputOffsets

◆ generate_smaller_index()

__global__ void generate_smaller_index ( int *  indexes,
int *  outputs,
int *  inputOffsets,
int *  outputOffsets,
int  carry,
int  n 
)

combinatorially generate pairs of indexes within the same group but record only the partial output.

Parameters
indexes: value of seqIndexes to generate pair
outputs: smaller index output
inputOffsets: precalculated group offsets
outputOffsets: precalculated output position requirement
carry: latest offset from previous chunk in the stream
n: array length of inputOffsets and outputOffsets

◆ hamming()

__device__ char hamming ( char *  allStr,
unsigned int  start1,
unsigned int  start2,
int  len1,
int  len2 
)

calculate Hamming distance of 2 strings in GPU.

Parameters
allStr: database of all sequences
start1: start index of string one
start2: start index of string two
len1: length of string one
len2: length of string two

◆ init_overlap_output()

__global__ void init_overlap_output ( SeqInfo *  info,
Int2 *  indexOut,
size_t *  freqOut,
int *  inputOffsets,
int *  outputOffsets,
int  n 
)

generate initial output of overlap mode.

Parameters
info: information of each sequence
indexOut: repertoire pair output
freqOut: frequency output for the pair
inputOffsets: index range of input to operate on
outputOffsets: index range of output to operate on
n: length of inputOffsets and outputOffsets

◆ levenshtein()

__device__ char levenshtein ( char *  allStr,
unsigned int  start1,
unsigned int  start2,
int  len1,
int  len2 
)

calculate Levenshtein distance of 2 strings in GPU where len1>18 or len2>18.

Parameters
allStr: database of all sequences
start1: start index of string one
start2: start index of string two
len1: length of string one
len2: length of string two

◆ levenshtein_static()

__device__ char levenshtein_static ( char *  allStr,
unsigned int  start1,
unsigned int  start2,
int  len1,
int  len2 
)

calculate Levenshtein distance of 2 strings in GPU where len1,len2<=18.

Parameters
allStr: database of all sequences
start1: start index of string one
start2: start index of string two
len1: length of string one
len2: length of string two

◆ make_row_index()

__global__ void make_row_index ( int *  output,
int  n,
int  nRepeat 
)

utility to generate keys for matrix processing.

Parameters
output: key output with range 0 to n-1, each repeating nRepeat times
n: number of rows
nRepeat: number of columns

◆ pair2rep()

__global__ void pair2rep ( Int2 *  pairs,
Int2 *  indexOut,
size_t *  freqOut,
SeqInfo *  seqInfo,
int *  inputOffsets,
int *  outputOffsets,
int  n 
)

turning pairs and frequencies from sequence format to repertoire format.

Parameters
pairs: pair result from nearest neighbor search
indexOut: repertoire pair output
freqOut: frequency output
seqInfo: information of each CDR3 sequence
inputOffsets: seqInfo Offset
outputOffsets: output range of indexOut and freqOut
n: number of pairs

◆ toInt3() [1/2]

__global__ void toInt3 ( char *  inputs,
unsigned int *  offsets,
Int3 *  output,
int  n 
)

generate Int3 sequence representation from allStr

Parameters
inputs: container of all sequences
offsets: start/end position of each sequence
output: array of Int3 result with the length of n
n: length of offsets

◆ toInt3() [2/2]

__global__ void toInt3 ( char *  inputs,
unsigned int *  offsets,
SeqInfo *  seqInfo,
Int3 *  output,
int  n 
)

generate Int3 sequence representation from allStr, filtered by seqInfo

Parameters
inputs: container of all sequences
offsets: start/end position of each sequence
seqInfo: selection of strings to generate output, denoted by originalIndex
output: array of Int3 result with the length of n
n: length of seqInfo

◆ toSizeT()

__global__ void toSizeT ( int *  input,
size_t *  output,
int  n 
)

utility to cast types.

Parameters
input: input array
output: output array
n: number of rows

◆ transfer_last_element()

template<typename T >
T transfer_last_element ( T *  deviceArr,
int  n 
)

transfer last element of the GPU array to main memory.

Parameters
deviceArr: the GPU array
n: array length

Variable Documentation

◆ MAX

const size_t MAX = INT_MAX