XT-neighbor
Functions | Variables
airr.cu File Reference

A collection of patching functions that add airr-format compatibility to cli.cu. More...

#include <stdio.h>
#include <string.h>
#include "codec.cu"

Functions

int split_offset (char *str, int *&offsetOutput, int nColumn)
 private function More...
 
int find_idx (char *haystack, const char *needle, int *offsets, int offsetLen)
 private function More...
 
int find_header (char *line, int &cdrIndexOut, int &freqIndexOut, bool doubleCol)
 private function More...
 
int extract_data (char *line, int cdrIndex, int freqIndex, int nColumn, int lineNumber, SeqArray *seqOut, int &freqOut, bool doubleCol)
 private function More...
 
int parse_airr_input (char *path, SeqArray *seqOut, SeqInfo *freqOut, int len, bool doubleCol)
 read and parse an input airr file into a SeqArray and, when doubleCol is true, a SeqInfo More...
 

Variables

const int DEFAULT_MAX_COLUMN = 300
 maximum number of tsv columns More...
 
const int DEFAULT_LINE_SIZE = 5000
 maximum number of characters in a line More...
 
const char DELIMITER = '\t'
 tsv delimiter More...
 
const char CDR_FIELD [8] = "cdr3_aa"
 field header for sequence data More...
 
const char FREQ_FIELD [16] = "duplicate_count"
 field header for frequency (duplicate count) data More...
 

Detailed Description

A collection of patching functions that add airr-format compatibility to cli.cu.

Function Documentation

◆ extract_data()

int extract_data ( char *  line,
int  cdrIndex,
int  freqIndex,
int  nColumn,
int  lineNumber,
SeqArray *  seqOut,
int &  freqOut,
bool  doubleCol 
)

private function

◆ find_header()

int find_header ( char *  line,
int &  cdrIndexOut,
int &  freqIndexOut,
bool  doubleCol 
)

private function

◆ find_idx()

int find_idx ( char *  haystack,
const char *  needle,
int *  offsets,
int  offsetLen 
)

private function

◆ parse_airr_input()

int parse_airr_input ( char *  path,
SeqArray *  seqOut,
SeqInfo *  freqOut,
int  len,
bool  doubleCol 
)

read and parse an input airr file into a SeqArray and, when doubleCol is true, a SeqInfo

Parameters
path: file path to read
seqOut: output sequences
freqOut: output frequency, if doubleCol is set to true
len: expected length
doubleCol: if true, read freqOut
Returns
execution result

◆ split_offset()

int split_offset ( char *  str,
int *&  offsetOutput,
int  nColumn 
)

private function

Variable Documentation

◆ CDR_FIELD

const char CDR_FIELD[8] = "cdr3_aa"

field header for sequence data

◆ DEFAULT_LINE_SIZE

const int DEFAULT_LINE_SIZE = 5000

maximum number of characters in a line

◆ DEFAULT_MAX_COLUMN

const int DEFAULT_MAX_COLUMN = 300

maximum number of tsv columns

◆ DELIMITER

const char DELIMITER = '\t'

tsv delimiter

◆ FREQ_FIELD

const char FREQ_FIELD[16] = "duplicate_count"

field header for frequency (duplicate count) data