|
Needle
An application for fast and efficient searches of NGS data.
|
#include <filesystem>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>
#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include "shared.hpp"
Go to the source code of this file.
Classes | |
| struct | minimiser_arguments |
| struct | RandomGenerator |
| Generates a random integer not greater than a given maximum. More... | |
Functions | |
| void | count (min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path include_file, std::filesystem::path genome_file, bool paired) |
| Get the concrete expression values (= median of all counts of one transcript) for given experiments. This function can be used to estimate how good the median approach can be, if all count values are available. | |
| void | count_genome (min_arguments const &args, std::filesystem::path include_file, std::filesystem::path exclude_file) |
| Creates a set of minimizers to ignore, which should be used as an input to count. | |
| void | read_binary (std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table) |
| Reads a binary file that needle minimiser creates. | |
| void | read_binary_start (min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff) |
| Reads the beginning of a binary file that needle minimiser creates. | |
| std::vector< uint16_t > | ibf (std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1) |
| Creates IBFs. | |
| std::vector< uint16_t > | ibf (std::vector< std::filesystem::path > const &minimiser_files, estimate_ibf_arguments &ibf_args, std::vector< double > &fpr, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1) |
| Creates IBFs based on the minimiser files. | |
| void | minimiser (std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs) |
| Create minimiser and header files. | |
| std::vector< uint16_t > | insert (std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise) |
| Insert into IBFs. | |
| std::vector< uint16_t > | insert (std::vector< std::filesystem::path > const &minimiser_files, estimate_ibf_arguments &ibf_args, std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise) |
| Insert into IBFs based on the minimiser files. | |
| void | delete_bin (std::vector< uint64_t > const &delete_files, estimate_ibf_arguments &ibf_args, std::filesystem::path path_in, bool samplewise) |
| Delete bins from ibfs. | |
| void count | ( | min_arguments const & | args, |
| std::vector< std::filesystem::path > | sequence_files, | ||
| std::filesystem::path | include_file, | ||
| std::filesystem::path | genome_file, | ||
| bool | paired ) |
Get the concrete expression values (= median of all counts of one transcript) for given experiments. This function can be used to estimate how good the median approach can be, if all count values are available.
| args | The minimiser arguments to use (seed, shape, window size). |
| sequence_files | The sequence files, which contain the reads. |
| include_file | A file containing the transcripts whose expression values should be determined. |
| genome_file | A "*.genome" file constructed with the command genome. |
| paired | Flag to indicate if input data is paired or not. |
| void count_genome | ( | min_arguments const & | args, |
| std::filesystem::path | include_file, | ||
| std::filesystem::path | exclude_file ) |
Creates a set of minimizers to ignore, which should be used as an input to count.
| args | The minimiser arguments to use (seed, shape, window size). |
| include_file | A file containing the transcripts whose expression values should be determined. |
| exclude_file | A file containing minimizers which should be ignored. |
| void delete_bin | ( | std::vector< uint64_t > const & | delete_files, |
| estimate_ibf_arguments & | ibf_args, | ||
| std::filesystem::path | path_in, | ||
| bool | samplewise ) |
Delete bins from ibfs.
| delete_files | A vector of integers specifying the bins to delete. |
| ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
| path_in | Input directory. |
| samplewise | True, if expression levels were set beforehand. |
| std::vector< uint16_t > ibf | ( | std::vector< std::filesystem::path > const & | minimiser_files, |
| estimate_ibf_arguments & | ibf_args, | ||
| std::vector< double > & | fpr, | ||
| std::filesystem::path const | expression_by_genome_file = "", | ||
| size_t | num_hash = 1 ) |
Creates IBFs based on the minimiser files.
| minimiser_files | A vector of minimiser file paths. |
| ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
| fpr | The average false positive rate that should be used. |
| expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression_thresholds. |
| num_hash | The number of hash functions to use. |
| std::vector< uint16_t > ibf | ( | std::vector< std::filesystem::path > const & | sequence_files, |
| estimate_ibf_arguments & | ibf_args, | ||
| minimiser_arguments & | minimiser_args, | ||
| std::vector< double > & | fpr, | ||
| std::vector< uint8_t > & | cutoffs, | ||
| std::filesystem::path const | expression_by_genome_file = "", | ||
| size_t | num_hash = 1 ) |
Creates IBFs.
| sequence_files | A vector of sequence file paths. |
| ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
| minimiser_args | The minimiser specific arguments to use. |
| fpr | The average false positive rate that should be used. |
| cutoffs | List of cutoffs. |
| expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression thresholds. |
| num_hash | The number of hash functions to use. |
| std::vector< uint16_t > insert | ( | std::vector< std::filesystem::path > const & | minimiser_files, |
| estimate_ibf_arguments & | ibf_args, | ||
| std::filesystem::path const | expression_by_genome_file, | ||
| std::filesystem::path | path_in, | ||
| bool | samplewise ) |
Insert into IBFs based on the minimiser files.
| minimiser_files | A vector of minimiser file paths. |
| ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
| expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression_thresholds. |
| path_in | Input directory. |
| samplewise | True, if expression levels were set beforehand. |
| std::vector< uint16_t > insert | ( | std::vector< std::filesystem::path > const & | sequence_files, |
| estimate_ibf_arguments & | ibf_args, | ||
| minimiser_arguments & | minimiser_args, | ||
| std::vector< uint8_t > & | cutoffs, | ||
| std::filesystem::path const | expression_by_genome_file, | ||
| std::filesystem::path | path_in, | ||
| bool | samplewise ) |
Insert into IBFs.
| sequence_files | A vector of sequence file paths. |
| ibf_args | The IBF specific arguments to use (bin size, number of hash functions, ...). See struct ibf_arguments. |
| minimiser_args | The minimiser specific arguments to use. |
| cutoffs | List of cutoffs. |
| expression_by_genome_file | File that contains the only minimisers that should be considered for the determination of the expression thresholds. |
| path_in | Input directory. |
| samplewise | True, if expression levels were set beforehand. |
| void minimiser | ( | std::vector< std::filesystem::path > const & | sequence_files, |
| min_arguments const & | args, | ||
| minimiser_arguments & | minimiser_args, | ||
| std::vector< uint8_t > & | cutoffs ) |
Create minimiser and header files.
| sequence_files | A vector of sequence file paths. |
| args | The minimiser arguments to use (seed, shape, window size). |
| minimiser_args | The minimiser specific arguments to use. |
| cutoffs | List of cutoffs. |
| void read_binary | ( | std::filesystem::path | filename, |
| robin_hood::unordered_node_map< uint64_t, uint16_t > & | hash_table ) |
Reads a binary file that needle minimiser creates.
| filename | The filename of the binary file. |
| hash_table | The hash table to store minimisers into. |
| void read_binary_start | ( | min_arguments & | args, |
| std::filesystem::path | filename, | ||
| uint64_t & | num_of_minimisers, | ||
| uint8_t & | cutoff ) |
Reads the beginning of a binary file that needle minimiser creates.
| args | Min arguments. |
| filename | The filename of the binary file. |
| num_of_minimisers | Variable in which the number of minimisers should be stored. |
| cutoff | Cutoff value. |