| 
    Sample SNPs
    
   Fast ordered sampling of rows from large text or binary files. Special cases for DNA variant files (.bed, VCF, HapMap, etc.). 
   | 
 
 
 
 
Go to the documentation of this file.
   43 #include <unordered_map> 
   52 using std::unordered_map;
 
   53 using std::numeric_limits;
 
   84     const double EPS = numeric_limits<double>::epsilon();
 
   86     const double PI = 3.14159265358979323846264338328;
 
  160         virtual void close();
 
  189         GbinFileI(
const string &fileName, 
const size_t &nCols, 
const size_t &elemSize) : 
GbinFile(fileName, nCols, elemSize) {};
 
  237         GbinFileO(
const string &fileName, 
const size_t &nCols, 
const size_t &elemSize) : 
GbinFile(fileName, nCols, elemSize) {};
 
  277         static const unordered_map<char, string> 
_tests;
 
  287         BedFile(
const string &stubName);
 
  353         void _ld(
const char *snp1, 
const char *snp2, 
const size_t &N, 
const unsigned short &pad, 
double &rSq, 
double &Dprime, 
double &dcnt1, 
double &dcnt2);
 
  369         void _ld(
const char *snp1, 
const char *snp2, 
const PopIndex &popID, vector<double> &rSq, vector<double> &Dprime, vector<double> &dcnt1, vector<double> &dcnt2);
 
  501         virtual void close();
 
  568         void sample(
const uint64_t &n, 
const bool &headSkip, 
const char &delim, vector<string> &out);
 
  911         virtual void close();
 
  
GbinFile & operator=(const GbinFile &in)=default
Copy assignment.
 
GtxtFile & operator=(GtxtFile &&in)=default
Move assignment.
 
BedFile(BedFile &&in)=default
Move constructor.
 
virtual void open()=0
Open stream.
 
HmpFileI & operator=(const HmpFileI &in)=default
Copy assignment.
 
TpedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:744
 
fstream _varFile
Variant file stream.
Definition: varfiles.hpp:95
 
~VcfFileI()
Destructor.
Definition: varfiles.hpp:827
 
HmpFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:895
 
void open()
Open stream to write.
Definition: varfiles.cpp:2979
 
GbinFile()
Default constructor.
Definition: varfiles.hpp:134
 
void sample(TpedFileO &out, const uint64_t &n)
Sample SNPs and save to TPED file.
Definition: varfiles.cpp:2353
 
Generic binary file output class.
Definition: varfiles.hpp:223
 
VcfFileI()
Default constructor.
Definition: varfiles.hpp:812
 
~VcfFileO()
Destructor.
Definition: varfiles.hpp:872
 
VarFile()
Default constructor (protected)
Definition: varfiles.hpp:98
 
VcfFileI & operator=(VcfFileI &&in)=default
Move assignment.
 
VcfFileI(VcfFileI &&in)=default
Move constructor.
 
void open()
Open stream to write.
Definition: varfiles.cpp:1748
 
TpedFile & operator=(TpedFile &&in)=default
Move assignment.
 
GtxtFileO(const GtxtFileO &in)=default
Copy constructor.
 
void sample(HmpFileO &out, const uint64_t &n)
Sample SNPs and save to HMP file.
Definition: varfiles.cpp:2872
 
BedFileI & operator=(BedFileI &&in)=default
Move assignment.
 
GtxtFileI(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:533
 
~GbinFileO()
Destructor.
Definition: varfiles.hpp:247
 
~HmpFile()
Destructor.
Definition: varfiles.hpp:906
 
TpedFileO(const TpedFileO &in)=default
Copy constructor.
 
virtual void close()
Close stream.
Definition: varfiles.cpp:90
 
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:499
 
Base variant file class.
Definition: varfiles.hpp:92
 
virtual uint64_t _numLines()
Get number of rows in the binary file.
Definition: varfiles.cpp:64
 
BedFileO(BedFileO &&in)=default
Move constructor.
 
TpedFileO & operator=(const TpedFileO &in)=default
Copy assignment.
 
~TpedFileO()
Destructor.
Definition: varfiles.hpp:754
 
TpedFile(TpedFile &&in)=default
Move constructor.
 
~HmpFileO()
Destructor.
Definition: varfiles.hpp:992
 
void _ld(const char *snp1, const char *snp2, const size_t &N, const unsigned short &pad, double &rSq, double &Dprime, double &dcnt1, double &dcnt2)
Between-SNP linkage disequilibrium (LD)
Definition: varfiles.cpp:435
 
~BedFile()
Destructor.
Definition: varfiles.cpp:288
 
GtxtFile(const GtxtFile &in)=default
Copy constructor.
 
Generic binary file base class.
Definition: varfiles.hpp:123
 
GbinFileO & operator=(const GbinFileO &in)=default
Copy assignment.
 
HmpFile()
Default constructor.
Definition: varfiles.hpp:889
 
uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:2274
 
void open()
Open stream to write.
Definition: varfiles.cpp:2106
 
TpedFileI(TpedFileI &&in)=default
Move constructor.
 
BedFileO & operator=(BedFileO &&in)=default
Move assignment.
 
VCF file output class.
Definition: varfiles.hpp:851
 
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:300
 
void open()
Open stream to read.
Definition: varfiles.cpp:97
 
BedFileI(const BedFileI &in)=default
Copy constructor.
 
HmpFileO()
Default constructor.
Definition: varfiles.hpp:977
 
GbinFileI & operator=(GbinFileI &&in)=default
Move assignment.
 
HmpFileO & operator=(const HmpFileO &in)=default
Copy assignment.
 
GbinFileO(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:237
 
BED file base class.
Definition: varfiles.hpp:257
 
Hapmap (HMP) file base class.
Definition: varfiles.hpp:884
 
uint64_t nlines()
Number of SNPs in the object.
Definition: varfiles.hpp:570
 
TpedFileO(TpedFileO &&in)=default
Move constructor.
 
TpedFileI & operator=(const TpedFileI &in)=default
Copy assignment.
 
~GbinFile()
Destructor.
Definition: varfiles.hpp:155
 
static const vector< char > _masks
Genotype bit masks.
Definition: varfiles.hpp:271
 
static const unordered_map< char, string > _tests
Genotype bit tests.
Definition: varfiles.hpp:277
 
VarFile(const VarFile &in)=default
Copy constructor.
 
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:724
 
BedFile()
Default constructor.
Definition: varfiles.cpp:269
 
TpedFileI(const TpedFileI &in)=default
Copy constructor.
 
~TpedFileI()
Destructor.
Definition: varfiles.hpp:707
 
GbinFile & operator=(GbinFile &&in)=default
Move assignment.
 
BedFileO & operator=(const BedFileO &in)=default
Copy assignment.
 
HMP file output class.
Definition: varfiles.hpp:971
 
fstream _bimFile
Corresponding .bim file stream.
Definition: varfiles.hpp:263
 
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:645
 
GbinFileI(const GbinFileI &in)=default
Copy constructor.
 
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:265
 
BedFileI & operator=(const BedFileI &in)=default
Copy assignment.
 
TPED file base class.
Definition: varfiles.hpp:618
 
TpedFile(const string &stubName)
File name constructor.
Definition: varfiles.hpp:632
 
TPED file input class.
Definition: varfiles.hpp:656
 
Connect lines with populations.
 
GtxtFileI(GtxtFileI &&in)=default
Move constructor.
 
GbinFileO & operator=(GbinFileO &&in)=default
Move assignment.
 
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:909
 
TpedFile()
Default constructor.
Definition: varfiles.hpp:627
 
virtual void close()
Close stream.
Definition: varfiles.cpp:1803
 
GtxtFileO(const string &fileName, const bool &head)
File name constructor with header specification.
Definition: varfiles.hpp:596
 
BedFileO(const string &stubName)
File name constructor.
Definition: varfiles.hpp:442
 
void _famCopy(fstream &fam)
Copy the .tfam file.
Definition: varfiles.cpp:2234
 
uint64_t nlines()
Number of rows in the object.
Definition: varfiles.hpp:214
 
GtxtFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:527
 
VcfFileO(VcfFileO &&in)=default
Move constructor.
 
uint64_t _numLines()
Get number of SNPs in the VCF file.
Definition: varfiles.cpp:2560
 
virtual void close()=0
Close stream.
 
~VarFile()
Destructor.
Definition: varfiles.hpp:110
 
HmpFileO(HmpFileO &&in)=default
Move constructor.
 
HmpFile & operator=(HmpFile &&in)=default
Move assignment.
 
VarFile & operator=(const VarFile &in)=default
Copy assignment.
 
Generic text file base class.
Definition: varfiles.hpp:463
 
HmpFile(HmpFile &&in)=default
Move constructor.
 
~GtxtFileI()
Destructor.
Definition: varfiles.hpp:543
 
HmpFileO & operator=(HmpFileO &&in)=default
Move assignment.
 
HmpFileI & operator=(HmpFileI &&in)=default
Move assignment.
 
GbinFile(GbinFile &&in)=default
Move constructor.
 
GbinFileO()
Default constructor.
Definition: varfiles.hpp:229
 
VcfFileO & operator=(VcfFileO &&in)=default
Move assignment.
 
VcfFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:776
 
uint64_t _famLines()
Get number of lines in the _tfamFile
Definition: varfiles.cpp:2138
 
void sample(GtxtFileO &out, const uint64_t &n, const bool &headSkip)
Sample rows and save to a text file.
Definition: varfiles.cpp:1876
 
uint64_t nindiv()
Number of individuals in the object.
Definition: varfiles.hpp:422
 
BedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:377
 
GtxtFileO(GtxtFileO &&in)=default
Move constructor.
 
void open()
Open stream to read.
Definition: varfiles.cpp:1809
 
string _fileStub
File name stub (minus the extension)
Definition: varfiles.hpp:623
 
TpedFile(const TpedFile &in)=default
Copy constructor.
 
void open()
Open stream to read.
Definition: varfiles.cpp:2320
 
VCF file input class.
Definition: varfiles.hpp:800
 
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:722
 
VcfFile & operator=(const VcfFile &in)=default
Copy assignment.
 
VCF file base class.
Definition: varfiles.hpp:765
 
GbinFileI(const string &fileName, const size_t &nCols, const size_t &elemSize)
File name constructor.
Definition: varfiles.hpp:189
 
HmpFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:982
 
virtual void open()
Open stream (does nothing)
Definition: varfiles.hpp:158
 
void sample(BedFileO &out, const uint64_t &n)
Sample SNPs and save to BED file.
Definition: varfiles.cpp:1026
 
string _fileName
File name.
Definition: varfiles.hpp:466
 
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:962
 
void open()
Open stream to read.
Definition: varfiles.cpp:967
 
Generic text file output class.
Definition: varfiles.hpp:579
 
BedFileI(BedFileI &&in)=default
Move constructor.
 
void close()
Close stream.
Definition: varfiles.cpp:2129
 
GtxtFileO()
Default constructor.
Definition: varfiles.hpp:585
 
GtxtFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:590
 
HMP file input class.
Definition: varfiles.hpp:920
 
HmpFileI(const HmpFileI &in)=default
Copy constructor.
 
BedFile & operator=(const BedFile &in)=default
Copy assignment.
 
GbinFile(const string &fileName, const size_t &nCols, const size_t &elemSize)
Constructor with file name.
Definition: varfiles.hpp:144
 
string _fileName
File name.
Definition: varfiles.hpp:126
 
void open()
Open stream to write.
Definition: varfiles.cpp:2520
 
VcfFileI & operator=(const VcfFileI &in)=default
Copy assignment.
 
GtxtFile(GtxtFile &&in)=default
Move constructor.
 
BedFile(const BedFile &in)=default
Copy constructor.
 
virtual void close()
Close stream.
Definition: varfiles.cpp:2771
 
BED file output class.
Definition: varfiles.hpp:431
 
fstream _famFile
Corresponding .fam file stream.
Definition: varfiles.hpp:261
 
VcfFile(const VcfFile &in)=default
Copy constructor.
 
GbinFileI()
Default constructor.
Definition: varfiles.hpp:181
 
HmpFile & operator=(const HmpFile &in)=default
Copy assignment.
 
size_t _elemSize
Size of each element in bytes.
Definition: varfiles.hpp:130
 
~BedFileO()
Destructor.
Definition: varfiles.hpp:452
 
~GtxtFile()
Destructor.
Definition: varfiles.hpp:496
 
Binary file input class.
Definition: varfiles.hpp:169
 
VcfFile & operator=(VcfFile &&in)=default
Move assignment.
 
~BedFileI()
Destructor.
Definition: varfiles.hpp:387
 
uint64_t _numLines()
Get number of lines in the _bimFile
Definition: varfiles.cpp:309
 
GtxtFile(const string &fileName, const bool &head)
Constructor with file name and header indicator.
Definition: varfiles.hpp:485
 
BedFile & operator=(BedFile &&in)=default
Move assignment.
 
VarFile & operator=(VarFile &&in)=default
Move assignment.
 
GbinFileO(const GbinFileO &in)=default
Copy constructor.
 
VcfFile(VcfFile &&in)=default
Move constructor.
 
void open()
Open stream to write.
Definition: varfiles.cpp:2749
 
HmpFileI()
Default constructor.
Definition: varfiles.hpp:932
 
GtxtFileO & operator=(GtxtFileO &&in)=default
Move assignment.
 
HmpFile(const HmpFile &in)=default
Copy constructor.
 
BedFileI()
Default constructor.
Definition: varfiles.hpp:372
 
BedFileO(const BedFileO &in)=default
Copy constructor.
 
uint64_t _numLines()
Get number of SNPs in the HMP file.
Definition: varfiles.cpp:2822
 
fstream _tfamFile
Corresponding .tfam file stream.
Definition: varfiles.hpp:621
 
TpedFile & operator=(const TpedFile &in)=default
Copy assignment.
 
~HmpFileI()
Destructor.
Definition: varfiles.hpp:947
 
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:420
 
~VcfFile()
Destructor.
Definition: varfiles.hpp:787
 
Population index.
Definition: populations.hpp:44
 
VarFile(VarFile &&in)=default
Move constructor.
 
TpedFileO & operator=(TpedFileO &&in)=default
Move assignment.
 
~GtxtFileO()
Destructor.
Definition: varfiles.hpp:606
 
GtxtFileI & operator=(GtxtFileI &&in)=default
Move assignment.
 
GbinFileO(GbinFileO &&in)=default
Move constructor.
 
VcfFileO()
Default constructor.
Definition: varfiles.hpp:857
 
static const size_t BUF_SIZE
Buffer size.
Definition: varfiles.hpp:82
 
size_t _nCols
Number of elements in a row.
Definition: varfiles.hpp:128
 
VcfFileI(const string &fileName)
File name constructor.
Definition: varfiles.hpp:817
 
VcfFileO & operator=(const VcfFileO &in)=default
Copy assignment.
 
uint64_t _famLines()
Get number of lines in the _famFile
Definition: varfiles.cpp:340
 
GtxtFileO & operator=(const GtxtFileO &in)=default
Copy assignment.
 
void close()
Close stream.
Definition: varfiles.cpp:2554
 
~GbinFileI()
Destructor.
Definition: varfiles.hpp:199
 
TpedFileO()
Default constructor.
Definition: varfiles.hpp:739
 
BedFileO()
Default constructor.
Definition: varfiles.hpp:437
 
void sample(GbinFileO &out, const uint64_t &n)
Sample rows and save to a binary file.
Definition: varfiles.cpp:115
 
void sampleLD(const uint64_t &n)
Linkage disequilibrium among sampled sites.
Definition: varfiles.cpp:1282
 
const double EPS
Machine epsilon.
Definition: varfiles.hpp:84
 
GtxtFile()
Default constructor.
Definition: varfiles.hpp:472
 
GtxtFileI & operator=(const GtxtFileI &in)=default
Copy assignment.
 
const double PI
pi
Definition: varfiles.hpp:86
 
VcfFileO(const string &fileName)
File name constructor.
Definition: varfiles.hpp:862
 
GtxtFileI(const GtxtFileI &in)=default
Copy constructor.
 
HmpFileI(HmpFileI &&in)=default
Move constructor.
 
bool _head
Is there a header?
Definition: varfiles.hpp:468
 
VcfFile()
Default constructor.
Definition: varfiles.hpp:770
 
void open()
Open stream to write.
Definition: varfiles.cpp:249
 
void open()
Open stream (does nothing)
Definition: varfiles.hpp:790
 
TpedFileI(const string &stubName)
File name constructor.
Definition: varfiles.hpp:697
 
TpedFileI & operator=(TpedFileI &&in)=default
Move assignment.
 
uint64_t nsnp()
Number of SNPs in the object.
Definition: varfiles.hpp:842
 
void close()
Close stream.
Definition: varfiles.cpp:297
 
void sample(VcfFileO &out, const uint64_t &n)
Sample SNPs and save to VCF file.
Definition: varfiles.cpp:2634
 
void open()
Open stream to read.
Definition: varfiles.cpp:2803
 
GbinFileI & operator=(const GbinFileI &in)=default
Copy assignment.
 
GbinFile(const GbinFile &in)=default
Copy constructor.
 
~TpedFile()
Destructor.
Definition: varfiles.cpp:2123
 
HmpFileO(const HmpFileO &in)=default
Copy constructor.
 
void open()
Open stream to read.
Definition: varfiles.cpp:2614
 
virtual uint64_t _numLines()
Get number of rows in the text file.
Definition: varfiles.cpp:1829
 
BED file input class.
Definition: varfiles.hpp:311
 
Text file input class.
Definition: varfiles.hpp:510
 
GtxtFile(const string &fileName)
Constructor with file name.
Definition: varfiles.hpp:478
 
VcfFileI(const VcfFileI &in)=default
Copy constructor.
 
VcfFileO(const VcfFileO &in)=default
Copy constructor.
 
GtxtFile & operator=(const GtxtFile &in)=default
Copy assignment.
 
GbinFileI(GbinFileI &&in)=default
Move constructor.
 
TpedFileI()
Default constructor.
Definition: varfiles.hpp:692
 
GtxtFileI()
Default constructor.
Definition: varfiles.hpp:522
 
TPED file output class.
Definition: varfiles.hpp:733