analyzeAlignments
Estimating sequence diversity from sequence alignments
Loading...
Searching...
No Matches
fastaParser.hpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2023 Anthony J. Greenberg
3 *
4 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5 *
6 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 *
8 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 *
10 * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 *
12 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
13 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
14 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
15 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
16 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
17 * THE POSSIBILITY OF SUCH DAMAGE.
18 */
19
21
30#pragma once
31
32#include <vector>
33#include <unordered_map>
34#include <utility> // for std::pair
35#include <string>
36#include <iterator>
37#include <cstdint>
38
39namespace BayesicSpace {
40 struct AlignmentStatistics;
41 class ParseFASTA;
42
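47 struct AlignmentStatistics {
48 size_t referenceStart;
49 size_t referenceLength;
50 size_t queryStart;
51 size_t queryLength;
52 };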
// FASTA alignment parser.
//
// Stores an alignment as a vector of string pairs plus a consensus string and
// exposes window-extraction and diversity queries over them.
// The special member functions other than the defaulted default constructor
// and destructor are only declared here; their definitions live elsewhere.
59 class ParseFASTA {
60 public:
    // Default constructor.
62 ParseFASTA() = default;
    // Constructor taking a FASTA file name.
    // NOTE(review): presumably reads and parses the named file; confirm in the implementation file.
    // NOTE(review): not marked `explicit`, so std::string converts implicitly to ParseFASTA.
69 ParseFASTA(const std::string &fastaFileName);
    // Copy constructor.
74 ParseFASTA(const ParseFASTA &toCopy);
    // Move constructor (declared noexcept).
79 ParseFASTA(ParseFASTA &&toMove) noexcept;
    // Copy assignment operator.
84 ParseFASTA& operator=(const ParseFASTA &toCopy);
    // Move assignment operator (declared noexcept).
89 ParseFASTA& operator=(ParseFASTA &&toMove) noexcept;
    // Destructor.
91 ~ParseFASTA() = default;
    // Number of sequences in alignment: the element count of fastaAlignment_.
96 size_t sequenceNumber() const noexcept {return fastaAlignment_.size(); };
    // Alignment length: the size of the second string of the first stored pair.
    // Uses at(0), so it throws std::out_of_range when fastaAlignment_ is empty
    // (which is why this accessor is not noexcept).
101 size_t alignmentLength() const {return fastaAlignment_.at(0).second.size(); };
    // Extract a consensus region, given a start index and a window length.
109 std::string extractConsensusWindow(const size_t &startIdx, const size_t &windowLength) const;
    // Sequence diversity in windows, given a window size and a step size.
    // Returns one (size_t, vector<uint32_t>) pair per reported window.
    // NOTE(review): the size_t is presumably the window start position; confirm in the implementation.
119 std::vector< std::pair< size_t, std::vector<uint32_t> > > diversityInWindows(const size_t &windowSize, const size_t &stepSize) const;
    // Extract an alignment window, given its start position and size.
    // Returns a string -> uint32_t map.
    // NOTE(review): presumably unique window sequences mapped to their counts; confirm.
129 std::unordered_map<std::string, uint32_t> extractWindow(const size_t &windowStartPosition, const size_t &windowSize) const;
    // Extract an alignment window and sort; same inputs as extractWindow, result returned as a vector of pairs.
    // NOTE(review): the sort key and direction are not visible in this header; confirm.
140 std::vector< std::pair<std::string, uint32_t> > extractWindowSorted(const size_t &windowStartPosition, const size_t &windowSize) const;
    // Extract a region matching a sequence; the result is reported as an AlignmentStatistics value.
149 AlignmentStatistics extractSequence(const std::string &querySequence) const;
    // Impute missing values. Non-const: may modify the stored data.
154 void imputeMissing();
155 private:
    // The alignment: one pair of strings per sequence. alignmentLength() reads `.second`.
    // NOTE(review): presumably `.first` is the FASTA header and `.second` the sequence; confirm.
161 std::vector< std::pair<std::string, std::string> > fastaAlignment_;
    // Consensus string.
    // NOTE(review): presumably populated by makeConsensus_(); confirm.
163 std::string consensus_;
    // Private helper.
    // NOTE(review): the name suggests it builds consensus_ from fastaAlignment_; confirm.
169 void makeConsensus_();
170 };
171}
FASTA alignment parser.
Definition fastaParser.hpp:59
ParseFASTA & operator=(const ParseFASTA &toCopy)
Copy assignment operator.
Definition fastaParser.cpp:108
std::unordered_map< std::string, uint32_t > extractWindow(const size_t &windowStartPosition, const size_t &windowSize) const
Extract an alignment window.
Definition fastaParser.cpp:156
std::string extractConsensusWindow(const size_t &startIdx, const size_t &windowLength) const
Extract a consensus region.
Definition fastaParser.cpp:124
std::vector< std::pair< size_t, std::vector< uint32_t > > > diversityInWindows(const size_t &windowSize, const size_t &stepSize) const
Sequence diversity in windows.
Definition fastaParser.cpp:135
void imputeMissing()
Impute missing values.
Definition fastaParser.cpp:216
ParseFASTA()=default
Default constructor.
~ParseFASTA()=default
Destructor.
std::vector< std::pair< std::string, uint32_t > > extractWindowSorted(const size_t &windowStartPosition, const size_t &windowSize) const
Extract an alignment window and sort.
Definition fastaParser.cpp:168
AlignmentStatistics extractSequence(const std::string &querySequence) const
Extract a region matching a sequence.
Definition fastaParser.cpp:183
size_t alignmentLength() const
Alignment length.
Definition fastaParser.hpp:101
size_t sequenceNumber() const noexcept
Number of sequences in alignment.
Definition fastaParser.hpp:96
Definition extraFunctions.hpp:41
Collection of alignment statistics.
Definition fastaParser.hpp:47
size_t referenceStart
Definition fastaParser.hpp:48
size_t queryStart
Definition fastaParser.hpp:50
size_t referenceLength
Definition fastaParser.hpp:49
size_t queryLength
Definition fastaParser.hpp:51