SeqAn3
dna4.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <vector>
43 
46 
47 // ------------------------------------------------------------------
48 // dna4
49 // ------------------------------------------------------------------
50 
51 namespace seqan3
52 {
53 
// Forward declaration: rna4 is only named below (friend declaration and the
// constrained converting constructor), so an incomplete type is sufficient here.
54 class rna4;
55 
73 class dna4 : public nucleotide_base<dna4, 4>
74 {
75 private:
78 
80  friend base_t;
82  friend base_t::base_t;
85  friend rna4;
86 
87 public:
91  constexpr dna4() noexcept : base_t{} {}
92  constexpr dna4(dna4 const &) = default;
93  constexpr dna4(dna4 &&) = default;
94  constexpr dna4 & operator=(dna4 const &) = default;
95  constexpr dna4 & operator=(dna4 &&) = default;
96  ~dna4() = default;
97 
98  using base_t::base_t;
99 
101  template <std::Same<rna4> t> // Accept incomplete type
102  constexpr dna4(t const & r) noexcept
103  {
104  assign_rank(r.to_rank());
105  }
107 
108 protected:
110 
112  static constexpr char_type rank_to_char[value_size]
113  {
114  'A',
115  'C',
116  'G',
117  'T'
118  };
119 
121  static constexpr std::array<rank_type, 256> char_to_rank
122  {
123  [] () constexpr
124  {
125  std::array<rank_type, 256> ret{};
126 
127  // reverse mapping for characters and their lowercase
128  for (size_t rnk = 0u; rnk < value_size; ++rnk)
129  {
130  ret[ rank_to_char[rnk] ] = rnk;
131  ret[to_lower(rank_to_char[rnk])] = rnk;
132  }
133 
134  // set U equal to T
135  ret['U'] = ret['T']; ret['u'] = ret['t'];
136 
137  // iupac characters get special treatment, because there is no N
138  ret['R'] = ret['A']; ret['r'] = ret['A']; // or G
139  ret['Y'] = ret['C']; ret['y'] = ret['C']; // or T
140  ret['S'] = ret['C']; ret['s'] = ret['C']; // or G
141  ret['W'] = ret['A']; ret['w'] = ret['A']; // or T
142  ret['K'] = ret['G']; ret['k'] = ret['G']; // or T
143  ret['M'] = ret['A']; ret['m'] = ret['A']; // or T
144  ret['B'] = ret['C']; ret['b'] = ret['C']; // or G or T
145  ret['D'] = ret['A']; ret['d'] = ret['A']; // or G or T
146  ret['H'] = ret['A']; ret['h'] = ret['A']; // or C or T
147  ret['V'] = ret['A']; ret['v'] = ret['A']; // or C or G
148 
149  return ret;
150  }()
151  };
152 
154  static const std::array<dna4, value_size> complement_table;
155 };
156 
157 // ------------------------------------------------------------------
158 // containers
159 // ------------------------------------------------------------------
160 
// Alias for an std::vector of seqan3::dna4.
163 using dna4_vector = std::vector<dna4>;
164 
165 // ------------------------------------------------------------------
166 // literals
167 // ------------------------------------------------------------------
168 
177 constexpr dna4 operator""_dna4(char const c) noexcept
178 {
179  return dna4{}.assign_char(c);
180 }
181 
191 inline dna4_vector operator""_dna4(char const * s, std::size_t n)
192 {
193  dna4_vector r;
194  r.resize(n);
195 
196  for (size_t i = 0; i < n; ++i)
197  r[i].assign_char(s[i]);
198 
199  return r;
200 }
202 
203 // ------------------------------------------------------------------
204 // dna4 (deferred definition)
205 // ------------------------------------------------------------------
206 
207 constexpr std::array<dna4, dna4::value_size> dna4::complement_table
208 {
209  'T'_dna4, // complement of 'A'_dna4
210  'G'_dna4, // complement of 'C'_dna4
211  'C'_dna4, // complement of 'G'_dna4
212  'A'_dna4 // complement of 'T'_dna4
213 };
214 
215 } // namespace seqan3
The four letter DNA alphabet of A,C,G,T.
Definition: dna4.hpp:73
std::vector< dna4 > dna4_vector
Alias for an std::vector of seqan3::dna4.
Definition: dna4.hpp:163
alphabet_concept && assign_char(alphabet_concept &&alph, char_type const chr)
Returns the alphabet letter's value in character representation.
constexpr dna4(t const &r) noexcept
Allow implicit construction from dna/rna of the same size.
Definition: dna4.hpp:102
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
constexpr dna4 & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition: alphabet_base.hpp:189
static detail::min_viable_uint_t< size > constexpr value_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:198
Provides seqan3::nucleotide_base.
Provides utilities for modifying characters.
char_t char_type
The type of the alphabet when converted to char (e.g. via to_char()).
Definition: alphabet_base.hpp:87
The four letter RNA alphabet of A,C,G,U.
Definition: rna4.hpp:71
A CRTP-base that refines seqan3::alphabet_base and is used by the nucleotides.
Definition: nucleotide_base.hpp:65
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: char_operations.hpp:107