SeqAn3
aa20.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <vector>
43 
46 
47 namespace seqan3
48 {
84 class aa20 : public aminoacid_base<aa20, 20>
85 {
86 private:
89 
91  friend base_t;
93  friend base_t::base_t;
95 
96 public:
100  constexpr aa20() : base_t{} {}
101  constexpr aa20(aa20 const &) = default;
102  constexpr aa20(aa20 &&) = default;
103  constexpr aa20 & operator=(aa20 const &) = default;
104  constexpr aa20 & operator=(aa20 &&) = default;
105  ~aa20() = default;
106 
107  using base_t::base_t;
109 
110 protected:
112  static constexpr char_type rank_to_char[value_size]
113  {
114  'A',
115  'C',
116  'D',
117  'E',
118  'F',
119  'G',
120  'H',
121  'I',
122  'K',
123  'L',
124  'M',
125  'N',
126  'P',
127  'Q',
128  'R',
129  'S',
130  'T',
131  'V',
132  'W',
133  'Y',
134  };
135 
137  static constexpr std::array<rank_type, 256> char_to_rank
138  {
139  [] () constexpr
140  {
141  std::array<rank_type, 256> ret{};
142 
143  // initialize with UNKNOWN (std::array::fill unfortunately not constexpr)
144  for (auto & c : ret)
145  c = 15; // value of 'S', because that appears most frequently
146 
147  // reverse mapping for characters and their lowercase
148  for (rank_type rnk = 0u; rnk < value_size; ++rnk)
149  {
150  ret[static_cast<rank_type>( rank_to_char[rnk]) ] = rnk;
151  ret[static_cast<rank_type>(to_lower(rank_to_char[rnk]))] = rnk;
152  }
153 
154  ret['B'] = ret['D']; ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
155  ret['J'] = ret['L']; ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
156  ret['O'] = ret['L']; ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
157  ret['U'] = ret['C']; ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
158  ret['X'] = ret['S']; ret['x'] = ret['S']; // Convert unknown amino acids to serine.
159  ret['Z'] = ret['E']; ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
160  ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
161  return ret;
162  }()
163  };
164 };
165 
166 } // namespace seqan3
167 
168 // ------------------------------------------------------------------
169 // containers
170 // ------------------------------------------------------------------
171 
172 namespace seqan3
173 {
176 using aa20_vector = std::vector<aa20>;
177 
178 } // namespace seqan3
179 
180 // ------------------------------------------------------------------
181 // literals
182 // ------------------------------------------------------------------
183 
184 namespace seqan3
185 {
186 
199 inline aa20_vector operator""_aa20(const char * s, std::size_t n)
200 {
201  aa20_vector r;
202  r.resize(n);
203 
204  for (size_t i = 0; i < n; ++i)
205  r[i].assign_char(s[i]);
206 
207  return r;
208 }
209 
210 } // namespace seqan3
static constexpr std::array< rank_type, 256 > char_to_rank
Char to value conversion table.
Definition: aa20.hpp:138
std::vector< aa20 > aa20_vector
Alias for an std::vector of seqan3::aa20.
Definition: aa20.hpp:176
alphabet_concept && assign_char(alphabet_concept &&alph, char_type const chr)
Returns the alphabet letter's value in character representation.
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:89
static constexpr char_type rank_to_char[value_size]
Value to char conversion table.
Definition: aa20.hpp:113
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
The canonical amino acid alphabet.
Definition: aa20.hpp:84
Free function/metafunction wrappers for alphabets with member functions/types.
static detail::min_viable_uint_t< size > constexpr value_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:198
char_t char_type
The type of the alphabet when converted to char (e.g. via to_char()).
Definition: alphabet_base.hpp:87
Provides utilities for modifying characters.
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition: aminoacid_base.hpp:57
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: char_operations.hpp:107