SeqAn3
wuss.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <cmath>
43 #include <vector>
44 
48 
49 // ------------------------------------------------------------------
50 // wuss
51 // ------------------------------------------------------------------
52 
53 namespace seqan3
54 {
55 
81 template <uint8_t SIZE = 51>
82 class wuss : public alphabet_base<wuss<SIZE>, SIZE>
83 {
84 private:
86  using base_t = alphabet_base<wuss<SIZE>, SIZE>;
87 
89  friend base_t;
90 
92  using this_type_deferred = typename base_t::derived_t;
93 
94 public:
95  using base_t::value_size;
96  using base_t::to_rank;
97  using base_t::to_char;
98  using typename base_t::rank_type;
99  using typename base_t::char_type;
100 
104  constexpr wuss() : base_t{} {}
105  constexpr wuss(wuss const &) = default;
106  constexpr wuss(wuss &&) = default;
107  constexpr wuss & operator=(wuss const &) = default;
108  constexpr wuss & operator=(wuss &&) = default;
109  ~wuss() = default;
111 
117 
120  static this_type_deferred constexpr UNPAIRED = this_type_deferred{}.assign_char('.');
122  static this_type_deferred constexpr UNPAIRED1 = this_type_deferred{}.assign_char(':');
124  static this_type_deferred constexpr UNPAIRED2 = this_type_deferred{}.assign_char(',');
126  static this_type_deferred constexpr UNPAIRED3 = this_type_deferred{}.assign_char('-');
128  static this_type_deferred constexpr UNPAIRED4 = this_type_deferred{}.assign_char('_');
130  static this_type_deferred constexpr UNPAIRED5 = this_type_deferred{}.assign_char('~');
132  static this_type_deferred constexpr UNPAIRED6 = this_type_deferred{}.assign_char(';');
133 
135  static this_type_deferred constexpr PAIR_OPEN = this_type_deferred{}.assign_char('<');
137  static this_type_deferred constexpr PAIR_OPEN1 = this_type_deferred{}.assign_char('(');
139  static this_type_deferred constexpr PAIR_OPEN2 = this_type_deferred{}.assign_char('[');
141  static this_type_deferred constexpr PAIR_OPEN3 = this_type_deferred{}.assign_char('{');
142 
144  static this_type_deferred constexpr PAIR_CLOSE = this_type_deferred{}.assign_char('>');
146  static this_type_deferred constexpr PAIR_CLOSE1 = this_type_deferred{}.assign_char(')');
148  static this_type_deferred constexpr PAIR_CLOSE2 = this_type_deferred{}.assign_char(']');
150  static this_type_deferred constexpr PAIR_CLOSE3 = this_type_deferred{}.assign_char('}');
151  // pseudoknots not accessible
153 
156 
160  constexpr bool is_pair_open() const noexcept
161  {
162  return interaction_tab[to_rank()] < 0;
163  }
164 
168  constexpr bool is_pair_close() const noexcept
169  {
170  return interaction_tab[to_rank()] > 0;
171  }
172 
176  constexpr bool is_unpaired() const noexcept
177  {
178  return interaction_tab[to_rank()] == 0;
179  }
180 
184  // formula: (alphabet size - 7 unpaired characters) / 2, as every bracket exists as opening/closing pair
185  static constexpr uint8_t max_pseudoknot_depth{(value_size - 7) / 2};
186 
192  constexpr std::optional<uint8_t> pseudoknot_id() const noexcept
193  {
194  if (interaction_tab[to_rank()] != 0)
195  return std::abs(interaction_tab[to_rank()]) - 1;
196  else
197  return std::nullopt;
198  }
200 
201 protected:
204  static constexpr std::array<char_type, value_size> rank_to_char
205  {
206  [] () constexpr
207  {
208  std::array<char_type, value_size> chars
209  {
210  '.', ':', ',', '-', '_', '~', ';', '<', '(', '[', '{', '>', ')', ']', '}'
211  };
212 
213  // pseudoknot letters
214  for (rank_type rnk = 15u; rnk + 1u < value_size; rnk += 2u)
215  {
216  char_type const off = static_cast<char_type>((rnk - 15u) / 2u);
217  chars[rnk] = 'A' + off;
218  chars[rnk + 1u] = 'a' + off;
219  }
220 
221  return chars;
222  } ()
223  };
224 
226  static constexpr std::array<rank_type, 256> char_to_rank
227  {
228  [] () constexpr
229  {
230  std::array<rank_type, 256> rank_table{};
231 
232  // initialize with unpaired (std::array::fill unfortunately not constexpr)
233  for (rank_type & rnk : rank_table)
234  rnk = 6; // ::UNPAIRED6;
235 
236  // set alphabet values
237  for (rank_type rnk = 0u; rnk < value_size; ++rnk)
238  rank_table[rank_to_char[rnk]] = rnk;
239  return rank_table;
240  } ()
241  };
242 
244  static std::array<int8_t, SIZE> const interaction_tab;
245 };
246 
247 template <uint8_t SIZE>
248 constexpr std::array<int8_t, SIZE> wuss<SIZE>::interaction_tab = [] () constexpr
249 {
250  std::array<int8_t, value_size> interaction_table{};
251  int cnt_open = 0;
252  int cnt_close = 0;
253 
254  for (rank_type rnk = UNPAIRED.to_rank();
255  rnk <= UNPAIRED6.to_rank();
256  ++rnk)
257  {
258  interaction_table[rnk] = 0;
259  }
260 
261  for (rank_type rnk = PAIR_OPEN.to_rank();
262  rnk <= PAIR_OPEN3.to_rank();
263  ++rnk)
264  {
265  interaction_table[rnk] = --cnt_open;
266  }
267 
268  for (rank_type rnk = PAIR_CLOSE.to_rank();
269  rnk <= PAIR_CLOSE3.to_rank();
270  ++rnk)
271  {
272  interaction_table[rnk] = ++cnt_close;
273  }
274 
275  for (rank_type rnk = 15u; rnk + 1 < value_size; rnk += 2u)
276  {
277  interaction_table[rnk] = --cnt_open;
278  interaction_table[rnk + 1] = ++cnt_close;
279  }
280 
281  return interaction_table;
282 } ();
283 
285 typedef wuss<51> wuss51;
286 
287 } // namespace seqan3
288 
289 // ------------------------------------------------------------------
290 // literals
291 // ------------------------------------------------------------------
292 
293 namespace seqan3
294 {
295 
308 inline std::vector<wuss51> operator""_wuss51(const char * str, std::size_t len)
309 {
310  std::vector<wuss51> vec;
311  vec.resize(len);
312 
313  for (size_t idx = 0; idx < len; ++idx)
314  vec[idx].assign_char(str[idx]);
315 
316  return vec;
317 }
318 
319 } // namespace seqan3
constexpr bool is_unpaired() const noexcept
Check whether the character represents an unpaired position in an RNA structure.
Definition: wuss.hpp:176
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:89
char char_type
The type of the alphabet when converted to char (e.g. via to_char()).
Definition: alphabet_base.hpp:87
constexpr bool is_pair_close() const noexcept
Check whether the character represents a leftward interaction in an RNA structure.
Definition: wuss.hpp:168
constexpr std::optional< uint8_t > pseudoknot_id() const noexcept
Get an identifier for a pseudoknotted interaction. Opening and closing brackets of the same type have...
Definition: wuss.hpp:192
wuss< 51 > wuss51
Alias for the default type wuss51.
Definition: wuss.hpp:285
static this_type_deferred constexpr UNPAIRED4
_ not paired (hairpin loop)
Definition: wuss.hpp:128
static this_type_deferred constexpr PAIR_CLOSE2
] bracket right (internal helix enclosing ())
Definition: wuss.hpp:148
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
static this_type_deferred constexpr PAIR_CLOSE
> bracket right (simple terminal stem)
Definition: wuss.hpp:144
static this_type_deferred constexpr PAIR_OPEN1
( bracket left (internal helix enclosing <>)
Definition: wuss.hpp:137
constexpr bool is_pair_open() const noexcept
Check whether the character represents a rightward interaction in an RNA structure.
Definition: wuss.hpp:160
static this_type_deferred constexpr PAIR_OPEN2
[ bracket left (internal helix enclosing ())
Definition: wuss.hpp:139
static this_type_deferred constexpr UNPAIRED1
: not paired (external residue outside structure)
Definition: wuss.hpp:122
static detail::min_viable_uint_t< size > constexpr value_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:198
static this_type_deferred constexpr PAIR_CLOSE1
) bracket right (internal helix enclosing <>)
Definition: wuss.hpp:146
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:82
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:89
Provides seqan3::alphabet_base.
static this_type_deferred constexpr UNPAIRED2
, not paired (multifurcation loop)
Definition: wuss.hpp:124
Metafunction that indicates to what extent an alphabet can handle pseudoknots. [value metafunction ba...
Definition: concept_pre.hpp:228
constexpr char_type to_char() const noexcept
Return the letter as a character of char_type.
Definition: alphabet_base.hpp:120
constexpr wuss< SIZE > & assign_char(std::conditional_t< std::Same< char_type, void >, char, char_type > const c) noexcept
Assign from a character.
Definition: alphabet_base.hpp:165
static this_type_deferred constexpr UNPAIRED3
- not paired (bulge, interior loop)
Definition: wuss.hpp:126
Provides utilities for modifying characters.
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:142
static this_type_deferred constexpr PAIR_OPEN
< bracket left (simple terminal stem)
Definition: wuss.hpp:135
Core alphabet concept and free function/metafunction wrappers.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:77
static this_type_deferred constexpr PAIR_CLOSE3
} bracket right (internal helix enclosing [])
Definition: wuss.hpp:150
static this_type_deferred constexpr UNPAIRED
. not paired (insertion to known structure)
Definition: wuss.hpp:120
static this_type_deferred constexpr UNPAIRED5
~ not paired (due to local alignment)
Definition: wuss.hpp:130
static this_type_deferred constexpr UNPAIRED6
; not paired
Definition: wuss.hpp:132
static this_type_deferred constexpr PAIR_OPEN3
{ bracket left (internal helix enclosing [])
Definition: wuss.hpp:141
char_t char_type
The type of the alphabet when converted to char (e.g. via to_char()).
Definition: alphabet_base.hpp:87