SeqAn3
input.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <cassert>
43 #include <fstream>
44 #include <limits>
45 #include <optional>
46 #include <string>
47 #include <type_traits>
48 #include <utility>
49 #include <variant>
50 #include <vector>
51 
52 #include <range/v3/algorithm/equal.hpp>
53 
60 #include <seqan3/io/exception.hpp>
61 #include <seqan3/io/filesystem.hpp>
62 #include <seqan3/io/record.hpp>
64 #include <seqan3/io/detail/record.hpp>
69 
70 namespace seqan3
71 {
72 // ----------------------------------------------------------------------------
73 // structure_file_input_traits_concept
74 // ----------------------------------------------------------------------------
75 
// Requirements that a traits type for seqan3::structure_file_in must meet.
// The active requirements constrain the member aliases of `t` one field group at a time
// (sequence, id, bpp, structure, structured sequence, energy, reactivity, comment, offset).
// The commented-out requirements are presumably the "expensive" checks referred to in the TODO below.
222 template<typename t>
225 concept structure_file_input_traits_concept = requires(t v)
226 {
227  // TODO(joergi-w) The expensive concept checks are currently omitted. Check again when compiler has improved.
228  // sequence
229  requires alphabet_concept<typename t::seq_alphabet>;
230  requires alphabet_concept<typename t::seq_legal_alphabet>;
231  requires explicitly_convertible_to_concept<typename t::seq_legal_alphabet, typename t::seq_alphabet>;
232  requires sequence_container_concept<typename t::template seq_container<typename t::seq_alphabet>>;
233 // requires sequence_container_concept
234 // <typename t::template seq_container_container
235 // <typename t::template seq_container
236 // <typename t::seq_alphabet>>>;
237 
238  // id
239  requires alphabet_concept<typename t::id_alphabet>;
240  requires sequence_container_concept<typename t::template id_container<typename t::id_alphabet>>;
241 // requires sequence_container_concept
242 // <typename t::template id_container_container
243 // <typename t::template id_container
244 // <typename t::id_alphabet>>>;
245 
246  // bpp
247  requires std::is_floating_point_v<typename t::bpp_prob>;
248  requires std::numeric_limits<typename t::bpp_partner>::is_integer;
249 
250 // requires container_concept // TODO check Associative Container Concept when implemented
251 // <typename t::template bpp_queue
252 // <typename t::template bpp_item
253 // <typename t::bpp_prob, typename t::bpp_partner>>>
254 // && requires(typename t::template bpp_queue // TODO maybe implement also a version that allows emplace_back
255 // <typename t::template bpp_item
256 // <typename t::bpp_prob, typename t::bpp_partner>> value) { value.emplace(1.0, 1); };
257 // requires sequence_container_concept
258 // <typename t::template bpp_container
259 // <typename t::template bpp_queue
260 // <typename t::template bpp_item
261 // <typename t::bpp_prob, typename t::bpp_partner>>>>;
262 // requires sequence_container_concept
263 // <typename t::template bpp_container_container
264 // <typename t::template bpp_container
265 // <typename t::template bpp_queue
266 // <typename t::template bpp_item
267 // <typename t::bpp_prob, typename t::bpp_partner>>>>>;
268 
269  // structure
270  requires std::is_same_v<typename t::structure_alphabet, dssp9> // TODO(joergi-w) add aa_structure_concept
271  || rna_structure_concept<typename t::structure_alphabet>;
272  requires sequence_container_concept<typename t::template structure_container<typename t::structure_alphabet>>;
273 // requires sequence_container_concept
274 // <typename t::template structure_container_container
275 // <typename t::template structure_container
276 // <typename t::structure_alphabet>>>;
277 
278  // structured sequence: cartesian compositions of seq and structure
279  requires std::is_base_of_v<cartesian_composition
280  <typename t::template structured_seq_alphabet
281  <typename t::seq_alphabet, typename t::structure_alphabet>,
282  typename t::seq_alphabet, typename t::structure_alphabet>,
283  typename t::template structured_seq_alphabet<typename t::seq_alphabet, typename t::structure_alphabet>>;
284 // requires sequence_container_concept
285 // <typename t::template structured_seq_container
286 // <typename t::template structured_seq_alphabet
287 // <typename t::seq_alphabet, typename t::structure_alphabet>>>;
288 // requires sequence_container_concept
289 // <typename t::template structured_seq_container_container
290 // <typename t::template structured_seq_container
291 // <typename t::template structured_seq_alphabet
292 // <typename t::seq_alphabet, typename t::structure_alphabet>>>>;
293 
294  // energy: std::optional of floating point number
295  requires std::is_floating_point_v<typename t::energy_type::value_type>;
296  requires sequence_container_concept<typename t::template energy_container<typename t::energy_type>>;
297 
298  // reactivity [error]
299  requires std::is_floating_point_v<typename t::react_type>;
300  requires sequence_container_concept<typename t::template react_container<typename t::react_type>>;
301 // requires sequence_container_concept
302 // <typename t::template react_container_container
303 // <typename t::template react_container
304 // <typename t::react_type>>>;
305 
306  // comment
307  requires alphabet_concept<typename t::comment_alphabet>;
308  requires sequence_container_concept<typename t::template comment_container<typename t::comment_alphabet>>;
309 // requires sequence_container_concept
310 // <typename t::template comment_container_container
311 // <typename t::template comment_container
312 // <typename t::comment_alphabet>>>;
313 
314  // offset
315  requires std::numeric_limits<typename t::offset_type>::is_integer;
316  requires sequence_container_concept<typename t::template offset_container<typename t::offset_type>>;
317 };
319 
320 // ----------------------------------------------------------------------------
321 // structure_file_input_default_traits
322 // ----------------------------------------------------------------------------
323 
338 {
344  // sequence
345  using seq_alphabet = rna5;
346  using seq_legal_alphabet = rna15;
347  template<typename _seq_alphabet>
348  using seq_container = std::vector<_seq_alphabet>;
349  template<typename _seq_container>
351 
352  // id
353  using id_alphabet = char;
354  template<typename _id_alphabet>
355  using id_container = std::basic_string<_id_alphabet>;
356  template<typename _id_container>
358 
359  // base pair probability structure
360  using bpp_prob = double;
361  using bpp_partner = size_t;
362  template<typename _bpp_prec, typename _bpp_partner>
363  using bpp_item = std::pair<_bpp_prec, _bpp_partner>;
364  template<typename _bpp_item>
365  using bpp_queue = std::set<_bpp_item>;
366  template<typename _bpp_queue>
367  using bpp_container = std::vector<_bpp_queue>;
368  template<typename _bpp_container>
369  using bpp_container_container = std::vector<_bpp_container>;
370 
371  // fixed structure
372  using structure_alphabet = wuss51;
373  template<typename _structure_alphabet>
374  using structure_container = std::vector<_structure_alphabet>;
375  template<typename _structure_container>
377 
378  // combined sequence and structure
379  template<typename _seq_alphabet, typename _structure_alphabet>
381  template<typename _structured_seq_alphabet>
382  using structured_seq_container = std::vector<_structured_seq_alphabet>;
383  template<typename _structured_seq_container>
385 
386  // energy
387  using energy_type = std::optional<double>;
388  template<typename _energy_type>
389  using energy_container = std::vector<_energy_type>;
390 
391  // reactivity [error]
392  using react_type = double;
393  template<typename _react_type>
394  using react_container = std::vector<_react_type>;
395  template<typename _react_container>
396  using react_container_container = std::vector<_react_container>;
397 
398  // comment
399  using comment_alphabet = char;
400  template<typename _comment_alphabet>
401  using comment_container = std::basic_string<_comment_alphabet>;
402  template<typename _comment_container>
404 
405  // offset
406  using offset_type = size_t;
407  template<typename _offset_type>
408  using offset_container = std::vector<_offset_type>;
410 };
411 
415 {
420  using seq_alphabet = aa27;
421  using seq_legal_alphabet = aa27;
422  using structure_alphabet = dssp9;
423  template<typename _seq_alphabet, typename _structure_alphabet>
426 };
427 
428 // ----------------------------------------------------------------------------
429 // structure_file_in
430 // ----------------------------------------------------------------------------
431 
664 template<structure_file_input_traits_concept traits_type_ = structure_file_input_default_traits_rna,
665  detail::fields_concept selected_field_ids_ = fields<field::SEQ, field::ID, field::STRUCTURE>,
666  detail::type_list_of_structure_file_input_formats_concept valid_formats_
668  istream_concept<char> stream_type_ = std::ifstream>
670 {
671 public:
// Re-export the class template arguments under names without the trailing underscore.
// The traits type supplies the alphabets and containers used for the fields.
676  using traits_type = traits_type_;
// The list of fields selected for the record.
679  using selected_field_ids = selected_field_ids_;
// The list of formats this file may be read as.
681  using valid_formats = valid_formats_;
// The type of the underlying stream.
683  using stream_type = stream_type_;
685 
689  using field_ids = fields<field::SEQ,
690  field::ID,
691  field::BPP,
695  field::REACT,
699 
700  static_assert([]() constexpr
701  {
702  for (field f : selected_field_ids::as_array)
703  if (!field_ids::contains(f))
704  return false;
705  return true;
706  }(),
707  "You selected a field that is not valid for structure files, please refer to the documentation "
708  "of structure_file_in::field_ids for the accepted values.");
709 
710  static_assert([]() constexpr
711  {
712  return !(selected_field_ids::contains(field::STRUCTURED_SEQ) &&
713  (selected_field_ids::contains(field::SEQ) ||
714  (selected_field_ids::contains(field::STRUCTURE))));
715  }(), "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
716  "at the same time.");
717 
// Per-record field types, all derived from the traits type.
// SEQ: the traits' seq_container over seq_alphabet.
723  using seq_type = typename traits_type::template seq_container<typename traits_type::seq_alphabet>;
// ID: the traits' id_container over id_alphabet.
726  using id_type = typename traits_type::template id_container<typename traits_type::id_alphabet>;
// BPP: a bpp_container of bpp_queue of bpp_item<bpp_prob, bpp_partner>.
728  using bpp_type = typename traits_type::template bpp_container
729  <typename traits_type::template bpp_queue
730  <typename traits_type::template bpp_item
731  <typename traits_type::bpp_prob, typename traits_type::bpp_partner>>>;
// STRUCTURE: the traits' structure_container over structure_alphabet.
733  using structure_type = typename traits_type::template structure_container
734  <typename traits_type::structure_alphabet>;
// STRUCTURED_SEQ: a container over the alphabet that combines seq_alphabet and structure_alphabet.
736  using structured_seq_type = typename traits_type::template structured_seq_container
737  <typename traits_type::template structured_seq_alphabet
738  <typename traits_type::seq_alphabet, typename traits_type::structure_alphabet>>;
// ENERGY: taken from the traits unchanged.
740  using energy_type = typename traits_type::energy_type;
// REACT and REACT_ERR: the traits' react_container over the traits' react_type.
// Note that this alias is the container, not the element type of the same name in the traits.
742  using react_type = typename traits_type::template react_container<typename traits_type::react_type>;
// COMMENT: the traits' comment_container over comment_alphabet.
744  using comment_type = typename traits_type::template comment_container
745  <typename traits_type::comment_alphabet>;
// OFFSET: taken from the traits unchanged.
747  using offset_type = typename traits_type::offset_type;
748 
752 
757 
// Column types: for each field, the traits' outer container instantiated with the per-record field type.
// These are the element types of the column-wise view of the file (see columns_buffer).
763  using seq_column_type = typename traits_type::template seq_container_container<seq_type>;
766  using id_column_type = typename traits_type::template id_container_container<id_type>;
768  using bpp_column_type = typename traits_type::template bpp_container_container<bpp_type>;
770  using structure_column_type = typename traits_type::template structure_container_container<structure_type>;
772  using structured_seq_column_type = typename traits_type::template structured_seq_container_container
773  <structured_seq_type>;
// ENERGY and OFFSET have no *_container_container in the traits; their columns use the plain container.
775  using energy_column_type = typename traits_type::template energy_container<energy_type>;
777  using react_column_type = typename traits_type::template react_container_container<react_type>;
779  using comment_column_type = typename traits_type::template comment_container_container<comment_type>;
781  using offset_column_type = typename traits_type::template offset_container<offset_type>;
782 
791  react_column_type,
795  using file_as_tuple_type = record<detail::select_types_with_ids_t<field_column_types, field_ids,
798 
// Range associated types. The value type is the record type of this file.
803  using value_type = record_type;
// Deliberately void: this class offers no const access to records.
808  using const_reference = void;
810  using size_type = size_t;
// The signed counterpart of size_t.
812  using difference_type = std::make_signed_t<size_t>;
// The iterator is a detail::in_file_iterator over this file.
814  using iterator = detail::in_file_iterator<structure_file_in>;
// Deliberately void: this class offers no const iteration.
816  using const_iterator = void;
820 
// Special member functions: the file is neither default-constructible nor copyable, but it is movable.
824  structure_file_in() = delete;
827  structure_file_in(structure_file_in const &) = delete;
829  structure_file_in & operator=(structure_file_in const &) = delete;
831  structure_file_in(structure_file_in &&) = default;
833  structure_file_in & operator=(structure_file_in &&) = default;
835  ~structure_file_in() = default;
836 
846  structure_file_in(filesystem::path const & _file_name,
847  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
848  {
849  // open stream
850  stream.open(_file_name, std::ios_base::in | std::ios::binary);
851  if (!stream.is_open())
852  throw file_open_error{"Could not open file for reading."};
853 
854  // initialise format handler
855  bool format_found = false;
856  std::string extension = _file_name.extension().string();
857  if (extension.size() > 1)
858  {
859  extension = extension.substr(1); // drop leading "."
860  meta::for_each(valid_formats{}, [&] (auto && fmt)
861  {
862  using fmt_type = remove_cvref_t<decltype(fmt)>;
863 
864  for (auto const & ext : fmt_type::file_extensions)
865  {
866  if (std::ranges::equal(ext, extension))
867  {
868  format = fmt_type{};
869  format_found = true;
870  return;
871  }
872  }
873  });
874  }
875  if (!format_found)
876  throw unhandled_extension_error("No valid format found for this extension.");
877 
878  // buffer first record
879  read_next_record();
880  }
881 
889  template<structure_file_input_format_concept file_format>
891  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
892  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
893  stream{std::move(_stream)}, format{file_format{}}
894  {
895  static_assert(meta::in<valid_formats, file_format>::value,
896  "You selected a format that is not in the valid_formats of this file.");
897 
898  // buffer first record
899  read_next_record();
900  }
902 
// Returns an iterator constructed from this file object.
920  iterator begin() noexcept
921  {
922  return {*this};
923  }
924 
// Returns a value-initialised sentinel, the end marker to compare the iterator against.
938  sentinel end() noexcept
939  {
940  return {};
941  }
942 
// Returns the currently buffered record (record_buffer); no reading is triggered here.
979  reference front() noexcept
980  {
981  return record_buffer;
982  }
984 
// Column access by field: reads all remaining records into the column buffers and returns the
// column that belongs to field f. Fails to compile if f is not among the selected fields.
989  template<field f>
991  friend auto & get(structure_file_in & file)
992  {
993  static_assert(structure_file_in::selected_field_ids::contains(f),
994  "You requested a field via get that was not selected for the file.");
995 
// Appends every record that has not been consumed yet to the column buffers.
996  file.read_columns();
997 
998  return seqan3::get<f>(file.columns_buffer);
999  }
1000 
// Overload for rvalue files: delegates to the lvalue overload (the named parameter is an lvalue)
// and casts the resulting column reference to an rvalue reference.
1002  template<field f>
1003  friend auto && get(structure_file_in && file)
1004  {
1005  return std::move(get<f>(file));
1006  }
1007 
// Column access by position: reads all remaining records into the column buffers and returns the
// i-th column. Fails to compile if i is not smaller than the number of selected fields.
1009  template<size_t i>
1010  friend auto & get(structure_file_in & file)
1011  {
1012  static_assert(i < structure_file_in::selected_field_ids::as_array.size(),
1013  "You requested a field number larger than the number of selected fields for the file.");
1014  file.read_columns();
1015 
1016  return std::get<i>(file.columns_buffer);
1017  }
1018 
// Overload for rvalue files: delegates to the positional lvalue overload and casts the resulting
// column reference to an rvalue reference.
1020  template<size_t i>
1021  friend auto && get(structure_file_in && file)
1022  {
1023  return std::move(get<i>(file));
1024  }
1025 
// Column access by type: reads all remaining records into the column buffers and returns the
// column whose type is t (resolved by std::get on the column buffer).
1027  template<typename t>
1028  friend auto & get(structure_file_in & file)
1029  {
1030  file.read_columns();
1031 
1032  return std::get<t>(file.columns_buffer);
1033  }
1034 
// Overload for rvalue files: delegates to the type-based lvalue overload and casts the resulting
// column reference to an rvalue reference.
1036  template<typename t>
1037  friend auto && get(structure_file_in && file)
1038  {
1039  return std::move(get<t>(file));
1040  }
1042 
// Options handed to the format's read() on every record. The second template argument states
// whether field::STRUCTURED_SEQ was selected.
1044  structure_file_input_options<typename traits_type::seq_legal_alphabet,
1045  selected_field_ids::contains(field::STRUCTURED_SEQ)> options;
1046 
1047 protected:
1049 
// Buffer for the current record; refilled by read_next_record() and returned by front().
1052  record_type record_buffer;
// Column-wise buffers; filled by read_columns() when one of the get() overloads is called.
1055  file_as_tuple_type columns_buffer;
1057 
// NOTE(review): no code visible in this listing assigns or reads file_name - confirm whether it is still needed.
1059  std::string file_name;
1060 
// The stream the records are read from.
1062  stream_type stream;
1063 
// Set by read_next_record() once no further record can be read.
1065  bool at_end{false};
1066 
// A std::variant over all types in valid_formats; holds the format selected at construction.
1068  using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
1070  format_type format;
1071 
1073  void read_next_record()
1074  {
1075  if (at_end)
1076  return;
1077 
1078  // clear the record
1079  record_buffer.clear();
1080 
1081  // at end if we could not read further
1082  if (stream.eof())
1083  {
1084  at_end = true;
1085  return;
1086  }
1087 
1088  assert(!format.valueless_by_exception());
1089  std::visit([&] (structure_file_input_format_concept & f)
1090  {
1091  // read new record
1092  if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1093  {
1094  static_assert(!selected_field_ids::contains(field::STRUCTURE),
1095  "You may not select field::STRUCTURED_SEQ and field::STRUCTURE at the same time.");
1096  static_assert(!selected_field_ids::contains(field::SEQ),
1097  "You may not select field::STRUCTURED_SEQ and field::SEQ at the same time.");
1098  f.read(stream,
1099  options,
1100  detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer), // seq
1101  detail::get_or_ignore<field::ID>(record_buffer),
1102  detail::get_or_ignore<field::BPP>(record_buffer),
1103  detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer), // structure
1104  detail::get_or_ignore<field::ENERGY>(record_buffer),
1105  detail::get_or_ignore<field::REACT>(record_buffer),
1106  detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1107  detail::get_or_ignore<field::COMMENT>(record_buffer),
1108  detail::get_or_ignore<field::OFFSET>(record_buffer));
1109  }
1110  else
1111  {
1112  f.read(stream,
1113  options,
1114  detail::get_or_ignore<field::SEQ>(record_buffer),
1115  detail::get_or_ignore<field::ID>(record_buffer),
1116  detail::get_or_ignore<field::BPP>(record_buffer),
1117  detail::get_or_ignore<field::STRUCTURE>(record_buffer),
1118  detail::get_or_ignore<field::ENERGY>(record_buffer),
1119  detail::get_or_ignore<field::REACT>(record_buffer),
1120  detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1121  detail::get_or_ignore<field::COMMENT>(record_buffer),
1122  detail::get_or_ignore<field::OFFSET>(record_buffer));
1123  }
1124  }, format);
1125  }
1126 
1128  void read_columns()
1129  {
1130  //TODO don't do multiple visits
1131  //TODO create specialised version for concatenated_sequences where we append on the concat
1132  auto & seq_column_buffer = detail::get_or_ignore<field::SEQ>(columns_buffer);
1133  auto & id_column_buffer = detail::get_or_ignore<field::ID>(columns_buffer);
1134  auto & bpp_column_buffer = detail::get_or_ignore<field::BPP>(columns_buffer);
1135  auto & structure_column_buffer = detail::get_or_ignore<field::STRUCTURE>(columns_buffer);
1136  auto & structured_seq_column_buffer = detail::get_or_ignore<field::STRUCTURED_SEQ>(columns_buffer);
1137  auto & energy_column_buffer = detail::get_or_ignore<field::ENERGY>(columns_buffer);
1138  auto & react_column_buffer = detail::get_or_ignore<field::REACT>(columns_buffer);
1139  auto & react_err_column_buffer = detail::get_or_ignore<field::REACT_ERR>(columns_buffer);
1140  auto & comment_column_buffer = detail::get_or_ignore<field::COMMENT>(columns_buffer);
1141  auto & offset_column_buffer = detail::get_or_ignore<field::OFFSET>(columns_buffer);
1142 
1143  // read the remaining records and split into column buffers
1144  for (auto & rec : *this)
1145  {
1146  if constexpr (selected_field_ids::contains(field::SEQ))
1147  seq_column_buffer.push_back(std::move(seqan3::get<field::SEQ>(rec)));
1148  if constexpr (selected_field_ids::contains(field::ID))
1149  id_column_buffer.push_back(std::move(seqan3::get<field::ID>(rec)));
1150  if constexpr (selected_field_ids::contains(field::BPP))
1151  bpp_column_buffer.push_back(std::move(seqan3::get<field::BPP>(rec)));
1152  if constexpr (selected_field_ids::contains(field::STRUCTURE))
1153  structure_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURE>(rec)));
1154  if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1155  structured_seq_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURED_SEQ>(rec)));
1156  if constexpr (selected_field_ids::contains(field::ENERGY))
1157  energy_column_buffer.push_back(std::move(seqan3::get<field::ENERGY>(rec)));
1158  if constexpr (selected_field_ids::contains(field::REACT))
1159  react_column_buffer.push_back(std::move(seqan3::get<field::REACT>(rec)));
1160  if constexpr (selected_field_ids::contains(field::REACT_ERR))
1161  react_err_column_buffer.push_back(std::move(seqan3::get<field::REACT_ERR>(rec)));
1162  if constexpr (selected_field_ids::contains(field::COMMENT))
1163  comment_column_buffer.push_back(std::move(seqan3::get<field::COMMENT>(rec)));
1164  if constexpr (selected_field_ids::contains(field::OFFSET))
1165  offset_column_buffer.push_back(std::move(seqan3::get<field::OFFSET>(rec)));
1166  }
1167  }
1168 
1170  friend iterator;
1171 };
1172 
1177 template <istream_concept<char> stream_type,
1178  structure_file_input_format_concept file_format,
1179  detail::fields_concept selected_field_ids>
1180 structure_file_in(stream_type && _stream, file_format const &, selected_field_ids const &)
1181  -> structure_file_in<typename structure_file_in<>::traits_type, // actually use the default
1182  selected_field_ids,
1184  std::remove_reference_t<stream_type>>;
1186 
1187 } // namespace seqan3
1188 
1189 // ------------------------------------------------------------------
1190 // std-overloads for the tuple-like interface
1191 // ------------------------------------------------------------------
1192 
1193 namespace std
1194 {
1199  seqan3::detail::fields_concept selected_field_ids,
1200  seqan3::detail::type_list_of_structure_file_input_formats_concept valid_formats,
1202 struct tuple_size<seqan3::structure_file_in<traits_type, selected_field_ids, valid_formats, stream_type>>
1203 {
1205  static constexpr size_t value = selected_field_ids::as_array.size();
1206 };
1207 
1211 template<size_t elem_no,
1213  seqan3::detail::fields_concept selected_field_ids,
1214  seqan3::detail::type_list_of_structure_file_input_formats_concept valid_formats,
1215  seqan3::istream_concept<char> stream_type>
1216 struct tuple_element<elem_no, seqan3::structure_file_in<traits_type, selected_field_ids, valid_formats, stream_type>>
1217  : tuple_element<elem_no, typename seqan3::structure_file_in<traits_type,
1218  selected_field_ids,
1219  valid_formats,
1220  stream_type>::file_as_tuple_type>
1221 {};
1222 
1223 } // namespace std
typename traits_type::template seq_container_container< seq_type > seq_column_type
Column type of field::SEQ (seqan3::concatenated_sequences<seq_type> by default).
Definition: input.hpp:764
A seqan3::cartesian_composition that joins an aminoacid alphabet with a protein structure alphabet...
Definition: structured_aa.hpp:80
Provides seqan3::structure_file_input_format_concept.
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: input.hpp:816
Thrown if there is no format that accepts a given file extension.
Definition: exception.hpp:54
structure_file_input_options< typename traits_type::seq_legal_alphabet, selected_field_ids::contains(field::STRUCTURED_SEQ)> options
The options are public and its members can be set directly.
Definition: input.hpp:1045
A class for reading structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: input.hpp:669
The "sequence", usually a range of nucleotides or amino acids.
The protein structure alphabet of the characters "HGIEBTSCX".
Definition: dssp9.hpp:85
Provides exceptions used in the I/O module.
Sequence and fixed interactions combined in one range.
Energy of a folded sequence, represented by one float number.
typename traits_type::template seq_container< typename traits_type::seq_alphabet > seq_type
The type of the sequence field (default std::vector of seqan3::rna5).
Definition: input.hpp:724
Provides the seqan3::structure_file_format_vienna class.
void const_reference
The const_reference type is void, because files are not const-iterable.
Definition: input.hpp:808
The five letter RNA alphabet of A,C,G,U and the unknown character N.
Definition: rna5.hpp:71
typename traits_type::template energy_container< energy_type > energy_column_type
Column type of field::ENERGY (std::vector<energy_type> by default).
Definition: input.hpp:775
wuss< 51 > wuss51
Alias for the default type wuss51.
Definition: wuss.hpp:285
std::make_signed_t< size_t > difference_type
A signed integer type, usually std::ptrdiff_t.
Definition: input.hpp:812
Comment field of arbitrary content, usually a string.
structure_file_in(filesystem::path const &_file_name, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:846
Meta-header for the structure module. It includes all headers from alphabet/structure/.
Provides seqan3::structure_file_input_options.
SeqAn specific customisations in the standard namespace.
Definition: align_result.hpp:221
typename traits_type::template comment_container< typename traits_type::comment_alphabet > comment_type
The type of the comment field (default std::string).
Definition: input.hpp:745
typename traits_type::offset_type offset_type
The type of the offset field (default size_t).
Definition: input.hpp:747
The requirements a traits_type for seqan3::structure_file_in must meet.
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:979
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:195
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
Concept for input streams.
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:810
typename traits_type::template structured_seq_container_container< structured_seq_type > structured_seq_column_type
Column type of field::STRUCTURED_SEQ (seqan3::concatenated_sequences<structured_seq_type> by default)...
Definition: input.hpp:773
Base pair probability matrix of interactions, usually a matrix of float numbers.
Contains seqan3::rna5, container aliases and string literals.
void read(stream_type &stream, structure_file_input_options< seq_legal_alph_type, structured_seq_combined > const &options, seq_type &seq, id_type &id, bpp_type &bpp, structure_type &structure, energy_type &energy, react_type &react, react_type &react_err, comment_type &comment, offset_type &offset)
Read from the specified stream and back-insert into the given field buffers.
Provides seqan3::concatenated_sequences.
typename traits_type::energy_type energy_type
The type of the energy field (default std::optional<double>).
Definition: input.hpp:740
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: input.hpp:920
The twenty-seven letter amino acid alphabet.
Definition: aa27.hpp:67
The default traits for seqan3::structure_file_in.
Definition: input.hpp:337
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
Container that stores sequences concatenated internally.
Definition: concatenated_sequences.hpp:117
typename traits_type::template structure_container_container< structure_type > structure_column_type
Column type of field::STRUCTURE (seqan3::concatenated_sequences<structure_type> by default)...
Definition: input.hpp:770
Provides alphabet adaptations for standard char types.
typename traits_type::template structured_seq_container< typename traits_type::template structured_seq_alphabet< typename traits_type::seq_alphabet, typename traits_type::structure_alphabet > > structured_seq_type
The type of the sequence-structure field (default std::vector of structured_rna<rna5, wuss51>).
Definition: input.hpp:738
Reactivity error values given in a vector corresponding to REACT.
Provides the seqan3::record template and the seqan3::field enum.
Sequence (SEQ) relative start position (0-based), unsigned value.
The identifier, usually a string.
The 15 letter RNA alphabet, containing all IUPAC symbols minus the gap.
Definition: rna15.hpp:73
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:82
stream_type_ stream_type
The type of the underlying stream.
Definition: input.hpp:683
std::remove_cv_t< std::remove_reference_t< t > > remove_cvref_t
Return the input type with const, volatile and references removed [Type metafunction].
Definition: basic.hpp:64
std::ranges::default_sentinel sentinel
The type returned by end().
Definition: input.hpp:818
structure_file_in(stream_type &&_stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:890
type_list< seq_type, id_type, bpp_type, structure_type, structured_seq_type, energy_type, react_type, react_type, comment_type, offset_type > field_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:751
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:938
Fixed interactions, usually a string of structure alphabet characters.
typename traits_type::template bpp_container_container< bpp_type > bpp_column_type
Column type of field::BPP (std::vector<bpp_type> by default).
Definition: input.hpp:768
typename traits_type::template offset_container< offset_type > offset_column_type
Column type of field::OFFSET (std::vector<offset_type> by default).
Definition: input.hpp:781
A seqan3::cartesian_composition that joins a nucleotide alphabet with an RNA structure alphabet...
Definition: structured_rna.hpp:80
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:677
Stream concepts.
type_list< seq_column_type, id_column_type, bpp_column_type, structure_column_type, structured_seq_column_type, energy_column_type, react_column_type, react_column_type, comment_column_type, offset_column_type > field_column_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:793
typename traits_type::template id_container_container< id_type > id_column_type
Column type of field::ID (seqan3::concatenated_sequences<id_type> by default).
Definition: input.hpp:766
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:681
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
::ranges::equal equal
Alias for ranges::equal. Determines if two sets of elements are the same.
Definition: ranges:210
Provides C++20 additions to the type_traits header.
Provides various metafunctions on generic types.
typename traits_type::template react_container_container< react_type > react_column_type
Column type of field::REACT and field::REACT_ERR (std::vector<react_type> by default).
Definition: input.hpp:777
typename traits_type::template bpp_container< typename traits_type::template bpp_queue< typename traits_type::template bpp_item< typename traits_type::bpp_prob, typename traits_type::bpp_partner > >> bpp_type
The type of the base pair probabilities (default std::vector of std::set<std::pair<double, size_t>>).
Definition: input.hpp:731
Meta-header for the aminoacid submodule; includes all headers from alphabet/aminoacid/.
A traits type that specifies input as amino acids.
Definition: input.hpp:414
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:679
typename traits_type::template id_container< typename traits_type::id_alphabet > id_type
The type of the ID field (default std::string).
Definition: input.hpp:726
typename traits_type::template comment_container_container< comment_type > comment_column_type
Column type of field::COMMENT (seqan3::concatenated_sequences<comment_type> by default).
Definition: input.hpp:779
Reactivity values of the sequence characters given in a vector of float numbers.
detail::in_file_iterator< structure_file_in > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:814
Provides the seqan3::detail::in_file_iterator class template.
The generic concept for structure file in formats.
typename traits_type::template react_container< typename traits_type::react_type > react_type
The type of the reactivity and reactivity error fields (default std::vector of double).
Definition: input.hpp:742
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215
The options type defines various option members that influence the behaviour of all or some formats...
Definition: input_options.hpp:54
typename traits_type::template structure_container< typename traits_type::structure_alphabet > structure_type
The type of the structure field (default std::vector of seqan3::wuss51).
Definition: input.hpp:734