52 #include <range/v3/algorithm/equal.hpp> 64 #include <seqan3/io/detail/record.hpp> 225 concept structure_file_input_traits_concept = requires(t v)
229 requires alphabet_concept<typename t::seq_alphabet>;
230 requires alphabet_concept<typename t::seq_legal_alphabet>;
231 requires explicitly_convertible_to_concept<typename t::seq_legal_alphabet, typename t::seq_alphabet>;
232 requires sequence_container_concept<typename t::template seq_container<typename t::seq_alphabet>>;
239 requires alphabet_concept<typename t::id_alphabet>;
240 requires sequence_container_concept<typename t::template id_container<typename t::id_alphabet>>;
247 requires std::is_floating_point_v<typename t::bpp_prob>;
248 requires std::numeric_limits<typename t::bpp_partner>::is_integer;
270 requires std::is_same_v<typename t::structure_alphabet, dssp9>
271 || rna_structure_concept<typename t::structure_alphabet>;
272 requires sequence_container_concept<typename t::template structure_container<typename t::structure_alphabet>>;
279 requires std::is_base_of_v<cartesian_composition
280 <
typename t::template structured_seq_alphabet
281 <
typename t::seq_alphabet,
typename t::structure_alphabet>,
282 typename t::seq_alphabet,
typename t::structure_alphabet>,
283 typename t::template structured_seq_alphabet<typename t::seq_alphabet, typename t::structure_alphabet>>;
295 requires std::is_floating_point_v<typename t::energy_type::value_type>;
296 requires sequence_container_concept<typename t::template energy_container<typename t::energy_type>>;
299 requires std::is_floating_point_v<typename t::react_type>;
300 requires sequence_container_concept<typename t::template react_container<typename t::react_type>>;
307 requires alphabet_concept<typename t::comment_alphabet>;
308 requires sequence_container_concept<typename t::template comment_container<typename t::comment_alphabet>>;
315 requires std::numeric_limits<typename t::offset_type>::is_integer;
316 requires sequence_container_concept<typename t::template offset_container<typename t::offset_type>>;
347 template<
typename _seq_alphabet>
348 using seq_container = std::vector<_seq_alphabet>;
349 template<
typename _seq_container>
353 using id_alphabet = char;
354 template<
typename _
id_alphabet>
355 using id_container = std::basic_string<_id_alphabet>;
356 template<
typename _
id_container>
360 using bpp_prob = double;
361 using bpp_partner = size_t;
362 template<
typename _bpp_prec,
typename _bpp_partner>
363 using bpp_item = std::pair<_bpp_prec, _bpp_partner>;
364 template<
typename _bpp_item>
365 using bpp_queue = std::set<_bpp_item>;
366 template<
typename _bpp_queue>
367 using bpp_container = std::vector<_bpp_queue>;
368 template<
typename _bpp_container>
369 using bpp_container_container = std::vector<_bpp_container>;
373 template<
typename _structure_alphabet>
374 using structure_container = std::vector<_structure_alphabet>;
375 template<
typename _structure_container>
379 template<
typename _seq_alphabet,
typename _structure_alphabet>
381 template<
typename _structured_seq_alphabet>
382 using structured_seq_container = std::vector<_structured_seq_alphabet>;
383 template<
typename _structured_seq_container>
387 using energy_type = std::optional<double>;
388 template<
typename _energy_type>
389 using energy_container = std::vector<_energy_type>;
392 using react_type = double;
393 template<
typename _react_type>
394 using react_container = std::vector<_react_type>;
395 template<
typename _react_container>
396 using react_container_container = std::vector<_react_container>;
399 using comment_alphabet = char;
400 template<
typename _comment_alphabet>
401 using comment_container = std::basic_string<_comment_alphabet>;
402 template<
typename _comment_container>
406 using offset_type = size_t;
407 template<
typename _offset_type>
408 using offset_container = std::vector<_offset_type>;
423 template<
typename _seq_alphabet,
typename _structure_alphabet>
666 detail::type_list_of_structure_file_input_formats_concept valid_formats_
700 static_assert([]() constexpr
702 for (
field f : selected_field_ids::as_array)
703 if (!field_ids::contains(f))
707 "You selected a field that is not valid for structure files, please refer to the documentation " 708 "of structure_file_in::field_ids for the accepted values.");
710 static_assert([]() constexpr
712 return !(selected_field_ids::contains(field::STRUCTURED_SEQ) &&
713 (selected_field_ids::contains(field::SEQ) ||
714 (selected_field_ids::contains(field::STRUCTURE))));
715 }(),
"You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE " 716 "at the same time.");
723 using seq_type =
typename traits_type::template seq_container<typename traits_type::seq_alphabet>;
726 using id_type =
typename traits_type::template id_container<typename traits_type::id_alphabet>;
728 using bpp_type =
typename traits_type::template bpp_container
729 <
typename traits_type::template bpp_queue
730 <
typename traits_type::template bpp_item
731 <
typename traits_type::bpp_prob,
typename traits_type::bpp_partner>>>;
733 using structure_type =
typename traits_type::template structure_container
734 <
typename traits_type::structure_alphabet>;
737 <
typename traits_type::template structured_seq_alphabet
738 <
typename traits_type::seq_alphabet,
typename traits_type::structure_alphabet>>;
742 using react_type =
typename traits_type::template react_container<typename traits_type::react_type>;
744 using comment_type =
typename traits_type::template comment_container
745 <
typename traits_type::comment_alphabet>;
763 using seq_column_type =
typename traits_type::template seq_container_container<seq_type>;
766 using id_column_type =
typename traits_type::template id_container_container<id_type>;
768 using bpp_column_type =
typename traits_type::template bpp_container_container<bpp_type>;
773 <structured_seq_type>;
814 using iterator = detail::in_file_iterator<structure_file_in>;
847 selected_field_ids
const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
850 stream.open(_file_name, std::ios_base::in | std::ios::binary);
851 if (!stream.is_open())
855 bool format_found =
false;
856 std::string extension = _file_name.extension().string();
857 if (extension.size() > 1)
859 extension = extension.substr(1);
864 for (
auto const & ext : fmt_type::file_extensions)
889 template<structure_file_input_format_concept file_format>
891 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
892 selected_field_ids
const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
893 stream{std::move(_stream)}, format{file_format{}}
895 static_assert(meta::in<valid_formats, file_format>::value,
896 "You selected a format that is not in the valid_formats of this file.");
981 return record_buffer;
993 static_assert(structure_file_in::selected_field_ids::contains(f),
994 "You requested a field via get that was not selected for the file.");
998 return seqan3::get<f>(file.columns_buffer);
1005 return std::move(get<f>(file));
1012 static_assert(i < structure_file_in::selected_field_ids::as_array.
size(),
1013 "You requested a field number larger than the number of selected fields for the file.");
1014 file.read_columns();
1016 return std::get<i>(file.columns_buffer);
1023 return std::move(get<i>(file));
1027 template<
typename t>
1030 file.read_columns();
1032 return std::get<t>(file.columns_buffer);
1036 template<
typename t>
1039 return std::move(get<t>(file));
1045 selected_field_ids::contains(field::STRUCTURED_SEQ)>
options;
1059 std::string file_name;
1068 using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
1073 void read_next_record()
1079 record_buffer.clear();
1088 assert(!format.valueless_by_exception());
1092 if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1094 static_assert(!selected_field_ids::contains(field::STRUCTURE),
1095 "You may not select field::STRUCTURED_SEQ and field::STRUCTURE at the same time.");
1096 static_assert(!selected_field_ids::contains(field::SEQ),
1097 "You may not select field::STRUCTURED_SEQ and field::SEQ at the same time.");
1100 detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer),
1101 detail::get_or_ignore<field::ID>(record_buffer),
1102 detail::get_or_ignore<field::BPP>(record_buffer),
1103 detail::get_or_ignore<field::STRUCTURED_SEQ>(record_buffer),
1104 detail::get_or_ignore<field::ENERGY>(record_buffer),
1105 detail::get_or_ignore<field::REACT>(record_buffer),
1106 detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1107 detail::get_or_ignore<field::COMMENT>(record_buffer),
1108 detail::get_or_ignore<field::OFFSET>(record_buffer));
1114 detail::get_or_ignore<field::SEQ>(record_buffer),
1115 detail::get_or_ignore<field::ID>(record_buffer),
1116 detail::get_or_ignore<field::BPP>(record_buffer),
1117 detail::get_or_ignore<field::STRUCTURE>(record_buffer),
1118 detail::get_or_ignore<field::ENERGY>(record_buffer),
1119 detail::get_or_ignore<field::REACT>(record_buffer),
1120 detail::get_or_ignore<field::REACT_ERR>(record_buffer),
1121 detail::get_or_ignore<field::COMMENT>(record_buffer),
1122 detail::get_or_ignore<field::OFFSET>(record_buffer));
1132 auto & seq_column_buffer = detail::get_or_ignore<field::SEQ>(columns_buffer);
1133 auto & id_column_buffer = detail::get_or_ignore<field::ID>(columns_buffer);
1134 auto & bpp_column_buffer = detail::get_or_ignore<field::BPP>(columns_buffer);
1135 auto & structure_column_buffer = detail::get_or_ignore<field::STRUCTURE>(columns_buffer);
1136 auto & structured_seq_column_buffer = detail::get_or_ignore<field::STRUCTURED_SEQ>(columns_buffer);
1137 auto & energy_column_buffer = detail::get_or_ignore<field::ENERGY>(columns_buffer);
1138 auto & react_column_buffer = detail::get_or_ignore<field::REACT>(columns_buffer);
1139 auto & react_err_column_buffer = detail::get_or_ignore<field::REACT_ERR>(columns_buffer);
1140 auto & comment_column_buffer = detail::get_or_ignore<field::COMMENT>(columns_buffer);
1141 auto & offset_column_buffer = detail::get_or_ignore<field::OFFSET>(columns_buffer);
1144 for (
auto & rec : *
this)
1146 if constexpr (selected_field_ids::contains(field::SEQ))
1147 seq_column_buffer.push_back(std::move(seqan3::get<field::SEQ>(rec)));
1148 if constexpr (selected_field_ids::contains(field::ID))
1149 id_column_buffer.push_back(std::move(seqan3::get<field::ID>(rec)));
1150 if constexpr (selected_field_ids::contains(field::BPP))
1151 bpp_column_buffer.push_back(std::move(seqan3::get<field::BPP>(rec)));
1152 if constexpr (selected_field_ids::contains(field::STRUCTURE))
1153 structure_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURE>(rec)));
1154 if constexpr (selected_field_ids::contains(field::STRUCTURED_SEQ))
1155 structured_seq_column_buffer.push_back(std::move(seqan3::get<field::STRUCTURED_SEQ>(rec)));
1156 if constexpr (selected_field_ids::contains(field::ENERGY))
1157 energy_column_buffer.push_back(std::move(seqan3::get<field::ENERGY>(rec)));
1158 if constexpr (selected_field_ids::contains(field::REACT))
1159 react_column_buffer.push_back(std::move(seqan3::get<field::REACT>(rec)));
1160 if constexpr (selected_field_ids::contains(field::REACT_ERR))
1161 react_err_column_buffer.push_back(std::move(seqan3::get<field::REACT_ERR>(rec)));
1162 if constexpr (selected_field_ids::contains(field::COMMENT))
1163 comment_column_buffer.push_back(std::move(seqan3::get<field::COMMENT>(rec)));
1165 offset_column_buffer.push_back(std::move(seqan3::get<field::OFFSET>(rec)));
1177 template <istream_concept<
char> stream_type,
1178 structure_file_input_format_concept file_format,
1179 detail::fields_concept selected_field_
ids>
1184 std::remove_reference_t<stream_type>>;
1199 seqan3::detail::fields_concept selected_field_ids,
1200 seqan3::detail::type_list_of_structure_file_input_formats_concept
valid_formats,
1202 struct tuple_size<
seqan3::structure_file_in<traits_type, selected_field_ids, valid_formats, stream_type>>
1205 static constexpr
size_t value = selected_field_ids::as_array.
size();
1211 template<
size_t elem_no,
1213 seqan3::detail::fields_concept selected_field_ids,
1214 seqan3::detail::type_list_of_structure_file_input_formats_concept valid_formats,
1216 struct tuple_element<elem_no,
seqan3::structure_file_in<traits_type, selected_field_ids, valid_formats, stream_type>>
1217 : tuple_element<elem_no, typename seqan3::structure_file_in<traits_type,
1220 stream_type>::file_as_tuple_type>
typename traits_type::template seq_container_container< seq_type > seq_column_type
Column type of field::SEQ (seqan3::concatenated_sequences<seq_type> by default).
Definition: input.hpp:764
A seqan3::cartesian_composition that joins an aminoacid alphabet with a protein structure alphabet...
Definition: structured_aa.hpp:80
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: input.hpp:816
Thrown if there is no format that accepts a given file extension.
Definition: exception.hpp:54
structure_file_input_options< typename traits_type::seq_legal_alphabet, selected_field_ids::contains(field::STRUCTURED_SEQ)> options
The options are public and its members can be set directly.
Definition: input.hpp:1045
A class for reading structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: input.hpp:669
The "sequence", usually a range of nucleotides or amino acids.
The protein structure alphabet of the characters "HGIEBTSCX".
Definition: dssp9.hpp:85
Provides exceptions used in the I/O module.
Sequence and fixed interactions combined in one range.
Energy of a folded sequence, represented by one float number.
typename traits_type::template seq_container< typename traits_type::seq_alphabet > seq_type
The type of the sequence field (default std::vector of seqan3::rna5).
Definition: input.hpp:724
void const_reference
The const_reference type is void, because files are not const-iterable.
Definition: input.hpp:808
The five letter RNA alphabet of A,C,G,U and the unknown character N.
Definition: rna5.hpp:71
typename traits_type::template energy_container< energy_type > energy_column_type
Column type of field::ENERGY (std::vector<energy_type> by default).
Definition: input.hpp:775
wuss< 51 > wuss51
Alias for the default type wuss51.
Definition: wuss.hpp:285
std::make_signed_t< size_t > difference_type
A signed integer type, usually std::ptrdiff_t.
Definition: input.hpp:812
Comment field of arbitrary content, usually a string.
structure_file_in(filesystem::path const &_file_name, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: input.hpp:846
Meta-header for the structure module. It includes all headers from alphabet/structure/.
SeqAn specific customisations in the standard namespace.
Definition: align_result.hpp:221
typename traits_type::template comment_container< typename traits_type::comment_alphabet > comment_type
The type of the comment field (default std::string).
Definition: input.hpp:745
typename traits_type::offset_type offset_type
The type of the offset field (default size_t).
Definition: input.hpp:747
reference front() noexcept
Return the record we are currently at in the file.
Definition: input.hpp:979
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:195
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
Concept for input streams.
size_t size_type
An unsigned integer type, usually std::size_t.
Definition: input.hpp:810
typename traits_type::template structured_seq_container_container< structured_seq_type > structured_seq_column_type
Column type of field::STRUCTURED_SEQ (seqan3::concatenated_sequences<structured_seq_type> by default)...
Definition: input.hpp:773
Base pair probability matrix of interactions, usually a matrix of float numbers.
Contains seqan3::rna5, container aliases and string literals.
Provides seqan3::concatenated_sequences.
typename traits_type::energy_type energy_type
The type of the energy field (default std::optional<double>).
Definition: input.hpp:740
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: input.hpp:920
The twenty-seven letter amino acid alphabet.
Definition: aa27.hpp:67
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
Container that stores sequences concatenated internally.
Definition: concatenated_sequences.hpp:117
typename traits_type::template structure_container_container< structure_type > structure_column_type
Column type of field::STRUCTURE (seqan3::concatenated_sequences<structure_type> by default)...
Definition: input.hpp:770
Provides alphabet adaptations for standard char types.
typename traits_type::template structured_seq_container< typename traits_type::template structured_seq_alphabet< typename traits_type::seq_alphabet, typename traits_type::structure_alphabet > > structured_seq_type
The type of the sequence-structure field (default std::vector of structured_rna<rna5, wuss51>).
Definition: input.hpp:738
Reactivity error values given in a vector corresponding to REACT.
Provides the seqan3::record template and the seqan3::field enum.
Sequence (SEQ) relative start position (0-based), unsigned value.
The identifier, usually a string.
The 15 letter RNA alphabet, containing all IUPAC symbols minus the gap.
Definition: rna15.hpp:73
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:82
stream_type_ stream_type
The type of the underlying stream.
Definition: input.hpp:683
std::ranges::default_sentinel sentinel
The type returned by end().
Definition: input.hpp:818
structure_file_in(stream_type &&_stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: input.hpp:890
type_list< seq_type, id_type, bpp_type, structure_type, structured_seq_type, energy_type, react_type, react_type, comment_type, offset_type > field_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:751
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: input.hpp:938
Fixed interactions, usually a string of structure alphabet characters.
typename traits_type::template bpp_container_container< bpp_type > bpp_column_type
Column type of field::BPP (std::vector<bpp_type> by default).
Definition: input.hpp:768
typename traits_type::template offset_container< offset_type > offset_column_type
Column type of field::OFFSET (std::vector<offset_type> by default).
Definition: input.hpp:781
A seqan3::cartesian_composition that joins a nucleotide alphabet with an RNA structure alphabet...
Definition: structured_rna.hpp:80
traits_type_ traits_type
A traits type that defines aliases and template for storage of the fields.
Definition: input.hpp:677
type_list< seq_column_type, id_column_type, bpp_column_type, structure_column_type, structured_seq_column_type, energy_column_type, react_column_type, react_column_type, comment_column_type, offset_column_type > field_column_types
The previously defined types aggregated in a seqan3::type_list.
Definition: input.hpp:793
typename traits_type::template id_container_container< id_type > id_column_type
Column type of field::ID (seqan3::concatenated_sequences<id_type> by default).
Definition: input.hpp:766
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: input.hpp:681
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
::ranges::equal equal
Alias for ranges::equal. Determines if two sets of elements are the same.
Definition: ranges:210
Provides C++20 additions to the type_traits header.
Provides various metafunctions on generic types.
typename traits_type::template react_container_container< react_type > react_column_type
Column type of field::REACT and field::REACT_ERR (std::vector<react_type> by default).
Definition: input.hpp:777
typename traits_type::template bpp_container< typename traits_type::template bpp_queue< typename traits_type::template bpp_item< typename traits_type::bpp_prob, typename traits_type::bpp_partner > >> bpp_type
The type of the base pair probabilities (default std::vector of std::set<std::pair<double, size_t>>).
Definition: input.hpp:731
Meta-header for the aminoacid submodule; includes all headers from alphabet/aminoacid/.
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: input.hpp:679
typename traits_type::template id_container< typename traits_type::id_alphabet > id_type
The type of the ID field (default std::string).
Definition: input.hpp:726
typename traits_type::template comment_container_container< comment_type > comment_column_type
Column type of field::COMMENT (seqan3::concatenated_sequences<comment_type> by default).
Definition: input.hpp:779
Reactivity values of the sequence characters given in a vector of float numbers.
detail::in_file_iterator< structure_file_in > iterator
The iterator type of this view (an input iterator).
Definition: input.hpp:814
Provides the seqan3::detail::in_file_iterator class template.
typename traits_type::template react_container< typename traits_type::react_type > react_type
The type of the reactivity and reactivity error fields (default std::vector of double).
Definition: input.hpp:742
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215
typename traits_type::template structure_container< typename traits_type::structure_alphabet > structure_type
The type of the structure field (default std::vector of seqan3::wuss51).
Definition: input.hpp:734