48 #include <range/v3/algorithm/equal.hpp> 61 #include <seqan3/io/detail/record.hpp> 144 template <
typename t>
147 concept sequence_file_input_traits_concept = requires (t v)
149 requires alphabet_concept<typename t::sequence_alphabet>;
150 requires alphabet_concept<typename t::sequence_legal_alphabet>;
151 requires explicitly_convertible_to_concept<typename t::sequence_legal_alphabet, typename t::sequence_alphabet>;
152 requires sequence_container_concept<typename t::template sequence_container<typename t::sequence_alphabet>>;
153 requires sequence_container_concept<
typename t::template sequence_container_container<
154 typename t::template sequence_container<typename t::sequence_alphabet>>>;
156 requires alphabet_concept<typename t::id_alphabet>;
157 requires sequence_container_concept<typename t::template id_container<typename t::id_alphabet>>;
158 requires sequence_container_concept<
typename t::template id_container_container<
typename t::template id_container<
159 typename t::id_alphabet>>>;
161 requires quality_concept<typename t::quality_alphabet>;
162 requires sequence_container_concept<typename t::template quality_container<typename t::quality_alphabet>>;
163 requires sequence_container_concept<
typename t::template quality_container_container<
164 typename t::template quality_container<typename t::quality_alphabet>>>;
205 template <
typename _sequence_alphabet>
206 using sequence_container = std::vector<_sequence_alphabet>;
207 template <
typename _sequence_container>
210 using id_alphabet = char;
211 template <
typename _
id_alphabet>
212 using id_container = std::basic_string<_id_alphabet>;
213 template <
typename _
id_container>
217 template <
typename _quality_alphabet>
218 using quality_container = std::vector<_quality_alphabet>;
219 template <
typename _quality_container>
493 static_assert([] () constexpr
495 for (
field f : selected_field_ids::as_array)
496 if (!field_ids::contains(f))
500 "You selected a field that is not valid for sequence files, please refer to the documentation " 501 "of sequence_file_input::field_ids for the accepted values.");
503 static_assert([] () constexpr
506 (selected_field_ids::contains(field::SEQ) ||
507 (selected_field_ids::contains(field::QUAL))));
509 "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
516 using sequence_type =
typename traits_type::template sequence_container<
518 typename traits_type::sequence_alphabet>;
520 using id_type =
typename traits_type::template id_container<
521 typename traits_type::id_alphabet>;
523 using quality_type =
typename traits_type::template quality_container<
524 typename traits_type::quality_alphabet>;
527 template sequence_container<
qualified<
typename traits_type::sequence_alphabet,
528 typename traits_type::quality_alphabet>>;
543 using sequence_column_type =
typename traits_type::template sequence_container_container<sequence_type>;
546 using id_column_type =
typename traits_type::template id_container_container<id_type>;
578 using iterator = detail::in_file_iterator<sequence_file_input>;
611 selected_field_ids
const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
614 stream.open(_file_name, std::ios_base::in | std::ios::binary);
615 if (!stream.is_open())
619 bool format_found =
false;
620 std::string extension = _file_name.extension().string();
621 if (extension.size() > 1)
623 extension = extension.substr(1);
628 for (
auto const & ext : fmt_type::file_extensions)
658 template <sequence_file_input_format_concept file_format>
660 file_format
const & SEQAN3_DOXYGEN_ONLY(format_tag),
661 selected_field_ids
const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
662 stream{std::move(_stream)}, format{file_format{}}
664 static_assert(meta::in<valid_formats, file_format>::value,
665 "You selected a format that is not in the valid_formats of this file.");
750 return record_buffer;
762 static_assert(sequence_file_input::selected_field_ids::contains(f),
763 "You requested a field via get that was not selected for the file.");
767 return seqan3::get<f>(file.columns_buffer);
774 return std::move(get<f>(file));
781 static_assert(i < sequence_file_input::selected_field_ids::as_array.
size(),
782 "You requested a field number larger than the number of selected fields for the file.");
785 return std::get<i>(file.columns_buffer);
792 return std::move(get<i>(file));
796 template <
typename t>
801 return std::get<t>(file.columns_buffer);
805 template <
typename t>
808 return std::move(get<t>(file));
828 std::string file_name;
837 using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
842 void read_next_record()
848 record_buffer.clear();
857 assert(!format.valueless_by_exception());
865 detail::get_or_ignore<field::SEQ_QUAL>(record_buffer),
866 detail::get_or_ignore<field::ID>(record_buffer),
867 detail::get_or_ignore<field::SEQ_QUAL>(record_buffer));
873 detail::get_or_ignore<field::SEQ>(record_buffer),
874 detail::get_or_ignore<field::ID>(record_buffer),
875 detail::get_or_ignore<field::QUAL>(record_buffer));
885 auto & sequence_column_buffer = detail::get_or_ignore<field::SEQ>(columns_buffer);
886 auto & id_column_buffer = detail::get_or_ignore<field::ID>(columns_buffer);
887 auto & qual_column_buffer = detail::get_or_ignore<field::QUAL>(columns_buffer);
888 auto & seq_qual_column_buffer = detail::get_or_ignore<field::SEQ_QUAL>(columns_buffer);
891 for (
auto & rec : *
this)
893 if constexpr (selected_field_ids::contains(field::SEQ))
894 sequence_column_buffer.push_back(std::move(seqan3::get<field::SEQ>(rec)));
895 if constexpr (selected_field_ids::contains(field::ID))
896 id_column_buffer.push_back(std::move(seqan3::get<field::ID>(rec)));
897 if constexpr (selected_field_ids::contains(field::QUAL))
898 qual_column_buffer.push_back(std::move(seqan3::get<field::QUAL>(rec)));
900 seq_qual_column_buffer.push_back(std::move(seqan3::get<field::SEQ_QUAL>(rec)));
912 template <istream_concept<
char> stream_type,
913 sequence_file_input_format_concept file_format,
914 detail::fields_concept selected_field_
ids>
919 std::remove_reference_t<stream_type>>;
932 seqan3::detail::fields_concept selected_field_ids,
933 seqan3::detail::type_list_of_sequence_file_input_formats_concept
valid_formats,
935 struct tuple_size<
seqan3::sequence_file_input<traits_type, selected_field_ids, valid_formats, stream_type>>
938 static constexpr
size_t value = selected_field_ids::as_array.
size();
942 template <
size_t elem_no,
944 seqan3::detail::fields_concept selected_field_ids,
945 seqan3::detail::type_list_of_sequence_file_input_formats_concept valid_formats,
947 struct tuple_element<elem_no,
seqan3::sequence_file_input<traits_type, selected_field_ids, valid_formats, stream_type>>
948 : tuple_element<elem_no, typename seqan3::sequence_file_input<traits_type,
951 stream_type>::file_as_tuple_type>
Contains quality alphabet compositions.
Thrown if there is no format that accepts a given file extension.
Definition: exception.hpp:54
The "sequence", usually a range of nucleotides or amino acids.
Provides exceptions used in the I/O module.
Contains seqan3::aa27, container aliases and string literals.
SeqAn specific customisations in the standard namespace.
Definition: align_result.hpp:221
The 15 letter DNA alphabet, containing all IUPAC symbols minus the gap.
Definition: dna15.hpp:73
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:195
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
Concept for input streams.
The qualities, usually in phred-score notation.
Joins an arbitrary alphabet with a quality alphabet.
Definition: qualified.hpp:85
Provides seqan3::concatenated_sequences.
The twenty-seven letter amino acid alphabet.
Definition: aa27.hpp:67
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
Container that stores sequences concatenated internally.
Definition: concatenated_sequences.hpp:117
The five letter DNA alphabet of A,C,G,T and the unknown character N.
Definition: dna5.hpp:73
Sequence and qualities combined in one range.
Provides alphabet adaptations for standard char types.
Provides the seqan3::record template and the seqan3::field enum.
The identifier, usually a string.
Contains seqan3::phred42 quality scores.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
::ranges::equal equal
Alias for ranges::equal. Determines if two sets of elements are the same.
Definition: ranges:210
Provides various metafunctions on generic types.
Meta-header for the nucleotide submodule; includes all headers from alphabet/nucleotide/.
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
Quality type for traditional Sanger and modern Illumina Phred scores (typical range).
Definition: phred42.hpp:69
Provides the seqan3::detail::in_file_iterator class template.
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215