SeqAn3
output.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <cassert>
43 #include <fstream>
44 #include <string>
45 #include <variant>
46 #include <vector>
47 
48 #include <range/v3/algorithm/equal.hpp>
49 #include <range/v3/view/zip.hpp>
50 
55 #include <seqan3/io/exception.hpp>
56 #include <seqan3/io/filesystem.hpp>
57 #include <seqan3/io/record.hpp>
59 #include <seqan3/io/detail/record.hpp>
65 #include <seqan3/std/ranges>
66 
67 namespace seqan3
68 {
69 
70 // ----------------------------------------------------------------------------
71 // sequence_file_output
72 // ----------------------------------------------------------------------------
73 
267 template <detail::fields_concept selected_field_ids_ = fields<field::SEQ, field::ID, field::QUAL>,
268  detail::type_list_of_sequence_file_output_formats_concept valid_formats_ =
269  type_list<sequence_file_format_fasta, sequence_file_format_fastq>,
270  ostream_concept<char> stream_type_ = std::ofstream>
272 {
273 public:
// Public aliases that re-export the three class template parameters under member names.
278  using selected_field_ids = selected_field_ids_; // the fields selected for the record
281  using valid_formats = valid_formats_; // type list with the possible formats
283  using stream_type = stream_type_; // type of the underlying stream
285 
288 
// Compile-time validation of the selected fields: the immediately-invoked constexpr lambda
// walks selected_field_ids::as_array and yields false as soon as one entry is not contained
// in field_ids, which makes the static_assert fail with the message below.
// NOTE(review): field_ids is declared on a line that is not part of this listing - confirm
// its contents there.
289  static_assert([] () constexpr
290  {
291  for (field f : selected_field_ids::as_array)
292  if (!field_ids::contains(f))
293  return false;
294  return true;
295  }(),
296  "You selected a field that is not valid for sequence files, please refer to the documentation "
297  "of sequence_file_output::field_ids for the accepted values.");
298 
// Compile-time validation that field::SEQ_QUAL is not selected together with field::SEQ
// or field::QUAL; selecting SEQ_QUAL alone, or SEQ and/or QUAL without SEQ_QUAL, passes.
299  static_assert([] () constexpr
300  {
301  return !(selected_field_ids::contains(field::SEQ_QUAL) &&
302  (selected_field_ids::contains(field::SEQ) ||
303  (selected_field_ids::contains(field::QUAL))));
304  }(),
305  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
306 
// Container-style member types. All of them are void except difference_type and iterator.
311  using value_type = void;
312  using reference = void;
313  using const_reference = void;
314  using size_type = void;
316  using difference_type = std::ptrdiff_t; // a signed integer type
318  using iterator = detail::out_file_iterator<sequence_file_output>; // an output iterator over this file
320  using const_iterator = void; // void, because files are not const-iterable
324 
// Special member functions visible in this listing:
// - default construction is deleted (a stream or a file name has to be given),
// - copy construction is deleted,
// - the destructor is defaulted.
328  sequence_file_output() = delete;
331  sequence_file_output(sequence_file_output const &) = delete;
339  ~sequence_file_output() = default;
340 
// Construct from a file name.
//
// _file_name  Path of the file to write to; it is passed both to stream.open() and to
//             detail::set_format(), which initialises the `format` member from it.
// fields_tag  Unnamed in the compiled code (the name is wrapped in SEQAN3_DOXYGEN_ONLY) and
//             not used inside the body; defaults to a value-initialised selected_field_ids.
//
// Throws file_open_error if the stream is not open after the call to open().
// NOTE(review): the `file_name` data member is not assigned by this constructor.
350  sequence_file_output(filesystem::path const & _file_name,
351  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
352  {
353  // open stream
354  stream.open(_file_name, std::ios_base::out | std::ios::binary); // output mode, binary
355  if (!stream.is_open())
356  throw file_open_error{"Could not open file for writing."};
357 
358  // initialise format handler or throw if format is not found
359  detail::set_format(format, _file_name);
360  }
361 
368  template <sequence_file_output_format_concept file_format>
370  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
371  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
372  stream{std::move(_stream)}, format{file_format{}}
373  {
374  static_assert(meta::in<valid_formats, file_format>::value,
375  "You selected a format that is not in the valid_formats of this file.");
376  }
378 
// Returns an iterator that is list-initialised from *this. Declared noexcept.
417  iterator begin() noexcept
418  {
419  return {*this};
420  }
421 
// Returns a value-initialised sentinel for comparison with the iterator. Declared noexcept.
// NOTE(review): the `sentinel` alias is declared on a line that is not part of this listing.
436  sentinel end() noexcept
437  {
438  return {};
439  }
440 
// Write a single record to the file.
//
// Only participates for types that satisfy tuple_like_concept and whose cv/ref-stripped
// type is a specialisation of `record`. The SEQ, ID, QUAL and SEQ_QUAL entries are looked
// up by field identifier with detail::get_or_ignore and passed, in that order, to
// write_record().
// NOTE(review): get_or_ignore presumably yields std::ignore for a field that the record
// does not contain - confirm in its definition.
472  template <typename record_t>
473  void push_back(record_t && r)
474  requires tuple_like_concept<record_t> &&
475  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
476  {
477  write_record(detail::get_or_ignore<field::SEQ>(r),
478  detail::get_or_ignore<field::ID>(r),
479  detail::get_or_ignore<field::QUAL>(r),
480  detail::get_or_ignore<field::SEQ_QUAL>(r));
481 
482  }
483 
519  template <typename tuple_t>
520  void push_back(tuple_t && t)
522  {
523  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
524  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t),
525  detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t),
526  detail::get_or_ignore<selected_field_ids::index_of(field::QUAL)>(t),
527  detail::get_or_ignore<selected_field_ids::index_of(field::SEQ_QUAL)>(t));
528  }
529 
// Write a record to the file by passing its fields as individual arguments.
//
// At least one argument is required by the signature. The arguments are bound by reference
// into a tuple with std::tie (the fields themselves are not copied here) and that tuple is
// handed to push_back().
567  template <typename arg_t, typename ... arg_types>
568  void emplace_back(arg_t && arg, arg_types && ... args)
569  {
570  push_back(std::tie(arg, args...));
571  }
572 
605  template <std::ranges::InputRange rng_t>
606  sequence_file_output & operator=(rng_t && range)
608  {
609  for (auto && record : range)
610  push_back(std::forward<decltype(record)>(record));
611  return *this;
612  }
613 
659  template <std::ranges::InputRange rng_t>
662  {
663  f = range;
664  return f;
665  }
666 
668  template <std::ranges::InputRange rng_t>
671  {
672  f = range;
673  return std::move(f);
674  }
676 
718  template <typename typelist, typename field_ids>
720  {
721  write_columns(detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ>(r)),
722  detail::range_wrap_ignore(detail::get_or_ignore<field::ID>(r)),
723  detail::range_wrap_ignore(detail::get_or_ignore<field::QUAL>(r)),
724  detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ_QUAL>(r)));
725  return *this;
726  }
727 
// Write columns (wrapped in a std::tuple) to the file.
//
// The position of each field inside the tuple is taken from selected_field_ids::index_of();
// every extracted element is passed through detail::range_wrap_ignore before the four
// results are given to write_columns() in the order SEQ, ID, QUAL, SEQ_QUAL.
// Returns a reference to this file object.
764  template <typename ... arg_types>
765  sequence_file_output & operator=(std::tuple<arg_types...> const & t)
766  {
767  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
768  write_columns(
769  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t)),
770  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t)),
771  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::QUAL)>(t)),
772  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ_QUAL)>(t)));
773  return *this;
774  }
776 
779 
// Returns a non-const reference to the underlying `stream` member.
783  stream_type & get_stream()
784  {
785  return stream;
786  }
788 protected:
// Data members of the file object.
// NOTE(review): file_name is not assigned by either of the two constructors whose bodies
// are visible in this listing.
791  std::string file_name;
792 
// The stream that records are written to.
794  stream_type stream;
795 
// Type of the format handler: built from valid_formats and std::variant. The member
// functions access `format` through std::visit and valueless_by_exception().
797  using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
// The currently selected format handler.
799  format_type format;
800 
// Write a single record through the format handler currently held in `format`.
//
// seq, id, qual  Passed unchanged to the handler's write() when seq_qual is ignore.
// seq_qual       If it does not decay to ignore, it is piped through view::convert twice
//                (once per nested alphabet type) and supplies both the sequence and the
//                quality argument of write(); seq and qual must then both be ignore,
//                which is enforced by the static_assert.
//
// `options` is a member declared on a line that is not part of this listing.
// NOTE(review): seq_qual_t is deduced from a forwarding reference, so it is a reference type
// when an lvalue is passed; `typename seq_qual_t::sequence_alphabet_type` cannot name a
// member of a reference type - confirm how this branch is instantiated.
802  template <typename seq_t, typename id_t, typename qual_t, typename seq_qual_t>
803  void write_record(seq_t && seq, id_t && id, qual_t && qual, seq_qual_t && seq_qual)
804  {
805  static_assert(detail::decays_to_ignore_v<seq_qual_t> ||
806  (detail::decays_to_ignore_v<seq_t> && detail::decays_to_ignore_v<qual_t>),
807  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
808 
809  assert(!format.valueless_by_exception());
810  std::visit([&] (auto & f)
811  {
812  if constexpr (!detail::decays_to_ignore_v<seq_qual_t>)
813  {
814  f.write(stream,
815  options,
816  seq_qual | view::convert<typename seq_qual_t::sequence_alphabet_type>,
817  id,
818  seq_qual | view::convert<typename seq_qual_t::quality_alphabet_type>);
819  }
820  else
821  {
822  f.write(stream,
823  options,
824  seq,
825  id,
826  qual);
827  }
828  }, format);
829  }
830 
832  template <std::ranges::InputRange seqs_t,
834  std::ranges::InputRange quals_t,
835  std::ranges::InputRange seq_quals_t>
836  void write_columns(seqs_t && seqs,
837  ids_t && ids,
838  quals_t && quals,
839  seq_quals_t && seq_quals)
840  {
841  static_assert(!(detail::decays_to_ignore_v<reference_t<seqs_t>> &&
842  detail::decays_to_ignore_v<reference_t<ids_t>> &&
843  detail::decays_to_ignore_v<reference_t<quals_t>> &&
844  detail::decays_to_ignore_v<reference_t<seq_quals_t>>),
845  "At least one of the columns must not be set to std::ignore.");
846 
847  static_assert(detail::decays_to_ignore_v<reference_t<seq_quals_t>> ||
848  (detail::decays_to_ignore_v<reference_t<seqs_t>> &&
849  detail::decays_to_ignore_v<reference_t<quals_t>>),
850  "You may not select field::SEQ_QUAL and either of field::SEQ and field::QUAL at the same time.");
851 
852  assert(!format.valueless_by_exception());
853  std::visit([&] (auto & f)
854  {
855  if constexpr (!detail::decays_to_ignore_v<reference_t<seq_quals_t>>)
856  {
857  auto zipped = ranges::view::zip(seq_quals, ids);
858 
859  for (auto && v : zipped)
860  f.write(stream,
861  options,
863  std::get<1>(v),
865  }
866  else
867  {
868  auto zipped = ranges::view::zip(seqs, ids, quals);
869 
870  for (auto && v : zipped)
871  f.write(stream, options, std::get<0>(v), std::get<1>(v), std::get<2>(v));
872  }
873  }, format);
874  }
875 
// Befriend the iterator type so that it can access the non-public members of this class.
877  friend iterator;
878 };
879 
884 template <ostream_concept<char> stream_type,
885  sequence_file_output_format_concept file_format,
886  detail::fields_concept selected_field_ids>
887 sequence_file_output(stream_type && _stream, file_format const &, selected_field_ids const &)
890  std::remove_reference_t<stream_type>>;
892 
893 } // namespace seqan3
auto const convert
A view that converts each element in the input range (implicitly or via static_cast).
Definition: convert.hpp:89
std::ranges::default_sentinel sentinel
The type returned by end().
Definition: output.hpp:322
void push_back(record_t &&r) requires tuple_like_concept< record_t > &&requires
Write a seqan3::record to the file.
Definition: output.hpp:473
The "sequence", usually a range of nucleotides or amino acids.
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: output.hpp:271
Provides exceptions used in the I/O module.
Provides the seqan3::sequence_file_format_fastq class.
sequence_file_output & operator=(std::tuple< arg_types... > const &t)
Write columns (wrapped in a std::tuple) to the file.
Definition: output.hpp:765
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:208
Provides seqan3::type_list and auxiliary metafunctions.
sequence_file_output & operator=(record< typelist, field_ids > const &r)
Write columns (wrapped in a seqan3::record) to the file.
Definition: output.hpp:719
Whether a type behaves like a tuple.
sequence_file_output & operator=(rng_t &&range) requires tuple_like_concept< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:606
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:318
Provides seqan3::view::convert.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
The qualities, usually in phred-score notation.
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
friend sequence_file_output & operator|(rng_t &&range, sequence_file_output &f) requires tuple_like_concept< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:660
stream_type_ stream_type
The type of the underlying stream.
Definition: output.hpp:283
Sequence and qualities combined in one range.
Provides seqan3::sequence_file_output_options.
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:320
Provides seqan3::tuple_like_concept.
Provides the seqan3::record template and the seqan3::field enum.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:281
The identifier, usually a string.
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:568
Adaptations of concepts from the Ranges TS.
~sequence_file_output()=default
Destructor is defaulted.
void push_back(tuple_t &&t) requires tuple_like_concept< tuple_t >
Write a record in form of a std::tuple to the file.
Definition: output.hpp:520
std::ptrdiff_t difference_type
A signed integer type, usually std::ptrdiff_t.
Definition: output.hpp:316
sequence_file_output(stream_type &&_stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:369
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:417
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
Stream concepts.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
Provides various metafunctions on generic types.
Provides seqan3::sequence_file_format_out_concept and auxiliary classes.
Provides the seqan3::detail::out_file_iterator class template.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:279
typename reference< t >::type reference_t
Type metafunction shortcut for seqan3::reference.
Definition: pre.hpp:98
sequence_file_output(filesystem::path const &_file_name, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:350
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
The options type defines various option members that influence the behaviour of all or some formats...
Definition: output_options.hpp:48
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:436
sequence_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:778
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
Provides the seqan3::sequence_file_format_fasta class.
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215
friend sequence_file_output operator|(rng_t &&range, sequence_file_output &&f) requires tuple_like_concept< reference_t< rng_t >>
Definition: output.hpp:669