SeqAn3
output.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <cassert>
43 #include <fstream>
44 #include <string>
45 #include <string_view>
46 #include <variant>
47 #include <vector>
48 
57 #include <seqan3/io/detail/record.hpp>
58 #include <seqan3/io/exception.hpp>
59 #include <seqan3/io/filesystem.hpp>
60 #include <seqan3/io/record.hpp>
62 #include <seqan3/std/ranges>
63 
64 namespace seqan3
65 {
66 
67 // ----------------------------------------------------------------------------
68 // alignment_file_output
69 // ----------------------------------------------------------------------------
70 
213 template <detail::fields_concept selected_field_ids_ =
214  fields<field::SEQ,
215  field::ID,
221  field::MAPQ,
222  field::QUAL,
223  field::FLAG,
224  field::MATE,
225  field::TAGS,
229  detail::type_list_of_alignment_file_output_formats_concept valid_formats_ =
230  type_list<alignment_file_format_sam/*,
231  alignment_file_format_bam,
232  alignment_file_format_blast_tabular*/>,
233  ostream_concept<char> stream_type_ = std::ofstream>
235 {
236 public:
241  using selected_field_ids = selected_field_ids_;
244  using valid_formats = valid_formats_;
246  using stream_type = stream_type_;
248 
251  field::SEQ,
252  field::ID,
258  field::MAPQ,
259  field::FLAG,
260  field::QUAL,
261  field::MATE,
262  field::TAGS,
264  field::BIT_SCORE>;
265 
266  static_assert([] () constexpr
267  {
268  for (field f : selected_field_ids::as_array)
269  if (!field_ids::contains(f))
270  return false;
271  return true;
272  }(),
273  "You selected a field that is not valid for alignment files, "
274  "please refer to the documentation of "
275  "seqan3::alignment_file_output::field_ids for the accepted values.");
276 
281  using value_type = void;
282  using reference = void;
283  using const_reference = void;
284  using size_type = void;
286  using difference_type = std::ptrdiff_t;
288  using iterator = detail::out_file_iterator<alignment_file_output>;
290  using const_iterator = void;
294 
298  alignment_file_output() = delete;
309  ~alignment_file_output() = default;
310 
330  alignment_file_output(filesystem::path const & _file_name,
331  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
332  {
333  // open stream
334  stream.open(_file_name, std::ios_base::out | std::ios::binary);
335  if (!stream.is_open())
336  throw file_open_error{"Could not open file for writing."};
337 
338  // initialise format handler or throw if format is not found
339  detail::set_format(format, _file_name);
340  }
341 
350  template <alignment_file_output_format_concept file_format>
352  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
353  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
354  stream{std::move(_stream)}, format{file_format{}}
355  {
356  static_assert(meta::in<valid_formats, file_format>::value,
357  "You selected a format that is not in the valid_formats of this file.");
358  }
360 
382  iterator begin() noexcept
383  {
384  return {*this};
385  }
386 
401  sentinel end() noexcept
402  {
403  return {};
404  }
405 
424  template <typename record_t>
425  void push_back(record_t && r)
427  requires tuple_like_concept<record_t> &&
428  requires { requires detail::is_type_specialisation_of_v<remove_cvref_t<record_t>, record>; }
430  {
431  using default_align_t = std::pair<std::basic_string_view<gapped<char>>, std::basic_string_view<gapped<char>>>;
432  using default_mate_t = std::tuple<std::string_view, uint32_t, uint32_t>;
433 
434  write_record(detail::get_or<field::HEADER_PTR>(r, nullptr),
435  detail::get_or<field::SEQ>(r, std::string_view{}),
436  detail::get_or<field::QUAL>(r, std::string_view{}),
437  detail::get_or<field::ID>(r, std::string_view{}),
438  detail::get_or<field::OFFSET>(r, 0u),
439  detail::get_or<field::REF_SEQ>(r, std::string_view{}),
440  detail::get_or<field::REF_ID>(r, std::string_view{}),
441  detail::get_or<field::REF_OFFSET>(r, -1), // 1 is added in format SAM
442  detail::get_or<field::ALIGNMENT>(r, default_align_t{}),
443  detail::get_or<field::MAPQ>(r, 0u),
444  detail::get_or<field::FLAG>(r, 0u),
445  detail::get_or<field::MATE>(r, default_mate_t{}),
446  detail::get_or<field::TAGS>(r, sam_tag_dictionary{}),
447  detail::get_or<field::EVALUE>(r, 0u),
448  detail::get_or<field::BIT_SCORE>(r, 0u));
449  }
450 
472  template <typename tuple_t>
473  void push_back(tuple_t && t)
477  {
478  using default_align_t = std::pair<std::basic_string_view<gapped<char>>, std::basic_string_view<gapped<char>>>;
479  using default_mate_t = std::tuple<std::string_view, uint32_t, uint32_t>;
480 
481  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
482  write_record(detail::get_or<selected_field_ids::index_of(field::HEADER_PTR)>(t, nullptr),
483  detail::get_or<selected_field_ids::index_of(field::SEQ)>(t, std::string_view{}),
484  detail::get_or<selected_field_ids::index_of(field::QUAL)>(t, std::string_view{}),
485  detail::get_or<selected_field_ids::index_of(field::ID)>(t, std::string_view{}),
486  detail::get_or<selected_field_ids::index_of(field::OFFSET)>(t, 0u),
487  detail::get_or<selected_field_ids::index_of(field::REF_SEQ)>(t, std::string_view{}),
488  detail::get_or<selected_field_ids::index_of(field::REF_ID)>(t, std::string_view{}),
489  detail::get_or<selected_field_ids::index_of(field::REF_OFFSET)>(t, -1), // 1 is added in format SAM
490  detail::get_or<selected_field_ids::index_of(field::ALIGNMENT)>(t, default_align_t{}),
491  detail::get_or<selected_field_ids::index_of(field::MAPQ)>(t, 0u),
492  detail::get_or<selected_field_ids::index_of(field::FLAG)>(t, 0u),
493  detail::get_or<selected_field_ids::index_of(field::MATE)>(t, default_mate_t{}),
494  detail::get_or<selected_field_ids::index_of(field::TAGS)>(t, sam_tag_dictionary{}),
495  detail::get_or<selected_field_ids::index_of(field::EVALUE)>(t, 0u),
496  detail::get_or<selected_field_ids::index_of(field::BIT_SCORE)>(t, 0u));
497  }
498 
522  template <typename arg_t, typename ... arg_types>
523  void emplace_back(arg_t && arg, arg_types && ... args)
524  {
525  push_back(std::tie(arg, args...));
526  }
527 
560  template <typename rng_t>
565  {
566  for (auto && record : range)
567  push_back(std::forward<decltype(record)>(record));
568  return *this;
569  }
570 
605  template <typename rng_t>
610  {
611  f = range;
612  return f;
613  }
614 
616  template <typename rng_t>
621  {
622  f = range;
623  return std::move(f);
624  }
626 
629 
633  stream_type & get_stream()
634  {
635  return stream;
636  }
638 
650  {
651  if (header_ptr == nullptr)
652  header_ptr = std::unique_ptr<alignment_file_header>(new alignment_file_header);
653 
654  return *header_ptr;
655  }
656 
657 protected:
659 
661  std::unique_ptr<alignment_file_header> header_ptr{nullptr};
662 
664  std::string file_name;
665 
667  stream_type stream;
668 
670  using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
671 
673  format_type format;
674 
676  template <typename ...pack_type>
677  void write_record(alignment_file_header const * hdr_ptr, pack_type && ...remainder)
678  {
679  if (header_ptr == nullptr && hdr_ptr != nullptr)
680  header_ptr = std::unique_ptr<alignment_file_header>(new alignment_file_header(*hdr_ptr));
681 
682  static_assert((sizeof...(pack_type) == 14), "Wrong parameter list passed to write_record.");
683 
684  assert(!format.valueless_by_exception());
685 
686  std::visit([&] (auto & f)
687  {
688  f.write(stream,
689  options,
690  header_ptr,
691  std::forward<pack_type>(remainder)...);
692 
693  }, format);
694  }
695 
697  friend iterator;
698 };
699 
704 template <ostream_concept<char> stream_type,
705  alignment_file_output_format_concept file_format,
706  detail::fields_concept selected_field_ids>
707 alignment_file_output(stream_type && _stream, file_format const &, selected_field_ids const &)
710  std::remove_reference_t<stream_type>>;
712 
713 } // namespace seqan3
alignment_file_output(filesystem::path const &_file_name, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:330
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
The "sequence", usually a range of nucleotides or amino acids.
Provides exceptions used in the I/O module.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:244
alignment_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:628
The (pairwise) alignment stored in an seqan3::alignment object.
The alignment flag (bit information), uint16_t value.
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition: output.hpp:473
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:208
Provides seqan3::type_list and auxiliary metafunctions.
Whether a type behaves like a tuple.
alignment_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
alignment_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition: output.hpp:561
Provides the seqan3::alignment_file_format_sam class.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:242
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
The qualities, usually in phred-score notation.
The e-value (length normalized bit score), double value.
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:382
Provides seqan3::alignment_file_output_options.
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
A class for writing alignment files, e.g. SAM, BAM, BLAST, ...
Definition: output.hpp:234
Provides seqan3::tuple_like_concept.
Provides the seqan3::alignment_file_header class.
Provides the seqan3::record template and the seqan3::field enum.
~alignment_file_output()=default
Destructor is defaulted.
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:401
Sequence (SEQ) relative start position (0-based), unsigned value.
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:523
The identifier, usually a string.
Adaptations of concepts from the Ranges TS.
The mate pair information given as a std::tuple of reference name, offset and template length...
The identifier of the (reference) sequence that SEQ was aligned to.
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition: output.hpp:425
alignment_file_output & operator=(alignment_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
std::ptrdiff_t difference_type
A signed integer type, usually std::ptrdiff_t.
Definition: output.hpp:286
The options type defines various option members that influence the behavior of all or some formats...
Definition: output_options.hpp:49
A pointer to the seqan3::alignment_file_header object storing header information. ...
Stream concepts.
alignment_file_output(stream_type &&_stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:351
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
Provides various metafunctions on generic types.
Provides the seqan3::detail::out_file_iterator class template.
Provides seqan3::alignment_file_output_format_concept and auxiliary classes.
friend alignment_file_output operator|(rng_t &&range, alignment_file_output &&f)
Definition: output.hpp:617
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
detail::out_file_iterator< alignment_file_output > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:288
Stores the header information of alignment files.
Definition: header.hpp:52
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
The optional tags in the SAM format, stored in a dictionary.
alignment_file_header & header()
Access the file's header.
Definition: output.hpp:649
std::ranges::default_sentinel sentinel
The type returned by end().
Definition: output.hpp:292
The bit score (statistical significance indicator), unsigned value.
The mapping quality of the SEQ alignment, usually a phred-scaled score.
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:290
friend alignment_file_output & operator|(rng_t &&range, alignment_file_output &f)
Write a range of records (or tuples) to the file.
Definition: output.hpp:606
stream_type_ stream_type
The type of the underlying stream.
Definition: output.hpp:246
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:339