SeqAn3
output.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <cassert>
43 #include <fstream>
44 #include <optional>
45 #include <string>
46 #include <type_traits>
47 #include <variant>
48 #include <vector>
49 
50 #include <range/v3/algorithm/equal.hpp>
51 #include <range/v3/view/zip.hpp>
52 
58 #include <seqan3/io/exception.hpp>
59 #include <seqan3/io/filesystem.hpp>
60 #include <seqan3/io/record.hpp>
62 #include <seqan3/io/detail/record.hpp>
67 #include <seqan3/std/ranges>
68 
69 namespace seqan3
70 {
71 
72 // ----------------------------------------------------------------------------
73 // structure_file_out
74 // ----------------------------------------------------------------------------
75 
// Class template for writing structured sequence files.
// Template parameters:
//   selected_field_ids_ - a seqan3::fields list with the fields selected for a record
//                         (defaults to SEQ, ID, STRUCTURE).
//   valid_formats_      - a seqan3::type_list with the possible output formats
//                         (defaults to the Vienna format only).
//   stream_type_        - the type of the underlying stream (defaults to std::ofstream).
// NOTE(review): listing line 282 (the class head naming `structure_file_out`) is absent from this
// rendering - confirm against the original header.
278 template <detail::fields_concept selected_field_ids_ = fields<field::SEQ, field::ID, field::STRUCTURE>,
279  detail::type_list_of_structure_file_output_formats_concept valid_formats_
280  = type_list<structure_file_format_vienna>,
281  ostream_concept<char> stream_type_ = std::ofstream>
283 {
284 public:
// Re-export the template arguments under names without the trailing underscore.
289  using selected_field_ids = selected_field_ids_;
292  using valid_formats = valid_formats_;
294  using stream_type = stream_type_;
296 
// The list of fields accepted by structure files; selected_field_ids is checked against it at compile time.
// NOTE(review): listing lines 301-303 and 305-307 are absent from this rendering, so the enumerator list
// below is incomplete and the alias is not terminated here - confirm against the original header.
298  using field_ids = fields<field::SEQ,
299  field::ID,
300  field::BPP,
304  field::REACT,
308 
// Compile-time check: every field in selected_field_ids must also be contained in field_ids.
// The immediately-invoked constexpr lambda yields false as soon as one selected field is not accepted.
309  static_assert([] () constexpr
310  {
311  for (field f : selected_field_ids::as_array)
312  if (!field_ids::contains(f))
313  return false;
314  return true;
315  }(),
316  "You selected a field that is not valid for structure files, please refer to the documentation "
317  "of structure_file_out::field_ids for the accepted values.");
318 
// Compile-time check: field::STRUCTURED_SEQ already combines sequence and structure, so it must not be
// selected together with field::SEQ or field::STRUCTURE.
319  static_assert([] () constexpr
320  {
321  return !(selected_field_ids::contains(field::STRUCTURED_SEQ) &&
322  (selected_field_ids::contains(field::SEQ) ||
323  (selected_field_ids::contains(field::STRUCTURE))));
324  }(), "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
325  "at the same time.");
326 
// Range associated types. Most of them are declared as void for this output file.
331  using value_type = void;
332  using reference = void;
333  using const_reference = void;
334  using size_type = void;
// A signed integer type.
336  using difference_type = std::ptrdiff_t;
// The iterator type of this file (an output iterator).
338  using iterator = detail::out_file_iterator<structure_file_out>;
// The const iterator type is void, because files are not const-iterable.
340  using const_iterator = void;
344 
// Default construction is deleted: a stream or a file name has to be given.
348  structure_file_out() = delete;
// Copy construction is deleted.
351  structure_file_out(structure_file_out const &) = delete;
// Copy assignment is deleted, because multiple accesses to the same file are not possible.
353  structure_file_out & operator=(structure_file_out const &) = delete;
// The destructor is defaulted.
359  ~structure_file_out() = default;
360 
// Construct from a file name.
//   _file_name - path of the file to open for writing.
//   fields_tag - a selected_field_ids object; its parameter name is wrapped in SEQAN3_DOXYGEN_ONLY and
//                the value is not read in this body.
// Throws file_open_error if the stream is not open after the call to open().
// NOTE(review): the `file_name` data member is not assigned in this constructor - confirm whether that
// is intended.
370  structure_file_out(filesystem::path const & _file_name,
371  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{})
372  {
373  // open stream
374  stream.open(_file_name, std::ios_base::out | std::ios::binary);
375  if (!stream.is_open())
376  throw file_open_error{"Could not open file for writing."};
377 
378  // initialise format handler or throw if format is not found
379  detail::set_format(format, _file_name);
380  }
381 
// Construct from an existing stream and with a specified format.
// The stream is moved into the `stream` member and `format` is initialised with a default-constructed
// file_format. The static_assert rejects formats that are not listed in valid_formats.
// NOTE(review): listing line 390 (constructor name and the `_stream` parameter) is absent from this
// rendering - confirm against the original header.
389  template <structure_file_output_format_concept file_format>
391  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
392  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
393  stream{std::move(_stream)}, format{file_format{}}
394  {
395  static_assert(meta::in<valid_formats, file_format>::value,
396  "You selected a format that is not in the valid_formats of this file.");
397  }
399 
// Returns an iterator to the current position in the file; the iterator is constructed from *this.
439  iterator begin() noexcept
440  {
441  return {*this};
442  }
443 
// Returns a default-constructed sentinel for comparison with the iterator.
458  sentinel end() noexcept
459  {
460  return {};
461  }
462 
// Write a record in form of a tuple to the file.
//   t - the record; the position of each field inside `t` is looked up in selected_field_ids.
// All ten known fields are passed to write_record in a fixed order; fields that were not selected
// are replaced by an ignore placeholder (see the comment inside the body).
// NOTE(review): listing line 501 is absent from this rendering; the cross-reference index shows a
// `requires tuple_like_concept<tuple_t>` clause for this function - confirm.
499  template <typename tuple_t>
500  void push_back(tuple_t && t)
502  {
503  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
504  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t),
505  detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t),
506  detail::get_or_ignore<selected_field_ids::index_of(field::BPP)>(t),
507  detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURE)>(t),
508  detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURED_SEQ)>(t),
509  detail::get_or_ignore<selected_field_ids::index_of(field::ENERGY)>(t),
510  detail::get_or_ignore<selected_field_ids::index_of(field::REACT)>(t),
511  detail::get_or_ignore<selected_field_ids::index_of(field::REACT_ERR)>(t),
512  detail::get_or_ignore<selected_field_ids::index_of(field::COMMENT)>(t),
513  detail::get_or_ignore<selected_field_ids::index_of(field::OFFSET)>(t));
514  }
515 
// Write a record to the file by passing individual fields.
//   arg, args - the fields of the record, given in the order of selected_field_ids.
// The arguments are bound by reference into a tuple with std::tie and handed to push_back.
554  template <typename arg_t, typename ... arg_types>
555  void emplace_back(arg_t && arg, arg_types && ... args)
556  {
557  push_back(std::tie(arg, args...));
558  }
559 
// Write a range of records (or tuples) to the file.
//   range - the input range; every element is forwarded to push_back.
// Returns a reference to this file.
// NOTE(review): listing line 594 is absent from this rendering; the cross-reference index shows a
// `requires tuple_like_concept<reference_t<rng_t>>` clause for this function - confirm.
592  template <std::ranges::InputRange rng_t>
593  structure_file_out & operator=(rng_t && range)
595  {
596  for (auto && record : range)
597  push_back(std::forward<decltype(record)>(record));
598  return *this;
599  }
600 
// Pipe a range of records (or tuples) into a file given as lvalue.
// Assigns `range` to `f` (which writes the records) and returns a reference to `f`.
// NOTE(review): listing line 647 is absent from this rendering; the cross-reference index shows a
// `requires tuple_like_concept<reference_t<rng_t>>` clause for this function - confirm.
645  template <std::ranges::InputRange rng_t>
646  friend structure_file_out & operator|(rng_t && range, structure_file_out & f)
648  {
649  f = range;
650  return f;
651  }
652 
// Pipe a range of records (or tuples) into a file given as rvalue (temporary).
// Assigns `range` to `f` (which writes the records) and returns the file by value.
// NOTE(review): listing line 656 is absent from this rendering; the cross-reference index shows a
// `requires tuple_like_concept<reference_t<rng_t>>` clause for this function - confirm.
654  template <std::ranges::InputRange rng_t>
655  friend structure_file_out operator|(rng_t && range, structure_file_out && f)
657  {
658  f = range;
659  return std::move(f); // `f` is a named rvalue reference, so it has to be moved explicitly
660  }
662 
// Write columns (wrapped in a seqan3::record) to the file.
// Each of the ten known fields is looked up in the record `r` by its field id, wrapped with
// detail::range_wrap_ignore and passed to write_columns in a fixed order. Returns a reference to this file.
// NOTE(review): listing line 706 (the function signature) is absent from this rendering; the
// cross-reference index gives it as `operator=(record<typelist, field_ids> const & r)` - confirm.
705  template <typename typelist, typename field_ids>
707  {
708  write_columns(detail::range_wrap_ignore(detail::get_or_ignore<field::SEQ>(r)),
709  detail::range_wrap_ignore(detail::get_or_ignore<field::ID>(r)),
710  detail::range_wrap_ignore(detail::get_or_ignore<field::BPP>(r)),
711  detail::range_wrap_ignore(detail::get_or_ignore<field::STRUCTURE>(r)),
712  detail::range_wrap_ignore(detail::get_or_ignore<field::STRUCTURED_SEQ>(r)),
713  detail::range_wrap_ignore(detail::get_or_ignore<field::ENERGY>(r)),
714  detail::range_wrap_ignore(detail::get_or_ignore<field::REACT>(r)),
715  detail::range_wrap_ignore(detail::get_or_ignore<field::REACT_ERR>(r)),
716  detail::range_wrap_ignore(detail::get_or_ignore<field::COMMENT>(r)),
717  detail::range_wrap_ignore(detail::get_or_ignore<field::OFFSET>(r)));
718  return *this;
719  }
720 
// Write columns (wrapped in a std::tuple) to the file.
//   t - the tuple of columns; the position of each field inside `t` is looked up in selected_field_ids.
// Each column is wrapped with detail::range_wrap_ignore and passed to write_columns in a fixed order.
// Returns a reference to this file.
758  template <typename ... arg_types>
759  structure_file_out & operator=(std::tuple<arg_types...> const & t)
760  {
761  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
762  write_columns(
763  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::SEQ)>(t)),
764  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ID)>(t)),
765  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::BPP)>(t)),
766  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURE)>(t)),
767  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::STRUCTURED_SEQ)>(t)),
768  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::ENERGY)>(t)),
769  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::REACT)>(t)),
770  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::REACT_ERR)>(t)),
771  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::COMMENT)>(t)),
772  detail::range_wrap_ignore(detail::get_or_ignore<selected_field_ids::index_of(field::OFFSET)>(t)));
773  return *this;
774  }
776 
779 
// Returns a mutable reference to the underlying stream object.
783  stream_type & get_stream()
784  {
785  return stream;
786  }
788 protected:
// The file name as a string.
// NOTE(review): no visible constructor assigns this member - confirm whether it is still needed.
791  std::string file_name;
792 
// The underlying stream object that records are written to.
794  stream_type stream;
795 
// A std::variant over all types listed in valid_formats.
797  using format_type = detail::transfer_template_args_onto_t<valid_formats, std::variant>;
// The currently selected format; visited whenever a record is written.
799  format_type format;
800 
// Write a single record to the stream, dispatching to the currently selected format.
//   seq, id, bpp, structure, structured_seq, energy, react, react_error, comment, offset
//     - the ten known fields in fixed order; a field that was not selected is passed as an
//       ignore placeholder.
// If structured_seq is given, it is split into a sequence view and a structure view and these are
// passed to the format in place of seq / structure; otherwise seq and structure are passed as they are.
// It is a compile-time error to pass structured_seq together with seq or structure.
//
// react_error has its own template parameter (react_error_type). Sharing react_type between react and
// react_error made template argument deduction fail whenever the two arguments had different types,
// e.g. when only one of field::REACT / field::REACT_ERR is selected and the other one is the ignore
// placeholder. The new parameter is appended last so that the order of the existing parameters is kept.
// NOTE(review): `typename structured_seq_type::...` is applied to the deduced (possibly reference)
// parameter type - confirm that this branch instantiates for the intended argument types.
802  template <typename seq_type,
803  typename id_type,
804  typename bpp_type,
805  typename structure_type,
806  typename structured_seq_type,
807  typename energy_type,
808  typename react_type,
809  typename comment_type,
810  typename offset_type, typename react_error_type>
811  void write_record(seq_type && seq,
812  id_type && id,
813  bpp_type && bpp,
814  structure_type && structure,
815  structured_seq_type && structured_seq,
816  energy_type && energy,
817  react_type && react,
818  react_error_type && react_error,
819  comment_type && comment,
820  offset_type && offset)
821  {
822  static_assert(detail::decays_to_ignore_v<structured_seq_type> ||
823  (detail::decays_to_ignore_v<seq_type> && detail::decays_to_ignore_v<structure_type>),
824  "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
825  "at the same time.");
826 
827  assert(!format.valueless_by_exception());
828  std::visit([&] (auto & f)
829  {
830  if constexpr (!detail::decays_to_ignore_v<structured_seq_type>)
831  {
832  f.write(stream,
833  options,
834  structured_seq | view::convert<typename structured_seq_type::sequence_alphabet_type>,
835  id,
836  bpp,
837  structured_seq | view::convert<typename structured_seq_type::structure_alphabet_type>,
838  energy,
839  react,
840  react_error,
841  comment,
842  offset);
843  }
844  else
845  {
846  f.write(stream,
847  options,
848  seq,
849  id,
850  bpp,
851  structure,
852  energy,
853  react,
854  react_error,
855  comment,
856  offset);
857  }
858  }, format);
859  }
860 
// Write columns to the stream, dispatching to the currently selected format.
//   seq, id, bpp, structure, structured_seq, energy, react, react_error, comment, offset
//     - one input range per known field, in fixed order; the element type of a column that was
//       not selected decays to the ignore placeholder.
// The columns are zipped and one record is written per zipped element.
// NOTE(review): react and react_error share the template parameter react_type, so both arguments
// have to deduce to the same type - confirm that this restriction is intended.
// NOTE(review): listing lines 911 and 915 (the template arguments of view::convert) are absent
// from this rendering - confirm against the original header.
862  template <std::ranges::InputRange seq_type,
863  std::ranges::InputRange id_type,
864  std::ranges::InputRange bpp_type,
865  std::ranges::InputRange structure_type,
866  std::ranges::InputRange structured_seq_type,
867  std::ranges::InputRange energy_type,
868  std::ranges::InputRange react_type,
869  std::ranges::InputRange comment_type,
870  std::ranges::InputRange offset_type>
871  void write_columns(seq_type && seq,
872  id_type && id,
873  bpp_type && bpp,
874  structure_type && structure,
875  structured_seq_type && structured_seq,
876  energy_type && energy,
877  react_type && react,
878  react_type && react_error,
879  comment_type && comment,
880  offset_type && offset)
881  {
// Reject a call in which every column is an ignore placeholder.
882  static_assert(!(detail::decays_to_ignore_v<reference_t<seq_type>> &&
883  detail::decays_to_ignore_v<reference_t<id_type>> &&
884  detail::decays_to_ignore_v<reference_t<bpp_type>> &&
885  detail::decays_to_ignore_v<reference_t<structure_type>> &&
886  detail::decays_to_ignore_v<reference_t<structured_seq_type>> &&
887  detail::decays_to_ignore_v<reference_t<energy_type>> &&
888  detail::decays_to_ignore_v<reference_t<react_type>> &&
889  detail::decays_to_ignore_v<reference_t<comment_type>> &&
890  detail::decays_to_ignore_v<reference_t<offset_type>>),
891  "At least one of the columns must not be set to std::ignore.");
892 
// structured_seq may only be given if both seq and structure are ignore placeholders.
893  static_assert(detail::decays_to_ignore_v<reference_t<structured_seq_type>> ||
894  (detail::decays_to_ignore_v<reference_t<seq_type>> &&
895  detail::decays_to_ignore_v<reference_t<structure_type>>),
896  "You may not select field::STRUCTURED_SEQ and either of field::SEQ and field::STRUCTURE "
897  "at the same time.");
898 
899  assert(!format.valueless_by_exception());
900  std::visit([&] (auto & f)
901  {
902  if constexpr (!detail::decays_to_ignore_v<reference_t<structured_seq_type>>)
903  {
// Zip order: 0 structured_seq, 1 id, 2 bpp, 3 energy, 4 react, 5 react_error, 6 comment, 7 offset.
904  auto zipped = ranges::view::zip(structured_seq, id, bpp, energy, react, react_error, comment, offset);
905 
906  for (auto && v : zipped)
907  {
// Element 0 is passed twice: once converted for the sequence slot and once for the structure slot.
908  f.write(stream,
909  options,
910  std::get<0>(v) | view::convert
912  std::get<1>(v), // id
913  std::get<2>(v), // bpp
914  std::get<0>(v) | view::convert
916  std::get<3>(v), // energy
917  std::get<4>(v), // react
918  std::get<5>(v), // react_error
919  std::get<6>(v), // comment
920  std::get<7>(v)); // offset
921  }
922  }
923  else
924  {
// Zip order: 0 seq, 1 id, 2 bpp, 3 structure, 4 energy, 5 react, 6 react_error, 7 comment, 8 offset.
925  auto zipped = ranges::view::zip(seq, id, bpp, structure, energy, react, react_error, comment, offset);
926 
927  for (auto && v : zipped)
928  {
929  f.write(stream, options, std::get<0>(v), std::get<1>(v), std::get<2>(v), std::get<3>(v),
930  std::get<4>(v), std::get<5>(v), std::get<6>(v), std::get<7>(v), std::get<8>(v));
931  }
932  }
933  }, format);
934  }
935 
// Befriend the iterator type so that it can access the non-public members of this file.
937  friend iterator;
938 };
939 
// Class template argument deduction guide for construction from a stream, a format tag and a fields tag.
// The visible tail of the guide deduces the stream template argument as std::remove_reference_t<stream_type>.
// NOTE(review): listing lines 948-949 (the start of the deduced type after `->`) are absent from this
// rendering - confirm against the original header.
944 template <ostream_concept<char> stream_type,
945  structure_file_output_format_concept file_format,
946  detail::fields_concept selected_field_ids>
947 structure_file_out(stream_type && _stream, file_format const &, selected_field_ids const &)
950  std::remove_reference_t<stream_type>>;
952 
953 } // namespace seqan3
auto const convert
A view that converts each element in the input range (implicitly or via static_cast).
Definition: convert.hpp:89
friend structure_file_out operator|(rng_t &&range, structure_file_out &&f) requires tuple_like_concept< reference_t< rng_t >>
Definition: output.hpp:655
structure_file_out & operator=(std::tuple< arg_types... > const &t)
Write columns (wrapped in a std::tuple) to the file.
Definition: output.hpp:759
The "sequence", usually a range of nucleotides or amino acids.
structure_file_out & operator=(rng_t &&range) requires tuple_like_concept< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:593
structure_file_out & operator=(record< typelist, field_ids > const &r)
Write columns (wrapped in a seqan3::record) to the file.
Definition: output.hpp:706
structure_file_out(filesystem::path const &_file_name, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: output.hpp:370
~structure_file_out()=default
Destructor is defaulted.
Provides exceptions used in the I/O module.
Sequence and fixed interactions combined in one range.
Energy of a folded sequence, represented by one float number.
Provides the seqan3::structure_file_format_vienna class.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: output.hpp:292
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: output.hpp:458
Specifies requirements of a Range type for which begin returns a type that models std::InputIterator...
The class template that file records are based on; behaves like an std::tuple.
Definition: record.hpp:208
Provides seqan3::type_list and auxiliary metafunctions.
Whether a type behaves like a tuple.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: output.hpp:290
Comment field of arbitrary content, usually a string.
Meta-header for the structure module. It includes all headers from alphabet/structure/.
Provides seqan3::view::convert.
void push_back(tuple_t &&t) requires tuple_like_concept< tuple_t >
Write a record in form of a std::tuple to the file.
Definition: output.hpp:500
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
detail::out_file_iterator< structure_file_out > iterator
The iterator type of this view (an output iterator).
Definition: output.hpp:338
Base pair probability matrix of interactions, usually a matrix of float numbers.
structure_file_output_options options
The options are public and its members can be set directly.
Definition: output.hpp:778
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem...
Definition: exception.hpp:62
A class template that holds a choice of seqan3::field.
Definition: record.hpp:136
Provides seqan3::structure_file_output_options.
Provides seqan3::tuple_like_concept.
Reactivity error values given in a vector corresponding to REACT.
Provides the seqan3::record template and the seqan3::field enum.
Sequence (SEQ) relative start position (0-based), unsigned value.
The identifier, usually a string.
Adaptations of concepts from the Ranges TS.
stream_type_ stream_type
The type of the underlying stream.
Definition: output.hpp:294
std::ptrdiff_t difference_type
A signed integer type, usually std::ptrdiff_t.
Definition: output.hpp:336
Provides seqan3::structure_file_output_format_concept and auxiliary classes.
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: output.hpp:439
std::ranges::default_sentinel sentinel
The type returned by end().
Definition: output.hpp:342
Fixed interactions, usually a string of structure alphabet characters.
The options type defines various option members that influence the behaviour of all or some formats...
Definition: output_options.hpp:50
A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition: output.hpp:282
Stream concepts.
structure_file_out()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
Provides C++20 additions to the type_traits header.
Provides various metafunctions on generic types.
Provides the seqan3::detail::out_file_iterator class template.
typename reference< t >::type reference_t
Type metafunction shortcut for seqan3::reference.
Definition: pre.hpp:98
meta::list< types... > type_list
Type that contains multiple types, an alias for meta::list.
Definition: type_list.hpp:54
structure_file_out & operator=(structure_file_out const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file...
field
An enumerator for the fields used in file formats. Some of the fields are shared between formats...
Definition: record.hpp:63
structure_file_out(stream_type &&_stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: output.hpp:390
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: output.hpp:340
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: output.hpp:555
Reactivity values of the sequence characters given in a vector of float numbers.
friend structure_file_out & operator|(rng_t &&range, structure_file_out &f) requires tuple_like_concept< reference_t< rng_t >>
Write a range of records (or tuples) to the file.
Definition: output.hpp:646
::ranges::default_sentinel default_sentinel
Alias for ranges::default_sentinel. Empty sentinel type for use with iterator types that know the bou...
Definition: ranges:215