SeqAn3
bitcompressed_vector.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <type_traits>
43 
44 #include <range/v3/iterator_range.hpp>
45 
46 #include <sdsl/int_vector.hpp>
47 
57 #include <seqan3/std/concepts>
58 #include <seqan3/std/iterator>
59 #include <seqan3/std/ranges>
60 
61 namespace seqan3
62 {
63 
64 // forward
65 class debug_stream_type;
66 
95 template <alphabet_concept alphabet_type>
97  requires std::is_same_v<alphabet_type, std::remove_reference_t<alphabet_type>>
100 {
101 private:
103  static constexpr size_t bits_per_letter = std::ceil(std::log2(alphabet_size_v<alphabet_type>));
104 
105  static_assert(bits_per_letter <= 64, "Alphabet must be representable in at most 64bit.");
106 
108  using data_type = sdsl::int_vector<bits_per_letter>;
109 
111  data_type data;
112 
115  class reference_proxy_type : public alphabet_proxy<reference_proxy_type, alphabet_type>
116  {
117  private:
121  friend base_t;
122 
125  static uint8_t constexpr safe_bits_per_letter = (bits_per_letter == 8 ||
126  bits_per_letter == 16 ||
127  bits_per_letter == 32) ? 64 : bits_per_letter;
128 
130  using internal_proxy_type = sdsl::int_vector_reference<sdsl::int_vector<safe_bits_per_letter>>;
132  ranges::semiregular_t<internal_proxy_type> internal_proxy;
133 
135  constexpr void on_update() noexcept
136  {
137  internal_proxy.get() = static_cast<base_t &>(*this).to_rank();
138  }
139 
140  public:
141  // Import from base:
142  using base_t::operator=;
143 
148  constexpr reference_proxy_type() noexcept : base_t{} {}
149  constexpr reference_proxy_type(reference_proxy_type const &) = default;
150  constexpr reference_proxy_type(reference_proxy_type &&) = default;
151  constexpr reference_proxy_type & operator=(reference_proxy_type const &) = default;
152  constexpr reference_proxy_type & operator=(reference_proxy_type &&) = default;
153  ~reference_proxy_type() = default;
154 
156  reference_proxy_type(internal_proxy_type const & internal) noexcept :
157  internal_proxy{internal}
158  {
159  static_cast<base_t &>(*this).assign_rank(internal);
160  }
162  };
163 
166  //NOTE(h-2): it is entirely unclear to me why we need this
167  template <typename t>
168  requires std::is_same_v<value_type_t<remove_cvref_t<t>>, alphabet_type>
169  static constexpr bool has_same_value_type_v = true;
171 
172 public:
176  using value_type = alphabet_type;
179  using reference = std::conditional_t<std::is_lvalue_reference_v<reference_t<data_type>>,
181  reference_proxy_type>;
183  using const_reference = alphabet_type;
185  using iterator = detail::random_access_iterator<bitcompressed_vector>;
187  using const_iterator = detail::random_access_iterator<bitcompressed_vector const>;
193 
195  // this signals to range-v3 that something is a container :|
196  using allocator_type = void;
198 
202  bitcompressed_vector() = default;
203  constexpr bitcompressed_vector(bitcompressed_vector const &) = default;
204  constexpr bitcompressed_vector(bitcompressed_vector &&) = default;
205  constexpr bitcompressed_vector & operator=(bitcompressed_vector const &) = default;
206  constexpr bitcompressed_vector & operator=(bitcompressed_vector &&) = default;
207  ~bitcompressed_vector() = default;
208 
222  template <std::ranges::InputRange other_range_t>
224  requires has_same_value_type_v<other_range_t>
226  explicit bitcompressed_vector(other_range_t && range) :
227  bitcompressed_vector{seqan3::begin(range), seqan3::end(range)}
228  {}
229 
242  bitcompressed_vector(size_type const count, value_type const value) :
243  data(count, to_rank(value))
244  {}
245 
261  template <std::ForwardIterator begin_iterator_type, std::Sentinel<begin_iterator_type> end_iterator_type>
262  bitcompressed_vector(begin_iterator_type begin_it, end_iterator_type end_it)
266  {
267  insert(cend(), begin_it, end_it);
268  }
269 
281  bitcompressed_vector(std::initializer_list<value_type> ilist) :
282  bitcompressed_vector(std::begin(ilist), std::end(ilist))
283  {}
284 
296  bitcompressed_vector & operator=(std::initializer_list<value_type> ilist)
297  {
298  assign(std::begin(ilist), std::end(ilist));
299  return *this;
300  }
301 
315  template <std::ranges::InputRange other_range_t>
316  void assign(other_range_t && range)
320  {
321  bitcompressed_vector rhs{std::forward<other_range_t>(range)};
322  swap(rhs);
323  }
324 
337  void assign(size_type const count, value_type const value)
338  {
339  bitcompressed_vector rhs{count, value};
340  swap(rhs);
341  }
342 
358  template <std::ForwardIterator begin_iterator_type, std::Sentinel<begin_iterator_type> end_iterator_type>
359  void assign(begin_iterator_type begin_it, end_iterator_type end_it)
361  requires std::CommonReference<value_type_t<begin_iterator_type>, value_type>
363  {
364  bitcompressed_vector rhs{begin_it, end_it};
365  swap(rhs);
366  }
367 
379  void assign(std::initializer_list<value_type> ilist)
380  {
381  assign(std::begin(ilist), std::end(ilist));
382  }
383 
385 
402  iterator begin() noexcept
403  {
404  return iterator{*this};
405  }
406 
408  const_iterator begin() const noexcept
409  {
410  return const_iterator{*this};
411  }
412 
414  const_iterator cbegin() const noexcept
415  {
416  return const_iterator{*this};
417  }
418 
432  iterator end() noexcept
433  {
434  return iterator{*this, size()};
435  }
436 
438  const_iterator end() const noexcept
439  {
440  return const_iterator{*this, size()};
441  }
442 
444  const_iterator cend() const noexcept
445  {
446  return const_iterator{*this, size()};
447  }
449 
467  {
468  if (i >= size()) // [[unlikely]]
469  {
470  throw std::out_of_range{"Trying to access element behind the last in bitcompressed_vector."};
471  }
472  return (*this)[i];
473  }
474 
476  const_reference at(size_type const i) const
477  {
478  if (i >= size()) // [[unlikely]]
479  {
480  throw std::out_of_range{"Trying to access element behind the last in bitcompressed_vector."};
481  }
482  return (*this)[i];
483  }
484 
500  reference operator[](size_type const i) noexcept
501  {
502  assert(i < size());
503  return data[i];
504  }
505 
507  const_reference operator[](size_type const i) const noexcept
508  {
509  assert(i < size());
510  return assign_rank(const_reference{}, data[i]);
511  }
512 
526  reference front() noexcept
527  {
528  assert(size() > 0);
529  return (*this)[0];
530  }
531 
533  const_reference front() const noexcept
534  {
535  assert(size() > 0);
536  return (*this)[0];
537  }
538 
552  reference back() noexcept
553  {
554  assert(size() > 0);
555  return (*this)[size()-1];
556  }
557 
559  const_reference back() const noexcept
560  {
561  assert(size() > 0);
562  return (*this)[size()-1];
563  }
564 
579  bool empty() const noexcept
580  {
581  return size() == 0;
582  }
583 
595  size_type size() const noexcept
596  {
597  return data.size();
598  }
599 
614  size_type max_size() const noexcept
615  {
616  return data.max_size();
617  }
618 
634  size_type capacity() const noexcept
635  {
636  return data.capacity();
637  }
638 
657  void reserve(size_type const new_cap)
658  {
659  data.reserve(new_cap);
660  }
661 
678  {
679  data.shrink_to_fit();
680  }
682 
697  void clear() noexcept
698  {
699  data.clear();
700  }
701 
721  {
722  return insert(pos, 1, value);
723  }
724 
744  iterator insert(const_iterator pos, size_type const count, value_type const value)
745  {
746  auto const pos_as_num = std::distance(cbegin(), pos); // we want to insert BEFORE this position
747 
748  data.insert(data.begin() + pos_as_num, count, to_rank(value));
749 
750  return begin() + pos_as_num;
751  }
752 
777  template <std::ForwardIterator begin_iterator_type, std::Sentinel<begin_iterator_type> end_iterator_type>
778  iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
780  requires std::CommonReference<value_type_t<begin_iterator_type>, value_type>
782  {
783  auto const pos_as_num = std::distance(cbegin(), pos);
784 
785  auto v = std::ranges::iterator_range{begin_it, end_it} | seqan3::view::convert<value_type> | seqan3::view::to_rank;
786  data.insert(data.begin() + pos_as_num, seqan3::begin(v), seqan3::end(v));
787 
788  return begin() + pos_as_num;
789  }
790 
809  iterator insert(const_iterator pos, std::initializer_list<value_type> const & ilist)
810  {
811  return insert(pos, ilist.begin(), ilist.end());
812  }
813 
834  {
835  if (begin_it >= end_it) // [[unlikely]]
836  return begin() + std::distance(cbegin(), end_it);
837 
838  auto const begin_it_pos = std::distance(cbegin(), begin_it);
839  auto const end_it_pos = std::distance(cbegin(), end_it);
840 
841  data.erase(data.cbegin() + begin_it_pos,
842  data.cbegin() + end_it_pos);
843 
844  return begin() + begin_it_pos;
845  }
846 
867  {
868  return erase(pos, pos + 1);
869  }
870 
886  void push_back(value_type const value)
887  {
888  data.push_back(to_rank(value));
889  }
890 
907  void pop_back()
908  {
909  assert(size() > 0);
910  data.pop_back();
911  }
912 
939  void resize(size_type const count)
940  {
941  assert(count < max_size());
942  data.resize(count);
943  }
944 
949  void resize(size_type const count, value_type const value)
950  {
951  assert(count < max_size());
952  data.resize(count, to_rank(value));
953  }
954 
966  constexpr void swap(bitcompressed_vector & rhs) noexcept
967  {
968  std::swap(data, rhs.data);
969  }
970 
972  constexpr void swap(bitcompressed_vector && rhs) noexcept
973  {
974  std::swap(data, rhs.data);
975  }
977 
990  friend constexpr void swap(bitcompressed_vector & lhs, bitcompressed_vector & rhs) noexcept
991  {
992  std::swap(lhs, rhs);
993  }
994 
996  friend constexpr void swap(bitcompressed_vector && lhs, bitcompressed_vector && rhs) noexcept
997  {
998  std::swap(lhs, rhs);
999  }
1001 
1004  constexpr bool operator==(bitcompressed_vector const & rhs) const noexcept
1005  {
1006  return data == rhs.data;
1007  }
1008 
1009  constexpr bool operator!=(bitcompressed_vector const & rhs) const noexcept
1010  {
1011  return data != rhs.data;
1012  }
1013 
1014  constexpr bool operator<(bitcompressed_vector const & rhs) const noexcept
1015  {
1016  return data < rhs.data;
1017  }
1018 
1019  constexpr bool operator>(bitcompressed_vector const & rhs) const noexcept
1020  {
1021  return data > rhs.data;
1022  }
1023 
1024  constexpr bool operator<=(bitcompressed_vector const & rhs) const noexcept
1025  {
1026  return data <= rhs.data;
1027  }
1028 
1029  constexpr bool operator>=(bitcompressed_vector const & rhs) const noexcept
1030  {
1031  return data >= rhs.data;
1032  }
1034 
1042  template <cereal_archive_concept archive_t>
1043  void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
1044  {
1045  archive(data); //TODO: data not yet serialisable
1046  }
1048 };
1049 
1050 } // namespace seqan3
void pop_back()
Removes the last element of the container.
Definition: bitcompressed_vector.hpp:907
reference front() noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitcompressed_vector.hpp:526
iterator end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:432
Free function/metafunction wrappers for alphabets with member functions/types.
void assign(other_range_t &&range)
Assign from a different range.
Definition: bitcompressed_vector.hpp:316
::ranges::iterator_range iterator_range
Alias for ranges::iterator_range. Iterator adaptor for a Range type.
Definition: ranges:235
void clear() noexcept
Removes all elements from the container.
Definition: bitcompressed_vector.hpp:697
A CRTP-base that eases the definition of proxy types returned in place of regular alphabets...
Definition: alphabet_proxy.hpp:152
constexpr void swap(bitcompressed_vector &&rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:972
void push_back(value_type const value)
Appends the given element value to the end of the container.
Definition: bitcompressed_vector.hpp:886
Provides C++20 additions to the <iterator> header.
Contains various shortcuts for common std::ranges functions.
const_reference back() const noexcept
Return the last element.
Definition: bitcompressed_vector.hpp:559
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:444
SeqAn specific customisations in the standard namespace.
Definition: align_result.hpp:221
iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
Inserts elements from range [begin_it, end_it) before position in the container.
Definition: bitcompressed_vector.hpp:778
Provides seqan3::view::convert.
size_type max_size() const noexcept
Returns the maximum number of elements the container is able to hold due to system or library impleme...
Definition: bitcompressed_vector.hpp:614
Provides the seqan3::detail::random_access_iterator class.
Provides various metafunctions.
The generic alphabet concept that covers most data types used in ranges. This is the core alphabet con...
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
size_type_t< data_type > size_type
An unsigned integer type (usually std::size_t)
Definition: bitcompressed_vector.hpp:191
reference at(size_type const i)
Return the i-th element.
Definition: bitcompressed_vector.hpp:466
iterator insert(const_iterator pos, value_type const value)
Inserts value before position in the container.
Definition: bitcompressed_vector.hpp:720
reference back() noexcept
Return the last element.
Definition: bitcompressed_vector.hpp:552
detail::random_access_iterator< bitcompressed_vector > iterator
The iterator type of this container (a random access iterator).
Definition: bitcompressed_vector.hpp:185
friend constexpr void swap(bitcompressed_vector &&lhs, bitcompressed_vector &&rhs) noexcept
Definition: bitcompressed_vector.hpp:996
bitcompressed_vector(std::initializer_list< value_type > ilist)
Construct from std::initializer_list.
Definition: bitcompressed_vector.hpp:281
void assign(size_type const count, value_type const value)
Assign with count times value.
Definition: bitcompressed_vector.hpp:337
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitcompressed_vector.hpp:438
iterator insert(const_iterator pos, std::initializer_list< value_type > const &ilist)
Inserts elements from initializer list before position in the container.
Definition: bitcompressed_vector.hpp:809
typename size_type< t >::type size_type_t
Type metafunction shortcut for seqan3::size_type.
Definition: pre.hpp:204
Provides seqan3::view::to_rank.
reference operator[](size_type const i) noexcept
Return the i-th element.
Definition: bitcompressed_vector.hpp:500
constexpr alphabet_type & assign_rank(alphabet_type &alph, underlying_rank_t< alphabet_type > const rank) requires requires(alphabet_type alph)
Implementation of seqan3::semi_alphabet_concept::assign_rank() that delegates to a member function...
Definition: member_exposure.hpp:110
The Concepts library.
const_reference at(size_type const i) const
Return the i-th element.
Definition: bitcompressed_vector.hpp:476
const_reference operator[](size_type const i) const noexcept
Return the i-th element.
Definition: bitcompressed_vector.hpp:507
Adaptations of concepts from the Ranges TS.
The concept std::CommonReference<T, U> specifies that two types T and U share a common reference type...
bitcompressed_vector(other_range_t &&range)
Construct from a different range.
Definition: bitcompressed_vector.hpp:226
Free function/metafunction wrappers for alphabets with member functions/types.
detail::random_access_iterator< bitcompressed_vector const > const_iterator
The const_iterator type of this container (a random access iterator).
Definition: bitcompressed_vector.hpp:187
void assign(begin_iterator_type begin_it, end_iterator_type end_it)
Assign from pair of iterators.
Definition: bitcompressed_vector.hpp:359
alphabet_type value_type
Equals the alphabet_type.
Definition: bitcompressed_vector.hpp:177
Adaptions of concepts from the Cereal library.
const_reference front() const noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitcompressed_vector.hpp:533
friend constexpr void swap(bitcompressed_vector &lhs, bitcompressed_vector &rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:990
iterator erase(const_iterator begin_it, const_iterator end_it)
Removes specified elements from the container.
Definition: bitcompressed_vector.hpp:833
bitcompressed_vector(begin_iterator_type begin_it, end_iterator_type end_it)
Construct from pair of iterators.
Definition: bitcompressed_vector.hpp:262
typename difference_type< t >::type difference_type_t
Type metafunction shortcut for seqan3::difference_type.
Definition: pre.hpp:178
bitcompressed_vector(size_type const count, value_type const value)
Construct with count times value.
Definition: bitcompressed_vector.hpp:242
void reserve(size_type const new_cap)
Increase the capacity to a value that's greater or equal to new_cap.
Definition: bitcompressed_vector.hpp:657
bitcompressed_vector & operator=(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitcompressed_vector.hpp:296
Provides C++20 additions to the type_traits header.
iterator insert(const_iterator pos, size_type const count, value_type const value)
Inserts count copies of value before position in the container.
Definition: bitcompressed_vector.hpp:744
size_type capacity() const noexcept
Returns the number of elements that the container has currently allocated space for.
Definition: bitcompressed_vector.hpp:634
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:414
typename reference< t >::type reference_t
Type metafunction shortcut for seqan3::reference.
Definition: pre.hpp:98
A space-optimised version of std::vector that compresses multiple letters into a single byte...
Definition: bitcompressed_vector.hpp:99
Provides seqan3::view::to_char.
void shrink_to_fit()
Requests the removal of unused capacity.
Definition: bitcompressed_vector.hpp:677
size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition: bitcompressed_vector.hpp:595
void resize(size_type const count)
Resizes the container to contain count elements.
Definition: bitcompressed_vector.hpp:939
iterator begin() noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:402
difference_type_t< data_type > difference_type
A signed integer type (usually std::ptrdiff_t)
Definition: bitcompressed_vector.hpp:189
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitcompressed_vector.hpp:408
bool empty() const noexcept
Checks whether the container is empty.
Definition: bitcompressed_vector.hpp:579
alphabet_type const_reference
Equals the alphabet_type / value_type.
Definition: bitcompressed_vector.hpp:183
constexpr underlying_rank_t< alphabet_type > to_rank(alphabet_type const alph) requires requires(alphabet_type alph)
Implementation of seqan3::semi_alphabet_concept::to_rank() that delegates to a member function...
Definition: member_exposure.hpp:97
void assign(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitcompressed_vector.hpp:379
void resize(size_type const count, value_type const value)
Resizes the container to contain count elements.
Definition: bitcompressed_vector.hpp:949
std::conditional_t< std::is_lvalue_reference_v< reference_t< data_type > >, reference_t< data_type >, reference_proxy_type > reference
A proxy type that enables assignment, if the underlying data structure also provides a proxy...
Definition: bitcompressed_vector.hpp:181
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:90
iterator erase(const_iterator pos)
Removes specified elements from the container.
Definition: bitcompressed_vector.hpp:866
constexpr void swap(bitcompressed_vector &rhs) noexcept
Swap contents with another instance.
Definition: bitcompressed_vector.hpp:966