SeqAn3
bi_fm_index.hpp
Go to the documentation of this file.
1 // ============================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ============================================================================
4 //
5 // Copyright (c) 2006-2018, Knut Reinert & Freie Universitaet Berlin
6 // Copyright (c) 2016-2018, Knut Reinert & MPI Molekulare Genetik
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // * Redistributions of source code must retain the above copyright
13 // notice, this list of conditions and the following disclaimer.
14 // * Redistributions in binary form must reproduce the above copyright
15 // notice, this list of conditions and the following disclaimer in the
16 // documentation and/or other materials provided with the distribution.
17 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
25 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 //
33 // ============================================================================
34 
40 #pragma once
41 
42 #include <utility>
43 
45 #include <seqan3/io/filesystem.hpp>
49 
50 namespace seqan3
51 {
52 
65 {
68 
70  using rev_fm_index_traits = fm_index_default_traits; // TODO: trait object without sampling.
71 };
72 
82 template <std::ranges::RandomAccessRange text_t, bi_fm_index_traits_concept index_traits_t = bi_fm_index_default_traits>
88 {
89 protected:
91 
// Non-owning pointer to the indexed text. Null by default; construct() stores the address of its argument here.
93  text_t const * text = nullptr;
94 
95 public:
96 
// The type of the forward indexed text.
100  using text_type = text_t;
// The type of the reversed text: whatever view::reverse yields when applied to the indexed text.
103  using rev_text_type = decltype(view::reverse(*text));
105 
106 protected:
108 
// SDSL index type selected by the forward index traits (index_traits_t::fm_index_traits).
112  using sdsl_index_type = typename index_traits_t::fm_index_traits::sdsl_index_type;
114 
// SDSL index type selected by the reverse index traits (index_traits_t::rev_fm_index_traits).
116  using rev_sdsl_index_type = typename index_traits_t::rev_fm_index_traits::sdsl_index_type;
117 
// Character type of the forward SDSL index's alphabet.
121  using sdsl_char_type = typename sdsl_index_type::alphabet_type::char_type;
122 
125 
129 
131  // constructed from the bidirectional index.
132  rev_text_type rev_text; // reversed view of the text; assigned in construct() and passed to rev_fm.construct()
133 
// Index built over the text in its original order (see construct()).
135  fm_index_type fwd_fm;
136 
// Index built over rev_text (see construct()).
138  rev_fm_index_type rev_fm;
139 
140 public:
141 
// Type for representing positions in the indexed text.
148  using size_type = typename sdsl_index_type::size_type;
150 
// The index traits object.
152  using index_traits = index_traits_t;
153 
164 
// Every specialisation of bi_fm_index_iterator is a friend and can therefore reach the protected members.
165  template <typename bi_fm_index_t>
166  friend class bi_fm_index_iterator;
167 
// Every specialisation of fm_index_iterator is a friend as well.
168  template <typename fm_index_t>
169  friend class fm_index_iterator;
170 
// Default constructor, copy/move constructors, copy/move assignment and destructor are all compiler-generated.
// A default-constructed object holds a null `text` pointer until construct() is called.
// NOTE(review): construct() builds rev_fm from the *member* rev_text, and the commented-out swap-based
// variant inside construct() is reported to segfault. Presumably a copied or moved index can still refer
// to the source object's rev_text; confirm against fm_index before relying on copy/move.
174  bi_fm_index() = default;
175  bi_fm_index(bi_fm_index const &) = default;
176  bi_fm_index & operator=(bi_fm_index const &) = default;
177  bi_fm_index(bi_fm_index &&) = default;
178  bi_fm_index & operator=(bi_fm_index &&) = default;
179  ~bi_fm_index() = default;
180 
// Constructor that immediately constructs the index for the given range by delegating to construct(text).
// The range cannot be an rvalue (the rvalue overloads are deleted); its address is stored by construct().
// Any exception raised by construct(), e.g. std::invalid_argument for an empty text, leaves this constructor.
190  bi_fm_index(text_t const & text)
191  {
192  construct(text);
193  }
194 
// Deleted: the index must not be constructed from a temporary (non-const rvalue) text.
196  bi_fm_index(text_t &&) = delete;
197 
// Deleted: the same restriction for const rvalues.
199  bi_fm_index(text_t const &&) = delete;
201 
// Constructs the index for the given range.
// Throws std::invalid_argument when the range is empty (begin() == end()).
// The address of `text` is stored in the `text` member and a reversed view of it in `rev_text`;
// fwd_fm is then built over `text` and rev_fm over `rev_text`.
// The range cannot be an rvalue (those overloads are deleted), and since only its address is kept,
// the caller is expected to keep it alive while the index is in use.
// Members are overwritten in place in the order below; nothing here rolls them back if a later step throws.
218  void construct(text_t const & text)
219  {
220  // text must not be empty
221  if (text.begin() == text.end())
222  throw std::invalid_argument("The text that is indexed cannot be empty.");
223 
224  this->text = &text; // the parameter shadows the member, hence the explicit this->
225  rev_text = view::reverse(text);
226  fwd_fm.construct(text);
227  rev_fm.construct(rev_text); // built from the member view, not from a temporary
228 
229  // does not work yet. segmentation fault in bi_fm_index_iterator snippet
230  // bi_fm_index tmp;
231  // tmp.text = &text;
232  // tmp.rev_text = view::reverse(*tmp.text);
233  // tmp.fwd_fm.construct(*tmp.text);
234  // tmp.rev_fm.construct(tmp.rev_text);
235  // std::swap(*this, tmp);
236  // this->text = &text;
237  // rev_text = view::reverse(text);
238  }
239 
// Deleted: construct() must not be called with a temporary (non-const rvalue) text.
241  void construct(text_t &&) = delete;
242 
// Deleted: the same restriction for const rvalues.
244  void construct(text_t const &&) = delete;
245 
// Returns the length of the indexed text including sentinel characters.
// The value is taken from the forward index only (fwd_fm.size()); rev_fm is not consulted.
257  size_type size() const noexcept
258  {
259  return fwd_fm.size();
260  }
261 
// Checks whether the index is empty, i.e. whether size() is 0.
273  bool empty() const noexcept
274  {
275  return size() == 0;
276  }
277 
278  // operator== not implemented by sdsl indices yet
279  // bool operator==(bi_fm_index const & rhs) const noexcept
280  // {
281  // return std::tie(fwd_fm, rev_fm) == std::tie(rhs.fwd_fm, rhs.rev_fm);
282  // }
283 
284  // operator== not implemented by sdsl indices yet
285  // bool operator!=(bi_fm_index const & rhs) const noexcept
286  // {
287  // return !(*this == rhs);
288  // }
289 
// Returns a seqan3::bi_fm_index_iterator on the index that can be used for searching.
// The iterator is list-initialised from *this.
// NOTE(review): presumably it keeps a reference/pointer to this index, so the index should outlive it; confirm.
304  iterator_type begin() const noexcept
305  {
306  return {*this};
307  }
308 
// Returns a unidirectional seqan3::fm_index_iterator on the original text of the bidirectional index.
// The iterator is list-initialised from the forward index fwd_fm.
321  fwd_iterator_type fwd_begin() const noexcept
322  {
323  return {fwd_fm};
324  }
325 
// Returns a unidirectional seqan3::fm_index_iterator on the reversed text of the bidirectional index.
// The iterator is list-initialised from the reverse index rev_fm.
339  rev_iterator_type rev_begin() const noexcept
340  {
341  return {rev_fm};
342  }
343 
// Loads the index from disk. Temporary function until cereal is supported.
// The forward index is read from `path` with ".fwd" appended, the reverse index from `path` with ".rev" appended.
// Returns true only if both loads report success.
// The && short-circuits: when the forward load fails, the reverse load is not attempted.
// NOTE(review): if the forward load succeeds and the reverse load fails, fwd_fm has presumably already been
// replaced while rev_fm has not; confirm against fm_index::load.
// This function does not assign the `text` or `rev_text` members.
356  bool load(filesystem::path const & path)
357  {
358  filesystem::path path_fwd{path};
359  filesystem::path path_rev{path};
360  path_fwd += filesystem::path{".fwd"};
361  path_rev += filesystem::path{".rev"};
362  return fwd_fm.load(path_fwd) && rev_fm.load(path_rev);
363  }
364 
// Stores the index to disk. Temporary function until cereal is supported.
// The forward index is written to `path` with ".fwd" appended, the reverse index to `path` with ".rev" appended.
// Returns true only if both stores report success.
// The && short-circuits: when storing the forward index fails, the reverse index is not written.
377  bool store(filesystem::path const & path) const
378  {
379  filesystem::path path_fwd{path};
380  filesystem::path path_rev{path};
381  path_fwd += filesystem::path{".fwd"};
382  path_rev += filesystem::path{".rev"};
383  return fwd_fm.store(path_fwd) && rev_fm.store(path_rev);
384  }
385 
386 };
387 
389 
390 } // namespace seqan3
Provides seqan3::view::reverse.
bi_fm_index(text_t const &text)
Constructor that immediately constructs the index given a range. The range cannot be an rvalue (i...
Definition: bi_fm_index.hpp:190
text_t text_type
The type of the forward indexed text.
Definition: bi_fm_index.hpp:101
rev_iterator_type rev_begin() const noexcept
Returns a unidirectional seqan3::fm_index_iterator on the reversed text of the bidirectional index th...
Definition: bi_fm_index.hpp:339
bool empty() const noexcept
Checks whether the index is empty.
Definition: bi_fm_index.hpp:273
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type.
Definition: range.hpp:213
bool load(filesystem::path const &path)
Loads the index from disk. Temporary function until cereal is supported.
Definition: fm_index.hpp:333
void construct(text_t const &text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: fm_index.hpp:225
decltype(view::reverse(*text)) rev_text_type
The type of the reverse indexed text.
Definition: bi_fm_index.hpp:103
Provides the seqan3::bi_fm_index_iterator for searching in the bidirectional seqan3::bi_fm_index.
void construct(text_t const &text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: bi_fm_index.hpp:218
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:195
The generic alphabet concept that covers most data types used in ranges. This is the core alphabet con...
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
bool store(filesystem::path const &path) const
Stores the index to disk. Temporary function until cereal is supported.
Definition: bi_fm_index.hpp:377
bool store(filesystem::path const &path) const
Stores the index to disk. Temporary function until cereal is supported.
Definition: fm_index.hpp:356
The SeqAn FM Index Iterator.
Definition: fm_index_iterator.hpp:91
bool load(filesystem::path const &path)
Loads the index from disk. Temporary function until cereal is supported.
Definition: bi_fm_index.hpp:356
fwd_iterator_type fwd_begin() const noexcept
Returns a unidirectional seqan3::fm_index_iterator on the original text of the bidirectional index th...
Definition: bi_fm_index.hpp:321
The default FM Index Configuration.
Definition: fm_index.hpp:87
The default Bidirectional FM Index Configuration.
Definition: bi_fm_index.hpp:64
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:269
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
iterator_type begin() const noexcept
Returns a seqan3::bi_fm_index_iterator on the index that can be used for searching.
Definition: bi_fm_index.hpp:304
index_traits_t index_traits
The index traits object.
Definition: bi_fm_index.hpp:152
The SeqAn Bidirectional FM Index Iterator.
Definition: bi_fm_index_iterator.hpp:83
Provides various metafunctions used by the range module.
The SeqAn Bidirectional FM Index.
Definition: bi_fm_index.hpp:87
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: bi_fm_index.hpp:257
innermost_value_type_t< text_t > char_type
The type of the underlying character of text_type.
Definition: bi_fm_index.hpp:146
Provides the unidirectional seqan3::fm_index.
constexpr auto reverse
A range adaptor that presents the underlying range in reverse order.
Definition: reverse.hpp:93
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index.hpp:148