44 #include <sdsl/suffix_trees.hpp> 46 #include <range/v3/view/iota.hpp> 47 #include <range/v3/view/slice.hpp> 82 template <
typename index_t>
94 using size_type =
typename index_type::size_type;
111 using sdsl_char_type =
typename index_type::sdsl_char_type;
144 sdsl_char_type _last_char;
155 bool fwd_iter_last_used =
false;
166 template <detail::sdsl_index_concept csa_t>
167 bool bidirectional_search(csa_t
const & csa, sdsl_char_type
const c,
171 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
172 assert(r_fwd + 1 >= l_fwd);
173 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
175 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
180 cc = csa.char2comp[c];
181 if (cc == 0 && c > 0)
186 if (r_fwd + 1 - l_fwd == csa.size())
190 _r_fwd = csa.C[cc + 1] - 1;
196 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
197 size_type const rank_l = std::get<0>(r_s_b);
198 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
199 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
200 _l_fwd = c_begin + rank_l;
201 _r_fwd = c_begin + rank_r;
206 if (_r_fwd >= _l_fwd)
212 assert(r_fwd + 1 >= l_fwd);
213 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
220 template <detail::sdsl_index_concept csa_t>
221 bool bidirectional_search_cycle(csa_t
const & csa, sdsl_char_type
const c,
226 assert((l_parent <= r_parent) && (r_parent < csa.size()));
232 c_begin = csa.C[csa.char2comp[c]];
234 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
236 b = std::get<2>(r_s_b),
237 rank_l = std::get<0>(r_s_b),
238 rank_r = r_parent - l_parent - s - b + rank_l;
240 size_type const _l_fwd = c_begin + rank_l;
241 size_type const _r_fwd = c_begin + rank_r;
243 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
245 if (_r_fwd >= _l_fwd)
251 assert(r_fwd + 1 >= l_fwd);
252 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
273 fwd_lb(0), fwd_rb(_index.size() - 1),
274 rev_lb(0), rev_rb(_index.size() - 1),
293 assert(index !=
nullptr);
295 assert(!(fwd_lb == rhs.fwd_lb && fwd_rb == rhs.fwd_rb && depth == rhs.depth) ||
297 parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb && _last_char == rhs._last_char);
299 return std::tie(fwd_lb, fwd_rb, depth) == std::tie(rhs.fwd_lb, rhs.fwd_rb, rhs.depth);
316 assert(index !=
nullptr);
318 return !(*
this == rhs);
341 fwd_iter_last_used =
true;
344 assert(index !=
nullptr);
346 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
348 sdsl_char_type c = 1;
349 while (c < index->fwd_fm.index.sigma &&
350 !bidirectional_search(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
351 fwd_lb, fwd_rb, rev_lb, rev_rb))
356 if (c != index->fwd_fm.index.sigma)
358 parent_lb = new_parent_lb;
359 parent_rb = new_parent_rb;
389 fwd_iter_last_used =
false;
392 assert(index !=
nullptr);
394 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
396 sdsl_char_type c = 1;
397 while (c < index->rev_fm.index.sigma &&
398 !bidirectional_search(index->rev_fm.index, index->rev_fm.index.comp2char[c],
399 rev_lb, rev_rb, fwd_lb, fwd_rb))
404 if (c != index->rev_fm.index.sigma)
406 parent_lb = new_parent_lb;
407 parent_rb = new_parent_rb;
431 template <alphabet_concept
char_t>
438 fwd_iter_last_used =
true;
441 assert(index !=
nullptr);
443 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
446 if (bidirectional_search(index->fwd_fm.index, c_char, fwd_lb, fwd_rb, rev_lb, rev_rb))
448 parent_lb = new_parent_lb;
449 parent_rb = new_parent_rb;
473 template <alphabet_concept
char_t>
480 fwd_iter_last_used =
false;
483 assert(index !=
nullptr);
485 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
488 if (bidirectional_search(index->rev_fm.index, c_char, rev_lb, rev_rb, fwd_lb, fwd_rb))
490 parent_lb = new_parent_lb;
491 parent_rb = new_parent_rb;
517 template <std::ranges::RandomAccessRange seq_t>
523 assert(index !=
nullptr);
525 auto first = seq.begin();
526 auto last = seq.end();
529 fwd_iter_last_used = (first != last);
532 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb, _rev_lb = rev_lb, _rev_rb = rev_rb;
533 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
534 sdsl_char_type c = _last_char;
536 for (
auto it = first; it != last; ++it)
540 new_parent_lb = _fwd_lb;
541 new_parent_rb = _fwd_rb;
542 if (!bidirectional_search(index->fwd_fm.index, c, _fwd_lb, _fwd_rb, _rev_lb, _rev_rb))
551 parent_lb = new_parent_lb;
552 parent_rb = new_parent_rb;
555 depth += last - first;
580 template <std::ranges::RandomAccessRange seq_t>
582 requires implicitly_convertible_to_concept<innermost_value_type_t<seq_t>,
typename index_t::char_type>
586 assert(index !=
nullptr);
589 auto first = rev_seq.begin();
590 auto last = rev_seq.end();
594 fwd_iter_last_used =
false;
597 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb,
598 _rev_lb = rev_lb, _rev_rb = rev_rb;
599 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
600 sdsl_char_type c = _last_char;
602 for (
auto it = first; it != last; ++it)
606 new_parent_lb = _rev_lb;
607 new_parent_rb = _rev_rb;
608 if (!bidirectional_search(index->rev_fm.index, c, _rev_lb, _rev_rb, _fwd_lb, _fwd_rb))
617 parent_lb = new_parent_lb;
618 parent_rb = new_parent_rb;
620 depth += last - first;
655 assert(fwd_iter_last_used);
660 sdsl_char_type c = _last_char + 1;
662 while (c < index->fwd_fm.index.sigma &&
663 !bidirectional_search_cycle(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
664 parent_lb, parent_rb, fwd_lb, fwd_rb, rev_lb, rev_rb))
669 if (c != index->fwd_fm.index.sigma)
708 assert(!fwd_iter_last_used);
713 sdsl_char_type c = _last_char + 1;
714 while (c < index->rev_fm.index.sigma &&
715 !bidirectional_search_cycle(index->rev_fm.index, index->rev_fm.index.comp2char[c],
716 parent_lb, parent_rb, rev_lb, rev_rb, fwd_lb, fwd_rb))
721 if (c != index->rev_fm.index.sigma)
751 typename index_t::char_type c;
752 assign_rank(c, index->fwd_fm.index.comp2char[_last_char] - 1);
770 assert(index !=
nullptr);
776 fwd_rb == index->size() - 1));
802 assert(index !=
nullptr);
805 it.parent_lb = parent_lb;
806 it.parent_rb = parent_rb;
807 it.node = {fwd_lb, fwd_rb, depth, _last_char};
810 if (!fwd_iter_last_used)
844 assert(index !=
nullptr);
847 it.parent_lb = parent_lb;
848 it.parent_rb = parent_rb;
849 it.node = {rev_lb, rev_rb, depth, _last_char};
852 if (fwd_iter_last_used)
878 assert(index !=
nullptr && index->text !=
nullptr);
880 size_type const query_begin = offset() - index->fwd_fm.index[fwd_lb];
881 return *index->text | ranges::view::slice(query_begin, query_begin +
query_length());
887 assert(index !=
nullptr && index->text !=
nullptr);
905 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
907 return 1 + fwd_rb - fwd_lb;
923 assert(index !=
nullptr);
925 std::vector<size_type> occ(
count());
926 for (
size_type i = 0; i < occ.size(); ++i)
928 occ[i] = offset() - index->fwd_fm.index[fwd_lb + i];
947 assert(index !=
nullptr);
952 return _offset - index->fwd_fm.index[sa_pos];
constexpr simd_t iota(typename simd_traits< simd_t >::scalar_type const offset)
Fills a seqan3::simd::simd_type vector with the scalar values offset, offset+1, offset+2, ...
Definition: simd_algorithm.hpp:100
constexpr auto transform
A range adaptor that takes an invocable and returns a view of the elements with the invocable applied...
Definition: transform.hpp:95
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition: bi_fm_index_iterator.hpp:477
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition: bi_fm_index_iterator.hpp:584
bi_fm_index_iterator() noexcept=default
Default constructor. Accessing member functions on a default-constructed object is undefined behavior...
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: bi_fm_index_iterator.hpp:651
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: bi_fm_index_iterator.hpp:521
bool operator!=(bi_fm_index_iterator const &rhs) const noexcept
Compares two iterators.
Definition: bi_fm_index_iterator.hpp:314
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:58
rev_iterator to_rev_iterator() const noexcept
Returns a unidirectional seqan3::fm_index_iterator on the reversed text. query() on the returned unid...
Definition: bi_fm_index_iterator.hpp:842
std::vector< size_type > locate() const
Locates the occurrences of the searched query in the text.
Definition: bi_fm_index_iterator.hpp:921
The SeqAn FM Index Iterator.
Definition: fm_index_iterator.hpp:91
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: bi_fm_index_iterator.hpp:435
auto query() const noexcept
Returns the searched query.
Definition: bi_fm_index_iterator.hpp:876
size_type query_length() const noexcept
Returns the depth of the iterator node in the implicit suffix tree, i.e. the length of the sequence s...
Definition: bi_fm_index_iterator.hpp:768
auto operator*() const noexcept
Returns the searched query.
Definition: bi_fm_index_iterator.hpp:885
fwd_iterator to_fwd_iterator() const noexcept
Returns a unidirectional seqan3::fm_index_iterator on the original text. query() on the returned unid...
Definition: bi_fm_index_iterator.hpp:800
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index_iterator.hpp:95
constexpr alphabet_type & assign_rank(alphabet_type &alph, underlying_rank_t< alphabet_type > const rank) requires requires(alphabet_type alph)
Implementation of seqan3::semi_alphabet_concept::assign_rank() that delegates to a member function...
Definition: member_exposure.hpp:110
Meta-header for the alphabet module.
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: bi_fm_index_iterator.hpp:903
index_t::char_type last_char() noexcept
Outputs the rightmost or leftmost character, depending on whether extend_right() or extend_l...
Definition: bi_fm_index_iterator.hpp:747
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a ranges::view is returned ...
Definition: bi_fm_index_iterator.hpp:945
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Provides the bidirectional seqan3::bi_fm_index.
The SeqAn Bidirectional FM Index Iterator.
Definition: bi_fm_index_iterator.hpp:83
index_t index_type
Type of the index.
Definition: bi_fm_index_iterator.hpp:89
Resolves to std::ranges::ImplicitlyConvertibleTo<type1, type2>().
Provides various metafunctions used by the range module.
bool operator==(bi_fm_index_iterator const &rhs) const noexcept
Compares two iterators.
Definition: bi_fm_index_iterator.hpp:291
constexpr auto reverse
A range adaptor that presents the underlying range in reverse order.
Definition: reverse.hpp:93
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition: bi_fm_index_iterator.hpp:386
constexpr underlying_rank_t< alphabet_type > to_rank(alphabet_type const alph) requires requires(alphabet_type alph)
Implementation of seqan3::semi_alphabet_concept::to_rank() that delegates to a member function...
Definition: member_exposure.hpp:97
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: bi_fm_index_iterator.hpp:338
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition: bi_fm_index_iterator.hpp:704