46 #include <range/v3/algorithm/copy.hpp> 47 #include <range/v3/utility/iterator.hpp> 61 template <
typename align_config_t>
62 concept semi_global_config_concept = requires (align_config_t & cfg)
// Satisfied when the alignment configuration type (reference stripped) contains
// the align_cfg::id::global element, as reported by has_align_cfg_v.
67 template <
typename align_config_t>
68 concept global_config_concept = has_align_cfg_v<align_cfg::id::global, std::remove_reference_t<align_config_t>>;
// Satisfied when the alignment configuration type (reference stripped) contains
// the align_cfg::id::max_error element, as reported by has_align_cfg_v.
70 template <
typename align_config_t>
71 concept max_errors_concept = has_align_cfg_v<align_cfg::id::max_error, std::remove_reference_t<align_config_t>>;
// Traits concept: the (reference-stripped) traits type must expose a nested
// type named word_type. Any further requirements of the requires-expression
// are not visible in this listing.
77 template <
typename traits_type>
78 concept edit_distance_trait_concept = requires
80 typename std::remove_reference_t<traits_type>::word_type;
// Traits used when the algorithm class is instantiated without an explicit
// traits argument.
86 struct default_edit_distance_trait_type
// Word type the algorithm uses for its vp / vn / bit_masks vectors.
89 using word_type = uint64_t;
100 typename align_config_t,
101 edit_distance_trait_concept traits_t = default_edit_distance_trait_type>
// Edit distance alignment of a query against a database. The configuration
// type selects global vs. semi-global mode and whether a max-error bound is
// used; the traits type selects the word type of the internal vectors.
102 class pairwise_alignment_edit_distance_unbanded
// The score / trace matrix specialisations read private members of this class
// (database, query, config, states), hence the friend declarations.
107 friend alignment_score_matrix<pairwise_alignment_edit_distance_unbanded>;
110 friend alignment_trace_matrix<pairwise_alignment_edit_distance_unbanded>;
// Alignment configuration passed to the constructor.
118 align_config_t config;
// Word type taken from the traits.
122 using word_type =
typename std::remove_reference_t<traits_t>::word_type;
// Scores are plain ints.
124 using score_type = int;
126 using database_type = std::remove_reference_t<database_t>;
128 using query_type = std::remove_reference_t<query_t>;
// Matrix types constructible from *this (see score_matrix() / trace_matrix()).
130 using score_matrix_type = detail::alignment_score_matrix<pairwise_alignment_edit_distance_unbanded>;
132 using trace_matrix_type = detail::alignment_trace_matrix<pairwise_alignment_edit_distance_unbanded>;
// Number of bits in one word_type.
135 static constexpr uint8_t word_size =
sizeof(word_type) * 8;
// Element type of the query with the reference removed.
141 using query_alphabet_type = std::remove_reference_t<reference_t<query_type>>;
// Compile-time switches derived from the configuration type.
147 static constexpr
bool use_max_errors = detail::max_errors_concept<align_config_t>;
149 static constexpr
bool is_semi_global = detail::semi_global_config_concept<align_config_t>;
// Semi-global takes precedence if the configuration satisfies both concepts.
151 static constexpr
bool is_global = detail::global_config_concept<align_config_t> && !is_semi_global;
// Bit OR-ed into the lowest position of the shifted hp in compute_step when no
// carry is used, and the initial carry_hp in large_patterns: 1 in global mode,
// 0 otherwise.
154 static constexpr word_type hp0 = is_global ? 1 : 0;
156 static_assert(8 *
sizeof(word_type) <= 64,
"we assume at most uint64_t as word_type");
// Exactly one of is_global / is_semi_global must hold.
157 static_assert((is_global && !is_semi_global) || (!is_global && is_semi_global),
"Either set global or semi-global");
// Single-bit mask; advance_score tests the word of the current last block
// against it to decide how _score changes.
163 word_type score_mask{0};
// One word per block; both vectors are updated in place by compute_step.
165 std::vector<word_type> vp{};
167 std::vector<word_type> vn{};
// For every alphabet rank and every block: a word whose set bits mark the query
// positions holding that rank (filled in the constructor).
170 std::vector<word_type> bit_masks{};
// Best score seen so far and, in semi-global mode, the database position at
// which it was seen (both maintained by advance_score).
175 score_type _best_score{};
185 database_iterator _best_score_col{};
// Error bound; overwritten from the configuration when use_max_errors is true.
190 score_type max_errors{255};
// Index of the last block processed for each database position.
193 size_t last_block{0};
// Value of score_mask that corresponds to the last row of the query.
195 word_type last_score_mask{};
// Current position in the database and the position at which scanning stops.
199 database_iterator database_it{};
201 database_iterator database_it_end{};
// NOTE(review): these two declarations repeat the names vp / vn from above.
// Given the state_type{vp, vn} initialisation below they presumably are the
// members of state_type, whose declaration line is not part of this listing.
207 std::vector<word_type> vp{};
209 std::vector<word_type> vn{};
// Recorded (vp, vn) snapshots; alignment_score_matrix indexes this by column.
213 std::vector<state_type> states{};
// Appends a copy of the current vp / vn (the enclosing function is not shown).
218 states.push_back(state_type{vp, vn});
// Not default-constructible: an instance always needs database, query and config.
228 pairwise_alignment_edit_distance_unbanded() =
delete;
// Copy / move construction and assignment use the compiler-generated versions.
229 pairwise_alignment_edit_distance_unbanded(pairwise_alignment_edit_distance_unbanded
const &) =
default;
230 pairwise_alignment_edit_distance_unbanded(pairwise_alignment_edit_distance_unbanded &&) =
default;
231 pairwise_alignment_edit_distance_unbanded & operator=(pairwise_alignment_edit_distance_unbanded
const &) =
default;
232 pairwise_alignment_edit_distance_unbanded & operator=(pairwise_alignment_edit_distance_unbanded &&) =
default;
// Stores database, query and configuration and prepares the per-block vectors.
//   _database  sequence that is scanned position by position
//   _query     sequence whose positions form the bits of the word vectors
//   _config    alignment configuration (mode, optional max-error bound)
// _score and _best_score start at query.size(); the iterators span the whole
// database.
239 pairwise_alignment_edit_distance_unbanded(database_t && _database, query_t && _query, align_config_t _config) :
240 database{std::forward<database_t>(_database)},
241 query{std::forward<query_t>(_query)},
242 config{std::forward<align_config_t>(_config)},
243 _score{
static_cast<score_type
>(query.size())},
244 _best_score{
static_cast<score_type
>(query.size())},
245 _best_score_col{ranges::begin(database)},
246 database_it{ranges::begin(database)},
247 database_it_end{ranges::end(database)}
249 static constexpr
size_t alphabet_size = alphabet_size_v<query_alphabet_type>;
// Read the error bound from the configuration; it must not be negative.
251 if constexpr(use_max_errors)
253 max_errors = get<align_cfg::id::max_error>(config);
254 assert(max_errors >= score_type{0});
// Number of words needed for the query: ceil(query.size() / word_size).
// NOTE(review): query.size() - 1 wraps around for an empty query (here and in
// the max-error branch below) — confirm callers never pass one.
257 size_t block_count = (query.size() - 1 + word_size) / word_size;
// Bit of the last query position within its word.
258 score_mask = (word_type)1 << ((query.size() - 1 + word_size) % word_size);
259 last_score_mask = score_mask;
260 last_block = block_count - 1;
// With an error bound, start at row min(max_errors, query.size() - 1) instead
// of the last row, and set the score to that row index + 1.
262 if constexpr(use_max_errors)
266 size_t localMaxErrors = std::min<size_t>(max_errors, query.size() - 1);
267 score_mask = (word_type)1 << (localMaxErrors % word_size);
268 last_block = std::min(localMaxErrors / word_size, block_count - 1);
269 _score = localMaxErrors + 1;
270 _best_score = _score;
// vp starts with all bits set; vn0 is declared on a line not shown here.
273 word_type vp0{
static_cast<word_type
>(~0)};
276 vp.resize(block_count, vp0);
277 vn.resize(block_count, vn0);
// block_count words per alphabet rank, plus one extra rank's worth.
278 bit_masks.resize((alphabet_size + 1) * block_count, 0);
// For query position j, set bit (j % word_size) in word (j / word_size) of the
// row that belongs to the rank of query[j].
281 for (
size_t j = 0; j < query.size(); j++)
283 size_t i = block_count *
to_rank(query[j]) + j / word_size;
284 bit_masks[i] |= (word_type)1 << (j % word_size);
// Updates one word of the vp / vn vectors for the current database position.
//   with_overflow_check  true: consume and produce the carry_* values so that
//                        several words can be chained; false: ignore carries
//                        and feed hp0 into the lowest bit instead.
//   b                    bit mask of query positions matching the current
//                        database character (taken from bit_masks)
//   hp, hn               outputs, passed on to advance_score by the callers
//   vp, vn               the word being updated, in place
//   carry_d0/hp/hn       carries between consecutive words
// Several statements of the body (declarations and the definitions of x, d0
// and hn) are not part of this listing.
294 template <
bool with_overflow_check>
295 void compute_step(word_type b, word_type & hp, word_type & hn, word_type & vp, word_type & vn, word_type & carry_d0, word_type & carry_hp, word_type & carry_hn)
300 t = vp + (x & vp) + (with_overflow_check ? carry_d0 : 0);
304 hp = vn | ~(vp | d0);
// Detect unsigned wrap-around of the addition above: with a carry-in of one the
// sum wrapped iff t <= vp, without a carry-in iff t < vp.
306 if constexpr(with_overflow_check)
307 carry_d0 = (carry_d0 != (word_type)0) ? t <= vp : t < vp;
309 x = (hp << 1) | (with_overflow_check ? carry_hp : hp0);
311 vp = (hn << 1) | ~(x | d0) | (with_overflow_check ? carry_hn : 0);
// The top bit of hp / hn becomes the carry into the next word.
313 if constexpr(with_overflow_check)
315 carry_hp = hp >> (word_size - 1);
316 carry_hn = hn >> (word_size - 1);
// Adjusts _score depending on whether P or N has the bit selected by mask set.
// The statements guarded by the two tests are not shown in this listing;
// presumably they step _score by one in opposite directions — confirm.
321 void advance_score(word_type P, word_type N, word_type mask)
323 if ((P & mask) != (word_type)0)
325 else if ((N & mask) != (word_type)0)
// Semi-global mode tracks the best score and its database position. Because the
// comparison is <=, a tie moves the recorded position to the later one.
328 if constexpr(is_semi_global)
330 _best_score_col = (_score <= _best_score) ? database_it : _best_score_col;
331 _best_score = (_score <= _best_score) ? _score : _best_score;
// Helpers that move the (last_block, score_mask) cursor. Most of their bodies,
// including every return statement, are not part of this listing.
// Visible here: a test of score_mask against zero, a special case for global
// mode at block 0, and resetting score_mask to the top bit of a word.
336 bool prev_last_active_cell()
339 if (score_mask != (word_type)0)
342 if (is_global && last_block == 0)
347 score_mask = (word_type)1 << (word_size - 1);
// Body not shown in this listing.
352 void next_last_active_cell()
// Re-establishes _score <= max_errors (asserted at the end).
363 bool update_last_active_cell()
// While the score exceeds the bound: call advance_score with vn / vp exchanged
// relative to the call further down, then step the cursor back.
366 while (!(_score <= max_errors))
368 advance_score(vn[last_block], vp[last_block], score_mask);
369 if (!prev_last_active_cell())
// True when the cursor sits on the last query row of the last word.
373 if ((score_mask == last_score_mask) && (last_block == vp.size() - 1))
377 next_last_active_cell();
378 advance_score(vp[last_block], vn[last_block], score_mask);
387 assert(_score <= max_errors);
// Scan loops, defined out of class further down.
398 inline bool small_patterns();
401 inline bool large_patterns();
// The statements below belong to a member function whose signature is not part
// of this listing.
// Global mode with an error bound: shorten the scanned range so that at most
// query.size() + max_errors + 1 database positions are processed.
407 if constexpr(use_max_errors && is_global)
409 size_t max_length = query.size() + max_errors + 1;
410 size_t haystack_length = std::min(database.size(), max_length);
411 database_it_end -= database.size() - haystack_length;
// In global mode the best score is set to the current score.
421 if constexpr(is_global)
422 _best_score = _score;
// Fills a tuple-like result object and returns it by reference. How many
// entries are written depends on std::tuple_size_v<result_type>:
//   >= 2  score
//   >= 3  end coordinate
//   >= 4  begin coordinate
//   >= 5  trace
431 template <
typename result_type>
432 result_type & operator()(result_type & res)
435 if constexpr (std::tuple_size_v<result_type> >= 2)
437 get<align_result_key::score>(res) =
score();
440 if constexpr (std::tuple_size_v<result_type> >= 3)
442 get<align_result_key::end>(res) = end_coordinate();
// The trace matrix is shared by the begin-coordinate and trace computations.
445 [[maybe_unused]] alignment_trace_matrix matrix = trace_matrix();
446 if constexpr (std::tuple_size_v<result_type> >= 4)
448 get<align_result_key::begin>(res) = alignment_begin_coordinate(matrix, get<align_result_key::end>(res));
451 if constexpr (std::tuple_size_v<result_type> >= 5)
453 get<align_result_key::trace>(res) = alignment_trace(database, query, matrix, get<align_result_key::end>(res));
// Result accessors.
// NOTE(review): score_matrix() / trace_matrix() construct matrix objects from
// *this, and the score matrix constructor allocates a std::vector. These
// functions and their callers here are declared noexcept, so an allocation
// failure would terminate the program — confirm this is intended.
// Body not shown in this listing.
459 score_type
score() const noexcept
// Builds the score matrix from this alignment object.
465 score_matrix_type score_matrix() const noexcept
467 return score_matrix_type{*
this};
// Builds the trace matrix from this alignment object.
471 trace_matrix_type trace_matrix() const noexcept
473 return trace_matrix_type{*
this};
// Begin coordinate, derived from a freshly built trace matrix and the end
// coordinate.
477 alignment_coordinate begin_coordinate() const noexcept
479 alignment_coordinate
end = end_coordinate();
480 return alignment_begin_coordinate(trace_matrix(), end);
// End coordinate: the row is the last query position; the column is the last
// database position, or in semi-global mode the offset of _best_score_col from
// the start of the database.
484 alignment_coordinate end_coordinate() const noexcept
486 size_t col = database.size() - 1;
487 if constexpr(is_semi_global)
488 col = std::distance(
begin(database), _best_score_col);
490 return {col, query.size() - 1};
// Trace ending at end_coordinate(), computed from a freshly built trace matrix.
494 auto trace() const noexcept
496 return alignment_trace(database, query, trace_matrix(), end_coordinate());
// Scan loop that only ever touches vp[0] / vn[0], i.e. a single word.
// bit_masks is indexed by the rank alone, which matches the constructor's
// layout (block_count * rank + block) only when there is exactly one block.
// Parts of the loop body, including the return statements, are not part of
// this listing.
500 template <
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
501 bool pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
504 while (database_it != database_it_end)
// Match mask for the current database character.
508 word_type b = bit_masks[
to_rank((query_alphabet_type) *database_it)];
// No carries are needed for a single word; `_` is a placeholder argument.
509 compute_step<false>(b, hp, hn, vp[0], vn[0], _, _, _);
510 advance_score(hp, hn, score_mask);
// With an error bound, on_hit() is consulted whenever the score is within it.
512 if constexpr(use_max_errors)
513 if (_score <= max_errors && on_hit())
// Scan loop for queries spanning several words. Parts of the loop body,
// including the return statements, are not part of this listing.
527 template <
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
528 bool pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
530 while (database_it != database_it_end)
// Carries are reset for every database position; carry_hp starts at hp0.
533 word_type carry_d0{0}, carry_hp{hp0}, carry_hn{0};
// Start of the bit_masks row for the current database character.
534 size_t block_offset = vp.size() *
to_rank((query_alphabet_type) *database_it);
// Process words 0..last_block, chaining the carries from word to word.
537 for (
size_t current_block = 0; current_block <= last_block; current_block++)
539 word_type b = bit_masks[block_offset + current_block];
540 compute_step<true>(b, hp, hn, vp[current_block], vn[current_block], carry_d0, carry_hp, carry_hn);
542 advance_score(hp, hn, score_mask);
544 if constexpr(use_max_errors)
// One more word is processed when score_mask is the top bit of its word and a
// further word exists.
547 bool additional_block = score_mask >> (word_size - 1);
548 if (last_block + 1 == vp.size())
549 additional_block =
false;
551 if (additional_block)
553 size_t current_block = last_block + 1;
554 word_type b = bit_masks[block_offset + current_block];
555 compute_step<false>(b, hp, hn, vp[current_block], vn[current_block], carry_d0, carry_hp, carry_hn);
559 if (update_last_active_cell())
// Deduction guide: database, query and configuration given; the traits
// parameter falls back to its default.
578 template<
typename database_t,
typename query_t,
typename config_t>
579 pairwise_alignment_edit_distance_unbanded(database_t && database, query_t && query, config_t config)
580 -> pairwise_alignment_edit_distance_unbanded<database_t, query_t, config_t>;
// Deduction guide: additionally deduces the traits type from a fourth argument.
582 template<
typename database_t,
typename query_t,
typename config_t,
typename traits_t>
583 pairwise_alignment_edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
584 -> pairwise_alignment_edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
// Score matrix specialisation for the unbanded edit distance algorithm. It
// derives from the score matrix over std::vector<score_type>.
588 template<
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
589 class alignment_score_matrix<pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>>
590 :
public alignment_score_matrix<std::vector<typename pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::score_type>>
// Shorthands for the algorithm type and the types taken from it.
594 using alignment_type = pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>;
595 using score_type =
typename alignment_type::score_type;
596 using base_score_matrix_type = alignment_score_matrix<std::vector<score_type>>;
597 using word_type =
typename alignment_type::word_type;
// Number of bits in one word_type.
599 static constexpr
size_t word_size =
sizeof(word_type)*8;
// All special members use the compiler-generated versions.
606 alignment_score_matrix() =
default;
607 alignment_score_matrix(alignment_score_matrix
const &) =
default;
608 alignment_score_matrix(alignment_score_matrix &&) =
default;
609 alignment_score_matrix & operator=(alignment_score_matrix
const &) =
default;
610 alignment_score_matrix & operator=(alignment_score_matrix &&) =
default;
// Builds the score matrix from the states recorded by the algorithm.
//   alignment  algorithm object; its database, query and states are read
// The scores are stored row-major with (query.size() + 1) rows and
// (database.size() + 1) columns. Row 0 is initialised directly; each further
// row is obtained column-wise by adding the per-row delta (taken from the vp /
// vn bits of that column's state) to the value in the row above.
612 alignment_score_matrix(alignment_type
const & alignment) :
613 base_score_matrix_type
616 size_t _cols = alignment.database.size() + 1;
617 size_t _rows = alignment.query.size() + 1;
618 std::vector<score_type> scores{};
// resize, not reserve: every element is written through operator[] below, and
// indexing a vector beyond its size() is undefined behaviour. reserve only
// changes the capacity and leaves size() at zero.
619 scores.resize(_cols * _rows);
// First row: 0, 1, 2, ... in global mode, all zeros otherwise.
622 for (
size_t col = 0; col < _cols; ++col)
623 scores[col] = alignment_type::is_global ? col : 0;
// deltas(col) returns a callable that looks up the vp / vn bit of a row in the
// state recorded for that column (the state is copied into the closure).
625 auto deltas = [&](
size_t col)
627 return [state = alignment.states[col]](
size_t row)
629 using bitset = std::bitset<word_size>;
// Word holding the row and the bit position inside that word.
631 size_t chunk = row / word_size;
632 size_t row_in_chunk = row % word_size;
633 word_type vp = state.vp[chunk];
634 word_type vn = state.vn[chunk];
636 int8_t p = bitset(vp)[row_in_chunk] ? 1 : 0;
637 int8_t n = bitset(vn)[row_in_chunk] ? 1 : 0;
// Fill the remaining rows column by column from the row above.
642 for (
size_t col = 0; col < _cols; ++col)
644 auto delta = deltas(col);
645 for (
size_t row = 1; row < _rows; ++row)
646 scores[row * _cols + col] = scores[(row - 1) * _cols + col] + delta(row - 1);
// Dimensions handed to the base class: rows, then columns.
651 alignment.query.size() + 1,
652 alignment.database.size() + 1
// Trace matrix specialisation for the unbanded edit distance algorithm. It
// derives from the trace matrix over const references to database and query,
// the configuration type, and the score matrix specialisation for the same
// algorithm.
659 template<
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
660 class alignment_trace_matrix<pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>>
661 :
public alignment_trace_matrix<database_t const &, query_t const &, align_config_t, alignment_score_matrix<pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>>>
// Shorthands for the algorithm type, its score matrix and the base class.
665 using alignment_type = pairwise_alignment_edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>;
666 using score_matrix_type = alignment_score_matrix<alignment_type>;
667 using base_trace_matrix_type = alignment_trace_matrix<database_t const &, query_t const &, align_config_t, score_matrix_type>;
// All special members use the compiler-generated versions.
674 alignment_trace_matrix() =
default;
675 alignment_trace_matrix(alignment_trace_matrix
const &) =
default;
676 alignment_trace_matrix(alignment_trace_matrix &&) =
default;
677 alignment_trace_matrix & operator=(alignment_trace_matrix
const &) =
default;
678 alignment_trace_matrix & operator=(alignment_trace_matrix &&) =
default;
// Builds the trace matrix from an algorithm object: passes its database, query
// and configuration to the base class together with a score matrix constructed
// from the same object.
680 alignment_trace_matrix(alignment_type
const & alignment) :
681 base_trace_matrix_type{alignment.database, alignment.query, alignment.config, score_matrix_type{alignment}}
Contains the declaration of seqan3::detail::alignment_score_matrix.
Meta-header for the alignment configuration module.
Contains various shortcuts for common std::ranges functions.
Continuous gaps in the beginning and end of the first sequence are not scored.
::ranges::iterator_t iterator_t
Alias for ranges::iterator_t. Obtains the iterator type of a range.
Definition: ranges:225
Adaptations of concepts from the Ranges TS.
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:185
Contains the declaration of seqan3::detail::alignment_trace_matrix.
Specifies the requirements of a Range type that is either a std::ranges::View or an lvalue-reference...
Definition: aligned_sequence_concept.hpp:288
Provides seqan3::align_result.
Provides seqan3::detail::alignment_coordinate.
Contains algorithms that operate on seqan3::detail::alignment_trace_matrix.
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:190
constexpr underlying_rank_t< alphabet_type > to_rank(alphabet_type const alph) requires requires(alphabet_type alph)
Implementation of seqan3::semi_alphabet_concept::to_rank() that delegates to a member function...
Definition: member_exposure.hpp:97
constexpr detail::align_config_score_adaptor score
A configuration adaptor for alignment scoring.
Definition: align_config_score.hpp:117