17 #include <sdsl/suffix_trees.hpp> 54 template <
typename index_t>
64 using size_type =
typename index_type::size_type;
73 index_type::text_layout_mode,
74 typename index_type::sdsl_index_type>>;
77 index_type::text_layout_mode,
78 typename index_type::sdsl_index_type>>;
83 using sdsl_char_type =
typename index_type::sdsl_char_type;
85 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
87 using index_alphabet_type =
typename index_t::alphabet_type;
106 sdsl_sigma_type sigma;
123 sdsl_char_type _last_char;
134 bool fwd_cursor_last_used =
false;
145 template <detail::sdsl_index csa_t>
146 bool bidirectional_search(csa_t
const & csa, sdsl_char_type
const c,
150 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
151 assert(r_fwd + 1 >= l_fwd);
152 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
154 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
159 cc = csa.char2comp[c];
160 if (cc == 0 && c > 0)
165 if (r_fwd + 1 - l_fwd == csa.size())
169 _r_fwd = csa.C[cc + 1] - 1;
175 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
176 size_type const rank_l = std::get<0>(r_s_b);
177 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
178 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
179 _l_fwd = c_begin + rank_l;
180 _r_fwd = c_begin + rank_r;
185 if (_r_fwd >= _l_fwd)
191 assert(r_fwd + 1 >= l_fwd);
192 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
199 template <detail::sdsl_index csa_t>
200 bool bidirectional_search_cycle(csa_t
const & csa, sdsl_char_type
const c,
205 assert((l_parent <= r_parent) && (r_parent < csa.size()));
211 c_begin = csa.C[csa.char2comp[c]];
213 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
215 b = std::get<2>(r_s_b),
216 rank_l = std::get<0>(r_s_b),
217 rank_r = r_parent - l_parent - s - b + rank_l;
219 size_type const _l_fwd = c_begin + rank_l;
220 size_type const _r_fwd = c_begin + rank_r;
222 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
224 if (_r_fwd >= _l_fwd)
230 assert(r_fwd + 1 >= l_fwd);
231 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
254 fwd_lb(0), fwd_rb(_index.size() - 1),
255 rev_lb(0), rev_rb(_index.size() - 1),
256 sigma(_index.fwd_fm.index.sigma - index_t::text_layout_mode),
275 assert(index !=
nullptr);
277 assert(!(fwd_lb == rhs.fwd_lb && fwd_rb == rhs.fwd_rb && depth == rhs.depth) ||
279 (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb && _last_char == rhs._last_char));
281 return std::tie(fwd_lb, fwd_rb, depth) ==
std::tie(rhs.fwd_lb, rhs.fwd_rb, rhs.depth);
298 assert(index !=
nullptr);
300 return !(*
this == rhs);
323 fwd_cursor_last_used =
true;
326 assert(index !=
nullptr);
328 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
330 sdsl_char_type c = 1;
332 !bidirectional_search(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
333 fwd_lb, fwd_rb, rev_lb, rev_rb))
340 parent_lb = new_parent_lb;
341 parent_rb = new_parent_rb;
371 fwd_cursor_last_used =
false;
374 assert(index !=
nullptr);
376 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
378 sdsl_char_type c = 1;
380 !bidirectional_search(index->rev_fm.index, index->rev_fm.index.comp2char[c],
381 rev_lb, rev_rb, fwd_lb, fwd_rb))
388 parent_lb = new_parent_lb;
389 parent_rb = new_parent_rb;
412 template <
typename char_t>
416 fwd_cursor_last_used =
true;
420 "The character must be convertible to the alphabet of the index.");
422 assert(index !=
nullptr);
424 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
426 auto c_char =
to_rank(static_cast<index_alphabet_type>(c)) + 1;
427 if (bidirectional_search(index->fwd_fm.index, c_char, fwd_lb, fwd_rb, rev_lb, rev_rb))
429 parent_lb = new_parent_lb;
430 parent_rb = new_parent_rb;
453 template <
typename char_t>
457 fwd_cursor_last_used =
false;
461 "The character must be convertible to the alphabet of the index.");
463 assert(index !=
nullptr);
465 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
467 auto c_char =
to_rank(static_cast<index_alphabet_type>(c)) + 1;
468 if (bidirectional_search(index->rev_fm.index, c_char, rev_lb, rev_rb, fwd_lb, fwd_rb))
470 parent_lb = new_parent_lb;
471 parent_rb = new_parent_rb;
497 template <std::ranges::range seq_t>
500 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
502 "The alphabet of the sequence must be convertible to the alphabet of the index.");
504 assert(index !=
nullptr);
506 auto first = std::ranges::begin(
seq);
507 auto last = std::ranges::end(
seq);
510 fwd_cursor_last_used = (first != last);
513 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb, _rev_lb = rev_lb, _rev_rb = rev_rb;
514 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
515 sdsl_char_type c = _last_char;
518 for (
auto it = first; it != last; ++len, ++it)
520 c =
to_rank(static_cast<index_alphabet_type>(*it)) + 1;
522 new_parent_lb = _fwd_lb;
523 new_parent_rb = _fwd_rb;
524 if (!bidirectional_search(index->fwd_fm.index, c, _fwd_lb, _fwd_rb, _rev_lb, _rev_rb))
533 parent_lb = new_parent_lb;
534 parent_rb = new_parent_rb;
562 template <std::ranges::range seq_t>
565 static_assert(std::ranges::bidirectional_range<seq_t>,
"The query must model bidirectional_range.");
567 "The alphabet of the sequence must be convertible to the alphabet of the index.");
568 assert(index !=
nullptr);
570 auto rev_seq = std::views::reverse(
seq);
571 auto first = std::ranges::begin(rev_seq);
572 auto last = std::ranges::end(rev_seq);
576 fwd_cursor_last_used =
false;
579 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb,
580 _rev_lb = rev_lb, _rev_rb = rev_rb;
581 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
582 sdsl_char_type c = _last_char;
585 for (
auto it = first; it != last; ++len, ++it)
587 c =
to_rank(static_cast<index_alphabet_type>(*it)) + 1;
589 new_parent_lb = _rev_lb;
590 new_parent_rb = _rev_rb;
591 if (!bidirectional_search(index->rev_fm.index, c, _rev_lb, _rev_rb, _fwd_lb, _fwd_rb))
600 parent_lb = new_parent_lb;
601 parent_rb = new_parent_rb;
638 assert(fwd_cursor_last_used);
643 sdsl_char_type c = _last_char + 1;
646 !bidirectional_search_cycle(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
647 parent_lb, parent_rb, fwd_lb, fwd_rb, rev_lb, rev_rb))
691 assert(!fwd_cursor_last_used);
696 sdsl_char_type c = _last_char + 1;
698 !bidirectional_search_cycle(index->rev_fm.index, index->rev_fm.index.comp2char[c],
699 parent_lb, parent_rb, rev_lb, rev_rb, fwd_lb, fwd_rb))
734 return index->fwd_fm.index.comp2char[_last_char] - 1;
751 assert(index !=
nullptr);
757 fwd_rb == index->size() - 1));
783 assert(index !=
nullptr);
786 cur.parent_lb = parent_lb;
787 cur.parent_rb = parent_rb;
788 cur.node = {fwd_lb, fwd_rb, depth, _last_char};
791 if (!fwd_cursor_last_used)
828 assert(index !=
nullptr);
831 cur.parent_lb = parent_lb;
832 cur.parent_rb = parent_rb;
833 cur.node = {rev_lb, rev_rb, depth, _last_char};
836 if (fwd_cursor_last_used)
863 template <std::ranges::range text_t>
869 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
870 static_assert(dimension_v<text_t> == 1,
"The input cannot be a text collection.");
872 "The alphabet types of the given text and index differ.");
873 assert(index !=
nullptr);
875 size_type const query_begin = offset() - index->fwd_fm.index[fwd_lb];
880 template <std::ranges::range text_t>
886 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
887 static_assert(dimension_v<text_t> == 2,
"The input must be a text collection.");
889 "The alphabet types of the given text and index differ.");
890 assert(index !=
nullptr);
892 size_type const loc = offset() - index->fwd_fm.index[fwd_lb];
893 size_type const query_begin = loc - index->fwd_fm.text_begin_rs.rank(loc + 1) + 1;
910 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
912 return 1 + fwd_rb - fwd_lb;
931 assert(index !=
nullptr);
936 occ[i] = offset() - index->fwd_fm.index[fwd_lb + i];
947 assert(index !=
nullptr);
953 size_type loc = offset() - index->fwd_fm.index[fwd_lb + i];
954 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
955 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
978 assert(index !=
nullptr);
980 return std::views::iota(fwd_lb, fwd_lb +
count())
983 return _offset - index->fwd_fm.index[sa_pos];
993 assert(index !=
nullptr);
995 return std::views::iota(fwd_lb, fwd_lb +
count())
998 return _offset - index->fwd_fm.index[sa_pos];
1002 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
1003 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition: bi_fm_index_cursor.hpp:454
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition: bi_fm_index_cursor.hpp:368
bool operator!=(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:296
index_t index_type
Type of the index.
Definition: bi_fm_index_cursor.hpp:59
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition: bi_fm_index_cursor.hpp:687
bi_fm_index_cursor & operator=(bi_fm_index_cursor const &) noexcept=default
Defaulted.
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: bi_fm_index_cursor.hpp:413
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:142
rev_cursor to_rev_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the reversed text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:826
~bi_fm_index_cursor()=default
Defaulted.
Provides seqan3::views::join.
std::vector< size_type > locate() const
Locates the occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:926
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results...
Definition: traits.hpp:307
The text is a single range.
Definition: concept.hpp:84
The main SeqAn3 namespace.
The "sequence", usually a range of nucleotides or amino acids.
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:908
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a ranges::view is returned ...
Definition: bi_fm_index_cursor.hpp:973
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:406
bool operator==(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:273
The text is a range of ranges.
Definition: concept.hpp:86
std::vector< std::pair< size_type, size_type > > locate() const
Definition: bi_fm_index_cursor.hpp:942
Meta-header for the alphabet module.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:81
Adaptations of concepts from the Ranges TS.
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (transformation_trait shortcut).
Definition: range.hpp:191
fwd_cursor to_fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:781
Provides the bidirectional seqan3::bi_fm_index.
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index_cursor.hpp:65
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
size_type last_rank() noexcept
Outputs the rightmost respectively leftmost rank depending on whether extend_right() or extend_left()...
Definition: bi_fm_index_cursor.hpp:730
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: bi_fm_index_cursor.hpp:634
The concept std::convertible_to<From, To> specifies that an expression of the type and value category...
Provides seqan3::views::slice.
bi_fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
bi_fm_index_cursor(index_t const &_index) noexcept
Construct from given index.
Definition: bi_fm_index_cursor.hpp:253
Provides various transformation traits used by the range module.
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: bi_fm_index_cursor.hpp:864
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: bi_fm_index_cursor.hpp:320
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: bi_fm_index_cursor.hpp:498
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type...
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition: bi_fm_index_cursor.hpp:563
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
size_type query_length() const noexcept
Returns the depth of the cursor node in the implicit suffix tree, i.e. the length of the sequence sea...
Definition: bi_fm_index_cursor.hpp:749
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:59
The SeqAn FM Index.
Definition: fm_index.hpp:134
T emplace_back(T... args)