proconlib

This documentation is automatically generated by competitive-verifier/competitive-verifier

View the Project on GitHub anqooqie/proconlib

:warning: Wrapper of atcoder::suffix_array and atcoder::lcp_array (tools/suffix_array.hpp)

It is a wrapper of atcoder::suffix_array and atcoder::lcp_array with additional useful features.

It takes $n$ strings $s_0, s_1, \cdots, s_{n - 1}$. We denote the substring of $s_i$ from the $a$-th character to the $(b - 1)$-th character (0-indexed, both inclusive) by s[i][a..b). Also, we denote $|s_i|$ by |s[i]| and $\sum_{i = 0}^{n - 1} |s_i|$ by $S$.

License

Author

Constructor

(1)
template <typename InputIterator>
suffix_array wrapper(InputIterator begin, InputIterator end);

(2)
template <typename Container>
suffix_array wrapper(std::initializer_list<Container> il);

Constraints

Time Complexity

sa

std::vector<std::pair<int, int>> wrapper.sa;

It is the suffix array of the given strings $s_0, s_1, \cdots, s_{n - 1}$. Here, the suffix array sa is a permutation of $(0, 0), \cdots, (0, |s_0| - 1), \cdots, (n - 1, 0), \cdots, (n - 1, |s_{n - 1}| - 1)$ such that s[sa[i].first][sa[i].second .. |s[sa[i].first]|) <= s[sa[i + 1].first][sa[i + 1].second .. |s[sa[i + 1].first]|) holds for all $i = 0, 1, \cdots, S - 2$.

lcpa

std::vector<int> wrapper.lcpa;

It is the LCP array of the given strings $s_0, s_1, \cdots, s_{n - 1}$. Here, the LCP array is the array of length $\max(0, S - 1)$, such that $i$-th element is the length of the LCP (Longest Common Prefix) of s[sa[i].first][sa[i].second .. |s[sa[i].first]|) and s[sa[i + 1].first][sa[i + 1].second .. |s[sa[i + 1].first]|).

erase_if

template <typename Predicate>
std::size_t wrapper.erase_if(Predicate cond);

It removes every element sa[i] such that cond(sa[i].first, sa[i].second) holds, recalculates lcpa accordingly, and returns the number of elements removed from sa.

Constraints

Time Complexity

Depends on

Verified with

Code

#ifndef TOOLS_SUFFIX_ARRAY_HPP
#define TOOLS_SUFFIX_ARRAY_HPP

#include <vector>
#include <utility>
#include <type_traits>
#include <string>
#include <cstddef>
#include <limits>
#include <algorithm>
#include <iterator>
#include <initializer_list>
#include "atcoder/string.hpp"
#include "tools/chmin.hpp"
#include "tools/chmax.hpp"

namespace tools {

  /**
   * Suffix array and LCP array of a collection of strings
   * (a wrapper of atcoder::suffix_array and atcoder::lcp_array).
   *
   * The input strings are concatenated with a 0 sentinel appended to each of
   * them, the suffix array / LCP array of the concatenation is computed, and
   * the result is mapped back to (string index, position) pairs.
   */
  class suffix_array {
  public:
    // sa[k] = (index of the string, start position) of the k-th suffix in sorted order.
    ::std::vector<::std::pair<int, int>> sa;
    // lcpa[k] = length of the longest common prefix of the suffixes sa[k] and sa[k + 1].
    ::std::vector<int> lcpa;

    suffix_array() = default;

    /**
     * Builds sa and lcpa from the strings in [begin, end).
     *
     * The element type must be std::string or a std::vector of int,
     * unsigned int, long long or unsigned long long.
     * An empty range leaves sa and lcpa empty.
     */
    template <typename InputIterator>
    suffix_array(const InputIterator begin, const InputIterator end) {
      using Container = ::std::decay_t<decltype(*::std::declval<InputIterator>())>;
      static_assert(
        ::std::is_same_v<Container, ::std::string> ||
        ::std::is_same_v<Container, ::std::vector<int>> ||
        ::std::is_same_v<Container, ::std::vector<unsigned int>> ||
        ::std::is_same_v<Container, ::std::vector<long long>> ||
        ::std::is_same_v<Container, ::std::vector<unsigned long long>>
      );

      if (begin == end) return;

      ::std::vector<Container> orig(begin, end);

      // offsets[i] = position in the concatenation at which orig[i] starts
      // (every string is followed by one sentinel, hence the "+ 1").
      ::std::vector<::std::size_t> offsets(orig.size());
      offsets[0] = 0;
      for (::std::size_t i = 1; i < orig.size(); ++i) {
        offsets[i] = offsets[i - 1] + orig[i - 1].size() + 1;
      }

      // Total length of the concatenation, sentinels included.
      const ::std::size_t concat_size = offsets.back() + orig.back().size() + 1;

      ::std::vector<int> concat;
      concat.reserve(concat_size);
      int upper;
      if constexpr (::std::is_same_v<Container, ::std::string>) {
        // Characters are read as unsigned char so that every symbol is
        // non-negative, as atcoder::suffix_array requires.
        // upper starts at 0 so that it stays valid when all strings are empty.
        int lower = ::std::numeric_limits<int>::max();
        upper = 0;
        for (const auto& s : orig) {
          for (const auto s_i : s) {
            const int code = static_cast<unsigned char>(s_i);
            ::tools::chmin(lower, code);
            ::tools::chmax(upper, code);
          }
        }

        // 0 is reserved for the sentinel; shift the alphabet by one only when
        // the input itself contains the character 0.
        const int shift = lower == 0 ? 1 : 0;
        for (const auto& s : orig) {
          for (const auto s_i : s) {
            concat.push_back(static_cast<unsigned char>(s_i) + shift);
          }
          concat.push_back(0);
        }
        upper += shift;
      } else {
        // Coordinate compression: map the distinct values to 1, 2, ... in
        // increasing order, keeping 0 for the sentinel.
        Container compress;
        // Number of real (non-sentinel) symbols. concat is still empty here,
        // so its size() must not be used for this.
        compress.reserve(concat_size - orig.size());
        for (const auto& s : orig) {
          ::std::copy(s.begin(), s.end(), ::std::back_inserter(compress));
        }
        ::std::sort(compress.begin(), compress.end());
        compress.erase(::std::unique(compress.begin(), compress.end()), compress.end());

        for (const auto& s : orig) {
          for (const auto s_i : s) {
            concat.push_back(::std::distance(compress.begin(), ::std::lower_bound(compress.begin(), compress.end(), s_i)) + 1);
          }
          concat.push_back(0);
        }

        upper = compress.size();
      }

      // belongs[p] = index of the string that position p of the concatenation
      // belongs to (the sentinel belongs to the string it terminates).
      ::std::vector<::std::size_t> belongs;
      belongs.reserve(concat.size());
      for (::std::size_t i = 0; i < orig.size(); ++i) {
        for (::std::size_t j = 0; j <= orig[i].size(); ++j) {
          belongs.push_back(i);
        }
      }

      const auto concat_sa = ::atcoder::suffix_array(concat, upper);
      this->lcpa = ::atcoder::lcp_array(concat, concat_sa);
      this->sa.reserve(concat_sa.size());
      for (const auto ij : concat_sa) {
        const int i = belongs[ij];
        const int j = ij - offsets[i];
        this->sa.emplace_back(i, j);
      }

      // A common prefix found in the concatenation may run past the end of a
      // string; clamp it to the remaining length of both suffixes.
      for (::std::size_t i = 0; i < this->lcpa.size(); ++i) {
        ::tools::chmin(this->lcpa[i], orig[this->sa[i].first].size() - this->sa[i].second);
        ::tools::chmin(this->lcpa[i], orig[this->sa[i + 1].first].size() - this->sa[i + 1].second);
      }

      // Drop the entries that start at a sentinel (position == length of the string).
      this->erase_if([&](const int i, const int j) {
        return ::std::cmp_equal(j, orig[i].size());
      });
    }

    /**
     * Same as the iterator-pair constructor, taking the strings from an
     * initializer list.
     */
    template <typename Container>
    suffix_array(const ::std::initializer_list<Container> il) : suffix_array(il.begin(), il.end()) {
    }

    /**
     * Removes every element sa[i] for which pred(sa[i].first, sa[i].second)
     * is true, rebuilds lcpa for the remaining elements, and returns the
     * number of removed elements.
     *
     * pred is evaluated more than once for some elements, so it should
     * return the same answer every time it is asked about the same pair.
     */
    template <typename Predicate>
    ::std::size_t erase_if(const Predicate pred) {
      if (this->sa.empty()) return 0;

      const auto N = this->sa.size();
      ::std::size_t erased = 0;

      // The arrays are compacted in place, one maximal run of consecutive
      // elements sharing the same pred result at a time.
      //   [sa_al, sa_ar)     : the current run in the original sa (read side)
      //   [sa_vl, sa_vr)     : where that run lands in the compacted sa (write side)
      //   [lcpa_al, lcpa_ar) : the lcpa entries associated with the run (read side)
      //   [lcpa_vl, lcpa_vr) : where those entries land in the compacted lcpa (write side)
      ::std::size_t sa_vl = 0, lcpa_vl = 0;
      for (::std::size_t sa_vr = 0, sa_al = 0, sa_ar = 0, lcpa_vr = 0; sa_al < N; sa_vl = sa_vr, sa_al = sa_ar, lcpa_vl = lcpa_vr) {
        const bool removes = pred(this->sa[sa_al].first, this->sa[sa_al].second);

        // Extend the run while pred keeps giving the same answer.
        for (; sa_ar < N && removes == pred(this->sa[sa_ar].first, this->sa[sa_ar].second); ++sa_vr, ++sa_ar);
        if (sa_vl < sa_al) ::std::move(this->sa.begin() + sa_al, this->sa.begin() + sa_ar, this->sa.begin() + sa_vl);

        // A kept run owns the lcpa entries between its own elements; a removed
        // run owns the entries touching any of its elements, i.e. it also
        // takes the one shared with the preceding element.
        const auto lcpa_al = sa_al == 0 ? 0 : sa_al - removes;
        const auto lcpa_ar = ::std::min(sa_ar - !removes, N - 1);
        lcpa_vr = lcpa_vl + (lcpa_ar - lcpa_al);
        if (lcpa_vl < lcpa_al) ::std::move(this->lcpa.begin() + lcpa_al, this->lcpa.begin() + lcpa_ar, this->lcpa.begin() + lcpa_vl);

        if (removes) {
          erased += sa_vr - sa_vl;
          sa_vr = sa_vl;
          if (0 < sa_al && sa_ar < N) {
            // The run has kept neighbours on both sides: their LCP is the
            // minimum over the entries spanning the removed run.
            this->lcpa[lcpa_vl] = *::std::min_element(this->lcpa.begin() + lcpa_vl, this->lcpa.begin() + lcpa_vr);
            lcpa_vr = lcpa_vl + 1;
          } else {
            // The run touches an end of sa: nothing remains to be paired up.
            lcpa_vr = lcpa_vl;
          }
        }
      }

      this->sa.erase(this->sa.begin() + sa_vl, this->sa.end());
      this->lcpa.erase(this->lcpa.begin() + lcpa_vl, this->lcpa.end());

      return erased;
    }
  };
}

#endif
#line 1 "tools/suffix_array.hpp"



#include <vector>
#include <utility>
#include <type_traits>
#include <string>
#include <cstddef>
#include <limits>
#include <algorithm>
#include <iterator>
#include <initializer_list>
#line 1 "lib/ac-library/atcoder/string.hpp"



#line 5 "lib/ac-library/atcoder/string.hpp"
#include <cassert>
#include <numeric>
#line 9 "lib/ac-library/atcoder/string.hpp"

namespace atcoder {

namespace internal {

// Builds the suffix array of s by sorting all start positions with a direct
// element-by-element comparison of the suffixes.
std::vector<int> sa_naive(const std::vector<int>& s) {
    const int len = static_cast<int>(s.size());

    // Returns true when the suffix starting at a sorts before the one starting at b.
    const auto suffix_less = [&s, len](int a, int b) {
        if (a == b) return false;
        for (; a < len && b < len; ++a, ++b) {
            if (s[a] != s[b]) return s[a] < s[b];
        }
        // One suffix is a prefix of the other: the one that ran out first is smaller.
        return a == len;
    };

    std::vector<int> order(len);
    std::iota(order.begin(), order.end(), 0);
    std::sort(order.begin(), order.end(), suffix_less);
    return order;
}

// Builds the suffix array of s by prefix doubling: in each round the suffixes
// are sorted by their first 2 * width elements, using the ranks obtained for
// the first width elements in the previous round.
std::vector<int> sa_doubling(const std::vector<int>& s) {
    const int len = static_cast<int>(s.size());
    std::vector<int> order(len);
    std::vector<int> rank = s;
    std::vector<int> next_rank(len);
    std::iota(order.begin(), order.end(), 0);

    for (int width = 1; width < len; width *= 2) {
        // Rank of the second half of the key; -1 when it lies past the end.
        const auto second_key = [&](int pos) {
            return pos + width < len ? rank[pos + width] : -1;
        };
        const auto pair_less = [&](int a, int b) {
            if (rank[a] != rank[b]) return rank[a] < rank[b];
            return second_key(a) < second_key(b);
        };

        std::sort(order.begin(), order.end(), pair_less);

        // Re-rank: neighbours with equal keys share a rank.
        next_rank[order[0]] = 0;
        for (int i = 1; i < len; ++i) {
            const int step = pair_less(order[i - 1], order[i]) ? 1 : 0;
            next_rank[order[i]] = next_rank[order[i - 1]] + step;
        }
        std::swap(next_rank, rank);
    }
    return order;
}

// SA-IS, linear-time suffix array construction
// Reference:
// G. Nong, S. Zhang, and W. H. Chan,
// Two Efficient Algorithms for Linear Time Suffix Array Construction
//
// Returns the suffix array of s.
// Every s[i] is used as an index into bucket arrays of size upper + 1, so the
// caller must guarantee 0 <= s[i] <= upper.
// Inputs shorter than THRESHOLD_NAIVE are handled by sa_naive and inputs
// shorter than THRESHOLD_DOUBLING by sa_doubling.
template <int THRESHOLD_NAIVE = 10, int THRESHOLD_DOUBLING = 40>
std::vector<int> sa_is(const std::vector<int>& s, int upper) {
    int n = int(s.size());
    // Trivial sizes are answered directly.
    if (n == 0) return {};
    if (n == 1) return {0};
    if (n == 2) {
        if (s[0] < s[1]) {
            return {0, 1};
        } else {
            return {1, 0};
        }
    }
    if (n < THRESHOLD_NAIVE) {
        return sa_naive(s);
    }
    if (n < THRESHOLD_DOUBLING) {
        return sa_doubling(s);
    }

    std::vector<int> sa(n);
    // ls[i] is true when suffix i is smaller than suffix i + 1 ("S-type"),
    // false otherwise ("L-type"). The last position keeps the default false.
    std::vector<bool> ls(n);
    for (int i = n - 2; i >= 0; i--) {
        ls[i] = (s[i] == s[i + 1]) ? ls[i + 1] : (s[i] < s[i + 1]);
    }
    // After the two loops below:
    //   sum_l[c] = number of positions whose value is smaller than c
    //              (start of the bucket of c),
    //   sum_s[c] = sum_l[c] + number of L-type positions with value c
    //              (start of the S-type part of that bucket).
    std::vector<int> sum_l(upper + 1), sum_s(upper + 1);
    for (int i = 0; i < n; i++) {
        if (!ls[i]) {
            sum_s[s[i]]++;
        } else {
            // An S-type position is followed by a larger value somewhere, so
            // s[i] < upper and the index s[i] + 1 stays in range.
            sum_l[s[i] + 1]++;
        }
    }
    for (int i = 0; i <= upper; i++) {
        sum_s[i] += sum_l[i];
        if (i < upper) sum_l[i + 1] += sum_s[i];
    }

    // Induced sorting: given the LMS positions in lms (in the order in which
    // they should be seeded), fills sa.
    auto induce = [&](const std::vector<int>& lms) {
        std::fill(sa.begin(), sa.end(), -1);
        std::vector<int> buf(upper + 1);
        // Step 1: seed the LMS positions at the start of the S-type part of
        // their buckets.
        std::copy(sum_s.begin(), sum_s.end(), buf.begin());
        for (auto d : lms) {
            if (d == n) continue;
            sa[buf[s[d]]++] = d;
        }
        // Step 2: left-to-right scan, placing L-type predecessors at the
        // front of their buckets.
        std::copy(sum_l.begin(), sum_l.end(), buf.begin());
        sa[buf[s[n - 1]]++] = n - 1;
        for (int i = 0; i < n; i++) {
            int v = sa[i];
            if (v >= 1 && !ls[v - 1]) {
                sa[buf[s[v - 1]]++] = v - 1;
            }
        }
        // Step 3: right-to-left scan, placing S-type predecessors from the
        // back of their buckets (sum_l[c + 1] is the end of the bucket of c).
        std::copy(sum_l.begin(), sum_l.end(), buf.begin());
        for (int i = n - 1; i >= 0; i--) {
            int v = sa[i];
            if (v >= 1 && ls[v - 1]) {
                sa[--buf[s[v - 1] + 1]] = v - 1;
            }
        }
    };

    // lms_map[i] = index of position i among the LMS positions (an S-type
    // position directly preceded by an L-type one), or -1 if i is not LMS.
    std::vector<int> lms_map(n + 1, -1);
    int m = 0;
    for (int i = 1; i < n; i++) {
        if (!ls[i - 1] && ls[i]) {
            lms_map[i] = m++;
        }
    }
    // lms = the LMS positions in increasing order of position.
    std::vector<int> lms;
    lms.reserve(m);
    for (int i = 1; i < n; i++) {
        if (!ls[i - 1] && ls[i]) {
            lms.push_back(i);
        }
    }

    // First pass: seeded with the LMS positions in text order.
    induce(lms);

    if (m) {
        // Collect the LMS positions in the order in which they appear in sa.
        std::vector<int> sorted_lms;
        sorted_lms.reserve(m);
        for (int v : sa) {
            if (lms_map[v] != -1) sorted_lms.push_back(v);
        }
        // rec_s assigns a name to every LMS position: neighbours in
        // sorted_lms whose LMS substrings are identical share a name.
        std::vector<int> rec_s(m);
        int rec_upper = 0;
        rec_s[lms_map[sorted_lms[0]]] = 0;
        for (int i = 1; i < m; i++) {
            int l = sorted_lms[i - 1], r = sorted_lms[i];
            // The LMS substring runs up to the next LMS position (or n).
            int end_l = (lms_map[l] + 1 < m) ? lms[lms_map[l] + 1] : n;
            int end_r = (lms_map[r] + 1 < m) ? lms[lms_map[r] + 1] : n;
            bool same = true;
            if (end_l - l != end_r - r) {
                same = false;
            } else {
                while (l < end_l) {
                    if (s[l] != s[r]) {
                        break;
                    }
                    l++;
                    r++;
                }
                if (l == n || s[l] != s[r]) same = false;
            }
            if (!same) rec_upper++;
            rec_s[lms_map[sorted_lms[i]]] = rec_upper;
        }

        // Sort the reduced string recursively.
        auto rec_sa =
            sa_is<THRESHOLD_NAIVE, THRESHOLD_DOUBLING>(rec_s, rec_upper);

        // Translate the order of the reduced string back to LMS positions
        // and run the induced sort again with that seed order.
        for (int i = 0; i < m; i++) {
            sorted_lms[i] = lms[rec_sa[i]];
        }
        induce(sorted_lms);
    }
    return sa;
}

}  // namespace internal

// Returns the suffix array of s, whose elements must all lie in [0, upper].
// The preconditions are checked with assert before delegating to SA-IS.
std::vector<int> suffix_array(const std::vector<int>& s, int upper) {
    assert(0 <= upper);
    for (auto it = s.begin(); it != s.end(); ++it) {
        const int d = *it;
        assert(0 <= d && d <= upper);
    }
    return internal::sa_is(s, upper);
}

// Returns the suffix array of s for any element type comparable with < and !=.
// The elements are first replaced by their rank among the distinct values
// (0, 1, ..., now), which gives SA-IS a compact integer alphabet.
template <class T> std::vector<int> suffix_array(const std::vector<T>& s) {
    int n = int(s.size());
    // idx = positions of s ordered by element value.
    std::vector<int> idx(n);
    // Qualified with std:: so that name lookup does not depend on ADL through
    // the vector iterator type.
    std::iota(idx.begin(), idx.end(), 0);
    std::sort(idx.begin(), idx.end(), [&](int l, int r) { return s[l] < s[r]; });
    std::vector<int> s2(n);
    int now = 0;
    for (int i = 0; i < n; i++) {
        // A new rank starts whenever the value changes in sorted order.
        if (i && s[idx[i - 1]] != s[idx[i]]) now++;
        s2[idx[i]] = now;
    }
    return internal::sa_is(s2, now);
}

// Returns the suffix array of the byte string s.
// Each character is converted through unsigned char, so the symbols always
// lie in [0, 255] (the alphabet bound passed to SA-IS) even where plain char
// is signed; a direct char -> int conversion would produce negative bucket
// indices for bytes >= 0x80. This is also the order std::string comparison uses.
std::vector<int> suffix_array(const std::string& s) {
    int n = int(s.size());
    std::vector<int> s2(n);
    for (int i = 0; i < n; i++) {
        s2[i] = static_cast<unsigned char>(s[i]);
    }
    return internal::sa_is(s2, 255);
}

// Reference:
// T. Kasai, G. Lee, H. Arimura, S. Arikawa, and K. Park,
// Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its
// Applications
//
// Given s (non-empty) and its suffix array sa, returns the array of length
// |s| - 1 whose k-th element is the length of the longest common prefix of
// the suffixes starting at sa[k] and sa[k + 1].
template <class T>
std::vector<int> lcp_array(const std::vector<T>& s,
                           const std::vector<int>& sa) {
    const int n = static_cast<int>(s.size());
    assert(n >= 1);

    // Inverse permutation of sa: rank_of[p] = index of suffix p in sa.
    std::vector<int> rank_of(n);
    for (int r = 0; r < n; ++r) {
        rank_of[sa[r]] = r;
    }

    std::vector<int> result(n - 1);
    int matched = 0;
    for (int pos = 0; pos < n; ++pos) {
        // Moving from suffix pos - 1 to suffix pos loses at most one matched element.
        if (matched > 0) --matched;
        const int r = rank_of[pos];
        if (r == 0) continue;
        const int prev = sa[r - 1];
        while (prev + matched < n && pos + matched < n &&
               !(s[prev + matched] != s[pos + matched])) {
            ++matched;
        }
        result[r - 1] = matched;
    }
    return result;
}

// String overload: widens every character to int and forwards to the
// vector version of lcp_array.
std::vector<int> lcp_array(const std::string& s, const std::vector<int>& sa) {
    std::vector<int> codes;
    codes.reserve(s.size());
    for (const char c : s) {
        codes.push_back(c);
    }
    return lcp_array(codes, sa);
}

// Reference:
// D. Gusfield,
// Algorithms on Strings, Trees, and Sequences: Computer Science and
// Computational Biology
//
// Returns the array z where z[i] is the length of the longest common prefix
// of s and the suffix of s starting at i (so z[0] = |s|).
template <class T> std::vector<int> z_algorithm(const std::vector<T>& s) {
    const int len = static_cast<int>(s.size());
    if (len == 0) return {};

    std::vector<int> z(len);
    z[0] = 0;
    // best = start of the match found so far that reaches furthest to the right.
    int best = 0;
    for (int i = 1; i < len; ++i) {
        int match = 0;
        if (best + z[best] > i) {
            // i lies inside that match: reuse the value computed for the
            // mirrored position, capped at the end of the match.
            match = std::min(best + z[best] - i, z[i - best]);
        }
        while (i + match < len && s[match] == s[i + match]) {
            ++match;
        }
        z[i] = match;
        if (best + z[best] < i + match) best = i;
    }
    z[0] = len;
    return z;
}

// String overload: widens every character to int and forwards to the
// vector version of z_algorithm.
std::vector<int> z_algorithm(const std::string& s) {
    std::vector<int> codes;
    codes.reserve(s.size());
    for (const char c : s) {
        codes.push_back(c);
    }
    return z_algorithm(codes);
}

}  // namespace atcoder


#line 1 "tools/chmin.hpp"



#line 6 "tools/chmin.hpp"

namespace tools {

  // Assigns rhs to lhs when rhs is smaller than lhs and returns whether the
  // assignment happened.
  // When both operands are integral the comparison is made on the
  // mathematical values, so mixing signed and unsigned types is safe. It is
  // spelled out here instead of calling std::cmp_less because std::cmp_less
  // rejects character types and bool, which std::is_integral_v accepts.
  template <typename M, typename N>
  bool chmin(M& lhs, const N& rhs) {
    bool updated;
    if constexpr (::std::is_integral_v<M> && ::std::is_integral_v<N>) {
      if constexpr (::std::is_signed_v<M> == ::std::is_signed_v<N>) {
        updated = rhs < lhs;
      } else if constexpr (::std::is_signed_v<N>) {
        // rhs is signed, lhs is unsigned: a negative rhs is always smaller.
        updated = rhs < 0 || static_cast<::std::make_unsigned_t<N>>(rhs) < lhs;
      } else {
        // rhs is unsigned, lhs is signed: a negative lhs is never larger.
        updated = lhs >= 0 && rhs < static_cast<::std::make_unsigned_t<M>>(lhs);
      }
    } else {
      updated = rhs < lhs;
    }
    if (updated) lhs = rhs;
    return updated;
  }
}


#line 1 "tools/chmax.hpp"



#line 6 "tools/chmax.hpp"

namespace tools {

  // Assigns rhs to lhs when rhs is larger than lhs and returns whether the
  // assignment happened.
  // When both operands are integral the comparison is made on the
  // mathematical values, so mixing signed and unsigned types is safe. It is
  // spelled out here instead of calling std::cmp_less because std::cmp_less
  // rejects character types and bool, which std::is_integral_v accepts.
  template <typename M, typename N>
  bool chmax(M& lhs, const N& rhs) {
    bool updated;
    if constexpr (::std::is_integral_v<M> && ::std::is_integral_v<N>) {
      if constexpr (::std::is_signed_v<M> == ::std::is_signed_v<N>) {
        updated = lhs < rhs;
      } else if constexpr (::std::is_signed_v<M>) {
        // lhs is signed, rhs is unsigned: a negative lhs is always smaller.
        updated = lhs < 0 || static_cast<::std::make_unsigned_t<M>>(lhs) < rhs;
      } else {
        // lhs is unsigned, rhs is signed: a negative rhs is never larger.
        updated = rhs >= 0 && lhs < static_cast<::std::make_unsigned_t<N>>(rhs);
      }
    } else {
      updated = lhs < rhs;
    }
    if (updated) lhs = rhs;
    return updated;
  }
}


#line 16 "tools/suffix_array.hpp"

namespace tools {

  /**
   * Suffix array and LCP array of a collection of strings
   * (a wrapper of atcoder::suffix_array and atcoder::lcp_array).
   *
   * The input strings are concatenated with a 0 sentinel appended to each of
   * them, the suffix array / LCP array of the concatenation is computed, and
   * the result is mapped back to (string index, position) pairs.
   */
  class suffix_array {
  public:
    // sa[k] = (index of the string, start position) of the k-th suffix in sorted order.
    ::std::vector<::std::pair<int, int>> sa;
    // lcpa[k] = length of the longest common prefix of the suffixes sa[k] and sa[k + 1].
    ::std::vector<int> lcpa;

    suffix_array() = default;

    /**
     * Builds sa and lcpa from the strings in [begin, end).
     *
     * The element type must be std::string or a std::vector of int,
     * unsigned int, long long or unsigned long long.
     * An empty range leaves sa and lcpa empty.
     */
    template <typename InputIterator>
    suffix_array(const InputIterator begin, const InputIterator end) {
      using Container = ::std::decay_t<decltype(*::std::declval<InputIterator>())>;
      static_assert(
        ::std::is_same_v<Container, ::std::string> ||
        ::std::is_same_v<Container, ::std::vector<int>> ||
        ::std::is_same_v<Container, ::std::vector<unsigned int>> ||
        ::std::is_same_v<Container, ::std::vector<long long>> ||
        ::std::is_same_v<Container, ::std::vector<unsigned long long>>
      );

      if (begin == end) return;

      ::std::vector<Container> orig(begin, end);

      // offsets[i] = position in the concatenation at which orig[i] starts
      // (every string is followed by one sentinel, hence the "+ 1").
      ::std::vector<::std::size_t> offsets(orig.size());
      offsets[0] = 0;
      for (::std::size_t i = 1; i < orig.size(); ++i) {
        offsets[i] = offsets[i - 1] + orig[i - 1].size() + 1;
      }

      // Total length of the concatenation, sentinels included.
      const ::std::size_t concat_size = offsets.back() + orig.back().size() + 1;

      ::std::vector<int> concat;
      concat.reserve(concat_size);
      int upper;
      if constexpr (::std::is_same_v<Container, ::std::string>) {
        // Characters are read as unsigned char so that every symbol is
        // non-negative, as atcoder::suffix_array requires.
        // upper starts at 0 so that it stays valid when all strings are empty.
        int lower = ::std::numeric_limits<int>::max();
        upper = 0;
        for (const auto& s : orig) {
          for (const auto s_i : s) {
            const int code = static_cast<unsigned char>(s_i);
            ::tools::chmin(lower, code);
            ::tools::chmax(upper, code);
          }
        }

        // 0 is reserved for the sentinel; shift the alphabet by one only when
        // the input itself contains the character 0.
        const int shift = lower == 0 ? 1 : 0;
        for (const auto& s : orig) {
          for (const auto s_i : s) {
            concat.push_back(static_cast<unsigned char>(s_i) + shift);
          }
          concat.push_back(0);
        }
        upper += shift;
      } else {
        // Coordinate compression: map the distinct values to 1, 2, ... in
        // increasing order, keeping 0 for the sentinel.
        Container compress;
        // Number of real (non-sentinel) symbols. concat is still empty here,
        // so its size() must not be used for this.
        compress.reserve(concat_size - orig.size());
        for (const auto& s : orig) {
          ::std::copy(s.begin(), s.end(), ::std::back_inserter(compress));
        }
        ::std::sort(compress.begin(), compress.end());
        compress.erase(::std::unique(compress.begin(), compress.end()), compress.end());

        for (const auto& s : orig) {
          for (const auto s_i : s) {
            concat.push_back(::std::distance(compress.begin(), ::std::lower_bound(compress.begin(), compress.end(), s_i)) + 1);
          }
          concat.push_back(0);
        }

        upper = compress.size();
      }

      // belongs[p] = index of the string that position p of the concatenation
      // belongs to (the sentinel belongs to the string it terminates).
      ::std::vector<::std::size_t> belongs;
      belongs.reserve(concat.size());
      for (::std::size_t i = 0; i < orig.size(); ++i) {
        for (::std::size_t j = 0; j <= orig[i].size(); ++j) {
          belongs.push_back(i);
        }
      }

      const auto concat_sa = ::atcoder::suffix_array(concat, upper);
      this->lcpa = ::atcoder::lcp_array(concat, concat_sa);
      this->sa.reserve(concat_sa.size());
      for (const auto ij : concat_sa) {
        const int i = belongs[ij];
        const int j = ij - offsets[i];
        this->sa.emplace_back(i, j);
      }

      // A common prefix found in the concatenation may run past the end of a
      // string; clamp it to the remaining length of both suffixes.
      for (::std::size_t i = 0; i < this->lcpa.size(); ++i) {
        ::tools::chmin(this->lcpa[i], orig[this->sa[i].first].size() - this->sa[i].second);
        ::tools::chmin(this->lcpa[i], orig[this->sa[i + 1].first].size() - this->sa[i + 1].second);
      }

      // Drop the entries that start at a sentinel (position == length of the string).
      this->erase_if([&](const int i, const int j) {
        return ::std::cmp_equal(j, orig[i].size());
      });
    }

    /**
     * Same as the iterator-pair constructor, taking the strings from an
     * initializer list.
     */
    template <typename Container>
    suffix_array(const ::std::initializer_list<Container> il) : suffix_array(il.begin(), il.end()) {
    }

    /**
     * Removes every element sa[i] for which pred(sa[i].first, sa[i].second)
     * is true, rebuilds lcpa for the remaining elements, and returns the
     * number of removed elements.
     *
     * pred is evaluated more than once for some elements, so it should
     * return the same answer every time it is asked about the same pair.
     */
    template <typename Predicate>
    ::std::size_t erase_if(const Predicate pred) {
      if (this->sa.empty()) return 0;

      const auto N = this->sa.size();
      ::std::size_t erased = 0;

      // The arrays are compacted in place, one maximal run of consecutive
      // elements sharing the same pred result at a time.
      //   [sa_al, sa_ar)     : the current run in the original sa (read side)
      //   [sa_vl, sa_vr)     : where that run lands in the compacted sa (write side)
      //   [lcpa_al, lcpa_ar) : the lcpa entries associated with the run (read side)
      //   [lcpa_vl, lcpa_vr) : where those entries land in the compacted lcpa (write side)
      ::std::size_t sa_vl = 0, lcpa_vl = 0;
      for (::std::size_t sa_vr = 0, sa_al = 0, sa_ar = 0, lcpa_vr = 0; sa_al < N; sa_vl = sa_vr, sa_al = sa_ar, lcpa_vl = lcpa_vr) {
        const bool removes = pred(this->sa[sa_al].first, this->sa[sa_al].second);

        // Extend the run while pred keeps giving the same answer.
        for (; sa_ar < N && removes == pred(this->sa[sa_ar].first, this->sa[sa_ar].second); ++sa_vr, ++sa_ar);
        if (sa_vl < sa_al) ::std::move(this->sa.begin() + sa_al, this->sa.begin() + sa_ar, this->sa.begin() + sa_vl);

        // A kept run owns the lcpa entries between its own elements; a removed
        // run owns the entries touching any of its elements, i.e. it also
        // takes the one shared with the preceding element.
        const auto lcpa_al = sa_al == 0 ? 0 : sa_al - removes;
        const auto lcpa_ar = ::std::min(sa_ar - !removes, N - 1);
        lcpa_vr = lcpa_vl + (lcpa_ar - lcpa_al);
        if (lcpa_vl < lcpa_al) ::std::move(this->lcpa.begin() + lcpa_al, this->lcpa.begin() + lcpa_ar, this->lcpa.begin() + lcpa_vl);

        if (removes) {
          erased += sa_vr - sa_vl;
          sa_vr = sa_vl;
          if (0 < sa_al && sa_ar < N) {
            // The run has kept neighbours on both sides: their LCP is the
            // minimum over the entries spanning the removed run.
            this->lcpa[lcpa_vl] = *::std::min_element(this->lcpa.begin() + lcpa_vl, this->lcpa.begin() + lcpa_vr);
            lcpa_vr = lcpa_vl + 1;
          } else {
            // The run touches an end of sa: nothing remains to be paired up.
            lcpa_vr = lcpa_vl;
          }
        }
      }

      this->sa.erase(this->sa.begin() + sa_vl, this->sa.end());
      this->lcpa.erase(this->lcpa.begin() + lcpa_vl, this->lcpa.end());

      return erased;
    }
  };
}


Back to top page