字符串算法（二）

可持久化 Trie

trie

用 $\text{root}[\cdots]$ 数组来定位每个根节点，不妨设 $\text{trie}$ 中字符集合为 $C$ ，全体字符集为 $A$
当前插入第 $i$ 个字符串，即执行第 $i$ 个版本， $p \leftarrow \text{root}(i-1)$
新建一个节点 $q$ ，即 $q = root(i) = ++tot$ ，假设当前插入字符 $S_j$
如果 $p \neq 0$ ，对于 $\forall c \in \{C\}, \ c \neq S_j$ ，令 $t(q, c) \leftarrow t(p, c)$
（这一步可以检查 $\forall c \in \{A\}$ 全体字符集，令 $t(q, c) \leftarrow t(p, c)$ ，因为下一步 $t(q, S_j)$ 会重新定位到新开的节点上）
新建一个节点，令 $t(q, S_j) = ++tot$ ，即除了 $S_j$ 指针不同外， $p, q$ 的其余信息完全相同
$p \leftarrow t(p, S_j), q \leftarrow t(q, S_j), j \leftarrow j+1$ 直到字符串结尾

最大异或和
可以类似引入一个异或前缀和的概念

$\bigoplus_{i = p}^{n} a_p = S_n \oplus S_{p-1}$

对于添加操作，很简单 $S_{n+1} = S_n \oplus x, \ n = n+1$
如果令 $p' = p-1, \ l-1 \leqslant p' \leqslant r-1$ ，询问操作实际上就是令 $val = x \oplus S_n$
求一个位置 $p$ ，满足 $l-1 \leqslant p \leqslant r-1$ ，使得 $S_p \oplus val$ 最大
这个问题如果没有 $p \in [l-1, r-1]$ 的限制，就是最大异或和问题
对于 $p \leqslant r-1$ ，可以借鉴主席树思想，对 $\text{trie}$ 进行可持久化，在第 $r-1$ 个版本
即 $\text{root}(r-1)$ 中查询最大异或和路径
（ $p = \text{root}(r-1)$ ，从高位到低位尽可能沿着和 $val$ 相反的位走）
对于 $p \geqslant l-1$ $p ⩾ l - 1$ ，只要保证异或和路径上所经过点的时间戳 $\geqslant l-1$ 即可
对于插入操作 $\text{insert}(pre, p, i)$ $insert (p r e, p, i)$ 表示插入第 $i$ $i$ 个字符串
$k$ $k$ 从高位到低位遍历，此时第 $k$ $k$ 位的字符为 $c = S_i \gg k \ \& \ 1$ $c = S_{i} ≫ k & 1$
- 如果 $pre \neq 0$ ，令 $t(p, c\oplus 1) \leftarrow t(pre, c\oplus 1)$
- $t(p, c) = ++tot$ ，于此同时标记节点时间戳 $dfn(p) = i$ ，然后和主席树一样同步往下走
  $p \leftarrow t(p, c), \ pre \leftarrow t(pre, c), \textbf{then} \ dfn(p) = i$

const int N = 600000 + 5;
const int maxn = N * 25;
int n, m, s[N], root[N];

class Trie {
public:
    int t[maxn][2], dfn[maxn];
    int tot;

    Trie() {
        tot = 0;
        memset(t, 0, sizeof 0);
        memset(dfn, 0, sizeof 0);
        dfn[0] = -1;
    }

    void insert(int pre, int p, int ver) {
        dfn[p] = ver;
        for (int k = 25; k >= 0; k--) {
            int c = s[ver] >> k & 1;
            t[p][c^1] = t[pre][c^1];
            t[p][c] = ++tot;
            p = t[p][c], pre = t[pre][c];
            dfn[p] = ver;
        }
    }

    int ask(int p, int val, int lim) {
        for (int k = 25; k >= 0; k--) {
            int c = val >> k & 1;
            if (dfn[ t[p][c^1] ] >= lim) p = t[p][c^1];
            else p = t[p][c];
        }
        return s[dfn[p]] ^ val;
    }
} trie;

int main() {
    freopen("input.txt", "r", stdin);
    cin >> n >> m;

    // init
    for (int i = 1; i <= n; i++) {
        int x;
        scanf("%d", &x);
        s[i] = s[i-1] ^ x;
        root[i] = ++trie.tot;
        trie.insert(root[i-1], root[i], i);
    }
    while (m--) {
        char cmd[2];
        scanf("%s", cmd);
        if (cmd[0] == 'A') {
            int x;
            scanf("%d", &x);
            root[++n] = ++trie.tot;
            s[n] = s[n-1] ^ x;
            trie.insert(root[n-1], root[n], n);
        }
        else {
            int l, r, x;
            scanf("%d%d%d", &l, &r, &x);
            int res = trie.ask(root[r-1], s[n]^x, l-1);
            printf("%d\n", res);
        }
    }
}

树上可持久化 Trie

树上异或

利用可持久化线段树的思路，可以设计出如下算法

$\textbf{dfs}(u)$ 对于 $(u, v)$ ，将 $v$ 看作一个基于 $u$ 的新的版本，建立可持久化 $\text{trie}$
$\text{root}(v) = ++tot, \quad \text{insert}(root(u), root(v))$
注意保存版本信息， $\text{ver}(v) = \text{ver}(u) + 1$
查询操作呢？对于 $\text{ask}(x, y, val)$
不妨令 $d = \text{LCA}(x, y), \ fa \leftarrow fa(\text{lca}(x, y))$
此时分别查询 $\text{root}(x)$ 和 $\text{root}(y)$ ，然后取一个 $\max$ 即可
但是这里是有限制的，在对可持久化 $\text{trie}$ 查询的时候，必须保证 $x, y$ 是 $fa$ 的子节点
这不难，只要让 $p = root(fa), q = root(x)$ 同步往下走，保证 $q$ 经过的点的版本号 $\text{ver}(q) > \text{ver}(p)$
具体来说，以 $root(x)$ 为例，调用 $\text{ask}(p, q, val) = \text{ask}(root(fa), root(x), val)$
从高位到低位检查 $val$ 的位，假设此时第 $k$ 位是 $c$ ， $p, q$ 尽可能沿着 $c \oplus 1$ 的指针往下走
$p \leftarrow t(p, c \oplus 1), q \leftarrow t(q, c \oplus 1), res = res + (1 \ll k)$
如果没有 $c \oplus 1$ 的指针，那么只好沿着 $c$ 指针走，这和最大异或和解决方法大同小异

const int N = 1e5 + 5;
const int maxn = N * 32, H = 17;
int a[N], root[N], n, m;

class Graph {
public:
    int n;
    int tot;
    vector<int> head, ver, ne;

    Graph(int _n) : n(_n) {
        tot = 1;
        head.resize(n), ver.resize(n<<1), ne.resize(n<<1);
    }
    void clear() {
        tot = 1;
        fill(head.begin(), head.end(), 0);
    }

    void add(int a, int b) {
        ver[++tot] = b; ne[tot] = head[a]; head[a] = tot;
    }
} G(N);

class Trie {
public:
    int tot;
    int t[maxn][2], ver[maxn];

    Trie() {
        tot = 0;
        memset(t, 0, sizeof t);
        memset(ver, 0, sizeof ver);
    }
    void clear() {
        tot = 0;
        memset(t, 0, sizeof t);
        memset(ver, 0, sizeof ver);
    }

    void insert(int p, int q, int val) {
        ver[q] = ver[p] + 1;
        for (int k = 16; k >= 0; k--) {
            int c = val >> k & 1;
            t[q][c^1] = t[p][c^1];
            t[q][c] = ++tot;

            p = t[p][c], q = t[q][c];
            ver[q] = ver[p] + 1;
        }
    }

    int query(int fa, int p, int val) {
        int res = 0;
        for (int k = 16; k >= 0; k--) {
            int c = val >> k & 1;
            if (ver[ t[fa][c^1] ] < ver[ t[p][c^1] ]) {
                res += (1<<k); fa = t[fa][c^1], p = t[p][c^1];
            }
            else fa = t[fa][c], p = t[p][c];
        }
        return res;
    }

} trie;

int fa[N][H], dep[N];

void dfs(int u, int pa) {
    fa[u][0] = pa, dep[u] = dep[pa] + 1;
    for (int i = 1; i < H; i++) fa[u][i] = fa[fa[u][i-1]][i-1];
    root[u] = ++trie.tot, trie.insert(root[pa], root[u], a[u]);

    for (int i = G.head[u]; i; i = G.ne[i]) {
        int v = G.ver[i];
        if (v == pa) continue;
        dfs(v, u);
    }
}

int lca(int x, int y) {
    if (dep[y] < dep[x]) swap(x, y);
    for (int i = H-1; i >= 0; i--) {
        if (dep[ fa[y][i] ] >= dep[x]) y = fa[y][i];
    }
    if (y == x) return y;
    for (int i = H-1; i >= 0; i--) {
        if (fa[y][i] != fa[x][i]) y = fa[y][i], x = fa[x][i];
    }
    return fa[x][0];
}

int main() {
    freopen("input.txt", "r", stdin);
    while (~scanf("%d%d", &n, &m)) {
        // init
        trie.clear(), G.clear();
        memset(root, 0, sizeof root);
        memset(fa, 0, sizeof fa);
        memset(dep, 0, sizeof dep);

        for (int i = 1; i <= n; i++) scanf("%d", &a[i]);
        for (int i = 0; i < n-1; i++) {
            int u, v;
            scanf("%d%d", &u, &v);
            G.add(u, v), G.add(v, u);
        }

        // dfs
        dfs(1, 0);

        // query
        while (m--) {
            int x, y, val;
            scanf("%d%d%d", &x, &y, &val);
            int f = fa[lca(x, y)][0];
            int res = max(trie.query(root[f], root[x], val), trie.query(root[f], root[y], val));
            printf("%d\n", res);
        }
    }
}

树上可持久化 Trie 综合

异或粽子

考虑前缀和 $s[i]$ ，原问题等价于对于一个点 $p$ ，找到 $s[1\cdots p-1]$ 的一个点 $p'$
使得 $s[p'] \oplus s[p]$ 最大，可以考虑使用可持久化 $\text{trie}$

对于可持久化 $\text{trie}$ ， $\text{root}(p-1)$ 这个版本中只有区间 $[1\cdots p-1]$ 的信息
利用主席树的思想，可以解决可持久化 $\text{trie}$ 的 $\text{k-query}$ 问题

对于区间 $[1\cdots r]$ ，要找到一个 $l \in [1\cdots r-1]$ ，使得 $s_l \oplus s_r$ 为第 $k$ 大
令 $p \leftarrow \text{root}(r-1), \ val \leftarrow s[r]$ ，从高位到低位检查 $val$ 的第 $b$ 位 $c$
如果 $\textbf{size}(t(p, c \oplus 1)) \geqslant k$ ，那么 $res += (1 \ll b), \ p \leftarrow t(p, c\oplus 1)$
否则的话， $k' \leftarrow k - \textbf{size}(t(p, c\oplus 1))$ ， $p \leftarrow t(p, c)$ ，递归在 $t(p, c)$ 子树查找第 $k'$ 大

需要注意的是边界，想要让 $r = 1$ 时有意义，必须提前在 $\text{trie}$ 树中插入 $\text{insert}(\text{root}(0), 0)$
表示在 $\text{root}(0)$ 初始化插入一个每个位都是 $0$ 的数

具体来说

对于 $H$ 位的数 $val$ ，由于要统计 $\text{size}$ 信息，所以递归地插入
$\textbf{insert}(pre, p, H, val)$ ，递归的边界是 $H < 0, \text{size}(p) = \text{size}(pre) + 1$
（ $H = 0$ 时插入最后一个字符 $c$ ，递归执行 $t(p, c)$ 之后，边界 $H = -1$ ）
$res \leftarrow (i, rk(i))$ ，表示在区间 $[1\cdots i-1]$ 中找到一个 $j$
使得 $res = s_j \oplus x_i$ 为第 $rk(i)$ 大，很显然一开始 $rk(i) = 1$
建立一个优先队列 $\text{que}$ ，对于 $\forall \ r \in [1, n]$
将 $\textbf{ask}(\text{root}(r-1), rk(r), s[r])$ 的结果放入 $\text{que}$ 中
取出堆顶元素，此时堆中最大元素假设为 $(res, p)$
表示此时 $\exist l \in [1, p)$ ，使得 $s_l \oplus s_p$ 为第 $1$ 大，其值为 $res$
将其累加到答案中，删掉 $s_l$ ，注意要接着找到 $[1, l-1] \cup [l+1, p)$ 中第 $1$ 大，将其放入堆中
注意到 $[1, l-1] \cup [l+1, p)$ 中的第 $1$ 大，等价于 $[1, p)$ 中的第 $2$ 大
由此在编程实现上可以更简单一些，一开始令 $rk(i) = 1$ ，取出堆顶元素 $(res, p)$ 之后
执行查询 $res' \leftarrow \textbf{ask}(\text{root}(p), ++rk(p), s[p])$ ，再继续将 $res'$ 放入堆中

typedef pair<ll, int> PII;
const int maxn = 500000 + 10, N = maxn * 35;
const int H = 33;
int n, k, rk[maxn], root[maxn];
ll s[maxn];
priority_queue<PII> heap;

// insert(pre, p, H, val)
// ask(root(p), rk, &ans)

class Trie {
public:
    int tot;
    int t[N][2], sz[N];

    Trie() {
        tot = 0;
        memset(t, 0, sizeof t);
        memset(sz, 0, sizeof sz);
    }

    void insert(int pre, int p, int H, ll val) {
        if (H < 0) {
            sz[p] = sz[pre] + 1;
            return;
        }
        int c = val >> H & 1;
        if (pre) t[p][c^1] = t[pre][c^1];
        t[p][c] = ++tot;
        insert(t[pre][c], t[p][c], H-1, val);
        sz[p] = sz[t[p][c]] + sz[t[p][c^1]];
    }

    void ask(int p, int rk, int H, ll val, ll &res) {
        if (H < 0) return;
        int c = val >> H & 1;
        if (sz[ t[p][c^1] ] >= rk) {
            res = (res << 1 | 1);
            ask(t[p][c^1], rk, H-1, val, res);
        }
        else {
            res <<= 1;
            ask(t[p][c], rk - sz[t[p][c^1]], H-1, val, res);
        }
    }
} trie;

void solve() {
    for (int i = 1; i <= n; i++) {
        ll res = 0;
        trie.ask(root[i-1], rk[i], H, s[i], res);
        heap.push({res, i});
    }
    ll ans = 0;
    while (k--) {
        auto x = heap.top(); heap.pop();
        ans += x.first;
        int r = x.second;
        ll res = 0;
        trie.ask(root[r-1], ++rk[r], H, s[r], res);
        heap.push({res, r});
    }
    printf("%lld\n", ans);
}

int main() {
    freopen("input.txt", "r", stdin);
    // init
    memset(root, 0, sizeof root);

    scanf("%d%d", &n, &k);
    for (int i = 1; i <= n; i++) {
        ll x;
        scanf("%lld", &x);
        s[i] = s[i-1] ^ x;
        rk[i] = 1;
    }

    // per trie
    root[0] = ++trie.tot;
    trie.insert(0, root[0], H, 0);
    for (int i = 1; i <= n; i++) {
        root[i] = ++trie.tot;
        trie.insert(root[i-1], root[i], H, s[i]);
    }

    // solve
    solve();
}

AC 自动机

AC 自动机实际上是 $\text{trie}$ 树上的自动机，是基于 $\text{kmp}$ 思想的
假设在 $\text{kmp}$ 模式匹配中，模板有很多个，这个时候怎么办呢？
一个很直观的想法是把所有模式串插入 $\text{trie}$ 树中，再在 $\text{trie}$ 树中加上失配边
AC-01

AC 自动机字典树的构建

对于模式串 $s_1, s_2, \cdots s_n$ ，将它们全部插入 $\text{trie}$ 树中，此时构成的所有状态集合称为 $Q$
实际上， $Q$ 中每个状态节点对应一个模式串的一个前缀
假设当前的状态节点为 $u$ ， $\textbf{fail}(u) = v$ ，其中 $v \in Q$ ，并且 $v$ 是 $u$ 的最长后缀
换句话说， $\textbf{fail}$ 指针检查所有模式串的任意前缀，找到能和当前串后缀匹配上的
并且要使得匹配上的部分最长

AC 自动机的 fail 指针
AC-02

fail 指针的构建
构建过程类似 $\text{kmp}$ 算法，不过这里是用 $\text{bfs}$ 来递推
不妨设上一个阶段的最后节点为 $p$ （即 $\text{bfs}$ 的队头节点）, 当前正在添加字符 $c$ ，即 $t(p, c) = u$
假设深度小于 $u$ 的所有指针已经求出来了，那么 $\textbf{fail}(u)$ 应该指向谁？
这里要关注 $q = \textbf{fail}(p)$

如果 $t(\text{fail}(p), c) = t(q, c) \neq \textbf{null}$ ，那么加入字符 $c$
相当于在 $p$ 和 $\textbf{fail}(p)$ 后面同时接上 $c$ ，只要令 $\textbf{fail}(u) = t(\text{fail}(p), c)$ 即可
如果 $t(\text{fail}(p), c) = t(q, c) = \textbf{null}$ ，类似 $\text{kmp}$ 算法，不断沿着失配边走
直到走到一个存在 $t(q, c) \neq \text{null}$ 的转移
$q \leftarrow \text{fail}(p)$ ， $\quad \textbf{while} \ q \neq \text{null} \ \textbf{and} \ t(q, c) = \textbf{null}: \ q = \textbf{fail}(q)$
此时令 $\textbf{fail}(u) = t(q, c)$
（如果一直找不到，那么 $q$ 最后就会走到根节点）
（如果还是不存在 $t(0, c)$ ，那么 $t(0, c) = 0$ ，还是指向根节点，并不影响答案）

失配优化
容易证明上述算法的正确性，对于 $t(S, c)$ 相当于在 $S$ 后面添加一个字符 $c$ 变成另一个状态 $S'$
注意到 $\textbf{fail}(S)$ 恰好是 $S$ 的一个后缀，所以上述过程相当于
同步在 $\{S, \textbf{fail}(S)\}$ 后面添加字符 $c$ ，由此 $t(\textbf{fail}(S), c)$ 仍然是 $S'$ 的后缀

那如果 $t(\textbf{fail}(S), c)$ 不存在呢？我们要反复沿着失配边走，直到 $t(\textbf{fail}(S), c) \neq \textbf{null}$
这就造成了额外时间开销，能不能一步到位呢？
AC-03

实际上

while (q.size()) {
    auto u = q.front(), q.pop();
    for (int i = 0; i < 26; i++) {
        if (t[u][i]) { ... }
        else t[u][i] = t[ fail[u] ][i];
    }
}

last 优化
由于 $\textbf{fail}$ 指针不一定都指向表示单词的节点，举个例子
对于模式串 $\{\text{sher, his, hep}\}$ ，那么很显然节点 $9$ 与节点 $2$ 都不是单词节点
那么输出具体方案的时候，我们就可以跳过这些节点

在 $\text{trie}$ 树中， $\textbf{val}(j) > 0$ 的节点我们认为是单词节点
这样就可以考虑增加一个指针 $\textbf{last}(i)$ ，表示从节点 $i$ 处沿着失配指针往回走，
遇到的下一个单词节点的编号，也叫后缀链接

while (q.size()) {
    auto u = q.front(), q.pop();
    for (int i = 0; i < 26; i++) {
        if (t[u][i]) { ... }
        else t[u][i] = t[ fail[u] ][i];
    }

    // 增加 last 指针
    last[u] = val[fail[u]] ? fail[u] : last[fail[u]];
}

AC 自动机的实现

AC 自动机模版

模版串多但是短，文本串很长，正好适合用 AC 自动机，由于要统计模版串出现的次数，也就是模版串能够匹配上的次数
可以这样设计算法

$\text{M}(S_i) = idx$ ，将模版串 $S_i$ 映射为相应的字符串的编号 $idx$ ，执行插入 $\textbf{insert}(S_i, idx)$
这样就可以先在 AC 自动机中执行统计，如果 $p$ 是单词节点，在 $\text{trie}$ 中令 $\text{val}(p) = idx$
对于 $\text{trie}$ 中的节点 $p$ ，如果是单词节点，其匹配上的次数记为 $\textbf{cnt}(\text{val}(p))$
然后离线处理所有模版串， $\forall i \in [1, n], \ res \leftarrow \max (res, \textbf{cnt}(i))$
如果要输出具体的方案，同样可以离线处理，对所有的模版串 $\forall i \in [1, n]$
如果 $\textbf{cnt}(M[S_i]) = res$ ，那么就将 $S_i$ 输出
值得注意的是，如果模版串有重复，插入 $\text{trie}$ 树时，后面的串会覆盖前一个
解决方法很简单，以后出现的为准，只要每次插入到达单词结尾时，更新 $M(S_i) = idx$ 即可
那么问题就只剩下如何统计 $\textbf{cnt}(\text{val}(p))$
$\textbf{find}(T)$ 在文本串 $T$ 中寻找能够匹配的所有模版串
$\forall i \in [1, \text{len}(T)]$ ，与此同时 $p = 0$ 从根节点开始遍历 AC 自动机的 $\text{trie}$ 树
$c \leftarrow T[i], \ p \leftarrow t(p, c)$
如果 $\text{trie}$ 树中的节点 $p$ 为单词节点，那么它表示的单词编号为 $\text{val}(p)$
- 如果 $val(p) \neq 0$ ， $p$ 为单词节点，也就是说 $p$ 表示的单词能匹配上文本串 $T$ 的一个位置， $\textbf{cnt}(\text{val}(p))++$
  另外， $p$ 的所有后缀链接也一定能匹配上 $T$ ，相应节点对应单词的 $\textbf{cnt}$ 也应该 $++$
  这里就需要用到之前的 $\text{last}$ 数组， $\textbf{dfs}(\text{last}(p))$ 递归统计所有后缀链接
- 如果 $val(p) = 0$ ，但是 $\text{last}(p) \neq 0$ ，就说明存在一个后缀单词能够匹配上
  需要 $\textbf{dfs}(\text{last}(p))$ 进行递归统计

const int maxn = 1e6 + 5;
const int N = (150 + 5) * 80;
map<string, int> M;
int n, cnt[N];
string text, P[N];

class AC {
public:
    int t[N][27], fail[N], val[N], last[N];
    int tot;
    AC() {
        memset(t, 0, sizeof t);
        memset(fail, 0, sizeof fail);
        memset(val, 0, sizeof val);
        memset(last, 0, sizeof last);
        tot = 0;
    }
    void clear() {
        tot = 0;
        memset(t, 0, sizeof t);
        memset(fail, 0, sizeof fail);
        memset(val, 0, sizeof val);
        memset(last, 0, sizeof last);
    }

    void insert(const string &str, int idx) {
        int p = 0;
        for (auto x : str) {
            int c = x-'a';
            if (!t[p][c]) t[p][c] = ++tot;
            p = t[p][c];
        }
        val[p] = idx;
    }

    void build() {
        queue<int> que;
        for (int i = 0; i < 26; i++) if (t[0][i]) {
            que.push(t[0][i]);
        }

        while (que.size()) {
            auto u = que.front(); que.pop();
            for (int i = 0; i < 26; i++) {
                if (t[u][i]) {
                    fail[ t[u][i] ] = t[ fail[u] ][i];
                    que.push(t[u][i]);
                }
                else t[u][i] = t[ fail[u] ][i];
            }
            last[u] = val[fail[u]] ? fail[u] : last[ fail[u] ];
        }
    }

    void dfs(int p) {
        if (p) {
            cnt[val[p]]++;
            dfs(last[p]);
        }
    }

    void find(const string &str) {
        int p = 0;
        for (auto x : str) {
            int c = x-'a';
            p = t[p][c];
            if (val[p]) dfs(p);
            else if (last[p]) dfs(last[p]);
        }
    }
} ac;

void solve() {
    int res = -1;
    for (int i = 1; i <= n; i++) res = max(res, cnt[i]);
    printf("%d\n", res);
    for (int i = 1; i <= n; i++) if (cnt[M[P[i]]] == res) {
            printf("%s\n", P[i].c_str());
    }
}

int main() {
    freopen("input.txt", "r", stdin);
    while (scanf("%d", &n) == 1 && n) {
        // init
        ac.clear();
        M.clear();
        memset(cnt, 0, sizeof cnt);

        // get data
        for (int i = 1; i <= n; i++) {
            cin >> P[i];
            M[P[i]] = i;
            ac.insert(P[i], i);
        }

        // ac automata
        cin >> text;
        ac.build();
        ac.find(text);

        // solve
        solve();
    }
}

AC 自动机和全概率

substring

其实就是从 AC 自动机根节点 $p = 0$ 处遍历，求全概率
AC 自动机节点 $u$ 若为单词节点，则 $\text{word}(u) = 1$
那么可以根据全概率公式设计出如下算法

$f(u, L)$ 表示当前在 AC 自动机的 $u$ 节点，还要走 $L$ 步，此时满足条件的概率，可以用记忆化搜索
$ans \ \& \leftarrow f(u, L)$ , $\forall \ \textbf{for} \ c$
$\quad \text{if} \ \text{word}(t(u, c)) = 0: \quad ans += \text{prob}(c) \cdot f(t(u, c), L-1)$

边界为 $L \leqslant 0, \quad \text{return} \ 1$
所求的答案为 $f(0, L)$

另外值得注意的是，构建 AC 自动机 $\text{last}$ 优化处理的时候，令 $\text{word}(u) \mid = \text{word}(\text{fail}(u))$

const int maxn = 400 + 5;
const int N = 100 + 5, SZ = 64;
string P[N];
int K, n, L;
double prob[SZ];

inline int get(const char ch) {
    if (isupper(ch)) return ch-'A';
    else if (islower(ch)) return 26 + ch-'a';
    else return 52 + ch-'0';
}

class AC {
public:
    int t[maxn][SZ], word[maxn], fail[maxn];
    int tot = 0;
    void clear() {
        tot = 0;
        memset(t, 0, sizeof t);
        memset(word, 0, sizeof word);
        memset(fail, 0, sizeof fail);
    }

    void insert(const string &str) {
        int p = 0;
        for (auto x : str) {
            int c = get(x);
            if (!t[p][c]) t[p][c] = ++tot;
            p = t[p][c];
        }
        word[p] = 1;
    }

    void build() {
        queue<int> que;
        int p = 0;
        for (int i = 0; i < SZ; i++) if (t[p][i]) {
            que.push(t[p][i]);
        }

        while (que.size()) {
            auto u = que.front(); que.pop();
            for (int i = 0; i < SZ; i++) {
                if (t[u][i]) {
                    fail[ t[u][i] ] = t[ fail[u] ][i], que.push(t[u][i]);
                }
                else t[u][i] = t[fail[u]][i];
            }
            word[u] |= word[fail[u]];
        }
    }
} ac;

double dp[maxn][N];
int vis[maxn][N];
double f(int u, int L) {
    if (L <= 0) return 1.0;
    if (vis[u][L]) return dp[u][L];
    vis[u][L] = 1;

    double &ans = dp[u][L];
    ans = 0.0;
    for (int i = 0; i < SZ; i++) {
        if (!ac.word[ ac.t[u][i] ]) ans += prob[i] * f(ac.t[u][i], L-1);
    }
    return ans;
}

int main() {
    freopen("input.txt", "r", stdin);
    int T;
    cin >> T;
    int kase = 0;
    while (T--) {
        // init
        printf("Case #%d: ", ++kase);
        ac.clear();
        memset(prob, 0, sizeof prob);
        memset(vis, 0, sizeof vis);
        memset(dp, 0, sizeof dp);

        cin >> K;
        for (int i = 1; i <= K; i++) {
            cin >> P[i];
            ac.insert(P[i]);
        }
        cin >> n;
        for (int i = 1; i <= n; i++) {
            char ch;
            cin >> ch;
            scanf("%lf", &prob[get(ch)]);
            // debug(prob[get(ch)]);
        }
        cin >> L;

        // then solve
        ac.build();
        double res = f(0, L);
        printf("%.6lf\n", res);
    }
}

AC 自动机与二维匹配

二维哈希实现矩阵匹配

求矩阵哈希的时候，注意行和列要用不同的哈希值，根据前缀和思想

$h(i, j) = h(i-1, j) \cdot P_1 + h(i, j-1) \cdot P_2 - h(i-1, j-1) \cdot P_1 \cdot P_2 + a(i, j)$

对于任意矩形 $(x_1, y_1, x_2, y_2)$ ，可以根据前缀和求出哈希值

$\begin{gathered} \text{Hash}(x_1, y_1, x_2, y_2) = h(x_2, y_2) - h(x_1-1, y_2) \cdot P_1^{x_2-x_1+1} - h(x_2, y_1 - 1) \cdot P_2^{y_2 - y_1 + 1} \\ + h(x_1-1, y_1 - 1) \cdot P_1^{x_2 - x_1 + 1} \cdot P_2^{y_2-y_1 + 1} \end{gathered}$

Matrix Matcher

typedef unsigned long long ull;
const int P1 = 13331, P2 = 131;
int n1, m1, n2, m2;
const int maxn = 1000 + 10;
char s1[maxn][maxn], s2[maxn][maxn];
ull h1[maxn][maxn], h2[maxn][maxn], p1[maxn*maxn], p2[maxn*maxn];

void pre() {
    p1[0] = p2[0] = 1;
    for (int i = 1; i <= maxn; i++) {
        p1[i] = p1[i-1] * P1;
        p2[i] = p2[i-1] * P2;
    }
}

void getHash(const char s1[][maxn], ull h[][maxn], int n, int m) {
    for (int i = 1; i <= n; i++) {
        for (int j = 1; j <= m; j++) {
            h[i][j] = h[i-1][j] * P1 + h[i][j-1] * P2 - h[i-1][j-1] * P1 * P2 + (s1[i][j]-'a');
        }
    }
}

ull Hash(const ull h[][maxn], int x1, int y1, int x2, int y2) {
    return h[x2][y2] - h[x1-1][y2] * p1[x2-x1+1] - h[x2][y1-1] * p2[y2-y1+1]
        + h[x1-1][y1-1] * p1[x2-x1+1] * p2[y2-y1+1];
}

void solve() {
    ll ans = 0;
    for (int i = 1; i + n2 - 1 <= n1; i++) {
        for (int j = 1; j + m2 - 1 <= m1; j++) {
            if (Hash(h1, i, j, i+n2-1, j+m2-1) == h2[n2][m2]) ans++;
        }
    }
    printf("%lld\n", ans);
}

int main() {
    freopen("input.txt", "r", stdin);
    // get mi
    pre();

    int T;
    cin >> T;
    while (T--) {
        // init
        scanf("%d%d", &n1, &m1);
        for (int i = 1; i <= n1; i++) scanf("%s", s1[i]+1);
        getHash(s1, h1, n1, m1);

        scanf("%d%d", &n2, &m2);
        for (int i = 1; i <= n2; i++) scanf("%s", s2[i]+1);
        getHash(s2, h2, n2, m2);

        // then solve
        ull res = Hash(h1, 0, 0, 1, 1);
        solve();
    }
}

AC 自动机实现二维匹配

矩阵 $\bold{P}(x \times y), \bold{T}(n \times m)$
很容易想到，将 $P$ 的第 $i$ 行， $\forall i \in [1, x], \ P[i]$ 插入 AC 自动机中
执行 $\text{insert}(P[i], i)$ ，这里需要在行结尾（即字符串末尾节点） $u$ ，维护一个 $\text{vector}$
$\text{vec}[u] = \{\cdots \}$ ，存储 $u$ 这个点是哪些行的结尾？
如果遍历 AC 自动机走到了某个行尾节点， $\text{vec}[u]$ 表示 $P$ 中能和 $T$ 匹配上的行有哪些

查询的时候，对 $T$ 的每一行执行 $\text{find}(T[u])$
接着根据字符串 $T[u]$ 执行标准的 AC 自动机查找
$\text{len}(T[u]) = m, \quad \forall i \in [1 \cdots \text{len}(T[u])]$ ，遍历 AC 自动机
从 $p = 0$ 开始沿着 $c = T[u][i]$ 走
对于某个 $i$ ，此时走到了 AC 自动机的 $p$ 节点

如果 $p$ 为串尾节点， $val(p) \neq 0$ ，那么遍历 $\text{vec}[p], \ \forall \ r \in \text{vec}[p]$
此时 $\bold{P}$ 中第 $r$ 行能够匹配上，也就是说
以 $T(u-r+1, i-y+1)$ 为左上角的 $x \times y$ 矩形，能匹配上的行数 $+1$
即 $\text{cnt}(u-r+1, i-y+1) += 1$
否则的话，如果 $last(p) \neq 0$ ，同样遍历 $\forall r \in \text{vec}[last(p)]$ ，执行 $\text{cnt}(u-r+1, i-y+1) += 1$

最后只要遍历 $(i, j) \in \bold{T}(n \times m)$ ，如果满足 $cnt(i, j) = x$
那么 $ans += 1$

const int maxn = 1000 + 10;
const int N = 10000 + 10, SZ = 27;
char P[maxn][maxn], T[maxn][maxn];
int n, m, x, y, cnt[maxn][maxn];

class AC {
public:
    int t[N][SZ], fail[N], val[N], last[N];
    vector<int> vec[N];
    int tot = 0;

    void clear() {
        tot = 0;
        for (int i = 0; i < N; i++) vec[i].clear();
        memset(t, 0, sizeof t);
        memset(fail, 0, sizeof fail);
        memset(val, 0, sizeof val);
        memset(last, 0, sizeof last);
    }

    void insert(const char *str, int idx) {
        int p = 0;
        assert(strlen(str) == y);
        for (int i = 0; i < strlen(str); i++) {
            int c = str[i] - 'a';
            if (!t[p][c]) t[p][c] = ++tot;
            p = t[p][c];
        }
        val[p] = idx, vec[p].push_back(idx);
    }

    void build() {
        queue<int> que;
        for (int i = 0; i < SZ; i++) {
            if (t[0][i]) que.push(t[0][i]);
        }

        while (que.size()) {
            auto u = que.front(); que.pop();
            for (int i = 0; i < SZ; i++) {
                if (t[u][i]) {
                    fail[t[u][i]] = t[fail[u]][i];
                    que.push(t[u][i]);
                }
                else t[u][i] = t[fail[u]][i];
            }
            last[u] = val[fail[u]] ? fail[u] : last[fail[u]];
        }
    }

    void find(int u) {
        int p = 0;
        for (int i = 1; i <= m; i++) {
            int c = T[u][i] - 'a';
            p = t[p][c];
            if (val[p]) {
                for (auto r : vec[p]) {
                    if (u-r+1 >= 1) ++cnt[u-r+1][i-y+1];
                }
            }
            else if (last[p]) {
                for (auto r : vec[last[p]]) {
                    if (u-r+1 >= 1) ++cnt[u-r+1][i-y+1];
                }
            }
        }
    }
} ac;

void solve() {
    for (int u = 1; u <= n; u++) ac.find(u);
    int ans = 0;
    for (int i = 1; i + x - 1 <= n; i++) {
        for (int j = 1; j + y - 1 <= m; j++) {
            if (cnt[i][j] == x) ans++;
        }
    }
    printf("%d\n", ans);
}

int main() {
    freopen("input.txt", "r", stdin);
    int kase;
    cin >> kase;
    while (kase--) {
        // init
        ac.clear();
        memset(cnt, 0, sizeof cnt);

        scanf("%d%d", &n, &m);
        for (int i = 1; i <= n; i++) scanf("%s", T[i]+1), assert(strlen(T[i]+1) == m);

        scanf("%d%d", &x, &y);
        for (int i = 1; i <= x; i++) {
            scanf("%s", P[i]+1);
            ac.insert(P[i]+1, i);
        }

        if (n < x || m < y) {
            puts("0");
            continue;
        }

        // build
        ac.build();

        // solve
        solve();
    }
}

AC 自动机和 dp

AC 自动机和 dp 有关的算法，经常需要借助 $\text{last}$ 指针写状态转移方程
NVWLS
给出一个字典，每个单词去掉元音字母 $\text{A, E, I, O, U}$ 之后形成了一个新字典
先给出一个只有辅音字母的串，用原字典（包含元音字母）的单词还原该串
如果存在多种还原方式，输出还原后元音字母数量最多的结果

很容易想到，将初始单词插入 $\text{AC}$ 自动机中，不插入元音字母
$\textbf{for} \ \forall i$ 遍历模版串 $\text{str}(i)$ ，并且找到 AC 自动机中表示 $\text{str}(i)$ 的节点 $p$
$\textbf{for} \ \forall k \in p \to \text{last}(p)$ ，如果 $k$ 为单词节点
AC 自动机维护单词节点编号 $id, \quad \text{End}(k) = id$
同时还需要维护 $\text{cnt}(id)$ ，表示 $id$ 这个单词有多少个元音字母
$\text{len}(id)$ 表示 $id$ 这个单词除掉元音字母的长度，由此可以写出状态转移方程
$f(i)$ 表示 $\text{str}(i)$ 这个位置还原补上元音字母后， $\text{str}[0\cdots i]$ 的最长长度，那么
$f(i) = \max (f(i - \text{len}[id])) + \text{cnt}(id)$
$\text{dp}$ 更新的时候记录状态 $\text{used}(i) = id$ ，表示 $i$ 这个位置用了 $id$ 这个单词
输出结果的时候递归输出 $\text{print}(p - \text{len}(\text{used}(p)))$

另外，值得注意的是，有可能单词去掉元音字母后，得到一样的单词
但是， $\text{cnt}(A) > \text{cnt}(B)$ ， $A$ 还原成的单词元音字母大于 $B$ 的
在 AC 自动机中维护 $\text{End}(p) = id$ 以及 $\text{val}(p)$ 的最大值
其中 $\text{val}(p)$ 表示 $p$ 这个节点能够还原成的最多的元音字母数


const int maxn = 3e5 + 10, SZ = 26;
int n, cnt[maxn], len[maxn], f[maxn], used[maxn];
string S[maxn];
char str[maxn];

class AC {
public:
    int t[maxn][SZ], fail[maxn], End[maxn], last[maxn];
    int tot = 0;

    void clear() {
        memset(t, 0, sizeof t);
        memset(fail, 0, sizeof fail);
        memset(End, 0, sizeof End);
        memset(last, 0, sizeof last);
        tot = 0;
    }

   void insert(const string &str, int id) {
        int p = 0;
        for (auto x : str) {
            int c = x - 'A';
            if (!t[p][c]) t[p][c] = ++tot;
            p = t[p][c];
        }
       if (cnt[id] >= cnt[End[p]]) End[p] = id;
    }

    void build() {
        queue<int> que;
        for (int i = 0; i < SZ; i++) {
            if (t[0][i]) que.push(t[0][i]);
        }
        while (que.size()) {
            auto u = que.front(); que.pop();
            for (int i = 0; i < SZ; i++) {
                if (t[u][i]) {
                    fail[t[u][i]] = t[fail[u]][i];
                    que.push(t[u][i]);
                }
                else t[u][i] = t[fail[u]][i];
            }
            last[u] = End[fail[u]] ? fail[u] : last[fail[u]];
        }
    }
} ac;

void dp(const char *str) {
    int N = strlen(str+1);
    for (int i = 1; i <= N; i++) f[i] = -1;
    f[0] = 0;

    int p = 0;
    for (int i = 1; i <= N; i++) {
        int c = str[i] - 'A';
        p = ac.t[p][c];

        for (int j = p; j; j = ac.last[j]) {
            if (!ac.End[j]) j = ac.last[j];
            int id = ac.End[j];
            if (id && f[i - len[id]] != -1 && f[i - len[id]] + cnt[id] > f[i]) {
                f[i] = f[i-len[id]] + cnt[id], used[i] = id;
            }
        }
    }
}

void print(const int p) {
    if (p < 1) return;
    print(p - len[used[p]]);
    printf("%s ", S[used[p]].c_str());
}

inline bool vowel(const char ch) {
    return ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U';
}

int main() {
    freopen("input.txt", "r", stdin);
    ac.clear();

    cin >> n;
    for (int i = 1; i <= n; i++) {
        cin >> S[i];
        string t;
        for (auto c : S[i]) {
            if (!vowel(c)) t += c;
            else cnt[i]++;
        }
        len[i] = t.length();
        ac.insert(t, i);
    }
    ac.build();

    scanf("%s", str+1);
    dp(str);

    print(strlen(str+1));
    puts("");
}