String Algorithm
Zhongjun Qiu 元婴开发者

常见的字符串算法以及模板,包含了 KMP、Trie、AC自动机,字符串哈希等。

字符串哈希

java
c++
1
2
3
4
5
6
7
8
9
10
11
// 大M取一个比较大的素数 1e9+7 4294967291 大P取一个比较小的素数 171 131
long M = 0x60000005, P = 171;
long[] p, h;
p[0] = 1;
for (int i = 1;i <= n;i++){
h[i] = (h[i-1]*P+(s.charAt(i-1)-'a'+1))%M;
p[i] = p[i-1]*P%M;
}
long get(int l, int r){
return (h[r]-h[l-1]*p[r-l+1]%M+M)%M;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());

int rnd(int x, int y) {
return uniform_int_distribution<int>(x, y)(rng);
}

ll MOD = 1e18 + rnd(0, 1e9), BASE = 233 + rnd(0, 1e3);

struct HashString {
vector<__int128_t> P, H;

HashString(string &s) {
int n = s.size();
P.resize(n + 1);
P[0] = 1;
H.resize(n + 1);
H[0] = 0;
for (int i = 1; i <= n; i++) {
P[i] = P[i - 1] * BASE % MOD;
H[i] = (H[i - 1] * BASE + s[i - 1]) % MOD;
}
}

ll query(int l, int r) {
return (H[r] - H[l - 1] * P[r - l + 1] % MOD + MOD) % MOD;
}
};

字典树

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class Node{
Node[] r = new Node[26];
boolean isEnd = false; // 判断是不是一个字符串末尾
}
static Node root = new Node();
static add(String ss){
Node cur = root;
for (int k = 0;k < ss.length();k++){
int x = ss.charAt(k)-'a';
if (cur.r[x] == null) cur.r[x] = new Node();
cur = cur.r[x];
}
cur.isEnd = true;
}
static boolean qurey(String ss){
Node cur = root;
for (int k = 0;k < ss.length();k++){
int x = ss.charAt(k)-'a';
if (cur.r[x] == null) return false; //下一个字符没有出现 直接false
cur = cur.r[x];
}
return cur.isEnd; //字符都在 判断是不是结束了
}

KMP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
//两个字符串的下标都从 1 开始
// 求ne过程,是p[]自己和自己匹配的过程; 起始i = 2,要注意
for( int i = 2, j = 0; i <=n; i++){
while( j && p[i] != p[j + 1]) j = ne[j];
if( p[i] == p[j + 1]) j ++;
// 这里不用管是否匹配成功,ne[i]的值都要记录下来
ne[i] = j;
}
// kmp匹配过程
for(int i = 1, j = 0 ; i <= m; i++){
// j 表示是否退回起点
while( j && s[i] != p[ j + 1]) j = ne[j];
if ( s[i] == p [ j + 1 ]) j++;
if( j == n){
// 匹配成功,这里的 i - n 是第一个匹配元素的下标,本应该是 i - n + 1, 由于数组是从1开始计数,所以为 i - n
printf("%d " , i - n );
// 匹配成功后找下一个匹配
j = ne[j];
}
}

fail = new int[m];
for (int i = 1, j = 0; i < m; ++i) {
while (j != 0 && e.charAt(j) != e.charAt(i)) j = fail[j - 1];
if (e.charAt(j) == e.charAt(i)) ++j;
fail[i] = j;
}

AC自动机

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
const int N = 5e4+10, M = 26;

struct ACA {
int tot, ch[N][M], deep[N], fail[N];
int cnt[N];

ACA() {
tot = 0;
for (int i = 0;i < N;i++) memset(ch[i], 0, sizeof(int)*M);
memset(deep, 0, sizeof(int)*N);
memset(fail, 0, sizeof(int)*N);
memset(last, 0, sizeof(int)*N);
memset(cnt, 0, sizeof(int)*N);
}

void insert(string& cur, int cost) {
int n = cur.size(), node = 0;
for (int i = 0;i < n;i++) {
int c = cur[i]-'a';
if (!ch[node][c]) {
ch[node][c] = ++tot;
deep[tot] = i+1;
}
node = ch[node][c];
}
cnt[node] = cost;
}

void build() {
queue<int> q;
for (int i = 0;i < M;i++) {
if (ch[0][i]) {
q.push(ch[0][i]);
}
}
while (q.size()) {
int cur = q.front(); q.pop();
for (int i = 0;i < M;i++) {
int nxt = ch[cur][i], nf = ch[fail[cur]][i];
if (nxt) {
fail[nxt] = nf;
q.push(nxt);
} else {
ch[cur][i] = nf;
}
}
}
}
};

int query(string s) {
int res = 0;
for (int i = 1, root = 0;i <= n;i++) {
int c = target[i-1]-'a';
root = ac.ch[root][c];
for (int j = root;j;j = ac.fail[j]) {
res += ac.cnt[j];
ac.cnt[j] = 0;
}
}
return res;
}
 REWARD AUTHOR
 Comments
Comment plugin failed to load
Loading comment plugin