Description:
There are n strings, which are grouped so that each string belongs to only one group.
For a legal group, at least one of the following two conditions is met:
1. All strings have the same K prefix (i.e. the first k letters are the same)
2. All strings have the same k suffix (i.e. the last k letters are the same)
3. You need to group these strings to minimize the number of groups.
n<=5000,k<=550
Explanation:
Let's first discrete the prefixes and suffixes.
Use double hash or sort.
For a string, there is at least one point to choose after its prefix and suffix are discrete. This is a classic model.
This is a bipartite graph.
Establish super source S and super sink T.
S to the edge whose prefix traffic is 1 after discretization.
The suffix after discretization is to the edge with T-connected flow of 1.
If x, y have at least one edge to choose, X to y connect positive infinity.
Running maximum flow = minimum cut is the answer, which is obvious.
Finally, starting from S, finding the points belonging to S set can determine which edges are cut to determine the grouping.
Code:
#include<cstdio>
#include<cstring>
#include<algorithm>
#define ll long long
#define fo(i, x, y) for(int i = x; i <= y; i ++)
#define fd(i, x, y) for(int i = x; i >= y; i --)
#define min(a, b) ((a) < (b) ? (a) : (b))
using namespace std;
const int N = 100005, M = 555, E = 5e6 + 5, INF = 1 << 30;
const ll p1 = 26455189, mo1 = 43820603;
const ll p2 = 9781909, mo2 = 38577701;
int n, m, k, td, tf; char str[M];
struct node {
ll x, y, i;
} a[N], b[N];
int c[N], d[N];
bool cmp(node a, node b) {
if(a.x < b.x) return 1;
if(a.x > b.x) return 0;
return a.y < b.y;
}
int final[N], to[E], next[E], r[E], tot = 1;
int S, T, co[N], dd[N], cur[N], ans;
void link(int x, int y, int z) {
next[++ tot] = final[x], to[tot] = y, r[tot] = z, final[x] = tot;
next[++ tot] = final[y], to[tot] = x, r[tot] = 0, final[y] = tot;
}
int dg(int x, int flow) {
if(x == T) return flow;
int use = 0;
for(int i = cur[x]; i; i = next[i], cur[x] = i) {
int y = to[i];
if(dd[y] + 1 == dd[x] && r[i]) {
int tmp = dg(y, min(flow - use, r[i]));
use += tmp; r[i] -= tmp; r[i ^ 1] += tmp;
if(use == flow) return use;
}
}
cur[x] = final[x];
if(!(-- co[dd[x]])) dd[T] = T;
++ co[++ dd[x]];
return use;
}
int bz[N];
void dfs(int x) {
if(bz[x]) return;
bz[x] = 1;
for(int i = final[x]; i; i = next[i])
if(r[i]) dfs(to[i]);
}
struct edge {
int final[N], to[N], next[N], tot;
void link(int x, int y) {next[++ tot] = final[x], to[tot] = y, final[x] = tot;}
} e;
int bx[N];
int main() {
freopen("group.in", "r", stdin);
freopen("group.out", "w", stdout);
scanf("%d %d", &n, &k);
fo(i, 1, n) {
scanf("%s", str + 1); m = strlen(str + 1);
fo(j, 1, k) {
a[i].x = (a[i].x * p1 + str[j] - 'A') % mo1;
a[i].y = (a[i].y * p2 + str[j] - 'A') % mo2;
}
fd(j, m, m - k + 1) {
b[i].x = (b[i].x * p1 + str[j] - 'A') % mo1;
b[i].y = (b[i].y * p2 + str[j] - 'A') % mo2;
}
a[i].i = b[i].i = i;
}
sort(a + 1, a + n + 1, cmp), sort(b + 1, b + n + 1, cmp);
fo(i, 1, n) {
if(i == 1 || a[i].x != a[i - 1].x || a[i].y != a[i - 1].y)
td ++;
c[a[i].i] = td;
}
tf = td;
fo(i, 1, n) {
if(i == 1 || b[i].x != b[i - 1].x || b[i].y != b[i - 1].y)
td ++;
d[b[i].i] = td;
}
fo(i, 1, n) e.link(c[i], i), e.link(d[i], i);
S = td + 1; T = S + 1;
fo(i, 1, tf) link(S, i, 1);
fo(i, tf + 1, td) link(i, T, 1);
fo(i, 1, n) link(c[i], d[i], INF);
co[0] = T;
for(; dd[T] < T;) ans += dg(S, INF);
dfs(S);
fo(i, 1, tf) bz[i] = !bz[i];
printf("%d\n", ans);
fo(i, 1, td) if(bz[i]) {
int ss = 0;
for(int j = e.final[i]; j; j = e.next[j])
if(!bx[e.to[j]]) ss ++;
printf("%d", ss);
for(int j = e.final[i]; j; j = e.next[j])
if(!bx[e.to[j]]) bx[e.to[j]] = 1, printf(" %d", e.to[j]);
printf("\n");
}
}