[2014 Dongguan City selection] group

Description:

There are n strings, which are grouped so that each string belongs to only one group.
For a legal group, at least one of the following two conditions is met:
1. All strings have the same K prefix (i.e. the first k letters are the same)
2. All strings have the same k suffix (i.e. the last k letters are the same)
3. You need to group these strings to minimize the number of groups.
n<=5000,k<=550

Explanation:

Let's first discrete the prefixes and suffixes.

Use double hash or sort.

For a string, there is at least one point to choose after its prefix and suffix are discrete. This is a classic model.

This is a bipartite graph.

Establish super source S and super sink T.

S to the edge whose prefix traffic is 1 after discretization.

The suffix after discretization is to the edge with T-connected flow of 1.

If x, y have at least one edge to choose, X to y connect positive infinity.

Running maximum flow = minimum cut is the answer, which is obvious.

Finally, starting from S, finding the points belonging to S set can determine which edges are cut to determine the grouping.

Code:

#include<cstdio>
#include<cstring>
#include<algorithm>
#define ll long long
#define fo(i, x, y) for(int i = x; i <= y; i ++)
#define fd(i, x, y) for(int i = x; i >= y; i --)
#define min(a, b) ((a) < (b) ? (a) : (b))
using namespace std;

const int N = 100005, M = 555, E = 5e6 + 5, INF = 1 << 30;

const ll p1 = 26455189, mo1 = 43820603;
const ll p2 = 9781909, mo2 = 38577701;

int n, m, k, td, tf; char str[M];

struct node {
    ll x, y, i;
} a[N], b[N];

int c[N], d[N];

bool cmp(node a, node b) {
    if(a.x < b.x) return 1;
    if(a.x > b.x) return 0;
    return a.y < b.y;
}

int final[N], to[E], next[E], r[E], tot = 1;
int S, T, co[N], dd[N], cur[N], ans;

void link(int x, int y, int z) {
    next[++ tot] = final[x], to[tot] = y, r[tot] = z, final[x] = tot;
    next[++ tot] = final[y], to[tot] = x, r[tot] = 0, final[y] = tot;
}

int dg(int x, int flow) {
    if(x == T) return flow;
    int use = 0;
    for(int i = cur[x]; i; i = next[i], cur[x] = i) {
        int y = to[i];
        if(dd[y] + 1 == dd[x] && r[i]) {
            int tmp = dg(y, min(flow - use, r[i]));
            use += tmp; r[i] -= tmp; r[i ^ 1] += tmp;
            if(use == flow) return use;
        }
    }
    cur[x] = final[x];
    if(!(-- co[dd[x]])) dd[T] = T;
    ++ co[++ dd[x]];
    return use;
}

int bz[N];

void dfs(int x) {
    if(bz[x]) return;
    bz[x] = 1;
    for(int i = final[x]; i; i = next[i])
        if(r[i]) dfs(to[i]);
}

struct edge {
    int final[N], to[N], next[N], tot;
    void link(int x, int y) {next[++ tot] = final[x], to[tot] = y, final[x] = tot;}
} e;

int bx[N];

int main() {
    freopen("group.in", "r", stdin);
    freopen("group.out", "w", stdout);
    scanf("%d %d", &n, &k);
    fo(i, 1, n) {
        scanf("%s", str + 1); m = strlen(str + 1);
        fo(j, 1, k) {
            a[i].x = (a[i].x * p1 + str[j] - 'A') % mo1;
            a[i].y = (a[i].y * p2 + str[j] - 'A') % mo2;
        }
        fd(j, m, m - k + 1) {
            b[i].x = (b[i].x * p1 + str[j] - 'A') % mo1;
            b[i].y = (b[i].y * p2 + str[j] - 'A') % mo2;
        }
        a[i].i = b[i].i = i;
    }
    sort(a + 1, a + n + 1, cmp), sort(b + 1, b + n + 1, cmp);
    fo(i, 1, n) {
        if(i == 1 || a[i].x != a[i - 1].x || a[i].y != a[i - 1].y)
            td ++;
        c[a[i].i] = td;
    }
    tf = td;
    fo(i, 1, n) {
        if(i == 1 || b[i].x != b[i - 1].x || b[i].y != b[i - 1].y)
            td ++;
        d[b[i].i] = td;
    }
    fo(i, 1, n) e.link(c[i], i), e.link(d[i], i);
    S = td + 1; T = S + 1;
    fo(i, 1, tf) link(S, i, 1);
    fo(i, tf + 1, td) link(i, T, 1);
    fo(i, 1, n) link(c[i], d[i], INF);
    co[0] = T;
    for(; dd[T] < T;) ans += dg(S, INF);
    dfs(S);
    fo(i, 1, tf) bz[i] = !bz[i];
    printf("%d\n", ans);
    fo(i, 1, td) if(bz[i]) {
        int ss = 0;
        for(int j = e.final[i]; j; j = e.next[j])
            if(!bx[e.to[j]]) ss ++;
        printf("%d", ss);
        for(int j = e.final[i]; j; j = e.next[j])
            if(!bx[e.to[j]]) bx[e.to[j]] = 1, printf(" %d", e.to[j]);
        printf("\n");
    }
}

Added by zeno on Sun, 03 May 2020 12:35:02 +0300