Detection of Luogu P2536 [AHOI2005] virus

Problem Address

Solution Trie tree + BFS

  • Build a Trie tree from the RNA fragment of the query, and then find the answer by "walking" in the Trie tree, which is realized by BFS.
  • Consider recording two values on the queue node of the extensive search: Q[][0] represents the point on the Trie tree, and Q[][1] represents the current location matching to the template fragment;
  • Then we consider how to expand the queue node and calculate the answer: (for the convenience of description, remember u = Q [], v = Q [], the template string is s, and the point u along the edge of the dictionary tree is G[u][c])
    • When s[v+1] ='A'or'T'or'C'or'g ': expandable node {G[u] ['A'or'T'or'C'or'g'], v+1};
    • When s[v+1] = '?': because it can replace any letter, the node {G[u] [', A' and 'T' and 'C' and 'g', v+1} can be extended at the same time;
    • When s[v+1] = '*' and '*' does not replace characters: Extensible node {u,v+1};
    • When s[v] = '* and' * replaces more than one character: each character replaced is regarded as A node on the extended queue, then the node {G[u] ['A' and 'T' and 'C' and 'g], V} can be extended at the same time each time (that is, the matching position will not change this time, and it can continue to expand in the same way next time);
    • Until the end of the template string is matched on a node, the number of RNA fragments on node u of the Trie tree is counted.

Notice

  • Through a bool array vis[x][y], the state of matching node x to position Y has been recorded, avoiding unnecessary search (here, the bootset array must be used, otherwise MLE will be used).
  • It should be avoided that the position of the template fragment matched to is larger than the length of the original string or the point reached on the Trie tree is empty, and special judgment should be made when expanding the node.

Code

#include <cstdio>
#include <cstring>
#include <bitset>

using namespace std;
const int Maxn = 0x3f3f3f3f;
const int N = 505, L = N << 1, M = L * 350;
int T = 1, n, Ans, len, t = 0, w = 1;
int G[M][4], val[M], Q[M][2];
char s[L], a[N], c[255]; 
bitset<L> vis[M];

inline int get()
{
    char ch; bool f = false; int res = 0;
    while (((ch = getchar()) < '0' || ch > '9') && ch != '-');
    if (ch == '-') f = true;
     else res = ch - '0';
    while ((ch = getchar()) >='0' && ch <= '9')
        res = (res << 3) + (res << 1) + ch - '0';
    return f? ~res + 1 : res;
}

inline void put(int x)
{
    if (x < 0)
      x = ~x + 1, putchar('-');
    if (x > 9) put(x / 10);
    putchar(x % 10 + 48);
}

inline void Ins()
{
    scanf("%s", a + 1); int l = strlen(a + 1), x = 1;
    for (int i = 1; i <= l; ++i)
    {
        int y = c[a[i]]; 
        if (!G[x][y]) G[x][y] = ++T;
        x = G[x][y];
    } 
    val[x]++;
}

inline void Push(const int x, const int stp)
{
    if (!x || stp > len) return;
    if (!vis[x][stp])
    {
        vis[x][stp] = 1; w = (w + 1) % M;
        Q[w][0] = x; Q[w][1] = stp;
    } 
}

int main()
{
//  freopen("virus.in", "r", stdin);
//  freopen("virus.out", "w", stdout);
    c['A'] = 0; c['C'] = 1; c['T'] = 2; c['G'] = 3; 
    c['?'] = -1; c['*'] = -2;
    scanf("%s", s + 1); len = strlen(s + 1);
    s[0] = '?'; n = get();
    for (int i = 1; i <= n; ++i) Ins();
    int u, v, I; Q[1][0] = 1; Q[1][1] = 0; vis[1][0] = 1;
    while (t != w)
    {
        t = (t + 1) % M;
        u = Q[t][0]; v = Q[t][1];
        if (s[v] == '*') 
         for (int i = 0; i < 4; ++i) Push(G[u][i], v);
        if (v == len) {Ans += val[u]; val[u] = 0; continue;}
        if (c[s[v + 1]] >= 0) Push(G[u][c[s[v + 1]]], v + 1);
         else 
         {
            for (int i = 0; i < 4; ++i) Push(G[u][i], v + 1);
            if (s[v + 1] == '*') Push(u, v + 1);
         }
    }
    return put(n - Ans), 0;
}

Keywords: Fragment

Added by rajns on Tue, 19 May 2020 17:59:46 +0300