计算一个字符串中恰好出现 K 次的 M 长度子字符串

原文:https://www . geeksforgeeks . org/count-m-length-substrings-occurrency-k-times-in-a-string/

给定一个长度为 NS和两个整数 MK ,任务是计算长度为 M子串在字符串 S 中精确出现 K 次的数量。

示例:

输入:S =“abacaba”,M = 3,K = 2 输出: 1 解释:长度为 3 的所有不同子串都是“aba”、“bac”、“aca”、“cab”。 在所有这些子字符串中,只有“aba”在字符串 s 中出现两次。 因此,计数为 1。

输入: S = "geeksforgeeks ",M = 2,K = 1 输出: 4 解释: 长度为 2 的所有不同子串都是“ge”、“ee”、“ek”、“ks”、“sf”、“fo”、“or”、“rg”。 在所有这些字符串中,“sf”、“fo”、“or”、“rg”在字符串 s 中出现一次。 因此,计数为 4。

朴素方法:最简单的方法是生成所有长度为MT5】的子串,并将串中每个子串的频率T8】S 存储在映射中。现在,遍历地图,如果频率等于 K ,那么将计数增加 1 。完成上述步骤后,打印计数作为结果。 时间复杂度:O((N–M) N * M)* 辅助空间:O(N–M)

高效方法:上述方法可以通过使用 KMP 算法来寻找字符串中子串的频率来优化。按照以下步骤解决问题:

下面是上述方法的实现:

C++

// C++ program for the above approach
#include <bits/stdc++.h>
using namespace std;

// Function to compute the LPS array
void computeLPSArray(string pat, int M,
                     int lps[])
{
    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;

    // Iterate from [1, M - 1] to find lps[i]
    while (i < M) {

        // If the characters match
        if (pat[i] == pat[len]) {

            len++;
            lps[i] = len;
            i++;
        }

        // If pat[i] != pat[len]
        else {

            // If length is non-zero
            if (len != 0) {
                len = lps[len - 1];

                // Also, note that i is
                // not incremented here
            }

            // Otherwise
            else {
                lps[i] = len;
                i++;
            }
        }
    }
}

// Function to find the frequency of
// pat in the string txt
int KMPSearch(string pat, string txt)
{
    // Stores length of both strings
    int M = pat.length();
    int N = txt.length();

    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    int lps[M];

    // Store the index for pat[]
    int j = 0;

    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);

    // Store the index for txt[]
    int i = 0;
    int res = 0;
    int next_i = 0;

    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }
        if (j == M) {

            // If pattern is found the
            // first time, iterate again
            // to check for more patterns
            j = lps[j - 1];
            res++;

            // Start i to check for more
            // than once occurrence
            // of pattern, reset i to
            // previous start + 1
            if (lps[j] != 0)
                i = ++next_i;
            j = 0;
        }

        // Mismatch after j matches
        else if (i < N
                 && pat[j] != txt[i]) {

            // Do not match lps[0..lps[j-1]]
            // characters, they will
            // match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }

    // Return the required frequency
    return res;
}

// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
void findCount(string& S, int M, int P)
{

    // Store all substrings of length M
    set<string> vec;

    // Store the size of the string, S
    int n = S.length();

    // Pick starting point
    for (int i = 0; i < n; i++) {

        // Pick ending point
        for (int len = 1;
             len <= n - i; len++) {

            // If the substring is of
            // length M, insert it in vec
            string s = S.substr(i, len);
            if (s.length() == M) {
                vec.insert(s);
            }
        }
    }

    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;

    // Iterate through the set of
    // substrings
    for (auto it : vec) {

        // Store its frequency
        int ans = KMPSearch(it, S);

        // If frequency is equal to P
        if (ans == P) {

            // Increment count by 1
            count++;
        }
    }

    // Print the answer
    cout << count;
}

// Driver Code
int main()
{
    string S = "abacaba";
    int M = 3, P = 2;

    // Function Call
    findCount(S, M, P);

    return 0;
}

Java 语言(一种计算机语言,尤用于创建网站)

// Java Program to implement
// the above approach

import java.io.*;
import java.util.*;

class GFG {

    // Function to compute the LPS array
    static void computeLPSArray(String pat, int M,
                                int lps[])
    {
        // Length of the previous
        // longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0;

        // Iterate from [1, M - 1] to find lps[i]
        while (i < M) {

            // If the characters match
            if (pat.charAt(i) == pat.charAt(len)) {

                len++;
                lps[i] = len;
                i++;
            }

            // If pat[i] != pat[len]
            else {

                // If length is non-zero
                if (len != 0) {
                    len = lps[len - 1];

                    // Also, note that i is
                    // not incremented here
                }

                // Otherwise
                else {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }

    // Function to find the frequency of
    // pat in the string txt
    static int KMPSearch(String pat, String txt)
    {
        // Stores length of both strings
        int M = pat.length();
        int N = txt.length();

        // Initialize lps[] to store the
        // longest prefix suffix values
        // for the string pattern
        int lps[] = new int[M];

        // Store the index for pat[]
        int j = 0;

        // Preprocess the pattern
        // (calculate lps[] array)
        computeLPSArray(pat, M, lps);

        // Store the index for txt[]
        int i = 0;
        int res = 0;
        int next_i = 0;

        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                j++;
                i++;
            }
            if (j == M) {

                // If pattern is found the
                // first time, iterate again
                // to check for more patterns
                j = lps[j - 1];
                res++;

                // Start i to check for more
                // than once occurrence
                // of pattern, reset i to
                // previous start + 1
                if (lps[j] != 0)
                    i = ++next_i;
                j = 0;
            }

            // Mismatch after j matches
            else if (i < N
                     && pat.charAt(j) != txt.charAt(i)) {

                // Do not match lps[0..lps[j-1]]
                // characters, they will
                // match anyway
                if (j != 0)
                    j = lps[j - 1];
                else
                    i = i + 1;
            }
        }

        // Return the required frequency
        return res;
    }

    // Function to find count of substrings
    // of length M occurring exactly P times
    // in the string, S
    static void findCount(String S, int M, int P)
    {

        // Store all substrings of length M
        // set<string> vec;
        TreeSet<String> vec = new TreeSet<>();

        // Store the size of the string, S
        int n = S.length();

        // Pick starting point
        for (int i = 0; i < n; i++) {

            // Pick ending point
            for (int len = 1; len <= n - i; len++) {

                // If the substring is of
                // length M, insert it in vec
                String s = S.substring(i, i + len);
                if (s.length() == M) {
                    vec.add(s);
                }
            }
        }

        // Initialise count as 0 to store
        // the required count of substrings
        int count = 0;

        // Iterate through the set of
        // substrings
        for (String it : vec) {

            // Store its frequency
            int ans = KMPSearch(it, S);

            // If frequency is equal to P
            if (ans == P) {

                // Increment count by 1
                count++;
            }
        }

        // Print the answer
        System.out.println(count);
    }

    // Driver Code
    public static void main(String[] args)
    {

        String S = "abacaba";
        int M = 3, P = 2;

        // Function Call
        findCount(S, M, P);
    }
}

// This code is contributed by kingash.

Python 3

# Python 3 program for the above approach

# Function to compute the LPS array
def computeLPSArray(pat, M, lps):

    # Length of the previous
    # longest prefix suffix
    len1 = 0
    i = 1
    lps[0] = 0

    # Iterate from [1, M - 1] to find lps[i]
    while (i < M):

        # If the characters match
        if (pat[i] == pat[len1]):
            len1 += 1
            lps[i] = len1
            i += 1

        # If pat[i] != pat[len]
        else:
            # If length is non-zero
            if (len1 != 0):
                len1 = lps[len1 - 1]

                # Also, note that i is
                # not incremented here

            # Otherwise
            else:
                lps[i] = len1
                i += 1

# Function to find the frequency of
# pat in the string txt
def KMPSearch(pat, txt):

    # Stores length of both strings
    M = len(pat)
    N = len(txt)

    # Initialize lps[] to store the
    # longest prefix suffix values
    # for the string pattern
    lps = [0 for i in range(M)]

    # Store the index for pat[]
    j = 0

    # Preprocess the pattern
    # (calculate lps[] array)
    computeLPSArray(pat, M, lps)

    # Store the index for txt[]
    i = 0
    res = 0
    next_i = 0

    while (i < N):
        if (pat[j] == txt[i]):
            j += 1
            i += 1
        if (j == M):

            # If pattern is found the
            # first time, iterate again
            # to check for more patterns
            j = lps[j - 1]
            res += 1

            # Start i to check for more
            # than once occurrence
            # of pattern, reset i to
            # previous start + 1
            if (lps[j] != 0):
                next_i += 1
                i = next_i
            j = 0

        # Mismatch after j matches
        elif (i < N and pat[j] != txt[i]):
            # Do not match lps[0..lps[j-1]]
            # characters, they will
            # match anyway
            if (j != 0):
                j = lps[j - 1]
            else:
                i = i + 1

    # Return the required frequency
    return res

# Function to find count of substrings
# of length M occurring exactly P times
# in the string, S
def findCount(S, M, P):

    # Store all substrings of length M
    vec = set()

    # Store the size of the string, S
    n = len(S)

    # Pick starting point
    for i in range(n):

        # Pick ending point
        for len1 in range(n - i + 1):

            # If the substring is of
            # length M, insert it in vec
            s = S[i:len1]

          #  if (len1(s) == M):
           #     vec.add(s)

    # Initialise count as 0 to store
    # the required count of substrings
    count = 1

    # Iterate through the set of
    # substrings
    for it in vec:

        # Store its frequency
        ans = KMPSearch(it, S)

        # If frequency is equal to P
        if (ans == P):

            # Increment count by 1
            count += 1

    # Print the answer
    print(count)

# Driver Code
if __name__ == '__main__':
    S = "abacaba"
    M = 3
    P = 2

    # Function Call
    findCount(S, M, P)

    # This code is contributed by ipg2016107.

C

// C# program for the above approach
using System;
using System.Collections.Generic;
class GFG
{

  // Function to compute the LPS array
  static void computeLPSArray(string pat, int M, int[] lps)
  {

    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;

    // Iterate from [1, M - 1] to find lps[i]
    while (i < M)
    {

      // If the characters match
      if (pat[i] == pat[len])
      {
        len++;
        lps[i] = len;
        i++;
      }

      // If pat[i] != pat[len]
      else {

        // If length is non-zero
        if (len != 0) {
          len = lps[len - 1];

          // Also, note that i is
          // not incremented here
        }

        // Otherwise
        else {
          lps[i] = len;
          i++;
        }
      }
    }
  }

  // Function to find the frequency of
  // pat in the string txt
  static int KMPSearch(string pat, string txt)
  {

    // Stores length of both strings
    int M = pat.Length;
    int N = txt.Length;

    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    int[] lps = new int[M];

    // Store the index for pat[]
    int j = 0;

    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);

    // Store the index for txt[]
    int i = 0;
    int res = 0;
    int next_i = 0;

    while (i < N) {
      if (pat[j] == txt[i]) {
        j++;
        i++;
      }
      if (j == M) {

        // If pattern is found the
        // first time, iterate again
        // to check for more patterns
        j = lps[j - 1];
        res++;

        // Start i to check for more
        // than once occurrence
        // of pattern, reset i to
        // previous start + 1
        if (lps[j] != 0)
          i = ++next_i;
        j = 0;
      }

      // Mismatch after j matches
      else if (i < N
               && pat[j] != txt[i]) {

        // Do not match lps[0..lps[j-1]]
        // characters, they will
        // match anyway
        if (j != 0)
          j = lps[j - 1];
        else
          i = i + 1;
      }
    }

    // Return the required frequency
    return res;
  }

  // Function to find count of substrings
  // of length M occurring exactly P times
  // in the string, S
  static void findCount(string S, int M, int P)
  {

    // Store all substrings of length M
    HashSet<string> vec = new HashSet<string>();

    // Store the size of the string, S
    int n = S.Length;

    // Pick starting point
    for (int i = 0; i < n; i++) {

      // Pick ending point
      for (int len = 1;
           len <= n - i; len++) {

        // If the substring is of
        // length M, insert it in vec
        string s = S.Substring(i, len);
        if (s.Length == M) {
          vec.Add(s);
        }
      }
    }

    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;

    // Iterate through the set of
    // substrings
    foreach(string it in vec) {

      // Store its frequency
      int ans = KMPSearch(it, S);

      // If frequency is equal to P
      if (ans == P) {

        // Increment count by 1
        count++;
      }
    }

    // Print the answer
    Console.WriteLine(count);
  }

  // Driver code
  static void Main() {
    string S = "abacaba";
    int M = 3, P = 2;

    // Function Call
    findCount(S, M, P);
  }
}

// This code is contributed by divyeshrabadiya07.

java 描述语言

<script>

//Javascript implementation of the approach

// Function to compute the LPS array
function computeLPSArray(pat, M, lps)
{
    // Length of the previous
    // longest prefix suffix
    var len = 0;
    var i = 1;
    lps[0] = 0;

    // Iterate from [1, M - 1] to find lps[i]
    while (i < M) {

        // If the characters match
        if (pat[i] == pat[len]) {

            len++;
            lps[i] = len;
            i++;
        }

        // If pat[i] != pat[len]
        else {

            // If length is non-zero
            if (len != 0) {
                len = lps[len - 1];

                // Also, note that i is
                // not incremented here
            }

            // Otherwise
            else {
                lps[i] = len;
                i++;
            }
        }
    }
}

// Function to find the frequency of
// pat in the string txt
function KMPSearch(pat, txt)
{
    // Stores length of both strings
    var M = pat.length;
    var N = txt.length;

    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    var lps = new Array(M);

    // Store the index for pat[]
    var j = 0;

    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);

    // Store the index for txt[]
    var i = 0;
    var res = 0;
    var next_i = 0;

    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }
        if (j == M) {

            // If pattern is found the
            // first time, iterate again
            // to check for more patterns
            j = lps[j - 1];
            res++;

            // Start i to check for more
            // than once occurrence
            // of pattern, reset i to
            // previous start + 1
            if (lps[j] != 0)
                i = ++next_i;
            j = 0;
        }

        // Mismatch after j matches
        else if (i < N
                 && pat[j] != txt[i]) {

            // Do not match lps[0..lps[j-1]]
            // characters, they will
            // match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }

    // Return the required frequency
    return res;
}

// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
function findCount( S, M, P)
{

    // Store all substrings of length M
    var vec = new Set();

    // Store the size of the string, S
    var n = S.length;

    // Pick starting point
    for (var i = 0; i < n; i++) {

        // Pick ending point
        for (var len = 1;
             len <= n - i; len++) {

            // If the substring is of
            // length M, insert it in vec
            var s = S.substring(i, len);
            if (s.length == M) {
                vec.add(s);
            }
        }
    }

    // Initialise count as 0 to store
    // the required count of substrings
    var count = 0;

    // Iterate through the set of
    // substrings
    for (const it of vec){

        // Store its frequency
        var ans = KMPSearch(it, S);

        // If frequency is equal to P
        if (ans == P) {

            // Increment count by 1
            count++;
        }
    }

    // Print the answer
    document.write( count);
}

var S = "abacaba";
var M = 3, P = 2;
// Function Call
findCount(S, M, P);

// This code is contributed by SoumikMondal

</script>

Output: 

1

时间复杂度:O((N * M)+(N2–M2) 辅助空间:O(N–M)