187. Repeated DNA Sequences - Explanation

Problem Link



1. Hash Set

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        seen, res = set(), set()

        for l in range(len(s) - 9):
            cur = s[l: l + 10]
            if cur in seen:
                res.add(cur)
            seen.add(cur)
        return list(res)

Time & Space Complexity

  • Time complexity: O(n)O(n)
  • Space complexity: O(n)O(n)

2. Hash Map

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        if len(s) < 10:
            return []

        mp = defaultdict(int)
        res = []
        for l in range(len(s) - 9):
            cur = s[l: l + 10]
            mp[cur] += 1
            if mp[cur] == 2:
                res.append(cur)

        return res

Time & Space Complexity

  • Time complexity: O(n)O(n)
  • Space complexity: O(n)O(n)

3. Rabin-Karp Algorithm (Double Hashing)

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> List[str]:
        n = len(s)
        if n < 10:
            return []

        cnt = defaultdict(int)
        res = []
        base1, base2 = 31, 37
        hash1 = hash2 = 0
        power1, power2 = 1, 1
        MOD1, MOD2 = 685683731, 768258391

        for i in range(9):
            power1 *= base1
            power2 *= base2
            power1 %= MOD1
            power2 %= MOD2

        for i in range(n):
            hash1 = (hash1 * base1 + ord(s[i])) % MOD1
            hash2 = (hash2 * base2 + ord(s[i])) % MOD2

            if i >= 9:
                key = (hash1 << 31) | hash2
                cnt[key] += 1
                if cnt[key] == 2:
                    res.append(s[i - 9 : i + 1])

                hash1 = (hash1 - power1 * ord(s[i - 9]) % MOD1 + MOD1) % MOD1
                hash2 = (hash2 - power2 * ord(s[i - 9]) % MOD2 + MOD2)

        return res

Time & Space Complexity

  • Time complexity: O(n)O(n)
  • Space complexity: O(n)O(n)

4. Bit Mask

class Solution:
    def findRepeatedDnaSequences(self, s: str) -> list[str]:
        if len(s) < 10:
            return []

        mp = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
        seen, res = set(), set()
        mask = 0
        for i in range(len(s)):
            mask = ((mask << 2) | mp[s[i]]) & 0xFFFFF
            if i >= 9:
                if mask in seen:
                    res.add(s[i - 9: i + 1])
                else:
                    seen.add(mask)

        return list(res)

Time & Space Complexity

  • Time complexity: O(n)O(n)
  • Space complexity: O(n)O(n)