fucking-algorithm
fucking-algorithm copied to clipboard
滑动窗口算法延伸:Rabin Karp 字符匹配算法 :: labuladong的算法小抄
好难啊啊😭
纯纯滑动窗口思路Java代码如下:
public static List<String> findRepeatedDnaSequences(String s) {
int L = 10;
Set<String> res = new HashSet();
List<String> Fres = new ArrayList<>();
HashSet<String> seen = new HashSet<>();
List<Character> window = new ArrayList<>();
int left = 0;int right = 0;
while (right < s.length()) {
window.add(s.charAt(right));
right++;
if (right - left == L) {
String windowStr = "";
for (int i = 0 ; i < window.size();i++) {
windowStr += window.get(i);
}
if (seen.contains(windowStr)) {
// System.out.println("找到一个重复子串: "+ windowStr);
res.add(windowStr);
}else {
seen.add(windowStr);
}
window.remove(0);
left++;
}
}
Fres.addAll(res);
return Fres;
}
func findRepeatedDnaSequences(s string) []string {
arr := make([]byte, len(s))
for i, v := range s {
switch string(v) {
case "A":
arr[i] = 0
case "C":
arr[i] = 1
case "G":
arr[i] = 2
case "T":
arr[i] = 3
}
}
left, right := 0, 0
var res []string
resMap := map[string]struct{}{}
seek := map[int]bool{}
tmp := 0
for right < len(arr) {
tmp = addNewStr(tmp, arr[right])
right++
for right-left >= 10 {
if _, exist := seek[tmp]; exist {
resMap[s[left:right]] = struct{}{}
} else {
seek[tmp] = true
}
left++
}
}
for key := range resMap {
res = append(res, key)
}
return res
}
func addNewStr(num int, b byte) int {
num = num << 2
num = num & 0xfffff
num = num | int(b)
return num
}
go语言解决重复的DNA序列问题,感谢大佬提供的思路
取了个巧,刚好这个问题只需要求重复的10个序列,而总共只有4种碱基,利用2个bit位刚好能够表示,总共只需要20个bit位,也就是int32其实就够了
我看官方题解下面也有人在讨论DNA这个题,如本书作者所说(下图1),O(L)的时间复杂度来自于生成子串,可是加入了hash后还是存在生成子串的过程吧(下图2)?所以时间复杂度不是应该还是要考虑L?

TypeScript
function findRepeatedDnaSequences(s: string): string[] {
const ret: Set<string> = new Set();
const n = s.length;
const map = {
'A': 0,
'G': 1,
'C': 2,
'T': 3,
};
const nums: number[] = new Array(n);
// 将字符串转数字
for(let i = 0; i < n; i++) {
nums[i] = map[s[i]];
};
// 数字长度
const L = 10;
// 4 进制
const R = 4;
const RL = Math.pow(R, L - 1);
// 左右窗口边界索引
let left = 0;
let right = 0;
// 记录重复出现的 hash 值
const seenHash = {};
// 窗口内 hash
let windowHash = 0;
while (right < s.length) {
windowHash = windowHash * R + nums[right];
right++;
while (right - left === L) {
if (seenHash[windowHash]) {
ret.add(s.substring(left, right));
} else {
seenHash[windowHash] = true;
}
windowHash = windowHash - nums[left] * RL;
left++;
}
}
return Array.from(ret);
};
滴滴滴打卡
第一章早就刷完了怎么还没小旗子,原来更新了
推导一下公式
hashBase = 1658598167
needleHashBase = hashStep ^ (needle.length - 1)
hash = hash - charToRemove * needleHashBase
=> (hash - charToRemove * needleHashBase) % hashBase // this is not equivalent, will cause hash collision
= ((hash - charToRemove * needleHashBase) + hashBase) % hashBase // (a) % Q = (a + Q) % Q, in case for negative when removing a char
= ((hash % hashBase - charToRemove * needleHashBase % hashBase) + hashBase) % hashBase // (a + b) % Q = (a % Q + b % Q) % Q, mod each one to prevent overflow
= ((hash - charToRemove * needleHashBase % hashBase) + hashBase) % hashBase // hash was alread moded by hashBase in the previous step, so hash % hashBase = hash
-1%3 =2 最后rabin-karp算法中的左移处理时,不加Q也是可以的
打卡 为了避免整型溢出,选一个大素数Q,取模。
windowHash = ((R * windowHash) % Q + txt.charAt(right)) % Q 为什么不是 windowHash = (R * windowHash+ txt.charAt(right)) % Q 或者 windowHash = ((R * windowHash) % Q + txt.charAt(right) % Q ) % Q 呢