sensitive icon indicating copy to clipboard operation
sensitive copied to clipboard

支持英文吗?

Open GabyCheng opened this issue 4 years ago • 3 comments

GabyCheng avatar Dec 31 '20 03:12 GabyCheng

英文文本中带有空格、逗号等符号时，可能会有点问题。

wmyi avatar Mar 01 '21 08:03 wmyi

问题

如果字典里面有 ass, 那么 the assistant said: 'shit! fuck you!' 中的 assistant 会被替换成 ***istant.

也许可以这样绕过....

       // Dictionary used for this demonstration, one sensitive word per line.
	dict := `ass
shit
fuck`

	filter := sensitive.New()
	err := filter.Load(strings.NewReader(dict))
	if err != nil {
		log.Fatalf("failed to load dic: %v", err)
	}

	replacement := '*'

	s := "the assistant said: 'shit! fuck you!'"
	// Unluckily o == "the ***istant said: '****! **** you!'":
	// the "ass" inside "assistant" is masked as well.
	o := filter.Replace(s, replacement)

	// Workaround: keep a mask only when the masked run is a whole word, i.e.
	// it is delimited on BOTH sides by a word boundary or by the start/end of
	// the text. Otherwise the original characters are restored.

	// Word boundaries: whitespace and common punctuation.
	wordBoundaries := map[rune]bool{
		' ':  true,
		'\t': true,
		'\n': true,
		',':  true,
		'!':  true,
		'?':  true,
		'.':  true,
		':':  true,
		';':  true,
		'\'': true,
		'"':  true,
		'(':  true,
		')':  true,
	}

	var (
		sRunes = []rune(s)
		oRunes = []rune(o)
	)

	// The position-by-position comparison below assumes Replace substitutes
	// rune for rune. Fail loudly instead of panicking on an index if it does not.
	if len(sRunes) != len(oRunes) {
		log.Fatalf("replaced text has %d runes, original has %d", len(oRunes), len(sRunes))
	}

	const InvalidInd = -1

	type word struct {
		start, end int
	}

	// Start and end (half-open: [start, end)) positions of every masked run.
	var wordRanges []word

	start := InvalidInd
	for i := range sRunes {
		changed := sRunes[i] != oRunes[i]
		switch {
		case changed && start == InvalidInd:
			// First masked rune of a new run.
			start = i
		case !changed && start != InvalidInd:
			// First untouched rune after a run: close it.
			wordRanges = append(wordRanges, word{start: start, end: i})
			start = InvalidInd
		}
	}

	// A run that extends to the very end of the text.
	if start != InvalidInd {
		wordRanges = append(wordRanges, word{start: start, end: len(oRunes)})
	}

	// isBoundaryAt reports whether position i delimits a word. Positions
	// outside the text count as boundaries, so a sensitive word at the very
	// start or end of the text is still treated as a whole word.
	isBoundaryAt := func(i int) bool {
		return i < 0 || i >= len(sRunes) || wordBoundaries[sRunes[i]]
	}

	for _, w := range wordRanges {
		// Not a complete word (e.g. "ass" in "assistant" or in "bass"):
		// restore the original text for this run.
		if !isBoundaryAt(w.start-1) || !isBoundaryAt(w.end) {
			copy(oRunes[w.start:w.end], sRunes[w.start:w.end])
		}
	}

	// o == the assistant said: '****! **** you!'
	o = string(oRunes)

alexyangfox avatar Nov 05 '21 08:11 alexyangfox

英文带空格 ,等可能有点问题

这个有什么解决方式么

ThePolarBearrr avatar Dec 16 '23 07:12 ThePolarBearrr