v2
v2 copied to clipboard
readability: do not strip out content inside code & table elements
readability strips out code comments in https://2ality.com/2021/01/looping-over-arrays.html
I believe this has to do with the `.hljs-comment` element classes.
A possible solution is to skip elements that are enclosed in code or table tags (an approach taken from mozilla/readability):
// removeUnlikelyCandidates visits every element found under document except
// html and body, and passes to removeNodes those whose combined class and id
// text matches blacklistCandidatesRegexp, or matches unlikelyCandidatesRegexp
// without also matching okMaybeItsACandidateRegexp.
//
// Elements for which Closest("code,table") yields a match are skipped before
// the regexp tests, so this function never passes them to removeNodes.
func removeUnlikelyCandidates(document *goquery.Document) {
document.Find("*").Not("html,body").Each(func(i int, s *goquery.Selection) {
// The "exists" results of Attr are discarded; class and id are
// concatenated and tested as a single string.
class, _ := s.Attr("class")
id, _ := s.Attr("id")
str := class + id
+ if s.Closest("code,table").Length() > 0 { // leave code/table content alone, e.g. highlighted comments inside code
+ return // NOTE(review): goquery's Closest also tests s itself, not only its ancestors — confirm that is intended
+ }
// A blacklist match alone is enough; an "unlikely" match only counts
// when the "ok, maybe" pattern does not also match.
if blacklistCandidatesRegexp.MatchString(str) || (unlikelyCandidatesRegexp.MatchString(str) && !okMaybeItsACandidateRegexp.MatchString(str)) {
removeNodes(s)
}
})
}