colly
colly copied to clipboard
Attempting to fetch a nonexistent /sitemap.xml.gz results in "gzip: invalid header"
package main
import (
"log"
"net/http"
"github.com/gocolly/colly/v2"
)
// main is a minimal reproduction: it asks a colly collector to visit a
// /sitemap.xml.gz URL and logs whatever error the collector reports.
func main() {
	collector := colly.NewCollector()

	// Use a transport with compression disabled so the request is sent
	// without "Accept-Encoding: gzip". Otherwise the remote server might
	// answer with a gzip-compressed 404 page, which would avoid the bug.
	collector.WithTransport(&http.Transport{DisableCompression: true})

	// Log every error the collector reports; the response itself is not used.
	collector.OnError(func(_ *colly.Response, fetchErr error) {
		log.Printf("err=%v", fetchErr)
	})

	// The return value of Visit is not checked here; the error is surfaced
	// through the OnError callback registered above.
	collector.Visit("https://example.org/sitemap.xml.gz")
}
Results in:
2023/01/05 22:30:27 err=gzip: invalid header