DotnetSpider
DotnetSpider copied to clipboard
爬取一个日语酒店数据都是乱码
` public static class HttpResponseMessageExtensions
{
    /// <summary>
    /// Builds a <see cref="Response"/> from an <see cref="HttpResponseMessage"/> by copying
    /// its status code, HTTP version, response headers, body bytes and content headers.
    /// </summary>
    /// <param name="httpResponseMessage">The HTTP response message to convert.</param>
    /// <returns>
    /// A task whose result is the populated <see cref="Response"/>. The body is stored as the
    /// raw bytes read from the message; no character decoding is performed here.
    /// </returns>
    public static async Task<Response> ToResponseAsync(this HttpResponseMessage httpResponseMessage)
    {
        var response = new Response {StatusCode = httpResponseMessage.StatusCode};

        foreach (var header in httpResponseMessage.Headers)
        {
            // header.Value is a sequence of strings. Calling ToString() on the sequence
            // yields the collection's type name instead of the header text, so the
            // individual values are joined into one comma-separated string.
            response.Headers.Add(
                header.Key,
                header.Value == null ? null : string.Join(", ", header.Value));
        }

        // NOTE(review): this labels every converted response as Shift_JIS regardless of
        // what the server sent. It only changes the header text; the body bytes copied
        // below are not transcoded. Confirm whether decoding should be configured elsewhere.
        response.Headers.Add("Content-Type", "text/html; charset=Shift_JIS");

        // Fall back to HTTP/1.1 when the message carries no version.
        response.Version = httpResponseMessage.Version ?? HttpVersion.Version11;
        response.Headers.TransferEncodingChunked = httpResponseMessage.Headers.TransferEncodingChunked;

        response.Content = new ByteArrayContent(await httpResponseMessage.Content.ReadAsByteArrayAsync());
        foreach (var header in httpResponseMessage.Content.Headers)
        {
            // Same joining as for the response headers above: keep the real values,
            // not the collection's type name.
            response.Content.Headers.Add(
                header.Key,
                header.Value == null ? null : string.Join(", ", header.Value));
        }

        // NOTE(review): hard-coded override of whatever Content-Type was just copied.
        response.Content.Headers.ContentType = "text/html; charset=Shift_JIS";
        return response;
    }
}
` 以上代码增加了Response.Headers.Add("Content-Type", "text/html; charset=Shift_JIS"); 还是乱码, 也改过Encoding依然是乱码, 请教怎么搞
response.Headers 是 server 端操作的事情。你设了这一堆,不会有任何作用。httpclient 只是帮你实现了 http 协议的请求/下载功能,并不是浏览器。你找一下 Samples 里有设置解码的例子