nonebot-plugin-resolver2 icon indicating copy to clipboard operation
nonebot-plugin-resolver2 copied to clipboard

境外服务器部署,部分抖音链接将无法解析

Open padoru233 opened this issue 9 months ago • 21 comments

这个问题有几天了,没注意到 bot回复:作品已删除,或资源直链获取失败, 请稍后再试 2025-03-23 11:39:19.027 | SUCCESS | nonebot:handle_event:535 - OneBot V11 xxx | [message.group.normal]: Message xxxxx from xxx@[群:xxx] '3.84 复制打开抖音,看看【[指尖魔力].的图文作品】三星掌门人李在镕访问小米。# 小米 # 雷军 # ... https://v.douyin.com/sCsqQ41kRYU/ yGV:/ 02/13 [email protected] ' 2025-03-23 11:39:20.716 | WARNING | nonebot_plugin_resolver2:parse_share_url:40 - failed to parse video url from https://www.iesdouyin.com/share/note/7484675353898667274/?re, error: parse video json info from html fail 2025-03-23 11:39:21.318 | WARNING | nonebot_plugin_resolver2:parse_share_url:40 - failed to parse video url from https://m.douyin.com/share/note/7484675353898667274, error: parse video json info from html fail 2025-03-23 11:39:22.063 | WARNING | nonebot_plugin_resolver2:parse_share_url:40 - failed to parse video url from https://v.douyin.com/sCsqQ41kRYU, error: parse video json info from html fail

padoru233 avatar Mar 23 '25 03:03 padoru233

不好意思,没有详细描述环境

bot部署在海外服务器上,检查连通性和状态码: curl -I https://www.iesdouyin.com/share/note/7484675353898667274/?re 返回 HTTP/2 302 location: https://www.douyin.com/?previous_page=oversea_share_link&vid=7484675353898667274 3条链接重定向到了相同的地址

curl -I https://www.douyin.com/?previous_page=oversea_share_link&vid=7484675353898667274 返回 HTTP/2 404

可能是服务器问题,但是ping和nslookup www.douyin.com都正常

padoru233 avatar Mar 23 '25 04:03 padoru233

抖音屏蔽海外 ip 了吧,action 测试里,抖音链接全跑不通了

fllesser avatar Mar 23 '25 04:03 fllesser

补药啊😭😭😭

padoru233 avatar Mar 23 '25 05:03 padoru233

我用电脑测试,开不开代理都是404,但是浏览器打开链接会重定向到https://www.douyin.com/?vid=7484675353898667274&recommend=1

padoru233 avatar Mar 23 '25 05:03 padoru233

我用电脑测试,开不开代理都是404,但是浏览器打开链接会重定向到https://www.douyin.com/?vid=7484675353898667274&recommend=1

没加 header

fllesser avatar Mar 23 '25 10:03 fllesser

模拟一下返回的应该是这个:

<!DOCTYPE html><html lang="en"><head><link rel="icon"href="data:;base64,="><head><script>;(function(w,d,u,b,n,pc,ga,ae,po,s,p,e,t,pp){pc='precollect';ga='getAttribute';ae='addEventListener';po='PerformanceObserver';s=function(m){p=[].slice.call(arguments);p.push(Date.now(),location.href);(m==pc?s.p.a:s.q).push(p)};s.q=[];s.p={a:[]};w[n]=s;e=document.createElement('script');e.src=u+'?bid='+b+'&globalName='+n;e.crossOrigin=u.indexOf('sdk-web')>0?'anonymous':'use-credentials';d.getElementsByTagName('head')[0].appendChild(e);if(ae in w){s.pcErr=function(e){e=e||w.event;t=e.target||e.srcElement;if(t instanceof Element||t instanceof HTMLElement){if(t[ga]('integrity')){w[n](pc,'sri',t[ga]('href')||t[ga]('src'))}else{w[n](pc,'st',{tagName:t.tagName,url:t[ga]('href')||t[ga]('src')})}}else{w[n](pc,'err',e.error)}};s.pcRej=function(e){e=e||w.event;w[n](pc,'reject',e.reason||(e.detail&&e.detail.reason))};w[ae]('error',s.pcErr,true);w[ae]('unhandledrejection',s.pcRej,true)};if('PerformanceLongTaskTiming'in w){pp=s.pp={entries:[]};pp.observer=new PerformanceObserver(function(l){pp.entries=pp.entries.concat(l.getEntries())});pp.observer.observe({entryTypes:['longtask','largest-contentful-paint','layout-shift']})}})(window,document,'https://lf3-short.ibytedapm.com/slardar/fe/sdk-web/browser.cn.js','waf_js','WAFJS')</script><script src="https://lf-waf-js.byted-static.com/obj/waf-jschallenge/out-sha256.js"></script></head><body onload="readygo()"><script>function readygo(){var ps="";for(const i of navigator.plugins)ps+=i.name;window.WAFJS('context.merge',{referrer:document.referrer, history:window.history.length,plugins:ps,platform:navigator.platform,webdriver:navigator.webdriver,vendor:navigator.vendor,langs:navigator.languages.length});window.WAFJS('init',{bid:'waf_js'});window.WAFJS('start');var 
wci="_wafchallengeid",cs="eyJ2Ijp7ImEiOiJDeEZ6YTdHRzIwbm5pbll3NmdlM0ZuMExQblBHS2dMSzFIQk1nVUJ0SzIwPSIsImIiOjE3NDI3MjgxNDcsImMiOiJweEJwZUl2dFBRWHgzTE94ZXlweG9nUjdSMjl2YStSdkpkcDNXTUFJajFBPSJ9LCJzIjoibmxzMWYwZEVUTllwUFBibVJSUFhaZFFNcTVhRGxGczN6MzBBK0YzTUw3cz0ifQ",c=JSON.parse(atob(cs)),prefix=b64tou8a(c.v.a),expect=b64tohex(c.v.c),i=0,iid=setInterval(function(){expect===s256(prefix,""+i)&&(c.d=btoa(""+i),clearInterval(iid),document.cookie=wci+"="+btoa(JSON.stringify(c))+"; Max-Age=1",window.location.reload()),i++,i>1e6&&clearInterval(iid)},1)}</script>Please wait...</body></html>	

GuangChen2333 avatar Mar 23 '25 11:03 GuangChen2333

Image @padoru233

fllesser avatar Mar 23 '25 12:03 fllesser

Btw, 我今天也是这个情况,服务器在香港

GuangChen2333 avatar Mar 23 '25 12:03 GuangChen2333

Image @padoru233

我又回服务器测了一下,换了个终端就能返回200了,但还是失败,然后找别的几个服务器试了一下,就台湾是404,剩下的都是200

padoru233 avatar Mar 23 '25 12:03 padoru233

https://www.douyin.com/?previous_page=oversea_share_link&vid=7484675353898667274 你看你重定向的这个链接 有个 previous_page=oversea_share_link 以前我是没见过这个东西的

fllesser avatar Mar 23 '25 13:03 fllesser

Image 国内是这个 app_code_link

fllesser avatar Mar 23 '25 13:03 fllesser

看了一下,好像是正则表达式换了

现在的window\._ROUTER_DATA\s*=\s*(.*?)</script>找不到json了已经

self\.__pace_f\.push(.*?)<\/script>能找到但是不完整

GuangChen2333 avatar Mar 23 '25 13:03 GuangChen2333

如果真是这个问题的话可以参考我发现的解决方案:

import json
import re

if __name__ == '__main__':
    # Load the saved page. Plain read mode is sufficient: the file is never
    # written, and "r+" would needlessly require write permission on it.
    with open("../1.html", "r", encoding="utf-8") as f:
        html = f.read()

    # Collect the argument text of every `self.__pace_f.push(...)` call that
    # is immediately followed by `</script>`. DOTALL lets a payload span
    # multiple lines.
    pushes = re.findall(
        r'self\.__pace_f\.push\((.*?)\)</script>',
        html,
        re.DOTALL
    )
    if not pushes:
        # Without this guard the script would die with a bare IndexError,
        # e.g. when the saved page is a challenge/error page instead.
        raise SystemExit("no self.__pace_f.push(...) payload found in ../1.html")

    # The last push is parsed as JSON; its element at index 1 is a string
    # that embeds the data after the '"$L9",null,' marker.
    marker = '"$L9",null,'
    parts = json.loads(pushes[-1])[1].split(marker)
    if len(parts) < 2:
        raise SystemExit("marker %r not found in the last push payload" % marker)

    # Drop the final two characters of the segment following the marker
    # (presumably closing brackets of the wrapper -- confirm against a sample).
    data = parts[1][:-2]

    # Write mode only: the output file is never read back.
    with open("1.json", "w", encoding="utf-8") as v:
        v.write(data)

返回: https://drive.google.com/file/d/1zkdj9MXWJvaMf86vyoSB8nasBAGkHwxz/view?usp=drive_link

GuangChen2333 avatar Mar 23 '25 14:03 GuangChen2333

看了一下,好像是正则表达式换了

现在的window\._ROUTER_DATA\s*=\s*(.*?)</script>找不到json了已经

self\.__pace_f\.push(.*?)<\/script>能找到但是不完整

并不是噢,要用 苹果手机 的 UA 才会有这个ROUTER_DATA, 目前国内服务器抖音解析是没有问题的

fllesser avatar Mar 23 '25 14:03 fllesser

看了一下,好像是正则表达式换了 现在的window\._ROUTER_DATA\s*=\s*(.*?)</script>找不到json了已经 用self\.__pace_f\.push(.*?)<\/script>能找到但是不完整

并不是噢,要用 苹果手机 的 UA 才会有这个ROUTER_DATA, 目前国内服务器抖音解析是没有问题的

好的,我疏忽了

GuangChen2333 avatar Mar 23 '25 14:03 GuangChen2333

那我感觉返回200还解决不了的大概就是

<!DOCTYPE html><html lang="en"><head><link rel="icon"href="data:;base64,="><head><script>;(function(w,d,u,b,n,pc,ga,ae,po,s,p,e,t,pp){pc='precollect';ga='getAttribute';ae='addEventListener';po='PerformanceObserver';s=function(m){p=[].slice.call(arguments);p.push(Date.now(),location.href);(m==pc?s.p.a:s.q).push(p)};s.q=[];s.p={a:[]};w[n]=s;e=document.createElement('script');e.src=u+'?bid='+b+'&globalName='+n;e.crossOrigin=u.indexOf('sdk-web')>0?'anonymous':'use-credentials';d.getElementsByTagName('head')[0].appendChild(e);if(ae in w){s.pcErr=function(e){e=e||w.event;t=e.target||e.srcElement;if(t instanceof Element||t instanceof HTMLElement){if(t[ga]('integrity')){w[n](pc,'sri',t[ga]('href')||t[ga]('src'))}else{w[n](pc,'st',{tagName:t.tagName,url:t[ga]('href')||t[ga]('src')})}}else{w[n](pc,'err',e.error)}};s.pcRej=function(e){e=e||w.event;w[n](pc,'reject',e.reason||(e.detail&&e.detail.reason))};w[ae]('error',s.pcErr,true);w[ae]('unhandledrejection',s.pcRej,true)};if('PerformanceLongTaskTiming'in w){pp=s.pp={entries:[]};pp.observer=new PerformanceObserver(function(l){pp.entries=pp.entries.concat(l.getEntries())});pp.observer.observe({entryTypes:['longtask','largest-contentful-paint','layout-shift']})}})(window,document,'https://lf3-short.ibytedapm.com/slardar/fe/sdk-web/browser.cn.js','waf_js','WAFJS')</script><script src="https://lf-waf-js.byted-static.com/obj/waf-jschallenge/out-sha256.js"></script></head><body onload="readygo()"><script>function readygo(){var ps="";for(const i of navigator.plugins)ps+=i.name;window.WAFJS('context.merge',{referrer:document.referrer, history:window.history.length,plugins:ps,platform:navigator.platform,webdriver:navigator.webdriver,vendor:navigator.vendor,langs:navigator.languages.length});window.WAFJS('init',{bid:'waf_js'});window.WAFJS('start');var 
wci="_wafchallengeid",cs="eyJ2Ijp7ImEiOiIxT3Nlc2tDUHc5QUNHYXgxYzRrbUhnMzFqSE9zOFNLSXRSb09vQXpwTktvPSIsImIiOjE3NDI3NDA4NDUsImMiOiIySENzdEYrTFJ3NTVZRlFxci9KYm43bnRoYkkyR1V0bWpLUXZ6a0xGTDh3PSJ9LCJzIjoiZmxRQkFKNlJJTXJvbUpPY1JlMTVtcnEvM1hvMFVXTlhSY2gydm5ObnB3MD0ifQ",c=JSON.parse(atob(cs)),prefix=b64tou8a(c.v.a),expect=b64tohex(c.v.c),i=0,iid=setInterval(function(){expect===s256(prefix,""+i)&&(c.d=btoa(""+i),clearInterval(iid),document.cookie=wci+"="+btoa(JSON.stringify(c))+"; Max-Age=1",window.location.reload()),i++,i>1e6&&clearInterval(iid)},1)}</script>Please wait...</body></html>	

这个了

GuangChen2333 avatar Mar 23 '25 14:03 GuangChen2333

这个是啥

fllesser avatar Mar 23 '25 14:03 fllesser

这个是啥

我本地走香港来测试,不是返回这个就是403(用了header);到浏览器内就是1秒左右会给一个跳转

GuangChen2333 avatar Mar 23 '25 14:03 GuangChen2333

是的,反正就是境外服务器,拿不到这个 _ROUTER_DATA 了

fllesser avatar Mar 23 '25 15:03 fllesser

action 跑就拿不到 Image 本地就可以 Image

fllesser avatar Mar 23 '25 15:03 fllesser

有够奇怪的

GuangChen2333 avatar Mar 23 '25 15:03 GuangChen2333

修好了吗是🤔这么好

GuangChen2333 avatar Jul 05 '25 16:07 GuangChen2333

修好了吗是🤔这么好

看 action 是好了,之前好像是 aiohttp的问题

fllesser avatar Jul 06 '25 01:07 fllesser