bilibili-API-collect
bilibili-API-collect copied to clipboard
ai字幕api
类似这种 https://aisubtitle.hdslb.com/bfs/subtitle/1cc78982172c6892257eb955d3feef80f2d1560c.json?auth_key=1686750150-89eafc1c901d478eb27b42bc0e92cdb1-0-68bfca61a62dffd3d6eb00b149a4125d ,不太会抓
你的源访问不了。你说的字幕是这个接口吗?https://api.bilibili.com/x/web-interface/view
我试了一下,@kiddkyd2 给的接口是对的。但是这个接口似乎需要带上 SESSDATA:我尝试以访客模式(Guest Mode)打开视频,但自动字幕的选项没有出现。所以我认为自动字幕这个功能只有登录后才会显示,也有可能是我技术不行 :D
以下是代码(nodejs)
const axios = require("axios");
//const md5 = require("md5");
const cheerio = require("cheerio");
const fs = require("fs");
// Cookie header value: a fixed placeholder prefix followed by the final
// segment, which is read from ./last_part.txt so it is not hard-coded here.
// The file content is trimmed because text files usually end with a newline,
// and a newline inside a header value makes Node reject the request.
const SESSDATA = `SESSDATA=aaaaaaaa%aaaaaaaaaaaa%aaaaaaa%${fs.readFileSync("./last_part.txt", { encoding: "utf8" }).trim()}`;
// Video pages to process, in order; main() walks this list by index.
const urls = [
  "https://www.bilibili.com/video/BV1Rj411m7Um/?spm_id_from=333.1007.tianma.1-1-1.click",
  "https://www.bilibili.com/video/BV1MX4y1v7dq/?spm_id_from=333.1007.tianma.1-2-2.click",
  "https://www.bilibili.com/video/BV12g4y1A7Hz/?spm_id_from=333.1007.tianma.2-1-3.click",
  "https://www.bilibili.com/video/BV1hk4y1N7UX/?spm_id_from=333.1007.tianma.2-2-4.click",
  "https://www.bilibili.com/video/BV1bu411h7jp/?spm_id_from=333.1007.tianma.3-2-6.click",
  "https://www.bilibili.com/video/BV1NP411v7Uc/?spm_id_from=333.1007.tianma.3-1-5.click",
  "https://www.bilibili.com/video/BV1BV411g7LH/?spm_id_from=333.1007.tianma.4-2-12.click",
  "https://www.bilibili.com/video/BV1Mg4y1A7aw/?spm_id_from=333.1007.tianma.1-2-2.click",
  "https://www.bilibili.com/video/BV19X4y1n7Qd/?spm_id_from=333.1007.tianma.2-1-3.click",
  "https://www.bilibili.com/video/BV1Lm4y1Y74H/?spm_id_from=333.1007.tianma.4-2-12.click",
  "https://www.bilibili.com/video/BV1Uk4y1N7A2/?spm_id_from=333.1007.tianma.5-3-16.click",
];
// Script entry point. main is a hoisted function declaration, so it can be
// called before its definition appears. It returns a promise; a rejection is
// reported and reflected in the exit code instead of being left unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Downloads a video page and extracts the numeric `aid` embedded in the
 * text of its <script> elements.
 *
 * @param {string} video_url - URL of the video page to download.
 * @returns {Promise<string>} The aid as a string of digits, or "" when the
 *   request fails or the scripts contain no `"aid":<digits>` pair.
 */
async function fetchAid(video_url) {
  try {
    const res = await axios.get(video_url);
    const $ = cheerio.load(res.data);
    const scriptText = $("script").text();
    // Match the quoted JSON key so that words merely containing "aid" are
    // skipped; a regex also cannot run past the end of the text the way an
    // open-ended character scan can when no terminating comma exists.
    const match = /"aid":\s*(\d+)/.exec(scriptText);
    if (match === null) {
      console.error(`fetchAid: no aid found in ${video_url}`);
      return "";
    }
    return match[1];
  } catch (err) {
    // Report the failure instead of dropping it; callers still receive "".
    console.error(err);
    return "";
  }
}
/* Disabled: this endpoint does not need w_rid and wts
function webid(aid) {
var wts = Math.round(Date.now()/1e3);
var params = `aid=${aid}&wts=${wts}`;
var w_rid = md5(params);
return [wts,w_rid];
}*/
/**
 * Processes every entry of `urls` in order: resolves the page's aid with
 * fetchAid, requests the video view endpoint with the SESSDATA cookie, and
 * if a subtitle is listed, downloads the first one and prints its body.
 *
 * Failures for one URL are logged and do not stop the remaining URLs.
 * After each URL the loop index is printed as a progress marker.
 *
 * @returns {Promise<void>}
 */
async function main() {
  for (let loop = 0; loop < urls.length; ++loop) {
    const url = urls[loop];
    try {
      const aid = await fetchAid(url);
      const viewResponse = await axios.request({
        method: 'get',
        url: `https://api.bilibili.com/x/web-interface/wbi/view?aid=${aid}`,
        headers: {
          'Cookie': SESSDATA,
          'User-Agent': 'Mozilla/5.0',
        },
      });
      // axios puts the parsed response body under `.data`; the API's own
      // status code is a field of that body, not of the axios response.
      const payload = viewResponse.data;
      if (payload.code === -404) {
        console.log("No video found!");
      } else {
        console.log(payload);
        const subtitle = payload.data && payload.data.subtitle;
        const subtitleList = subtitle && subtitle.list;
        if (!Array.isArray(subtitleList) || subtitleList.length === 0) {
          // Nothing to download; avoid dereferencing a missing first entry.
          console.log("No subtitle found!");
        } else {
          const subtitle_url = subtitleList[0].subtitle_url;
          console.log(subtitle_url);
          const subtitleResponse = await axios.get(subtitle_url, {
            headers: {
              'User-Agent': 'Mozilla/5.0',
            },
          });
          console.log(subtitleResponse.data.body);
        }
      }
    } catch (err) {
      console.log(err);
    }
    console.log(loop);
  }
}
~~然后关于SESSDATA,我测试了一下发现如果把SESSDATA以%分开,我们会拿到四个部分。~~
~~第一部分:是可以随便乱写的,只要8个字母就可以了~~ ~~第二部分(12字母) 和 第三部分(7个字母):只要前面两个字母是从(2-9)或者(a-f),然后后面乱写都是可以的~~ ~~只有第四部分要用原本的~~
~~我不确定这样做会不会被封号,毕竟SESSDATA是要登录才有的。我的SESSDATA的第4部分是原本的,所以我也不知道这样做还会不会被检测到。希望有大佬能够提供更好的方法~~
(Edit) 更正: 抱歉这个接口不需要w_rid 和 wts, 不好意思
(Edit 2)
在测试https://api.bilibili.com/x/player/wbi/v2
这个接口时,发现即使修改SESSDATA的前面部分,bilibili依旧能够检测到
今天发现,https://api.bilibili.com/x/web-interface/view 这个接口废了,subtitle是null了, 只能用https://api.bilibili.com/x/player/wbi/v2 这个接口
补充一下,https://api.bilibili.com/x/player/v2 也能拿
Ai字幕需要session才能拿到,上传字幕不需要 但session状态下,获取太快会被ban
Ai字幕需要session才能拿到,上传字幕不需要 但session状态下,获取太快会被ban
大佬知道大概怎样的获取速度是安全的吗?