node-tutorial
node-tutorial copied to clipboard
爬虫
fetch.js
var http = require("http");
/**
 * Downloads a URL over HTTP and hands the complete response body to `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Called with the body text once
 *   the response ends, or with `null` when the request emits an error.
 */
function download(url, callback) {
  http.get(url, function (res) {
    let data = "";
    // Decode as UTF-8 inside the stream so that a multi-byte character split
    // across two chunks is reassembled instead of being corrupted by
    // stringifying each Buffer chunk on its own.
    res.setEncoding("utf8");
    res.on("data", function (chunk) {
      data += chunk;
    });
    res.on("end", function () {
      callback(data);
    });
  }).on("error", function () {
    callback(null);
  });
}
exports.download = download;
catch.js
安装cheerio,抓取页面信息,引入上面写好的fetch模块
var cheerio = require("cheerio");
var server = require("./fetch");
// Baidu image-search result page; the `word` parameter is the URL-encoded keyword.
var url = "http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E8%B6%B3%E7%90%83"

/**
 * Prints every <img> element of the downloaded page.
 *
 * @param {?string} data - Page markup; a falsy value is treated as a failed download.
 */
function reportImages(data) {
  if (!data) {
    console.log("检索出错");
    return;
  }

  //console.log(data);
  var $ = cheerio.load(data);

  // .each(function(index, element)) visits every matched node; `element` is
  // the raw DOM element, so it is wrapped with $() before use.
  // Alternative: print the target of every link.
  // $("a").each(function(index, element) {
  //   console.log("第" + index + "个:" + $(element).attr("href"));
  // });

  $("img").each(function (index, element) {
    console.log($(element));
  });

  // Alternative: print the value of every paragraph.
  // $("p").each(function(index, element) {
  //   console.log("第" + index + "个:" + $(element).val());
  // });

  console.log("检索完毕");
}

server.download(url, reportImages);
downloadImg.js
获取地址后下载图片
var http = require("http");
var fs = require("fs");
// Placeholder HTTP server: its handler never writes a response. It is kept
// because listening on port 50082 is a side effect of running this script.
var server = http.createServer(function (req, res) {}).listen(50082);
console.log("http start");

// Image to fetch; it is saved as ./logonew.png.
var url = "http://s0.hao123img.com/res/img/logo/logonew.png";

http.get(url, function (res) {
  // Collect the raw Buffers and join them once at the end. This keeps the
  // bytes untouched, so no "binary" string encoding round-trip is needed.
  var chunks = [];
  res.on("data", function (chunk) {
    chunks.push(chunk);
  });
  res.on("end", function () {
    fs.writeFile("./logonew.png", Buffer.concat(chunks), function (err) {
      if (err) {
        console.log("down fail");
        // Stop here so a failed write is not also reported as a success.
        return;
      }
      console.log("down success");
    });
  });
}).on("error", function () {
  // A request-level failure (DNS, refused connection, ...) would otherwise
  // surface as an unhandled 'error' event and crash the script.
  console.log("down fail");
});
批量下载图片 DEMO1
var http = require("http");
var https = require("https");
var cheerio = require("cheerio");
var fs = require('fs');
/**
 * Downloads a URL over HTTP and hands the complete response body to `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Called with the body text once
 *   the response ends, or with `null` when the request emits an error.
 */
function download(url, callback) {
  http.get(url, function (res) {
    let data = "";
    // Decode as UTF-8 inside the stream so that a multi-byte character split
    // across two chunks is reassembled instead of being corrupted by
    // stringifying each Buffer chunk on its own.
    res.setEncoding("utf8");
    res.on("data", function (chunk) {
      data += chunk;
    });
    res.on("end", function () {
      callback(data);
    });
  }).on("error", function () {
    callback(null);
  });
}
// Image URLs found on the page; filled by the callback below and then handed
// to downloadImg.
var imgArr = [];
download('http://www.mzitu.com/share/comment-page-1',
  function (data) {
    // download() passes null when the request failed; cheerio cannot parse that.
    if (!data) {
      console.log("检索出错");
      return;
    }
    var $ = cheerio.load(data);
    // Query the document that was just loaded instead of parsing the markup a
    // second time with $(data).
    $("img").each(function (i, e) {
      var src = $(e).attr("src");
      console.log("第" + (i + 1) + "个:" + src);
      // An <img> without a src attribute yields undefined; do not queue it.
      if (src) {
        imgArr.push(src);
      }
    });
    downloadImg(imgArr);
  }
);
/**
 * Saves every image URL in `resource` into the local "image/" directory,
 * naming each file after the last path segment of its URL.
 *
 * Entries that are not absolute http(s) URLs are skipped with a log line.
 *
 * @param {Array<string>} resource - Image URLs to download.
 */
function downloadImg(resource) {
  // createWriteStream does not create missing directories.
  fs.mkdirSync("image", { recursive: true });

  resource.forEach(function (src, idx) {
    if (typeof src !== "string" || !/^https?:\/\//i.test(src)) {
      console.log('skip: ' + src);
      return;
    }

    var filename = src.substring(src.lastIndexOf('/') + 1);
    // http.get refuses https: URLs, so choose the client from the scheme.
    var client = /^https:/i.test(src) ? https : http;
    var writestream = fs.createWriteStream("image/" + filename);

    writestream.on('finish', function () {
      // idx + 1 is the position of the image in `resource`.
      console.log('image: ' + (idx + 1) + ' ' + filename);
    });
    writestream.on('error', function (err) {
      console.log('write fail: ' + filename + ' ' + err.message);
    });

    client.get(src, function (res) {
      res.pipe(writestream);
    }).on('error', function (err) {
      console.log('down fail: ' + filename + ' ' + err.message);
      // Release the file handle without emitting 'finish'.
      writestream.destroy();
    });
  });
}
批量下载图片 DEMO2
var http = require('http');
var fs = require('fs');
/**
 * Crawler that walks numbered listing pages, extracts image URLs from each
 * page with a regular expression and saves every image to disk.
 *
 * @param {Object} options
 * @param {string} options.baseUrl - Page URL prefix; the page number is appended.
 * @param {string} [options.dir] - Directory for downloaded files ('' keeps them
 *   in the current directory).
 * @param {RegExp} options.reg - Pattern whose first capture group is an image URL.
 * @param {number} options.total - Last page number to fetch (inclusive).
 * @param {number} [options.from] - First page number to fetch; defaults to 1.
 */
function Mzitu(options) {
  // Running counter that only appears in the per-file log line.
  this.id = 1;
  this.initialize(options);
}

Mzitu.prototype = {
  constructor: Mzitu,

  /** Copies the crawl settings from `options` onto the instance. */
  initialize: function _initialize(options) {
    this.baseUrl = options.baseUrl;
    this.dir = options.dir || '';
    this.reg = options.reg;
    this.total = options.total;
    this.page = options.from || 1;
  },

  /** Begins crawling at the current page. */
  start: function _start() {
    this.getPage();
  },

  /**
   * Fetches the current page and passes its markup to parseData.
   * Does nothing once `page` has moved past `total`.
   */
  getPage: function _getPage() {
    var self = this;
    if (this.page > this.total) {
      return;
    }

    // Must start as an empty string: appending to null would prefix every
    // page body with the text "null".
    var data = '';
    http.get(this.baseUrl + this.page, function (res) {
      res.setEncoding("utf8");
      res.on('data', function (chunk) {
        data += chunk;
      }).on('end', function () {
        self.parseData(data);
      });
    }).on('error', function (err) {
      // Report the failure and carry on with the next page instead of letting
      // an unhandled 'error' event end the process.
      console.log('page: ' + self.page + ' failed: ' + err.message);
      self.page++;
      self.getPage();
    });
  },

  /**
   * Collects the first capture group of every `reg` match in `data` and
   * passes the list to download.
   */
  parseData: function _parseData(data) {
    var res = [],
      match;
    // exec() on a global regex resumes from lastIndex; start from the top.
    this.reg.lastIndex = 0;
    while ((match = this.reg.exec(data)) !== null) {
      res.push(match[1]);
      // Without the g flag exec() keeps returning the first match forever.
      if (!this.reg.global) {
        break;
      }
    }
    this.download(res);
  },

  /**
   * Starts one download per URL in `resource`, then moves on to the next page
   * without waiting for those downloads to complete.
   */
  download: function _download(resource) {
    var self = this,
      currentPage = self.page,
      prefix = self.dir;

    if (prefix) {
      // createWriteStream does not create missing directories.
      fs.mkdirSync(prefix, { recursive: true });
      // Plain concatenation of dir and file name would produce "meizia.jpg"
      // for dir "meizi"; make sure a separator sits between them.
      if (!/[\\/]$/.test(prefix)) {
        prefix += '/';
      }
    }

    resource.forEach(function (src, idx) {
      // A pattern without a capture group yields undefined entries.
      if (typeof src !== 'string') {
        return;
      }
      var filename = src.substring(src.lastIndexOf('/') + 1);
      try {
        http.get(src, function (res) {
          // The file is opened only once a response exists, so a failed
          // request leaves no empty file behind.
          var writestream = fs.createWriteStream(prefix + filename);
          writestream.on('finish', function () {
            console.log('page: ' + currentPage + ' id: ' + self.id++ + ' download: ' + filename);
          });
          writestream.on('error', function (err) {
            console.log('page: ' + currentPage + ' write failed: ' + filename + ' ' + err.message);
          });
          res.pipe(writestream);
        }).on('error', function (err) {
          console.log('page: ' + currentPage + ' download failed: ' + filename + ' ' + err.message);
        });
      } catch (err) {
        // http.get throws synchronously for URLs it cannot handle (for
        // example a relative path or a non-http scheme).
        console.log('page: ' + currentPage + ' skipped: ' + src + ' ' + err.message);
      }
    });

    self.page++;
    self.getPage();
  }
};
// Crawl settings handed to Mzitu unchanged: listing pages `from`..`total` are
// requested as baseUrl + page number, and `reg` captures each image URL in its
// first group.
var crawlOptions = {
  from: 1,
  total: 141,
  baseUrl: 'http://www.mzitu.com/share/comment-page-',
  reg: /<img\s*src="(.*?)"\s*alt=".*"\s*\/>/g,
  dir: 'meizi'
};

var mzitu = new Mzitu(crawlOptions);
mzitu.start();
mark , 佛曰,色即是空,空即是色, 哇哈哈!!!~ 题曰: ‘怎么爬接口’、~~~
我爬呀爬呀爬@哈哈
只能默默的看你们爬O__O "…
安装cheerio,not安装choorio QAQ
酷狗批量下载音乐
参考1
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
// Fetch the singer page, read the hash of every listed song from its
// `.song_hid` element, ask the play API for the audio URL and save the file.
request('http://www.kugou.com/yy/singer/home/3060.html', function (error, response, body) {
  // On a transport failure `body` is undefined and cheerio has nothing to parse.
  if (error || !body) {
    console.log('singer page request failed');
    return;
  }
  //console.log(body)
  var $ = cheerio.load(body);
  var arr = $('.song_hid');
  //console.log(arr);
  for (var num = 0; num < arr.length; num = num + 1) {
    // Read the attribute once instead of re-querying it for every use.
    var value = $(arr[num]).attr("value");
    if (!value) {
      continue;
    }
    console.log(value);
    // Length of the whole attribute value.
    var length = value.length;
    console.log(length);
    // Position of the first "|" separator.
    var index = value.indexOf("|");
    console.log(index);
    // The hash is the text after the first "|" with the last 7 characters
    // dropped. NOTE(review): the meaning of that 7-character tail is not
    // visible here; confirm against a real page.
    var hash = value.substring(index + 1, length - 7);
    console.log(hash);
    request('http://www.kugou.com/yy/index.php?r=play/getdata&hash=' + hash, function (error, response, body) {
      if (error || !body) {
        console.log('play data request failed');
        return;
      }
      // Parse once; a non-JSON reply must not take the whole script down.
      var data;
      try {
        data = JSON.parse(body).data;
      } catch (parseError) {
        console.log('play data is not valid JSON');
        return;
      }
      if (!data || !data.play_url) {
        console.log('play data has no play_url');
        return;
      }
      console.log(data.play_url);
      var mp3 = data.play_url;
      // audio_name comes from the remote service; replace path separators so
      // the file always lands in the current directory.
      var audio_name = String(data.audio_name).replace(/[\\/]/g, '_');
      request(mp3)
        .on('error', function (downloadError) {
          console.log('download failed: ' + audio_name);
        })
        .pipe(fs.createWriteStream(audio_name + '.mp3'));
    });
  }
})
参考2
//1.爬取歌手网页
//2.分析网页,并获取该歌手所有歌曲的id
//3.根据id来拼接url,获取歌曲的下载地址
//4.执行下载
var request = require("request");
var cheerio = require("cheerio");
var fs = require("fs");
var mysql = require('mysql');
// NOTE(review): the credentials are hard-coded in the source; consider moving
// them to configuration before using this outside a local demo.
var connection = mysql.createConnection({
  host: 'localhost',
  user: 'wscats',
  password: '123456',
  database: 'kugou'
});
connection.connect(); // open the connection

// Fetch the singer page, resolve the play URL of every listed song and store
// (name, url) rows in the `song` table.
request("http://www.kugou.com/singer/3060.html", (err, res, body) => {
  // On a transport failure `body` is undefined and cheerio has nothing to parse.
  if (err || !body) {
    console.log('singer page request failed');
    return;
  }
  //console.log(body)
  var $ = cheerio.load(body);
  $(".song_hid").each(function (i, e) {
    var value = $(e).attr("value");
    if (!value) {
      return;
    }
    // The attribute value is "|"-separated: field 0 is used as the song name
    // and field 1 as the hash sent to the play API.
    var parts = value.split("|");
    var name = parts[0];
    var link = parts[1];
    console.log(link);
    request(`http://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash=${link}`, function (err, res, body) {
      if (err || !body) {
        return;
      }
      var url;
      try {
        url = JSON.parse(body).data.play_url;
      } catch (parseError) {
        // A malformed reply for one song should not stop the others.
        console.log('play data could not be read for ' + name);
        return;
      }
      console.log(url);
      connection.query('INSERT INTO song SET ?', {
        name: name,
        url: url
      }, function (error, results, fields) {
        if (error) {
          // Throwing inside this asynchronous callback cannot be caught by
          // any caller and would end the process; report and continue.
          console.log('insert failed for ' + name + ': ' + error.message);
          return;
        }
        console.log(results);
      });
      //connection.end();
      //request(mp3).pipe(fs.createWriteStream(name + '.mp3'));
    })
  })
})
API
http://tingapi.ting.baidu.com/v1/restserver/ting
列表:
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.billboard.billList&type=1&size=10&offset=0
参数:
- type = 1-新歌榜,2-热歌榜,11-摇滚榜,12-爵士,16-流行,21-欧美金曲榜,22-经典老歌榜,23-情歌对唱榜,24-影视金曲榜,25-网络歌曲榜
- size = 10 //返回条目数量
- offset = 0 //获取偏移
搜索
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.search.catalogSug&query=海阔天空
参数:
- query = '' //搜索关键字
播放
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.play&songid=877578
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.playAAC&songid=877578
参数:
- songid = 877578 //歌曲id
歌词
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.lry&songid=877578
参数:
- songid = 877578 //歌曲id
推荐列表
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.getRecommandSongList&song_id=877578&num=5
参数:
- song_id = 877578 //歌曲id
- num = 5 //返回条目数量
下载
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.downWeb&songid=877578&bit=24&_t=1393123213
参数:
- songid = 877578 //歌曲id
- bit = 24,64,128,192,256,320,flac //码率
- _t = 1393123213 //时间戳
获取歌手信息
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.artist.getInfo&tinguid=877578
参数:
- tinguid = 877578 //歌手id
获取歌手歌曲列表
http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.artist.getSongList&tinguid=877578&limits=6&use_cluster=1&order=2
参数:
- tinguid = 877578 //歌手id
- limits = 6 //返回条目数量
处理lrc格式的切词的方法
parseLyric(lrc) {
var lyrics = lrc.split("\n");
var lrcObj = {};
for (var i = 0; i < lyrics.length; i++) {
var lyric = decodeURIComponent(lyrics[i]);
var timeReg = /\[\d*:\d*((\.|\:)\d*)*\]/g;
var timeRegExpArr = lyric.match(timeReg);
if (!timeRegExpArr) continue;
var clause = lyric.replace(timeReg, '');
for (var k = 0, h = timeRegExpArr.length; k < h; k++) {
var t = timeRegExpArr[k];
var min = Number(String(t.match(/\[\d*/i)).slice(1)),
sec = Number(String(t.match(/\:\d*/i)).slice(1));
var time = min * 60 + sec;
lrcObj[time] = clause;
}
}
return lrcObj;
}
小程序歌词轮播
视图
<view>
<!-- Shows the currently selected entry (page data `year`). -->
<view>{{year}}</view>
<!-- `value` selects the visible row by index; bindChange fires when it changes. -->
<picker-view indicator-style="height: 50px;" style="width: 100%; height: 300px;" value="{{value}}" bindchange="bindChange">
<picker-view-column>
<!-- The items are plain unique values, so the item itself serves as the key. -->
<view wx:for="{{years}}" wx:key="*this" style="line-height: 50px">{{item}}</view>
</picker-view-column>
</picker-view>
</view>
逻辑
// Picker rows: every year from 1990 up to and including the current one.
const date = new Date();
const years = Array.from(
  { length: date.getFullYear() - 1990 + 1 },
  (_, offset) => 1990 + offset
);

Page({
  data: {
    years,
    year: date.getFullYear(),
    // Change this index to make the lyrics scroll.
    value: [2],
  },

  onReady() {},

  /**
   * Picker change handler: copies the row chosen in the first column into
   * the `year` field of the page data.
   */
  bindChange(e) {
    const selection = e.detail.value;
    console.log(selection);
    const chosenYear = this.data.years[selection[0]];
    this.setData({ year: chosenYear });
  },
});
https://documenter.getpostman.com/view/5326062/RzfgpVeV#67c3965b-ce04-4077-aa2d-e22cd2f343c4