node-tutorial icon indicating copy to clipboard operation
node-tutorial copied to clipboard

爬虫

Open Wscats opened this issue 8 years ago • 9 comments

fetch.js

var http = require("http");

/**
 * Downloads a URL over HTTP and hands the complete response body to `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Invoked exactly once: with the
 *   body decoded as UTF-8 text when the response ends, or with `null` when the
 *   request or the response stream reports an error.
 */
function download(url, callback) {
  var finished = false;

  // Reports the outcome once only, even if an error arrives after "end".
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  http.get(url, function(res) {
    var data = "";
    // Decode at the stream level so that a multi-byte character split across
    // two chunks is reassembled instead of being corrupted by per-chunk
    // Buffer-to-string conversion.
    res.setEncoding("utf8");
    res.on("data", function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      finish(data);
    });
    res.on("error", function() {
      finish(null);
    });
  }).on("error", function() {
    finish(null);
  });
}

exports.download = download;

catch.js

安装cheerio,抓取页面信息,引入上面写好的fetch模块

var cheerio = require("cheerio");
var server = require("./fetch");

// Baidu image search URL used as the crawl target.
var url = "http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E8%B6%B3%E7%90%83";

// Fetch the page through the fetch module and print every <img> found in it.
server.download(url, function(html) {
    // A falsy body (the fetch module passes null on a request error) is
    // reported as a failure.
    if(!html) {
        console.log("检索出错");
        return;
    }

    var $ = cheerio.load(html);
    // .each(function(index, element)) visits every match; `element` is the raw
    // DOM node, so it is wrapped with $() again before use.
    // The same pattern works for other selectors, for example reading
    // $(element).attr("href") for each match of $("a").
    var images = $("img");
    images.each(function(position, node) {
        console.log($(node));
    });
    console.log("检索完毕");
});

downloadImg.js

获取地址后下载图片

var http = require("http");
var fs = require("fs");
// Starts an HTTP server on port 50082. Its handler never writes a response;
// the listener is kept exactly as the script originally had it.
var server = http.createServer(function(req, res) {}).listen(50082);
console.log("http start");
var url = "http://s0.hao123img.com/res/img/logo/logonew.png";
http.get(url, function(res) {
    // Collect the raw Buffers; image bytes must never pass through a text
    // decoding step, otherwise the saved file cannot be opened.
    var chunks = [];
    res.on("data", function(chunk) {
        chunks.push(chunk);
    });
    res.on("end", function() {
        fs.writeFile("./logonew.png", Buffer.concat(chunks), function(err) {
            if(err) {
                console.log("down fail");
                // Stop here so a failed write is not also reported as a success.
                return;
            }
            console.log("down success");
        });
    });
}).on("error", function() {
    // Connection-level failures (DNS, refused, reset) end up here.
    console.log("down fail");
});

Wscats avatar Aug 06 '16 05:08 Wscats

批量下载图片 DEMO1

var http = require("http");
var https = require("https");
var cheerio = require("cheerio");
var fs = require('fs');

/**
 * Downloads a URL over HTTP and hands the complete response body to `callback`.
 *
 * @param {string} url - Address passed straight to `http.get`.
 * @param {function(?string): void} callback - Invoked exactly once: with the
 *   body decoded as UTF-8 text when the response ends, or with `null` when the
 *   request or the response stream reports an error.
 */
function download(url, callback) {
    var finished = false;

    // Reports the outcome once only, even if an error arrives after "end".
    function finish(result) {
        if(finished) {
            return;
        }
        finished = true;
        callback(result);
    }

    http.get(url, function(res) {
        var data = "";
        // Decode at the stream level so that a multi-byte character split
        // across two chunks is reassembled instead of being corrupted.
        res.setEncoding("utf8");
        res.on("data", function(chunk) {
            data += chunk;
        });
        res.on("end", function() {
            finish(data);
        });
        res.on("error", function() {
            finish(null);
        });
    }).on("error", function() {
        finish(null);
    });
}

// Image addresses collected from the crawled page.
var imgArr = [];
download('http://www.mzitu.com/share/comment-page-1',
    function(data) {
        // download() passes null when the request failed; there is nothing
        // to parse in that case.
        if(!data) {
            console.log("检索出错");
            return;
        }
        var $ = cheerio.load(data);
        // Query the loaded document directly instead of parsing the HTML a
        // second time through $(data).
        $("img").each(function(i, e) {
            var src = $(e).attr("src");
            console.log("第" + (i + 1) + "个:" + src);
            // <img> tags without a src attribute yield undefined; keep them
            // out of the download list.
            if(src) {
                imgArr.push(src);
            }
        });
        downloadImg(imgArr);
    }
);

/**
 * Downloads every image address in `resource` into the "image/" directory,
 * naming each file after the last path segment of its URL.
 *
 * Entries that are not absolute http/https URLs are skipped with a log line,
 * and request or write failures are logged instead of crashing the process.
 *
 * @param {Array<string>} resource - Image URLs to fetch.
 */
function downloadImg(resource) {
    resource.forEach(function(src) {
        // `num` is never changed, so every success line reports page 1
        // (kept so the log output matches the original).
        var num = 1;

        if(typeof src !== 'string' || !/^https?:\/\//.test(src)) {
            console.log('skip: ' + src);
            return;
        }

        var filename = src.substring(src.lastIndexOf('/') + 1);
        // http.get rejects https URLs, so pick the module matching the scheme.
        var client = src.indexOf('https:') === 0 ? https : http;

        client.get(src, function(res) {
            // Open the file only once a response exists, so failed requests
            // do not leave empty files behind.
            var writestream = fs.createWriteStream("image/" + filename);
            writestream.on('finish', function() {
                console.log('page: ' + num + filename);
            });
            writestream.on('error', function(err) {
                console.log('write fail: ' + filename + ' ' + err.message);
            });
            res.pipe(writestream);
        }).on('error', function(err) {
            console.log('down fail: ' + src + ' ' + err.message);
        });
    });
}

Wscats avatar Sep 04 '16 15:09 Wscats

批量下载图片 DEMO2

var http = require('http');
var fs = require('fs');
var path = require('path');

/**
 * Page-by-page image crawler: fetches `baseUrl + page`, extracts image URLs
 * with `reg`, downloads them into `dir`, then moves on to the next page until
 * `total` is passed.
 *
 * @param {Object} options
 * @param {string} options.baseUrl - URL prefix; the page number is appended.
 * @param {string} [options.dir=''] - Directory the images are written to.
 * @param {RegExp} options.reg - Global regex whose first capture group is the
 *   image URL (parseData loops on `exec`, so the `g` flag is required).
 * @param {number} options.total - Last page number to fetch (inclusive).
 * @param {number} [options.from=1] - First page number to fetch.
 */
function Mzitu(options) {
    // Running counter used to number finished downloads in the log.
    this.id = 1;
    this.initialize(options);
    return this;
}

Mzitu.prototype = {
    constructor: Mzitu,

    /** Copies the crawl settings from `options` onto the instance. */
    initialize: function _initialize(options) {
        this.baseUrl = options.baseUrl;
        this.dir = options.dir || '';
        this.reg = options.reg;
        this.total = options.total;
        this.page = options.from || 1;
    },

    /** Starts crawling at the current page. */
    start: function _start() {
        this.getPage();
    },

    /**
     * Fetches the current page and passes its HTML to parseData. Does nothing
     * once `page` is past `total`.
     */
    getPage: function _getPage() {
        var self = this;

        // Written as a negated <= so that a missing `total` still means
        // "fetch nothing", exactly like the original condition.
        if(!(this.page <= this.total)) {
            return;
        }

        http.get(this.baseUrl + this.page, function(res) {
            // Must start as an empty string: starting from null would prefix
            // the page body with the text "null".
            var data = '';

            res.setEncoding("utf8");
            res.on('data', function(chunk) {
                data += chunk;
            }).on('end', function() {
                self.parseData(data);
            }).on('error', function(err) {
                console.log('page: ' + self.page + ' response failed: ' + err.message);
            });
        }).on('error', function(err) {
            console.log('page: ' + self.page + ' request failed: ' + err.message);
        });
    },

    /**
     * Collects capture group 1 of every `reg` match in `data` and hands the
     * resulting list to download.
     */
    parseData: function _parseData(data) {
        var res = [],
            match;

        while((match = this.reg.exec(data)) != null) {
            res.push(match[1]);
        }

        this.download(res);
    },

    /**
     * Starts a download for every URL in `resource`, then advances to the
     * next page without waiting for the downloads to finish.
     */
    download: function _download(resource) {
        var self = this,
            currentPage = self.page;

        // Make sure the target directory exists before any file is opened.
        if(self.dir) {
            fs.mkdirSync(self.dir, { recursive: true });
        }

        resource.forEach(function(src) {
            var filename = src.substring(src.lastIndexOf('/') + 1),
                // path.join inserts the separator that plain concatenation
                // of dir and filename was missing.
                target = path.join(self.dir, filename),
                req;

            try {
                req = http.get(src, function(res) {
                    var writestream = fs.createWriteStream(target);

                    writestream.on('finish', function() {
                        console.log('page: ' + currentPage + ' id: ' + self.id++ + ' download: ' + filename);
                    });
                    writestream.on('error', function(err) {
                        console.log('page: ' + currentPage + ' write failed: ' + filename + ' ' + err.message);
                    });
                    res.pipe(writestream);
                });
            } catch(err) {
                // http.get throws synchronously on URLs it cannot handle;
                // skip that image and keep going with the rest.
                console.log('page: ' + currentPage + ' skipped: ' + src + ' ' + err.message);
                return;
            }

            req.on('error', function(err) {
                console.log('page: ' + currentPage + ' request failed: ' + src + ' ' + err.message);
            });
        });

        self.page++;
        self.getPage();
    }
};

// Crawl comment pages 1 through 141. `dir` and `reg` are handed to Mzitu,
// which uses them to name the saved files and to find image URLs.
var mzitu = new Mzitu({
    baseUrl: 'http://www.mzitu.com/share/comment-page-',
    dir: 'meizi',
    // [^"]* keeps the alt match inside a single tag. The previous greedy .*
    // ran on to the last closing tag of the line, so every further <img> on
    // the same line was swallowed into one match.
    reg: /<img\s*src="(.*?)"\s*alt="[^"]*"\s*\/>/g,
    total: 141,
    from: 1
});

mzitu.start();

Wscats avatar Sep 04 '16 15:09 Wscats

mark , 佛曰,色即是空,空即是色, 哇哈哈!!!~ 题曰: ‘怎么爬接口’、~~~

chenkelvin avatar Sep 05 '16 03:09 chenkelvin

我爬呀爬呀爬@哈哈

Stragiht avatar Sep 21 '16 10:09 Stragiht

只能默默的看你们爬O__O "…

qfliailian avatar Oct 13 '16 09:10 qfliailian

安装cheerio,not安装choorio QAQ

meow4world avatar Mar 07 '17 12:03 meow4world

酷狗批量下载音乐

参考1

var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
// Fetch the singer page, read the hash of every listed song, look up each
// song's play URL and save the audio as "<audio_name>.mp3".
request('http://www.kugou.com/yy/singer/home/3060.html', function(error, response, body) {
	// Bail out instead of feeding an undefined body into cheerio.
	if(error || !body) {
		console.log('request failed: ' + (error ? error.message : 'empty response'));
		return;
	}
	var $ = cheerio.load(body);
	var arr = $('.song_hid');
	for(var num = 0; num < arr.length; num = num + 1) {
		// Read the attribute once; elements without a value are skipped.
		var value = $(arr[num]).attr("value");
		if(typeof value !== 'string') {
			continue;
		}
		console.log(value);
		// Length of the whole value string.
		var length = value.length;
		console.log(length);
		// Position of the first "|".
		var index = value.indexOf("|");
		console.log(index);
		// The hash is taken from just after the first "|" up to 7 characters
		// before the end. NOTE(review): the meaning of that 7-character tail
		// is not visible here - confirm against the page markup.
		var hash = value.substring(index + 1, length - 7);
		console.log(hash);
		request('http://www.kugou.com/yy/index.php?r=play/getdata&hash=' + hash, function(error, response, body) {
			if(error || !body) {
				console.log('getdata failed: ' + (error ? error.message : 'empty response'));
				return;
			}
			// Parse once and guard it: a non-JSON reply must not crash the
			// remaining downloads.
			var info;
			try {
				info = JSON.parse(body).data;
			} catch(parseError) {
				console.log('getdata failed: ' + parseError.message);
				return;
			}
			if(!info || !info.play_url) {
				console.log('getdata failed: no play_url');
				return;
			}
			var mp3 = info.play_url;
			console.log(mp3);
			var audio_name = info.audio_name;
			request(mp3).pipe(fs.createWriteStream(audio_name + '.mp3'));
		});
	}
})

参考2

//1.爬取歌手网页
//2.分析网页,并获取该歌手所有歌曲的id
//3.根据id来拼接url,获取歌曲的下载地址
//4.执行下载
var request = require("request");
var cheerio = require("cheerio");
var fs = require("fs");
var mysql = require('mysql');
// NOTE(review): demo credentials are hard-coded here; move them to
// configuration before using this outside a local test database.
var connection = mysql.createConnection({
	host: 'localhost',
	user: 'wscats',
	password: '123456',
	database: 'kugou'
});
connection.connect(); // open the connection
// Fetch the singer page, look up the play URL of every listed song and store
// one (name, url) row per song in the `song` table.
request("http://www.kugou.com/singer/3060.html", (err, res, body) => {
	// Without a page there is nothing to insert; release the connection so
	// the process can exit.
	if(err || !body) {
		console.log(err);
		connection.end();
		return;
	}
	var $ = cheerio.load(body);
	var songs = $(".song_hid");
	// Songs whose lookup/insert has not finished yet. The connection is
	// closed when this reaches zero; closing it earlier would cut off
	// inserts that are queued from the asynchronous callbacks below.
	var pending = songs.length;

	function settle() {
		pending--;
		if(pending === 0) {
			connection.end();
		}
	}

	if(pending === 0) {
		connection.end();
		return;
	}

	songs.each(function(i, e) {
		var value = $(e).attr("value");
		if(typeof value !== "string") {
			settle();
			return;
		}
		// The value is "|"-separated: field 0 is used as the song name and
		// field 1 as the hash sent to the getdata endpoint.
		var parts = value.split("|");
		var name = parts[0];
		var link = parts[1];
		console.log(link);
		request(`http://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash=${link}`, function(err, res, body) {
			if(!body) {
				settle();
				return;
			}
			var url;
			try {
				url = JSON.parse(body).data.play_url;
			} catch(parseError) {
				// A malformed reply for one song must not abort the others.
				console.log(parseError.message);
				settle();
				return;
			}
			console.log(url);
			connection.query('INSERT INTO song SET ?', {
				name: name,
				url: url
			}, function(error, results, fields) {
				if(error) throw error;
				console.log(results);
				settle();
			});
		})
	})
})

Wscats avatar Jul 02 '18 03:07 Wscats

API

http://tingapi.ting.baidu.com/v1/restserver/ting

列表:

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.billboard.billList&type=1&size=10&offset=0

参数:

  • type = 1-新歌榜,2-热歌榜,11-摇滚榜,12-爵士,16-流行,21-欧美金曲榜,22-经典老歌榜,23-情歌对唱榜,24-影视金曲榜,25-网络歌曲榜
  • size = 10 //返回条目数量
  • offset = 0 //获取偏移

搜索

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.search.catalogSug&query=海阔天空

参数:

  • query = '' //搜索关键字

播放

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.play&songid=877578

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.playAAC&songid=877578

参数:

  • songid = 877578 //歌曲id

歌词

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.lry&songid=877578

参数:

  • songid = 877578 //歌曲id

推荐列表

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.getRecommandSongList&song_id=877578&num=5

参数:

  • song_id = 877578 //歌曲id
  • num = 5 //返回条目数量

下载

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.downWeb&songid=877578&bit=24&_t=1393123213

参数:

  • songid = 877578 //歌曲id
  • bit = 24,64,128,192,256,320,flac //码率
  • _t = 1393123213 //时间戳

获取歌手信息

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.artist.getInfo&tinguid=877578

参数:

  • tinguid = 877578 //歌手id

获取歌手歌曲列表

http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.artist.getSongList&tinguid=877578&limits=6&use_cluster=1&order=2

参数:

  • tinguid = 877578 //歌手id
  • limits = 6 //返回条目数量

处理lrc格式的切词的方法

parseLyric(lrc) {
  var lyrics = lrc.split("\n");
  var lrcObj = {};
  for (var i = 0; i < lyrics.length; i++) {
    var lyric = decodeURIComponent(lyrics[i]);
    var timeReg = /\[\d*:\d*((\.|\:)\d*)*\]/g;
    var timeRegExpArr = lyric.match(timeReg);
    if (!timeRegExpArr) continue;
    var clause = lyric.replace(timeReg, '');
    for (var k = 0, h = timeRegExpArr.length; k < h; k++) {
      var t = timeRegExpArr[k];
      var min = Number(String(t.match(/\[\d*/i)).slice(1)),
        sec = Number(String(t.match(/\:\d*/i)).slice(1));
      var time = min * 60 + sec;
      lrcObj[time] = clause;
    }
  }
  return lrcObj;
}

小程序歌词轮播

视图

<!-- Shows the selected `year` above a single-column picker listing `years`. -->
<view>
  <view>{{year}}</view>
  <!-- `value` sets the selected row index; changes are reported to bindChange. -->
  <picker-view indicator-style="height: 50px;" style="width: 100%; height: 300px;" value="{{value}}" bindchange="bindChange">
    <picker-view-column>
      <!-- One 50px-high row per entry of `years`. -->
      <view wx:for="{{years}}" style="line-height: 50px">{{item}}</view>
    </picker-view-column>
  </picker-view>
</view>

逻辑

// Selectable years: 1990 up to and including the current year.
const date = new Date()
const years = Array.from(
  { length: date.getFullYear() - 1990 + 1 },
  (_, offset) => 1990 + offset
)

// Page definition: exposes the year list and the currently selected year.
Page({
  data: {
    years,
    year: date.getFullYear(),
    // Change this index to make the lyric picker scroll to another row.
    value: [2],
  },

  // Intentionally empty lifecycle hook.
  onReady() {},

  // Logs the picker's new selection and shows the year at the selected index.
  bindChange(e) {
    const { value: selection } = e.detail
    console.log(selection)
    this.setData({ year: this.data.years[selection[0]] })
  },
})

Wscats avatar Oct 24 '18 02:10 Wscats

https://documenter.getpostman.com/view/5326062/RzfgpVeV#67c3965b-ce04-4077-aa2d-e22cd2f343c4

Wscats avatar Dec 21 '18 03:12 Wscats