你的位置:首页 > Java教程

[Java教程]Node.js 小爬虫


编写爬虫示例(使用 http 模块抓取博客首页,再用 cheerio 解析出每天的文章标题和摘要):

var http = require('http');
var cheerio = require('cheerio');

// Blog index page that the crawler downloads and parses.
var url = 'http://www.cnblogs.com/tianxintian22/';

/**
 * Extract the post id from a post link of the form ".../p/<id>.html".
 *
 * @param {string|undefined} href - Link target; may be missing.
 * @returns {string} The id without the ".html" suffix, or '' when the link
 *   is missing or does not contain a "/p/" segment.
 */
function extractPostId(href) {
  var match = /\/p\/([^\/?#]+)/.exec(href || '');
  return match ? match[1].replace('.html', '') : '';
}

/**
 * Parse the blog index HTML into a list of days, each with its posts.
 *
 * Result shape:
 *   [{
 *     dayTitle: '',
 *     dayCont: [{ postId: '', postTitle: '', postCont: '' }]
 *   }]
 *
 * @param {string} html - Raw HTML of the blog index page.
 * @returns {Array<{dayTitle: string, dayCont: Array<{postId: string, postTitle: string, postCont: string}>}>}
 */
function filterblogs(html) {
  var $ = cheerio.load(html);
  var blogDatas = [];

  // cheerio's .each passes (index, element); the element is also `this`.
  $('.day').each(function (index, element) {
    var day = $(element);
    var titles = day.find('.postTitle');
    var summaries = day.find('.postCon .c_b_p_desc');
    var blogData = {
      dayTitle: day.find('.dayTitle a').text(),
      dayCont: []
    };

    // One entry per post instead of one per day, so several posts published
    // on the same day are not merged into a single record.
    // NOTE(review): assumes the n-th '.postTitle' and the n-th
    // '.postCon .c_b_p_desc' inside a day belong to the same post - confirm
    // against the live page markup.
    titles.each(function (postIndex, titleElement) {
      var titleLinks = $(titleElement).find('a');
      var summary = summaries.eq(postIndex);
      // Prefer the link inside the summary (as the original did) and fall
      // back to the title link, so a missing link no longer throws.
      var href = summary.find('a').attr('href') || titleLinks.attr('href');

      blogData.dayCont.push({
        postId: extractPostId(href),
        postTitle: titleLinks.text(),
        postCont: summary.text()
      });
    });

    blogDatas.push(blogData);
  });

  return blogDatas;
}

/**
 * Print every day title followed by the id, title and summary of its posts.
 *
 * @param {Array<{dayTitle: string, dayCont: Array<{postId: string, postTitle: string, postCont: string}>}>} blogDatas
 *   Data produced by filterblogs.
 */
function printBlogInfo(blogDatas) {
  blogDatas.forEach(function (item) {
    console.log(item.dayTitle + '\n');
    item.dayCont.forEach(function (blog) {
      console.log('  【' + blog.postId + '】' + blog.postTitle + '\n');
      console.log('  ' + blog.postCont + '\n');
    });
  });
}

http.get(url, function (res) {
  if (res.statusCode !== 200) {
    // Drain the response so the socket is released, then report the status
    // instead of parsing a redirect or error page as if it were the blog.
    res.resume();
    console.error('获取博客数据出错: HTTP ' + res.statusCode);
    return;
  }

  // Decode as UTF-8 at the stream level so a multi-byte character split
  // across two chunks is not corrupted by string concatenation.
  res.setEncoding('utf8');

  var html = '';
  res.on('data', function (chunk) {
    html += chunk;
  });
  res.on('end', function () {
    printBlogInfo(filterblogs(html));
  });
}).on('error', function (err) {
  console.error('获取博客数据出错: ' + err.message);
});