根据url自动抓取标题和描述

const Crawler = require('crawler')

/**
 * 获取链接地址的标题和描述
 * 传入ID: 连接地址
 * 返回:url、title、content、thumbnail
 */
exports.crawler = async (req, res) => {
    const c = new Crawler()
    let { id = '' } = req.query
    id = decodeURIComponent(id)

    c.queue([{
        uri: id,
        callback: (error, response, done) => {
            if (error) {
                console.log(error)
            }
            const { $ } = response
            try {
                res.json({
                    url: id,
                    title: $('title').text() || '',
                    content: $('meta[name="description"]').attr('content') || '',
                    thumbnail: '',
                })
            } catch (err) {
                console.log(err);
            }
        },
    }])
}

类似于掘金社区的外链分享,可以根据链接获取文章标题和描述。至于缩略图,考虑到版权问题,最好不要抓取

posted @ 2019-08-07 09:53:18 浏览(90) NodeJS

avatar