且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

Scrapy CrawlSpider 无限爬行示例：通过 Rule 递归跟踪站内链接

更新时间:2022-03-12 09:37:21

# -*- coding: utf-8 -*-
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class TopartSpider(CrawlSpider):
    """Recursively crawl topart-online.com, recording matching page URLs.

    Every link whose URL matches ``/a-`` is followed (so the crawl keeps
    expanding from each matched page) and also handed to ``parse_item``,
    which emits the page's URL as an item.
    """

    name = 'topart'
    allowed_domains = ['topart-online.com']
    start_urls = ['http://topart-online.com/']

    # One rule: extract links containing '/a-'; follow=True makes the
    # crawl continue from every page reached this way.
    rules = (
        Rule(
            LinkExtractor(allow=r'/a-'),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Return a single-key item mapping 'Link' to the page URL."""
        item = {'Link': response.url}
        return item