# Last updated: 2022-03-12 09:37:21
# -*- coding: utf-8 -*-
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class TopartSpider(CrawlSpider):
    """Crawl spider for topart-online.com that records matching page URLs.

    Starts from the site root, follows every extracted link whose URL
    matches the ``/a-`` pattern, and emits one item per matched page
    containing that page's URL.
    """

    # Spider identifier used on the command line (``scrapy crawl topart``).
    name = 'topart'

    # Restrict the crawl to this domain; the start page is the site root.
    allowed_domains = ['topart-online.com']
    start_urls = ['http://topart-online.com/']

    # Single rule: links matching the regex r'/a-' are passed to
    # ``parse_item`` and are also followed for further link extraction.
    rules = (
        Rule(
            LinkExtractor(allow=r'/a-'),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build the item for a matched page.

        Args:
            response: The response object for the page that matched the rule.

        Returns:
            A dict with a single ``'Link'`` key holding ``response.url``.
        """
        item = {'Link': response.url}
        return item