from time import sleep

from scrapy import signals
from scrapy.http import HtmlResponse
class ScrapymiddleDownloaderMiddleware(object):
    """Downloader middleware hooks (Scrapy template).

    Not all methods need to be defined. If a method is not defined,
    Scrapy acts as if the downloader middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        # NOTE(review): this connects a ``spider_opened`` handler that is
        # not defined in the visible part of this file -- confirm the
        # method exists on the class, otherwise signal dispatch will fail.
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.
        #
        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.
        #
        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass