From 06b8c56655869600ffd194a50c6fe46bd88232b9 Mon Sep 17 00:00:00 2001 From: pchjia Date: Wed, 13 Jan 2016 12:05:57 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9C=A8=E2=80=9CFollowing=20links=E2=80=9D=20?= =?UTF-8?q?=E8=BF=99=E4=B8=80=E8=8A=82=E6=9C=89=E4=B8=AA=E5=B0=8F=E9=94=99?= =?UTF-8?q?=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 文档的response.urljoin有两个参数, 但是第一个参数是Response类的引用, 不能在类外使用,查看文档后得出此处的正确写法为response.urljoin(href.extract()) 以下是文章内容引用: def parse(self, response): for href in response.css("ul.directory.dir-col > li > a::attr('href')"): url = response.urljoin(response.url, href.extract()) yield scrapy.Request(url, callback=self.parse_dir_contents) class Response(object_ref): def urljoin(self, url): """Join this Response's url with a possible relative url to form an absolute interpretation of the latter.""" return urljoin(self.url, url) --- intro/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intro/tutorial.rst b/intro/tutorial.rst index fe7a8d4..668d1fd 100644 --- a/intro/tutorial.rst +++ b/intro/tutorial.rst @@ -401,7 +401,7 @@ Here is a modification to our spider that does just that:: def parse(self, response): for href in response.css("ul.directory.dir-col > li > a::attr('href')"): - url = response.urljoin(response.url, href.extract()) + url = response.urljoin(href.extract()) yield scrapy.Request(url, callback=self.parse_dir_contents) def parse_dir_contents(self, response):