From 06b8c56655869600ffd194a50c6fe46bd88232b9 Mon Sep 17 00:00:00 2001
From: pchjia <pchjia@sina.cn>
Date: Wed, 13 Jan 2016 12:05:57 +0800
Subject: [PATCH] =?UTF-8?q?=E5=9C=A8=E2=80=9CFollowing=20links=E2=80=9D=20?=
 =?UTF-8?q?=E8=BF=99=E4=B8=80=E8=8A=82=E6=9C=89=E4=B8=AA=E5=B0=8F=E9=94=99?=
 =?UTF-8?q?=E8=AF=AF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

教程示例在调用 response.urljoin 时传入了两个参数，但该方法的定义是 urljoin(self, url)：第一个参数 self 是 Response 实例自身的引用，由 Python 在调用时自动绑定，不需要在类外显式传入，因此调用时只应传入一个 url 参数。查看文档后得出此处的正确写法为 response.urljoin(href.extract())

以下是文章内容引用：

def parse(self, response):
    for href in response.css("ul.directory.dir-col > li > a::attr('href')"):
        url = response.urljoin(response.url, href.extract())
        yield scrapy.Request(url, callback=self.parse_dir_contents)

class Response(object_ref):
    def urljoin(self, url):
        """Join this Response's url with a possible relative url to form an
        absolute interpretation of the latter."""
        return urljoin(self.url, url)
---
 intro/tutorial.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/intro/tutorial.rst b/intro/tutorial.rst
index fe7a8d4..668d1fd 100644
--- a/intro/tutorial.rst
+++ b/intro/tutorial.rst
@@ -401,7 +401,7 @@ Here is a modification to our spider that does just that::
 
         def parse(self, response):
             for href in response.css("ul.directory.dir-col > li > a::attr('href')"):
-                url = response.urljoin(response.url, href.extract())
+                url = response.urljoin(href.extract())
                 yield scrapy.Request(url, callback=self.parse_dir_contents)
 
         def parse_dir_contents(self, response):