Skip to content

Commit

Permalink
Crawling sobre varios dominios.
Browse files: browse the repository at this point in the history
Retorna un JSON y un data.json por cada dominio.
  • Loading branch information
verena91 committed Sep 8, 2014
1 parent 672bf2d commit a5dada6
Show file tree
Hide file tree
Showing 7 changed files with 1,416 additions and 1,389 deletions.
12 changes: 6 additions & 6 deletions bin/Test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def call_spider(file):

spider = DataSpider(domains=domains, start_urls=urls)
settings = get_project_settings()
#settings.overrides['FEED_FORMAT'] = 'json'
# settings.overrides['FEED_FORMAT'] = 'json'
#settings.overrides['FEED_URI'] = 'result.json'
crawler = Crawler(settings)
crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
Expand All @@ -44,12 +44,12 @@ def call_spider(file):
log.start(loglevel=log.DEBUG)
reactor.run() # the script will block here

""" Copiar los datos al archivo final """
file_name = domain + ".json"
data_spider.copy_items_to_file(file_name)
""" Copiar los datos a los archivos finales """
data_spider.copy_items_to_files()

""" Convertir el json extraido al json con formato POD """
DataJson.DataJson().convert("items.json", domain)
""" Convertir los archivos json extraidos a json con formato POD """
for domain in domains:
DataJson.DataJson().convert(domain)


results = []
Expand Down
13 changes: 2 additions & 11 deletions bin/aux.html
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
<div>
<div itemtype="http://www.w3.org/2000/01/rdf-schema#Resource" itemscope>
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#button" />
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#checkbox" />
</div>
<div itemtype="http://www.w3.org/2000/01/rdf-schema#Resource" itemscope>
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#navigation" />
</div>
<div itemtype="http://www.w3.org/2000/01/rdf-schema#Resource" itemscope>
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#navigation" />
</div>
<div itemtype="http://www.w3.org/2000/01/rdf-schema#Resource" itemscope>
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#navigation" />
</div>
<div itemtype="http://www.w3.org/2000/01/rdf-schema#Resource" itemscope>
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#navigation" />
<link itemprop="http://www.w3.org/1999/xhtml/vocab#role" href="http://www.w3.org/1999/xhtml/vocab#checkbox" />
</div>
</div>
Loading

0 comments on commit a5dada6

Please sign in to comment.