Skip to content

Commit

Permalink
Cambios en settings-example.py
Browse files Browse the repository at this point in the history
  • Loading branch information
verena91 committed Sep 15, 2014
1 parent 14c4055 commit 99cd273
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion bin/DataCrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main(file):
created_files = call_spider(file)
# Finalizar splash
# p.terminate()
import_to_ckan(created_files)
# import_to_ckan(created_files)


def call_spider(file):
Expand Down
2 changes: 1 addition & 1 deletion crawler/data_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def convert(self, domain):
'description': property["description"][0],
'contactName': "",
'mbox': "",
'keywords': keywords_catalog,
'keyword': keywords_catalog,
'accessLevel': "public",
'publisher': ""})

Expand Down
2 changes: 1 addition & 1 deletion crawler/file_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def save_existing_data_json(self, response, domain, to_json):
if not os.path.exists(subprincipal):
os.makedirs(subprincipal)
filename = subprincipal + "/" + "data.json"
file_response = codecs.open(filename, 'w+', 'utf-8-sig')
file_response = codecs.open(filename, 'wb', 'utf-8-sig')
if to_json == True:
file_response.write(json.dumps(response.json(), indent=2, ensure_ascii=False))
else:
Expand Down
6 changes: 4 additions & 2 deletions crawler/settings.py → crawler/settings-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
LOG_LEVEL = 'INFO'
COOKIES_ENABLED = False
LOG_FILE = 'datacrowler.log'
SPLASH_URL = 'http://192.168.200.3:8050/render.html?url='
API_KEY = ""
# Especificar aquí la ubicación donde se levanta el servidor splash
SPLASH_URL = 'http://localhost:8050/render.html?url='
# Especificar aquí la API Key del catálogo
API_KEY = "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
#DEPTH_LIMIT = 1

# Crawl responsibly by identifying yourself (and your website) on the user-agent
Expand Down
6 changes: 4 additions & 2 deletions importer/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import os
import sys
from zipfile import ZipFile
from scrapy.utils.project import get_project_settings
from model import DataEntry, CkanDataset


class CKANImporter(object):

def __init__(self):
self.headers = {'Authorization': 'xxxxx', 'Content-type':'application/json'}
settings = get_project_settings()
self.headers = {'Authorization': settings['API_KEY'], 'Content-type':'application/json'}
self.base_url = 'http://www.datos.gov.py/api/3/action/'

def import_package(self, filename, modalidad):
Expand Down Expand Up @@ -100,4 +102,4 @@ def get_organization_id(self, org_name):
sys.setdefaultencoding("utf-8")
importer = CKANImporter()
#Para pruebas sin ejecutar el crawler
importer.import_package('/home/desa4/workspace/DataCrawler/results_12_09_14/datos.mec.gov.py/data.json', 'data-hunting')
importer.import_package('data.json', 'data-hunting')

0 comments on commit 99cd273

Please sign in to comment.