From 99cd2734a677254fba2fa8f32e072c551480cfee Mon Sep 17 00:00:00 2001 From: verena91 Date: Mon, 15 Sep 2014 15:25:20 -0400 Subject: [PATCH] Cambios en settings-example.py --- bin/DataCrawler.py | 2 +- crawler/data_json.py | 2 +- crawler/file_controller.py | 2 +- crawler/{settings.py => settings-example.py} | 6 ++++-- importer/rest.py | 6 ++++-- 5 files changed, 11 insertions(+), 7 deletions(-) rename crawler/{settings.py => settings-example.py} (68%) diff --git a/bin/DataCrawler.py b/bin/DataCrawler.py index c2ad27a..9c676c9 100644 --- a/bin/DataCrawler.py +++ b/bin/DataCrawler.py @@ -31,7 +31,7 @@ def main(file): created_files = call_spider(file) # Finalizar splash # p.terminate() - import_to_ckan(created_files) + # import_to_ckan(created_files) def call_spider(file): diff --git a/crawler/data_json.py b/crawler/data_json.py index 850b3c3..51560ff 100644 --- a/crawler/data_json.py +++ b/crawler/data_json.py @@ -103,7 +103,7 @@ def convert(self, domain): 'description': property["description"][0], 'contactName': "", 'mbox': "", - 'keywords': keywords_catalog, + 'keyword': keywords_catalog, 'accessLevel': "public", 'publisher': ""}) diff --git a/crawler/file_controller.py b/crawler/file_controller.py index b6316e1..0f0c60f 100644 --- a/crawler/file_controller.py +++ b/crawler/file_controller.py @@ -40,7 +40,7 @@ def save_existing_data_json(self, response, domain, to_json): if not os.path.exists(subprincipal): os.makedirs(subprincipal) filename = subprincipal + "/" + "data.json" - file_response = codecs.open(filename, 'w+', 'utf-8-sig') + file_response = codecs.open(filename, 'wb', 'utf-8-sig') if to_json == True: file_response.write(json.dumps(response.json(), indent=2, ensure_ascii=False)) else: diff --git a/crawler/settings.py b/crawler/settings-example.py similarity index 68% rename from crawler/settings.py rename to crawler/settings-example.py index 81cc21b..4cfd7b8 100644 --- a/crawler/settings.py +++ b/crawler/settings-example.py @@ -10,8 +10,10 @@ LOG_LEVEL = 
'INFO' COOKIES_ENABLED = False LOG_FILE = 'datacrowler.log' -SPLASH_URL = 'http://192.168.200.3:8050/render.html?url=' -API_KEY = "" +# Especificar aqui la ubicacion donde se levanta el servidor splash +SPLASH_URL = 'http://localhost:8050/render.html?url=' +# Especificar aqui la API Key del catalogo +API_KEY = "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" #DEPTH_LIMIT = 1 # Crawl responsibly by identifying yourself (and your website) on the user-agent diff --git a/importer/rest.py b/importer/rest.py index 2ad0318..b3d800a 100644 --- a/importer/rest.py +++ b/importer/rest.py @@ -4,13 +4,15 @@ import os import sys from zipfile import ZipFile +from scrapy.utils.project import get_project_settings from model import DataEntry, CkanDataset class CKANImporter(object): def __init__(self): - self.headers = {'Authorization': 'xxxxx', 'Content-type':'application/json'} + settings = get_project_settings() + self.headers = {'Authorization': settings['API_KEY'], 'Content-type':'application/json'} self.base_url = 'http://www.datos.gov.py/api/3/action/' def import_package(self, filename, modalidad): @@ -100,4 +102,4 @@ def get_organization_id(self, org_name): sys.setdefaultencoding("utf-8") importer = CKANImporter() #Para pruebas sin ejecutar el crawler - importer.import_package('/home/desa4/workspace/DataCrawler/results_12_09_14/datos.mec.gov.py/data.json', 'data-hunting') + importer.import_package('data.json', 'data-hunting')