Skip to content

Commit

Permalink
Cambios en settings-example.py
Browse files Browse the repository at this point in the history
  • Loading branch information
verena91 committed Sep 15, 2014
1 parent 14c4055 commit 99cd273
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion bin/DataCrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def main(file):
created_files = call_spider(file)
# Finalizar splash
# p.terminate()
import_to_ckan(created_files)
# import_to_ckan(created_files)


def call_spider(file):
Expand Down
2 changes: 1 addition & 1 deletion crawler/data_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def convert(self, domain):
'description': property["description"][0],
'contactName': "",
'mbox': "",
'keywords': keywords_catalog,
'keyword': keywords_catalog,
'accessLevel': "public",
'publisher': ""})

Expand Down
2 changes: 1 addition & 1 deletion crawler/file_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def save_existing_data_json(self, response, domain, to_json):
if not os.path.exists(subprincipal):
os.makedirs(subprincipal)
filename = subprincipal + "/" + "data.json"
file_response = codecs.open(filename, 'w+', 'utf-8-sig')
file_response = codecs.open(filename, 'wb', 'utf-8-sig')
if to_json == True:
file_response.write(json.dumps(response.json(), indent=2, ensure_ascii=False))
else:
Expand Down
6 changes: 4 additions & 2 deletions crawler/settings.py → crawler/settings-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
LOG_LEVEL = 'INFO'
COOKIES_ENABLED = False
LOG_FILE = 'datacrowler.log'
SPLASH_URL = 'http://192.168.200.3:8050/render.html?url='
API_KEY = ""
# Especificar aquí la ubicación donde se levanta el servidor splash
SPLASH_URL = 'http://localhost:8050/render.html?url='
# Especificar aquí la API Key del catálogo
API_KEY = "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
#DEPTH_LIMIT = 1

# Crawl responsibly by identifying yourself (and your website) on the user-agent
Expand Down
6 changes: 4 additions & 2 deletions importer/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import os
import sys
from zipfile import ZipFile
from scrapy.utils.project import get_project_settings
from model import DataEntry, CkanDataset


class CKANImporter(object):

def __init__(self):
self.headers = {'Authorization': 'xxxxx', 'Content-type':'application/json'}
settings = get_project_settings()
self.headers = {'Authorization': settings['API_KEY'], 'Content-type':'application/json'}
self.base_url = 'http://www.datos.gov.py/api/3/action/'

def import_package(self, filename, modalidad):
Expand Down Expand Up @@ -100,4 +102,4 @@ def get_organization_id(self, org_name):
sys.setdefaultencoding("utf-8")
importer = CKANImporter()
#Para pruebas sin ejecutar el crawler
importer.import_package('/home/desa4/workspace/DataCrawler/results_12_09_14/datos.mec.gov.py/data.json', 'data-hunting')
importer.import_package('data.json', 'data-hunting')

0 comments on commit 99cd273

Please sign in to comment.