Skip to content

Commit

Permalink
Merge pull request inspirehep#98 from spirosdelviniotis/hepcrawl_WSP_…
Browse files Browse the repository at this point in the history
…tests

tests: add WSP functional tests
  • Loading branch information
david-caro authored May 9, 2017
2 parents 075edb0 + 5773420 commit 69662a8
Show file tree
Hide file tree
Showing 24 changed files with 618 additions and 21 deletions.
14 changes: 14 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

# Settings for the coverage.py "run" phase.
[run]
# Write one data file per process so the results can be merged afterwards
# with "coverage combine".
parallel = True
# Paths left out of measurement. The entry sits on an indented continuation
# line of the multi-line "omit" value.
omit =
    /hepcrawl_venv/lib/python2.7/site-packages/*

35 changes: 24 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016 CERN.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
Expand All @@ -11,29 +11,42 @@ sudo: false

language: python

cache:
- pip
services:
- docker

python:
- '2.7'

env:
- EXTRAS=docs,tests
global:
- EXTRAS=docs,tests
- DOCKER_COMPOSE_VERSION=1.9.0
- DOCKER_DATA="$HOME/hepcrawl_docker_data"
matrix:
- SUITE=unit
- SUITE=functional_wsp

matrix:
fast_finish: true

before_install:
- "travis_retry pip install --upgrade pip setuptools py"
- "travis_retry pip install twine wheel coveralls check-manifest"
- travis_retry pip install twine wheel coveralls check-manifest
- sudo rm -f /usr/local/bin/docker-compose
- curl -L https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-`uname -s`-`uname -m` > docker-compose
- chmod +x docker-compose
- sudo mv docker-compose /usr/local/bin

install:
- "travis_retry pip install -e .[${EXTRAS}]"
- travis_retry docker-compose -f docker-compose.deps.yml run --rm pip

script:
- pip freeze
- sphinx-build -qnNW docs docs/_build/html
- python setup.py test
- python setup.py sdist && ls dist/*
- travis_retry docker-compose -f docker-compose.test.yml run --rm ${SUITE}

after_success:
- docker-compose -f docker-compose.test.yml kill -s SIGTERM
- docker-compose -f docker-compose.test.yml rm -f
- coverage combine
- sed -i 's@\"/code/@'"\"$(pwd)/"'@g' .coverage
- coveralls

notifications:
Expand Down
21 changes: 21 additions & 0 deletions docker-compose.deps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

version: '2'

services:
  # Builds the hepcrawl_base image from tests/functional/hepcrawl_base.dockerfile
  # and runs an editable install of the mounted checkout with the "all" extras.
  pip:
    build:
      context: ${PWD}/tests/functional
      dockerfile: hepcrawl_base.dockerfile
    image: hepcrawl_base
    # '.[all]' is quoted so that bash passes it to pip verbatim: unquoted, the
    # brackets form a glob that would expand to a file named ".a" or ".l" if
    # one existed in the working directory.
    command: bash -c "pip install -e '.[all]' && pip freeze"
    volumes:
      # Host directory mounted at /hepcrawl_venv/ (presumably the virtualenv
      # used by the image - confirm against hepcrawl_base.dockerfile).
      - ${DOCKER_DATA}/tmp/hepcrawl_venv:/hepcrawl_venv/
      # The checkout itself, mounted at /code/.
      - ${PWD}:/code/
80 changes: 80 additions & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

version: '2'

services:
  # Runs the WSP functional tests; linked to the helper services it talks to.
  functional_wsp:
    # The hepcrawl_base image is built by the pip service of
    # docker-compose.deps.yml.
    image: hepcrawl_base
    # Anchored so the other hepcrawl_base services can reuse the same
    # environment through the *env_variables alias.
    environment: &env_variables
      - APP_BROKER_URL=amqp://guest:guest@rabbitmq:5672//
      - APP_CELERY_RESULT_BACKEND=amqp://guest:guest@rabbitmq:5672//
      - APP_CRAWLER_HOST_URL=http://scrapyd:6800
      - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=tests.functional.tasks.submit_results
      - COVERAGE_PROCESS_START=/code/.coveragerc
    command: py.test -vv tests/functional/WSP/test_wsp.py
    # Anchored so the other hepcrawl_base services can reuse the same mounts
    # through the *common_volumes alias.
    volumes: &common_volumes
      - ${DOCKER_DATA}/tmp/hepcrawl_venv:/hepcrawl_venv/
      - ${PWD}:/code/
      - ${PWD}/tests/functional/scrapyd_coverage_runner.conf:/etc/scrapyd/scrapyd.conf
    links:
      - rabbitmq
      - celery
      - scrapyd
      - ftp_server

  # Runs the unit tests, then builds the docs and the source distribution.
  unit:
    image: hepcrawl_base
    environment: *env_variables
    command: bash -c "py.test tests/unit && sphinx-build -nNW docs docs/_build/html && python setup.py sdist && ls dist/*"
    volumes: *common_volumes

  # Builds the docs and the source distribution, then lists the artifacts.
  doc:
    image: hepcrawl_base
    environment: *env_variables
    # Only the last command may be exec'd: exec replaces the bash process, so
    # anything chained after an exec'd command would never run.
    command: bash -c "sphinx-build -qnNW docs docs/_build/html && python setup.py sdist && exec ls dist/*"
    volumes: *common_volumes

  # Celery worker serving the tasks defined in tests.functional.tasks.
  celery:
    image: hepcrawl_base
    environment: *env_variables
    command: celery worker --events --app tests.functional.tasks --loglevel=debug
    volumes: *common_volumes
    links:
      - rabbitmq
      - ftp_server

  # Scrapyd daemon; a leftover twistd.pid is removed before it starts.
  scrapyd:
    image: hepcrawl_base
    environment: *env_variables
    command: bash -c "rm -f twistd.pid && exec scrapyd"
    volumes: *common_volumes
    links:
      - celery
      - ftp_server
      - rabbitmq
    # Makes compose start scrapyd_deploy whenever scrapyd is started; the
    # deploy itself waits (see the sleep below) before contacting scrapyd.
    depends_on:
      - scrapyd_deploy

  # Deploys the project to scrapyd.
  scrapyd_deploy:
    image: hepcrawl_base
    environment: *env_variables
    # Sleep first to give scrapyd time to come up.
    command: bash -c "sleep 8 && scrapyd-deploy"
    volumes: *common_volumes

  # FTP server exposing the WSP fixtures for user "bob".
  ftp_server:
    image: stilliard/pure-ftpd:hardened
    environment:
      - PUBLICHOST=localhost
    volumes:
      - ${PWD}/tests/functional/WSP/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP
      - ${PWD}/tests/functional/WSP/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd

  # Message broker referenced by APP_BROKER_URL / APP_CELERY_RESULT_BACKEND.
  rabbitmq:
    image: rabbitmq
3 changes: 3 additions & 0 deletions hepcrawl/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,16 @@ def open_spider(self, spider):

def close_spider(self, spider):
"""Post results to BROKER API."""
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
if 'SCRAPY_JOB' in os.environ and self.count > 0:
task_endpoint = spider.settings[
'API_PIPELINE_TASK_ENDPOINT_MAPPING'
].get(
spider.name,
spider.settings['API_PIPELINE_TASK_ENDPOINT_DEFAULT'],
)
logger.info('Triggering celery task: %s.' % task_endpoint)
self.celery.send_task(
task_endpoint,
kwargs=self._prepare_payload(spider),
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/scrapy.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016 CERN.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
Expand Down
9 changes: 6 additions & 3 deletions hepcrawl/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,20 @@
# INSPIRE Push Pipeline settings
# ==============================
API_PIPELINE_URL = "http://localhost:5555/api/task/async-apply"
API_PIPELINE_TASK_ENDPOINT_DEFAULT = "inspire_crawler.tasks.submit_results"
API_PIPELINE_TASK_ENDPOINT_DEFAULT = os.environ.get(
"APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT",
"inspire_crawler.tasks.submit_results"
)
API_PIPELINE_TASK_ENDPOINT_MAPPING = {} # e.g. {'my_spider': 'special.task'}

# Celery
# ======
BROKER_URL = os.environ.get(
"APP_BROKER_URL",
"amqp://guest:guest@localhost:5672//")
"amqp://guest:guest@rabbitmq:5672//")
CELERY_RESULT_BACKEND = os.environ.get(
"APP_CELERY_RESULT_BACKEND",
"amqp://guest:guest@localhost:5672//")
"amqp://guest:guest@rabbitmq:5672//")
CELERY_ACCEPT_CONTENT = ['json', 'msgpack', 'yaml']
CELERY_TIMEZONE = 'Europe/Amsterdam'
CELERY_DISABLE_RATE_LIMITS = True
Expand Down
2 changes: 2 additions & 0 deletions hepcrawl/spiders/wsp_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,15 @@ def start_requests(self):
yield Request(self.package_path, callback=self.handle_package_file)
else:
ftp_host, ftp_params = ftp_connection_info(self.ftp_host, self.ftp_netrc)

dummy, new_files = ftp_list_files(
self.ftp_folder,
self.target_folder,
server=ftp_host,
user=ftp_params['ftp_user'],
password=ftp_params['ftp_password']
)

for remote_file in new_files:
# Cast to byte-string for scrapy compatibility
remote_file = str(remote_file)
Expand Down
17 changes: 14 additions & 3 deletions hepcrawl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from __future__ import absolute_import, print_function

import os
import re
from operator import itemgetter
Expand All @@ -17,6 +19,8 @@
from urlparse import urlparse

import ftputil
import ftputil.session
import ftplib
import requests

from scrapy import Selector
Expand All @@ -40,21 +44,28 @@ def unzip_xml_files(filename, target_folder):
return xml_files


def ftp_connection_info(ftp_host, netrc_file):
def ftp_connection_info(ftp_host, netrc_file, passive_mode=False):
    """Return the FTP host and its login parameters read from a netrc file.

    Args:
        ftp_host: host name to look up. When falsy, the first key of the
            netrc file's ``hosts`` mapping is used instead.
        netrc_file: path of the netrc file holding the credentials.
        passive_mode: value stored under ``ftp_passive`` in the result.

    Returns:
        tuple: ``(ftp_host, connection_params)``, where ``connection_params``
        is a dict with the keys ``ftp_user``, ``ftp_password`` and
        ``ftp_passive``.

    Raises:
        IndexError: if ``ftp_host`` is falsy and the netrc file has no hosts.
        TypeError: if the netrc file has no entry (and no default entry) for
            the host, because ``authenticators`` then returns ``None``.
    """
    # Parse the file once and reuse it for both the host lookup and the
    # credentials lookup.
    credentials = netrc(netrc_file)
    if not ftp_host:
        # list(...) instead of .keys()[0]: same result on Python 2, and it
        # keeps working where dict views are not indexable.
        ftp_host = list(credentials.hosts)[0]
    # authenticators() yields a (login, account, password) tuple.
    logininfo = credentials.authenticators(ftp_host)
    connection_params = {
        "ftp_user": logininfo[0],
        "ftp_password": logininfo[2],
        "ftp_passive": passive_mode,
    }
    return ftp_host, connection_params


def ftp_list_files(server_folder, target_folder, server, user, password):
def ftp_list_files(server_folder, target_folder, server, user, password, passive_mode=False):
"""List files from given FTP's server folder to target folder."""
with ftputil.FTPHost(server, user, password) as host:
session_factory = ftputil.session.session_factory(
base_class=ftplib.FTP,
port=21,
use_passive_mode=passive_mode,
encrypt_data_channel=True)

with ftputil.FTPHost(server, user, password, session_factory=session_factory) as host:
files = host.listdir(host.curdir + '/' + server_folder)
missing_files = []
all_files = []
Expand Down
4 changes: 2 additions & 2 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016 CERN.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

[pytest]
addopts = --pep8 --ignore=docs --cov=hepcrawl --cov-report=term-missing
addopts = --pep8 --ignore=docs --cov-config .coveragerc --cov=hepcrawl --cov-report=term-missing
pep8ignore =
tests/* ALL
*.py E501
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016 CERN.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
Expand Down Expand Up @@ -29,6 +29,7 @@
'furl>=0.4.95',
'ftputil>=3.3.1',
'python-dateutil>=2.4.2',
'python-scrapyd-api>=2.0.1',
]

tests_require = [
Expand Down
8 changes: 8 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.
8 changes: 8 additions & 0 deletions tests/functional/WSP/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.
3 changes: 3 additions & 0 deletions tests/functional/WSP/fixtures/ftp_server/.netrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
machine ftp_server
login bob
password bob
Binary file not shown.
1 change: 1 addition & 0 deletions tests/functional/WSP/fixtures/ftp_server/pureftpd.passwd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bob:$1$3ccy4I60$nSpFtRN8U6/BgmmPaxrYR/:1000:1000::/home/ftpusers/bob/./::::::::::::
Loading

0 comments on commit 69662a8

Please sign in to comment.