From 9d28bfdd1d46e0e553b8f4969bbd38308123a737 Mon Sep 17 00:00:00 2001 From: Spiros Delviniotis Date: Fri, 18 Aug 2017 16:01:24 +0200 Subject: [PATCH] global: add `CRAWL_ONCE_PATH` to settings * Adds: variable in `settings.py` to specify where to store `scrapy-crawl-once` DB. Addresses #161 Signed-off-by: Spiros Delviniotis --- docker-compose.test.yml | 2 ++ hepcrawl/settings.py | 4 ++++ hepcrawl/testlib/fixtures.py | 3 ++- tests/Dockerfile.hepcrawl_base | 4 ++-- tests/docker_entrypoint.sh | 3 +++ tests/fix_rights | Bin 8992 -> 9032 bytes tests/fix_rights.c | 8 ++++++-- 7 files changed, 19 insertions(+), 5 deletions(-) diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 7c4c2668..89d29191 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -17,6 +17,7 @@ services: - APP_CELERY_RESULT_BACKEND=amqp://guest:guest@rabbitmq:5672// - APP_CRAWLER_HOST_URL=http://scrapyd:6800 - APP_API_PIPELINE_TASK_ENDPOINT_DEFAULT=hepcrawl.testlib.tasks.submit_results + - APP_CRAWL_ONCE_PATH=/var/lib/scrapy/crawl_once/ - COVERAGE_PROCESS_START=/code/.coveragerc - BASE_USER_UID=${BASE_USER_UID:-1000} - BASE_USER_GIT=${BASE_USER_GIT:-1000} @@ -25,6 +26,7 @@ services: - ${DOCKER_DATA}/tmp/hepcrawl_venv:/hepcrawl_venv/ - ${PWD}:/code/ - ${PWD}/tests/functional/scrapyd_coverage_runner.conf:/etc/scrapyd/scrapyd.conf + - ${PWD}/.scrapy/crawl_once:/var/lib/scrapy/crawl_once functional_wsp: <<: *service_base diff --git a/hepcrawl/settings.py b/hepcrawl/settings.py index 044520bc..71d34fd3 100644 --- a/hepcrawl/settings.py +++ b/hepcrawl/settings.py @@ -74,6 +74,10 @@ CRAWL_ONCE_ENABLED = True CRAWL_ONCE_DEFAULT = True +CRAWL_ONCE_PATH = os.environ.get( + 'APP_CRAWL_ONCE_PATH', + '/var/lib/scrapy/crawl_once/', +) # Enable or disable extensions # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html diff --git a/hepcrawl/testlib/fixtures.py b/hepcrawl/testlib/fixtures.py index a5da3d56..9fc4182b 100644 --- a/hepcrawl/testlib/fixtures.py +++ 
b/hepcrawl/testlib/fixtures.py @@ -15,6 +15,7 @@ from scrapy.http import Request, TextResponse from scrapy.selector import Selector +from hepcrawl.settings import CRAWL_ONCE_PATH def fake_response_from_file(file_name, test_suite='unit', url='http://www.example.com', response_type=TextResponse): @@ -134,7 +135,7 @@ def expected_json_results_from_file(*path_chunks, **kwargs): return expected_data -def clean_dir(path=os.path.join(os.getcwd(), '.scrapy')): +def clean_dir(path=CRAWL_ONCE_PATH): """ Args: path(str): path of directory to be deleted. Default path is the produced DB per spider that diff --git a/tests/Dockerfile.hepcrawl_base b/tests/Dockerfile.hepcrawl_base index eb91b69f..8db9f43e 100644 --- a/tests/Dockerfile.hepcrawl_base +++ b/tests/Dockerfile.hepcrawl_base @@ -26,10 +26,10 @@ RUN yum install -y epel-release && \ python-virtualenv && \ yum clean all -RUN mkdir /code /hepcrawl_venv +RUN mkdir /code /hepcrawl_venv /var/lib/scrapy RUN useradd test -RUN chown -R test:test /code /hepcrawl_venv +RUN chown -R test:test /code /hepcrawl_venv /var/lib/scrapy ADD ./docker_entrypoint.sh /docker_entrypoint.sh ADD ./fix_rights /fix_rights diff --git a/tests/docker_entrypoint.sh b/tests/docker_entrypoint.sh index 1762a421..4785699d 100755 --- a/tests/docker_entrypoint.sh +++ b/tests/docker_entrypoint.sh @@ -23,6 +23,8 @@ restore_venv_tmp_code_rights() { /fix_rights --codedir "$BASE_USER_UID:$BASE_USER_GID" echo "Restoring permissions of tmpdir to $BASE_USER_UID:$BASE_USER_GID" /fix_rights --tmpdir "$BASE_USER_UID:$BASE_USER_GID" + echo "Restoring permissions of vardir to $BASE_USER_UID:$BASE_USER_GID" + /fix_rights --vardir "$BASE_USER_UID:$BASE_USER_GID" else echo "No BASE_USER_UID env var defined, skipping venv, codedir, tmpdir permission" \ "restore." @@ -57,6 +59,7 @@ main() { /fix_rights --virtualenv 'test:test' /fix_rights --codedir 'test:test' /fix_rights --tmpdir 'test:test' + /fix_rights --vardir 'test:test' trap restore_venv_tmp_code_rights EXIT if ! 
[[ -f "$VENV_PATH/bin/activate" ]]; then diff --git a/tests/fix_rights b/tests/fix_rights index 98677b2c948eebb928e2439ed495a81515b06b8a..ecf219b08f9c9e8e4e7d5887f56060738ae56f33 100755 GIT binary patch delta 1261 zcmZ9Me`r%z6vyv<&zHQH)c6vczNBU3$LOy>l2(LJ=WL#~Q!@94DlXH&%IWz*`;lu@(3SyyU;niu*LQfL=vZiTKS%X;W$f=X379Hz znzWhH7AvHtNe<-@I$m7j55=+I=4+BagEN3qPWt+n@*@%y~8u+A1n$=iI|HOjv>H$vneKERCxgF^}85_U;+p?3ACXn&&1+t)wX)7R^*90*Y= zrc+e(?oADJtNZrsJ+Ahp`uAB7!WHZ*bx;cjPwT3tjexxcwiWCTP{HS5M>aI=2H5)> z8x7`O5(MmUct7i61Zrwwu^*aEX~mP2lQ+XLg+gEG^s;w2H}& zS$48Sv_;<0raoQZKve^ z9fQ4GdF&<&kg(&q=HzD#OZ1Jcg8QQHfozGw&8-ea&0!IiCA2gC{2~Vpr@W6F|H#Ez6iXHC%#6Q8~lxUgq-v@3bUe)K?Mu` Oh>(|xyRQg-+4BrIo@@93 delta 1112 zcmZ8gU1$?o6ux(2l9{O{lbCTfhTZ%?t&KINFB@DHlQk{3(n7`7zC>(AO6!9b|0=R9 zE3IOvttrO`L0D1^>vkWy!4bC0vNfU@eX;(6=!=R%{6YLf6szO8lejKDaL>8teBV7k zcjjJd%r<5g)6UV_Q}UopOc|6%??h-PNc4y+C6!|-DQ<33Fi!7!#%qE#$%7~Uec$uf z@w%-&`|74QC-q}O*wT{~MPa&4mrG(+x1Q9w@v9=&9#r% zz%!AZ`)Dcs2_xcOm@6%WtW*mfQhof#t}TJ>eY<+MZwplKjPRSIKFf+IS}U%b9}MFs zv~4qn(TsL27R~YaC&RdaHfexcUL=qi_(6^ctsh{kyeRZ5k`M6#5)S2{ux6gkGC=3r zMUuqO7F^;dI4w6?qnJa`X_eCgO4VUd9C4b{*E#HhdlIjM%cY8)7WEy<1f@9$x;2_d zty)|p1?$kupjP#t-4Ceniwv7RBQymsJKZtz zdNA`#Rr5|@Tp?bU+H{KrK}U8Aiyl6H- z#tHm^q_400zCR{s@K5KGk@QgDBBhJraWICDEf`vjx(oGs7z?eIepgHy5#eztCg-)X LcT#{SzVAK*PI@vR diff --git a/tests/fix_rights.c b/tests/fix_rights.c index 8eaa9911..2dead30c 100644 --- a/tests/fix_rights.c +++ b/tests/fix_rights.c @@ -11,6 +11,7 @@ char *VENV_PATH = "/hepcrawl_venv/"; char *CODE_PATH = "/code/"; char *TMP_PATH = "/tmp/"; +char *VAR_PATH = "/var/"; int main (int argc, char *argv[]) { @@ -30,7 +31,7 @@ int main (int argc, char *argv[]) { if (argc != 3) { fprintf( stderr, - "Usage: %s --virtualenv|--codedir|--tmpdir :\n", + "Usage: %s --virtualenv|--codedir|--tmpdir|--vardir :\n", argv[0] ); exit(EXIT_FAILURE); @@ -48,11 +49,14 @@ int main (int argc, char *argv[]) { } else if (strcmp(argv[1], "--tmpdir") == 0) { // tmp dir permissions chown_argv[3] = TMP_PATH; + } else if
(strcmp(argv[1], "--vardir") == 0) { + // var dir permissions + chown_argv[3] = VAR_PATH; } else { fprintf(stderr, "Bad option %s.", argv[1]); fprintf( stderr, - "Usage: %s --virtualenv|--codedir|--tmpdir :\n", + "Usage: %s --virtualenv|--codedir|--tmpdir|--vardir :\n", argv[0] ); exit(EXIT_FAILURE);