Skip to content

Commit

Permalink
global: add CRAWL_ONCE_PATH to settings
Browse files Browse the repository at this point in the history
* Adds: a `CRAWL_ONCE_PATH` variable in `settings.py` that specifies where the `scrapy-crawl-once` DB is stored.

Addresses inspirehep#161

Signed-off-by: Spiros Delviniotis <[email protected]>
  • Loading branch information
spirosdelviniotis committed Aug 21, 2017
1 parent f19320d commit abf2c97
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 5 deletions.
1 change: 1 addition & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ services:
- ${DOCKER_DATA}/tmp/hepcrawl_venv:/hepcrawl_venv/
- ${PWD}:/code/
- ${PWD}/tests/functional/scrapyd_coverage_runner.conf:/etc/scrapyd/scrapyd.conf
- ${PWD}/.scrapy/crawl_once:/var/lib/scrapy/crawl_once

functional_wsp:
<<: *service_base
Expand Down
1 change: 1 addition & 0 deletions hepcrawl/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@

CRAWL_ONCE_ENABLED = True
CRAWL_ONCE_DEFAULT = True
CRAWL_ONCE_PATH = '/var/lib/scrapy/crawl_once/'

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
Expand Down
3 changes: 2 additions & 1 deletion hepcrawl/testlib/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from scrapy.http import Request, TextResponse
from scrapy.selector import Selector
from hepcrawl.settings import CRAWL_ONCE_PATH


def fake_response_from_file(file_name, test_suite='unit', url='http://www.example.com', response_type=TextResponse):
Expand Down Expand Up @@ -134,7 +135,7 @@ def expected_json_results_from_file(*path_chunks, **kwargs):
return expected_data


def clean_dir(path=os.path.join(os.getcwd(), '.scrapy')):
def clean_dir(path=CRAWL_ONCE_PATH):
"""
Args:
path(str): path of directory to be deleted. Default path is the produced DB per spider that
Expand Down
4 changes: 2 additions & 2 deletions tests/Dockerfile.hepcrawl_base
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ RUN yum install -y epel-release && \
python-virtualenv && \
yum clean all

RUN mkdir /code /hepcrawl_venv
RUN mkdir /code /hepcrawl_venv /var/lib/scrapy

RUN useradd test
RUN chown -R test:test /code /hepcrawl_venv
RUN chown -R test:test /code /hepcrawl_venv /var/lib/scrapy

ADD ./docker_entrypoint.sh /docker_entrypoint.sh
ADD ./fix_rights /fix_rights
Expand Down
3 changes: 3 additions & 0 deletions tests/docker_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ restore_venv_tmp_code_rights() {
/fix_rights --codedir "$BASE_USER_UID:$BASE_USER_GID"
echo "Restoring permissions of tmpdir to $BASE_USER_UID:$BASE_USER_GID"
/fix_rights --tmpdir "$BASE_USER_UID:$BASE_USER_GID"
echo "Restoring permissions of vardir to $BASE_USER_UID:$BASE_USER_GID"
/fix_rights --vardir "$BASE_USER_UID:$BASE_USER_GID"
else
echo "No BASE_USER_UID env var defined, skipping venv, codedir, tmpdir permission" \
"restore."
Expand Down Expand Up @@ -57,6 +59,7 @@ main() {
/fix_rights --virtualenv 'test:test'
/fix_rights --codedir 'test:test'
/fix_rights --tmpdir 'test:test'
/fix_rights --vardir 'test:test'
trap restore_venv_tmp_code_rights EXIT

if ! [[ -f "$VENV_PATH/bin/activate" ]]; then
Expand Down
Binary file modified tests/fix_rights
Binary file not shown.
8 changes: 6 additions & 2 deletions tests/fix_rights.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
char *VENV_PATH = "/hepcrawl_venv/";
char *CODE_PATH = "/code/";
char *TMP_PATH = "/tmp/";
char *VAR_PATH = "/var/";


int main (int argc, char *argv[]) {
Expand All @@ -30,7 +31,7 @@ int main (int argc, char *argv[]) {
if (argc != 3) {
fprintf(
stderr,
"Usage: %s --virtualenv|--codedir|--tmpdir <user>:<group>\n",
"Usage: %s --virtualenv|--codedir|--tmpdir|--vardir <user>:<group>\n",
argv[0]
);
exit(EXIT_FAILURE);
Expand All @@ -48,11 +49,14 @@ int main (int argc, char *argv[]) {
} else if (strcmp(argv[1], "--tmpdir") == 0) {
// tmp dir permissions
chown_argv[3] = TMP_PATH;
} else if (strcmp(argv[1], "--vardir") == 0) {
// var dir permissions
chown_argv[3] = VAR_PATH;
} else {
fprintf(stderr, "Bad option %s.", argv[1]);
fprintf(
stderr,
"Usage: %s --virtualenv|--codedir|--tmpdir <user>:<group>\n",
"Usage: %s --virtualenv|--codedir|--tmpdir|--vardir <user>:<group>\n",
argv[0]
);
exit(EXIT_FAILURE);
Expand Down

0 comments on commit abf2c97

Please sign in to comment.