diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py
index add53f70..3a8d3ba8 100644
--- a/hepcrawl/crawler2hep.py
+++ b/hepcrawl/crawler2hep.py
@@ -20,22 +20,34 @@
 from inspire_schemas.api import LiteratureBuilder
 
-from hepcrawl.utils import get_file_name_from_url
-
-def _update_record_fft(record, index_fft_file_paths):
-    def _update_fft_fields(fft_fields, index_fft_file_paths):
-        new_fft_fields = []
-        for fft_field in fft_fields:
-            file_name = get_file_name_from_url(fft_field['path'])
-            if file_name in index_fft_file_paths:
-                fft_field['path'] = index_fft_file_paths[file_name]
-                new_fft_fields.append(fft_field)
-
-        return new_fft_fields
-
-    record['_fft'] = _update_fft_fields(record['_fft'], index_fft_file_paths)
-    return record
+def _get_updated_fft_fields(current_fft_fields, record_files):
+    """Update the FFT fields with the local paths of the record's downloaded files.
+
+    Params:
+        current_fft_fields(list(dict)): record current fft fields as generated by ``dojson``. We
+            expect each of them to have, at least, a key named ``path``.
+        record_files(list(RecordFile)): files attached to the record as populated by
+            ``FftFilesPipeline``.
+    """
+    record_files_index = {
+        record_file.name: record_file.path
+        for record_file in record_files
+    }
+    new_fft_fields = []
+    for fft_field in current_fft_fields:
+        file_name = os.path.basename(fft_field['path'])
+        if file_name in record_files_index:
+            fft_field['path'] = record_files_index[file_name]
+        new_fft_fields.append(fft_field)
+
+    return new_fft_fields
 
 
 def _has_publication_info(item):
@@ -116,50 +128,47 @@ def _normalize_hepcrawl_record(item, source):
     return item
 
 
-def _generate_acquisition_source(crawler_record, source):
-    crawler_record['acquisition_source'] = {
+def _generate_acquisition_source(source):
+    acquisition_source = {
         'source': source,
         'method': 'hepcrawl',
         'datetime': datetime.datetime.now().isoformat(),
         'submission_number': os.environ.get('SCRAPY_JOB', ''),
     }
-    return crawler_record
+    return acquisition_source
 
 
-def to_hep(
-    item,
-    source,
-    item_format='hepcrawl',
-    fft_file_paths=None,
+def item_to_hep(
+    item,
+    source,
 ):
-    item = _generate_acquisition_source(
-        crawler_record=item,
-        source=source,
-    )
+    item.record['acquisition_source'] = _generate_acquisition_source(source=source)
 
-    if item_format == 'hep':
-        return hep2hep(
-            crawler_record=item,
-            fft_file_paths=fft_file_paths,
+    if item.record_format == 'hep':
+        return hep_to_hep(
+            hep_record=item.record,
+            record_files=item.record_files,
         )
-    elif item_format == 'hepcrawl':
+    elif item.record_format == 'hepcrawl':
         item = _normalize_hepcrawl_record(
             item=item,
             source=source,
         )
-        return crawler2hep(dict(item))
+        return hepcrawl_to_hep(dict(item))
     else:
-        raise Exception('Unknown item_format::{}'.format(item_format))
+        raise Exception('Unknown item_format::{}'.format(item.record_format))
 
 
-def hep2hep(crawler_record, fft_file_paths):
-    if fft_file_paths:
-        crawler_record = _update_record_fft(crawler_record, fft_file_paths)
+def hep_to_hep(hep_record, record_files):
+    hep_record['_fft'] = _get_updated_fft_fields(
+        current_fft_fields=hep_record['_fft'],
+        record_files=record_files,
+    )
 
-    return crawler_record
+    return hep_record
 
 
-def crawler2hep(crawler_record):
+def hepcrawl_to_hep(crawler_record):
     def _filter_affiliation(affiliations):
         return [
diff --git a/hepcrawl/pipelines.py b/hepcrawl/pipelines.py
index 05b61361..b8db5bcf 100644
--- a/hepcrawl/pipelines.py
+++ b/hepcrawl/pipelines.py
@@ -24,16 +24,23 @@
 from inspire_schemas.utils import validate
 
-from hepcrawl.crawler2hep import to_hep
+from hepcrawl.crawler2hep import item_to_hep
 from hepcrawl.settings import FILES_STORE
-from hepcrawl.utils import get_file_name_from_url
+from hepcrawl.utils import RecordFile
 
 
 class FftFilesPipeline(FilesPipeline):
-    """Download all the FFT files provided by record."""
+    """Download all the FFT files provided by record.
 
-    def __init__(self, *args, **kwargs):
-        super(FftFilesPipeline, self).__init__(FILES_STORE)
+    Note:
+
+        This pipeline only runs if the spider returns a ``ParsedItem`` that has a
+        ``file_urls`` property.
+    """
+
+    def __init__(self, store_uri, *args, **kwargs):
+        store_uri = store_uri or FILES_STORE
+        super(FftFilesPipeline, self).__init__(*args, store_uri=store_uri, **kwargs)
 
     def get_media_requests(self, item, info):
         """Download FFT files using FTP."""
@@ -44,24 +51,25 @@ def get_media_requests(self, item, info):
             meta=item.ftp_params,
         )
 
+    def get_absolute_file_path(self, path):
+        return os.path.abspath(
+            os.path.join(
+                self.store.basedir,
+                path
+            )
+        )
+
     def item_completed(self, results, item, info):
         """Create a map that connects file names with downloaded files."""
-        def _get_absolute_local_file_path(path):
-            return os.path.abspath(
-                os.path.join(
-                    FILES_STORE,
-                    path
-                )
+        record_files = [
+            RecordFile(
+                path=self.get_absolute_file_path(result_data['path']),
+                name=os.path.basename(result_data['url']),
             )
-
-        map_file_names_paths = {}
-        for ok, result_data in results:
-            if ok:
-                map_file_names_paths[
-                    get_file_name_from_url(result_data['url'])
-                ] = _get_absolute_local_file_path(result_data['path'])
-
-        item.file_paths = map_file_names_paths
+            for ok, result_data in results
+            if ok
+        ]
+        item.record_files = record_files
 
         return item
 
@@ -76,16 +84,11 @@ def open_spider(self, spider):
         self.results_data = []
 
     def _post_enhance_item(self, item, spider):
-        fft_file_paths = item.file_paths
-        item_format = item.item_format
-        item = item.item if item.item else item
         source = spider.name
 
-        return to_hep(
+        return item_to_hep(
             item=item,
             source=source,
-            item_format=item_format,
-            fft_file_paths=fft_file_paths,
         )
 
     def process_item(self, item, spider):
diff --git a/hepcrawl/spiders/desy_spider.py b/hepcrawl/spiders/desy_spider.py
index 5ec79da8..2167d0a2 100644
--- a/hepcrawl/spiders/desy_spider.py
+++ b/hepcrawl/spiders/desy_spider.py
@@ -15,6 +15,7 @@
 from lxml import etree
 
 from dojson.contrib.marc21.utils import create_record
+from six.moves import urllib
 
 from scrapy import Request
 from scrapy.spiders import Spider
@@ -24,8 +25,6 @@
 from hepcrawl.utils import (
     ftp_list_files,
     ftp_connection_info,
-    get_absolute_file_path,
-    get_file_name_from_url,
     ParsedItem,
 )
 
@@ -38,13 +37,14 @@ class DesySpider(Spider):
 
     Examples:
         To run a crawl, you need to pass FTP connection information via
-        ``ftp_host`` and ``ftp_netrc``::
+        ``ftp_host`` and ``ftp_netrc``; if ``ftp_folder`` is not passed, it will fall back
+        to ``/DESY``::
 
            $ scrapy crawl desy -a 'ftp_host=ftp.example.com' -a 'ftp_netrc=/path/to/netrc'
 
-        To run a crawl on local folder, you need to pass the absolute ``package_path``::
+        To run a crawl on a local folder, you need to pass the absolute ``source_folder``::
 
-           $ scrapy crawl desy -a 'package_path=/path/to/package_dir'
+           $ scrapy crawl desy -a 'source_folder=/path/to/package_dir'
     """
     name = 'desy'
     custom_settings = {}
@@ -52,118 +52,147 @@ class DesySpider(Spider):
     def __init__(
         self,
-        package_path=None,
-        ftp_folder='DESY',
+        source_folder=None,
+        ftp_folder='/DESY',
         ftp_host=None,
         ftp_netrc=None,
+        destination_folder='/tmp/DESY',
         *args,
         **kwargs
     ):
-        """Constructor of ``Desy`` spider."""
         super(DesySpider, self).__init__(*args, **kwargs)
         self.ftp_folder = ftp_folder
        self.ftp_host = ftp_host
         self.ftp_netrc = ftp_netrc
-        self.package_path = package_path
-        self.target_folder = '/tmp/DESY'
+        self.source_folder = source_folder
+        self.destination_folder = destination_folder
         self.ftp_enabled = True if self.ftp_host else False
-        if not os.path.exists(self.target_folder):
-            os.makedirs(self.target_folder)
+        if not os.path.exists(self.destination_folder):
+            os.makedirs(self.destination_folder)
+
+    @staticmethod
+    def _list_xml_files_paths(list_files_paths):
+        return [
+            xml_file
+            for xml_file in list_files_paths
+            if xml_file.endswith('.xml')
+        ]
+
+    def crawl_local_directory(self):
+        file_names = os.listdir(self.source_folder)
+        xml_file_names = self._list_xml_files_paths(file_names)
+
+        for file_name in xml_file_names:
+            file_path = os.path.join(self.source_folder, file_name)
+            self.log('Local: Try to crawl local file: {0}'.format(file_path))
+            yield Request(
+                'file://{0}'.format(file_path),
+                callback=self.parse,
+            )
+
+    def crawl_ftp_directory(self):
+        ftp_host, ftp_params = ftp_connection_info(self.ftp_host, self.ftp_netrc)
+
+        remote_files_paths = ftp_list_files(
+            self.ftp_folder,
+            destination_folder=self.destination_folder,
+            ftp_host=ftp_host,
+            user=ftp_params['ftp_user'],
+            password=ftp_params['ftp_password'],
+            only_missing_files=False,
+        )
+
+        xml_remote_files_paths = self._list_xml_files_paths(remote_files_paths)
+
+        for remote_file in xml_remote_files_paths:
+            self.log('Remote: Try to crawl file from FTP: {0}'.format(remote_file))
+            remote_file = str(remote_file)
+            ftp_params['ftp_local_filename'] = os.path.join(
+                self.destination_folder,
+                os.path.basename(remote_file),
+            )
+            remote_url = 'ftp://{0}/{1}'.format(ftp_host, remote_file)
+            yield Request(
+                str(remote_url),
+                meta=ftp_params,
+                callback=self.handle_package_ftp,
+            )
+
+    def handle_package_ftp(self, response):
+        """Yield every XML file found.
+
+        This is an intermediate step before calling ``DesySpider.parse`` to handle the
+        record collections downloaded from FTP.
+ """ + self.log('Visited url {}'.format(response.url)) + file_path = response.body + yield Request( + 'file://{0}'.format(file_path), + meta={'source_folder': file_path}, + callback=self.parse, + ) def start_requests(self): """List selected folder on remote FTP and yield files.""" - def _list_xml_files_paths(list_files_paths): - return [ - xml_file - for xml_file in list_files_paths - if xml_file.endswith('.xml') - ] - - if self.package_path: - file_names = os.listdir(self.package_path) - xml_file_names = _list_xml_files_paths(file_names) - for file_name in xml_file_names: - file_path = os.path.join(self.package_path, file_name) - self.log('Local: Try to crawl local file: {0}'.format(file_path)) - yield Request( - 'file://{0}'.format(file_path), - callback=self.parse, - ) + if self.source_folder: + requests = self.crawl_local_directory() else: - ftp_host, ftp_params = ftp_connection_info(self.ftp_host, self.ftp_netrc) - - remote_files_paths = ftp_list_files( - self.ftp_folder, - target_folder=self.target_folder, - server=ftp_host, - user=ftp_params['ftp_user'], - password=ftp_params['ftp_password'], - lst_missing_files=False, - ) + requests = self.crawl_ftp_directory() - xml_remote_files_paths = _list_xml_files_paths(remote_files_paths) + for request in requests: + yield request - for remote_file in xml_remote_files_paths: - self.log('Remote: Try to crawl file from FTP: {0}'.format(remote_file)) - remote_file = str(remote_file) - ftp_params['ftp_local_filename'] = os.path.join( - self.target_folder, - os.path.basename(remote_file), - ) - remote_url = 'ftp://{0}/{1}'.format(ftp_host, remote_file) - yield Request( - str(remote_url), - meta=ftp_params, - callback=self.handle_package_ftp, - ) + @staticmethod + def _get_full_uri(current_path, base_url, schema, hostname=''): + if os.path.isabs(current_path): + full_path = current_path + else: + full_path = os.path.join(base_url, current_path) + + return '{schema}://{hostname}{full_path}'.format(**vars()) def parse(self, response): - """Parse a ``Desy`` XML file into a HEP record.""" + """Parse a ``Desy`` XML file into a ``ParsedItem``.""" + self.log('Got record from url/path: {0}'.format(response.url)) self.log('FTP enabled: {0}'.format(self.ftp_enabled)) ftp_params = None if self.ftp_enabled: - ftp_host, ftp_params = ftp_connection_info(self.ftp_host, self.ftp_netrc) - prefix_url = '{0}://{1}/'.format('ftp', ftp_host) + hostname, ftp_params = ftp_connection_info(self.ftp_host, self.ftp_netrc) + base_url = self.ftp_folder + url_schema = 'ftp' else: - prefix_url = '{0}://{1}'.format( - 'file', - '/code/tests/functional/desy/fixtures/ftp_server/', # Temporary - Must be absolute path - ) + base_url = os.path.dirname(urllib.parse.urlparse(response.url).path) + url_schema = 'file' + hostname = None marcxml_records = self._get_marcxml_records(response.body) hep_records = self._hep_records_from_marcxml(marcxml_records) - list_fft_old_links = [] for hep_record in hep_records: - list_fft_old_links.extend(hep_record['_fft']) - list_file_urls = [ - '{0}{1}'.format(prefix_url, fft_link['path']) - for fft_link in hep_record['_fft'] + self._get_full_uri( + current_path=fft_path['path'], + base_url=base_url, + schema=url_schema, + hostname=hostname, + ) + for fft_path in hep_record['_fft'] ] parsed_item = ParsedItem( - item=hep_record, + record=hep_record, file_urls=list_file_urls, ftp_params=ftp_params, - item_format='hep', + record_format='hep', ) yield parsed_item - def handle_package_ftp(self, response): - """Yield every XML file found.""" - 
-        self.log('Visited url {}'.format(response.url))
-        file_path = response.body
-        yield Request(
-            'file://{0}'.format(file_path),
-            meta={'package_path': file_path}
-        )
-
-    def _get_marcxml_records(self, response_body):
+    @staticmethod
+    def _get_marcxml_records(response_body):
         root = etree.fromstring(response_body)
         list_items = root.findall('.//{http://www.loc.gov/MARC21/slim}record')
         if not list_items:
@@ -171,15 +200,16 @@ def _get_marcxml_records(self, response_body):
 
         return [etree.tostring(item) for item in list_items]
 
-    def _hep_records_from_marcxml(self, list_marcxml_records):
-        def _create_json_record(str_xml_record):
-            object_record = create_record(etree.XML(str_xml_record))
+    @staticmethod
+    def _hep_records_from_marcxml(marcxml_records):
+        def _create_json_record(xml_record):
+            object_record = create_record(etree.XML(xml_record))
             dojson_record = hep.do(object_record)
             return dojson_record
 
-        list_hep_records = []
-        for str_xml_record in list_marcxml_records:
-            json_record = _create_json_record(str_xml_record)
-            list_hep_records.append(json_record)
+        hep_records = []
+        for xml_record in marcxml_records:
+            json_record = _create_json_record(xml_record)
+            hep_records.append(json_record)
 
-        return list_hep_records
+        return hep_records
diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py
index 499e3edc..d7ed1715 100644
--- a/hepcrawl/spiders/edp_spider.py
+++ b/hepcrawl/spiders/edp_spider.py
@@ -66,11 +66,11 @@ class EDPSpider(Jats, XMLFeedSpider):
 
     To run an ``EDPSpider`` using ``rich`` format::
 
-        $ scrapy crawl EDP -a package_path=file://`pwd`/tests/responses/edp/test_rich.tar.bz2
+        $ scrapy crawl EDP -a source_folder=file://`pwd`/tests/responses/edp/test_rich.tar.bz2
 
     To run an ``EDPSpider`` using ``gz`` format::
 
-        $ scrapy crawl EDP -a package_path=file://`pwd`/tests/responses/edp/test_gz.tar.gz
+        $ scrapy crawl EDP -a source_folder=file://`pwd`/tests/responses/edp/test_gz.tar.gz
 
     Todo:
 
@@ -145,9 +145,9 @@ def start_requests(self):
         ftp_host, ftp_params = ftp_connection_info(
             self.ftp_host, self.ftp_netrc)
         _, new_files = ftp_list_files(
-            self.ftp_folder,
-            self.target_folder,
-            server=ftp_host,
+            server_folder=self.ftp_folder,
+            destination_folder=self.target_folder,
+            ftp_host=ftp_host,
             user=ftp_params['ftp_user'],
             password=ftp_params['ftp_password']
         )
@@ -176,7 +176,7 @@ def handle_package_ftp(self, response):
         for xml_file in xml_files:
             yield Request(
                 "file://{0}".format(xml_file),
-                meta={"package_path": zip_filepath}
+                meta={"source_folder": zip_filepath}
             )
 
     def handle_package_file(self, response):
@@ -189,7 +189,7 @@ def handle_package_file(self, response):
         for xml_file in xml_files:
             request = Request(
                 "file://{0}".format(xml_file),
-                meta={"package_path": zip_filepath}
+                meta={"source_folder": zip_filepath}
             )
             if "xml_rich" in xml_file:
                 request.meta["rich"] = True
diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py
index 78fdd5fd..7dfbb9bb 100644
--- a/hepcrawl/spiders/elsevier_spider.py
+++ b/hepcrawl/spiders/elsevier_spider.py
@@ -181,7 +181,7 @@ def handle_package(self, response):
             xml_url = u"file://{0}".format(os.path.abspath(xml_file))
             yield Request(
                 xml_url,
-                meta={"package_path": zip_filepath,
+                meta={"source_folder": zip_filepath,
                       "xml_url": xml_url},
             )
 
diff --git a/hepcrawl/spiders/wsp_spider.py b/hepcrawl/spiders/wsp_spider.py
index 22d418a9..3e6ec655 100644
--- a/hepcrawl/spiders/wsp_spider.py
+++ b/hepcrawl/spiders/wsp_spider.py
@@ -72,7 +72,7 @@ class WorldScientificSpider(Jats, XMLFeedSpider):
         'rapid-communications'
     ]
 
-    def __init__(self, package_path=None, ftp_folder="WSP", ftp_host=None, ftp_netrc=None, *args, **kwargs):
+    def __init__(self, package_path=None, ftp_folder="/WSP", ftp_host=None, ftp_netrc=None, *args, **kwargs):
         """Construct WSP spider."""
         super(WorldScientificSpider, self).__init__(*args, **kwargs)
         self.ftp_folder = ftp_folder
@@ -98,8 +98,8 @@ def start_requests(self):
 
         new_files_paths = ftp_list_files(
             self.ftp_folder,
-            target_folder=self.target_folder,
-            server=ftp_host,
+            destination_folder=self.target_folder,
+            ftp_host=ftp_host,
             user=ftp_params['ftp_user'],
             password=ftp_params['ftp_password']
         )
@@ -127,7 +127,7 @@ def handle_package_ftp(self, response):
         for xml_file in xml_files:
             yield Request(
                 "file://{0}".format(xml_file),
-                meta={"package_path": zip_filepath}
+                meta={"source_folder": zip_filepath}
             )
 
     def handle_package_file(self, response):
@@ -139,7 +139,7 @@ def handle_package_file(self, response):
         for xml_file in xml_files:
             yield Request(
                 "file://{0}".format(xml_file),
-                meta={"package_path": zip_filepath}
+                meta={"source_folder": zip_filepath}
             )
 
     def parse_node(self, response, node):
diff --git a/hepcrawl/utils.py b/hepcrawl/utils.py
index 71ff3aa6..96dc130e 100644
--- a/hepcrawl/utils.py
+++ b/hepcrawl/utils.py
@@ -31,6 +31,10 @@
 INST_PHRASES = ['for the development', ]
 
 
+class PathDoesNotExist(IOError):
+    pass
+
+
 def unzip_xml_files(filename, target_folder):
     """Unzip files (XML only) into target folder."""
     z = ZipFile(filename)
@@ -58,15 +62,19 @@ def ftp_connection_info(ftp_host, netrc_file, passive_mode=False):
 
 
 def ftp_list_files(
-    server_folder,
-    server,
-    user,
-    password,
-    target_folder=None,
-    passive_mode=False,
-    lst_missing_files=True,
+    server_folder,
+    ftp_host,
+    user,
+    password,
+    destination_folder=None,
+    passive_mode=False,
+    only_missing_files=True,
 ):
-    """List files from given FTP's server folder to target folder."""
+    """List the files in the given folder of the FTP server.
+
+    Params:
+        server_folder(str): remote folder to list.
+        ftp_host(str): host name of the FTP server.
+        user(str): FTP user name.
+        password(str): FTP password.
+        destination_folder(str): local folder the files would be downloaded to.
+        passive_mode(bool): whether to use FTP passive mode.
+        only_missing_files(bool): if ``True``, return only the files not yet present in
+            ``destination_folder``.
+    """
     session_factory = ftputil.session.session_factory(
         base_class=ftplib.FTP,
         port=21,
@@ -74,10 +82,10 @@
         encrypt_data_channel=True,
     )
 
-    with ftputil.FTPHost(server, user, password, session_factory=session_factory) as host:
-        file_names = host.listdir(os.path.join(host.curdir, '/', server_folder))
-        if lst_missing_files:
-            return list_missing_files(server_folder, target_folder, file_names)
+    with ftputil.FTPHost(ftp_host, user, password, session_factory=session_factory) as host:
+        file_names = host.listdir(os.path.join(host.curdir, server_folder))
+        if only_missing_files:
+            return list_missing_files(server_folder, destination_folder, file_names)
         else:
             return [
                 os.path.join(
@@ -340,37 +348,69 @@ def get_license_by_text(license_text):
     return license
 
 
-def get_file_name_from_url(url):
-    return url.rsplit('/', 1)[-1]
-
-
 def get_absolute_file_path(file_path):
     """Returns the absolute path of a relative path."""
     return os.path.abspath(file_path)
 
 
+class RecordFile(object):
+    """Metadata of a file needed for a record.
+
+    Params:
+        path(str): local path to the file.
+        name(str): optional name of the file; if not passed, the basename of ``path`` is
+            used.
+
+    Raises:
+        PathDoesNotExist: if the given ``path`` does not exist.
+    """
+    def __init__(self, path, name=None):
+        self.path = path
+        if not os.path.exists(self.path):
+            raise PathDoesNotExist("The given record file path '%s' does not exist."
+                                   % self.path)
+
+        if name is None:
+            name = os.path.basename(path)
+
+        self.name = name
+
+
 class ParsedItem(dict):
-    """Generate interface to communicate Spider-Pipelines"""
+    """Each of the individual items returned by the spider to the pipeline.
+
+    Params:
+        record(dict): information about the crawled record, might be in different formats.
+        record_format(str): format of the above record, for example ``"hep"`` or ``"hepcrawl"``.
+        file_urls(list(str)): URLs to the files to be downloaded by ``FftFilesPipeline``.
+        ftp_params(dict): parameters for the ``FftFilesPipeline`` to be able to connect to the
+            FTP server, if any.
+        record_files(list(RecordFile)): files attached to the record, usually populated by
+            ``FftFilesPipeline`` from the ``file_urls`` parameter.
+    """
     def __init__(
         self,
-        item,
+        record,
+        record_format,
         file_urls=None,
-        item_format=None,
         ftp_params=None,
-        file_paths=None,
+        record_files=None,
         **kwargs
     ):
         super(ParsedItem, self).__init__(
-            item=item,
+            record=record,
+            record_format=record_format,
             file_urls=file_urls,
-            item_format=item_format,
             ftp_params=ftp_params,
-            file_paths=file_paths,
+            record_files=record_files,
             **kwargs
         )
-        self.item = item
-        self.file_urls = file_urls
-        self.format = item_format
-        self.ftp_params = ftp_params
-        self.file_paths = file_paths
-        self.__dict__ = self
+
+    def __getattr__(self, key):
+        if key not in self:
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (self.__class__.__name__, key)
+            )
+
+        return self[key]
+
+    def __setattr__(self, key, value):
+        self[key] = value
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
index 672b8248..4095d62f 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
@@ -124,7 +124,7 @@
   INSPIRE:HEP
-  DESY/FFT/test_fft_1.txt
+  FFT/test_fft_1.txt
   00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
   .txt
   cNFW_rogue_curves
@@ -135,7 +135,7 @@
-  DESY/FFT/test_fft_2.txt
+  FFT/test_fft_2.txt
   00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
   .txt
   scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
index b09a992d..fa395bfc 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
@@ -124,7 +124,7 @@
   INSPIRE:HEP
-  DESY/FFT/test_fft_1.txt
+  FFT/test_fft_1.txt
   00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
   .txt
   cNFW_rogue_curves
@@ -135,7 +135,7 @@
-  DESY/FFT/test_fft_2.txt
+  FFT/test_fft_2.txt
   00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
   .txt
   scalingRelations_DutBeh_DC14_all_Oh
diff --git a/tests/functional/desy/test_desy.py b/tests/functional/desy/test_desy.py
index 64a5c209..5c3f4929 100644
--- a/tests/functional/desy/test_desy.py
+++ b/tests/functional/desy/test_desy.py
@@ -111,7 +111,7 @@ def set_up_local_environment():
         'CRAWLER_HOST_URL': 'http://scrapyd:6800',
         'CRAWLER_PROJECT': 'hepcrawl',
         'CRAWLER_ARGUMENTS': {
-            'package_path': package_location,
+            'source_folder': package_location,
         }
     }
diff --git a/tests/functional/wsp/test_wsp.py b/tests/functional/wsp/test_wsp.py
index a0411b8e..8c7b060a 100644
--- a/tests/functional/wsp/test_wsp.py
+++ b/tests/functional/wsp/test_wsp.py
@@ -72,7 +72,7 @@ def set_up_local_environment():
         'CRAWLER_HOST_URL': 'http://scrapyd:6800',
         'CRAWLER_PROJECT': 'hepcrawl',
         'CRAWLER_ARGUMENTS': {
-            'package_path': package_location,
+            'source_folder': package_location,
         }
     }
diff --git a/tests/unit/test_crawler2hep.py b/tests/unit/test_crawler2hep.py
index 95375ebf..088178f1 100644
--- a/tests/unit/test_crawler2hep.py
+++ b/tests/unit/test_crawler2hep.py
@@ -12,14 +12,14 @@
 import pytest
 import yaml
 
-from hepcrawl.crawler2hep import crawler2hep
+from hepcrawl.crawler2hep import hepcrawl_to_hep
 from hepcrawl.testlib.fixtures import get_test_suite_path
 
 
 def load_file(file_name):
     path = get_test_suite_path(
         'responses',
-        'crawler2hep',
+        'hepcrawl_to_hep',
         file_name,
     )
     with open(path) as input_data:
@@ -52,7 +52,7 @@ def test_generic_crawler_record(
     input_generic_crawler_record, expected_generic_crawler_record
 ):
-    produced_record = crawler2hep(input_generic_crawler_record)
+    produced_record = hepcrawl_to_hep(input_generic_crawler_record)
     assert produced_record == expected_generic_crawler_record
 
 
@@ -60,5 +60,5 @@ def test_no_document_type(
     input_no_document_type_record, expected_no_document_type_record
 ):
-    produced_record = crawler2hep(input_no_document_type_record)
+    produced_record = hepcrawl_to_hep(input_no_document_type_record)
     assert produced_record == expected_no_document_type_record
diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py
index 9d88d5ad..5dba9990 100644
--- a/tests/unit/test_edp.py
+++ b/tests/unit/test_edp.py
@@ -359,7 +359,7 @@ def test_handle_package_ftp(tarbzfile):
     request = spider.handle_package_ftp(response).next()
 
     assert isinstance(request, Request)
-    assert request.meta["package_path"] == tarbzfile
+    assert request.meta["source_folder"] == tarbzfile
 
 
 def test_no_dois_jats():
diff --git a/tests/unit/test_elsevier.py b/tests/unit/test_elsevier.py
index 109f3d3f..d26e52fb 100644
--- a/tests/unit/test_elsevier.py
+++ b/tests/unit/test_elsevier.py
@@ -1594,11 +1594,11 @@ def test_handle_package(handled_package):
     for astro, nima in zip(astropart, nima):
         assert nima
         assert astro
-        assert astro.meta["package_path"] == "tests/unit/responses/elsevier/fake_astropart.zip"
+        assert astro.meta["source_folder"] == "tests/unit/responses/elsevier/fake_astropart.zip"
         url_to_match = u'file:///tmp/elsevier_fake_astropart_*/0927-6505/aip/S0927650515001656/S0927650515001656.xml'
         assert astro.meta["xml_url"] == fnmatch.filter([astro.meta["xml_url"]], url_to_match)[0]
 
-        assert nima.meta["package_path"] == "tests/unit/responses/elsevier/fake_nima.zip"
+        assert nima.meta["source_folder"] == "tests/unit/responses/elsevier/fake_nima.zip"
         url_to_match = u'file:///tmp/elsevier_fake_nima_*/0168-9002/S0168900215X00398/S0168900215015636/S0168900215015636.xml'
         assert nima.meta["xml_url"] == fnmatch.filter([nima.meta["xml_url"]], url_to_match)[0]