diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py
index afbf94b1..add53f70 100644
--- a/hepcrawl/crawler2hep.py
+++ b/hepcrawl/crawler2hep.py
@@ -15,34 +15,138 @@
 from __future__ import absolute_import, division, print_function
 
+import os
+import datetime
+
 from inspire_schemas.api import LiteratureBuilder
 
 from hepcrawl.utils import get_file_name_from_url
 
 
-def _update_record_fft_links(record, map_fft_file_paths):
-    def _list_new_fft_links(old_fft_links, map_fft_file_paths):
-        new_fft_links = []
-        for fft_link in old_fft_links:
-            file_name = get_file_name_from_url(fft_link['path'])
-            if file_name in map_fft_file_paths:
-                new_fft_links.append(
-                    {
-                        'path': map_fft_file_paths[file_name],
-                    }
-                )
+def _update_record_fft(record, index_fft_file_paths):
+    def _update_fft_fields(fft_fields, index_fft_file_paths):
+        new_fft_fields = []
+        for fft_field in fft_fields:
+            file_name = get_file_name_from_url(fft_field['path'])
+            if file_name in index_fft_file_paths:
+                fft_field['path'] = index_fft_file_paths[file_name]
+                new_fft_fields.append(fft_field)
 
-        return new_fft_links
+        return new_fft_fields
 
-    old_fft_links = record['_fft']
-    record['_fft'] = _list_new_fft_links(old_fft_links, map_fft_file_paths)
+    record['_fft'] = _update_fft_fields(record['_fft'], index_fft_file_paths)
 
     return record
 
 
-def to_hep(item, item_format='hepcrawl', fft_file_paths=None):
+def _has_publication_info(item):
+    """Check whether the item carries any publication info."""
+    return item.get('pubinfo_freetext') or item.get('journal_volume') or \
+        item.get('journal_title') or \
+        item.get('journal_year') or \
+        item.get('journal_issue') or \
+        item.get('journal_fpage') or \
+        item.get('journal_lpage') or \
+        item.get('journal_artid') or \
+        item.get('journal_doctype')
+
+
+def _filter_fields(item, keys):
+    """Filter away keys."""
+    for key in keys:
+        item.pop(key, None)
+
+
+def _normalize_hepcrawl_record(item, source):
+    if 'related_article_doi' in item:
+        item['dois'] += item.pop('related_article_doi', [])
+
+    item['titles'] = [{
+        'title': item.pop('title', ''),
+        'subtitle': item.pop('subtitle', ''),
+        'source': source,
+    }]
+
+    item['abstracts'] = [{
+        'value': item.pop('abstract', ''),
+        'source': source,
+    }]
+
+    item['imprints'] = [{
+        'date': item.pop('date_published', ''),
+    }]
+
+    item['copyright'] = [{
+        'holder': item.pop('copyright_holder', ''),
+        'year': item.pop('copyright_year', ''),
+        'statement': item.pop('copyright_statement', ''),
+        'material': item.pop('copyright_material', ''),
+    }]
+
+    if _has_publication_info(item):
+        item['publication_info'] = [{
+            'journal_title': item.pop('journal_title', ''),
+            'journal_volume': item.pop('journal_volume', ''),
+            'journal_issue': item.pop('journal_issue', ''),
+            'artid': item.pop('journal_artid', ''),
+            'page_start': item.pop('journal_fpage', ''),
+            'page_end': item.pop('journal_lpage', ''),
+            'note': item.pop('journal_doctype', ''),
+            'pubinfo_freetext': item.pop('pubinfo_freetext', ''),
+            'pubinfo_material': item.pop('pubinfo_material', ''),
+        }]
+        if item.get('journal_year'):
+            item['publication_info'][0]['year'] = int(
+                item.pop('journal_year')
+            )
+
+    # Remove any remaining journal fields.
+    _filter_fields(item, [
+        'journal_title',
+        'journal_volume',
+        'journal_year',
+        'journal_issue',
+        'journal_fpage',
+        'journal_lpage',
+        'journal_doctype',
+        'journal_artid',
+        'pubinfo_freetext',
+        'pubinfo_material',
+    ])
+
+    return item
+
+
+def _generate_acquisition_source(crawler_record, source):
+    crawler_record['acquisition_source'] = {
+        'source': source,
+        'method': 'hepcrawl',
+        'datetime': datetime.datetime.now().isoformat(),
+        'submission_number': os.environ.get('SCRAPY_JOB', ''),
+    }
+    return crawler_record
+
+
+def to_hep(
+    item,
+    source,
+    item_format='hepcrawl',
+    fft_file_paths=None,
+):
+    item = _generate_acquisition_source(
+        crawler_record=item,
+        source=source,
+    )
+
     if item_format == 'hep':
-        return hep2hep(item, fft_file_paths)
+        return hep2hep(
+            crawler_record=item,
+            fft_file_paths=fft_file_paths,
+        )
     elif item_format == 'hepcrawl':
+        item = _normalize_hepcrawl_record(
+            item=item,
+            source=source,
+        )
         return crawler2hep(dict(item))
     else:
         raise Exception('Unknown item_format::{}'.format(item_format))
@@ -50,7 +154,7 @@ def to_hep(item, item_format='hepcrawl', fft_file_paths=None):
 
 def hep2hep(crawler_record, fft_file_paths):
     if fft_file_paths:
-        crawler_record = _update_record_fft_links(crawler_record, fft_file_paths)
+        crawler_record = _update_record_fft(crawler_record, fft_file_paths)
 
     return crawler_record
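Editorial note: `to_hep` now requires the spider name as an explicit `source` argument and stamps the `acquisition_source` block itself before dispatching on `item_format`. A minimal sketch of the new call contract, with a hypothetical record dict that is not taken from this patch:

    from hepcrawl.crawler2hep import to_hep

    # Hypothetical 'hepcrawl'-format item, as loaded by a spider.
    item = {
        'title': 'A measurement',
        'abstract': 'An abstract.',
        'journal_title': 'Phys. Rev. D',
        'journal_year': '2017',
    }

    # 'source', 'method', 'datetime' and 'submission_number' are filled in
    # by _generate_acquisition_source(); hepcrawl-format items are then
    # normalized inside to_hep() instead of in the pipeline.
    record = to_hep(item=item, source='APS', item_format='hepcrawl')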
diff --git a/hepcrawl/pipelines.py b/hepcrawl/pipelines.py
index 2244d255..05b61361 100644
--- a/hepcrawl/pipelines.py
+++ b/hepcrawl/pipelines.py
@@ -15,7 +15,6 @@
 from __future__ import absolute_import, division, print_function
 
-import datetime
 import os
 
 import requests
 
@@ -30,24 +29,6 @@
 from hepcrawl.utils import get_file_name_from_url
 
 
-def has_publication_info(item):
-    """If any publication info."""
-    return item.get('pubinfo_freetext') or item.get('journal_volume') or \
-        item.get('journal_title') or \
-        item.get('journal_year') or \
-        item.get('journal_issue') or \
-        item.get('journal_fpage') or \
-        item.get('journal_lpage') or \
-        item.get('journal_artid') or \
-        item.get('journal_doctype')
-
-
-def filter_fields(item, keys):
-    """Filter away keys."""
-    for key in keys:
-        item.pop(key, None)
-
-
 class FftFilesPipeline(FilesPipeline):
     """Download all the FFT files provided by record."""
 
@@ -57,10 +38,10 @@ def __init__(self, *args, **kwargs):
     def get_media_requests(self, item, info):
         """Download FFT files using FTP."""
         if item.get('file_urls'):
-            for fft_url in item.get('file_urls'):
+            for fft_url in item.file_urls:
                 yield Request(
                     url=fft_url,
-                    meta=item['ftp_params'],
+                    meta=item.ftp_params,
                 )
 
     def item_completed(self, results, item, info):
@@ -80,7 +61,7 @@ def _get_absolute_local_file_path(path):
                 get_file_name_from_url(result_data['url'])
             ] = _get_absolute_local_file_path(result_data['path'])
 
-        item['file_paths'] = map_file_names_paths
+        item.file_paths = map_file_names_paths
 
         return item
 
@@ -95,92 +76,18 @@ def open_spider(self, spider):
         self.results_data = []
 
     def _post_enhance_item(self, item, spider):
-        def _normalize_hepcrawl_record(item, source):
-            if 'related_article_doi' in item:
-                item['dois'] += item.pop('related_article_doi', [])
-
-            item['titles'] = [{
-                'title': item.pop('title', ''),
-                'subtitle': item.pop('subtitle', ''),
-                'source': source,
-            }]
-
-            item['abstracts'] = [{
-                'value': item.pop('abstract', ''),
-                'source': source,
-            }]
-
-            item['imprints'] = [{
-                'date': item.pop('date_published', ''),
-            }]
-
-            item['copyright'] = [{
-                'holder': item.pop('copyright_holder', ''),
-                'year': item.pop('copyright_year', ''),
-                'statement': item.pop('copyright_statement', ''),
-                'material': item.pop('copyright_material', ''),
-            }]
-
-            if has_publication_info(item):
-                item['publication_info'] = [{
-                    'journal_title': item.pop('journal_title', ''),
-                    'journal_volume': item.pop('journal_volume', ''),
-                    'journal_issue': item.pop('journal_issue', ''),
-                    'artid': item.pop('journal_artid', ''),
-                    'page_start': item.pop('journal_fpage', ''),
-                    'page_end': item.pop('journal_lpage', ''),
-                    'note': item.pop('journal_doctype', ''),
-                    'pubinfo_freetext': item.pop('pubinfo_freetext', ''),
-                    'pubinfo_material': item.pop('pubinfo_material', ''),
-                }]
-                if item.get('journal_year'):
-                    item['publication_info'][0]['year'] = int(
-                        item.pop('journal_year')
-                    )
-
-            # Remove any fields
-            filter_fields(item, [
-                'journal_title',
-                'journal_volume',
-                'journal_year',
-                'journal_issue',
-                'journal_fpage',
-                'journal_lpage',
-                'journal_doctype',
-                'journal_artid',
-                'pubinfo_freetext',
-                'pubinfo_material',
-            ])
-
-            return item
-
-        fft_file_paths = item.get('file_paths')
-        item_format = item.get('format', 'hepcrawl')
-        item = item.get('record_item') if item.get('record_item') else item
-        item = self._generate_record_meta(item, spider)
+        fft_file_paths = item.file_paths
+        item_format = item.item_format
+        item = item.item if item.item else item
         source = spider.name
 
-        if item_format != 'hep':
-            item = _normalize_hepcrawl_record(
-                item=item,
-                source=source,
-            )
-
         return to_hep(
             item=item,
+            source=source,
             item_format=item_format,
             fft_file_paths=fft_file_paths,
         )
 
-    def _generate_record_meta(self, json_record, spider):
-        json_record['acquisition_source'] = {
-            'source': spider.name,
-            'method': 'hepcrawl',
-            'datetime': datetime.datetime.now().isoformat(),
-            'submission_number': os.environ.get('SCRAPY_JOB', ''),
-        }
-        return json_record
-
     def process_item(self, item, spider):
         """Convert internal format to INSPIRE data model."""
         self.count += 1
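Editorial note: the two pipelines now communicate through `ParsedItem` attributes (`item.file_urls`, `item.ftp_params`, `item.file_paths`, `item.item_format`) instead of plain dict keys. A rough sketch of the hand-off, with hypothetical values:

    from hepcrawl.crawler2hep import to_hep
    from hepcrawl.utils import ParsedItem

    parsed_item = ParsedItem(
        item={'_fft': [{'path': 'DESY/FFT/test_fft_1.txt'}]},  # hypothetical
        file_urls=['ftp://example.org/DESY/FFT/test_fft_1.txt'],
        item_format='hep',
    )

    # FftFilesPipeline.item_completed() fills this file-name -> local-path
    # index once the downloads finish.
    parsed_item.file_paths = {'test_fft_1.txt': '/tmp/file_urls/full/abc.txt'}

    # InspireCeleryPushPipeline._post_enhance_item() then reduces to:
    record = to_hep(
        item=parsed_item.item,
        source='desy',  # spider.name
        item_format=parsed_item.item_format,
        fft_file_paths=parsed_item.file_paths,
    )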
diff --git a/hepcrawl/spiders/alpha_spider.py b/hepcrawl/spiders/alpha_spider.py
index 2ab883f3..ab151fa6 100644
--- a/hepcrawl/spiders/alpha_spider.py
+++ b/hepcrawl/spiders/alpha_spider.py
@@ -20,7 +20,10 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import has_numbers
+from hepcrawl.utils import (
+    has_numbers,
+    ParsedItem,
+)
 
 
 class AlphaSpider(CrawlSpider):
@@ -145,4 +148,9 @@ def parse(self, response):
             record.add_value('source', 'Alpha experiment')
             record.add_value('collections', ['HEP', 'THESIS'])
 
-            yield record.load_item()
+            parsed_item = ParsedItem(
+                item=record.load_item(),
+                item_format='hepcrawl',
+            )
+
+            yield parsed_item
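Editorial note: every spider below makes the same mechanical change: instead of handing back the bare loaded item, it wraps it in a `ParsedItem` tagged `item_format='hepcrawl'` (the DESY spider uses `'hep'`, since its records already leave dojson in the final schema). The flag is what `to_hep` dispatches on:

    from hepcrawl.utils import ParsedItem

    # Hypothetical payloads, for illustration only.
    crawl_item = ParsedItem(item={'title': 'A thesis'}, item_format='hepcrawl')
    desy_item = ParsedItem(item={'_fft': []}, file_urls=[], item_format='hep')

    assert crawl_item.item_format == 'hepcrawl'  # normalized via crawler2hep
    assert desy_item.item_format == 'hep'        # passed through hep2hep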
diff --git a/hepcrawl/spiders/aps_spider.py b/hepcrawl/spiders/aps_spider.py
index 496e2e8e..d15c690a 100644
--- a/hepcrawl/spiders/aps_spider.py
+++ b/hepcrawl/spiders/aps_spider.py
@@ -20,7 +20,12 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_licenses, get_nested, build_dict
+from hepcrawl.utils import (
+    get_licenses,
+    get_nested,
+    build_dict,
+    ParsedItem,
+)
 
 
 class APSSpider(Spider):
@@ -110,7 +115,13 @@ def parse(self, response):
             record.add_value('license', license)
             record.add_value('collections', ['HEP', 'Citeable', 'Published'])
-            yield record.load_item()
+
+            parsed_item = ParsedItem(
+                item=record.load_item(),
+                item_format='hepcrawl',
+            )
+
+            yield parsed_item
 
         # Pagination support. Will yield until no more "next" pages are found
         if 'Link' in response.headers:
diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py
index d82c8318..8ab0af4f 100644
--- a/hepcrawl/spiders/arxiv_spider.py
+++ b/hepcrawl/spiders/arxiv_spider.py
@@ -17,7 +17,12 @@
 from scrapy.spiders import XMLFeedSpider
 
 from ..mappings import CONFERENCE_WORDS, THESIS_WORDS
-from ..utils import coll_cleanforthe, get_licenses, split_fullname
+from hepcrawl.utils import (
+    coll_cleanforthe,
+    get_licenses,
+    split_fullname,
+    ParsedItem,
+)
 from ..items import HEPRecord
 from ..loaders import HEPLoader
 
@@ -110,8 +115,12 @@ def parse_node(self, response, node):
             )
             record.add_value('license', license)
 
-        parsed_record = dict(record.load_item())
-        return parsed_record
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
 
     def _get_authors_or_collaboration(self, node):
         """Parse authors, affiliations; extract collaboration"""
diff --git a/hepcrawl/spiders/base_spider.py b/hepcrawl/spiders/base_spider.py
index 5eb22eb7..ee3a7d47 100644
--- a/hepcrawl/spiders/base_spider.py
+++ b/hepcrawl/spiders/base_spider.py
@@ -18,7 +18,12 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_mime_type, parse_domain, get_node
+from hepcrawl.utils import (
+    get_mime_type,
+    parse_domain,
+    get_node,
+    ParsedItem,
+)
 
 
 class BaseSpider(XMLFeedSpider):
@@ -192,7 +197,13 @@ def build_item(self, response):
         record.add_value("authors", self.get_authors(node))
         record.add_value('thesis', {'degree_type': 'PhD'})
         record.add_value('collections', ['HEP', 'THESIS'])
-        return record.load_item()
+
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
 
     def scrape_for_pdf(self, response):
         """Scrape splash page for any links to PDFs.
diff --git a/hepcrawl/spiders/brown_spider.py b/hepcrawl/spiders/brown_spider.py
index 6c881252..3581ee1f 100644
--- a/hepcrawl/spiders/brown_spider.py
+++ b/hepcrawl/spiders/brown_spider.py
@@ -21,7 +21,12 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import split_fullname, parse_domain, get_mime_type
+from hepcrawl.utils import (
+    split_fullname,
+    parse_domain,
+    get_mime_type,
+    ParsedItem,
+)
 
 
 class BrownSpider(CrawlSpider):
@@ -219,4 +224,9 @@ def build_item(self, response):
         record.add_value('thesis', response.meta.get("thesis"))
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
diff --git a/hepcrawl/spiders/desy_spider.py b/hepcrawl/spiders/desy_spider.py
index 76886b23..5ec79da8 100644
--- a/hepcrawl/spiders/desy_spider.py
+++ b/hepcrawl/spiders/desy_spider.py
@@ -24,6 +24,9 @@
 from hepcrawl.utils import (
     ftp_list_files,
     ftp_connection_info,
+    get_absolute_file_path,
+    get_file_name_from_url,
+    ParsedItem,
 )
 
 
@@ -127,38 +130,29 @@ def parse(self, response):
         else:
             prefix_url = '{0}://{1}'.format(
                 'file',
-                '/code/tests/functional/desy/fixtures/ftp_server/',
-                # Temporary - normally the absolute path of fft_link upgrade schemas
+                '/code/tests/functional/desy/fixtures/ftp_server/',  # Temporary - Must be absolute path
             )
-            # prefix_url = '{0}://'.format('file')
 
         marcxml_records = self._get_marcxml_records(response.body)
-        hep_records = self._json_records_from_marcxml(marcxml_records)
-
-        # list_fft_old_links = []  # Enable after supporting FFT 2 dojson
-        list_fft_old_links = [
-            {
-                'path': 'FFT/test_fft_1.txt',
-            },
-            {
-                'path': 'FFT/test_fft_2.txt',
-            },
-        ]  # Temporary
+        hep_records = self._hep_records_from_marcxml(marcxml_records)
+
+        list_fft_old_links = []
 
         for hep_record in hep_records:
-            hep_record['_fft'] = list_fft_old_links  # Temporary
-            # list_fft_old_links.extend(json_record['_fft'])  # Enable after supporting FFT 2 dojson
+            list_fft_old_links.extend(hep_record['_fft'])
             list_file_urls = [
                 '{0}{1}'.format(prefix_url, fft_link['path'])
                 for fft_link in hep_record['_fft']
            ]
 
-            yield {
-                'record_item': hep_record,
-                'file_urls': list_file_urls,
-                'ftp_params': ftp_params,
-                'format': 'hep',
-            }
+            parsed_item = ParsedItem(
+                item=hep_record,
+                file_urls=list_file_urls,
+                ftp_params=ftp_params,
+                item_format='hep',
+            )
+
+            yield parsed_item
 
     def handle_package_ftp(self, response):
         """Yield every XML file found."""
@@ -177,7 +171,7 @@ def _get_marcxml_records(self, response_body):
 
         return [etree.tostring(item) for item in list_items]
 
-    def _json_records_from_marcxml(self, list_marcxml_records):
+    def _hep_records_from_marcxml(self, list_marcxml_records):
         def _create_json_record(str_xml_record):
             object_record = create_record(etree.XML(str_xml_record))
             dojson_record = hep.do(object_record)
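Editorial note: the DESY spider no longer hardcodes the two test FFT links. It collects `_fft` from each dojson-converted record and derives download URLs by simple prefixing. A sketch with hypothetical values:

    prefix_url = 'file:///code/tests/functional/desy/fixtures/ftp_server/'
    hep_record = {'_fft': [{'path': 'DESY/FFT/test_fft_1.txt'}]}  # from dojson

    list_file_urls = [
        '{0}{1}'.format(prefix_url, fft_link['path'])
        for fft_link in hep_record['_fft']
    ]
    # ['file:///code/tests/functional/desy/fixtures/ftp_server/DESY/FFT/test_fft_1.txt']

After download, `_update_record_fft` in crawler2hep.py matches each `_fft` entry back to its local copy via `get_file_name_from_url` (here `'test_fft_1.txt'`) and rewrites only `path`, keeping description, type, version, and the other fields intact.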
diff --git a/hepcrawl/spiders/dnb_spider.py b/hepcrawl/spiders/dnb_spider.py
index 3ac8b901..3dd50b59 100644
--- a/hepcrawl/spiders/dnb_spider.py
+++ b/hepcrawl/spiders/dnb_spider.py
@@ -16,7 +16,12 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_mime_type, parse_domain, get_node
+from hepcrawl.utils import (
+    get_mime_type,
+    parse_domain,
+    get_node,
+    ParsedItem,
+)
 
 
 class DNBSpider(XMLFeedSpider):
@@ -219,4 +224,10 @@ def build_item(self, response):
         record.add_value('thesis', {'degree_type': 'PhD'})
         record.add_value('collections', ['HEP', 'THESIS'])
-        return record.load_item()
+
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py
index beea699d..cfb59af5 100644
--- a/hepcrawl/spiders/edp_spider.py
+++ b/hepcrawl/spiders/edp_spider.py
@@ -22,7 +22,7 @@
 from ..extractors.jats import Jats
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import (
+from hepcrawl.utils import (
     ftp_list_files,
     ftp_connection_info,
     get_first,
@@ -30,6 +30,7 @@
     get_licenses,
     get_node,
     parse_domain,
+    ParsedItem,
 )
 
 
@@ -318,7 +319,12 @@ def build_item_rich(self, response):
         )
         record.add_value("urls", response.meta.get("urls"))
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
 
     def build_item_jats(self, response):
         """Build the final HEPRecord with JATS-format XML ('jp')."""
@@ -388,7 +394,12 @@ def build_item_jats(self, response):
             references = self._get_references(node)
             record.add_value("references", references)
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
 
     def _get_references(self, node):
         """Get the references."""
diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py
index c9aacc00..78fdd5fd 100644
--- a/hepcrawl/spiders/elsevier_spider.py
+++ b/hepcrawl/spiders/elsevier_spider.py
@@ -25,12 +25,13 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import (
+from hepcrawl.utils import (
     get_first,
     get_licenses,
     has_numbers,
     range_as_string,
     unzip_xml_files,
+    ParsedItem,
 )
 from ..dateutils import format_year
 
@@ -1034,4 +1035,9 @@ def build_item(self, response):
         record.add_value('collections', self.get_collections(doctype))
         record.add_value('references', self.get_references(node))
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py
index 941a3674..37871f3a 100644
--- a/hepcrawl/spiders/hindawi_spider.py
+++ b/hepcrawl/spiders/hindawi_spider.py
@@ -16,7 +16,10 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_licenses
+from hepcrawl.utils import (
+    get_licenses,
+    ParsedItem,
+)
 
 
 class HindawiSpider(XMLFeedSpider):
@@ -222,4 +225,9 @@ def parse_node(self, response, node):
         record.add_xpath('source',
                          "./datafield[@tag='260']/subfield[@code='b']/text()")
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
diff --git a/hepcrawl/spiders/infn_spider.py b/hepcrawl/spiders/infn_spider.py
index 2e970c1c..579ac65b 100644
--- a/hepcrawl/spiders/infn_spider.py
+++ b/hepcrawl/spiders/infn_spider.py
@@ -21,8 +21,10 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_temporary_file
-
+from hepcrawl.utils import (
+    get_temporary_file,
+    ParsedItem,
+)
 from ..dateutils import format_date
 
 
@@ -240,4 +242,9 @@ def build_item(self, response):
         record.add_value('source', 'INFN')
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py
index 0e3bae65..90c7809f 100644
--- a/hepcrawl/spiders/iop_spider.py
+++ b/hepcrawl/spiders/iop_spider.py
@@ -23,6 +23,7 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
+from hepcrawl.utils import ParsedItem
 
 
 class IOPSpider(XMLFeedSpider, NLM):
@@ -222,4 +223,9 @@ def parse_node(self, response, node):
             record.add_value("additional_files",
                              self.add_fft_file(pdf_file_path, file_access, file_type))
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/magic_spider.py b/hepcrawl/spiders/magic_spider.py
index 77bf7948..1c83c829 100644
--- a/hepcrawl/spiders/magic_spider.py
+++ b/hepcrawl/spiders/magic_spider.py
@@ -18,7 +18,10 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import split_fullname
+from hepcrawl.utils import (
+    split_fullname,
+    ParsedItem,
+)
 
 
 class MagicSpider(XMLFeedSpider):
@@ -176,4 +179,9 @@ def build_item(self, response):
         record.add_value("additional_files", response.meta.get("files"))
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        yield record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
diff --git a/hepcrawl/spiders/mit_spider.py b/hepcrawl/spiders/mit_spider.py
index c71234f9..4e099348 100644
--- a/hepcrawl/spiders/mit_spider.py
+++ b/hepcrawl/spiders/mit_spider.py
@@ -23,7 +23,11 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import get_temporary_file, split_fullname
+from hepcrawl.utils import (
+    get_temporary_file,
+    split_fullname,
+    ParsedItem,
+)
 
 
 class MITSpider(XMLFeedSpider):
@@ -223,4 +227,9 @@ def build_item(self, response):
         record.add_value('page_nr', self.get_page_nr(node))
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/phenix_spider.py b/hepcrawl/spiders/phenix_spider.py
index 7200664e..95bc874a 100644
--- a/hepcrawl/spiders/phenix_spider.py
+++ b/hepcrawl/spiders/phenix_spider.py
@@ -18,6 +18,7 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
+from hepcrawl.utils import ParsedItem
 
 
 class PhenixSpider(XMLFeedSpider):
@@ -128,4 +129,9 @@ def parse_node(self, response, node):
         record.add_value('source', 'PHENIX')
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
diff --git a/hepcrawl/spiders/phil_spider.py b/hepcrawl/spiders/phil_spider.py
index 101b1163..8a486292 100644
--- a/hepcrawl/spiders/phil_spider.py
+++ b/hepcrawl/spiders/phil_spider.py
@@ -19,7 +19,11 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import parse_domain, get_mime_type
+from hepcrawl.utils import (
+    parse_domain,
+    get_mime_type,
+    ParsedItem,
+)
 
 
 class PhilSpider(CrawlSpider):
@@ -160,4 +164,9 @@ def build_item(self, response):
         if not jsonrecord.get('year') == "forthcoming":
             record.add_value('journal_year', int(jsonrecord['year']))
 
-        return record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        return parsed_item
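Editorial note: not every callback changed shape the same way. Some still `return` a single `ParsedItem` (arxiv, base, dnb, elsevier, infn, iop, mit, phenix, phil), while others became generators that `yield` it (alpha, aps, brown, edp, hindawi, magic, pos, t2k, wsp). The updated unit tests below account for this by calling `.next()` on generator results; a small helper like the following (hypothetical, not part of this patch) would consume both shapes under Python 2:

    def get_parsed_item(callback_result):
        # Generators expose next(); a plain ParsedItem does not.
        if hasattr(callback_result, 'next'):
            return callback_result.next()
        return callback_result

    # e.g. get_parsed_item(spider.build_item(response)).item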
diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py
index 7d3fb87d..875dbb5e 100644
--- a/hepcrawl/spiders/pos_spider.py
+++ b/hepcrawl/spiders/pos_spider.py
@@ -13,10 +13,16 @@
 
 import re
 
+from urlparse import urljoin
+
 from scrapy import Request, Selector
 from scrapy.spiders import Spider
-from urlparse import urljoin
-from ..utils import get_licenses, get_first
+
+from hepcrawl.utils import (
+    get_licenses,
+    get_first,
+    ParsedItem,
+)
 from ..dateutils import create_valid_date
 from ..items import HEPRecord
 from ..loaders import HEPLoader
@@ -128,7 +134,13 @@ def build_item(self, response):
         record.add_value('extra_data', extra_data)
         record.add_value('collections', ['HEP', 'ConferencePaper'])
-        return record.load_item()
+
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
 
     def _get_ext_systems_number(self, node):
         return [
diff --git a/hepcrawl/spiders/t2k_spider.py b/hepcrawl/spiders/t2k_spider.py
index 661f0bec..97ae8202 100644
--- a/hepcrawl/spiders/t2k_spider.py
+++ b/hepcrawl/spiders/t2k_spider.py
@@ -18,7 +18,10 @@
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import split_fullname
+from hepcrawl.utils import (
+    split_fullname,
+    ParsedItem,
+)
 
 
 class T2kSpider(XMLFeedSpider):
@@ -164,4 +167,9 @@ def build_item(self, response):
         record.add_value("additional_files", response.meta.get("additional_files"))
         record.add_value('collections', ['HEP', 'THESIS'])
 
-        yield record.load_item()
+        parsed_item = ParsedItem(
+            item=record.load_item(),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
diff --git a/hepcrawl/spiders/wsp_spider.py b/hepcrawl/spiders/wsp_spider.py
index bef40a72..49b5b725 100644
--- a/hepcrawl/spiders/wsp_spider.py
+++ b/hepcrawl/spiders/wsp_spider.py
@@ -20,12 +20,13 @@
 from ..extractors.jats import Jats
 from ..items import HEPRecord
 from ..loaders import HEPLoader
-from ..utils import (
+from hepcrawl.utils import (
     ftp_list_files,
     ftp_connection_info,
     local_list_files,
     get_licenses,
     unzip_xml_files,
+    ParsedItem,
 )
 
 
@@ -148,7 +149,7 @@ def parse_node(self, response, node):
         self.log("Got article_type {0}".format(article_type))
         if article_type is None or article_type[0] not in self.allowed_article_types:
             # Filter out non-interesting article types
-            return None
+            return
 
         record = HEPLoader(item=HEPRecord(), selector=node, response=response)
         if article_type in ['correction',
@@ -203,9 +204,13 @@ def parse_node(self, response, node):
             record.add_value('license', license)
 
         record.add_value('collections', self._get_collections(node, article_type, journal_title))
-        parsed_record = dict(record.load_item())
-        return parsed_record
+
+        parsed_item = ParsedItem(
+            item=dict(record.load_item()),
+            item_format='hepcrawl',
+        )
+
+        yield parsed_item
 
     def _get_collections(self, node, article_type, current_journal_title):
         """Return this articles' collection."""
diff --git a/hepcrawl/utils.py b/hepcrawl/utils.py
index a212593e..71ff3aa6 100644
--- a/hepcrawl/utils.py
+++ b/hepcrawl/utils.py
@@ -342,3 +342,35 @@ def get_license_by_text(license_text):
 
 def get_file_name_from_url(url):
     return url.rsplit('/', 1)[-1]
+
+
+def get_absolute_file_path(file_path):
+    """Return the absolute path for a relative file path."""
+    return os.path.abspath(file_path)
+
+
+class ParsedItem(dict):
+    """Item interface used to communicate between the spiders and the pipelines."""
+    def __init__(
+        self,
+        item,
+        file_urls=None,
+        item_format=None,
+        ftp_params=None,
+        file_paths=None,
+        **kwargs
+    ):
+        super(ParsedItem, self).__init__(
+            item=item,
+            file_urls=file_urls,
+            item_format=item_format,
+            ftp_params=ftp_params,
+            file_paths=file_paths,
+            **kwargs
+        )
+        self.item = item
+        self.file_urls = file_urls
+        self.item_format = item_format
+        self.ftp_params = ftp_params
+        self.file_paths = file_paths
+        self.__dict__ = self
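Editorial note: because `ParsedItem` subclasses `dict` and ends its `__init__` with `self.__dict__ = self`, key access and attribute access share the same storage. That is what lets the spiders build it with keyword arguments while the pipelines read it as attributes. A quick sketch of that contract, with hypothetical values:

    from hepcrawl.utils import ParsedItem

    parsed_item = ParsedItem(item={'title': 'A record'}, item_format='hepcrawl')

    assert parsed_item['item_format'] == parsed_item.item_format

    # Later attribute assignments, e.g. by FftFilesPipeline.item_completed(),
    # land in the dict as well.
    parsed_item.file_paths = {'test_fft_1.txt': '/tmp/full/abc.txt'}
    assert parsed_item['file_paths'] == parsed_item.file_paths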
diff --git a/tests/functional/desy/fixtures/desy_ftp_records.json b/tests/functional/desy/fixtures/desy_ftp_records.json
index 6a9b6c62..f685a254 100644
--- a/tests/functional/desy/fixtures/desy_ftp_records.json
+++ b/tests/functional/desy/fixtures/desy_ftp_records.json
@@ -10,10 +10,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/589091f319277bfc3316338b4123b215cee402db.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/809d9d2bebcea6eee5e400e3c49b31795a3acc3d.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
         },
         {
-            "path": "/tmp/file_urls/full/d7b1ef2d316488d23a4d66865eca3f686e29a27b.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/1d4b0a4eebdd03b95f882fa7feb9d3f06681ec50.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
         }
     ],
     "control_number": 111111,
@@ -76,10 +87,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/589091f319277bfc3316338b4123b215cee402db.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/809d9d2bebcea6eee5e400e3c49b31795a3acc3d.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
         },
         {
-            "path": "/tmp/file_urls/full/d7b1ef2d316488d23a4d66865eca3f686e29a27b.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/1d4b0a4eebdd03b95f882fa7feb9d3f06681ec50.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
         }
     ],
     "control_number": 222222,
@@ -142,10 +164,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/589091f319277bfc3316338b4123b215cee402db.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/809d9d2bebcea6eee5e400e3c49b31795a3acc3d.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d7b1ef2d316488d23a4d66865eca3f686e29a27b.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/1d4b0a4eebdd03b95f882fa7feb9d3f06681ec50.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 333333,
@@ -208,10 +241,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/589091f319277bfc3316338b4123b215cee402db.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/809d9d2bebcea6eee5e400e3c49b31795a3acc3d.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d7b1ef2d316488d23a4d66865eca3f686e29a27b.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/1d4b0a4eebdd03b95f882fa7feb9d3f06681ec50.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 444444,
diff --git a/tests/functional/desy/fixtures/desy_local_records.json b/tests/functional/desy/fixtures/desy_local_records.json
index 57d780e9..6fe2c4d0 100644
--- a/tests/functional/desy/fixtures/desy_local_records.json
+++ b/tests/functional/desy/fixtures/desy_local_records.json
@@ -10,10 +10,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/bc8e08681ec71885835e07aab1243b0dccf08f1d.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d341fc9296aafc16c169492c9cd2f80c19df6d9c.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 111111,
@@ -76,10 +87,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/bc8e08681ec71885835e07aab1243b0dccf08f1d.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d341fc9296aafc16c169492c9cd2f80c19df6d9c.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 222222,
@@ -142,10 +164,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/bc8e08681ec71885835e07aab1243b0dccf08f1d.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d341fc9296aafc16c169492c9cd2f80c19df6d9c.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 333333,
@@ -208,10 +241,21 @@
     ],
     "_fft": [
         {
-            "path": "/tmp/file_urls/full/bc8e08681ec71885835e07aab1243b0dccf08f1d.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:17",
+            "description": "00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \\textsc{core}NFW models. Colors and symbols are as in Figure \\ref{fig:dc14_fits}.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/49e42fc70c5d7b0cd9dc7aa5defa12ded530e135.txt",
+            "type": "Main",
+            "filename": "cNFW_rogue_curves"
        },
        {
-            "path": "/tmp/file_urls/full/d341fc9296aafc16c169492c9cd2f80c19df6d9c.txt"
+            "version": 1,
+            "creation_datetime": "2017-06-27T09:43:16",
+            "description": "00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \\cite{dutton14} (left) and the stellar mass-halo mass relation from \\cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \\cite{maccio08} and the stellar mass-halo mass relation from \\cite{behroozi13} are also shown as the black dashed lines.",
+            "format": ".txt",
+            "path": "/tmp/file_urls/full/c1cdb1640202896b1ffc446f20d0d660977fc2db.txt",
+            "type": "Main",
+            "filename": "scalingRelations_DutBeh_DC14_all_Oh"
        }
     ],
     "control_number": 444444,
diff --git a/tests/functional/desy/fixtures/ftp_server/FFT/test_fft_1.txt b/tests/functional/desy/fixtures/ftp_server/DESY/FFT/test_fft_1.txt
similarity index 100%
rename from tests/functional/desy/fixtures/ftp_server/FFT/test_fft_1.txt
rename to tests/functional/desy/fixtures/ftp_server/DESY/FFT/test_fft_1.txt
diff --git a/tests/functional/desy/fixtures/ftp_server/FFT/test_fft_2.txt b/tests/functional/desy/fixtures/ftp_server/DESY/FFT/test_fft_2.txt
similarity index 100%
rename from tests/functional/desy/fixtures/ftp_server/FFT/test_fft_2.txt
rename to tests/functional/desy/fixtures/ftp_server/DESY/FFT/test_fft_2.txt
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
index 5a57f51c..6900d746 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_collection_records.xml
@@ -50,6 +50,28 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
 
   222222
@@ -101,5 +123,27 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
\ No newline at end of file
diff --git a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
index 44266cd4..2067b5e7 100644
--- a/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
+++ b/tests/functional/desy/fixtures/ftp_server/DESY/desy_no_namespace_collection_records.xml
@@ -50,6 +50,28 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
 
   444444
@@ -101,5 +123,27 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
\ No newline at end of file
diff --git a/tests/functional/desy/test_desy.py b/tests/functional/desy/test_desy.py
index 2b4d8c71..321766f9 100644
--- a/tests/functional/desy/test_desy.py
+++ b/tests/functional/desy/test_desy.py
@@ -51,6 +51,7 @@ def get_fft_1_path():
         'desy',
         'fixtures',
         'ftp_server',
+        'DESY',
         'FFT',
         'test_fft_1.txt',
         test_suite='functional',
@@ -63,6 +64,7 @@ def get_fft_2_path():
         'desy',
         'fixtures',
         'ftp_server',
+        'DESY',
         'FFT',
         'test_fft_2.txt',
         test_suite='functional',
@@ -159,8 +161,8 @@ def test_desy_ftp(
     for record in expected_results:
         fft_file_paths = sorted(record['_fft'])
 
-        assert compare_two_files_using_md5(fft_file_paths[0]['path'], get_fft_1_path)
-        assert compare_two_files_using_md5(fft_file_paths[1]['path'], get_fft_2_path)
+        assert compare_two_files_using_md5(fft_file_paths[0]['path'], get_fft_2_path)
+        assert compare_two_files_using_md5(fft_file_paths[1]['path'], get_fft_1_path)
 
 
 @pytest.mark.parametrize(
@@ -205,6 +207,6 @@ def test_desy_local_package_path(
     for record in expected_results:
         fft_file_paths = sorted(record['_fft'])
 
-        assert compare_two_files_using_md5(fft_file_paths[0]['path'], get_fft_1_path)
-        assert compare_two_files_using_md5(fft_file_paths[1]['path'], get_fft_2_path)
-
+        assert compare_two_files_using_md5(fft_file_paths[0]['path'], get_fft_2_path)
+        assert compare_two_files_using_md5(fft_file_paths[1]['path'], get_fft_1_path)
diff --git a/tests/unit/responses/desy/desy_collection_records.xml b/tests/unit/responses/desy/desy_collection_records.xml
index 5a57f51c..d2086694 100644
--- a/tests/unit/responses/desy/desy_collection_records.xml
+++ b/tests/unit/responses/desy/desy_collection_records.xml
@@ -50,6 +50,28 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt;1
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt;1
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
 
   222222
@@ -100,6 +122,28 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+
+    DESY/FFT/test_fft_1.txt;1
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt;1
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
\ No newline at end of file
diff --git a/tests/unit/responses/desy/desy_record.xml b/tests/unit/responses/desy/desy_record.xml
index 8219064f..437c45b3 100644
--- a/tests/unit/responses/desy/desy_record.xml
+++ b/tests/unit/responses/desy/desy_record.xml
@@ -50,5 +50,27 @@
     oai:inspirehep.net:1608652
     INSPIRE:HEP
+
+    DESY/FFT/test_fft_1.txt;1
+    00013 Decomposition of the problematic rotation curves in our sample according to the best-fit \textsc{core}NFW models. Colors and symbols are as in Figure \ref{fig:dc14_fits}.
+    .txt
+    cNFW_rogue_curves
+
+    2017-06-27 09:43:17
+    Main
+    1
+
+
+    DESY/FFT/test_fft_2.txt;1
+    00005 Comparison of the parameters of the best-fit DC14 models to the cosmological halo mass-concentration relation from \cite{dutton14} (left) and the stellar mass-halo mass relation from \cite{behroozi13} (right). The error bars correspond to the extremal values of the multidimensional 68\% confidence region for each fit. The theoretical relations are shown as red lines and their 1$\sigma$ and 2$\sigma$ scatter are represented by the dark and light grey bands, respectively. The mass-concentration relation from \cite{maccio08} and the stellar mass-halo mass relation from \cite{behroozi13} are also shown as the black dashed lines.
+    .txt
+    scalingRelations_DutBeh_DC14_all_Oh
+
+    2017-06-27 09:43:16
+    Main
+    1
+
\ No newline at end of file
diff --git a/tests/unit/test_alpha.py b/tests/unit/test_alpha.py
index eef140b1..ad8f3f03 100644
--- a/tests/unit/test_alpha.py
+++ b/tests/unit/test_alpha.py
@@ -20,13 +20,15 @@
 def results():
     """Return results generator from the Alpha spider."""
     spider = alpha_spider.AlphaSpider()
-    records = list(
+    parsed_items = list(
         spider.parse(
             fake_response_from_file('alpha/test_1.htm')
         )
     )
 
+    records = [parsed_item.item for parsed_item in parsed_items]
+
     assert records
+
     return records
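Editorial note: the remaining unit-test updates all follow the pattern just shown for test_alpha.py. Since spiders now emit `ParsedItem` objects (or generators of them), the fixtures unwrap `parsed_item.item` before asserting on record fields:

    # fake_response is a placeholder for the fixture response in each test.
    parsed_items = list(spider.parse(fake_response))
    records = [parsed_item.item for parsed_item in parsed_items]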
diff --git a/tests/unit/test_aps.py b/tests/unit/test_aps.py
index eb53269d..8bc66033 100644
--- a/tests/unit/test_aps.py
+++ b/tests/unit/test_aps.py
@@ -21,7 +21,7 @@ def results():
     from scrapy.http import TextResponse
 
     spider = aps_spider.APSSpider()
-    records = list(
+    parsed_items = list(
         spider.parse(
             fake_response_from_file(
                 'aps/aps_single_response.json',
@@ -30,6 +30,8 @@
             )
         )
 
+    records = [parsed_item.item for parsed_item in parsed_items]
+
     assert records
     return records
diff --git a/tests/unit/test_arxiv_all.py b/tests/unit/test_arxiv_all.py
index d395c494..47ea20db 100644
--- a/tests/unit/test_arxiv_all.py
+++ b/tests/unit/test_arxiv_all.py
@@ -35,7 +35,7 @@ def _get_processed_item(record, spider):
         item = pipeline.process_item(record, spider)
         return item
 
-    records = list(
+    parsed_items = list(
         spider.parse(
             fake_response_from_file(
                 'arxiv/sample_arxiv_record.xml',
@@ -44,10 +44,10 @@
             )
         )
 
-    assert records
 
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
-    return [_get_processed_item(record, spider) for record in records]
+
+    return [_get_processed_item(parsed_item, spider) for parsed_item in parsed_items]
 
 
 def test_page_nr(many_results):
diff --git a/tests/unit/test_arxiv_single.py b/tests/unit/test_arxiv_single.py
index b7ca410d..d8e6f9e5 100644
--- a/tests/unit/test_arxiv_single.py
+++ b/tests/unit/test_arxiv_single.py
@@ -31,7 +31,7 @@ def _get_processed_item(record, spider):
     crawler = Crawler(spidercls=arxiv_spider.ArxivSpider)
     spider = arxiv_spider.ArxivSpider.from_crawler(crawler)
-    records = list(
+    parsed_items = list(
         spider.parse(
             fake_response_from_file(
                 'arxiv/sample_arxiv_record0.xml',
@@ -40,11 +40,10 @@
             )
         )
 
-    assert records
 
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
-    return [_get_processed_item(record, spider) for record in records]
+    return [_get_processed_item(parsed_item, spider) for parsed_item in parsed_items]
diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index cc6ef093..48551cdf 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -38,9 +38,11 @@ def record():
     nodes = selector.xpath('.//%s' % spider.itertag)
     response.meta["record"] = nodes[0].extract()
     response.meta["urls"] = ["http://hdl.handle.net/1885/10005"]
-    parsed_record = spider.build_item(response)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.build_item(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -169,7 +171,10 @@ def splash():
             'Content-Type': 'text/html',
         },
     )
-    return spider.scrape_for_pdf(splash_response)
+
+    parsed_item = spider.scrape_for_pdf(splash_response)
+
+    return parsed_item.item
 
 
 def test_splash(splash):
@@ -201,7 +206,10 @@ def parsed_node():
     response = fake_response_from_string(text=body)
     node = get_node(spider, 'OAI-PMH:record', text=body)
     response.meta["record"] = node[0].extract()
-    return spider.parse_node(response, node[0])
+
+    parsed_item = spider.parse_node(response, node[0])
+
+    return parsed_item.item
 
 
 def test_parsed_node(parsed_node):
diff --git a/tests/unit/test_brown.py b/tests/unit/test_brown.py
index 0b42b4df..41e3902d 100644
--- a/tests/unit/test_brown.py
+++ b/tests/unit/test_brown.py
@@ -41,10 +41,12 @@ def record():
     splash_response = fake_response_from_file('brown/test_splash.html')
     splash_response.meta["jsonrecord"] = jsonrecord
-    parsed_record = spider.scrape_splash(splash_response)
+    iter_item = spider.scrape_splash(splash_response)
 
-    assert parsed_record
-    return parsed_record
+    parsed_item = iter_item.next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -200,7 +202,12 @@ def parsed_node_no_splash():
     jsonrecord = jsonresponse["items"]["docs"][0]
     response.meta["jsonrecord"] = jsonrecord
 
-    return spider.parse(response).next()
+    iter_item = spider.parse(response).next()
+
+    parsed_item = iter_item.next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_no_splash(parsed_node_no_splash):
diff --git a/tests/unit/test_desy.py b/tests/unit/test_desy.py
index 73999356..5b01f7fd 100644
--- a/tests/unit/test_desy.py
+++ b/tests/unit/test_desy.py
@@ -82,10 +82,34 @@ def test_pipeline_record(generated_record):
         ],
         '_fft': [
             {
-                'path': 'FFT/test_fft_1.txt'
+                'creation_datetime': '2017-06-27T09:43:17',
+                'description': '00013 Decomposition of the problematic rotation curves in our '
+                               'sample according to the best-fit \\textsc{core}NFW models. '
+                               'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
+                'filename': 'cNFW_rogue_curves',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_1.txt;1',
+                'type': 'Main',
+                'version': 1,
             },
             {
-                'path': 'FFT/test_fft_2.txt'
+                'creation_datetime': '2017-06-27T09:43:16',
+                'description': '00005 Comparison of the parameters of the best-fit DC14 models to '
+                               'the cosmological halo mass-concentration relation from \\'
+                               'cite{dutton14} (left) and the stellar mass-halo mass relation '
+                               'from \\cite{behroozi13} (right). The error bars correspond to the '
+                               'extremal values of the multidimensional 68\\% confidence region '
+                               'for each fit. The theoretical relations are shown as red lines '
+                               'and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by '
+                               'the dark and light grey bands, respectively. The '
+                               'mass-concentration relation from \\cite{maccio08} and the stellar'
+                               ' mass-halo mass relation from \\cite{behroozi13} are also shown '
+                               'as the black dashed lines.',
+                'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_2.txt;1',
+                'type': 'Main',
+                'version': 1
            }
         ],
         'abstracts': [
@@ -182,11 +206,35 @@ def test_pipeline_collection_records(generated_records):
         ],
         "_fft": [
             {
-                "path": "FFT/test_fft_1.txt"
+                'creation_datetime': '2017-06-27T09:43:17',
+                'description': '00013 Decomposition of the problematic rotation curves in our '
+                               'sample according to the best-fit \\textsc{core}NFW models. '
+                               'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
+                'filename': 'cNFW_rogue_curves',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_1.txt;1',
+                'type': 'Main',
+                'version': 1,
             },
             {
-                "path": "FFT/test_fft_2.txt"
-            },
+                'creation_datetime': '2017-06-27T09:43:16',
+                'description': '00005 Comparison of the parameters of the best-fit DC14 models to '
+                               'the cosmological halo mass-concentration relation from \\'
+                               'cite{dutton14} (left) and the stellar mass-halo mass relation '
+                               'from \\cite{behroozi13} (right). The error bars correspond to the '
+                               'extremal values of the multidimensional 68\\% confidence region '
+                               'for each fit. The theoretical relations are shown as red lines '
+                               'and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by '
+                               'the dark and light grey bands, respectively. The '
+                               'mass-concentration relation from \\cite{maccio08} and the stellar'
+                               ' mass-halo mass relation from \\cite{behroozi13} are also shown '
+                               'as the black dashed lines.',
+                'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_2.txt;1',
+                'type': 'Main',
+                'version': 1
+            }
         ],
         "control_number": 111111,
         "public_notes": [
@@ -248,11 +296,35 @@
         ],
         "_fft": [
             {
-                "path": "FFT/test_fft_1.txt"
+                'creation_datetime': '2017-06-27T09:43:17',
+                'description': '00013 Decomposition of the problematic rotation curves in our '
+                               'sample according to the best-fit \\textsc{core}NFW models. '
+                               'Colors and symbols are as in Figure \\ref{fig:dc14_fits}.',
+                'filename': 'cNFW_rogue_curves',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_1.txt;1',
+                'type': 'Main',
+                'version': 1,
            },
            {
-                "path": "FFT/test_fft_2.txt"
-            },
+                'creation_datetime': '2017-06-27T09:43:16',
+                'description': '00005 Comparison of the parameters of the best-fit DC14 models to '
+                               'the cosmological halo mass-concentration relation from \\'
+                               'cite{dutton14} (left) and the stellar mass-halo mass relation '
+                               'from \\cite{behroozi13} (right). The error bars correspond to the '
+                               'extremal values of the multidimensional 68\\% confidence region '
+                               'for each fit. The theoretical relations are shown as red lines '
+                               'and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by '
+                               'the dark and light grey bands, respectively. The '
+                               'mass-concentration relation from \\cite{maccio08} and the stellar'
+                               ' mass-halo mass relation from \\cite{behroozi13} are also shown '
+                               'as the black dashed lines.',
+                'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_2.txt;1',
+                'type': 'Main',
+                'version': 1
+            }
         ],
         "control_number": 222222,
         "public_notes": [
+                               'from \\cite{behroozi13} (right). The error bars correspond to the '
+                               'extremal values of the multidimensional 68\\% confidence region '
+                               'for each fit. The theoretical relations are shown as red lines '
+                               'and their 1$\\sigma$ and 2$\\sigma$ scatter are represented by '
+                               'the dark and light grey bands, respectively. The '
+                               'mass-concentration relation from \\cite{maccio08} and the stellar'
+                               ' mass-halo mass relation from \\cite{behroozi13} are also shown '
+                               'as the black dashed lines.',
+                'filename': 'scalingRelations_DutBeh_DC14_all_Oh',
+                'format': '.txt',
+                'path': 'DESY/FFT/test_fft_2.txt;1',
+                'type': 'Main',
+                'version': 1
+            }
         ],
         "control_number": 222222,
         "public_notes": [
diff --git a/tests/unit/test_dnb.py b/tests/unit/test_dnb.py
index b00aff3d..a1a22dbd 100644
--- a/tests/unit/test_dnb.py
+++ b/tests/unit/test_dnb.py
@@ -72,7 +72,11 @@ def record(scrape_pos_page_body):
         body=scrape_pos_page_body,
         **{'encoding': 'utf-8'}
     )
-    return request.callback(response)
+
+    parsed_item = request.callback(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_title(record):
@@ -241,7 +245,9 @@ def parse_without_splash():
             'Content-Type': 'application/pdf;charset=base64',
         }
     )
-    return spider.parse_node(response, nodes[0])
+
+    parsed_item = spider.parse_node(response, nodes[0])
+    return parsed_item.item
 
 
 def test_parse_without_splash(parse_without_splash):
diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py
index cc7885bd..7e841015 100644
--- a/tests/unit/test_edp.py
+++ b/tests/unit/test_edp.py
@@ -40,6 +40,7 @@ def scrape_pos_page_body():
         )
     )
 
+
 @pytest.fixture
 def targzfile():
     """Path to test tar.gz file with JATS XML file."""
@@ -50,6 +51,7 @@ def targzfile():
         'test_gz.tar.gz'
     )
 
+
 @pytest.fixture
 def package_jats(targzfile):
     """Extract tar.gz package with JATS XML file."""
@@ -75,7 +77,12 @@ def record_jats(package_jats, scrape_pos_page_body):
         body=scrape_pos_page_body,
         **{'encoding': 'utf-8'}
     )
-    return request.callback(response)
+
+    iter_item = request.callback(response)
+    parsed_item = iter_item.next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -107,7 +114,11 @@ def record_rich(package_rich):
     fake_resp.meta["rich"] = True
     node = get_node(spider, "//EDPSArticle", fake_resp)[0]
 
-    return spider.parse_node(fake_resp, node)
+    iter_item = spider.parse_node(fake_resp, node)
+    parsed_item = iter_item.next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_title(record_jats):
@@ -145,6 +156,7 @@ def test_abstract(record_jats):
     assert 'abstract' in record_jats
     assert record_jats['abstract'] == abstract
 
+
 def test_date_published(record_jats):
     """Test extracting date_published."""
     date_published = "2015-01-01"
@@ -179,6 +191,7 @@ def test_doi(record_jats):
     assert 'dois' in record_jats
     assert record_jats['dois'][0]['value'] == doi
 
+
 def test_publication_info(record_jats):
     """Test extracting publication info."""
     assert 'journal_title' in record_jats
@@ -206,7 +219,6 @@ def test_keywords(record_jats):
         assert keyw["value"] in keywords
 
 
-
 def test_authors(record_jats):
     """Test authors."""
     authors = ["Arasoglu, Ali", "Ozdemir, Omer Faruk"]
@@ -326,7 +338,6 @@ def test_authors_rich(record_rich):
         assert astr[index]["affiliations"][0]["value"] == affiliations[index]
 
 
-
 def test_tarfile(tarbzfile, tmpdir):
     """Test untarring a tar.bz package with a test XML file.
@@ -343,7 +354,6 @@ def test_tarfile(tarbzfile, tmpdir):
     assert "aas/xml_rich/2000/01" not in xml_files_flat[0]
 
 
-
 def test_handle_package_ftp(tarbzfile):
     """Test getting the target folder name for xml files."""
     spider = edp_spider.EDPSpider()
@@ -353,6 +363,7 @@ def test_handle_package_ftp(tarbzfile):
     assert isinstance(request, Request)
     assert request.meta["package_path"] == tarbzfile
 
+
 def test_no_dois_jats():
     """Test parsing when no DOI in record. JATS format."""
     spider = edp_spider.EDPSpider()
@@ -370,7 +381,10 @@ def test_no_dois_jats():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     assert "dois" not in record
     assert "additional_files" not in record
@@ -390,7 +404,10 @@ def test_no_dois_rich():
     response = fake_response_from_string(body)
     response.meta["rich"] = True
     node = get_node(spider, "//EDPSArticle", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     assert "dois" not in record
     assert "additional_files" not in record
@@ -416,7 +433,10 @@ def test_addendum_jats():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     assert "related_article_doi" in record
     assert record["related_article_doi"][0][
@@ -439,7 +459,10 @@ def test_author_with_email():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     assert 'email' in record['authors'][0]
     assert record['authors'][0]['email'] == "Fname.Sname@university.org"
@@ -472,7 +495,10 @@ def test_aff_with_email():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     affiliation = "Department of Physics, Western Michigan University, Kalamazoo, MI 49008, USA"
     assert 'affiliations' in record['authors'][0]
@@ -481,8 +507,6 @@ def test_aff_with_email():
     assert record['authors'][0]['email'] is None
 
 
-
-
 def test_no_valid_article():
     """Test parsing when filtering out non-interesting article types."""
     spider = edp_spider.EDPSpider()
@@ -506,7 +530,10 @@ def test_collections_review():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    record = spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+    record = parsed_item.item
 
     assert "collections" in record
     assert record["collections"] == [{'primary': 'HEP'}, {'primary': 'Review'}]
@@ -533,7 +560,11 @@ def record_references_only():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, "//article", response)[0]
-    return spider.parse_node(response, node)
+
+    iter_item = spider.parse_node(response, node)
+    parsed_item = iter_item.next()
+
+    return parsed_item.item
 
 
 def test_references(record_references_only):
diff --git a/tests/unit/test_elsevier.py b/tests/unit/test_elsevier.py
index ca023122..109f3d3f 100644
--- a/tests/unit/test_elsevier.py
+++ b/tests/unit/test_elsevier.py
@@ -41,9 +41,11 @@ def record():
     response.meta["xml_url"] = 'elsevier/sample_consyn_record.xml'
     tag = '//%s' % spider.itertag
     nodes = get_node(spider, tag, response)
-    parsed_record = spider.parse_node(response, nodes)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.parse_node(response, nodes)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture(scope="module")
@@ -97,7 +99,11 @@ def parsed_node():
     response.meta["xml_url"] = 'elsevier/sample_consyn_record.xml'
     parse_response = spider.parse_node(response, node)
     parse_response.status = 404
-    return spider.scrape_sciencedirect(parse_response)
+
+    parsed_item = spider.scrape_sciencedirect(parse_response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_collection(parsed_node):
@@ -164,7 +170,10 @@ def cover_display_date():
     node = get_node(spider, '/doc', text=body)
     response = fake_response_from_string(body)
-    return spider.parse_node(response, node)
+    parsed_item = spider.parse_node(response, node)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_cover_display_date(cover_display_date):
@@ -187,7 +196,10 @@ def cover_display_date_y_m():
     """
     node = get_node(spider, '/doc', text=body)
     response = fake_response_from_string(body)
-    return spider.parse_node(response, node)
+    parsed_item = spider.parse_node(response, node)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_cover_display_date_y_m(cover_display_date_y_m):
@@ -210,7 +222,10 @@ def cover_display_date_y():
     """
     node = get_node(spider, '/doc', text=body)
     response = fake_response_from_string(body)
-    return spider.parse_node(response, node)
+    parsed_item = spider.parse_node(response, node)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_cover_display_date_y(cover_display_date_y):
@@ -1644,7 +1659,11 @@ def sciencedirect():
     ])
     response.meta["info"] = {}
     response.meta["node"] = get_node(spider, '/head', text=body)
-    return spider.scrape_sciencedirect(response)
+
+    parsed_item = spider.scrape_sciencedirect(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_sciencedirect(sciencedirect):
diff --git a/tests/unit/test_hindawi.py b/tests/unit/test_hindawi.py
index 37e5e183..84ebd06d 100644
--- a/tests/unit/test_hindawi.py
+++ b/tests/unit/test_hindawi.py
@@ -26,9 +26,11 @@ def record():
     response = fake_response_from_file("hindawi/test_1.xml")
     nodes = get_node(spider, "//marc:record", response)
 
-    parsed_record = spider.parse_node(response, nodes[0])
-    assert parsed_record
-    return parsed_record
+    iter_item = spider.parse_node(response, nodes[0])
+    parsed_item = iter_item.next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_title(record):
diff --git a/tests/unit/test_infn.py b/tests/unit/test_infn.py
index 0c60799a..5cd1e27d 100644
--- a/tests/unit/test_infn.py
+++ b/tests/unit/test_infn.py
@@ -28,9 +28,11 @@ def record():
     """Return scraping results from the INFN spider."""
     spider = infn_spider.InfnSpider()
     response = fake_response_from_file('infn/test_splash.html')
-    parsed_record = spider.scrape_splash(response)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.scrape_splash(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_title(record):
@@ -121,6 +123,7 @@ def test_non_thesis():
 
     assert record is None
 
+
 def test_parse_node():
     """Test parse_node function. This should be a scrapy Request object.
@@ -148,6 +151,6 @@ def test_parse_node_nolink():
     response = fake_response_from_file('infn/test_1_nolink.html')
     selector = Selector(response, type='html')
     node = selector.xpath('//%s' % spider.itertag)[0]
-    record = spider.parse_node(response, node).next()
+    parsed_item = spider.parse_node(response, node).next()
 
-    assert isinstance(record, hepcrawl.items.HEPRecord)
+    assert isinstance(parsed_item.item, hepcrawl.items.HEPRecord)
diff --git a/tests/unit/test_iop.py b/tests/unit/test_iop.py
index b776adfa..fb8d26d2 100644
--- a/tests/unit/test_iop.py
+++ b/tests/unit/test_iop.py
@@ -38,9 +38,11 @@ def record():
     response = fake_response_from_file('iop/xml/test_standard.xml')
     node = get_node(spider, "Article", response)
     spider.pdf_files = TEST_PDF_DIR
-    parsed_record = spider.parse_node(response, node)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.parse_node(response, node)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_abstract(record):
@@ -182,10 +184,11 @@ def erratum_open_access_record():
         'iop',
         'pdf',
     )
-    parsed_record = spider.parse_node(response, node)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.parse_node(response, node)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_files_erratum_open_access_record(erratum_open_access_record):
diff --git a/tests/unit/test_magic.py b/tests/unit/test_magic.py
index eeb574fe..74d9fad4 100644
--- a/tests/unit/test_magic.py
+++ b/tests/unit/test_magic.py
@@ -23,6 +23,7 @@
     get_node,
 )
 
+
 @pytest.fixture
 def record():
     """Return results from the MAGIC spider. First parse node, then scrape,
@@ -39,9 +40,10 @@ def record():
     splash_response.meta["date"] = parsed_node.meta["date"]
     splash_response.meta["urls"] = parsed_node.meta["urls"]
 
-    parsed_record = spider.scrape_for_pdf(splash_response).next()
-    assert parsed_record
-    return parsed_record
+    parsed_item = spider.scrape_for_pdf(splash_response).next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_abstract(record):
@@ -102,7 +104,6 @@ def test_abstract(record):
     assert record["abstract"] == abstract
 
 
-
 def test_title(record):
     """Test extracting title."""
     title = "Limits to the violation of Lorentz invariance using the emission of the CRAB pulsar at TeV energies, discovered with archival data from the MAGIC telescopes"
@@ -139,6 +140,7 @@ def test_url(record):
     assert 'urls' in record
     assert record['urls'][0]['value'] == url
 
+
 def test_pdf_link(record):
     """Test pdf link(s)"""
     files = "http://stlab.adobe.com/wiki/images/d/d3/Test.pdf"
@@ -164,8 +166,9 @@ def test_no_author_no_date_no_url():
     """
     response = fake_response_from_string(body)
     node = get_node(spider, spider.itertag, text=body)
-    record = spider.parse_node(response, node).next()
+    parsed_item = spider.parse_node(response, node).next()
+    record = parsed_item.item
 
     assert isinstance(record, hepcrawl.items.HEPRecord)
     assert "date" not in record
     assert "authors" not in record
@@ -184,8 +187,9 @@ def test_no_aff():
     """
     response = fake_response_from_string(body)
-    record = spider.scrape_for_pdf(response).next()
+    parsed_item = spider.scrape_for_pdf(response).next()
+    record = parsed_item.item
 
     assert isinstance(record, hepcrawl.items.HEPRecord)
     assert "date" not in record
     assert "affiliations" not in record["authors"]
@@ -216,8 +220,9 @@ def test_no_spash_page():
     response.status = 404
     response.meta["title"] = parsed_node.meta["title"]
     response.meta["urls"] = parsed_node.meta["urls"]
-    record = spider.scrape_for_pdf(response).next()
+    parsed_item = spider.scrape_for_pdf(response).next()
+    record = parsed_item.item
 
     assert isinstance(record, hepcrawl.items.HEPRecord)
     assert "urls" in record
     assert "title" in record
diff --git a/tests/unit/test_mit.py b/tests/unit/test_mit.py
index 0253d91f..2629dd34 100644
--- a/tests/unit/test_mit.py
+++ b/tests/unit/test_mit.py
@@ -25,9 +25,11 @@ def record():
     """Return scraping results from the MIT spider."""
     spider = mit_spider.MITSpider()
    response = fake_response_from_file('mit/test_splash.html')
-    parsed_record = spider.build_item(response)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.build_item(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -37,7 +39,11 @@ def parsed_node():
     response = fake_response_from_file('mit/test_list.html')
     tag = spider.itertag
     node = get_node(spider, tag, response, rtype="html")
-    return spider.parse_node(response, node).next()
+
+    parsed_item = spider.parse_node(response, node).next()
+    assert parsed_item
+
+    return parsed_item
 
 
 def test_url(parsed_node):
@@ -159,7 +165,11 @@ def supervisors():
     """
     response = fake_response_from_string(body)
-    return spider.build_item(response)
+
+    parsed_item = spider.build_item(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_two_supervisors(supervisors):
diff --git a/tests/unit/test_phenix.py b/tests/unit/test_phenix.py
index 75384350..5a6a5f4c 100644
--- a/tests/unit/test_phenix.py
+++ b/tests/unit/test_phenix.py
@@ -29,9 +29,12 @@ def record():
     response = fake_response_from_file('phenix/test_1.html')
     selector = Selector(response, type='html')
     nodes = selector.xpath('//%s' % spider.itertag)
-    parsed_record = spider.parse_node(response, nodes[0])
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.parse_node(response, nodes[0])
+    assert parsed_item
+
+    return parsed_item.item
+
 
 @pytest.fixture
 def non_thesis():
@@ -49,10 +52,12 @@ def non_thesis():
     node = get_node(spider, '//li', text=body)
     return spider.parse_node(response, node)
 
+
 def test_non_thesis(non_thesis):
     """Test MSc thesis skipping."""
     assert non_thesis is None
 
+
 def test_title(record):
     """Test extracting title."""
     title = "MEASUREMENT OF THE DOUBLE HELICITY ASYMMETRY IN INCLUSIVE $\pi^{0}$ PRODUCTION IN POLARIZED PROTON-PROTON COLLISIONS AT $\sqrt{s}$ = 510 GeV"
@@ -82,6 +87,7 @@ def test_authors(record):
             aff['value'] for aff in record['authors'][index]['affiliations']
         ]
 
+
 def test_pdf_link(record):
     """Test pdf link(s)"""
     files = "http://www.phenix.bnl.gov/phenix/WWW/talk/archive/theses/2015/Guragain_Hari-DISSERTATION.pdf"
diff --git a/tests/unit/test_phil.py b/tests/unit/test_phil.py
index e99064b2..be0da905 100644
--- a/tests/unit/test_phil.py
+++ b/tests/unit/test_phil.py
@@ -33,9 +33,11 @@ def record():
         "http://philpapers.org/go.pl?id=BROBB&proxyId=none&u=http%3A%2F%2Fanalysis.oxfordjournals.org%2Fcontent%2F66%2F3%2F194.full.pdf%2Bhtml%3Fframe%3Dsidebar",
         "http://philpapers.org/go.pl?id=BROBB&proxyId=none&u=http%3A%2F%2Fbrogaardb.googlepages.com%2Ftensedrelationsoffprint.pdf"
     ]
-    parsed_record = spider.build_item(response)
-    assert parsed_record
-    return parsed_record
+
+    parsed_item = spider.build_item(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -48,7 +50,11 @@ def journal():
     response = fake_response_from_file('phil/test_journal.json')
     jsonrecord = json.loads(response.body_as_unicode())
     response.meta["jsonrecord"] = jsonrecord[0]
-    return spider.build_item(response)
+
+    parsed_item = spider.build_item(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 @pytest.fixture
@@ -223,7 +229,10 @@ def splash():
         ]
     }
 
-    return spider.scrape_for_pdf(response)
+    parsed_item = spider.scrape_for_pdf(response)
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_scrape(splash):
diff --git a/tests/unit/test_pos.py b/tests/unit/test_pos.py
index 897a0112..8c9c8e59 100644
--- a/tests/unit/test_pos.py
+++ b/tests/unit/test_pos.py
@@ -51,9 +51,12 @@ def record(scrape_pos_page_body):
     assert response
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
-    record = request.callback(response)
-    processed_record = pipeline.process_item(record, spider)
-    return processed_record
+    iter_record = request.callback(response)
+
+    item = iter_record.next()
+    parsed_item = pipeline.process_item(item, spider)
+
+    return parsed_item
 
 
 def test_titles(record):
diff --git a/tests/unit/test_t2k.py b/tests/unit/test_t2k.py
index 283a02e5..caaaaefc 100644
--- a/tests/unit/test_t2k.py
+++ b/tests/unit/test_t2k.py
@@ -36,9 +36,10 @@ def record():
     splash_response.meta["urls"] = parsed_node.meta["urls"]
     splash_response.meta["authors"] = parsed_node.meta["authors"]
 
-    parsed_record = spider.scrape_for_pdf(splash_response).next()
-    assert parsed_record
-    return parsed_record
+    parsed_item = spider.scrape_for_pdf(splash_response).next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_abstact(record):
@@ -125,9 +126,10 @@ def non_url():
     selector = Selector(response, type='html')
     nodes = selector.xpath('//%s' % spider.itertag)
 
-    parsed_record = spider.parse_node(response, nodes[0]).next()
-    assert parsed_record
-    return parsed_record
+    parsed_item = spider.parse_node(response, nodes[0]).next()
+    assert parsed_item
+
+    return parsed_item.item
 
 
 def test_non_url(non_url):
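
Note on the recurring change above: spider callbacks no longer hand back a bare
record. Every fixture now drains the callback's iterator (the Python 2
`.next()` calls) and unwraps the result via `parsed_item.item` before any field
assertions run. Below is a minimal, self-contained sketch of the shape these
tests assume; the `ParsedItem` class and `fake_parse_node` helper are
illustrative only, since the patch shows just the `.item` attribute and not the
wrapper's actual definition in hepcrawl.

    # Hypothetical stand-in for the wrapper these tests unwrap; the real
    # class lives in hepcrawl, only its `.item` attribute is visible here.
    class ParsedItem(object):
        def __init__(self, item):
            self.item = item  # the scraped record, e.g. a HEPRecord


    def fake_parse_node():
        # Mimics a spider callback that yields wrapped records.
        yield ParsedItem({'titles': [{'title': 'Example'}]})


    iter_item = fake_parse_node()
    parsed_item = iter_item.next()  # Python 2 generator protocol, as in the tests
    assert parsed_item

    record = parsed_item.item       # fixtures return this unwrapped record
    assert 'titles' in record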