diff --git a/.travis.yml b/.travis.yml index 7682b90c..6bc66b84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,7 @@ env: - SUITE=functional_arxiv - SUITE=functional_desy - SUITE=functional_cds + - SUITE=functional_pos matrix: fast_finish: true diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 65bb864b..9a1df2e0 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -33,22 +33,27 @@ services: functional_wsp: <<: *service_base command: py.test -vv tests/functional/wsp - links: - - scrapyd - - ftp_server + depends_on: + scrapyd: + condition: service_healthy + ftp_server: + condition: service_healthy functional_desy: <<: *service_base command: py.test -vv tests/functional/desy - links: - - scrapyd - - ftp_server + depends_on: + scrapyd: + condition: service_healthy + ftp_server: + condition: service_healthy functional_arxiv: <<: *service_base command: py.test -vv tests/functional/arxiv - links: - - scrapyd + depends_on: + scrapyd: + condition: service_healthy functional_cds: <<: *service_base @@ -56,6 +61,15 @@ services: links: - scrapyd + functional_pos: + <<: *service_base + command: py.test -vv tests/functional/pos + depends_on: + scrapyd: + condition: service_healthy + http-server.local: + condition: service_healthy + unit: <<: *service_base command: bash -c "py.test tests/unit -vv && make -C docs clean && make -C docs html && python setup.py sdist && ls dist/*" @@ -64,14 +78,16 @@ services: celery: <<: *service_base command: celery worker --events --app hepcrawl.testlib.tasks --loglevel=debug - links: - - rabbitmq + depends_on: + rabbitmq: + condition: service_healthy scrapyd: <<: *service_base command: bash -c "rm -f twistd.pid && exec scrapyd" - links: - - celery + depends_on: + celery: + condition: service_started healthcheck: timeout: 5s interval: 5s @@ -83,8 +99,9 @@ services: scrapyd-deploy: <<: *service_base command: bash -c "scrapyd-deploy" - links: - - scrapyd + depends_on: + scrapyd: + condition: service_healthy ftp_server: image: stilliard/pure-ftpd:hardened @@ -96,5 +113,29 @@ services: - ${PWD}/tests/functional/wsp/fixtures/ftp_server/WSP:/home/ftpusers/bob/WSP - ${PWD}/tests/functional/wsp/fixtures/ftp_server/pureftpd.passwd:/etc/pure-ftpd/passwd/pureftpd.passwd + http-server.local: + image: nginx:stable-alpine + volumes: + - ${PWD}/tests/functional/pos/fixtures/https_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf + - ${PWD}/tests/functional/pos/fixtures/https_server/conf/ssl:/etc/nginx/ssl + - ${PWD}/tests/functional/pos/fixtures/https_server/records:/etc/nginx/html/ + ports: + - 443:443 + healthcheck: + timeout: 5s + interval: 5s + retries: 5 + test: + - "CMD-SHELL" + - "curl https://localhost:443/" + rabbitmq: image: rabbitmq + healthcheck: + timeout: 5s + interval: 5s + retries: 5 + test: + - "CMD" + - "rabbitmqctl" + - "status" diff --git a/hepcrawl/items.py b/hepcrawl/items.py index dab67dda..09d0d552 100644 --- a/hepcrawl/items.py +++ b/hepcrawl/items.py @@ -44,15 +44,17 @@ class HEPRecord(scrapy.Item): file_urls = scrapy.Field() """List of files to be downloaded with FilesPipeline and added to files.""" - additional_files = scrapy.Field() + documents = scrapy.Field() """Files (fulltexts, package) belonging to this item. 
Example: :: [{ - "type": "Fulltext", # Fulltext, Supplemental, Data, Figure - "uri": "file:///path/to/file", # can also be HTTP + "fulltext": true, + "url": "file:///path/to/file", + "description": "some fancy stuff", + "key": "usually_a_file_name.pdf", }] """ diff --git a/hepcrawl/middlewares.py b/hepcrawl/middlewares.py index 99551e93..ecd518f4 100644 --- a/hepcrawl/middlewares.py +++ b/hepcrawl/middlewares.py @@ -116,10 +116,23 @@ def _has_to_be_crawled(self, request, spider): request_db_key = self._get_key(request) if request_db_key not in self.db: + LOGGER.debug( + 'Crawl-Once: key %s for request %s not found in the db, ' + 'should be crawled.' % (request_db_key, request) + ) return True new_file_timestamp = self._get_timestamp(request, spider) old_file_timestamp = self.db.get(key=request_db_key) + LOGGER.debug( + 'Crawl-Once: key %s for request %s found in the db, ' + 'considering timestamps new(%s) and old(%s).' % ( + request_db_key, + request, + new_file_timestamp, + old_file_timestamp, + ) + ) return new_file_timestamp > old_file_timestamp def _get_key(self, request): diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py index ef075e4d..c051c8ee 100644 --- a/hepcrawl/spiders/edp_spider.py +++ b/hepcrawl/spiders/edp_spider.py @@ -312,7 +312,7 @@ def build_item_rich(self, response): # NOTE: maybe this should be removed as the 'rich' format records # are not open access. record.add_value( - "additional_files", + "documents", self._create_file( get_first(response.meta["pdf_links"]), "INSPIRE-PUBLIC", @@ -384,7 +384,7 @@ def build_item_jats(self, response): if "pdf_links" in response.meta: record.add_value( - "additional_files", + "documents", self._create_file( get_first(response.meta["pdf_links"]), "INSPIRE-PUBLIC", diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py index 3f1fe0c6..e2d4e919 100644 --- a/hepcrawl/spiders/elsevier_spider.py +++ b/hepcrawl/spiders/elsevier_spider.py @@ -995,7 +995,10 @@ def build_item(self, response): xml_file = response.meta.get("xml_url") if xml_file: - record.add_value('additional_files', self.add_file(xml_file, "HIDDEN", "Fulltext")) + record.add_value( + 'documents', + self.add_file(xml_file, "HIDDEN", "Fulltext"), + ) sd_url = self._get_sd_url(xml_file) if requests.head(sd_url).status_code == 200: # Test if valid url record.add_value("urls", sd_url) diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py index 999b7183..5f81f5b4 100644 --- a/hepcrawl/spiders/hindawi_spider.py +++ b/hepcrawl/spiders/hindawi_spider.py @@ -154,13 +154,13 @@ def get_journal_pages(node): else: return journal_pages, '' - def create_file(self, file_path, file_access, file_type): - """Create a structured dictionary to add to 'files' item.""" + def create_document(self, file_path): + """Create a structured dictionary to add to 'documents' item.""" file_dict = { - "access": file_access, + "hidden": True, "description": self.name.upper(), "url": file_path, - "type": file_type, + "fulltext": True, } return file_dict @@ -219,9 +219,9 @@ def parse_node(self, response, node): record.add_value('file_urls', pdf_links) if xml_links: record.add_value( - 'additional_files', + 'documents', [ - self.create_file(xml, "INSPIRE-HIDDEN", "Fulltext") + self.create_document(xml) for xml in xml_links ] ) diff --git a/hepcrawl/spiders/infn_spider.py b/hepcrawl/spiders/infn_spider.py index 23e71708..2e093ab1 100644 --- a/hepcrawl/spiders/infn_spider.py +++ b/hepcrawl/spiders/infn_spider.py @@ -232,7 +232,10 
@@ def build_item(self, response): pdf_files = response.meta.get("pdf_links") if pdf_files: - record.add_value('additional_files', self.add_file(pdf_files, "HIDDEN", "Fulltext")) + record.add_value( + 'documents', + self.add_file(pdf_files, "HIDDEN", "Fulltext"), + ) record.add_value('authors', response.meta.get("authors")) record.add_value('date_published', response.meta.get("date_published")) record.add_value('thesis', response.meta.get("thesis_info")) diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py index ee778e58..fbca3ae5 100644 --- a/hepcrawl/spiders/iop_spider.py +++ b/hepcrawl/spiders/iop_spider.py @@ -152,13 +152,13 @@ def get_pdf_path(self, vol, issue, fpage): if pattern in pdf_path: return os.path.join(self.pdf_files, pdf_path) - def add_file(self, file_path, file_access, file_type): + def add_document(self, file_path, hidden, fulltext): """Create a structured dictionary and add to 'files' item.""" file_dict = { - "access": file_access, + "hidden": hidden, + "fulltext": fulltext, "description": self.name.upper(), "url": file_path, - "type": file_type, } return file_dict @@ -206,21 +206,25 @@ def parse_node(self, response, node): record.add_value('collections', self.get_collections(doctype)) xml_file_path = response.url - record.add_value("additional_files", - self.add_file(xml_file_path, "INSPIRE-HIDDEN", "Fulltext")) + record.add_value( + "documents", + self.add_document(xml_file_path, hidden=True, fulltext=True), + ) if self.pdf_files: pdf_file_path = self.get_pdf_path(volume, issue, fpage) if pdf_file_path: if doctype and "erratum" in doctype.lower(): - file_type = "Erratum" + fulltext = False else: - file_type = "Fulltext" + fulltext = True if journal_title in self.OPEN_ACCESS_JOURNALS: - file_access = "INSPIRE-PUBLIC" # FIXME: right? 
+ hidden = False else: - file_access = "INSPIRE-HIDDEN" - record.add_value("additional_files", - self.add_file(pdf_file_path, file_access, file_type)) + hidden = True + record.add_value( + "documents", + self.add_document(pdf_file_path, hidden=hidden, fulltext=fulltext), + ) parsed_item = ParsedItem( record=record.load_item(), diff --git a/hepcrawl/spiders/magic_spider.py b/hepcrawl/spiders/magic_spider.py index 41687674..8dfd5d51 100644 --- a/hepcrawl/spiders/magic_spider.py +++ b/hepcrawl/spiders/magic_spider.py @@ -177,7 +177,7 @@ def build_item(self, response): record.add_value('title', response.meta.get("title")) record.add_value('urls', response.meta.get("urls")) record.add_value("abstract", response.meta.get("abstract")) - record.add_value("additional_files", response.meta.get("files")) + record.add_value("documents", response.meta.get("files")) record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( diff --git a/hepcrawl/spiders/mit_spider.py b/hepcrawl/spiders/mit_spider.py index 8ba61d89..21804873 100644 --- a/hepcrawl/spiders/mit_spider.py +++ b/hepcrawl/spiders/mit_spider.py @@ -207,8 +207,10 @@ def build_item(self, response): pdf_files = node.xpath(".//table[@id='file-table']//td/a/@href").extract() if pdf_files: - record.add_value('additional_files', self.add_file( - pdf_files, "HIDDEN", "Fulltext")) + record.add_value( + 'documents', + self.add_file(pdf_files, "HIDDEN", "Fulltext"), + ) record.add_value('authors', self.get_authors(node)) record.add_xpath('date_published', "//td[contains(text(), 'dc.date.issued')]/following-sibling::td[1]/text()") diff --git a/hepcrawl/spiders/phenix_spider.py b/hepcrawl/spiders/phenix_spider.py index a5fcd025..aa54bd98 100644 --- a/hepcrawl/spiders/phenix_spider.py +++ b/hepcrawl/spiders/phenix_spider.py @@ -121,7 +121,10 @@ def parse_node(self, response, node): return None pdf_files = node.xpath(".//a/@href").extract() - record.add_value('additional_files', self.add_file(pdf_files, "HIDDEN", "Fulltext")) + record.add_value( + 'documents', + self.add_file(pdf_files, "HIDDEN", "Fulltext"), + ) record.add_value('authors', self.get_authors(node)) record.add_value('date_published', year) record.add_value('thesis', {'degree_type': thesis_type}) diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py index c388a487..19d4fee5 100644 --- a/hepcrawl/spiders/pos_spider.py +++ b/hepcrawl/spiders/pos_spider.py @@ -12,7 +12,7 @@ from __future__ import absolute_import, division, print_function import re - +import os from urlparse import urljoin from scrapy import Request, Selector @@ -28,112 +28,231 @@ ) +DEFAULT_BASE_URL = 'https://pos.sissa.it' +DEFAULT_BASE_CONFERENCE_PAPER_URL = ( + DEFAULT_BASE_URL + '/contribution?id=' +) +DEFAULT_BASE_PROCEEDINGS_URL = ( + DEFAULT_BASE_URL + '/cgi-bin/reader/conf.cgi?confid=' +) + + +class PoSExtractionException(Exception): + pass + + class POSSpider(StatefulSpider): """POS/Sissa crawler. - Extracts from metadata: - * title - * article-id - * conf-acronym - * authors - * affiliations - * publication-date - * publisher - * license - * language - * link + From PoS we create two types of records, a conference paper record, and a + conference proceedings record. + + The bulk of the records comes from oaiharvest, and this spider crawls the + files generated by it. + + For the conference paper record we also have to scrape the html page of the + record on the PoS website to get the pdf link.
(see + `DEFAULT_BASE_CONFERENCE_PAPER_URL`) + + Then, from that same page, we get the internal conference id. + + With that conference id, we then scrape the conference proceedings page, + and extract the information to create the proceedings record. (see + `DEFAULT_BASE_PROCEEDINGS_URL`) + + To do that, and because each step needs the information of the previous one, + the spider must use the callback system provided by scrapy through the + :class:`scrapy.http.response.Response` callback parameter, and chain the + parser functions. + + The deduplication of the conference proceedings papers is left to the + `HepcrawlCrawlOnceMiddleware` middleware. Example: :: - - $ scrapy crawl PoS -a source_file=file://`pwd`/tests/responses/pos/sample_pos_record.xml + $ scrapy crawl PoS \\ + -a "source_file=file://$PWD/tests/unit/responses/pos/sample_pos_record.xml" """ - name = 'PoS' - pos_base_url = "https://pos.sissa.it/contribution?id=" + name = 'pos' - def __init__(self, source_file=None, **kwargs): - """Construct POS spider.""" + def __init__( + self, + source_file=None, + base_conference_paper_url=DEFAULT_BASE_CONFERENCE_PAPER_URL, + base_proceedings_url=DEFAULT_BASE_PROCEEDINGS_URL, + **kwargs + ): super(POSSpider, self).__init__(**kwargs) self.source_file = source_file + self.base_conference_paper_url = base_conference_paper_url + self.base_proceedings_url = base_proceedings_url def start_requests(self): yield Request(self.source_file) def parse(self, response): - """Get PDF information.""" - node = response.selector - node.remove_namespaces() - for record in node.xpath('.//record'): - identifier = record.xpath('.//metadata/pex-dc/identifier/text()').extract_first() - if identifier: - # Probably all links lead to same place, so take first - pos_url = "{0}{1}".format(self.pos_base_url, identifier) - request = Request(pos_url, callback=self.scrape_pos_page) - request.meta["url"] = response.url - request.meta["record"] = record.extract() - yield request - - def scrape_pos_page(self, response): - """Parse a page for PDF link.""" - response.meta["pos_pdf_url"] = response.selector.xpath( - "//a[contains(text(),'pdf')]/@href" + self.log('Got record from: {response.url}'.format(**vars())) + + response.selector.remove_namespaces() + record_xml_selectors = response.selector.xpath('.//record') + for record_xml_selector in record_xml_selectors: + yield self.get_conference_paper_page_request( + xml_selector=record_xml_selector, + ) + + def get_conference_paper_page_request(self, xml_selector, meta=None): + """Gets the conference paper html page, to extract the pdf link and, + later, the internal conference id.
+ """ + meta = meta or {} + + identifier = xml_selector.xpath( + './/metadata/pex-dc/identifier/text()' + ).extract_first() + conference_paper_url = "{0}{1}".format( + self.base_conference_paper_url, + identifier, + ) + meta['xml_record'] = xml_selector.extract() + + # the meta parameter will be passed over to the callback as a property + # in the response parameter + return Request( + url=conference_paper_url, + callback=self.parse_conference_paper, + meta=meta + ) + + def parse_conference_paper(self, response): + self.log( + 'Parsing conference paper from: {response.url}'.format(**vars()) + ) + xml_record = response.meta.get('xml_record') + conference_paper_url = response.url + conference_paper_pdf_url = self._get_conference_paper_pdf_url( + conference_paper_page_html=response.body, + ) + + parsed_conference_paper = self.build_conference_paper_item( + xml_record=xml_record, + conference_paper_url=conference_paper_url, + conference_paper_pdf_url=conference_paper_pdf_url, + ) + yield parsed_conference_paper + + # prepare next callback step + response.meta['html_record'] = response.body + yield self.get_conference_proceedings_page_request( + meta=response.meta, + ) + + def get_conference_proceedings_page_request(self, meta): + """Gets the conference proceedings page, using the internal conference + id from the record html page retrieved before. + """ + if not meta.get('html_record'): + raise PoSExtractionException( + 'PoS conference paper page was empty, current meta:\n%s' % meta + ) + + proceedings_page_url = self._get_proceedings_page_url( + page_html=meta.get('html_record'), + ) + + page_selector = Selector( + text=meta.get('xml_record'), + type='xml', + ) + page_selector.remove_namespaces() + pos_id = page_selector.xpath( + ".//metadata/pex-dc/identifier/text()" ).extract_first() - response.meta["pos_pdf_url"] = urljoin(self.pos_base_url, response.meta["pos_pdf_url"]) - response.meta["pos_url"] = response.url - return self.build_item(response) - - def build_item(self, response): - """Parse an PoS XML exported file into a HEP record.""" - text = response.meta["record"] - node = Selector(text=text, type="xml") - node.remove_namespaces() - record = HEPLoader(item=HEPRecord(), selector=node) + meta['pos_id'] = pos_id + + return Request( + url=proceedings_page_url, + meta=meta, + callback=self.parse_conference_proceedings, + ) + + def parse_conference_proceedings(self, request): + parsed_conference_proceedings = self.build_conference_proceedings_item( + proceedings_page_html=request.body, + pos_id=request.meta['pos_id'], + ) + yield parsed_conference_proceedings + + def _get_proceedings_page_url(self, page_html): + page_selector = Selector( + text=page_html, + type="html" + ) + internal_url = page_selector.xpath( + "//a[not(contains(text(),'pdf'))]/@href", + ).extract_first() + proceedings_internal_id = internal_url.split('/')[1] + return '{0}{1}'.format( + self.base_proceedings_url, + proceedings_internal_id, + ) + + def build_conference_paper_item( + self, + xml_record, + conference_paper_url, + conference_paper_pdf_url, + ): + selector = Selector( + text=xml_record, + type="xml" + ) + selector.remove_namespaces() + record = HEPLoader( + item=HEPRecord(), + selector=selector + ) + + license_text = selector.xpath( + './/metadata/pex-dc/rights/text()' + ).extract_first() + record.add_value('license', get_licenses(license_text=license_text)) + + date, year = self._get_date(selector=selector) + record.add_value('date_published', date) + record.add_value('journal_year', year) + + identifier =
selector.xpath( + ".//metadata/pex-dc/identifier/text()" + ).extract_first() + record.add_value( + 'journal_title', + self._get_journal_title(pos_ext_identifier=identifier), + ) + record.add_value( + 'journal_volume', + self._get_journal_volume(pos_ext_identifier=identifier), + ) + record.add_value( + 'journal_artid', + self._get_journal_artid(pos_ext_identifier=identifier), + ) + record.add_xpath('title', '//metadata/pex-dc/title/text()') record.add_xpath('source', '//metadata/pex-dc/publisher/text()') - - record.add_value('external_system_numbers', self._get_ext_systems_number(node)) - - license = get_licenses( - license_text=node.xpath( - ".//metadata/pex-dc/rights/text()" - ).extract_first(), - ) - record.add_value('license', license) - - date, year = self._get_date(node) - if date: - record.add_value('date_published', date) - if year: - record.add_value('journal_year', int(year)) - - identifier = node.xpath(".//metadata/pex-dc/identifier/text()").extract_first() - record.add_value('urls', response.meta['pos_url']) - if response.meta['pos_pdf_url']: - record.add_value('additional_files', {'type': "Fulltext", "url": response.meta['pos_pdf_url']}) - if identifier: - pbn = re.split('[()]', identifier) - if len(pbn) == 3: - conf_acronym = pbn[1] - article_id = pbn[2] - record.add_value('journal_title', pbn[0]) - record.add_value('journal_volume', conf_acronym) - record.add_value('journal_artid', article_id) - else: - record.add_value('pubinfo_freetext', identifier) - - language = node.xpath(".//metadata/pex-dc/language/text()").extract_first() - if language: - record.add_value('language', language) - - authors = self._get_authors(node) - if authors: - record.add_value('authors', authors) - - extra_data = self._get_extra_data(node) - if extra_data: - record.add_value('extra_data', extra_data) - - record.add_value('collections', ['HEP', 'ConferencePaper']) + record.add_value( + 'external_system_numbers', + self._get_ext_systems_number(selector=selector), + ) + record.add_value('language', self._get_language(selector=selector)) + record.add_value('authors', self._get_authors(selector=selector)) + record.add_value('collections', ['conferencepaper']) + record.add_value('urls', [conference_paper_url]) + record.add_value( + 'documents', + self.get_documents( + path=conference_paper_pdf_url, + ), + ) parsed_item = ParsedItem( record=record.load_item(), @@ -142,50 +261,165 @@ def build_item(self, response): return parsed_item - def _get_ext_systems_number(self, node): + def build_conference_proceedings_item( + self, + proceedings_page_html, + pos_id, + ): + selector = Selector( + text=proceedings_page_html, + type='html', + ) + selector.remove_namespaces() + record = HEPLoader( + item=HEPRecord(), + selector=selector + ) + + record.add_value('collections', ['proceedings']) + record.add_value( + 'title', + self._get_proceedings_title(selector=selector), + ) + record.add_value( + 'subtitle', + self._get_proceedings_date_place(selector=selector), + ) + record.add_value('journal_title', 'PoS') + record.add_value( + 'journal_volume', + self._get_journal_volume(pos_ext_identifier=pos_id), + ) + + parsed_proceeding = ParsedItem( + record=record.load_item(), + record_format='hepcrawl', + ) + + return parsed_proceeding + + def _get_conference_paper_pdf_url(self, conference_paper_page_html): + selector = Selector( + text=conference_paper_page_html, + type='html', + ) + conference_paper_pdf_relative_url = selector.xpath( + "//a[contains(text(),'pdf')]/@href", + ).extract_first() + + if not 
conference_paper_pdf_relative_url: + raise PoSExtractionException( + ( + 'Unable to get the conference paper pdf url from the html:' + '\n%s' + ) % conference_paper_page_html + ) + + return urljoin( + self.base_conference_paper_url, + conference_paper_pdf_relative_url, + ) + + def _get_proceedings_url(self, response): + internal_url = response.selector.xpath( + "//a[not(contains(text(),'pdf'))]/@href", + ).extract_first() + proceedings_identifier = internal_url.split('/')[1] + return '{0}{1}'.format( + self.base_proceedings_url, + proceedings_identifier, + ) + + @staticmethod + def get_documents(path): return [ { - 'institute': 'PoS', - 'value': node.xpath('.//metadata/pex-dc/identifier/text()').extract_first() + 'key': os.path.basename(path), + 'url': path, + 'original_url': path, + 'hidden': True, + 'fulltext': True, }, + ] + + @staticmethod + def _get_language(selector): + language = selector.xpath( + ".//metadata/pex-dc/language/text()" + ).extract_first() + return language if language != 'en' else None + + @staticmethod + def _get_journal_title(pos_ext_identifier): + return re.split('[()]', pos_ext_identifier)[0] + + @staticmethod + def _get_journal_volume(pos_ext_identifier): + return re.split('[()]', pos_ext_identifier)[1] + + @staticmethod + def _get_journal_artid(pos_ext_identifier): + return re.split('[()]', pos_ext_identifier)[2] + + @staticmethod + def _get_ext_systems_number(selector): + return [ { - 'institute': 'PoS', - 'value': node.xpath('.//identifier/text()').extract_first() + 'institute': 'pos', + 'value': selector.xpath( + './/identifier/text()' + ).extract_first() }, ] - def _get_date(self, node): - """Get article date.""" - date = '' - year = '' - full_date = node.xpath(".//metadata/pex-dc/date/text()").extract_first() + @staticmethod + def _get_date(selector): + full_date = selector.xpath( + ".//metadata/pex-dc/date/text()" + ).extract_first() date = create_valid_date(full_date) - if date: - year = date[0:4] + year = int(date[0:4]) + return date, year - def _get_authors(self, node): + @staticmethod + def _get_authors(selector): """Get article authors.""" - author_selectors = node.xpath('.//metadata/pex-dc/creator') authors = [] - for selector in author_selectors: + creators = selector.xpath('.//metadata/pex-dc/creator') + for creator in creators: auth_dict = {} - author = Selector(text=selector.extract()) - auth_dict['raw_name'] = \ - get_first(author.xpath('.//name//text()').extract(), default='') - for affiliation in author.xpath('.//affiliation//text()').extract(): + author = Selector(text=creator.extract()) + auth_dict['raw_name'] = get_first( + author.xpath('.//name//text()').extract(), + default='', + ) + for affiliation in author.xpath( + './/affiliation//text()' + ).extract(): if 'affiliations' in auth_dict: - auth_dict['affiliations'].append({'value': affiliation}) + auth_dict['affiliations'].append( + { + 'value': affiliation + } + ) else: - auth_dict['affiliations'] = [{'value': affiliation}, ] + auth_dict['affiliations'] = [ + { + 'value': affiliation + }, + ] if auth_dict: authors.append(auth_dict) return authors - def _get_extra_data(self, node): - """Get info to help selection - not for INSPIRE record""" - extra_data = {} + @staticmethod + def _get_proceedings_title(selector): + return 'Proceedings, ' + selector.xpath('//h1/text()').extract_first() - section = node.xpath(".//metadata/pex-dc/description/text()").extract_first() - extra_data['section'] = section.split(';', 1)[-1].strip() - return extra_data + @staticmethod + def 
_get_proceedings_date_place(selector): + date_place = selector.xpath( + "//div[@class='conference_date']/text()" + ).extract() + return ''.join(date_place) diff --git a/hepcrawl/spiders/t2k_spider.py b/hepcrawl/spiders/t2k_spider.py index 4dd495a9..db18eb1e 100644 --- a/hepcrawl/spiders/t2k_spider.py +++ b/hepcrawl/spiders/t2k_spider.py @@ -101,16 +101,16 @@ def get_splash_links(self, node): return out_links - def add_file(self, pdf_files, file_access, file_type): + def add_document(self, pdf_files): """Create a structured dictionary and add to ``files`` item.""" # NOTE: should this be moved to utils? file_dicts = [] for link in pdf_files: file_dict = { - "access": file_access, + "hidden": True, + "fulltext": True, "description": self.name.title(), "url": urljoin(self.domain, link), - "type": file_type, } file_dicts.append(file_dict) return file_dicts @@ -149,7 +149,7 @@ def scrape_for_pdf(self, response): "//a[@class='contenttype-file state-internal url']/@href").extract() response.meta["abstract"] = abstract - response.meta["additional_files"] = self.add_file(file_paths, "HIDDEN", "Fulltext") + response.meta["documents"] = self.add_document(file_paths) return self.build_item(response) @@ -165,7 +165,7 @@ def build_item(self, response): record.add_value('title', response.meta.get("title")) record.add_value('urls', response.meta.get("urls")) record.add_value("abstract", response.meta.get("abstract")) - record.add_value("additional_files", response.meta.get("additional_files")) + record.add_value("documents", response.meta.get("documents")) record.add_value('collections', ['HEP', 'THESIS']) parsed_item = ParsedItem( diff --git a/hepcrawl/tohep.py b/hepcrawl/tohep.py index 980a030d..a4529d7c 100644 --- a/hepcrawl/tohep.py +++ b/hepcrawl/tohep.py @@ -92,7 +92,7 @@ def _normalize_hepcrawl_record(item, source): item['titles'] = [{ 'title': item.pop('title', ''), 'subtitle': item.pop('subtitle', ''), - 'source': source, + 'source': item.pop('source', source), }] item['abstracts'] = [{ @@ -242,13 +242,14 @@ def _filter_affiliation(affiliations): for author in crawler_record.get('authors', []): builder.add_author(builder.make_author( - author['full_name'], + full_name=author['full_name'], affiliations=_filter_affiliation(author['affiliations']), )) for title in crawler_record.get('titles', []): builder.add_title( title=title.get('title'), + subtitle=title.get('subtitle'), source=title.get('source') ) @@ -384,6 +385,20 @@ def _filter_affiliation(affiliations): source=report_number.get('source') ) + for url in crawler_record.get('urls', []): + builder.add_url(url=url.get('value')) + + for document in crawler_record.get('documents', []): + builder.add_document( + description=document.get('description'), + fulltext=document.get('fulltext'), + hidden=document.get('hidden'), + key=document['key'], + material=document.get('material'), + original_url=document.get('original_url'), + url=document['url'], + ) + builder.validate_record() return builder.record diff --git a/tests/functional/desy/test_desy.py b/tests/functional/desy/test_desy.py index 3c309e1f..42a51105 100644 --- a/tests/functional/desy/test_desy.py +++ b/tests/functional/desy/test_desy.py @@ -63,38 +63,14 @@ def _override(field_key, original_dict, backup_dict, new_value): return clean_record -def assert_files_equal(file_1, file_2): - """Compares two files calculating the md5 hash.""" - def _generate_md5_hash(file_path): - hasher = hashlib.md5() - with open(str(file_path), 'rb') as fd: - buf = fd.read() - hasher.update(buf) - return 
hasher.hexdigest() - - file_1_hash = _generate_md5_hash(file_1) - file_2_hash = _generate_md5_hash(file_2) - assert file_1_hash == file_2_hash - - -def assert_ffts_content_matches_expected(record): - for fft_field in record.get('_fft', []): - assert_fft_content_matches_expected(fft_field) - - -def assert_fft_content_matches_expected(fft_field): - expected_file_name = get_file_name_from_fft(fft_field) - assert_files_equal(expected_file_name, fft_field['path']) - - -def get_file_name_from_fft(fft_field): +def get_file_name_from_documents(documents_field): file_path = get_test_suite_path( 'desy', 'fixtures', 'ftp_server', 'DESY', 'FFT', - fft_field['filename'] + fft_field['format'], + documents_field['key'], test_suite='functional', ) return file_path @@ -213,6 +189,3 @@ def test_desy( ) assert gotten_results == expected_results - - for record in gotten_results: - assert_ffts_content_matches_expected(record) diff --git a/tests/functional/pos/fixtures/https_server/conf/proxy.conf b/tests/functional/pos/fixtures/https_server/conf/proxy.conf new file mode 100644 index 00000000..1591cbcd --- /dev/null +++ b/tests/functional/pos/fixtures/https_server/conf/proxy.conf @@ -0,0 +1,25 @@ +server { + listen 443 ssl; + server_name localhost; + + ssl on; + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; + ssl_certificate ssl/cert.pem; + ssl_certificate_key ssl/cert.key; + + location ~ /contribution { + if ($args ~* "^id=(.*)") { + set $mid $1; + set $args ''; + rewrite ^.*$ /$mid.html permanent; + } + } + + location ~ /cgi-bin/reader/conf.cgi { + if ($args ~* "^confid=(.*)") { + set $mid $1; + set $args ''; + rewrite ^.*$ /$mid.html permanent; + } + } +} diff --git a/tests/functional/pos/fixtures/https_server/conf/ssl/cert.key b/tests/functional/pos/fixtures/https_server/conf/ssl/cert.key new file mode 100755 index 00000000..19e1df68 --- /dev/null +++ b/tests/functional/pos/fixtures/https_server/conf/ssl/cert.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQChhBiOoipMRRcc +E5waKrGB01/QtRpfIGp5KmJfnif05dR05wWojHO6EtabZ2qbXtcSuyQ0vRNpbZUU +OzcriFOMk8dujDzuKMkegsq/LE4PyN/R5JZtf34NyGG7v70K6Uq7RV4PUzk+zoum +1McMUBk1QlGP/E9RsDlSPv9XOblUpicPDuwhCwPf4zi6jporgXjDJ/iUuh+bexxv +40R7f2dCWkiHYiNiLNLTwXdYkaWBcc3HoTq9FEZZhYDhWRjX0/TuINmMr5lbUvr6 +UYRABOS4VeUyHpb/e7OH9WXQxzR76LuQFfQDSgs0GxXw1KG58aq+P0ni2E77C4Iu +odQ8iT+jAgMBAAECggEBAIqJeFrXY7p5xIGznEChgBHgUR3+SPlxH4KARVLIoHMh +s2L2SVcx6Y2f3O38/Wb5KTcKx9polz7l3Go3BHJVg3xfwT7kENsipqeB/g+OHALU +BI7PJ+wR3/hIePQGWUsDobMRo8U3WDG0DfryJS09gvG4yabb/tkNc41FNdUGUR31 +7VInQFqv2/jZ/2A3s3DZ0Cns9vJuLhmf7629k3MVCuU7Rh0rStnVCA70kjgKzOfP ++26fnfd/MmrQYbaukw04+cwcwifGkF5Jis80qTWsgdF82rkzpwJLDo0Jd2HZFuOa +AHkWK2QiMzb6PS2Uo7Zarax9E+W2TLahANXZQQ32NAkCgYEAzKw7XbEwzWG/T7yX +EgNIAN7YtcGYr9sfHlVJ8bWYK7GZBbCkKDlGU+YGRE++plh/jtXYjsIFElWtv01Y +UpqBdWf7p8mXdtVoq6YyL5WuQVMwpjKHvegTXXwAoreEXZeKr1LKC11B14h+8wsR +D5uf0GVmdw12nSrzeu3Q4oSgss8CgYEAygU++fItIYuPtZfrC8qDcyEiOLQmAHtX +eTnEHOPy8ik+bdwF5Rg0nzxLu3RZ47ykGdEOzpGRO4B9V1EevwSEzX6VO7latMUS +cLKb3Y0bXm6qQcWG+LAlvyaHfAH0oN47xfScLDiUm6BKd4Eo9kpkgaQzSgUfFZNQ +6DHiA3Emau0CgYEAyel7Y3GjMGomvrXQ3x9HkDxH0/7Z71qe92CyYvZ/2VMKH9fk +Ch5+p9P8CLYW4anapQGH80WqlSzbDCd0Y4EzB6z+UceJWd0stnFtfw4N6znze3HM +AegJ+qaTRfL/bQlL8qwc0Fs+0i9A9enL+fbQEVmHXRl2E5TEwFgOQvkOQ3cCgYAA +4bD6qkHkKZXA9x7BeGrGb9iUYsTfr6ocD1J5xczjnaZ2GEW2UDq6jyrNcJ6LzeDx +c+YapKv7lH33iZUWxFBIDUtdbVul+k4wS7c+akU6TkVT8Ca8oxgnE2X39pI4uX+N +R5n+32hWnYZ1qwygtoZlwm+u3QLbtz7dJIqV9UJzqQKBgQCL8Xo9LA0Dm7ZsdDDI +I93YsjCELvBsonymmD1MTpk7uIA+qH8LAih+Vhonc17NtpXuas8eqc8ntuNLAgON 
+Tylvk32uaRqquHWl6MT7bwaaK7pD8KuOIUJdl5SEc+DDUcB2A2XLg7Yv08Dus8A7 +6J5oH8YJ3hqmVGZzbOo75IFerg== +-----END PRIVATE KEY----- diff --git a/tests/functional/pos/fixtures/https_server/conf/ssl/cert.pem b/tests/functional/pos/fixtures/https_server/conf/ssl/cert.pem new file mode 100755 index 00000000..1418c1bb --- /dev/null +++ b/tests/functional/pos/fixtures/https_server/conf/ssl/cert.pem @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDATCCAemgAwIBAgIJAJRKy2TWwZqTMA0GCSqGSIb3DQEBCwUAMBcxFTATBgNV +BAMMDGh0dHBzX3NlcnZlcjAeFw0xNzA4MTQxNDQ1MTFaFw0yMDA2MDMxNDQ1MTFa +MBcxFTATBgNVBAMMDGh0dHBzX3NlcnZlcjCCASIwDQYJKoZIhvcNAQEBBQADggEP +ADCCAQoCggEBAKGEGI6iKkxFFxwTnBoqsYHTX9C1Gl8gankqYl+eJ/Tl1HTnBaiM +c7oS1ptnapte1xK7JDS9E2ltlRQ7NyuIU4yTx26MPO4oyR6Cyr8sTg/I39Hklm1/ +fg3IYbu/vQrpSrtFXg9TOT7Oi6bUxwxQGTVCUY/8T1GwOVI+/1c5uVSmJw8O7CEL +A9/jOLqOmiuBeMMn+JS6H5t7HG/jRHt/Z0JaSIdiI2Is0tPBd1iRpYFxzcehOr0U +RlmFgOFZGNfT9O4g2YyvmVtS+vpRhEAE5LhV5TIelv97s4f1ZdDHNHvou5AV9ANK +CzQbFfDUobnxqr4/SeLYTvsLgi6h1DyJP6MCAwEAAaNQME4wHQYDVR0OBBYEFAfu +RxroDak/yro7MbRfDogKVDmBMB8GA1UdIwQYMBaAFAfuRxroDak/yro7MbRfDogK +VDmBMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAF5M/Gz6JDC1HoSm +6HFLBB9ul9TQQI3RhohwreCYyeZ866WrvqZfle+lxcgVburYCSyi5paFpvNK3DH2 +J0A2fDAMekZGcaJ7O5Zx0evTCwXoxDOhS+xO5IlGTXWCEKLeLkU27WJiLC9cTbFr +kfjL14IMnsioRzUz4a+aX5JllqnEccCDlHjSk1w5YvOvt6GC6Bvenouja2apPes/ +oJJpFwZVO0epqOQo1ndRGbt5NLv6YgZlvdFXWoKNKohzdfDV/RbW9BrbpyKSxFTm +usrmVcZTQpSf69zbnEVO8N3N6c1zNdETPON1ZGLW1O1MXWkQDZniH6LduXN/Oob7 +vYqvXlw= +-----END CERTIFICATE----- diff --git a/tests/functional/pos/fixtures/https_server/records/187.html b/tests/functional/pos/fixtures/https_server/records/187.html new file mode 100644 index 00000000..0d86221a --- /dev/null +++ b/tests/functional/pos/fixtures/https_server/records/187.html @@ -0,0 +1,125 @@ + + +
[Text content of the new HTML fixture pages under tests/functional/pos/fixtures/https_server/records/; the markup itself is not preserved in this extract.]

Proceedings listing page (187.html):

    The annual lattice symposium brings together a global community of researchers
    from theoretical particle physics and beyond, who employ numerical and
    computational methods to study the properties of strongly interacting physical
    systems, above all Quantum Chromodynamics (QCD), the theory describing the
    interactions of quarks and gluons. Topics include studies of the spectrum and
    structure of hadrons, lattice studies of matter under extreme conditions,
    hadronic contributions to weak decay amplitudes, as well as recent
    developments in simulation algorithms and computer hardware. The 2013
    conference in Mainz was attended by over 500 participants from all over the
    globe, making it the biggest in this series so far.

    This proceedings volume is dedicated to the memory of Nobel Laureate Kenneth
    G. Wilson (June 8, 1936 - June 15, 2013).

    Preface
        Foreword - PoS(LATTICE 2013)503 - pdf
        Ken Wilson Obituary - PoS(LATTICE 2013)504 - pdf
    Plenary sessions
        Heavy Flavour Physics Review - PoS(LATTICE 2013)001 - pdf
        New Developments for Lattice Field Theory at Non-Zero Density - PoS(LATTICE 2013)002 - pdf

Conference paper page:

    Heavy Flavour Physics Review
    A. El-Khadra
    in 31st International Symposium on Lattice Field Theory LATTICE 2013
    Contribution: pdf

Second proceedings listing page (same introductory and dedication paragraphs as above):

    Preface
        Foreword - PoS(LATTICE 2013)503 - pdf
        Ken Wilson Obituary - PoS(LATTICE 2013)504 - pdf
    Plenary sessions
        Heavy Flavour Physics Review - PoS(LATTICE 2013)001 - pdf
        Charmonium, $D_s$ and $D_s^*$ from overlap fermion on domain wall fermion configurations - PoS(LATTICE 2013)500 - pdf
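
For reference, here is a minimal standalone sketch (not part of the patch) of the ``documents`` payload that the new ``POSSpider.get_documents`` helper produces; it mirrors the helper added in this diff, and the pdf url used below is invented purely for illustration: ::

    # Sketch only: restates POSSpider.get_documents from this diff so the shape
    # of a single ``documents`` entry can be inspected in isolation.
    import os


    def get_documents(path):
        # one hidden fulltext document, keyed by the file name of the pdf link
        return [
            {
                'key': os.path.basename(path),
                'url': path,
                'original_url': path,
                'hidden': True,
                'fulltext': True,
            },
        ]


    if __name__ == '__main__':
        # hypothetical pdf url, only to show the resulting keys
        example_pdf_url = 'https://pos.sissa.it/archive/conferences/187/001/LATTICE%202013_001.pdf'
        document = get_documents(example_pdf_url)[0]
        assert document['key'] == 'LATTICE%202013_001.pdf'
        assert document['fulltext'] is True and document['hidden'] is True

An entry with this shape is what the ``hepcrawl/tohep.py`` hunk above forwards to ``builder.add_document(description=..., fulltext=..., hidden=..., key=..., material=..., original_url=..., url=...)`` when converting a crawled record.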