diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py
index cacc5590..c2ba80d8 100644
--- a/hepcrawl/crawler2hep.py
+++ b/hepcrawl/crawler2hep.py
@@ -69,7 +69,7 @@ def _normalize_hepcrawl_record(item, source):
     item['titles'] = [{
         'title': item.pop('title', ''),
         'subtitle': item.pop('subtitle', ''),
-        'source': source,
+        'source': item.pop('source', source),
     }]

     item['abstracts'] = [{
@@ -178,7 +178,7 @@ def _filter_affiliation(affiliations):
     for author in crawler_record.get('authors', []):
         builder.add_author(builder.make_author(
-            author['full_name'],
+            full_name=author['full_name'],
             affiliations=_filter_affiliation(author['affiliations']),
         ))
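The first hunk is what makes `test_titles` below expect `'Sissa Medialab'` rather than `'PoS'`: `dict.pop(key, default)` prefers the source carried by the crawled record itself and only falls back to the spider-wide value. A minimal plain-Python sketch of the semantics (not hepcrawl code):

```python
# dict.pop(key, default) returns the record's own value when present,
# and the spider-wide fallback otherwise.
item = {'title': 'Heavy Flavour Physics Review', 'source': 'Sissa Medialab'}
source = 'PoS'  # spider-wide default

titles = [{
    'title': item.pop('title', ''),
    'source': item.pop('source', source),
}]

# 'Sissa Medialab' wins; a record without its own 'source' would get 'PoS'.
assert titles[0]['source'] == 'Sissa Medialab'
```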
# pos_url = "{0}{1}".format(self.pos_proceedings_url, proceedings_identifier) + # yield Request(pos_url, callback=self.scrape_proceedings) - license = get_licenses( - license_text=node.xpath( - ".//metadata/pex-dc/rights/text()" - ).extract_first(), - ) - record.add_value('license', license) + return self.build_conference_paper_item(response) - date, year = self._get_date(node) - if date: - record.add_value('date_published', date) - if year: - record.add_value('journal_year', int(year)) + # def scrape_proceedings(self, response): + # # create proceedings record + # import pytest + # pytest.set_trace() - identifier = node.xpath(".//metadata/pex-dc/identifier/text()").extract_first() - record.add_value('urls', response.meta['pos_url']) - if response.meta['pos_pdf_url']: - record.add_value('additional_files', {'type': "Fulltext", "url": response.meta['pos_pdf_url']}) - if identifier: - pbn = re.split('[()]', identifier) - if len(pbn) == 3: - conf_acronym = pbn[1] - article_id = pbn[2] - record.add_value('journal_title', pbn[0]) - record.add_value('journal_volume', conf_acronym) - record.add_value('journal_artid', article_id) - else: - record.add_value('pubinfo_freetext', identifier) + def build_conference_paper_item(self, response): + """Parse an PoS XML exported file into a HEP record.""" + meta = response.meta + xml_record = meta.get('record') + node = Selector( + text=xml_record, + type="xml" + ) + node.remove_namespaces() + record = HEPLoader( + item=HEPRecord(), + selector=node + ) - language = node.xpath(".//metadata/pex-dc/language/text()").extract_first() - if language: - record.add_value('language', language) + license_text = node.xpath('.//metadata/pex-dc/rights/text()').extract_first() + record.add_value('license', get_licenses(license_text=license_text)) - authors = self._get_authors(node) - if authors: - record.add_value('authors', authors) + date, year = self._get_date(node=node) + record.add_value('date_published', date) + record.add_value('journal_year', year) - extra_data = self._get_extra_data(node) - if extra_data: - record.add_value('extra_data', extra_data) + identifier = node.xpath(".//metadata/pex-dc/identifier/text()").extract_first() + record.add_value('journal_title', self._get_journal_title(identifier=identifier)) + record.add_value('journal_volume', self._get_journal_volume(identifier=identifier)) + record.add_value('journal_artid', self._get_journal_artid(identifier=identifier)) - record.add_value('collections', ['HEP', 'ConferencePaper']) + record.add_xpath('title', '//metadata/pex-dc/title/text()') + record.add_xpath('source', '//metadata/pex-dc/publisher/text()') + record.add_value('external_system_numbers', self._get_ext_systems_number(node=node)) + record.add_value('language', self._get_language(node=node)) + record.add_value('authors', self._get_authors(node=node)) + record.add_value('collections', ['conferencepaper']) + record.add_value('urls', meta.get('pos_url')) parsed_item = ParsedItem( record=record.load_item(), @@ -142,50 +129,68 @@ def build_item(self, response): return parsed_item - def _get_ext_systems_number(self, node): + def _get_conference_paper_pdf_url(self, response): + conference_paper_pdf_url = response.selector.xpath( + "//a[contains(text(),'pdf')]/@href", + ).extract_first() + + return urljoin( + self.conference_paper_url, + conference_paper_pdf_url, + ) + + @staticmethod + def _get_language(node): + language = node.xpath(".//metadata/pex-dc/language/text()").extract_first() + return language if language != 'en' else None + + 
diff --git a/tests/unit/responses/pos/sample_proceedings_page.html b/tests/unit/responses/pos/sample_proceedings_page.html
new file mode 100644
index 00000000..669e77b4
--- /dev/null
+++ b/tests/unit/responses/pos/sample_proceedings_page.html
@@ -0,0 +1,134 @@
+[134 lines of saved HTML: the PoS proceedings page for the 31st
+International Symposium on Lattice Field Theory LATTICE 2013 (Mainz,
+Germany, 29 July - 3 August 2013). The page carries a short description
+of the conference, a dedication of the volume to the memory of Nobel
+Laureate Kenneth G. Wilson (June 8, 1936 - June 15, 2013), a session
+index (Preface; Plenary sessions; Algorithms and Machines; Applications
+beyond QCD; Physics beyond the Standard Model; Chiral Symmetry;
+Non-zero Temperature and Density; Hadron Spectroscopy and Interactions;
+Hadron Structure; Standard Model Parameters and Renormalization;
+Theoretical Developments; Vacuum Structure and Confinement; Weak Decays
+and Matrix Elements; Special Session: Coding Efforts; Posters), and
+per-contribution rows with a PoS identifier, a pdf link and the author
+list, e.g. PoS(LATTICE 2013)503 "Foreword" (H. Wittig),
+PoS(LATTICE 2013)504 "Ken Wilson Obituary" (A. Kronfeld),
+PoS(LATTICE 2013)001 "Heavy Flavour Physics Review" (A. El-Khadra), and
+PoS(LATTICE 2013)500 "Charmonium, $D_s$ and $D_s^*$ from overlap
+fermion on domain wall fermion configurations" (Y.b. Yang et al.).]
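The commented-out proceedings flow in `scrape_conference_paper` is meant to run against a page like this fixture. A standalone sketch of that extraction step, assuming the saved page keeps a `?confid=` link as on the live site:

```python
import io

from scrapy.selector import Selector

with io.open(
    'tests/unit/responses/pos/sample_proceedings_page.html',
    encoding='utf-8',
) as fixture:
    selector = Selector(text=fixture.read())

# Same XPath as the commented-out block in the spider: the first anchor
# whose href carries a '?confid' query parameter identifies the proceedings.
href = selector.xpath("//a[contains(@href,'?confid')]/@href").extract_first()
proceedings_identifier = href.split('=')[1]
```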
diff --git a/tests/unit/test_pos.py b/tests/unit/test_pos.py
index bea29b34..9d583471 100644
--- a/tests/unit/test_pos.py
+++ b/tests/unit/test_pos.py
@@ -22,6 +22,12 @@ from hepcrawl.testlib.fixtures import fake_response_from_file


+def override_generated_fields(record):
+    record['acquisition_source']['datetime'] = '2017-08-10T16:03:59.091110'
+
+    return record
+
+
 @pytest.fixture
 def scrape_pos_page_body():
     return pkg_resources.resource_string(
@@ -35,8 +41,13 @@ def scrape_pos_page_body():


 @pytest.fixture
-def record(scrape_pos_page_body):
+def generated_record(scrape_pos_page_body):
     """Return results generator from the PoS spider."""
+    # environment variables needed for the pipeline payload
+    os.environ['SCRAPY_JOB'] = 'scrapy_job'
+    os.environ['SCRAPY_FEED_URI'] = 'scrapy_feed_uri'
+    os.environ['SCRAPY_LOG_FILE'] = 'scrapy_log_file'
+
     crawler = Crawler(spidercls=pos_spider.POSSpider)
     spider = pos_spider.POSSpider.from_crawler(crawler)
     request = spider.parse(
@@ -49,6 +60,7 @@
         **{'encoding': 'utf-8'}
     )
     assert response
+
     pipeline = InspireCeleryPushPipeline()
     pipeline.open_spider(spider)
     parsed_item = request.callback(response)
@@ -58,42 +70,42 @@

     return parsed_record


-def test_titles(record):
+def test_titles(generated_record):
     """Test extracting title."""
     expected_titles = [
         {
-            'source': 'PoS',
+            'source': 'Sissa Medialab',
             'title': 'Heavy Flavour Physics Review',
         }
     ]

-    assert 'titles' in record
-    assert record['titles'] == expected_titles
+    assert 'titles' in generated_record
+    assert generated_record['titles'] == expected_titles


-def test_license(record):
+def test_license(generated_record):
     """Test extracting license information."""
     expected_license = [{
         'license': 'CC-BY-NC-SA-3.0',
         'url': 'https://creativecommons.org/licenses/by-nc-sa/3.0',
     }]
-    assert record['license'] == expected_license
+    assert generated_record['license'] == expected_license


-def test_collections(record):
+def test_collections(generated_record):
     """Test extracting collections."""
     expected_document_type = ['conference paper']

-    assert record.get('citeable')
-    assert record.get('document_type') == expected_document_type
+    assert generated_record.get('citeable')
+    assert generated_record.get('document_type') == expected_document_type


-def test_language(record):
+def test_language(generated_record):
     """Test extracting language."""
-    assert 'language' not in record
+    assert 'language' not in generated_record


-def test_publication_info(record):
+def test_publication_info(generated_record):
     """Test extracting dois."""
     expected_pub_info = [{
         'artid': '001',
@@ -102,13 +114,13 @@
         'year': 2014,
     }]

-    assert 'publication_info' in record
+    assert 'publication_info' in generated_record

-    pub_info = record['publication_info']
+    pub_info = generated_record['publication_info']
     assert pub_info == expected_pub_info


-def test_authors(record):
+def test_authors(generated_record):
     """Test authors."""
     expected_authors = [
         {
@@ -121,12 +133,72 @@
         }
     ]

-    assert 'authors' in record
+    assert 'authors' in generated_record

-    result_authors = record['authors']
+    result_authors = generated_record['authors']

     assert len(result_authors) == len(expected_authors)

     # here we are making sure order is kept
     for author, expected_author in zip(result_authors, expected_authors):
         assert author == expected_author
+
+
+def test_pipeline_record(generated_record):
+    expected = {
+        'acquisition_source': {
+            'datetime': '2017-08-10T16:03:59.091110',
+            'method': 'hepcrawl',
+            'source': 'PoS',
+            'submission_number': 'scrapy_job'
+        },
+        'authors': [
+            {
+                'affiliations': [
+                    {
+                        'value': u'INFN and Universit\xe0 di Firenze'
+                    }
+                ],
+                'full_name': u'El-Khadra, Aida'
+            },
+            {
+                'affiliations': [
+                    {
+                        'value': u'U of Pecs'
+                    }
+                ],
+                'full_name': u'MacDonald, M.T.'
+            }
+        ],
+        'citeable': True,
+        'document_type': [
+            'conference paper'
+        ],
+        'imprints': [
+            {
+                'date': '2014-03-19'
+            }
+        ],
+        'license': [
+            {
+                'license': 'CC-BY-NC-SA-3.0',
+                'url': 'https://creativecommons.org/licenses/by-nc-sa/3.0'
+            }
+        ],
+        'publication_info': [
+            {
+                'artid': u'001',
+                'journal_title': u'PoS',
+                'journal_volume': u'LATTICE 2013',
+                'year': 2014
+            }
+        ],
+        'titles': [
+            {
+                'source': u'Sissa Medialab',
+                'title': u'Heavy Flavour Physics Review'
+            }
+        ]
+    }
+
+    assert override_generated_fields(generated_record) == expected
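Two details keep the new whole-record assertion in `test_pipeline_record` deterministic: `override_generated_fields` pins the crawl-time `datetime`, and, judging by the expected payload, the `SCRAPY_JOB` value exported by the `generated_record` fixture is what surfaces as `acquisition_source.submission_number`. The dependency in isolation (values mirror the fixture above):

```python
import os

# Must be set before the pipeline builds its payload, as done in the
# generated_record fixture; SCRAPY_JOB reappears in the record as
# acquisition_source.submission_number ('scrapy_job' in the expected dict).
os.environ['SCRAPY_JOB'] = 'scrapy_job'
os.environ['SCRAPY_FEED_URI'] = 'scrapy_feed_uri'
os.environ['SCRAPY_LOG_FILE'] = 'scrapy_log_file'
```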