diff --git a/docker-compose.test.yml b/docker-compose.test.yml
index 9c80c88d..7ffe0122 100644
--- a/docker-compose.test.yml
+++ b/docker-compose.test.yml
@@ -42,7 +42,7 @@ services:
 
   unit:
     <<: *service_base
-    command: bash -c "py.test tests/unit -vv && make -C docs html && python setup.py sdist && ls dist/*"
+    command: bash -c "py.test tests/unit -vv && make -C docs clean && make -C docs html && python setup.py sdist && ls dist/*"
     links: []
 
   celery:
diff --git a/hepcrawl/crawler2hep.py b/hepcrawl/crawler2hep.py
index 72573847..d6898022 100644
--- a/hepcrawl/crawler2hep.py
+++ b/hepcrawl/crawler2hep.py
@@ -185,6 +185,7 @@ def _filter_affiliation(affiliations):
         journal_title=_pub_info.get('journal_title'),
         journal_volume=_pub_info.get('journal_volume'),
         pubinfo_freetext=_pub_info.get('pubinfo_freetext'),
+        material=_pub_info.get('pubinfo_material'),
     )
 
     for report_number in crawler_record.get('report_numbers', []):
diff --git a/hepcrawl/items.py b/hepcrawl/items.py
index a175ec0b..dab67dda 100644
--- a/hepcrawl/items.py
+++ b/hepcrawl/items.py
@@ -178,6 +178,7 @@ class HEPRecord(scrapy.Item):
 
     Example:
     ::
+
         [
             {
                 'license': license_str,
@@ -209,6 +210,9 @@
     pubinfo_freetext = scrapy.Field()
     """Raw journal reference string."""
 
+    pubinfo_material = scrapy.Field()
+    """Material of publication information."""
+
     publication_info = scrapy.Field()
     """Structured publication information."""
 
diff --git a/hepcrawl/loaders.py b/hepcrawl/loaders.py
index 9ab22450..96fa08ad 100644
--- a/hepcrawl/loaders.py
+++ b/hepcrawl/loaders.py
@@ -122,6 +122,7 @@ class HEPLoader(ItemLoader):
     journal_issue_out = TakeFirst()
     journal_doctype_out = TakeFirst()
     pubinfo_freetext_out = TakeFirst()
+    pubinfo_material_out = TakeFirst()
 
     preprint_date_in = MapCompose(
         parse_date,
diff --git a/hepcrawl/pipelines.py b/hepcrawl/pipelines.py
index fb19a745..62ba867c 100644
--- a/hepcrawl/pipelines.py
+++ b/hepcrawl/pipelines.py
@@ -93,6 +93,7 @@ def process_item(self, item, spider):
                 'page_end': item.pop('journal_lpage', ''),
                 'note': item.pop('journal_doctype', ''),
                 'pubinfo_freetext': item.pop('pubinfo_freetext', ''),
+                'pubinfo_material': item.pop('pubinfo_material', ''),
             }]
             if item.get('journal_year'):
                 item['publication_info'][0]['year'] = int(
@@ -110,6 +111,7 @@
             'journal_doctype',
             'journal_artid',
             'pubinfo_freetext',
+            'pubinfo_material',
         ])
 
         item = crawler2hep(dict(item))
diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py
index dff7c2d0..d82c8318 100644
--- a/hepcrawl/spiders/arxiv_spider.py
+++ b/hepcrawl/spiders/arxiv_spider.py
@@ -64,7 +64,12 @@ def parse_node(self, response, node):
             dois_values=self._get_dois(node=node),
             material='publication',
         )
-        record.add_xpath('pubinfo_freetext', './/journal-ref//text()')
+
+        pubinfo_freetext = node.xpath('.//journal-ref//text()').extract()
+        if pubinfo_freetext:
+            record.add_value('pubinfo_freetext', pubinfo_freetext)
+            record.add_value('pubinfo_material', 'publication')
+
         record.add_value('source', 'arXiv')
 
         authors, collabs = self._get_authors_or_collaboration(node)
diff --git a/tests/unit/responses/arxiv/sample_arxiv_record10_parsed.json b/tests/unit/responses/arxiv/sample_arxiv_record10_parsed.json
index 28b94a15..e591c862 100644
--- a/tests/unit/responses/arxiv/sample_arxiv_record10_parsed.json
+++ b/tests/unit/responses/arxiv/sample_arxiv_record10_parsed.json
@@ -5,7 +5,7 @@
     "results_data": [
         {
             "preprint_date": "2016-06-14",
-            "citeable": true,
+            "citeable": true,
             "license": [
                 {
                     "url": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
diff --git a/tests/unit/test_arxiv_single.py b/tests/unit/test_arxiv_single.py
index 1954436a..a6ed66d6 100644
--- a/tests/unit/test_arxiv_single.py
+++ b/tests/unit/test_arxiv_single.py
@@ -154,6 +154,7 @@ def test_publication_info(results):
     #TODO: check a more complete example
     expected_pub_info = [
         {
+            'material': 'publication',
            'pubinfo_freetext': 'Phys.Rev. D93 (2015) 016005',
        }
    ]