diff --git a/collatex-pythonport/collatex/__init__.py b/collatex-pythonport/collatex/__init__.py
index 72cd2d045..a277707f2 100755
--- a/collatex-pythonport/collatex/__init__.py
+++ b/collatex-pythonport/collatex/__init__.py
@@ -7,8 +7,7 @@
 
 from collatex.core_functions import Collation
 from collatex.core_functions import collate
-from collatex.core_functions import collate_pretokenized_json
 
-__all__ = ["Collation", "collate", "collate_pretokenized_json"]
+__all__ = ["Collation", "collate"]
 
 
diff --git a/collatex-pythonport/collatex/collatex_suffix.py b/collatex-pythonport/collatex/collatex_suffix.py
index 4cd5ab4ee..713f7f3ac 100644
--- a/collatex-pythonport/collatex/collatex_suffix.py
+++ b/collatex-pythonport/collatex/collatex_suffix.py
@@ -188,7 +188,7 @@ def __init__(self, occurrences, tokens):
     def debug(self):
         result = []
         for occurrence in self.occurrences:
-            result.append(' '.join(self.tokens[occurrence.token_range.slices().next()]))
+            result.append(' '.join(self.tokens[next(occurrence.token_range.slices())]))
         return result
 
 
diff --git a/collatex-pythonport/collatex/core_classes.py b/collatex-pythonport/collatex/core_classes.py
index 6d62f2a06..9f1d21d92 100644
--- a/collatex-pythonport/collatex/core_classes.py
+++ b/collatex-pythonport/collatex/core_classes.py
@@ -13,7 +13,7 @@
 import re
 from prettytable import PrettyTable
 from textwrap import fill
-from collatex.exceptions import TokenError
+from collatex.exceptions import TokenError, UnsupportedError
 
 class Row(object):
     
@@ -161,20 +161,20 @@ def __repr__(self):
 class Witness(object):
     
     def __init__(self, witnessdata):
+        if 'id' not in witnessdata:
+            raise UnsupportedError("No defined id in witnessdata")
         self.sigil = witnessdata['id']
         self._tokens = []
         if 'content' in witnessdata:
-            self.content = witnessdata['content']
-            # print("Witness "+sigil+" TOKENIZER IS CALLED!")
             tokenizer = WordPunctuationTokenizer()
-            tokens_as_strings = tokenizer.tokenize(self.content)
+            tokens_as_strings = tokenizer.tokenize(witnessdata['content'])
             for token_string in tokens_as_strings:
                 self._tokens.append(Token({'t':token_string}))
         elif 'tokens' in witnessdata:
             for tk in witnessdata['tokens']:
                 self._tokens.append(Token(tk))
-            # TODO no idea what this content string is needed for.
-            self.content = ' '.join([x.token_string for x in self._tokens])
+        else:
+            raise UnsupportedError("No defined content/tokens in witness "+self.sigil)
             
     def tokens(self):
         return self._tokens
diff --git a/collatex-pythonport/collatex/core_functions.py b/collatex-pythonport/collatex/core_functions.py
index c2d2d4709..318e0437f 100644
--- a/collatex-pythonport/collatex/core_functions.py
+++ b/collatex-pythonport/collatex/core_functions.py
@@ -27,10 +27,21 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n
     # check which output format is requested: graph or table
     if output=="graph": 
         return graph
+    
     # create alignment table
     table = AlignmentTable(collation, graph, layout)
+    if collation.pretokenized and not segmentation:
+        token_list = [[tk.token_data for tk in witness.tokens()] for witness in collation.witnesses]
+        # only with segmentation=False
+        # get_tokenized_at could behave differently if segmentation=True
+        table = get_tokenized_at(table, token_list, segmentation=segmentation, layout=layout)
+        # for display purpose, table and html output will return only token 't' (string) and not the full token_data (dict)
+        if output=="table" or output=="html":
+            for row in table.rows:
+                row.cells = [cell["t"] for cell in row.cells]
+    
     if output == "json":
-        return export_alignment_table_as_json(table)
+        return export_alignment_table_as_json(table, layout=layout)
     if output == "html":
         return display_alignment_table_as_HTML(table)
     if output == "table":
@@ -38,48 +49,37 @@ def collate(collation, output="table", layout="horizontal", segmentation=True, n
     else:
         raise Exception("Unknown output type: "+output)
     
-
-
-#TODO: this only works with a table output at the moment
-#TODO: store the tokens on the graph instead
-def collate_pretokenized_json(json, output='table', layout='horizontal', **kwargs):
-    # Takes more or less the same arguments as collate() above, but with some restrictions.
-    # Only output types 'json' and 'table' are supported.
-    if output not in ['json', 'table']:
-        raise UnsupportedError("Output type" + kwargs['output'] + "not supported for pretokenized collation")
-    if 'segmentation' in kwargs and kwargs['segmentation']:
-        raise UnsupportedError("Segmented output not supported for pretokenized collation")
-    kwargs['segmentation'] = False
-
-    # For each witness given, make a 'shadow' witness based on the normalization tokens
-    # that will actually be collated.
-    tokenized_witnesses = []
-    collation = Collation()
-    for witness in json["witnesses"]:
-        collation.add_witness(witness)
-        tokenized_witnesses.append(witness["tokens"])
-    at = collate(collation, output="table", **kwargs)
-    tokenized_at = AlignmentTable(collation, layout=layout)
-    for row, tokenized_witness in zip(at.rows, tokenized_witnesses):
-        new_row = Row(row.header)
+def get_tokenized_at(table, token_list, segmentation=False, layout="horizontal"):
+    tokenized_at = AlignmentTable(Collation(), layout=layout)
+    for witness_row, witness_tokens in zip(table.rows, token_list):
+        new_row = Row(witness_row.header)
         tokenized_at.rows.append(new_row)
-        token_counter = 0
-        for cell in row.cells:
-            if cell != "-":
-                new_row.cells.append(tokenized_witness[token_counter])
-                token_counter+=1
-            else:
-                #TODO: should probably be null or None instead, but that would break the rendering at the moment 
-                new_row.cells.append({"t":"-"})
-    if output=="json":
-        return export_alignment_table_as_json(tokenized_at)
-    if output=="table":
-        # transform JSON objects to "t" form.
-        for row in tokenized_at.rows:
-            row.cells = [cell["t"]  for cell in row.cells]
-        return tokenized_at
-
-def export_alignment_table_as_json(table, indent=None, status=False):
+        counter = 0
+        for cell in witness_row.cells:
+            if cell == "-":
+                # TODO: should probably be null or None instead, but that would break the cell["t"] lookup done for table/html output in collate() at the moment
+                new_row.cells.append({"t" : "-"})
+            # if segmentation=False    
+            else: 
+                new_row.cells.append(witness_tokens[counter])
+                counter+=1
+            # else if segmentation=True
+                ##token_list must be a list of Token instead of a list of dict (update its construction in collate() and the append above)
+                ##the cell["t"] lookup in collate() will not be happy in case of table/html output
+                #string = witness_tokens[counter].token_string
+                #token_counter = 1
+                #while string != cell :
+                #    if counter+token_counter-1 < len(witness_tokens)-1:
+                #        #add token_string of the next token until it is equivalent to the string in the cell
+                #        #if we are not at the last token
+                #        string += ' '+witness_tokens[counter+token_counter].token_string
+                #        token_counter += 1
+                ##there is one list level too many in the output
+                #new_row.cells.append([tk.token_data for tk in witness_tokens[counter:counter+token_counter]])
+                #counter += token_counter
+    return tokenized_at
+
+def export_alignment_table_as_json(table, indent=None, status=False, layout="horizontal"):
     json_output = {}
     json_output["table"]=[]
     sigli = []
@@ -92,6 +92,9 @@ def export_alignment_table_as_json(table, indent=None, status=False):
         for column in table.columns:
             variant_status.append(column.variant)
         json_output["status"]=variant_status
+    if layout=="vertical":
+        # transpose rows into columns; zip(*...) avoids depending on a `row` variable left over from earlier code
+        json_output["table"] = [list(column) for column in zip(*json_output["table"])]
     return json.dumps(json_output, sort_keys=True, indent=indent)
 
 '''
@@ -101,29 +104,40 @@ class Collation(object):
 
     @classmethod
     def create_from_dict(cls, data, limit=None):
+        if "witnesses" not in data:
+            raise UnsupportedError("Json input not valid")
         witnesses = data["witnesses"]
         collation = Collation()
         for witness in witnesses[:limit]:
             # generate collation object from json_data
             collation.add_witness(witness)
+            # determine if data is pretokenized
+            if 'tokens' in witness:
+                collation.pretokenized = True
         return collation
 
+    # JSON input can be a string (create_from_json_string) or a file path (create_from_json_file)
+    @classmethod
+    def create_from_json_string(cls, json_string):
+        data = json.loads(json_string)
+        collation = cls.create_from_dict(data)
+        return collation
+    
     @classmethod
-    # json_data can be a string or a file
-    def create_from_json(cls, json_data):
-        data = json.load(json_data)
+    def create_from_json_file(cls, json_path):
+        with open(json_path, 'r') as json_file:
+            data = json.load(json_file)
         collation = cls.create_from_dict(data)
         return collation
 
     def __init__(self):
         self.witnesses = []
+        self.pretokenized = False
         self.counter = 0
         self.witness_ranges = {}
-        self.combined_string = ""
         self.cached_suffix_array = None
+        self.combined_tokens =[]
 
-    # the tokenization process happens multiple times
-    # and by different tokenizers. This should be fixed
     def add_witness(self, witnessdata):
         # clear the suffix array and LCP array cache
         self.cached_suffix_array = None
@@ -134,9 +148,11 @@ def add_witness(self, witnessdata):
         # the extra one is for the marker token
         self.counter += len(witness.tokens()) +2 # $ + number 
         self.witness_ranges[witness.sigil] = witness_range
-        if not self.combined_string == "":
-            self.combined_string += " $"+str(len(self.witnesses)-1)+ " "
-        self.combined_string += witness.content
+        if len(self.witnesses) > 1:
+            self.combined_tokens.append('$')
+            self.combined_tokens.append(str(len(self.witnesses)-1))
+        for tk in witness.tokens():
+            self.combined_tokens.append(tk.token_string)
 
     def add_plain_witness(self, sigil, content):
         return self.add_witness({'id':sigil, 'content':content})
@@ -146,14 +162,11 @@ def get_range_for_witness(self, witness_sigil):
             raise Exception("Witness "+witness_sigil+" is not added to the collation!")
         return self.witness_ranges[witness_sigil]
 
-    def get_combined_string(self):
-        return self.combined_string
-
     def get_sa(self):
         #NOTE: implemented in a lazy manner, since calculation of the Suffix Array and LCP Array takes time
         if not self.cached_suffix_array:
             # Unit byte is done to skip tokenization in third party library
-            self.cached_suffix_array = SuffixArray(self.tokens, unit=UNIT_BYTE)
+            self.cached_suffix_array = SuffixArray(self.combined_tokens, unit=UNIT_BYTE)
         return self.cached_suffix_array
 
     def get_suffix_array(self):
@@ -164,17 +177,7 @@ def get_lcp_array(self):
         sa = self.get_sa()
         return sa._LCP_values
 
-
     def to_extended_suffix_array(self):
-        return ExtendedSuffixArray(self.tokens, self.get_suffix_array(), self.get_lcp_array())
-
-    @property
-    def tokens(self):
-        #print("COLLATION TOKENIZE IS CALLED!")
-        #TODO: complete set of witnesses is retokenized here!
-        tokenizer = WordPunctuationTokenizer()
-        tokens = tokenizer.tokenize(self.get_combined_string())
-        return tokens
-
+        return ExtendedSuffixArray(self.combined_tokens, self.get_suffix_array(), self.get_lcp_array())
 
 
diff --git a/collatex-pythonport/collatex/suffix_based_scorer.py b/collatex-pythonport/collatex/suffix_based_scorer.py
index cfb2713f2..27c8b468e 100644
--- a/collatex-pythonport/collatex/suffix_based_scorer.py
+++ b/collatex-pythonport/collatex/suffix_based_scorer.py
@@ -147,7 +147,7 @@ def _get_block_witness(self, witness):
                 occurrences.append(occurrence) 
         # sort occurrences on position
         sorted_o = sorted(occurrences, key=attrgetter('lower_end'))
-        block_witness = BlockWitness(sorted_o, self.collation.tokens)
+        block_witness = BlockWitness(sorted_o, self.collation.combined_tokens)
         return block_witness
 
     '''
diff --git a/collatex-pythonport/tests/test_collate_outputs.py b/collatex-pythonport/tests/test_collate_outputs.py
new file mode 100644
index 000000000..cb9c8e5dd
--- /dev/null
+++ b/collatex-pythonport/tests/test_collate_outputs.py
@@ -0,0 +1,275 @@
+'''
+Created on March 24, 2015
+
+@author: Elisa Nury
+'''
+
+import unittest
+from collatex.core_functions import *
+from collatex.exceptions import UnsupportedError
+
+class TestCollate(unittest.TestCase):
+    def test_collate_with_invalid_output(self):
+        data = {"witnesses" :
+            [
+            {"id" : "A", "tokens" :
+                [
+                {"t": "A", "id": 1},
+                {"t": "small"},
+                {"t": "black"},
+                {"t": "cat"}
+                ]
+            },
+            {"id" : "B", "tokens" :
+                [
+                {"t": "A"},
+                {"t": "small"},
+                {"t": "white"},
+                {"t": "kitten.", "n": "cat"}
+                ]
+            }
+            ]
+        }
+        c = Collation.create_from_dict(data)
+        with self.assertRaises(Exception):
+            collate(c, output="xyz")
+    
+    def test_collate_with_empty_collation(self):
+        c = Collation()
+        with self.assertRaises(IndexError):
+            collate(c)
+  
+
+class TestTokenizedJsonOutput(unittest.TestCase):
+    def setUp(self):
+        self.data = {"witnesses" :
+            [
+            {"id" : "A", "tokens" :
+                [
+                {"t": "A", "id": 1},
+                {"t": "small"},
+                {"t": "black"},
+                {"t": "cat"}
+                ]
+            },
+            {"id" : "B", "tokens" :
+                [
+                {"t": "A"},
+                {"t": "small"},
+                {"t": "white"},
+                {"t": "kitten.", "n": "cat"}
+                ]
+            }
+            ]
+        }
+        self.c = Collation.create_from_dict(self.data)
+        self.maxDiff = None
+    
+    #--------------------------------------------------
+    #JSON output
+    def test_tokenized_output_json_segmentationFalse_layoutHorizontal(self):
+        expected = '{"table": [[[{"id": 1, "t": "A"}], [{"t": "small"}], [{"t": "black"}], [{"t": "cat"}]], [[{"t": "A"}], [{"t": "small"}], [{"t": "white"}], [{"n": "cat", "t": "kitten."}]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json", segmentation=False, layout="horizontal")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_json_segmentationFalse_layoutVertical(self):
+        expected = '{"table": [[[{"id": 1, "t": "A"}], [{"t": "A"}]], [[{"t": "small"}], [{"t": "small"}]], [[{"t": "black"}], [{"t": "white"}]], [[{"t": "cat"}], [{"n": "cat", "t": "kitten."}]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json", segmentation=False, layout="vertical")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_json_segmentationTrue_layoutHorizontal(self):
+        expected = '{"table": [[["A small"], ["black"], ["cat"]], [["A small"], ["white"], ["cat"]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json", segmentation=True, layout="horizontal")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_json_segmentationTrue_layoutVertical(self):
+        expected = '{"table": [[["A small"], ["A small"]], [["black"], ["white"]], [["cat"], ["cat"]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json", segmentation=True, layout="vertical")
+        self.assertEqual(output, expected)
+    
+    #--------------------------------------------------
+    #TABLE output
+
+    def test_tokenized_output_table_segmentationFalse_layoutHorizontal(self):
+        expected = """\
++---+---+-------+-------+---------+
+| A | A | small | black | cat     |
+| B | A | small | white | kitten. |
++---+---+-------+-------+---------+"""
+        output = str(collate(self.c, output="table", segmentation=False, layout="horizontal"))
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_table_segmentationFalse_layoutVertical(self):
+        expected = '''\
++-------+---------+
+|   A   |    B    |
++-------+---------+
+|   A   |    A    |
++-------+---------+
+| small |  small  |
++-------+---------+
+| black |  white  |
++-------+---------+
+|  cat  | kitten. |
++-------+---------+'''
+        output = str(collate(self.c, output="table", segmentation=False, layout="vertical"))
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_table_segmentationTrue_layoutHorizontal(self):
+        expected = """\
++---+---------+-------+-----+
+| A | A small | black | cat |
+| B | A small | white | cat |
++---+---------+-------+-----+"""
+        output = str(collate(self.c, output="table", segmentation=True, layout="horizontal"))
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_table_segmentationTrue_layoutVertical(self):
+        expected = '''\
++---------+---------+
+|    A    |    B    |
++---------+---------+
+| A small | A small |
++---------+---------+
+|  black  |  white  |
++---------+---------+
+|   cat   |   cat   |
++---------+---------+'''
+        output = str(collate(self.c, output="table", segmentation=True, layout="vertical"))
+        self.assertEqual(output, expected)
+   
+    #--------------------------------------------------
+    #HTML output
+
+    def test_tokenized_output_html_segmentationFalse_layoutHorizontal(self):
+        expected = '''\
+<table>
+    <tr>
+        <td>A</td>
+        <td>A</td>
+        <td>small</td>
+        <td>black</td>
+        <td>cat</td>
+    </tr>
+    <tr>
+        <td>B</td>
+        <td>A</td>
+        <td>small</td>
+        <td>white</td>
+        <td>kitten.</td>
+    </tr>
+</table>'''
+        output = collate(self.c, output="html", segmentation=False, layout="horizontal")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_html_segmentationFalse_layoutVertical(self):
+        expected = '''\
+<table>
+    <tr>
+        <th>A</th>
+        <th>B</th>
+    </tr>
+    <tr>
+        <td>A</td>
+        <td>A</td>
+    </tr>
+    <tr>
+        <td>small</td>
+        <td>small</td>
+    </tr>
+    <tr>
+        <td>black</td>
+        <td>white</td>
+    </tr>
+    <tr>
+        <td>cat</td>
+        <td>kitten.</td>
+    </tr>
+</table>'''
+        output = collate(self.c, output="html", segmentation=False, layout="vertical")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_html_segmentationTrue_layoutHorizontal(self):
+        expected = '''\
+<table>
+    <tr>
+        <td>A</td>
+        <td>A small</td>
+        <td>black</td>
+        <td>cat</td>
+    </tr>
+    <tr>
+        <td>B</td>
+        <td>A small</td>
+        <td>white</td>
+        <td>cat</td>
+    </tr>
+</table>'''
+        output = collate(self.c, output="html", segmentation=True, layout="horizontal")
+        self.assertEqual(output, expected)
+    
+    def test_tokenized_output_html_segmentationTrue_layoutVertical(self):
+        expected = '''\
+<table>
+    <tr>
+        <th>A</th>
+        <th>B</th>
+    </tr>
+    <tr>
+        <td>A small</td>
+        <td>A small</td>
+    </tr>
+    <tr>
+        <td>black</td>
+        <td>white</td>
+    </tr>
+    <tr>
+        <td>cat</td>
+        <td>cat</td>
+    </tr>
+</table>'''
+        output = collate(self.c, output="html", segmentation=True, layout="vertical")
+        self.assertEqual(output, expected)
+
+    
+    
+    
+#--------------------------------------------------
+#Empty cells output
+
+class TestOutputEmptyCells(unittest.TestCase):
+    def setUp(self):
+        data = {
+      "witnesses" : [
+        {
+          "id" : "A",
+          "tokens" : [
+              { "t" : "A"},
+              { "t" : "black"},
+              { "t" : "cat"}
+          ]
+        },
+        {
+          "id" : "B",
+          "tokens" : [
+              { "t": "A" },
+              { "t": "kitten.", "n": "cat" }
+          ]
+        }
+    ]
+    }
+        self.c = Collation.create_from_dict(data)
+    
+    def test_json_segmentationTrue_output_with_empty_cells(self):
+        expected = '{"table": [[["A"], ["black"], ["cat"]], [["A"], ["-"], ["cat"]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json")
+        self.assertEqual(output, expected)
+    
+    def test_json_segmentationFalse_output_with_empty_cells(self):
+        expected = '{"table": [[[{"t": "A"}], [{"t": "black"}], [{"t": "cat"}]], [[{"t": "A"}], [{"t": "-"}], [{"n": "cat", "t": "kitten."}]]], "witnesses": ["A", "B"]}'
+        output = collate(self.c, output="json", segmentation=False)
+        self.assertEqual(output, expected)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collatex-pythonport/tests/test_collatex_block_witnesses.py b/collatex-pythonport/tests/test_collatex_block_witnesses.py
index 7cb412822..ee7cc3790 100644
--- a/collatex-pythonport/tests/test_collatex_block_witnesses.py
+++ b/collatex-pythonport/tests/test_collatex_block_witnesses.py
@@ -29,16 +29,15 @@ def test_combined_string_hermans_case(self):
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
         collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
         # $ is meant to separate witnesses here
-        self.assertEquals("a b c d F g h i ! K ! q r s t $1 a b c d F g h i ! q r s t", collation.get_combined_string())
+        self.assertEquals("a b c d F g h i ! K ! q r s t $ 1 a b c d F g h i ! q r s t", " ".join(collation.combined_tokens))
     
     # test whether the witness->range mapping works
-    @unit_disabled
     def test_witness_ranges_hermans_case(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
         collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
         self.assertEquals(RangeSet("0-14"), collation.get_range_for_witness("W1"))
-        self.assertEquals(RangeSet("16-28"), collation.get_range_for_witness("W2"))
+        self.assertEquals(RangeSet("17-29"), collation.get_range_for_witness("W2"))
 
 # TODO: re-enable test!    
     # Note: LCP intervals can overlap
@@ -74,14 +73,13 @@ def test_lcp_child_intervals_hermans_case(self):
         _, child_lcp_intervals = collation.get_lcp_intervals()
         self.assertFalse(child_lcp_intervals)
 
-    @unit_disabled
     def test_non_overlapping_blocks_black_cat(self):
         collation = Collation()
         collation.add_plain_witness("W1", "the black cat")
         collation.add_plain_witness("W2", "the black cat")
         algorithm = Scorer(collation)
         blocks = algorithm._get_non_overlapping_repeating_blocks()
-        block1 = Block(RangeSet("0-2, 4-6"))
+        block1 = Block(RangeSet("0-2, 5-7"))
         self.assertEqual([block1], blocks)
 
     #TODO: Fix number of siblings!
@@ -97,17 +95,15 @@ def test_blocks_failing_transposition_use_case_old_algorithm(self):
         block3 = Block(RangeSet("2, 8"))
         self.assertEqual([block1, block2, block3], blocks)
 
-    @unit_disabled
     def test_non_overlapping_blocks_Hermans(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
         collation.add_plain_witness("W2", "a b c d F g h i ! q r s t")
         algorithm = Scorer(collation)
         blocks = algorithm._get_non_overlapping_repeating_blocks()
-        self.assertIn(Block(RangeSet("0-8, 16-24")), blocks) # a b c d F g h i !
-        self.assertIn(Block(RangeSet("11-14, 25-28")), blocks) # q r s t
+        self.assertIn(Block(RangeSet("0-8, 17-25")), blocks) # a b c d F g h i !
+        self.assertIn(Block(RangeSet("11-14, 26-29")), blocks) # q r s t
 
-    @unit_disabled
     def test_blocks_Hermans_case_three_witnesses(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
@@ -115,24 +111,22 @@ def test_blocks_Hermans_case_three_witnesses(self):
         collation.add_plain_witness("W3", "a b c d E g h i ! q r s t")
         algorithm = Scorer(collation)
         blocks = algorithm._get_non_overlapping_repeating_blocks()
-        self.assertIn(Block(RangeSet("0-3, 16-19, 30-33")), blocks) # a b c d
-        self.assertIn(Block(RangeSet("5-7, 21-23, 35-37")), blocks) # g h i
-        self.assertIn(Block(RangeSet("10-14, 24-28, 38-42")), blocks) # ! q r s t
-        self.assertIn(Block(RangeSet("4, 20")), blocks) # F
+        self.assertIn(Block(RangeSet("0-3, 17-20, 32-35")), blocks) # a b c d
+        self.assertIn(Block(RangeSet("5-7, 22-24, 37-39")), blocks) # g h i 
+        self.assertIn(Block(RangeSet("10-14, 25-29, 40-44")), blocks) # ! q r s t
+        self.assertIn(Block(RangeSet("4, 21")), blocks) # F
         
 
     # In the new approach nothing should be split
-    @unit_disabled
     def test_blocks_splitting_token_case(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a c b c")
         collation.add_plain_witness("W2", "a c b")
         algorithm = Scorer(collation)
         blocks = algorithm._get_non_overlapping_repeating_blocks()
-        block1 = Block(RangeSet("0-2, 5-7")) # a c b
+        block1 = Block(RangeSet("0-2, 6-8")) # a c b
         self.assertIn(block1, blocks)
 
-    @unit_disabled
     def test_block_witnesses_Hermans_case_two_witnesses(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
@@ -143,7 +137,6 @@ def test_block_witnesses_Hermans_case_two_witnesses(self):
         block_witness = algorithm._get_block_witness(collation.witnesses[1])
         self.assertEquals(["a b c d F g h i !", "q r s t"], block_witness.debug())
 
-    @unit_disabled
     def test_block_witnesses_Hermans_case(self):
         collation = Collation()
         collation.add_plain_witness("W1", "a b c d F g h i ! K ! q r s t")
@@ -241,4 +234,4 @@ def test_filter_potential_blocks(self):
 
 if __name__ == "__main__":
     #import sys;sys.argv = ['', 'Test.testName']
-    unittest.main()
\ No newline at end of file
+    unittest.main()
diff --git a/collatex-pythonport/tests/test_collation_class.py b/collatex-pythonport/tests/test_collation_class.py
new file mode 100644
index 000000000..fd3d2ac7a
--- /dev/null
+++ b/collatex-pythonport/tests/test_collation_class.py
@@ -0,0 +1,85 @@
+'''
+Created on March 24, 2015
+
+@author: Elisa Nury
+'''
+
+import unittest
+from collatex.core_functions import *
+from collatex.exceptions import UnsupportedError
+from testfixtures import TempDirectory
+import os
+import json
+
+class TestCollationMethods(unittest.TestCase):
+    
+    def test_collation_method_create_from_json_file(self):
+        with TempDirectory() as d:
+            #create a temporary file in a temporary directory
+            d.write('testfile.json', b'{"witnesses" : [{"id" : "A", "content" : "The fox."}, {"id" : "B", "content": "The dog"}]}')
+            c = Collation.create_from_json_file(os.path.join(d.path, 'testfile.json'))
+            self.assertEqual(len(c.witnesses), 2)
+    
+    def test_collation_create_from_dict(self):
+        data = {"witnesses" : [{"id" : "A", "content" : "The fox."}, {"id" : "B", "content": "The dog"}]}
+        c = Collation.create_from_dict(data)
+        self.assertEqual(len(c.witnesses), 2)
+  
+
+class TestCollationFunctions(unittest.TestCase):
+    def setUp(self):
+        data = {
+            'witnesses' : [
+                {
+                    'id' : 'A',
+                    'content' : 'The cat'
+                },
+                {
+                    'id' : 'B',
+                    'tokens' : [
+                        { 't' : 'The'},
+                        { 't' : 'kitten'}
+                    ]
+                }
+            ]
+        }
+        self.c = Collation.create_from_dict(data)
+    
+    def test_collation_function_add_plain_witness(self):
+        self.c.add_plain_witness('C', 'A cat')
+        self.assertEqual(len(self.c.witnesses), 3)
+    
+    def test_collation_function_add_witness(self):
+        witnessdata = {'id': 'C', 'tokens': [{ 't' : 'A'},{ 't' : 'cat'}]}
+        self.c.add_witness(witnessdata)
+        self.assertEqual(len(self.c.witnesses), 3)
+    
+    @unittest.expectedFailure
+    def test_collation_function_add_witnesses_with_same_id(self):
+        witnessdata1 = {'id': 'C', 'tokens': [{ 't' : 'The'},{ 't': 'fox'}]}
+        witnessdata2 = {'id': 'C', 'tokens': [{ 't' : 'The'},{ 't': 'dog'}]}
+        self.c.add_witness(witnessdata1)
+        self.c.add_witness(witnessdata2)
+        self.assertEqual(len(self.c.witnesses), 4)
+        
+        #error in the collation result => there should be an exception raised...
+        #json_result = json.loads(collate(self.c, output='json'))
+        #self.assertEqual(json_result['table'][2][1], 'fox')
+        #self.assertEqual(json_result['table'][3][1], 'dog')
+        self.fail("It should not be possible to add 2 witnesses with the same id")
+    
+    def test_collation_function_get_range_for_witness(self):
+        expected_range_B = RangeSet()
+        expected_range_B.add_range(4, 6)
+        self.assertEqual(self.c.get_range_for_witness('B'), expected_range_B)
+        self.assertRaises(Exception, self.c.get_range_for_witness, 'W')
+    
+    #test other functions?
+    #get suffix array
+    #get sa
+    #get lcp array
+    #to extended suffix array
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collatex-pythonport/tests/test_near_matching_pretokenized.py b/collatex-pythonport/tests/test_near_matching_pretokenized.py
index cad73a67e..7beb7f043 100644
--- a/collatex-pythonport/tests/test_near_matching_pretokenized.py
+++ b/collatex-pythonport/tests/test_near_matching_pretokenized.py
@@ -5,61 +5,63 @@
 '''
 import unittest
 from tests import unit_disabled
-from collatex.core_functions import collate_pretokenized_json
+from collatex.core_functions import Collation, collate
 
 
 class Test(unittest.TestCase):
-    json_in = {
-      "witnesses" : [
-        {
-          "id" : "A",
-          "tokens" : [
-              { "t" : "I", "ref" : 123 },
-              { "t" : "bought" , "adj" : True },
-              { "t" : "this", "id" : "x3" },
-              { "t" : "glass", "id" : "x4" },
-              { "t" : ",", "type" : "punct" },
-              { "t" : "because", "id" : "x5" },
-              { "t" : "it", "id" : "x6" },
-              { "t" : "matches" },
-              { "t" : "those", "id" : "x7" },
-              { "t" : "dinner", "id" : "x8" },
-              { "t" : "plates", "id" : "x9" },
-              { "t" : ".", "type" : "punct" }
-          ]
-        },
-        {
-          "id" : "B",
-          "tokens" : [
-              { "t" : "I" },
-              { "t" : "bought" , "adj" : True },
-              { "t" : "those", "id" : "abc" },
-              { "t" : "glasses", "id" : "xyz" },
-              { "t" : ".", "type" : "punct" }
-          ]
+    def setUp(self):
+        json_in = {
+        "witnesses" : [
+            {
+              "id" : "A",
+              "tokens" : [
+                  { "t" : "I", "ref" : 123 },
+                  { "t" : "bought" , "adj" : True },
+                  { "t" : "this", "id" : "x3" },
+                  { "t" : "glass", "id" : "x4" },
+                  { "t" : ",", "type" : "punct" },
+                  { "t" : "because", "id" : "x5" },
+                  { "t" : "it", "id" : "x6" },
+                  { "t" : "matches" },
+                  { "t" : "those", "id" : "x7" },
+                  { "t" : "dinner", "id" : "x8" },
+                  { "t" : "plates", "id" : "x9" },
+                  { "t" : ".", "type" : "punct" }
+              ]
+            },
+            {
+            "id" : "B",
+            "tokens" : [
+                  { "t" : "I" },
+                  { "t" : "bought" , "adj" : True },
+                  { "t" : "those", "id" : "abc" },
+                  { "t" : "glasses", "id" : "xyz" },
+                  { "t" : ".", "type" : "punct" }
+              ]
+            }
+            ]
         }
-      ]
-    }
+        self.c = Collation.create_from_dict(json_in)
 
     def test_exact_matching(self):
-        result = collate_pretokenized_json(self.json_in)
-        self.assertEquals(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
+        result = collate(self.c, segmentation=False)
+        self.assertEqual(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
                           result.rows[0].to_list())
-        self.assertEquals(["I", "bought", "-", "-", "-", "-", "-", "-", "those", "glasses", "-", "."], result.rows[1].to_list())
+        self.assertEqual(["I", "bought", "-", "-", "-", "-", "-", "-", "those", "glasses", "-", "."], result.rows[1].to_list())
 
     def test_near_matching(self):
-        result = collate_pretokenized_json(self.json_in, near_match=True)
-        self.assertEquals(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
+        result = collate(self.c, segmentation=False, near_match=True)
+        self.assertEqual(["I", "bought", "this", "glass", ",", "because", "it", "matches", "those", "dinner", "plates", "."],
                           result.rows[0].to_list())
-        self.assertEquals(["I", "bought", "those", "glasses", "-", "-", "-", "-", "-", "-", "-", "."], result.rows[1].to_list())
+        self.assertEqual(["I", "bought", "those", "glasses", "-", "-", "-", "-", "-", "-", "-", "."], result.rows[1].to_list())
 
     # Re-enable this one if segmented output is ever supported on tokenized collation
     @unit_disabled
     def test_near_matching_segmented(self):
-        result = collate_pretokenized_json(self.json_in, near_match=True, segmentation=True)
-        self.assertEquals(["I bought", "this glass, because it matches those dinner plates."],
+        result = collate(self.c, near_match=True, segmentation=True)
+        self.assertEqual(["I bought", "this glass, because it matches those dinner plates."],
                           result.rows[0].to_list())
-        self.assertEquals(["I bought", "those glasses."], result.rows[1].to_list())
+        self.assertEqual(["I bought", "those glasses."], result.rows[1].to_list())
 
 
 if __name__ == "__main__":
diff --git a/collatex-pythonport/tests/test_token_class.py b/collatex-pythonport/tests/test_token_class.py
new file mode 100644
index 000000000..fe4088ad0
--- /dev/null
+++ b/collatex-pythonport/tests/test_token_class.py
@@ -0,0 +1,43 @@
+'''
+Created on March 24, 2015
+
+@author: Elisa Nury
+'''
+
+import unittest
+from collatex.core_classes import Token
+from collatex.exceptions import TokenError
+
+
+class TestToken(unittest.TestCase):
+    """Tests for building Token objects from token-data dictionaries."""
+
+    def test_creation_token_t(self):
+        # With a 't' entry and no 'n' entry, token_string is the 't' value.
+        data = {'t': 'fox', 'id': 123}
+        token = Token(data)
+        self.assertEqual(token.token_string, 'fox')
+        self.assertEqual(token.token_data, data)
+
+    def test_creation_token_n(self):
+        # When an 'n' entry is present, token_string is the 'n' value.
+        data = {'t': 'kitten', 'n': 'cat'}
+        token = Token(data)
+        self.assertEqual(token.token_string, 'cat')
+        self.assertEqual(token.token_data, data)
+
+    def test_creation_token_none(self):
+        # Token(None): token_string is empty and token_data stays None.
+        token = Token(None)
+        self.assertEqual(token.token_string, '')
+        self.assertIsNone(token.token_data)
+
+    def test_invalid_token_raises_exception(self):
+        # An empty token-data dict must be rejected with TokenError.
+        data = {}
+        with self.assertRaises(TokenError):
+            Token(data)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collatex-pythonport/tests/test_witness_class.py b/collatex-pythonport/tests/test_witness_class.py
new file mode 100644
index 000000000..8f2e6e33b
--- /dev/null
+++ b/collatex-pythonport/tests/test_witness_class.py
@@ -0,0 +1,60 @@
+'''
+Created on March 24, 2015
+
+@author: Elisa Nury
+'''
+
+import unittest
+from collatex.core_classes import Witness, Token, Tokenizer
+from collatex.exceptions import UnsupportedError, TokenError
+
+
+class TestWitness(unittest.TestCase):
+    """Tests for building Witness objects from witness-data dictionaries."""
+
+    def test_creation_witness_plain(self):
+        # Plain-text 'content': expect 10 tokens, the fourth being 'fox'.
+        data = {'id': 'A', 'content': 'The quick brown fox jumped over the lazy dogs.'}
+        witness = Witness(data)
+        self.assertEqual(witness.sigil, 'A')
+        self.assertEqual(len(witness.tokens()), 10)
+        self.assertEqual(witness.tokens()[3].token_string, 'fox')
+
+    def test_creation_witness_pretokenized(self):
+        # Pretokenized 'tokens': expect one token per list entry.
+        data = {
+            'id': 'B',
+            'tokens': [
+                {'t': 'A', 'ref': 123},
+                {'t': 'black and blue', 'adj': True},
+                {'t': 'cat', 'id': 'xyz'},
+                {'t': 'bird.', 'id': 'abc'},
+            ],
+        }
+        witness = Witness(data)
+        self.assertEqual(witness.sigil, 'B')
+        self.assertEqual(len(witness.tokens()), 4)
+
+    def test_invalid_witness_missing_id(self):
+        # The sigil is stored under 'name' here, so the 'id' key is missing.
+        data = {'name': 'A', 'content': 'The quick brown fox jumped over the lazy dogs.'}
+        self.assertRaises(UnsupportedError, Witness, data)
+
+    def test_invalid_witness_missing_content_tokens(self):
+        # Neither 'content' nor 'tokens' is supplied.
+        data = {'id': 'A'}
+        self.assertRaises(UnsupportedError, Witness, data)
+
+    def test_invalid_witness_content_is_pretokenized(self):
+        # 'content' is pretokenized instead of plain text
+        data = {'id': 'A', 'content': [{'t': 'the'}, {'t': 'fox'}]}
+        self.assertRaises(TypeError, Witness, data)
+
+    def test_invalid_witness_tokens_is_plain(self):
+        # 'tokens' is plain text instead of pretokenized
+        data = {'id': 'A', 'tokens': 'The quick brown fox jumped over the lazy dogs.'}
+        self.assertRaises(TokenError, Witness, data)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/collatex-pythonport/tests/test_witness_tokens.py b/collatex-pythonport/tests/test_witness_tokens.py
index 56e97f437..0a0ff7f07 100644
--- a/collatex-pythonport/tests/test_witness_tokens.py
+++ b/collatex-pythonport/tests/test_witness_tokens.py
@@ -6,7 +6,7 @@
 
 import unittest
 from collatex import Collation
-from collatex.core_functions import collate_pretokenized_json
+from collatex.core_functions import collate
 
 
 class Test(unittest.TestCase):
@@ -52,7 +52,8 @@ def testPretokenizedWitness(self):
                 }
             ]
         }
-        result = collate_pretokenized_json(pretokenized_witness)
+        c = Collation.create_from_dict(pretokenized_witness)
+        result = collate(c, segmentation=False)
         self.assertEqual(len(result.rows[0].to_list()), 4)
         self.assertEqual(len(result.rows[1].to_list()), 4)
         # The second witness should have a token that reads 'mousedog bird'.
diff --git a/collatex-pythonport/use_cases/json-test1.json b/collatex-pythonport/use_cases/json-test1.json
new file mode 100644
index 000000000..a419c0e49
--- /dev/null
+++ b/collatex-pythonport/use_cases/json-test1.json
@@ -0,0 +1,29 @@
+{"witnesses" :
+    [
+        {"id" : "A","tokens" :
+            [
+            {"t" : "The"},
+            {"t" : "quick"},
+            {"t" : "brown"},
+            {"t" : "fox"},
+            {"t" : "jumps"},
+            {"t" : "over"},
+            {"t" : "the"},
+            {"t" : "dog."}
+            ]
+        },
+        
+        {"id" : "B", "tokens" :
+            [
+            {"t" : "The"},
+            {"t" : "brown"},
+            {"t" : "fox"},
+            {"t" : "jumps"},
+            {"t" : "over"},
+            {"t" : "the"},
+            {"t" : "lazy"},
+            {"t" : "dog."}
+            ]
+        }
+    ]
+}
diff --git a/collatex-pythonport/use_cases/json-test2.json b/collatex-pythonport/use_cases/json-test2.json
new file mode 100644
index 000000000..ca26f69af
--- /dev/null
+++ b/collatex-pythonport/use_cases/json-test2.json
@@ -0,0 +1,31 @@
+{"witnesses" :
+    [
+        {"id" : "C","tokens" :
+            [
+            {"t" : "The"},
+            {"t" : "quick"},
+            {"t" : "brown"},
+            {"t" : "fox"},
+            {"t" : "jumps"},
+            {"t" : "over"},
+            {"t" : "the"},
+            {"t" : "dog"},
+            {"t" : "."}
+            ]
+        },
+        
+        {"id" : "D", "tokens" :
+            [
+            {"t" : "The"},
+            {"t" : "brown"},
+            {"t" : "fox"},
+            {"t" : "jumps"},
+            {"t" : "over"},
+            {"t" : "the"},
+            {"t" : "lazy"},
+            {"t" : "dog"},
+            {"t" : "."}
+            ]
+        }
+    ]
+}
diff --git a/collatex-pythonport/use_cases/json-test3.json b/collatex-pythonport/use_cases/json-test3.json
new file mode 100644
index 000000000..dcbee333e
--- /dev/null
+++ b/collatex-pythonport/use_cases/json-test3.json
@@ -0,0 +1,29 @@
+{"witnesses" :
+    [
+        {"id" : "E","tokens" :
+            [
+            {"t" : "The", "id": 1, "n": "the"},
+            {"t" : "quick", "id": 2},
+            {"t" : "brown", "id": 3},
+            {"t" : "fox", "id": 4},
+            {"t" : "jumps", "id": 5},
+            {"t" : "over", "id": 6},
+            {"t" : "the", "id": 7},
+            {"t" : "dog.", "id": 8, "n": "dog"}
+            ]
+        },
+        
+        {"id" : "F", "tokens" :
+            [
+            {"t" : "The"},
+            {"t" : "brown"},
+            {"t" : "fox"},
+            {"t" : "jumps"},
+            {"t" : "over"},
+            {"t" : "the"},
+            {"t" : "lazy"},
+            {"t" : "dog."}
+            ]
+        }
+    ]
+}