From d58134fc484bfc80455e8f06540358ab8c8db630 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 11:27:34 +0100 Subject: [PATCH 01/40] Initial version of JSON convertor --- tools/sphinx/protobuf-json-docs.py | 147 +++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100755 tools/sphinx/protobuf-json-docs.py diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py new file mode 100755 index 00000000..d9363064 --- /dev/null +++ b/tools/sphinx/protobuf-json-docs.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python + +import sys +import collections + +from google.protobuf.compiler import plugin_pb2 as plugin +import itertools +import json +from google.protobuf.descriptor_pb2 import DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto + +def convert_protodef_to_editable(proto): + class Editable(object): + def __init__(self, prot): + self.kind = type(prot) + self.name = prot.name + self.comment = "" + if isinstance(prot, EnumDescriptorProto): + self.value = [convert_protodef_to_editable(x) for x in prot.value] + elif isinstance(prot, DescriptorProto): + self.field = [convert_protodef_to_editable(x) for x in prot.field] + elif isinstance(prot, EnumValueDescriptorProto): + self.number = prot.number + elif isinstance(prot, FieldDescriptorProto): + self.type = prot.type + else: + raise Exception, type(prot) + + return Editable(proto) + +def traverse(proto_file): + + def _collapse_comments(comments): + return comments["leading_comments"] + comments["trailing_comments"] + + def _traverse(package, items, tree): + for item_index, item in enumerate(items): + item = convert_protodef_to_editable(item) + if item_index in tree: + comments = tree[item_index] + if "leading_comments" in comments or "trailing_comments" in comments: + item.comments = _collapse_comments(comments) + del comments["leading_comments"] + del comments["trailing_comments"] + if item.kind is EnumDescriptorProto: + if 2 in comments: # value in EnumDescriptorProto + for k in comments[2]: + value_comment = comments[2][k] + if value_comment != {}: + item.value[k].comment = _collapse_comments(value_comment) + elif item.kind is DescriptorProto: + if 2 in comments: # field in DescriptorProto + for k in comments[2]: + field_comment = comments[2][k] + if field_comment != {}: + item.field[k].comment = _collapse_comments(field_comment) + else: + raise Exception, item.kind + + yield item, package + + if isinstance(item, DescriptorProto): + for enum in item.enum_type: + yield enum, package + + for nested in item.nested_type: + nested_package = package + item.name + + for nested_item in _traverse(nested, nested_package): + yield nested_item, nested_package + + tree = collections.defaultdict(collections.defaultdict) + for loc in proto_file.source_code_info.location: + if loc.leading_comments or loc.trailing_comments: + place = tree + for p in loc.path: + if not place.has_key(p): + place[p] = collections.defaultdict(collections.defaultdict) + place = place[p] + place["leading_comments"] = loc.leading_comments + place["trailing_comments"] = loc.trailing_comments + + return itertools.chain( + _traverse(proto_file.package, proto_file.enum_type, tree[5]), # 5 is enum_type in FileDescriptorProto + _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is enum_type in FileDescriptorProto + ) + +def generate_code(request, response): + for proto_file in request.proto_file: + output = [] + + # Parse request + for item, package in traverse(proto_file): + data = { 
+ 'package': proto_file.package or '<root>', + 'filename': proto_file.name, + 'name': item.name, + 'doc': item.comment + } + + if item.kind == DescriptorProto: + data.update({ + 'type': 'Message', + 'properties': [{ + 'name': f.name, + 'type': int(f.type), + 'doc': f.comment + } + for f in item.field] + }) + + elif item.kind == EnumDescriptorProto: + data.update({ + 'type': 'Enum', + 'values': [{ + 'name': v.name, + 'value': v.number, + 'doc': v.comment} + for v in item.value] + }) + + output.append(data) + + # Fill response + f = response.file.add() + f.name = proto_file.name + '.json' + f.content = json.dumps(output, indent=2) + + +if __name__ == '__main__': + # Read request message from stdin + data = sys.stdin.read() + + # Parse request + request = plugin.CodeGeneratorRequest() + request.ParseFromString(data) + + # Create response + response = plugin.CodeGeneratorResponse() + + # Generate code + generate_code(request, response) + + # Serialise response message + output = response.SerializeToString() + + # Write to stdout + sys.stdout.write(output) From 4940ac8ee517f98ea95c25944a78c4fbca456ab1 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 13:37:55 +0100 Subject: [PATCH 02/40] Mostly Avro-compliant JSON output from the Protobuf parser --- tools/sphinx/protobuf-json-docs.py | 89 +++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index d9363064..cc882ab9 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -30,7 +30,7 @@ def __init__(self, prot): def traverse(proto_file): def _collapse_comments(comments): - return comments["leading_comments"] + comments["trailing_comments"] + return (comments["leading_comments"] + comments["trailing_comments"]).strip() def _traverse(package, items, tree): for item_index, item in enumerate(items): @@ -38,7 +38,8 @@ def _traverse(package, items, tree): if item_index in tree: comments = tree[item_index] if "leading_comments" in comments or "trailing_comments" in comments: - item.comments = _collapse_comments(comments) + item.comment = _collapse_comments(comments) + #raise Exception, item.__dict__ del comments["leading_comments"] del comments["trailing_comments"] if item.kind is EnumDescriptorProto: @@ -68,9 +69,12 @@ def _traverse(package, items, tree): for nested_item in _traverse(nested, nested_package): yield nested_item, nested_package + import pprint + open("dump", "w").write(pprint.pformat(proto_file.source_code_info)) + tree = collections.defaultdict(collections.defaultdict) for loc in proto_file.source_code_info.location: - if loc.leading_comments or loc.trailing_comments: + if loc.leading_comments or loc.trailing_comments or loc.leading_detached_comments: place = tree for p in loc.path: if not place.has_key(p): @@ -78,47 +82,80 @@ def _traverse(package, items, tree): place = place[p] place["leading_comments"] = loc.leading_comments place["trailing_comments"] = loc.trailing_comments + place["leading_detached_comments"] = loc.leading_detached_comments + + if set(tree.keys()).difference(set([4,5,12])) != set(): + raise Exception, sorted(tree.keys()) - return itertools.chain( - _traverse(proto_file.package, proto_file.enum_type, tree[5]), # 5 is enum_type in FileDescriptorProto - _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is enum_type in FileDescriptorProto - ) + return {"types": + itertools.chain( + _traverse(proto_file.package, proto_file.enum_type, 
tree[5]), # 5 is enum_type in FileDescriptorProto + _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is enum_type in FileDescriptorProto + ), + "file": tree[12] + } def generate_code(request, response): for proto_file in request.proto_file: - output = [] + types = [] - # Parse request - for item, package in traverse(proto_file): + results = traverse(proto_file) + for item, package in results["types"]: data = { - 'package': proto_file.package or '<root>', - 'filename': proto_file.name, 'name': item.name, 'doc': item.comment } if item.kind == DescriptorProto: data.update({ - 'type': 'Message', - 'properties': [{ + 'type': 'message', + 'fields': [] + }) + for f in item.field: + if f.type in [1]: + kind = "double" + elif f.type in [3]: + kind = "long" + elif f.type in [5]: + kind = "integer" + elif f.type in [8]: + kind = "boolean" + elif f.type in [9]: + kind = "string" + elif f.type in [11]: + kind = "message" + elif f.type in [12]: + kind = "bytes" + elif f.type in [14]: + kind = "enum" + else: + raise Exception, f.type + data["fields"].append({ 'name': f.name, - 'type': int(f.type), + 'type': kind, 'doc': f.comment - } - for f in item.field] - }) + }) elif item.kind == EnumDescriptorProto: + comments = ["\n* `%s`: %s"%(v.name, v.comment) for v in item.value] data.update({ - 'type': 'Enum', - 'values': [{ - 'name': v.name, - 'value': v.number, - 'doc': v.comment} - for v in item.value] + 'type': 'enum', + 'symbols': [v.name for v in item.value] }) - - output.append(data) + data["doc"] += " ".join(comments) + + types.append(data) + + if results["file"].has_key("leading_detached_comments"): + comments = "".join(results["file"]["leading_detached_comments"]) + else: + comments = "" + output = { + "types": types, + "protocol": proto_file.name.split("/")[-1].split(".")[0], + 'doc': comments, + "namespace": proto_file.package, + } # Fill response f = response.file.add() From 8e1171eecc6d632802fe3c6680fbad3fc6a14d25 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 13:49:44 +0100 Subject: [PATCH 03/40] Append all the leading_detached_comments to the file-related info --- tools/sphinx/protobuf-json-docs.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index cc882ab9..396aa3df 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -74,7 +74,7 @@ def _traverse(package, items, tree): tree = collections.defaultdict(collections.defaultdict) for loc in proto_file.source_code_info.location: - if loc.leading_comments or loc.trailing_comments or loc.leading_detached_comments: + if loc.leading_comments or loc.trailing_comments: place = tree for p in loc.path: if not place.has_key(p): @@ -82,9 +82,8 @@ def _traverse(package, items, tree): place = place[p] place["leading_comments"] = loc.leading_comments place["trailing_comments"] = loc.trailing_comments - place["leading_detached_comments"] = loc.leading_detached_comments - if set(tree.keys()).difference(set([4,5,12])) != set(): + if set(tree.keys()).difference(set([4,5])) != set(): raise Exception, sorted(tree.keys()) return {"types": @@ -92,7 +91,7 @@ def _traverse(package, items, tree): _traverse(proto_file.package, proto_file.enum_type, tree[5]), # 5 is enum_type in FileDescriptorProto _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is enum_type in FileDescriptorProto ), - "file": tree[12] + "file": ["".join(x.leading_detached_comments) for x in 
proto_file.source_code_info.location if len(x.leading_detached_comments) > 0] } def generate_code(request, response): @@ -146,10 +145,7 @@ def generate_code(request, response): types.append(data) - if results["file"].has_key("leading_detached_comments"): - comments = "".join(results["file"]["leading_detached_comments"]) - else: - comments = "" + comments = "".join(results["file"]).strip() output = { "types": types, "protocol": proto_file.name.split("/")[-1].split(".")[0], From 23ed0f8ca563915743aab33bad40055ae0615f1b Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 14:02:14 +0100 Subject: [PATCH 04/40] Add service support --- tools/sphinx/protobuf-json-docs.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 396aa3df..0b435420 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -6,7 +6,7 @@ from google.protobuf.compiler import plugin_pb2 as plugin import itertools import json -from google.protobuf.descriptor_pb2 import DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto +from google.protobuf.descriptor_pb2 import DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto, ServiceDescriptorProto, MethodDescriptorProto def convert_protodef_to_editable(proto): class Editable(object): @@ -22,6 +22,11 @@ def __init__(self, prot): self.number = prot.number elif isinstance(prot, FieldDescriptorProto): self.type = prot.type + elif isinstance(prot, ServiceDescriptorProto): + self.method = [convert_protodef_to_editable(x) for x in prot.method] + elif isinstance(prot, MethodDescriptorProto): + self.input_type = prot.input_type + self.output_type = prot.output_type else: raise Exception, type(prot) @@ -54,6 +59,12 @@ def _traverse(package, items, tree): field_comment = comments[2][k] if field_comment != {}: item.field[k].comment = _collapse_comments(field_comment) + elif item.kind is ServiceDescriptorProto: + if 2 in comments: # method in ServiceDescriptorProto + for k in comments[2]: + method_comment = comments[2][k] + if method_comment != {}: + item.method[k].comment = _collapse_comments(method_comment) else: raise Exception, item.kind @@ -83,13 +94,14 @@ def _traverse(package, items, tree): place["leading_comments"] = loc.leading_comments place["trailing_comments"] = loc.trailing_comments - if set(tree.keys()).difference(set([4,5])) != set(): + if set(tree.keys()).difference(set([4,5,6])) != set(): raise Exception, sorted(tree.keys()) return {"types": itertools.chain( + _traverse(proto_file.package, proto_file.service, tree[6]), # 5 is enum_type in FileDescriptorProto _traverse(proto_file.package, proto_file.enum_type, tree[5]), # 5 is enum_type in FileDescriptorProto - _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is enum_type in FileDescriptorProto + _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is message_type in FileDescriptorProto ), "file": ["".join(x.leading_detached_comments) for x in proto_file.source_code_info.location if len(x.leading_detached_comments) > 0] } @@ -110,9 +122,11 @@ def generate_code(request, response): 'type': 'message', 'fields': [] }) - for f in item.field: + for f in item.field: # types from FieldDescriptorProto if f.type in [1]: kind = "double" + elif f.type in [2]: + kind = "float" elif f.type in [3]: kind = "long" elif f.type in [5]: @@ -142,6 +156,13 @@ def generate_code(request, 
response): 'symbols': [v.name for v in item.value] }) data["doc"] += " ".join(comments) + elif item.kind == ServiceDescriptorProto: + data.update({ + 'type': 'service', + 'methods': [{"name": m.name, "input": m.input_type[1:], "output": m.output_type[1:]} for m in item.method] + }) + else: + raise Exception, item.kind types.append(data) From 509807fdbf6725aa23bae875ea0507689d1fb321 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 15:05:45 +0100 Subject: [PATCH 05/40] Append "messages" separately rather than merging with types --- tools/sphinx/protobuf-json-docs.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 0b435420..2974d1d9 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -109,6 +109,7 @@ def _traverse(package, items, tree): def generate_code(request, response): for proto_file in request.proto_file: types = [] + messages = {} results = traverse(proto_file) for item, package in results["types"]: @@ -148,7 +149,7 @@ def generate_code(request, response): 'type': kind, 'doc': f.comment }) - + types.append(data) elif item.kind == EnumDescriptorProto: comments = ["\n* `%s`: %s"%(v.name, v.comment) for v in item.value] data.update({ @@ -156,19 +157,26 @@ def generate_code(request, response): 'symbols': [v.name for v in item.value] }) data["doc"] += " ".join(comments) + types.append(data) elif item.kind == ServiceDescriptorProto: - data.update({ - 'type': 'service', - 'methods': [{"name": m.name, "input": m.input_type[1:], "output": m.output_type[1:]} for m in item.method] - }) + for m in item.method: + messages[m.name] = { + "doc": m.comment, + "request": { + "name": "request", + "type": m.input_type[1:], + }, + "response": m.output_type[1:], + "errors" : [ "GAException" ] + } else: raise Exception, item.kind - types.append(data) comments = "".join(results["file"]).strip() output = { "types": types, + "messages": messages, "protocol": proto_file.name.split("/")[-1].split(".")[0], 'doc': comments, "namespace": proto_file.package, From 8c642cbced29e15bfa11884e36a6f421a8769987 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Tue, 14 Jun 2016 16:44:28 +0100 Subject: [PATCH 06/40] Update the Makefile to use the protoc json plugin instead of avro-tools. 
Remove generated *.rst files from git --- .gitignore | 3 +- doc/source/schemas/Makefile | 38 +- .../schemas/alleleAnnotationmethods.rst | 688 ------------------ doc/source/schemas/alleleAnnotations.rst | 561 -------------- doc/source/schemas/common.rst | 107 --- doc/source/schemas/index.rst | 27 +- doc/source/schemas/metadata.rst | 243 ------- doc/source/schemas/metadatamethods.rst | 237 ------ doc/source/schemas/methods.rst | 7 - doc/source/schemas/readmethods.rst | 612 ---------------- doc/source/schemas/reads.rst | 465 ------------ doc/source/schemas/referencemethods.rst | 379 ---------- doc/source/schemas/references.rst | 199 ----- .../schemas/sequenceAnnotationmethods.rst | 457 ------------ doc/source/schemas/sequenceAnnotations.rst | 342 --------- doc/source/schemas/variantmethods.rst | 475 ------------ doc/source/schemas/variants.rst | 297 -------- tools/sphinx/avpr2rest.py | 23 +- tools/sphinx/avrodomain.py | 12 +- tools/sphinx/protobuf-json-docs.py | 4 +- 20 files changed, 48 insertions(+), 5128 deletions(-) delete mode 100644 doc/source/schemas/alleleAnnotationmethods.rst delete mode 100644 doc/source/schemas/alleleAnnotations.rst delete mode 100644 doc/source/schemas/common.rst delete mode 100644 doc/source/schemas/metadata.rst delete mode 100644 doc/source/schemas/metadatamethods.rst delete mode 100644 doc/source/schemas/methods.rst delete mode 100644 doc/source/schemas/readmethods.rst delete mode 100644 doc/source/schemas/reads.rst delete mode 100644 doc/source/schemas/referencemethods.rst delete mode 100644 doc/source/schemas/references.rst delete mode 100644 doc/source/schemas/sequenceAnnotationmethods.rst delete mode 100644 doc/source/schemas/sequenceAnnotations.rst delete mode 100644 doc/source/schemas/variantmethods.rst delete mode 100644 doc/source/schemas/variants.rst diff --git a/.gitignore b/.gitignore index fded6d94..979ea845 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ target *~ #* -doc/source/schemas/*.avpr +doc/source/schemas/*.proto.rst build #********** windows template********** @@ -73,3 +73,4 @@ target/ #********** IntelliJ files ****** *.iml + diff --git a/doc/source/schemas/Makefile b/doc/source/schemas/Makefile index e81bb4fe..1e06308a 100644 --- a/doc/source/schemas/Makefile +++ b/doc/source/schemas/Makefile @@ -1,10 +1,10 @@ -# avdl-to-rst Makefile +# proto-to-rst Makefile # -# GA4GH schema docs are generated from avdl comments. The process is +# GA4GH schema docs are generated from proto comments. The process is # coordinated by this Makefile in these steps: -# * All .avdl files are converted to .avpr using the avro-tools -# package, which is downloaded if needed. -# * All .avpr files are converted to .rst using a python script in +# * All .proto files are converted to .json using the +# protoc json plugin `tools/sphinx/my-plugin.py`. +# * All .json files are converted to .rst using a python script in # schemas/tools. 
@@ -13,30 +13,26 @@ .DELETE_ON_ERROR: CACHE_DIR:=${HOME}/.cache/ga4gh -AVPR_DIR:=/tmp/ga4gh-${UID}/avpr -AVDL_DIR:=../../../src/main/resources/avro +JSON_DIR:=/tmp/ga4gh-${UID}/json +PROTO_BASE_DIR:=../../../src/main/proto +PROTO_DIR:=${PROTO_BASE_DIR}/ga4gh AVPR2REST_PATH:=../../../tools/sphinx/avpr2rest.py -AVRO_JAR_PATH:=${CACHE_DIR}/avro-tools.jar -AVRO_JAR_URL:=http://www.us.apache.org/dist/avro/avro-1.7.7/java/avro-tools-1.7.7.jar +PROTOC_PLUGIN_PATH:=../../../tools/sphinx/protobuf-json-docs.py -AVDL_BASENAMES:=$(subst ${AVDL_DIR}/,,$(wildcard ${AVDL_DIR}/*.avdl)) -AVPR_BASENAMES:=${AVDL_BASENAMES:.avdl=.avpr} -RST_BASENAMES:=${AVDL_BASENAMES:.avdl=.rst} +PROTO_BASENAMES:=$(subst ${PROTO_DIR}/,,$(wildcard ${PROTO_DIR}/*.proto)) +AVPR_BASENAMES:=${PROTO_BASENAMES:.proto=.proto.json} +RST_BASENAMES:=${PROTO_BASENAMES:.proto=.rst} default: ${RST_BASENAMES} +%.proto.json: ${PROTO_DIR}/%.proto + mkdir -p ${JSON_DIR} + protoc --proto_path ${PROTO_BASE_DIR} --plugin=protoc-gen-custom=${PROTOC_PLUGIN_PATH} --custom_out=${JSON_DIR} $< -${AVRO_JAR_PATH}: - mkdir -p ${@D} - curl -o $@ ${AVRO_JAR_URL} - -%.avpr: ${AVDL_DIR}/%.avdl ${AVRO_JAR_PATH} - java -jar ${AVRO_JAR_PATH} idl $< $@ - -%.rst: %.avpr - python ${AVPR2REST_PATH} $< . +%.rst: %.proto.json + python ${AVPR2REST_PATH} ${JSON_DIR}/ga4gh/$< . .PHONY: clean cleaner cleanest clean: diff --git a/doc/source/schemas/alleleAnnotationmethods.rst b/doc/source/schemas/alleleAnnotationmethods.rst deleted file mode 100644 index 67192528..00000000 --- a/doc/source/schemas/alleleAnnotationmethods.rst +++ /dev/null @@ -1,688 +0,0 @@ -AlleleAnnotationMethods -*********************** - - .. function:: searchVariantAnnotationSets(request) - - :param request: SearchVariantAnnotationSetsRequest: This request maps to the body of `POST /variantannotationsets/search` as JSON - :return type: SearchVariantAnnotationSetsResponse - :throws: GAException - -Returns a list of available variant annotation sets -`POST /variantannotationsets/search` must accept a JSON version of -`SearchVariantAnnotationSetsRequest` as the post body and will return a JSON -version of `SearchVariantAnnotationSetsResponse`. - - .. function:: searchVariantAnnotations(request) - - :param request: SearchVariantAnnotationsRequest: This request maps to the body of `POST /variantannotations/search` as JSON. - :return type: SearchVariantAnnotationsResponse - :throws: GAException - -Gets a list of `VariantAnnotations` matching the search criteria. - -`POST /variantannotations/search` must accept a JSON version of -`SearchVariantAnnotationsRequest` as the post body and will return a -JSON version of `SearchVariantAnnotationsResponse`. - - .. function:: getVariantAnnotationSet(id) - - :param id: string: The ID of the `VariantAnnotationSet`. - :return type: org.ga4gh.models.VariantAnnotationSet - :throws: GAException - -Gets an `VariantAnnotationSet` by ID. -`GET /variantannotationsets/{id}` will return a JSON version of -`VariantAnnotationSet`. - -.. avro:error:: GAException - - A general exception type. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. 
- Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. 
- :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: VariantSetMetadata - - :field key: - The top-level key. - :type key: string - :field value: - The value field for simple metadata. - :type value: string - :field id: - User-provided ID field, not enforced by this API. - Two or more pieces of structured metadata with identical - id and key fields are considered equivalent. - `FIXME: If it's not enforced, then why can't it be null?` - :type id: string - :field type: - The type of data. - :type type: string - :field number: - The number of values that can be included in a field described by this - metadata. - :type number: string - :field description: - A textual description of this metadata. - :type description: string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - - Optional metadata associated with a variant set. - -.. avro:record:: VariantSet - - :field id: - The variant set ID. - :type id: string - :field name: - The variant set name. - :type name: null|string - :field datasetId: - The ID of the dataset this variant set belongs to. - :type datasetId: string - :field referenceSetId: - The ID of the reference set that describes the sequences used by the variants in this set. - :type referenceSetId: string - :field metadata: - Optional metadata associated with this variant set. - This array can be used to store information about the variant set, such as information found - in VCF header fields, that isn't already available in first class fields such as "name". - :type metadata: array - - A VariantSet is a collection of variants and variant calls intended to be analyzed together. - -.. avro:record:: CallSet - - :field id: - The call set ID. - :type id: string - :field name: - The call set name. - :type name: null|string - :field sampleId: - The sample this call set's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the sampleId - field in the read groups used to generate this call set. - :type sampleId: null|string - :field variantSetIds: - The IDs of the variant sets this call set has calls in. - :type variantSetIds: array - :field created: - The date this call set was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this call set was last updated in - milliseconds from the epoch. - :type updated: null|long - :field info: - A map of additional call set information. - :type info: map> - - A CallSet is a collection of calls that were generated by the same analysis of the same sample. - -.. avro:record:: Call - - :field callSetName: - The name of the call set this variant call belongs to. - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. 
- :type callSetName: null|string - :field callSetId: - The ID of the call set this variant call belongs to. - - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetId: null|string - :field genotype: - The genotype of this variant call. - - A 0 value represents the reference allele of the associated `Variant`. Any - other value is a 1-based index into the alternate alleles of the associated - `Variant`. - - If a variant had a referenceBases field of "T", an alternateBases - value of ["A", "C"], and the genotype was [2, 1], that would mean the call - represented the heterozygous value "CA" for this variant. If the genotype - was instead [0, 1] the represented value would be "TA". Ordering of the - genotype values is important if the phaseset field is present. - :type genotype: array - :field phaseset: - If this field is not null, this variant call's genotype ordering implies - the phase of the bases and is consistent with any other variant calls on - the same contig which have the same phaseset string. - :type phaseset: null|string - :field genotypeLikelihood: - The genotype likelihoods for this variant call. Each array entry - represents how likely a specific genotype is for this call as - log10(P(data | genotype)), analogous to the GL tag in the VCF spec. The - value ordering is defined by the GL tag in the VCF spec. - :type genotypeLikelihood: array - :field info: - A map of additional variant call information. - :type info: map> - - A `Call` represents the determination of genotype with respect to a - particular `Variant`. - - It may include associated information such as quality - and phasing. For example, a call might assign a probability of 0.32 to - the occurrence of a SNP named rs1234 in a call set with the name NA12345. - -.. avro:record:: Variant - - :field id: - The variant ID. - :type id: string - :field variantSetId: - The ID of the `VariantSet` this variant belongs to. This transitively defines - the `ReferenceSet` against which the `Variant` is to be interpreted. - :type variantSetId: string - :field names: - Names for the variant, for example a RefSNP ID. - :type names: array - :field created: - The date this variant was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this variant was last updated in - milliseconds from the epoch. - :type updated: null|long - :field referenceName: - The reference on which this variant occurs. - (e.g. `chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this variant occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Variants spanning the join of circular genomes are represented as - two variants one on each side of the join (position 0). - :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field referenceBases: - The reference bases for this variant. They start at the given start position. - :type referenceBases: string - :field alternateBases: - The bases that appear instead of the reference bases. Multiple alternate - alleles are possible. 
- :type alternateBases: array - :field info: - A map of additional variant information. - :type info: map> - :field calls: - The variant calls for this particular variant. Each one represents the - determination of genotype with respect to this variant. `Call`s in this array - are implicitly associated with this `Variant`. - :type calls: array - - A `Variant` represents a change in DNA sequence relative to some reference. - For example, a variant could represent a SNP or an insertion. - Variants belong to a `VariantSet`. - This is equivalent to a row in VCF. - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. 
- :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: AnalysisResult - - :field analysisId: - The ID of the analysis record for this result - :type analysisId: string - :field result: - The text-based result for this analysis - :type result: null|string - :field score: - The numeric score for this analysis - :type score: null|int - - An AnalysisResult record holds the output of a prediction package such - as SIFT on a specific allele. - -.. avro:record:: AlleleLocation - - :field start: - Relative start position of the allele in this coordinate system - :type start: int - :field end: - Relative end position of the allele in this coordinate system - :type end: null|int - :field referenceSequence: - Reference sequence in feature (this should be the codon at CDS level) - :type referenceSequence: null|string - :field alternateSequence: - Alternate sequence in feature (this should be the codon at CDS level) - :type alternateSequence: null|string - - An allele location record holds the location of an allele relative to a - non-genomic coordinate system such as a CDS or protein and holds the - reference and alternate sequence where appropriate - -.. avro:record:: VariantAnnotationSet - - :field id: - The ID of the variant annotation set record - :type id: string - :field variantSetId: - The ID of the variant set to which this annotation set belongs - :type variantSetId: string - :field name: - The variant annotation set name. - :type name: null|string - :field analysis: - Analysis details. It is essential to supply versions for all software and - reference data used. - :type analysis: Analysis - - A VariantAnnotationSet record groups VariantAnnotation records. 
It is derived - from a VariantSet and holds information describing the software and reference - data used in the annotation. - -.. avro:record:: HGVSAnnotation - - :field genomic: - :type genomic: null|string - :field transcript: - :type transcript: null|string - :field protein: - :type protein: null|string - - A HGVSAnnotation record holds Human Genome Variation Society descriptions - of the sequence change with respect to genomic, transcript and protein - sequences. See: http://www.hgvs.org/mutnomen/recs.html. - Descriptions should be provided at genomic level. Descriptions at transcript - level should be provided when the allele lies within a transcript. Descriptions - at protein level should be provided when the allele lies within the translated - sequence or stop codon. - -.. avro:record:: TranscriptEffect - - :field id: - The ID of the transcript effect record - :type id: string - :field featureId: - The id of the transcript feature the annotation is relative to - :type featureId: string - :field alternateBases: - Alternate allele - a variant may have more than one alternate allele, - each of which will have distinct annotation. - :type alternateBases: null|string - :field effects: - Effect of variant on this feature - :type effects: array - :field hgvsAnnotation: - Human Genome Variation Society variant descriptions - :type hgvsAnnotation: HGVSAnnotation - :field cDNALocation: - Change relative to cDNA - :type cDNALocation: null|AlleleLocation - :field CDSLocation: - :type CDSLocation: null|AlleleLocation - :field proteinLocation: - Change relative to protein - :type proteinLocation: null|AlleleLocation - :field analysisResults: - Output from prediction packages such as SIFT - :type analysisResults: array - - A transcript effect record is a set of information describing the - effect of an allele on a transcript - -.. avro:record:: VariantAnnotation - - :field id: - The ID of this VariantAnnotation. - :type id: string - :field variantId: - The variant ID. - :type variantId: string - :field variantAnnotationSetId: - The ID of the variant annotation set this record belongs to. - :type variantAnnotationSetId: string - :field createDateTime: - The :ref:`ISO 8601 ` time at which this record was created. - :type createDateTime: null|string - :field transcriptEffects: - The transcript effect annotation for the alleles of this variant. Each one - represents the effect of a single allele on a single transcript. - :type transcriptEffects: array - :field info: - Additional annotation data in key-value pairs. - :type info: map> - - A `VariantAnnotation` record represents the result of comparing a variant - to a set of reference data. - -.. avro:record:: SearchVariantAnnotationsRequest - - :field variantAnnotationSetId: - Required. The ID of the variant annotation set to search over. - :type variantAnnotationSetId: string - :field referenceName: - Only return variants with reference alleles on the reference with this - name. One of this field or `referenceId` or `features` is required. - (case-sensitive, exact match) - :type referenceName: null|string - :field referenceId: - Only return variants with reference alleles on the reference with this - ID. One of this field or `referenceName` or `features` is required. - :type referenceId: null|string - :field start: - Required if referenceName or referenceId supplied. - The beginning of the window (0-based, inclusive) for which variants with - overlapping reference alleles should be returned. 
- Genomic positions are non-negative integers less than reference length. - Requests spanning the join of circular genomes are represented as - two requests one on each side of the join (position 0). - :type start: long - :field end: - Required if referenceName or referenceId supplied. - The end of the window (0-based, exclusive) for which variants with - overlapping reference alleles should be returned. - :type end: long - :field effects: - This filter allows variant, transcript combinations to be extracted by effect - type(s). - Only return variant annotations including any of these effects and only return - transcript effects including any of these effects. Exact matching across all - fields of the Sequence Ontology OntologyTerm is required. - (A transcript effect may have multiple SO effects which will all be reported.) - If null, return all variant annotations. - :type effects: null|array - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /variantannotations/search` as JSON - -.. avro:record:: SearchVariantAnnotationsResponse - - :field variantAnnotations: - The list of matching variant annotations. - :type variantAnnotations: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /variantannotations/search` expressed as JSON. - -.. avro:record:: SearchVariantAnnotationSetsRequest - - :field variantSetId: - Required. The `VariantSet` to search. - :type variantSetId: string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /variantannotationsets/search` as JSON - -.. avro:record:: SearchVariantAnnotationSetsResponse - - :field variantAnnotationSets: - The list of matching variant annotation sets. - :type variantAnnotationSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /variantannotationsets/search` expressed - as JSON. - diff --git a/doc/source/schemas/alleleAnnotations.rst b/doc/source/schemas/alleleAnnotations.rst deleted file mode 100644 index c821c98c..00000000 --- a/doc/source/schemas/alleleAnnotations.rst +++ /dev/null @@ -1,561 +0,0 @@ -AlleleAnnotations -***************** - -This protocol defines types used by the GA4GH Allele Annotation API. - -.. 
avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. 
- * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. 
- :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: VariantSetMetadata - - :field key: - The top-level key. - :type key: string - :field value: - The value field for simple metadata. - :type value: string - :field id: - User-provided ID field, not enforced by this API. - Two or more pieces of structured metadata with identical - id and key fields are considered equivalent. - `FIXME: If it's not enforced, then why can't it be null?` - :type id: string - :field type: - The type of data. - :type type: string - :field number: - The number of values that can be included in a field described by this - metadata. - :type number: string - :field description: - A textual description of this metadata. - :type description: string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - - Optional metadata associated with a variant set. - -.. avro:record:: VariantSet - - :field id: - The variant set ID. - :type id: string - :field name: - The variant set name. - :type name: null|string - :field datasetId: - The ID of the dataset this variant set belongs to. - :type datasetId: string - :field referenceSetId: - The ID of the reference set that describes the sequences used by the variants in this set. - :type referenceSetId: string - :field metadata: - Optional metadata associated with this variant set. 
- This array can be used to store information about the variant set, such as information found - in VCF header fields, that isn't already available in first class fields such as "name". - :type metadata: array - - A VariantSet is a collection of variants and variant calls intended to be analyzed together. - -.. avro:record:: CallSet - - :field id: - The call set ID. - :type id: string - :field name: - The call set name. - :type name: null|string - :field sampleId: - The sample this call set's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the sampleId - field in the read groups used to generate this call set. - :type sampleId: null|string - :field variantSetIds: - The IDs of the variant sets this call set has calls in. - :type variantSetIds: array - :field created: - The date this call set was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this call set was last updated in - milliseconds from the epoch. - :type updated: null|long - :field info: - A map of additional call set information. - :type info: map> - - A CallSet is a collection of calls that were generated by the same analysis of the same sample. - -.. avro:record:: Call - - :field callSetName: - The name of the call set this variant call belongs to. - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetName: null|string - :field callSetId: - The ID of the call set this variant call belongs to. - - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetId: null|string - :field genotype: - The genotype of this variant call. - - A 0 value represents the reference allele of the associated `Variant`. Any - other value is a 1-based index into the alternate alleles of the associated - `Variant`. - - If a variant had a referenceBases field of "T", an alternateBases - value of ["A", "C"], and the genotype was [2, 1], that would mean the call - represented the heterozygous value "CA" for this variant. If the genotype - was instead [0, 1] the represented value would be "TA". Ordering of the - genotype values is important if the phaseset field is present. - :type genotype: array - :field phaseset: - If this field is not null, this variant call's genotype ordering implies - the phase of the bases and is consistent with any other variant calls on - the same contig which have the same phaseset string. - :type phaseset: null|string - :field genotypeLikelihood: - The genotype likelihoods for this variant call. Each array entry - represents how likely a specific genotype is for this call as - log10(P(data | genotype)), analogous to the GL tag in the VCF spec. The - value ordering is defined by the GL tag in the VCF spec. - :type genotypeLikelihood: array - :field info: - A map of additional variant call information. - :type info: map> - - A `Call` represents the determination of genotype with respect to a - particular `Variant`. - - It may include associated information such as quality - and phasing. 
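To make the genotype encoding described above concrete, here is a minimal sketch in plain Python (not part of the schema) that expands a call's genotype indices into allele strings; it reproduces the worked example from the `genotype` field documentation::

    def genotype_to_alleles(reference_bases, alternate_bases, genotype):
        # 0 denotes the reference allele; any other value is a 1-based
        # index into the associated Variant's alternate alleles.
        alleles = [reference_bases] + list(alternate_bases)
        return [alleles[g] for g in genotype]

    # The example from the field description:
    assert genotype_to_alleles("T", ["A", "C"], [2, 1]) == ["C", "A"]  # "CA"
    assert genotype_to_alleles("T", ["A", "C"], [0, 1]) == ["T", "A"]  # "TA"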
For example, a call might assign a probability of 0.32 to - the occurrence of a SNP named rs1234 in a call set with the name NA12345. - -.. avro:record:: Variant - - :field id: - The variant ID. - :type id: string - :field variantSetId: - The ID of the `VariantSet` this variant belongs to. This transitively defines - the `ReferenceSet` against which the `Variant` is to be interpreted. - :type variantSetId: string - :field names: - Names for the variant, for example a RefSNP ID. - :type names: array - :field created: - The date this variant was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this variant was last updated in - milliseconds from the epoch. - :type updated: null|long - :field referenceName: - The reference on which this variant occurs. - (e.g. `chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this variant occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Variants spanning the join of circular genomes are represented as - two variants one on each side of the join (position 0). - :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field referenceBases: - The reference bases for this variant. They start at the given start position. - :type referenceBases: string - :field alternateBases: - The bases that appear instead of the reference bases. Multiple alternate - alleles are possible. - :type alternateBases: array - :field info: - A map of additional variant information. - :type info: map> - :field calls: - The variant calls for this particular variant. Each one represents the - determination of genotype with respect to this variant. `Call`s in this array - are implicitly associated with this `Variant`. - :type calls: array - - A `Variant` represents a change in DNA sequence relative to some reference. - For example, a variant could represent a SNP or an insertion. - Variants belong to a `VariantSet`. - This is equivalent to a row in VCF. - -.. avro:record:: AnalysisResult - - :field analysisId: - The ID of the analysis record for this result - :type analysisId: string - :field result: - The text-based result for this analysis - :type result: null|string - :field score: - The numeric score for this analysis - :type score: null|int - - An AnalysisResult record holds the output of a prediction package such - as SIFT on a specific allele. - -.. avro:record:: AlleleLocation - - :field start: - Relative start position of the allele in this coordinate system - :type start: int - :field end: - Relative end position of the allele in this coordinate system - :type end: null|int - :field referenceSequence: - Reference sequence in feature (this should be the codon at CDS level) - :type referenceSequence: null|string - :field alternateSequence: - Alternate sequence in feature (this should be the codon at CDS level) - :type alternateSequence: null|string - - An allele location record holds the location of an allele relative to a - non-genomic coordinate system such as a CDS or protein and holds the - reference and alternate sequence where appropriate - -.. 
avro:record:: VariantAnnotationSet - - :field id: - The ID of the variant annotation set record - :type id: string - :field variantSetId: - The ID of the variant set to which this annotation set belongs - :type variantSetId: string - :field name: - The variant annotation set name. - :type name: null|string - :field analysis: - Analysis details. It is essential to supply versions for all software and - reference data used. - :type analysis: Analysis - - A VariantAnnotationSet record groups VariantAnnotation records. It is derived - from a VariantSet and holds information describing the software and reference - data used in the annotation. - -.. avro:record:: HGVSAnnotation - - :field genomic: - :type genomic: null|string - :field transcript: - :type transcript: null|string - :field protein: - :type protein: null|string - - A HGVSAnnotation record holds Human Genome Variation Society descriptions - of the sequence change with respect to genomic, transcript and protein - sequences. See: http://www.hgvs.org/mutnomen/recs.html. - Descriptions should be provided at genomic level. Descriptions at transcript - level should be provided when the allele lies within a transcript. Descriptions - at protein level should be provided when the allele lies within the translated - sequence or stop codon. - -.. avro:record:: TranscriptEffect - - :field id: - The ID of the transcript effect record - :type id: string - :field featureId: - The id of the transcript feature the annotation is relative to - :type featureId: string - :field alternateBases: - Alternate allele - a variant may have more than one alternate allele, - each of which will have distinct annotation. - :type alternateBases: null|string - :field effects: - Effect of variant on this feature - :type effects: array - :field hgvsAnnotation: - Human Genome Variation Society variant descriptions - :type hgvsAnnotation: HGVSAnnotation - :field cDNALocation: - Change relative to cDNA - :type cDNALocation: null|AlleleLocation - :field CDSLocation: - :type CDSLocation: null|AlleleLocation - :field proteinLocation: - Change relative to protein - :type proteinLocation: null|AlleleLocation - :field analysisResults: - Output from prediction packages such as SIFT - :type analysisResults: array - - A transcript effect record is a set of information describing the - effect of an allele on a transcript - -.. avro:record:: VariantAnnotation - - :field id: - The ID of this VariantAnnotation. - :type id: string - :field variantId: - The variant ID. - :type variantId: string - :field variantAnnotationSetId: - The ID of the variant annotation set this record belongs to. - :type variantAnnotationSetId: string - :field createDateTime: - The :ref:`ISO 8601 ` time at which this record was created. - :type createDateTime: null|string - :field transcriptEffects: - The transcript effect annotation for the alleles of this variant. Each one - represents the effect of a single allele on a single transcript. - :type transcriptEffects: array - :field info: - Additional annotation data in key-value pairs. - :type info: map> - - A `VariantAnnotation` record represents the result of comparing a variant - to a set of reference data. - diff --git a/doc/source/schemas/common.rst b/doc/source/schemas/common.rst deleted file mode 100644 index 99605783..00000000 --- a/doc/source/schemas/common.rst +++ /dev/null @@ -1,107 +0,0 @@ -Common -****** - -This file defines common types used in other parts of the schema. -There are no directly associated methods. - -.. 
avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. 
- * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - diff --git a/doc/source/schemas/index.rst b/doc/source/schemas/index.rst index c343b0b7..f1640056 100644 --- a/doc/source/schemas/index.rst +++ b/doc/source/schemas/index.rst @@ -4,17 +4,16 @@ Schemas !!!!!!! .. toctree:: - common - metadata - metadatamethods - methods - readmethods - reads - referencemethods - references - variantmethods - variants - alleleAnnotationmethods - alleleAnnotations - sequenceAnnotations - sequenceAnnotationmethods + common.proto.rst + metadata.proto.rst + metadata_service.proto.rst + reads.proto.rst + read_service.proto.rst + references.proto.rst + reference_service.proto.rst + variants.proto.rst + variant_service.proto.rst + allele_annotations.proto.rst + allele_annotation_service.proto.rst + sequence_annotations.proto.rst + sequence_annotation_service.proto.rst diff --git a/doc/source/schemas/metadata.rst b/doc/source/schemas/metadata.rst deleted file mode 100644 index 7694731f..00000000 --- a/doc/source/schemas/metadata.rst +++ /dev/null @@ -1,243 +0,0 @@ -Metadata -******** - -This protocol defines metadata used in the other GA4GH protocols. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. 
The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. 
- :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. 
- :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - diff --git a/doc/source/schemas/metadatamethods.rst b/doc/source/schemas/metadatamethods.rst deleted file mode 100644 index 703c69cb..00000000 --- a/doc/source/schemas/metadatamethods.rst +++ /dev/null @@ -1,237 +0,0 @@ -MetadataMethods -*************** - - .. function:: searchDatasets(request) - - :param request: SearchDatasetsRequest: This request maps to the body of `POST /datasets/search` as JSON. - :return type: SearchDatasetsResponse - :throws: GAException - -Gets a list of datasets accessible through the API. - -TODO: Reads and variants both want to have datasets. Are they the same object? - -`POST /datasets/search` must accept a JSON version of -`SearchDatasetsRequest` as the post body and will return a JSON version -of `SearchDatasetsResponse`. - - .. function:: getDataset(id) - - :param id: string: The ID of the `Dataset`. - :return type: org.ga4gh.models.Dataset - :throws: GAException - -Gets a `Dataset` by ID. -`GET /datasets/{id}` will return a JSON version of `Dataset`. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. 
Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field recordCreateTime: - The time at which this record was created. - Format: ISO 8601, YYYY-MM-DDTHH:MM:SS.SSS (e.g. 2015-02-10T00:03:42.123Z) - :type recordCreateTime: string - :field recordUpdateTime: - The time at which this record was last updated. - Format: ISO 8601, YYYY-MM-DDTHH:MM:SS.SSS (e.g. 2015-02-10T00:03:42.123Z) - :type recordUpdateTime: string - :field runTime: - The time at which this experiment was performed. 
- Granularity here is variable (e.g. date only). - Format: ISO 8601, YYYY-MM-DDTHH:MM:SS (e.g. 2015-02-10T00:03:42) - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:error:: GAException - - A general exception type. - -.. avro:record:: SearchDatasetsRequest - - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /datasets/search` as JSON. - -.. avro:record:: SearchDatasetsResponse - - :field datasets: - The list of datasets. - :type datasets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /datasets/search` expressed as JSON. - diff --git a/doc/source/schemas/methods.rst b/doc/source/schemas/methods.rst deleted file mode 100644 index e02a4cb3..00000000 --- a/doc/source/schemas/methods.rst +++ /dev/null @@ -1,7 +0,0 @@ -RPC -*** - -.. avro:error:: GAException - - A general exception type. 
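The `pageToken`/`nextPageToken` contract described above is shared by all of the search methods in these schemas. A minimal client-side paging loop, sketched in Python (the `post_json` helper and the exact endpoint path are assumptions for illustration, not part of the specification)::

    def search_all_datasets(post_json, page_size=100):
        # post_json(path, body) is an assumed helper that POSTs a JSON body
        # and returns the decoded JSON response.
        page_token = None
        while True:
            request = {"pageSize": page_size, "pageToken": page_token}
            response = post_json("/datasets/search", request)
            for dataset in response.get("datasets", []):
                yield dataset
            page_token = response.get("nextPageToken")
            if not page_token:  # empty token signals the last page
                break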
- diff --git a/doc/source/schemas/readmethods.rst b/doc/source/schemas/readmethods.rst deleted file mode 100644 index 431504c8..00000000 --- a/doc/source/schemas/readmethods.rst +++ /dev/null @@ -1,612 +0,0 @@ -ReadMethods -*********** - - .. function:: searchReads(request) - - :param request: SearchReadsRequest: This request maps to the body of `POST /reads/search` as JSON. - :return type: SearchReadsResponse - :throws: GAException - -Gets a list of `ReadAlignment`s for one or more `ReadGroup`s. - -`searchReads` operates over a genomic coordinate space of reference sequence -and position defined by the `Reference`s to which the requested `ReadGroup`s are -aligned. - -If a target positional range is specified, search returns all reads whose -alignment to the reference genome *overlap* the range. A query which specifies -only read group IDs yields all reads in those read groups, including unmapped -reads. - -All reads returned (including reads on subsequent pages) are ordered by genomic -coordinate (by reference sequence, then position). Reads with equivalent genomic -coordinates are returned in an unspecified order. This order must be consistent -for a given repository, such that two queries for the same content (regardless -of page size) yield reads in the same order across their respective streams of -paginated responses. - -`POST /reads/search` must accept a JSON version of `SearchReadsRequest` as -the post body and will return a JSON version of `SearchReadsResponse`. - - .. function:: searchReadGroupSets(request) - - :param request: SearchReadGroupSetsRequest: This request maps to the body of `POST /readgroupsets/search` as JSON. - :return type: SearchReadGroupSetsResponse - :throws: GAException - -Gets a list of `ReadGroupSet` matching the search criteria. - -`POST /readgroupsets/search` must accept a JSON version of -`SearchReadGroupSetsRequest` as the post body and will return a JSON -version of `SearchReadGroupSetsResponse`. - - .. function:: getReadGroupSet(id) - - :param id: string: The ID of the `ReadGroupSet`. - :return type: org.ga4gh.models.ReadGroupSet - :throws: GAException - -Gets a `org.ga4gh.models.ReadGroupSet` by ID. -`GET /readgroupsets/{id}` will return a JSON version of `ReadGroupSet`. - - .. function:: getReadGroup(id) - - :param id: string: The ID of the `ReadGroup`. - :return type: org.ga4gh.models.ReadGroup - :throws: GAException - -Gets a `org.ga4gh.models.ReadGroup` by ID. -`GET /readgroups/{id}` will return a JSON version of `ReadGroup`. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. 
`ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:error:: GAException - - A general exception type. - -.. 
avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. 
- :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: Program - - :field commandLine: - The command line used to run this program. - :type commandLine: null|string - :field id: - The user specified ID of the program. - :type id: null|string - :field name: - The name of the program. - :type name: null|string - :field prevProgramId: - The ID of the program run before this one. - :type prevProgramId: null|string - :field version: - The version of the program run. - :type version: null|string - - Program can be used to track the provenance of how read data was generated. - -.. avro:record:: ReadStats - - :field alignedReadCount: - The number of aligned reads. - :type alignedReadCount: null|long - :field unalignedReadCount: - The number of unaligned reads. - :type unalignedReadCount: null|long - :field baseCount: - The total number of bases. - This is equivalent to the sum of `alignedSequence.length` for all reads. - :type baseCount: null|long - - ReadStats can be used to provide summary statistics about read data. - -.. avro:record:: ReadGroup - - :field id: - The read group ID. - :type id: string - :field datasetId: - The ID of the dataset this read group belongs to. - :type datasetId: null|string - :field name: - The read group name. - :type name: null|string - :field description: - The read group description. - :type description: null|string - :field sampleId: - The sample this read group's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the SM tag in a - BAM file. - :type sampleId: null|string - :field experiment: - The experiment used to generate this read group. - :type experiment: null|Experiment - :field predictedInsertSize: - The predicted insert size of this read group. - :type predictedInsertSize: null|int - :field created: - The time at which this read group was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this read group was last updated in milliseconds - from the epoch. - :type updated: null|long - :field stats: - Statistical data on reads in this read group. - :type stats: null|ReadStats - :field programs: - The programs used to generate this read group. 
- :type programs: array - :field referenceSetId: - The ID of the reference set to which the reads in this read group are aligned. - Required if there are any read alignments. - :type referenceSetId: null|string - :field info: - A map of additional read group information. - :type info: map> - - A ReadGroup is a set of reads derived from one physical sequencing process. - -.. avro:record:: ReadGroupSet - - :field id: - The read group set ID. - :type id: string - :field datasetId: - The ID of the dataset this read group set belongs to. - :type datasetId: null|string - :field name: - The read group set name. - :type name: null|string - :field stats: - Statistical data on reads in this read group set. - :type stats: null|ReadStats - :field readGroups: - The read groups in this set. - :type readGroups: array - - A ReadGroupSet is a logical collection of ReadGroups. Typically one ReadGroupSet - represents all the reads from one experimental sample. - -.. avro:record:: LinearAlignment - - :field position: - The position of this alignment. - :type position: Position - :field mappingQuality: - The mapping quality of this alignment, meaning the likelihood that the read - maps to this position. - - Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to the - nearest integer. - :type mappingQuality: null|int - :field cigar: - Represents the local alignment of this sequence (alignment matches, indels, etc) - versus the reference. - :type cigar: array - - A linear alignment describes the alignment of a read to a Reference, using a - position and CIGAR array. - -.. avro:record:: ReadAlignment - - :field id: - The read alignment ID. This ID is unique within the read group this - alignment belongs to. - - For performance reasons, this field may be omitted by a backend. - If provided, its intended use is to make caching and UI display easier for - genome browsers and other lightweight clients. - :type id: null|string - :field readGroupId: - The ID of the read group this read belongs to. - (Every read must belong to exactly one read group.) - :type readGroupId: string - :field fragmentName: - The fragment name. Equivalent to QNAME (query template name) in SAM. - :type fragmentName: string - :field properPlacement: - The orientation and the distance between reads from the fragment are - consistent with the sequencing protocol (equivalent to SAM flag 0x2) - :type properPlacement: null|boolean - :field duplicateFragment: - The fragment is a PCR or optical duplicate (SAM flag 0x400). - :type duplicateFragment: null|boolean - :field numberReads: - The number of reads in the fragment (extension to SAM flag 0x1) - :type numberReads: null|int - :field fragmentLength: - The observed length of the fragment, equivalent to TLEN in SAM. - :type fragmentLength: null|int - :field readNumber: - The read ordinal in the fragment, 0-based and less than numberReads. This - field replaces SAM flag 0x40 and 0x80 and is intended to more cleanly - represent multiple reads per fragment. - :type readNumber: null|int - :field failedVendorQualityChecks: - The read fails platform or vendor quality checks (SAM flag 0x200). - :type failedVendorQualityChecks: null|boolean - :field alignment: - The alignment for this alignment record. This field will be null if the read - is unmapped. - :type alignment: null|LinearAlignment - :field secondaryAlignment: - Whether this alignment is secondary. Equivalent to SAM flag 0x100. - A secondary alignment represents an alternative to the primary alignment - for this read. 
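Several of the `ReadAlignment` fields above are documented as replacements for individual SAM FLAG bits, and `mappingQuality` as a Phred-scaled probability. A partial sketch of how a client might reconstruct those values (assuming a dict-like `ReadAlignment`; the strand, mate-related and supplementary bits are omitted for brevity)::

    import math

    def mapping_quality(p_wrong):
        # -10 * log10(P(mapping position is wrong)), rounded to nearest integer.
        return int(round(-10 * math.log10(p_wrong)))

    def sam_flag(read):
        number_reads = read.get("numberReads") or 1
        flag = 0
        if number_reads > 1:
            flag |= 0x1    # template has multiple segments
        if read.get("properPlacement"):
            flag |= 0x2    # each segment properly aligned
        if read.get("alignment") is None:
            flag |= 0x4    # segment unmapped
        if number_reads > 1 and read.get("readNumber") == 0:
            flag |= 0x40   # first segment in the template
        if number_reads > 1 and read.get("readNumber") == number_reads - 1:
            flag |= 0x80   # last segment in the template
        if read.get("secondaryAlignment"):
            flag |= 0x100  # secondary alignment
        if read.get("failedVendorQualityChecks"):
            flag |= 0x200  # not passing quality controls
        if read.get("duplicateFragment"):
            flag |= 0x400  # PCR or optical duplicate
        return flag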
Aligners may return secondary alignments if a read can map - ambiguously to multiple coordinates in the genome. - - By convention, each read has one and only one alignment where both - secondaryAlignment and supplementaryAlignment are false. - :type secondaryAlignment: null|boolean - :field supplementaryAlignment: - Whether this alignment is supplementary. Equivalent to SAM flag 0x800. - Supplementary alignments are used in the representation of a chimeric - alignment. In a chimeric alignment, a read is split into multiple - linear alignments that map to different reference contigs. The first - linear alignment in the read will be designated as the representative alignment; - the remaining linear alignments will be designated as supplementary alignments. - These alignments may have different mapping quality scores. - - In each linear alignment in a chimeric alignment, the read will be hard clipped. - The `alignedSequence` and `alignedQuality` fields in the alignment record will - only represent the bases for its respective linear alignment. - :type supplementaryAlignment: null|boolean - :field alignedSequence: - The bases of the read sequence contained in this alignment record (equivalent - to SEQ in SAM). - - `alignedSequence` and `alignedQuality` may be shorter than the full read sequence - and quality. This will occur if the alignment is part of a chimeric alignment, - or if the read was trimmed. When this occurs, the CIGAR for this read will - begin/end with a hard clip operator that will indicate the length of the - excised sequence. - :type alignedSequence: null|string - :field alignedQuality: - The quality of the read sequence contained in this alignment record - (equivalent to QUAL in SAM). - - `alignedSequence` and `alignedQuality` may be shorter than the full read sequence - and quality. This will occur if the alignment is part of a chimeric alignment, - or if the read was trimmed. When this occurs, the CIGAR for this read will - begin/end with a hard clip operator that will indicate the length of the excised sequence. - :type alignedQuality: array - :field nextMatePosition: - The mapping of the primary alignment of the `(readNumber+1)%numberReads` - read in the fragment. It replaces mate position and mate strand in SAM. - :type nextMatePosition: null|Position - :field info: - A map of additional read alignment information. - :type info: map> - - Each read alignment describes an alignment with additional information - about the fragment and the read. A read alignment object is equivalent to a - line in a SAM file. - -.. avro:record:: SearchReadsRequest - - :field readGroupIds: - The ReadGroups to search. At least one id must be specified. - :type readGroupIds: array - :field referenceId: - The reference to query. Leaving blank returns results from all - references, including unmapped reads - this could be very large. - :type referenceId: null|string - :field start: - The start position (0-based) of this query. - If a reference is specified, this defaults to 0. - Genomic positions are non-negative integers less than reference length. - Requests spanning the join of circular genomes are represented as - two requests one on each side of the join (position 0). - :type start: null|long - :field end: - The end position (0-based, exclusive) of this query. - If a reference is specified, this defaults to the - reference's length. - :type end: null|long - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. 
- :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /reads/search` as JSON. - - If a reference is specified, all queried `ReadGroup`s must be aligned - to `ReferenceSet`s containing that same `Reference`. If no reference is - specified, all queried `ReadGroup`s must be aligned to the same `ReferenceSet`. - -.. avro:record:: SearchReadsResponse - - :field alignments: - The list of matching alignment records, sorted by position. - Unmapped reads, which have no position, are returned last. - :type alignments: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /reads/search` expressed as JSON. - -.. avro:record:: SearchReadGroupSetsRequest - - :field datasetId: - The dataset to search. - :type datasetId: string - :field name: - Only return read group sets with this name (case-sensitive, exact match). - :type name: null|string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /readgroupsets/search` as JSON. - - TODO: Factor this out to a common API patterns section. - - If searching by a resource ID, and that resource is not found, the method - will return a `404` HTTP status code (`NOT_FOUND`). - - If searching by other attributes, e.g. `name`, and no matches are found, the - method will return a `200` HTTP status code (`OK`) with an empty result list. - -.. avro:record:: SearchReadGroupSetsResponse - - :field readGroupSets: - The list of matching read group sets. - :type readGroupSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /readgroupsets/search` expressed as JSON. - diff --git a/doc/source/schemas/reads.rst b/doc/source/schemas/reads.rst deleted file mode 100644 index 7bcc1f6b..00000000 --- a/doc/source/schemas/reads.rst +++ /dev/null @@ -1,465 +0,0 @@ -Reads -***** - -This file defines the objects used to represent a reads and alignments, most importantly -ReadGroupSet, ReadGroup, and ReadAlignment. -See {TODO: LINK TO READS OVERVIEW} for more information. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. 
- :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. 
avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. 
- :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: Program - - :field commandLine: - The command line used to run this program. - :type commandLine: null|string - :field id: - The user specified ID of the program. - :type id: null|string - :field name: - The name of the program. - :type name: null|string - :field prevProgramId: - The ID of the program run before this one. - :type prevProgramId: null|string - :field version: - The version of the program run. - :type version: null|string - - Program can be used to track the provenance of how read data was generated. - -.. avro:record:: ReadStats - - :field alignedReadCount: - The number of aligned reads. - :type alignedReadCount: null|long - :field unalignedReadCount: - The number of unaligned reads. - :type unalignedReadCount: null|long - :field baseCount: - The total number of bases. - This is equivalent to the sum of `alignedSequence.length` for all reads. - :type baseCount: null|long - - ReadStats can be used to provide summary statistics about read data. - -.. avro:record:: ReadGroup - - :field id: - The read group ID. - :type id: string - :field datasetId: - The ID of the dataset this read group belongs to. - :type datasetId: null|string - :field name: - The read group name. - :type name: null|string - :field description: - The read group description. - :type description: null|string - :field sampleId: - The sample this read group's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the SM tag in a - BAM file. - :type sampleId: null|string - :field experiment: - The experiment used to generate this read group. 
- :type experiment: null|Experiment - :field predictedInsertSize: - The predicted insert size of this read group. - :type predictedInsertSize: null|int - :field created: - The time at which this read group was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this read group was last updated in milliseconds - from the epoch. - :type updated: null|long - :field stats: - Statistical data on reads in this read group. - :type stats: null|ReadStats - :field programs: - The programs used to generate this read group. - :type programs: array - :field referenceSetId: - The ID of the reference set to which the reads in this read group are aligned. - Required if there are any read alignments. - :type referenceSetId: null|string - :field info: - A map of additional read group information. - :type info: map> - - A ReadGroup is a set of reads derived from one physical sequencing process. - -.. avro:record:: ReadGroupSet - - :field id: - The read group set ID. - :type id: string - :field datasetId: - The ID of the dataset this read group set belongs to. - :type datasetId: null|string - :field name: - The read group set name. - :type name: null|string - :field stats: - Statistical data on reads in this read group set. - :type stats: null|ReadStats - :field readGroups: - The read groups in this set. - :type readGroups: array - - A ReadGroupSet is a logical collection of ReadGroups. Typically one ReadGroupSet - represents all the reads from one experimental sample. - -.. avro:record:: LinearAlignment - - :field position: - The position of this alignment. - :type position: Position - :field mappingQuality: - The mapping quality of this alignment, meaning the likelihood that the read - maps to this position. - - Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to the - nearest integer. - :type mappingQuality: null|int - :field cigar: - Represents the local alignment of this sequence (alignment matches, indels, etc) - versus the reference. - :type cigar: array - - A linear alignment describes the alignment of a read to a Reference, using a - position and CIGAR array. - -.. avro:record:: ReadAlignment - - :field id: - The read alignment ID. This ID is unique within the read group this - alignment belongs to. - - For performance reasons, this field may be omitted by a backend. - If provided, its intended use is to make caching and UI display easier for - genome browsers and other lightweight clients. - :type id: null|string - :field readGroupId: - The ID of the read group this read belongs to. - (Every read must belong to exactly one read group.) - :type readGroupId: string - :field fragmentName: - The fragment name. Equivalent to QNAME (query template name) in SAM. - :type fragmentName: string - :field properPlacement: - The orientation and the distance between reads from the fragment are - consistent with the sequencing protocol (equivalent to SAM flag 0x2) - :type properPlacement: null|boolean - :field duplicateFragment: - The fragment is a PCR or optical duplicate (SAM flag 0x400). - :type duplicateFragment: null|boolean - :field numberReads: - The number of reads in the fragment (extension to SAM flag 0x1) - :type numberReads: null|int - :field fragmentLength: - The observed length of the fragment, equivalent to TLEN in SAM. - :type fragmentLength: null|int - :field readNumber: - The read ordinal in the fragment, 0-based and less than numberReads. 
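As a small illustration of the numbering scheme (a hypothetical helper, not part of the API): for a paired-end fragment `numberReads` is 2, the two reads carry `readNumber` 0 and 1, and the mate of a read is found with the `(readNumber+1)%numberReads` rule used by `nextMatePosition`::

    def next_mate_read_number(read_number, number_reads):
        # Index of the read whose primary alignment nextMatePosition refers to,
        # following the (readNumber + 1) % numberReads rule described in this schema.
        return (read_number + 1) % number_reads

    # Paired-end fragment: next_mate_read_number(0, 2) == 1 and next_mate_read_number(1, 2) == 0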
This - field replaces SAM flag 0x40 and 0x80 and is intended to more cleanly - represent multiple reads per fragment. - :type readNumber: null|int - :field failedVendorQualityChecks: - The read fails platform or vendor quality checks (SAM flag 0x200). - :type failedVendorQualityChecks: null|boolean - :field alignment: - The alignment for this alignment record. This field will be null if the read - is unmapped. - :type alignment: null|LinearAlignment - :field secondaryAlignment: - Whether this alignment is secondary. Equivalent to SAM flag 0x100. - A secondary alignment represents an alternative to the primary alignment - for this read. Aligners may return secondary alignments if a read can map - ambiguously to multiple coordinates in the genome. - - By convention, each read has one and only one alignment where both - secondaryAlignment and supplementaryAlignment are false. - :type secondaryAlignment: null|boolean - :field supplementaryAlignment: - Whether this alignment is supplementary. Equivalent to SAM flag 0x800. - Supplementary alignments are used in the representation of a chimeric - alignment. In a chimeric alignment, a read is split into multiple - linear alignments that map to different reference contigs. The first - linear alignment in the read will be designated as the representative alignment; - the remaining linear alignments will be designated as supplementary alignments. - These alignments may have different mapping quality scores. - - In each linear alignment in a chimeric alignment, the read will be hard clipped. - The `alignedSequence` and `alignedQuality` fields in the alignment record will - only represent the bases for its respective linear alignment. - :type supplementaryAlignment: null|boolean - :field alignedSequence: - The bases of the read sequence contained in this alignment record (equivalent - to SEQ in SAM). - - `alignedSequence` and `alignedQuality` may be shorter than the full read sequence - and quality. This will occur if the alignment is part of a chimeric alignment, - or if the read was trimmed. When this occurs, the CIGAR for this read will - begin/end with a hard clip operator that will indicate the length of the - excised sequence. - :type alignedSequence: null|string - :field alignedQuality: - The quality of the read sequence contained in this alignment record - (equivalent to QUAL in SAM). - - `alignedSequence` and `alignedQuality` may be shorter than the full read sequence - and quality. This will occur if the alignment is part of a chimeric alignment, - or if the read was trimmed. When this occurs, the CIGAR for this read will - begin/end with a hard clip operator that will indicate the length of the excised sequence. - :type alignedQuality: array - :field nextMatePosition: - The mapping of the primary alignment of the `(readNumber+1)%numberReads` - read in the fragment. It replaces mate position and mate strand in SAM. - :type nextMatePosition: null|Position - :field info: - A map of additional read alignment information. - :type info: map> - - Each read alignment describes an alignment with additional information - about the fragment and the read. A read alignment object is equivalent to a - line in a SAM file. - diff --git a/doc/source/schemas/referencemethods.rst b/doc/source/schemas/referencemethods.rst deleted file mode 100644 index e287d4df..00000000 --- a/doc/source/schemas/referencemethods.rst +++ /dev/null @@ -1,379 +0,0 @@ -ReferenceMethods -**************** - - .. 
function:: getReferenceSet(id) - - :param id: string: The ID of the `ReferenceSet`. - :return type: org.ga4gh.models.ReferenceSet - :throws: GAException - -Gets a `ReferenceSet` by ID. -`GET /referencesets/{id}` will return a JSON version of `ReferenceSet`. - - .. function:: getReference(id) - - :param id: string: The ID of the `Reference`. - :return type: org.ga4gh.models.Reference - :throws: GAException - -Gets a `Reference` by ID. -`GET /references/{id}` will return a JSON version of `Reference`. - - .. function:: searchReferences(request) - - :param request: SearchReferencesRequest: This request maps to the body of `POST /references/search` - as JSON. - :return type: SearchReferencesResponse - :throws: GAException - -Gets a list of `Reference` matching the search criteria. - -`POST /references/search` must accept a JSON version of -`SearchReferencesRequest` as the post body and will return a JSON -version of `SearchReferencesResponse`. - - .. function:: getReferenceBases(id, request) - - :param id: string: The ID of the `Reference`. - :param request: ListReferenceBasesRequest: Additional request parameters to restrict the query. - :return type: ListReferenceBasesResponse - :throws: GAException - -Lists `Reference` bases by ID and optional range. -`GET /references/{id}/bases` will return a JSON version of -`ListReferenceBasesResponse`. - - .. function:: searchReferenceSets(request) - - :param request: SearchReferenceSetsRequest: This request maps to the body of `POST /referencesets/search` - as JSON. - :return type: SearchReferenceSetsResponse - :throws: GAException - -Gets a list of `ReferenceSet` matching the search criteria. - -`POST /referencesets/search` must accept a JSON version of -`SearchReferenceSetsRequest` as the post body and will return a JSON -version of `SearchReferenceSetsResponse`. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. 
Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:error:: GAException - - A general exception type. - -.. avro:record:: Reference - - :field id: - The reference ID. Unique within the repository. - :type id: string - :field length: - The length of this reference's sequence. - :type length: long - :field md5checksum: - The MD5 checksum uniquely representing this `Reference` as a lower-case - hexadecimal string, calculated as the MD5 of the upper-case sequence - excluding all whitespace characters (this is equivalent to SQ:M5 in SAM). - :type md5checksum: string - :field name: - The name of this reference. (e.g. '22'). - :type name: string - :field sourceURI: - The URI from which the sequence was obtained. 
Specifies a FASTA format - file/string with one name, sequence pair. In most cases, clients should call - the `getReferenceBases()` method to obtain sequence bases for a `Reference` - instead of attempting to retrieve this URI. - :type sourceURI: null|string - :field sourceAccessions: - All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) which must include - a version number, e.g. `GCF_000001405.26`. - :type sourceAccessions: array - :field isDerived: - A sequence X is said to be derived from source sequence Y, if X and Y - are of the same length and the per-base sequence divergence at A/C/G/T bases - is sufficiently small. Two sequences derived from the same official - sequence share the same coordinates and annotations, and - can be replaced with the official sequence for certain use cases. - :type isDerived: boolean - :field sourceDivergence: - The `sourceDivergence` is the fraction of non-indel bases that do not match the - reference this record was derived from. - :type sourceDivergence: null|float - :field ncbiTaxonId: - ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human). - :type ncbiTaxonId: null|int - - A `Reference` is a canonical assembled contig, intended to act as a - reference coordinate space for other genomic annotations. A single - `Reference` might represent the human chromosome 1, for instance. - - `Reference`s are designed to be immutable. - -.. avro:record:: ReferenceSet - - :field id: - The reference set ID. Unique in the repository. - :type id: string - :field name: - The reference set name. - :type name: null|string - :field md5checksum: - Order-independent MD5 checksum which identifies this `ReferenceSet`. - - To compute this checksum, make a list of `Reference.md5checksum` for all - `Reference`s in this set. Then sort that list, and take the MD5 hash of - all the strings concatenated together. Express the hash as a lower-case - hexadecimal string. - :type md5checksum: string - :field ncbiTaxonId: - ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human) indicating - the species which this assembly is intended to model. Note that contained - `Reference`s may specify a different `ncbiTaxonId`, as assemblies may - contain reference sequences which do not belong to the modeled species, e.g. - EBV in a human reference genome. - :type ncbiTaxonId: null|int - :field description: - Optional free text description of this reference set. - :type description: null|string - :field assemblyId: - Public id of this reference set, such as `GRCh37`. - :type assemblyId: null|string - :field sourceURI: - Specifies a FASTA format file/string. - :type sourceURI: null|string - :field sourceAccessions: - All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally - with a version number, e.g. `NC_000001.11`. - :type sourceAccessions: array - :field isDerived: - A reference set may be derived from a source if it contains - additional sequences, or some of the sequences within it are derived - (see the definition of `isDerived` in `Reference`). - :type isDerived: boolean - - A `ReferenceSet` is a set of `Reference`s which typically comprise a - reference assembly, such as `GRCh38`. A `ReferenceSet` defines a common - coordinate space for comparing reference-aligned experimental data. - -.. avro:record:: SearchReferenceSetsRequest - - :field md5checksum: - If not null, return the reference sets for which the - `md5checksum` matches this string (case-sensitive, exact match). - See `ReferenceSet::md5checksum` for details. 
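The order-independent checksum referred to here is the one defined on `ReferenceSet::md5checksum`; a minimal sketch of that rule (an illustration, not a function defined by this API)::

    import hashlib

    def reference_set_md5(reference_md5checksums):
        # Sort the per-Reference md5checksum strings, concatenate them, and take
        # the MD5 of the result, expressed as a lower-case hexadecimal string.
        concatenated = "".join(sorted(reference_md5checksums))
        return hashlib.md5(concatenated.encode("ascii")).hexdigest()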
- :type md5checksum: null|string - :field accession: - If not null, return the reference sets for which the `accession` - matches this string (case-sensitive, exact match). - :type accession: null|string - :field assemblyId: - If not null, return the reference sets for which the `assemblyId` - matches this string (case-sensitive, exact match). - :type assemblyId: null|string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /referencesets/search` - as JSON. - -.. avro:record:: SearchReferenceSetsResponse - - :field referenceSets: - The list of matching reference sets. - :type referenceSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /referencesets/search` - expressed as JSON. - -.. avro:record:: SearchReferencesRequest - - :field referenceSetId: - The `ReferenceSet` to search. - :type referenceSetId: string - :field md5checksum: - If not null, return the references for which the - `md5checksum` matches this string (case-sensitive, exact match). - See `ReferenceSet::md5checksum` for details. - :type md5checksum: null|string - :field accession: - If not null, return the references for which the `accession` - matches this string (case-sensitive, exact match). - :type accession: null|string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /references/search` - as JSON. - -.. avro:record:: SearchReferencesResponse - - :field references: - The list of matching references. - :type references: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /references/search` expressed as JSON. - -.. avro:record:: ListReferenceBasesRequest - - :field start: - The start position (0-based) of this query. Defaults to 0. - Genomic positions are non-negative integers less than reference length. - Requests spanning the join of circular genomes are represented as - two requests one on each side of the join (position 0). - :type start: long - :field end: - The end position (0-based, exclusive) of this query. Defaults - to the length of this `Reference`. - :type end: null|long - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. 
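Taken together, `pageToken` and `nextPageToken` give the paging loop used by all of the search methods. A rough client-side sketch follows; the HTTP library and helper name are assumptions, while the endpoint and field names come from this schema::

    import requests

    def search_all_references(base_url, reference_set_id, page_size=100):
        # Iterate every matching Reference by POSTing to /references/search and
        # following nextPageToken until the server returns an empty token.
        body = {"referenceSetId": reference_set_id, "pageSize": page_size}
        while True:
            response = requests.post(base_url + "/references/search", json=body).json()
            for reference in response.get("references", []):
                yield reference
            token = response.get("nextPageToken")
            if not token:
                break
            body["pageToken"] = token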
- :type pageToken: null|string - - The query parameters for a request to `GET /references/{id}/bases`, for - example: - - `GET /references/{id}/bases?start=100&end=200` - -.. avro:record:: ListReferenceBasesResponse - - :field offset: - The offset position (0-based) of the given sequence from the start of this - `Reference`. This value will differ for each page in a paginated request. - :type offset: long - :field sequence: - A substring of the bases that make up this reference. Bases are represented - as IUPAC-IUB codes; this string matches the regexp `[ACGTMRWSYKVHDBN]*`. - :type sequence: string - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - The response from `GET /references/{id}/bases` expressed as JSON. - diff --git a/doc/source/schemas/references.rst b/doc/source/schemas/references.rst deleted file mode 100644 index 9473784d..00000000 --- a/doc/source/schemas/references.rst +++ /dev/null @@ -1,199 +0,0 @@ -References -********** - -Defines types used by the GA4GH References API. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. 
- * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: Reference - - :field id: - The reference ID. Unique within the repository. - :type id: string - :field length: - The length of this reference's sequence. - :type length: long - :field md5checksum: - The MD5 checksum uniquely representing this `Reference` as a lower-case - hexadecimal string, calculated as the MD5 of the upper-case sequence - excluding all whitespace characters (this is equivalent to SQ:M5 in SAM). - :type md5checksum: string - :field name: - The name of this reference. (e.g. '22'). - :type name: string - :field sourceURI: - The URI from which the sequence was obtained. Specifies a FASTA format - file/string with one name, sequence pair. In most cases, clients should call - the `getReferenceBases()` method to obtain sequence bases for a `Reference` - instead of attempting to retrieve this URI. - :type sourceURI: null|string - :field sourceAccessions: - All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) which must include - a version number, e.g. `GCF_000001405.26`. - :type sourceAccessions: array - :field isDerived: - A sequence X is said to be derived from source sequence Y, if X and Y - are of the same length and the per-base sequence divergence at A/C/G/T bases - is sufficiently small. 
Two sequences derived from the same official - sequence share the same coordinates and annotations, and - can be replaced with the official sequence for certain use cases. - :type isDerived: boolean - :field sourceDivergence: - The `sourceDivergence` is the fraction of non-indel bases that do not match the - reference this record was derived from. - :type sourceDivergence: null|float - :field ncbiTaxonId: - ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human). - :type ncbiTaxonId: null|int - - A `Reference` is a canonical assembled contig, intended to act as a - reference coordinate space for other genomic annotations. A single - `Reference` might represent the human chromosome 1, for instance. - - `Reference`s are designed to be immutable. - -.. avro:record:: ReferenceSet - - :field id: - The reference set ID. Unique in the repository. - :type id: string - :field name: - The reference set name. - :type name: null|string - :field md5checksum: - Order-independent MD5 checksum which identifies this `ReferenceSet`. - - To compute this checksum, make a list of `Reference.md5checksum` for all - `Reference`s in this set. Then sort that list, and take the MD5 hash of - all the strings concatenated together. Express the hash as a lower-case - hexadecimal string. - :type md5checksum: string - :field ncbiTaxonId: - ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human) indicating - the species which this assembly is intended to model. Note that contained - `Reference`s may specify a different `ncbiTaxonId`, as assemblies may - contain reference sequences which do not belong to the modeled species, e.g. - EBV in a human reference genome. - :type ncbiTaxonId: null|int - :field description: - Optional free text description of this reference set. - :type description: null|string - :field assemblyId: - Public id of this reference set, such as `GRCh37`. - :type assemblyId: null|string - :field sourceURI: - Specifies a FASTA format file/string. - :type sourceURI: null|string - :field sourceAccessions: - All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally - with a version number, e.g. `NC_000001.11`. - :type sourceAccessions: array - :field isDerived: - A reference set may be derived from a source if it contains - additional sequences, or some of the sequences within it are derived - (see the definition of `isDerived` in `Reference`). - :type isDerived: boolean - - A `ReferenceSet` is a set of `Reference`s which typically comprise a - reference assembly, such as `GRCh38`. A `ReferenceSet` defines a common - coordinate space for comparing reference-aligned experimental data. - diff --git a/doc/source/schemas/sequenceAnnotationmethods.rst b/doc/source/schemas/sequenceAnnotationmethods.rst deleted file mode 100644 index 01815cc6..00000000 --- a/doc/source/schemas/sequenceAnnotationmethods.rst +++ /dev/null @@ -1,457 +0,0 @@ -SequenceAnnotationMethods -************************* - - .. function:: searchFeatureSets(request) - - :param request: SearchFeatureSetsRequest: This request maps to the body of `POST /featuresets/search` as JSON. - :return type: SearchFeatureSetsResponse - :throws: GAException - -Gets a list of `FeatureSet` matching the search criteria. - - `POST /featuresets/search` must accept a JSON version of - `SearchFeatureSetsRequest` as the post body and will return a JSON version - of `SearchFeatureSetsResponse`. - - .. function:: getFeatureSet(id) - - :param id: string: The ID of the `FeatureSet`. 
- :return type: org.ga4gh.models.FeatureSet - :throws: GAException - -Gets a `FeatureSet` by ID. - `GET /featuresets/{id}` will return a JSON version of `FeatureSet`. - - .. function:: getFeature(id) - - :param id: string: The ID of the `Feature`. - :return type: org.ga4gh.models.Feature - :throws: GAException - -Gets a `org.ga4gh.models.Feature` by ID. - `GET /features/{id}` will return a JSON version of `Feature`. - - .. function:: searchFeatures(request) - - :param request: SearchFeaturesRequest: This request maps to the body of `POST /features/search` as JSON. - :return type: SearchFeaturesResponse - :throws: GAException - -Gets a list of `Feature` matching the search criteria. - - `POST /features/search` must accept a JSON version of - `SearchFeaturesRequest` as the post body and will return a JSON version of - `SearchFeaturesResponse`. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. 
This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:error:: GAException - - A general exception type. - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. 
- Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. - :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: Attributes - - :field vals: - :type vals: map> - - Type defining a collection of attributes associated with various protocol - records. Each attribute is a name that maps to an array of one or more - values. Values can be strings, external identifiers, or ontology terms. - Values should be split into the array elements instead of using a separator - syntax that needs to parsed. - -.. avro:record:: Feature - - :field id: - Id of this annotation node. 
- :type id: string - :field parentId: - Parent Id of this node. Set to empty string if node has no parent. - :type parentId: string - :field childIds: - Ordered array of Child Ids of this node. - Since not all child nodes are ordered by genomic coordinates, - this can't always be reconstructed from parentId's of the children alone. - :type childIds: array - :field featureSetId: - Identifier for the containing feature set. - :type featureSetId: string - :field referenceName: - The reference on which this feature occurs. - (e.g. `chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this feature occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Features spanning the join of circular genomes are represented as - two features one on each side of the join (position 0). - :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field strand: - The strand on which the feature is present. - :type strand: Strand - :field featureType: - Feature that is annotated by this region. Normally, this will be a term in - the Sequence Ontology. - :type featureType: OntologyTerm - :field attributes: - Name/value attributes of the annotation. Attribute names follow the GFF3 - naming convention of reserved names starting with an upper cases - character, and user-define names start with lower-case. Most GFF3 - pre-defined attributes apply, the exceptions are ID and Parent, which are - defined as fields. Additional, the following attributes are added: - * Score - the GFF3 score column - * Phase - the GFF3 phase column for CDS features. - :type attributes: Attributes - - Node in the annotation graph that annotates a contiguous region of a - sequence. - -.. avro:record:: FeatureSet - - :field id: - The ID of this annotation set. - :type id: string - :field datasetId: - The ID of the dataset this annotation set belongs to. - :type datasetId: null|string - :field referenceSetId: - The ID of the reference set which defines the coordinate-space for this - set of annotations. - :type referenceSetId: null|string - :field name: - The display name for this annotation set. - :type name: null|string - :field sourceURI: - The source URI describing the file from which this annotation set was - generated, if any. - :type sourceURI: null|string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - -.. avro:record:: SearchFeatureSetsRequest - - :field datasetId: - The `Dataset` to search. - :type datasetId: string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /featuresets/search` as JSON. - -.. avro:record:: SearchFeatureSetsResponse - - :field featureSets: - The list of matching feature sets. - :type featureSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. 
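The `Feature.attributes` convention described above (reserved GFF3 names start with an upper-case character, user-defined names with lower-case, and every attribute maps to an array of values rather than a delimited string) can be illustrated with a minimal, invented example::

    example_attributes = {
        "vals": {
            "Score": ["0.9"],   # the GFF3 score column
            "Phase": ["0"],     # the GFF3 phase column, CDS features only
            "note": ["hypothetical user-defined attribute"],
        }
    }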
This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /featuresets/search` expressed as JSON. - -.. avro:record:: SearchFeaturesRequest - - :field featureSetId: - The annotation set to search within. Either `featureSetId` or - `parentId` must be non-empty. - :type featureSetId: null|string - :field parentId: - Restricts the search to direct children of the given parent `feature` - ID. Either `featureSetId` or `parentId` must be non-empty. - :type parentId: null|string - :field referenceName: - Only return features on the reference with this name - (matched to literal reference name as imported from the GFF3). - :type referenceName: string - :field start: - Required. The beginning of the window (0-based, inclusive) for which - overlapping features should be returned. Genomic positions are - non-negative integers less than reference length. Requests spanning the - join of circular genomes are represented as two requests one on each side - of the join (position 0). - :type start: long - :field end: - Required. The end of the window (0-based, exclusive) for which overlapping - features should be returned. - :type end: long - :field featureTypes: - If specified, this query matches only annotations whose `featureType` - matches one of the provided ontology terms. - :type featureTypes: array - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /features/search` as JSON. - -.. avro:record:: SearchFeaturesResponse - - :field features: - The list of matching annotations, sorted by start position. Annotations which - share a start position are returned in a deterministic order. - :type features: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /features/search` expressed as JSON. - diff --git a/doc/source/schemas/sequenceAnnotations.rst b/doc/source/schemas/sequenceAnnotations.rst deleted file mode 100644 index a50b12d3..00000000 --- a/doc/source/schemas/sequenceAnnotations.rst +++ /dev/null @@ -1,342 +0,0 @@ -SequenceAnnotations -******************* - -This protocol defines annotations on GA4GH genomic sequences. It includes two -types of annotations: continuous and discrete hierarchical. - -The discrete hierarchical annotations are derived from the Sequence Ontology -(SO) and GFF3 work - - http://www.sequenceontology.org/gff3.shtml - -The goal is to be able to store annotations using the GFF3 and SO conceptual -model, although there is not necessarily a one-to-one mapping in Avro records -to GFF3 records. - -The minimum requirement is to be able to accurately represent the current -state-of-the-art annotation data and the full SO model. Feature is the -core generic record which corresponds to a GFF3 record. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associated with some data item.
- * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. 
- * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: OntologyTerm - - :field id: - Ontology source identifier - the identifier, a CURIE (preferred) or - PURL for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo - It differs from the standard GA4GH schema's :ref:`id ` - in that it is a URI pointing to an information resource outside of the scope - of the schema or its resource implementation. - :type id: string - :field term: - Ontology term - the representation the id is pointing to. - :type term: null|string - :field sourceName: - Ontology source name - the name of ontology from which the term is obtained - e.g. 'Human Phenotype Ontology' - :type sourceName: null|string - :field sourceVersion: - Ontology source version - the version of the ontology from which the - OntologyTerm is obtained; e.g. 2.6.1. - There is no standard for ontology versioning and some frequently - released ontologies may use a datestamp, or build number. - :type sourceVersion: null|string - - An ontology term describing an attribute. (e.g. the phenotype attribute - 'polydactyly' from HPO) - -.. avro:record:: Experiment - - :field id: - The experiment UUID. This is globally unique. - :type id: string - :field name: - The name of the experiment. - :type name: null|string - :field description: - A description of the experiment. - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field runTime: - The time at which this experiment was performed. - Granularity here is variable (e.g. date only). - Format: :ref:`ISO 8601 ` - :type runTime: null|string - :field molecule: - The molecule examined in this experiment. (e.g. genomics DNA, total RNA) - :type molecule: null|string - :field strategy: - The experiment technique or strategy applied to the sample. - (e.g. whole genome sequencing, RNA-seq, RIP-seq) - :type strategy: null|string - :field selection: - The method used to enrich the target. (e.g. immunoprecipitation, size - fractionation, MNase digestion) - :type selection: null|string - :field library: - The name of the library used as part of this experiment. - :type library: null|string - :field libraryLayout: - The configuration of sequenced reads. (e.g. Single or Paired) - :type libraryLayout: null|string - :field instrumentModel: - The instrument model used as part of this experiment. - This maps to sequencing technology in BAM. 
- :type instrumentModel: null|string - :field instrumentDataFile: - The data file generated by the instrument. - TODO: This isn't actually a file is it? - Should this be `instrumentData` instead? - :type instrumentDataFile: null|string - :field sequencingCenter: - The sequencing center used as part of this experiment. - :type sequencingCenter: null|string - :field platformUnit: - The platform unit used as part of this experiment. This is a flowcell-barcode - or slide unique identifier. - :type platformUnit: null|string - :field info: - A map of additional experiment information. - :type info: map> - - An experimental preparation of a sample. - -.. avro:record:: Dataset - - :field id: - The dataset's id, locally unique to the server instance. - :type id: string - :field name: - The name of the dataset. - :type name: null|string - :field description: - Additional, human-readable information on the dataset. - :type description: null|string - - A Dataset is a collection of related data of multiple types. - Data providers decide how to group data into datasets. - See [Metadata API](../api/metadata.html) for a more detailed discussion. - -.. avro:record:: Analysis - - :field id: - Formats of id | name | description | accessions are described in the - documentation on general attributes and formats. - :type id: string - :field name: - :type name: null|string - :field description: - :type description: null|string - :field createDateTime: - The time at which this record was created. - Format: :ref:`ISO 8601 ` - :type createDateTime: null|string - :field updateDateTime: - The time at which this record was last updated. - Format: :ref:`ISO 8601 ` - :type updateDateTime: string - :field type: - The type of analysis. - :type type: null|string - :field software: - The software run to generate this analysis. - :type software: array - :field info: - A map of additional analysis information. - :type info: map> - - An analysis contains an interpretation of one or several experiments. - (e.g. SNVs, copy number variations, methylation status) together with - information about the methodology used. - -.. avro:record:: Attributes - - :field vals: - :type vals: map> - - Type defining a collection of attributes associated with various protocol - records. Each attribute is a name that maps to an array of one or more - values. Values can be strings, external identifiers, or ontology terms. - Values should be split into the array elements instead of using a separator - syntax that needs to parsed. - -.. avro:record:: Feature - - :field id: - Id of this annotation node. - :type id: string - :field parentId: - Parent Id of this node. Set to empty string if node has no parent. - :type parentId: string - :field childIds: - Ordered array of Child Ids of this node. - Since not all child nodes are ordered by genomic coordinates, - this can't always be reconstructed from parentId's of the children alone. - :type childIds: array - :field featureSetId: - Identifier for the containing feature set. - :type featureSetId: string - :field referenceName: - The reference on which this feature occurs. - (e.g. `chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this feature occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Features spanning the join of circular genomes are represented as - two features one on each side of the join (position 0). 
- :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field strand: - The strand on which the feature is present. - :type strand: Strand - :field featureType: - Feature that is annotated by this region. Normally, this will be a term in - the Sequence Ontology. - :type featureType: OntologyTerm - :field attributes: - Name/value attributes of the annotation. Attribute names follow the GFF3 - naming convention of reserved names starting with an upper cases - character, and user-define names start with lower-case. Most GFF3 - pre-defined attributes apply, the exceptions are ID and Parent, which are - defined as fields. Additional, the following attributes are added: - * Score - the GFF3 score column - * Phase - the GFF3 phase column for CDS features. - :type attributes: Attributes - - Node in the annotation graph that annotates a contiguous region of a - sequence. - -.. avro:record:: FeatureSet - - :field id: - The ID of this annotation set. - :type id: string - :field datasetId: - The ID of the dataset this annotation set belongs to. - :type datasetId: null|string - :field referenceSetId: - The ID of the reference set which defines the coordinate-space for this - set of annotations. - :type referenceSetId: null|string - :field name: - The display name for this annotation set. - :type name: null|string - :field sourceURI: - The source URI describing the file from which this annotation set was - generated, if any. - :type sourceURI: null|string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - diff --git a/doc/source/schemas/variantmethods.rst b/doc/source/schemas/variantmethods.rst deleted file mode 100644 index a3641a85..00000000 --- a/doc/source/schemas/variantmethods.rst +++ /dev/null @@ -1,475 +0,0 @@ -VariantMethods -************** - - .. function:: searchVariants(request) - - :param request: SearchVariantsRequest: This request maps to the body of `POST /variants/search` as JSON. - :return type: SearchVariantsResponse - :throws: GAException - -Gets a list of `Variant` matching the search criteria. - -`POST /variants/search` must accept a JSON version of `SearchVariantsRequest` -as the post body and will return a JSON version of `SearchVariantsResponse`. - - .. function:: getCallSet(id) - - :param id: string: The ID of the `CallSet`. - :return type: org.ga4gh.models.CallSet - :throws: GAException - -Gets a `CallSet` by ID. -`GET /callsets/{id}` will return a JSON version of `CallSet`. - - .. function:: searchVariantSets(request) - - :param request: SearchVariantSetsRequest: This request maps to the body of `POST /variantsets/search` as JSON. - :return type: SearchVariantSetsResponse - :throws: GAException - -Gets a list of `VariantSet` matching the search criteria. - -`POST /variantsets/search` must accept a JSON version of -`SearchVariantSetsRequest` as the post body and will return a JSON version -of `SearchVariantSetsResponse`. - - .. function:: getVariantSet(id) - - :param id: string: The ID of the `VariantSet`. - :return type: org.ga4gh.models.VariantSet - :throws: GAException - -Gets a `VariantSet` by ID. -`GET /variantsets/{id}` will return a JSON version of `VariantSet`. - - .. function:: getVariant(id) - - :param id: string: The ID of the `Variant`. - :return type: org.ga4gh.models.Variant - :throws: GAException - -Gets a `Variant` by ID. -`GET /variants/{id}` will return a JSON version of `Variant`. 
- - .. function:: searchCallSets(request) - - :param request: SearchCallSetsRequest: This request maps to the body of `POST /callsets/search` as JSON. - :return type: SearchCallSetsResponse - :throws: GAException - -Gets a list of `CallSet` matching the search criteria. - -`POST /callsets/search` must accept a JSON version of `SearchCallSetsRequest` -as the post body and will return a JSON version of `SearchCallSetsResponse`. - -.. avro:error:: GAException - - A general exception type. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. 
This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. - :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: VariantSetMetadata - - :field key: - The top-level key. - :type key: string - :field value: - The value field for simple metadata. - :type value: string - :field id: - User-provided ID field, not enforced by this API. - Two or more pieces of structured metadata with identical - id and key fields are considered equivalent. - `FIXME: If it's not enforced, then why can't it be null?` - :type id: string - :field type: - The type of data. - :type type: string - :field number: - The number of values that can be included in a field described by this - metadata. - :type number: string - :field description: - A textual description of this metadata. - :type description: string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - - Optional metadata associated with a variant set. - -.. avro:record:: VariantSet - - :field id: - The variant set ID. - :type id: string - :field name: - The variant set name. - :type name: null|string - :field datasetId: - The ID of the dataset this variant set belongs to. - :type datasetId: string - :field referenceSetId: - The ID of the reference set that describes the sequences used by the variants in this set. - :type referenceSetId: string - :field metadata: - Optional metadata associated with this variant set. - This array can be used to store information about the variant set, such as information found - in VCF header fields, that isn't already available in first class fields such as "name". - :type metadata: array - - A VariantSet is a collection of variants and variant calls intended to be analyzed together. - -.. avro:record:: CallSet - - :field id: - The call set ID. - :type id: string - :field name: - The call set name. - :type name: null|string - :field sampleId: - The sample this call set's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the sampleId - field in the read groups used to generate this call set. 
- :type sampleId: null|string - :field variantSetIds: - The IDs of the variant sets this call set has calls in. - :type variantSetIds: array - :field created: - The date this call set was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this call set was last updated in - milliseconds from the epoch. - :type updated: null|long - :field info: - A map of additional call set information. - :type info: map> - - A CallSet is a collection of calls that were generated by the same analysis of the same sample. - -.. avro:record:: Call - - :field callSetName: - The name of the call set this variant call belongs to. - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetName: null|string - :field callSetId: - The ID of the call set this variant call belongs to. - - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetId: null|string - :field genotype: - The genotype of this variant call. - - A 0 value represents the reference allele of the associated `Variant`. Any - other value is a 1-based index into the alternate alleles of the associated - `Variant`. - - If a variant had a referenceBases field of "T", an alternateBases - value of ["A", "C"], and the genotype was [2, 1], that would mean the call - represented the heterozygous value "CA" for this variant. If the genotype - was instead [0, 1] the represented value would be "TA". Ordering of the - genotype values is important if the phaseset field is present. - :type genotype: array - :field phaseset: - If this field is not null, this variant call's genotype ordering implies - the phase of the bases and is consistent with any other variant calls on - the same contig which have the same phaseset string. - :type phaseset: null|string - :field genotypeLikelihood: - The genotype likelihoods for this variant call. Each array entry - represents how likely a specific genotype is for this call as - log10(P(data | genotype)), analogous to the GL tag in the VCF spec. The - value ordering is defined by the GL tag in the VCF spec. - :type genotypeLikelihood: array - :field info: - A map of additional variant call information. - :type info: map> - - A `Call` represents the determination of genotype with respect to a - particular `Variant`. - - It may include associated information such as quality - and phasing. For example, a call might assign a probability of 0.32 to - the occurrence of a SNP named rs1234 in a call set with the name NA12345. - -.. avro:record:: Variant - - :field id: - The variant ID. - :type id: string - :field variantSetId: - The ID of the `VariantSet` this variant belongs to. This transitively defines - the `ReferenceSet` against which the `Variant` is to be interpreted. - :type variantSetId: string - :field names: - Names for the variant, for example a RefSNP ID. - :type names: array - :field created: - The date this variant was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this variant was last updated in - milliseconds from the epoch. - :type updated: null|long - :field referenceName: - The reference on which this variant occurs. - (e.g. 
`chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this variant occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Variants spanning the join of circular genomes are represented as - two variants one on each side of the join (position 0). - :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field referenceBases: - The reference bases for this variant. They start at the given start position. - :type referenceBases: string - :field alternateBases: - The bases that appear instead of the reference bases. Multiple alternate - alleles are possible. - :type alternateBases: array - :field info: - A map of additional variant information. - :type info: map> - :field calls: - The variant calls for this particular variant. Each one represents the - determination of genotype with respect to this variant. `Call`s in this array - are implicitly associated with this `Variant`. - :type calls: array - - A `Variant` represents a change in DNA sequence relative to some reference. - For example, a variant could represent a SNP or an insertion. - Variants belong to a `VariantSet`. - This is equivalent to a row in VCF. - -.. avro:record:: SearchVariantSetsRequest - - :field datasetId: - The `Dataset` to search. - :type datasetId: string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /variantsets/search` as JSON. - -.. avro:record:: SearchVariantSetsResponse - - :field variantSets: - The list of matching variant sets. - :type variantSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /variantsets/search` expressed as JSON. - -.. avro:record:: SearchVariantsRequest - - :field variantSetId: - The `VariantSet` to search. - :type variantSetId: string - :field callSetIds: - Only return variant calls which belong to call sets with these IDs. - If an empty array, returns variants without any call objects. - If null, returns all variant calls. - :type callSetIds: null|array - :field referenceName: - Required. Only return variants on this reference. - :type referenceName: string - :field start: - Required. The beginning of the window (0-based, inclusive) for - which overlapping variants should be returned. - Genomic positions are non-negative integers less than reference length. - Requests spanning the join of circular genomes are represented as - two requests one on each side of the join (position 0). - :type start: long - :field end: - Required. The end of the window (0-based, exclusive) for which overlapping - variants should be returned. - :type end: long - :field pageSize: - Specifies the maximum number of results to return in a single page. 
- If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /variants/search` as JSON. - -.. avro:record:: SearchVariantsResponse - - :field variants: - The list of matching variants. - If the `callSetId` field on the returned calls is not present, - the ordering of the call sets from a `SearchCallSetsRequest` - over the parent `VariantSet` is guaranteed to match the ordering - of the calls on each `Variant`. The number of results will also be - the same. - :type variants: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /variants/search` expressed as JSON. - -.. avro:record:: SearchCallSetsRequest - - :field variantSetId: - The VariantSet to search. - :type variantSetId: string - :field name: - Only return call sets with this name (case-sensitive, exact match). - :type name: null|string - :field pageSize: - Specifies the maximum number of results to return in a single page. - If unspecified, a system default will be used. - :type pageSize: null|int - :field pageToken: - The continuation token, which is used to page through large result sets. - To get the next page of results, set this parameter to the value of - `nextPageToken` from the previous response. - :type pageToken: null|string - - This request maps to the body of `POST /callsets/search` as JSON. - -.. avro:record:: SearchCallSetsResponse - - :field callSets: - The list of matching call sets. - :type callSets: array - :field nextPageToken: - The continuation token, which is used to page through large result sets. - Provide this value in a subsequent request to return the next page of - results. This field will be empty if there aren't any additional results. - :type nextPageToken: null|string - - This is the response from `POST /callsets/search` expressed as JSON. - diff --git a/doc/source/schemas/variants.rst b/doc/source/schemas/variants.rst deleted file mode 100644 index f51ccd9c..00000000 --- a/doc/source/schemas/variants.rst +++ /dev/null @@ -1,297 +0,0 @@ -Variants -******** - -This file defines the objects used to represent variant calls, most importantly -VariantSet, Variant, and Call. -See {TODO: LINK TO VARIANTS OVERVIEW} for more information. - -.. avro:enum:: Strand - - :symbols: NEG_STRAND|POS_STRAND - Indicates the DNA strand associate for some data item. - * `NEG_STRAND`: The negative (-) strand. - * `POS_STRAND`: The postive (+) strand. - -.. avro:record:: Position - - :field referenceName: - The name of the `Reference` on which the `Position` is located. - :type referenceName: string - :field position: - The 0-based offset from the start of the forward strand for that `Reference`. - Genomic positions are non-negative integers less than `Reference` length. - :type position: long - :field strand: - Strand the position is associated with. - :type strand: Strand - - A `Position` is an unoriented base in some `Reference`. A `Position` is - represented by a `Reference` name, and a base number on that `Reference` - (0-based). - -.. 
avro:record:: ExternalIdentifier - - :field database: - The source of the identifier. - (e.g. `Ensembl`) - :type database: string - :field identifier: - The ID defined by the external database. - (e.g. `ENST00000000000`) - :type identifier: string - :field version: - The version of the object or the database - (e.g. `78`) - :type version: string - - Identifier from a public database - -.. avro:enum:: CigarOperation - - :symbols: ALIGNMENT_MATCH|INSERT|DELETE|SKIP|CLIP_SOFT|CLIP_HARD|PAD|SEQUENCE_MATCH|SEQUENCE_MISMATCH - An enum for the different types of CIGAR alignment operations that exist. - Used wherever CIGAR alignments are used. The different enumerated values - have the following usage: - - * `ALIGNMENT_MATCH`: An alignment match indicates that a sequence can be - aligned to the reference without evidence of an INDEL. Unlike the - `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators, the `ALIGNMENT_MATCH` - operator does not indicate whether the reference and read sequences are an - exact match. This operator is equivalent to SAM's `M`. - * `INSERT`: The insert operator indicates that the read contains evidence of - bases being inserted into the reference. This operator is equivalent to - SAM's `I`. - * `DELETE`: The delete operator indicates that the read contains evidence of - bases being deleted from the reference. This operator is equivalent to - SAM's `D`. - * `SKIP`: The skip operator indicates that this read skips a long segment of - the reference, but the bases have not been deleted. This operator is - commonly used when working with RNA-seq data, where reads may skip long - segments of the reference between exons. This operator is equivalent to - SAM's 'N'. - * `CLIP_SOFT`: The soft clip operator indicates that bases at the start/end - of a read have not been considered during alignment. This may occur if the - majority of a read maps, except for low quality bases at the start/end of - a read. This operator is equivalent to SAM's 'S'. Bases that are soft clipped - will still be stored in the read. - * `CLIP_HARD`: The hard clip operator indicates that bases at the start/end of - a read have been omitted from this alignment. This may occur if this linear - alignment is part of a chimeric alignment, or if the read has been trimmed - (e.g., during error correction, or to trim poly-A tails for RNA-seq). This - operator is equivalent to SAM's 'H'. - * `PAD`: The pad operator indicates that there is padding in an alignment. - This operator is equivalent to SAM's 'P'. - * `SEQUENCE_MATCH`: This operator indicates that this portion of the aligned - sequence exactly matches the reference (e.g., all bases are equal to the - reference bases). This operator is equivalent to SAM's '='. - * `SEQUENCE_MISMATCH`: This operator indicates that this portion of the - aligned sequence is an alignment match to the reference, but a sequence - mismatch (e.g., the bases are not equal to the reference). This can - indicate a SNP or a read error. This operator is equivalent to SAM's 'X'. - -.. avro:record:: CigarUnit - - :field operation: - The operation type. - :type operation: CigarOperation - :field operationLength: - The number of bases that the operation runs for. - :type operationLength: long - :field referenceSequence: - `referenceSequence` is only used at mismatches (`SEQUENCE_MISMATCH`) - and deletions (`DELETE`). Filling this field replaces the MD tag. - If the relevant information is not available, leave this field as `null`. 
- :type referenceSequence: null|string - - A structure for an instance of a CIGAR operation. - `FIXME: This belongs under Reads (only readAlignment refers to this)` - -.. avro:record:: VariantSetMetadata - - :field key: - The top-level key. - :type key: string - :field value: - The value field for simple metadata. - :type value: string - :field id: - User-provided ID field, not enforced by this API. - Two or more pieces of structured metadata with identical - id and key fields are considered equivalent. - `FIXME: If it's not enforced, then why can't it be null?` - :type id: string - :field type: - The type of data. - :type type: string - :field number: - The number of values that can be included in a field described by this - metadata. - :type number: string - :field description: - A textual description of this metadata. - :type description: string - :field info: - Remaining structured metadata key-value pairs. - :type info: map> - - Optional metadata associated with a variant set. - -.. avro:record:: VariantSet - - :field id: - The variant set ID. - :type id: string - :field name: - The variant set name. - :type name: null|string - :field datasetId: - The ID of the dataset this variant set belongs to. - :type datasetId: string - :field referenceSetId: - The ID of the reference set that describes the sequences used by the variants in this set. - :type referenceSetId: string - :field metadata: - Optional metadata associated with this variant set. - This array can be used to store information about the variant set, such as information found - in VCF header fields, that isn't already available in first class fields such as "name". - :type metadata: array - - A VariantSet is a collection of variants and variant calls intended to be analyzed together. - -.. avro:record:: CallSet - - :field id: - The call set ID. - :type id: string - :field name: - The call set name. - :type name: null|string - :field sampleId: - The sample this call set's data was generated from. - Note: the current API does not have a rigorous definition of sample. Therefore, this - field actually contains an arbitrary string, typically corresponding to the sampleId - field in the read groups used to generate this call set. - :type sampleId: null|string - :field variantSetIds: - The IDs of the variant sets this call set has calls in. - :type variantSetIds: array - :field created: - The date this call set was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this call set was last updated in - milliseconds from the epoch. - :type updated: null|long - :field info: - A map of additional call set information. - :type info: map> - - A CallSet is a collection of calls that were generated by the same analysis of the same sample. - -.. avro:record:: Call - - :field callSetName: - The name of the call set this variant call belongs to. - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. - :type callSetName: null|string - :field callSetId: - The ID of the call set this variant call belongs to. - - If this field is not present, the ordering of the call sets from a - `SearchCallSetsRequest` over this `VariantSet` is guaranteed to match - the ordering of the calls on this `Variant`. - The number of results will also be the same. 
- :type callSetId: null|string - :field genotype: - The genotype of this variant call. - - A 0 value represents the reference allele of the associated `Variant`. Any - other value is a 1-based index into the alternate alleles of the associated - `Variant`. - - If a variant had a referenceBases field of "T", an alternateBases - value of ["A", "C"], and the genotype was [2, 1], that would mean the call - represented the heterozygous value "CA" for this variant. If the genotype - was instead [0, 1] the represented value would be "TA". Ordering of the - genotype values is important if the phaseset field is present. - :type genotype: array - :field phaseset: - If this field is not null, this variant call's genotype ordering implies - the phase of the bases and is consistent with any other variant calls on - the same contig which have the same phaseset string. - :type phaseset: null|string - :field genotypeLikelihood: - The genotype likelihoods for this variant call. Each array entry - represents how likely a specific genotype is for this call as - log10(P(data | genotype)), analogous to the GL tag in the VCF spec. The - value ordering is defined by the GL tag in the VCF spec. - :type genotypeLikelihood: array - :field info: - A map of additional variant call information. - :type info: map> - - A `Call` represents the determination of genotype with respect to a - particular `Variant`. - - It may include associated information such as quality - and phasing. For example, a call might assign a probability of 0.32 to - the occurrence of a SNP named rs1234 in a call set with the name NA12345. - -.. avro:record:: Variant - - :field id: - The variant ID. - :type id: string - :field variantSetId: - The ID of the `VariantSet` this variant belongs to. This transitively defines - the `ReferenceSet` against which the `Variant` is to be interpreted. - :type variantSetId: string - :field names: - Names for the variant, for example a RefSNP ID. - :type names: array - :field created: - The date this variant was created in milliseconds from the epoch. - :type created: null|long - :field updated: - The time at which this variant was last updated in - milliseconds from the epoch. - :type updated: null|long - :field referenceName: - The reference on which this variant occurs. - (e.g. `chr20` or `X`) - :type referenceName: string - :field start: - The start position at which this variant occurs (0-based). - This corresponds to the first base of the string of reference bases. - Genomic positions are non-negative integers less than reference length. - Variants spanning the join of circular genomes are represented as - two variants one on each side of the join (position 0). - :type start: long - :field end: - The end position (exclusive), resulting in [start, end) closed-open interval. - This is typically calculated by `start + referenceBases.length`. - :type end: long - :field referenceBases: - The reference bases for this variant. They start at the given start position. - :type referenceBases: string - :field alternateBases: - The bases that appear instead of the reference bases. Multiple alternate - alleles are possible. - :type alternateBases: array - :field info: - A map of additional variant information. - :type info: map> - :field calls: - The variant calls for this particular variant. Each one represents the - determination of genotype with respect to this variant. `Call`s in this array - are implicitly associated with this `Variant`. 
- :type calls: array - - A `Variant` represents a change in DNA sequence relative to some reference. - For example, a variant could represent a SNP or an insertion. - Variants belong to a `VariantSet`. - This is equivalent to a row in VCF. - diff --git a/tools/sphinx/avpr2rest.py b/tools/sphinx/avpr2rest.py index 143fd018..0f0df1c4 100644 --- a/tools/sphinx/avpr2rest.py +++ b/tools/sphinx/avpr2rest.py @@ -56,16 +56,13 @@ def cleanup_doc(doc,indent=0): # process formal parameters ('request') request = message_def['request'] # collect the names - param_names = [] - for param in request: - param_names.append(param['name']) + response = message_def['response'] errors = message_def['errors'] output += " .. function:: %s(%s)\n\n" % (message_name, - ', '.join(param_names)) - for param in request: - output += " :param %s: %s: %s\n" % (param['name'], param['type'], - param['doc']) + ', '.join([request['name']])) + output += " :param %s: %s: %s\n" % (request['name'], request['type'], + request['doc']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) @@ -74,7 +71,7 @@ def cleanup_doc(doc,indent=0): for item in data['types']: output += '.. avro:%s:: %s\n\n' % (item['type'], item['name']) - if item['type'] == 'record': + if item['type'] == 'message': for field in item['fields']: output += ' :field %s:\n' % field['name'] if 'doc' in field: @@ -142,16 +139,14 @@ def typename(typeobject): # process formal parameters ('request') request = message_def['request'] # collect the names - param_names = [] - for param in request: - param_names.append(param['name']) + param_names = [request['name']] response = message_def['response'] errors = message_def['errors'] output += " .. function:: %s(%s)\n\n" % (message_name, ', '.join(param_names)) for param in request: - output += " :param %s: %s: %s\n" % (param['name'], param['type'], - param['doc']) + output += " :param %s: %s: %s\n" % (request['name'], request['type'], + request['doc']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) @@ -160,7 +155,7 @@ def typename(typeobject): for item in data['types']: output += '.. 
avro:%s:: %s\n\n' % (item['type'], item['name']) - if item['type'] == 'record': + if item['type'] == 'message': for field in item['fields']: output += ' :field %s:\n' % field['name'] if 'doc' in field: diff --git a/tools/sphinx/avrodomain.py b/tools/sphinx/avrodomain.py index f0453541..23a9c238 100644 --- a/tools/sphinx/avrodomain.py +++ b/tools/sphinx/avrodomain.py @@ -71,7 +71,7 @@ def get_index_text(self,name): return _('%s (Avro fixed-width value)') % name if self.objtype == 'enum': return _('%s (Avro enum)') % name - if self.objtype == 'record': + if self.objtype == 'message': return _('%s (Avro record)') % name if self.objtype == 'error': return _('%s (Avro error)') % name @@ -109,12 +109,12 @@ class AvroEnum(AvroObject): ] class AvroRecord(AvroObject): - prefix = 'record' + prefix = 'message' doc_field_types = [ TypedField('fields', label=l_('Fields'), names=('field','member'), typenames=('type',), - typerolename='record') + typerolename='message') ] class AvroError(AvroRecord): @@ -139,7 +139,7 @@ class AvroDomain(Domain): object_types = { 'fixed': ObjType(l_('fixed'), 'fixed'), 'enum': ObjType(l_('enum'), 'enum'), - 'record': ObjType(l_('record'), 'record'), + 'message': ObjType(l_('message'), 'message'), 'error': ObjType(l_('error'), 'error'), 'rpc': ObjType(l_('rpc'), 'rpc'), } @@ -147,7 +147,7 @@ class AvroDomain(Domain): directives = { 'fixed': AvroFixedField, 'enum': AvroEnum, - 'record': AvroRecord, + 'message': AvroRecord, 'error': AvroError, 'rpc': AvroRPCMessage } @@ -155,7 +155,7 @@ class AvroDomain(Domain): roles = { 'fixed': XRefRole(), 'enum': XRefRole(), - 'record': XRefRole(), + 'message': XRefRole(), 'error': XRefRole(), 'rpc': XRefRole() } diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 2974d1d9..f2943888 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -80,9 +80,6 @@ def _traverse(package, items, tree): for nested_item in _traverse(nested, nested_package): yield nested_item, nested_package - import pprint - open("dump", "w").write(pprint.pformat(proto_file.source_code_info)) - tree = collections.defaultdict(collections.defaultdict) for loc in proto_file.source_code_info.location: if loc.leading_comments or loc.trailing_comments: @@ -165,6 +162,7 @@ def generate_code(request, response): "request": { "name": "request", "type": m.input_type[1:], + "doc": '' }, "response": m.output_type[1:], "errors" : [ "GAException" ] From ed3f56591119e91478352a3217be15c36d8f52a4 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Tue, 14 Jun 2016 17:26:58 +0100 Subject: [PATCH 07/40] Request args for services should be a list --- tools/sphinx/protobuf-json-docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index f2943888..e5bf7940 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -159,11 +159,11 @@ def generate_code(request, response): for m in item.method: messages[m.name] = { "doc": m.comment, - "request": { + "request": [{ "name": "request", "type": m.input_type[1:], "doc": '' - }, + }], "response": m.output_type[1:], "errors" : [ "GAException" ] } From bf975a72c369212988a3a96d2e18c9598c9abac1 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Tue, 14 Jun 2016 17:39:07 +0100 Subject: [PATCH 08/40] Undo previous change to avpr2rest.py now that request args is a list --- tools/sphinx/avpr2rest.py | 19 ++++++++++++------- 1 file changed, 12 
insertions(+), 7 deletions(-) diff --git a/tools/sphinx/avpr2rest.py b/tools/sphinx/avpr2rest.py index 0f0df1c4..4cb86e0d 100644 --- a/tools/sphinx/avpr2rest.py +++ b/tools/sphinx/avpr2rest.py @@ -56,13 +56,16 @@ def cleanup_doc(doc,indent=0): # process formal parameters ('request') request = message_def['request'] # collect the names - + param_names = [] + for param in request: + param_names.append(param['name']) response = message_def['response'] errors = message_def['errors'] output += " .. function:: %s(%s)\n\n" % (message_name, - ', '.join([request['name']])) - output += " :param %s: %s: %s\n" % (request['name'], request['type'], - request['doc']) + ', '.join(param_names)) + for param in request: + output += " :param %s: %s: %s\n" % (param['name'], param['type'], + param['doc']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) @@ -139,14 +142,16 @@ def typename(typeobject): # process formal parameters ('request') request = message_def['request'] # collect the names - param_names = [request['name']] + param_names = [] + for param in request: + param_names.append(param['name']) response = message_def['response'] errors = message_def['errors'] output += " .. function:: %s(%s)\n\n" % (message_name, ', '.join(param_names)) for param in request: - output += " :param %s: %s: %s\n" % (request['name'], request['type'], - request['doc']) + output += " :param %s: %s: %s\n" % (param['name'], param['type'], + param['doc']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) From 7c7979487ada1be6a4b57fa52c215e7c5bb54c38 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Tue, 14 Jun 2016 18:44:57 +0100 Subject: [PATCH 09/40] Show the bullet list for enum values docs --- tools/sphinx/protobuf-json-docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index e5bf7940..a6792505 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -153,7 +153,7 @@ def generate_code(request, response): 'type': 'enum', 'symbols': [v.name for v in item.value] }) - data["doc"] += " ".join(comments) + data["doc"] += "\n" + " ".join(comments) types.append(data) elif item.kind == ServiceDescriptorProto: for m in item.method: From 883982d1ae2aed74dc2ebc23b73ddaa487d3cbb9 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:06:13 +0100 Subject: [PATCH 10/40] Add Conda environment and RTD config --- environment.yml | 17 +++++++++++++++++ readthedocs.yml | 2 ++ 2 files changed, 19 insertions(+) create mode 100644 environment.yml create mode 100644 readthedocs.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..edf5ea0b --- /dev/null +++ b/environment.yml @@ -0,0 +1,17 @@ +name: ga4gh +dependencies: +- ioos::protobuf=3.0.0b2.post2=py27_3 +- openssl=1.0.2h=1 +- pip=8.1.2=py27_0 +- python=2.7.11=0 +- readline=6.2=2 +- setuptools=23.0.0=py27_0 +- six=1.10.0=py27_0 +- sqlite=3.13.0=0 +- tk=8.5.18=0 +- wheel=0.29.0=py27_0 +- zlib=1.2.8=3 +- pip: + - protobuf==3.0.0b2 +prefix: /home/palfrey/.miniconda2/envs/ga4gh + diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 00000000..5d3b36c7 --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,2 @@ +conda: + file: environment.yml From cc632f8906234522caa99e8d0089a821bb90f12f Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:08:57 +0100 
Subject: [PATCH 11/40] Remove name from environment.yml --- environment.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/environment.yml b/environment.yml index edf5ea0b..850c674a 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,3 @@ -name: ga4gh dependencies: - ioos::protobuf=3.0.0b2.post2=py27_3 - openssl=1.0.2h=1 @@ -14,4 +13,3 @@ dependencies: - pip: - protobuf==3.0.0b2 prefix: /home/palfrey/.miniconda2/envs/ga4gh - From d71d6236d526f6a2346cdb474a18c10870859479 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:15:17 +0100 Subject: [PATCH 12/40] Correct channel data for Conda (split out "ioos") --- environment.yml | 3 +-- readthedocs.yml | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 850c674a..69cab5a9 100644 --- a/environment.yml +++ b/environment.yml @@ -1,5 +1,5 @@ dependencies: -- ioos::protobuf=3.0.0b2.post2=py27_3 +- protobuf=3.0.0b2.post2=py27_3 - openssl=1.0.2h=1 - pip=8.1.2=py27_0 - python=2.7.11=0 @@ -12,4 +12,3 @@ dependencies: - zlib=1.2.8=3 - pip: - protobuf==3.0.0b2 -prefix: /home/palfrey/.miniconda2/envs/ga4gh diff --git a/readthedocs.yml b/readthedocs.yml index 5d3b36c7..b259c80c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,2 +1,4 @@ conda: file: environment.yml + channels: + - ioos From 6a5bef3c381b0a0d198473d80bc063187c987e39 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:17:50 +0100 Subject: [PATCH 13/40] Fix spacing issues in readthedocs.yml --- readthedocs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index b259c80c..997c5553 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,4 +1,4 @@ conda: - file: environment.yml - channels: - - ioos + file: environment.yml + channels: + - ioos From bb70a42dc125aa998ca83eaf46eead519479f9e8 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:19:23 +0100 Subject: [PATCH 14/40] Conda channels need to be in conda environment, not RTD config --- environment.yml | 4 ++-- readthedocs.yml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 69cab5a9..f6d88421 100644 --- a/environment.yml +++ b/environment.yml @@ -1,3 +1,5 @@ +channels: +- ioos dependencies: - protobuf=3.0.0b2.post2=py27_3 - openssl=1.0.2h=1 @@ -10,5 +12,3 @@ dependencies: - tk=8.5.18=0 - wheel=0.29.0=py27_0 - zlib=1.2.8=3 -- pip: - - protobuf==3.0.0b2 diff --git a/readthedocs.yml b/readthedocs.yml index 997c5553..aebb9e03 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,4 +1,2 @@ conda: file: environment.yml - channels: - - ioos From 199c1a3c85c5bc150bfc033b52a1fc2b3fe4a816 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:31:18 +0100 Subject: [PATCH 15/40] Test running protoc --- doc/source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index a393c1e3..7d0a3b13 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -14,6 +14,7 @@ import sys import os +import subprocess # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -36,6 +37,8 @@ 'avrodomain', ] +subprocess.check_call("protoc") + # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] From bb8a7ee51432a87fd12e310c08ff25f9f4e1350b Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:49:56 +0100 Subject: [PATCH 16/40] Build protobuf files before running the rest of Sphinx --- doc/source/conf.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 7d0a3b13..72e76ef7 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -19,7 +19,8 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('../../tools/sphinx')) +sphinx_path = '../../tools/sphinx' +sys.path.insert(0, os.path.abspath(sphinx_path)) # -- General configuration ------------------------------------------------ @@ -37,7 +38,18 @@ 'avrodomain', ] -subprocess.check_call("protoc") +base_dir = "../../src/main/proto" +json_dir = os.path.join(base_dir, "json") +schema_dir = os.path.join(base_dir, "ga4gh") +for protofile in os.listdir(schema_dir): + fullpath = os.path.join(schema_dir, protofile) + json_file = protofile.replace(".proto", ".json") + cmd = "protoc --proto_path %s --plugin=protoc-gen-custom=%s --custom_out=%s %s" % (base_dir, os.path.join(sphinx_path, "protobuf-json-docs.py"), json_dir, fullpath) + print cmd + subprocess.check_call(cmd, shell=True) + cmd = "python %s %s/ga4gh/%s %s" %(os.path.join(sphinx_path, "avpr2rest.py"), json_dir, json_file, "schemas/%s.rst" % protofile) + print cmd + subprocess.check_call(cmd, shell=True) # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] From 1ecb071443c51d2774f6090c93a9e30c0be92e1a Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:52:57 +0100 Subject: [PATCH 17/40] Use requirements to pull in Protobuf python bits --- readthedocs.yml | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index aebb9e03..1a9d1cf9 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,2 +1,3 @@ conda: file: environment.yml +requirements_file: requirements.txt diff --git a/requirements.txt b/requirements.txt index 9eff163c..2effd325 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ PyYAML -avro +protobuf==3.0.0b3 flake8 humanize nose From cf6aaeb166c32a55984f20a47a9653c8e8baba98 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:55:15 +0100 Subject: [PATCH 18/40] Add Protobuf to Conda deps --- environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index f6d88421..8bb8e716 100644 --- a/environment.yml +++ b/environment.yml @@ -12,3 +12,5 @@ dependencies: - tk=8.5.18=0 - wheel=0.29.0=py27_0 - zlib=1.2.8=3 +- pip: + - protobuf==3.0.0b3 From a0621a7441f86e902933e5b86ddd7e272339a087 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 11:59:46 +0100 Subject: [PATCH 19/40] Make json dir before using it --- doc/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/conf.py b/doc/source/conf.py index 72e76ef7..bb3114c5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -40,6 +40,8 @@ base_dir = "../../src/main/proto" json_dir = os.path.join(base_dir, "json") +if not os.path.exists(json_dir): + os.mkdir(json_dir) schema_dir = os.path.join(base_dir, "ga4gh") for protofile in 
os.listdir(schema_dir): fullpath = os.path.join(schema_dir, protofile) From 54685e3772e500b6f9f263cab512c118cc047c10 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 12:05:47 +0100 Subject: [PATCH 20/40] Fix avpr2rest command line --- doc/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index bb3114c5..b5aad21a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -45,11 +45,11 @@ schema_dir = os.path.join(base_dir, "ga4gh") for protofile in os.listdir(schema_dir): fullpath = os.path.join(schema_dir, protofile) - json_file = protofile.replace(".proto", ".json") + json_file = protofile + ".json" cmd = "protoc --proto_path %s --plugin=protoc-gen-custom=%s --custom_out=%s %s" % (base_dir, os.path.join(sphinx_path, "protobuf-json-docs.py"), json_dir, fullpath) print cmd subprocess.check_call(cmd, shell=True) - cmd = "python %s %s/ga4gh/%s %s" %(os.path.join(sphinx_path, "avpr2rest.py"), json_dir, json_file, "schemas/%s.rst" % protofile) + cmd = "python %s %s/ga4gh/%s schemas/" %(os.path.join(sphinx_path, "avpr2rest.py"), json_dir, json_file) print cmd subprocess.check_call(cmd, shell=True) From bb5e16f2000247fd206f7a2cb7941e676cb4d0ec Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 12:20:17 +0100 Subject: [PATCH 21/40] Tidy up the JSON temp directory, and put environment.yml under docs --- doc/source/conf.py | 2 +- environment.yml => doc/source/environment.yml | 0 readthedocs.yml | 3 +-- 3 files changed, 2 insertions(+), 3 deletions(-) rename environment.yml => doc/source/environment.yml (100%) diff --git a/doc/source/conf.py b/doc/source/conf.py index b5aad21a..54ab0d79 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -39,7 +39,7 @@ ] base_dir = "../../src/main/proto" -json_dir = os.path.join(base_dir, "json") +json_dir = os.path.join("_build", "json-temp") if not os.path.exists(json_dir): os.mkdir(json_dir) schema_dir = os.path.join(base_dir, "ga4gh") diff --git a/environment.yml b/doc/source/environment.yml similarity index 100% rename from environment.yml rename to doc/source/environment.yml diff --git a/readthedocs.yml b/readthedocs.yml index 1a9d1cf9..80e30687 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,3 +1,2 @@ conda: - file: environment.yml -requirements_file: requirements.txt + file: doc/source/environment.yml From f6525553d689b441fd2a942d802ae9a955c1c2d0 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 14:11:38 +0100 Subject: [PATCH 22/40] Use makedirs not mkdir to make the whole temporary json path --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 54ab0d79..29c341e1 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -41,7 +41,7 @@ base_dir = "../../src/main/proto" json_dir = os.path.join("_build", "json-temp") if not os.path.exists(json_dir): - os.mkdir(json_dir) + os.makedirs(json_dir) schema_dir = os.path.join(base_dir, "ga4gh") for protofile in os.listdir(schema_dir): fullpath = os.path.join(schema_dir, protofile) From 60f33eb01cabe7efdd2888c968987b70d2107aee Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Wed, 15 Jun 2016 15:20:00 +0100 Subject: [PATCH 23/40] Handle multiline comments --- tools/sphinx/protobuf-json-docs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index a6792505..b35f17fb 100755 --- 
a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -35,7 +35,8 @@ def __init__(self, prot): def traverse(proto_file): def _collapse_comments(comments): - return (comments["leading_comments"] + comments["trailing_comments"]).strip() + return '\n'.join( + [c.strip() for c in (comments["leading_comments"] + comments["trailing_comments"]).split('\n')]) def _traverse(package, items, tree): for item_index, item in enumerate(items): @@ -170,8 +171,7 @@ def generate_code(request, response): else: raise Exception, item.kind - - comments = "".join(results["file"]).strip() + comments = "\n".join(results["file"]) output = { "types": types, "messages": messages, From 72c9a7c5313e03f649653838cad2a82083682382 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Wed, 15 Jun 2016 15:43:33 +0100 Subject: [PATCH 24/40] Remove no longer used doc field --- .gitignore | 1 + tools/sphinx/avpr2rest.py | 6 ++---- tools/sphinx/protobuf-json-docs.py | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 979ea845..cc03585d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ target *~ #* doc/source/schemas/*.proto.rst +doc/source/schemas/build.rst build #********** windows template********** diff --git a/tools/sphinx/avpr2rest.py b/tools/sphinx/avpr2rest.py index 4cb86e0d..23bb7287 100644 --- a/tools/sphinx/avpr2rest.py +++ b/tools/sphinx/avpr2rest.py @@ -64,8 +64,7 @@ def cleanup_doc(doc,indent=0): output += " .. function:: %s(%s)\n\n" % (message_name, ', '.join(param_names)) for param in request: - output += " :param %s: %s: %s\n" % (param['name'], param['type'], - param['doc']) + output += " :param %s: %s\n" % (param['name'], param['type']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) @@ -150,8 +149,7 @@ def typename(typeobject): output += " .. 
function:: %s(%s)\n\n" % (message_name, ', '.join(param_names)) for param in request: - output += " :param %s: %s: %s\n" % (param['name'], param['type'], - param['doc']) + output += " :param %s: %s\n" % (param['name'], param['type']) output += " :return type: %s\n" % response output += " :throws: %s\n\n" % ', '.join(errors) output += cleanup_doc(doc) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index b35f17fb..6251914d 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -163,7 +163,6 @@ def generate_code(request, response): "request": [{ "name": "request", "type": m.input_type[1:], - "doc": '' }], "response": m.output_type[1:], "errors" : [ "GAException" ] From b25826742d977f475c9aef7d76e86ea7c04d1172 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Wed, 15 Jun 2016 16:02:45 +0100 Subject: [PATCH 25/40] Fix rst errors --- doc/source/api/reads.rst | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/source/api/reads.rst b/doc/source/api/reads.rst index ccb52fc7..fe05f4f9 100644 --- a/doc/source/api/reads.rst +++ b/doc/source/api/reads.rst @@ -20,35 +20,35 @@ The model has the following data types: ============================== ============================================ ================== Record | Description SAM/BAM rough equivalent ============================== ============================================ ================== -:avro:record:`ReadAlignment` | One alignment for one read A single line in a file -:avro:record:`ReadGroup` | A group of read alignments A single RG tag -:avro:record:`ReadGroupSet` | Collecton of ReadGroups that map to the Single SAM/BAM file +:avro:message:`ReadAlignment` | One alignment for one read A single line in a file +:avro:message:`ReadGroup` | A group of read alignments A single RG tag +:avro:message:`ReadGroupSet` | Collecton of ReadGroups that map to the Single SAM/BAM file | same genome -:avro:record:`Program` | Software version and parameters that were PN, CL tags in SAM header +:avro:message:`Program` | Software version and parameters that were PN, CL tags in SAM header | used to align reads to the genome -:avro:record:`ReadStats` | Counts of aligned and unaligned reads Samtools flagstats on a file +:avro:message:`ReadStats` | Counts of aligned and unaligned reads Samtools flagstats on a file | for a ReadGroup or ReadGroupSet ============================== ============================================ ================== The relationships are mostly one to many (e.g. each -:avro:record:`ReadAlignment` is part of exactly one -:avro:record:`ReadGroup`), with the exception that a -:avro:record:`ReadGroup` is allowed to be part of more than one -:avro:record:`ReadGroupSet`. +:avro:message:`ReadAlignment` is part of exactly one +:avro:message:`ReadGroup`), with the exception that a +:avro:message:`ReadGroup` is allowed to be part of more than one +:avro:message:`ReadGroupSet`. -:avro:record:`Dataset` --< :avro:record:`ReadGroupSet` >--< :avro:record:`ReadGroup` --< :avro:record:`ReadAlignment` +:avro:message:`Dataset` --< :avro:message:`ReadGroupSet` >--< :avro:message:`ReadGroup` --< :avro:message:`ReadAlignment` -* A :avro:record:`Dataset` is a general-purpose container, defined in +* A :avro:message:`Dataset` is a general-purpose container, defined in metadata.avdl. 
-* A :avro:record:`ReadGroupSet` is a logical collection of ReadGroups, +* A :avro:message:`ReadGroupSet` is a logical collection of ReadGroups, as determined by the data owner. Typically one - :avro:record:`ReadGroupSet` represents all the Reads from one + :avro:message:`ReadGroupSet` represents all the Reads from one experimental sample, which traditionally would be stored in a single BAM file. -* A :avro:record:`ReadGroup` is all the data that's processed the same +* A :avro:message:`ReadGroup` is all the data that's processed the same way by the sequencer. There are typically 1-10 ReadGroups in a - :avro:record:`ReadGroupSet`. -* A :avro:record:`ReadAlignment` object is a flattened representation + :avro:message:`ReadGroupSet`. +* A :avro:message:`ReadAlignment` object is a flattened representation of several layers of bioinformatics hierarchy, including fragments, reads, and alignments, stored in one object for easy access. @@ -56,9 +56,9 @@ The relationships are mostly one to many (e.g. each ReadAlignment: detailed discussion @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -One :avro:record:`ReadAlignment` object represents the following +One :avro:message:`ReadAlignment` object represents the following logical hierarchy. See the field definitions in the -:avro:record:`ReadAlignment` object for more details. +:avro:message:`ReadAlignment` object for more details. .. image:: /_static/read_alignment_diagrams.png From d48bec991e19298d53d73068a44027293fa84bf9 Mon Sep 17 00:00:00 2001 From: Irene Papakonstantinou Date: Wed, 15 Jun 2016 16:30:46 +0100 Subject: [PATCH 26/40] Add doc/source/_build to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cc03585d..b04b71f2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ target #* doc/source/schemas/*.proto.rst doc/source/schemas/build.rst +doc/source/_build/ build #********** windows template********** From 3e523808c54f8c3e4dbde503d89088f45a491374 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 16:55:56 +0100 Subject: [PATCH 27/40] Link enum/message references --- tools/sphinx/protobuf-json-docs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 6251914d..f6dc9fac 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -21,6 +21,8 @@ def __init__(self, prot): elif isinstance(prot, EnumValueDescriptorProto): self.number = prot.number elif isinstance(prot, FieldDescriptorProto): + if prot.type in [11, 14]: + self.ref_type = prot.type_name.replace(".ga4gh.", "") self.type = prot.type elif isinstance(prot, ServiceDescriptorProto): self.method = [convert_protodef_to_editable(x) for x in prot.method] @@ -134,12 +136,10 @@ def generate_code(request, response): kind = "boolean" elif f.type in [9]: kind = "string" - elif f.type in [11]: - kind = "message" + elif f.type in [11, 14]: + kind = ":avro:message:`%s`" % f.ref_type elif f.type in [12]: kind = "bytes" - elif f.type in [14]: - kind = "enum" else: raise Exception, f.type data["fields"].append({ From 051ffb5523b03c4b12b5eae2fd1c419fdddb9c33 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 16:58:46 +0100 Subject: [PATCH 28/40] Also link service request/response types --- tools/sphinx/protobuf-json-docs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index f6dc9fac..27a6b317 100755 
--- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -162,10 +162,10 @@ def generate_code(request, response): "doc": m.comment, "request": [{ "name": "request", - "type": m.input_type[1:], + "type": ":avro:message:`%s`" % m.input_type.replace(".ga4gh.", ""), }], - "response": m.output_type[1:], - "errors" : [ "GAException" ] + "response": ":avro:message:`%s`" % m.output_type.replace(".ga4gh.", ""), + "errors" : [ ":avro:message:`GAException`" ] } else: raise Exception, item.kind From 03dc144b24ec2689f038aced2eecf77ee2c581c7 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 17:23:47 +0100 Subject: [PATCH 29/40] Correct nested item support --- tools/sphinx/protobuf-json-docs.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 27a6b317..7a7a9fdb 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -18,6 +18,8 @@ def __init__(self, prot): self.value = [convert_protodef_to_editable(x) for x in prot.value] elif isinstance(prot, DescriptorProto): self.field = [convert_protodef_to_editable(x) for x in prot.field] + self.enum_type = [convert_protodef_to_editable(x) for x in prot.enum_type] + self.nested_type = prot.nested_type elif isinstance(prot, EnumValueDescriptorProto): self.number = prot.number elif isinstance(prot, FieldDescriptorProto): @@ -32,6 +34,7 @@ def __init__(self, prot): else: raise Exception, type(prot) + return Editable(proto) def traverse(proto_file): @@ -73,15 +76,15 @@ def _traverse(package, items, tree): yield item, package - if isinstance(item, DescriptorProto): + if item.kind is DescriptorProto: for enum in item.enum_type: yield enum, package for nested in item.nested_type: - nested_package = package + item.name + nested_package = package + "." + item.name - for nested_item in _traverse(nested, nested_package): - yield nested_item, nested_package + for nested_item, np in _traverse(nested_package, [nested], tree[item_index]): + yield nested_item, np tree = collections.defaultdict(collections.defaultdict) for loc in proto_file.source_code_info.location: @@ -114,7 +117,7 @@ def generate_code(request, response): results = traverse(proto_file) for item, package in results["types"]: data = { - 'name': item.name, + 'name': (package + "." 
+ item.name).replace("ga4gh.", ""), 'doc': item.comment } From dbfadb3920a4e2e3edec42288beeafd271a83fb6 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 17:45:27 +0100 Subject: [PATCH 30/40] Add "list of" to repeated fields --- tools/sphinx/protobuf-json-docs.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 7a7a9fdb..402055a5 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -26,6 +26,7 @@ def __init__(self, prot): if prot.type in [11, 14]: self.ref_type = prot.type_name.replace(".ga4gh.", "") self.type = prot.type + self.label = prot.label elif isinstance(prot, ServiceDescriptorProto): self.method = [convert_protodef_to_editable(x) for x in prot.method] elif isinstance(prot, MethodDescriptorProto): @@ -141,6 +142,8 @@ def generate_code(request, response): kind = "string" elif f.type in [11, 14]: kind = ":avro:message:`%s`" % f.ref_type + if f.label == 3: # LABEL_REPEATED + kind = "list of " + kind elif f.type in [12]: kind = "bytes" else: From a56dd953c0fd6105fda9284ca3fe9ca4635d8038 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Wed, 15 Jun 2016 17:46:42 +0100 Subject: [PATCH 31/40] Rename avpr2rest -> protodoc2rst --- doc/source/conf.py | 2 +- doc/source/schemas/Makefile | 2 +- tools/sphinx/{avpr2rest.py => protodoc2rst.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tools/sphinx/{avpr2rest.py => protodoc2rst.py} (100%) diff --git a/doc/source/conf.py b/doc/source/conf.py index 29c341e1..91b3ae4b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -49,7 +49,7 @@ cmd = "protoc --proto_path %s --plugin=protoc-gen-custom=%s --custom_out=%s %s" % (base_dir, os.path.join(sphinx_path, "protobuf-json-docs.py"), json_dir, fullpath) print cmd subprocess.check_call(cmd, shell=True) - cmd = "python %s %s/ga4gh/%s schemas/" %(os.path.join(sphinx_path, "avpr2rest.py"), json_dir, json_file) + cmd = "python %s %s/ga4gh/%s schemas/" %(os.path.join(sphinx_path, "protodoc2rst.py"), json_dir, json_file) print cmd subprocess.check_call(cmd, shell=True) diff --git a/doc/source/schemas/Makefile b/doc/source/schemas/Makefile index 1e06308a..292ecb69 100644 --- a/doc/source/schemas/Makefile +++ b/doc/source/schemas/Makefile @@ -17,7 +17,7 @@ JSON_DIR:=/tmp/ga4gh-${UID}/json PROTO_BASE_DIR:=../../../src/main/proto PROTO_DIR:=${PROTO_BASE_DIR}/ga4gh -AVPR2REST_PATH:=../../../tools/sphinx/avpr2rest.py +AVPR2REST_PATH:=../../../tools/sphinx/protodoc2rst.py PROTOC_PLUGIN_PATH:=../../../tools/sphinx/protobuf-json-docs.py PROTO_BASENAMES:=$(subst ${PROTO_DIR}/,,$(wildcard ${PROTO_DIR}/*.proto)) diff --git a/tools/sphinx/avpr2rest.py b/tools/sphinx/protodoc2rst.py similarity index 100% rename from tools/sphinx/avpr2rest.py rename to tools/sphinx/protodoc2rst.py From 5f9c4d820d9ba2ca8134b37d3b62055863216bf6 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 10:20:05 +0100 Subject: [PATCH 32/40] Fix one-of support --- tools/sphinx/protobuf-json-docs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 402055a5..239481a8 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -20,6 +20,7 @@ def __init__(self, prot): self.field = [convert_protodef_to_editable(x) for x in prot.field] self.enum_type = [convert_protodef_to_editable(x) for x in prot.enum_type] self.nested_type = prot.nested_type + 
self.oneof_decl = prot.oneof_decl elif isinstance(prot, EnumValueDescriptorProto): self.number = prot.number elif isinstance(prot, FieldDescriptorProto): @@ -51,7 +52,6 @@ def _traverse(package, items, tree): comments = tree[item_index] if "leading_comments" in comments or "trailing_comments" in comments: item.comment = _collapse_comments(comments) - #raise Exception, item.__dict__ del comments["leading_comments"] del comments["trailing_comments"] if item.kind is EnumDescriptorProto: @@ -153,6 +153,13 @@ def generate_code(request, response): 'type': kind, 'doc': f.comment }) + if len(item.oneof_decl) > 0: + data["fields"] = [ + { + "name": item.oneof_decl[0].name, + "type": [" %s "% x["type"] for x in data["fields"]], + "doc": ", ".join([x["doc"] for x in data["fields"] if x["doc"] != ""]) + }] types.append(data) elif item.kind == EnumDescriptorProto: comments = ["\n* `%s`: %s"%(v.name, v.comment) for v in item.value] From 734717b8d79ec2596aea4975e041c1b98a842c50 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 11:56:12 +0100 Subject: [PATCH 33/40] Add proper map type support --- tools/sphinx/protobuf-json-docs.py | 71 ++++++++++++++++++++---------- tools/sphinx/protodoc2rst.py | 4 +- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index 239481a8..b7e9134e 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -14,6 +14,7 @@ def __init__(self, prot): self.kind = type(prot) self.name = prot.name self.comment = "" + self.options = dict([(key.name, value) for (key, value) in prot.options.ListFields()]) if isinstance(prot, EnumDescriptorProto): self.value = [convert_protodef_to_editable(x) for x in prot.value] elif isinstance(prot, DescriptorProto): @@ -102,23 +103,66 @@ def _traverse(package, items, tree): raise Exception, sorted(tree.keys()) return {"types": - itertools.chain( + list(itertools.chain( _traverse(proto_file.package, proto_file.service, tree[6]), # 5 is enum_type in FileDescriptorProto _traverse(proto_file.package, proto_file.enum_type, tree[5]), # 5 is enum_type in FileDescriptorProto _traverse(proto_file.package, proto_file.message_type, tree[4]), # 4 is message_type in FileDescriptorProto - ), + )), "file": ["".join(x.leading_detached_comments) for x in proto_file.source_code_info.location if len(x.leading_detached_comments) > 0] } +def type_to_string(f, package, map_types): + if f.type in [1]: + return "double" + elif f.type in [2]: + return "float" + elif f.type in [3]: + return "long" + elif f.type in [5]: + return "integer" + elif f.type in [8]: + return "boolean" + elif f.type in [9]: + return "string" + elif f.type in [11, 14]: + ref_name = (package + "." 
+ f.ref_type) + if ref_name in map_types: + ref_fields = map_types[ref_name] + return { + "type": "map", + "key": " %s "% type_to_string(ref_fields["key"], package, map_types), + "value": " %s "% type_to_string(ref_fields["value"], package, map_types) + } + else: + kind = ":avro:message:`%s`" % f.ref_type + if f.label == 3: # LABEL_REPEATED + return "list of " + kind + else: + return kind + elif f.type in [12]: + return "bytes" + else: + raise Exception, f.type + def generate_code(request, response): for proto_file in request.proto_file: types = [] messages = {} results = traverse(proto_file) + map_types = {} + def full_name(package, item): + return "%s.%s" % (package, item.name) + for item, package in results["types"]: + if item.options.has_key("map_entry"): + map_types[full_name(package, item)] = dict([(x.name,x) for x in item.field]) for item, package in results["types"]: + name = full_name(package, item) + if name in map_types: + continue + pass data = { - 'name': (package + "." + item.name).replace("ga4gh.", ""), + 'name': name.replace("ga4gh.", ""), 'doc': item.comment } @@ -128,26 +172,7 @@ def generate_code(request, response): 'fields': [] }) for f in item.field: # types from FieldDescriptorProto - if f.type in [1]: - kind = "double" - elif f.type in [2]: - kind = "float" - elif f.type in [3]: - kind = "long" - elif f.type in [5]: - kind = "integer" - elif f.type in [8]: - kind = "boolean" - elif f.type in [9]: - kind = "string" - elif f.type in [11, 14]: - kind = ":avro:message:`%s`" % f.ref_type - if f.label == 3: # LABEL_REPEATED - kind = "list of " + kind - elif f.type in [12]: - kind = "bytes" - else: - raise Exception, f.type + kind = type_to_string(f, package, map_types) data["fields"].append({ 'name': f.name, 'type': kind, diff --git a/tools/sphinx/protodoc2rst.py b/tools/sphinx/protodoc2rst.py index 23bb7287..bb7f0c9b 100644 --- a/tools/sphinx/protodoc2rst.py +++ b/tools/sphinx/protodoc2rst.py @@ -21,7 +21,9 @@ def typename(typeobject): if typeobject['type'] == 'array': return 'array<%s>' % typename(typeobject['items']) elif typeobject['type'] == 'map': - return 'map<%s>' % typename(typeobject['values']) + return 'map<%s, %s>' % (typename(typeobject['key']), typename(typeobject['value'])) + else: + raise Exception, "Unsupported type object: %s" %(typeobject['type']) elif isinstance(typeobject, basestring): return typeobject From e64628276de7cf4d08e82f2e9e04ac46ce755aee Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 11:56:31 +0100 Subject: [PATCH 34/40] Remove duplicate code in protodoc2rst --- tools/sphinx/protodoc2rst.py | 91 ++---------------------------------- 1 file changed, 3 insertions(+), 88 deletions(-) diff --git a/tools/sphinx/protodoc2rst.py b/tools/sphinx/protodoc2rst.py index bb7f0c9b..7cd0a919 100644 --- a/tools/sphinx/protodoc2rst.py +++ b/tools/sphinx/protodoc2rst.py @@ -5,96 +5,9 @@ import re import argparse -def get_file_locations(): - parser = argparse.ArgumentParser() - parser.add_argument('input', help='Input AVPR filename(s)', nargs='+') - parser.add_argument('output', help='Output directory') - args = parser.parse_args() - return (args.input, args.output) - -def typename(typeobject): - if isinstance(typeobject, list): - union_names = [typename(item) for item in typeobject] - return '|'.join(union_names) - - elif isinstance(typeobject, dict): - if typeobject['type'] == 'array': - return 'array<%s>' % typename(typeobject['items']) - elif typeobject['type'] == 'map': - return 'map<%s, %s>' % (typename(typeobject['key']), 
typename(typeobject['value'])) - else: - raise Exception, "Unsupported type object: %s" %(typeobject['type']) - - elif isinstance(typeobject, basestring): - return typeobject - - raise ValueError - def cleanup_doc(doc,indent=0): return '\n'.join([' '*indent + line for line in doc.split('\n')]) -if __name__ == '__main__': - - avpr_filenames, rest_directory = get_file_locations() - - for avpr_filename in avpr_filenames: - base_filename = os.path.basename(avpr_filename) - name = os.path.splitext(base_filename)[0] - - rest_filename = os.path.join(rest_directory, name+'.rst') - - with open(avpr_filename,'r') as f: - data = json.load(f) - - output = data['protocol'] + '\n' - output += '*' * len(data['protocol']) + '\n\n' - - if 'doc' in data: - output += cleanup_doc(data['doc']) + '\n\n' - - for message_name in data['messages']: - message_def = data['messages'][message_name] - doc = message_def['doc'] - # process formal parameters ('request') - request = message_def['request'] - # collect the names - param_names = [] - for param in request: - param_names.append(param['name']) - response = message_def['response'] - errors = message_def['errors'] - output += " .. function:: %s(%s)\n\n" % (message_name, - ', '.join(param_names)) - for param in request: - output += " :param %s: %s\n" % (param['name'], param['type']) - output += " :return type: %s\n" % response - output += " :throws: %s\n\n" % ', '.join(errors) - output += cleanup_doc(doc) - output += "\n\n" - - for item in data['types']: - output += '.. avro:%s:: %s\n\n' % (item['type'], item['name']) - - if item['type'] == 'message': - for field in item['fields']: - output += ' :field %s:\n' % field['name'] - if 'doc' in field: - output += cleanup_doc(field['doc'],indent=4) + '\n' - output += ' :type %s: %s\n' % (field['name'], typename(field['type'])) - output += '\n' - - if item['type'] == 'enum': - output += ' :symbols: %s\n' % '|'.join(item['symbols']) - - if item['type'] == 'fixed': - output += ' :size: %s\n' % item['size'] - - if 'doc' in item: - output += cleanup_doc(item['doc'],indent=2) + '\n\n' - - with open(rest_filename,'w') as f: - f.write(output) - def get_file_locations(): parser = argparse.ArgumentParser() parser.add_argument('input', help='Input AVPR filename(s)', nargs='+') @@ -111,7 +24,9 @@ def typename(typeobject): if typeobject['type'] == 'array': return 'array<%s>' % typename(typeobject['items']) elif typeobject['type'] == 'map': - return 'map<%s>' % typename(typeobject['values']) + return 'map<%s, %s>' % (typename(typeobject['key']), typename(typeobject['value'])) + else: + raise Exception, "Unsupported type object: %s" %(typeobject['type']) elif isinstance(typeobject, basestring): return typeobject From df7b4a7a3b67fdc4f229ac37e598f48dd5667f6b Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 14:04:35 +0100 Subject: [PATCH 35/40] Fix refs to Protobuf internal items --- doc/source/conf.py | 24 ++++++++++++++---------- doc/source/schemas/index.rst | 1 + tools/sphinx/protobuf-json-docs.py | 29 ++++++++++++++++------------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 91b3ae4b..a3edc911 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -42,16 +42,20 @@ json_dir = os.path.join("_build", "json-temp") if not os.path.exists(json_dir): os.makedirs(json_dir) -schema_dir = os.path.join(base_dir, "ga4gh") -for protofile in os.listdir(schema_dir): - fullpath = os.path.join(schema_dir, protofile) - json_file = protofile + ".json" - cmd = "protoc 
--proto_path %s --plugin=protoc-gen-custom=%s --custom_out=%s %s" % (base_dir, os.path.join(sphinx_path, "protobuf-json-docs.py"), json_dir, fullpath) - print cmd - subprocess.check_call(cmd, shell=True) - cmd = "python %s %s/ga4gh/%s schemas/" %(os.path.join(sphinx_path, "protodoc2rst.py"), json_dir, json_file) - print cmd - subprocess.check_call(cmd, shell=True) +schema_dir = base_dir +for root, dirs, files in os.walk(schema_dir): + for f in files: + fullpath = os.path.join(root, f) + json_file = f + ".json" + cmd = "protoc --proto_path %s --plugin=protoc-gen-custom=%s --custom_out=%s %s" % (base_dir, os.path.join(sphinx_path, "protobuf-json-docs.py"), json_dir, fullpath) + print cmd + subprocess.check_call(cmd, shell=True) + +for root, dirs, files in os.walk(json_dir): + for f in files: + cmd = "python %s %s/%s schemas/" %(os.path.join(sphinx_path, "protodoc2rst.py"), root, f) + print cmd + subprocess.check_call(cmd, shell=True) # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/doc/source/schemas/index.rst b/doc/source/schemas/index.rst index f1640056..d9d47360 100644 --- a/doc/source/schemas/index.rst +++ b/doc/source/schemas/index.rst @@ -17,3 +17,4 @@ Schemas allele_annotation_service.proto.rst sequence_annotations.proto.rst sequence_annotation_service.proto.rst + struct.proto.rst diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index b7e9134e..c0ed7386 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -8,6 +8,9 @@ import json from google.protobuf.descriptor_pb2 import DescriptorProto, EnumDescriptorProto, EnumValueDescriptorProto, FieldDescriptorProto, ServiceDescriptorProto, MethodDescriptorProto +def simplify_name(name): + return name.split(".")[-1] + def convert_protodef_to_editable(proto): class Editable(object): def __init__(self, prot): @@ -26,7 +29,7 @@ def __init__(self, prot): self.number = prot.number elif isinstance(prot, FieldDescriptorProto): if prot.type in [11, 14]: - self.ref_type = prot.type_name.replace(".ga4gh.", "") + self.ref_type = prot.type_name[1:] self.type = prot.type self.label = prot.label elif isinstance(prot, ServiceDescriptorProto): @@ -37,7 +40,6 @@ def __init__(self, prot): else: raise Exception, type(prot) - return Editable(proto) def traverse(proto_file): @@ -111,7 +113,7 @@ def _traverse(package, items, tree): "file": ["".join(x.leading_detached_comments) for x in proto_file.source_code_info.location if len(x.leading_detached_comments) > 0] } -def type_to_string(f, package, map_types): +def type_to_string(f, map_types): if f.type in [1]: return "double" elif f.type in [2]: @@ -125,16 +127,18 @@ def type_to_string(f, package, map_types): elif f.type in [9]: return "string" elif f.type in [11, 14]: - ref_name = (package + "." 
+ f.ref_type) + ref_name = f.ref_type if ref_name in map_types: ref_fields = map_types[ref_name] return { "type": "map", - "key": " %s "% type_to_string(ref_fields["key"], package, map_types), - "value": " %s "% type_to_string(ref_fields["value"], package, map_types) - } + "key": " %s "% type_to_string(ref_fields["key"], map_types), + "value": " %s "% type_to_string(ref_fields["value"], map_types) + } + elif ref_name.find("InfoEntry") != -1: + raise Exception, (f.__dict__, ref_name) else: - kind = ":avro:message:`%s`" % f.ref_type + kind = ":avro:message:`%s`" % simplify_name(f.ref_type) if f.label == 3: # LABEL_REPEATED return "list of " + kind else: @@ -160,9 +164,8 @@ def full_name(package, item): name = full_name(package, item) if name in map_types: continue - pass data = { - 'name': name.replace("ga4gh.", ""), + 'name': simplify_name(name), 'doc': item.comment } @@ -172,7 +175,7 @@ def full_name(package, item): 'fields': [] }) for f in item.field: # types from FieldDescriptorProto - kind = type_to_string(f, package, map_types) + kind = type_to_string(f, map_types) data["fields"].append({ 'name': f.name, 'type': kind, @@ -200,9 +203,9 @@ def full_name(package, item): "doc": m.comment, "request": [{ "name": "request", - "type": ":avro:message:`%s`" % m.input_type.replace(".ga4gh.", ""), + "type": ":avro:message:`%s`" % simplify_name(m.input_type), }], - "response": ":avro:message:`%s`" % m.output_type.replace(".ga4gh.", ""), + "response": ":avro:message:`%s`" % simplify_name(m.output_type), "errors" : [ ":avro:message:`GAException`" ] } else: From f2f33afe66946b3a7611f33995dc7a11da7c85f7 Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 14:42:56 +0100 Subject: [PATCH 36/40] Remove remaining internal refs to Avro --- doc/source/api/metadata.rst | 2 +- doc/source/api/reads.rst | 51 +++++----- doc/source/api/variants.rst | 14 +-- doc/source/conf.py | 2 +- .../proto/ga4gh/sequence_annotations.proto | 2 +- tools/sphinx/protobuf-json-docs.py | 8 +- .../{avrodomain.py => protobufdomain.py} | 92 +++++++++---------- tools/sphinx/protodoc2rst.py | 2 +- 8 files changed, 86 insertions(+), 87 deletions(-) rename tools/sphinx/{avrodomain.py => protobufdomain.py} (73%) diff --git a/doc/source/api/metadata.rst b/doc/source/api/metadata.rst index 4aa50d94..6075fe25 100644 --- a/doc/source/api/metadata.rst +++ b/doc/source/api/metadata.rst @@ -24,7 +24,7 @@ data-provider-specified collection of related data of multiple types. Logically, it's akin to a folder, where it's up to the provider what goes into the folder. Individual data objects are linked by `datasetId` fields to `Dataset objects -<../schemas/metadata.html#avro.Dataset>`_. +<../schemas/metadata.proto.html#protobuf.Dataset>`_. Since the grouping of content in a dataset is determined by the data provider, users should not make semantic assumptions about that data. diff --git a/doc/source/api/reads.rst b/doc/source/api/reads.rst index fe05f4f9..c337465a 100644 --- a/doc/source/api/reads.rst +++ b/doc/source/api/reads.rst @@ -17,38 +17,38 @@ specific genomic regions instead. 
The model has the following data types: -============================== ============================================ ================== -Record | Description SAM/BAM rough equivalent -============================== ============================================ ================== -:avro:message:`ReadAlignment` | One alignment for one read A single line in a file -:avro:message:`ReadGroup` | A group of read alignments A single RG tag -:avro:message:`ReadGroupSet` | Collecton of ReadGroups that map to the Single SAM/BAM file - | same genome -:avro:message:`Program` | Software version and parameters that were PN, CL tags in SAM header - | used to align reads to the genome -:avro:message:`ReadStats` | Counts of aligned and unaligned reads Samtools flagstats on a file - | for a ReadGroup or ReadGroupSet -============================== ============================================ ================== +==================================== =========================================== ======================== +Record Description SAM/BAM rough equivalent +==================================== =========================================== ======================== +:protobuf:message:`ReadAlignment` One alignment for one read A single line in a file +:protobuf:message:`ReadGroup` A group of read alignments A single RG tag +:protobuf:message:`ReadGroupSet` Collecton of ReadGroups that map to the Single SAM/BAM file + same genome +:protobuf:message:`Program` Software version and parameters that were PN, CL tags in SAM header + used to align reads to the genome +:protobuf:message:`ReadStats` Counts of aligned and unaligned reads Samtools flagstats on a file + for a ReadGroup or ReadGroupSet +==================================== =========================================== ======================== The relationships are mostly one to many (e.g. each -:avro:message:`ReadAlignment` is part of exactly one -:avro:message:`ReadGroup`), with the exception that a -:avro:message:`ReadGroup` is allowed to be part of more than one -:avro:message:`ReadGroupSet`. +:protobuf:message:`ReadAlignment` is part of exactly one +:protobuf:message:`ReadGroup`), with the exception that a +:protobuf:message:`ReadGroup` is allowed to be part of more than one +:protobuf:message:`ReadGroupSet`. -:avro:message:`Dataset` --< :avro:message:`ReadGroupSet` >--< :avro:message:`ReadGroup` --< :avro:message:`ReadAlignment` +:protobuf:message:`Dataset` --< :protobuf:message:`ReadGroupSet` >--< :protobuf:message:`ReadGroup` --< :protobuf:message:`ReadAlignment` -* A :avro:message:`Dataset` is a general-purpose container, defined in +* A :protobuf:message:`Dataset` is a general-purpose container, defined in metadata.avdl. -* A :avro:message:`ReadGroupSet` is a logical collection of ReadGroups, +* A :protobuf:message:`ReadGroupSet` is a logical collection of ReadGroups, as determined by the data owner. Typically one - :avro:message:`ReadGroupSet` represents all the Reads from one + :protobuf:message:`ReadGroupSet` represents all the Reads from one experimental sample, which traditionally would be stored in a single BAM file. -* A :avro:message:`ReadGroup` is all the data that's processed the same +* A :protobuf:message:`ReadGroup` is all the data that's processed the same way by the sequencer. There are typically 1-10 ReadGroups in a - :avro:message:`ReadGroupSet`. -* A :avro:message:`ReadAlignment` object is a flattened representation + :protobuf:message:`ReadGroupSet`. 
+* A :protobuf:message:`ReadAlignment` object is a flattened representation of several layers of bioinformatics hierarchy, including fragments, reads, and alignments, stored in one object for easy access. @@ -56,9 +56,9 @@ The relationships are mostly one to many (e.g. each ReadAlignment: detailed discussion @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -One :avro:message:`ReadAlignment` object represents the following +One :protobuf:message:`ReadAlignment` object represents the following logical hierarchy. See the field definitions in the -:avro:message:`ReadAlignment` object for more details. +:protobuf:message:`ReadAlignment` object for more details. .. image:: /_static/read_alignment_diagrams.png @@ -88,4 +88,3 @@ identified by that ID. Records are represented by blue rectangles; dotted lines indicate records defined in other schemas. .. image:: /_static/reads_schema.png - diff --git a/doc/source/api/variants.rst b/doc/source/api/variants.rst index abb25149..cfa4eb46 100644 --- a/doc/source/api/variants.rst +++ b/doc/source/api/variants.rst @@ -24,20 +24,20 @@ constitute the genotype matrix. The lowest-level entity is a Call: - * a :avro:record:`Call` encodes the genotype of an individual with + * a :protobuf:message:`Call` encodes the genotype of an individual with respect to a variant, as determined by some analysis of experimental data. The other entities can be thought of as collections of Calls that have something in common: - * a :avro:record:`VariantSet` supports working with a collection + * a :protobuf:message:`VariantSet` supports working with a collection of Calls intended to be analyzed together. - * a :avro:record:`Variant` supports working with the subset of + * a :protobuf:message:`Variant` supports working with the subset of Calls in a VariantSet that are at the same site and are described using the same set of alleles. The Variant entity contains: - + * a variant description: a potential difference between experimental DNA and a reference sequence, including the site (position of the difference) and alleles (how the bases @@ -46,7 +46,7 @@ something in common: evidence for actual instances of that difference, as seen in analyses of experimental data - * a :avro:record:`CallSet` supports working with the subset of + * a :protobuf:message:`CallSet` supports working with the subset of Calls in a VariantSet that were generated by the same analysis of the same sample. The CallSet includes information about which sample was analyzed and how it was analyzed, and is linked to @@ -54,9 +54,9 @@ something in common: The following diagram shows the relationship of these four entities to each other and to other GA4GH API entities. It shows which entities -contain other entities (such as :avro:record:`VariantSetMetadata`), +contain other entities (such as :protobuf:message:`VariantSetMetadata`), and which contain IDs that can be used to get information from other -entities (such as :avro:record:`Variant`'s ``variantSetId``). The +entities (such as :protobuf:message:`Variant`'s ``variantSetId``). The arrow points *from* the entity that contains the ID *to* the entity that can be identified by that ID. 
diff --git a/doc/source/conf.py b/doc/source/conf.py index a3edc911..146cf1e5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -35,7 +35,7 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', - 'avrodomain', + 'protobufdomain', ] base_dir = "../../src/main/proto" diff --git a/src/main/proto/ga4gh/sequence_annotations.proto b/src/main/proto/ga4gh/sequence_annotations.proto index 0cbe44a3..e94bf601 100644 --- a/src/main/proto/ga4gh/sequence_annotations.proto +++ b/src/main/proto/ga4gh/sequence_annotations.proto @@ -16,7 +16,7 @@ The discrete hierarchical annotations are derived from the Sequence Ontology http://www.sequenceontology.org/gff3.shtml The goal is to be able to store annotations using the GFF3 and SO conceptual -model, although there is not necessarly a one-to-one mapping in Avro records +model, although there is not necessarly a one-to-one mapping in Protobuf messages to GFF3 records. The minimum requirement is to be able to accurately represent the current diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index c0ed7386..ad12969b 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -138,7 +138,7 @@ def type_to_string(f, map_types): elif ref_name.find("InfoEntry") != -1: raise Exception, (f.__dict__, ref_name) else: - kind = ":avro:message:`%s`" % simplify_name(f.ref_type) + kind = ":protobuf:message:`%s`" % simplify_name(f.ref_type) if f.label == 3: # LABEL_REPEATED return "list of " + kind else: @@ -203,10 +203,10 @@ def full_name(package, item): "doc": m.comment, "request": [{ "name": "request", - "type": ":avro:message:`%s`" % simplify_name(m.input_type), + "type": ":protobuf:message:`%s`" % simplify_name(m.input_type), }], - "response": ":avro:message:`%s`" % simplify_name(m.output_type), - "errors" : [ ":avro:message:`GAException`" ] + "response": ":protobuf:message:`%s`" % simplify_name(m.output_type), + "errors" : [ ":protobuf:message:`GAException`" ] } else: raise Exception, item.kind diff --git a/tools/sphinx/avrodomain.py b/tools/sphinx/protobufdomain.py similarity index 73% rename from tools/sphinx/avrodomain.py rename to tools/sphinx/protobufdomain.py index 23a9c238..fa21540b 100644 --- a/tools/sphinx/avrodomain.py +++ b/tools/sphinx/protobufdomain.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- """ - avrodomain + protobufdomain ~~~~~~~~~~ - Apache Avro domain. + Protobuf domain. """ __version__ = "0.1" -# for this module's sphinx doc +# for this module's sphinx doc release = __version__ version = release.rsplit('.', 1)[0] @@ -29,7 +29,7 @@ # By default, disable this warning. WARN_ABOUT_DUPLICATES = False -avro_sig_regex = re.compile( +protobuf_sig_regex = re.compile( r'''^ ([^(]*?) 
# type (\w+) # name @@ -37,23 +37,23 @@ $ ''', re.X) -class AvroObject(ObjectDescription): - """Description of a general Avro object.""" +class ProtobufObject(ObjectDescription): + """Description of a general Protobuf object.""" prefix = None - + def handle_signature(self,sig,signode): sig = sig.strip() - type_name, name, arglist = avro_sig_regex.match(sig).groups() - + type_name, name, arglist = protobuf_sig_regex.match(sig).groups() + if self.prefix: signode += addnodes.desc_annotation(self.prefix+' ', self.prefix+' ') - + if type_name: signode += addnodes.desc_type(type_name, type_name) - + if name: signode += addnodes.desc_name(name,name) - + if arglist: paramlist = addnodes.desc_parameterlist() for arg in arglist.split(','): @@ -63,52 +63,52 @@ def handle_signature(self,sig,signode): param += nodes.emphasis(' '+argname,' '+argname) paramlist += param signode += paramlist - + return name - + def get_index_text(self,name): if self.objtype == 'fixed': - return _('%s (Avro fixed-width value)') % name + return _('%s (Protobuf fixed-width value)') % name if self.objtype == 'enum': - return _('%s (Avro enum)') % name + return _('%s (Protobuf enum)') % name if self.objtype == 'message': - return _('%s (Avro record)') % name + return _('%s (Protobuf message)') % name if self.objtype == 'error': - return _('%s (Avro error)') % name + return _('%s (Protobuf error)') % name if self.objtype == 'rpc': - return _('%s (Avro RPC)') % name - + return _('%s (Protobuf RPC)') % name + def add_target_and_index(self, name, sig, signode): - targetname = 'avro.' + name + targetname = 'protobuf.' + name if targetname not in self.state.document.ids: signode['names'].append(targetname) signode['ids'].append(targetname) signode['first'] = (not self.names) self.state.document.note_explicit_target(signode) - objects = self.env.domaindata['avro']['objects'] + objects = self.env.domaindata['protobuf']['objects'] if name in objects and WARN_ABOUT_DUPLICATES: - self.state_machine.reporter.warning('duplicate Avro object description of %s.' % name, line=self.lineno) + self.state_machine.reporter.warning('duplicate Protobuf object description of %s.' 
% name, line=self.lineno) objects[name] = (self.env.docname, self.objtype) - + indextext = self.get_index_text(name) if indextext: self.indexnode['entries'].append(('single',indextext,targetname,'')) -class AvroFixedField(AvroObject): +class ProtobufFixedField(ProtobufObject): prefix = 'fixed' doc_field_types = [ Field('size', label=l_('Size'), names=('size',)) ] -class AvroEnum(AvroObject): +class ProtobufEnum(ProtobufObject): prefix = 'enum' doc_field_types = [ Field('symbols', label=l_('Symbols'), names=('symbols',)) ] -class AvroRecord(AvroObject): +class ProtobufMessage(ProtobufObject): prefix = 'message' doc_field_types = [ TypedField('fields', label=l_('Fields'), @@ -117,10 +117,10 @@ class AvroRecord(AvroObject): typerolename='message') ] -class AvroError(AvroRecord): +class ProtobufError(ProtobufMessage): prefix = 'error' -class AvroRPCMessage(AvroObject): +class ProtobufRPCMessage(ProtobufObject): doc_field_types = [ TypedField('arguments', label=l_('Arguments'), names=('argument','arg','param'), @@ -132,10 +132,10 @@ class AvroRPCMessage(AvroObject): names=('returns','return')) ] -class AvroDomain(Domain): - name = "avro" - label = "Apache Avro" - +class ProtobufDomain(Domain): + name = "protobuf" + label = "Apache Protobuf" + object_types = { 'fixed': ObjType(l_('fixed'), 'fixed'), 'enum': ObjType(l_('enum'), 'enum'), @@ -143,15 +143,15 @@ class AvroDomain(Domain): 'error': ObjType(l_('error'), 'error'), 'rpc': ObjType(l_('rpc'), 'rpc'), } - + directives = { - 'fixed': AvroFixedField, - 'enum': AvroEnum, - 'message': AvroRecord, - 'error': AvroError, - 'rpc': AvroRPCMessage + 'fixed': ProtobufFixedField, + 'enum': ProtobufEnum, + 'message': ProtobufMessage, + 'error': ProtobufError, + 'rpc': ProtobufRPCMessage } - + roles = { 'fixed': XRefRole(), 'enum': XRefRole(), @@ -159,20 +159,20 @@ class AvroDomain(Domain): 'error': XRefRole(), 'rpc': XRefRole() } - + initial_data = { 'objects': {} } - + def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode): if target not in self.data['objects']: return None obj = self.data['objects'][target] - return make_refnode(builder, fromdocname, obj[0], 'avro.' + target, contnode, target) - + return make_refnode(builder, fromdocname, obj[0], 'protobuf.' + target, contnode, target) + def get_objects(self): for refname, (docname, type) in list(self.data['objects'].items()): - yield (refname, refname, type, docname, 'avro.' + refname, 1) + yield (refname, refname, type, docname, 'protobuf.' + refname, 1) def setup(app): - app.add_domain(AvroDomain) + app.add_domain(ProtobufDomain) diff --git a/tools/sphinx/protodoc2rst.py b/tools/sphinx/protodoc2rst.py index 7cd0a919..956a0ffe 100644 --- a/tools/sphinx/protodoc2rst.py +++ b/tools/sphinx/protodoc2rst.py @@ -73,7 +73,7 @@ def typename(typeobject): output += "\n\n" for item in data['types']: - output += '.. avro:%s:: %s\n\n' % (item['type'], item['name']) + output += '.. 
protobuf:%s:: %s\n\n' % (item['type'], item['name']) if item['type'] == 'message': for field in item['fields']: From 43cdcb0537e092db13c72d8d019ce18bcd041afb Mon Sep 17 00:00:00 2001 From: Tom Parker Date: Thu, 16 Jun 2016 15:25:01 +0100 Subject: [PATCH 37/40] Add various documentation to Protobuf convertors --- tools/sphinx/protobuf-json-docs.py | 34 +++++++++++++++++++++++++----- tools/sphinx/protobufdomain.py | 2 +- tools/sphinx/protodoc2rst.py | 10 ++++----- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/tools/sphinx/protobuf-json-docs.py b/tools/sphinx/protobuf-json-docs.py index ad12969b..55f06a97 100755 --- a/tools/sphinx/protobuf-json-docs.py +++ b/tools/sphinx/protobuf-json-docs.py @@ -1,4 +1,14 @@ #!/usr/bin/env python +""" +Plugin for generation of Sphinx-suitable JSON from Protobuf definitions +It's a plugin for protoc as per https://developers.google.com/protocol-buffers/docs/reference/other + +Usage: + protoc --plugin=protoc-gen-custom=