Skip to content

Commit

Permalink
feat: complete work on gdocs paste conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
tim-evans authored and colin-alexa committed Mar 21, 2024
1 parent 62f1e94 commit 745069e
Show file tree
Hide file tree
Showing 2 changed files with 264 additions and 210 deletions.
207 changes: 114 additions & 93 deletions packages/@atjson/source-gdocs-paste/src/converter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
InlineAnnotation,
ParseAnnotation,
SliceAnnotation,
TextAnnotation,
compareAnnotations,
is,
} from "@atjson/document";
Expand All @@ -23,8 +24,11 @@ import uuid from "uuid-random";

// Google Docs paste encodes structure with ASCII control characters.
// Vertical tab (\u000B) is used for soft line breaks.
// eslint-disable-next-line no-control-regex
const VERTICAL_TABS = /\u000B/g;
// A whole table: \u0010 … \u0011 wrapping one or more rows.
// eslint-disable-next-line no-control-regex
const TABLE = /\u0010(\u0012(\u001c.*\n)+)+\u0011/g;
// A single row: a \u0012 marker followed by one or more cells.
// eslint-disable-next-line no-control-regex
const TABLE_ROW = /\u0012(\u001c.*\n)+/gmu;
// A single cell: \u001c marker, then the cell text (captured) up to the newline.
// eslint-disable-next-line no-control-regex
const TABLE_CELL = /\u001c(.*)\n/gmu;
// One or more consecutive newlines (blank lines included) separating paragraphs.
const NEWLINE_PARAGRAPH_SEPARATOR = /\n(\s*\n)*/g;

Expand All @@ -35,6 +39,10 @@ const ALIGNMENT = {
3: "justify",
} as const;

/**
 * Lowercases `text` and replaces every run of whitespace with a single
 * underscore, e.g. `"First Name"` → `"first_name"`. Used to derive DataSet
 * column names from header-cell text.
 */
function snakecase(text: string) {
  // The /g flag is required so that ALL whitespace runs are replaced;
  // without it only the first run was converted ("a b c" → "a_b c").
  return text.toLowerCase().replace(/\s+/g, "_");
}

GDocsSource.defineConverterTo(OffsetSource, (doc) => {
// Remove zero-length annotations
// This is a bit of a workaround to deal with complications that arise when trying to
Expand Down Expand Up @@ -157,99 +165,6 @@ GDocsSource.defineConverterTo(OffsetSource, (doc) => {
);
}

// Convert Google Docs table runs (\u0010 … \u0011 control characters) into a
// DataSet annotation plus a Table annotation that references it.
// NOTE(review): the previous comment here ("Convert vertical tabs to line
// breaks") was stale — this loop handles tables, not vertical tabs.
for (let table of doc.match(TABLE)) {
  let id = uuid();
  let rows: Array<Array<{ slice: string; jsonValue: string }>> = [];

  // Gather the dataset info first
  for (let tableRow of doc.match(TABLE_ROW, table.start, table.end)) {
    let row: Array<{ slice: string; jsonValue: string }> = [];
    // Hide the row-start marker (\u0012) from the rendered text.
    doc.addAnnotations(
      new ParseAnnotation({
        start: tableRow.start,
        end: tableRow.start + 1,
      })
    );

    for (let tableCell of doc.match(
      TABLE_CELL,
      tableRow.start + 1,
      tableRow.end
    )) {
      // Each cell's contents become a slice that the DataSet refers to.
      let slice = new SliceAnnotation({
        start: tableCell.start,
        end: tableCell.end,
        attributes: {
          refs: [id],
        },
      });
      row.push({
        slice: slice.id,
        // matches[1] is the cell text captured between \u001c and the newline.
        jsonValue: tableCell.matches[1],
      });
      // Hide the cell-start marker (\u001c) and the trailing newline.
      doc.addAnnotations(
        new ParseAnnotation({
          start: tableCell.start,
          end: tableCell.start + 1,
        }),
        slice,
        new ParseAnnotation({
          start: tableCell.end - 1,
          end: tableCell.end,
        })
      );
    }
    rows.push(row);
  }
  // The first row is treated as the header; remaining rows become records
  // keyed by the header cells' text.
  let [header, ...body] = rows;
  let columnNames = header.map((header) => header.jsonValue);
  let records = body.map((row) => {
    return row.reduce((E, cell, index) => {
      E[columnNames[index]] = cell;
      return E;
    }, {} as Record<string, { slice: string; jsonValue: string }>);
  });
  // Every column is rich text in the derived schema.
  let schema = columnNames.reduce((E, columnName) => {
    E[columnName] = ColumnType.RICH_TEXT;
    return E;
  }, {} as Record<string, ColumnType>);

  // NOTE(review): start/end are offset one past the table's match bounds
  // (table.start + 1 … table.end + 1) — end in particular extends one
  // character beyond the \u0011 terminator; verify this range is intended.
  let dataset = new DataSet({
    id,
    start: table.start + 1,
    end: table.end + 1,
    attributes: {
      schema,
      records,
    },
  });
  doc.addAnnotations(dataset);
  doc.addAnnotations(
    // Hide the table-start marker (\u0010).
    new ParseAnnotation({
      start: table.start,
      end: table.start + 1,
    }),
    // The Table annotation sits on the final character and points at the
    // DataSet by id, mapping each column name to its header slice.
    new Table({
      start: table.end - 1,
      end: table.end,
      attributes: {
        dataSet: dataset.id,
        columns: columnNames.map((columnName, index) => {
          return {
            name: columnName,
            slice: header[index].slice,
          };
        }),
      },
    }),
    // Hide the table-end marker (\u0011).
    new ParseAnnotation({
      start: table.end - 1,
      end: table.end,
    })
  );
}

// Convert newlines to Paragraphs. Paragraphs must not cross the boundary of a BlockAnnotation, so
// divide the document into 'block boundaries' and then look for single/multiple new lines within each
// block boundary
Expand Down Expand Up @@ -441,5 +356,111 @@ GDocsSource.defineConverterTo(OffsetSource, (doc) => {
mark.end = end;
});

// Convert Google Docs table runs (\u0010 … \u0011 control characters) into a
// DataSet annotation plus a Table annotation that references it.
// NOTE(review): the previous comment here ("Convert vertical tabs to line
// breaks") was stale — this loop handles tables, not vertical tabs.
for (let table of doc.match(TABLE)) {
  let id = uuid();
  let rows: Array<Array<{ slice: string; jsonValue: string }>> = [];

  // Remove all paragraphs inside of tables: demote any Paragraph fully
  // contained in the table to a plain TextAnnotation so cell slices don't
  // carry block-level paragraph structure.
  doc
    .where(
      (a) => is(a, Paragraph) && a.start >= table.start && a.end <= table.end
    )
    .update((a) => {
      doc.replaceAnnotation(
        a,
        new TextAnnotation({
          start: a.start,
          end: a.end,
        })
      );
    });

  // Gather the dataset info first
  for (let tableRow of doc.match(TABLE_ROW, table.start, table.end)) {
    let row: Array<{ slice: string; jsonValue: string }> = [];
    // Hide the row-start marker (\u0012) from the rendered text.
    doc.addAnnotations(
      new ParseAnnotation({
        start: tableRow.start,
        end: tableRow.start + 1,
      })
    );

    for (let tableCell of doc.match(
      TABLE_CELL,
      tableRow.start + 1,
      tableRow.end
    )) {
      // Each cell's contents become a slice that the DataSet refers to.
      let slice = new SliceAnnotation({
        start: tableCell.start,
        end: tableCell.end,
        attributes: {
          refs: [id],
        },
      });
      row.push({
        slice: slice.id,
        // matches[1] is the cell text captured between \u001c and the newline.
        jsonValue: tableCell.matches[1],
      });
      // Hide the cell-start marker (\u001c). The cell's trailing newline is
      // left in place here (it is not wrapped in a ParseAnnotation).
      doc.addAnnotations(
        new ParseAnnotation({
          start: tableCell.start,
          end: tableCell.start + 1,
        }),
        slice
      );
    }
    rows.push(row);
  }
  // The first row is treated as the header; its cell text (snakecased)
  // provides the column names for the remaining record rows.
  let [header, ...body] = rows;
  let columnNames = header.map((header) => snakecase(header.jsonValue));
  // NOTE(review): duplicate header names would collide after snakecasing,
  // silently dropping earlier columns from records/schema — verify headers
  // are expected to be unique.
  let records = body.map((row) => {
    return row.reduce((E, cell, index) => {
      E[columnNames[index]] = cell;
      return E;
    }, {} as Record<string, { slice: string; jsonValue: string }>);
  });
  // Every column is rich text in the derived schema.
  let schema = columnNames.reduce((E, columnName) => {
    E[columnName] = ColumnType.RICH_TEXT;
    return E;
  }, {} as Record<string, ColumnType>);

  // The DataSet spans the whole table match.
  let dataset = new DataSet({
    id,
    start: table.start,
    end: table.end,
    attributes: {
      schema,
      records,
    },
  });

  doc.addAnnotations(
    dataset,
    // Hide the table-start marker (\u0010).
    new ParseAnnotation({
      start: table.start,
      end: table.start + 1,
    }),
    // The Table annotation spans the table and points at the DataSet by id,
    // mapping each column name to its header-cell slice.
    new Table({
      start: table.start,
      end: table.end,
      attributes: {
        dataSet: dataset.id,
        showColumnHeaders: true,
        columns: header.map((cell, index) => {
          return {
            name: columnNames[index],
            slice: cell.slice,
          };
        }),
      },
    }),
    // Hide the table-end marker (\u0011).
    new ParseAnnotation({
      start: table.end - 1,
      end: table.end,
    })
  );
}

return doc;
});
Loading

0 comments on commit 745069e

Please sign in to comment.