Merge pull request #432 from wasade/language-docs

Language docs
biocore · Apr 13, 2022 · ed91d3e · ed91d3e
2 parents 0f356e8 + 64ac6a9
commit ed91d3e
Show file tree

Hide file tree

Showing 5 changed files with 160 additions and 0 deletions.
diff --git a/doc/babel.md b/doc/babel.md
@@ -0,0 +1,11 @@
+# To extract and update email translations 
+
+```bash
+cd microsetta-private-api
+pybabel extract -F ../babel.cfg -o translations/base.pot .
+pybabel update -i translations/base.pot -d translations
+```
+
+# To generate naive automatic translations
+
+Please see the `naive_translate.py` script, which is part of `microsetta-interface`.
diff --git a/doc/consent.md b/doc/consent.md
@@ -0,0 +1,5 @@
+# Consent files
+
+Translated consents are located under `microsetta_private_api/LEGACY/locale_data`. These are `dict`s where the contained keys are used as lookups on render by `microsetta-interface`. 
+
+Their content is sourced by `microsetta_private_api/localization.py`.
diff --git a/doc/dump-survey.py b/doc/dump-survey.py
@@ -0,0 +1,31 @@
+import psycopg2
+import sys
+import pandas as pd
+import numpy as np
+
+con = psycopg2.connect(host='localhost', database='ag_test')
+cursor = con.cursor()
+sid = int(sys.argv[1])
+
+sql = """SELECT survey_question_id,
+        survey_group,
+        american,
+        question_shortname,
+        response,
+        ag.survey_question_response.display_index
+            AS response_index
+     FROM ag.survey_question
+     LEFT JOIN ag.survey_question_response
+         USING (survey_question_id)
+     LEFT JOIN ag.group_questions USING (survey_question_id)
+     LEFT JOIN ag.surveys USING (survey_group)
+     WHERE survey_id = %d""" % sid
+df = pd.read_sql(sql, con)
+
+
+# sorts so that questions emmulate survey order
+df = df.sort_values(by=['survey_group',
+            'survey_question_id',
+            'response_index']).drop(columns='survey_group')
+df['response_index'] = df['response_index'].apply(lambda x: None if np.isnan(x) else int(x), convert_dtype=False)
+df.to_csv(sys.argv[2], sep='\t', index=False, header=True)
diff --git a/doc/format_language_patches.py b/doc/format_language_patches.py
@@ -0,0 +1,96 @@
+import pandas as pd
+import click
+from psycopg2 import connect, sql
+
+_conn = connect(host='localhost', database='ag_test')
+_update_question = sql.SQL("UPDATE ag.survey_question "
+                           "SET {lang} = {question} "
+                           "WHERE survey_question_id = {qid};")
+_update_response = sql.SQL("UPDATE ag.survey_question_response "
+                           "SET {lang} = {response} "
+                           "WHERE survey_question_id = {qid} AND "
+                           "    display_index = {response_index};")
+_update_survey_response = sql.SQL("UPDATE ag.survey_response "
+                                  "SET {language_field} = {response}, "
+                                  "WHERE response = {american_response};")
+
+
+def _format_update_survey_resp(american_response, language_field, response):
+    fmt = _update_survey_response.format(american_response=sql.Literal(american_response),
+                                         language_field=sql.Identifier(language_field),
+                                         response=sql.Literal(response))
+
+    return fmt.as_string(_conn) + '\n'
+
+def _format_update_question(lang, qid, question):
+    fmt = _update_question.format(lang=sql.Identifier(lang),
+                                  question=sql.Literal(question),
+                                  qid=sql.Literal(qid))
+    return fmt.as_string(_conn) + '\n'
+
+
+def _format_update_resp(lang, qid, response, response_index):
+    fmt = _update_response.format(lang=sql.Identifier(lang),
+                                  response=sql.Literal(response),
+                                  qid=sql.Literal(qid),
+                                  response_index=sql.Literal(response_index))
+    return fmt.as_string(_conn) + '\n'
+
+
+@click.command()
+@click.option('--input', type=click.Path(exists=True), required=True,
+              help="Input excel spreadsheet w/ translations")
+@click.option('--output', type=click.Path(exists=False), required=True,
+              help="The patch file to write")
+@click.option('--lang', type=str, required=True,
+              help="The name of the language")
+def mapper(input, output, lang):
+    def stripper(x):
+        if pd.isnull(x):
+            return None
+        else:
+            return str.strip(x)
+
+    # sheet_name=None -> load all sheets
+    sheets = pd.read_excel(input, dtype=str, sheet_name=None)
+
+    with open(output, 'w') as out:
+        out.write("ALTER TABLE ag.survey_question\n"
+                  "    ADD COLUMN %s varchar;\n" % lang)
+        out.write("ALTER TABLE ag.survey_question_response\n"
+                  "    ADD COLUMN %s varchar;\n" % lang)
+
+        for sheet, df in sheets.items():
+            # we use row['american'] as the translated spreadsheets
+            # did not alter the header
+            df['american'] = df['american'].apply(stripper)
+            df['response'] = df['response'].apply(stripper)
+
+            for qid, qblock in df.groupby('survey_question_id'):
+                i = qblock.iloc[0]
+
+                # we use row['american'] as the translated spreadsheets
+                # did not alter the header
+                out.write(_format_update_question(lang, qid, i['american']))
+
+                if len(qblock) == 1 and not pd.isnull(i['response_index']):
+                    raise ValueError("Unexpected null on qid: %s" % qid)
+                elif len(qblock) == 1:
+                    # no response to add (e.g., free text)
+                    continue
+                else:
+                    for row in qblock.itertuples():
+                        resp = row.response
+                        respix = row.response_index
+                        amer = row.american
+                        # if the response is nan it means the translation
+                        # missed a response
+                        if pd.isnull(resp):
+                            raise ValueError("Null response: %s" % str(row))
+
+                        out.write(_format_update_resp(lang, qid, resp, respix))
+                        out.write(_format_update_survey_resp(amer, lang, resp))
+
+
+# Run the click command only when this file is executed as a script.
+if __name__ == '__main__':
+    mapper()
diff --git a/doc/surveys.md b/doc/surveys.md
@@ -0,0 +1,17 @@
+# Dumping a survey
+
+To dump a survey, refer to the survey ID (see `select survey_id, american from ag.surveys join ag.survey_group on survey_group=group_order;`).
+
+```bash
+python dump-survey.py <survey_id_number> <output_tsv>
+```
+
+# Constructing the bulk of a language patch
+
+The majority of the patch for the survey in a new language can be constructed with:
+
+```bash
+python format_language_patches.py --input <your_xls_file> --output <the_patch> --lang <language_name>
+```
+
+This will not automatically create the survey name and group entries. Please see `microsetta_private_api/db/patches/0081.sql` for an example.