Merge pull request #432 from wasade/language-docs
Language docs
cassidysymons authored Apr 13, 2022
2 parents 0f356e8 + 64ac6a9 commit ed91d3e
Showing 5 changed files with 160 additions and 0 deletions.
11 changes: 11 additions & 0 deletions doc/babel.md
@@ -0,0 +1,11 @@
# To extract and update email translations

```bash
cd microsetta-private-api
pybabel extract -F ../babel.cfg -o translations/base.pot .
pybabel update -i translations/base.pot -d translations
```

# To generate naive automatic translations

Please see the `naive_translate.py` script, provided as part of `microsetta-interface`.
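
As a rough, hypothetical sketch of the general idea (a naive pass over a gettext catalog), assuming `polib`, a stand-in `machine_translate` helper, and the default `pybabel` catalog layout:

```python
# Illustrative sketch only; see naive_translate.py in microsetta-interface
# for the real implementation.
import polib


def machine_translate(text, target_lang):
    # hypothetical helper wrapping whichever translation service is used
    raise NotImplementedError


# assumed layout produced by `pybabel update -d translations`
po = polib.pofile('translations/es/LC_MESSAGES/messages.po')
for entry in po.untranslated_entries():
    # naively fill each empty msgstr with an automatic translation
    entry.msgstr = machine_translate(entry.msgid, 'es')
po.save()
```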
5 changes: 5 additions & 0 deletions doc/consent.md
@@ -0,0 +1,5 @@
# Consent files

Translated consents are located under `microsetta_private_api/LEGACY/locale_data`. These are Python `dict`s whose keys are used as lookups at render time by `microsetta-interface`.

Their content is pulled in by `microsetta_private_api/localization.py`.
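
As an illustration of the lookup pattern (the key names below are hypothetical, not taken verbatim from `locale_data`):

```python
# Hypothetical sketch of the pattern; the real dicts live under
# microsetta_private_api/LEGACY/locale_data and are exposed via
# microsetta_private_api/localization.py.
CONSENT_DOC = {
    "CONSENT_TITLE": "Consent to participate in research",  # hypothetical key
    "CONSENT_BODY": "...",
}

# microsetta-interface resolves keys at render time, roughly like this
title = CONSENT_DOC["CONSENT_TITLE"]
```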
31 changes: 31 additions & 0 deletions doc/dump-survey.py
@@ -0,0 +1,31 @@
import sys

import psycopg2
import pandas as pd
import numpy as np

# Dump a single survey's questions and candidate responses to a TSV file.
# Usage: python dump-survey.py <survey_id_number> <output_tsv>
con = psycopg2.connect(host='localhost', database='ag_test')
sid = int(sys.argv[1])

sql = """SELECT survey_question_id,
                survey_group,
                american,
                question_shortname,
                response,
                ag.survey_question_response.display_index
                    AS response_index
         FROM ag.survey_question
         LEFT JOIN ag.survey_question_response
             USING (survey_question_id)
         LEFT JOIN ag.group_questions USING (survey_question_id)
         LEFT JOIN ag.surveys USING (survey_group)
         WHERE survey_id = %s"""
df = pd.read_sql(sql, con, params=(sid, ))


# sort so that questions emulate survey order
df = df.sort_values(by=['survey_group',
                        'survey_question_id',
                        'response_index']).drop(columns='survey_group')

# free text questions have no responses; keep their index as None rather
# than a float NaN so the TSV stays clean
df['response_index'] = df['response_index'].apply(
    lambda x: None if np.isnan(x) else int(x), convert_dtype=False)
df.to_csv(sys.argv[2], sep='\t', index=False, header=True)
96 changes: 96 additions & 0 deletions doc/format_language_patches.py
@@ -0,0 +1,96 @@
import pandas as pd
import click
from psycopg2 import connect, sql

# Build the UPDATE statements used to patch translated question and
# response text into the database.
_conn = connect(host='localhost', database='ag_test')
_update_question = sql.SQL("UPDATE ag.survey_question "
                           "SET {lang} = {question} "
                           "WHERE survey_question_id = {qid};")
_update_response = sql.SQL("UPDATE ag.survey_question_response "
                           "SET {lang} = {response} "
                           "WHERE survey_question_id = {qid} AND "
                           "      display_index = {response_index};")
_update_survey_response = sql.SQL("UPDATE ag.survey_response "
                                  "SET {language_field} = {response} "
                                  "WHERE response = {american_response};")


def _format_update_survey_resp(american_response, language_field, response):
fmt = _update_survey_response.format(american_response=sql.Literal(american_response),
language_field=sql.Identifier(language_field),
response=sql.Literal(response))

return fmt.as_string(_conn) + '\n'

def _format_update_question(lang, qid, question):
fmt = _update_question.format(lang=sql.Identifier(lang),
question=sql.Literal(question),
qid=sql.Literal(qid))
return fmt.as_string(_conn) + '\n'


def _format_update_resp(lang, qid, response, response_index):
fmt = _update_response.format(lang=sql.Identifier(lang),
response=sql.Literal(response),
qid=sql.Literal(qid),
response_index=sql.Literal(response_index))
return fmt.as_string(_conn) + '\n'


@click.command()
@click.option('--input', type=click.Path(exists=True), required=True,
help="Input excel spreadsheet w/ translations")
@click.option('--output', type=click.Path(exists=False), required=True,
help="The patch file to write")
@click.option('--lang', type=str, required=True,
help="The name of the language")
def mapper(input, output, lang):
def stripper(x):
if pd.isnull(x):
return None
else:
return str.strip(x)

# sheet_name=None -> load all sheets
sheets = pd.read_excel(input, dtype=str, sheet_name=None)

with open(output, 'w') as out:
out.write("ALTER TABLE ag.survey_question\n"
" ADD COLUMN %s varchar;\n" % lang)
out.write("ALTER TABLE ag.survey_question_response\n"
" ADD COLUMN %s varchar;\n" % lang)

for sheet, df in sheets.items():
# we use row['american'] as the translated spreadsheets
# did not alter the header
df['american'] = df['american'].apply(stripper)
df['response'] = df['response'].apply(stripper)

for qid, qblock in df.groupby('survey_question_id'):
i = qblock.iloc[0]

# we use row['american'] as the translated spreadsheets
# did not alter the header
out.write(_format_update_question(lang, qid, i['american']))

                # a single-row block should be a free text question, which
                # has no responses and therefore a null response_index
                if len(qblock) == 1 and not pd.isnull(i['response_index']):
                    raise ValueError("Unexpected response_index on qid: %s"
                                     % qid)
elif len(qblock) == 1:
# no response to add (e.g., free text)
continue
else:
for row in qblock.itertuples():
resp = row.response
respix = row.response_index
amer = row.american
# if the response is nan it means the translation
# missed a response
if pd.isnull(resp):
raise ValueError("Null response: %s" % str(row))

out.write(_format_update_resp(lang, qid, resp, respix))
out.write(_format_update_survey_resp(amer, lang, resp))


if __name__ == '__main__':
mapper()
17 changes: 17 additions & 0 deletions doc/surveys.md
@@ -0,0 +1,17 @@
# Dumping a survey

To dump a survey, you need its survey ID (it can be found with `select survey_id, american from ag.surveys join ag.survey_group on survey_group=group_order;`).

```bash
python dump-survey.py <survey_id_number> <output_tsv>
```
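
Based on `dump-survey.py`, the resulting dump is a tab-separated file with one row per question/response pair (free text questions have an empty `response` and `response_index`). Its header is:

```
survey_question_id	american	question_shortname	response	response_index
```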

# Constructing the bulk of a language patch

The majority of the patch adding a new language for a survey can be constructed with:

```bash
python format_language_patches.py --input <your_xls_file> --output <the_patch> --lang <language_name>
```

This will not automatically create the survey name and group entries; please see `microsetta_private_api/db/patches/0081.sql` for an example.
