-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #432 from wasade/language-docs
Language docs
- Loading branch information
Showing
5 changed files
with
160 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# To extract and update email translations | ||
|
||
```bash | ||
cd microsetta-private-api | ||
pybabel extract -F ../babel.cfg -o translations/base.pot . | ||
pybabel update -i translations/base.pot -d translations | ||
``` | ||
|
||
# To generate naive automatic translations | ||
|
||
Please see the `naive_translate.py` script as part of `microsetta-interface` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Consent files | ||
|
||
Translated consents are located under `microsetta_private_api/LEGACY/locale_data`. These are `dict`s where the contained keys are used as lookups on render by `microsetta-interface`. | ||
|
||
Their content is sourced by `microsetta_private_api/localization.py`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import psycopg2 | ||
import sys | ||
import pandas as pd | ||
import numpy as np | ||
|
||
# Dump every question (and its responses, if any) of a single survey to a
# tab-separated file.
#
# Usage: python dump-survey.py <survey_id> <output_tsv>
if len(sys.argv) != 3:
    sys.exit("usage: %s <survey_id> <output_tsv>" % sys.argv[0])

sid = int(sys.argv[1])

# The survey id is handed to the driver as a bound parameter (%s placeholder)
# instead of being interpolated into the SQL text.
sql = """SELECT survey_question_id,
                survey_group,
                american,
                question_shortname,
                response,
                ag.survey_question_response.display_index
                    AS response_index
         FROM ag.survey_question
         LEFT JOIN ag.survey_question_response
             USING (survey_question_id)
         LEFT JOIN ag.group_questions USING (survey_question_id)
         LEFT JOIN ag.surveys USING (survey_group)
         WHERE survey_id = %s"""

con = psycopg2.connect(host='localhost', database='ag_test')
try:
    df = pd.read_sql(sql, con, params=(sid,))
finally:
    # always release the connection, even if the query fails
    con.close()

# sorts so that questions emulate survey order
df = df.sort_values(by=['survey_group',
                        'survey_question_id',
                        'response_index']).drop(columns='survey_group')

# The LEFT JOIN leaves response_index null for questions without responses,
# which makes pandas store the column as float. The nullable integer dtype
# writes the indices as "1" rather than "1.0" and the nulls as empty fields.
df['response_index'] = df['response_index'].astype('Int64')
df.to_csv(sys.argv[2], sep='\t', index=False, header=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import pandas as pd | ||
import click | ||
from psycopg2 import connect, sql | ||
|
||
# Connection handed to Composable.as_string() by the formatting helpers below
# so that identifiers and literals are quoted by the driver.
_conn = connect(host='localhost', database='ag_test')

# Template: store the translated text of one question.
_update_question = sql.SQL("UPDATE ag.survey_question "
                           "SET {lang} = {question} "
                           "WHERE survey_question_id = {qid};")

# Template: store the translated text of one response of a question,
# addressed by the question id and the response's display index.
_update_response = sql.SQL("UPDATE ag.survey_question_response "
                           "SET {lang} = {response} "
                           "WHERE survey_question_id = {qid} AND "
                           " display_index = {response_index};")

# Template: store the translated text of a response in ag.survey_response,
# addressed by its American text. There must be no comma between the SET
# clause and WHERE; a trailing comma there is a SQL syntax error.
_update_survey_response = sql.SQL("UPDATE ag.survey_response "
                                  "SET {language_field} = {response} "
                                  "WHERE response = {american_response};")
|
||
|
||
def _format_update_survey_resp(american_response, language_field, response):
    """Render the ag.survey_response UPDATE for one response as SQL text.

    ``language_field`` is quoted as an identifier; ``american_response`` and
    ``response`` are quoted as literals. The statement is returned with a
    trailing newline.
    """
    pieces = {
        'american_response': sql.Literal(american_response),
        'language_field': sql.Identifier(language_field),
        'response': sql.Literal(response),
    }
    statement = _update_survey_response.format(**pieces)
    return statement.as_string(_conn) + '\n'
|
||
def _format_update_question(lang, qid, question):
    """Render the ag.survey_question UPDATE for one question as SQL text.

    ``lang`` is quoted as an identifier; ``question`` and ``qid`` are quoted
    as literals. The statement is returned with a trailing newline.
    """
    pieces = {
        'lang': sql.Identifier(lang),
        'question': sql.Literal(question),
        'qid': sql.Literal(qid),
    }
    statement = _update_question.format(**pieces)
    return statement.as_string(_conn) + '\n'
|
||
|
||
def _format_update_resp(lang, qid, response, response_index):
    """Render the ag.survey_question_response UPDATE for one response.

    ``lang`` is quoted as an identifier; ``response``, ``qid`` and
    ``response_index`` are quoted as literals. The statement is returned as
    SQL text with a trailing newline.
    """
    pieces = {
        'lang': sql.Identifier(lang),
        'response': sql.Literal(response),
        'qid': sql.Literal(qid),
        'response_index': sql.Literal(response_index),
    }
    statement = _update_response.format(**pieces)
    return statement.as_string(_conn) + '\n'
|
||
|
||
@click.command()
@click.option('--input', type=click.Path(exists=True), required=True,
              help="Input excel spreadsheet w/ translations")
@click.option('--output', type=click.Path(exists=False), required=True,
              help="The patch file to write")
@click.option('--lang', type=str, required=True,
              help="The name of the language")
def mapper(input, output, lang):
    """Write a SQL patch adding the translations of a spreadsheet.

    Every sheet of the ``input`` workbook is read. The patch written to
    ``output`` first adds a ``lang`` column to ag.survey_question and
    ag.survey_question_response, then emits one UPDATE per question and, for
    questions with responses, UPDATEs per response.

    Raises ValueError if a question with a single row carries a response
    index, or if a response of a multi-row question is missing.
    """
    def stripper(x):
        if pd.isnull(x):
            return None
        else:
            return str.strip(x)

    # Quote the column name exactly as the UPDATE statements do (via
    # sql.Identifier). Otherwise a mixed-case or otherwise unusual language
    # name would create one column and update another, and raw interpolation
    # would allow SQL injection into the patch.
    lang_column = sql.Identifier(lang).as_string(_conn)

    # sheet_name=None -> load all sheets
    sheets = pd.read_excel(input, dtype=str, sheet_name=None)

    # translations routinely contain non-ASCII text, so do not depend on the
    # platform's default encoding
    with open(output, 'w', encoding='utf-8') as out:
        out.write("ALTER TABLE ag.survey_question\n"
                  " ADD COLUMN %s varchar;\n" % lang_column)
        out.write("ALTER TABLE ag.survey_question_response\n"
                  " ADD COLUMN %s varchar;\n" % lang_column)
        # NOTE(review): no column is added to ag.survey_response although it
        # is updated below -- presumably it is created elsewhere; confirm.

        for sheet, df in sheets.items():
            # we use row['american'] as the translated spreadsheets
            # did not alter the header
            df['american'] = df['american'].apply(stripper)
            df['response'] = df['response'].apply(stripper)

            for qid, qblock in df.groupby('survey_question_id'):
                first = qblock.iloc[0]

                # we use row['american'] as the translated spreadsheets
                # did not alter the header
                out.write(_format_update_question(lang, qid,
                                                  first['american']))

                if len(qblock) == 1:
                    # a lone row is expected to be a question without
                    # responses, so it must not carry a response index
                    if not pd.isnull(first['response_index']):
                        raise ValueError("Unexpected single response "
                                         "(non-null response_index) on "
                                         "qid: %s" % qid)
                    # no response to add (e.g., free text)
                    continue

                for row in qblock.itertuples():
                    # if the response is nan it means the translation
                    # missed a response
                    if pd.isnull(row.response):
                        raise ValueError("Null response: %s" % str(row))

                    out.write(_format_update_resp(lang, qid, row.response,
                                                  row.response_index))
                    # NOTE(review): row.american is the question column of
                    # the sheet, yet it is used here to match
                    # ag.survey_response.response -- confirm this is the
                    # intended lookup key.
                    out.write(_format_update_survey_resp(row.american, lang,
                                                         row.response))
|
||
|
||
# Run the click command only when executed as a script, not on import.
if __name__ == "__main__":
    mapper()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Dumping a survey | ||
|
||
To dump a survey, refer to the survey ID (see `select survey_id, american from ag.surveys join ag.survey_group on survey_group=group_order;`). | ||
|
||
```bash | ||
python dump-survey.py <survey_id_number> <output_tsv_path> | ||
``` | ||
|
||
# Constructing the bulk of a language patch | ||
|
||
The majority of the patch for the survey in a new language can be constructed with: | ||
|
||
```bash | ||
python format_language_patches.py --input <your_xls_file> --output <the_patch> --lang <language_name> | ||
``` | ||
|
||
This will not automatically create the survey name and group entries. Please see `microsetta_private_api/db/patches/0081.sql` for an example |