-
-
Notifications
You must be signed in to change notification settings - Fork 154
/
Copy pathgenerate.sh
27 lines (22 loc) · 807 Bytes
/
generate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env bash
#
# Two-step pipeline:
#   1. GENERATE STATS: run generate_stats.py once per option, reading from
#      INPUT_DIR and writing to TMP_DIR.
#   2. FILTER NAMES: run filter_records.py on TMP_DIR, writing to TMP_DIR2,
#      passing MAX_NAMES as both the first-name and last-name truncation count.
#
# Both output directories are deleted at startup, so every run starts clean.
# The Python scripts are resolved relative to the current working directory.

# Stop at the first failing command (and on unset variables) so that the
# filter step never runs on partial or missing statistics.
set -euo pipefail

readonly INPUT_DIR="/Users/premy/Downloads/curate"
readonly TMP_DIR="/tmp/generate"
readonly TMP_DIR2="/tmp/generate2"
readonly MAX_NAMES="1000"

# Extra flag forwarded to generate_stats.py; set to "" to disable it.
DEBUG="--debug"
#DEBUG=""

# ':?' aborts instead of running a bare 'rm -rf' if a path is ever empty;
# '--' stops option parsing should a path start with a dash.
rm -rf -- "${TMP_DIR:?}" "${TMP_DIR2:?}"

# GENERATE STATS.
# one by one because it's too memory intensive.
for option in \
  "first_by_country" \
  "last_by_country" \
  "country_by_first" \
  "country_by_last" \
  "gender_by_first"; do
  # ${DEBUG:+...} expands to nothing at all when DEBUG is empty, so no empty
  # argument is passed; this form also works under 'set -u' on old bash.
  python generate_stats.py \
    --input_dir "${INPUT_DIR}" \
    --output_dir "${TMP_DIR}" \
    --option "${option}" \
    ${DEBUG:+"${DEBUG}"}
done

# FILTER NAMES.
python filter_records.py \
  --input_dir "${TMP_DIR}" \
  --output_dir "${TMP_DIR2}" \
  --trunc_first_names_count "${MAX_NAMES}" \
  --trunc_last_names_count "${MAX_NAMES}"