Skip to content

Commit

Permalink
Merge pull request #30 from aaowens/fixdox
Browse files Browse the repository at this point in the history
Fix warnings
  • Loading branch information
aaowens authored Apr 8, 2020
2 parents ccacc52 + adeb9f1 commit 2f6d417
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/process_codebook.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# AbstractTrees hook: a node's children are its child elements, materialized
# into an array via `collect`.
# NOTE(review): `AbstractXMLNode`/`child_elements` presumably come from LightXML — confirm.
function AbstractTrees.children(x::AbstractXMLNode)
    return collect(child_elements(x))
end
# AbstractTrees hook: when a tree is printed, show each node by its name only.
# NOTE(review): `name` presumably is the XML-library accessor for a node's tag — confirm.
function AbstractTrees.printnode(io::IO, x::AbstractXMLNode)
    return print(io, name(x))
end

"Processes XML codebook tree into a usable JSON table"
#Processes XML codebook tree into a usable JSON table
function process_codebook()
xdoc = parse_file("J265684_codebook.xml");
r = root(xdoc);
Expand Down
22 changes: 11 additions & 11 deletions src/use_codebook.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"Transforms a string like \"This time in 1996\" to \"This time in YEAR\""
#Transforms a string like \"This time in 1996\" to \"This time in YEAR\"
function year2year(s)
    # Every four-digit run that starts with 19 or 20 is replaced by the literal
    # token "YEAR"; everything else in `s` is returned untouched.
    return replace(s, r"(19|20)\d{2}" => "YEAR")
end
"If x or y are supersets of each other, keep the superset. Otherwise OR them"
#If x or y are supersets of each other, keep the superset. Otherwise OR them
function checkerror(x, y)
x, y = year2year(x), year2year(y)
if x == y
Expand All @@ -19,15 +19,15 @@ end

# Return, as an Int, the first four-digit year (19xx or 20xx) that appears in `s`.
# Throws an ArgumentError when `s` contains no such year, instead of failing with
# an opaque field-access error on the `nothing` that `match` returns.
function dropY(s)
    m = match(r"(19|20)\d{2}", s)
    m === nothing && throw(ArgumentError("no 19xx/20xx year found in $(repr(s))"))
    return parse(Int, m.match)
end

"Check if this label describes a missing value code"
#Check if this label describes a missing value code
function checkmissing(s)
    # Patterns whose presence anywhere in the label marks it as a missing-value
    # code. They are regexes, so the `.` in `Inap.` matches any single character
    # following "Inap", not only a literal period.
    patterns = (r"NA", r"DK", r"Inap.", r"Wild code", r"Missing")
    return any(p -> occursin(p, s), patterns)
end

"Check if this is a continuous variable"
#Check if this is a continuous variable
function iscontinuous(k)
for key in k
out = tryparse(Float64, key)
Expand All @@ -40,7 +40,7 @@ end

# Return a String holding the characters of `s` with every ',' removed
# (e.g. thousands separators such as "1,234").
function dropcomma(s)
    kept = Iterators.filter(c -> c != ',', s)
    return join(kept)
end

"Try to parse this value as a float"
#Try to parse this value as a float
function parse2(s, v)
out = tryparse(Float64, dropcomma(s))
# if this isn't a Float, maybe it was a range "-89.0 - -0.4"
Expand All @@ -54,15 +54,15 @@ function parse2(s, v)
end
# Rebuild `A` element by element so the result's element type is chosen from the
# values actually present (e.g. a `Vector{Any}` holding only Ints comes back as
# a `Vector{Int}`).
function narrowtypes(A)
    return collect(a for a in A)
end

```
"""
Inputs:
name: The variable ID we want to match
var2ind_dict: The crosswalk table
df_vars: The data
codebook_df: The codebook table
fastfind: Dict mapping from variable IDs to their index in the codebook
Processes a variable ID, finds all years that match, and collects the labels
```
"""
function process_varname(name, var2ind_dict, df_vars, codebook_df, fastfind)
## Find the row in the crosswalk we can find this variable in
myrow = var2ind_dict[name]
Expand All @@ -84,13 +84,13 @@ end



```
"""
Sometimes the labels use a comma in one year and a semicolon in another,
but are otherwise identical.
This function parses the different labels and drops these duplicates.
It also keeps only labels which are unique after cleaning, and constructs
a label which is a union of the parts (A OR B OR C)
```
"""
function trimlabel(s)
sp = strip.(split(s, "PSIDOR"))
# find common substrings
Expand Down Expand Up @@ -120,11 +120,11 @@ function trimlabel(s)
return reduce((x, y) -> "$x OR $y", newsp[2:end], init = newsp[1])
end
end
```
"""
Processes input JSON file
Reads the crosswalk and codebook table from disk and
harmonizes the labels. Constructs the output JSON
```
"""
function process_input(inputjson)
@assert last(splitext(inputjson)) == ".json"
codebook_json = jsontable(read("output/codebook.json", String));
Expand Down

0 comments on commit 2f6d417

Please sign in to comment.