diff --git a/Project.toml b/Project.toml index 39710df..f1f6bef 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,20 @@ name = "JSONFBCModels" uuid = "475c1105-d6ed-49c1-9b32-c11adca6d3e8" -authors = ["Mirek Kratochvil "] -version = "0.1.0" +authors = ["The authors of JSONFBCModels.jl"] +version = "0.1.1" [deps] AbstractFBCModels = "5a4f3dfa-1789-40f8-8221-69268c29937c" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] AbstractFBCModels = "0.1, 0.2" DocStringExtensions = "0.8, 0.9" JSON = "0.21" +PikaParser = "0.6" SparseArrays = "1" Test = "1" julia = "1" diff --git a/src/JSONFBCModels.jl b/src/JSONFBCModels.jl index c626204..a1ccda8 100644 --- a/src/JSONFBCModels.jl +++ b/src/JSONFBCModels.jl @@ -12,5 +12,6 @@ include("constants.jl") include("interface.jl") include("io.jl") include("utils.jl") +include("grr_utils.jl") end diff --git a/src/grr_utils.jl b/src/grr_utils.jl new file mode 100644 index 0000000..cfa6dec --- /dev/null +++ b/src/grr_utils.jl @@ -0,0 +1,186 @@ + +import PikaParser as PP + +""" +`PikaParser.jl` grammar for stringy GRR expressions. +""" +const grr_grammar = begin + # characters that typically form the identifiers + isident(x::Char) = + isletter(x) || + isdigit(x) || + x == '_' || + x == '-' || + x == ':' || + x == '.' || + x == '\'' || + x == '[' || + x == ']' + + # scanner helpers + eat(p) = m -> begin + last = 0 + for i in eachindex(m) + p(m[i]) || break + last = i + end + last + end + + # eat one of keywords + kws(w...) = m -> begin + last = eat(isident)(m) + m[begin:last] in w ? last : 0 + end + + PP.make_grammar( + [:expr], + PP.flatten( + Dict( + :space => PP.first(PP.scan(eat(isspace)), PP.epsilon), + :id => PP.scan(eat(isident)), + :orop => + PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))), + :andop => PP.first( + PP.tokens("&&"), + PP.token('&'), + PP.scan(kws("AND", "and")), + ), + :expr => PP.seq(:space, :orexpr, :space, PP.end_of_input), + :orexpr => PP.first( + :or => PP.seq(:andexpr, :space, :orop, :space, :orexpr), + :andexpr, + ), + :andexpr => PP.first( + :and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr), + :baseexpr, + ), + :baseexpr => PP.first( + :id, + :parenexpr => PP.seq( + PP.token('('), + :space, + :orexpr, + :space, + PP.token(')'), + ), + ), + ), + Char, + ), + ) +end + +grr_grammar_open(m, _) = + m.rule == :expr ? Bool[0, 1, 0, 0] : + m.rule == :parenexpr ? Bool[0, 0, 1, 0, 0] : + m.rule in [:or, :and] ? Bool[1, 0, 0, 0, 1] : + m.rule in [:andexpr, :orexpr, :notexpr, :baseexpr] ? Bool[1] : + (false for _ in m.submatches) + +grr_grammar_fold(m, _, subvals) = + m.rule == :id ? Expr(:call, :gene, String(m.view)) : + m.rule == :and ? Expr(:call, :and, subvals[1], subvals[5]) : + m.rule == :or ? Expr(:call, :or, subvals[1], subvals[5]) : + m.rule == :parenexpr ? subvals[3] : + m.rule == :expr ? subvals[2] : isempty(subvals) ? nothing : subvals[1] + +""" +$(TYPEDSIGNATURES) + +Parses a JSON-ish data reference to a `Expr`-typed gene association. Contains +"calls" to `gene`, `and` and `or` functions that describe the association. +""" +function parse_gene_association(str::String)::Maybe{Expr} + all(isspace, str) && return nothing + tree = PP.parse_lex(grr_grammar, str) + match = PP.find_match_at!(tree, :expr, 1) + match > 0 || throw(DomainError(str, "cannot parse GRR")) + PP.traverse_match(tree, match, open = grr_grammar_open, fold = grr_grammar_fold) +end + +""" +$(TYPEDSIGNATURES) + +Evaluate the gene association expression with the reference values given by the +`val` function. +""" +function eval_gene_association(ga::Expr, val::Function)::Bool + (ga.head == :call && length(ga.args) >= 2) || + throw(DomainError(ga, "invalid gene association expr")) + if ga.args[1] == :gene && length(ga.args) == 2 + val(ga.args[2]) + elseif ga.args[1] == :and + all(eval_gene_association.(ga.args[2:end], Ref(val))) + elseif ga.args[1] == :or + any(eval_gene_association.(ga.args[2:end], Ref(val))) + else + throw(DomainError(ga, "unsupported gene association function")) + end +end + +""" +$(TYPEDSIGNATURES) + +A helper for producing predictable unique sequences. Might be faster if +compacting would be done directly in sort(). +""" +function sortunique(x) + o = collect(x) + sort!(o) + put = prevind(o, firstindex(o)) + for i in eachindex(o) + if put >= firstindex(o) && o[i] == o[put] + # we already have this one + continue + else + put = nextind(o, put) + if put != i + o[put] = o[i] + end + end + end + o[begin:put] +end + +""" +$(TYPEDSIGNATURES) + +Convert the given gene association expression to DNF. +""" +function flatten_gene_association(ga::Expr)::A.GeneAssociationDNF + function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}} + if isempty(dnfs) + [String[]] + else + sortunique( + sortunique(String[l; r]) for l in dnfs[1] for r in fold_and(dnfs[2:end]) + ) + end + end + + (ga.head == :call && length(ga.args) >= 2) || + throw(DomainError(ga, "invalid gene association expr")) + if ga.args[1] == :gene && length(ga.args) == 2 + [[ga.args[2]]] + elseif ga.args[1] == :and + fold_and(flatten_gene_association.(ga.args[2:end])) + elseif ga.args[1] == :or + sortunique(vcat(flatten_gene_association.(ga.args[2:end])...)) + else + throw(DomainError(ga, "unsupported gene association function")) + end +end + +""" +$(TYPEDSIGNATURES) + +Formats a DNF gene association as a `String`. +""" +function format_gene_association_dnf( + grr::A.GeneAssociationDNF; + and = " && ", + or = " || ", +)::String + return join(("(" * join(gr, and) * ")" for gr in grr), or) +end diff --git a/src/interface.jl b/src/interface.jl index 10c2299..d9c128d 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -90,12 +90,25 @@ A.objective(model::JSONFBCModel) = sparsevec( Float64[float(get(rxn, "objective_coefficient", 0.0)) for rxn in model.reactions], ) -A.reaction_gene_products_available(model::JSONFBCModel, rid::String, available::Function) = - A.reaction_gene_products_available_from_dnf(model, rid, available) - -A.reaction_gene_association_dnf(model::JSONFBCModel, rid::String) = parse_grr( - get(model.reactions[model.reaction_index[rid]], "gene_reaction_rule", nothing), +function A.reaction_gene_products_available( + model::JSONFBCModel, + rid::String, + available::Function, ) + x = get(model.reactions[model.reaction_index[rid]], "gene_reaction_rule", nothing) + isnothing(x) && return nothing + x = parse_gene_association(x) + isnothing(x) && return nothing + eval_gene_association(x, available) +end + +function A.reaction_gene_association_dnf(model::JSONFBCModel, rid::String) + x = get(model.reactions[model.reaction_index[rid]], "gene_reaction_rule", nothing) + isnothing(x) && return nothing + x = parse_gene_association(x) + isnothing(x) && return nothing + flatten_gene_association(x) +end A.metabolite_formula(model::JSONFBCModel, mid::String) = parse_formula(get(model.metabolites[model.metabolite_index[mid]], "formula", nothing)) @@ -188,7 +201,7 @@ function Base.convert(::Type{JSONFBCModel}, mm::A.AbstractFBCModel) grr = A.reaction_gene_association_dnf(mm, rid) if !isnothing(grr) - res["gene_reaction_rule"] = unparse_grr(grr) + res["gene_reaction_rule"] = format_gene_association_dnf(grr) end res["lower_bound"] = lbs[ri] diff --git a/src/utils.jl b/src/utils.jl index 08b01c5..ec052ef 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -5,20 +5,6 @@ extract_json_metabolite_id(m, i) = string(get(m, "id", "met$i")) extract_json_gene_id(g, i) = string(get(g, "id", "gene$i")) -function parse_grr(str::Maybe{String}) - isnothing(str) && return nothing - isempty(str) && return nothing - - dnf = A.GeneAssociationDNF() - for isozyme in string.(split(str, " or ")) - push!( - dnf, - string.(split(replace(isozyme, "(" => "", ")" => "", " and " => " "), " ")), - ) - end - return dnf -end - function parse_formula(x::Maybe{String}) isnothing(x) && return nothing x == "" && return nothing @@ -66,8 +52,3 @@ function unparse_formula(x::Maybe{A.MetaboliteFormula}) ks = sort(collect(keys(x))) join(k * string(x[k]) for k in ks) end - -function unparse_grr(xs::Maybe{A.GeneAssociationDNF}) - isnothing(xs) && return nothing - join((join(x, " and ") for x in xs), " or ") -end diff --git a/test/misc.jl b/test/misc.jl index 0629410..5a0c9a2 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -13,7 +13,8 @@ end @testset "Corner cases" begin - import JSONFBCModels: parse_charge + import JSONFBCModels: + eval_gene_association, flatten_gene_association, parse_charge, sortunique @test parse_charge(1) == 1 @test parse_charge(2.0) == 2 @@ -22,4 +23,7 @@ end @test parse_charge(nothing) == nothing @test_throws ArgumentError parse_charge("totally positive charge") @test_throws DomainError parse_charge(["very charged"]) + @test_throws DomainError eval_gene_association(:(xor(gene("a"), gene("b"))), _ -> false) + @test_throws DomainError flatten_gene_association(:(xor(gene("a"), gene("b")))) + @test sortunique([3, 2, 2, 1]) == [1, 2, 3] end