Skip to content

Commit

Permalink
Merge pull request #25 from DenisTitovLab/add-plotting-functions
Browse files Browse the repository at this point in the history
add plot_fit_on_data() function and tests
  • Loading branch information
Denis-Titov authored Jun 14, 2024
2 parents b0881b8 + 9657ac2 commit 2834af3
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ version = "0.1.0"
[deps]
CMAEvolutionStrategy = "8d3b24bd-414e-49e0-94fb-163cc3a3e411"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b"

[compat]
CMAEvolutionStrategy = "0.2.6"
CSV = "0.10"
CairoMakie = "0.11"
DataFrames = "1.6"
Statistics = "1.6"
Symbolics = "5.25"
Expand Down
1 change: 1 addition & 0 deletions src/DataDrivenEnzymeRateEqs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include("qssa_general_rate_equation_derivation.jl")
include("rate_equation_fitting.jl")
include("data_driven_rate_equation_selection.jl")
include("helper_functions.jl")
include("plotting_functions.jl")

export @derive_general_mwc_rate_eq
export @derive_general_qssa_rate_eq
Expand Down
216 changes: 216 additions & 0 deletions src/plotting_functions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
using CairoMakie, CSV, DataFrames, Printf, Statistics

"""
plot_fit_on_data(
rate_equation,
nt_kinetic_params,
metab_names,
data;
enzyme_name::String = "",
num_col::Int = 5,
scaler = 4.0
)
Plot the fit of `rate_equation` with `nt_kinetic_params` on `data`.
# Arguments
- `rate_equation::Function`: Function that calculates the rate of the enzyme.
- `nt_kinetic_params::NamedTuple`: NamedTuple containing the kinetic parameters of the enzyme.
- `metab_names::Vector{Symbol}`: Vector of metabolite names.
- `data::DataFrame`: DataFrame containing the data to plot.
# Optional Arguments
- `enzyme_name::String = ""`: Name of the enzyme to be displayed on the plot.
- `num_col::Int = 5`: Number of columns in the plot.
- `scaler::Float = 4.0`: Scaling factor for the plot.
# Returns
- `fig::Figure`: Figure containing the plot.
"""
function plot_fit_on_data(
rate_equation,
nt_kinetic_params,
metab_names,
data;
enzyme_name::String = "",
num_col::Int = 5,
scaler = 4.0
)
fontsize = scaler * 5
markersize = scaler * 3
linewidth = scaler * 1
set_theme!(
Theme(
fontsize = fontsize,
Axis = (
titlefont = :regular,
xticksize = scaler * 1,
yticksize = scaler * 1,
xlabelfont = :bold,
ylabelfont = :bold,
yticklabelpad = scaler * 1,
ylabelpadding = scaler * 3
),
Legend = (titlefont = :regular,)
),
)
num_rows = ceil(Int, length(unique(data.source)) / num_col)
size_inches = scaler .* (7, num_rows)
size_pt = 72 .* size_inches

fig = Figure(size = size_pt)
for (i, figure) in enumerate(unique(data.source))
grid_layout = fig[i % num_col == 0 ? i ÷ num_col : 1 + i ÷ num_col, i % num_col == 0 ? num_col : i % num_col] = GridLayout()

fig_data_points = data[data.source .== figure, :]
#assert that all data points in the figure have the same X_axis_label
@assert all(fig_data_points[:, :X_axis_label] .==
fig_data_points[:, :X_axis_label][1])

x_axis_metabolite = fig_data_points[:, :X_axis_label][1]
other_metabolites = setdiff(metab_names, [Symbol(x_axis_metabolite)])
fig_Vmax_for_fit = exp(mean(-DataDrivenEnzymeRateEqs.log_ratio_predict_vs_data(
rate_equation,
Tables.columntable(fig_data_points),
nt_kinetic_params
)))
#make Axis for plotting
ax = Axis(
grid_layout[1, 1],
title = replace(figure, "_" => " "),
xticks = LinearTicks(3),
yticks = LinearTicks(3),
limits = begin
maximum(fig_data_points.Rate) > 0.0 ?
(nothing, (-0.05 * maximum(fig_data_points.Rate), nothing)) :
(nothing, (nothing, -0.05 * minimum(fig_data_points.Rate)))
end,
ytickformat = ys -> ["$(round(x/maximum(abs.(ys)), sigdigits=1))" for x in ys],
xtickformat = xs -> ["$(round(x/1e-3, sigdigits=2))" for x in xs],
yticklabelrotation = π / 2,
xlabelpadding = scaler * 0,
ylabelpadding = scaler * 0,
xticklabelpad = 0,
yticklabelpad = 0,
xlabel = crop_to_vowel_end(string(x_axis_metabolite), 5) * ", mM",
ylabel = begin
i % num_col == 1 ? "$(enzyme_name) Rate" : ""
end
)
#find unque and changing metabolites concs in fig_data_points except x_axis_metabolite
const_metab_concs = [metab
for metab in other_metabolites
if length(unique(fig_data_points[!, metab])) == 1 &&
unique(fig_data_points[!, metab])[1] != 0.0]
changing_metab_concs = [metab
for metab in other_metabolites
if length(unique(fig_data_points[!, metab])) > 1]
#loop over unique values of metabolites other than x_axis_metabolite and plot data and fit
for (i, unique_metab) in enumerate(eachrow(unique(fig_data_points[
:, other_metabolites])))

#get data points for the unique_metab
data_for_scatter = filter(
row -> all([row[metab] == unique_metab[metab]
for metab in other_metabolites]),
fig_data_points)
#get the fit for the unique_metab using nt_kinetic_params
function fit(x)
fig_Vmax_for_fit * rate_equation(
merge((; Symbol(x_axis_metabolite) => x,), NamedTuple(unique_metab)),
nt_kinetic_params)
end
#make a label of changing metabolites conc
metab_conc_label = join(
[unit_conc_convertion(unique_metab[metab])
for metab in changing_metab_concs],
", "
)
#plot data and fit
scatter!(ax, data_for_scatter[!, x_axis_metabolite],
data_for_scatter.Rate, markersize = markersize, label = metab_conc_label)
lines!(ax, 0 .. maximum(fig_data_points[!, x_axis_metabolite]),
x -> fit(x), linewidth = linewidth, label = metab_conc_label)
end
#make legend title
legend_title = begin
str = ""
for (i, metab) in enumerate(const_metab_concs)
str = str * crop_to_vowel_end(string(metab), 5) * "=" *
unit_conc_convertion(unique(fig_data_points[!, metab])[1])
str = str * "\n"
end
for (i, metab) in enumerate(changing_metab_concs)
str = str * crop_to_vowel_end(string(metab), 5) * ""
if i < length(changing_metab_concs)
str = str * ", "
end
end
str
end
leg = Legend(grid_layout[1, 2],
ax,
legend_title,
merge = true,
unique = true,
labelsize = fontsize,
titlesize = fontsize,
patchsize = scaler .* (6.0f0, 5.0f0),
patchlabelgap = scaler * 2,
padding = scaler .* (0.5f0, 0.0f0, 0.0f0, 0.0f0),
framevisible = false,
rowgap = 0,
titlegap = 0,
titlevalign = :top,
titlehalign = :left,
valign = :top,
halign = :left
)
colgap!(grid_layout, 1)
end
colgap!(fig.layout, scaler * 5)
rowgap!(fig.layout, scaler * 5)
fig
end

function crop_to_vowel_end(str, max_length::Int = 5)
vowels = Set(['a', 'e', 'i', 'o', 'u', 'y', 'A', 'E', 'I', 'O', 'U', 'Y'])
if length(str) > max_length
str = str[1:max_length]
end
while length(str) > 0 && (last(str) in vowels)
str = str[1:(end - 1)]
end
return str
end

function unit_conc_convertion(conc::Number)
if conc == 0.0
str = "––––"
elseif conc >= 0.1
str = @sprintf("%.2f", round(conc, sigdigits = 2)) * "M"
elseif conc >= 10e-3
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-3) * "mM"
elseif conc >= 1e-3
str = @sprintf("%.1f", round(conc, sigdigits = 2)/1e-3) * "mM"
elseif conc >= 100e-6
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-6) * "µM"
elseif conc >= 10e-6
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-6) * "µM"
elseif conc >= 1e-6
str = @sprintf("%.1f", round(conc, sigdigits = 2)/1e-6) * "µM"
elseif conc >= 100e-9
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-9) * "nM"
elseif conc >= 10e-9
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-9) * "nM"
elseif conc >= 1e-9
str = @sprintf("%.1f", round(conc, sigdigits = 2)/1e-9) * "nM"
elseif conc >= 0.1e-9
str = @sprintf("%.0f", round(conc, sigdigits = 2)/1e-12) * "pM"
elseif conc == 0.0
str = "0.0 "
else
str = "Number Out of Scale"
end
return str
end
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ using SafeTestsets
@safetestset "MWC Rate Eq Fitting" begin
include("tests_for_rate_eq_fitting.jl")
end
@safetestset "Plotting" begin
include("tests_for_plotting.jl")
end
@safetestset "MWC Rate Eq Subset Selection" begin
include("tests_for_optimal_rate_eq_selection.jl")
end
Expand Down
38 changes: 38 additions & 0 deletions test/tests_for_plotting.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# using TestEnv
# TestEnv.activate()

##
using CairoMakie, DataDrivenEnzymeRateEqs, CSV, DataFrames, Printf, Statistics, Test

#Test the loss_rate_equation speed using PKM2 enzyme data
PKM2_enzyme = (;
substrates=[:PEP, :ADP],
products=[:Pyruvate, :ATP],
regulators=[:F16BP, :Phenylalanine],
Keq=20_000.0,
oligomeric_state=4,
rate_equation_name=:pkm2_rate_equation
)
metab_names, param_names = @derive_general_mwc_rate_eq(PKM2_enzyme)
pkm2_rate_equation_no_Keq(metabs, p) = pkm2_rate_equation(metabs, p, 20000.0)
#Load and process data
PKM2_data_for_fit = CSV.read(joinpath(@__DIR__, "Data_for_tests/PKM2_data.csv"), DataFrame)
#Add source column that uniquely identifies a figure from publication
PKM2_data_for_fit.source = PKM2_data_for_fit.Article .* "_" .* PKM2_data_for_fit.Fig
data = PKM2_data_for_fit
#get kinetic_params from fitting results
nt_kinetic_params = (L=21.20386803833623, Vmax_a=0.6621530787152484, Vmax_i=0.6621530787152484, K_a_PEP=0.00016232173554865876, K_i_PEP=0.007270929135951147, K_a_ADP=0.00020861163680429678, K_i_ADP=0.00020861163680429678, K_a_Pyruvate=0.018115663373606324, K_i_Pyruvate=0.018115663373606324, K_a_ATP=0.00019564943473363028, K_i_ATP=0.00019564943473363028, K_a_F16BP_reg=2.000165919689415e-7, K_i_F16BP_reg=Inf, K_a_Phenylalanine_reg=Inf, K_i_Phenylalanine_reg=0.00022505058445537455, alpha_PEP_Pyruvate=0.0, alpha_PEP_ATP=0.0, alpha_PEP_F16BP=1.0, alpha_PEP_Phenylalanine=1.0, alpha_ADP_Pyruvate=1.0, alpha_ADP_ATP=0.0, alpha_ADP_F16BP=1.0, alpha_ADP_Phenylalanine=1.0, alpha_Pyruvate_F16BP=1.0, alpha_Pyruvate_Phenylalanine=1.0, alpha_ATP_F16BP=1.0, alpha_ATP_Phenylalanine=1.0, alpha_F16BP_Phenylalanine=1.0)

plot_of_fit = DataDrivenEnzymeRateEqs.plot_fit_on_data(
pkm2_rate_equation_no_Keq,
nt_kinetic_params,
metab_names,
data;
enzyme_name="PKM2",
num_col=5,
scaler=4.0
)

@test plot_of_fit isa Figure
@test !isempty(plot_of_fit.content)
@test length(plot_of_fit.content) == 56

0 comments on commit 2834af3

Please sign in to comment.