From 3726bf2247868a529e817aad655e0943e4fe0406 Mon Sep 17 00:00:00 2001 From: AndrewIOM Date: Thu, 28 Mar 2024 10:11:51 +0000 Subject: [PATCH] Refactor shrub sample --- samples/3-shrub-nitrogen.fsx | 170 +++++++------------------ samples/bristlecone.fsx | 2 +- samples/components/allometric.fsx | 13 +- src/Bristlecone.Dendro/Plant.fs | 19 ++- tests/Bristlecone.Benchmark/Program.fs | 24 ++-- 5 files changed, 89 insertions(+), 139 deletions(-) diff --git a/samples/3-shrub-nitrogen.fsx b/samples/3-shrub-nitrogen.fsx index 565eef0..ac4e7c7 100644 --- a/samples/3-shrub-nitrogen.fsx +++ b/samples/3-shrub-nitrogen.fsx @@ -3,8 +3,8 @@ // #nuget: Bristlecone //////////////////////////////////////////////////// -/// Plant nitrogen limitation using wood rings -/// and nitrogen isotopes +// Plant nitrogen limitation using wood rings +// and nitrogen isotopes //////////////////////////////////////////////////// (* An example Bristlecone script for working with @@ -140,97 +140,67 @@ let engine = // configuration can find known parameters for a model. If this step fails, there is an // issue with either your model, or the Bristlecone configuration. +let testSettings = + Test.create + |> Test.addNoise (Test.Noise.tryAddNormal "sigma[y]" "N") + |> Test.addNoise (Test.Noise.tryAddNormal "sigma[x]" "bs") + |> Test.addGenerationRules [ + Test.GenerationRules.alwaysMoreThan -3. "N" + Test.GenerationRules.alwaysLessThan 20. "N" + Test.GenerationRules.alwaysMoreThan 0. "bs" + Test.GenerationRules.monotonicallyIncreasing "x" ] // There must be at least 10mm of wood production + |> Test.addStartValues [ + "x", 5.0 + "bs", 5.0 |> Allometric.Proxies.toBiomassMM + "N", 3.64 ] + |> Test.withTimeSeriesLength 30 + |> Test.endWhen (Optimisation.EndConditions.afterIteration 1000) + +let testResult = + hypotheses + |> List.map(fst >> Bristlecone.testModel engine testSettings) + // 5. Load Real Data // ---------------------------- -// Here, we are using the FSharp.Data type provider to read in .csv datasets. +// Here, we are using the Bristlecone.Dendro package to +// read in dendroecological data. +open Bristlecone.Dendro -// 6. Fit Models to Real Data -// ----------------------------------- +let shrubData = + let plants = Data.PlantIndividual.loadRingWidths (__SOURCE_DIRECTORY__ + "/../data/yamal-rw.csv") + let isotopeData = Data.PlantIndividual.loadLocalEnvironmentVariable (__SOURCE_DIRECTORY__ + "/../data/yuribei-d15N-imputed.csv") + plants |> PlantIndividual.zipEnvMany "N" isotopeData -// 7. Calculate model comparison statistics +// 6. Fit Models to Real Data // ----------------------------------- -// TEMP - old script. How to refactor into central libraries? -// ..... - -// Could do with defnining a tree ring type that specifies the ways they work. -// - Tree ring type has transforms between measurements. e.g. ring width to biomass. -// - Can have other measurements for the individual (isotopes etc.). -// - Can have associated climate data (that applies across many individuals). -// Things that need to be able to to: -// - Define start values -// - Load in and add additional measurements / climate data -// Static type: -// - Gen for fake start values - -module DendroPlant = - - type DendroPlant = { - Series: GrowthSeries.GrowthSeries - } - +// 7. Calculate model comparison statistics +// ----------------------------------- - - //2. Test using fake data - //---------------------------- - let startValues = [ code "x", 5.; code "N", 3.64; code "bs", (5. |> ModelComponents.Proxies.toBiomassMM)] |> Map.ofList - let generationRules = - [ code "bs", fun data -> data |> Seq.min > 0. - code "N", fun data -> data |> Seq.min > -3.0 - code "x", fun data -> data |> Seq.pairwise |> Seq.sumBy (fun (a,b) -> b - a) > 10. // There must be at least 10mm of wood production - code "N", fun data -> data |> Seq.max < 20. ] // N must not get to levels above 20 units - - let addNoise p data = - let random = System.Random() - data |> Map.map(fun key value -> - let sigma = - match key with - | k when k = code "N" -> p |> Pool.getEstimate "sigma[y]" - | k when k = code "bs" -> p |> Pool.getEstimate "sigma[x]" - | _ -> invalidOp "No sigma for this!" - let draw = Bristlecone.Statistics.Distributions.Normal.draw random 0. sigma - value |> TimeSeries.map (fun (x,_) -> x + draw())) - - let testResult = - hypotheses - |> Seq.head - |> Bristlecone.testModel Options.engine 30 startValues Options.endWhen generationRules addNoise - - + // 3. Load Real Data and Estimate // ---------------------------- -let shrubs = - let yuribei = Data.PlantIndividual.loadRingWidths (__SOURCE_DIRECTORY__ + "/../data/yamal-rw.csv") - let d15N = Data.PlantIndividual.loadLocalEnvironmentVariable (__SOURCE_DIRECTORY__ + "/../data/yuribei-d15N-imputed.csv") - yuribei - |> Seq.map (fun s -> (s.Identifier.Value, s)) - |> Seq.keyMatch d15N - |> Seq.map (fun (_,plant,d15N) -> PlantIndividual.zipEnv (code "N") plant d15N) - |> Seq.toList - -let getStartValues (startDate:System.DateTime) (plant:PlantIndividual) = +// Define the start values for a model system, as follows: +// initial radius = +let startValues (startDate:System.DateTime) (plant:PlantIndividual.PlantIndividual) = let initialRadius = match plant.Growth with - | RingWidth s -> + | PlantIndividual.PlantGrowth.RingWidth s -> match s with - | Absolute c -> c.Head |> fst |> removeUnit - | Cumulative c -> - let start = (c |> TimeSeries.trimStart (startDate - System.TimeSpan.FromDays(366.))).Values |> Seq.head |> removeUnit - // printfn "Start cumulative growth = %f" start - start - | Relative _ -> invalidOp "Not implemented" - | _ -> invalidOp "Not implemented 2" - let initialMass = initialRadius |> removeUnit |> ModelComponents.Proxies.toBiomassMM + | GrowthSeries.Absolute c -> c.Head |> fst + | _ -> invalidOp "Not applicable" + | _ -> invalidOp "Not applicable" + let initialMass = initialRadius |> ModelComponents.Proxies.toBiomassMM let initialNitrogen = plant.Environment.[code "N"].Head |> fst - [ (code "x", initialRadius) - (code "N", initialNitrogen) - (code "bs", initialMass) ] |> Map.ofList + [ ("x", initialRadius) + ("N", initialNitrogen) + ("bs", initialMass) ] |> Map.ofList let workPackages shrubs hypotheses engine saveDirectory = seq { @@ -239,7 +209,7 @@ let workPackages shrubs hypotheses engine saveDirectory = // 1. Arrange the subject and settings let shrub = s |> PlantIndividual.toCumulativeGrowth let common = shrub |> PlantIndividual.keepCommonYears - let startDate = (common.Environment.[code "N"]).StartDate |> snd + let startDate = (common.Environment.["N"]).StartDate |> snd let startConditions = getStartValues startDate shrub let e = engine |> Bristlecone.withConditioning (Custom startConditions) @@ -259,61 +229,19 @@ let work = workPackages shrubs hypotheses Options.engine Options.resultsDirector let run() = work |> Seq.iter (OrchestrationMessage.StartWorkPackage >> Options.orchestrator.Post) -// Temp -module Temp = - - open Bristlecone.Data - - /// Load an `EstimationResult` that has previously been saved as - /// three seperate dataframes. Results will only be reconstructed - /// when file names and formats are in original Bristlecone format. - let loadAll directory subject (modelSystem:ModelSystem) modelId = - let mles = MLE.load directory subject modelId |> Seq.map(fun (k,v) -> k.ToString(), v) - let series = Series.load directory subject modelId |> Seq.map(fun (k,v) -> k.ToString(), v) - let traces = Trace.load directory subject modelId |> Seq.map(fun (k,v) -> k.ToString(), v) - - printfn "%s %i" subject modelId - - let updateParameter (k:ShortCode) v newParameters = - printfn "New key is %s" k.Value - if k = code "conductivity" then - match newParameters |> Map.tryFind k with - | Some i -> Parameter.setEstimate v i - | None -> Parameter.setEstimate v (newParameters |> Map.find (code "insulation")) - else Parameter.setEstimate v (newParameters |> Map.find k) - - mles - |> Seq.keyMatch series - |> Seq.map(fun (k,v1,v2) -> (k, (v1, v2))) - |> Seq.keyMatch traces - |> Seq.map(fun (k,t,(s,(l,p))) -> - { ResultId = k |> System.Guid.Parse - Likelihood = l - Parameters = modelSystem.Parameters |> Map.map(fun k v -> updateParameter k v p) - Series = s - Trace = t }) - - let saveDiagnostics () = // 1. Get all results sliced by plant and hypothesis let results = - let get subject model modelId = Temp.loadAll Options.resultsDirectory subject.Identifier.Value model modelId + let get subject model modelId = Bristlecone.Data.EstimationResult.loadAll Options.resultsDirectory subject.Identifier.Value model modelId Bristlecone.ModelSelection.ResultSet.arrangeResultSets shrubs hypotheses get // 2. Save convergence statistics to file // NB include only results within 5 likelihood of the minimum (to remove outliers) results - |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) - |> Seq.toList - // |> List.map(fun (x,a,b,c) -> - // if c.IsSome - // then - // let all = fst c.Value - // let minimum = snd c.Value - // (x,a,b,Some (all |> Seq.where(fun e -> e.Likelihood < (minimum.Likelihood) + 5.), minimum)) - // else (x,a,b,c)) - |> Diagnostics.Convergence.gelmanRubinAll 10000 3 + // |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) + // |> Seq.toList + |> Diagnostics.Convergence.gelmanRubinAll 10000 |> Data.Convergence.save Options.resultsDirectory // 3. Save Akaike weights to file diff --git a/samples/bristlecone.fsx b/samples/bristlecone.fsx index 9d9ffca..4410edf 100644 --- a/samples/bristlecone.fsx +++ b/samples/bristlecone.fsx @@ -4,4 +4,4 @@ #r "../src/Bristlecone/bin/Debug/netstandard2.0/Microsoft.Research.Oslo.dll" #r "../src/Bristlecone/bin/Debug/netstandard2.0/Bristlecone.dll" -//#r "../src/Bristlecone.Dendro/bin/Debug/netstandard2.0/Bristlecone.Dendro.dll" \ No newline at end of file +#r "../src/Bristlecone.Dendro/bin/Debug/netstandard2.0/Bristlecone.Dendro.dll" \ No newline at end of file diff --git a/samples/components/allometric.fsx b/samples/components/allometric.fsx index 4480e5c..0568638 100644 --- a/samples/components/allometric.fsx +++ b/samples/components/allometric.fsx @@ -3,7 +3,6 @@ module Allometric #load "../bristlecone.fsx" open Bristlecone -open Bristlecone.Language let pi = System.Math.PI @@ -81,6 +80,9 @@ module Götmark2016_ShrubModel = module Allometrics = open Götmark2016_ShrubModel + open Bristlecone.Dendro + + let private removeUnit (x:float<_>) = float x let mass woodDensity volume = volume * woodDensity @@ -88,8 +90,9 @@ module Allometrics = let massToVolume woodDensity mass = mass / woodDensity - let shrubBiomass b a rtip p lmin k5 k6 n woodDensity radius = + let shrubBiomass b a rtip p lmin k5 k6 n woodDensity (radius:float) = radius + |> removeUnit |> NiklasAndSpatz_Allometry.stemLength k5 k6 |> shrubVolume b a rtip p lmin k5 k6 n |> snd |> mass woodDensity @@ -108,9 +111,11 @@ module Allometrics = module Proxies = + open Bristlecone.Dendro + /// Radius in millimetres - let toBiomassMM radiusMM = - radiusMM / 10. |> Allometrics.shrubBiomass Constants.b Constants.a Constants.rtip Constants.p Constants.lmin Constants.k5 Constants.k6 Constants.numberOfStems Constants.salixWoodDensity + let toBiomassMM (radius:float) = + radius / 10. |> Allometrics.shrubBiomass Constants.b Constants.a Constants.rtip Constants.p Constants.lmin Constants.k5 Constants.k6 Constants.numberOfStems Constants.salixWoodDensity /// Biomass in grams. let toRadiusMM biomassGrams = diff --git a/src/Bristlecone.Dendro/Plant.fs b/src/Bristlecone.Dendro/Plant.fs index 8706c91..2ee27fb 100755 --- a/src/Bristlecone.Dendro/Plant.fs +++ b/src/Bristlecone.Dendro/Plant.fs @@ -18,7 +18,6 @@ module EnvironmentalVariables = [] module PlantIndividual = - type PlantGrowth = | RingWidth of GrowthSeries.GrowthSeries | BasalArea of GrowthSeries.GrowthSeries @@ -39,7 +38,19 @@ module PlantIndividual = float x let zipEnv envName envData (plant:PlantIndividual) = - { plant with Environment = plant.Environment.Add (envName, envData) } + let code = ShortCode.create envName + match code with + | None -> failwith "%s is not a valid environment code." + | Some c -> { plant with Environment = plant.Environment.Add (c, envData) } + + /// Assigns local environmental conditions to each plant in a sequence, + /// given a sequence of environmental time-series where each time-series + /// has the code of the plant associated with it. + let zipEnvMany envName (env:(string * TimeSeries.TimeSeries) seq) plants = + plants + |> Seq.map (fun s -> (s.Identifier.Value, s)) + |> Seq.keyMatch env + |> Seq.map (fun (_,plant,e) -> zipEnv envName plant e) let growthSeries plant = match plant with @@ -94,10 +105,12 @@ module PlantIndividual = |> List.unzip let startDate = startDates |> List.map snd |> List.max let endDate = endDates |> List.min - let mapts f = TimeSeries.map f { plant with Environment = plant.Environment |> Map.toList |> List.map (fun (x,y) -> x,y |> TimeSeries.bound startDate endDate |> Option.get) |> Map.ofList Growth = plant.Growth |> growthSeries |> GrowthSeries.growthToTime |> TimeSeries.bound startDate endDate |> Option.get |> TimeSeries.map (fun (x,t) -> x * 1.) |> GrowthSeries.Absolute |> RingWidth } + /// Where a plant has associated environmental data, discard the beginning + /// or end of the growth and environment time-series where not all data + /// are present. let keepCommonYears (plant:PlantIndividual) = let allSeries = let response = plant.Growth |> growthSeries |> GrowthSeries.growthToTime diff --git a/tests/Bristlecone.Benchmark/Program.fs b/tests/Bristlecone.Benchmark/Program.fs index 972492d..c49baae 100755 --- a/tests/Bristlecone.Benchmark/Program.fs +++ b/tests/Bristlecone.Benchmark/Program.fs @@ -35,16 +35,18 @@ module TestSuite = let twoDim f (x: float[]) = f x.[0] x.[1] let oneDim f (x: float[]) = f x.[0] + let multidim f (x: float[]) = f x // Functions, input domain, global minimum, and global minimum point(s) let fixedDimension = - [ "Bukin Sixth", bukinSixth |> twoDim, [ (-15., -5.); (-3., 3.) ], 0., [ [ -10.; 1. ] ] + [ "Ackley (2D)", ackley |> multidim, [ (-32.768, 32.768); (-32.768, 32.768) ], 0., [[0.; 0.]] + "Bukin Sixth", bukinSixth |> twoDim, [ (-15., -5.); (-3., 3.) ], 0., [ [ -10.; 1. ] ] "Holder Table", holderTable |> twoDim, [ (-10., 10.); (-10., 10.) ], -19.20850257, [[8.05502; 9.66459]; [8.05502; -9.66459]; [-8.05502; 9.66459]; [-8.05502; -9.66459]] "Cross in tray", crossInTray |> twoDim, [ (-10., 10.); (-10., 10.) ], -2.06261185, [[1.3491; -1.3491]; [1.3491; 1.3491]; [-1.3491; 1.3491]; [-1.3491; -1.3491]] - // "Dropwave", dropWave |> twoDim, [ 1 .. Config.startPointCount ] |> List.map(fun _ -> twoDimension -5.12 5.12), -1., [[0.;0.]] - // "Eggholder", eggHolder |> twoDim, [ 1 .. Config.startPointCount ] |> List.map(fun _ -> twoDimension -5.12 5.12), -959.6406627, [[512.; 404.2319]] - // "Gramarcy-Lee", gramacyLee |> oneDim, [ 1 .. Config.startPointCount ] |> List.map(fun _ -> [between 0.5 2.5]), -0.869011134989500, [[0.548563444114526]] - // "Langermann", langermann |> twoDim, [(0., 10.); (0., 10.)], -5.1621259, [[2.00299219; 1.006096]] + // "Dropwave", dropWave |> twoDim, [ (-512., 512.) ], 1., [[0.;0.]] + // "Eggholder", eggHolder |> twoDim, [ (-512., 512.) ], -959.6406627, [[512.; 404.2319]] + // "Gramarcy-Lee", gramacyLee |> oneDim, [ (0.5, 2.5) ], -0.869011134989500, [[0.548563444114526]] + // "Langermann", langermann |> twoDim, [(0., 10.); (0., 10.)], -5.1621259, [[2.00299219; 1.006096]] ] // let nDimensional = [ @@ -194,14 +196,16 @@ module TimeSeriesTests = MillisecondsMedian = successes |> List.map (snd >> float) |> median } - let engine optimise = + let engine optimise isAmoeba = Bristlecone.Bristlecone.mkContinuous |> Bristlecone.Bristlecone.withCustomOptimisation optimise + |> (fun b -> if isAmoeba then { b with Constrain = Bristlecone.Parameter.ConstraintMode.Transform } else b) let runTimeSeriesTests timeModels optimFunctions = List.allPairs optimFunctions timeModels - |> List.map(fun ((optimName, optimise), (modelName, modelFn, startValues)) -> - runReplicated Config.startPointCount (fun () -> Bristlecone.Bristlecone.testModel (engine optimise) settings modelFn) + |> List.map(fun ((optimName: string, optimise), (modelName, modelFn, startValues)) -> + let isAmoeba = optimName.Contains("amoeba") + runReplicated Config.startPointCount (fun () -> Bristlecone.Bristlecone.testModel (engine optimise isAmoeba) settings modelFn) |> summarise modelName optimName ) @@ -220,7 +224,7 @@ let annealSettings = EndConditions.afterIteration 10000 x) } let optimFunctions = - [ //"amoeba single", Amoeba.Solver.solve Amoeba.Solver.Default + [ "amoeba single", Amoeba.Solver.solve Amoeba.Solver.Default // "amoeba swarm", Amoeba.swarm 5 20 Amoeba.Solver.Default // "anneal classic", MonteCarlo.SimulatedAnnealing.classicalSimulatedAnnealing 0.01 false annealSettings // "anneal cauchy", MonteCarlo.SimulatedAnnealing.fastSimulatedAnnealing 0.01 false annealSettings @@ -228,7 +232,7 @@ let optimFunctions = // "automatic MCMC", MonteCarlo.``Automatic (Adaptive Diagnostics)`` // "metropolis-gibbs", MonteCarlo.``Metropolis-within Gibbs`` // "adaptive metropolis", MonteCarlo.adaptiveMetropolis 0.250 500 - "random walk MCMC", MonteCarlo.randomWalk [] + // "random walk MCMC", MonteCarlo.randomWalk [] // "random walk w/ tuning", MonteCarlo.randomWalk [ MonteCarlo.TuneMethod.CovarianceWithScale 0.25, 500, EndConditions.afterIteration 10000 ] ]