diff --git a/eval.sh b/eval.sh index f394b7ba..23ddd297 100644 --- a/eval.sh +++ b/eval.sh @@ -8,4 +8,6 @@ # julia --project qc/benchmarks/tool.jl -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>2,Main.Typ.t=>1],2,3)" "Pair{SpecEntropy{STLC},Float64}[SpecEntropy{STLC}(2,200,wellTyped)=>0.3]" "2" "0.1" # current command for unif types: -(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft)=>0.3]" "2000" "0.1") \ No newline at end of file +(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,false)=>0.3]" "2000" "0.1") + +(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,true)=>0.3]" "2000" "0.1") \ No newline at end of file diff --git a/qc/benchmarks/benchmarks.jl b/qc/benchmarks/benchmarks.jl index 98b9438d..de198d16 100644 --- a/qc/benchmarks/benchmarks.jl +++ b/qc/benchmarks/benchmarks.jl @@ -396,12 +396,14 @@ struct FeatureSpecEntropy{T} <: LossConfig{T} samples_per_batch::Integer property::Function feature::Function # deterministic Dice.Dist -> Hashable + train_feature::Bool end -function FeatureSpecEntropy{T}(; resampling_frequency, samples_per_batch, property, feature) where T - FeatureSpecEntropy{T}(resampling_frequency, samples_per_batch, property, feature) +function FeatureSpecEntropy{T}(; resampling_frequency, samples_per_batch, property, feature, train_feature) where T + FeatureSpecEntropy{T}(resampling_frequency, samples_per_batch, property, feature, train_feature) end to_subpath(p::FeatureSpecEntropy) = [ "feature_spec_entropy", + "train_feature=$(p.train_feature)", "freq=$(p.resampling_frequency)-spb=$(p.samples_per_batch)", "prop=$(p.property)", "feature=$(p.feature)", @@ -446,12 +448,14 @@ function produce_loss(rs::RunState, m::FeatureSpecEntropyLossMgr, epoch::Integer loss, actual_loss = sum( if m.consider(sample) num_meeting += 1 - empirical_feature_logpr = log(feature_counts[m.p.feature(sample)]/length(samples)) - # TODO: I think this expand_logprs is unnecessary? - lpr_eq = Dice.expand_logprs(l, LogPr(prob_equals(m.generation.value, sample))) - # [lpr_eq * empirical_feature_logpr, empirical_feature_logpr] - [lpr_eq * compute(a, lpr_eq), lpr_eq] + lpr_eq = LogPr(prob_equals(m.generation.value, sample)) + if m.p.train_feature + empirical_feature_logpr = log(feature_counts[m.p.feature(sample)]/length(samples)) + [lpr_eq * empirical_feature_logpr, empirical_feature_logpr] + else + [lpr_eq * compute(a, lpr_eq), lpr_eq] + end else [Dice.Constant(0), Dice.Constant(0)] end diff --git a/qc/benchmarks/main.jl b/qc/benchmarks/main.jl index 5296e1e1..fafa8269 100644 --- a/qc/benchmarks/main.jl +++ b/qc/benchmarks/main.jl @@ -2,10 +2,10 @@ include("benchmarks.jl") using Infiltrator GENERATION_PARAMS_LIST = [ - LangBespokeSTLCGenerator( - expr_size=5, - typ_size=2, - ), + # LangBespokeSTLCGenerator( + # expr_size=5, + # typ_size=2, + # ), LangSiblingDerivedGenerator{STLC}( root_ty=Expr.t, ty_sizes=[Expr.t=>5, Typ.t=>2], @@ -26,12 +26,11 @@ GENERATION_PARAMS_LIST = [ # ), ] # LR_LIST = [0.3] -LR_LIST = [0.001] -FP_LIST = [0.] +LR_LIST = [0.01, 0.03, 0.1, 0.3] # RESAMPLING_FREQUENCY_LIST = [1,2,5] -SAMPLES_PER_BATCH_LIST = [50] -EPOCHS_LIST = [3] +SAMPLES_PER_BATCH_LIST = [200, 2000] +EPOCHS_LIST = [2000] # SAMPLES_PER_BATCH_LIST = [nothing] BOUND_LIST = [0.] @@ -55,43 +54,8 @@ println() LOSS_CONFIG_WEIGHT_PAIRS_LIST = collect(Iterators.flatten([ ( [ - # ApproxSTLCConstructorEntropy() => lr, - # SatisfyPropertyLoss{RBT}(MultipleInvariants([ - # BookkeepingInvariant(), - # BalanceInvariant(), - # OrderInvariant(), - # ])) => lr, - # MLELossConfig{RBT}(RBTDepth(), Uniform()) => lr, - # SamplingEntropy{STLC}( - # resampling_frequency=resampling_frequency, - # samples_per_batch=samples_per_batch, - # property=property, - # eq=eq, - # failure_penalty=fp, - # forgiveness=forgiveness, - # rand_forgiveness=rand_forgiveness, - # keyf=:identity, - # ) => lr, - # SamplingEntropy{BST}( - # resampling_frequency=resampling_frequency, - # samples_per_batch=samples_per_batch, - # property=BSTOrderInvariant(), - # eq=eq, - # failure_penalty=fp, - # ) => lr, - # SamplingEntropy{RBT}( - # resampling_frequency=resampling_frequency, - # samples_per_batch=samples_per_batch, - # property=MultipleInvariants([ - # BookkeepingInvariant(), - # BalanceInvariant(), - # OrderInvariant(), - # ]), - # eq=eq, - # failure_penalty=fp, - # forgiveness=forgiveness, - # rand_forgiveness=rand_forgiveness, - # ) => lr, + FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,false) => lr, + FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,true) => lr, ] for lr in LR_LIST for property in PROPERTY_LIST @@ -138,7 +102,7 @@ end # ])) # EPOCHS_LIST = [100] -TOOL_PATH = "examples/qc/benchmarks/tool.jl" +TOOL_PATH = "qc/benchmarks/tool.jl" @sync for (p, lcws, epochs, bound) in Base.product(GENERATION_PARAMS_LIST, LOSS_CONFIG_WEIGHT_PAIRS_LIST, EPOCHS_LIST, BOUND_LIST) flags = join([s for s in ARGS if startswith(s, "-")], " ") diff --git a/qc/benchmarks/tool.jl b/qc/benchmarks/tool.jl index de7c9f4c..eeabc4da 100644 --- a/qc/benchmarks/tool.jl +++ b/qc/benchmarks/tool.jl @@ -2,6 +2,7 @@ include("benchmarks.jl") TAG = "v103_unif_type" TAG = "v104_unif_type_ACTUALLY_SPEC_ENT" +TAG = "v105_train_feature_param" OUT_TOP_DIR = "../tuning-output" ## PARSE ARGS