WIP: add train_feature parameter to FeatureSpecEntropy

Tractables · Dec 3, 2024 · 9a70fcb
1 parent 9fb3bae
commit 9a70fcb
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 54 deletions.
diff --git a/eval.sh b/eval.sh
@@ -8,4 +8,6 @@
 # julia --project qc/benchmarks/tool.jl -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>2,Main.Typ.t=>1],2,3)" "Pair{SpecEntropy{STLC},Float64}[SpecEntropy{STLC}(2,200,wellTyped)=>0.3]" "2" "0.1"
 
 # current command for unif types:
-(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft)=>0.3]" "2000" "0.1")
+(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,false)=>0.3]" "2000" "0.1")
+
+(cd $DICEREPO && julia --project $TOOL -f "LangSiblingDerivedGenerator{STLC}(Main.Expr.t,Pair{Type,Integer}[Main.Expr.t=>5,Main.Typ.t=>2],2,3)" "Pair{FeatureSpecEntropy{STLC},Float64}[FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,true)=>0.3]" "2000" "0.1")
diff --git a/qc/benchmarks/benchmarks.jl b/qc/benchmarks/benchmarks.jl
@@ -396,12 +396,14 @@ struct FeatureSpecEntropy{T} <: LossConfig{T}
     samples_per_batch::Integer
     property::Function
     feature::Function # deterministic Dice.Dist -> Hashable 
+    train_feature::Bool
 end
-function FeatureSpecEntropy{T}(; resampling_frequency, samples_per_batch, property, feature) where T
-    FeatureSpecEntropy{T}(resampling_frequency, samples_per_batch, property, feature)
+function FeatureSpecEntropy{T}(; resampling_frequency, samples_per_batch, property, feature, train_feature) where T
+    FeatureSpecEntropy{T}(resampling_frequency, samples_per_batch, property, feature, train_feature)
 end
 to_subpath(p::FeatureSpecEntropy) = [
     "feature_spec_entropy",
+    "train_feature=$(p.train_feature)",
     "freq=$(p.resampling_frequency)-spb=$(p.samples_per_batch)",
     "prop=$(p.property)",
     "feature=$(p.feature)",
@@ -446,12 +448,14 @@ function produce_loss(rs::RunState, m::FeatureSpecEntropyLossMgr, epoch::Integer
         loss, actual_loss = sum(
             if m.consider(sample)
                 num_meeting += 1
-                empirical_feature_logpr = log(feature_counts[m.p.feature(sample)]/length(samples))
 
-                # TODO: I think this expand_logprs is unnecessary?
-                lpr_eq = Dice.expand_logprs(l, LogPr(prob_equals(m.generation.value, sample)))
-                # [lpr_eq * empirical_feature_logpr, empirical_feature_logpr]
-                [lpr_eq * compute(a, lpr_eq), lpr_eq]
+                lpr_eq = LogPr(prob_equals(m.generation.value, sample))
+                if m.p.train_feature
+                    empirical_feature_logpr = log(feature_counts[m.p.feature(sample)]/length(samples))
+                    [lpr_eq * empirical_feature_logpr, empirical_feature_logpr]
+                else
+                    [lpr_eq * compute(a, lpr_eq), lpr_eq]
+                end
             else
                 [Dice.Constant(0), Dice.Constant(0)]
             end

diff --git a/qc/benchmarks/main.jl b/qc/benchmarks/main.jl
@@ -2,10 +2,10 @@ include("benchmarks.jl")
 using Infiltrator
 
 GENERATION_PARAMS_LIST = [
-    LangBespokeSTLCGenerator(
-        expr_size=5,
-        typ_size=2,
-    ),
+    # LangBespokeSTLCGenerator(
+    #     expr_size=5,
+    #     typ_size=2,
+    # ),
     LangSiblingDerivedGenerator{STLC}(
         root_ty=Expr.t,
         ty_sizes=[Expr.t=>5, Typ.t=>2],
@@ -26,12 +26,11 @@ GENERATION_PARAMS_LIST = [
 #    ),
 ]
 # LR_LIST = [0.3]
-LR_LIST = [0.001]
-FP_LIST = [0.]
+LR_LIST = [0.01, 0.03, 0.1, 0.3]
 # RESAMPLING_FREQUENCY_LIST = [1,2,5]
 
-SAMPLES_PER_BATCH_LIST = [50]
-EPOCHS_LIST = [3]
+SAMPLES_PER_BATCH_LIST = [200, 2000]
+EPOCHS_LIST = [2000]
 
 # SAMPLES_PER_BATCH_LIST = [nothing]
 BOUND_LIST = [0.]
@@ -55,43 +54,8 @@ println()
 LOSS_CONFIG_WEIGHT_PAIRS_LIST = collect(Iterators.flatten([
     (
         [
-            # ApproxSTLCConstructorEntropy() => lr,
-            # SatisfyPropertyLoss{RBT}(MultipleInvariants([
-            #     BookkeepingInvariant(),
-            #     BalanceInvariant(),
-            #     OrderInvariant(),
-            # ])) => lr,
-            # MLELossConfig{RBT}(RBTDepth(), Uniform()) => lr,
-            # SamplingEntropy{STLC}(
-            #     resampling_frequency=resampling_frequency,
-            #     samples_per_batch=samples_per_batch,
-            #     property=property,
-            #     eq=eq,
-            #     failure_penalty=fp,
-            #     forgiveness=forgiveness,
-            #     rand_forgiveness=rand_forgiveness,
-            #     keyf=:identity,
-            # ) => lr,
-            # SamplingEntropy{BST}(
-            #     resampling_frequency=resampling_frequency,
-            #     samples_per_batch=samples_per_batch,
-            #     property=BSTOrderInvariant(),
-            #     eq=eq,
-            #     failure_penalty=fp,
-            # ) => lr,
-            # SamplingEntropy{RBT}(
-            #     resampling_frequency=resampling_frequency,
-            #     samples_per_batch=samples_per_batch,
-            #     property=MultipleInvariants([
-            #         BookkeepingInvariant(),
-            #         BalanceInvariant(),
-            #         OrderInvariant(),
-            #     ]),
-            #     eq=eq,
-            #     failure_penalty=fp,
-            #     forgiveness=forgiveness,
-            #     rand_forgiveness=rand_forgiveness,
-            # ) => lr,
+            FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,false) => lr,
+            FeatureSpecEntropy{STLC}(2,200,wellTyped,typecheck_ft,true) => lr,
         ]
         for lr in LR_LIST
         for property in PROPERTY_LIST
@@ -138,7 +102,7 @@ end
 # ]))
 # EPOCHS_LIST = [100]
 
-TOOL_PATH = "examples/qc/benchmarks/tool.jl"
+TOOL_PATH = "qc/benchmarks/tool.jl"
 
 @sync for (p, lcws, epochs, bound) in Base.product(GENERATION_PARAMS_LIST, LOSS_CONFIG_WEIGHT_PAIRS_LIST, EPOCHS_LIST, BOUND_LIST)
     flags = join([s for s in ARGS if startswith(s, "-")], " ")

diff --git a/qc/benchmarks/tool.jl b/qc/benchmarks/tool.jl
@@ -2,6 +2,7 @@ include("benchmarks.jl")
 
 TAG = "v103_unif_type"
 TAG = "v104_unif_type_ACTUALLY_SPEC_ENT"
+TAG = "v105_train_feature_param"
 OUT_TOP_DIR = "../tuning-output"
 
 ## PARSE ARGS