Skip to content

Commit

Permalink
Completely revamp variable usage analysis
Browse files Browse the repository at this point in the history
A new analysis now distinguishes between:
-  whether a variable may possibly not appear in the C code after the C
   preprocessor has run, triggering an unused variable error; and
- the upper bound of the number of run-time uses of a variable (for
  substitution and elimination purposes).

Quoting the comment:

(* This module implements a tree traversal that updates the `mark` field of
   nodes (specifically: of let-binders, branch binders and function parameter binders) with
   information regarding the usage of those variables. This module computes two
   slightly different pieces of information.
   - The first one is whether a given variable occurs in the generated C *after*
     C preprocessing. This is called an occurrence, and computing it involves
     reasoning about ifdefs. This is used exclusively to defeat C compilers'
     unused variable warnings.
   - The second one is an upper bound on the number of uses of a given variable.
     We call it a usage, and it allows removing unused variables (i.e, used at
     most zero times), as well as inlining variables used at most once. Both of
     these optimizations are subject to various syntactic criteria.
*)
  • Loading branch information
msprotz committed Oct 3, 2023
1 parent d1d94a4 commit a41167e
Show file tree
Hide file tree
Showing 10 changed files with 299 additions and 155 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ lib/Version.ml
*.runlim
karamel-nofstar.opam
lib/AutoConfig.ml
*.orig
.exrc
3 changes: 2 additions & 1 deletion lib/Ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ and ident = string [@ opaque]
and poly_comp = K.poly_comp [@ opaque]
and forward_kind = Common.forward_kind [@ opaque]
and lident = ident list * ident [@ opaque]
and valuation = Mark.occurrence * Mark.usage [@ opaque]
[@@deriving show,
visitors { variety = "iter"; name = "iter_misc"; polymorphic = true },
visitors { variety = "reduce"; name = "reduce_misc"; polymorphic = true },
Expand Down Expand Up @@ -303,7 +304,7 @@ and var =
and binder' = {
name: ident;
mut: bool;
mark: int ref;
mark: valuation ref;
meta: meta option;
atom: atom_t;
(** Only makes sense when opened! *)
Expand Down
8 changes: 4 additions & 4 deletions lib/DataTypes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ let remove_trivial_matches = object (self)
method! visit_ELet (_, t) b e1 e2 =
match open_binder b e2 with
| b, { node = EMatch (c, { node = EOpen (_, a); _ }, branches); _ } when
is_special b.node.name && !(b.node.mark) = 1 &&
is_special b.node.name && Mark.is_atmost 1 (snd !(b.node.mark)) &&
Atom.equal a b.node.atom ->
self#visit_EMatch ((), t) c e1 branches
| _ ->
Expand All @@ -836,7 +836,7 @@ let remove_trivial_matches = object (self)
| EUnit, [ [], { node = PUnit; _ }, body ] ->
(* match () with () -> ... is routinely generated by F*'s extraction. *)
(self#visit_expr_w () body).node
| _, [ [], { node = PBool true; _ }, b1; [ v ], { node = PBound 0; _ }, b2 ] when !(v.node.mark) = 0 ->
| _, [ [], { node = PBool true; _ }, b1; [ v ], { node = PBound 0; _ }, b2 ] when snd !(v.node.mark) = AtMost 0 ->
(* An if-then-else is generated as:
* match e with true -> ... | uu____???? -> ...
* where uu____???? is unused. *)
Expand All @@ -854,7 +854,7 @@ let remove_trivial_matches = object (self)

method! visit_branch env (binders, pat, expr) =
let _, binders, pat, expr = List.fold_left (fun (i, binders, pat, expr) b ->
if !(b.node.mark) = 0 && is_special b.node.name then
if snd !(b.node.mark) = AtMost 0 && is_special b.node.name then
i, binders, DeBruijn.subst_p pwild i pat, DeBruijn.subst any i expr
else
i + 1, b :: binders, pat, expr
Expand Down Expand Up @@ -897,7 +897,7 @@ let rec compile_pattern env scrut pat expr =
let b = with_type pat.typ {
name = i;
mut = false;
mark = ref 0;
mark = ref Mark.default;
meta = None;
atom = b
} in
Expand Down
4 changes: 2 additions & 2 deletions lib/Helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ let mk_deref t ?(const=false) e =
(* Binder nodes ***************************************************************)

let fresh_binder ?(mut=false) name typ =
with_type typ { name; mut; mark = ref 0; meta = None; atom = Atom.fresh () }
with_type typ { name; mut; mark = ref Mark.default; meta = None; atom = Atom.fresh () }

let mark_mut b =
{ b with node = { b.node with mut = true }}

let sequence_binding () = with_type TUnit {
name = "_";
mut = false;
mark = ref 0;
mark = ref Mark.default;
meta = Some MetaSequence;
atom = Atom.fresh ()
}
Expand Down
13 changes: 13 additions & 0 deletions lib/Mark.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(* See comments in UseAnalysis.ml *)

(* Whether the variable may occur, post-C preprocessor, in the generated code. *)
type occurrence = MaybeAbsent | Present
[@@deriving show]

(* How many times this variable will end up being used, at runtime. *)
type usage = AtMost of int
[@@deriving show]

let is_atmost k (AtMost n) = n <= k

let default = MaybeAbsent, AtMost max_int
4 changes: 3 additions & 1 deletion lib/PrintAst.ml
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,10 @@ and print_flag = function
string "no_inline"

and print_binder { typ; node = { name; mut; meta; mark; _ }} =
let o, u = !mark in
(if mut then string "mutable" ^^ break 1 else empty) ^^
group (group (string name ^^ lparen ^^ int !mark ^^ comma ^^ space ^^ print_meta meta ^^
group (group (string name ^^ lparen ^^ string (Mark.show_occurrence o) ^^ comma ^^
string (Mark.show_usage u) ^^ comma ^^ space ^^ print_meta meta ^^
rparen ^^ colon) ^/^
nest 2 (print_typ typ))

Expand Down
189 changes: 44 additions & 145 deletions lib/Simplify.ml
Original file line number Diff line number Diff line change
Expand Up @@ -113,115 +113,39 @@ let safe_pure_use e =
| Safe -> failwith "F* isn't supposed to nest uu__'s this deep, how did we miss it?"


(* Count the number of occurrences of each variable ***************************)

(* This phase does several things.
- It mutates the mark field of each binder with a conservative approximation
of the number of times this variable syntactically appears in the
continuation of the let-binding. (To be used by remove_uu.)
- For those variables that are at used at most (exactly) zero times, either
inline their definition away (some syntactic criteria permitting), or wrap
them in a call to ignore (hence no longer making them unused).
This phase is implemented with a cheap top-down traversal, which makes it
fast but imprecise. Notably:
- the use-count is conservative, meaning `let x in if ... then x else x`
marks `x` as used twice (indeed, it does appear syntactically twice); this
means `remove_uu` misses an optimization opportunity -- to give better
information to `remove_uu`, this phase could instead compute a three-valued
map that to each variable associates either Unused, UsedAtMostOnce, or
Unknown
- the use-count only counts syntactic occurrences and is not aware of
ifdefs, meaning that a non-zero syntactic count does NOT mean the C
compiler won't emit an unused variable error; consider `let x in ifdef
FOOBAR then x else 0` -- `x` *may* end up not appearing in C if FOOBAR is
undefined. This requires a different analysis that does not behave the same
as the use-count and computes a two-valued map that to each variable
associates either MaybeAbsent (initial state, variable may not
syntactically occur post pre-processing in C), or DefinitelyAppears (we're
sure it'll be there meaning no unused variable warning). Notably,
`MaybeAbsent + DefinitelyAppears` = DefinitelyAppears (regular if) but =
MaybeAbsent (ifdef).
*)

let count_and_remove_locals final = object (self)

inherit [_] map as super

method! extend env binder =
binder.node.mark := 0;
binder :: env

method! visit_EBound (env, _) i =
let b = List.nth env i in
incr b.node.mark;
EBound i
(* This phase tries to substitute away variables that are temporary (named
uu____ in F*, or "scrut" if inserted by the pattern matches compilation
phase), or that are of a type that cannot be copied (to hopefully skirt
future failures). This phase assumes the mark field of each binder contains a
conservative approximation of the number of uses of that binder. *)
let use_mark_to_inline_temporaries = object (self)

method private remove_trivial_let e =
match e with
| ELet (_, e1, { node = EBound 0; _ }) when Helpers.is_readonly_c_expression e1 ->
e1.node
| _ ->
e

method! visit_ELet (env, _) b e1 e2 =
(* Remove unused variables. Important to get rid of calls to [HST.get()]. *)
(* Note: we only visit e1 if we know it's going to be in the rewritten expression, otherwise it
generates spurious uses. This might influence the test below (is_readonly_c_expression e1)
but I don't think this has any concrete adverse consequences. *)
let env0 = env in
let env = self#extend env b in
let e2 = self#visit_expr_w env e2 in
if !(b.node.mark) = 0 && is_readonly_c_expression e1 then
self#remove_trivial_let (snd (open_binder b e2)).node
else if !(b.node.mark) = 0 then
let e1 = self#visit_expr_w env0 e1 in
if e1.typ = TUnit then
self#remove_trivial_let (ELet ({ b with node = { b.node with meta = Some MetaSequence }}, e1, e2))
else if Helpers.is_readonly_c_expression e1 && e2.node = EBound 0 then
e1.node
else if is_bufcreate e1 then
ELet (b, e1, with_type e2.typ (
ELet (sequence_binding (),
push_ignore (with_type b.typ (EBound 0)),
DeBruijn.lift 1 e2)))
else
ELet ({ node = { b.node with meta = Some MetaSequence }; typ = TUnit},
push_ignore e1,
e2)
else
let e1 = self#visit_expr_w env0 e1 in
self#remove_trivial_let (ELet (b, e1, e2))
inherit [_] map

method! visit_DFunction env cc flags n ret name binders body =
if not final then
super#visit_DFunction env cc flags n ret name binders body
method! visit_ELet _ b e1 e2 =
let e1 = self#visit_expr_w () e1 in
let e2 = self#visit_expr_w () e2 in
let _, v = !(b.node.mark) in
if (Helpers.is_uu b.node.name || b.node.name = "scrut" || Structs.should_rewrite b.typ = NoCopies) &&
v = AtMost 1 && (
is_readonly_c_expression e1 &&
safe_readonly_use e2 ||
safe_pure_use e2
) (* || is_readonly_and_variable_free_c_expression e1 && b.node.mut *)
then
(DeBruijn.subst e1 0 e2).node
else
let env = List.fold_left self#extend env binders in
let body = self#visit_expr_w env body in
let l = List.length binders in
let ignores = List.mapi (fun i b ->
if !(b.node.mark) = 0 && b.typ <> TUnit then
(* unit arguments will be eliminated, always, based on a type analysis *)
Some (push_ignore (with_type b.typ (EBound (l - i - 1))))
else
None
) binders in
let ignores = KList.filter_some ignores in
let body =
if ignores = [] then
body
else
List.fold_right (fun i body ->
let b = sequence_binding () in
with_type body.typ (ELet (b, i, DeBruijn.lift 1 body))
) ignores body
in
DFunction (cc, flags, n, ret, name, binders, body)


ELet (b, e1, e2)
end

let optimize_lets ?ifdefs files =
let open UseAnalysis in
let _ = (build_usage_map_and_mark ifdefs)#visit_files [] files in
let files = (use_mark_to_remove_or_ignore (not (ifdefs = None)))#visit_files () files in
let _ = (build_usage_map_and_mark ifdefs)#visit_files [] files in
let files = use_mark_to_inline_temporaries#visit_files () files in
files

(* Unused parameter elimination ***********************************************)

(* Done by typing (in general), and based on usage information (strictly for
Expand Down Expand Up @@ -283,7 +207,7 @@ let unused private_count_table lid ts (i: int) =
Hashtbl.mem private_count_table lid && (
let l = Hashtbl.find private_count_table lid in
i < List.length l &&
List.nth l i = 0
snd (List.nth l i) = Mark.AtMost 0
) ||
(* Second case: it's a unit, so here type-based elimination *)
List.nth ts i = TUnit
Expand Down Expand Up @@ -369,13 +293,13 @@ let remove_unused_parameters = object (self)
* parameter of [f]. This is an opportunity to update the entry for
* [f]. *)
Hashtbl.replace private_use_table current_lid
(List.mapi (fun k count ->
(List.mapi (fun k (o, Mark.AtMost count) ->
(* 0 is the last parameter, l - 1 the first *)
if k == l - 1 - (j - i) then begin
assert (count > 0);
count - 1
o, Mark.AtMost (count - 1)
end else
count
o, Mark.AtMost count
) (Hashtbl.find private_use_table current_lid))
| _ ->
(* TODO: we could be smarter here *)
Expand Down Expand Up @@ -467,30 +391,6 @@ let wrapping_arithmetic = object (self)
end


(* This phase tries to substitute away variables that are temporary (named
uu____ in F*, or "scrut" if inserted by the pattern matches compilation
phase), or that are of a type that cannot be copied (to hopefully skirt
future failures). This phase assumes the mark field of each binder contains a
conservative approximation of the number of uses of that binder. *)
let remove_uu = object (self)

inherit [_] map

method! visit_ELet _ b e1 e2 =
let e1 = self#visit_expr_w () e1 in
let e2 = self#visit_expr_w () e2 in
if (Helpers.is_uu b.node.name || b.node.name = "scrut" || Structs.should_rewrite b.typ = NoCopies) &&
!(b.node.mark) = 1 && (
is_readonly_c_expression e1 &&
safe_readonly_use e2 ||
safe_pure_use e2
) (* || is_readonly_and_variable_free_c_expression e1 && b.node.mut *)
then
(DeBruijn.subst e1 0 e2).node
else
ELet (b, e1, e2)
end

let remove_ignore_unit = object

inherit [_] map as super
Expand Down Expand Up @@ -748,7 +648,7 @@ let misc_cosmetic = object (self)
{ node = EBufWrite (e4, e4_index, { node = EFlat fields; _ }); _ })
when b'.node.meta = Some MetaSequence &&
List.exists (fun (f, x) -> f <> None && x.node = EBound 1) fields &&
!(b.node.mark) = 2 ->
Mark.is_atmost 2 (snd !(b.node.mark)) ->

(* if true then Warn.fatal_error "MATCHED: %a" pexpr (with_unit (ELet (b, e1, e2))); *)

Expand Down Expand Up @@ -794,7 +694,7 @@ let misc_cosmetic = object (self)
when b'.node.meta = Some MetaSequence &&
b''.node.meta = Some MetaSequence &&
List.exists (fun (f, x) -> f <> None && x.node = EBound 1) fields &&
!(b.node.mark) = 2 ->
Mark.is_atmost 2 (snd !(b.node.mark)) ->

(* if true then Warn.fatal_error "MATCHED: %a" pexpr (with_unit (ELet (b, e1, e2))); *)

Expand Down Expand Up @@ -843,7 +743,7 @@ let misc_cosmetic = object (self)
| EAny, ELet (b', { node = EApp (e3, [ { node = EAddrOf ({ node = EBound 0; _ }); _ } ]); _ },
{ node = EAssign (e4, { node = EBound 1; _ }); _ })
when b'.node.meta = Some MetaSequence &&
!(b.node.mark) = 2 ->
Mark.is_atmost 2 (snd !(b.node.mark)) ->

(* KPrint.bprintf "MATCHED: %a" pexpr (with_unit (ELet (b, e1, e2))); *)

Expand All @@ -862,7 +762,7 @@ let misc_cosmetic = object (self)
{ node = EApp (e3, [ { node = EAddrOf ({ node = EBound 0; _ }); _ } ]); _ },
{ node = ELet (b'', { node = EAssign (e4, { node = EBound 1; _ }); _ }, e5); _ })
when b'.node.meta = Some MetaSequence &&
!(b.node.mark) = 2 ->
Mark.is_atmost 2 (snd !(b.node.mark)) ->

(* KPrint.bprintf "MATCHED: %a" pexpr (with_unit (ELet (b, e1, e2))); *)

Expand Down Expand Up @@ -1999,8 +1899,7 @@ end
* compilation, once. *)
let simplify0 (files: file list): file list =
let files = remove_local_function_bindings#visit_files () files in
let files = (count_and_remove_locals false)#visit_files [] files in
let files = remove_uu#visit_files () files in
let files = optimize_lets files in
let files = remove_ignore_unit#visit_files () files in
let files = remove_proj_is#visit_files () files in
let files = combinators#visit_files () files in
Expand All @@ -2011,15 +1910,16 @@ let simplify0 (files: file list): file list =
* calls of the form recall foobar in them. *)
let simplify1 (files: file list): file list =
let files = sequence_to_let#visit_files () files in
let files = (count_and_remove_locals false)#visit_files [] files in
let files = optimize_lets files in
let files = let_to_sequence#visit_files () files in
files

(* This should be run late since inlining may create more opportunities for the
* removal of unused variables. *)
let remove_unused (files: file list): file list =
(* Relying on the side-effect of refreshing the mark. *)
let files = optimize_lets files in
let parameter_table = Hashtbl.create 41 in
let files = (count_and_remove_locals false)#visit_files [] files in
unused_parameter_table#visit_files parameter_table files;
ignore_non_first_order#visit_files parameter_table files;
let files = remove_unused_parameters#visit_files (parameter_table, 0) files in
Expand All @@ -2032,17 +1932,16 @@ let remove_unused (files: file list): file list =
let simplify2 ifdefs (files: file list): file list =
let files = sequence_to_let#visit_files () files in
(* Quality of hoisting is WIDELY improved if we remove un-necessary
* let-bindings. *)
let files = (count_and_remove_locals true)#visit_files [] files in
(* This one is mostly for the sake of removing copies of spinlock and the
like. *)
let files = remove_uu#visit_files () files in
* let-bindings. Also removes occurrences of spinlock and the like. *)
let files = optimize_lets ~ifdefs files in
let files = if !Options.wasm then files else fixup_while_tests#visit_files () files in
let files = hoist#visit_files [] files in
let files = if !Options.c89_scope then SimplifyC89.hoist_lets#visit_files (ref []) files else files in
let files = if !Options.wasm then files else fixup_hoist#visit_files () files in
let files = if !Options.wasm then files else let_if_to_assign#visit_files () files in
let files = if !Options.wasm then files else hoist_bufcreate#visit_files ifdefs files in
(* This phase relies on up-to-date mark information. TODO move up after
optimize_lets. *)
let files = misc_cosmetic#visit_files () files in
let files = misc_cosmetic2#visit_files () files in
let files = functional_updates#visit_files false files in
Expand Down
2 changes: 1 addition & 1 deletion lib/SimplifyWasm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ let eta_expand = object
let tret, targs = flatten_arrow t in
let n = List.length targs in
let binders, args = List.split (List.mapi (fun i t ->
with_type t { name = Printf.sprintf "x%d" i; mut = false; mark = ref 0; meta = None; atom = Atom.fresh () },
with_type t { name = Printf.sprintf "x%d" i; mut = false; mark = ref Mark.default; meta = None; atom = Atom.fresh () },
{ node = EBound (n - i - 1); typ = t }
) targs) in
let body = { node = EApp (body, args); typ = tret } in
Expand Down
Loading

0 comments on commit a41167e

Please sign in to comment.