-
-
Notifications
You must be signed in to change notification settings - Fork 331
/
Copy pathlib.rs
374 lines (338 loc) · 13.3 KB
/
lib.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
//! A libfuzzer-like fuzzer with llmp-multithreading support and restarts.
//! The `launcher` will spawn a new process for each CPU core.
//! This is the drop-in replacement for libfuzzer, to be used together with [`Atheris`](https://github.com/google/atheris)
//! for Python instrumentation and fuzzing.
use core::{convert::TryInto, ffi::c_void, slice, time::Duration};
use std::{
env,
os::raw::{c_char, c_int},
path::PathBuf,
};
use clap::{AppSettings, Arg, Command};
use libafl::{
bolts::{
core_affinity::Cores,
current_nanos,
launcher::Launcher,
rands::StdRand,
shmem::{ShMemProvider, StdShMemProvider},
tuples::{tuple_list, Merge},
AsSlice,
},
corpus::{Corpus, InMemoryCorpus, OnDiskCorpus},
events::EventConfig,
executors::{inprocess::InProcessExecutor, ExitKind, TimeoutExecutor},
feedback_or,
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback, TimeoutFeedback},
fuzzer::{Fuzzer, StdFuzzer},
generators::RandBytesGenerator,
inputs::{BytesInput, HasTargetBytes},
monitors::MultiMonitor,
mutators::{
scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator},
token_mutations::{I2SRandReplace, Tokens},
},
observers::{HitcountsMapObserver, TimeObserver},
schedulers::{IndexesLenTimeMinimizerScheduler, QueueScheduler},
stages::{StdMutationalStage, TracingStage},
state::{HasCorpus, HasMetadata, StdState},
Error,
};
use libafl_targets::{
CmpLogObserver, __sanitizer_cov_trace_cmp1, __sanitizer_cov_trace_cmp2,
__sanitizer_cov_trace_cmp4, __sanitizer_cov_trace_cmp8, std_edges_map_observer, EDGES_MAP_PTR,
MAX_EDGES_NUM,
};
/// Set up our coverage map.
///
/// Registers the inline 8-bit counter array `[start, stop)` as the edges map: `start` is stored
/// in `EDGES_MAP_PTR` and the number of counters in `MAX_EDGES_NUM`.
///
/// Every counter is a single byte, so the number of edges equals the byte distance between the
/// two pointers. It must not be divided by 8, otherwise only the first eighth of the counters
/// would be observed.
///
/// Declared `extern "C"` because the symbol is looked up and called by non-Rust code.
#[no_mangle]
pub extern "C" fn __sanitizer_cov_8bit_counters_init(start: *mut u8, stop: *mut u8) {
    // `saturating_sub` turns a bogus `stop < start` pair into an empty map instead of an
    // arithmetic underflow.
    let counters = (stop as usize).saturating_sub(start as usize);
    // SAFETY: plain stores to the coverage globals. This assumes the hook runs during start-up,
    // before any fuzzing code reads them — TODO confirm against the instrumentation runtime.
    unsafe {
        EDGES_MAP_PTR = start;
        MAX_EDGES_NUM = counters;
    }
}
/// `pcs` tables seem to be unused by `Atheris`, so we can ignore this setup function,
/// but the symbol is still being called and, hence, required.
///
/// Both pointers are ignored. The function is declared `extern "C"` because it is called by
/// non-Rust code, and the Rust ABI gives no stability guarantee for such calls.
#[no_mangle]
pub extern "C" fn __sanitizer_cov_pcs_init(_pcs_beg: *mut u8, _pcs_end: *mut u8) {
    // Intentionally a no-op: the PC table is not consumed anywhere.
}
/// Allow the python code to use `cmplog`.
///
/// Forwards a `memcmp(s1, s2, n)` call to the `__sanitizer_cov_trace_cmp*` hook matching the
/// largest power-of-two width (1, 2, 4 or 8 bytes) that fits into `n`, passing the leading
/// bytes of both buffers as native-endian integers.
///
/// This is a PoC implementation and could be improved.
/// For example, it only takes up to 8 bytes into consideration.
///
/// Nothing is reported when `n` is zero or when either buffer pointer is null.
/// `_caller_pc` and `_result` are ignored.
#[no_mangle]
pub extern "C" fn __sanitizer_weak_hook_memcmp(
    _caller_pc: *const c_void,
    s1: *const c_void,
    s2: *const c_void,
    n: usize,
    _result: c_int,
) {
    // Bail out before touching the pointers: building a slice from a null pointer is undefined
    // behavior even for a zero length.
    if n == 0 || s1.is_null() || s2.is_null() {
        return;
    }

    // SAFETY: both pointers are non-null and, per the `memcmp` contract of the hooked call,
    // each refers to at least `n` readable bytes.
    unsafe {
        let lhs = slice::from_raw_parts(s1 as *const u8, n);
        let rhs = slice::from_raw_parts(s2 as *const u8, n);

        // Only the leading 1/2/4/8 bytes are converted. Converting the whole slice would fail
        // (and panic) for every length that is not exactly the integer width.
        // The `unwrap`s cannot fail: each sub-slice has exactly the target array length.
        match n {
            1 => __sanitizer_cov_trace_cmp1(lhs[0], rhs[0]),
            2..=3 => __sanitizer_cov_trace_cmp2(
                u16::from_ne_bytes(lhs[..2].try_into().unwrap()),
                u16::from_ne_bytes(rhs[..2].try_into().unwrap()),
            ),
            4..=7 => __sanitizer_cov_trace_cmp4(
                u32::from_ne_bytes(lhs[..4].try_into().unwrap()),
                u32::from_ne_bytes(rhs[..4].try_into().unwrap()),
            ),
            _ => __sanitizer_cov_trace_cmp8(
                u64::from_ne_bytes(lhs[..8].try_into().unwrap()),
                u64::from_ne_bytes(rhs[..8].try_into().unwrap()),
            ),
        }
    }
}
/// It's called by Atheris after the fuzzer has been initialized.
/// The main entrypoint to our fuzzer, which will be called by `Atheris` when fuzzing starts.
/// The `harness_fn` parameter is the function that will be called by `LibAFL` for each iteration
/// and jumps back into `Atheris'` instrumented python code.
#[no_mangle]
#[allow(non_snake_case)]
pub fn LLVMFuzzerRunDriver(
_argc: *const c_int,
_argv: *const *const c_char,
harness_fn: Option<extern "C" fn(*const u8, usize) -> c_int>,
) {
// Registry the metadata types used in this fuzzer
// Needed only on no_std
//RegistryBuilder::register::<Tokens>();
if harness_fn.is_none() {
panic!("No harness callback provided");
}
let harness_fn = harness_fn.unwrap();
if unsafe { EDGES_MAP_PTR.is_null() } {
panic!(
"Edges map was never initialized - __sanitizer_cov_8bit_counters_init never got called"
);
}
println!("Args: {:?}", std::env::args());
let matches = Command::new("libafl_atheris")
.version("0.1.0")
.setting(AppSettings::AllowExternalSubcommands)
.arg(Arg::new("script")) // The python script is the first arg
.arg(
Arg::new("cores")
.short('c')
.long("cores")
.required(true)
.takes_value(true),
)
.arg(
Arg::new("broker_port")
.short('p')
.long("broker-port")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("output")
.short('o')
.long("output")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("input")
.short('i')
.long("input")
.required(true)
.takes_value(true),
)
.arg(
Arg::new("remote_broker_addr")
.short('B')
.long("remote-broker-addr")
.required(false)
.takes_value(true),
)
.arg(
Arg::new("timeout")
.short('t')
.long("timeout")
.required(false)
.takes_value(true),
)
.get_matches();
let workdir = env::current_dir().unwrap();
println!(
"Workdir: {:?}",
env::current_dir().unwrap().to_string_lossy().to_string()
);
let cores = Cores::from_cmdline(matches.get_one::<String>("cores").unwrap())
.expect("No valid core count given!");
let broker_port = matches
.get_one::<String>("broker_port")
.map(|s| s.parse().expect("Invalid broker port"))
.unwrap_or(1337);
let remote_broker_addr = matches
.get_one::<String>("remote_broker_addr")
.map(|s| s.parse().expect("Invalid broker address"));
let input_dirs: Vec<PathBuf> = matches
.values_of("input")
.map(|v| v.map(PathBuf::from).collect())
.unwrap_or_default();
let output_dir = matches
.get_one::<String>("output")
.map(PathBuf::from)
.unwrap_or_else(|| workdir.clone());
let token_files: Vec<&str> = matches
.values_of("tokens")
.map(|v| v.collect())
.unwrap_or_default();
let timeout_ms = matches
.get_one::<String>("timeout")
.map(|s| s.parse().expect("Invalid timeout"))
.unwrap_or(10000);
// let cmplog_enabled = matches.is_present("cmplog");
println!("Workdir: {:?}", workdir.to_string_lossy().to_string());
let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory");
let monitor = MultiMonitor::new(|s| println!("{s}"));
// TODO: we need to handle Atheris calls to `exit` on errors somhow.
let mut run_client = |state: Option<_>, mut mgr, _core_id| {
// Create an observation channel using the coverage map
let edges_observer = unsafe { HitcountsMapObserver::new(std_edges_map_observer("edges")) };
// Create an observation channel to keep track of the execution time
let time_observer = TimeObserver::new("time");
// Create the Cmp observer
let cmplog_observer = CmpLogObserver::new("cmplog", true);
// Feedback to rate the interestingness of an input
// This one is composed by two Feedbacks in OR
let mut feedback = feedback_or!(
// New maximization map feedback linked to the edges observer and the feedback state
MaxMapFeedback::new_tracking(&edges_observer, true, false),
// Time feedback, this one does not need a feedback state
TimeFeedback::new_with_observer(&time_observer)
);
// A feedback to choose if an input is a solution or not
let mut objective = feedback_or!(CrashFeedback::new(), TimeoutFeedback::new());
// If not restarting, create a State from scratch
let mut state = state.unwrap_or_else(|| {
StdState::new(
// RNG
StdRand::with_seed(current_nanos()),
// Corpus that will be evolved, we keep it in memory for performance
InMemoryCorpus::new(),
// Corpus in which we store solutions (crashes in this example),
// on disk so the user can get them after stopping the fuzzer
OnDiskCorpus::new(output_dir.clone()).unwrap(),
// States of the feedbacks.
// The feedbacks can report the data that should persist in the State.
&mut feedback,
// Same for objective feedbacks
&mut objective,
)
.unwrap()
});
// Create a dictionary if not existing
if state.metadata().get::<Tokens>().is_none() {
for tokens_file in &token_files {
state.add_metadata(Tokens::from_file(tokens_file)?);
}
}
// A minimization+queue policy to get testcasess from the corpus
let scheduler = IndexesLenTimeMinimizerScheduler::new(QueueScheduler::new());
// A fuzzer with feedbacks and a corpus scheduler
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
// The wrapped harness function, calling out to the LLVM-style harness
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
harness_fn(buf.as_ptr(), buf.len());
ExitKind::Ok
};
// Create the executor for an in-process function with one observer for edge coverage and one for the execution time
let mut executor = TimeoutExecutor::new(
InProcessExecutor::new(
&mut harness,
tuple_list!(edges_observer, time_observer),
&mut fuzzer,
&mut state,
&mut mgr,
)?,
Duration::from_millis(timeout_ms),
);
// Secondary harness due to mut ownership
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let buf = target.as_slice();
harness_fn(buf.as_ptr(), buf.len());
ExitKind::Ok
};
// Setup a tracing stage in which we log comparisons
let tracing = TracingStage::new(InProcessExecutor::new(
&mut harness,
tuple_list!(cmplog_observer),
&mut fuzzer,
&mut state,
&mut mgr,
)?);
// Setup a randomic Input2State stage
let i2s =
StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new())));
// Setup a basic mutator
let mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations()));
let mutational = StdMutationalStage::new(mutator);
// The order of the stages matter!
let mut stages = tuple_list!(tracing, i2s, mutational);
// In case the corpus is empty (on first run), reset
if state.corpus().count() < 1 {
if input_dirs.is_empty() {
// Generator of printable bytearrays of max size 32
let mut generator = RandBytesGenerator::new(32);
// Generate 8 initial inputs
state
.generate_initial_inputs(
&mut fuzzer,
&mut executor,
&mut generator,
&mut mgr,
8,
)
.expect("Failed to generate the initial corpus");
println!(
"We imported {} inputs from the generator.",
state.corpus().count()
);
} else {
println!("Loading from {:?}", &input_dirs);
// Load from disk
// we used _forced since some Atheris testcases don't touch the map at all, hence, wolud not load any data.
state
.load_initial_inputs_forced(&mut fuzzer, &mut executor, &mut mgr, &input_dirs)
.unwrap_or_else(|_| {
panic!("Failed to load initial corpus at {:?}", &input_dirs)
});
println!("We imported {} inputs from disk.", state.corpus().count());
}
}
fuzzer.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)?;
Ok(())
};
// Let's go. Python fuzzing ftw!
match Launcher::builder()
.shmem_provider(shmem_provider)
.configuration(EventConfig::from_name("default"))
.monitor(monitor)
.run_client(&mut run_client)
.cores(&cores)
.broker_port(broker_port)
.remote_broker_addr(remote_broker_addr)
// remove this comment to sience the target.
//.stdout_file(Some("/dev/null"))
.build()
.launch()
{
Ok(_) | Err(Error::ShuttingDown) => (),
Err(e) => panic!("Error in fuzzer: {}", e),
};
}