Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--regex support for search-pattern command #860

Merged
merged 1 commit into from
Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/commands/search-pattern.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,12 @@ Sometimes, you may need to search for a very common pattern. To limit the search
gef➤ search-pattern 0x4005f6 little libc
gef➤ search-pattern 0x4005f6 little 0x603100-0x603200
```
### Searching in a specific range using regex ###
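Sometimes you may need a more advanced search based on a regular expression. To do so, pass the `--regex` argument followed by the start address, the end address and the pattern.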

Example: find null-terminated printable C strings (bytes in the range 0x20-0x7e, at least 2 bytes long) with a regex:

```
gef➤ search-pattern --regex 0x401000 0x401500 ([\\x20-\\x7E]{2,})(?=\\x00)

```
58 changes: 54 additions & 4 deletions gef.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

import abc
import argparse
import ast
import binascii
import codecs
import collections
Expand Down Expand Up @@ -5686,10 +5687,16 @@ class SearchPatternCommand(GenericCommand):
_cmdline_ = "search-pattern"
_syntax_ = f"{_cmdline_} PATTERN [little|big] [section]"
_aliases_ = ["grep", "xref"]
_example_ = (f"\n{_cmdline_} AAAAAAAA"
f"\n{_cmdline_} 0x555555554000 little stack"
f"\n{_cmdline_} AAAA 0x600000-0x601000")

_example_ = [f"{_cmdline_} AAAAAAAA",
f"{_cmdline_} 0x555555554000 little stack",
f"{_cmdline_} AAAA 0x600000-0x601000",
f"{_cmdline_} --regex 0x401000 0x401500 ([\\\\x20-\\\\x7E]{{2,}})(?=\\\\x00) <-- It matchs null-end-printable(from x20-x7e) C strings (min size 2 bytes)"]

def __init__(self) -> None:
    """Initialise the base command and store the two values used by the regex search.

    `max_size_preview` and `nr_pages_chunk` are read back through `self[...]`
    by `search_binpattern_by_address`. Each entry is stored as a
    (value, description) tuple -- presumably the default value and help text
    of a command setting; confirm against the base class.
    """
    super().__init__()
    entries = (
        ("max_size_preview", (10, "max size preview of bytes")),
        ("nr_pages_chunk", (0x400, "number of pages readed for each memory read chunk")),
    )
    # Same assignment order as before: preview size first, then chunk size.
    for key, value in entries:
        self[key] = value
therealdreg marked this conversation as resolved.
Show resolved Hide resolved

def print_section(self, section: Section) -> None:
title = "In "
if section.path:
Expand Down Expand Up @@ -5746,6 +5753,37 @@ def search_pattern_by_address(self, pattern: str, start_address: int, end_addres
del mem

return locations

def search_binpattern_by_address(self, binpattern: bytes, start_address: int, end_address: int) -> List[Tuple[int, int, Optional[str]]]:
    """Search a binary regex pattern within a range defined by arguments.

    The range `[start_address, end_address)` is read chunk by chunk, each
    chunk being `nr_pages_chunk` pages long, and every chunk is scanned with
    `re.finditer`.

    Args:
        binpattern: bytes regular expression passed to `re.finditer`.
        start_address: first address to read.
        end_address: address at which reading stops (exclusive).

    Returns:
        A list of `(start, end, preview)` tuples: `start` and `end` are the
        addresses of the first and last byte of a match (`end == start` for
        an empty match) and `preview` is the `str()` of at most
        `max_size_preview` matched bytes followed by "...".

    NOTE: chunks are scanned independently, so a match that straddles a
    chunk boundary is not reported.
    """
    step = self["nr_pages_chunk"] * gef.session.pagesize
    if step <= 0:
        # range() raises ValueError on a zero step and yields nothing on a
        # negative one; fall back to reading the whole range in one chunk.
        step = max(end_address - start_address, 1)

    # Loop-invariant, so look it up once instead of once per chunk.
    preview_size = self["max_size_preview"]
    locations = []

    for chunk_addr in range(start_address, end_address, step):
        # The last chunk may be shorter than a full step.
        chunk_size = min(step, end_address - chunk_addr)

        try:
            mem = gef.memory.read(chunk_addr, chunk_size)
        except gdb.MemoryError:
            # Stop at the first unreadable chunk, but keep the matches
            # already collected instead of discarding them.
            break

        for match in re.finditer(binpattern, mem):
            match_start, match_end = match.span()
            start = chunk_addr + match_start
            preview = str(mem[match_start:match_end][:preview_size]) + "..."

            # `end` is the address of the last matched byte (inclusive);
            # an empty match starts and ends at the same address.
            size_match = match_end - match_start
            if size_match > 0:
                size_match -= 1
            end = start + size_match

            locations.append((start, end, preview))

    return locations

def search_pattern(self, pattern: str, section_name: str) -> None:
"""Search a pattern within the whole userland memory."""
Expand Down Expand Up @@ -5774,6 +5812,18 @@ def do_invoke(self, argv: List[str]) -> None:
if argc < 1:
self.usage()
return

if argc > 3 and argv[0].startswith("--regex"):
pattern = ' '.join(argv[3:])
pattern = ast.literal_eval("b'" + pattern + "'")
therealdreg marked this conversation as resolved.
Show resolved Hide resolved

addr_start = parse_address(argv[1])
addr_end = parse_address(argv[2])

for loc in self.search_binpattern_by_address(pattern, addr_start, addr_end):
self.print_loc(loc)

return

pattern = argv[0]
endian = gef.arch.endianness
Expand Down
15 changes: 14 additions & 1 deletion tests/commands/search_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""


from tests.utils import BIN_SH, GefUnitTestGeneric, gdb_run_cmd, gdb_start_silent_cmd
from tests.utils import BIN_SH, GefUnitTestGeneric, gdb_run_cmd, gdb_start_silent_cmd, gdb_start_silent_cmd_last_line


class SearchPatternCommand(GefUnitTestGeneric):
Expand All @@ -15,3 +15,16 @@ def test_cmd_search_pattern(self):
res = gdb_start_silent_cmd(f"grep {BIN_SH}")
self.assertNoException(res)
self.assertIn("0x", res)

def test_cmd_search_pattern_regex(self):
    """Check that `search-pattern --regex` only reports a printable string followed by a null byte."""
    # The same command is used for both runs; only the byte written after
    # "ABCDE" on the stack differs.
    cmd = r"search-pattern --regex $sp $sp+7 ([\\x20-\\x7E]{2,})(?=\\x00)"

    res = gdb_start_silent_cmd_last_line("set {char[6]} $sp = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x00 }",
                                         after=[cmd])
    self.assertNoException(res)
    self.assertIn(r"b'ABCDE'", res)

    # this should not match because the binary string is not null-terminated:
    res = gdb_start_silent_cmd_last_line("set {char[6]} $sp = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x03 }",
                                         after=[cmd])
    self.assertNoException(res)
    self.assertNotIn(r"b'ABCDE'", res)
self.assertTrue(r"b'ABCDE'" not in res)