Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for scanning APK files #3517

Merged
merged 25 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3334239
apk initial
joeleonjr Oct 28, 2024
38b636e
Merge branch 'main' into apk-scanning
joeleonjr Oct 28, 2024
5efbacd
added a couple tests
joeleonjr Oct 28, 2024
3133d4d
Update pkg/handlers/handlers.go
joeleonjr Oct 28, 2024
a8e51c1
Merge branch 'main' into apk-scanning
joeleonjr Oct 28, 2024
3b15fbb
updating log status
joeleonjr Oct 29, 2024
8d83df9
Merge branch 'main' into apk-scanning
joeleonjr Oct 29, 2024
c99a4dc
refactored + added new integration test
joeleonjr Oct 30, 2024
21d0312
updated test
joeleonjr Oct 30, 2024
adb2ff7
Merge branch 'main' into apk-scanning
joeleonjr Oct 30, 2024
a8276d0
fixed linting error
joeleonjr Oct 30, 2024
866a5ba
Merge branch 'main' into apk-scanning
joeleonjr Oct 30, 2024
971b9c9
Merge branch 'main' into apk-scanning
joeleonjr Oct 31, 2024
cf3b23c
Merge branch 'main' into apk-scanning
joeleonjr Nov 1, 2024
eac29fe
added keyword-value pairs during dex file scanning
joeleonjr Nov 1, 2024
d2aac61
fixed test
joeleonjr Nov 1, 2024
46d6239
Merge branch 'main' into apk-scanning
joeleonjr Nov 12, 2024
0091202
performance updates
joeleonjr Nov 14, 2024
640ce73
Merge branch 'main' into apk-scanning
joeleonjr Nov 14, 2024
224dab1
fixing decodeXML memory issue
joeleonjr Nov 14, 2024
b2497d4
added feature flag
joeleonjr Nov 14, 2024
454037b
refactored keyword matching + ahocorasick
joeleonjr Nov 14, 2024
68045e5
small updates
joeleonjr Nov 15, 2024
7404d53
refactored for dextk.WithReadCache()
joeleonjr Nov 15, 2024
b71039d
Merge branch 'main' into apk-scanning
joeleonjr Nov 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ require (
github.com/apache/arrow/go/v14 v14.0.2 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae // indirect
github.com/aws/smithy-go v1.20.1 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
Expand All @@ -179,6 +180,7 @@ require (
github.com/couchbase/goprotostellar v1.0.2 // indirect
github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/csnewman/dextk v0.3.0 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/danieljoos/wincred v1.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae h1:rDNramK9mnAbvUBJyIRZnzHchM45cXexHIX9pS9da4Q=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae/go.mod h1:GNvprXNmXaDjpHmN3RFxz5QdK5VXTUvmQludCbjoBy4=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw=
Expand Down Expand Up @@ -249,6 +251,8 @@ github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/crewjam/rfc5424 v0.1.0 h1:MSeXJm22oKovLzWj44AHwaItjIMUMugYGkEzfa831H8=
github.com/crewjam/rfc5424 v0.1.0/go.mod h1:RCi9M3xHVOeerf6ULZzqv2xOGRO/zYaVUeRyPnBW3gQ=
github.com/csnewman/dextk v0.3.0 h1:gigNZlZRNfCuARV7depunRlafEAzGhyvgBQo1FT3/0M=
github.com/csnewman/dextk v0.3.0/go.mod h1:FcDoI3258ea0KPQogyv4iazQRGcLFNOW+I4pHBUfNO0=
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/danieljoos/wincred v1.1.2 h1:QLdCxFs1/Yl4zduvBdcHB8goaYk9RARS2SgLLRuAyr0=
Expand Down
363 changes: 363 additions & 0 deletions pkg/handlers/apk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
package handlers

import (
"archive/zip"
"bytes"
"encoding/xml"
"errors"
"fmt"
"io"
"log"
"regexp"
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
"strings"
"time"

dextk "github.com/csnewman/dextk"

"github.com/avast/apkparser"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
)

// General note: tools such as jadx and apktool can fully decompile an APK. However, none of
// them are written in Go, they take a while to run, and they decompile many files that most
// likely don't contain secrets. Instead, this handler is a lightweight alternative that
// searches only the file types that most commonly contain secrets, and it runs in a
// fraction of the time (e.g. 15 seconds vs. 5 minutes).

// ToDo: Scan nested APKs (aka XAPK files). ATM the archive.go file will skip over them.
Copy link

@bugbaba bugbaba Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For .xapk files, here is how MobSF, a popular security scanning tool, handles them.

It unzips the archive -> reads the manifest.json file -> extracts the APK with the base ID and scans only that APK.

MobSF/Mobile-Security-Framework-MobSF@a558693

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.apkm is another common format (at least for ApkMirror).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bugbaba I appreciate the idea re: .xapk files. IMO the cleanest way to resolve the lack of .xapk scanning is to address it in the archive.go file. Basically, unzip .xapk like any other zip, and then call back out to the HandleFile function in handlers.go, so that any unique file that requires a special handler can be dealt with. And maybe it's not that exact approach, but something along those lines.

I'll put some effort into that in a different PR.

// ToDo: Provide file location information to secret output.

// targetFileTypes lists the file-name suffixes of the APK entries that get scanned.
// Only xml, dex, and json files are targeted for now; this might need to be expanded.
// If expanding, ensure the processFile function is updated to handle the new file types.
var targetFileTypes = []string{".xml", ".dex", ".json"}

// targetInstructionTypes lists the dex instructions whose text is kept during dex scanning.
// Only `const-string` and `iput-object` are considered for now; this might need to be expanded.
// If expanding, ensure the formatInstruction function is updated to handle the relevant instructions.
//   - const-string: loads a string into a register (value)
//   - iput-object: stores a string into a field (key)
var targetInstructionTypes = []string{"const-string", "iput-object"}

// apkHandler handles Android application package (APK) files.
// It embeds *defaultHandler, so the embedded handler's fields and methods are
// available directly on an apkHandler value.
type apkHandler struct{ *defaultHandler }

// newAPKHandler returns an apkHandler whose embedded default handler is
// created for the apkHandlerType handler type.
func newAPKHandler() *apkHandler {
	h := new(apkHandler)
	h.defaultHandler = newDefaultHandler(apkHandlerType)
	return h
}

// HandleFile starts processing an APK in a background goroutine and immediately
// returns the channel on which extracted content is delivered. The returned error
// is always nil; failures inside the goroutine are logged and recorded in metrics.
// The channel is closed once processing finishes, times out (maxTimeout), or panics.
func (h *apkHandler) HandleFile(ctx logContext.Context, input fileReader) (chan []byte, error) {
	out := make(chan []byte, defaultBufferSize)

	go func() {
		ctx, cancel := logContext.WithTimeout(ctx, maxTimeout)
		defer cancel()
		defer close(out)

		// err is shared with the deferred closures below so that both a returned
		// error and a recovered panic end up in the metrics.
		var (
			began = time.Now()
			err   error
		)
		defer func() {
			h.measureLatencyAndHandleErrors(began, err)
			h.metrics.incFilesProcessed()
		}()

		// Registered last, so it runs first: a panic is converted into err
		// before the metrics closure above reads it.
		defer func() {
			recovered := recover()
			if recovered == nil {
				return
			}
			if asErr, isErr := recovered.(error); isErr {
				err = asErr
			} else {
				err = fmt.Errorf("panic occurred: %v", recovered)
			}
			ctx.Logger().Error(err, "Panic occurred when reading apk archive")
		}()

		if err = h.ProcessAPK(ctx, input, out); err != nil {
			ctx.Logger().Error(err, "error handling apk.")
		}
	}()

	return out, nil
}

// ProcessAPK scans an APK and sends the extracted content to apkChan.
//
// The input is opened as a ZIP archive and resources.arsc is parsed first, because
// the resulting resource table is needed to decode XML. A failure in either step
// aborts the scan and is returned. After that, the resource strings and every
// archive entry whose name ends in one of targetFileTypes are processed on a
// best-effort basis: failures are logged and do not stop the scan.
func (h *apkHandler) ProcessAPK(ctx logContext.Context, input fileReader, apkChan chan []byte) error {
	archive, err := createZipReader(input)
	if err != nil {
		return err
	}

	table, err := parseResTable(archive)
	if err != nil {
		return err
	}

	// Secrets stored as string resources.
	if resErr := h.processResources(ctx, table, apkChan); resErr != nil {
		ctx.Logger().Error(resErr, "failed to process resources.arsc")
	}

	// Secrets stored in xml, json and dex entries.
	for _, entry := range archive.File {
		if !hasSuffix(entry.Name, targetFileTypes) {
			continue
		}
		if fileErr := h.processFile(ctx, entry, table, apkChan); fileErr != nil {
			ctx.Logger().Error(fileErr, fmt.Sprintf("failed to process file: %s", entry.Name))
		}
	}
	return nil
}

// processResources extracts the string resources from an already-parsed
// resources.arsc table and forwards them to apkChan.
// It returns an error when resTable is nil or when string extraction fails.
func (h *apkHandler) processResources(ctx logContext.Context, resTable *apkparser.ResourceTable, apkChan chan []byte) error {
	const source = "resources.arsc"

	if resTable == nil {
		return errors.New("ResourceTable is nil")
	}

	extracted, extractErr := extractStringsFromResTable(resTable)
	if extractErr != nil {
		return fmt.Errorf("failed to parse strings from resources.arsc: %w", extractErr)
	}

	h.handleAPKFileContent(ctx, extracted, source, apkChan)
	return nil
}

// processFile processes the file and sends the extracted data to the provided channel.
func (h *apkHandler) processFile(ctx logContext.Context, file *zip.File, resTable *apkparser.ResourceTable, apkChan chan []byte) error {
data, err := readFile(file)
if err != nil {
return fmt.Errorf("failed to read file %s: %w", file.Name, err)
}
if len(data) == 0 {
return nil
}

// Decode the file based on its extension
switch {
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
case strings.HasSuffix(file.Name, ".xml"):
xmlData, err := decodeXML(data, resTable)
if err != nil {
return fmt.Errorf("failed to decode xml file %s: %w", file.Name, err)
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
}
h.handleAPKFileContent(ctx, xmlData, file.Name, apkChan)
case strings.HasSuffix(file.Name, ".dex"):
dexStrings, err := decodeDexStrings(data)
if err != nil {
return fmt.Errorf("failed to decode dex file %s: %w", file.Name, err)
}
h.handleAPKFileContent(ctx, dexStrings, file.Name, apkChan)
case strings.HasSuffix(file.Name, ".json"):
h.handleAPKFileContent(ctx, string(data), file.Name, apkChan)
}
return nil
}

// handleAPKFileContent wraps the extracted text in a mimeTypeReader and passes it
// to handleNonArchiveContent, which delivers it on apkChan. The file name and the
// text length are attached to the logging context. Errors are logged, not returned.
func (h *apkHandler) handleAPKFileContent(ctx logContext.Context, data string, fileName string, apkChan chan []byte) {
	ctx = logContext.WithValues(ctx, "filename", fileName, "size", len(data))

	content := mimeTypeReader{
		Reader:  bytes.NewReader([]byte(data)),
		mimeExt: "",
	}

	err := h.handleNonArchiveContent(ctx, content, apkChan)
	if err != nil {
		ctx.Logger().Error(err, "error handling apk file")
	}
}

// createZipReader creates a new ZIP reader from the input fileReader.
func createZipReader(input fileReader) (*zip.Reader, error) {
size, err := input.Seek(0, io.SeekEnd)
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}
// Reset the reader position to the start
_, err = input.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
// Create a new ZIP reader for the data
zipReader, err := zip.NewReader(input, size)
if err != nil {
return nil, err
}
return zipReader, err
}

// parseResTable locates the first archive entry named exactly "resources.arsc",
// reads it, and parses it into a ResourceTable.
// It returns an error when the entry is missing, unreadable, or fails to parse.
func parseResTable(zipReader *zip.Reader) (*apkparser.ResourceTable, error) {
	var arsc *zip.File
	for _, candidate := range zipReader.File {
		if candidate.Name == "resources.arsc" {
			arsc = candidate
			break
		}
	}
	if arsc == nil {
		return nil, errors.New("resources.arsc file not found")
	}

	raw, err := readFile(arsc)
	if err != nil {
		return nil, err
	}

	table, err := apkparser.ParseResourceTable(bytes.NewReader(raw))
	if err != nil {
		return nil, err
	}
	return table, nil
}

// readFile returns the full, decompressed contents of a single zip entry.
// The entry is closed before returning, on both the success and the failure path.
func readFile(file *zip.File) ([]byte, error) {
	src, err := file.Open()
	if err != nil {
		return nil, err
	}

	contents := new(bytes.Buffer)
	if _, err = io.Copy(contents, src); err != nil {
		src.Close()
		return nil, err
	}
	src.Close()

	return contents.Bytes(), nil
}

// hasSuffix reports whether name ends with at least one of the given suffixes.
// It returns false when suffixes is empty.
func hasSuffix(name string, suffixes []string) bool {
	matched := false
	for _, candidate := range suffixes {
		if matched = strings.HasSuffix(name, candidate); matched {
			break
		}
	}
	return matched
}

// hasSubstring reports whether s contains at least one of the given substrings.
// It returns false when substrings is empty.
func hasSubstring(s string, substrings []string) bool {
	idx := 0
	// Advance past every candidate that does not occur in s.
	for idx < len(substrings) && !strings.Contains(s, substrings[idx]) {
		idx++
	}
	return idx < len(substrings)
}

// extractStringsFromResTable collects the string resources of resTable as
// "key: value" lines, one resource per line.
//
// Note: This is a hacky way to get the strings from the resources table.
// APK strings are typically (always?) stored in the 0x7f000000-0x7fffffff range:
// https://chromium.googlesource.com/chromium/src/+/master/build/android/docs/life_of_a_resource.md
//
// Every ID in that range is probed in ascending order. Once a string entry has been
// seen, the first following entry of a different type ends the scan. An error from
// reading a string value aborts the extraction and is returned with an empty result.
func extractStringsFromResTable(resTable *apkparser.ResourceTable) (string, error) {
	// The bounds are typed as uint32 so the loop terminates on every platform:
	// with a plain int counter, `i <= 0x7fffffff` can never become false where
	// int is 32 bits wide.
	const (
		firstID uint32 = 0x7f000000
		lastID  uint32 = 0x7fffffff
	)

	// A builder avoids re-copying the accumulated text for every resource.
	var out strings.Builder
	inStrings := false
	for id := firstID; id <= lastID; id++ {
		// The lookup error is intentionally ignored: a nil entry is treated as
		// "no resource with this ID" and skipped.
		entry, _ := resTable.GetResourceEntry(id)
		if entry == nil {
			continue
		}

		if entry.ResourceType != "string" {
			// Exit the loop if we've finished processing the strings.
			if inStrings {
				break
			}
			continue
		}

		inStrings = true
		val, err := entry.GetValue().String()
		if err != nil {
			return "", err
		}
		fmt.Fprintf(&out, "%s: %s\n", entry.Key, val)
	}
	return out.String(), nil
}

// decodeDexStrings parses a dex file and returns the text of the instructions
// selected by processDexMethod for every direct and virtual method of every class.
//
// A failure while advancing the class iterator stops the scan and returns what
// was collected so far; a failure while processing a method is returned as an error.
//
// NOTE(review): a dex file that cannot be parsed triggers log.Panicln instead of
// an error return. Returning the error would be preferable, but this is the only
// use of the "log" package in the file, so that import has to be removed in the
// same change.
func decodeDexStrings(data []byte) (string, error) {
	reader, err := dextk.Read(bytes.NewReader(data))
	if err != nil {
		log.Panicln(err)
	}

	var collected strings.Builder

	// appendMethod renders a single method and adds the result to collected.
	appendMethod := func(m dextk.MethodNode) error {
		rendered, methodErr := processDexMethod(reader, m)
		if methodErr != nil {
			return methodErr
		}
		collected.WriteString(rendered)
		return nil
	}

	for classes := reader.ClassIter(); classes.HasNext(); {
		class, nextErr := classes.Next()
		if nextErr != nil {
			break
		}

		for _, m := range class.DirectMethods {
			if err := appendMethod(m); err != nil {
				return "", err
			}
		}

		for _, m := range class.VirtualMethods {
			if err := appendMethod(m); err != nil {
				return "", err
			}
		}
	}
	return collected.String(), nil
}

// processDexMethod returns the formatted text of the instructions in method m that
// mention one of targetInstructionTypes, one instruction per line.
//
// A method whose CodeOff is zero is skipped and yields an empty string. An error
// from reading or parsing the method's code is returned unchanged.
func processDexMethod(r *dextk.Reader, m dextk.MethodNode) (string, error) {
	if m.CodeOff == 0 {
		return "", nil
	}

	code, err := r.ReadCodeAndParse(m.CodeOff)
	if err != nil {
		return "", err
	}

	var out strings.Builder
	for _, op := range code.Ops {
		// Render each instruction once and reuse the text for both the filter
		// and the formatter.
		text := op.String()
		if !hasSubstring(text, targetInstructionTypes) {
			continue
		}
		out.WriteString(formatInstruction(text))
		out.WriteByte('\n')
	}
	return out.String(), nil
}

// Patterns used by formatInstruction. They are compiled once at package
// initialization rather than on every call, because formatInstruction runs for
// every matching dex instruction.
var (
	// reFieldPrefix matches the leading part of an iput-object instruction up to
	// and including the colon that follows the com/... path.
	reFieldPrefix = regexp.MustCompile(`iput-object obj=\d+ field=com/[a-zA-Z0-9/_]+:`)
	// reTypeSuffix matches the String type descriptor and source register.
	reTypeSuffix = regexp.MustCompile(`Ljava/lang/String; src=\d+`)
	// reConstString matches the opcode and destination register of const-string.
	reConstString = regexp.MustCompile(`const-string dst=\d+`)
)

// formatInstruction removes unnecessary information from the dex instruction.
// Note: This is critical for ensuring secret + keyword are in close proximity.
// If we expand the instructions we're looking at, this function will need to be updated.
//
// Every occurrence of the three patterns above is deleted from line; text that
// matches none of them is returned unchanged.
func formatInstruction(line string) string {
	line = reFieldPrefix.ReplaceAllString(line, "")
	line = reTypeSuffix.ReplaceAllString(line, "")
	line = reConstString.ReplaceAllString(line, "")
	return line
}

func decodeXML(xmlData []byte, resTable *apkparser.ResourceTable) (string, error) {
// Create a buffer to store the formatted XML data
var buf bytes.Buffer
enc := xml.NewEncoder(&buf)

// Parse the XML data using the apkparser library + resource table
rdr := bytes.NewReader(xmlData)
err := apkparser.ParseXml(rdr, enc, resTable)
if err != nil {
// If the error is due to plaintext XML, return the plaintext XML stringified
if err.Error() == "xml is in plaintext, binary form expected" {
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
return string(xmlData), nil
}
return "", err
}
return buf.String(), nil
}
Loading
Loading