Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for scanning APK files #3517

Merged
merged 25 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3334239
apk initial
joeleonjr Oct 28, 2024
38b636e
Merge branch 'main' into apk-scanning
joeleonjr Oct 28, 2024
5efbacd
added a couple tests
joeleonjr Oct 28, 2024
3133d4d
Update pkg/handlers/handlers.go
joeleonjr Oct 28, 2024
a8e51c1
Merge branch 'main' into apk-scanning
joeleonjr Oct 28, 2024
3b15fbb
updating log status
joeleonjr Oct 29, 2024
8d83df9
Merge branch 'main' into apk-scanning
joeleonjr Oct 29, 2024
c99a4dc
refactored + added new integration test
joeleonjr Oct 30, 2024
21d0312
updated test
joeleonjr Oct 30, 2024
adb2ff7
Merge branch 'main' into apk-scanning
joeleonjr Oct 30, 2024
a8276d0
fixed linting error
joeleonjr Oct 30, 2024
866a5ba
Merge branch 'main' into apk-scanning
joeleonjr Oct 30, 2024
971b9c9
Merge branch 'main' into apk-scanning
joeleonjr Oct 31, 2024
cf3b23c
Merge branch 'main' into apk-scanning
joeleonjr Nov 1, 2024
eac29fe
added keyword-value pairs during dex file scanning
joeleonjr Nov 1, 2024
d2aac61
fixed test
joeleonjr Nov 1, 2024
46d6239
Merge branch 'main' into apk-scanning
joeleonjr Nov 12, 2024
0091202
performance updates
joeleonjr Nov 14, 2024
640ce73
Merge branch 'main' into apk-scanning
joeleonjr Nov 14, 2024
224dab1
fixing decodeXML memory issue
joeleonjr Nov 14, 2024
b2497d4
added feature flag
joeleonjr Nov 14, 2024
454037b
refactored keyword matching + ahocorasick
joeleonjr Nov 14, 2024
68045e5
small updates
joeleonjr Nov 15, 2024
7404d53
refactored for dextk.WithReadCache()
joeleonjr Nov 15, 2024
b71039d
Merge branch 'main' into apk-scanning
joeleonjr Nov 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ require (
github.com/apache/arrow/go/v14 v14.0.2 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae // indirect
github.com/aws/smithy-go v1.20.1 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
Expand All @@ -179,6 +180,7 @@ require (
github.com/couchbase/goprotostellar v1.0.2 // indirect
github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28 // indirect
github.com/cpuguy83/dockercfg v0.3.2 // indirect
github.com/csnewman/dextk v0.3.0 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/danieljoos/wincred v1.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae h1:rDNramK9mnAbvUBJyIRZnzHchM45cXexHIX9pS9da4Q=
github.com/avast/apkparser v0.0.0-20240729092610-90591e0804ae/go.mod h1:GNvprXNmXaDjpHmN3RFxz5QdK5VXTUvmQludCbjoBy4=
github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU=
github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU=
github.com/aws/smithy-go v1.20.1 h1:4SZlSlMr36UEqC7XOyRVb27XMeZubNcBNN+9IgEPIQw=
Expand Down Expand Up @@ -249,6 +251,8 @@ github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/crewjam/rfc5424 v0.1.0 h1:MSeXJm22oKovLzWj44AHwaItjIMUMugYGkEzfa831H8=
github.com/crewjam/rfc5424 v0.1.0/go.mod h1:RCi9M3xHVOeerf6ULZzqv2xOGRO/zYaVUeRyPnBW3gQ=
github.com/csnewman/dextk v0.3.0 h1:gigNZlZRNfCuARV7depunRlafEAzGhyvgBQo1FT3/0M=
github.com/csnewman/dextk v0.3.0/go.mod h1:FcDoI3258ea0KPQogyv4iazQRGcLFNOW+I4pHBUfNO0=
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/danieljoos/wincred v1.1.2 h1:QLdCxFs1/Yl4zduvBdcHB8goaYk9RARS2SgLLRuAyr0=
Expand Down
363 changes: 363 additions & 0 deletions pkg/handlers/apk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
package handlers

import (
"archive/zip"
"bytes"
"encoding/xml"
"errors"
"fmt"
"io"
"regexp"
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
"strings"
"time"

dextk "github.com/csnewman/dextk"

"github.com/avast/apkparser"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
)

// General Note: There are tools that can fully decompile an apk (e.g. jadx, apktool, etc.)
// However, none of these are written in Go, they take a while to run, and
// they will decompile files that most likely don't contain secrets. So instead, we have a
// lightweight version that searches for secrets in the most common files that contain them
// and runs in a fraction of the time (ex: 15 seconds vs. 5 minutes).

// ToDo: Scan nested APKs (aka XAPK files). ATM the archive.go file will skip over them.
Copy link

@bugbaba bugbaba Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For .xapk files, here is how MobSF, a popular security scanning tool, handles them:

It unzips the archive -> reads the manifest.json file -> extracts the APK with the base ID and scans only that APK.

MobSF/Mobile-Security-Framework-MobSF@a558693

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.apkm is another common format (at least for ApkMirror).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bugbaba I appreciate the idea re: .xapk files. IMO the cleanest way to resolve the lack of .xapk scanning is to address it in the archive.go file. Basically, unzip .xapk like any other zip, and then call back out to the HandleFile function in handlers.go, so that any unique file that requires a special handler can be dealt with. And maybe it's not that exact approach, but something along those lines.

I'll put some effort into that in a different PR.

// ToDo: Provide file location information to secret output.

// Dex instruction selection and clean-up patterns.
// The regexes are compiled once at package scope (for performance) rather than per instruction.
var (
	// targetInstructionTypes lists the instruction substrings that parseDexInstructions keeps.
	targetInstructionTypes = []string{"const-string", "iput-object"}
	// Note: We're only looking at `const-string` and `iput-object` instructions for now. This might need to be expanded.
	// If expanding, update the precompiled regexes below + update the formatInstruction function.
	// - const-string: loads a string into a register (value)
	// - iput-object: stores a string into a field (key)

	// reFieldPrefix matches the leading part of an iput-object instruction up to the field name.
	// Only field paths that begin with `com/` are matched.
	reFieldPrefix = regexp.MustCompile(`iput-object obj=\d+ field=com/[a-zA-Z0-9/_]+:`)
	// reTypeSuffix matches the trailing String type descriptor and source register.
	reTypeSuffix = regexp.MustCompile(`Ljava/lang/String; src=\d+`)
	// reConstString matches the const-string opcode and its destination register.
	reConstString = regexp.MustCompile(`const-string dst=\d+`)
	// Precompiling regexes for performance
)

// apkHandler handles apk archive formats.
// It embeds *defaultHandler, so that handler's fields and methods are promoted onto apkHandler.
type apkHandler struct{ *defaultHandler }

// newAPKHandler builds an apkHandler whose embedded default handler is
// created with the apkHandlerType handler type.
func newAPKHandler() *apkHandler {
	base := newDefaultHandler(apkHandlerType)
	return &apkHandler{defaultHandler: base}
}

// HandleFile processes apk formatted files.
//
// It returns a buffered channel immediately and does the actual work in a
// background goroutine, which sends extracted content on the channel and
// closes it when finished. The returned error is always nil; failures inside
// the goroutine are logged and passed to measureLatencyAndHandleErrors.
func (h *apkHandler) HandleFile(ctx logContext.Context, input fileReader) (chan []byte, error) {
	apkChan := make(chan []byte, defaultBufferSize)

	go func() {
		// Bound the whole APK scan by maxTimeout; this ctx shadows the caller's ctx inside the goroutine.
		ctx, cancel := logContext.WithTimeout(ctx, maxTimeout)
		defer cancel()
		// Deferred calls run in reverse registration order, so the channel is
		// closed only after the recovery and metrics defers below have run.
		defer close(apkChan)

		// Update the metrics for the file processing.
		start := time.Now()
		var err error
		// Registered before the recover defer so that it runs after it and
		// observes any error the recovery stored in err.
		defer func() {
			h.measureLatencyAndHandleErrors(start, err)
			h.metrics.incFilesProcessed()
		}()

		// Defer a panic recovery to handle any panics that occur during the APK processing.
		defer func() {
			if r := recover(); r != nil {
				// Convert the panic value into an error so the metrics defer can report it.
				if e, ok := r.(error); ok {
					err = e
				} else {
					err = fmt.Errorf("panic occurred: %v", r)
				}
				ctx.Logger().Error(err, "Panic occurred when reading apk archive")
			}
		}()

		// Assign (not :=) so the deferred closures above see the result.
		if err = h.processAPK(ctx, input, apkChan); err != nil {
			ctx.Logger().Error(err, "error handling apk.")
		}
	}()
	return apkChan, nil
}

// processAPK processes the apk file and sends the extracted data to the provided channel.
//
// It opens the input as a ZIP archive, parses resources.arsc into a
// ResourceTable (needed for XML decoding), scans the resource strings, and
// then scans every archive entry. Failing to open the archive or to parse
// resources.arsc aborts the scan; failures on individual entries are logged
// and skipped. The scan stops with the context's error once ctx is cancelled
// or its deadline has passed.
func (h *apkHandler) processAPK(ctx logContext.Context, input fileReader, apkChan chan []byte) error {
	// Create a ZIP reader from the input fileReader
	zipReader, err := createZipReader(input)
	if err != nil {
		return err
	}

	// Extract the resources.arsc file into a ResourceTable (needed for XML decoding)
	resTable, err := parseResTable(zipReader)
	if err != nil {
		return err
	}

	// Process the ResourceTable file for secrets (best effort: log and keep going)
	if err := h.processResources(ctx, resTable, apkChan); err != nil {
		ctx.Logger().Error(err, "failed to process resources.arsc")
	}

	// Process all files for secrets
	for _, file := range zipReader.File {
		// Decoding dex/xml entries does not observe ctx on its own, so check
		// here to avoid working through the rest of the archive after a timeout.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := h.processFile(ctx, file, resTable, apkChan); err != nil {
			ctx.Logger().V(2).Info(fmt.Sprintf("failed to process file: %s", file.Name), "error", err)
		}
	}
	return nil
}

// processResources scans the string resources held in the parsed
// resources.arsc table and forwards them to apkChan.
// It returns an error when resTable is nil, when the strings cannot be
// extracted, or when forwarding the content fails.
func (h *apkHandler) processResources(ctx logContext.Context, resTable *apkparser.ResourceTable, apkChan chan []byte) error {
	if resTable == nil {
		return errors.New("ResourceTable is nil")
	}

	stringsRdr, extractErr := extractStringsFromResTable(resTable)
	if extractErr != nil {
		return fmt.Errorf("failed to parse strings from resources.arsc: %w", extractErr)
	}

	const sourceName = "resources.arsc"
	return h.handleAPKFileContent(ctx, stringsRdr, sourceName, apkChan)
}

// processFile processes the file and sends the extracted data to the provided channel.
func (h *apkHandler) processFile(ctx logContext.Context, file *zip.File, resTable *apkparser.ResourceTable, apkChan chan []byte) error {
// check if the file is empty
if file.UncompressedSize64 == 0 {
return nil
}

// Read the file data
rdr, err := readFile(file)
if err != nil {
return fmt.Errorf("failed to read file %s: %w", file.Name, err)
}
defer rdr.Close()

// Decode the file based on its extension
switch {
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
case strings.HasSuffix(file.Name, ".xml"):
xmlRdr, err := decodeXML(rdr, resTable)
if err != nil {
return fmt.Errorf("failed to decode xml file %s: %w", file.Name, err)
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
}
return h.handleAPKFileContent(ctx, xmlRdr, file.Name, apkChan)
case strings.HasSuffix(file.Name, ".dex"):
dexRdr, err := processDexFile(ctx, rdr)
if err != nil {
return fmt.Errorf("failed to decode dex file %s: %w", file.Name, err)
}
return h.handleAPKFileContent(ctx, dexRdr, file.Name, apkChan)
default:
return h.handleAPKFileContent(ctx, rdr, file.Name, apkChan)
}
}

// handleAPKFileContent wraps rdr in a mime-type reader, tags the logging
// context with the file name, and hands the content to
// handleNonArchiveContent, which delivers it on apkChan.
func (h *apkHandler) handleAPKFileContent(ctx logContext.Context, rdr io.Reader, fileName string, apkChan chan []byte) error {
	mimeRdr, mimeErr := newMimeTypeReader(rdr)
	if mimeErr != nil {
		return fmt.Errorf("failed to create mimeTypeReader for file %s: %w", fileName, mimeErr)
	}

	fileCtx := logContext.WithValues(ctx, "filename", fileName)
	return h.handleNonArchiveContent(fileCtx, mimeRdr, apkChan)
}

// createZipReader creates a new ZIP reader from the input fileReader.
func createZipReader(input fileReader) (*zip.Reader, error) {
size, err := input.Seek(0, io.SeekEnd)
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}
// Reset the reader position to the start
_, err = input.Seek(0, io.SeekStart)
if err != nil {
return nil, err
}
// Create a new ZIP reader for the data
zipReader, err := zip.NewReader(input, size)
if err != nil {
return nil, err
}
return zipReader, err
}

// parseResTable parses the resources.arsc file and returns the ResourceTable.
func parseResTable(zipReader *zip.Reader) (*apkparser.ResourceTable, error) {
for _, file := range zipReader.File {
if file.Name == "resources.arsc" {
rdr, err := readFile(file)
if err != nil {
return nil, err
}
defer rdr.Close()
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved

resTable, err := apkparser.ParseResourceTable(rdr)
if err != nil {
return nil, err
}
return resTable, nil
}
}
return nil, errors.New("resources.arsc file not found in the APK archive")
}

// readFile opens a zip archive entry and returns a reader over its
// decompressed contents.
// Note: it is the caller's responsibility to close the returned reader.
func readFile(file *zip.File) (io.ReadCloser, error) {
	return file.Open()
}

// hasSubstring reports whether s contains at least one of the given
// substrings. It returns false when substrings is empty.
func hasSubstring(s string, substrings []string) bool {
	found := false
	// Stop scanning as soon as one candidate matches.
	for i := 0; i < len(substrings) && !found; i++ {
		found = strings.Contains(s, substrings[i])
	}
	return found
}

// extractStringsFromResTable extracts the strings from the resources table
// Note: This is a hacky way to get the strings from the resources table
// APK strings are typically (always?) stored in the 0x7f000000-0x7fffffff range
// https://chromium.googlesource.com/chromium/src/+/master/build/android/docs/life_of_a_resource.md
func extractStringsFromResTable(resTable *apkparser.ResourceTable) (io.Reader, error) {
var resourceStrings bytes.Buffer
inStrings := false
for i := 0x7f000000; i <= 0x7fffffff; i++ {
entry, _ := resTable.GetResourceEntry(uint32(i))
if entry == nil {
continue
}
if entry.ResourceType == "string" {
inStrings = true
val, err := entry.GetValue().String()
if err != nil {
return nil, err
}
// Write directly to the buffer
_, err = resourceStrings.WriteString(fmt.Sprintf("%s: %s\n", entry.Key, val))
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}
}
// Exit the loop if we've finished processing the strings
if inStrings && entry.ResourceType != "string" {
break
}
}
return &resourceStrings, nil
}

// processDexFile decodes the dex file and returns the relevant instructions
func processDexFile(ctx logContext.Context, rdr io.ReadCloser) (io.Reader, error) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have this outside of the apk specific case like if we find .dex file directly outside

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea. Might be something for a separate PR, just so that this can get out the door. But I like where you're going with this. I think we should probably have several additional file handlers to handle specific types like dex, pyc, etc.

// dextk.Read() requires an io.ReaderAt interface,
// so we first convert the reader to a byte slice
data, err := io.ReadAll(rdr)
if err != nil {
return nil, err
}
bytesRdr := bytes.NewReader(data)

// Read the dex file
dexReader, err := dextk.Read(bytesRdr)
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}

// Get relevant instruction data from the dex file
var dexOutput bytes.Buffer
ci := dexReader.ClassIter()
for ci.HasNext() {
node, err := ci.Next()
if err != nil {
break
}
processDexClass(ctx, dexReader, node, &dexOutput)
}
return &dexOutput, nil
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
}

// processDexClass feeds a class node's methods through processDexMethod:
// direct methods first, then virtual methods.
func processDexClass(ctx logContext.Context, dexReader *dextk.Reader, node dextk.ClassNode, dexOutput *bytes.Buffer) {
	methodGroups := [][]dextk.MethodNode{
		node.DirectMethods,
		node.VirtualMethods,
	}
	for _, group := range methodGroups {
		processDexMethod(ctx, dexReader, group, dexOutput)
	}
}

// processDexMethod parses each method in methods and appends its formatted
// instructions to dexOutput. A method that fails to parse is logged at
// verbosity 2 and skipped.
func processDexMethod(ctx logContext.Context, dexReader *dextk.Reader, methods []dextk.MethodNode, dexOutput *bytes.Buffer) {
	for idx := range methods {
		instructions, parseErr := parseDexInstructions(dexReader, methods[idx])
		if parseErr == nil {
			dexOutput.WriteString(instructions)
			continue
		}
		// One bad method must not prevent the remaining methods from being scanned.
		ctx.Logger().V(2).Info("failed to process dex method", "error", parseErr)
	}
}

// parseDexInstructions processes a dex method and returns its relevant
// instructions as newline-terminated, formatted lines.
//
// Methods with a zero code offset yield an empty string. Only instructions
// whose textual form contains one of targetInstructionTypes are kept; each is
// passed through formatInstruction. An error is returned if the method's code
// cannot be read and parsed.
func parseDexInstructions(r *dextk.Reader, m dextk.MethodNode) (string, error) {
	if m.CodeOff == 0 {
		return "", nil
	}

	c, err := r.ReadCodeAndParse(m.CodeOff)
	if err != nil {
		return "", err
	}

	var s strings.Builder
	for _, o := range c.Ops {
		// Render the instruction once and reuse it for both the filter and the formatter.
		line := o.String()
		if !hasSubstring(line, targetInstructionTypes) {
			continue
		}
		s.WriteString(formatInstruction(line))
		s.WriteByte('\n')
	}
	return s.String(), nil
}

// formatInstruction strips the opcode/register boilerplate from a dex
// instruction line by deleting, in order, every match of reFieldPrefix,
// reTypeSuffix and reConstString.
// Note: This is critical for ensuring secret + keyword are in close proximity.
// If we expand the instructions we're looking at, this function will need to be updated.
func formatInstruction(line string) string {
	strip := []*regexp.Regexp{reFieldPrefix, reTypeSuffix, reConstString}
	for _, re := range strip {
		line = re.ReplaceAllString(line, "")
	}
	return line
}

func decodeXML(rdr io.ReadCloser, resTable *apkparser.ResourceTable) (io.Reader, error) {
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
// Create a buffer to store the formatted XML data
var buf bytes.Buffer
enc := xml.NewEncoder(&buf)

// Parse the XML data using the apkparser library + resource table
err := apkparser.ParseXml(rdr, enc, resTable)
if err != nil {
// If the error is due to plaintext XML, return the plaintext XML stringified
if err.Error() == "xml is in plaintext, binary form expected" {
joeleonjr marked this conversation as resolved.
Show resolved Hide resolved
xmlData, readErr := io.ReadAll(rdr)
if readErr != nil {
return nil, readErr
}
return bytes.NewReader(xmlData), nil
}
return nil, err
}
return &buf, nil
}
Loading
Loading