-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add extractor * file refactor * added walker log file discovery * add whitespace * WIP full refactor in progress * fixed decoding * add tests, updated README * removed unused errors, removed args * test fixes * fix typo * fix newlines * rename function * remove variable, expand err msg * edit error message * replace conditionals with require * replace conditional with require#2 * add cleanup helper, replace defer with t.cleanup * reorg args * add t.Helper() * replace t.fatal with require * fix arg order * modify random string func in testing * replace t.Fatalf with require * move global variables to smaller scopes * rename error Co-authored-by: Antonio Navarro Perez <[email protected]> * remove unused anon vars Co-authored-by: Antonio Navarro Perez <[email protected]> * edit comment Co-authored-by: Antonio Navarro Perez <[email protected]> * rename error Co-authored-by: Antonio Navarro Perez <[email protected]> * update map storage for paths Co-authored-by: Antonio Navarro Perez <[email protected]> * update map storage for paths #2 Co-authored-by: Antonio Navarro Perez <[email protected]> * add conversion check Co-authored-by: Antonio Navarro Perez <[email protected]> * add error check Co-authored-by: Antonio Navarro Perez <[email protected]> * format & handle errors * move cleanup above err check * Revert "move cleanup above err check" This reverts commit 872fd12. * swap arg order * wip tests * added more tests * modify source path * parallel test failing, save * fix broken tests --------- Co-authored-by: Antonio Navarro Perez <[email protected]>
- Loading branch information
Showing
8 changed files
with
1,123 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
txexport.log | ||
txexport.log.gz | ||
.idea |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
.idea | ||
logs | ||
extracted | ||
.env | ||
testdir | ||
test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Gno Source Code Extractor | ||
|
||
This tool is a simple parser to extract source code (packages & realms) from logs created by the [tx-archive](https://github.com/gnolang/tx-archive) tool for Gno chains. | ||
|
||
## Running the extractor | ||
|
||
The extractor takes in three arguments: | ||
- the filetype of the archive files, | ||
- output directory for the extracted packages, | ||
- the root directory where the archive files are located. | ||
|
||
``` | ||
USAGE | ||
[flags] | ||
The Gno / TM2 source code extractor service | ||
FLAGS | ||
-file-type .jsonl the file type for analysis, with a preceding period (ie .jsonl) | ||
-output-dir ./extracted the output directory for the extracted Gno source code | ||
-source-dir . the root folder containing transaction data | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
module extractor | ||
|
||
go 1.20 | ||
|
||
require ( | ||
github.com/gnolang/gno v0.0.0-20231006162410-fa8eb7753dc5 | ||
github.com/go-test/deep v1.1.0 | ||
github.com/peterbourgon/ff/v3 v3.4.0 | ||
github.com/stretchr/testify v1.8.4 | ||
golang.org/x/sync v0.4.0 | ||
) | ||
|
||
require ( | ||
github.com/btcsuite/btcd v0.22.0-beta.0.20220111032746-97732e52810c // indirect | ||
github.com/btcsuite/btcd/btcutil v1.0.0 // indirect | ||
github.com/cespare/xxhash v1.1.0 // indirect | ||
github.com/cespare/xxhash/v2 v2.1.1 // indirect | ||
github.com/cockroachdb/apd v1.1.0 // indirect | ||
github.com/davecgh/go-spew v1.1.1 // indirect | ||
github.com/dgraph-io/badger/v3 v3.2103.4 // indirect | ||
github.com/dgraph-io/ristretto v0.1.1 // indirect | ||
github.com/dustin/go-humanize v1.0.0 // indirect | ||
github.com/gnolang/goleveldb v0.0.9 // indirect | ||
github.com/gnolang/overflow v0.0.0-20170615021017-4d914c927216 // indirect | ||
github.com/gogo/protobuf v1.3.2 // indirect | ||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect | ||
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect | ||
github.com/golang/protobuf v1.5.3 // indirect | ||
github.com/golang/snappy v0.0.4 // indirect | ||
github.com/google/flatbuffers v1.12.1 // indirect | ||
github.com/jmhodges/levigo v1.0.0 // indirect | ||
github.com/klauspost/compress v1.12.3 // indirect | ||
github.com/linxGnu/grocksdb v1.8.4 // indirect | ||
github.com/pkg/errors v0.9.1 // indirect | ||
github.com/pmezard/go-difflib v1.0.0 // indirect | ||
github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c // indirect | ||
go.etcd.io/bbolt v1.3.7 // indirect | ||
go.opencensus.io v0.22.5 // indirect | ||
go.uber.org/atomic v1.7.0 // indirect | ||
go.uber.org/multierr v1.9.0 // indirect | ||
golang.org/x/crypto v0.13.0 // indirect | ||
golang.org/x/net v0.15.0 // indirect | ||
golang.org/x/sys v0.12.0 // indirect | ||
golang.org/x/tools v0.6.0 // indirect | ||
google.golang.org/protobuf v1.31.0 // indirect | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
) |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,311 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"context" | ||
"encoding/json" | ||
"errors" | ||
"flag" | ||
"fmt" | ||
"github.com/gnolang/gno/gno.land/pkg/sdk/vm" | ||
"github.com/gnolang/gno/tm2/pkg/amino" | ||
"github.com/gnolang/gno/tm2/pkg/std" | ||
"github.com/peterbourgon/ff/v3/ffcli" | ||
"golang.org/x/sync/errgroup" | ||
"io" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
) | ||
|
||
// packageMetadataFile is the name of the JSON metadata file that is written
// next to the extracted source files of every package.
const packageMetadataFile = "pkg_metadata.json"
|
||
// Sentinel errors returned by the extract command.
var (
	// errInvalidFileType is returned when the file type flag is empty
	errInvalidFileType = errors.New("no file type specified")

	// errInvalidSourceDir is returned when the source directory flag is empty
	errInvalidSourceDir = errors.New("invalid source directory")

	// errInvalidOutputDir is returned when the output directory flag is empty
	errInvalidOutputDir = errors.New("invalid output directory")

	// errNoSourceFilesFound is returned when no file of the requested type
	// exists under the source directory
	errNoSourceFilesFound = errors.New("no source files found, exiting")
)
|
||
// extractorCfg holds the command-line configuration of the extractor.
type extractorCfg struct {
	// fileType is the file name suffix (for example ".jsonl") a file must
	// have to be picked up for analysis
	fileType string

	// sourceDir is the root directory that is searched recursively for
	// files of the configured type
	sourceDir string

	// outputDir is the directory under which the extracted packages are written
	outputDir string
}
|
||
func main() { | ||
var ( | ||
cfg = &extractorCfg{} | ||
fs = flag.NewFlagSet("root", flag.ExitOnError) | ||
) | ||
|
||
// Register the flags | ||
cfg.registerFlags(fs) | ||
|
||
// Create the command | ||
cmd := &ffcli.Command{ | ||
ShortUsage: "[flags]", | ||
LongHelp: "The Gno / TM2 source code extractor service", | ||
FlagSet: fs, | ||
Exec: func(ctx context.Context, _ []string) error { | ||
return execExtract(ctx, cfg) | ||
}, | ||
} | ||
|
||
// Run the command | ||
if err := cmd.ParseAndRun(context.Background(), os.Args[1:]); err != nil { | ||
fmt.Fprintf(os.Stderr, "%+v", err) | ||
|
||
os.Exit(1) | ||
} | ||
} | ||
|
||
// registerFlags registers the extractor service flag set | ||
func (c *extractorCfg) registerFlags(fs *flag.FlagSet) { | ||
fs.StringVar( | ||
&c.fileType, | ||
"file-type", | ||
".jsonl", | ||
"the file type for analysis, with a preceding period (ie .jsonl)", | ||
) | ||
|
||
fs.StringVar( | ||
&c.sourceDir, | ||
"source-dir", | ||
".", | ||
"the root folder containing transaction data", | ||
) | ||
|
||
fs.StringVar( | ||
&c.outputDir, | ||
"output-dir", | ||
"./extracted", | ||
"the output directory for the extracted Gno source code", | ||
) | ||
} | ||
|
||
// execExtract runs the extract service for Gno source code | ||
func execExtract(ctx context.Context, cfg *extractorCfg) error { | ||
// Check the file type is valid | ||
if cfg.fileType == "" { | ||
return errInvalidFileType | ||
} | ||
|
||
// Check the source dir is valid | ||
if cfg.sourceDir == "" { | ||
return errInvalidSourceDir | ||
} | ||
|
||
// Check the output dir is valid | ||
if cfg.outputDir == "" { | ||
return errInvalidOutputDir | ||
} | ||
|
||
// Find the files that need to be analyzed | ||
sourceFiles, findErr := findFilePaths(cfg.sourceDir, cfg.fileType) | ||
if findErr != nil { | ||
return fmt.Errorf("unable to find file paths, %w", findErr) | ||
} | ||
|
||
if len(sourceFiles) == 0 { | ||
return errNoSourceFilesFound | ||
} | ||
|
||
// Concurrently process the source files | ||
g, ctx := errgroup.WithContext(ctx) | ||
|
||
for _, sourceFile := range sourceFiles { | ||
sourceFile := sourceFile | ||
|
||
g.Go(func() error { | ||
// Extract messages | ||
msgs, processErr := extractAddMessages(sourceFile) | ||
if processErr != nil { | ||
return processErr | ||
} | ||
|
||
// Process messages | ||
for _, msg := range msgs { | ||
outputDir := filepath.Join(cfg.outputDir, strings.TrimLeft(msg.Package.Path, "gno.land/")) | ||
|
||
// Write dir before writing files | ||
if dirWriteErr := os.MkdirAll(outputDir, os.ModePerm); dirWriteErr != nil { | ||
return fmt.Errorf("unable to write dir, %w", dirWriteErr) | ||
} | ||
|
||
// Write the package source code | ||
if writeErr := writePackageFiles(msg, outputDir); writeErr != nil { | ||
return writeErr | ||
} | ||
|
||
// Write the package metadata | ||
if writeErr := writePackageMetadata(metadataFromMsg(msg), outputDir); writeErr != nil { | ||
return writeErr | ||
} | ||
} | ||
|
||
return nil | ||
}) | ||
} | ||
|
||
return g.Wait() | ||
} | ||
|
||
// writePackageFiles writes all files from a single package to the output directory | ||
func writePackageFiles(msg vm.MsgAddPackage, outputDir string) error { | ||
for _, file := range msg.Package.Files { | ||
// Get the output path | ||
writePath := filepath.Join(outputDir, file.Name) | ||
|
||
if writeErr := os.WriteFile(writePath, []byte(file.Body), 0644); writeErr != nil { | ||
return fmt.Errorf("unable to write file %s, %w", file.Name, writeErr) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// writePackageMetadata writes the package metadata to the output directory | ||
func writePackageMetadata(metadata Metadata, outputDir string) error { | ||
// Get the output path | ||
writePath := filepath.Join(outputDir, packageMetadataFile) | ||
|
||
// Get the JSON metadata | ||
metadataRaw, marshalErr := json.Marshal(metadata) | ||
if marshalErr != nil { | ||
return fmt.Errorf("unable to JSON marshal metadata, %w", marshalErr) | ||
} | ||
|
||
if writeErr := os.WriteFile(writePath, metadataRaw, 0644); writeErr != nil { | ||
return fmt.Errorf("unable to write package metadata, %w", writeErr) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func extractAddMessages(filePath string) ([]vm.MsgAddPackage, error) { | ||
file, err := os.Open(filePath) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to open file, %w", err) | ||
} | ||
|
||
cleanup := func() error { | ||
if closeErr := file.Close(); closeErr != nil { | ||
return fmt.Errorf("unable to gracefully close file, %w", closeErr) | ||
} | ||
return nil | ||
} | ||
|
||
reader := bufio.NewReader(file) | ||
|
||
// Used to track what was parsed in the past | ||
touchMap := make(map[string]bool) | ||
|
||
// Msg array to be returned for further processing | ||
msgArr := make([]vm.MsgAddPackage, 0) | ||
|
||
// Buffer to handle lines longer than 64kb | ||
tempBuf := make([]byte, 0) | ||
|
||
for { | ||
var tx std.Tx | ||
line, isPrefix, err := reader.ReadLine() | ||
|
||
// Exit if no more lines in file | ||
if errors.Is(err, io.EOF) { | ||
break | ||
} | ||
if err != nil { | ||
return nil, fmt.Errorf("error reading lines; %w", err) | ||
} | ||
|
||
// If line is too long, save it in a temporary buffer and continue reading line | ||
if isPrefix { | ||
tempBuf = append(tempBuf, line...) | ||
continue | ||
} | ||
|
||
// Handle long lines | ||
if len(tempBuf) != 0 { | ||
// Append last part of line to temporary buffer | ||
tempBuf = append(tempBuf, line...) | ||
|
||
// Use line variable to pass it on to amino | ||
line = tempBuf | ||
} | ||
|
||
if err := amino.UnmarshalJSON(line, &tx); err != nil { | ||
fmt.Errorf("Error while parsing amino JSON at line: %w\nLine:%s\n", err, line) | ||
continue | ||
} | ||
|
||
// Reset tempBuf in case it was used for a long line | ||
if tempBuf != nil { | ||
tempBuf = nil | ||
} | ||
|
||
for _, msg := range tx.Msgs { | ||
// Only MsgAddPkg should be parsed | ||
if msg.Type() != "add_package" { | ||
continue | ||
} | ||
|
||
msgAddPkg, ok := msg.(vm.MsgAddPackage) | ||
if !ok { | ||
return nil, errors.New("could not cast into MsgAddPackage") | ||
} | ||
|
||
if msgAddPkg.Package == nil { | ||
return nil, errors.New("MsgAddPackage is nil") | ||
} | ||
path := msgAddPkg.Package.Path | ||
|
||
if _, parsed := touchMap[path]; parsed { | ||
// Package already parsed | ||
continue | ||
} | ||
|
||
touchMap[path] = true | ||
msgArr = append(msgArr, msgAddPkg) | ||
} | ||
} | ||
|
||
return msgArr, cleanup() | ||
} | ||
|
||
// findFilePaths recursively walks startPath and returns the paths of all
// non-directory entries whose name ends with fileType.
// The result is an empty slice when nothing matches. An error is returned
// if any entry cannot be accessed during the walk.
func findFilePaths(startPath string, fileType string) ([]string, error) {
	matches := make([]string, 0)

	visit := func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			// The entry could not be read, abort the walk
			return fmt.Errorf("error accessing file: %w", err)
		case info.IsDir(), !strings.HasSuffix(info.Name(), fileType):
			// Directories and files of another type are not collected
			return nil
		}

		matches = append(matches, path)

		return nil
	}

	// Traverse the whole tree below the start path
	if err := filepath.Walk(startPath, visit); err != nil {
		return nil, fmt.Errorf("unable to walk directory, %w", err)
	}

	return matches, nil
}
Oops, something went wrong.