Skip to content

Commit

Permalink
Adding test cases for digits.
Browse files Browse the repository at this point in the history
  • Loading branch information
algogrit committed Feb 7, 2024
1 parent d60ec91 commit ac08f33
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 44 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
project-resources
data/
statements
oss/
experiments/
66 changes: 32 additions & 34 deletions cmd/process_statements/main.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
package main

import (
"fmt"

"github.com/dslipak/pdf"
)

// TODOs
// 0. Port PyMuPDF, PyPdf, PdfPlumber to Go?
// 1. Figure out which file actually contains the statement (-0 vs -1)
// Approach:
// 2. Figure out if the year matches the statement title
Expand All @@ -17,35 +12,38 @@ import (
// 4. Build intelligence through analysis of past data & news reports & any other source

// main reads one hard-coded annual-statement PDF through readPdf, panics if
// readPdf reports an error, and prints the returned content to stdout.
func main() {
// pdf.DebugOn = true
content, err := readPdf("./statements/ASIANPAINT/2022-2023.pdf") // Read local pdf file
if err != nil {
panic(err)
}
fmt.Println(content)
return
// unipdf.
}

// readPdf opens the PDF at path and prints its text to stdout, row by row.
//
// It returns the error from pdf.Open when the file cannot be opened.
// Otherwise it always returns an empty string and a nil error: the extracted
// text is only printed, never accumulated into the return value.
func readPdf(path string) (string, error) {
r, err := pdf.Open(path)
if err != nil {
return "", err
}
totalPage := r.NumPage()
// // pdf.DebugOn = true
// content, err := readPdf("./statements/ASIANPAINT/2022-2023.pdf") // Read local pdf file
// if err != nil {
// panic(err)
// }
// fmt.Println(content)
// return
// }

// Page numbers are iterated from 1 through totalPage inclusive.
for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
p := r.Page(pageIndex)
// Skip pages whose underlying value is null.
if p.V.IsNull() {
continue
}
// func readPdf(path string) (string, error) {
// r, err := pdf.Open(path)
// if err != nil {
// return "", err
// }
// totalPage := r.NumPage()

// NOTE(review): the error from GetTextByRow is discarded, so a page that
// fails to parse is silently treated as having no rows.
rows, _ := p.GetTextByRow()
for _, row := range rows {
// The builtin println writes to stderr, while fmt.Println below writes to stdout.
println(">>>> row: ", row.Position)
for _, word := range row.Content {
fmt.Println(word.S)
}
}
}
return "", nil
}
// for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
// p := r.Page(pageIndex)
// if p.V.IsNull() {
// continue
// }

// rows, _ := p.GetTextByRow()
// for _, row := range rows {
// println(">>>> row: ", row.Position)
// for _, word := range row.Content {
// fmt.Println(word.S)
// }
// }
// }
// return "", nil
// }
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.21.6
require github.com/sirupsen/logrus v1.9.3

require (
github.com/dslipak/pdf v0.0.2 // indirect
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
github.com/stretchr/testify v1.7.1 // indirect
golang.org/x/sys v0.15.0 // indirect
gopkg.in/yaml.v3 v3.0.0 // indirect
)
11 changes: 6 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dslipak/pdf v0.0.2 h1:djAvcM5neg9Ush+zR6QXB+VMJzR6TdnX766HPIg1JmI=
github.com/dslipak/pdf v0.0.2/go.mod h1:2L3SnkI9cQwnAS9gfPz2iUoLC0rUZwbucpbKi5R1mUo=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
9 changes: 9 additions & 0 deletions notes/240206.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Notes (6th Feb 2024)

- Extracting data from Annual Statements

- Extremely information-dense
- Highly visual data with legends and notes
- Some images need to be annotated, automagically!

-
8 changes: 5 additions & 3 deletions pkg/mathext/digits.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package mathext

// DigitCount returns the number of decimal digits in n.
//
// Zero counts as one digit. The sign of a negative number is not counted:
// Go's integer division truncates toward zero, so DigitCount(-12) is 2.
func DigitCount(n int) int {
	count := 1
	// Drop one trailing digit per iteration until only the leading digit is left.
	for n/10 != 0 {
		count++
		n /= 10
	}
	return count
}
30 changes: 30 additions & 0 deletions pkg/mathext/digits_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package mathext_test

import (
"testing"

"codermana.com/go/pkg/value_analysis/pkg/mathext"
)

// TestDigitCount checks mathext.DigitCount against a table of inputs and
// their expected decimal digit counts, including zero and a value with
// trailing zeros.
func TestDigitCount(t *testing.T) {
	testCases := []struct {
		input    int
		expected int
	}{
		{1123512, 7},
		{12, 2},
		{0, 1},
		{1, 1},
		{100, 3},
	}

	for _, testCase := range testCases {
		actual := mathext.DigitCount(testCase.input)

		// Report the input with the mismatch so a failing case can be identified;
		// Errorf marks the test failed but lets the remaining cases run.
		if actual != testCase.expected {
			t.Errorf("DigitCount(%d) = %d, want %d", testCase.input, actual, testCase.expected)
		}
	}
}
Binary file added samples/2012-2013-0.pdf
Binary file not shown.
Binary file added samples/2012-2013-1.pdf
Binary file not shown.
Binary file added samples/2022-2023.pdf
Binary file not shown.

0 comments on commit ac08f33

Please sign in to comment.