Skip to content

Commit

Permalink
geo: move projection data to embedded compressed file
Browse files Browse the repository at this point in the history
The geoprojbase package embeds projection info as constants, leading
to a 6MB code file. Large code files are undesirable especially from
the perspective of static analysis tools, IDEs, etc.

This change moves the projections data to an embedded json.gz file. We
define the schema of this file in a new `embeddedproj` subpackage. The
data is loaded lazily.

The data file was obtained by modifying the existing constants to fill
out an `embeddedproj.Data`:
https://github.com/RaduBerinde/cockroach/blob/geospatial-proj-data/pkg/geo/geoprojbase/embeddedproj/data_test.go

The `generate-spatial-ref-sys` command is also updated to generate
this file from the `.csv`.

The `make buildshort` binary size is decreased by ~7MB.

Fixes #63969.

Release note: None
  • Loading branch information
RaduBerinde committed Dec 21, 2021
1 parent ea6bfb2 commit 5ea5ef4
Show file tree
Hide file tree
Showing 10 changed files with 205 additions and 92,294 deletions.
2 changes: 1 addition & 1 deletion pkg/cmd/generate-spatial-ref-sys/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ go_library(
deps = [
"//pkg/geo/geoproj",
"//pkg/geo/geoprojbase",
"//pkg/geo/geoprojbase/embeddedproj",
"//pkg/util/ctxgroup",
"//pkg/util/httputil",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
],
)

Expand Down
45 changes: 0 additions & 45 deletions pkg/cmd/generate-spatial-ref-sys/generate.tmpl

This file was deleted.

158 changes: 54 additions & 104 deletions pkg/cmd/generate-spatial-ref-sys/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// This package generated the hardcoded spatial ref sys mapping based
// on a given data set.
// This package generates the ref sys mapping based on a given data set as a
// json.gz file (with the schema defined in the embeddedproj package).
//
// Sample run:
// go run ./pkg/cmd/generate-spatial-ref-sys --src='/tmp/srids.csv' --dest='./pkg/geo/geoprojbase/projections.go' --template="./pkg/cmd/generate-spatial-ref-sys/generate.tmpl"
// go run ./pkg/cmd/generate-spatial-ref-sys --src='/tmp/srids.csv' --dest='./pkg/geo/geoprojbase/data/proj.json.gz'

package main

Expand All @@ -28,19 +28,17 @@ import (
"math"
"net/http"
"os"
"os/exec"
"sort"
"strconv"
"strings"
"text/template"
"time"

"github.com/cockroachdb/cockroach/pkg/geo/geoproj"
"github.com/cockroachdb/cockroach/pkg/geo/geoprojbase"
"github.com/cockroachdb/cockroach/pkg/geo/geoprojbase/embeddedproj"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

var (
Expand All @@ -51,84 +49,37 @@ var (
)
flagDEST = flag.String(
"dest",
"",
"The resulting map file.",
)
flagTemplate = flag.String(
"template",
"generate.tmpl",
"Which template file to use",
"proj.json.gz",
"The resulting .json.gz file.",
)
)

type templateVars struct {
Year int
Package string
Projections []projection
Spheroids []spheroid
}

type projectionBounds struct {
MinX string
MaxX string
MinY string
MaxY string
}

type projection struct {
SRID string
AuthName string
AuthSRID string
SRText string
Proj4Text string
Bounds *projectionBounds

IsLatLng bool
SpheroidVarName string
}

type spheroid struct {
VarName string
MajorAxis string
Flattening string
}

func main() {
flag.Parse()
tmplVars := getTemplateVars()

tmpl, err := template.ParseFiles(*flagTemplate)
if err != nil {
log.Fatal(err)
}
data := buildData()

out, err := os.Create(*flagDEST)
if err != nil {
log.Fatal(err)
}

if err := tmpl.Execute(out, tmplVars); err != nil {
if err := embeddedproj.Encode(data, out); err != nil {
log.Fatal(err)
}

if err := out.Close(); err != nil {
log.Fatal(err)
}

if err := exec.Command("crlfmt", "-w", "-tab", "2", *flagDEST).Run(); err != nil {
log.Fatal(err)
}
}

func getTemplateVars() templateVars {
func buildData() embeddedproj.Data {
type spheroidKey struct {
majorAxis float64
eccentricitySquared float64
}
foundSpheroids := make(map[spheroidKey]string)
foundSpheroids := make(map[spheroidKey]int64)
var mu syncutil.Mutex
var projections []projection
var spheroids []spheroid
var d embeddedproj.Data

g := ctxgroup.WithContext(context.Background())
records := readRecords()
Expand All @@ -151,25 +102,28 @@ func getTemplateVars() templateVars {

key := spheroidKey{s.Radius, s.Flattening}
mu.Lock()
spheroidVarName, ok := foundSpheroids[key]
spheroidHash, ok := foundSpheroids[key]
if !ok {
hash := sha256.Sum256([]byte(
shaBytes := sha256.Sum256([]byte(
strconv.FormatFloat(s.Radius, 'f', -1, 64) + "," + strconv.FormatFloat(s.Flattening, 'f', -1, 64),
))
spheroidVarName = fmt.Sprintf(`spheroid%s`, strings.ToUpper(fmt.Sprintf("%x", hash[:6])))
foundSpheroids[key] = spheroidVarName
spheroids = append(
spheroids,
spheroid{
VarName: spheroidVarName,
MajorAxis: strconv.FormatFloat(s.Radius, 'f', -1, 64),
Flattening: strconv.FormatFloat(s.Flattening, 'f', -1, 64),
spheroidHash = 0
for _, b := range shaBytes[:6] {
spheroidHash = (spheroidHash << 8) | int64(b)
}
foundSpheroids[key] = spheroidHash
d.Spheroids = append(
d.Spheroids,
embeddedproj.Spheroid{
Hash: spheroidHash,
Radius: s.Radius,
Flattening: s.Flattening,
},
)
}
mu.Unlock()

var bounds *projectionBounds
var bounds embeddedproj.Bounds
if record[1] == "EPSG" {
var results struct {
Results []struct {
Expand Down Expand Up @@ -262,27 +216,37 @@ func getTemplateVars() templateVars {
if skip {
continue
}
bounds = &projectionBounds{
MinX: strconv.FormatFloat(xCoords[0], 'f', -1, 64),
MaxX: strconv.FormatFloat(xCoords[3], 'f', -1, 64),
MinY: strconv.FormatFloat(yCoords[0], 'f', -1, 64),
MaxY: strconv.FormatFloat(yCoords[3], 'f', -1, 64),
bounds = embeddedproj.Bounds{
MinX: xCoords[0],
MaxX: xCoords[3],
MinY: yCoords[0],
MaxY: yCoords[3],
}
}

srid, err := strconv.ParseInt(record[0], 0, 64)
if err != nil {
return err
}

authSRID, err := strconv.ParseInt(record[2], 0, 64)
if err != nil {
return err
}

mu.Lock()
projections = append(
projections,
projection{
SRID: record[0],
d.Projections = append(
d.Projections,
embeddedproj.Projection{
SRID: int(srid),
AuthName: record[1],
AuthSRID: record[2],
AuthSRID: int(authSRID),
SRText: record[3],
Proj4Text: proj4text,

Bounds: bounds,
IsLatLng: isLatLng,
SpheroidVarName: spheroidVarName,
Bounds: bounds,
IsLatLng: isLatLng,
Spheroid: spheroidHash,
},
)
mu.Unlock()
Expand All @@ -294,33 +258,19 @@ func getTemplateVars() templateVars {
if err := g.Wait(); err != nil {
log.Fatal(err)
}
sort.Slice(projections, func(i, j int) bool {
l, err := strconv.ParseInt(projections[i].SRID, 0, 64)
if err != nil {
log.Fatal(err)
}
r, err := strconv.ParseInt(projections[j].SRID, 0, 64)
if err != nil {
log.Fatal(err)
}
return l < r
sort.Slice(d.Projections, func(i, j int) bool {
return d.Projections[i].SRID < d.Projections[j].SRID
})
sort.Slice(spheroids, func(i, j int) bool {
return spheroids[i].VarName < spheroids[j].VarName
sort.Slice(d.Spheroids, func(i, j int) bool {
return d.Spheroids[i].Hash < d.Spheroids[j].Hash
})
pkgName := strings.Split(*flagDEST, "/")
return templateVars{
Year: timeutil.Now().Year(),
Package: pkgName[len(pkgName)-2],
Projections: projections,
Spheroids: spheroids,
}
return d
}

func readRecords() [][]string {
in, err := os.Open(*flagSRC)
if err != nil {
log.Fatal(err)
log.Fatalf("cannot open CSV file '%s'", *flagSRC)
}
defer func() {
if err := in.Close(); err != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/geo/geoprojbase/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ go_library(
"geoprojbase.go",
"projections.go",
],
embedsrcs = ["data/proj.json.gz"],
importpath = "github.com/cockroachdb/cockroach/pkg/geo/geoprojbase",
visibility = ["//visibility:public"],
deps = [
"//pkg/geo/geographiclib",
"//pkg/geo/geopb",
"//pkg/geo/geoprojbase/embeddedproj",
"@com_github_cockroachdb_errors//:errors",
],
)
Expand Down
Binary file added pkg/geo/geoprojbase/data/proj.json.gz
Binary file not shown.
8 changes: 8 additions & 0 deletions pkg/geo/geoprojbase/embeddedproj/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "embeddedproj",
srcs = ["embedded_proj.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/geo/geoprojbase/embeddedproj",
visibility = ["//visibility:public"],
)
Loading

0 comments on commit 5ea5ef4

Please sign in to comment.