Skip to content

Commit

Permalink
Merge pull request #19 from redis-performance/fix.geoshape
Browse files Browse the repository at this point in the history
Fixed GEOMETRY/GEOSHAPE changes on Search Index
  • Loading branch information
filipecosta90 authored Jul 31, 2023
2 parents 1e2dc86 + c9fa212 commit 1da98bc
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 34 deletions.
8 changes: 8 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ wget -c https://github.com/redis-performance/geo-bench/releases/latest/download/
wget https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geoshape/polygons.json.bz2
bzip2 -d polygons.json.bz2

# get 100K dataset ( around 3GB uncompressed )
wget https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geoshape/polygons.100k.json.bz2
bzip2 -d polygons.100k.json.bz2

# get a simplified version of the 100K polygons
wget https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geoshape/polygons.100k.simplified-threshold-0.001.json.bz2
bzip2 -d polygons.100k.simplified-threshold-0.001.json.bz2

# get tool
wget -c https://github.com/redis-performance/geo-bench/releases/latest/download/geo-bench-$(uname -mrs | awk '{ print tolower($1) }')-$(dpkg --print-architecture).tar.gz -O - | tar -xz
```
Expand Down
3 changes: 2 additions & 1 deletion cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ type GeoPoint struct {
}

type GeoShape struct {
Shape string `json:"shape"`
Shape string `json:"shape"`
NPoints int64 `json:"npoints"`
}

func lineToLonLat(line string) (float64, float64) {
Expand Down
7 changes: 4 additions & 3 deletions cmd/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,14 @@ func setupStageGeoPoint(uri, password, db string, indexSearch bool, indexName, f
log.Printf("Finished setup stage for %s DB\n", db)
}

func updateCLI(tick *time.Ticker, c chan os.Signal, message_limit uint64, activeConnsPtr *int64, loop bool, datapointsChan chan datapoint, start time.Time, testTime int) (bool, time.Time, time.Duration, uint64, []float64, map[int]int, float64) {
func updateCLI(tick *time.Ticker, c chan os.Signal, message_limit uint64, activeConnsPtr *int64, loop bool, datapointsChan chan datapoint, start time.Time, testTime int) (bool, time.Time, time.Duration, uint64, []float64, map[int64]int64, float64) {
var currentErr uint64 = 0
var currentCount uint64 = 0
var currentReplySize int64 = 0
prevTime := time.Now()
prevMessageCount := uint64(0)
messageRateTs := []float64{}
var histogram = make(map[int]int)
var histogram = make(map[int64]int64)
var dp datapoint
fmt.Printf("%26s %7s %25s %25s %7s %25s %25s %25s\n", "Test time", " ", "Total Commands", "Total Errors", "", "Command Rate", "p50 lat. (msec)", "Active Conns")
for {
Expand All @@ -288,7 +288,7 @@ func updateCLI(tick *time.Ticker, c chan os.Signal, message_limit uint64, active
currentErr++
}
currentCount++
histogram[int(dp.resultset_size)]++
histogram[int64(dp.resultset_size)]++
currentReplySize += dp.resultset_size
}
case <-tick.C:
Expand Down Expand Up @@ -425,6 +425,7 @@ func loadWorkerGeoshapeElastic(ec *elastic.ElasticWrapper, queue chan string, co
ctx := context.Background()
for line := range queue {
polygon := lineToPolygon(line)

previousOpsVal := atomic.LoadUint64(ops)
if previousOpsVal >= totalDatapoints {
break
Expand Down
45 changes: 26 additions & 19 deletions cmd/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,28 +157,35 @@ var queryCmd = &cobra.Command{
fmt.Println(fmt.Sprintf("Finished sending %d queries of type %s", finishedCommands, queryType))

if replyHistogram != "" {
// sort reply sizes to be able to produce an ordered csv
var replySizes []int
for k := range histogramReplySize {
replySizes = append(replySizes, k)
}
sort.Sort(sort.Reverse(sort.IntSlice(replySizes)))
saveHistogram(replyHistogram, histogramReplySize)
}
},
}

fmt.Println(fmt.Sprintf("Storing csv file with reply size histogram in %s", replyHistogram))
csvFile, err := os.Create(replyHistogram)
func saveHistogram(replyHistogram string, histogramReplySize map[int64]int64) {
// sort reply sizes to be able to produce an ordered csv
var replySizes []int
for k := range histogramReplySize {
replySizes = append(replySizes, int(k))
}
sort.Sort(sort.Reverse(sort.IntSlice(replySizes)))

if err != nil {
log.Fatalf("Failed creating csv file: %s", err)
}
csvwriter := csv.NewWriter(csvFile)
_ = csvwriter.Write([]string{"reply_size", "count"})
for replyCount := range replySizes {
_ = csvwriter.Write([]string{fmt.Sprintf("%d", replyCount), fmt.Sprintf("%d", histogramReplySize[replyCount])})
}
csvwriter.Flush()
csvFile.Close()
fmt.Println(fmt.Sprintf("Storing csv file with reply size histogram in %s", replyHistogram))
csvFile, err := os.Create(replyHistogram)

if err != nil {
log.Fatalf("Failed creating csv file: %s", err)
}
csvwriter := csv.NewWriter(csvFile)
_ = csvwriter.Write([]string{"reply_size", "count"})
for value := range replySizes {
ocurrences := histogramReplySize[int64(value)]
if ocurrences > 0 {
_ = csvwriter.Write([]string{fmt.Sprintf("%d ", value), fmt.Sprintf(" %d", ocurrences)})
}
},
}
csvwriter.Flush()
csvFile.Close()
}

func init() {
Expand Down
178 changes: 178 additions & 0 deletions cmd/simplify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
/*
Copyright © 2023 NAME HERE <EMAIL ADDRESS>
*/
package cmd

import (
"bufio"
"encoding/json"
"fmt"
"github.com/paulmach/orb"
"github.com/paulmach/orb/encoding/wkt"
"github.com/paulmach/orb/simplify"
"github.com/schollz/progressbar/v3"
"github.com/spf13/cobra"
"log"
"os"
"strings"
)

// simplifyCmd represents the simplify command
var simplifyCmd = &cobra.Command{
Use: "simplify",
Short: "A brief description of your command",
Long: `A longer description that spans multiple lines and likely contains examples
and usage of using your command. For example:
Cobra is a CLI library for Go that empowers applications.
This application is a tool to generate the needed files
to quickly create a Cobra application.`,
Run: func(cmd *cobra.Command, args []string) {
pflags := cmd.Flags()
input, _ := pflags.GetString("input")
output, _ := pflags.GetString("output")
inputType, _ := pflags.GetString("input-type")
histogramOriginalFilename, _ := pflags.GetString("input-histogram-csv")
histogramFinalFilename, _ := pflags.GetString("output-histogram-csv")
padBound, _ := pflags.GetFloat64("pad-bound")
outerRingOnly, _ := pflags.GetBool("outer-ring-only")
useBoundsOnly, _ := pflags.GetBool("use-bounds-only")
douglasPeuckerThreshold, _ := pflags.GetFloat64("douglas-peucker-threshold")
if strings.Compare(inputType, INPUT_TYPE_GEOSHAPE) != 0 {
log.Printf("Only %s type can be simplified. Exiting...", INPUT_TYPE_GEOSHAPE)
}
if !useBoundsOnly && padBound > 0.0 {
log.Printf("You've speficied a pad bound of %f but this is only applied when --%s", padBound, "use-bounds-only")
}

requests, _ := pflags.GetInt("requests")
file, err := os.Open(input)
nLines, err := LineCounter(file)
if err != nil {
log.Fatal(err)
}
file.Close()
nDatapoints := requests
if nLines < nDatapoints || nDatapoints < 0 {
nDatapoints = nLines
}
if outerRingOnly {
log.Printf("Will preserve only the outer rings of the polygons given --%s was specified", "outer-ring-only")
}
log.Printf("There are a total of %d datapoints in %s", nLines, input)

file, err = os.Open(input)
if err != nil {
log.Fatal(err)
}
// Close when the functin returns
defer file.Close()

ofile, err := os.OpenFile(output, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatal(err)
}
// Close when the functin returns
defer ofile.Close()

scanner := bufio.NewScanner(file)
buf := make([]byte, 512*1024*1024)
scanner.Buffer(buf, 512*1024*1024)
var histogramOriginal = make(map[int64]int64)
var histogramFinal = make(map[int64]int64)
var originalPointCount int64 = 0
var finalPointCount int64 = 0

bar := progressbar.Default(int64(nDatapoints))
n := 0
for scanner.Scan() {
finalInputLine := scanner.Text()
polygon := lineToPolygon(finalInputLine)
original, err := wkt.UnmarshalPolygon(polygon)
totalPointsOriginal := getNPoints(original)
if err != nil {
fmt.Printf("can't unmarshal: %v. Input was %s\n", err, polygon)
continue
}
var final orb.Polygon
final = original
if outerRingOnly {
final = getOuterRing(original)
}
if douglasPeuckerThreshold > 0.0 {
final = simplify.DouglasPeucker(douglasPeuckerThreshold).Simplify(final.Clone()).(orb.Polygon)
}
totalPointsFinal := getNPoints(final)
if useBoundsOnly {
final = final.Bound().Pad(padBound).ToPolygon()
totalPointsFinal = getNPoints(final)
}
finalOutputPolygon := wkt.Marshal(final)

finalShape := GeoShape{
Shape: string(finalOutputPolygon),
NPoints: totalPointsFinal,
}
finalBytes, err := json.Marshal(finalShape)
if err != nil {
fmt.Printf("can't marshal: %v. Input was %v\n", err, finalShape)
continue
}
ofile.Write(append(finalBytes, '\n'))
originalPointCount += totalPointsOriginal
finalPointCount += totalPointsFinal
histogramOriginal[totalPointsOriginal]++
histogramFinal[totalPointsFinal]++
bar.Add(1)
n = n + 1
if n >= nDatapoints {
break
}
}
log.Printf("Avg input polygon size %.0f", float64(originalPointCount)/(float64(n)))
log.Printf("Avg output polygon size %.0f", float64(finalPointCount)/(float64(n)))

if histogramOriginalFilename != "" {
saveHistogram(histogramOriginalFilename, histogramOriginal)
}
if histogramFinalFilename != "" {
saveHistogram(histogramFinalFilename, histogramFinal)
}
},
}

func getOuterRing(original orb.Polygon) (outer orb.Polygon) {
// Polygon is a closed area. The first LineString is the outer ring.
// The others are the holes. Each LineString is expected to be closed
// ie. the first point matches the last.
outer = make([]orb.Ring, 1, 1)
outer[0] = original[0]
return
}
func getNPoints(original orb.Polygon) (totalPointsOriginal int64) {
// Polygon is a closed area. The first LineString is the outer ring.
// The others are the holes. Each LineString is expected to be closed
// ie. the first point matches the last.
totalPointsOriginal = 0
for _, ring := range original {
for _, linestring := range ring {
totalPointsOriginal += int64(len(linestring))
}
}
return
}

func init() {
rootCmd.AddCommand(simplifyCmd)
flags := simplifyCmd.Flags()
flags.StringP("input", "i", "polygons.json", "Input json file")
flags.StringP("output", "o", "polygons.simplified.json", "Output json file")
flags.StringP("input-type", "", INPUT_TYPE_GEOSHAPE, "Input type. One of 'geopoint' or 'geoshape'")
flags.IntP("requests", "n", -1, "Requests. If -1 then it will use all input datapoints")
flags.Float64P("pad-bound", "", 0.0, "extends the bound in all directions by the given value. Only used when --use-bounds-only=true")
flags.BoolP("outer-ring-only", "", false, "Polygon is a closed area. The first LineString is the outer ring. The others are the holes. When enabled this preserves only the outer ring on the output file.")
flags.BoolP("use-bounds-only", "", false, "When enabled this preserves only bound around the input polygon on the output file.")
flags.Float64P("douglas-peucker-threshold", "", 0.0, " distance (in units of input coordinates) of a vertex to other segments to be removed.")
flags.StringP("input-histogram-csv", "", "input-npoints-histogram.csv", "Store input polygon's number of points histogram into a csv file. If empty wont store it.")
flags.StringP("output-histogram-csv", "", "output-npoints-histogram.csv", "Store output polygon's number of points histogram into a csv file. If empty wont store it.")
}
9 changes: 8 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,22 @@ require (
github.com/HdrHistogram/hdrhistogram-go v1.1.2
github.com/cenkalti/backoff/v4 v4.1.3
github.com/elastic/go-elasticsearch/v8 v8.7.1
github.com/paulmach/orb v0.9.2
github.com/redis/rueidis v0.0.89
github.com/schollz/progressbar/v3 v3.13.1
github.com/spf13/cobra v1.6.1
github.com/spf13/pflag v1.0.5
)

require (
github.com/elastic/elastic-transport-go/v8 v8.2.0 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/stretchr/testify v1.8.1 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/term v0.8.0 // indirect
)

replace github.com/redis/rueidis => github.com/filipecosta90/rueidis v0.0.0-20230514183601-25fb0c71c8a5
replace github.com/redis/rueidis => github.com/filipecosta90/rueidis v0.0.0-20230731125225-704e76e542e7
Loading

0 comments on commit 1da98bc

Please sign in to comment.