Skip to content

Commit

Permalink
materialize implementation and and optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
barakmich committed Aug 6, 2014
1 parent f441fc4 commit 09244dd
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 0 deletions.
15 changes: 15 additions & 0 deletions graph/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ func Next(it Iterator) (Value, bool) {
return nil, false
}

// Height is a convenience function that measures the height of an iterator
// tree: an iterator without subiterators has height 1, and every other
// iterator is one level taller than its tallest subiterator.
func Height(it Iterator) int {
	deepest := 0
	for _, child := range it.SubIterators() {
		if d := Height(child); d > deepest {
			deepest = d
		}
	}
	return 1 + deepest
}

// FixedIterator wraps iterators that are modifiable by addition of fixed value sets.
type FixedIterator interface {
Iterator
Expand Down Expand Up @@ -180,6 +193,7 @@ const (
Fixed
Not
Optional
Materialize
)

var (
Expand All @@ -200,6 +214,7 @@ var (
"fixed",
"not",
"optional",
"materialize",
}
)

Expand Down
17 changes: 17 additions & 0 deletions graph/iterator/and_iterator_optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ func (it *And) Optimize() (graph.Iterator, bool) {
// now a permutation of itself, but the contents are unchanged.
its = optimizeOrder(its)

its = materializeIts(its)

// Okay! At this point we have an optimized order.

// The easiest thing to do at this point is merely to create a new And iterator
Expand Down Expand Up @@ -293,6 +295,21 @@ func hasOneUsefulIterator(its []graph.Iterator) graph.Iterator {
return nil
}

// materializeIts returns a new slice mirroring its in which selected
// iterators are wrapped in a Materialize iterator. An iterator is wrapped
// when, by its own Stats, enumerating it in full (Size*NextCost) is cheaper
// than a single Contains call, and its iterator tree is more than 10 levels
// high. All other iterators are passed through unchanged, in order.
func materializeIts(its []graph.Iterator) []graph.Iterator {
	var wrapped []graph.Iterator
	for _, candidate := range its {
		st := candidate.Stats()
		cheapToEnumerate := st.Size*st.NextCost < st.ContainsCost
		if cheapToEnumerate && graph.Height(candidate) > 10 {
			wrapped = append(wrapped, NewMaterialize(candidate))
		} else {
			wrapped = append(wrapped, candidate)
		}
	}
	return wrapped
}

// and.Stats() lives here in and-iterator-optimize.go because it may
// in the future return different statistics based on how it is optimized.
// For now, however, it's pretty static.
Expand Down
235 changes: 235 additions & 0 deletions graph/iterator/materialize_iterator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
// Copyright 2014 The Cayley Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package iterator

// Materialize is a simple iterator that, the first time Contains() or Next() is called on it, materializes the whole subiterator, stores the results locally, and answers from that store. Essentially a cache.

import (
"fmt"
"strings"

"github.com/google/cayley/graph"
)

// abortMaterializeAt is the cutoff used by materializeSet: once the
// subiterator has yielded more than this many values via Next,
// materialization is abandoned and the iterator is marked as aborted.
var abortMaterializeAt = 1000

// result is one stored result of the subiterator: the value it produced and
// the tag bindings captured from its TagResults at that moment.
type result struct {
// id is the value the subiterator returned.
id graph.Value
// tags holds the subiterator's tag bindings for this particular result.
tags map[string]graph.Value
}

// Materialize is an iterator that caches the full output of a subiterator
// and then serves Next, Contains and NextResult from that cache. If the
// subiterator yields more than abortMaterializeAt values, the cache is
// dropped and those calls are passed straight through to the subiterator.
type Materialize struct {
// uid is the identifier obtained from NextUID at construction.
uid uint64
// tags holds the tags attached to this iterator itself.
tags graph.Tagger
// containsMap maps a value to the index in values of its first stored result.
containsMap map[graph.Value]int
// values holds the stored results in subiterator order; multiple results
// for the same value are stored next to each other.
values []result
// lastIndex is the index into values of the current result.
lastIndex int
// subIt is the wrapped iterator being materialized.
subIt graph.Iterator
// hasRun is set once materializeSet has been run (whether or not it aborted).
hasRun bool
// aborted is set when materialization gave up because the subiterator was
// too large.
aborted bool
}

// NewMaterialize wraps sub in a Materialize iterator with a fresh UID and an
// empty lookup map. The constructor itself does not read anything from sub.
func NewMaterialize(sub graph.Iterator) *Materialize {
	m := new(Materialize)
	m.uid = NextUID()
	m.containsMap = make(map[graph.Value]int)
	m.subIt = sub
	return m
}

// UID returns the identifier assigned to this iterator when it was created.
func (it *Materialize) UID() uint64 { return it.uid }

// Reset rewinds the iterator: the cursor into the stored results goes back
// to index 0 and the subiterator is reset too. Stored results are kept.
func (it *Materialize) Reset() {
	it.lastIndex = 0
	it.subIt.Reset()
}

// Close closes the subiterator, releases the stored results and the lookup
// map, and clears the hasRun flag.
func (it *Materialize) Close() {
	it.subIt.Close()
	it.containsMap, it.values = nil, nil
	it.hasRun = false
}

// Tagger returns a pointer to this iterator's own tagger, so callers can
// modify the tags in place.
func (it *Materialize) Tagger() *graph.Tagger {
	tagger := &it.tags
	return tagger
}

// TagResults writes the tag bindings for the current result into dst.
//
// Nothing is written before the set has been materialized. This iterator's
// own tags are bound to Result(). The subiterator's tags come from the copy
// stored alongside the current result or, when materialization was aborted
// and nothing is stored, directly from the subiterator.
func (it *Materialize) TagResults(dst map[string]graph.Value) {
	if !it.hasRun {
		return
	}
	for _, tag := range it.tags.Tags() {
		dst[tag] = it.Result()
	}

	if it.aborted {
		// materializeSet discards it.values on abort, so indexing it would
		// panic; the live subiterator holds the current tags instead.
		it.subIt.TagResults(dst)
		return
	}
	if it.lastIndex < 0 || it.lastIndex >= len(it.values) {
		// The cursor is outside the stored results (empty set or already
		// exhausted): there are no stored tags to copy.
		return
	}
	for tag, value := range it.values[it.lastIndex].tags {
		dst[tag] = value
	}
}

// Clone returns a new Materialize built with NewMaterialize around a clone
// of the subiterator, after calling CopyFrom on its tagger with this
// iterator. Stored results are not carried over to the clone.
func (it *Materialize) Clone() graph.Iterator {
	dup := NewMaterialize(it.subIt.Clone())
	dup.tags.CopyFrom(it)
	return dup
}

// DebugString renders a human-readable description of the iterator: its
// type, its tags and the number of stored results, indented by indent
// spaces, followed by the subiterator's own description at indent+4.
func (it *Materialize) DebugString(indent int) string {
	pad := strings.Repeat(" ", indent)
	kind := it.Type()
	tagNames := it.tags.Tags()
	stored := len(it.values)
	sub := it.subIt.DebugString(indent + 4)
	return fmt.Sprintf("%s(%s tags: %s Size: %d\n%s)", pad, kind, tagNames, stored, sub)
}

// Type identifies this iterator as a graph.Materialize iterator.
func (it *Materialize) Type() graph.Type {
	return graph.Materialize
}

// ResultTree builds a result tree whose root is the current Result() and
// whose only subtree is the subiterator's result tree.
//
// Deprecated: marked as deprecated by the original author.
func (it *Materialize) ResultTree() *graph.ResultTree {
	root := graph.NewResultTree(it.Result())
	child := it.subIt.ResultTree()
	root.AddSubtree(child)
	return root
}

// Result returns the value of the current result.
//
// When materialization was aborted nothing is stored locally and iteration
// is delegated to the subiterator, so its current result is returned.
// Otherwise the value at the cursor is returned, or nil when the cursor is
// outside the stored results (empty set or exhausted).
func (it *Materialize) Result() graph.Value {
	if it.aborted {
		// NOTE(review): assumes graph.Iterator declares Result(), as this
		// type's own Result method suggests; confirm against the interface.
		return it.subIt.Result()
	}
	if it.lastIndex < 0 || it.lastIndex >= len(it.values) {
		return nil
	}
	return it.values[it.lastIndex].id
}

// SubIterators returns a one-element slice holding the wrapped subiterator.
func (it *Materialize) SubIterators() []graph.Iterator {
	subs := make([]graph.Iterator, 1)
	subs[0] = it.subIt
	return subs
}

// Optimize optimizes the subiterator and adopts the result if it changed.
// If the new subiterator is of type graph.Null, it is returned as the
// replacement for this iterator (with true). In every other case the
// Materialize itself is returned with false.
func (it *Materialize) Optimize() (graph.Iterator, bool) {
	optimized, replaced := it.subIt.Optimize()
	if !replaced {
		return it, false
	}
	it.subIt = optimized
	if optimized.Type() == graph.Null {
		return optimized, true
	}
	return it, false
}

// Size is the number of results stored, if we've got them all; that count
// is reported as exact. Otherwise -- materialization has not run yet, or it
// was aborted and the store discarded -- defer to the subiterator's Size.
func (it *Materialize) Size() (int64, bool) {
	// An aborted run leaves hasRun set with an empty store; reporting
	// len(values) there would claim an exact size of zero.
	if it.hasRun && !it.aborted {
		return int64(len(it.values)), true
	}
	return it.subIt.Size()
}

// Stats reports the cost estimates for this iterator. The entire point of
// Materialize is to amortize the cost by putting it all up front, so both
// Next and Contains are priced at a fixed multiple (2x) of the subiterator's
// NextCost; Size comes from this iterator's own Size().
func (it *Materialize) Stats() graph.IteratorStats {
	const overhead int64 = 2
	size, _ := it.Size()
	sub := it.subIt.Stats()
	perCall := overhead * sub.NextCost
	return graph.IteratorStats{
		ContainsCost: perCall,
		NextCost:     perCall,
		Size:         size,
	}
}

// Next advances to the next distinct value and returns it, or (nil, false)
// once the stored results are exhausted.
//
// The first call materializes the subiterator. If materialization was
// aborted, every call is passed through to the subiterator instead.
// Additional results for the same value are skipped here; they are reached
// through NextResult.
//
// NOTE(review): the cursor has no "before the first value" state. The call
// that performs the materialization yields the first stored value, but after
// Reset() (which puts the cursor on index 0) the next call still starts
// *after* the first value. Fixing that needs a sentinel cursor position
// shared with Reset and Result.
func (it *Materialize) Next() (graph.Value, bool) {
	fresh := !it.hasRun
	if fresh {
		it.materializeSet()
	}
	if it.aborted {
		return graph.Next(it.subIt)
	}
	if fresh {
		// Nothing has been handed out yet, so the first stored value is the
		// answer rather than the value to advance past.
		it.lastIndex = 0
		if len(it.values) == 0 {
			return nil, false
		}
		return it.values[0].id, true
	}

	lastVal := it.Result()
	// Only step onto indexes that exist; stepping past the end and then
	// comparing Result() (nil) against lastVal used to yield (nil, true).
	for it.lastIndex+1 < len(it.values) {
		it.lastIndex++
		if cur := it.values[it.lastIndex].id; cur != lastVal {
			return cur, true
		}
	}
	// Exhausted: park the cursor past the end so Result() reports nil.
	it.lastIndex = len(it.values)
	return nil, false
}

// Contains reports whether v is in the set. The first call materializes the
// subiterator; if that was aborted, the question is passed on to the
// subiterator. On a hit the cursor is moved to the first stored result for
// v; on a miss the cursor is left where it was.
func (it *Materialize) Contains(v graph.Value) bool {
	if !it.hasRun {
		it.materializeSet()
	}
	if it.aborted {
		return it.subIt.Contains(v)
	}
	idx, found := it.containsMap[v]
	if found {
		it.lastIndex = idx
	}
	return found
}

// NextResult moves to the next stored result for the current value, if
// there is one, and reports whether it moved.
//
// The first call materializes the subiterator; if that was aborted, the call
// is passed through to the subiterator. Results for one value are stored
// next to each other, so only the entry directly after the cursor is
// examined.
func (it *Materialize) NextResult() bool {
	if !it.hasRun {
		it.materializeSet()
	}
	if it.aborted {
		return it.subIt.NextResult()
	}

	next := it.lastIndex + 1
	// ">=" rather than "==": with an empty store, or a cursor already past
	// the end, next is beyond len(values) and indexing it would panic.
	if next >= len(it.values) {
		return false
	}
	if it.Result() != it.values[next].id {
		return false
	}
	it.lastIndex = next
	return true
}

// materializeSet drains the subiterator into the local store.
//
// For every value returned by Next, the value's first result is recorded in
// containsMap (value -> index in values) and appended to values together
// with a snapshot of the subiterator's tags; each further result reported
// by NextResult is appended right after it with its own tag snapshot.
//
// If the subiterator yields more than abortMaterializeAt values, the store
// is thrown away, the subiterator is reset and the iterator is marked as
// aborted. In all cases hasRun is set on return.
func (it *Materialize) materializeSet() {
	// snapshot captures the subiterator's tag bindings for its current result.
	snapshot := func() map[string]graph.Value {
		tags := make(map[string]graph.Value)
		it.subIt.TagResults(tags)
		return tags
	}

	for seen := 1; ; seen++ {
		val, ok := graph.Next(it.subIt)
		if !ok {
			break
		}
		if seen > abortMaterializeAt {
			it.aborted = true
			break
		}
		first := snapshot()
		it.containsMap[val] = len(it.values)
		it.values = append(it.values, result{id: val, tags: first})
		for it.subIt.NextResult() {
			it.values = append(it.values, result{id: val, tags: snapshot()})
		}
	}

	if it.aborted {
		it.values = nil
		it.containsMap = nil
		it.subIt.Reset()
	}
	it.hasRun = true
}

0 comments on commit 09244dd

Please sign in to comment.