Skip to content

Commit

Permalink
isvalid as an operation (#148)
Browse files Browse the repository at this point in the history
* Stubs for 'validity' on all layout types.

* Validity rules implemented for all array node types.

* Validity checking is done. (And it caught an error in the tests!)
  • Loading branch information
jpivarski authored Mar 6, 2020
1 parent c953d87 commit c413274
Show file tree
Hide file tree
Showing 55 changed files with 746 additions and 391 deletions.
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.1
0.2.2
1 change: 1 addition & 0 deletions include/awkward/Content.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ namespace awkward {
virtual const std::vector<std::string> keys() const = 0;

// operations
virtual const std::string validityerror(const std::string& path) const = 0;
virtual const Index64 count64() const = 0;
virtual const std::shared_ptr<Content> count(int64_t axis) const = 0;
virtual const std::shared_ptr<Content> flatten(int64_t axis) const = 0;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/EmptyArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/IndexedArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/ListArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/ListOffsetArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/None.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/NumpyArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
6 changes: 5 additions & 1 deletion include/awkward/array/RawArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ namespace awkward {
, length_(length)
, itemsize_(itemsize) {
if (sizeof(T) != itemsize) {
throw std::runtime_error("sizeof(T) != itemsize");
throw std::invalid_argument("sizeof(T) != itemsize");
}
}

Expand Down Expand Up @@ -417,6 +417,10 @@ namespace awkward {

// operations

// Validity check for RawArray: unconditionally returns an empty string, so
// no error message is ever produced here. The `path` argument is not used.
const std::string validityerror(const std::string& path) const override {
return std::string();
}

// count64 is not supported for RawArray: every call throws
// std::invalid_argument and nothing is ever returned.
// NOTE(review): the message below misspells "one-dimensional"; it is left
// untouched here because it is runtime text that callers may match on.
const Index64 count64() const override {
throw std::invalid_argument("RawArray cannot be counted because it is one-dimentional");
}
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/Record.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/RecordArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/RegularArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
1 change: 1 addition & 0 deletions include/awkward/array/UnionArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ namespace awkward {
const std::vector<std::string> keys() const override;

// operations
const std::string validityerror(const std::string& path) const override;
const Index64 count64() const override;
const std::shared_ptr<Content> count(int64_t axis) const override;
const std::shared_ptr<Content> flatten(int64_t axis) const override;
Expand Down
12 changes: 12 additions & 0 deletions include/awkward/cpu-kernels/operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,18 @@ extern "C" {
EXPORT_SYMBOL struct Error awkward_unionarray8_U32_simplify_one_to8_64(int8_t* totags, int64_t* toindex, const int8_t* fromtags, int64_t fromtagsoffset, const uint32_t* fromindex, int64_t fromindexoffset, int64_t towhich, int64_t fromwhich, int64_t length, int64_t base);
EXPORT_SYMBOL struct Error awkward_unionarray8_64_simplify_one_to8_64(int8_t* totags, int64_t* toindex, const int8_t* fromtags, int64_t fromtagsoffset, const int64_t* fromindex, int64_t fromindexoffset, int64_t towhich, int64_t fromwhich, int64_t length, int64_t base);

EXPORT_SYMBOL struct Error awkward_listarray32_validity(const int32_t* starts, int64_t startsoffset, const int32_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent);
EXPORT_SYMBOL struct Error awkward_listarrayU32_validity(const uint32_t* starts, int64_t startsoffset, const uint32_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent);
EXPORT_SYMBOL struct Error awkward_listarray64_validity(const int64_t* starts, int64_t startsoffset, const int64_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent);

EXPORT_SYMBOL struct Error awkward_indexedarray32_validity(const int32_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption);
EXPORT_SYMBOL struct Error awkward_indexedarrayU32_validity(const uint32_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption);
EXPORT_SYMBOL struct Error awkward_indexedarray64_validity(const int64_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption);

EXPORT_SYMBOL struct Error awkward_unionarray8_32_validity(const int8_t* tags, int64_t tagsoffset, const int32_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents);
EXPORT_SYMBOL struct Error awkward_unionarray8_U32_validity(const int8_t* tags, int64_t tagsoffset, const uint32_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents);
EXPORT_SYMBOL struct Error awkward_unionarray8_64_validity(const int8_t* tags, int64_t tagsoffset, const int64_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents);

}

#endif // AWKWARDCPU_GETITEM_H_
9 changes: 9 additions & 0 deletions include/awkward/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@ namespace awkward {
template <typename T>
ERROR awkward_indexedarray_reduce_next_64(int64_t* nextcarry, int64_t* nextparents, const T* index, int64_t indexoffset, int64_t* parents, int64_t parentsoffset, int64_t length);

template <typename T>
ERROR awkward_listarray_validity(const T* starts, int64_t startsoffset, const T* stops, int64_t stopsoffset, int64_t length, int64_t lencontent);

template <typename T>
ERROR awkward_indexedarray_validity(const T* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption);

template <typename T, typename I>
ERROR awkward_unionarray_validity(const T* tags, int64_t tagsoffset, const I* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents);

}
}

Expand Down
8 changes: 6 additions & 2 deletions src/awkward1/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
_dir_pattern = re.compile(r"^[a-zA-Z_]\w*$")

class Array(awkward1._numpy.NDArrayOperatorsMixin, awkward1._pandas.PandasMixin, Sequence):
def __init__(self, data, behavior=None):
def __init__(self, data, behavior=None, checkvalid=False):
if isinstance(data, awkward1.layout.Content):
layout = data
elif isinstance(data, Array):
Expand All @@ -38,6 +38,8 @@ def __init__(self, data, behavior=None):

self.layout = layout
self.behavior = behavior
if checkvalid:
awkward1.operations.describe.validityerror(self, exception=True)

@property
def layout(self):
Expand Down Expand Up @@ -169,7 +171,7 @@ def numbatype(self):
return numba.typeof(self._numbaview)

class Record(awkward1._numpy.NDArrayOperatorsMixin):
def __init__(self, data, behavior=None):
def __init__(self, data, behavior=None, checkvalid=False):
# FIXME: more checks here
layout = data
if not isinstance(layout, awkward1.layout.Record):
Expand All @@ -180,6 +182,8 @@ def __init__(self, data, behavior=None):

self.layout = layout
self.behavior = behavior
if checkvalid:
awkward1.operations.describe.validityerror(self, exception=True)

@property
def layout(self):
Expand Down
27 changes: 24 additions & 3 deletions src/awkward1/operations/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ def typeof(array):
elif isinstance(array, (numpy.int8, numpy.int16, numpy.int32, numpy.int64, numpy.uint8, numpy.uint16, numpy.uint32, numpy.uint64, numpy.float32, numpy.float64)):
return awkward1.types.PrimitiveType(typeof.dtype2primitive[array.dtype.type])

elif isinstance(array, numpy.generic):
raise ValueError("cannot describe {0} as a PrimitiveType".format(type(array)))

elif isinstance(array, (awkward1.highlevel.Array, awkward1.highlevel.Record, awkward1.highlevel.ArrayBuilder)):
return array.type

Expand Down Expand Up @@ -80,4 +77,28 @@ def typeof(array):
numpy.float64: "float64",
}

def validityerror(array, exception=False):
    """Return the validity-error report for an awkward array.

    High-level Array/Record objects are unwrapped to their ``layout`` and
    ArrayBuilders are snapshotted first; the call then recurses until it
    reaches a low-level layout, whose own ``validityerror()`` is invoked.

    Parameters:
        array: a high-level Array, Record or ArrayBuilder, or a low-level
            layout Content, Record or ArrayBuilder.
        exception: if true and the layout's report is not None, the report
            is raised as a ValueError instead of being returned.

    Returns:
        Whatever the layout's ``validityerror()`` returned.

    Raises:
        ValueError: when ``exception`` is true and the report is not None.
        TypeError: when ``array`` is none of the recognized types.
    """
    # High-level wrappers: delegate to the wrapped layout.
    if isinstance(array, (awkward1.highlevel.Array, awkward1.highlevel.Record)):
        return validityerror(array.layout, exception=exception)

    # High-level builder: snapshot() yields a high-level array; use its layout.
    if isinstance(array, awkward1.highlevel.ArrayBuilder):
        return validityerror(array.snapshot().layout, exception=exception)

    # Low-level layouts: this is where the actual check is performed.
    if isinstance(array, (awkward1.layout.Content, awkward1.layout.Record)):
        report = array.validityerror()
        if exception and report is not None:
            raise ValueError(report)
        return report

    # Low-level builder: snapshot() is passed straight back into this function.
    if isinstance(array, awkward1.layout.ArrayBuilder):
        return validityerror(array.snapshot(), exception=exception)

    raise TypeError("not an awkward array: {0}".format(repr(array)))

def isvalid(array, exception=False):
    """Return True when ``validityerror(array, exception=exception)`` is None.

    ``exception`` is forwarded unchanged, so with ``exception=True`` a
    non-None report surfaces as the ValueError raised by ``validityerror``
    rather than as a False return value.
    """
    return validityerror(array, exception=exception) is None

__all__ = [x for x in list(globals()) if not x.startswith("_") and x not in ("numbers", "numpy", "awkward1")]
100 changes: 100 additions & 0 deletions src/cpu-kernels/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,103 @@ ERROR awkward_unionarray8_U32_simplify_one_to8_64(int8_t* totags, int64_t* toind
ERROR awkward_unionarray8_64_simplify_one_to8_64(int8_t* totags, int64_t* toindex, const int8_t* fromtags, int64_t fromtagsoffset, const int64_t* fromindex, int64_t fromindexoffset, int64_t towhich, int64_t fromwhich, int64_t length, int64_t base) {
return awkward_unionarray_simplify_one<int8_t, int64_t, int8_t, int64_t>(totags, toindex, fromtags, fromtagsoffset, fromindex, fromindexoffset, towhich, fromwhich, length, base);
}

// Validates the (start, stop) pairs of a list array against the length of
// its content.
//
// For every i in [0, length) this reads starts[startsoffset + i] and
// stops[stopsoffset + i]. Pairs with start == stop are accepted without any
// further checks. Otherwise the first violated rule, tested in this order,
// is reported through failure() together with the element index i:
//   start > stop, start < 0, stop > lencontent.
// Returns success() when no pair violates a rule.
template <typename C>
ERROR awkward_listarray_validity(const C* starts, int64_t startsoffset, const C* stops, int64_t stopsoffset, int64_t length, int64_t lencontent) {
  for (int64_t k = 0; k < length; ++k) {
    const C lo = starts[startsoffset + k];
    const C hi = stops[stopsoffset + k];
    if (lo == hi) {
      // Zero-length list: its bounds are not inspected any further.
      continue;
    }
    if (hi < lo) {
      return failure("start[i] > stop[i]", k, kSliceNone);
    }
    if (lo < 0) {
      return failure("start[i] < 0", k, kSliceNone);
    }
    if (lencontent < hi) {
      return failure("stop[i] > len(content)", k, kSliceNone);
    }
  }
  return success();
}
// Non-template entry point for int32_t starts/stops: forwards every argument
// unchanged to awkward_listarray_validity<int32_t> and returns its result.
ERROR awkward_listarray32_validity(const int32_t* starts, int64_t startsoffset, const int32_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent) {
return awkward_listarray_validity<int32_t>(starts, startsoffset, stops, stopsoffset, length, lencontent);
}
// Non-template entry point for uint32_t starts/stops: forwards every argument
// unchanged to awkward_listarray_validity<uint32_t> and returns its result.
ERROR awkward_listarrayU32_validity(const uint32_t* starts, int64_t startsoffset, const uint32_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent) {
return awkward_listarray_validity<uint32_t>(starts, startsoffset, stops, stopsoffset, length, lencontent);
}
// Non-template entry point for int64_t starts/stops: forwards every argument
// unchanged to awkward_listarray_validity<int64_t> and returns its result.
ERROR awkward_listarray64_validity(const int64_t* starts, int64_t startsoffset, const int64_t* stops, int64_t stopsoffset, int64_t length, int64_t lencontent) {
return awkward_listarray_validity<int64_t>(starts, startsoffset, stops, stopsoffset, length, lencontent);
}

// Validates the entries of an indexed array's index against the length of
// its content.
//
// For every i in [0, length) this reads index[indexoffset + i]. When
// ISOPTION is false a negative entry is reported as "index[i] < 0"; when
// ISOPTION is true that check is skipped. In both modes an entry that is
// >= lencontent is reported as "index[i] >= len(content)". Problems are
// reported through failure() together with the element index i; success()
// is returned when every entry passes.
template <typename C, bool ISOPTION>
ERROR awkward_indexedarray_validity(const C* index, int64_t indexoffset, int64_t length, int64_t lencontent) {
  for (int64_t k = 0; k < length; ++k) {
    const C entry = index[indexoffset + k];
    if (!ISOPTION && entry < 0) {
      return failure("index[i] < 0", k, kSliceNone);
    }
    if (lencontent <= entry) {
      return failure("index[i] >= len(content)", k, kSliceNone);
    }
  }
  return success();
}
// Non-template entry point for an int32_t index: maps the runtime `isoption`
// flag onto the compile-time ISOPTION parameter of
// awkward_indexedarray_validity and returns that call's result.
ERROR awkward_indexedarray32_validity(const int32_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption) {
  return isoption
    ? awkward_indexedarray_validity<int32_t, true>(index, indexoffset, length, lencontent)
    : awkward_indexedarray_validity<int32_t, false>(index, indexoffset, length, lencontent);
}
// Non-template entry point for a uint32_t index: maps the runtime `isoption`
// flag onto the compile-time ISOPTION parameter of
// awkward_indexedarray_validity and returns that call's result.
ERROR awkward_indexedarrayU32_validity(const uint32_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption) {
  return isoption
    ? awkward_indexedarray_validity<uint32_t, true>(index, indexoffset, length, lencontent)
    : awkward_indexedarray_validity<uint32_t, false>(index, indexoffset, length, lencontent);
}
// Non-template entry point for an int64_t index: maps the runtime `isoption`
// flag onto the compile-time ISOPTION parameter of
// awkward_indexedarray_validity and returns that call's result.
ERROR awkward_indexedarray64_validity(const int64_t* index, int64_t indexoffset, int64_t length, int64_t lencontent, bool isoption) {
  return isoption
    ? awkward_indexedarray_validity<int64_t, true>(index, indexoffset, length, lencontent)
    : awkward_indexedarray_validity<int64_t, false>(index, indexoffset, length, lencontent);
}

// Validates the (tag, index) pairs of a union array.
//
// For every i in [0, length) this reads tags[tagsoffset + i] and
// index[indexoffset + i], then reports the first violated rule, tested in
// this order, through failure() together with the element index i:
//   tag < 0, index < 0, tag >= numcontents, index >= lencontents[tag].
// lencontents is only indexed after the tag has passed both range checks.
// Returns success() when every pair passes.
template <typename T, typename I>
ERROR awkward_unionarray_validity(const T* tags, int64_t tagsoffset, const I* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents) {
  for (int64_t k = 0; k < length; ++k) {
    const T which = tags[tagsoffset + k];
    const I where = index[indexoffset + k];
    if (which < 0) {
      return failure("tags[i] < 0", k, kSliceNone);
    }
    if (where < 0) {
      return failure("index[i] < 0", k, kSliceNone);
    }
    if (numcontents <= which) {
      return failure("tags[i] >= len(contents)", k, kSliceNone);
    }
    if (lencontents[which] <= where) {
      return failure("index[i] >= len(content[tags[i]])", k, kSliceNone);
    }
  }
  return success();
}
// Non-template entry point for int8_t tags with an int32_t index: forwards
// every argument unchanged to awkward_unionarray_validity<int8_t, int32_t>.
ERROR awkward_unionarray8_32_validity(const int8_t* tags, int64_t tagsoffset, const int32_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents) {
return awkward_unionarray_validity<int8_t, int32_t>(tags, tagsoffset, index, indexoffset, length, numcontents, lencontents);
}
// Non-template entry point for int8_t tags with a uint32_t index: forwards
// every argument unchanged to awkward_unionarray_validity<int8_t, uint32_t>.
ERROR awkward_unionarray8_U32_validity(const int8_t* tags, int64_t tagsoffset, const uint32_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents) {
return awkward_unionarray_validity<int8_t, uint32_t>(tags, tagsoffset, index, indexoffset, length, numcontents, lencontents);
}
// Non-template entry point for int8_t tags with an int64_t index: forwards
// every argument unchanged to awkward_unionarray_validity<int8_t, int64_t>.
ERROR awkward_unionarray8_64_validity(const int8_t* tags, int64_t tagsoffset, const int64_t* index, int64_t indexoffset, int64_t length, int64_t numcontents, const int64_t* lencontents) {
return awkward_unionarray_validity<int8_t, int64_t>(tags, tagsoffset, index, indexoffset, length, numcontents, lencontents);
}
4 changes: 4 additions & 0 deletions src/libawkward/array/EmptyArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ namespace awkward {
return std::vector<std::string>();
}

// Validity check for EmptyArray: unconditionally returns an empty string, so
// no error message is ever produced here. The `path` argument is not used.
const std::string EmptyArray::validityerror(const std::string& path) const {
return std::string();
}

const Index64 EmptyArray::count64() const {
return Index64(0);
}
Expand Down
16 changes: 16 additions & 0 deletions src/libawkward/array/IndexedArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,22 @@ namespace awkward {
return content_.get()->keys();
}

// Checks this array's index with util::awkward_indexedarray_validity and,
// if that reports nothing, continues the check in the content.
//
// `path` is the textual location of this node. When the kernel sets
// err.str, the returned message has the form
//   "at <path> (<classname>): <err.str> at i=<err.identity>".
// Otherwise the result is whatever the content's validityerror returns for
// the path extended with ".content".
template <typename T, bool ISOPTION>
const std::string IndexedArrayOf<T, ISOPTION>::validityerror(const std::string& path) const {
  struct Error err = util::awkward_indexedarray_validity<T>(
    index_.ptr().get(),
    index_.offset(),
    index_.length(),
    content_.get()->length(),
    ISOPTION);

  if (err.str != nullptr) {
    // The index itself is broken: describe where and why.
    std::string message("at ");
    message += path;
    message += std::string(" (");
    message += classname();
    message += std::string("): ");
    message += std::string(err.str);
    message += std::string(" at i=");
    message += std::to_string(err.identity);
    return message;
  }

  // The index passed: descend into the content.
  return content_.get()->validityerror(path + std::string(".content"));
}

template <typename T, bool ISOPTION>
const Index64 IndexedArrayOf<T, ISOPTION>::count64() const {
Index64 contentcount = content_.get()->count64();
Expand Down
23 changes: 22 additions & 1 deletion src/libawkward/array/ListArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ namespace awkward {
: Content(identities, parameters)
, starts_(starts)
, stops_(stops)
, content_(content) { }
, content_(content) {
if (stops.length() < starts.length()) {
throw std::invalid_argument("ListArray stops must not be shorter than its starts");
}
}

template <typename T>
const IndexOf<T> ListArrayOf<T>::starts() const {
Expand Down Expand Up @@ -437,6 +441,23 @@ namespace awkward {
return content_.get()->keys();
}

// Checks this array's starts/stops with util::awkward_listarray_validity
// and, if that reports nothing, continues the check in the content.
//
// The number of pairs examined is starts_.length(). `path` is the textual
// location of this node. When the kernel sets err.str, the returned message
// has the form
//   "at <path> (<classname>): <err.str> at i=<err.identity>".
// Otherwise the result is whatever the content's validityerror returns for
// the path extended with ".content".
template <typename T>
const std::string ListArrayOf<T>::validityerror(const std::string& path) const {
  struct Error err = util::awkward_listarray_validity<T>(
    starts_.ptr().get(),
    starts_.offset(),
    stops_.ptr().get(),
    stops_.offset(),
    starts_.length(),
    content_.get()->length());

  if (err.str != nullptr) {
    // The starts/stops themselves are broken: describe where and why.
    std::string message("at ");
    message += path;
    message += std::string(" (");
    message += classname();
    message += std::string("): ");
    message += std::string(err.str);
    message += std::string(" at i=");
    message += std::to_string(err.identity);
    return message;
  }

  // The starts/stops passed: descend into the content.
  return content_.get()->validityerror(path + std::string(".content"));
}

template <typename T>
const Index64 ListArrayOf<T>::count64() const {
int64_t lenstarts = starts_.length();
Expand Down
Loading

0 comments on commit c413274

Please sign in to comment.