Skip to content

Commit

Permalink
Don't set array capacity in repeat-expr (costly in invalid files)
Browse files Browse the repository at this point in the history
Cc @armijnhemel

Although the pre-allocation of the array can speed up parsing of the
repeated field by a few percent (assuming that the parsed file is
completely valid), it poses a serious issue for invalid files where some
random group of high bytes has been interpreted as the number of
entries. That will allocate a ridiculously large array (for example 2
million entries or more), which is costly and unnecessary (because the
parsing is likely to fail much sooner than after parsing all 2 million
records).

This change puts the `repeat: expr` in line with `eos` and `until`, so
it was suddenly possible to refactor the piece of code taking care of
creating the array to a common place - the `condRepeatExprHeader()`
method, which is called from all types of repeats.
  • Loading branch information
generalmimon committed Apr 15, 2022
1 parent 5773e73 commit f5fe28e
Show file tree
Hide file tree
Showing 14 changed files with 145 additions and 239 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -274,15 +274,17 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)

override def condIfFooter(expr: expr): Unit = fileFooter(null)

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
importList.add("System.Collections.Generic")

if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>();")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>();")

out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}();")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
out.puts("{")
out.inc
out.puts("var i = 0;")
Expand All @@ -302,33 +304,18 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts("}")
}

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
importList.add("System.Collections.Generic")

if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>((int) (${expression(repeatExpr)}));")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>((int) (${expression(repeatExpr)}));")
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}((int) (${expression(repeatExpr)}));")
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
out.puts(s"for (var i = 0; i < ${expression(repeatExpr)}; i++)")
out.puts("{")
out.inc
}

override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
out.puts(s"${privateMemberName(id)}.Add($expr);")
}
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
handleAssignmentRepeatEos(id, expr)

override def condRepeatExprFooter: Unit = fileFooter(null)

override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
importList.add("System.Collections.Generic")

if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>();")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>();")
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}();")
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
out.puts("{")
out.inc
out.puts("var i = 0;")
Expand All @@ -347,7 +334,7 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts(s"${privateMemberName(id)}.Add($tempVar);")
}

override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
typeProvider._currentIteratorType = Some(dataType)
out.puts("i++;")
out.dec
Expand Down
49 changes: 10 additions & 39 deletions shared/src/main/scala/io/kaitai/struct/languages/CppCompiler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ class CppCompiler(
outSrc.puts("}")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
importListHdr.addSystem("vector")

if (needRaw.level >= 1) {
Expand All @@ -632,6 +632,9 @@ class CppCompiler(
outSrc.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = ${newVector(CalcBytesType)};")
}
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
outSrc.puts("{")
outSrc.inc
outSrc.puts("int i = 0;")
Expand All @@ -651,54 +654,22 @@ class CppCompiler(
outSrc.puts("}")
}

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: Ast.expr): Unit = {
importListHdr.addSystem("vector")

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: Ast.expr): Unit = {
val lenVar = s"l_${idToStr(id)}"
outSrc.puts(s"int $lenVar = ${expression(repeatExpr)};")
if (needRaw.level >= 1) {
val rawId = privateMemberName(RawIdentifier(id))
outSrc.puts(s"$rawId = ${newVector(CalcBytesType)};")
outSrc.puts(s"$rawId->reserve($lenVar);")
if (needRaw.hasIO) {
val ioId = privateMemberName(IoStorageIdentifier(RawIdentifier(id)))
outSrc.puts(s"$ioId = ${newVector(OwnedKaitaiStreamType)};")
outSrc.puts(s"$ioId->reserve($lenVar);")
}
}
if (needRaw.level >= 2) {
val rawId = privateMemberName(RawIdentifier(RawIdentifier(id)))
outSrc.puts(s"$rawId = ${newVector(CalcBytesType)};")
outSrc.puts(s"$rawId->reserve($lenVar);")
}
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
outSrc.puts(s"${privateMemberName(id)}->reserve($lenVar);")
outSrc.puts(s"const int $lenVar = ${expression(repeatExpr)};")
outSrc.puts(s"for (int i = 0; i < $lenVar; i++) {")
outSrc.inc
}

override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
outSrc.puts(s"${privateMemberName(id)}->push_back(${stdMoveWrap(expr)});")
}
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
handleAssignmentRepeatEos(id, expr)

override def condRepeatExprFooter: Unit = {
outSrc.dec
outSrc.puts("}")
}

override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
importListHdr.addSystem("vector")

if (needRaw.level >= 1) {
outSrc.puts(s"${privateMemberName(RawIdentifier(id))} = ${newVector(CalcBytesType)};")
if (needRaw.hasIO) {
outSrc.puts(s"${privateMemberName(IoStorageIdentifier(RawIdentifier(id)))} = ${newVector(OwnedKaitaiStreamType)};")
}
}
if (needRaw.level >= 2) {
outSrc.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = ${newVector(CalcBytesType)};")
}
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
outSrc.puts("{")
outSrc.inc
outSrc.puts("int i = 0;")
Expand Down Expand Up @@ -732,7 +703,7 @@ class CppCompiler(
outSrc.puts(s"${privateMemberName(id)}->push_back($wrappedTempVar);")
}

override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
typeProvider._currentIteratorType = Some(dataType)
outSrc.puts("i++;")
outSrc.dec
Expand Down
42 changes: 18 additions & 24 deletions shared/src/main/scala/io/kaitai/struct/languages/GoCompiler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,12 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.inc
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, 0);")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, 0);")
//out.puts(s"${privateMemberName(id)} = make(${kaitaiType2NativeType(ArrayType(dataType))})")
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
// slices don't have to be manually initialized in Go: the built-in append()
// function works even on `nil` slices (https://go.dev/tour/moretypes/15)
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
out.puts(s"for i := 1;; i++ {")
out.inc

Expand All @@ -318,27 +318,21 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts(s"$name = append($name, $expr)")
}

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: Ast.expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, ${expression(repeatExpr)})")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, ${expression(repeatExpr)})")
out.puts(s"${privateMemberName(id)} = make(${kaitaiType2NativeType(ArrayTypeInStream(dataType))}, ${expression(repeatExpr)})")
out.puts(s"for i := range ${privateMemberName(id)} {")
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: Ast.expr): Unit = {
out.puts(s"for i := 0; i < int(${expression(repeatExpr)}); i++ {")
out.inc
// FIXME: Go throws a fatal compile error when the `i` variable is not used (unused variables
// can only use the blank identifier `_`, see https://go.dev/doc/effective_go#blank), so we have
// to silence it like this. It would be nice to be able to analyze all expressions that appear
// in the loop body to decide whether to generate `for _ := range` or `for i := range` here, but
// that would be really difficult to do properly in KSC with the current architecture.
out.puts("_ = i")
}

override def handleAssignmentRepeatExpr(id: Identifier, r: TranslatorResult): Unit = {
val name = privateMemberName(id)
val expr = translator.resToStr(r)
out.puts(s"$name[i] = $expr")
}
override def handleAssignmentRepeatExpr(id: Identifier, r: TranslatorResult): Unit =
handleAssignmentRepeatEos(id, r)

override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: Ast.expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, 0);")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, 0);")
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: Ast.expr): Unit = {
out.puts(s"for i := 1;; i++ {")
out.inc
}
Expand All @@ -350,7 +344,7 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts(s"${privateMemberName(id)} = append(${privateMemberName(id)}, $tempVar)")
}

override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: Ast.expr): Unit = {
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: Ast.expr): Unit = {
typeProvider._currentIteratorType = Some(dataType)
out.puts(s"if ${expression(untilExpr)} {")
out.inc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,15 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.inc
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>();")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>();")
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}();")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
out.puts("{")
out.inc
out.puts("int i = 0;")
Expand All @@ -383,28 +386,17 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts("}")
}

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>(((Number) (${expression(repeatExpr)})).intValue());")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>(((Number) (${expression(repeatExpr)})).intValue());")
out.puts(s"${idToStr(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}(((Number) (${expression(repeatExpr)})).intValue());")
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
out.puts(s"for (int i = 0; i < ${expression(repeatExpr)}; i++) {")
out.inc

importList.add("java.util.ArrayList")
}

override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
out.puts(s"${privateMemberName(id)}.add($expr);")
}
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
handleAssignmentRepeatEos(id, expr)

override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>();")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>();")
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}();")
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
out.puts("{")
out.inc
out.puts(s"${kaitaiType2JavaType(dataType)} ${translator.doName("_")};")
Expand All @@ -425,7 +417,7 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts(s"${privateMemberName(id)}.add($tempVar);")
}

override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
typeProvider._currentIteratorType = Some(dataType)
out.puts("i++;")
out.dec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,23 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.inc
}

// TODO: replace this with UniversalFooter
override def condIfFooter(expr: expr): Unit = {
out.dec
out.puts("}")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = [];")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = [];")
out.puts(s"${privateMemberName(id)} = [];")
if (config.readStoresPos)
out.puts(s"this._debug.${idToStr(id)}.arr = [];")
}

override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
out.puts("var i = 0;")
out.puts(s"while (!$io.isEof()) {")
out.inc
Expand All @@ -319,35 +323,20 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts("}")
}

override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = new Array(${expression(repeatExpr)});")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new Array(${expression(repeatExpr)});")
out.puts(s"${privateMemberName(id)} = new Array(${expression(repeatExpr)});")
if (config.readStoresPos)
out.puts(s"this._debug.${idToStr(id)}.arr = new Array(${expression(repeatExpr)});")
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
out.puts(s"for (var i = 0; i < ${expression(repeatExpr)}; i++) {")
out.inc
}

override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
out.puts(s"${privateMemberName(id)}[i] = $expr;")
}
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
handleAssignmentRepeatEos(id, expr)

override def condRepeatExprFooter: Unit = {
out.dec
out.puts("}")
}

override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
if (needRaw.level >= 1)
out.puts(s"${privateMemberName(RawIdentifier(id))} = []")
if (needRaw.level >= 2)
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = [];")
out.puts(s"${privateMemberName(id)} = []")
if (config.readStoresPos)
out.puts(s"this._debug.${idToStr(id)}.arr = [];")
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
out.puts("var i = 0;")
out.puts("do {")
out.inc
Expand All @@ -359,7 +348,7 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
out.puts(s"${privateMemberName(id)}.push($tmpName);")
}

override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
typeProvider._currentIteratorType = Some(dataType)
out.puts("i++;")
out.dec
Expand Down
Loading

0 comments on commit f5fe28e

Please sign in to comment.