Skip to content

Commit

Permalink
Fix for string values (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
R-maan authored Jun 11, 2020
1 parent 88b9ce5 commit 82b4807
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 12 deletions.
5 changes: 0 additions & 5 deletions ion/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ var malformedIonsSkipList = []string{
"localSymbolTableWithMultipleSymbolsAndImportsFields.ion",
"localSymbolTableWithMultipleSymbolsFields.10n",
"localSymbolTableWithMultipleSymbolsFields.ion",
"longStringRawControlCharacter.ion",
"minLongWithLenTooLarge.10n",
"minLongWithLenTooSmall.10n",
"negativeIntZero.10n",
Expand All @@ -203,8 +202,6 @@ var malformedIonsSkipList = []string{
"nopPadWithAnnotations.10n",
"nullDotCommentInt.ion",
"sexpOperatorAnnotation.ion",
"stringLenTooLarge.10n",
"stringRawControlCharacter.ion",
"stringWithLatinEncoding.10n",
"structOrderedEmpty.10n",
"surrogate_1.ion",
Expand All @@ -225,7 +222,6 @@ var malformedIonsSkipList = []string{

var equivsSkipList = []string{
"annotatedIvms.ion",
"clobs.ion",
"localSymbolTableAppend.ion",
"localSymbolTableNullSlots.ion",
"localSymbolTableWithAnnotations.ion",
Expand All @@ -234,7 +230,6 @@ var equivsSkipList = []string{
"nonIVMNoOps.ion",
"sexps.ion",
"stringUtf8.ion",
"strings.ion",
"structsFieldsDiffOrder.ion",
"structsFieldsRepeatedNames.ion",
"systemSymbols.ion",
Expand Down
42 changes: 35 additions & 7 deletions ion/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,11 +542,12 @@ func (t *tokenizer) readString() (string, error) {
if err != nil {
return "", err
}

switch c {
case -1, '\n':
// -1 denotes EOF, and new lines are not allowed in short string
if c == -1 || c == '\n' || isProhibitedControlChar(c) {
return "", t.invalidChar(c)
}

switch c {
case '"':
return ret.String(), nil

Expand Down Expand Up @@ -582,20 +583,25 @@ func (t *tokenizer) readLongString() (string, error) {
if err != nil {
return "", err
}

switch c {
case -1:
// -1 denotes EOF
if c == -1 || isProhibitedControlChar(c) {
return "", t.invalidChar(c)
}

switch c {
case '\'':
startPosition := t.pos
ok, err := t.skipEndOfLongString(t.skipCommentsHandler)
if err != nil {
return "", err
}
if ok {
return ret.String(), nil
}

if startPosition == t.pos {
// No character has been consumed. It is single '.
ret.WriteByte(byte(c))
}
case '\\':
c, err = t.peek()
if err != nil {
Expand Down Expand Up @@ -1263,3 +1269,25 @@ func (t *tokenizer) unread(c int) {
t.pos--
t.buffer = append(t.buffer, c)
}

// isProhibitedControlChar reports whether c lies in the ASCII control range
// 0x00-0x1F without being one of the characters accepted by
// isStringWhitespace or isNewLineChar. Any value outside that range,
// including negative values, yields false.
func isProhibitedControlChar(c int) bool {
	inControlRange := c >= 0x00 && c <= 0x1F
	// Short-circuiting keeps the helper calls limited to in-range values.
	return inControlRange && !isStringWhitespace(c) && !isNewLineChar(c)
}

// isStringWhitespace reports whether c is a horizontal tab (0x09),
// a vertical tab (0x0B) or a form feed (0x0C).
func isStringWhitespace(c int) bool {
	switch c {
	case '\t', '\v', '\f':
		return true
	default:
		return false
	}
}

// isNewLineChar reports whether c is a line feed (0x0A) or a
// carriage return (0x0D).
func isNewLineChar(c int) bool {
	switch c {
	case '\n', '\r':
		return true
	}
	return false
}

0 comments on commit 82b4807

Please sign in to comment.