Skip to content

Commit

Permalink
Add locale for correctly parsing hexadecimal floating point number
Browse files Browse the repository at this point in the history
  • Loading branch information
rupertnash committed Sep 11, 2023
1 parent 43b3288 commit cc7713a
Show file tree
Hide file tree
Showing 8 changed files with 456 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Code/confcheck.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <string>

#include "configuration/SimConfig.h"
#include "io/ensure_hexfloat.h"
#include "net/MpiEnvironment.h"

using SimConfig = hemelb::configuration::SimConfig;
Expand All @@ -24,6 +25,8 @@ int main(int argc, char *argv[])

const auto xml_path = std::string{argv[1]};

hemelb::io::GlobalHexFloatLocale ensure_hexfloat;

// When #755 is closed, remove MPI
hemelb::net::MpiEnvironment mpi(argc, argv);
try {
Expand Down
1 change: 1 addition & 0 deletions Code/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_library(hemelb_io OBJECT
xml.cc
Checkpointer.cc
TimePattern.cc
hexfloat.cc
)

target_link_libraries(hemelb_io PUBLIC
Expand Down
27 changes: 27 additions & 0 deletions Code/io/ensure_hexfloat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// This file is part of HemeLB and is Copyright (C)
// the HemeLB team and/or their institutions, as detailed in the
// file AUTHORS. This software is provided under the terms of the
// license in the file LICENSE.

#ifndef HEMELB_IO_ENSURE_HEXFLOAT_H
#define HEMELB_IO_ENSURE_HEXFLOAT_H

#include <memory>

namespace hemelb::io {

// Scoped guard around the process-wide (global) C++ locale.
//
// On construction: if the current global locale doesn't support
// reading hexfloats, install one that does.
//
// On destruction: restore the locale that was global when the guard
// was constructed.
//
// Because the guard mutates global state and its destructor undoes
// that change, exactly one object must own each installation. Copying
// and moving are therefore explicitly disabled (they were already
// unavailable implicitly, owing to the unique_ptr member and the
// user-declared destructor; spelling it out documents the intent and
// gives clearer diagnostics).
class GlobalHexFloatLocale {
    // Opaque implementation state, defined in the .cc file.
    struct Impl;
    std::unique_ptr<Impl> impl;
public:
    // True if a stream using the current global locale parses a
    // hexadecimal floating point literal to the expected value.
    static bool CurrentCanParseHexFloats();

    GlobalHexFloatLocale();
    ~GlobalHexFloatLocale();

    GlobalHexFloatLocale(const GlobalHexFloatLocale&) = delete;
    GlobalHexFloatLocale& operator=(const GlobalHexFloatLocale&) = delete;
    GlobalHexFloatLocale(GlobalHexFloatLocale&&) = delete;
    GlobalHexFloatLocale& operator=(GlobalHexFloatLocale&&) = delete;
};
}

#endif
204 changes: 204 additions & 0 deletions Code/io/hexfloat.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
// This file is part of HemeLB and is Copyright (C)
// the HemeLB team and/or their institutions, as detailed in the
// file AUTHORS. This software is provided under the terms of the
// license in the file LICENSE.

#include "io/hexfloat.h"
#include "io/ensure_hexfloat.h"
#include <sstream>
#include "Exception.h"
#include "log/Logger.h"

namespace hemelb::io {

namespace {
    // ASCII-only character helpers. They deliberately avoid <cctype>
    // so that classification never depends on the active locale and
    // can be evaluated at compile time.

    // Map 'A'..'Z' onto 'a'..'z'; every other character is returned
    // unchanged.
    constexpr char _tolower(char c) {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
    }

    // True for the ten decimal digits '0'..'9'.
    constexpr bool _is_dec_digit(char c) {
        return '0' <= c && c <= '9';
    }

    // True for '0'..'9' and LOWERCASE 'a'..'f' only; callers are
    // expected to fold case first (uppercase 'A'..'F' yields false).
    constexpr bool _is_hex_digit(char c) {
        return ('a' <= c && c <= 'f') || _is_dec_digit(c);
    }
}

// We have a horrible state machine to determine which
// characters are allowed, as we have both decimal and hexadecimal
// floats to handle.
//
// Consider letters as lowercase.
//
// Dec = decimal, Hex = hexadecimal
// Man = mantissa, Exp = exponent
// Pre and Post refer to the decimal point
//
// Any condition not handled is end of number
//
// Start: +/- => ManSign, 0 => UnknownMan, 1-9 => DecManPre
//
// ManSign: 0 => UnknownMan, 1-9 => DecManPre
//
// UnknownMan: x => HexManPre, 0-9 => DecManPre, . => DecDP,
//             e => DecExpStart
// NOTE: a lone leading zero is already a complete decimal mantissa,
// so it must accept an exponent exactly as DecManPre does; otherwise
// input such as "0e5" or "-0E+00" is cut off after the '0'.
//
// DecManPre: 0-9 => DecManPre, . => DecDP, e => DecExpStart
//
// DecDP: 0-9 => DecManPost, e => DecExpStart
// DecManPost: 0-9 => DecManPost, e => DecExpStart
// NOTE: since these have the same transitions, we can merge
//
// DecExpStart: +- => DecExp, 0-9 => DecExp
//
// DecExp: 0-9 => DecExp
//
// HexManPre: 0-f => HexManPre, . => HexDP, p => DecExpStart
//
// HexDP: 0-f => HexManPost, p => DecExpStart
//
// HexManPost: 0-f => HexManPost, p => DecExpStart
// NOTE: HexDP is merged into HexManPost for the same reason as above.
//
// NOTE: yes, hex floats have decimal exponents!
//
// Feed one character to the state machine.
//
// Returns false if `c` can extend the number seen so far; in that case
// `c` (with its original case) is appended to `buf`.
// Returns true if `c` ends the number; `c` is not stored and the
// state becomes Error.
//
// Throws Exception if the current state has no case below. That
// includes Error, so calling this again after it has returned true
// throws unless the state has been reset elsewhere.
bool FloatCharAccumulator::add_char(char c) {
    const char lc = _tolower(c);
    using enum State;

    switch (state) {

    case Start:
        if (c == '+' || c == '-') {
            state = ManSign;
        } else if (c == '0') {
            state = UnknownMan;
        } else if (c > '0' && c <= '9') {
            state = DecManPre;
        } else {
            state = Error;
        }
        break;

    case ManSign:
        if (c == '0') {
            state = UnknownMan;
        } else if (c > '0' && c <= '9') {
            state = DecManPre;
        } else {
            state = Error;
        }
        break;

    case UnknownMan:
        if (lc == 'x') {
            state = HexManPre;
        } else if (_is_dec_digit(c)) {
            state = DecManPre;
        } else if (c == '.') {
            state = DecManPost; // Would be DecDP but same as this.
        } else if (lc == 'e') {
            // "0" on its own is a valid decimal mantissa, so allow an
            // exponent to follow it, consistent with DecManPre.
            state = DecExpStart;
        } else {
            state = Error;
        }
        break;

    case DecManPre:
        if (_is_dec_digit(c)) {
            state = DecManPre;
        } else if (c == '.') {
            state = DecManPost; // Would be DecDP but same as this.
        } else if (lc == 'e') {
            state = DecExpStart;
        } else {
            state = Error;
        }
        break;

    case DecManPost:
        if (_is_dec_digit(c)) {
            state = DecManPost;
        } else if (lc == 'e') {
            state = DecExpStart;
        } else {
            state = Error;
        }
        break;

    case DecExpStart:
        // An optional sign or the first exponent digit.
        if (c == '+' || c == '-' || _is_dec_digit(c)) {
            state = DecExp;
        } else {
            state = Error;
        }
        break;

    case DecExp:
        if (_is_dec_digit(c)) {
            state = DecExp;
        } else {
            state = Error;
        }
        break;

    case HexManPre:
        // _is_hex_digit only recognises lowercase, hence lc.
        if (_is_hex_digit(lc)) {
            state = HexManPre;
        } else if (c == '.') {
            state = HexManPost; // Would be HexDP but same as this.
        } else if (lc == 'p') {
            state = DecExpStart;
        } else {
            state = Error;
        }
        break;

    case HexManPost:
        if (_is_hex_digit(lc)) {
            state = HexManPost;
        } else if (lc == 'p') {
            state = DecExpStart;
        } else {
            state = Error;
        }
        break;

    default:
        throw (Exception() << "Unknown case in hexfloat character accumulation");
    }

    if (state == Error) {
        return true;
    }
    // Keep the character exactly as supplied (not case-folded).
    buf.push_back(c);
    return false;
}

// Probe whether a freshly created stream (which picks up the current
// global locale) can read a hexadecimal floating point literal.
//
// Returns true only if extraction yields exactly the expected double.
bool GlobalHexFloatLocale::CurrentCanParseHexFloats() {
    // 0.3 can't be exactly represented as FP, so the literal below is
    // the hexfloat spelling of the nearest double. A parser that stops
    // early (e.g. after the leading "0") leaves a different value and
    // the exact comparison fails.
    constexpr double target = 0.3;
    std::istringstream probe("0x1.3333333333333p-2");

    double parsed = 0;
    probe >> parsed;

    // Exact equality is intentional: a correct hexfloat parse is
    // bit-for-bit reproducible.
    return parsed == target;
}

// Private state of GlobalHexFloatLocale, kept out of the header behind
// the unique_ptr<Impl> member.
struct GlobalHexFloatLocale::Impl {
// A default-constructed std::locale is a copy of the global locale at
// the moment of construction, so this records the locale that was
// global when the owning GlobalHexFloatLocale created its Impl.
std::locale original;
};
// Record the current global locale (via Impl) and, if it cannot read
// hexadecimal floats, replace it globally with a copy that carries the
// hexfloat-capable numeric facets for both char and wchar_t.
GlobalHexFloatLocale::GlobalHexFloatLocale() : impl(std::make_unique<Impl>()) {
    if (CurrentCanParseHexFloats()) {
        // The global locale already copes; leave it untouched.
        return;
    }

    log::Logger::Log<log::Info, log::Singleton>(
            "Default locale does not parse hexadecimal floats, installing custom global facets");

    // Layer the facets on top of the saved locale one at a time.
    // NOTE(review): the facets are passed as raw pointers on the
    // assumption that std::locale takes ownership of them - confirm
    // against the facet constructors in io/hexfloat.h.
    const std::locale with_narrow(impl->original, new reliable_hexfloat_numeric_facet<char>);
    const std::locale with_both(with_narrow, new reliable_hexfloat_numeric_facet<wchar_t>);
    std::locale::global(with_both);
}

// Make the locale saved in Impl the global locale again. This runs
// unconditionally, i.e. also when the constructor found that no
// replacement was needed and never changed the global locale.
GlobalHexFloatLocale::~GlobalHexFloatLocale() {
std::locale::global(impl->original);
}
}
Loading

0 comments on commit cc7713a

Please sign in to comment.