[mathicgb] 305/393: Added a new Scanner class that makes reading formatted input SOOO much nicer.

Doug Torrance dtorrance-guest at moszumanska.debian.org
Fri Apr 3 15:59:26 UTC 2015


This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch upstream
in repository mathicgb.

commit 61282bff2f25733847ed3bcfb2faf02ac54afb09
Author: Bjarke Hammersholt Roune <bjarkehr.code at gmail.com>
Date:   Wed May 1 16:10:53 2013 +0200

    Added a new Scanner class that makes reading formatted input SOOO much nicer.
---
 src/mathicgb/Scanner.cpp | 147 +++++++++++++++++++++++++++++++
 src/mathicgb/Scanner.hpp | 225 +++++++++++++++++++++++++++++++++++++++++++++++
 src/mathicgb/Unchar.hpp  |  39 ++++++++
 src/test/Scanner.cpp     |  97 ++++++++++++++++++++
 4 files changed, 508 insertions(+)

diff --git a/src/mathicgb/Scanner.cpp b/src/mathicgb/Scanner.cpp
new file mode 100755
index 0000000..6fc72a9
--- /dev/null
+++ b/src/mathicgb/Scanner.cpp
@@ -0,0 +1,147 @@
+#include "stdinc.h"
+#include "Scanner.hpp"
+
+#include <mathic.h>
+#include <limits>
+#include <sstream>
+#include <cstring>
+
+/// Reports a syntax error with the message s by forwarding it to
+/// mathic::reportError.
+void reportSyntaxError(std::string s) {
+  mathic::reportError(s);
+}
+
+/// Reports msg as a syntax error by forwarding it to reportSyntaxError().
+void Scanner::reportError(std::string msg) const {
+  reportSyntaxError(msg);
+}
+
+/// Number of chars that the FILE* and std::istream constructors allocate
+/// for the read buffer.
+static const size_t BufferSize = 10 * 1024;
+
+/// Builds a Scanner that pulls its characters from the C stream input.
+/// The buffer position starts at the end of the buffer so that the
+/// priming get() below performs the first refill and loads the first
+/// character of the input into mChar.
+Scanner::Scanner(FILE* input)
+  : mFile(input)
+  , mStream(nullptr)
+  , mLineCount(1)
+  , mChar(' ')
+  , mBuffer(BufferSize)
+  , mBufferPos(mBuffer.end())
+{
+  get();
+}
+
+/// Builds a Scanner that pulls its characters from the C++ stream input;
+/// only the address of input is kept. The buffer position starts at the
+/// end of the buffer so that the priming get() below performs the first
+/// refill and loads the first character of the input into mChar.
+Scanner::Scanner(std::istream& input)
+  : mFile(nullptr)
+  , mStream(&input)
+  , mLineCount(1)
+  , mChar(' ')
+  , mBuffer(BufferSize)
+  , mBufferPos(mBuffer.end())
+{
+  get();
+}
+
+/// Builds a Scanner that reads the characters of the null-terminated
+/// string input. The whole string is copied into the buffer up front and
+/// there is no FILE* or std::istream to refill from afterwards.
+///
+/// The buffer position must start at the beginning of the buffer: the
+/// priming get() below only takes a character from the buffer when the
+/// position is not at the end, and otherwise asks readBuffer() for more,
+/// which yields EOF when both mFile and mStream are null. Starting at the
+/// end would therefore make the scanner report EOF without ever looking
+/// at the string.
+Scanner::Scanner(const char* const input):
+  mFile(0),
+  mStream(0),
+  mLineCount(1),
+  mChar(' '),
+  mBuffer(input, input + std::strlen(input)),
+  mBufferPos(mBuffer.begin())
+{
+  get();
+}
+
+/// Skips whitespace and then requires the input to continue with exactly
+/// the characters of str, consuming them. On a mismatch a syntax error is
+/// reported that names str and describes what was found instead.
+void Scanner::expect(const char* str) {
+  MATHICGB_ASSERT(str != 0);
+
+  eatWhite();
+
+  for (const char* pos = str; *pos != '\0'; ) {
+    const int actual = get();
+    if (*pos == actual) {
+      ++pos;
+      continue;
+    }
+
+    // Mismatch: describe what was actually read. The description is the
+    // part of str matched so far, followed by the alphanumeric characters
+    // that come next in the input.
+    // TODO: read at least one char in total even if not alnum.
+    std::string seen;
+    if (actual == EOF && pos == str)
+      seen = "no more input";
+    else {
+      seen += '\"';
+      seen += std::string(str, pos);
+      if (isalnum(actual))
+        seen += static_cast<char>(actual);
+      while (isalnum(peek()))
+        seen += static_cast<char>(get());
+      seen += '\"';
+    }
+
+    reportErrorUnexpectedToken(str, seen);
+  }
+}
+
+void Scanner::expectEOF() {
+  eatWhite();
+  if (get() != EOF)
+    reportErrorUnexpectedToken("no more input", "");
+}
+
+void Scanner::errorExpectTwo(char a, char b, int got) {
+  MATHICGB_ASSERT(a != got && b != got);
+  std::ostringstream err;
+  err << '\'' << a << "' or '" << b << '\'';
+  reportErrorUnexpectedToken(err.str(), got);
+}
+
+void Scanner::errorExpectOne(char expected, int got) {
+  MATHICGB_ASSERT(expected != got);
+  std::ostringstream err;
+  err << '\'' << expected << '\'';
+  reportErrorUnexpectedToken(err.str(), got);
+}
+
+/// Turns the character got into a description and reports it through the
+/// string overload: EOF becomes "no more input" and any other value
+/// becomes that character in single quotes.
+void Scanner::reportErrorUnexpectedToken(const std::string& expected, int got) {
+  std::string gotDescription;
+  if (got != EOF) {
+    gotDescription += '\'';
+    gotDescription += static_cast<char>(got);
+    gotDescription += '\'';
+  } else
+    gotDescription = "no more input";
+  reportErrorUnexpectedToken(expected, gotDescription);
+}
+
+void Scanner::reportErrorUnexpectedToken(
+  const std::string& expected,
+  const std::string& got
+) {
+  std::ostringstream errorMsg;
+  errorMsg << "Expected " << expected;
+  if (got != "")
+    errorMsg << ", but got " << got;
+  errorMsg << '.';
+  reportSyntaxError(errorMsg.str());
+}
+
+/// Refills mBuffer from the underlying FILE* or std::istream and returns
+/// the first character of the new contents, leaving mBufferPos just past
+/// it. Returns EOF when there is neither a FILE* nor a stream, when the
+/// previous refill came up short and the source is at end-of-file or in
+/// an error state, or when the read yields no characters.
+int Scanner::readBuffer() {
+  if (mFile == 0 && mStream == 0)
+    return EOF; // nothing to refill from
+
+  // A buffer smaller than its capacity means the previous refill did not
+  // fill it, so a source that is no longer readable has nothing more.
+  const bool lastReadWasShort = mBuffer.size() < mBuffer.capacity();
+  size_t charsRead = 0;
+  if (mFile != 0) {
+    if (lastReadWasShort && (feof(mFile) || ferror(mFile)))
+      return EOF;
+    mBuffer.resize(mBuffer.capacity());
+    charsRead = fread(&mBuffer[0], 1, mBuffer.capacity(), mFile);
+  } else {
+    if (lastReadWasShort && !mStream->good())
+      return EOF;
+    mBuffer.resize(mBuffer.capacity());
+    mStream->read(mBuffer.data(), mBuffer.size());
+    charsRead = mStream->gcount();
+  }
+
+  // Shrink to what was actually read so that end() marks the real end.
+  mBuffer.resize(charsRead);
+  mBufferPos = mBuffer.begin();
+  if (charsRead == 0)
+    return EOF;
+  return *mBufferPos++;
+}
diff --git a/src/mathicgb/Scanner.hpp b/src/mathicgb/Scanner.hpp
new file mode 100755
index 0000000..505a7c5
--- /dev/null
+++ b/src/mathicgb/Scanner.hpp
@@ -0,0 +1,225 @@
+#ifndef MATHICGB_SCANNER_GUARD
+#define MATHICGB_SCANNER_GUARD
+
+#include "Unchar.hpp"
+#include <string>
+#include <cstdio>
+#include <vector>
+#include <limits>
+#include <sstream>
+#include <istream>
+
+/// This class offers an input interface which is more convenient and
+/// often more efficient than dealing with a FILE* or std::istream
+/// directly. It keeps track of the current line number to report
+/// better error messages. Only one Scanner should be reading from a
+/// given FILE* or std::istream due to buffering and line number counting.
+///
+/// All input methods whose documentation does not specifically say
+/// otherwise skip whitespace.
+///
+/// There are five concepts for consuming input through a Scanner:
+///
+/// Read X: Require an X to be in the input, and return what is read.
+///
+/// Expect X: Require the exact value X to be in the input and skip past it.
+///
+/// Match X: Return true if the exact value X is in the input, and in that case
+///   skip past it. Otherwise return false and do nothing else.
+///
+/// MatchRead X: Return true if an X is in the input, and in that case
+///   read the X into a reference parameter. Otherwise return false.
+///
+/// Peek X: Return true if X is the next thing in the input. Do not skip
+///   past anything. May or may not skip whitespace depending on what X is.
+///
+/// If a requirement is not met, Scanner reports a syntax error.
+class Scanner {
+public:
+  /// Construct a Scanner object reading from the input FILE*.
+  Scanner(FILE* input);
+
+  /// Construct a Scanner object reading from the input std::istream.
+  Scanner(std::istream& input);
+
+  /// Construct a Scanner object reading from the null-terminated input
+  /// string.
+  Scanner(const char* const input);
+
+  /// Reads a single character from the input and returns it, or returns
+  /// EOF if there is no more input. Does not skip whitespace.
+  int get();
+
+  /// Return true if the next character is c, and in that case skip
+  /// past it.
+  bool match(char c);
+
+  /// Return true if no more input.
+  bool matchEOF();
+
+  /// Require the next character to be equal to expected. This
+  /// character is skipped past.
+  void expect(char expected);
+
+  /// Require the next character to be equal to a or b. This character
+  /// is skipped past.
+  void expect(char a, char b);
+
+  /// Require the following characters to be equal to str. These
+  /// characters are skipped past.
+  void expect(const char* str);
+
+  /// Require the following characters to be equal to str. These
+  /// characters are skipped past.
+  void expect(const std::string& str) {expect(str.c_str());}
+
+  /// Require that there is no more input.
+  void expectEOF();
+
+  /// Reads a T. T must be an integer type with a std::numeric_limits
+  /// specialization. Negative numbers are allowed if T is signed. If
+  /// negate is true then the sign of the number read is flipped.
+  template<class T>
+  T readInteger(bool negate = false);
+
+  /// Reads a T into t and returns true if the next character is a
+  /// digit. Otherwise returns false and leaves t alone. Does not
+  /// recognize + or - as the start of an integer and does not skip
+  /// whitespace before looking for the digit. negate is passed on to
+  /// readInteger.
+  template<class T>
+  bool matchReadIntegerNoSign(T& t, bool negate = false);
+
+  /// Returns the next character or EOF. Does not skip whitespace.
+  int peek() {return mChar;}
+
+  /// Returns true if the next character is a digit. Does not skip
+  /// whitespace.
+  bool peekDigit() {return std::isdigit(peek());}
+
+  /// Returns true if the next character is whitespace. Does not skip
+  /// whitespace. Whitespace is defined by std::isspace().
+  bool peekWhite() {return isspace(peek());}
+
+  /// Returns the number of newlines seen plus one. Does not skip
+  /// whitespace.
+  uint64 lineCount() const {return mLineCount;}
+
+  /// Reads past any whitespace.
+  inline void eatWhite();
+
+  /// Reports msg as a syntax error.
+  void reportError(std::string msg) const;
+
+private:
+  /// Reports that a or b was expected but that got was read.
+  void errorExpectTwo(char a, char b, int got);
+
+  /// Reports that expected was expected but that got was read.
+  void errorExpectOne(char expected, int got);
+
+  /// Reports that expected was expected but that got was read. The
+  /// overload taking an int describes the character got, or EOF, and
+  /// then calls the overload taking a description of what was read.
+  void reportErrorUnexpectedToken(const std::string& expected, int got);
+  void reportErrorUnexpectedToken
+    (const std::string& expected, const std::string& got);
+
+  /// Refills mBuffer from mFile or mStream and returns the first
+  /// character read, or EOF if nothing could be read.
+  int readBuffer();
+
+  FILE* mFile; // input when reading from a FILE*, otherwise null
+  std::istream* mStream; // input when reading from a stream, otherwise null
+  uint64 mLineCount; // number of newlines consumed by get(), plus one
+  int mChar; // next character on stream
+
+  std::vector<char> mBuffer; // characters read but not yet all consumed
+  std::vector<char>::iterator mBufferPos; // the character after mChar
+};
+
+/// Skips whitespace and returns true if the next character is then EOF.
+/// Nothing other than the whitespace is consumed.
+inline bool Scanner::matchEOF() {
+  eatWhite();
+  return peek() == EOF;
+}
+
+/// Skips whitespace. If the next character is then c, consumes it and
+/// returns true. Otherwise returns false and consumes nothing further.
+inline bool Scanner::match(char c) {
+  eatWhite();
+  if (c != peek())
+    return false;
+  get();
+  return true;
+}
+
+inline void Scanner::expect(char a, char b) {
+  eatWhite();
+  int got = get();
+  if (got != a && got != b)
+    errorExpectTwo(a, b, got);
+}
+
+inline void Scanner::expect(char expected) {
+  eatWhite();
+  int got = get();
+  if (got != expected)
+    errorExpectOne(expected, got);
+}
+
+/// Consumes characters for as long as the next one is whitespace
+/// according to isspace().
+inline void Scanner::eatWhite() {
+  while (isspace(peek()))
+    get();
+}
+
+/// Consumes and returns the next character, or EOF if there is no more
+/// input. Does not skip whitespace.
+///
+/// mChar always holds the character that the next call will return, so
+/// this returns the old mChar and then loads its successor, refilling the
+/// buffer through readBuffer() once the buffer has been used up. The line
+/// count goes up when the character being consumed is a newline.
+///
+/// No diagnostic output is written here: this runs once per character of
+/// input, and printing from it would both flood stdout and require
+/// <iostream>, which this header does not include.
+inline int Scanner::get() {
+  if (mChar == '\n')
+    ++mLineCount;
+  const int oldChar = mChar;
+  if (mBufferPos == mBuffer.end())
+    mChar = readBuffer();
+  else {
+    // NOTE(review): this converts a plain char to int, so where char is
+    // signed a byte of value 0xFF would presumably compare equal to EOF.
+    // Confirm whether non-ASCII input needs to be supported.
+    mChar = *mBufferPos;
+    ++mBufferPos;
+  }
+  return oldChar;
+}
+
+/// Skips whitespace and reads a decimal integer of type T, which may be
+/// preceded by one '+' or '-'. T must be an integer type with a
+/// std::numeric_limits specialization.
+///
+/// If negate is true the sign of the number is flipped, so that reading
+/// "128" as a signed 8-bit T with negate set yields -128 even though +128
+/// would not fit. Leading zeroes are skipped, and zero is accepted with
+/// either sign for every T.
+///
+/// Reports a syntax error if no digit follows the optional sign, if the
+/// number is negative and non-zero while T is unsigned, or if the number
+/// does not fit in T.
+///
+/// Returns the number read.
+template<class T>
+T Scanner::readInteger(const bool negate) {
+  static_assert(
+    std::numeric_limits<T>::is_integer,
+    "Scanner::readInteger requires an integer type."
+  );
+
+  eatWhite();
+  const bool minus = !match('+') && match('-');
+  const bool positive = minus == negate;
+  if (!peekDigit())
+    reportErrorUnexpectedToken("an integer", "");
+  // Skip leading zeroes and return if the number is zero.
+  if (peek() == '0') {
+    while (peek() == '0')
+      get();
+    if (!peekDigit())
+      return static_cast<T>(0);
+  }
+
+  // What remains starts with a digit, and that digit is not the
+  // character '0' since all leading zeroes were consumed above.
+  MATHICGB_ASSERT(peekDigit());
+  MATHICGB_ASSERT(peek() != '0');
+
+  // Checking this here allows us to recognize -0 as non-negative.
+  if (!positive && !std::numeric_limits<T>::is_signed)
+    reportErrorUnexpectedToken("a positive integer", "");
+
+  const auto min = std::numeric_limits<T>::min();
+  const auto max = std::numeric_limits<T>::max();
+  auto t = static_cast<T>(0);
+  while (peekDigit()) {
+    const auto c = static_cast<char>(get());
+    // Accumulate towards the sign of the result so that the most negative
+    // value of T can be reached without first forming its absolute value.
+    const auto d = positive ? c - '0' : -(c - '0');
+    // Check that t * 10 + d stays within [min, max] before computing it.
+    if (positive ? t > (max - d) / 10 : t < (min - d) / 10) {
+      std::ostringstream err;
+      err << "an integer in the range [" << unchar(min)
+          << ", " << unchar(max) << ']';
+      reportErrorUnexpectedToken(err.str(), "");
+    }
+    t = t * 10 + d;
+  }
+  MATHICGB_ASSERT(t != static_cast<T>(0)); // We already handled zero above.
+  return t;
+}
+
+/// If the next character is a digit, reads an integer into t through
+/// readInteger<T>(negate) and returns true. Otherwise returns false and
+/// leaves both t and the input untouched. Whitespace is not skipped
+/// before looking for the digit.
+template<class T>
+bool Scanner::matchReadIntegerNoSign(T& t, bool negate) {
+  if (!peekDigit())
+    return false;
+  t = readInteger<T>(negate);
+  return true;
+}
+
+#endif
diff --git a/src/mathicgb/Unchar.hpp b/src/mathicgb/Unchar.hpp
new file mode 100755
index 0000000..f93e0fc
--- /dev/null
+++ b/src/mathicgb/Unchar.hpp
@@ -0,0 +1,39 @@
+#ifndef MATHICGB_UNCHAR_GUARD
+#define MATHICGB_UNCHAR_GUARD
+
+#include <type_traits>
+
+/// std::ostream and std::istream treat the character types differently
+/// from the other integer types, which is unwanted when a char is used
+/// as a small integer. Unchar<T>::type names the type that a T should be
+/// cast to, by way of unchar(), so that it is handled like any other
+/// integer. This primary template maps every type to itself; the
+/// character types are covered by specializations.
+template<class T>
+struct Unchar {
+  using type = T;
+};
+
+// Strange but true: char, signed char and unsigned char are 3
+// distinct types. Also, the signedness of char is unspecified. This
+// is in contrast to all the other built-in types. For example, int
+// and signed int are always the exact same type.
+
+namespace UncharInternal {
+  /// Names the short type whose signedness matches that of plain char.
+  /// The primary template serves a signed char and the specialization
+  /// for false serves an unsigned char.
+  template<bool Signed = std::is_signed<char>::value>
+  struct ExtendedChar {
+    using type = signed short;
+  };
+
+  template<>
+  struct ExtendedChar<false> {
+    using type = unsigned short;
+  };
+}
+
+/// Plain char widens to the short type of matching signedness.
+template<>
+struct Unchar<char> {
+  using type = UncharInternal::ExtendedChar<>::type;
+};
+
+/// signed char widens to short.
+template<>
+struct Unchar<signed char> {
+  using type = short;
+};
+
+/// unsigned char widens to unsigned short.
+template<>
+struct Unchar<unsigned char> {
+  using type = unsigned short;
+};
+
+/// Returns t converted to Unchar<T>::type, which is T itself unless T is
+/// one of the character types.
+template<class T>
+typename Unchar<T>::type unchar(const T& t) {
+  return static_cast<typename Unchar<T>::type>(t);
+}
+
+#endif
diff --git a/src/test/Scanner.cpp b/src/test/Scanner.cpp
new file mode 100755
index 0000000..428dcba
--- /dev/null
+++ b/src/test/Scanner.cpp
@@ -0,0 +1,97 @@
+#include "mathicgb/stdinc.h"
+#include "mathicgb/Scanner.hpp"
+
+#include <gtest/gtest.h>
+#include <iterator>
+
+namespace {
+  // The letters a through l with nothing in between.
+  const char* const alpha = "abcdefghijkl";
+  // The same letters split by spaces into words of varying length.
+  const char* const alphaSpaced = "a bc def ghij kl";
+  // The words of alphaSpaced, one string per word.
+  const char* const alphas[] = {"a", "bc", "def", "ghij", "kl"};
+}
+
+// Constructing a Scanner on an empty stream and destroying it again must
+// work without anything being read by the test.
+TEST(Scanner, NoOp) {
+  std::istringstream in;
+  Scanner sc(in);
+}
+
+// peek() must show each character of the input, spaces included, and
+// get() must then return that same character. The loop is bounded by
+// alphaSpaced, the string that is actually being scanned and indexed, so
+// that every character of the input is checked.
+TEST(Scanner, PeekAndGet) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (size_t i = 0; alphaSpaced[i] != '\0'; ++i) {
+    ASSERT_EQ(alphaSpaced[i], in.peek());
+    ASSERT_EQ(alphaSpaced[i], in.get());
+  }
+}
+
+// match() must skip the spaces in the input, reject a character that is
+// not next and accept each letter in turn. matchEOF() must hold only
+// once all the letters have been consumed.
+TEST(Scanner, Match) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (const char* letter = alpha; *letter != '\0'; ++letter) {
+    ASSERT_FALSE(in.match('!'));
+    ASSERT_FALSE(in.matchEOF());
+    ASSERT_TRUE(in.match(*letter));
+  }
+  ASSERT_TRUE(in.matchEOF());
+}
+
+// expect(char) must accept each letter of the spaced input in turn,
+// after which no input may remain.
+TEST(Scanner, ExpectChar) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (const char* letter = alpha; *letter != '\0'; ++letter)
+    in.expect(*letter);
+  in.expectEOF();
+}
+
+// expect(char, char) must accept the next letter whether it is passed as
+// the first or as the second alternative, so the position of the letter
+// alternates from one call to the next.
+TEST(Scanner, ExpectTwoChars) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (const char* letter = alpha; *letter != '\0'; ++letter) {
+    const bool letterGoesSecond = (letter - alpha) % 2 == 0;
+    if (letterGoesSecond)
+      in.expect('!', *letter);
+    else
+      in.expect(*letter, '!');
+  }
+  in.expectEOF();
+}
+
+// expect() on a string must accept each word of the spaced input,
+// alternating between the const char* and the std::string overload.
+TEST(Scanner, ExpectString) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  size_t index = 0;
+  for (const char* word : alphas) {
+    if (index % 2 == 0)
+      in.expect(word);
+    else
+      in.expect(std::string(word));
+    ++index;
+  }
+}
+
+// Reads the numbers in the input one after the other as character-sized
+// integers. NOTE(review): the expected values for -1, 127 and -128
+// assume that plain char is a signed 8-bit type -- confirm for platforms
+// where char is unsigned.
+TEST(Scanner, readInteger) {
+  std::stringstream s("0 1 +0 -0 +1 -1 127 -128 128");
+  Scanner in(s);
+  // Plain zero and one.
+  ASSERT_EQ(0, in.readInteger<signed char>());
+  ASSERT_EQ(1, in.readInteger<char>());
+  // Zero with an explicit sign: "+0" read as an unsigned type, then "-0".
+  ASSERT_EQ(0, in.readInteger<unsigned char>());
+  ASSERT_EQ(0, in.readInteger<char>());
+  // One with an explicit sign.
+  ASSERT_EQ(1, in.readInteger<char>());
+  ASSERT_EQ(-1, in.readInteger<char>());
+  // The extreme values 127 and -128.
+  ASSERT_EQ(127, in.readInteger<char>());
+  ASSERT_EQ(-128, in.readInteger<char>());
+  // "128" read with negate set to true comes out as -128.
+  ASSERT_EQ(-128, in.readInteger<char>(true));
+}
+
+// Checks whitespace skipping together with the line count, which goes up
+// by one for every newline that has been consumed.
+TEST(Scanner, WhiteAndLineCount) {
+  std::stringstream s(" \t\n\rx\n\n\ny");
+  Scanner in(s);
+  // Nothing has been consumed yet, so this is line 1 and the leading
+  // space is still the next character.
+  ASSERT_EQ(1, in.lineCount());
+  ASSERT_TRUE(in.peek() == ' ');
+  ASSERT_TRUE(in.peekWhite());
+  // Space, tab, newline and carriage return are all skipped as whitespace.
+  in.eatWhite();
+  ASSERT_TRUE(in.peek() == 'x');
+  ASSERT_TRUE(in.match('x'));
+  // One newline was consumed on the way to x.
+  ASSERT_EQ(2, in.lineCount());
+  // Matching y skips the three newlines in front of it.
+  ASSERT_TRUE(in.match('y'));
+  in.expectEOF();
+  ASSERT_EQ(5, in.lineCount());
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mathicgb.git



More information about the debian-science-commits mailing list