[mathicgb] 305/393: Added a new Scanner class that makes reading formatted input SOOO much nicer.
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Fri Apr 3 15:59:26 UTC 2015
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch upstream
in repository mathicgb.
commit 61282bff2f25733847ed3bcfb2faf02ac54afb09
Author: Bjarke Hammersholt Roune <bjarkehr.code at gmail.com>
Date: Wed May 1 16:10:53 2013 +0200
Added a new Scanner class that makes reading formatted input SOOO much nicer.
---
src/mathicgb/Scanner.cpp | 147 +++++++++++++++++++++++++++++++
src/mathicgb/Scanner.hpp | 225 +++++++++++++++++++++++++++++++++++++++++++++++
src/mathicgb/Unchar.hpp | 39 ++++++++
src/test/Scanner.cpp | 97 ++++++++++++++++++++
4 files changed, 508 insertions(+)
diff --git a/src/mathicgb/Scanner.cpp b/src/mathicgb/Scanner.cpp
new file mode 100755
index 0000000..6fc72a9
--- /dev/null
+++ b/src/mathicgb/Scanner.cpp
@@ -0,0 +1,147 @@
+#include "stdinc.h"
+#include "Scanner.hpp"
+
+#include <mathic.h>
+#include <limits>
+#include <sstream>
+#include <cstring>
+
+// Reports a syntax error with the message s by forwarding it unchanged to
+// mathic::reportError.
+// NOTE(review): whether mathic::reportError returns or throws is not visible
+// from here -- confirm against mathic's error handling.
+void reportSyntaxError(std::string s) {
+ mathic::reportError(s);
+}
+
+// Member-function entry point for reporting an error with message msg. It
+// simply hands msg on to reportSyntaxError and does not read any Scanner
+// state.
+void Scanner::reportError(std::string msg) const {
+ reportSyntaxError(msg);
+}
+
+// Number of chars that the FILE* and std::istream constructors size mBuffer
+// to. readBuffer() reads up to the buffer's capacity in one go.
+static const size_t BufferSize = 10 * 1024;
+
+/// Sets up a Scanner that takes its characters from the C stream input.
+/// The buffer position starts at the end of the buffer, so the priming get()
+/// in the body goes straight to readBuffer() to load the first chunk and
+/// make the first character available through peek().
+Scanner::Scanner(FILE* input):
+  mFile(input),
+  mStream(nullptr),
+  mLineCount(1),
+  mChar(' '),
+  mBuffer(BufferSize),
+  mBufferPos(mBuffer.end())
+{
+  get();
+}
+
+/// Sets up a Scanner that takes its characters from the C++ stream input.
+/// Only the address of input is stored. The buffer position starts at the
+/// end of the buffer, so the priming get() in the body goes straight to
+/// readBuffer() to load the first chunk and make the first character
+/// available through peek().
+Scanner::Scanner(std::istream& input):
+  mFile(nullptr),
+  mStream(&input),
+  mLineCount(1),
+  mChar(' '),
+  mBuffer(BufferSize),
+  mBufferPos(mBuffer.end())
+{
+  get();
+}
+
+/// Sets up a Scanner that reads the characters of the null-terminated
+/// string input. The whole string is copied into the buffer up front, and
+/// neither a FILE* nor a stream is attached.
+Scanner::Scanner(const char* const input):
+  mFile(0),
+  mStream(0),
+  mLineCount(1),
+  mChar(' '),
+  mBuffer(input, input + std::strlen(input)),
+  // Start at the beginning of the copied text. Starting at end() would make
+  // the priming get() below call readBuffer(), which has no file or stream
+  // to read from and so reports EOF without the string ever being scanned.
+  mBufferPos(mBuffer.begin())
+{
+  // Discards the placeholder ' ' in mChar and loads the first character of
+  // the string (or EOF if the string is empty).
+  get();
+}
+
+/// Skips leading whitespace and then consumes the characters of str one at
+/// a time. At the first character that differs from str, an unexpected-token
+/// error is reported. Its description quotes the part of str that did match
+/// followed by the alphanumeric characters that come next in the input, or
+/// says "no more input" if the input ended before anything matched.
+void Scanner::expect(const char* str) {
+  MATHICGB_ASSERT(str != 0);
+
+  eatWhite();
+
+  const char* pos = str;
+  while (*pos != '\0') {
+    const int ch = get();
+    if (*pos != ch) {
+      // Read the rest of what is there to improve error message.
+      // TODO: read at least one char in total even if not alnum.
+      std::ostringstream seen;
+      if (ch == EOF && pos == str)
+        seen << "no more input";
+      else {
+        seen << '\"' << std::string(str, pos);
+        if (isalnum(ch))
+          seen << static_cast<char>(ch);
+        while (isalnum(peek()))
+          seen << static_cast<char>(get());
+        seen << '\"';
+      }
+
+      reportErrorUnexpectedToken(str, seen.str());
+    } else
+      ++pos;
+  }
+}
+
+/// Skips whitespace and then consumes one character. Anything other than
+/// EOF at that point is reported as an unexpected token.
+void Scanner::expectEOF() {
+  eatWhite();
+  const int next = get();
+  if (next == EOF)
+    return;
+  reportErrorUnexpectedToken("no more input", "");
+}
+
+/// Reports that either of the characters a or b was required but got was
+/// read instead. The expectation is rendered as 'a' or 'b'.
+void Scanner::errorExpectTwo(char a, char b, int got) {
+  MATHICGB_ASSERT(a != got && b != got);
+  std::string expected;
+  expected += '\'';
+  expected += a;
+  expected += "' or '";
+  expected += b;
+  expected += '\'';
+  reportErrorUnexpectedToken(expected, got);
+}
+
+/// Reports that the character expected was required but got was read
+/// instead. The expectation is rendered in single quotes.
+void Scanner::errorExpectOne(char expected, int got) {
+  MATHICGB_ASSERT(expected != got);
+  std::string quoted;
+  quoted += '\'';
+  quoted += expected;
+  quoted += '\'';
+  reportErrorUnexpectedToken(quoted, got);
+}
+
+/// Turns the character code got into a description -- "no more input" for
+/// EOF, otherwise the character in single quotes -- and passes it on to the
+/// string-based overload together with expected.
+void Scanner::reportErrorUnexpectedToken(const std::string& expected, int got) {
+  std::string description;
+  if (got == EOF)
+    description = "no more input";
+  else {
+    description += '\'';
+    description += static_cast<char>(got);
+    description += '\'';
+  }
+  reportErrorUnexpectedToken(expected, description);
+}
+
+/// Builds the message "Expected <expected>, but got <got>." and reports it
+/// as a syntax error. The ", but got ..." part is left out when got is
+/// empty.
+void Scanner::reportErrorUnexpectedToken(
+  const std::string& expected,
+  const std::string& got
+) {
+  std::string message = "Expected " + expected;
+  if (!got.empty())
+    message += ", but got " + got;
+  message += '.';
+  reportSyntaxError(message);
+}
+
+// Refills mBuffer from the attached FILE* or std::istream and returns the
+// first character of the newly read data, or EOF when nothing could be
+// read. On success mBufferPos is left pointing just past the returned
+// character; mBuffer.end() marks the end of the valid data.
+int Scanner::readBuffer() {
+ size_t read;
+ if (mFile != 0) {
+ // A buffer smaller than its capacity means an earlier refill came up
+ // short. If the FILE* also reports end-of-file or an error, give up
+ // without trying to read again.
+ if (mBuffer.size() < mBuffer.capacity() && (feof(mFile) || ferror(mFile)))
+ return EOF;
+ // Grow back to full capacity so fread has room for a whole chunk.
+ mBuffer.resize(mBuffer.capacity());
+ read = fread(&mBuffer[0], 1, mBuffer.capacity(), mFile);
+ } else if (mStream != 0) {
+ MATHICGB_ASSERT(mStream != 0);
+ // Same short-refill check as above, using the stream's state.
+ if (mBuffer.size() < mBuffer.capacity() && !mStream->good())
+ return EOF;
+ mBuffer.resize(mBuffer.capacity());
+ mStream->read(reinterpret_cast<char*>(mBuffer.data()), mBuffer.size());
+ // gcount() is the number of chars the read() call above extracted.
+ read = mStream->gcount();
+ } else
+ // Neither a FILE* nor a stream is attached: nothing to refill from.
+ return EOF;
+ // Shrink to what was actually read so that end() delimits valid data.
+ mBuffer.resize(read);
+ mBufferPos = mBuffer.begin();
+ if (read == 0)
+ return EOF;
+ // NOTE(review): c is a plain char. Where char is signed, a byte with the
+ // top bit set converts to a negative int, and byte 0xFF becomes -1, which
+ // equals EOF if EOF is -1 -- confirm whether non-ASCII input must be
+ // supported.
+ const char c = *mBufferPos;
+ ++mBufferPos;
+ return c;
+}
diff --git a/src/mathicgb/Scanner.hpp b/src/mathicgb/Scanner.hpp
new file mode 100755
index 0000000..505a7c5
--- /dev/null
+++ b/src/mathicgb/Scanner.hpp
@@ -0,0 +1,225 @@
+#ifndef MATHICGB_SCANNER_GUARD
+#define MATHICGB_SCANNER_GUARD
+
+#include "Unchar.hpp"
+#include <string>
+#include <cstdio>
+#include <vector>
+#include <limits>
+#include <sstream>
+#include <istream>
+
+/// This class offers an input interface which is more convenient and
+/// often more efficient than dealing with a FILE* or std::istream
+/// directly. It keeps track of the current line number to report
+/// better error messages. Only one Scanner should be reading from a
+/// given FILE* or std::istream due to buffering and line number counting.
+///
+/// All input methods whose documentation does not specifically say
+/// otherwise skip whitespace.
+///
+/// There are five concepts for consuming input through a Scanner:
+///
+/// Read X: Require an X to be in the input, and return what is read.
+///
+/// Expect X: Require the exact value X to be in the input and skip past it.
+///
+/// Match X: Return true if the exact value X is in the input, and in that case
+/// skip past it. Otherwise return false and do nothing else.
+///
+/// MatchRead X: Return true if an X is in the input. In that case,
+/// read the X into a reference parameter and return true.
+///
+/// Peek X: Return true if X is the next thing in the input. Do not skip
+/// past anything. May or may not skip whitespace depending on what X is.
+///
+/// If a requirement is not met, Scanner reports a syntax error.
+class Scanner {
+public:
+ /// Construct a Scanner object reading from the input FILE*.
+ Scanner(FILE* input);
+
+ /// Construct a Scanner object reading from the input std::istream.
+ Scanner(std::istream& input);
+
+ /// Construct a Scanner object reading from the input string.
+ Scanner(const char* const input);
+
+ /// Reads a single character from the stream and returns it, or returns
+ /// EOF if there is no more input. Does not skip whitespace.
+ int get();
+
+ /// Return true if the next character is c, and in that case skip
+ /// past it.
+ bool match(char c);
+
+ /// Return true if no more input.
+ bool matchEOF();
+
+ /// Require the next character to be equal to expected. This
+ /// character is skipped past.
+ void expect(char expected);
+
+ /// Require the next character to be equal to a or b. This character
+ /// is skipped past.
+ void expect(char a, char b);
+
+ /// Require the following characters to be equal to str. These
+ /// characters are skipped past.
+ void expect(const char* str);
+
+ /// Require the following characters to be equal to str. These
+ /// characters are skipped past.
+ void expect(const std::string& str) {expect(str.c_str());}
+
+ /// Require that there is no more input.
+ void expectEOF();
+
+ /// Reads a T. T must be an integer type with a std::numeric_limits
+ /// specialization. Negative numbers are allowed if T is signed. If
+ /// negate is true, the sign of the number that is read is flipped.
+ template<class T>
+ T readInteger(bool negate = false);
+
+ /// Reads a T if it is there. Does not recognize + or - as the start
+ /// of an integer. Does not skip whitespace before looking for the
+ /// first digit. Returns true and stores the value in t if an integer
+ /// was read; otherwise returns false and leaves t alone. negate is
+ /// passed on to readInteger.
+ template<class T>
+ bool matchReadIntegerNoSign(T& t, bool negate = false);
+
+ /// Returns the next character or EOF. Does not skip whitespace.
+ int peek() {return mChar;}
+
+ /// Returns true if the next character is a digit. Does not skip
+ /// whitespace.
+ bool peekDigit() {return std::isdigit(peek());}
+
+ /// Returns true if the next character is whitespace. Does not skip
+ /// whitespace. Whitespace is defined by std::isspace().
+ bool peekWhite() {return isspace(peek());}
+
+ /// Returns the number of newlines seen plus one. Does not skip
+ /// whitespace.
+ uint64 lineCount() const {return mLineCount;}
+
+ /// Reads past any whitespace.
+ inline void eatWhite();
+
+ /// Reports msg as a syntax error.
+ void reportError(std::string msg) const;
+
+private:
+ /// Report that a or b was expected but got was read.
+ void errorExpectTwo(char a, char b, int got);
+ /// Report that expected was expected but got was read.
+ void errorExpectOne(char expected, int got);
+
+ /// Report an "Expected ..., but got ..." syntax error. got is either a
+ /// character code (possibly EOF) or a ready-made description; an empty
+ /// description omits the "but got" part.
+ void reportErrorUnexpectedToken(const std::string& expected, int got);
+ void reportErrorUnexpectedToken
+ (const std::string& expected, const std::string& got);
+
+ /// Refills mBuffer from mFile or mStream and returns the first new
+ /// character, or EOF if nothing could be read.
+ int readBuffer();
+
+ FILE* mFile; // input file, or null if not reading from a FILE*
+ std::istream* mStream; // input stream, or null if not reading from a stream
+ uint64 mLineCount; // one plus the number of newlines consumed so far
+ int mChar; // next character on stream
+
+ std::vector<char> mBuffer; // characters read ahead from the input
+ std::vector<char>::iterator mBufferPos; // first not-yet-used char in mBuffer
+};
+
+/// Skips whitespace and reports whether the input is then exhausted.
+/// Nothing beyond the whitespace is consumed.
+inline bool Scanner::matchEOF() {
+  eatWhite();
+  const bool atEnd = (peek() == EOF);
+  return atEnd;
+}
+
+/// Skips whitespace, then consumes the next character and returns true if
+/// it equals c. Otherwise returns false and leaves that character in place.
+inline bool Scanner::match(char c) {
+  eatWhite();
+  if (c != peek())
+    return false;
+  get();
+  return true;
+}
+
+/// Skips whitespace and consumes one character, which must be a or b.
+/// Any other character (or EOF) is reported through errorExpectTwo.
+inline void Scanner::expect(char a, char b) {
+  eatWhite();
+  const int actual = get();
+  const bool accepted = (actual == a || actual == b);
+  if (!accepted)
+    errorExpectTwo(a, b, actual);
+}
+
+/// Skips whitespace and consumes one character, which must be expected.
+/// Any other character (or EOF) is reported through errorExpectOne.
+inline void Scanner::expect(char expected) {
+  eatWhite();
+  const int actual = get();
+  if (actual == expected)
+    return;
+  errorExpectOne(expected, actual);
+}
+
+/// Consumes characters for as long as the next one is whitespace.
+inline void Scanner::eatWhite() {
+  for (;;) {
+    if (!peekWhite())
+      return;
+    get();
+  }
+}
+
+/// Returns the current look-ahead character (the one peek() reports) and
+/// advances the look-ahead by one character, refilling the buffer through
+/// readBuffer() once it has been used up. Returns EOF when the look-ahead
+/// is EOF. Does not skip whitespace.
+///
+/// This used to print a trace line to std::cout for every character read.
+/// That was leftover debug output and has been removed; it flooded the
+/// output of every user of the scanner and relied on <iostream>, which this
+/// header does not include.
+inline int Scanner::get() {
+  // The character being handed out is a newline, so everything after it
+  // belongs to the next line.
+  if (mChar == '\n')
+    ++mLineCount;
+  const int oldChar = mChar;
+  if (mBufferPos == mBuffer.end())
+    mChar = readBuffer();
+  else {
+    mChar = *mBufferPos;
+    ++mBufferPos;
+  }
+  return oldChar;
+}
+
+/// Reads an integer of type T, which must be an integer type with a
+/// std::numeric_limits specialization. Leading whitespace is skipped and a
+/// single leading '+' or '-' is accepted. If negate is true the sign of the
+/// number read is flipped, so "128" read with negate set yields -128.
+///
+/// A syntax error is reported if no digit follows the optional sign, if a
+/// non-zero negative value is read for an unsigned T, or if the value does
+/// not fit in T. Zero is accepted with either sign.
+///
+/// The stray debug print to std::cout that used to run on every call has
+/// been removed.
+template<class T>
+T Scanner::readInteger(const bool negate) {
+  static_assert(std::numeric_limits<T>::is_integer, "");
+
+  eatWhite();
+  const bool minus = !match('+') && match('-');
+  const bool positive = minus == negate;
+  if (!peekDigit())
+    reportErrorUnexpectedToken("an integer", "");
+  // Skip leading zeroes and return if the number is zero.
+  if (peek() == '0') {
+    while (peek() == '0')
+      get();
+    if (!peekDigit())
+      return static_cast<T>(0);
+  }
+
+  // At this point the next character is a non-zero digit. The second check
+  // compares against the character '0'; it previously compared against the
+  // integer 0 (NUL), which did not test what was intended.
+  MATHICGB_ASSERT(peekDigit());
+  MATHICGB_ASSERT(peek() != '0');
+
+  // Checking this here allows us to recognize -0 as non-negative.
+  if (!positive && !std::numeric_limits<T>::is_signed)
+    reportErrorUnexpectedToken("a positive integer", "");
+
+  const auto min = std::numeric_limits<T>::min();
+  const auto max = std::numeric_limits<T>::max();
+  auto t = static_cast<T>(0);
+  while (peekDigit()) {
+    const auto c = static_cast<char>(get());
+    // Negative numbers are accumulated downwards from zero so that the
+    // minimum value of T can be read without overflowing on the way.
+    const auto d = positive ? c - '0' : -(c - '0');
+    // Reject the digit if t * 10 + d would fall outside [min, max]; the
+    // test is arranged so that it cannot itself overflow.
+    if (positive ? t > (max - d) / 10 : t < (min - d) / 10) {
+      std::ostringstream err;
+      err << "an integer in the range [" << unchar(min)
+        << ", " << unchar(max) << ']';
+      reportErrorUnexpectedToken(err.str(), "");
+    }
+    t = t * 10 + d;
+  }
+  MATHICGB_ASSERT(t != static_cast<T>(0)); // We already handled zero above.
+  return t;
+}
+
+/// If the next character is a digit, reads an integer into t via
+/// readInteger (passing negate along) and returns true. Otherwise returns
+/// false without consuming anything or touching t. Whitespace is not
+/// skipped before the digit check.
+template<class T>
+bool Scanner::matchReadIntegerNoSign(T& t, bool negate) {
+  if (!peekDigit())
+    return false;
+  t = readInteger<T>(negate);
+  return true;
+}
+
+#endif
diff --git a/src/mathicgb/Unchar.hpp b/src/mathicgb/Unchar.hpp
new file mode 100755
index 0000000..f93e0fc
--- /dev/null
+++ b/src/mathicgb/Unchar.hpp
@@ -0,0 +1,39 @@
+#ifndef MATHICGB_UNCHAR_GUARD
+#define MATHICGB_UNCHAR_GUARD
+
+#include <type_traits>
+
+/// std::ostream and std::istream treat the character types as text rather
+/// than as numbers, which is unwanted when a char is used as a small
+/// integer. Unchar<T>::type names the type to convert a T to so that it is
+/// handled like any other integer: a short of matching signedness for the
+/// three character types and T itself for every other type. unchar()
+/// performs that conversion on a value.
+template<class T>
+struct Unchar {using type = T;};
+
+// Note that char, signed char and unsigned char are three distinct types,
+// and that whether plain char is signed is unspecified. No other built-in
+// type behaves like this; int and signed int, for example, are one and the
+// same type.
+
+namespace UncharInternal {
+  // Selects the short type whose signedness agrees with that of plain char.
+  template<bool Signed = std::is_signed<char>::value>
+  struct ExtendedChar {using type = signed short;};
+  template<>
+  struct ExtendedChar<false> {using type = unsigned short;};
+}
+
+template<>
+struct Unchar<char> {
+  using type = UncharInternal::ExtendedChar<>::type;
+};
+template<>
+struct Unchar<signed char> {using type = short;};
+template<>
+struct Unchar<unsigned char> {using type = unsigned short;};
+
+/// Returns t converted to Unchar<T>::type.
+template<class T>
+typename Unchar<T>::type unchar(const T& t) {
+  return t;
+}
+
+#endif
diff --git a/src/test/Scanner.cpp b/src/test/Scanner.cpp
new file mode 100755
index 0000000..428dcba
--- /dev/null
+++ b/src/test/Scanner.cpp
@@ -0,0 +1,97 @@
+#include "mathicgb/stdinc.h"
+#include "mathicgb/Scanner.hpp"
+
+#include <gtest/gtest.h>
+#include <iterator>
+
+// Shared inputs for the tests below. alphaSpaced is alpha with spaces
+// inserted, and alphas lists the space-separated words of alphaSpaced in
+// order.
+namespace {
+ const char* const alpha = "abcdefghijkl";
+ const char* const alphaSpaced = "a bc def ghij kl";
+ const char* const alphas[] = {"a", "bc", "def", "ghij", "kl"};
+}
+
+// Constructing and destroying a Scanner over an empty stream must work
+// without any further calls being made on it.
+TEST(Scanner, NoOp) {
+ std::istringstream in;
+ Scanner sc(in);
+}
+
+// peek() and get() must hand out every character of the input in order,
+// including the spaces, since neither of them skips whitespace.
+TEST(Scanner, PeekAndGet) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  // Walk the string that was actually fed to the scanner. The loop used to
+  // stop at the length of the shorter alpha string, which left the tail of
+  // alphaSpaced unchecked.
+  for (size_t i = 0; alphaSpaced[i] != '\0'; ++i) {
+    ASSERT_EQ(alphaSpaced[i], in.peek());
+    ASSERT_EQ(alphaSpaced[i], in.get());
+  }
+}
+
+// match() must skip the spaces in the input, refuse a character that is not
+// next, and accept each letter in turn; matchEOF() must only succeed once
+// all letters are gone.
+TEST(Scanner, Match) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (const char* letter = alpha; *letter != '\0'; ++letter) {
+    ASSERT_FALSE(in.match('!'));
+    ASSERT_FALSE(in.matchEOF());
+    ASSERT_TRUE(in.match(*letter));
+  }
+  ASSERT_TRUE(in.matchEOF());
+}
+
+// expect(char) must accept each letter of the spaced input in order,
+// skipping the spaces, and leave nothing behind.
+TEST(Scanner, ExpectChar) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (const char* letter = alpha; *letter != '\0'; ++letter) {
+    in.expect(*letter);
+  }
+  in.expectEOF();
+}
+
+// expect(char, char) must accept the letter that is next in the input
+// whether it is given as the first or as the second alternative.
+TEST(Scanner, ExpectTwoChars) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  for (size_t i = 0; alpha[i] != '\0'; ++i) {
+    const char wanted = alpha[i];
+    const bool wantedGoesSecond = (i % 2 == 0);
+    if (wantedGoesSecond)
+      in.expect('!', wanted);
+    else
+      in.expect(wanted, '!');
+  }
+  in.expectEOF();
+}
+
+// expect() must accept each word of the spaced input, alternating between
+// the const char* overload and the std::string overload.
+TEST(Scanner, ExpectString) {
+  std::stringstream s(alphaSpaced);
+  Scanner in(s);
+  size_t index = 0;
+  for (const char* const word : alphas) {
+    const bool useCString = (index % 2 == 0);
+    if (useCString)
+      in.expect(word);
+    else
+      in.expect(std::string(word));
+    ++index;
+  }
+}
+
+// readInteger must handle zero with and without a sign, both signs on
+// non-zero values, the extreme values of an 8-bit signed type, and the
+// negate flag (128 read with negate set is -128, which fits even though
+// +128 would not).
+//
+// signed char is used instead of plain char because the signedness of char
+// is implementation-defined: where char is unsigned, reading "-1" or "-128"
+// as a char would be rejected by the scanner.
+TEST(Scanner, readInteger) {
+  std::stringstream s("0 1 +0 -0 +1 -1 127 -128 128");
+  Scanner in(s);
+  ASSERT_EQ(0, in.readInteger<signed char>());
+  ASSERT_EQ(1, in.readInteger<signed char>());
+  ASSERT_EQ(0, in.readInteger<unsigned char>());
+  ASSERT_EQ(0, in.readInteger<signed char>());
+  ASSERT_EQ(1, in.readInteger<signed char>());
+  ASSERT_EQ(-1, in.readInteger<signed char>());
+  ASSERT_EQ(127, in.readInteger<signed char>());
+  ASSERT_EQ(-128, in.readInteger<signed char>());
+  ASSERT_EQ(-128, in.readInteger<signed char>(true));
+}
+
+// Checks whitespace skipping together with line counting. The input has one
+// newline before 'x' and three more between 'x' and 'y'.
+TEST(Scanner, WhiteAndLineCount) {
+ std::stringstream s(" \t\n\rx\n\n\ny");
+ Scanner in(s);
+ // Nothing has been consumed yet, so we are on line 1 looking at the
+ // leading space.
+ ASSERT_EQ(1, in.lineCount());
+ ASSERT_TRUE(in.peek() == ' ');
+ ASSERT_TRUE(in.peekWhite());
+ in.eatWhite();
+ ASSERT_TRUE(in.peek() == 'x');
+ ASSERT_TRUE(in.match('x'));
+ // One newline was consumed on the way to 'x'.
+ ASSERT_EQ(2, in.lineCount());
+ // match() skips the three newlines in front of 'y'.
+ ASSERT_TRUE(in.match('y'));
+ in.expectEOF();
+ ASSERT_EQ(5, in.lineCount());
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mathicgb.git
More information about the debian-science-commits
mailing list