// blob: 3f3460b68498e823e3af9aeb15aeef111105ae8b [file] [log] [blame]
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lexer.h"
#include "../message.h"
#include <kj/compat/gtest.h>
namespace capnp {
namespace compiler {
namespace {
// ErrorReporter used by the tests below, where every input is expected to lex cleanly: any
// error that gets reported is converted into an expectation failure.
class TestFailingErrorReporter: public ErrorReporter {
public:
  bool hadErrors() override {
    // Not used by lexer, so a constant answer is sufficient.
    return false;
  }

  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
    // Surface the offending byte range and the message as part of the failure.
    KJ_FAIL_EXPECT("Parse failed.", startByte, endByte, message);
  }
};
template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
  // Runs lex() over `constText`, filling a freshly built message whose root is the Cap'n Proto
  // struct type `LexResult`, and returns the stringified root.
  //
  // To cut down on escaping in the test literals, quotes are swapped on both sides: each single
  // quote in the input is turned into a double quote before lexing, and each double quote in
  // the stringified output is turned into a single quote before it is returned.
  //
  // Matching stringifications against golden strings is ugly and brittle. A text-format parser
  // would be nicer, but it would most likely sit on top of the very lexer under test, so this
  // is probably the best that can reasonably be done.

  // Replaces, in place, every occurrence of `from` in `s` with `to`.
  auto swapChar = [](kj::String& s, char from, char to) {
    for (char& ch: s) {
      if (ch == from) ch = to;
    }
  };

  // Work on a mutable copy so input literals can be written with single quotes.
  kj::String text = heapString(constText);
  swapChar(text, '\'', '\"');

  MallocMessageBuilder builder;
  auto file = builder.initRoot<LexResult>();
  TestFailingErrorReporter errorReporter;
  EXPECT_TRUE(lex(text, file, errorReporter));

  // Golden strings are written with single quotes as well.
  kj::String rendered = kj::str(file);
  swapChar(rendered, '\"', '\'');
  return rendered;
}
// Lexes a series of inputs as LexedTokens and compares the stringified result with a golden
// string. Each golden string pins the byte range (startByte inclusive, endByte exclusive) of
// every token, so the exact amount of whitespace inside the input literals is significant.
TEST(Lexer, Tokens) {
  // Two identifiers separated by a single space.
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 4, endByte = 7)"
      "])",
      doLex<LexedTokens>("foo bar").cStr());

  // A '#' comment running to the end of the line produces no token; `bar` follows the newline
  // and one space, i.e. it starts at byte 15.
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 15, endByte = 18)"
      "])",
      doLex<LexedTokens>("foo # comment\n bar").cStr());

  // String (with an escape), integer, float, exponent-float and operator tokens.
  // The input starts with TWO spaces so that the opening quote sits at byte 2, as the golden
  // string requires; the quoted literal, including the 4-byte `\x20` escape, is 9 bytes long.
  EXPECT_STREQ(
      "(tokens = ["
        "(stringLiteral = 'foo ', startByte = 2, endByte = 11), "
        "(integerLiteral = 123, startByte = 12, endByte = 15), "
        "(floatLiteral = 2.75, startByte = 16, endByte = 20), "
        "(floatLiteral = 60000, startByte = 21, endByte = 24), "
        "(operator = '+', startByte = 25, endByte = 26), "
        "(operator = '-=', startByte = 27, endByte = 29)"
      "])",
      doLex<LexedTokens>("  'foo\\x20' 123 2.75 6e4 + -= ").cStr());

  // A parenthesized list is a list of comma-separated items, each item a list of tokens.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());

  // Parenthesized list with a single item.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "]"
        "], startByte = 0, endByte = 9)"
      "])",
      doLex<LexedTokens>("(foo bar)").cStr());

  // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty
  // list. The input is 4 bytes long (two spaces between the parentheses) to match endByte = 4.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = [], startByte = 0, endByte = 4)"
      "])",
      doLex<LexedTokens>("(  )").cStr());

  // Square brackets produce a bracketedList with the same item structure.
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());

  // Trailing commas should not create an empty final list item, but be stripped by the lexer.
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(identifier = 'bar', startByte = 6, endByte = 9)"
          "]"
        "], startByte = 0, endByte = 11)"
      "])",
      doLex<LexedTokens>("[foo, bar,]").cStr());

  // Lists nest: a parenthesized list as an item of a bracketed list, followed by a plain token.
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(parenthesizedList = ["
              "["
                "(identifier = 'bar', startByte = 7, endByte = 10)"
              "], ["
                "(identifier = 'baz', startByte = 12, endByte = 15)"
              "]"
            "], startByte = 6, endByte = 16)"
          "]"
        "], startByte = 0, endByte = 17), "
        "(identifier = 'qux', startByte = 18, endByte = 21)"
      "])",
      doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());

  // Newline, carriage return, tab and vertical tab all separate tokens.
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 7, endByte = 10)"
      "])",
      doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
}
// Lexes inputs as LexedStatements and compares the stringified result with golden strings.
// Each statement carries its tokens, either `line = void` (terminated by ';') or a nested
// `block` (braces), and the byte range it covers in the input.
TEST(Lexer, Statements) {
// A single ';'-terminated statement; its endByte (8) includes the semicolon.
EXPECT_STREQ(
"(statements = ["
"(tokens = ["
"(identifier = 'foo', startByte = 0, endByte = 3), "
"(identifier = 'bar', startByte = 4, endByte = 7)"
"], line = void, startByte = 0, endByte = 8)"
"])",
doLex<LexedStatements>("foo bar;").cStr());
// Three consecutive statements; the trailing space after the last ';' is not part of any
// statement's byte range.
EXPECT_STREQ(
"(statements = ["
"(tokens = ["
"(identifier = 'foo', startByte = 0, endByte = 3)"
"], line = void, startByte = 0, endByte = 4), "
"(tokens = ["
"(identifier = 'bar', startByte = 5, endByte = 8)"
"], line = void, startByte = 5, endByte = 9), "
"(tokens = ["
"(identifier = 'baz', startByte = 10, endByte = 13)"
"], line = void, startByte = 10, endByte = 14)"
"])",
doLex<LexedStatements>("foo; bar; baz; ").cStr());
// A statement with a braced block: the inner statements appear under `block`, and the outer
// statement's range runs through the closing '}' (endByte = 15). `qux;` is a sibling.
EXPECT_STREQ(
"(statements = ["
"("
"tokens = ["
"(identifier = 'foo', startByte = 0, endByte = 3)"
"], "
"block = ["
"(tokens = ["
"(identifier = 'bar', startByte = 5, endByte = 8)"
"], line = void, startByte = 5, endByte = 9), "
"(tokens = ["
"(identifier = 'baz', startByte = 10, endByte = 13)"
"], line = void, startByte = 10, endByte = 14)"
"], "
"startByte = 0, endByte = 15"
"), "
"(tokens = ["
"(identifier = 'qux', startByte = 16, endByte = 19)"
"], line = void, startByte = 16, endByte = 20)"
"])",
doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
}
// Checks how '#' comments that follow a statement are attached to it as `docComment`, and how
// they extend the statement's byte range. The golden strings pin exact byte offsets, so the
// whitespace inside the input literals is significant.
TEST(Lexer, DocComments) {
  // Comment on the same line as the statement. The single space after '#' is not part of the
  // comment text, and a newline is appended to the text.
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo; # blah blah").cStr());

  // No space after '#': same comment text, one byte shorter.
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 15"
        ")"
      "])",
      doLex<LexedStatements>("foo; #blah blah").cStr());

  // TWO spaces after '#': only the first is dropped, so the comment text keeps one leading
  // space and the statement is one byte longer (endByte = 17).
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = ' blah blah\\n', "
          "startByte = 0, endByte = 17"
        ")"
      "])",
      doLex<LexedStatements>("foo; #  blah blah").cStr());

  // Comment on the line directly below the statement still attaches to it.
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n# blah blah").cStr());

  // A blank line between the statement and the comment detaches it: no docComment, and the
  // statement ends right after the ';'.
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "startByte = 0, endByte = 4"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n\n# blah blah").cStr());

  // Multi-line, indented doc comment. The blank line ends it, so the later `grault`/`garply`
  // comment lines are not included. The comment lines are indented by one and two spaces
  // respectively, which makes the statement end at byte 30 (just past the newline that ends
  // the second comment line).
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'bar baz\\nqux corge\\n', "
          "startByte = 0, endByte = 30"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n # bar baz\n  # qux corge\n\n# grault\n# garply").cStr());

  // Block statement: the comment right after '{' documents the enclosing statement `foo`,
  // `# hi` documents `bar`, and the comment after '}' is not attached to anything although its
  // bytes are counted in `foo`'s range (endByte = 44).
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 17, endByte = 20)"
            "], line = void, docComment = 'hi\\n', startByte = 17, endByte = 27), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 28, endByte = 31)"
            "], line = void, startByte = 28, endByte = 32)"
          "], "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 44"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 44, endByte = 47)"
        "], line = void, startByte = 44, endByte = 48)"
      "])",
      doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());

  // With no comment after '{', a comment on the line following the closing '}' becomes the
  // block statement's doc comment.
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], line = void, startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], line = void, startByte = 10, endByte = 14)"
          "], "
          "docComment = 'late comment\\n', "
          "startByte = 0, endByte = 31"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 31, endByte = 34)"
        "], line = void, startByte = 31, endByte = 35)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
}
// The byte sequence EF BB BF (the UTF-8 byte order mark) is expected to produce no token,
// both at the very start of the input and in the middle of it. Its three bytes still count
// towards the offsets of the tokens that follow (`foo` starts at 3, `baz` at 13).
TEST(Lexer, Utf8Bom) {
EXPECT_STREQ(
"(tokens = ["
"(identifier = 'foo', startByte = 3, endByte = 6), "
"(identifier = 'bar', startByte = 7, endByte = 10), "
"(identifier = 'baz', startByte = 13, endByte = 16)"
"])",
// The literal is split after each "\xbf" so the hex escape does not absorb the following
// hex-digit letters ('f' of "foo", 'b' of "baz").
doLex<LexedTokens>("\xef\xbb\xbf""foo bar\xef\xbb\xbf""baz").cStr());
}
} // namespace
} // namespace compiler
} // namespace capnp