// blob: 02a1dab572520732618a732bba0ed7ab2cb87dad [file] [log] [blame]
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lexer.h"
#include <kj/parse/char.h>
#include <kj/debug.h>
namespace capnp {
namespace compiler {
namespace p = kj::parse;
// Lexes `input` as a sequence of statements and stores the outcome in `result`.
//
// A Lexer is created over the message containing `result`, and its statementSequence parser is
// combined with p::endOfInput so the parse only succeeds if it reaches the end of the input.
//
// Returns true on success, after adopting every parsed statement into a newly initialized
// statements list on `result`.  Returns false on failure, after reporting "Parse error." through
// `errorReporter` at the position given by the parser input's getBest().
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
         ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput);
  Lexer::ParserInput parserInput(input.begin(), input.end());
  kj::Maybe<kj::Array<Orphan<Statement>>> parsed = parser(parserInput);

  KJ_IF_MAYBE(statements, parsed) {
    auto list = result.initStatements(statements->size());
    uint slot = 0;
    for (auto& orphan: *statements) {
      list.adoptWithCaveats(slot++, kj::mv(orphan));
    }
    return true;
  }

  // The parse failed: report a zero-width error at the position getBest() gives us.
  const uint32_t errorPos = parserInput.getBest();
  errorReporter.addError(errorPos, errorPos, kj::str("Parse error."));
  return false;
}
// Lexes `input` as a flat sequence of tokens and stores the outcome in `result`.
//
// Mirrors the LexedStatements overload, but drives the Lexer's tokenSequence parser (again
// followed by p::endOfInput) and fills the tokens list of `result`.
//
// Returns true if the whole input parsed; otherwise reports "Parse error." through
// `errorReporter` at the position given by the parser input's getBest() and returns false.
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
         ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput);
  Lexer::ParserInput parserInput(input.begin(), input.end());
  kj::Maybe<kj::Array<Orphan<Token>>> parsed = parser(parserInput);

  KJ_IF_MAYBE(tokens, parsed) {
    auto list = result.initTokens(tokens->size());
    uint slot = 0;
    for (auto& orphan: *tokens) {
      list.adoptWithCaveats(slot++, kj::mv(orphan));
    }
    return true;
  }

  // The parse failed: report a zero-width error at the position getBest() gives us.
  const uint32_t errorPos = parserInput.getBest();
  errorReporter.addError(errorPos, errorPos, kj::str("Parse error."));
  return false;
}
namespace {
// Span of positions (uint32_t begin/end) passed to the callbacks of p::transformWithLocation.
using Location = p::Span<uint32_t>;
// Stamps the byte range of `loc` onto the token held by orphan `t` and returns that token's
// builder, so the caller can chain a call that fills in the token's actual content.
Token::Builder initTok(Orphan<Token>& t, const Location& loc) {
  Token::Builder token = t.get();
  token.setStartByte(loc.begin());
  token.setEndByte(loc.end());
  return token;
}
// Fills `builder` (a list of token lists) from `items`: element i of `builder` is initialized
// to the size of items[i], and each token orphan of items[i] is then adopted into it in order.
// The caller is expected to have sized `builder` to hold items.size() elements.
void buildTokenSequenceList(List<List<Token>>::Builder builder,
                            kj::Array<kj::Array<Orphan<Token>>>&& items) {
  uint outerIndex = 0;
  for (auto& tokens: items) {
    auto inner = builder.init(outerIndex++, tokens.size());
    uint innerIndex = 0;
    for (auto& orphan: tokens) {
      inner.adoptWithCaveats(innerIndex++, kj::mv(orphan));
    }
  }
}
// Sets the doc comment of `statement` to the lines of `comment` joined together, with a '\n'
// appended after every line (including the last one).
void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) {
  // Total length: every line contributes its own bytes plus one newline.
  size_t totalBytes = comment.size();
  for (auto& line: comment) {
    totalBytes += line.size();
  }

  Text::Builder text = statement.initDocComment(totalBytes);
  char* cursor = text.begin();
  for (auto& line: comment) {
    const size_t length = line.size();
    memcpy(cursor, line.begin(), length);
    cursor[length] = '\n';
    cursor += length + 1;
  }

  // We must have filled exactly the space that was allocated.
  KJ_ASSERT(cursor == text.end());
}
// Matches one '#' comment and throws its text away: a '#', then any run of characters other
// than '\n', then either the terminating '\n' or the end of the input.
constexpr auto discardComment =
sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))),
p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// Like discardComment, but keeps the comment text as a string.  A single space directly after
// the '#' is skipped if present; the rest of the line up to (not including) '\n' is captured.
constexpr auto saveComment =
sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())),
p::charsToString(p::many(p::anyOfChars("\n").invert())),
p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// The three-byte UTF-8 byte order mark, EF BB BF.
constexpr auto utf8Bom =
sequence(p::exactChar<'\xef'>(), p::exactChar<'\xbb'>(), p::exactChar<'\xbf'>());
// Whitespace, followed by any number of (BOM, whitespace) groups; everything is discarded.
constexpr auto bomsAndWhitespace =
sequence(p::discardWhitespace,
p::discard(p::many(sequence(utf8Bom, p::discardWhitespace))));
// bomsAndWhitespace, followed by any number of (discarded comment, bomsAndWhitespace) groups.
// This is the "empty space" allowed between tokens and statements.
constexpr auto commentsAndWhitespace =
sequence(bomsAndWhitespace,
p::discard(p::many(sequence(discardComment, bomsAndWhitespace))));
// Discards whitespace that stays on the current line.  The character group is built as
// NOT(non-whitespace OR '\r' OR '\n'), i.e. whitespace characters other than '\r' and '\n'.
constexpr auto discardLineWhitespace =
p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert())));
// A line break: either "\n", or "\r" optionally followed by "\n".
constexpr auto newline = p::oneOf(
p::exactChar<'\n'>(),
sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines.  Each comment line may be preceded by same-line whitespace, and its text is captured
// via saveComment.  The whole construct is optional, so the result is a Maybe of the captured
// lines.
constexpr auto docComment = p::optional(p::sequence(
discardLineWhitespace,
p::discard(p::optional(newline)),
p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
} // namespace
// Builds the complete set of parsers exposed through `parsers`.
//
// `orphanageParam` is stored and used by the parser callbacks below to allocate Token and
// Statement orphans.  `errorReporter` is captured by reference by the non-UTF-8 detection
// callback, which reports through it.
//
// Intermediate parsers are copied into `arena`, and the resulting references are combined into
// larger parsers.  The grammar is recursive (tokens contain token sequences, statements contain
// statement sequences), which is why references to parsers.tokenSequence and
// parsers.statementSequence are taken before those members are assigned.
Lexer::Lexer(Orphanage orphanageParam, ErrorReporter& errorReporter)
: orphanage(orphanageParam) {
// Note that because passing an lvalue to a parser constructor uses it by reference, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it.
auto& tokenSequence = parsers.tokenSequence;
// A token sequence followed by any number of (',' token sequence) groups, collected into one
// array of token sequences.
auto& commaDelimitedList = arena.copy(p::transform(
p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
[](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
-> kj::Array<kj::Array<Orphan<Token>>> {
if (first == nullptr && rest == nullptr) {
// Completely empty list.
return nullptr;
} else {
uint restSize = rest.size();
if (restSize > 0 && rest[restSize - 1] == nullptr) {
// Allow for trailing commas by shortening the list by one item if the final token is
// nullptr
restSize--;
}
auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(1 + restSize); // first+rest
result.add(kj::mv(first));
for (uint i = 0; i < restSize ; i++) {
result.add(kj::mv(rest[i]));
}
return result.finish();
}
}));
// A single token: the first alternative that matches wins.  Each callback allocates a new
// Token orphan, records its byte range via initTok(), and sets the matching union member.
auto& token = arena.copy(p::oneOf(
// Identifier.
p::transformWithLocation(p::identifier,
[this](Location loc, kj::String name) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIdentifier(name);
return t;
}),
// Double-quoted string literal.
p::transformWithLocation(p::doubleQuotedString,
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setStringLiteral(text);
return t;
}),
// Double-quoted hex binary literal.
p::transformWithLocation(p::doubleQuotedHexBinary,
[this](Location loc, kj::Array<byte> data) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setBinaryLiteral(data);
return t;
}),
// Integer literal.
p::transformWithLocation(p::integer,
[this](Location loc, uint64_t i) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIntegerLiteral(i);
return t;
}),
// Floating-point literal.
p::transformWithLocation(p::number,
[this](Location loc, double x) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setFloatLiteral(x);
return t;
}),
// Operator: one or more characters from the punctuation set below, kept as a single string.
p::transformWithLocation(
p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setOperator(text);
return t;
}),
// Parenthesized, comma-delimited list of token sequences.
p::transformWithLocation(
sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
return t;
}),
// Bracketed, comma-delimited list of token sequences.
p::transformWithLocation(
sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
return t;
}),
// Non-UTF-8 detection: matches the byte pairs FF FE or FE FF, or a NUL byte, and reports an
// error at that location.  The callback always returns nullptr, so this alternative never
// produces a token.
// NOTE(review): presumably transformOrReject treats the nullptr as a failed match -- confirm.
p::transformOrReject(p::transformWithLocation(
p::oneOf(sequence(p::exactChar<'\xff'>(), p::exactChar<'\xfe'>()),
sequence(p::exactChar<'\xfe'>(), p::exactChar<'\xff'>()),
sequence(p::exactChar<'\x00'>())),
[&errorReporter](Location loc) -> kj::Maybe<Orphan<Token>> {
errorReporter.addError(loc.begin(), loc.end(),
"Non-UTF-8 input detected. Cap'n Proto schema files must be UTF-8 text.");
return nullptr;
}), [](kj::Maybe<Orphan<Token>> param) { return param; })));
// Token sequence: leading empty space, then any number of (token, empty space) groups.
parsers.tokenSequence = arena.copy(p::sequence(
commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace))));
// As with tokenSequence above, this reference is used before the member is assigned below.
auto& statementSequence = parsers.statementSequence;
// The end of a statement: either ';' (a line statement) or a '{ ... }' block of nested
// statements.  In both cases an optional doc comment may follow.
auto& statementEnd = arena.copy(p::oneOf(
// ';' followed by an optional doc comment.
transform(p::sequence(p::exactChar<';'>(), docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
}
builder.setLine();
return result;
}),
// '{', optional doc comment, nested statements, '}', optional doc comment.
transform(
p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment,
kj::Array<Orphan<Statement>>&& statements,
kj::Maybe<kj::Array<kj::String>>&& lateComment)
-> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
// Prefer the comment right after '{'; fall back to the one after '}' only if the first is
// absent.
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
} else KJ_IF_MAYBE(c, lateComment) {
attachDocComment(builder, kj::mv(*c));
}
auto list = builder.initBlock(statements.size());
for (uint i = 0; i < statements.size(); i++) {
list.adoptWithCaveats(i, kj::mv(statements[i]));
}
return result;
})
));
// A full statement: a token sequence followed by a statement end.  The Statement orphan
// produced by statementEnd is completed here with its tokens and its byte range.
auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
[](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
auto builder = statement.get();
auto tokensBuilder = builder.initTokens(tokens.size());
for (uint i = 0; i < tokens.size(); i++) {
tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
}
builder.setStartByte(loc.begin());
builder.setEndByte(loc.end());
return kj::mv(statement);
}));
// Statement sequence: leading empty space, then any number of (statement, empty space) groups.
parsers.statementSequence = arena.copy(sequence(
commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace))));
// Expose the remaining building blocks through `parsers`.
parsers.token = token;
parsers.statement = statement;
parsers.emptySpace = commentsAndWhitespace;
}
// Empty out-of-line destructor.  It is explicitly marked noexcept(false) because destructors
// are implicitly noexcept otherwise; with this specifier an exception thrown while destroying
// the Lexer is allowed to propagate instead of terminating the program.
Lexer::~Lexer() noexcept(false) {}
} // namespace compiler
} // namespace capnp