// blob: 56932351676469986dd6d1b16ec4484b0a9bd75b [file] [log] [blame]
// Copyright (c) 2013, Kenton Varda <[email protected]>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "lexer.h"
#include <kj/parse/char.h>
#include <kj/debug.h>
namespace capnp {
namespace compiler {
namespace p = kj::parse;
// Lexes `input` as a sequence of statements.
//
// The parse succeeds only if the statement-sequence parser is followed directly by end-of-input.
// On success every parsed statement is adopted into `result` and true is returned.  On failure a
// "Parse error." is reported through `errorReporter` and false is returned.
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
         const ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput);
  Lexer::ParserInput parserInput(input.begin(), input.end());

  kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput);

  KJ_IF_MAYBE(statements, parseOutput) {
    // Hand each orphaned statement over to the result list, preserving order.
    auto list = result.initStatements(statements->size());
    for (uint index = 0; index < statements->size(); ++index) {
      list.adoptWithCaveats(index, kj::mv((*statements)[index]));
    }
    return true;
  }

  // No parse: report a zero-length span at the position given by getBest().
  uint32_t errorPos = parserInput.getBest();
  errorReporter.addError(errorPos, errorPos, kj::str("Parse error."));
  return false;
}
// Lexes `input` as a flat sequence of tokens.
//
// The parse succeeds only if the token-sequence parser is followed directly by end-of-input.
// On success every parsed token is adopted into `result` and true is returned.  On failure a
// "Parse error." is reported through `errorReporter` and false is returned.
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
         const ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput);
  Lexer::ParserInput parserInput(input.begin(), input.end());

  kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput);

  KJ_IF_MAYBE(tokens, parseOutput) {
    // Hand each orphaned token over to the result list, preserving order.
    auto list = result.initTokens(tokens->size());
    for (uint index = 0; index < tokens->size(); ++index) {
      list.adoptWithCaveats(index, kj::mv((*tokens)[index]));
    }
    return true;
  }

  // No parse: report a zero-length span at the position given by getBest().
  uint32_t errorPos = parserInput.getBest();
  errorReporter.addError(errorPos, errorPos, kj::str("Parse error."));
  return false;
}
namespace {
// Span of a match within the input, as handed to the transformWithLocation() callbacks in this
// file; its begin()/end() values are stored as start/end bytes on tokens and statements.
using Location = p::Span<uint32_t>;
// Records the byte span `loc` on the token held by orphan `t` and returns the builder for the
// token's body so the caller can fill in the token-specific content.
Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
  auto token = t.get();

  token.setStartByte(loc.begin());
  token.setEndByte(loc.end());

  return token.getBody();
}
// Fills `builder` (a list of token lists) from `items`: the i-th inner list is initialized to the
// size of items[i] and adopts each of that array's token orphans in order.
void buildTokenSequenceList(List<List<Token>>::Builder builder,
                            kj::Array<kj::Array<Orphan<Token>>>&& items) {
  for (uint listIndex = 0; listIndex < items.size(); ++listIndex) {
    kj::Array<Orphan<Token>>& tokens = items[listIndex];
    auto tokensBuilder = builder.init(listIndex, tokens.size());

    for (uint tokenIndex = 0; tokenIndex < tokens.size(); ++tokenIndex) {
      tokensBuilder.adoptWithCaveats(tokenIndex, kj::mv(tokens[tokenIndex]));
    }
  }
}
// Stores `comment` as the doc comment of `statement`.  The lines are concatenated in order, each
// one followed by a single '\n'.
void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) {
  // First pass: total length is every line's length plus one byte for its newline.
  size_t totalBytes = 0;
  for (auto& commentLine: comment) {
    totalBytes += commentLine.size() + 1;
  }

  // Second pass: copy each line into the freshly-initialized text and terminate it with '\n'.
  Text::Builder builder = statement.initDocComment(totalBytes);
  char* pos = builder.begin();
  for (auto& commentLine: comment) {
    const size_t lineLength = commentLine.size();
    memcpy(pos, commentLine.begin(), lineLength);
    pos[lineLength] = '\n';
    pos += lineLength + 1;
  }

  // The write cursor must land exactly on the end of the allocated text.
  KJ_ASSERT(pos == builder.end());
}
constexpr auto discardComment =
sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))),
p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// discardComment: matches a '#' followed by any number of non-newline characters, ending at a
// '\n' or at end of input.  The comment text is discarded.
constexpr auto saveComment =
sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())),
p::charsToString(p::many(p::anyOfChars("\n").invert())),
p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
// saveComment: matches the same shape as discardComment, but skips at most one space directly
// after the '#' and passes the remaining non-newline characters through p::charsToString.
constexpr auto commentsAndWhitespace =
sequence(p::discardWhitespace,
p::discard(p::many(sequence(discardComment, p::discardWhitespace))));
// commentsAndWhitespace: p::discardWhitespace followed by any number of (discardComment,
// p::discardWhitespace) pairs; all of it is discarded.
constexpr auto discardLineWhitespace =
p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert())));
// discardLineWhitespace: discards any number of whitespace characters other than '\r' and '\n'.
// The character set is built by double inversion: (non-whitespace or CR/LF), inverted.
constexpr auto newline = p::oneOf(
p::exactChar<'\n'>(),
sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
// newline: matches a single line ending -- "\n", or "\r" optionally followed by "\n".
constexpr auto docComment = p::optional(p::sequence(
discardLineWhitespace,
p::discard(p::optional(newline)),
p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines.  Each comment line is matched with saveComment (after skipping leading line whitespace),
// and the whole construct is optional.
} // namespace
// Constructs the lexer: stores the orphanage and error reporter, then builds every parser exposed
// through `parsers` (tokenSequence, statementSequence, token, statement, emptySpace).  Parsers
// that are only building blocks (commaDelimitedList, statementEnd, ...) are copied into `arena`
// and used through the references returned by arena.copy().
Lexer::Lexer(Orphanage orphanageParam, const ErrorReporter& errorReporterParam)
: orphanage(orphanageParam), errorReporter(errorReporterParam) {
// Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it.
auto& tokenSequence = parsers.tokenSequence;
// commaDelimitedList: one token sequence followed by any number of ','-prefixed token sequences.
// The transform merges `first` and `rest` into a single array of token sequences.
auto& commaDelimitedList = arena.copy(p::transform(
p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
[this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
-> kj::Array<kj::Array<Orphan<Token>>> {
if (first == nullptr && rest == nullptr) {
// Completely empty list.
return nullptr;
} else {
// `first` goes in slot 0, followed by every element of `rest` in order.
auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(rest.size() + 1);
result.add(kj::mv(first));
for (auto& item: rest) {
result.add(kj::mv(item));
}
return result.finish();
}
}));
// token: a p::oneOf over the token kinds below.  Each alternative allocates a new Token orphan,
// records the matched span on it via initTok(), and sets the corresponding body field.
auto& token = arena.copy(p::oneOf(
// Identifier.
p::transformWithLocation(p::identifier,
[this](Location loc, kj::String name) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIdentifier(name);
return t;
}),
// Double-quoted string literal.
p::transformWithLocation(p::doubleQuotedString,
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setStringLiteral(text);
return t;
}),
// Integer literal.
p::transformWithLocation(p::integer,
[this](Location loc, uint64_t i) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIntegerLiteral(i);
return t;
}),
// Floating-point literal.
p::transformWithLocation(p::number,
[this](Location loc, double x) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setFloatLiteral(x);
return t;
}),
// Operator: one or more consecutive characters from the set "!$%&*+-./:<=>?@^|~".
p::transformWithLocation(
p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setOperator(text);
return t;
}),
// Parenthesized list: '(' commaDelimitedList ')'.
p::transformWithLocation(
sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
return t;
}),
// Bracketed list: '[' commaDelimitedList ']'.
p::transformWithLocation(
sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
return t;
})
));
// tokenSequence: any number of tokens, with comments/whitespace accepted before the first token
// and after every token.
parsers.tokenSequence = arena.copy(p::sequence(
commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace))));
// As with tokenSequence above, statementSequence is referenced here before it is assigned.
auto& statementSequence = parsers.statementSequence;
// statementEnd: what terminates a statement.  Either alternative creates the Statement orphan
// and fills in its doc comment (if any) and its block.
auto& statementEnd = arena.copy(p::oneOf(
// ';' followed by an optional doc comment: the statement's block is set to none.
transform(p::sequence(p::exactChar<';'>(), docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
}
builder.getBlock().setNone();
return result;
}),
// '{' [doc comment] nested statements '}' [doc comment]: the statement's block holds the
// nested statements.
transform(
p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment,
kj::Array<Orphan<Statement>>&& statements,
kj::Maybe<kj::Array<kj::String>>&& lateComment)
-> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
// The comment after '{' takes precedence; the one after '}' is used only if the first is
// absent.
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
} else KJ_IF_MAYBE(c, lateComment) {
attachDocComment(builder, kj::mv(*c));
}
auto list = builder.getBlock().initStatements(statements.size());
for (uint i = 0; i < statements.size(); i++) {
list.adoptWithCaveats(i, kj::mv(statements[i]));
}
return result;
})
));
// statement: a token sequence followed by a statementEnd.  The Statement orphan produced by
// statementEnd is completed with the tokens and with the byte span of the whole match.
auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
[this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
auto builder = statement.get();
auto tokensBuilder = builder.initTokens(tokens.size());
for (uint i = 0; i < tokens.size(); i++) {
tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
}
builder.setStartByte(loc.begin());
builder.setEndByte(loc.end());
return kj::mv(statement);
}));
// statementSequence: any number of statements, with comments/whitespace accepted before the
// first statement and after every statement.
parsers.statementSequence = arena.copy(sequence(
commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace))));
// Publish the remaining parsers through `parsers`.
parsers.token = token;
parsers.statement = statement;
parsers.emptySpace = commentsAndWhitespace;
}
// Destructor, defined out of line.  The body is empty, so only the implicit destruction of the
// members takes place.
Lexer::~Lexer() {}
} // namespace compiler
} // namespace capnp