| // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors |
| // Licensed under the MIT License: |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a copy |
| // of this software and associated documentation files (the "Software"), to deal |
| // in the Software without restriction, including without limitation the rights |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| // copies of the Software, and to permit persons to whom the Software is |
| // furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| // THE SOFTWARE. |
| |
| #include "lexer.h" |
| #include <kj/parse/char.h> |
| #include <kj/debug.h> |
| |
| namespace capnp { |
| namespace compiler { |
| |
| namespace p = kj::parse; |
| |
| bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result, |
| ErrorReporter& errorReporter) { |
| Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter); |
| |
| auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput); |
| |
| Lexer::ParserInput parserInput(input.begin(), input.end()); |
| kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput); |
| |
| KJ_IF_MAYBE(output, parseOutput) { |
| auto l = result.initStatements(output->size()); |
| for (uint i = 0; i < output->size(); i++) { |
| l.adoptWithCaveats(i, kj::mv((*output)[i])); |
| } |
| return true; |
| } else { |
| uint32_t best = parserInput.getBest(); |
| errorReporter.addError(best, best, kj::str("Parse error.")); |
| return false; |
| } |
| } |
| |
| bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result, |
| ErrorReporter& errorReporter) { |
| Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter); |
| |
| auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput); |
| |
| Lexer::ParserInput parserInput(input.begin(), input.end()); |
| kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput); |
| |
| KJ_IF_MAYBE(output, parseOutput) { |
| auto l = result.initTokens(output->size()); |
| for (uint i = 0; i < output->size(); i++) { |
| l.adoptWithCaveats(i, kj::mv((*output)[i])); |
| } |
| return true; |
| } else { |
| uint32_t best = parserInput.getBest(); |
| errorReporter.addError(best, best, kj::str("Parse error.")); |
| return false; |
| } |
| } |
| |
| namespace { |
| |
| typedef p::Span<uint32_t> Location; |
| |
| Token::Builder initTok(Orphan<Token>& t, const Location& loc) { |
| auto builder = t.get(); |
| builder.setStartByte(loc.begin()); |
| builder.setEndByte(loc.end()); |
| return builder; |
| } |
| |
| void buildTokenSequenceList(List<List<Token>>::Builder builder, |
| kj::Array<kj::Array<Orphan<Token>>>&& items) { |
| for (uint i = 0; i < items.size(); i++) { |
| auto& item = items[i]; |
| auto itemBuilder = builder.init(i, item.size()); |
| for (uint j = 0; j < item.size(); j++) { |
| itemBuilder.adoptWithCaveats(j, kj::mv(item[j])); |
| } |
| } |
| } |
| |
| void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) { |
| size_t size = 0; |
| for (auto& line: comment) { |
| size += line.size() + 1; // include newline |
| } |
| Text::Builder builder = statement.initDocComment(size); |
| char* pos = builder.begin(); |
| for (auto& line: comment) { |
| memcpy(pos, line.begin(), line.size()); |
| pos += line.size(); |
| *pos++ = '\n'; |
| } |
| KJ_ASSERT(pos == builder.end()); |
| } |
| |
| constexpr auto discardComment = |
| sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))), |
| p::oneOf(p::exactChar<'\n'>(), p::endOfInput)); |
| constexpr auto saveComment = |
| sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())), |
| p::charsToString(p::many(p::anyOfChars("\n").invert())), |
| p::oneOf(p::exactChar<'\n'>(), p::endOfInput)); |
| |
| constexpr auto utf8Bom = |
| sequence(p::exactChar<'\xef'>(), p::exactChar<'\xbb'>(), p::exactChar<'\xbf'>()); |
| |
| constexpr auto bomsAndWhitespace = |
| sequence(p::discardWhitespace, |
| p::discard(p::many(sequence(utf8Bom, p::discardWhitespace)))); |
| |
| constexpr auto commentsAndWhitespace = |
| sequence(bomsAndWhitespace, |
| p::discard(p::many(sequence(discardComment, bomsAndWhitespace)))); |
| |
| constexpr auto discardLineWhitespace = |
| p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert()))); |
| constexpr auto newline = p::oneOf( |
| p::exactChar<'\n'>(), |
| sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>())))); |
| |
| constexpr auto docComment = p::optional(p::sequence( |
| discardLineWhitespace, |
| p::discard(p::optional(newline)), |
| p::oneOrMore(p::sequence(discardLineWhitespace, saveComment)))); |
| // Parses a set of comment lines preceded by at most one newline and with no intervening blank |
| // lines. |
| |
| } // namespace |
| |
| Lexer::Lexer(Orphanage orphanageParam, ErrorReporter& errorReporter) |
| : orphanage(orphanageParam) { |
| |
| // Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe |
| // for us to use parsers.tokenSequence even though we haven't yet constructed it. |
| auto& tokenSequence = parsers.tokenSequence; |
| |
| auto& commaDelimitedList = arena.copy(p::transform( |
| p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))), |
| [](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest) |
| -> kj::Array<kj::Array<Orphan<Token>>> { |
| if (first == nullptr && rest == nullptr) { |
| // Completely empty list. |
| return nullptr; |
| } else { |
| uint restSize = rest.size(); |
| if (restSize > 0 && rest[restSize - 1] == nullptr) { |
| // Allow for trailing commas by shortening the list by one item if the final token is |
| // nullptr |
| restSize--; |
| } |
| auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(1 + restSize); // first+rest |
| result.add(kj::mv(first)); |
| for (uint i = 0; i < restSize ; i++) { |
| result.add(kj::mv(rest[i])); |
| } |
| return result.finish(); |
| } |
| })); |
| |
| auto& token = arena.copy(p::oneOf( |
| p::transformWithLocation(p::identifier, |
| [this](Location loc, kj::String name) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setIdentifier(name); |
| return t; |
| }), |
| p::transformWithLocation(p::doubleQuotedString, |
| [this](Location loc, kj::String text) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setStringLiteral(text); |
| return t; |
| }), |
| p::transformWithLocation(p::doubleQuotedHexBinary, |
| [this](Location loc, kj::Array<byte> data) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setBinaryLiteral(data); |
| return t; |
| }), |
| p::transformWithLocation(p::integer, |
| [this](Location loc, uint64_t i) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setIntegerLiteral(i); |
| return t; |
| }), |
| p::transformWithLocation(p::number, |
| [this](Location loc, double x) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setFloatLiteral(x); |
| return t; |
| }), |
| p::transformWithLocation( |
| p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))), |
| [this](Location loc, kj::String text) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setOperator(text); |
| return t; |
| }), |
| p::transformWithLocation( |
| sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()), |
| [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| buildTokenSequenceList( |
| initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items)); |
| return t; |
| }), |
| p::transformWithLocation( |
| sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()), |
| [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| buildTokenSequenceList( |
| initTok(t, loc).initBracketedList(items.size()), kj::mv(items)); |
| return t; |
| }), |
| p::transformOrReject(p::transformWithLocation( |
| p::oneOf(sequence(p::exactChar<'\xff'>(), p::exactChar<'\xfe'>()), |
| sequence(p::exactChar<'\xfe'>(), p::exactChar<'\xff'>()), |
| sequence(p::exactChar<'\x00'>())), |
| [&errorReporter](Location loc) -> kj::Maybe<Orphan<Token>> { |
| errorReporter.addError(loc.begin(), loc.end(), |
| "Non-UTF-8 input detected. Cap'n Proto schema files must be UTF-8 text."); |
| return nullptr; |
| }), [](kj::Maybe<Orphan<Token>> param) { return param; }))); |
| parsers.tokenSequence = arena.copy(p::sequence( |
| commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace)))); |
| |
| auto& statementSequence = parsers.statementSequence; |
| |
| auto& statementEnd = arena.copy(p::oneOf( |
| transform(p::sequence(p::exactChar<';'>(), docComment), |
| [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> { |
| auto result = orphanage.newOrphan<Statement>(); |
| auto builder = result.get(); |
| KJ_IF_MAYBE(c, comment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } |
| builder.setLine(); |
| return result; |
| }), |
| transform( |
| p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(), |
| docComment), |
| [this](kj::Maybe<kj::Array<kj::String>>&& comment, |
| kj::Array<Orphan<Statement>>&& statements, |
| kj::Maybe<kj::Array<kj::String>>&& lateComment) |
| -> Orphan<Statement> { |
| auto result = orphanage.newOrphan<Statement>(); |
| auto builder = result.get(); |
| KJ_IF_MAYBE(c, comment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } else KJ_IF_MAYBE(c, lateComment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } |
| auto list = builder.initBlock(statements.size()); |
| for (uint i = 0; i < statements.size(); i++) { |
| list.adoptWithCaveats(i, kj::mv(statements[i])); |
| } |
| return result; |
| }) |
| )); |
| |
| auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd), |
| [](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) { |
| auto builder = statement.get(); |
| auto tokensBuilder = builder.initTokens(tokens.size()); |
| for (uint i = 0; i < tokens.size(); i++) { |
| tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i])); |
| } |
| builder.setStartByte(loc.begin()); |
| builder.setEndByte(loc.end()); |
| return kj::mv(statement); |
| })); |
| |
| parsers.statementSequence = arena.copy(sequence( |
| commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace)))); |
| |
| parsers.token = token; |
| parsers.statement = statement; |
| parsers.emptySpace = commentsAndWhitespace; |
| } |
| |
| Lexer::~Lexer() noexcept(false) {} |
| |
| } // namespace compiler |
| } // namespace capnp |