| // Copyright (c) 2013, Kenton Varda <[email protected]> |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are met: |
| // |
| // 1. Redistributions of source code must retain the above copyright notice, this |
| // list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright notice, |
| // this list of conditions and the following disclaimer in the documentation |
| // and/or other materials provided with the distribution. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| // ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "lexer.h" |
| #include <kj/parse/char.h> |
| #include <kj/debug.h> |
| |
| namespace capnp { |
| namespace compiler { |
| |
| namespace p = kj::parse; |
| |
| bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result, |
| const ErrorReporter& errorReporter) { |
| Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter); |
| |
| auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput); |
| |
| Lexer::ParserInput parserInput(input.begin(), input.end()); |
| kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput); |
| |
| KJ_IF_MAYBE(output, parseOutput) { |
| auto l = result.initStatements(output->size()); |
| for (uint i = 0; i < output->size(); i++) { |
| l.adoptWithCaveats(i, kj::mv((*output)[i])); |
| } |
| return true; |
| } else { |
| uint32_t best = parserInput.getBest(); |
| errorReporter.addError(best, best, kj::str("Parse error.")); |
| return false; |
| } |
| } |
| |
| bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result, |
| const ErrorReporter& errorReporter) { |
| Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter); |
| |
| auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput); |
| |
| Lexer::ParserInput parserInput(input.begin(), input.end()); |
| kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput); |
| |
| KJ_IF_MAYBE(output, parseOutput) { |
| auto l = result.initTokens(output->size()); |
| for (uint i = 0; i < output->size(); i++) { |
| l.adoptWithCaveats(i, kj::mv((*output)[i])); |
| } |
| return true; |
| } else { |
| uint32_t best = parserInput.getBest(); |
| errorReporter.addError(best, best, kj::str("Parse error.")); |
| return false; |
| } |
| } |
| |
| namespace { |
| |
| typedef p::Span<uint32_t> Location; |
| |
| Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) { |
| auto tb = t.get(); |
| tb.setStartByte(loc.begin()); |
| tb.setEndByte(loc.end()); |
| return tb.getBody(); |
| } |
| |
| void buildTokenSequenceList(List<List<Token>>::Builder builder, |
| kj::Array<kj::Array<Orphan<Token>>>&& items) { |
| for (uint i = 0; i < items.size(); i++) { |
| auto& item = items[i]; |
| auto itemBuilder = builder.init(i, item.size()); |
| for (uint j = 0; j < item.size(); j++) { |
| itemBuilder.adoptWithCaveats(j, kj::mv(item[j])); |
| } |
| } |
| } |
| |
| void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) { |
| size_t size = 0; |
| for (auto& line: comment) { |
| size += line.size() + 1; // include newline |
| } |
| Text::Builder builder = statement.initDocComment(size); |
| char* pos = builder.begin(); |
| for (auto& line: comment) { |
| memcpy(pos, line.begin(), line.size()); |
| pos += line.size(); |
| *pos++ = '\n'; |
| } |
| KJ_ASSERT(pos == builder.end()); |
| } |
| |
| constexpr auto discardComment = |
| sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))), |
| p::oneOf(p::exactChar<'\n'>(), p::endOfInput)); |
| constexpr auto saveComment = |
| sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())), |
| p::charsToString(p::many(p::anyOfChars("\n").invert())), |
| p::oneOf(p::exactChar<'\n'>(), p::endOfInput)); |
| |
| constexpr auto commentsAndWhitespace = |
| sequence(p::discardWhitespace, |
| p::discard(p::many(sequence(discardComment, p::discardWhitespace)))); |
| |
| constexpr auto discardLineWhitespace = |
| p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert()))); |
| constexpr auto newline = p::oneOf( |
| p::exactChar<'\n'>(), |
| sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>())))); |
| |
| constexpr auto docComment = p::optional(p::sequence( |
| discardLineWhitespace, |
| p::discard(p::optional(newline)), |
| p::oneOrMore(p::sequence(discardLineWhitespace, saveComment)))); |
| // Parses a set of comment lines preceded by at most one newline and with no intervening blank |
| // lines. |
| |
| } // namespace |
| |
| Lexer::Lexer(Orphanage orphanageParam, const ErrorReporter& errorReporterParam) |
| : orphanage(orphanageParam), errorReporter(errorReporterParam) { |
| |
| // Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe |
| // for us to use parsers.tokenSequence even though we haven't yet constructed it. |
| auto& tokenSequence = parsers.tokenSequence; |
| |
| auto& commaDelimitedList = arena.copy(p::transform( |
| p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))), |
| [this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest) |
| -> kj::Array<kj::Array<Orphan<Token>>> { |
| if (first == nullptr && rest == nullptr) { |
| // Completely empty list. |
| return nullptr; |
| } else { |
| auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(rest.size() + 1); |
| result.add(kj::mv(first)); |
| for (auto& item: rest) { |
| result.add(kj::mv(item)); |
| } |
| return result.finish(); |
| } |
| })); |
| |
| auto& token = arena.copy(p::oneOf( |
| p::transformWithLocation(p::identifier, |
| [this](Location loc, kj::String name) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setIdentifier(name); |
| return t; |
| }), |
| p::transformWithLocation(p::doubleQuotedString, |
| [this](Location loc, kj::String text) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setStringLiteral(text); |
| return t; |
| }), |
| p::transformWithLocation(p::integer, |
| [this](Location loc, uint64_t i) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setIntegerLiteral(i); |
| return t; |
| }), |
| p::transformWithLocation(p::number, |
| [this](Location loc, double x) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setFloatLiteral(x); |
| return t; |
| }), |
| p::transformWithLocation( |
| p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))), |
| [this](Location loc, kj::String text) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| initTok(t, loc).setOperator(text); |
| return t; |
| }), |
| p::transformWithLocation( |
| sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()), |
| [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| buildTokenSequenceList( |
| initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items)); |
| return t; |
| }), |
| p::transformWithLocation( |
| sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()), |
| [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> { |
| auto t = orphanage.newOrphan<Token>(); |
| buildTokenSequenceList( |
| initTok(t, loc).initBracketedList(items.size()), kj::mv(items)); |
| return t; |
| }) |
| )); |
| parsers.tokenSequence = arena.copy(p::sequence( |
| commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace)))); |
| |
| auto& statementSequence = parsers.statementSequence; |
| |
| auto& statementEnd = arena.copy(p::oneOf( |
| transform(p::sequence(p::exactChar<';'>(), docComment), |
| [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> { |
| auto result = orphanage.newOrphan<Statement>(); |
| auto builder = result.get(); |
| KJ_IF_MAYBE(c, comment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } |
| builder.getBlock().setNone(); |
| return result; |
| }), |
| transform( |
| p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(), |
| docComment), |
| [this](kj::Maybe<kj::Array<kj::String>>&& comment, |
| kj::Array<Orphan<Statement>>&& statements, |
| kj::Maybe<kj::Array<kj::String>>&& lateComment) |
| -> Orphan<Statement> { |
| auto result = orphanage.newOrphan<Statement>(); |
| auto builder = result.get(); |
| KJ_IF_MAYBE(c, comment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } else KJ_IF_MAYBE(c, lateComment) { |
| attachDocComment(builder, kj::mv(*c)); |
| } |
| auto list = builder.getBlock().initStatements(statements.size()); |
| for (uint i = 0; i < statements.size(); i++) { |
| list.adoptWithCaveats(i, kj::mv(statements[i])); |
| } |
| return result; |
| }) |
| )); |
| |
| auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd), |
| [this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) { |
| auto builder = statement.get(); |
| auto tokensBuilder = builder.initTokens(tokens.size()); |
| for (uint i = 0; i < tokens.size(); i++) { |
| tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i])); |
| } |
| builder.setStartByte(loc.begin()); |
| builder.setEndByte(loc.end()); |
| return kj::mv(statement); |
| })); |
| |
| parsers.statementSequence = arena.copy(sequence( |
| commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace)))); |
| |
| parsers.token = token; |
| parsers.statement = statement; |
| parsers.emptySpace = commentsAndWhitespace; |
| } |
| |
| Lexer::~Lexer() {} |
| |
| } // namespace compiler |
| } // namespace capnp |