mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 14:50:42 +08:00
Semi-affine maps and address spaces are not yet supported (someone want to take this on?). We also don't generate IR objects for types yet, which I plan to tackle next. PiperOrigin-RevId: 201754283
524 lines
15 KiB
C++
524 lines
15 KiB
C++
//===- Parser.cpp - MLIR Parser Implementation ----------------------------===//
|
|
//
|
|
// Copyright 2019 The MLIR Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
// =============================================================================
|
|
//
|
|
// This file implements the parser for the MLIR textual form.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Parser.h"
|
|
#include "Lexer.h"
|
|
#include "mlir/IR/Module.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
using namespace mlir;
|
|
using llvm::SourceMgr;
|
|
|
|
namespace {
|
|
/// Outcome of a parsing routine.  The enumerators are numbered so that success
/// converts to "false" and failure converts to "true" in a boolean context,
/// which lets callers write `if (parseFoo()) return ParseFailure;`.
enum ParseResult {
  ParseSuccess = 0,
  ParseFailure = 1
};
|
|
|
|
/// Main parser implementation.
|
|
class Parser {
|
|
public:
|
|
Parser(llvm::SourceMgr &sourceMgr) : lex(sourceMgr), curToken(lex.lexToken()){
|
|
module.reset(new Module());
|
|
}
|
|
|
|
Module *parseModule();
|
|
private:
|
|
// State.
|
|
Lexer lex;
|
|
|
|
// This is the next token that hasn't been consumed yet.
|
|
Token curToken;
|
|
|
|
// This is the result module we are parsing into.
|
|
std::unique_ptr<Module> module;
|
|
|
|
private:
|
|
// Helper methods.
|
|
|
|
/// Emit an error and return failure.
|
|
ParseResult emitError(const Twine &message);
|
|
|
|
/// Advance the current lexer onto the next token.
|
|
void consumeToken() {
|
|
assert(curToken.isNot(Token::eof, Token::error) &&
|
|
"shouldn't advance past EOF or errors");
|
|
curToken = lex.lexToken();
|
|
}
|
|
|
|
/// Advance the current lexer onto the next token, asserting what the expected
|
|
/// current token is. This is preferred to the above method because it leads
|
|
/// to more self-documenting code with better checking.
|
|
void consumeToken(Token::TokenKind kind) {
|
|
assert(curToken.is(kind) && "consumed an unexpected token");
|
|
consumeToken();
|
|
}
|
|
|
|
/// If the current token has the specified kind, consume it and return true.
|
|
/// If not, return false.
|
|
bool consumeIf(Token::TokenKind kind) {
|
|
if (curToken.isNot(kind))
|
|
return false;
|
|
consumeToken(kind);
|
|
return true;
|
|
}
|
|
|
|
ParseResult parseCommaSeparatedList(Token::TokenKind rightToken,
|
|
const std::function<ParseResult()> &parseElement,
|
|
bool allowEmptyList = true);
|
|
|
|
// Type parsing.
|
|
ParseResult parsePrimitiveType();
|
|
ParseResult parseElementType();
|
|
ParseResult parseVectorType();
|
|
ParseResult parseDimensionListRanked(SmallVectorImpl<int> &dimensions);
|
|
ParseResult parseTensorType();
|
|
ParseResult parseMemRefType();
|
|
ParseResult parseFunctionType();
|
|
ParseResult parseType();
|
|
ParseResult parseTypeList();
|
|
|
|
// Top level entity parsing.
|
|
ParseResult parseFunctionSignature(StringRef &name);
|
|
ParseResult parseExtFunc();
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Helper methods.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Report `message` as an error at the location of the current token and
/// return ParseFailure, so callers can write `return emitError(...)`.
ParseResult Parser::emitError(const Twine &message) {
  // If the current token is a lexer error, the lexer already emitted an error
  // for it, so only print a diagnostic of our own for any other token.
  //
  // TODO(clattner): If/when we want to implement a -verify mode, this will need
  // to package up errors into SMDiagnostic and report them.
  if (!curToken.is(Token::error))
    lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error,
                                    message);

  return ParseFailure;
}
|
|
|
|
/// Parse a comma-separated list of elements, terminated with an arbitrary
|
|
/// token. This allows empty lists if allowEmptyList is true.
|
|
///
|
|
/// abstract-list ::= rightToken // if allowEmptyList == true
|
|
/// abstract-list ::= element (',' element)* rightToken
|
|
///
|
|
ParseResult Parser::
|
|
parseCommaSeparatedList(Token::TokenKind rightToken,
|
|
const std::function<ParseResult()> &parseElement,
|
|
bool allowEmptyList) {
|
|
// Handle the empty case.
|
|
if (curToken.is(rightToken)) {
|
|
if (!allowEmptyList)
|
|
return emitError("expected list element");
|
|
consumeToken(rightToken);
|
|
return ParseSuccess;
|
|
}
|
|
|
|
// Non-empty case starts with an element.
|
|
if (parseElement())
|
|
return ParseFailure;
|
|
|
|
// Otherwise we have a list of comma separated elements.
|
|
while (consumeIf(Token::comma)) {
|
|
if (parseElement())
|
|
return ParseFailure;
|
|
}
|
|
|
|
// Consume the end character.
|
|
if (!consumeIf(rightToken))
|
|
return emitError("expected ',' or ')'");
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Type Parsing
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Parse the low-level fixed dtypes in the system.
|
|
///
|
|
/// primitive-type
|
|
/// ::= `f16` | `bf16` | `f32` | `f64` // Floating point
|
|
/// | `i1` | `i8` | `i16` | `i32` | `i64` // Sized integers
|
|
/// | `int`
|
|
///
|
|
ParseResult Parser::parsePrimitiveType() {
|
|
// TODO: Build IR objects.
|
|
switch (curToken.getKind()) {
|
|
default: return emitError("expected type");
|
|
case Token::kw_bf16:
|
|
consumeToken(Token::kw_bf16);
|
|
return ParseSuccess;
|
|
case Token::kw_f16:
|
|
consumeToken(Token::kw_f16);
|
|
return ParseSuccess;
|
|
case Token::kw_f32:
|
|
consumeToken(Token::kw_f32);
|
|
return ParseSuccess;
|
|
case Token::kw_f64:
|
|
consumeToken(Token::kw_f64);
|
|
return ParseSuccess;
|
|
case Token::kw_i1:
|
|
consumeToken(Token::kw_i1);
|
|
return ParseSuccess;
|
|
case Token::kw_i16:
|
|
consumeToken(Token::kw_i16);
|
|
return ParseSuccess;
|
|
case Token::kw_i32:
|
|
consumeToken(Token::kw_i32);
|
|
return ParseSuccess;
|
|
case Token::kw_i64:
|
|
consumeToken(Token::kw_i64);
|
|
return ParseSuccess;
|
|
case Token::kw_i8:
|
|
consumeToken(Token::kw_i8);
|
|
return ParseSuccess;
|
|
case Token::kw_int:
|
|
consumeToken(Token::kw_int);
|
|
return ParseSuccess;
|
|
}
|
|
}
|
|
|
|
/// Parse the element type of a tensor or memref type.
|
|
///
|
|
/// element-type ::= primitive-type | vector-type
|
|
///
|
|
ParseResult Parser::parseElementType() {
|
|
if (curToken.is(Token::kw_vector))
|
|
return parseVectorType();
|
|
|
|
return parsePrimitiveType();
|
|
}
|
|
|
|
/// Parse a vector type.
|
|
///
|
|
/// vector-type ::= `vector` `<` const-dimension-list primitive-type `>`
|
|
/// const-dimension-list ::= (integer-literal `x`)+
|
|
///
|
|
ParseResult Parser::parseVectorType() {
|
|
consumeToken(Token::kw_vector);
|
|
|
|
if (!consumeIf(Token::less))
|
|
return emitError("expected '<' in vector type");
|
|
|
|
if (curToken.isNot(Token::integer))
|
|
return emitError("expected dimension size in vector type");
|
|
|
|
SmallVector<unsigned, 4> dimensions;
|
|
while (curToken.is(Token::integer)) {
|
|
// Make sure this integer value is in bound and valid.
|
|
auto dimension = curToken.getUnsignedIntegerValue();
|
|
if (!dimension.hasValue())
|
|
return emitError("invalid dimension in vector type");
|
|
dimensions.push_back(dimension.getValue());
|
|
|
|
consumeToken(Token::integer);
|
|
|
|
// Make sure we have an 'x' or something like 'xbf32'.
|
|
if (curToken.isNot(Token::bare_identifier) ||
|
|
curToken.getSpelling()[0] != 'x')
|
|
return emitError("expected 'x' in vector dimension list");
|
|
|
|
// If we had a prefix of 'x', lex the next token immediately after the 'x'.
|
|
if (curToken.getSpelling().size() != 1)
|
|
lex.resetPointer(curToken.getSpelling().data()+1);
|
|
|
|
// Consume the 'x'.
|
|
consumeToken(Token::bare_identifier);
|
|
}
|
|
|
|
// Parse the element type.
|
|
if (parsePrimitiveType())
|
|
return ParseFailure;
|
|
|
|
if (!consumeIf(Token::greater))
|
|
return emitError("expected '>' in vector type");
|
|
|
|
// TODO: Form IR object.
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
/// Parse a dimension list of a tensor or memref type. This populates the
|
|
/// dimension list, returning -1 for the '?' dimensions.
|
|
///
|
|
/// dimension-list-ranked ::= (dimension `x`)*
|
|
/// dimension ::= `?` | integer-literal
|
|
///
|
|
ParseResult Parser::parseDimensionListRanked(SmallVectorImpl<int> &dimensions) {
|
|
while (curToken.isAny(Token::integer, Token::question)) {
|
|
if (consumeIf(Token::question)) {
|
|
dimensions.push_back(-1);
|
|
} else {
|
|
// Make sure this integer value is in bound and valid.
|
|
auto dimension = curToken.getUnsignedIntegerValue();
|
|
if (!dimension.hasValue() || (int)dimension.getValue() < 0)
|
|
return emitError("invalid dimension");
|
|
dimensions.push_back((int)dimension.getValue());
|
|
consumeToken(Token::integer);
|
|
}
|
|
|
|
// Make sure we have an 'x' or something like 'xbf32'.
|
|
if (curToken.isNot(Token::bare_identifier) ||
|
|
curToken.getSpelling()[0] != 'x')
|
|
return emitError("expected 'x' in dimension list");
|
|
|
|
// If we had a prefix of 'x', lex the next token immediately after the 'x'.
|
|
if (curToken.getSpelling().size() != 1)
|
|
lex.resetPointer(curToken.getSpelling().data()+1);
|
|
|
|
// Consume the 'x'.
|
|
consumeToken(Token::bare_identifier);
|
|
}
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
/// Parse a tensor type.
|
|
///
|
|
/// tensor-type ::= `tensor` `<` dimension-list element-type `>`
|
|
/// dimension-list ::= dimension-list-ranked | `??`
|
|
///
|
|
ParseResult Parser::parseTensorType() {
|
|
consumeToken(Token::kw_tensor);
|
|
|
|
if (!consumeIf(Token::less))
|
|
return emitError("expected '<' in tensor type");
|
|
|
|
bool isUnranked;
|
|
SmallVector<int, 4> dimensions;
|
|
|
|
if (consumeIf(Token::questionquestion)) {
|
|
isUnranked = true;
|
|
} else {
|
|
isUnranked = false;
|
|
if (parseDimensionListRanked(dimensions))
|
|
return ParseFailure;
|
|
}
|
|
|
|
// Parse the element type.
|
|
if (parseElementType())
|
|
return ParseFailure;
|
|
|
|
if (!consumeIf(Token::greater))
|
|
return emitError("expected '>' in tensor type");
|
|
|
|
// TODO: Form IR object.
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
/// Parse a memref type.
|
|
///
|
|
/// memref-type ::= `memref` `<` dimension-list-ranked element-type
|
|
/// (`,` semi-affine-map-composition)? (`,` memory-space)? `>`
|
|
///
|
|
/// semi-affine-map-composition ::= (semi-affine-map `,` )* semi-affine-map
|
|
/// memory-space ::= integer-literal /* | TODO: address-space-id */
|
|
///
|
|
ParseResult Parser::parseMemRefType() {
|
|
consumeToken(Token::kw_memref);
|
|
|
|
if (!consumeIf(Token::less))
|
|
return emitError("expected '<' in memref type");
|
|
|
|
SmallVector<int, 4> dimensions;
|
|
if (parseDimensionListRanked(dimensions))
|
|
return ParseFailure;
|
|
|
|
// Parse the element type.
|
|
if (parseElementType())
|
|
return ParseFailure;
|
|
|
|
// TODO: Parse semi-affine-map-composition.
|
|
// TODO: Parse memory-space.
|
|
|
|
if (!consumeIf(Token::greater))
|
|
return emitError("expected '>' in memref type");
|
|
|
|
// TODO: Form IR object.
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
|
|
|
|
/// Parse a function type.
|
|
///
|
|
/// function-type ::= type-list-parens `->` type-list
|
|
///
|
|
ParseResult Parser::parseFunctionType() {
|
|
assert(curToken.is(Token::l_paren));
|
|
|
|
if (parseTypeList())
|
|
return ParseFailure;
|
|
|
|
if (!consumeIf(Token::arrow))
|
|
return emitError("expected '->' in function type");
|
|
|
|
if (parseTypeList())
|
|
return ParseFailure;
|
|
|
|
// TODO: Build IR object.
|
|
return ParseSuccess;
|
|
}
|
|
|
|
|
|
/// Parse an arbitrary type.
|
|
///
|
|
/// type ::= primitive-type
|
|
/// | vector-type
|
|
/// | tensor-type
|
|
/// | memref-type
|
|
/// | function-type
|
|
/// element-type ::= primitive-type | vector-type
|
|
///
|
|
ParseResult Parser::parseType() {
|
|
switch (curToken.getKind()) {
|
|
case Token::kw_memref: return parseMemRefType();
|
|
case Token::kw_tensor: return parseTensorType();
|
|
case Token::kw_vector: return parseVectorType();
|
|
case Token::l_paren: return parseFunctionType();
|
|
default:
|
|
return parsePrimitiveType();
|
|
}
|
|
}
|
|
|
|
/// Parse a "type list", which is a singular type, or a parenthesized list of
|
|
/// types.
|
|
///
|
|
/// type-list ::= type-list-parens | type
|
|
/// type-list-parens ::= `(` `)`
|
|
/// | `(` type (`,` type)* `)`
|
|
///
|
|
ParseResult Parser::parseTypeList() {
|
|
// If there is no parens, then it must be a singular type.
|
|
if (!consumeIf(Token::l_paren))
|
|
return parseType();
|
|
|
|
if (parseCommaSeparatedList(Token::r_paren,
|
|
[&]() -> ParseResult {
|
|
// TODO: Add to list of IR values we're parsing.
|
|
return parseType();
|
|
})) {
|
|
return ParseFailure;
|
|
}
|
|
|
|
// TODO: Build IR objects.
|
|
return ParseSuccess;
|
|
}
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Top-level entity parsing.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Parse a function signature, starting with a name and including the parameter
|
|
/// list.
|
|
///
|
|
/// argument-list ::= type (`,` type)* | /*empty*/
|
|
/// function-signature ::= function-id `(` argument-list `)` (`->` type-list)?
|
|
///
|
|
ParseResult Parser::parseFunctionSignature(StringRef &name) {
|
|
if (curToken.isNot(Token::at_identifier))
|
|
return emitError("expected a function identifier like '@foo'");
|
|
|
|
name = curToken.getSpelling().drop_front();
|
|
consumeToken(Token::at_identifier);
|
|
|
|
if (curToken.isNot(Token::l_paren))
|
|
return emitError("expected '(' in function signature");
|
|
|
|
if (parseTypeList())
|
|
return ParseFailure;
|
|
|
|
// Parse the return type if present.
|
|
if (consumeIf(Token::arrow)) {
|
|
if (parseTypeList())
|
|
return ParseFailure;
|
|
|
|
// TODO: Build IR object.
|
|
}
|
|
|
|
return ParseSuccess;
|
|
}
|
|
|
|
|
|
/// External function declarations.
|
|
///
|
|
/// ext-func ::= `extfunc` function-signature
|
|
///
|
|
ParseResult Parser::parseExtFunc() {
|
|
consumeToken(Token::kw_extfunc);
|
|
|
|
StringRef name;
|
|
if (parseFunctionSignature(name))
|
|
return ParseFailure;
|
|
|
|
|
|
// Okay, the external function definition was parsed correctly.
|
|
module->functionList.push_back(new Function(name));
|
|
return ParseSuccess;
|
|
}
|
|
|
|
|
|
/// This is the top-level module parser.
|
|
Module *Parser::parseModule() {
|
|
while (1) {
|
|
switch (curToken.getKind()) {
|
|
default:
|
|
emitError("expected a top level entity");
|
|
return nullptr;
|
|
|
|
// If we got to the end of the file, then we're done.
|
|
case Token::eof:
|
|
return module.release();
|
|
|
|
// If we got an error token, then the lexer already emitted an error, just
|
|
// stop. Someday we could introduce error recovery if there was demand for
|
|
// it.
|
|
case Token::error:
|
|
return nullptr;
|
|
|
|
case Token::kw_extfunc:
|
|
if (parseExtFunc())
|
|
return nullptr;
|
|
break;
|
|
|
|
// TODO: cfgfunc, mlfunc, affine entity declarations, etc.
|
|
}
|
|
}
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// This parses the file specified by the indicated SourceMgr and returns an
|
|
/// MLIR module if it was valid. If not, it emits diagnostics and returns null.
|
|
Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) {
|
|
return Parser(sourceMgr).parseModule();
|
|
}
|