/* This file is part of KDevelop
Copyright 2002-2005 Roberto Raggi <roberto@kdevelop.org>
Copyright 2007-2008 David Nolden <david.nolden.kdevelop@art-master.de>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef LEXER_H
#define LEXER_H
#include "cppparser_export.h"
#include "symbol.h"
#include <QtCore/QString>
#include <cstdlib>
#include "indexedstring.h"
#include "problem.h"
struct NameSymbol;
class Lexer;
class Control;
class ParseSession;
typedef void (Lexer::*scan_fun_ptr)();
/**Token.*/
class CPPPARSER_EXPORT Token
{
public:
///kind of the token @see TOKEN_KIND enum reference.
int kind;
///position in the preprocessed buffer
std::size_t position;
///size of the token in the preprocessed buffer. Do not confuse this with symbolLength.
std::size_t size;
///pointer to the parse session.
const ParseSession* session;
//Symbol associated to the token. This only works if this is a simple symbol
//only consisting of one identifier(not comments), does not work for operators like "->" or numbers like "50"
IndexedString symbol() const;
//This always works, but is expensive
QString symbolString() const;
QByteArray symbolByteArray() const;
uint symbolLength() const;
///@todo adymo: find out what @p right_brace is
union
{
//const NameSymbol *symbol;
std::size_t right_brace;
} extra;
};
/**Stream of tokens found by lexer.
Internally works like an array of @ref Token continuosly allocated.
All tokens are destructed when this stream is deleted.
The stream has a "cursor" which is simply an integer which defines
the offset (index) of the token currently "observed" from the beginning of
the stream.*/
class CPPPARSER_EXPORT TokenStream
{
private:
TokenStream(const TokenStream &);
void operator = (const TokenStream &);
public:
/**Creates a token stream with the default size of 1024 tokens.*/
inline TokenStream(std::size_t size = 1024)
: tokens(0),
index(0),
token_count(0)
{
resize(size);
}
inline ~TokenStream()
{ ::free(tokens); }
/**@return the size of the token stream.*/
inline std::size_t size() const
{ return token_count; }
/**@return the "cursor" - the offset (index) of the token
currently "observed" from the beginning of the stream.*/
inline std::size_t cursor() const
{ return index; }
/**Sets the cursor to the position @p i.*/
inline void rewind(int i)
{ index = i; }
/**Resizes the token stream.*/
void resize(std::size_t size)
{
Q_ASSERT(size > 0);
tokens = (Token*) ::realloc(tokens, sizeof(Token) * size);
token_count = size;
}
/**Updates the cursor position to point to the next token and returns
the cursor.*/
inline std::size_t nextToken()
{ return index++; }
/**@return the kind of the next (LA) token in the stream.*/
inline int lookAhead(std::size_t i = 0) const
{ return tokens[index + i].kind; }
/**@return the kind of the current token in the stream.*/
inline int kind(std::size_t i) const
{ return tokens[i].kind; }
/**@return the position of the current token in the c++ source buffer.*/
inline std::size_t position(std::size_t i) const
{ return tokens[i].position; }
/**@return the name symbol of the current token.*/
//inline const NameSymbol *symbol(std::size_t i) const
//{ return tokens[i].extra.symbol; }
/**@return the position of the matching right brace in the
c++ source buffer.
@todo this doesn't seem to work as the lexer does not provide this
information at the moment.*/
inline std::size_t matchingBrace(std::size_t i) const
{ return tokens[i].extra.right_brace; }
/**@return the token at position @p index.*/
inline Token &operator[](int index)
{ Q_ASSERT(index >= 0 && index < (int)token_count); return tokens[index]; }
/**@return the token at position @p index.*/
inline const Token &token(int index) const
{ return tokens[index]; }
private:
Token *tokens;
std::size_t index;
std::size_t token_count;
private:
friend class Lexer;
};
/**C++ Lexer.*/
class CPPPARSER_EXPORT Lexer
{
public:
/**
* Constructor.
*
* \param token_stream Provides a stream of tokens to the lexer.
* \param location_table a table which will be filled with non-preprocessed line -> offset values
* \param line_table a table which will be filled with (non-preproccessed line which contains a preprocessor line) -> offset values
*/
Lexer(Control *control);
/**Finds tokens in the @p contents buffer and fills the @ref token_stream.*/
void tokenize(ParseSession* session);
ParseSession* session;
private:
void skipComment();
/**Fills the scan table with method pointers.*/
void initialize_scan_table();
void scan_newline();
void scan_white_spaces();
void scan_identifier_or_keyword();
void scan_identifier_or_literal();
void scan_int_constant();
void scan_char_constant();
void scan_string_constant();
void scan_invalid_input();
void scan_preprocessor();
// keywords
void scanKeyword0();
void scanKeyword2();
void scanKeyword3();
void scanKeyword4();
void scanKeyword5();
void scanKeyword6();
void scanKeyword7();
void scanKeyword8();
void scanKeyword9();
void scanKeyword10();
void scanKeyword11();
void scanKeyword12();
void scanKeyword13();
void scanKeyword14();
void scanKeyword16();
// operators
void scan_not();
void scan_remainder();
void scan_and();
void scan_left_paren();
void scan_right_paren();
void scan_star();
void scan_plus();
void scan_comma();
void scan_minus();
void scan_dot();
void scan_divide();
void scan_colon();
void scan_semicolon();
void scan_less();
void scan_equal();
void scan_greater();
void scan_question();
void scan_left_bracket();
void scan_right_bracket();
void scan_xor();
void scan_left_brace();
void scan_or();
void scan_right_brace();
void scan_tilde();
void scan_EOF();
Problem *createProblem() const;
private:
Control *control;
struct SpecialCursor {
bool operator==(uint index) const {
return *current == index;
}
bool operator==(char character) const {
return *current == (character | 0xffff0000);
}
bool isChar() const {
return ((*current) & 0xffff0000) == 0xffff0000;
}
inline char operator*() const {
if(isChar())
return (char)*current;
else
return 'a'; //Return a valid character, because the identifiers created by the preprocessor are alpha-numerical
}
void operator++() {
++current;
}
void operator+=(int offset) {
current += offset;
}
bool operator !=(const SpecialCursor& rhs) const {
return current != rhs.current;
}
bool operator !=(const uint* rhs) const {
return current != rhs;
}
void operator--() {
--current;
}
bool operator<(const uint* end) const {
return current < end;
}
int operator -(const SpecialCursor& rhs) const {
return (((char*)current) - ((char*)rhs.current)) / sizeof(uint);
}
uint offsetIn(const uint* base) const {
return ((char*)current - (char*)base) / sizeof(uint);
}
SpecialCursor operator +(int offset) {
SpecialCursor ret(*this);
ret.current += offset;
return ret;
}
uint* current;
};
SpecialCursor cursor;
const uint* endCursor;
std::size_t index;
bool m_leaveSize; //Marks the current token that its size should not be automatically set
bool m_canMergeComment; //Whether we may append new comments to the last encountered one
bool m_firstInLine; //Whether the next token is the first one in a line
///scan table contains pointers to the methods to scan for various token types
static scan_fun_ptr s_scan_table[];
static scan_fun_ptr s_scan_keyword_table[];
static bool s_initialized;
};
#endif // LEXER_H