LSToken.h
// LSToken.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006
#pragma once
#ifndef Ceda_cxMacroExpander_LSToken_H
#define Ceda_cxMacroExpander_LSToken_H
#include "cxMacroExpander.h"
#include "Ceda/cxUtils/xstring.h"
#include "Ceda/cxUtils/xostream.h"
#include "Ceda/cxUtils/IException.h"
#include "Ceda/cxUtils/SubString.h"
namespace ceda
{
// Processes the quoted string 's' and writes output to 'os'. Defined out of line.
// NOTE(review): judging by the name this presumably strips the enclosing quotes and decodes
// escape sequences - confirm against the definition.
cxMacroExpander_API void UnstringifyQuotedString(SubString s, xostream& os);
///////////////////////////////////////////////////////////////////////////////////////////////////
// EToken
// Token kinds used by LSToken and returned by the Ls* scan functions.
//
// Every enumerator after TOKEN_END takes the next consecutive value (10001, 10002, ...), so the
// order of the entries below determines their numeric values. Inserting or reordering entries
// renumbers everything that follows.
enum EToken
{
    // ---- Sentinels ----
    TOKEN_UNDEF = -1,               // Internal use only!
    TOKEN_END = 10000,              // Indicates end of file

    // ---- Comments ----
    TOKEN_COMMENT,
    TOKEN_PREPROC_COMMENT,

    // ---- Literals and identifiers ----
    TOKEN_INTEGER,                  // 100 23
    TOKEN_HEX_INTEGER,              // 0xFE01
    TOKEN_FLOAT,                    // 100.03 4.0E6 10.2
    TOKEN_SINGLE_QUOTED_STRING,     // 'This is a string\n'
    TOKEN_DOUBLE_QUOTED_STRING,     // "This is a string\n"
    TOKEN_IDENTIFIER,               // x1

    // ---- Relational / bitwise family: single character ----
    TOKEN_LESS,                     // <
    TOKEN_GREATER,                  // >
    TOKEN_AMPERSAND,                // &
    TOKEN_VLINE,                    // |
    TOKEN_CARET,                    // ^

    // ---- ... followed by '=' ----
    TOKEN_LESS_EQUAL,               // <=
    TOKEN_GREATER_EQUAL,            // >=
    TOKEN_AMPERSAND_EQUAL,          // &=
    TOKEN_VLINE_EQUAL,              // |=
    TOKEN_CARET_EQUAL,              // ^=

    // ---- ... doubled ----
    TOKEN_DBL_LESS,                 // <<
    TOKEN_DBL_GREATER,              // >>
    TOKEN_DBL_AMPERSAND,            // &&
    TOKEN_DBL_VLINE,                // ||
    TOKEN_DBL_CARET,                // ^^

    // ---- ... doubled and followed by '=' ----
    TOKEN_DBL_LESS_EQUAL,           // <<=
    TOKEN_DBL_GREATER_EQUAL,        // >>=
    TOKEN_DBL_AMPERSAND_EQUAL,      // &&=
    TOKEN_DBL_VLINE_EQUAL,          // ||=
    TOKEN_DBL_CARET_EQUAL,          // ^^=

    // ---- Arithmetic / assignment family: single character ----
    TOKEN_PLUS,                     // +
    TOKEN_MINUS,                    // -
    TOKEN_ASTERISK,                 // *
    TOKEN_FORWARD_SLASH,            // /
    TOKEN_PERCENT,                  // %
    TOKEN_EXCLAM,                   // !
    TOKEN_EQUAL,                    // =
    TOKEN_QUESTION,                 // ?

    // ---- Increment / decrement ----
    TOKEN_DBL_PLUS,                 // ++
    TOKEN_DBL_MINUS,                // --

    // ---- Arithmetic family followed by '=' ----
    TOKEN_PLUS_EQUAL,               // +=
    TOKEN_MINUS_EQUAL,              // -=
    TOKEN_ASTERISK_EQUAL,           // *=
    TOKEN_SLASH_EQUAL,              // /=
    TOKEN_PERCENT_EQUAL,            // %=
    TOKEN_EXCLAM_EQUAL,             // !=
    TOKEN_DBL_EQUAL,                // ==
    TOKEN_QUESTION_EQUAL,           // ?=

    // ---- Member access / scope / misc punctuation ----
    TOKEN_MINUS_GREATER,            // ->
    TOKEN_COLON,                    // :
    TOKEN_COLON_COLON,              // ::
    TOKEN_DOLLAR,                   // $
    TOKEN_DOLLAR_DOLLAR,            // $$
    TOKEN_DOT,                      // .
    TOKEN_COMMA,                    // ,
    TOKEN_SEMICOLON,                // ;

    // ---- Brackets ----
    TOKEN_LEFT_PAREN,               // (
    TOKEN_RIGHT_PAREN,              // )
    TOKEN_LEFT_CURLY,               // {
    TOKEN_RIGHT_CURLY,              // }
    TOKEN_LEFT_SQUARE,              // [
    TOKEN_RIGHT_SQUARE,             // ]

    // ---- Remaining single characters ----
    TOKEN_TILDE,                    // ~
    TOKEN_AT,                       // @
    TOKEN_BACK_SLASH,               // '\'
    TOKEN_BACK_QUOTE,               // '`'
    TOKEN_HASH,                     // #
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// LSToken
// A token: a kind (EToken) together with the SubString it was constructed with.
// Both members are public and the class adds no invariants of its own.
class cxMacroExpander_API LSToken
{
public:
    // ---- Construction ----

    // Token of the given kind with a default-constructed m_str. Not explicit, so an EToken
    // converts implicitly to an LSToken.
    LSToken(EToken type = TOKEN_UNDEF)
        : m_type(type)
    {
    }

    // Token of the given kind whose text is the substring s.
    LSToken(EToken type, SubString s)
        : m_type(type)
        , m_str(s)
    {
    }

    // Token of the given kind whose text is SubString(p1,p2).
    LSToken(EToken type, const xchar* p1, const xchar* p2)
        : m_type(type)
        , m_str(p1,p2)
    {
    }

    // ---- Comparison with another token: both the kind and the text must agree ----

    bool operator==(const LSToken& rhs) const
    {
        if (m_type != rhs.m_type)
        {
            return false;
        }
        return m_str == rhs.m_str;
    }

    bool operator!=(const LSToken& rhs) const
    {
        if (m_type != rhs.m_type)
        {
            return true;
        }
        return m_str != rhs.m_str;
    }

    // ---- Comparison with a token kind only (the text is ignored) ----

    bool operator==(EToken type) const
    {
        return m_type == type;
    }

    bool operator!=(EToken type) const
    {
        return m_type != type;
    }

    // ---- Comparison with a zero terminated string (the kind is ignored) ----

    bool operator==(ConstStringZ s) const
    {
        return m_str == s;
    }

    bool operator!=(ConstStringZ s) const
    {
        return m_str != s;
    }

    // ---- Accessors ----

    EToken GetType() const
    {
        return m_type;
    }

    // The token text, obtained from m_str.GetString().
    xstring GetTokenString() const
    {
        return m_str.GetString();
    }

    // Literal value extraction. These are defined out of line.
    // NOTE(review): presumably each one expects a token of the matching literal kind
    // (string / character / integer / hex integer / float) - confirm the preconditions and
    // error behaviour against the definitions.
    void GetLiteralString(xstring& str) const;
    xchar GetLiteralCharacter() const;
    int32 GetLiteralInt32() const;
    int64 GetLiteralInt64() const;
    int32 GetLiteralHexInt32() const;
    int64 GetLiteralHexInt64() const;
    double GetLiteralDouble() const;

    // True for every token kind except TOKEN_END. Note that a TOKEN_UNDEF token therefore
    // also tests true.
    explicit operator bool() const
    {
        return m_type != TOKEN_END;
    }

    // ---- State (public) ----
    EToken m_type;
    SubString m_str;
};
// Stream insertion for an LSToken. Defined out of line; the output format is not visible in
// this header.
cxMacroExpander_API xostream& operator<<(xostream& os, const LSToken& x);
///////////////////////////////////////////////////////////////////////////////////////////////////
// LexScannerException
// Exception carrying a description and a position pointer.
//
// Every constructor stores the description and a position pointer, sets m_lineNumber to -1 and
// leaves m_path default constructed.
// NOTE(review): m_path and m_lineNumber are presumably filled in later by code that can map
// m_pos back to a file and line - confirm against the callers.
struct cxMacroExpander_API LexScannerException : public IException
{
    // Position given directly as a pointer.
    LexScannerException(const xstring& description, const xchar* pos)
        : m_description(description)
        , m_lineNumber(-1)
        , m_pos(pos)
    {
        //cxAssert(m_pos);
    }

    // Position taken from s.ptr().
    LexScannerException(const xstring& description, SubString s)
        : LexScannerException(description, s.ptr())
    {
    }

    // Position taken from the token's substring, t.m_str.ptr().
    LexScannerException(const xstring& description, LSToken t)
        : LexScannerException(description, t.m_str.ptr())
    {
    }

    // Returns m_description.c_str().
    virtual ConstStringZ what() const
    {
        return m_description.c_str();
    }

    // Writes the exception to os. Defined out of line.
    void Write(xostream& os) const;

    xstring m_description;
    xstring m_path;             // default constructed by every constructor in this header
    ssize_t m_lineNumber;       // -1 as set by every constructor in this header
    const xchar* m_pos;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Scan functions on SubString
//
// NOTE(review): every function in this group takes the SubString by non-const reference;
// presumably the scanned text is consumed from s - confirm against the definitions.

// Helper functions for scanning a token
// NOTE(review): the bool results presumably indicate whether the construct was recognised,
// and LsScanNumber presumably returns the kind of numeric token found - confirm.
cxMacroExpander_API EToken LsScanNumber(SubString& s);
cxMacroExpander_API bool LsScanEscapeCharacter(SubString& s);
cxMacroExpander_API bool LsScanLiteralCharacter(SubString& s);
cxMacroExpander_API bool LsScanLiteralString(SubString& s, xchar delimiter, ConstStringZ name);
// Scan the next token
// NOTE(review): returnComments (default false) presumably selects whether comment tokens are
// returned to the caller rather than skipped - confirm.
cxMacroExpander_API LSToken LsScanToken(SubString& s, bool returnComments = false);
// Scan the given next token or else throw a ScannedBadTokenException exception
// NOTE(review): ScannedBadTokenException is not declared in this header (LexScannerException
// is); confirm which exception type is actually thrown.
cxMacroExpander_API void ReadGivenToken(SubString& s, EToken type, ConstStringZ desc);
// Returns the width of the white space assuming tabs are worth 4 space characters
cxMacroExpander_API ssize_t ScanBackwardsThroughSpacesAndTabs(SubString& s);
// NOTE(review): presumably scans a single argument into 'arg', stopping at a separator or at
// 'rightBracket' (default TOKEN_RIGHT_PAREN), and returns the terminating token - confirm.
cxMacroExpander_API LSToken ScanArgument(SubString& s, SubString& arg, EToken rightBracket = TOKEN_RIGHT_PAREN);
// Read a bracketed section (......). The section may contain nested bracketed sections.
// Brackets are counted in order to determine the end of the section.
//
// leftBracket / rightBracket are the token kinds treated as the opening and closing brackets.
// NOTE(review): readInitialBracket presumably selects whether this function reads the opening
// bracket itself or assumes the caller has already read it - confirm.
//
// The returned section is depicted as follows
//
// ( ................... )
// ^ ^
// p1 p2
//
// NOTE(review): the column alignment of the carets in the diagram above has been lost, so it
// no longer shows whether p1/p2 include the outer brackets - confirm and restore the diagram.
cxMacroExpander_API SubString ScanBrackettedSection(SubString& s, bool readInitialBracket, EToken leftBracket, EToken rightBracket);
// Round bracket convenience wrapper: forwards to ScanBrackettedSection with
// TOKEN_LEFT_PAREN / TOKEN_RIGHT_PAREN as the bracket pair.
inline SubString ScanRndBrackettedSection(SubString& s, bool readInitialBracket)
{
    const EToken openingBracket = TOKEN_LEFT_PAREN;
    const EToken closingBracket = TOKEN_RIGHT_PAREN;
    return ScanBrackettedSection(s, readInitialBracket, openingBracket, closingBracket);
}
// Square bracket convenience wrapper: forwards to ScanBrackettedSection with
// TOKEN_LEFT_SQUARE / TOKEN_RIGHT_SQUARE as the bracket pair.
inline SubString ScanSquareBrackettedSection(SubString& s, bool readInitialBracket)
{
    const EToken openingBracket = TOKEN_LEFT_SQUARE;
    const EToken closingBracket = TOKEN_RIGHT_SQUARE;
    return ScanBrackettedSection(s, readInitialBracket, openingBracket, closingBracket);
}
// Curly bracket convenience wrapper: forwards to ScanBrackettedSection with
// TOKEN_LEFT_CURLY / TOKEN_RIGHT_CURLY as the bracket pair.
inline SubString ScanCurlyBrackettedSection(SubString& s, bool readInitialBracket)
{
    const EToken openingBracket = TOKEN_LEFT_CURLY;
    const EToken closingBracket = TOKEN_RIGHT_CURLY;
    return ScanBrackettedSection(s, readInitialBracket, openingBracket, closingBracket);
}
// Scan a bracketed string (typically in curly or round brackets). The returned substring doesn't
// include the outer brackets. The indentation of the string is returned through 'indentation'.
// This is -1 if the string was not a line based 'block' of text.
//
// leftBracket / rightBracket are the token kinds of the enclosing brackets.
// NOTE(review): leftBracketName / rightBracketName are presumably used only in error messages,
// and requireBrackets presumably selects whether missing brackets are an error - confirm
// against the definition.
cxMacroExpander_API SubString ScanStringBlock(SubString& s, ssize_t& indentation,
EToken leftBracket, EToken rightBracket, ConstStringZ leftBracketName, ConstStringZ rightBracketName,
bool requireBrackets);
// NOTE(review): the next two functions share the (s, indentation) shape of ScanStringBlock and
// are presumably wrappers around it with a fixed bracket pair - confirm against the definitions.
cxMacroExpander_API SubString ScanSubstString(SubString& s, ssize_t& indentation);
cxMacroExpander_API SubString ScanRndBrackettedString(SubString& s, ssize_t& indentation);
// NOTE(review): presumably returns true when s1 and s2 scan to the same sequence of tokens
// - confirm how white space and comments are treated.
cxMacroExpander_API bool SubStringsAreTokenEquivalent(SubString s1, SubString s2);
} // namespace ceda
#endif // include guard