// File: LSToken.h

// LSToken.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006

#pragma once
#ifndef Ceda_cxMacroExpander_LSToken_H
#define Ceda_cxMacroExpander_LSToken_H

#include "cxMacroExpander.h"
#include "Ceda/cxUtils/xstring.h"
#include "Ceda/cxUtils/xostream.h"
#include "Ceda/cxUtils/IException.h"
#include "Ceda/cxUtils/SubString.h"

namespace ceda
{
cxMacroExpander_API void UnstringifyQuotedString(SubString s, xostream& os);

///////////////////////////////////////////////////////////////////////////////////////////////////
// EToken

enum EToken
{
    TOKEN_UNDEF = -1,       // Internal use only!
    
    TOKEN_END = 10000,      // Indicates end of file
    
    TOKEN_COMMENT,
    TOKEN_PREPROC_COMMENT,

    TOKEN_INTEGER,          // 100  23
    TOKEN_HEX_INTEGER,      // 0xFE01
    TOKEN_FLOAT,            // 100.03    4.0E6   10.2
    TOKEN_SINGLE_QUOTED_STRING,    // 'This is a string\n'
    TOKEN_DOUBLE_QUOTED_STRING,    // "This is a string\n"

    TOKEN_IDENTIFIER,       // x1

    TOKEN_LESS,             // <
    TOKEN_GREATER,          // >
    TOKEN_AMPERSAND,        // &
    TOKEN_VLINE,            // |
    TOKEN_CARET,            // ^

    TOKEN_LESS_EQUAL,       // <=
    TOKEN_GREATER_EQUAL,    // >=
    TOKEN_AMPERSAND_EQUAL,  // &=
    TOKEN_VLINE_EQUAL,      // |=
    TOKEN_CARET_EQUAL,      // ^=

    TOKEN_DBL_LESS,         // <<
    TOKEN_DBL_GREATER,      // >>
    TOKEN_DBL_AMPERSAND,    // &&
    TOKEN_DBL_VLINE,        // ||
    TOKEN_DBL_CARET,        // ^^

    TOKEN_DBL_LESS_EQUAL,       // <<=
    TOKEN_DBL_GREATER_EQUAL,    // >>=
    TOKEN_DBL_AMPERSAND_EQUAL,  // &&=
    TOKEN_DBL_VLINE_EQUAL,      // ||=
    TOKEN_DBL_CARET_EQUAL,      // ^^=

    TOKEN_PLUS,             // +
    TOKEN_MINUS,            // -
    TOKEN_ASTERISK,         // *
    TOKEN_FORWARD_SLASH,    // /
    TOKEN_PERCENT,          // %
    TOKEN_EXCLAM,           // !
    TOKEN_EQUAL,            // =
    TOKEN_QUESTION,         // ?

    TOKEN_DBL_PLUS,         // ++
    TOKEN_DBL_MINUS,        // --

    TOKEN_PLUS_EQUAL,       // +=
    TOKEN_MINUS_EQUAL,      // -=
    TOKEN_ASTERISK_EQUAL,   // *=
    TOKEN_SLASH_EQUAL,      // /=
    TOKEN_PERCENT_EQUAL,    // %=
    TOKEN_EXCLAM_EQUAL,     // !=
    TOKEN_DBL_EQUAL,        // ==
    TOKEN_QUESTION_EQUAL,   // ?=

    TOKEN_MINUS_GREATER,    // ->

    TOKEN_COLON,            // :
    TOKEN_COLON_COLON,      // ::

    TOKEN_DOLLAR,           // $
    TOKEN_DOLLAR_DOLLAR,    // $$

    TOKEN_DOT,              // .
    TOKEN_COMMA,            // ,
    TOKEN_SEMICOLON,        // ;
    TOKEN_LEFT_PAREN,       // (
    TOKEN_RIGHT_PAREN,      // )
    TOKEN_LEFT_CURLY,       // {
    TOKEN_RIGHT_CURLY,      // }
    TOKEN_LEFT_SQUARE,      // [
    TOKEN_RIGHT_SQUARE,     // ]
    TOKEN_TILDE,            // ~
    TOKEN_AT,               // @
    TOKEN_BACK_SLASH,       // '\'
    TOKEN_BACK_QUOTE,       // '`'

    TOKEN_HASH,             // #
};

///////////////////////////////////////////////////////////////////////////////////////////////////
// LSToken

class cxMacroExpander_API LSToken
{
public:
    LSToken(EToken type = TOKEN_UNDEF) : m_type(type) {}
    LSToken(EToken type, SubString s) : m_type(type), m_str(s) {}
    LSToken(EToken type, const xchar* p1, const xchar* p2) : m_type(type), m_str(p1,p2) {}

    bool operator==(const LSToken& rhs) const { return m_type == rhs.m_type && m_str == rhs.m_str; }
    bool operator!=(const LSToken& rhs) const { return m_type != rhs.m_type || m_str != rhs.m_str; }

    EToken GetType() const { return m_type; }
    bool operator==(EToken type) const { return m_type == type; }
    bool operator!=(EToken type) const { return m_type != type; }

    bool operator==(ConstStringZ s) const { return m_str == s; }
    bool operator!=(ConstStringZ s) const { return m_str != s; }

    xstring GetTokenString() const { return m_str.GetString(); }

    void GetLiteralString(xstring& str) const;
    xchar GetLiteralCharacter() const;

    int32 GetLiteralInt32() const;
    int64 GetLiteralInt64() const;
    int32 GetLiteralHexInt32() const;
    int64 GetLiteralHexInt64() const;
    double GetLiteralDouble() const;

    explicit operator bool() const { return m_type != TOKEN_END; }

public:
    EToken m_type;
    SubString m_str;
};

cxMacroExpander_API xostream& operator<<(xostream& os, const LSToken& x);

///////////////////////////////////////////////////////////////////////////////////////////////////
// LexScannerException

struct cxMacroExpander_API LexScannerException : public IException
{
    LexScannerException(const xstring& description, const xchar* pos) : 
        m_description(description),
        m_lineNumber(-1),
        m_pos(pos)
    {
        //cxAssert(m_pos);
    }

    LexScannerException(const xstring& description, SubString s) : 
        m_description(description),
        m_lineNumber(-1),
        m_pos(s.ptr())
    {
        //cxAssert(m_pos);
    }

    LexScannerException(const xstring& description, LSToken t) : 
        m_description(description),
        m_lineNumber(-1),
        m_pos(t.m_str.ptr())
    {
        //cxAssert(m_pos);
    }

    virtual ConstStringZ what() const 
    { 
        return m_description.c_str(); 
    }

    void Write(xostream& os) const;

    xstring m_description;
    xstring m_path;
    ssize_t m_lineNumber;
    const xchar* m_pos;
};

///////////////////////////////////////////////////////////////////////////////////////////////////
// Scan functions on SubString

// Helper functions for scanning a token
cxMacroExpander_API EToken LsScanNumber(SubString& s);
cxMacroExpander_API bool LsScanEscapeCharacter(SubString& s);
cxMacroExpander_API bool LsScanLiteralCharacter(SubString& s);
cxMacroExpander_API bool LsScanLiteralString(SubString& s, xchar delimiter, ConstStringZ name);

// Scan the next token
cxMacroExpander_API LSToken LsScanToken(SubString& s, bool returnComments = false);

// Scan the given next token or else throw a ScannedBadTokenException exception
cxMacroExpander_API void ReadGivenToken(SubString& s, EToken type, ConstStringZ desc);

// Returns the width of the white space assumimg tabs are worth 4 space characters
cxMacroExpander_API ssize_t ScanBackwardsThroughSpacesAndTabs(SubString& s);


cxMacroExpander_API LSToken ScanArgument(SubString& s, SubString& arg, EToken rightBracket = TOKEN_RIGHT_PAREN);

// Read a bracketted section  (......).  The section may contain nested bracketted sections.
// Brackets are counted in order to determine the end of the section
//
// The returned section is depicted as follows
//
//         ( ................... )
//           ^                   ^
//           p1                  p2
// 
cxMacroExpander_API SubString ScanBrackettedSection(SubString& s, bool readInitialBracket, EToken leftBracket, EToken rightBracket);

inline SubString ScanRndBrackettedSection(SubString& s, bool readInitialBracket)
{
    return ScanBrackettedSection(s,readInitialBracket,TOKEN_LEFT_PAREN,TOKEN_RIGHT_PAREN);
}

inline SubString ScanSquareBrackettedSection(SubString& s, bool readInitialBracket)
{
    return ScanBrackettedSection(s,readInitialBracket,TOKEN_LEFT_SQUARE,TOKEN_RIGHT_SQUARE);
}

inline SubString ScanCurlyBrackettedSection(SubString& s, bool readInitialBracket)
{
    return ScanBrackettedSection(s,readInitialBracket,TOKEN_LEFT_CURLY,TOKEN_RIGHT_CURLY);
}

// Scan a bracketted string (typically in curly or round brackets).  The returned substring doesn't 
// include the outer brackets.  The indentation of the string is returned.  This is -1 if
// the string was not a line based 'block' of text.
cxMacroExpander_API SubString ScanStringBlock(SubString& s, ssize_t& indentation,
    EToken leftBracket, EToken rightBracket, ConstStringZ leftBracketName, ConstStringZ rightBracketName,
    bool requireBrackets);

cxMacroExpander_API SubString ScanSubstString(SubString& s, ssize_t& indentation);
cxMacroExpander_API SubString ScanRndBrackettedString(SubString& s, ssize_t& indentation);

cxMacroExpander_API bool SubStringsAreTokenEquivalent(SubString s1, SubString s2);

} // namespace ceda

#endif // include guard