///////////////////////////////////////////////////////////////////////////// // Name: No names yet. // Purpose: Contrib. demo // Author: Aleksandras Gluchovas // Modified by: // Created: 22/09/98 // RCS-ID: $Id: cjparser.cpp 35650 2005-09-23 12:56:45Z MR $ // Copyright: (c) Aleskandars Gluchovas // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// // For compilers that support precompilation, includes "wx/wx.h". #include "wx/wxprec.h" #ifdef __BORLANDC__ #pragma hdrstop #endif #ifndef WX_PRECOMP #include "wx/wx.h" #endif #include "cjparser.h" #if defined( wxUSE_TEMPLATE_STL ) #include #else #include "wxstlac.h" #endif /***** Implementation for class SJParser *****/ // statics used by inline'ed C helper-functions static char* _gSrcStart = 0; static char* _gSrcEnd = 0; static wxChar* _gLastSuppresedComment = 0; static int _gLineNo = 0; // FOR NOW:: comments queue is static #define MAX_CQ_ENTRIES 128 static char* _gCommentsQueue[MAX_CQ_ENTRIES]; static int _gCQSize = 0; /***** keyword map related structures *****/ struct less_c_str { inline bool operator()( char* x, char* y) const { return ( strcmp( x,y ) < 0 ); } }; //WXSTL_MAP(CharPtrT,CharPtrT, LESS_THEN_FUNCTOR(CharPtrT)); #if defined( wxUSE_TEMPLATE_STL ) typedef map< char*, char*, less_c_str > KeywordMapT; #else typedef char* CharPtrT; typedef WXSTL_MAP( CharPtrT, CharPtrT ,less_c_str) KeywordMapT; #endif static KeywordMapT __gMultiLangMap; static int __gMapReady = 0; static char* __gKeyWords[] = { "public", "protected", "private", "class", "struct", "union", "enum", "interface", "package", "import", "typedef", "template", "friend", "const", "volatile", "mutable", "virtual", "inline", "static", "register", "final", "abstract", "native", "__stdcall", "extern", 0 }; static void check_keyword_map() { if ( !__gMapReady ) { __gMapReady = 1; // "make sure" the address of the first member of non-polimorphic class // coinsides with the address of the instance char** keyword = __gKeyWords; while ( (*keyword) != 0 ) { __gMultiLangMap.insert( KeywordMapT::value_type( *keyword, *keyword ) ); ++keyword; } } } /***** helper functions *****/ static inline void skip_to_eol( char*& cur ) { while( *(cur) != 10 && *cur != 13 && cur < _gSrcEnd) ++cur; } static inline void skip_eol( char*& cur ) { if ( *cur == 13 ) cur += 2; else cur += 1; ++_gLineNo; } static inline bool skip_to_next_comment_in_the_line( char*& cur ) { do { while( cur < _gSrcEnd && *cur != 10 && *cur != 13 && *cur != '/' ) ++cur; if ( cur == _gSrcEnd ) return false; if ( *cur == '/' ) { if ( (*(cur+1) == '*') || (*(cur+1) == '/') ) return true; else { ++cur; continue; } } return false; } while(1); } inline static void store_line_no( int& toVar ) { toVar = _gLineNo; } inline static void restore_line_no( int storedLineNo ) { _gLineNo = storedLineNo; } inline static int get_line_no() { return _gLineNo; } static void skip_to_prev_line( char*& cur ) { while( cur >= _gSrcStart && *cur != 10 && *cur != 13 ) --cur; // NOTE:: '\n' is 13,10 for DOS // '\n' is 10 for UNIX // NOTE1: '\n' symbol is not used here, // to provide possibility of loading // file as binary --cur; if ( *cur == 10 ) { ++cur; return; } if ( *cur == 13 ) --cur; while( cur >= _gSrcStart && *cur != 10 && *cur != 13 ) --cur; ++cur; // move to the first character in the line } static inline void skip_comments( char*& cur ) { ++cur; // skip '/' token if ( *cur != '/' && *cur != '*' ) return; // first, store position of the comment into the queue // (which further will be attached to the next context // found) if ( cur-1 != _gLastSuppresedComment ) { if ( _gCQSize == MAX_CQ_ENTRIES ) { size_t i = MAX_CQ_ENTRIES-1; while( i != 0 ) { _gCommentsQueue[i-1] = _gCommentsQueue[i]; --i; } --_gCQSize ; } _gCommentsQueue[_gCQSize++] = cur-1; } // if signle-line comment, skip it now if ( *cur == '/' ) { skip_to_eol( cur ); skip_eol( cur ); return; } size_t level = 1; // check for multiline comment (handle nested multiline comments!) int line_len = 0; ++cur; ++cur; do { // TBD:: check eof cond. // detect and remove vertical columns of '*''s while ( *cur != '/' && cur < _gSrcEnd ) { switch (*cur) { case '*' : { if ( *(cur+1) != '/' ) { if ( line_len == 1 ) *cur = ' '; } break; } case 13 : line_len = 0; break; case 10 : { line_len = 0; ++_gLineNo; } break; default : ++line_len; } ++cur; } if ( cur >= _gSrcEnd ) return; ++cur; if ( *(cur-2) == '*' ) { --level; if ( level == 0 ) break; } else if ( *cur == '*' ) { ++cur; ++cur; ++level; } } while(1); } static inline void clear_commets_queue() { _gCQSize = 0; } static inline void skip_quoted_string( char*& cur ) { ++cur; // skip first quote '"' // check if quote wasn't prefixed if ( *(cur-2) == '\\' ) return; do { while ( *cur != '"' && cur < _gSrcEnd ) { if ( *cur == 10 ) ++_gLineNo; ++cur; } if ( cur >= _gSrcEnd ) return; ++cur; // skip the last quote // check if it wasn't prefixed if ( *(cur-2) != '\\' ) break; } while (1); } // skips subsequent white space and comments // (return false if the end of source code reached) static inline bool get_next_token( char*& cur ) { for( ; cur < _gSrcEnd; ++cur ) { switch( *(cur) ) { case ' ' : continue; case '\t': continue; case 13 : continue; case 10 : { ++_gLineNo;continue; } case '/' : skip_comments( cur ); --cur; continue; default : break; }; break; } if ( cur >= _gSrcEnd ) return false; else return true; } static inline void skip_preprocessor_dir( wxChar*& cur ) { do { skip_to_eol(cur); if ( *(cur-1) != _T('\\') ) break; if ( cur < _gSrcEnd ) skip_eol( cur ); else break; } while(1); } static void skip_token( char*& cur ) { if ( *cur == '"' ) { skip_quoted_string( cur ); return; } if ( *cur == ',' || *cur == ';' || *cur == ')' || *cur == '(' ) { ++cur; return; } // special case of "!=", "<=", ... 2 character composite tokens if ( *cur == '<' || *cur == '>' || *cur == '=' || *cur == '!' ) { cur++; if ( *cur == '=' ) cur++; return; } ++cur; // leading character is always skipped for( ; cur < _gSrcEnd ; ++cur ) { switch ( *cur ) { case ' ' : break; case '\t': break; case 13 : break; case 10 : break; case ',' : break; case ';' : break; case '<' : break; case '>' : break; // FIXME:: QUICK-HACK:: to treat scope resolution // tokens are a part of the string - e.g. SomeSpace::SubName would // become one token case ':' : if ( *(cur+1) == ':' ) { ++cur; continue; } break; case '=' : break; case '(' : break; case ')' : break; case '{' : break; case '}' : break; default : continue; }; break; } } static inline size_t get_token_len( char* tok ) { char* start = tok; skip_token( tok ); return size_t( tok - start ); } // returns true, if given tokens are equel static inline bool cmp_tokens( char* tok1, char* tok2 ) { // NOTE:: the case one token includes // other in it's entirely is not handled size_t len = get_token_len( tok1 ); // assuming that tokens are non-zero length do { if ( *(tok1++) != *(tok2++) ) return false; --len; } while ( --len ); return true; } static inline bool cmp_tokens_fast( char* tok1, char* tok2, size_t len ) { do { if ( *(tok1++) != *(tok2++) ) return false; } while ( --len ); return true; } static inline void skip_tempalate_statement( char*& cur ) { size_t level = 0; // go one level deeper while( *cur != '<' && cur < _gSrcEnd ) { if (*cur == 10 ) ++_gLineNo; ++cur; } // FIXME:: template should be checked statement for // comments inside of it do { if ( *cur == '<' ) ++level; else --level; ++cur; // skip '<' or '>' token if ( level == 0 ) return; while( *cur != '<' && *cur != '>' && cur < _gSrcEnd ) { if (*cur == 10 ) ++_gLineNo; ++cur; } } while (1); } static inline void skip_statement( char*& cur ) { for( ; cur < _gSrcEnd; ++cur ) switch (*cur) { case ';' : ++cur; // skip statement-terminator token return; case '"' : skip_quoted_string(cur); --cur; continue; case 10 : ++_gLineNo; continue; case '/' : skip_comments( cur ); --cur; continue; default : continue; } } // "reversed" versions of skip_token() and get_next_token() static inline void skip_token_back( char*& cur ) { // FIXME:: now, when moving backwards, neither strings nor // comment blocks are checked --cur; // skip to the trailing character if ( *cur == ',' || *cur == ')' || *cur == '(' ) return; for( ; cur < _gSrcEnd ; --cur ) { switch ( *cur ) { case ' ' : break; case '\t': break; case 13 : break; case 10 : break; case ',' : break; case '(' : break; default : continue; }; break; } ++cur; // get to the leading character of the token } static inline void skip_next_token_back( char*& cur ) { --cur; // skip leading character of the current token if ( *cur == ',' || *cur == ')' || *cur == '(' ) { ++cur; return; } for( ; cur < _gSrcEnd; --cur ) { switch ( *cur ) { case ' ' : continue; case '\t': continue; case 13 : continue; case 10 : continue; case ',' : continue; case '(' : continue; default : break; }; break; } ++cur; // position after the trailing charcter of the prev token } static wxString get_token_str( char* cur ) { return wxString( cur, get_token_len( cur ) ); } // skips token or whole expression which may have // nested expressions between '(' ')' brackets. // // Upon return, the cursor points to the terminating bracket ')', // // Return value is the size of the block static size_t skip_block( char*& cur ) { size_t level = 0; // nesting level char* start = cur; // NOTE:: assumed that block not necessarely starts // with bracket rightaway if ( *cur == '(' ) { ++level; } do { skip_token( cur ); char* savedPos = cur; int tmpLnNo; store_line_no( tmpLnNo ); get_next_token( cur ); if ( cur >= _gSrcEnd ) return 0; if ( *cur == '(' ) { ++level; } else if ( *cur == ')' ) { if ( level == 0 ) { cur = savedPos; restore_line_no( tmpLnNo ); return size_t(cur-start); } --level; if ( level == 0 ) { ++cur; // QUICK-HACK::to easily handle function prototypes , // it works, besause theoretically there should // be no cast-expressions in non-implementation // scope (e.g. "time( (long*)(ptr+1) )" should not // appear in the declarations, thus it is most likelly // for the ")(" fragment to be within a function // prototype in the declarations scope if ( *cur == '(' ) { ++level; continue; } else return size_t(cur-start); } } else { if ( level == 0 ) { cur = savedPos; restore_line_no( tmpLnNo ); return size_t(cur-start); } } } while(1); } // returns 0, if end of source reached static inline bool skip_imp_block( char*& cur ) { while( *cur != '{' && cur < _gSrcEnd ) { skip_token( cur ); if ( !get_next_token( cur ) ) return false; } while( *cur != '}' && cur < _gSrcEnd ) { skip_token( cur ); if ( !get_next_token( cur ) ) return false; } ++cur; return true; } static bool is_class_token( char*& cur ) { // FIXME:: the below mess should be cleaned in it's entirely if ( *cur == 'i' ) if ( *(cur+1) == 'n' ) return cmp_tokens_fast( cur, "interface", 9 ); if ( *cur == 'c' ) if ( *(cur+1) == 'l' ) return cmp_tokens_fast( cur, "class", 5 ); if ( *cur == 's' ) if ( *(cur+1) == 't' ) return cmp_tokens_fast( cur, "struct", 6 ); if ( *cur == 'u' ) if ( *(cur+1) == 'n' ) return cmp_tokens_fast( cur, "union", 5 ); return false; } inline static bool is_forward_decl( char* cur ) { do { switch( *cur ) { case ':' : return false; case '{' : return false; case '(' : return false; case ';' : return true; default : break; }; ++cur; } while (cur < _gSrcEnd); // prevent running out of bounds return false; } inline static bool is_function( char* cur, bool& isAMacro ) { isAMacro = false; int tmpLnNo; store_line_no( tmpLnNo ); // NOTE:: comments and quoted strings are not checked here // first,check for "single-line hanginging macros" like: // ___UNICODE // char* eol = cur; skip_to_eol( eol ); skip_token( cur ); get_next_token( cur ); if ( cur > eol ) { isAMacro = true; restore_line_no( tmpLnNo ); return true; } // it's not a macro, go to the begining of arg. list do { // if bracket found, it's a function or a begining // of some macro if ( *cur == '(' ) { restore_line_no( tmpLnNo ); return true; } // end of statement found without any brackets in it // - it cannot be a function if ( *cur == ';' ) { restore_line_no( tmpLnNo ); return false; } ++cur; } while( cur < _gSrcEnd); isAMacro = 1; restore_line_no( tmpLnNo ); return false; } // upon return the cursor is positioned after the // terminating curly brace static inline void skip_scope_block( char*& cur ) { size_t level = 0; for( ; cur < _gSrcEnd ; ++cur ) switch( *cur ) { case '/' : skip_comments( cur ); --cur; continue; case '"' : skip_quoted_string( cur ); --cur; continue; case '{' : ++level; continue; case '}' :--level; if ( level == 0 ) { ++cur; // skip final closing curly brace return; } case 10 : ++_gLineNo; continue; default : continue; }; } // moves tokens like '*' '**', '***', '&' from the name // to the type static void arrange_indirection_tokens_between( wxString& type, wxString& identifier ) { // TBD:: FIXME:: return value of operators ! while ( identifier[0u] == _T('*') || identifier[0u] == _T('&') ) { type += identifier[0u]; identifier.erase(0,1); if ( !identifier.length() ) return; } } // the only function where multi-lang keyword map is accessed static bool is_keyword( char* cur ) { size_t len = get_token_len( cur ); // put a terminating zero after the given token char tmp = *(cur + len); *(cur+len) = '\0'; KeywordMapT::iterator i; i = __gMultiLangMap.find( cur ); // restore original character suppresed by terminating zero *(cur + len) = tmp; return i == __gMultiLangMap.end() ? false : true; } static inline void get_string_between( wxChar* start, wxChar* end, wxString* pStr ) { char saved = *end; *end = _T('\0'); *pStr = start; *end = saved; } static wxChar* set_comment_text( wxString& text, wxChar* start ) { wxChar* end = start; // to avoid poluting the queue with this comment _gLastSuppresedComment = start; skip_comments( end ); if ( *(end-1) == _T('/') ) end -= 2; start += 2; // skip multiple leading '/''s or '*''s while( *start == _T('/') && start < end ) ++start; while( *start == _T('*') && start < end ) ++start; get_string_between( start, end, &text ); return end; } /***** Implementation for class CJSourceParser *****/ CJSourceParser::CJSourceParser( bool collectCommnets, bool collectMacros ) : mpStart(0), mpEnd(0), mpCurCtx( 0 ), mCommentsOn( collectCommnets ), mMacrosOn ( collectMacros ) { check_keyword_map(); } spFile* CJSourceParser::Parse( char* start, char* end ) { // set up state variables mCurVis = SP_VIS_PRIVATE; spFile* pTopCtx = new spFile(); mpCurCtx = pTopCtx; mIsVirtual = 0; mIsTemplate = 0; mNestingLevel = 0; m_cur = start; mpStart = start; mpEnd = end; _gSrcEnd = mpEnd; // let all the C-functions "smell" the end of file _gSrcStart = start; _gLineNo = 0; clear_commets_queue(); // main parsing loop do { if ( !get_next_token( m_cur ) ) // end of source reached return pTopCtx; if ( memcmp( m_cur, "ScriptSection( const string&", strlen( "ScriptSection( const string&" ) ) == 0 ) { // int o = 0; // ++o; } switch (*m_cur) { case '#' : { AddMacroNode( m_cur ); continue; } case ':' : { skip_token( m_cur ); continue; } case ';' : { skip_token( m_cur ); continue; } case ')' : { skip_token( m_cur ); continue; } case '=' : { skip_token( m_cur ); continue; } default: break; } // 'const' is a part of the return type, not a keyword here if ( strncmp(m_cur, "const", 5) != 0 && is_keyword( m_cur ) ) { // parses, token, if token identifies // the container context (e.g. class/namespace) // the corresponding context object is created // and set as current context ParseKeyword( m_cur ); continue; } if ( *m_cur >= _T('0') && *m_cur <= _T('9') ) { skip_token( m_cur ); continue; } if ( *m_cur == _T('}') ) { if ( mCurCtxType != SP_CTX_CLASS ) { // FOR NOW:: disable the below assertion // DBG:: unexpected closing-bracket found //ASSERT(0); skip_token( m_cur ); // just skip it continue; } if ( mpCurCtx->GetType() == SP_CTX_CLASS ) { int curOfs = ( (m_cur+1) - _gSrcStart ); mpCurCtx->mContextLength = ( curOfs - mpCurCtx->mSrcOffset ); } --mNestingLevel; // terminate operation/class/namespace context // TBD:: check if it's really this type of context wxASSERT( mpCurCtx ); mpCurCtx = mpCurCtx->GetOutterContext(); wxASSERT( mpCurCtx ); if ( mNestingLevel == 0 ) { mCurCtxType = SP_CTX_FILE; // not-nested class delclaration finished, // rest template flag in any case mIsTemplate = 0; } skip_token( m_cur ); continue; } bool isAMacro = false; if ( is_function( m_cur, isAMacro ) ) { if ( isAMacro ) { skip_token( m_cur ); continue; } char* savedPos = m_cur; int tmpLnNo; store_line_no( tmpLnNo ); wxUnusedVar( tmpLnNo ); isAMacro = false; if ( !ParseNameAndRetVal( m_cur, isAMacro ) ) { if ( !isAMacro ) { m_cur = savedPos; SkipFunction( m_cur ); } continue; } if ( !ParseArguments( m_cur ) ) { // failure while parsing arguments, // remove enclosing operation context spContext* pFailed = mpCurCtx; mpCurCtx = mpCurCtx->GetOutterContext(); mpCurCtx->RemoveChild( pFailed ); skip_to_eol( m_cur ); //m_cur = savedPos; } else { // otherwise, successfully close operation context: clear_commets_queue(); SkipFunctionBody( m_cur ); mpCurCtx = mpCurCtx->GetOutterContext(); // DBG:: wxASSERT( mpCurCtx ); } } else // otherwise it's declaration of a variable; { // now, the cursor point to the end of statement (';' token) if ( mCurCtxType != SP_CTX_CLASS ) { // non-class members are ignored skip_token( m_cur ); // skip the end of statement continue; } ParseMemberVar( m_cur ); } } while( 1 ); } void CJSourceParser::AttachComments( spContext& ctx, wxChar* cur ) { if ( !mCommentsOn ) return; MCommentListT& lst = ctx.GetCommentList(); wxChar* prevComEnd = 0; int tmpLnNo; store_line_no( tmpLnNo ); // attach comments which were found before the given context for( int i = 0; i != _gCQSize; ++i ) { spComment* pComment = new spComment(); lst.push_back( pComment ); // find the end of comment wxChar* start = _gCommentsQueue[i]; pComment->mIsMultiline = ( *(start+1) == _T('*') ); // first comment in the queue and multiline // comments are always treated as a begining // of the new paragraph in the comment text if ( i == 0 ) { pComment->mStartsPar = true; } else if ( pComment->mIsMultiline ) { pComment->mStartsPar = true; } else { // find out wheather there is a new-line // between to adjecent comments wxChar* prevLine = start; skip_to_prev_line(prevLine); if ( prevLine >= prevComEnd ) pComment->mStartsPar = true; else pComment->mStartsPar = false; } prevComEnd = set_comment_text( pComment->m_Text, start ); } // attach comments which are at the end of the line // of the given context (if any) if ( skip_to_next_comment_in_the_line( cur ) ) { spComment* pComment = new spComment(); lst.push_back( pComment ); set_comment_text( pComment->m_Text, cur ); pComment->mStartsPar = 1; pComment->mIsMultiline = ( *(cur+1) == _T('*') ); // mark this comment, so that it would not // get in the comments list of the next context _gLastSuppresedComment = cur; } restore_line_no( tmpLnNo ); clear_commets_queue(); } void CJSourceParser::AddMacroNode( wxChar*& cur ) { wxChar* start = cur; int lineNo = get_line_no(); skip_preprocessor_dir( cur ); int tmpLnNo; store_line_no( tmpLnNo ); if ( !mMacrosOn ) return; spPreprocessorLine* pPL = new spPreprocessorLine(); pPL->mSrcLineNo = lineNo; AttachComments( *pPL, cur ); get_string_between( start, cur, &pPL->m_Line ); ++start; // skip '#' get_next_token( start ); pPL->mDefType = SP_PREP_DEF_OTHER; // if we found a definition or redefinition, // determine the type exactly and assign // a name to the context if ( *start == _T('d') ) { if ( cmp_tokens_fast( start, _T("define"), 6 ) ) { char* tok = start+6; get_next_token( tok ); pPL->m_Name = get_token_str( tok ); skip_token( tok ); get_next_token( tok); if ( tok > cur ) pPL->mDefType = SP_PREP_DEF_DEFINE_SYMBOL; else pPL->mDefType = SP_PREP_DEF_REDEFINE_SYMBOL; } } else if ( *start == _T('i') ) { if ( cmp_tokens_fast( start, _T("include"), 7 ) ) { pPL->mDefType = SP_PREP_DEF_INCLUDE_FILE; } else if ( *++start == _T('f') ) { // either "#if" or "#ifdef" cur = start; skip_token( cur ); get_next_token( cur ); wxString condition = get_token_str( cur ); // currently, everything except '0' is true if ( condition == _T("0") ) { // skip until the following else or enif while ( cur < _gSrcEnd ) { skip_to_eol( cur ); skip_eol( cur ); get_next_token( cur ); if ( *cur++ == _T('#') && *cur == _T('e') ) break; } } // TODO parse the condition... } } else if ( cmp_tokens_fast( start, _T("else"), 4 ) ) { // skip until "#endif" while ( cur < _gSrcEnd ) { skip_to_eol( cur ); skip_eol( cur ); get_next_token( cur ); if ( *cur++ == _T('#') && cmp_tokens_fast( cur, "endif", 5 ) ) break; } } mpCurCtx->AddMember( pPL ); skip_to_eol( cur ); skip_eol( cur ); restore_line_no( tmpLnNo ); clear_commets_queue(); } void CJSourceParser::ParseKeyword( char*& cur ) { // analyze token, which identifies the begining of a new context if ( CheckVisibilty( cur ) ) { skip_token( cur ); return; } if ( is_class_token( cur ) ) { if ( is_forward_decl( cur ) ) { // forward declarations are ignored; skip_token( cur ); return; } if ( mNestingLevel == 0 ) { // change context form global class context mCurCtxType = SP_CTX_CLASS; } ++mNestingLevel; // add information about new class (name, inheritance, etc) AddClassNode( cur ); // the default visiblity for class members is 'private' mCurVis = SP_VIS_PRIVATE; return; } size_t len = get_token_len( cur ); if ( cmp_tokens_fast( cur, "typedef", len ) ) { skip_token(cur); get_next_token(cur); if ( cmp_tokens_fast( cur, "struct", len ) || cmp_tokens_fast( cur, "union", len ) || cmp_tokens_fast( cur, "class", len ) ) { if ( mNestingLevel == 0 ) { // change context form global class context mCurCtxType = SP_CTX_CLASS; } ++mNestingLevel; // add information about new class (name, inheritance, etc) AddClassNode( cur ); // the default visiblity for class members is 'private' mCurVis = SP_VIS_PRIVATE; return; // FOR NOW:: typedef struct, etc are also ignored //skip_scope_block( cur ); } if ( cmp_tokens_fast( cur, "enum", len ) ) { AddEnumNode( cur ); return; } AddTypeDefNode( cur ); return; } if ( cmp_tokens_fast( cur, "enum", len ) ) { AddEnumNode( cur ); return; } if ( cmp_tokens_fast( cur, "extern", len ) ) { // extern's are ignored (both extern "C" and extern vars) while ( *cur != '{' && *cur != ';' ) { skip_token( cur ); get_next_token( cur ); } return; } if ( cmp_tokens_fast( cur, "enum", len ) ) { // enumeration blocks are ignored skip_scope_block( cur ); get_next_token( cur ); skip_token( cur ); // skip ';' token; return; } if ( cmp_tokens_fast( cur, "package", len ) ) { // packages are ignored skip_statement( cur ); return; }; if ( cmp_tokens_fast( cur, "import", len ) ) { // import statements are ignored skip_statement( cur ); return; } if ( cmp_tokens_fast( cur, "virtual", len ) ) { // probably the virtual method is in front of us; mIsVirtual = 1; skip_token( cur ); return; } if ( cmp_tokens_fast( cur, "template", len ) ) { mIsTemplate = 1; skip_tempalate_statement( cur ); return; } if ( cmp_tokens_fast( cur, "friend", len ) ) { skip_statement( cur ); return; } // ingnore "unsigificant" tokens (i.e. which do not // affect the current parsing context) skip_token( cur ); } bool CJSourceParser::ParseNameAndRetVal( char*& cur, bool& isAMacro ) { isAMacro = false; // FOR NOW:: all functions in the global // scope are ignored int lineNo = get_line_no(); char* start = cur; bool isVirtual = false; while( *cur != '(' ) { if ( get_token_str( cur ) == "virtual" ) isVirtual = true; skip_token( cur ); if ( !get_next_token( cur ) ) return false; } char* bracketPos = cur; char* savedPos = cur + 1; int tmpLnNo; store_line_no( tmpLnNo ); // skip gap between function name and start of paramters list while ( *(cur-1) == ' ' ) --cur; // check if it's not a macro, and let plugin handle it, if so if ( mpPlugin ) { skip_token_back( cur ); char* tmp = cur; if ( mpPlugin->CanUnderstandContext( tmp, _gSrcEnd, mpCurCtx ) ) { cur = tmp; mpPlugin->ParseContext( _gSrcStart, cur, _gSrcEnd, mpCurCtx ); isAMacro = true; return false; } } spOperation* pOp = new spOperation(); pOp->mSrcLineNo = lineNo; pOp->mSrcOffset = int( start - _gSrcStart ); pOp->mHeaderLength = int( bracketPos - start ); if ( mpCurCtx->GetContextType() == SP_CTX_CLASS ) pOp->mScope = mpCurCtx->m_Name; mpCurCtx->AddMember( pOp ); pOp->mVisibility = mCurVis; pOp->mIsVirtual = isVirtual; // add comments about operation AttachComments( *pOp, cur ); // go backwards to method name skip_token_back( cur ); pOp->m_Name = get_token_str( cur ); // checker whether it's not an operator char chFirst = *pOp->m_Name.c_str(); if ( !isalpha(chFirst) && chFirst != '_' && chFirst != '~' ) { // skip 'operator' skip_next_token_back( cur ); skip_token_back( cur ); wxString lastToken = get_token_str( cur ); if ( lastToken == "operator" ) { lastToken += pOp->m_Name; pOp->m_Name = lastToken; } else { // ok, it wasn't an operator after all skip_token( cur ); } } else if ( pOp->m_Name == "operator" ) { skip_token( cur ); get_next_token( cur ); wxString oper = get_token_str( cur ); pOp->m_Name += oper; } // go backwards to method return type skip_next_token_back( cur ); if ( cur >= start ) { wxString rettype = wxString( start, size_t( cur-start ) ); // FIXME just for now... wxString::size_type pos = 0; wxString toerase("WXDLLEXPORT "); while((pos = rettype.find(toerase, pos)) != wxString::npos) rettype.erase(pos, toerase.length()); pOp->m_RetType = rettype; } arrange_indirection_tokens_between( pOp->m_RetType, pOp->m_Name ); cur = savedPos; restore_line_no( tmpLnNo ); // now, enter operation context mpCurCtx = pOp; return true; } bool CJSourceParser::ParseArguments( char*& cur ) { // DANGER-MACROS:: // now cursor position is right after the first opening bracket // of the function declaration char* blocks [16]; // used exclusivelly for iterative "lean out" // of macros and misc. not-obviouse grammar // (dirty,, but we cannot do it very nice, // we're not preprocessor-free C/C++ code) int blockSizes[16]; do { size_t blocksSkipped = 0; get_next_token( cur ); bool first_blk = true; while( *cur != ')' && *cur != ',' ) { blocks[blocksSkipped] = cur; if ( first_blk ) { char* prev = cur; skip_token( cur ); blockSizes[blocksSkipped] = size_t(cur-prev); first_blk = 0; } else blockSizes[blocksSkipped] = skip_block( cur ); get_next_token( cur ); ++blocksSkipped; } if ( blocksSkipped == 1 ) { // check if the empty arg. list stressed with "void" inside if ( cmp_tokens_fast( blocks[0] , "void", 4 ) ) { cur++; // skip ')' break; } // FIXME:: TBD:: K&R-style function declarations! // if only one block enclosed, than it's probably // some macro, there should be at least two blocks, // one for argument type and another for it's identifier return false; } if ( blocksSkipped == 0 ) { if ( *cur == 10 ) ++_gLineNo; ++cur; // skip ')' break; // function without paramters } // we should be in the operation context now spOperation* pOp = (spOperation*)mpCurCtx; spParameter* pPar = new spParameter(); pOp->AddMember( pPar ); // FOR NOW:: line number is not exact if argument list is mutiline pPar->mSrcLineNo = get_line_no(); size_t nameBlock = blocksSkipped - 1; size_t typeBlock = nameBlock - 1; // check if default values present if ( *blocks[typeBlock] == '=' ) { // expressions like "int = 5" are ignored, // since name for paramters is required if ( blocksSkipped == 3 ) { if ( *cur == ')' ) { ++cur; break; } else continue; } pPar->m_InitVal = wxString( blocks[nameBlock], blockSizes[nameBlock] ); nameBlock = nameBlock - 2; // skip '=' token and default value block typeBlock = nameBlock - 1; } // attach comments about the parameter AttachComments( *pPar, blocks[nameBlock] ); // retrieve argument name pPar->m_Name = wxString( blocks[nameBlock], blockSizes[nameBlock] ); // retreive argument type size_t len = blockSizes[ typeBlock ]; len = size_t ( (blocks[ typeBlock ] + len) - blocks[ 0 ] ); pPar->m_Type = wxString( blocks[0], len ); arrange_indirection_tokens_between( pPar->m_Type, pPar->m_Name ); if ( *cur == ')' ) { ++cur; break; } ++cur; // skip comma get_next_token(cur); } while(1); // skip possible whitespace between ')' and following "const" while ( isspace(*cur) ) cur++; // check if it was really a function not a macro, // if so, than it should be terminated with semicolon ';' // or opening implemenetaton bracket '{' char* tok = cur; int tmpLnNo; store_line_no( tmpLnNo ); bool result = true; do { if ( *tok == '{' || *tok == ';' ) { restore_line_no(tmpLnNo); break; } // check for unexpected tokens if ( *tok == '=' || *tok == '0' ) { skip_token(tok); if ( !get_next_token(tok) ) return false; continue; } if ( *tok == '}' ) return false; // if initialization list found if ( *tok == ':' ) { restore_line_no(tmpLnNo); break; } if ( cmp_tokens_fast( tok, "const", 5 ) ) { ((spOperation*)mpCurCtx)->mIsConstant = true; skip_token(tok); if ( !get_next_token(tok) ) return false; continue; } if ( CheckVisibilty( tok ) ) return false; // if next context found if ( is_keyword( tok ) ) return false; skip_token(tok); if ( !get_next_token(tok) ) return false; } while(1); return result; } void CJSourceParser::ParseMemberVar( char*& cur ) { MMemberListT& members = mpCurCtx->GetMembers(); bool firstMember = true; wxString type; // jump to the end of statement // and start collecting same-type varibles // back-to-front towards the type identifier skip_statement( cur ); char* savedPos = cur; int tmpLnNo; store_line_no( tmpLnNo ); --cur; // rewind back to ';' do { spAttribute* pAttr = new spAttribute(); // FOR NOW:: line not is not exact, if member declaration is multiline pAttr->mSrcLineNo = get_line_no(); mpCurCtx->AddMember( pAttr ); pAttr->mVisibility = mCurVis; pAttr->mIsConstant = 0; if ( firstMember ) { firstMember = 0; } skip_token_back( cur ); // attach comments about the attribute AttachComments( *pAttr, cur ); pAttr->m_Name = get_token_str( cur ); // guessing that this going to be variable type skip_next_token_back( cur ); skip_token_back( cur ); pAttr->m_Type = get_token_str( cur ); // if comma, than variable list continues // otherwise the variable type reached - stop if ( *cur == _T('=') ) { // yes, we've mistaken, it was not a identifier, // but it's default value pAttr->m_InitVal = pAttr->m_Name; // skip default value and '=' symbol skip_next_token_back( cur ); skip_token_back( cur ); pAttr->m_Name = get_token_str( cur ); skip_next_token_back( cur ); skip_token_back( cur ); } if ( *cur != ',' ) { type = get_token_str( cur ); break; } } while(1); size_t first = 0; // set up types for all collected (same-type) attributes; while ( first != members.size() - 1 ) { spAttribute* pAttr = members[first++]->CastToAttribute(); if ( !pAttr ) continue; if ( pAttr->m_Type.empty() ) pAttr->m_Type = type; pAttr->mVisibility = mCurVis; if ( !pAttr->m_Name.empty() ) arrange_indirection_tokens_between( pAttr->m_Type, pAttr->m_Name ); } cur = savedPos; restore_line_no( tmpLnNo ); clear_commets_queue(); } void CJSourceParser::SkipFunction( char*& cur ) { while ( *cur != '(' && cur < _gSrcEnd ) { if (*cur == 10 ) ++_gLineNo; ++cur; } skip_next_token_back( cur ); // go back and skip function identifier skip_token_back( cur ); // go back and skip return type skip_block( cur ); // now, go ahead and skip whole declaration SkipFunctionBody( cur ); } void CJSourceParser::SkipFunctionBody( char*& cur ) { // FIXME:: check for comments and quoted stirngs here bool hasDefinition = false; while( *cur != '{' && *cur != ';' ) { if (*cur == 10 ) ++_gLineNo; ++cur; } if ( *cur == ';' ) { ++cur; } else { hasDefinition = true; skip_scope_block( cur ); // skip the whole imp. } if ( mpCurCtx->GetType() == SP_CTX_OPERATION ) { spOperation& op = *((spOperation*)mpCurCtx); int curOfs = int ( cur - _gSrcStart ); op.mContextLength = curOfs - mpCurCtx->mSrcOffset; op.mHasDefinition = hasDefinition; // separate scope resolution token from the name of operation for( size_t i = 0; i != op.m_Name.length(); ++i ) { if ( op.m_Name[i] == ':' && op.m_Name[i+1] == ':' ) { wxString unscoped( op.m_Name, i+2, op.m_Name.length() - ( i + 2 ) ); op.mScope = wxString( op.m_Name, 0, i ); op.m_Name = unscoped; break; } } } } bool CJSourceParser::CheckVisibilty( char*& cur ) { size_t len = get_token_len( cur ); if ( cmp_tokens_fast( cur, "public:", len ) ) { mCurVis = SP_VIS_PUBLIC; return true; } if ( cmp_tokens_fast( cur, "protected:", len ) ) { mCurVis = SP_VIS_PROTECTED; return true; } if ( cmp_tokens_fast( cur, "private:", len ) ) { mCurVis = SP_VIS_PRIVATE; return true; } return false; } void CJSourceParser::AddClassNode( char*& cur ) { char* ctxStart = cur; wxString classkeyword = get_token_str( cur ); skip_token( cur ); // skip 'class' keyword if ( !get_next_token( cur ) ) return; // in C++ if ( *cur == ':' ) { skip_token( cur ); get_next_token( cur ); } // by default all class members are private mCurVis = SP_VIS_PRIVATE; spClass* pClass = new spClass(); if ( classkeyword == "class" ) pClass->mClassSubType = SP_CLTYPE_CLASS; else if ( classkeyword == "struct" ) { pClass->mClassSubType = SP_CLTYPE_STRUCTURE; mCurVis = SP_VIS_PUBLIC; } else if ( classkeyword == "union" ) { pClass->mClassSubType = SP_CLTYPE_UNION; mCurVis = SP_VIS_PUBLIC; } else if ( classkeyword == "interface" ) pClass->mClassSubType = SP_CLTYPE_INTERFACE; else { pClass->mClassSubType = SP_CLTYPE_INVALID; wxFAIL_MSG("unknown class keyword"); } mpCurCtx->AddMember( pClass ); // attach comments about the class AttachComments( *pClass, cur ); pClass->mSrcLineNo = get_line_no(); pClass->mSrcOffset = int( ctxStart - _gSrcStart ); char* nameTok = cur; pClass->m_Name = get_token_str( cur ); bool isDerived = 0; // DANGER-MACROS:: do { skip_token( cur ); if ( !get_next_token( cur ) ) return; if ( *cur == ':' ) { isDerived = 1; char* tok = cur; int tmpLn; store_line_no( tmpLn ); skip_next_token_back( tok ); skip_token_back( tok ); restore_line_no( tmpLn ); // class name should precend ':' colon, thus // the one which was captured before was // proablty something else (like __dllexport MyClass : ... ) if ( nameTok != tok ) { pClass->m_Name = get_token_str( tok ); } } if ( *cur == '{' ) break; if ( *cur == ',' ) continue; size_t len = get_token_len( cur ); // skip neglectable C++ modifieres if ( cmp_tokens_fast( cur, "public", len ) ) continue; if ( cmp_tokens_fast( cur, "protected", len ) ) continue; if ( cmp_tokens_fast( cur, "private", len ) ) continue; if ( cmp_tokens_fast( cur, "virtual", len ) ) continue; // skip neglectable JAVA modifieres if ( cmp_tokens_fast( cur, "extends", len ) ) { isDerived = 1; continue; } if ( cmp_tokens_fast( cur, "implements", len ) ) { isDerived = 1; continue; } // all we need to know is superclass or interface char* tok = cur; int tmpLn; store_line_no( tmpLn ); skip_token(tok); get_next_token(tok); restore_line_no( tmpLn ); if ( *tok != ':' && *cur != ':' ) pClass->m_SuperClassNames.push_back( wxString( cur, len ) ); } while(1); if ( !isDerived ) { int tmpLn; store_line_no( tmpLn ); while ( pClass->m_SuperClassNames.size() ) pClass->m_SuperClassNames.erase( &pClass->m_SuperClassNames[0] ); char* tok = cur; // some non-obviouse token was following "class" keyword - // we've confused it with class name - thus now we're reverting this mistake skip_next_token_back( tok ); skip_token_back( tok ); pClass->m_Name = get_token_str( tok ); restore_line_no( tmpLn ); } ++cur; // skip opening curly brace pClass->mHeaderLength = ( cur - ctxStart ); // now, enter the class context mpCurCtx = pClass; clear_commets_queue(); } void CJSourceParser::AddEnumNode( wxChar*& cur ) { // now the cursor is at "enum" keyword wxChar* start = cur; spEnumeration* pEnum = new spEnumeration(); mpCurCtx->AddMember( pEnum ); pEnum->mSrcLineNo = get_line_no(); AttachComments( *pEnum, cur ); skip_token( cur ); if ( !get_next_token( cur ) ) return; // check if enumeration has got it's identifier if ( *cur != '{' ) { pEnum->m_Name = get_token_str( cur ); } if ( !skip_imp_block( cur ) ) return; get_string_between( start, cur, &pEnum->m_EnumContent ); if ( get_next_token(cur) ) { // check if the identifier if after the {...} block if ( *cur != ';' ) pEnum->m_Name = get_token_str( cur ); } clear_commets_queue(); } void CJSourceParser::AddTypeDefNode( wxChar*& cur ) { // now the cursor at the token next to "typedef" keyword if ( !get_next_token(cur) ) return; wxChar* start = cur; spTypeDef* pTDef = new spTypeDef(); mpCurCtx->AddMember( pTDef ); pTDef->mSrcLineNo = get_line_no(); AttachComments( *pTDef, cur ); skip_statement( cur ); int tmpLnNo; store_line_no( tmpLnNo ); wxChar* tok = cur-1; skip_next_token_back( tok ); wxChar* nameEnd = tok; skip_token_back( tok ); wxChar* nameStart = tok; skip_next_token_back( tok ); wxChar* typeEnd = tok; // check if it's function prototype if ( *nameStart == ')' ) { typeEnd = nameStart+1; // skip argument list while ( *nameStart != '(' ) --nameStart; // skip to function type definition while ( *nameStart != ')' ) --nameStart; skip_next_token_back( nameStart ); nameEnd = nameStart; skip_token_back( nameStart ); if ( *nameStart == '*' ) ++nameStart; } get_string_between( start, typeEnd, &pTDef->m_OriginalType ); get_string_between( nameStart, nameEnd, &pTDef->m_Name ); clear_commets_queue(); restore_line_no( tmpLnNo ); }