{(*} (*------------------------------------------------------------------------------ Delphi Code formatter source code The Original Code is Tokens.pas, released June 2003. The Initial Developer of the Original Code is Anthony Steele. Portions created by Anthony Steele are Copyright (C) 1999-2008 Anthony Steele. All Rights Reserved. Contributor(s): Anthony Steele, Adem Baba The contents of this file are subject to the Mozilla Public License Version 1.1 (the "License"). you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.mozilla.org/NPL/ Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. Alternatively, the contents of this file may be used under the terms of the GNU General Public License Version 2 or later (the "GPL") See http://www.gnu.org/licenses/gpl.html ------------------------------------------------------------------------------*) {*)} unit Tokens; { Enumerates the kinds of words and symbols found in a delphi program If you knew the code for Jcf 0.x series, then this now unifies the enumerations for TTokenType and TWordType There were 149 words and about 22 tokens So there are now over 160 token types (some were removed on joining) will not add in the parse tree node types into this enum as this would push the total over 256 and you cannot have sets over enums with more than 256 elements I get a lot of value from sets over these enums there will still be space for new keywords later without reaching that limit When these are textual tokens, the enum item name is the token prefixed with 'tt' ie 'while' -> ttWhile } {$I JcfGlobal.inc} interface type TWordType = (wtNotAWord, wtReservedWord, wtReservedWordDirective, wtBuiltInConstant, wtBuiltInType, wtIdentifier, wtOperator); TWordTypeSet = set of TWordType; TTokenType = ( // errors - should only occur when input is bad, or parser is in error ttPunctuation, // non-aphanum characters - a catch-all category for other symbols ttUnknown, // default category used for unrecognised input // spacing ttReturn, // CR & LF chars ttWhiteSpace, // spaces & tabs ttComment, // one of these ttConditionalCompilationRemoved, ttNumber, // a numeric constant ttQuotedLiteralString, // 'This is a string' ttSemicolon, // ; ttColon, // : ttComma, ttOpenBracket, ttCloseBracket, ttOpenSquareBracket, ttCloseSquareBracket, ttDot, ttHash, ttDoubleDot, // '..' as in '[1 .. 2]' ttAssign, // := ttAmpersand, // '&' is used in Asm ttIdentifier, // a user-defined name for a var, type, unit, etc { built-in keywords } ttArray, ttAsm, ttBegin, ttCase, ttClass, ttConst, ttConstref, ttContains, ttConstructor, ttDestructor, ttDispinterface, ttDo, ttDownto, ttElse, ttEnd, ttEnumerator, ttExcept, ttExports, ttFile, ttFinalization, ttFinally, ttFor, ttFunction, ttGoto, ttIf, ttImplementation, ttInherited, ttInitialization, ttInline, ttInterface, ttLabel, ttLibrary, ttObject, ttOf, ttOperator, ttOut, ttOtherwise, ttPackage, ttPacked, ttProcedure, ttProgram, ttProperty, ttRaise, ttRecord, ttRepeat, ttRequires, ttResourcestring, ttSet, ttSpecialize, ttThen, ttThreadvar, ttTo, ttTry, ttType, ttUnit, ttUntil, ttUses, ttVar, ttWhile, ttWith, ttAt, ttOn, { reserved words that are directives } ttAbsolute, ttExternal, ttPascal, ttSafecall, ttAbstract, ttFar, ttPrivate, ttStrict, ttStdcall, ttAssembler, ttForward, ttProtected, ttStored, ttAutomated, ttIndex, ttPublic, ttVirtual, ttCdecl, ttMessage, ttPublished, ttWrite, ttDefault, ttName, ttRead, ttWriteOnly, ttDispId, ttNear, ttReadOnly, ttDynamic, ttNodefault, ttRegister, ttExport, ttOverride, ttOverload, ttResident, ttLocal, ttImplements, ttReintroduce, ttGeneric, ttCVar, ttNostackframe, ttNested, ttVectorcall, // used in asm ttOffset, ttPtr, ttVmtOffset, ttDmtOffset, { Delphi 6 directives } ttDeprecated, ttPlatform, { delphi.net keywords and directives } ttHelper, ttStatic, ttSealed, ttFinal, ttAdd, ttRemove, ttUnsafe, ttVarArgs, { delphi 2009 } ttReference, { Additional Free Pascal directives } ttExperimental, ttUnimplemented, ttInterrupt, { built-in constants } ttNil, ttTrue, ttFalse, { built in types } ttBoolean, ttByteBool, ttWordBool, ttLongBool, ttInteger, ttCardinal, ttShortint, ttSmallint, ttLongint, ttInt64, ttByte, ttWord, ttLongword, ttChar, ttWidechar, ttString, ttAnsiString, ttWidestring, ttPchar, ttSingle, ttDouble, ttExtended, ttReal, ttReal48, ttComp, ttCurrency, ttVariant, ttOleVariant, { operators that are words not symbols } ttAnd, ttAs, ttDiv, ttIn, ttIs, ttMod, ttNot, ttOr, ttShl, ttShr, ttXor, // symbol operators ttAtSign, ttHat, ttTimes, ttFloatDiv, ttExponent, ttPlus, ttMinus, ttEquals, ttGreaterThan, ttLessThan, ttGreaterThanOrEqual, ttLessThanOrEqual, ttNotEqual, ttBackSlash, { legal in char literals } // FreePascal c-style operators ttPlusAssign, // += ttMinusAssign, // -= ttTimesAssign, // *= ttFloatDivAssign // /= ); TTokenTypeSet = set of TTokenType; const TextualWordTypes: TWordTypeSet = [wtReservedWord, wtReservedWordDirective, wtBuiltInConstant, wtOperator, wtBuiltInType, wtIdentifier]; // identifiers can use these word categories IdentifierTypes: TWordTypeSet = [wtReservedWordDirective, wtBuiltInType, wtBuiltInConstant, wtIdentifier]; // a literal string must start with ', # or ^ LiteralStringStarters: TTokenTypeSet = [ttQuotedLiteralString, ttHat, ttHash]; { all tokens spelled with a-z } TextualTokens: TTokenTypeSet = [ttIdentifier .. ttXor]; IdentiferTokens: TTokenTypeSet = [ttIdentifier .. ttXor]; { same as above, with numbers added } TextOrNumberTokens: TTokenTypeSet = [ttNumber, ttIdentifier .. ttXor]; BracketTokens: TTokenTypeSet = [ttOpenBracket, ttCloseBracket, ttOpenSquareBracket, ttCloseSquareBracket]; OpenBrackets: TTokenTypeSet = [ttOpenBracket, ttOpenSquareBracket]; CloseBrackets: TTokenTypeSet = [ttCloseBracket, ttCloseSquareBracket]; NotSolidTokens: TTokenTypeSet = [ttWhiteSpace, ttComment, ttReturn, ttConditionalCompilationRemoved]; { procedure can have local declarations of vars, const and yes, types } Declarations: TTokenTypeSet = [ttConst, ttResourceString, ttVar, ttThreadVar, ttType, ttLabel, ttExports]; ParamTypes: TTokenTypeSet = [ttVar, ttConst, ttConstref, ttOut]; BlockOutdentWords: TTokenTypeSet = [ttVar, ttThreadVar, ttConst, ttResourceString, ttType, ttLabel, ttBegin, ttEnd, ttTry, ttFinally, ttExcept, ttWhile, ttFor, ttRepeat, ttUntil, ttWith, ttAsm, ttCase, ttInitialization, ttFinalization]; PropertyDirectives: TTokenTypeSet = { the basics } [ttRead, ttWrite, { the advanced stuff } ttStored, ttDefault, ttNoDefault, ttImplements, { for COM interface properties } ttReadOnly, ttWriteOnly, ttDispId, // hints ttDeprecated, ttLibrary, ttPlatform, // free pascal hints ttExperimental, ttUnimplemented, // Delphi.Net ttAdd, ttRemove ]; ExportDirectives: TTokenTypeSet = [ttIndex, ttName]; VariableDirectives: TTokenTypeSet = [ttAbsolute, ttDeprecated, ttLibrary, ttPlatform, ttExperimental, ttUnimplemented, ttCVar]; ClassVisibility: TTokenTypeSet = [ttPrivate, ttProtected, ttPublic, ttPublished, ttAutomated]; ProcedureDirectives: TTokenTypeSet = [ttExternal, ttPascal, ttSafecall, ttAbstract, ttFar, ttStdcall, ttAssembler, ttInline, ttForward, ttVirtual, ttCdecl, ttMessage, ttName, ttRegister, ttDispId, ttNear, ttDynamic, ttExport, ttOverride, ttResident, ttLocal, ttOverload, ttReintroduce, ttDeprecated, ttLibrary, ttPlatform, ttExperimental, ttUnimplemented, ttStatic, ttFinal, ttVarArgs, ttUnsafe, ttEnumerator, ttNostackframe, ttInterrupt, ttPublic, ttVectorcall]; ClassDirectives: TTokenTypeSet = [ttPrivate, ttProtected, ttPublic, ttPublished, ttAutomated, ttStrict]; HintDirectives: TTokenTypeSet = [ttDeprecated, ttLibrary, ttPlatform, ttExperimental, ttUnimplemented, ttStatic]; AllDirectives: TTokenTypeSet = [ttAbsolute, ttExternal, ttPascal, ttSafecall, ttAbstract, ttFar, ttPrivate, ttStdcall, ttAssembler, ttForward, ttProtected, ttStored, ttAutomated, ttIndex, ttPublic, ttVirtual, ttCdecl, ttMessage, ttPublished, ttWrite, ttDefault, ttName, ttRead, ttWriteOnly, ttDispId, ttNear, ttReadOnly, ttDynamic, ttNoDefault, ttRegister, ttExport, ttOverride, ttOverload, ttResident, ttLocal, ttImplements, ttReintroduce, ttLibrary, ttPlatform, ttStatic, ttFinal, ttVarArgs, ttCVar, ttVectorcall]; ProcedureWords: TTokenTypeSet = [ttProcedure, ttFunction, ttConstructor, ttDestructor, ttOperator]; StructuredTypeWords: TTokenTypeSet = [ttClass, ttObject, ttInterface, ttDispinterface, ttRecord]; ObjectTypeWords: TTokenTypeSet = [ttClass, ttObject, ttInterface, ttDispinterface]; InterfaceWords: TTokenTypeSet = [ttInterface, ttDispinterface]; ConstWords: TTokenTypeSet = [ttConst, ttResourceString]; StructStatementWords: TTokenTypeSet = [ttBegin, ttAsm, ttIf, ttCase, ttRepeat, ttWhile, ttFor, ttWith, ttTry]; VariantTypes: TTokenTypeSet = [ttVariant, ttOleVariant]; Operators: TTokenTypeSet = [ttAnd .. ttNotEqual, ttEnumerator]; { these words are - operators - can be unary - have no alphabet chars in them } PossiblyUnarySymbolOperators: TTokenTypeSet = [ttAtSign, ttHat, ttPlus, ttMinus]; RelationalOperators: TTokenTypeSet = [ ttIn, ttIs, ttAs, ttGreaterThan, ttLessThan, ttGreaterThanOrEqual, ttLessThanOrEqual, ttEquals, ttNotEqual]; AddOperators: TTokenTypeSet = [ttPlus, ttMinus, ttOr, ttXor]; MulOperators: TTokenTypeSet = [ttTimes, ttFloatDiv, ttDiv, ttMod, ttAnd, ttShl, ttShr, ttExponent]; SingleSpaceOperators = [ // some unary operators ttNot, // all operators that are always binary ttAnd, ttAs, ttDiv, ttIn, ttIs, ttMod, ttOr, ttShl, ttShr, ttXor, ttTimes, ttFloatDiv, ttExponent, ttEquals, ttGreaterThan, ttLessThan, ttGreaterThanOrEqual, ttLessThanOrEqual, ttNotEqual]; StringWords: TTokenTypeSet = [ttString, ttAnsiString, ttWideString]; RealTypes: TTokenTypeSet = [ttReal48, ttReal, ttSingle, ttDouble, ttExtended, ttCurrency, ttComp]; OrdTypes: TTokenTypeSet = [ttShortInt, ttSmallInt, ttInteger, ttByte, ttLongInt, ttInt64, ttWord, ttBoolean, ttByteBool, ttWordBool, ttLongBool, ttChar, ttWideChar, ttLongWord, ttPChar]; UsesWords: TTokenTypeSet = [ttUses, ttRequires, ttContains]; BuiltInConstants: TTokenTypeSet = [ttNil, ttTrue, ttFalse]; BuiltInTypes: TTokenTypeSet = [ttBoolean .. ttOleVariant]; AsmOffsets: TTokenTypeSet = [ttVmtOffset, ttDmtOffset]; AssignmentDirectives: TTokenTypeSet = [ttAssign, ttPlusAssign, ttMinusAssign, ttTimesAssign, ttFloatDivAssign]; { interpret a string as a token } procedure TypeOfToken(const psWord: string; out peWordType: TWordType; out peToken: TTokenType); overload; function TypeOfToken(const psWord: string): TTokenType; overload; function WordTypeOfToken(const peTokenType: TTokenType): TWordType; overload; { back to the string for error message } function TokenTypeToString(const peToken: TTokenType): string; { similarly for a token set } function TokenTypesToString(const peTokens: TTokenTypeSet): string; { chars used to make the comment } { these} (* or these *) // or these type TCommentStyle = (eNotAComment, eDoubleSlash, eBracketStar, eCurlyBrace, eCompilerDirective); TCommentStyleSet = set of TCommentStyle; const CURLY_COMMENTS: TCommentStyleSet = [eCurlyBrace, eCompilerDirective]; { preprocessor symbols } type TPreProcessorSymbolType = (ppNone, ppDefine, ppUndef, ppIfDef, ppIfNotDef, ppIfOpt, ppIfExpr, ppElseIf, ppElse, ppEndIf, ppIfEnd); TPreProcessorSymbolTypeSet = set of TPreProcessorSymbolType; const PREPROC_BLOCK_END = [ppElseIf, ppElse, ppEndIf, ppIfEnd]; procedure GetPreprocessorSymbolData(const psSourceCode: String; var peSymbolType: TPreProcessorSymbolType; var psText: String); function PreProcSymbolTypeToString(const peSymbolType: TPreProcessorSymbolType): string; function PreProcSymbolTypeSetToString( const peSymbolTypes: TPreProcessorSymbolTypeSet): string; implementation uses { system } {$IFNDEF FPC} Windows, {$ENDIF} SysUtils, { local } JcfStringUtils; { the majority of these tokens have a fixed textual representation e.g. ':=', 'if'. Those that don't include comments, numbers, literal strings and identifiers Recognising text as tokens is data driven, so here is the data } type TRTokenTextMap = record sToken: string; iLength: integer; // length of string above eWordType: TWordType; eToken: TTokenType; end; const { the longest token 'resourcestring' = 14 chars } LONGEST_KEYWORD_LEN = 14; { a value larger than the number of keywords, used as an initial size for the dynamic array } INITIAL_MAX_KEYWORDS = 200; var { final number of keywords added } miKeyWordCount: integer; { the keyword data } mrKeywordTextMap: array of TRTokenTextMap; { Array is sorted by length - this indexes where in the array tokens of a particular length start } miKeywordLengthIndexes: array[1 .. LONGEST_KEYWORD_LEN + 1] of integer; procedure AddKeyWord(psToken: string; const peWordType: TWordType; const peToken: TTokenType); var liLength: integer; begin { the UpperCase fn seems to be safe from locale - it works on chars a..z and we are not feeding it any accented chars } psToken := Trim(UpperCase(psToken)); liLength := Length(psToken); Assert(liLength <= LONGEST_KEYWORD_LEN, 'Found a longer token: ' + psToken + ' length ' + IntToStr(liLength)); mrKeywordTextMap[miKeyWordCount].sToken := psToken; mrKeywordTextMap[miKeyWordCount].iLength := liLength; mrKeywordTextMap[miKeyWordCount].eWordType := peWordType; mrKeywordTextMap[miKeyWordCount].eToken := peToken; Inc(miKeyWordCount); Assert(miKeyWordCount < INITIAL_MAX_KEYWORDS); end; procedure AddKeyWords; begin {NOTES: Adding the keywords to a dynamic array adds negligible overhead but it is quite flexible as we can add stuff later. Array size is not fixed Plus, we will sort it so that all the searches will be faster. } SetLength(mrKeywordTextMap, INITIAL_MAX_KEYWORDS); {Something sufficiently large} miKeyWordCount := 0; // add all the data into the array AddKeyword(';', wtNotAWord, ttSemicolon); AddKeyword(':', wtNotAWord, ttColon); AddKeyword(',', wtNotAWord, ttComma); AddKeyword('(', wtNotAWord, ttOpenBracket); AddKeyword(')', wtNotAWord, ttCloseBracket); AddKeyword('[', wtNotAWord, ttOpenSquareBracket); AddKeyword(']', wtNotAWord, ttCloseSquareBracket); AddKeyword('..', wtNotAWord, ttDoubleDot); AddKeyword('.', wtNotAWord, ttDot); AddKeyword('#', wtNotAWord, ttHash); AddKeyword(':=', wtNotAWord, ttAssign); AddKeyword('&', wtNotAWord, ttAmpersand); { reserved words } AddKeyword('array', wtReservedWord, ttArray); AddKeyword('asm', wtReservedWord, ttAsm); AddKeyword('begin', wtReservedWord, ttBegin); AddKeyword('bitpacked', wtReservedWord, ttPacked); AddKeyword('case', wtReservedWord, ttCase); AddKeyword('class', wtReservedWord, ttClass); AddKeyword('const', wtReservedWord, ttConst); AddKeyword('constref', wtReservedWordDirective, ttConstref); AddKeyword('constructor', wtReservedWord, ttConstructor); AddKeyword('destructor', wtReservedWord, ttDestructor); AddKeyword('dispinterface', wtReservedWord, ttDispinterface); AddKeyword('do', wtReservedWord, ttDo); AddKeyword('downto', wtReservedWord, ttDownTo); AddKeyword('else', wtReservedWord, ttElse); AddKeyword('end', wtReservedWord, ttEnd); AddKeyword('enumerator', wtReservedWord, ttEnumerator); AddKeyword('except', wtReservedWord, ttExcept); AddKeyword('exports', wtReservedWord, ttExports); AddKeyword('file', wtReservedWord, ttFile); AddKeyword('finalization', wtReservedWord, ttFinalization); AddKeyword('finally', wtReservedWord, ttFinally); AddKeyword('for', wtReservedWord, ttFor); AddKeyword('function', wtReservedWord, ttFunction); AddKeyword('goto', wtReservedWord, ttGoto); AddKeyword('if', wtReservedWord, ttIf); AddKeyword('implementation', wtReservedWord, ttImplementation); AddKeyword('inherited', wtReservedWord, ttInherited); AddKeyword('initialization', wtReservedWord, ttInitialization); AddKeyword('inline', wtReservedWord, ttInline); AddKeyword('interface', wtReservedWord, ttInterface); AddKeyword('label', wtReservedWord, ttLabel); AddKeyword('library', wtReservedWord, ttLibrary); AddKeyword('object', wtReservedWord, ttObject); AddKeyword('of', wtReservedWord, ttOf); AddKeyword('operator', wtReservedWord, ttOperator); AddKeyword('out', wtReservedWordDirective, ttOut); AddKeyword('otherwise', wtReservedWord, ttOtherwise); AddKeyword('packed', wtReservedWord, ttPacked); AddKeyword('procedure', wtReservedWord, ttProcedure); AddKeyword('program', wtReservedWord, ttProgram); AddKeyword('property', wtReservedWord, ttProperty); AddKeyword('raise', wtReservedWord, ttRaise); AddKeyword('record', wtReservedWord, ttRecord); AddKeyword('repeat', wtReservedWord, ttRepeat); AddKeyword('resourcestring', wtReservedWord, ttResourceString); AddKeyword('set', wtReservedWord, ttSet); AddKeyword('specialize', wtReservedWord, ttSpecialize); AddKeyword('then', wtReservedWord, ttThen); AddKeyword('threadvar', wtReservedWord, ttThreadvar); AddKeyword('to', wtReservedWord, ttTo); AddKeyword('try', wtReservedWord, ttTry); AddKeyword('type', wtReservedWord, ttType); AddKeyword('unit', wtReservedWord, ttUnit); AddKeyword('until', wtReservedWord, ttUntil); AddKeyword('uses', wtReservedWord, ttUses); AddKeyword('var', wtReservedWord, ttVar); AddKeyword('while', wtReservedWord, ttWhile); AddKeyword('with', wtReservedWord, ttWith); { reseved words that must be parsed as directives because they can be identifier names } AddKeyword('at', wtReservedWordDirective, ttAt); AddKeyword('on', wtReservedWordDirective, ttOn); AddKeyword('package', wtReservedWordDirective, ttPackage); AddKeyword('contains', wtReservedWordDirective, ttContains); AddKeyword('requires', wtReservedWordDirective, ttRequires); { reseved words that are directives } AddKeyword('absolute', wtReservedWordDirective, ttAbsolute); AddKeyword('external', wtReservedWordDirective, ttExternal); AddKeyword('pascal', wtReservedWordDirective, ttPascal); AddKeyword('safecall', wtReservedWordDirective, ttSafecall); AddKeyword('abstract', wtReservedWordDirective, ttAbstract); AddKeyword('far', wtReservedWordDirective, ttFar); AddKeyword('private', wtReservedWordDirective, ttPrivate); AddKeyword('strict', wtReservedWordDirective, ttStrict); AddKeyword('stdcall', wtReservedWordDirective, ttStdCall); AddKeyword('assembler', wtReservedWordDirective, ttAssembler); AddKeyword('forward', wtReservedWordDirective, ttForward); AddKeyword('protected', wtReservedWordDirective, ttProtected); AddKeyword('stored', wtReservedWordDirective, ttStored); AddKeyword('automated', wtReservedWordDirective, ttAutomated); AddKeyword('index', wtReservedWordDirective, ttIndex); AddKeyword('public', wtReservedWordDirective, ttPublic); AddKeyword('virtual', wtReservedWordDirective, ttVirtual); AddKeyword('cdecl', wtReservedWordDirective, ttCdecl); AddKeyword('message', wtReservedWordDirective, ttMessage); AddKeyword('published', wtReservedWordDirective, ttPublished); AddKeyword('write', wtReservedWordDirective, ttWrite); AddKeyword('default', wtReservedWordDirective, ttDefault); AddKeyword('name', wtReservedWordDirective, ttName); AddKeyword('read', wtReservedWordDirective, ttRead); AddKeyword('writeonly', wtReservedWordDirective, ttWriteOnly); AddKeyword('dispid', wtReservedWordDirective, ttDispId); AddKeyword('near', wtReservedWordDirective, ttNear); AddKeyword('readonly', wtReservedWordDirective, ttReadOnly); AddKeyword('dynamic', wtReservedWordDirective, ttDynamic); AddKeyword('nodefault', wtReservedWordDirective, ttNoDefault); AddKeyword('register', wtReservedWordDirective, ttRegister); AddKeyword('export', wtReservedWordDirective, ttExport); AddKeyword('override', wtReservedWordDirective, ttOverride); AddKeyword('overload', wtReservedWordDirective, ttOverload); AddKeyword('resident', wtReservedWordDirective, ttResident); AddKeyword('local', wtReservedWordDirective, ttLocal); AddKeyword('generic', wtReservedWordDirective, ttGeneric); AddKeyword('vectorcall', wtReservedWordDirective, ttVectorcall); AddKeyword('implements', wtReservedWordDirective, ttImplements); AddKeyword('reintroduce', wtReservedWordDirective, ttReintroduce); AddKeyword('cvar', wtReservedWordDirective, ttCVar); AddKeyword('nostackframe', wtReservedWordDirective, ttNostackframe); AddKeyword('nested', wtReservedWordDirective, ttNested); // asm AddKeyword('offset', wtReservedWordDirective, ttOffset); AddKeyword('ptr', wtReservedWordDirective, ttPtr); AddKeyword('vmtoffset', wtReservedWordDirective, ttVmtOffset); AddKeyword('dmtoffset', wtReservedWordDirective, ttDmtOffset); { D6 directives } AddKeyword('deprecated', wtReservedWordDirective, ttDeprecated); AddKeyword('platform', wtReservedWordDirective, ttPlatform); { delphi.net directives} AddKeyword('helper', wtReservedWordDirective, ttHelper); AddKeyword('sealed', wtReservedWordDirective, ttSealed); AddKeyword('static', wtReservedWordDirective, ttStatic); AddKeyword('final', wtReservedWordDirective, ttFinal); AddKeyword('add', wtReservedWordDirective, ttAdd); AddKeyword('remove', wtReservedWordDirective, ttRemove); AddKeyword('unsafe', wtReservedWordDirective, ttUnsafe); AddKeyword('varargs', wtReservedWordDirective, ttVarArgs); { delphi 2009 } AddKeyword('reference', wtReservedWordDirective, ttReference); { Additional Free Pascal directives } AddKeyword('experimental', wtReservedWordDirective, ttExperimental); AddKeyword('unimplemented', wtReservedWordDirective, ttUnimplemented); AddKeyword('interrupt', wtReservedWordDirective, ttInterrupt); { operators that are words not symbols } AddKeyword('and', wtOperator, ttAnd); AddKeyword('as', wtOperator, ttAs); AddKeyword('div', wtOperator, ttDiv); AddKeyword('in', wtOperator, ttIn); AddKeyword('is', wtOperator, ttIs); AddKeyword('mod', wtOperator, ttMod); AddKeyword('not', wtOperator, ttNot); AddKeyword('or', wtOperator, ttOr); AddKeyword('shl', wtOperator, ttShl); AddKeyword('shr', wtOperator, ttShr); AddKeyword('xor', wtOperator, ttXor); { built-in constants } AddKeyword('nil', wtBuiltInConstant, ttNil); AddKeyword('true', wtBuiltInConstant, ttTrue); AddKeyword('false', wtBuiltInConstant, ttFalse); { built-in types } AddKeyword('boolean', wtBuiltInType, ttBoolean); AddKeyword('ByteBool', wtBuiltInType, ttByteBool); AddKeyword('WordBool', wtBuiltInType, ttWordBool); AddKeyword('LongBool', wtBuiltInType, ttLongBool); AddKeyword('integer', wtBuiltInType, ttInteger); AddKeyword('cardinal', wtBuiltInType, ttCardinal); AddKeyword('shortint', wtBuiltInType, ttShortInt); AddKeyword('smallint', wtBuiltInType, ttSmallInt); AddKeyword('longint', wtBuiltInType, ttLongInt); AddKeyword('int64', wtBuiltInType, ttInt64); AddKeyword('byte', wtBuiltInType, ttByte); AddKeyword('word', wtBuiltInType, ttWord); AddKeyword('longword', wtBuiltInType, ttLongWord); AddKeyword('char', wtBuiltInType, ttChar); AddKeyword('widechar', wtBuiltInType, ttWideChar); AddKeyword('string', wtBuiltInType, ttString); AddKeyword('ansistring', wtBuiltInType, ttAnsiString); AddKeyword('widestring', wtBuiltInType, ttWideString); AddKeyword('pChar', wtBuiltInType, ttPchar); AddKeyword('single', wtBuiltInType, ttSingle); AddKeyword('double', wtBuiltInType, ttDouble); AddKeyword('extended', wtBuiltInType, ttExtended); AddKeyword('real', wtBuiltInType, ttReal); AddKeyword('real48', wtBuiltInType, ttReal48); AddKeyword('comp', wtBuiltInType, ttComp); AddKeyword('currency', wtBuiltInType, ttCurrency); AddKeyword('variant', wtBuiltInType, ttVariant); AddKeyword('OleVariant', wtBuiltInType, ttOleVariant); { operators that are symbols } AddKeyword('@', wtOperator, ttAtSign); AddKeyword('^', wtOperator, ttHat); AddKeyword('*', wtOperator, ttTimes); AddKeyword('**', wtOperator, ttExponent); // in FreePascal AddKeyword('/', wtOperator, ttFloatDiv); AddKeyword('+', wtOperator, ttPlus); AddKeyword('-', wtOperator, ttMinus); AddKeyword('=', wtOperator, ttEquals); AddKeyword('>=', wtOperator, ttGreaterThanOrEqual); AddKeyword('<=', wtOperator, ttLessThanOrEqual); AddKeyword('<>', wtOperator, ttNotEqual); // these must come after the above as they are shorter AddKeyword('>', wtOperator, ttGreaterThan); AddKeyword('<', wtOperator, ttLessThan); AddKeyword('\', wtOperator, ttBackSlash); // FreePascal c-style operators AddKeyword('+=', wtNotAWord, ttPlusAssign); AddKeyword('-=', wtNotAWord, ttMinusAssign); AddKeyword('*=', wtNotAWord, ttTimesAssign); AddKeyword('/=', wtNotAWord, ttFloatDivAssign); {Now that we know how many keywords were added, we can set the actual size of the array } SetLength(mrKeywordTextMap, miKeyWordCount); end; { sort order for the keyword data array } function KeyWordCompare(const prMap1, prMap2: TRTokenTextMap): integer; var liIndex: integer; begin { first sort by length } Result := prMap1.iLength - prMap2.iLength; if Result <> 0 then exit; { then for tokensof the same length: alphabetic, not localised } liIndex := 1; while (Result = 0) and (liIndex <= prMap1.iLength) do begin Result := Ord(prMap1.sToken[liIndex]) - Ord(prMap2.sToken[liIndex]); Inc(liIndex); end; end; procedure SortKeywords; var liIndex: integer; lrSwap: TRTokenTextMap; lbDoneWork: boolean; begin {We have a fairly small number of items, < 200. Bubble sort is good enough, if not the best here } lbDoneWork := True; while lbDoneWork do begin lbDoneWork := False; for liIndex := 0 to (miKeyWordCount - 2) do begin if KeyWordCompare(mrKeywordTextMap[Succ(liIndex)], mrKeywordTextMap[liIndex]) < 0 then begin { swap the items } lrSwap := mrKeywordTextMap[Succ(liIndex)]; mrKeywordTextMap[Succ(liIndex)] := mrKeywordTextMap[liIndex]; mrKeywordTextMap[liIndex] := lrSwap; // have made a swap. List was not sorted. lbDoneWork := True; end; end; end; end; procedure InitLengthOffestIndexes; var liLoop, liLen: integer; begin { init all indexes to a dummy value } for liLoop := low(miKeywordLengthIndexes) to High(miKeywordLengthIndexes) do miKeywordLengthIndexes[liLoop] := -1; { the keywords are sorted firstly by length } for liLoop := 0 to Pred(miKeyWordCount) do begin liLen := mrKeywordTextMap[liLoop].iLength; { keywords of length liLen are found at index liLoop onwards } if miKeywordLengthIndexes[liLen] < 0 then miKeywordLengthIndexes[liLen] := liLoop; end; { init all remaining to past the last item } for liLoop := low(miKeywordLengthIndexes) to High(miKeywordLengthIndexes) do if miKeywordLengthIndexes[liLoop] < 0 then miKeywordLengthIndexes[liLoop] := miKeyWordCount; end; { turn text to enum. Assumes data is sorted out and sorted } procedure TypeOfToken(const psWord: string; out peWordType: TWordType; out peToken: TTokenType); var liMapItemLoop: integer; liCharIndex: integer; liStart, liEnd: integer; liTokenLength: integer; lbFoundItem: boolean; begin Assert(psWord <> ''); { if its not found in the list, it is unknown } peWordType := wtNotAWord; peToken := ttUnknown; // the token in should already be trimmed liTokenLength := Length(psWord); { pointless and dangerous to continue if the keyword is longer than any known } if liTokenLength > LONGEST_KEYWORD_LEN then exit; // where in the map do we find tokens of this length? liStart := miKeywordLengthIndexes[liTokenLength]; // tokens of this length stop when the longer ones start :) liEnd := miKeywordLengthIndexes[liTokenLength + 1] - 1; { of course the ultimate would be binary search not for-loop but it is questionable if that is needed seeing as the part of the list to be searched has already been greatly limited } for liMapItemLoop := liStart to liEnd do begin { 'a simplified version of Boyer-Moore comparison technique' } if (mrKeywordTextMap[liMapItemLoop].sToken[1] = UpCase(psWord[1])) and (mrKeywordTextMap[liMapItemLoop].sToken[liTokenLength] = UpCase(psWord[liTokenLength])) then begin lbFoundItem := True; liCharIndex := 2; {We have already checked the 2 ends of the string. Check the rest } while lbFoundItem and (liCharIndex < liTokenLength) do begin lbFoundItem := lbFoundItem and (mrKeywordTextMap[liMapItemLoop].sToken[liCharIndex] = UpCase(psWord[liCharIndex])); Inc(liCharIndex); end; if lbFoundItem then begin peWordType := mrKeywordTextMap[liMapItemLoop].eWordType; peToken := mrKeywordTextMap[liMapItemLoop].eToken; break; end; end; { matched first and last and uppercase } end; { for loop through the array } end; function TypeOfToken(const psWord: string): TTokenType; overload; var leWordType: TWordType; begin TypeOfToken(psWord, leWordType, Result); end; function TokenTypeToString(const peToken: TTokenType): string; var lbFound: boolean; liLoop: integer; begin lbFound := False; case peToken of ttPunctuation: begin Result := 'Unknown punctuation'; lbFound := True; end; ttUnknown: begin Result := 'Unknown'; lbFound := True; end; ttReturn: begin Result := 'Return'; lbFound := True; end; ttWhiteSpace: begin Result := 'White space'; lbFound := True; end; ttIdentifier: begin // identifier not in the list as it has no fixed text Result := 'Identifier'; lbFound := True; end; ttNumber: begin Result := 'Number'; lbFound := True; end; ttQuotedLiteralString: begin Result := 'Quoted literal string'; lbFound := True; end; ttComment: begin Result := 'comment'; lbFound := True; end; ttConditionalCompilationRemoved: begin Result := 'cond compilation removed'; lbFound := True; end else begin for liLoop := Low(mrKeywordTextMap) to High(mrKeywordTextMap) do begin if peToken = mrKeywordTextMap[liLoop].eToken then begin Result := mrKeywordTextMap[liLoop].sToken; lbFound := True; break; end; end; end; end; if not lbFound then Result := 'Token ' + IntToStr(Ord(peToken)) + ' not found'; end; function TokenTypesToString(const peTokens: TTokenTypeSet): string; var liLoop: integer; begin if peTokens = [] then Result := '[]' else begin Result := ''; for liLoop := Low(mrKeywordTextMap) to High(mrKeywordTextMap) do begin if mrKeywordTextMap[liLoop].eToken in peTokens then begin if Result <> '' then Result := Result + ' '; Result := Result + mrKeywordTextMap[liLoop].sToken; end; end; end; end; function WordTypeOfToken(const peTokenType: TTokenType): TWordType; overload; var liLoop: integer; begin Result := wtNotAWord; if peTokenType = ttIdentifier then begin // identifier not in the list as it has no fixed text Result := wtIdentifier; end else begin for liLoop := Low(mrKeywordTextMap) to High(mrKeywordTextMap) do begin if mrKeywordTextMap[liLoop].eToken = peTokenType then begin Result := mrKeywordTextMap[liLoop].eWordType; break; end; end; end; end; const PreProcessorSymbolData: array[TPreProcessorSymbolType] of string = ( '$$$$$$$$$$', '{$DEFINE', '{$UNDEF', '{$IFDEF', '{$IFNDEF', '{$IFOPT', '{$IF', '{$ELSEIF', '{$ELSE', '{$ENDIF', '{$IFEND' ); { given a token, identify the preprocessor symbol and the text after it } procedure GetPreprocessorSymbolData(const psSourceCode: String; var peSymbolType: TPreProcessorSymbolType; var psText: String); var leLoop: TPreProcessorSymbolType; liItemLen: integer; begin peSymbolType := ppNone; psText := ''; for leLoop := low(TPreProcessorSymbolType) to High(TPreProcessorSymbolType) do begin if leLoop = ppNone then continue; liItemLen := Length(PreProcessorSymbolData[leLoop]); if AnsiSameText(StrLeft(psSourceCode, liItemLen), PreProcessorSymbolData[leLoop]) and ( not CharIsAlpha(psSourceCode[liItemLen + 1])) then begin peSymbolType := leLoop; break; end; end; if peSymbolType = ppNone then exit; psText := StrRestOf(psSourceCode, Length(PreProcessorSymbolData[peSymbolType]) + 1); if psText <> '' then begin if StrRight(psText, 1) = '}' then psText := StrChopRight(psText, 1); psText := Trim(psText); end; end; function PreProcSymbolTypeToString(const peSymbolType: TPreProcessorSymbolType): string; begin case peSymbolType of ppNone: Result := 'none'; ppDefine: Result := '$DEFINE'; ppUndef: Result := '$UNDEF'; ppIfDef: Result := '$IFDEF'; ppIfNotDef: Result := '$IFNDEF'; ppIfOpt: Result := '$IFOPT'; ppIfExpr: Result := '$IFEXPR'; ppElseIf: Result := '$ELSEIF'; ppElse: Result := '$ELSE'; ppEndIf: Result := '$ENDIF'; ppIfEnd: Result := '$IFEND'; else Assert(False); end; end; function PreProcSymbolTypeSetToString( const peSymbolTypes: TPreProcessorSymbolTypeSet): string; var leLoop: TPreProcessorSymbolType; begin Result := ''; for leLoop := Low(TPreProcessorSymbolType) to High(TPreProcessorSymbolType) do begin if leLoop in peSymbolTypes then begin if Result <> '' then Result := Result + ', '; Result := Result + PreProcSymbolTypeToString(leLoop); end; end; end; initialization AddKeywords; SortKeywords; InitLengthOffestIndexes; end.