1// Copyright (C) 2006 Google Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15 16/** 17 * @fileoverview 18 * some functions for browser-side pretty printing of code contained in html. 19 * 20 * <p> 21 * For a fairly comprehensive set of languages see the 22 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a> 23 * file that came with this source. At a minimum, the lexer should work on a 24 * number of languages including C and friends, Java, Python, Bash, SQL, HTML, 25 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk 26 * and a subset of Perl, but, because of commenting conventions, doesn't work on 27 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class. 28 * <p> 29 * Usage: <ol> 30 * <li> include this source file in an html page via 31 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>} 32 * <li> define style rules. See the example page for examples. 33 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with 34 * {@code class=prettyprint.} 35 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty 36 * printer needs to do more substantial DOM manipulations to support that, so 37 * some css styles may not be preserved. 38 * </ol> 39 * That's it. 
I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in, but if you wish, you can add
 * another class to the {@code <pre>} or {@code <code>} element to specify the
 * language, as in {@code <pre class="prettyprint lang-java">}.  Any class that
 * starts with "lang-" followed by a file extension, specifies the file type.
 * See the "lang-*.js" files in this directory for code that implements
 * per-language file handlers.
 * <p>
 * Change log:<br>
 * cbeust, 2006/08/22
 * <blockquote>
 *   Java annotations (start with "@") are now captured as literals ("lit")
 * </blockquote>
 * @requires console
 */

// JSLint declarations
/*global console, document, navigator, setTimeout, window, define */

/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 *
 * NOTE(review): this description (whole-DOM pass, optional completion
 * callback) looks like it belongs to {@code prettyPrint}, while the
 * "pretty print a chunk of code" description below looks like it belongs to
 * {@code prettyPrintOne}.  The assignments are not visible from here --
 * confirm against them and swap the two comments if so.
 */
var prettyPrintOne;
/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 *
 * NOTE(review): presumably documents {@code prettyPrintOne} rather than
 * {@code prettyPrint}; see the note on the declaration above.
 */
var prettyPrint;


// Everything below lives inside one immediately-invoked function so that the
// helpers do not leak into the global scope; only the two variables declared
// above and the window property are visible outside it.
(function () {
  var win = window;
  // Keyword lists for various languages.
  // We use things that coerce to strings to make them compact when minified
  // and to defeat aggressive optimizers that fold large string constants.
87 var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"]; 88 var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," + 89 "double,enum,extern,float,goto,int,long,register,short,signed,sizeof,module," + 90 "static,struct,switch,typedef,union,unsigned,void,volatile"]; 91 var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," + 92 "new,operator,private,protected,public,this,throw,true,try,typeof"]; 93 var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignof,align_union,asm,axiom,bool," + 94 "concept,concept_map,const_cast,constexpr,decltype," + 95 "dynamic_cast,explicit,export,friend,inline,late_check," + 96 "mutable,namespace,nullptr,reinterpret_cast,static_assert,static_cast," + 97 "template,typeid,typename,using,virtual,where,request_req"]; 98 var JAVA_KEYWORDS = [COMMON_KEYWORDS, 99 "abstract,boolean,byte,extends,final,finally,implements,import," + 100 "instanceof,null,native,package,strictfp,super,synchronized,throws," + 101 "transient"]; 102 var CSHARP_KEYWORDS = [JAVA_KEYWORDS, 103 "as,base,by,checked,decimal,delegate,descending,dynamic,event," + 104 "fixed,foreach,from,group,implicit,in,interface,internal,into,is,let," + 105 "lock,object,out,override,orderby,params,partial,readonly,ref,sbyte," + 106 "sealed,stackalloc,string,select,uint,ulong,unchecked,unsafe,ushort," + 107 "var,virtual,where"]; 108 var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," + 109 "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," + 110 "throw,true,try,unless,until,when,while,yes"; 111 var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS, 112 "debugger,eval,export,function,get,null,set,undefined,var,with," + 113 "Infinity,NaN"]; 114 var PERL_KEYWORDS = "caller,delete,die,do,dump,else,elsif,eval,exit,foreach,for," + 115 "goto,if,import,last,local,my,next,no,our,print,printf,package,redo,require," + 116 "sub,undef,unless,until,use,wantarray,while,BEGIN,END"; 117 var PHP_KEYWORDS = 
"abstract,and,array,as,break,case,catch,cfunction,class," + 118 "clone,const,continue,declare,default,do,else,elseif,enddeclare,endfor," + 119 "endforeach,endif,endswitch,endwhile,extends,final,for,foreach,function," + 120 "global,goto,if,implements,interface,instanceof,namespace,new,old_function," + 121 "or,private,protected,public,static,switch,throw,try,use,var,while,xor," + 122 "die,echo,empty,exit,eval,include,include_once,isset,list,require," + 123 "require_once,return,print,unset"; 124 var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," + 125 "elif,except,exec,finally,from,global,import,in,is,lambda," + 126 "nonlocal,not,or,pass,print,raise,try,with,yield," + 127 "False,True,None"]; 128 var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," + 129 "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," + 130 "rescue,retry,self,super,then,true,undef,unless,until,when,yield," + 131 "BEGIN,END"]; 132 var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," + 133 "function,in,local,set,then,until,echo"]; 134 var CONFIG_ENVS = ["User-Agent,HTTP_USER_AGENT,HTTP_REFERER,HTTP_COOKIE,HTTP_FORWARDED,HTTP_HOST,HTTP_PROXY_CONNECTION,HTTP_ACCEPT,REMOTE_ADDR,REMOTE_HOST,REMOTE_PORT,REMOTE_USER,REMOTE_IDENT,REQUEST_METHOD,SCRIPT_FILENAME,PATH_INFO,QUERY_STRING,AUTH_TYPE,DOCUMENT_ROOT,SERVER_ADMIN,SERVER_NAME,SERVER_ADDR,SERVER_PORT,SERVER_PROTOCOL,SERVER_SOFTWARE,TIME_YEAR,TIME_MON,TIME_DAY,TIME_HOUR,TIME_MIN,TIME_SEC,TIME_WDAY,TIME,API_VERSION,THE_REQUEST,REQUEST_URI,REQUEST_FILENAME,IS_SUBREQ,HTTPS,REQUEST_SCHEME"]; 135 var CONFIG_KEYWORDS = 
["Macro,UndefMacro,Use,AuthLDAPURL,AcceptFilter,AcceptPathInfo,AccessFileName,Action,AddAlt,AddAltByEncoding,AddAltByType,AddCharset,AddDefaultCharset,AddDescription,AddEncoding,AddHandler,AddIcon,AddIconByEncoding,AddIconByType,AddInputFilter,AddLanguage,AddModuleInfo,AddOutputFilter,AddOutputFilterByType,AddType,Alias,AliasMatch,Allow,AllowCONNECT,AllowEncodedSlashes,AllowMethods,AllowOverride,AllowOverrideList,Anonymous,Anonymous_LogEmail,Anonymous_MustGiveEmail,Anonymous_NoUserID,Anonymous_VerifyEmail,AsyncRequestWorkerFactor,AuthBasicAuthoritative,AuthBasicProvider,AuthDBDUserPWQuery,AuthDBDUserRealmQuery,AuthDBMGroupFile,AuthDBMType,AuthDBMUserFile,AuthDigestAlgorithm,AuthDigestDomain,AuthDigestNcCheck,AuthDigestNonceFormat,AuthDigestNonceLifetime,AuthDigestProvider,AuthDigestQop,AuthDigestShmemSize,AuthFormAuthoritative,AuthFormBody,AuthFormDisableNoStore,AuthFormFakeBasicAuth,AuthFormLocation,AuthFormLoginRequiredLocation,AuthFormLoginSuccessLocation,AuthFormLogoutLocation,AuthFormMethod,AuthFormMimetype,AuthFormPassword,AuthFormProvider,AuthFormSitePassphrase,AuthFormSize,AuthFormUsername,AuthGroupFile,AuthLDAPAuthorizePrefix,AuthLDAPBindAuthoritative,AuthLDAPBindDN,AuthLDAPBindPassword,AuthLDAPCharsetConfig,AuthLDAPCompareAsUser,AuthLDAPCompareDNOnServer,AuthLDAPDereferenceAliases,AuthLDAPGroupAttribute,AuthLDAPGroupAttributeIsDN,AuthLDAPInitialBindAsUser,AuthLDAPInitialBindPattern,AuthLDAPMaxSubGroupDepth,AuthLDAPRemoteUserAttribute,AuthLDAPRemoteUserIsDN,AuthLDAPSearchAsUser,AuthLDAPSubGroupAttribute,AuthLDAPSubGroupClass,AuthLDAPUrl,AuthMerging,AuthName,AuthnCacheContext,AuthnCacheEnable,AuthnCacheProvideFor,AuthnCacheSOCache,AuthnCacheTimeout,<AuthnProviderAlias>,AuthType,AuthUserFile,AuthzDBDLoginToReferer,AuthzDBDQuery,AuthzDBDRedirectQuery,AuthzDBMType,<AuthzProviderAlias>,AuthzSendForbiddenOnFailure,BalancerGrowth,BalancerMember,BrowserMatch,BrowserMatchNoCase,BufferedLogs,BufferSize,CacheDefaultExpire,CacheDetailHeader,CacheDirLength,CacheDirLevel
s,CacheDisable,CacheEnable,CacheFile,CacheHeader,CacheIgnoreCacheControl,CacheIgnoreHeaders,CacheIgnoreNoLastMod,CacheIgnoreQueryString,CacheIgnoreURLSessionIdentifiers,CacheKeyBaseURL,CacheLastModifiedFactor,CacheLock,CacheLockMaxAge,CacheLockPath,CacheMaxExpire,CacheMaxFileSize,CacheMinExpire,CacheMinFileSize,CacheNegotiatedDocs,CacheQuickHandler,CacheReadSize,CacheReadTime,CacheRoot,CacheStaleOnError,CacheStoreExpired,CacheStoreNoStore,CacheStorePrivate,CGIMapExtension,CharsetDefault,CharsetOptions,CharsetSourceEnc,CheckCaseOnly,CheckSpelling,ChrootDir,ContentDigest,CookieDomain,CookieExpires,CookieName,CookieStyle,CookieTracking,CoreDumpDirectory,CustomLog,Dav,DavDepthInfinity,DavGenericLockDB,DavLockDB,DavMinTimeout,DBDExptime,DBDInitSQL,DBDKeep,DBDMax,DBDMin,DBDParams,DBDPersist,DBDPrepareSQL,DBDriver,DefaultIcon,DefaultLanguage,DefaultRuntimeDir,DefaultType,Define,DeflateBufferSize,DeflateCompressionLevel,DeflateFilterNote,DeflateMemLevel,DeflateWindowSize,Deny,<Directory>,DirectoryIndex,DirectoryIndexRedirect,<DirectoryMatch>,DirectorySlash,DocumentRoot,DTracePrivileges,DumpIOInput,DumpIOOutput,<Else>,<ElseIf>,EnableExceptionHook,EnableMMAP,EnableSendfile,Error,ErrorDocument,ErrorLog,ErrorLogFormat,Example,ExpiresActive,ExpiresByType,ExpiresDefault,ExtendedStatus,ExtFilterDefine,ExtFilterOptions,FallbackResource,FileETag,<Files>,<FilesMatch>,FilterChain,FilterDeclare,FilterProtocol,FilterProvider,FilterTrace,ForceLanguagePriority,ForceType,ForensicLog,GprofDir,GracefulShutdownTimeout,Group,Header,HeaderName,HeartbeatAddress,HeartbeatListen,HeartbeatMaxServers,HeartbeatStorage,HeartbeatStorage,HostnameLookups,IdentityCheck,IdentityCheckTimeout,<If>,<IfDefine>,<IfModule>,<IfVersion>,ImapBase,ImapDefault,ImapMenu,Include,IncludeOptional,IndexHeadInsert,IndexIgnore,IndexIgnoreReset,IndexOptions,IndexOrderDefault,IndexStyleSheet,InputSed,ISAPIAppendLogToErrors,ISAPIAppendLogToQuery,ISAPICacheFile,ISAPIFakeAsync,ISAPILogNotSupported,ISAPIReadAheadBuffer,KeepAlive,
KeepAliveTimeout,KeptBodySize,LanguagePriority,LDAPCacheEntries,LDAPCacheTTL,LDAPConnectionPoolTTL,LDAPConnectionTimeout,LDAPLibraryDebug,LDAPOpCacheEntries,LDAPOpCacheTTL,LDAPReferralHopLimit,LDAPReferrals,LDAPRetries,LDAPRetryDelay,LDAPSharedCacheFile,LDAPSharedCacheSize,LDAPTimeout,LDAPTrustedClientCert,LDAPTrustedGlobalCert,LDAPTrustedMode,LDAPVerifyServerCert,<Limit>,<LimitExcept>,LimitInternalRecursion,LimitRequestBody,LimitRequestFields,LimitRequestFieldSize,LimitRequestLine,LimitXMLRequestBody,Listen,ListenBackLog,LoadFile,LoadModule,<Location>,<LocationMatch>,LogFormat,LogLevel,LogMessage,LuaCodeCache,LuaHookAccessChecker,LuaHookAuthChecker,LuaAuthzProvider,LuaHookCheckUserID,LuaHookFixups,LuaHookInsertFilter,LuaHookMapToStorage,LuaHookTranslateName,LuaHookTypeChecker,LuaInherit,LuaInputFilter,LuaMapHandler,LuaOutputFilter,LuaPackageCPath,LuaPackagePath,LuaQuickHandler,LuaRoot,LuaScope,MaxConnectionsPerChild,MaxKeepAliveRequests,MaxMemFree,MaxRangeOverlaps,MaxRangeReversals,MaxRanges,MaxRequestWorkers,MaxSpareServers,MaxSpareThreads,MaxThreads,MetaDir,MetaFiles,MetaSuffix,MimeMagicFile,MinSpareServers,MinSpareThreads,MMapFile,ModemStandard,ModMimeUsePathInfo,MultiviewsMatch,Mutex,NameVirtualHost,NoProxy,NWSSLTrustedCerts,NWSSLUpgradeable,Options,Order,OutputSed,PassEnv,PidFile,PrivilegesMode,Protocol,ProtocolEcho,<Proxy>,ProxyAddHeaders,ProxyBadHeader,ProxyBlock,ProxyDomain,ProxyErrorOverride,ProxyExpressDBMFile,ProxyExpressDBMType,ProxyExpressEnable,ProxyFtpDirCharset,ProxyFtpEscapeWildcards,ProxyFtpListOnWildcard,ProxyHTMLBufSize,ProxyHTMLCharsetOut,ProxyHTMLDocType,ProxyHTMLEnable,ProxyHTMLEvents,ProxyHTMLExtended,ProxyHTMLFixups,ProxyHTMLInterp,ProxyHTMLLinks,ProxyHTMLStripComments,ProxyHTMLURLMap,ProxyIOBufferSize,<ProxyMatch>,ProxyMaxForwards,ProxyPass,ProxyPassInterpolateEnv,ProxyPassMatch,ProxyPassReverse,ProxyPassReverseCookieDomain,ProxyPassReverseCookiePath,ProxyPreserveHost,ProxyReceiveBufferSize,ProxyRemote,ProxyRemoteMatch,ProxyRequests,ProxyS
CGIInternalRedirect,ProxySCGISendfile,ProxySet,ProxySourceAddress,ProxyStatus,ProxyTimeout,ProxyVia,ReadmeName,ReceiveBufferSize,Redirect,RedirectMatch,RedirectPermanent,RedirectTemp,ReflectorHeader,RemoteIPHeader,RemoteIPInternalProxy,RemoteIPInternalProxyList,RemoteIPProxiesHeader,RemoteIPTrustedProxy,RemoteIPTrustedProxyList,RemoveCharset,RemoveEncoding,RemoveHandler,RemoveInputFilter,RemoveLanguage,RemoveOutputFilter,RemoveType,RequestHeader,RequestReadTimeout,Require,<RequireAll>,<RequireAny>,<RequireNone>,RewriteBase,RewriteCond,RewriteEngine,RewriteMap,RewriteOptions,RewriteRule,RLimitCPU,RLimitMEM,RLimitNPROC,Satisfy,ScoreBoardFile,Script,ScriptAlias,ScriptAliasMatch,ScriptInterpreterSource,ScriptLog,ScriptLogBuffer,ScriptLogLength,ScriptSock,SecureListen,SeeRequestTail,SendBufferSize,ServerAdmin,ServerAlias,ServerLimit,ServerName,ServerPath,ServerRoot,ServerSignature,ServerTokens,Session,SessionCookieName,SessionCookieName2,SessionCookieRemove,SessionCryptoCipher,SessionCryptoDriver,SessionCryptoPassphrase,SessionCryptoPassphraseFile,SessionDBDCookieName,SessionDBDCookieName2,SessionDBDCookieRemove,SessionDBDDeleteLabel,SessionDBDInsertLabel,SessionDBDPerUser,SessionDBDSelectLabel,SessionDBDUpdateLabel,SessionEnv,SessionExclude,SessionHeader,SessionInclude,SessionMaxAge,SetEnv,SetEnvIf,SetEnvIfExpr,SetEnvIfNoCase,SetHandler,SetInputFilter,SetOutputFilter,SSIEndTag,SSIErrorMsg,SSIETag,SSILastModified,SSILegacyExprParser,SSIStartTag,SSITimeFormat,SSIUndefinedEcho,SSLCACertificateFile,SSLCACertificatePath,SSLCADNRequestFile,SSLCADNRequestPath,SSLCARevocationCheck,SSLCARevocationFile,SSLCARevocationPath,SSLCertificateChainFile,SSLCertificateFile,SSLCertificateKeyFile,SSLCipherSuite,SSLCryptoDevice,SSLEngine,SSLFIPS,SSLHonorCipherOrder,SSLInsecureRenegotiation,SSLOCSPDefaultResponder,SSLOCSPEnable,SSLOCSPOverrideResponder,SSLOCSPResponderTimeout,SSLOCSPResponseMaxAge,SSLOCSPResponseTimeSkew,SSLOptions,SSLPassPhraseDialog,SSLProtocol,SSLProxyCACertificateFile,SSL
ProxyCACertificatePath,SSLProxyCARevocationCheck,SSLProxyCARevocationFile,SSLProxyCARevocationPath,SSLProxyCheckPeerCN,SSLProxyCheckPeerExpire,SSLProxyCipherSuite,SSLProxyEngine,SSLProxyMachineCertificateChainFile,SSLProxyMachineCertificateFile,SSLProxyMachineCertificatePath,SSLProxyProtocol,SSLProxyVerify,SSLProxyVerifyDepth,SSLRandomSeed,SSLRenegBufferSize,SSLRequire,SSLRequireSSL,SSLSessionCache,SSLSessionCacheTimeout,SSLSessionTicketKeyFile,SSLStaplingCache,SSLStaplingErrorCacheTimeout,SSLStaplingFakeTryLater,SSLStaplingForceURL,SSLStaplingResponderTimeout,SSLStaplingResponseMaxAge,SSLStaplingResponseTimeSkew,SSLStaplingReturnResponderErrors,SSLStaplingStandardCacheTimeout,SSLStrictSNIVHostCheck,SSLUserName,SSLUseStapling,SSLVerifyClient,SSLVerifyDepth,StartServers,StartThreads,Substitute,Suexec,SuexecUserGroup,ThreadLimit,ThreadsPerChild,ThreadStackSize,TimeOut,TraceEnable,TransferLog,TypesConfig,UnDefine,UnsetEnv,UseCanonicalName,UseCanonicalPhysicalPort,User,UserDir,VHostCGIMode,VHostCGIPrivs,VHostGroup,VHostPrivs,VHostSecure,VHostUser,VirtualDocumentRoot,VirtualDocumentRootIP,<VirtualHost>,VirtualScriptAlias,VirtualScriptAliasIP,WatchdogInterval,XBitHack,xml2EncAlias,xml2EncDefault,xml2StartParse,RewriteLog,RewriteLogLevel"]; 136 var CONFIG_OPTIONS = /^[\\+\\-]?(AuthConfig|IncludesNOEXEC|ExecCGI|FollowSymLinks|MultiViews|Includes|Indexes|SymLinksIfOwnerMatch)\b/i; 137 var ALL_KEYWORDS = [ 138 CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS + 139 PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS, CONFIG_KEYWORDS, PHP_KEYWORDS]; 140 var C_TYPES = /^(DIR|FILE|vector|(de|priority_)?queue|list|stack|(const_)?iterator|(multi)?(set|map)|bitset|u?(int|float|char|void|const|static|struct)\d*(_t)?\b)|[a-z_]+_rec|cmd_parms\b/; 141 142 // token style names. 
// Token style names.  Each value doubles as the CSS class applied to tokens
// of that kind.

/** Style of a string literal.  @const */
var PR_STRING = 'str';
/** Style of a keyword.  @const */
var PR_KEYWORD = 'kwd';
/** Style of a comment.  @const */
var PR_COMMENT = 'com';
/** Style of a type name.  @const */
var PR_TYPE = 'typ';
/** Style of a literal value such as 1, null or true.  @const */
var PR_LITERAL = 'lit';
/** Style of a run of punctuation.  @const */
var PR_PUNCTUATION = 'pun';
/** Style of plain text.  @const */
var PR_PLAIN = 'pln';

/** Style of an sgml tag.  @const */
var PR_TAG = 'tag';
/** Style of a markup declaration such as a DOCTYPE.  @const */
var PR_DECLARATION = 'dec';
/** Style of embedded source.  @const */
var PR_SOURCE = 'src';
/** Style of an sgml attribute name.  @const */
var PR_ATTRIB_NAME = 'atn';
/** Style of an sgml attribute value.  @const */
var PR_ATTRIB_VALUE = 'atv';

/**
 * Class marking a section of markup that is not code, e.g. line numbers
 * embedded in a code listing.
 * @const
 */
var PR_NOCODE = 'nocode';



/**
 * Source text of a pattern matching the tokens that may come directly before
 * a regular expression literal in javascript.
 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
 * gives the full list; entries likely to misfire in languages without regexp
 * literals have been left out.
 *
 * <p>In particular, keywords that cannot precede a regexp literal in a
 * syntactically legal javascript program are omitted, and so is "in", which
 * is not a keyword in many languages and may denote inches.
 *
 * <p>The rules in the link are not exact EcmaScript -- they cannot tell
 * (a=++/b/i) from (a++/b/i) -- but they work very well in practice.
 *
 * <p>The alternatives are listed one per entry and joined with '|'; trailing
 * whitespace after the preceding token is allowed by the final \s*.
 *
 * @private
 * @const
 */
var REGEXP_PRECEDER_PATTERN = '(?:' + [
  // start of input, optionally followed by a dot ("^^" is a real anchor)
  '^^\\.?',
  // operators and punctuation
  '[+-]', '[!=]=?=?', '\\#', '%=?', '&&?=?', '\\(', '\\*=?', '[+\\-]=', '->',
  '\\/=?', '::?', '<<?=?', '>>?>?=?', ',', ';', '\\?', '@', '\\[', '~', '{',
  '\\^\\^?=?', '\\|\\|?=?',
  // keywords
  'break', 'case', 'continue', 'delete', 'do', 'else', 'finally',
  'instanceof', 'return', 'throw', 'try', 'typeof'
].join('|') + ')\\s*';

// CAVEAT: a regular expression that immediately follows another one is not
// handled properly, because a regular expression may carry flags for
// case-sensitivity and the like.  Adjacent regexp tokens are not valid in any
// language I'm aware of, so this case is deliberately ignored.
// TODO: maybe style special characters inside a regexp as punctuation.
249 */ 250 function combinePrefixPatterns(regexs) { 251 var capturedGroupIndex = 0; 252 253 var needToFoldCase = false; 254 var ignoreCase = false; 255 for (var i = 0, n = regexs.length; i < n; ++i) { 256 var regex = regexs[i]; 257 if (regex.ignoreCase) { 258 ignoreCase = true; 259 } else if (/[a-z]/i.test(regex.source.replace( 260 /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) { 261 needToFoldCase = true; 262 ignoreCase = false; 263 break; 264 } 265 } 266 267 var escapeCharToCodeUnit = { 268 'b': 8, 269 't': 9, 270 'n': 0xa, 271 'v': 0xb, 272 'f': 0xc, 273 'r': 0xd 274 }; 275 276 function decodeEscape(charsetPart) { 277 var cc0 = charsetPart.charCodeAt(0); 278 if (cc0 !== 92 /* \\ */) { 279 return cc0; 280 } 281 var c1 = charsetPart.charAt(1); 282 cc0 = escapeCharToCodeUnit[c1]; 283 if (cc0) { 284 return cc0; 285 } else if ('0' <= c1 && c1 <= '7') { 286 return parseInt(charsetPart.substring(1), 8); 287 } else if (c1 === 'u' || c1 === 'x') { 288 return parseInt(charsetPart.substring(2), 16); 289 } else { 290 return charsetPart.charCodeAt(1); 291 } 292 } 293 294 function encodeEscape(charCode) { 295 if (charCode < 0x20) { 296 return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16); 297 } 298 var ch = String.fromCharCode(charCode); 299 return (ch === '\\' || ch === '-' || ch === ']' || ch === '^') 300 ? "\\" + ch : ch; 301 } 302 303 function caseFoldCharset(charSet) { 304 var charsetParts = charSet.substring(1, charSet.length - 1).match( 305 new RegExp( 306 '\\\\u[0-9A-Fa-f]{4}' 307 + '|\\\\x[0-9A-Fa-f]{2}' 308 + '|\\\\[0-3][0-7]{0,2}' 309 + '|\\\\[0-7]{1,2}' 310 + '|\\\\[\\s\\S]' 311 + '|-' 312 + '|[^-\\\\]', 313 'g')); 314 var ranges = []; 315 var inverse = charsetParts[0] === '^'; 316 317 var out = ['[']; 318 if (inverse) { out.push('^'); } 319 320 for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) { 321 var p = charsetParts[i]; 322 if (/\\[bdsw]/i.test(p)) { // Don't muck with named groups. 
323 out.push(p); 324 } else { 325 var start = decodeEscape(p); 326 var end; 327 if (i + 2 < n && '-' === charsetParts[i + 1]) { 328 end = decodeEscape(charsetParts[i + 2]); 329 i += 2; 330 } else { 331 end = start; 332 } 333 ranges.push([start, end]); 334 // If the range might intersect letters, then expand it. 335 // This case handling is too simplistic. 336 // It does not deal with non-latin case folding. 337 // It works for latin source code identifiers though. 338 if (!(end < 65 || start > 122)) { 339 if (!(end < 65 || start > 90)) { 340 ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]); 341 } 342 if (!(end < 97 || start > 122)) { 343 ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]); 344 } 345 } 346 } 347 } 348 349 // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]] 350 // -> [[1, 12], [14, 14], [16, 17]] 351 ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); }); 352 var consolidatedRanges = []; 353 var lastRange = []; 354 for (var i = 0; i < ranges.length; ++i) { 355 var range = ranges[i]; 356 if (range[0] <= lastRange[1] + 1) { 357 lastRange[1] = Math.max(lastRange[1], range[1]); 358 } else { 359 consolidatedRanges.push(lastRange = range); 360 } 361 } 362 363 for (var i = 0; i < consolidatedRanges.length; ++i) { 364 var range = consolidatedRanges[i]; 365 out.push(encodeEscape(range[0])); 366 if (range[1] > range[0]) { 367 if (range[1] + 1 > range[0]) { out.push('-'); } 368 out.push(encodeEscape(range[1])); 369 } 370 } 371 out.push(']'); 372 return out.join(''); 373 } 374 375 function allowAnywhereFoldCaseAndRenumberGroups(regex) { 376 // Split into character sets, escape sequences, punctuation strings 377 // like ('(', '(?:', ')', '^'), and runs of characters that do not 378 // include any of the above. 
379 var parts = regex.source.match( 380 new RegExp( 381 '(?:' 382 + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]' // a character set 383 + '|\\\\u[A-Fa-f0-9]{4}' // a unicode escape 384 + '|\\\\x[A-Fa-f0-9]{2}' // a hex escape 385 + '|\\\\[0-9]+' // a back-reference or octal escape 386 + '|\\\\[^ux0-9]' // other escape sequence 387 + '|\\(\\?[:!=]' // start of a non-capturing group 388 + '|[\\(\\)\\^]' // start/end of a group, or line start 389 + '|[^\\x5B\\x5C\\(\\)\\^]+' // run of other characters 390 + ')', 391 'g')); 392 var n = parts.length; 393 394 // Maps captured group numbers to the number they will occupy in 395 // the output or to -1 if that has not been determined, or to 396 // undefined if they need not be capturing in the output. 397 var capturedGroups = []; 398 399 // Walk over and identify back references to build the capturedGroups 400 // mapping. 401 for (var i = 0, groupIndex = 0; i < n; ++i) { 402 var p = parts[i]; 403 if (p === '(') { 404 // groups are 1-indexed, so max group index is count of '(' 405 ++groupIndex; 406 } else if ('\\' === p.charAt(0)) { 407 var decimalValue = +p.substring(1); 408 if (decimalValue) { 409 if (decimalValue <= groupIndex) { 410 capturedGroups[decimalValue] = -1; 411 } else { 412 // Replace with an unambiguous escape sequence so that 413 // an octal escape sequence does not turn into a backreference 414 // to a capturing group from an earlier regex. 415 parts[i] = encodeEscape(decimalValue); 416 } 417 } 418 } 419 } 420 421 // Renumber groups and reduce capturing groups to non-capturing groups 422 // where possible. 
423 for (var i = 1; i < capturedGroups.length; ++i) { 424 if (-1 === capturedGroups[i]) { 425 capturedGroups[i] = ++capturedGroupIndex; 426 } 427 } 428 for (var i = 0, groupIndex = 0; i < n; ++i) { 429 var p = parts[i]; 430 if (p === '(') { 431 ++groupIndex; 432 if (!capturedGroups[groupIndex]) { 433 parts[i] = '(?:'; 434 } 435 } else if ('\\' === p.charAt(0)) { 436 var decimalValue = +p.substring(1); 437 if (decimalValue && decimalValue <= groupIndex) { 438 parts[i] = '\\' + capturedGroups[decimalValue]; 439 } 440 } 441 } 442 443 // Remove any prefix anchors so that the output will match anywhere. 444 // ^^ really does mean an anchored match though. 445 for (var i = 0; i < n; ++i) { 446 if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; } 447 } 448 449 // Expand letters to groups to handle mixing of case-sensitive and 450 // case-insensitive patterns if necessary. 451 if (regex.ignoreCase && needToFoldCase) { 452 for (var i = 0; i < n; ++i) { 453 var p = parts[i]; 454 var ch0 = p.charAt(0); 455 if (p.length >= 2 && ch0 === '[') { 456 parts[i] = caseFoldCharset(p); 457 } else if (ch0 !== '\\') { 458 // TODO: handle letters in numeric escapes. 459 parts[i] = p.replace( 460 /[a-zA-Z]/g, 461 function (ch) { 462 var cc = ch.charCodeAt(0); 463 return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']'; 464 }); 465 } 466 } 467 } 468 469 return parts.join(''); 470 } 471 472 var rewritten = []; 473 for (var i = 0, n = regexs.length; i < n; ++i) { 474 var regex = regexs[i]; 475 if (regex.global || regex.multiline) { throw new Error('' + regex); } 476 rewritten.push( 477 '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')'); 478 } 479 480 return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g'); 481 } 482 483 484 /** 485 * Split markup into a string of source code and an array mapping ranges in 486 * that string to the text nodes in which they appear. 
487 * 488 * <p> 489 * The HTML DOM structure:</p> 490 * <pre> 491 * (Element "p" 492 * (Element "b" 493 * (Text "print ")) ; #1 494 * (Text "'Hello '") ; #2 495 * (Element "br") ; #3 496 * (Text " + 'World';")) ; #4 497 * </pre> 498 * <p> 499 * corresponds to the HTML 500 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}.</p> 501 * 502 * <p> 503 * It will produce the output:</p> 504 * <pre> 505 * { 506 * sourceCode: "print 'Hello '\n + 'World';", 507 * // 1 2 508 * // 012345678901234 5678901234567 509 * spans: [0, #1, 6, #2, 14, #3, 15, #4] 510 * } 511 * </pre> 512 * <p> 513 * where #1 is a reference to the {@code "print "} text node above, and so 514 * on for the other text nodes. 515 * </p> 516 * 517 * <p> 518 * The {@code} spans array is an array of pairs. Even elements are the start 519 * indices of substrings, and odd elements are the text nodes (or BR elements) 520 * that contain the text for those substrings. 521 * Substrings continue until the next index or the end of the source. 522 * </p> 523 * 524 * @param {Node} node an HTML DOM subtree containing source-code. 525 * @param {boolean} isPreformatted true if white-space in text nodes should 526 * be considered significant. 527 * @return {Object} source code and the text nodes in which they occur. 
528 */ 529 function extractSourceSpans(node, isPreformatted) { 530 var nocode = /(?:^|\s)nocode(?:\s|$)/; 531 532 var chunks = []; 533 var length = 0; 534 var spans = []; 535 var k = 0; 536 537 function walk(node) { 538 switch (node.nodeType) { 539 case 1: // Element 540 if (nocode.test(node.className)) { return; } 541 for (var child = node.firstChild; child; child = child.nextSibling) { 542 walk(child); 543 } 544 var nodeName = node.nodeName.toLowerCase(); 545 if ('br' === nodeName || 'li' === nodeName) { 546 chunks[k] = '\n'; 547 spans[k << 1] = length++; 548 spans[(k++ << 1) | 1] = node; 549 } 550 break; 551 case 3: case 4: // Text 552 var text = node.nodeValue; 553 if (text.length) { 554 if (!isPreformatted) { 555 text = text.replace(/[ \t\r\n]+/g, ' '); 556 } else { 557 text = text.replace(/\r\n?/g, '\n'); // Normalize newlines. 558 text = text.replace(/^(\r?\n\s*)+/g, ''); // Remove leading newlines 559 text = text.replace(/^\s*/g, ''); // Remove leading spaces due to indented formatting 560 text = text.replace(/(\r?\n\s*)+$/g, ''); // Remove ending newlines 561 562 } 563 // TODO: handle tabs here? 564 chunks[k] = text; 565 spans[k << 1] = length; 566 length += text.length; 567 spans[(k++ << 1) | 1] = node; 568 } 569 break; 570 } 571 } 572 573 walk(node); 574 575 return { 576 sourceCode: chunks.join('').replace(/\n$/, ''), 577 spans: spans 578 }; 579 } 580 581 582 /** 583 * Apply the given language handler to sourceCode and add the resulting 584 * decorations to out. 585 * @param {number} basePos the index of sourceCode within the chunk of source 586 * whose decorations are already present on out. 
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 *
 * <p>Nothing happens when sourceCode is empty, or when the handler leaves
 * {@code job.decorations} unset.
 *
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode the text to hand to the language handler.
 * @param {function (Object)} langHandler called with a job object holding
 *    {@code sourceCode} and {@code basePos}; expected to assign
 *    {@code job.decorations}.
 * @param {Array} out the decoration list to append to.
 */
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  var job = {
    sourceCode: sourceCode,
    basePos: basePos
  };
  langHandler(job);
  var produced = job.decorations;
  if (!produced) { return; }
  // Copy element by element.  out.push.apply(out, produced) passes every
  // element as a call argument and can throw a RangeError once the list is
  // large enough.
  for (var i = 0, n = produced.length; i < n; ++i) {
    out.push(produced[i]);
  }
}

/** Matches any string that contains a non-whitespace character. */
var notWs = /\S/;

/**
 * Given an element, if it contains only one child element and any text nodes
 * it contains contain only space characters, return the sole child element.
 * Otherwise returns undefined.
 * <p>
 * This is meant to return the CODE element in {@code <pre><code ...>} when
 * there is a single child element that contains all the non-space textual
 * content, but not to return anything where there are multiple child elements
 * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
 * is textual content.
 *
 * @param {Node} element the element whose direct children are inspected.
 * @return {Node|undefined} the sole child element, or undefined.
 */
function childContentWrapper(element) {
  // Holds the single child element seen so far.  It is set to the parent
  // element itself as a marker once the parent is known not to qualify.
  var wrapper = undefined;
  for (var c = element.firstChild; c; c = c.nextSibling) {
    var type = c.nodeType;
    if (type === 1) {  // Element Node
      wrapper = wrapper ? element : c;
    } else if (type === 3 && notWs.test(c.nodeValue)) {  // Text Node
      wrapper = element;
    }
  }
  return wrapper === element ? undefined : wrapper;
}

/** Given triples of [style, pattern, context] returns a lexing function,
  * The lexing function interprets the patterns to find token boundaries and
  * returns a decoration list of the form
  * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
  * where index_n is an index into the sourceCode, and style_n is a style
  * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
  * all characters in sourceCode[index_n-1:index_n].
  *
  * The stylePatterns is a list whose elements have the form
  * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
  *
  * Style is a style constant like PR_PLAIN, or can be a string of the
  * form 'lang-FOO', where FOO is a language extension describing the
  * language of the portion of the token in $1 after pattern executes.
  * E.g., if style is 'lang-lisp', and group 1 contains the text
  * '(hello (world))', then that portion of the token will be passed to the
  * registered lisp handler for formatting.
  * The text before and after group 1 will be restyled using this decorator
  * so decorators should take care that this doesn't result in infinite
  * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
  * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
  * '<script>foo()<\/script>', which would cause the current decorator to
  * be called with '<script>' which would not match the same rule since
  * group 1 must not be empty, so it would be instead styled as PR_TAG by
  * the generic tag rule.  The handler registered for the 'js' extension would
  * then be called with 'foo()', and finally, the current decorator would
  * be called with '<\/script>' which would not match the original rule and
  * so the generic tag rule would identify it as a tag.
  *
  * Pattern must only match prefixes, and if it matches a prefix, then that
  * match is considered a token with the same style.
  *
  * Context is applied to the last non-whitespace, non-comment token
  * recognized.
  *
  * Shortcut is an optional string of characters, any of which, if the first
  * character, guarantee that this pattern and only this pattern matches.
  *
  * @param {Array} shortcutStylePatterns patterns that always start with
  *   a known character.  Must have a shortcut string.
  * @param {Array} fallthroughStylePatterns patterns that will be tried in
  *   order if the shortcut ones fail.  May have shortcuts.
  *
  * @return {function (Object)} a
  *   function that takes source code and returns a list of decorations.
  */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the pattern entry that claims it.
  var shortcuts = {};
  // One global regex, built from every pattern, that splits source text
  // into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      // Each distinct regex goes into the tokenizer only once.
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that every character ends up in some token.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and produces an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @param {Object} job an object like <pre>{
   *    sourceCode: {string} sourceText plain text,
   *    basePos: {int} position of job.sourceCode in the larger chunk of
   *        sourceCode.
   * }</pre>
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Style already resolved for a given token text.  Embedded-language
    // tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}
/** returns a function that produces a list of decorations from source text.
  *
  * This code treats ", ', and ` as string delimiters, and \ as a string
  * escape.  It does not recognize perl's qq() style strings.
  * It has no special handling for double delimiter escapes as in basic, or
  * the tripled delimiters used in python, but should work on those regardless
  * although in those cases a single string literal may be broken up into
  * multiple adjacent string literals.
  *
  * It recognizes C, C++, and shell style comments.
  *
  * Options read by this function:
  * <ul>
  * <li>{@code tripleQuotedStrings}, {@code multiLineStrings}: choose the
  *     string pattern; tested in that order, otherwise single-line strings.
  * <li>{@code verbatimStrings}: adds C# style {@code @"..."} strings.
  * <li>{@code hashComments}: truthy enables # comments.  Together with
  *     {@code cStyleComments}, a value greater than 1 enables ### block
  *     comments; otherwise # lines are treated as C preprocessor lines and
  *     {@code <header.h>} is styled as a string.
  * <li>{@code cStyleComments}: enables // and slash-star comments.
  * <li>{@code regexLiterals}: enables /regex/ literals.
  * <li>{@code types}: a RegExp styled as a type.
  * <li>{@code strings}: a space or comma separated word list styled as
  *     strings.
  * <li>{@code keywords}: a space or comma separated keyword list.
  * <li>{@code httpdComments}: keeps a # that follows other text on the same
  *     line from starting a comment.
  * </ul>
  *
  * @param {Object} options a set of optional parameters.
  * @return {function (Object)} a function that examines the source code
  *     in the input job and builds the decoration list.
  */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <stdio.h>
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  if (options['regexLiterals']) {
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C]'
        // escape sequences (\x5C),
        +    '|\\x5C[\\s\\S]'
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         new RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  if (options['strings']) {
    var strings = ("" + options['strings']).replace(/^ | $/g, '').replace(/-/g, '\\-');
    // NOTE(review): unlike the other patterns this one carries no leading
    // '^', so it can match anywhere inside a token.  Left as found; confirm
    // whether that is intended.
    fallthroughStylePatterns.push(
        [PR_STRING,
         new RegExp('(?:' + strings.replace(/[\s,]+/g, '|') + ')'),
         null, null]
        );
  }

  var keywords = ("" + options['keywords']).replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);
  if (options['httpdComments']) {
    fallthroughStylePatterns.push(
        [PR_PLAIN,     /^.*\S.*#/i, null]
    );
  }

  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*|\bNULL\b/i, null],
      [PR_LITERAL,     CONFIG_OPTIONS, null],
      //[PR_STRING,      CONFIG_ENVS, null],
      [PR_TAG,         /^\b(AuthzProviderAlias|AuthnProviderAlias|RequireAny|RequireAll|RequireNone|Directory|DirectoryMatch|Location|LocationMatch|VirtualHost|If|Else|ElseIf|Proxy\b|LoadBalancer|Files|FilesMatch|Limit|LimitExcept|IfDefine|IfModule|IfVersion)\b/, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_(t|req|module)\b)/, null],
      [PR_TAG,         /^apr_[a-z_0-9]+|ap_[a-z_0-9]+/i, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9\-]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // An IPv6 Address
           + '|[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+:[a-f0-9:]+'
           // or an octal or decimal number.  The second form is a leading
           // dot followed by digits; it used to read '\\.\\d\\+', which
           // demanded exactly one digit and then a literal plus sign.
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.  See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}

/** The general-purpose decorator, built from ALL_KEYWORDS. */
var decorateSource = sourceDecorator({
      'keywords': ALL_KEYWORDS,
      'hashComments': true,
      'cStyleComments': true,
      'multiLineStrings': true,
      'regexLiterals': true
    });

/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|boolean} opt_startLineNum when this is an integer it is
 *     written to the {@code value} attribute of the first list item and
 *     offsets the row index used for the item classes.
 * @param {boolean} isPreformatted true iff white-space in text nodes should
 *     be treated as significant.
 */
function numberLines(node, opt_startLineNum, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var li = document.createElement('li');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    switch (node.nodeType) {
      case 1:  // Element
        if (nocode.test(node.className)) { break; }
        // nodeName is upper case in HTML documents and keeps its source
        // case in XML ones, so compare case-insensitively.
        if ('br' === node.nodeName.toLowerCase()) {
          breakAfter(node);
          // Discard the <BR> since it is now flush against a </LI>.
          if (node.parentNode) {
            node.parentNode.removeChild(node);
          }
        } else {
          for (var child = node.firstChild; child; child = child.nextSibling) {
            walk(child);
          }
        }
        break;
      case 3: case 4:  // Text
        if (isPreformatted) {
          var text = node.nodeValue;
          var match = text.match(lineBreak);
          if (match) {
            var firstLine = text.substring(0, match.index);
            node.nodeValue = firstLine;
            var tail = text.substring(match.index + match[0].length);
            if (tail) {
              var parent = node.parentNode;
              parent.insertBefore(
                  document.createTextNode(tail), node.nextSibling);
            }
            breakAfter(node);
            if (!firstLine) {
              // Don't leave blank text nodes in the DOM.
              node.parentNode.removeChild(node);
            }
          }
        }
        break;
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (opt_startLineNum === (opt_startLineNum|0)) {
    listItems[0].setAttribute('value', opt_startLineNum);
  }

  var ol = document.createElement('ol');
  ol.className = 'linenums';
  var offset = Math.max(0, ((opt_startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var j = 0, n = listItems.length; j < n; ++j) {
    li = listItems[j];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    // NOTE(review): "% 1" is always 0, so every row gets class "L0", which
    // does not match the comment above (it suggests "% 10").  Left as
    // found because a stylesheet may rely on it; confirm the intent.
    li.className = 'L' + ((j + offset) % 1);
    if (!li.firstChild) {
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 *
 * <p>Each text node listed in {@code job.spans} is wrapped in a
 * {@code <span>} whose class is the style covering it; a text node that
 * straddles a style boundary is split into several text nodes, one per
 * style.  {@code job.decorations} is compacted in place first: zero-length
 * entries are dropped and adjacent entries with the same style are merged.
 * While the DOM is being rewritten {@code job.sourceNode}, if present, is
 * hidden via {@code style.display} and restored afterwards.
 *
 * @param {Object} job like <pre>{
 *    sourceCode: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.sourceCode in order
 * }</pre>
 * @private
 */
function recombineTagsAndDecorations(job) {
  var isIE8OrEarlier = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
  isIE8OrEarlier = isIE8OrEarlier && +isIE8OrEarlier[1] <= 8;
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  // Index into source after the last code-unit recombined.
  var sourceIndex = 0;

  var spans = job.spans;
  var nSpans = spans.length;
  // Index into spans after the last span which ends at or before sourceIndex.
  var spanIndex = 0;

  var decorations = job.decorations;
  var nDecorations = decorations.length;
  // Index into decorations after the last decoration which ends at or before
  // sourceIndex.
  var decorationIndex = 0;

  // Remove all zero-length decorations.
  decorations[nDecorations] = sourceLength;
  var decPos, i;
  for (i = decPos = 0; i < nDecorations;) {
    if (decorations[i] !== decorations[i + 2]) {
      decorations[decPos++] = decorations[i++];
      decorations[decPos++] = decorations[i++];
    } else {
      i += 2;
    }
  }
  nDecorations = decPos;

  // Simplify decorations.
  for (i = decPos = 0; i < nDecorations;) {
    var startPos = decorations[i];
    // Conflate all adjacent decorations that use the same style.
    var startDec = decorations[i + 1];
    var runEnd = i + 2;
    while (runEnd + 2 <= nDecorations && decorations[runEnd + 1] === startDec) {
      runEnd += 2;
    }
    decorations[decPos++] = startPos;
    decorations[decPos++] = startDec;
    i = runEnd;
  }

  nDecorations = decorations.length = decPos;

  var sourceNode = job.sourceNode;
  var oldDisplay;
  if (sourceNode) {
    oldDisplay = sourceNode.style.display;
    sourceNode.style.display = 'none';
  }
  try {
    // Every pass advances spanIndex, decorationIndex, or both, so a
    // well-formed job finishes within (nSpans + nDecorations) / 2 passes.
    // The budget only stops malformed decoration lists (e.g. a non-numeric
    // position) from looping forever; it never cuts off valid input.
    var passesLeft = nSpans + nDecorations + 2;
    while (spanIndex < nSpans && passesLeft-- > 0) {
      // End of the current span: the start of the next one, or the end of
      // the source for the last span.  Checked by index rather than with
      // "||" because 0 is a legitimate start offset, and clamped because a
      // span may start past the end of the trimmed source text.
      var spanEnd = spanIndex + 2 < nSpans
          ? Math.min(spans[spanIndex + 2], sourceLength)
          : sourceLength;

      var decEnd = decorationIndex + 2 < nDecorations
          ? decorations[decorationIndex + 2]
          : sourceLength;

      var end = Math.min(spanEnd, decEnd);

      var textNode = spans[spanIndex + 1];
      var styledText;
      if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
          // Don't introduce spans around empty text nodes.
          && (styledText = source.substring(sourceIndex, end))) {
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE8OrEarlier) {
          styledText = styledText.replace(newlineRe, '\r');
        }
        textNode.nodeValue = styledText;
        var document = textNode.ownerDocument;
        var span = document.createElement('span');
        span.className = decorations[decorationIndex + 1];
        var parentNode = textNode.parentNode;
        parentNode.replaceChild(span, textNode);
        span.appendChild(textNode);
        if (sourceIndex < spanEnd) {  // Split off a text node.
          spans[spanIndex + 1] = textNode
              // TODO: Possibly optimize by using '' if there's no flicker.
              = document.createTextNode(source.substring(end, spanEnd));
          parentNode.insertBefore(textNode, span.nextSibling);
        }
      }

      sourceIndex = end;

      if (sourceIndex >= spanEnd) {
        spanIndex += 2;
      }
      if (sourceIndex >= decEnd) {
        decorationIndex += 2;
      }
    }
  } finally {
    if (sourceNode) {
      sourceNode.style.display = oldDisplay;
    }
  }
}


/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};
1235 * } } 1236 * @param {Array.<string>} fileExtensions 1237 */ 1238 function registerLangHandler(handler, fileExtensions) { 1239 for (var i = fileExtensions.length; --i >= 0;) { 1240 var ext = fileExtensions[i]; 1241 if (!langHandlerRegistry.hasOwnProperty(ext)) { 1242 langHandlerRegistry[ext] = handler; 1243 } else if (win['console']) { 1244 console['warn']('cannot override language handler %s', ext); 1245 } 1246 } 1247 } 1248 function langHandlerForExtension(extension, source) { 1249 if (!(extension && langHandlerRegistry.hasOwnProperty(extension))) { 1250 // Treat it as markup if the first non whitespace character is a < and 1251 // the last non-whitespace character is a >. 1252 extension = /^\s*</.test(source) 1253 ? 'default-markup' 1254 : 'default-code'; 1255 } 1256 return langHandlerRegistry[extension]; 1257 } 1258 registerLangHandler(decorateSource, ['default-code']); 1259 registerLangHandler( 1260 createSimpleLexer( 1261 [], 1262 [ 1263 [PR_PLAIN, /^[^<?]+/], 1264 [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/], 1265 [PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|$)/], 1266 // Unescaped content in an unknown language 1267 ['lang-', /^<\?([\s\S]+?)(?:\?>|$)/], 1268 ['lang-', /^<%([\s\S]+?)(?:%>|$)/], 1269 [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/], 1270 ['lang-', /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i], 1271 // Unescaped content in javascript. (Or possibly vbscript). 
1272 ['lang-js', /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i], 1273 // Contains unescaped stylesheet content 1274 ['lang-css', /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i], 1275 ['lang-in.tag', /^(<\/?[a-z][^<>]*>)/i] 1276 ]), 1277 ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']); 1278 registerLangHandler( 1279 createSimpleLexer( 1280 [ 1281 [PR_PLAIN, /^[\s]+/, null, ' \t\r\n'], 1282 [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\''] 1283 ], 1284 [ 1285 [PR_TAG, /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i], 1286 [PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i], 1287 ['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/], 1288 [PR_PUNCTUATION, /^[=<>\/]+/], 1289 ['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i], 1290 ['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i], 1291 ['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i], 1292 ['lang-css', /^style\s*=\s*\"([^\"]+)\"/i], 1293 ['lang-css', /^style\s*=\s*\'([^\']+)\'/i], 1294 ['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i] 1295 ]), 1296 ['in.tag']); 1297 registerLangHandler( 1298 createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']); 1299 registerLangHandler(sourceDecorator({ 1300 'keywords': CPP_KEYWORDS, 1301 'hashComments': true, 1302 'cStyleComments': true, 1303 'types': C_TYPES 1304 }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']); 1305 registerLangHandler(sourceDecorator({ 1306 'keywords': PHP_KEYWORDS, 1307 'hashComments': false, 1308 'cStyleComments': true, 1309 'multiLineStrings': true, 1310 'regexLiterals': true 1311// 'types': C_TYPES, 1312 }), ['php', 'phtml', 'inc']); 1313 registerLangHandler(sourceDecorator({ 1314 'keywords': 'null,true,false' 1315 }), ['json']); 1316 registerLangHandler(sourceDecorator({ 1317 'keywords': CSHARP_KEYWORDS, 1318 'hashComments': true, 1319 'cStyleComments': true, 1320 'verbatimStrings': true, 1321 'types': C_TYPES 1322 }), ['cs']); 1323 registerLangHandler(sourceDecorator({ 1324 'keywords': JAVA_KEYWORDS, 1325 'cStyleComments': true 1326 }), 
['java']); 1327 registerLangHandler(sourceDecorator({ 1328 'keywords': SH_KEYWORDS, 1329 'hashComments': true, 1330 'multiLineStrings': true 1331 }), ['bsh', 'csh', 'sh']); 1332 registerLangHandler(sourceDecorator({ 1333 'keywords': PYTHON_KEYWORDS, 1334 'hashComments': true, 1335 'multiLineStrings': true, 1336 'tripleQuotedStrings': true 1337 }), ['cv', 'py']); 1338 registerLangHandler(sourceDecorator({ 1339 'keywords': PERL_KEYWORDS, 1340 'hashComments': true, 1341 'multiLineStrings': true, 1342 'regexLiterals': true 1343 }), ['perl', 'pl', 'pm']); 1344 registerLangHandler(sourceDecorator({ 1345 'keywords': RUBY_KEYWORDS, 1346 'hashComments': true, 1347 'multiLineStrings': true, 1348 'regexLiterals': true 1349 }), ['rb']); 1350 registerLangHandler(sourceDecorator({ 1351 'keywords': JSCRIPT_KEYWORDS, 1352 'cStyleComments': true, 1353 'regexLiterals': true 1354 }), ['js']); 1355 registerLangHandler(sourceDecorator({ 1356 'keywords': COFFEE_KEYWORDS, 1357 'hashComments': 3, // ### style block comments 1358 'cStyleComments': true, 1359 'multilineStrings': true, 1360 'tripleQuotedStrings': true, 1361 'regexLiterals': true 1362 }), ['coffee']); 1363 registerLangHandler( 1364 createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']); 1365 registerLangHandler(sourceDecorator({ 1366 'keywords': CONFIG_KEYWORDS, 1367 'literals': CONFIG_OPTIONS, 1368 'strings': CONFIG_ENVS, 1369 'hashComments': true, 1370 'cStyleComments': false, 1371 'multiLineStrings': false, 1372 'regexLiterals': false, 1373 'httpdComments': true 1374 }), ['config']); 1375 1376 function applyDecorator(job) { 1377 var opt_langExtension = job.langExtension; 1378 1379 try { 1380 // Extract tags, and convert the source code to plain text. 1381 var sourceAndSpans = extractSourceSpans(job.sourceNode, job.pre); 1382 /** Plain text. 
@type {string} */ 1383 var source = sourceAndSpans.sourceCode; 1384 job.sourceCode = source; 1385 job.spans = sourceAndSpans.spans; 1386 job.basePos = 0; 1387 1388 // Apply the appropriate language handler 1389 langHandlerForExtension(opt_langExtension, source)(job); 1390 1391 // Integrate the decorations and tags back into the source code, 1392 // modifying the sourceNode in place. 1393 recombineTagsAndDecorations(job); 1394 } catch (e) { 1395 if (win['console']) { 1396 console['log'](e && e['stack'] ? e['stack'] : e); 1397 } 1398 } 1399 } 1400 1401 /** 1402 * @param sourceCodeHtml {string} The HTML to pretty print. 1403 * @param opt_langExtension {string} The language name to use. 1404 * Typically, a filename extension like 'cpp' or 'java'. 1405 * @param opt_numberLines {number|boolean} True to number lines, 1406 * or the 1-indexed number of the first line in sourceCodeHtml. 1407 */ 1408 function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) { 1409 var container = document.createElement('pre'); 1410 // This could cause images to load and onload listeners to fire. 1411 // E.g. <img onerror="alert(1337)" src="nosuchimage.png">. 1412 // We assume that the inner HTML is from a trusted source. 
1413 container.innerHTML = sourceCodeHtml; 1414 if (opt_numberLines) { 1415 numberLines(container, opt_numberLines, true); 1416 } 1417 1418 var job = { 1419 langExtension: opt_langExtension, 1420 numberLines: opt_numberLines, 1421 sourceNode: container, 1422 pre: 1 1423 }; 1424 applyDecorator(job); 1425 return container.innerHTML; 1426 } 1427 1428 function prettyPrint(opt_whenDone) { 1429 function byTagName(tn) { return document.getElementsByTagName(tn); } 1430 // fetch a list of nodes to rewrite 1431 var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')]; 1432 var elements = []; 1433 for (var i = 0; i < codeSegments.length; ++i) { 1434 for (var j = 0, n = codeSegments[i].length; j < n; ++j) { 1435 elements.push(codeSegments[i][j]); 1436 } 1437 } 1438 codeSegments = null; 1439 1440 var clock = Date; 1441 if (!clock['now']) { 1442 clock = { 'now': function () { return +(new Date); } }; 1443 } 1444 1445 // The loop is broken into a series of continuations to make sure that we 1446 // don't make the browser unresponsive when rewriting a large page. 1447 var k = 0; 1448 var prettyPrintingJob; 1449 1450 var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/; 1451 var prettyPrintRe = /\bprettyprint\b/; 1452 var prettyPrintedRe = /\bprettyprinted\b/; 1453 var preformattedTagNameRe = /pre|xmp/i; 1454 var codeRe = /^code$/i; 1455 var preCodeXmpRe = /^(?:pre|code|xmp)$/i; 1456 1457 function doWork() { 1458 var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ? 1459 clock['now']() + 250 /* ms */ : 1460 Infinity); 1461 for (; k < elements.length && clock['now']() < endTime; k++) { 1462 var cs = elements[k]; 1463 var className = cs.className; 1464 if (prettyPrintRe.test(className) 1465 // Don't redo this if we've already done it. 1466 // This allows recalling pretty print to just prettyprint elements 1467 // that have been added to the page since last call. 
1468 && !prettyPrintedRe.test(className)) { 1469 1470 // make sure this is not nested in an already prettified element 1471 var nested = false; 1472 for (var p = cs.parentNode; p; p = p.parentNode) { 1473 var tn = p.tagName; 1474 if (preCodeXmpRe.test(tn) 1475 && p.className && prettyPrintRe.test(p.className)) { 1476 nested = true; 1477 break; 1478 } 1479 } 1480 if (!nested) { 1481 // Mark done. If we fail to prettyprint for whatever reason, 1482 // we shouldn't try again. 1483 cs.className += ' prettyprinted'; 1484 1485 // If the classes includes a language extensions, use it. 1486 // Language extensions can be specified like 1487 // <pre class="prettyprint lang-cpp"> 1488 // the language extension "cpp" is used to find a language handler 1489 // as passed to PR.registerLangHandler. 1490 // HTML5 recommends that a language be specified using "language-" 1491 // as the prefix instead. Google Code Prettify supports both. 1492 // http://dev.w3.org/html5/spec-author-view/the-code-element.html 1493 var langExtension = className.match(langExtensionRe); 1494 // Support <pre class="prettyprint"><code class="language-c"> 1495 var wrapper; 1496 if (!langExtension && (wrapper = childContentWrapper(cs)) 1497 && codeRe.test(wrapper.tagName)) { 1498 langExtension = wrapper.className.match(langExtensionRe); 1499 } 1500 1501 if (langExtension) { langExtension = langExtension[1]; } 1502 1503 var preformatted; 1504 if (preformattedTagNameRe.test(cs.tagName)) { 1505 preformatted = 1; 1506 } else { 1507 var currentStyle = cs['currentStyle']; 1508 var whitespace = ( 1509 currentStyle 1510 ? currentStyle['whiteSpace'] 1511 : (document.defaultView 1512 && document.defaultView.getComputedStyle) 1513 ? 
document.defaultView.getComputedStyle(cs, null) 1514 .getPropertyValue('white-space') 1515 : 0); 1516 preformatted = whitespace 1517 && 'pre' === whitespace.substring(0, 3); 1518 } 1519 1520 // Look for a class like linenums or linenums:<n> where <n> is the 1521 // 1-indexed number of the first line. 1522 var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/); 1523 lineNums = lineNums 1524 ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true 1525 : false; 1526 if (lineNums) { numberLines(cs, lineNums, preformatted); } 1527 1528 // do the pretty printing 1529 prettyPrintingJob = { 1530 langExtension: langExtension, 1531 sourceNode: cs, 1532 numberLines: lineNums, 1533 pre: preformatted 1534 }; 1535 applyDecorator(prettyPrintingJob); 1536 } 1537 } 1538 } 1539 if (k < elements.length) { 1540 // finish up in a continuation 1541 setTimeout(doWork, 250); 1542 } else if (opt_whenDone) { 1543 opt_whenDone(); 1544 } 1545 } 1546 1547 doWork(); 1548 } 1549 1550 /** 1551 * Contains functions for creating and registering new language handlers. 
1552 * @type {Object} 1553 */ 1554 var PR = win['PR'] = { 1555 'createSimpleLexer': createSimpleLexer, 1556 'registerLangHandler': registerLangHandler, 1557 'sourceDecorator': sourceDecorator, 1558 'PR_ATTRIB_NAME': PR_ATTRIB_NAME, 1559 'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE, 1560 'PR_COMMENT': PR_COMMENT, 1561 'PR_DECLARATION': PR_DECLARATION, 1562 'PR_KEYWORD': PR_KEYWORD, 1563 'PR_LITERAL': PR_LITERAL, 1564 'PR_NOCODE': PR_NOCODE, 1565 'PR_PLAIN': PR_PLAIN, 1566 'PR_PUNCTUATION': PR_PUNCTUATION, 1567 'PR_SOURCE': PR_SOURCE, 1568 'PR_STRING': PR_STRING, 1569 'PR_TAG': PR_TAG, 1570 'PR_TYPE': PR_TYPE, 1571 'prettyPrintOne': win['prettyPrintOne'] = prettyPrintOne, 1572 'prettyPrint': win['prettyPrint'] = prettyPrint 1573 }; 1574 1575 1576/* Register Lua syntaxes */ 1577 PR['registerLangHandler']( 1578 PR['createSimpleLexer']( 1579 [ 1580 // Whitespace 1581 [PR['PR_PLAIN'], /^[\t\n\r \xA0]+/, null, '\t\n\r \xA0'], 1582 // A double or single quoted, possibly multi-line, string. 1583 [PR['PR_STRING'], /^(?:\"(?:[^\"\\]|\\[\s\S])*(?:\"|$)|\'(?:[^\'\\]|\\[\s\S])*(?:\'|$))/, null, '"\''] 1584 ], 1585 [ 1586 // A comment is either a line comment that starts with two dashes, or 1587 // two dashes preceding a long bracketed block. 1588 [PR['PR_COMMENT'], /^--(?:\[(=*)\[[\s\S]*?(?:\]\1\]|$)|[^\r\n]*)/], 1589 [PR['PR_TYPE'], /^nil|false|true/], 1590 // A long bracketed block not preceded by -- is a string. 1591 [PR['PR_STRING'], /^\[(=*)\[[\s\S]*?(?:\]\1\]|$)/], 1592 [PR['PR_KEYWORD'], /^(?:and|break|do|else|elseif|end|for|function|if|in|local|not|or|repeat|require|return|then|until|while)\b/, null], 1593 // A number is a hex integer literal, a decimal real literal, or in 1594 // scientific notation. 
1595 [PR['PR_LITERAL'], 1596 /^[+-]?(?:0x[\da-f]+|(?:(?:\.\d+|\d+(?:\.\d*)?)(?:e[+\-]?\d+)?))/i], 1597 // An identifier 1598 [PR['PR_PLAIN'], /^[a-z_]\w*/i], 1599 // A run of punctuation 1600 [PR['PR_PUNCTUATION'], /^[^\w\t\n\r \xA0][^\w\t\n\r \xA0\"\'\-\+=]*/] 1601 ]), 1602 ['lua']); 1603 1604 1605 // Make PR available via the Asynchronous Module Definition (AMD) API. 1606 // Per https://github.com/amdjs/amdjs-api/wiki/AMD: 1607 // The Asynchronous Module Definition (AMD) API specifies a 1608 // mechanism for defining modules such that the module and its 1609 // dependencies can be asynchronously loaded. 1610 // ... 1611 // To allow a clear indicator that a global define function (as 1612 // needed for script src browser loading) conforms to the AMD API, 1613 // any global define function SHOULD have a property called "amd" 1614 // whose value is an object. This helps avoid conflict with any 1615 // other existing JavaScript code that could have defined a define() 1616 // function that does not conform to the AMD API. 1617 if (typeof define === "function" && define['amd']) { 1618 define("google-code-prettify", [], function () { 1619 return PR; 1620 }); 1621 } 1622})(); 1623