#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
"""genctype.py - Generate the svn_ctype character classification table.

Running this script writes a C initializer fragment to standard output:
a "DO NOT EDIT" banner followed by one line per byte value 0..255, each
holding a comment with the byte's name and its '|'-joined SVN_CTYPE_*
flags (or 0 when no flag applies).
"""

# Table of ASCII character names, indexed by byte value (0..127)
names = ('nul', 'soh', 'stx', 'etx', 'eot', 'enq', 'ack', 'bel',
         'bs',  'ht',  'nl',  'vt',  'np',  'cr',  'so',  'si',
         'dle', 'dc1', 'dc2', 'dc3', 'dc4', 'nak', 'syn', 'etb',
         'can', 'em',  'sub', 'esc', 'fs',  'gs',  'rs',  'us',
         'sp',  '!',   '"',   '#',   '$',   '%',   '&',   '\'',
         '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
         '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
         '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
         '@',   'A',   'B',   'C',   'D',   'E',   'F',   'G',
         'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
         'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
         'X',   'Y',   'Z',   '[',   '\\',  ']',   '^',   '_',
         '`',   'a',   'b',   'c',   'd',   'e',   'f',   'g',
         'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
         'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
         'x',   'y',   'z',   '{',   '|',   '}',   '~',   'del')

# All whitespace characters:
#   horizontal tab, vertical tab, new line, form feed, carriage return, space
whitespace = (9, 10, 11, 12, 13, 32)

# Bytes not valid in UTF-8 sequences
# NOTE(review): kept exactly as in the original generator.  0xE0 and 0xF0
# are legal lead bytes under RFC 3629 -- confirm against the consumers of
# the generated table before changing this list.
utf8_invalid = (0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF)


def ctype_name(c):
    """Return the three-character label used in the comment for byte C.

    ASCII bytes use their entry in `names`: single-character names are
    centered, longer ones are left-justified.  Bytes >= 128 are labelled
    with their hexadecimal value in the form 'xNN'.
    """
    if c >= 128:
        # hex() yields '0xNN'; dropping the leading '0' leaves 'xNN'.
        return hex(c)[1:]
    label = names[c]
    if len(label) == 1:
        return label.center(3)
    return label.ljust(3)


def ctype_bits(c):
    """Return the list of SVN_CTYPE_* flag names that apply to byte C.

    The order of the returned flags is the order in which they appear in
    the generated table entry.
    """
    bits = []

    if c >= 128:
        # UTF-8 multibyte sequences
        if 0xC0 < c < 0xFE and c not in utf8_invalid:
            # Lead bytes (start of sequence)
            bits.append('SVN_CTYPE_UTF8LEAD')
        elif (c & 0xC0) == 0x80:
            # Continuation bytes
            bits.append('SVN_CTYPE_UTF8CONT')
        return bits

    # Ascii subrange
    bits.append('SVN_CTYPE_ASCII')

    # Control characters
    if c < 32 or c == 127:
        bits.append('SVN_CTYPE_CNTRL')

    # Whitespace characters
    if c in whitespace:
        bits.append('SVN_CTYPE_SPACE')

    # The remaining classes are mutually exclusive.
    if 33 <= c < 48 or 58 <= c < 65 or 91 <= c < 97 or 123 <= c < 127:
        # Punctuation marks
        bits.append('SVN_CTYPE_PUNCT')
    elif 48 <= c < 58:
        # Decimal digits
        bits.append('SVN_CTYPE_DIGIT')
    elif 65 <= c < 91:
        # Uppercase letters; 'A'..'F' are also hexadecimal digits
        bits.append('SVN_CTYPE_UPPER')
        if c <= 70:
            bits.append('SVN_CTYPE_XALPHA')
    elif 97 <= c < 123:
        # Lowercase letters; 'a'..'f' are also hexadecimal digits
        bits.append('SVN_CTYPE_LOWER')
        if c <= 102:
            bits.append('SVN_CTYPE_XALPHA')

    return bits


def format_entry(c):
    """Return the generated table line for byte C."""
    bits = ctype_bits(c)
    flags = ' | '.join(bits) if bits else '0'
    return ' /* %s */ %s,' % (ctype_name(c), flags)


# The table is written as soon as the module is executed, exactly as the
# original flat script did.
print(' /* **** DO NOT EDIT! ****')
print(' This table was generated by genctype.py, make changes there. */')

for c in range(256):
    print(format_entry(c))