|  |  |  |
|---|---|---|
| author | Michele Calgaro <michele.calgaro@yahoo.it> | 2023-12-01 12:38:43 +0900 |
| committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2023-12-01 12:38:43 +0900 |
| commit | b67b7f2b784c7105e88a5e639d9d84736ae2cbc1 (patch) | |
| tree | 0fd16d439c681c07d57d7f0d544c7582e04c3a31 /debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py | |
| parent | c0a6f1b84c84749908961579b84513fd9f9d9eac (diff) | |
| download | extra-dependencies-b67b7f2b784c7105e88a5e639d9d84736ae2cbc1.tar.gz extra-dependencies-b67b7f2b784c7105e88a5e639d9d84736ae2cbc1.zip | |

uncrustify-trinity: updated based on upstream version 0.78.1
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
Diffstat (limited to 'debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py')
| mode | file | lines |
|---|---|---|
| -rwxr-xr-x | debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py | 316 |

1 file changed, 0 insertions, 316 deletions
```diff
diff --git a/debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py b/debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py
deleted file mode 100755
index 0bc33bac..00000000
--- a/debian/uncrustify-trinity/uncrustify-trinity-0.78.0/scripts/tokenizer.py
+++ /dev/null
@@ -1,316 +0,0 @@
-#! /usr/bin/env python
-# tokenize.py
-#
-# Parses a C/C++/C#/D/Java/Pawn/whatever file in an array of
-# tuples (string, type)
-#
-
-# punctuator lookup table
-punc_table = [
-   [ '!',  25,  26, '!'   ],   #   0: '!'
-   [ '#',  24,  35, '#'   ],   #   1: '#'
-   [ '$',  23,   0, '$'   ],   #   2: '$'
-   [ '%',  22,  36, '%'   ],   #   3: '%'
-   [ '&',  21,  41, '&'   ],   #   4: '&'
-   [ '(',  20,   0, '('   ],   #   5: '('
-   [ ')',  19,   0, ')'   ],   #   6: ')'
-   [ '*',  18,  43, '*'   ],   #   7: '*'
-   [ '+',  17,  44, '+'   ],   #   8: '+'
-   [ ',',  16,   0, ','   ],   #   9: ','
-   [ '-',  15,  46, '-'   ],   #  10: '-'
-   [ '.',  14,  50, '.'   ],   #  11: '.'
-   [ '/',  13,  53, '/'   ],   #  12: '/'
-   [ ':',  12,  54, ':'   ],   #  13: ':'
-   [ ';',  11,   0, ';'   ],   #  14: ';'
-   [ '<',  10,  56, '<'   ],   #  15: '<'
-   [ '=',   9,  63, '='   ],   #  16: '='
-   [ '>',   8,  65, '>'   ],   #  17: '>'
-   [ '?',   7,   0, '?'   ],   #  18: '?'
-   [ '[',   6,  70, '['   ],   #  19: '['
-   [ ']',   5,   0, ']'   ],   #  20: ']'
-   [ '^',   4,  71, '^'   ],   #  21: '^'
-   [ '{',   3,   0, '{'   ],   #  22: '{'
-   [ '|',   2,  72, '|'   ],   #  23: '|'
-   [ '}',   1,   0, '}'   ],   #  24: '}'
-   [ '~',   0,  74, '~'   ],   #  25: '~'
-   [ '<',   3,  30, '!<'  ],   #  26: '!<'
-   [ '=',   2,  33, '!='  ],   #  27: '!='
-   [ '>',   1,  34, '!>'  ],   #  28: '!>'
-   [ '~',   0,   0, '!~'  ],   #  29: '!~'
-   [ '=',   1,   0, '!<=' ],   #  30: '!<='
-   [ '>',   0,  32, '!<>' ],   #  31: '!<>'
-   [ '=',   0,   0, '!<>='],   #  32: '!<>='
-   [ '=',   0,   0, '!==' ],   #  33: '!=='
-   [ '=',   0,   0, '!>=' ],   #  34: '!>='
-   [ '#',   0,   0, '##'  ],   #  35: '##'
-   [ ':',   2,  39, '%:'  ],   #  36: '%:'
-   [ '=',   1,   0, '%='  ],   #  37: '%='
-   [ '>',   0,   0, '%>'  ],   #  38: '%>'
-   [ '%',   0,  40, None  ],   #  39: '%:%'
-   [ ':',   0,   0, '%:%:'],   #  40: '%:%:'
-   [ '&',   1,   0, '&&'  ],   #  41: '&&'
-   [ '=',   0,   0, '&='  ],   #  42: '&='
-   [ '=',   0,   0, '*='  ],   #  43: '*='
-   [ '+',   1,   0, '++'  ],   #  44: '++'
-   [ '=',   0,   0, '+='  ],   #  45: '+='
-   [ '-',   2,   0, '--'  ],   #  46: '--'
-   [ '=',   1,   0, '-='  ],   #  47: '-='
-   [ '>',   0,  49, '->'  ],   #  48: '->'
-   [ '*',   0,   0, '->*' ],   #  49: '->*'
-   [ '*',   1,   0, '.*'  ],   #  50: '.*'
-   [ '.',   0,  52, '..'  ],   #  51: '..'
-   [ '.',   0,   0, '...' ],   #  52: '...'
-   [ '=',   0,   0, '/='  ],   #  53: '/='
-   [ ':',   1,   0, '::'  ],   #  54: '::'
-   [ '>',   0,   0, ':>'  ],   #  55: ':>'
-   [ '%',   4,   0, '<%'  ],   #  56: '<%'
-   [ ':',   3,   0, '<:'  ],   #  57: '<:'
-   [ '<',   2,  61, '<<'  ],   #  58: '<<'
-   [ '=',   1,   0, '<='  ],   #  59: '<='
-   [ '>',   0,  62, '<>'  ],   #  60: '<>'
-   [ '=',   0,   0, '<<=' ],   #  61: '<<='
-   [ '=',   0,   0, '<>=' ],   #  62: '<>='
-   [ '=',   0,  64, '=='  ],   #  63: '=='
-   [ '=',   0,   0, '===' ],   #  64: '==='
-   [ '=',   1,   0, '>='  ],   #  65: '>='
-   [ '>',   0,  67, '>>'  ],   #  66: '>>'
-   [ '=',   1,   0, '>>=' ],   #  67: '>>='
-   [ '>',   0,  69, '>>>' ],   #  68: '>>>'
-   [ '=',   0,   0, '>>>='],   #  69: '>>>='
-   [ ']',   0,   0, '[]'  ],   #  70: '[]'
-   [ '=',   0,   0, '^='  ],   #  71: '^='
-   [ '=',   1,   0, '|='  ],   #  72: '|='
-   [ '|',   0,   0, '||'  ],   #  73: '||'
-   [ '=',   1,   0, '~='  ],   #  74: '~='
-   [ '~',   0,   0, '~~'  ],   #  75: '~~'
-]
-
-
-#
-# Token types:
-#  0 = newline
-#  1 = punctuator
-#  2 = integer
-#  3 = float
-#  4 = string
-#  5 = identifier
-#
-class Tokenizer:
-    def __init__(self):
-        self.tokens = []
-        self.text = ''
-        self.text_idx = 0
-
-    def tokenize_text(self, in_text):
-        self.tokens = []
-        self.text = in_text
-        self.text_idx = 0
-
-        print(in_text)
-        try:
-            while self.text_idx < len(self.text):
-                if self.parse_whitespace():
-                    continue
-                elif self.text[self.text_idx] == '\\' and self.text[self.text_idx + 1] == '\n':
-                    self.text_idx += 2
-                    continue
-                elif self.parse_comment():
-                    continue
-                elif self.parse_number():
-                    continue
-                elif self.parse_identifier():
-                    continue
-                elif self.parse_string():
-                    continue
-                elif self.parse_punctuator():
-                    continue
-                else:
-                    print("confused: %s" % self.text[self.text_idx:])
-                    break
-        except:
-            print("bombed")
-            raise
-
-    def parse_whitespace(self):
-        start_idx = self.text_idx
-        hit_newline = False
-        while self.text_idx < len(self.text):
-            if self.text[self.text_idx] in '\n\r':
-                hit_newline = True
-            elif not self.text[self.text_idx] in ' \t':
-                break
-            self.text_idx += 1
-
-        if hit_newline:
-            self.tokens.append(('\n', 0))
-        return start_idx != self.text_idx
-
-    def parse_comment(self):
-        if not self.text[self.text_idx] == '/' or not self.text[self.text_idx + 1] in '/*':
-            return False
-        if self.text[self.text_idx + 1] == '/':
-            while self.text_idx < len(self.text):
-                if self.text[self.text_idx] in '\n\r':
-                    break
-                self.text_idx += 1
-        else:
-            while self.text_idx < len(self.text) - 1:
-                if self.text[self.text_idx] == '*' and self.text[self.text_idx + 1] == '/':
-                    self.text_idx += 2
-                    break
-                self.text_idx += 1
-        return True
-
-    def parse_identifier(self):
-        if not self.text[self.text_idx].upper() in '@_ABCDEFGHIJKLMNOPQRSTUVWXYZ':
-            return False
-        start_idx = self.text_idx
-        while self.text_idx < len(self.text) and \
-                self.text[self.text_idx].upper() in '@_ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890':
-            self.text_idx += 1
-        self.tokens.append((self.text[start_idx : self.text_idx], 5))
-        return True
-
-    def parse_string(self):
-        starter = 0
-        start_ch = self.text[self.text_idx]
-        if start_ch == 'L':
-            starter = 1
-            start_ch = self.text[self.text_idx + 1]
-        if not start_ch in '"\'':
-            return False
-        start_idx = self.text_idx
-        self.text_idx += starter + 1
-        escaped = False
-        while self.text_idx < len(self.text):
-            if escaped:
-                escaped = False
-            else:
-                if self.text[self.text_idx] == '\\':
-                    escaped = True
-                elif self.text[self.text_idx] == start_ch:
-                    self.text_idx += 1
-                    break
-            self.text_idx += 1
-
-        self.tokens.append((self.text[start_idx : self.text_idx], 4))
-        return True
-
-    # Checks for punctuators
-    # Returns whether a punctuator was consumed (True or False)
-    def parse_punctuator(self):
-        tab_idx = 0
-        punc_len = 0
-        saved_punc = None
-        while 1:
-            pte = punc_table[tab_idx]
-            if pte[0] == self.text[self.text_idx]:
-                if pte[3] is not None:
-                    saved_punc = pte[3]
-                self.text_idx += 1
-                tab_idx = pte[2]
-                if tab_idx == 0:
-                    break
-            elif pte[1] == 0:
-                break
-            else:
-                tab_idx += 1
-        if saved_punc is not None:
-            self.tokens.append((saved_punc, 1))
-            return True
-        return False
-
-    def parse_number(self):
-        # A number must start with a digit or a dot followed by a digit
-        ch = self.text[self.text_idx]
-        if not ch.isdigit() and (ch != '.' or not self.text[self.text_idx + 1].isdigit()):
-            return False
-        token_type = 2 # integer
-        if ch == '.':
-            token_type = 3 # float
-        did_hex = False
-        start_idx = self.text_idx
-
-        # Check for Hex, Octal, or Binary
-        # Note that only D and Pawn support binary, but who cares?
-        #
-        if ch == '0':
-            self.text_idx += 1
-            ch = self.text[self.text_idx].upper()
-            if ch == 'X':                # hex
-                did_hex = True
-                self.text_idx += 1
-                while self.text[self.text_idx] in '_0123456789abcdefABCDEF':
-                    self.text_idx += 1
-            elif ch == 'B':              # binary
-                self.text_idx += 1
-                while self.text[self.text_idx] in '_01':
-                    self.text_idx += 1
-            elif ch >= '0' and ch <= '7':  # octal (but allow decimal)
-                self.text_idx += 1
-                while self.text[self.text_idx] in '_0123456789':
-                    self.text_idx += 1
-            else:
-                # either just 0 or 0.1 or 0UL, etc
-                pass
-        else:
-            # Regular int or float
-            while self.text[self.text_idx] in '_0123456789':
-                self.text_idx += 1
-
-        # Check if we stopped on a decimal point
-        if self.text[self.text_idx] == '.':
-            self.text_idx += 1
-            token_type = 3 # float
-            if did_hex:
-                while self.text[self.text_idx] in '_0123456789abcdefABCDEF':
-                    self.text_idx += 1
-            else:
-                while self.text[self.text_idx] in '_0123456789':
-                    self.text_idx += 1
-
-        # Check exponent
-        # Valid exponents per language (not that it matters):
-        # C/C++/D/Java: eEpP
-        # C#/Pawn:      eE
-        if self.text[self.text_idx] in 'eEpP':
-            token_type = 3 # float
-            self.text_idx += 1
-            if self.text[self.text_idx] in '+-':
-                self.text_idx += 1
-            while self.text[self.text_idx] in '_0123456789':
-                self.text_idx += 1
-
-        # Check the suffixes
-        # Valid suffixes per language (not that it matters):
-        #        Integer       Float
-        # C/C++: uUlL          lLfF
-        # C#:    uUlL          fFdDMm
-        # D:     uUL           ifFL
-        # Java:  lL            fFdD
-        # Pawn:  (none)        (none)
-        #
-        # Note that i, f, d, and m only appear in floats.
-        while 1:
-            if self.text[self.text_idx] in 'tTfFdDmM':
-                token_type = 3 # float
-            elif not self.text[self.text_idx] in 'lLuU':
-                break
-            self.text_idx += 1
-
-        self.tokens.append((self.text[start_idx : self.text_idx], token_type))
-        return True
-
-text = """
-1.23+4-3*16%2 *sin(1.e-3 + .5p32) "hello" and "hello\\"there"
-123 // some comment
-a = b + c;
-#define abc \\
-        5
-d = 5 /* hello */ + 3;
-"""
-
-t = Tokenizer()
-t.tokenize_text(text)
-print(t.tokens)
-
```
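For anyone skimming the removed script: `parse_punctuator` treats `punc_table` as a trie flattened into a single list. Each row is `[expected_char, siblings_remaining, child_row, token_or_None]`; the walk scans sibling rows until the current character matches, remembers that row's token, then jumps to the child group, so the longest punctuator wins. Below is a minimal sketch of that walk; `punc_excerpt` and `match_punctuator` are hypothetical names, and the five-row table is a tiny slice standing in for the full 76-row table in the diff.

```python
# Row layout mirrors the deleted script: [char, siblings_left, child_row, token].
# punc_excerpt is a hypothetical 5-row slice covering only '<', '=', '<<', '<=', '<<='.
punc_excerpt = [
    ['<', 1, 2, '<'],    # 0: '<'   -> children begin at row 2
    ['=', 0, 0, '='],    # 1: '='   (last sibling on this level)
    ['<', 1, 4, '<<'],   # 2: '<<'  -> children begin at row 4
    ['=', 0, 0, '<='],   # 3: '<='  (last sibling)
    ['=', 0, 0, '<<='],  # 4: '<<=' (leaf: child_row 0 ends the walk)
]

def match_punctuator(table, text, pos=0):
    """Return the longest punctuator starting at text[pos], or None."""
    tab_idx = 0
    saved = None
    while True:
        ch, siblings_left, child_row, token = table[tab_idx]
        if pos < len(text) and text[pos] == ch:
            if token is not None:
                saved = token        # longest match seen so far
            pos += 1
            tab_idx = child_row
            if tab_idx == 0:         # leaf: nothing longer can match
                break
        elif siblings_left == 0:     # no more alternatives on this level
            break
        else:
            tab_idx += 1             # try the next sibling row
    return saved

print(match_punctuator(punc_excerpt, '<<= 5'))  # '<<='
print(match_punctuator(punc_excerpt, '<4'))     # '<'
```

One detail worth noting: row 0 of the real table is a valid first-level entry, so index 0 can never be a legitimate child pointer, which is why `child_row == 0` doubles as the leaf marker.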
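The numeric token types in the `(string, type)` tuples are documented only in the comment block (0 = newline through 5 = identifier), which makes the raw output of `print(t.tokens)` hard to read. A small companion sketch, not part of the deleted script, that labels each tuple; `TOKEN_NAMES` and `dump_tokens` are hypothetical helpers:

```python
# Map the type codes from the script's comment block to readable names.
TOKEN_NAMES = {
    0: 'newline',
    1: 'punctuator',
    2: 'integer',
    3: 'float',
    4: 'string',
    5: 'identifier',
}

def dump_tokens(tokens):
    """Print one labeled line per (text, type) tuple."""
    for text, tok_type in tokens:
        print('%-10s %r' % (TOKEN_NAMES.get(tok_type, 'unknown'), text))

# With the Tokenizer from the diff above, one would expect:
#   t = Tokenizer()
#   t.tokenize_text('a = b + 1;')
#   dump_tokens(t.tokens)
# to label 'a' and 'b' as identifiers, '=', '+' and ';' as punctuators,
# and '1' as an integer.
```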
