diff options
author | Michele Calgaro <michele.calgaro@yahoo.it> | 2020-12-17 20:50:19 +0900 |
---|---|---|
committer | Michele Calgaro <michele.calgaro@yahoo.it> | 2020-12-17 23:51:16 +0900 |
commit | 4ae7b32dc09eb7acd9411a8af63a767660aa64ec (patch) | |
tree | 3d150131675502f5d5a3ff054ec6fc3dbe4e78fd /lib/antlr/antlr/TokenStreamRewriteEngine.h | |
parent | 25071ecede33675972c3163f4b46df964a06dadf (diff) | |
download | tdevelop-4ae7b32dc09eb7acd9411a8af63a767660aa64ec.tar.gz tdevelop-4ae7b32dc09eb7acd9411a8af63a767660aa64ec.zip |
Renaming of files in preparation for code style tools.
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it>
(cherry picked from commit 3e3d9eda9d1dd8c67c1c27c6a9bdc68bdecfcc30)
Diffstat (limited to 'lib/antlr/antlr/TokenStreamRewriteEngine.h')
-rw-r--r-- | lib/antlr/antlr/TokenStreamRewriteEngine.h | 439 |
1 files changed, 439 insertions, 0 deletions
diff --git a/lib/antlr/antlr/TokenStreamRewriteEngine.h b/lib/antlr/antlr/TokenStreamRewriteEngine.h new file mode 100644 index 00000000..20f3e8f4 --- /dev/null +++ b/lib/antlr/antlr/TokenStreamRewriteEngine.h @@ -0,0 +1,439 @@ +#ifndef INC_TokenStreamRewriteEngine_h__ +#define INC_TokenStreamRewriteEngine_h__ + +/* ANTLR Translator Generator + * Project led by Terence Parr at http://www.jGuru.com + * Software rights: http://www.antlr.org/license.html + */ + +#include <string> +#include <list> +#include <vector> +#include <map> +#include <utility> +#include <iostream> +#include <iterator> +#include <cassert> +#include <algorithm> + +#include <antlr/config.h> + +#include <antlr/TokenStream.h> +#include <antlr/TokenWithIndex.h> +#include <antlr/BitSet.h> + +#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE +namespace antlr { +#endif + +/** This token stream tracks the *entire* token stream coming from + * a lexer, but does not pass on the whitespace (or whatever else + * you want to discard) to the parser. + * + * This class can then be asked for the ith token in the input stream. + * Useful for dumping out the input stream exactly after doing some + * augmentation or other manipulations. Tokens are index from 0..n-1 + * + * You can insert stuff, replace, and delete chunks. Note that the + * operations are done lazily--only if you convert the buffer to a + * String. This is very efficient because you are not moving data around + * all the time. As the buffer of tokens is converted to strings, the + * toString() method(s) check to see if there is an operation at the + * current index. If so, the operation is done and then normal String + * rendering continues on the buffer. This is like having multiple Turing + * machine instruction streams (programs) operating on a single input tape. :) + * + * Since the operations are done lazily at toString-time, operations do not + * screw up the token index values. That is, an insert operation at token + * index i does not change the index values for tokens i+1..n-1. + * + * Because operations never actually alter the buffer, you may always get + * the original token stream back without undoing anything. Since + * the instructions are queued up, you can easily simulate transactions and + * roll back any changes if there is an error just by removing instructions. + * For example, + * + * TokenStreamRewriteEngine rewriteEngine = + * new TokenStreamRewriteEngine(lexer); + * JavaRecognizer parser = new JavaRecognizer(rewriteEngine); + * ... + * rewriteEngine.insertAfter("pass1", t, "foobar");} + * rewriteEngine.insertAfter("pass2", u, "start");} + * System.out.println(rewriteEngine.toString("pass1")); + * System.out.println(rewriteEngine.toString("pass2")); + * + * You can also have multiple "instruction streams" and get multiple + * rewrites from a single pass over the input. Just name the instruction + * streams and use that name again when printing the buffer. This could be + * useful for generating a C file and also its header file--all from the + * same buffer. + * + * If you don't use named rewrite streams, a "default" stream is used. + * + * Terence Parr, parrt@cs.usfca.edu + * University of San Francisco + * February 2004 + */ +class TokenStreamRewriteEngine : public TokenStream +{ +public: + typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list; + static const char* DEFAULT_PROGRAM_NAME; +#ifndef NO_STATIC_CONSTS + static const size_t MIN_TOKEN_INDEX; + static const int PROGRAM_INIT_SIZE; +#else + enum { + MIN_TOKEN_INDEX = 0, + PROGRAM_INIT_SIZE = 100 + }; +#endif + + struct tokenToStream { + tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {} + template <typename T> void operator() ( const T& t ) { + out << t->getText(); + } + ANTLR_USE_NAMESPACE(std)ostream& out; + }; + + class RewriteOperation { + protected: + RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt ) + : index(idx), text(txt) + { + } + public: + virtual ~RewriteOperation() + { + } + /** Execute the rewrite operation by possibly adding to the buffer. + * Return the index of the next token to operate on. + */ + virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) { + return index; + } + virtual size_t getIndex() const { + return index; + } + virtual const char* type() const { + return "RewriteOperation"; + } + protected: + size_t index; + ANTLR_USE_NAMESPACE(std)string text; + }; + + struct executeOperation { + ANTLR_USE_NAMESPACE(std)ostream& out; + executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {} + void operator () ( RewriteOperation* t ) { + t->execute(out); + } + }; + + /// list of rewrite operations + typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list; + /// map program name to <program counter,program> tuple + typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map; + + class InsertBeforeOp : public RewriteOperation + { + public: + InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text ) + : RewriteOperation(index, text) + { + } + virtual ~InsertBeforeOp() {} + virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) + { + out << text; + return index; + } + virtual const char* type() const { + return "InsertBeforeOp"; + } + }; + + class ReplaceOp : public RewriteOperation + { + public: + ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text) + : RewriteOperation(from,text) + , lastIndex(to) + { + } + virtual ~ReplaceOp() {} + virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) { + out << text; + return lastIndex+1; + } + virtual const char* type() const { + return "ReplaceOp"; + } + protected: + size_t lastIndex; + }; + + class DeleteOp : public ReplaceOp { + public: + DeleteOp(size_t from, size_t to) + : ReplaceOp(from,to,"") + { + } + virtual const char* type() const { + return "DeleteOp"; + } + }; + + TokenStreamRewriteEngine(TokenStream& upstream); + + TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize); + + RefToken nextToken( void ); + + void rollback(size_t instructionIndex) { + rollback(DEFAULT_PROGRAM_NAME, instructionIndex); + } + + /** Rollback the instruction stream for a program so that + * the indicated instruction (via instructionIndex) is no + * longer in the stream. UNTESTED! + */ + void rollback(const ANTLR_USE_NAMESPACE(std)string& programName, + size_t instructionIndex ); + + void deleteProgram() { + deleteProgram(DEFAULT_PROGRAM_NAME); + } + + /** Reset the program so that no instructions exist */ + void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) { + rollback(programName, MIN_TOKEN_INDEX); + } + + void insertAfter( RefTokenWithIndex t, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + insertAfter(DEFAULT_PROGRAM_NAME, t, text); + } + + void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { + insertAfter(DEFAULT_PROGRAM_NAME, index, text); + } + + void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, + RefTokenWithIndex t, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + insertAfter(programName, t->getIndex(), text); + } + + void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, + size_t index, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + // to insert after, just insert before next index (even if past end) + insertBefore(programName,index+1, text); + } + + void insertBefore( RefTokenWithIndex t, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl; + insertBefore(DEFAULT_PROGRAM_NAME, t, text); + } + + void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) { + insertBefore(DEFAULT_PROGRAM_NAME, index, text); + } + + void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, + RefTokenWithIndex t, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + insertBefore(programName, t->getIndex(), text); + } + + void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName, + size_t index, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + addToSortedRewriteList(programName, new InsertBeforeOp(index,text)); + } + + void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) + { + replace(DEFAULT_PROGRAM_NAME, index, index, text); + } + + void replace( size_t from, size_t to, + const ANTLR_USE_NAMESPACE(std)string& text) + { + replace(DEFAULT_PROGRAM_NAME, from, to, text); + } + + void replace( RefTokenWithIndex indexT, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text); + } + + void replace( RefTokenWithIndex from, + RefTokenWithIndex to, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + replace(DEFAULT_PROGRAM_NAME, from, to, text); + } + + void replace(const ANTLR_USE_NAMESPACE(std)string& programName, + size_t from, size_t to, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + addToSortedRewriteList(programName,new ReplaceOp(from, to, text)); + } + + void replace( const ANTLR_USE_NAMESPACE(std)string& programName, + RefTokenWithIndex from, + RefTokenWithIndex to, + const ANTLR_USE_NAMESPACE(std)string& text ) + { + replace(programName, + from->getIndex(), + to->getIndex(), + text); + } + + void remove(size_t index) { + remove(DEFAULT_PROGRAM_NAME, index, index); + } + + void remove(size_t from, size_t to) { + remove(DEFAULT_PROGRAM_NAME, from, to); + } + + void remove(RefTokenWithIndex indexT) { + remove(DEFAULT_PROGRAM_NAME, indexT, indexT); + } + + void remove(RefTokenWithIndex from, RefTokenWithIndex to) { + remove(DEFAULT_PROGRAM_NAME, from, to); + } + + void remove( const ANTLR_USE_NAMESPACE(std)string& programName, + size_t from, size_t to) + { + replace(programName,from,to,""); + } + + void remove( const ANTLR_USE_NAMESPACE(std)string& programName, + RefTokenWithIndex from, RefTokenWithIndex to ) + { + replace(programName,from,to,""); + } + + void discard(int ttype) { + discardMask.add(ttype); + } + + RefToken getToken( size_t i ) + { + return RefToken(tokens.at(i)); + } + + size_t getTokenStreamSize() const { + return tokens.size(); + } + + void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { + ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) ); + } + + void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out, + size_t start, size_t end ) const; + + void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { + toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); + } + + void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, + const ANTLR_USE_NAMESPACE(std)string& programName ) const + { + toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); + } + + void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, + size_t start, size_t end ) const + { + toStream(out, DEFAULT_PROGRAM_NAME, start, end); + } + + void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, + const ANTLR_USE_NAMESPACE(std)string& programName, + size_t firstToken, size_t lastToken ) const; + + void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { + toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); + } + + void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out, + size_t start, size_t end ) const; + + size_t getLastRewriteTokenIndex() const { + return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); + } + + /** Return the last index for the program named programName + * return 0 if the program does not exist or the program is empty. + * (Note this is different from the java implementation that returns -1) + */ + size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const { + program_map::const_iterator rewrites = programs.find(programName); + + if( rewrites == programs.end() ) + return 0; + + const operation_list& prog = rewrites->second; + if( !prog.empty() ) + { + operation_list::const_iterator last = prog.end(); + --last; + return (*last)->getIndex(); + } + return 0; + } + +protected: + /** If op.index > lastRewriteTokenIndexes, just add to the end. + * Otherwise, do linear */ + void addToSortedRewriteList(RewriteOperation* op) { + addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); + } + + void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName, + RewriteOperation* op ); + +protected: + /** Who do we suck tokens from? */ + TokenStream& stream; + /** track index of tokens */ + size_t index; + + /** Track the incoming list of tokens */ + token_list tokens; + + /** You may have multiple, named streams of rewrite operations. + * I'm calling these things "programs." + * Maps String (name) -> rewrite (List) + */ + program_map programs; + + /** Which (whitespace) token(s) to throw out */ + BitSet discardMask; +}; + +#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE +} +#endif + +#endif |