/*
 *  Copyright (c) 2002-2003 Jesper K. Pedersen <blackie@kde.org>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License version 2 as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 **/
%option noyywrap

%{ 
  #include <qstring.h>
  #include "textrangeregexp.h"
  #include "gen_qregexpparser.hh"
#ifdef QT_ONLY
  #include "compat.h"
#endif
  void parseRange( char* txt, int* min, int* max );  
  RegExp* parseCharClass( char* match );
%}
  
/*
 * Named patterns used by the scanner rules.
 *
 *   Escape      a backslash followed by any single character
 *   BackRef     a backslash followed by a decimal number not starting with 0
 *   CharClass   a bracketed character class, optionally negated with a
 *               caret. A closing bracket placed first in the class is a
 *               member of the class, and a backslash always protects the
 *               character that follows it, so an escaped closing bracket
 *               does not terminate the class.
 *   Range       a brace quantifier; both bounds are optional
 *   HexChar     backslash, x and one to four hexadecimal digits
 *   OctChar     backslash, 0 and one to four octal digits
 *   SpecialEsc  one of the escapes a, f, n, r, t, v
 */
Escape   \\.
BackRef  \\[1-9][0-9]*
CharClass \[^?\]?([^\]\\]|\\.)*\]
Range \{[0-9]*(,[0-9]*)?\}
HexChar \\x[0-9a-fA-F]{1,4}
OctChar \\0[0-7]{1,4}
SpecialEsc \\[afnrtv]
%%
"\\b"      { return TOK_PosWordChar; }
"\\B"      { return TOK_PosNonWordChar; }
"\\d"      {
             /* Predefined group: switch on the digit flag of a fresh class. */
             TextRangeRegExp* digitClass = new TextRangeRegExp( false );
             digitClass->setDigit( true );
             qregexplval.regexp = digitClass;
             return TOK_CharClass;
           }
"\\D"      {
             /* Predefined group: switch on the non-digit flag. */
             TextRangeRegExp* nonDigitClass = new TextRangeRegExp( false );
             nonDigitClass->setNonDigit( true );
             qregexplval.regexp = nonDigitClass;
             return TOK_CharClass;
           }
"\\s"      {
             /* Predefined group: switch on the space flag. */
             TextRangeRegExp* spaceClass = new TextRangeRegExp( false );
             spaceClass->setSpace( true );
             qregexplval.regexp = spaceClass;
             return TOK_CharClass;
           }
"\\S"      {
             /* Predefined group: switch on the non-space flag. */
             TextRangeRegExp* nonSpaceClass = new TextRangeRegExp( false );
             nonSpaceClass->setNonSpace( true );
             qregexplval.regexp = nonSpaceClass;
             return TOK_CharClass;
           }
"\\w"      {
             /* Predefined group: switch on the word character flag. */
             TextRangeRegExp* wordClass = new TextRangeRegExp( false );
             wordClass->setWordChar( true );
             qregexplval.regexp = wordClass;
             return TOK_CharClass;
           }
"\\W"      {
             /* Predefined group: switch on the non-word character flag. */
             TextRangeRegExp* nonWordClass = new TextRangeRegExp( false );
             nonWordClass->setNonWordChar( true );
             qregexplval.regexp = nonWordClass;
             return TOK_CharClass;
           }

{SpecialEsc} |
{HexChar}    |
{OctChar}    {
               /* The three literal escape forms share one action: the
                  matched text is stored unchanged as the single member
                  of a fresh class. */
               TextRangeRegExp* literalClass = new TextRangeRegExp( false );
               literalClass->addCharacter( QString::fromLocal8Bit( yytext ) );
               qregexplval.regexp = literalClass;
               return TOK_CharClass;
             }
"."        { return TOK_Dot; }
"$"        { return TOK_Dollar; }
"^"        { return TOK_Carat; }
"(?:"      { return TOK_MagicLeftParent; }
"(?="      { return TOK_PosLookAhead; }
"(?!"      { return TOK_NegLookAhead; }
"("        { return TOK_LeftParen; }
")"        { return TOK_RightParent; }
"|"        { return TOK_Bar; }
"*"        {
             /* A maximum of -1 is what parseRange stores for an open upper bound. */
             qregexplval.range.min = 0;
             qregexplval.range.max = -1;
             return TOK_Quantifier;
           }
"?"        {
             qregexplval.range.min = 0;
             qregexplval.range.max = 1;
             return TOK_Quantifier;
           }
"+"        {
             qregexplval.range.min = 1;
             qregexplval.range.max = -1;
             return TOK_Quantifier;
           }
{Range}     {
              parseRange( yytext, &qregexplval.range.min, &qregexplval.range.max );
              return TOK_Quantifier;
            }
{CharClass} {
              qregexplval.regexp = parseCharClass( yytext );
              return TOK_CharClass;
            }
{BackRef}   {
              /* Skip the leading backslash and convert the digits. */
              qregexplval.backRef = atoi( yytext+1 );
              return TOK_BackRef;
            }
{Escape}    {
              /* Any other escaped character: hand over the character itself. */
              qregexplval.ch = yytext[1];
              return TOK_EscapeChar;
            }
.           {
              qregexplval.ch = yytext[0];
              return TOK_Char;
            }

%%

/**
   Makes the scanner read its input from the given string.

   A null string is scanned as the empty string. The text is handed to the
   scanner in its Latin-1 encoding, so the tokens the scanner produces are
   Latin-1 bytes.

   The buffer left over from the previous call is released first; without
   that, every call would leak the buffer allocated by yy_scan_string().
*/
void setParseData( QString qstr ) {
  const char* cstr = qstr.isNull() ? "" : qstr.latin1();

  // yy_scan_string() allocates a new buffer (with its own copy of the
  // text) on every call, so release the one currently installed, if any.
  if ( YY_CURRENT_BUFFER )
    yy_delete_buffer( YY_CURRENT_BUFFER );

  yy_switch_to_buffer( yy_scan_string( cstr ) );
}

/**
   Parses a brace quantifier such as "{3,4}", "{,7}" or "{5}".

   @param txt the quantifier text, starting at the opening brace and
              ending with the closing brace.
   @param min receives the lower bound; 0 when none is written.
   @param max receives the upper bound; -1 when the range is open ended.

   Results for the possible forms:

       txt     min  max
       {}       0   -1
       {,}      0   -1
       {5}      5    5
       {5,}     5   -1
       {,7}     0    7
       {5,7}    5    7
*/
void parseRange( char* txt, int* min, int* max )
{
  const char* cursor = txt + 1;   // step over the opening brace

  // Digits up to the comma or the closing brace form the lower bound.
  int lower = 0;
  bool haveLower = false;
  for ( ; *cursor != ',' && *cursor != '}'; ++cursor ) {
    lower = lower*10 + *cursor - '0';
    haveLower = true;
  }

  const bool haveComma = ( *cursor == ',' );

  // Digits between the comma and the closing brace form the upper bound.
  int upper = 0;
  bool haveUpper = false;
  if ( haveComma ) {
    for ( ++cursor; *cursor != '}'; ++cursor ) {
      upper = upper*10 + *cursor - '0';
      haveUpper = true;
    }
  }

  *min = lower;
  if ( haveUpper )
    *max = upper;    // {,7} and {5,7}
  else if ( haveLower && !haveComma )
    *max = lower;    // {5}: an exact count
  else
    *max = -1;       // {}, {,} and {5,}: no upper limit
}


/** 
    This function parses a character range like "[^ab1-4]".
*/
RegExp* parseCharClass( char* match )
{
  TextRangeRegExp* res = new TextRangeRegExp( false );
  QString txt = QString::fromLocal8Bit( match );
  txt = txt.mid(1,txt.length()-2);
  
  unsigned int i = 0;
  QChar ch = txt.at(i++);
  QString pendingChar;
  QString thisChar;
  bool charPending = false;
  bool rangePending = false;
  bool flushPending = false;
  
  if ( ch == QChar('^') ) {
    res->setNegate( true );
    ch = txt.at(i++);
  }

  do {
    // If a character is pending, and the next char is '-' then we are
    // possible looking at a range.
    if ( ch == QChar('-') && charPending ) {
      rangePending = true;
      ch = txt.at(i++);
      continue;
    }

    // If we have a pending character, but do not also have a pending
    // range, then the pending character was not part of a range, and
    // should therefore just be added as a single character.
    if ( charPending && !rangePending ) {
      res->addCharacter( pendingChar );
      charPending = false;
    }

    if ( ch == QChar('\\') ) {
      // Handle the cases where an escape character is specified.
      ch = txt.at(i++);
      
      if ( ch == QChar('a') || ch == QChar('f') || ch == QChar('n') || ch == QChar('r') || ch == QChar('t') || ch == QChar('v') ) {
        // These are just seen as normal characters.
        thisChar = QString::fromLocal8Bit("\\") + ch;
      }
      else if ( ch == QChar('d') ) {  
        // The following characters represent character groups. If any of
        // these are seen in a range, then the range is ignored, thus [a-\s]
        // matches an 'a', a '-', and a space (\s means space).
        res->setDigit( true );
        flushPending = true;
      }
      else if ( ch == QChar('D') ) {
        res->setNonDigit( true );
        flushPending = true;
      }
      else if ( ch == QChar('s') ) {
        res->setSpace( true );
        flushPending = true;
      }
      else if ( ch == QChar('S') ) {
        res->setNonSpace( true );
        flushPending = true;
      }
      else if ( ch == QChar('w') ) {
        res->setWordChar( true );
        flushPending = true;
      }
      else if ( ch == QChar('W') ) {
        res->setNonWordChar( true );
        flushPending = true;
      }
      else if ( ch == QChar('x') || ch == QChar('X') ) { 
        // This is a hexidecimal character: \xHHHH
        QString str;
        for ( int j=0; j<4; j++) {
          ch = txt.at(i++);
            if ( ch == 'a' || ch == 'A' || ch == 'b' || ch == 'B' || ch == 'c' || ch == 'C' || ch == 'd' || ch == 'D' || 
                 ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F' ||
                 ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' || 
                 ch == '8' || ch == '9' )
              str += ch;
            else
              i--;
        }
        thisChar = QString::fromLocal8Bit("\\x") + str;
      }
      else if ( ch == QChar('0') ) {
        // This is an octal character
        QString str;
        for ( int j=0; j<4; j++) {
          ch = txt.at(i++);
          if ( ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' )
            str += ch;
          else
            i--;
        }
        thisChar = QString::fromLocal8Bit("\\x") + str ;
      }
      else {
        // Anything else escaped just means the character itself.
        thisChar = ch;
      }
    }
    else {
      // A non escaped character.
      thisChar = ch;
    }
    
    // The characters \s,\S,\w,\W,\d or \D, can not be part of a range,
    // thus if they are meet in what looks like a range, then the
    // characters of the range is justed seen as normal non range
    // characters. thus [a-\s] matches an 'a', a '-', and a space (\s means
    // space). 
    if ( flushPending ) {
      if ( charPending ) 
        res->addCharacter( pendingChar );
      if ( rangePending ) 
        res->addCharacter( QString::fromLocal8Bit("-") );
      flushPending = false; 
      charPending = false;
      rangePending = false;
    }
    else {
      if ( rangePending ) {
        res->addRange( pendingChar, thisChar );
        charPending = false;
        rangePending = false;
      }
      else {
        pendingChar = thisChar;
        charPending = true;
      }
    }
    ch = txt.at(i++);
  }
  while ( ch != QChar(']') && i <= txt.length() );

  if ( charPending ) 
    res->addCharacter( pendingChar );
  if ( rangePending ) 
    res->addCharacter( QString::fromLocal8Bit("-") );

  return res;
}