Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.

BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdepim@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
author: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
committer: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
commit: 460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree: 67208f7c145782a7e90b123b982ca78d88cc2c87 /libkmime/kmime_codec_qp.cpp
download: tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.tar.gz
tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.zip
1 files changed, 644 insertions, 0 deletions
diff --git a/libkmime/kmime_codec_qp.cpp b/libkmime/kmime_codec_qp.cpp
new file mode 100644
index 00000000..c867a634
--- /dev/null
+++ b/libkmime/kmime_codec_qp.cpp
@@ -0,0 +1,644 @@
+/*  -*- c++ -*-
+    kmime_codec_qp.cpp
+
+    This file is part of KMime, the KDE internet mail/usenet news message library.
+    Copyright (c) 2002 Marc Mutz <mutz@kde.org>
+
+    KMime is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License, version 2, as
+    published by the Free Software Foundation.
+
+    KMime is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+    In addition, as a special exception, the copyright holders give
+    permission to link the code of this library with any edition of
+    the Qt library by Trolltech AS, Norway (or with modified versions
+    of Qt that use the same license as Qt), and distribute linked
+    combinations including the two.  You must obey the GNU General
+    Public License in all respects for all of the code used other than
+    Qt.  If you modify this file, you may extend this exception to
+    your version of the file, but you are not obligated to do so.  If
+    you do not wish to do so, delete this exception statement from
+    your version.
+*/
+
+#include "kmime_codec_qp.h"
+
+#include "kmime_util.h"
+
+#include <kdebug.h>
+
+#include <cassert>
+
+using namespace KMime;
+
+namespace KMime {
+
+// some helpful functions:
+
+// Maps a value to a hexadecimal digit character: values above 9 give
+// 'A' + (value - 10), everything else gives '0' + value. No range
+// check is done on value.
+static inline char binToHex( uchar value ) {
+  const char base = ( value > 9 ) ? char( 'A' - 10 ) : '0';
+  return value + base;
+}
+
+// Returns ch shifted right by four bits, i.e. its upper nibble.
+static inline uchar highNibble( uchar ch ) {
+  const uchar upper = ch >> 4;
+  return upper;
+}
+
+// Returns the lower four bits of ch.
+static inline uchar lowNibble( uchar ch ) {
+  const uchar lower = ch & 0x0F;
+  return lower;
+}
+
+// Returns false for '?' and for every char below SPACE other than HT;
+// returns true for all other chars.
+static inline bool keep( uchar ch ) {
+  if ( ch == '?' )
+    return false;
+  const bool isControl = ( ch < ' ' ) && ( ch != '\t' );
+  return !isControl;
+}
+
+//
+// QuotedPrintableCodec
+//
+
+// Streaming quoted-printable encoder. Input chars are staged in a
+// 16-entry ring buffer (fillInputBuffer), taken out one at a time
+// (processNextChar) and then written verbatim or as "=XY", preceded
+// by a soft line break where necessary (createOutputBuffer).
+class QuotedPrintableEncoder : public Encoder {
+  char mInputBuffer[16];             // ring buffer of pending input chars
+  uchar mCurrentLineLength;          // 0..76
+  uchar mAccu;                       // char taken out by processNextChar()
+  uint mInputBufferReadCursor  : 4;  // 0..15
+  uint mInputBufferWriteCursor : 4;  // 0..15
+  enum {
+    Never, AtBOL, Definitely
+  } mAccuNeedsEncoding    : 2;       // how mAccu has to be written
+  bool mSawLineEnd        : 1;       // fillInputBuffer() read an LF
+  bool mSawCR             : 1;       // last char read was a CR
+  bool mFinishing         : 1;       // finish() has been called
+  bool mFinished          : 1;       // finish() found nothing left to process
+protected:
+  friend class QuotedPrintableCodec;
+  QuotedPrintableEncoder( bool withCRLF=false )
+    : Encoder( withCRLF ),
+      mCurrentLineLength(0),
+      mAccu(0),
+      mInputBufferReadCursor(0),
+      mInputBufferWriteCursor(0),
+      mAccuNeedsEncoding(Never),
+      mSawLineEnd(false),
+      mSawCR(false),
+      mFinishing(false),
+      mFinished(false) {}
+
+  // True for chars that are always written as "=XY": '=' itself,
+  // everything above '~', and everything below SPACE except HT.
+  bool needsEncoding( uchar ch ) {
+    if ( ch == '=' || ch > '~' )
+      return true;
+    return ( ch < ' ' && ch != '\t' );
+  }
+  // True for SPACE and HT.
+  bool needsEncodingAtEOL( uchar ch ) {
+    return ( ch == '\t' || ch == ' ' );
+  }
+  // True for 'F', '.' and '-'.
+  bool needsEncodingAtBOL( uchar ch ) {
+    switch ( ch ) {
+    case 'F':
+    case '.':
+    case '-':
+      return true;
+    default:
+      return false;
+    }
+  }
+  bool fillInputBuffer( const char* & scursor, const char * const send );
+  bool processNextChar();
+  void createOutputBuffer( char* & dcursor, const char * const dend );
+public:
+  virtual ~QuotedPrintableEncoder() {}
+
+  bool encode( const char* & scursor, const char * const send,
+               char* & dcursor, const char * const dend );
+
+  bool finish( char* & dcursor, const char * const dend );
+};
+
+
+// Streaming decoder shared by the quoted-printable, RFC 2047 "Q" and
+// RFC 2231 codecs; the variants differ only in the constructor
+// arguments the codecs pass in.
+//
+// decode() tracks its position inside a hex escape with two flags:
+//   - mInsideHexChar == false:
+//       normal text
+//   - mInsideHexChar == true, mHaveAccu == false:
+//       saw the leading escape char
+//   - mInsideHexChar == true, mHaveAccu == true:
+//       saw the first nibble; it is held in the upper four bits of mAccu
+class QuotedPrintableDecoder : public Decoder {
+  /** The char that introduces a hex escape. */
+  const char mEscapeChar;
+  /** The offending char of a malformed escape, kept until it has been
+      flushed; zero when there is none. */
+  char mBadChar;
+  /** Holds the first nibble of a hex escape, shifted into the upper
+      four bits, or zero. */
+  uchar mAccu;
+  /** When set, '_' is decoded to 0x20. */
+  const bool mQEncoding;
+  /** See the state table above. */
+  bool mInsideHexChar;
+  /** Set while decode() is writing out a malformed escape. */
+  bool mFlushing;
+  /** Set after a CR has been read, until the next char is examined. */
+  bool mExpectLF;
+  /** See the state table above. */
+  bool mHaveAccu;
+protected:
+  friend class QuotedPrintableCodec;
+  friend class Rfc2047QEncodingCodec;
+  friend class Rfc2231EncodingCodec;
+  QuotedPrintableDecoder( bool withCRLF=false,
+                          bool aQEncoding=false,
+                          char aEscapeChar='=' )
+    : Decoder( withCRLF ),
+      mEscapeChar(aEscapeChar), mBadChar(0), mAccu(0),
+      mQEncoding(aQEncoding),
+      mInsideHexChar(false), mFlushing(false),
+      mExpectLF(false), mHaveAccu(false) {}
+public:
+  virtual ~QuotedPrintableDecoder() {}
+
+  bool decode( const char* & scursor, const char * const send,
+               char* & dcursor, const char * const dend );
+  // ### really no finishing needed???
+  // NOTE(review): this writes nothing and always reports success, so
+  // state still pending when the input ends is not emitted here.
+  bool finish( char* &, const char * const ) { return true; }
+};
+
+
+// Streaming encoder used for both the RFC 2047 "Q" encoding (escape
+// char '=') and the RFC 2231 encoding (escape char '%').
+class Rfc2047QEncodingEncoder : public Encoder {
+  uchar      mAccu;                 // char currently being encoded
+  uchar      mStepNo;               // 0: read next char, 1: hi nibble, 2: lo nibble
+  const char mEscapeChar;           // '=' or '%'
+  bool       mInsideFinishing : 1;  // finish() has been called
+protected:
+  friend class Rfc2047QEncodingCodec;
+  friend class Rfc2231EncodingCodec;
+  Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' )
+    : Encoder( withCRLF ),
+      mAccu(0),
+      mStepNo(0),
+      mEscapeChar( aEscapeChar ),
+      mInsideFinishing( false )
+  {
+    // any other escape char might break an optimization in ::encode.
+    assert( aEscapeChar == '=' || aEscapeChar == '%' );
+  }
+
+  // Decides whether ch has to be written in escaped form.
+  // This code assumes that isEText( mEscapeChar ) == false!
+  bool needsEncoding( uchar ch ) {
+    // {|}~ DEL and 8bit chars are above 'z' and always need encoding;
+    // so does everything else that isEText() rejects.
+    const bool plainText = ( ch <= 'z' ) && isEText( ch );
+    if ( !plainText )
+      return true;
+    // '*' and '/' are not allowed in rfc2231 encoding:
+    return mEscapeChar == '%' && ( ch == '*' || ch == '/' );
+  }
+
+public:
+  virtual ~Rfc2047QEncodingEncoder() {}
+
+  bool encode( const char* & scursor, const char * const send,
+               char* & dcursor, const char * const dend );
+  bool finish( char* & dcursor, const char * const dend );
+};
+
+// Upper bound for the number of bytes that decoding insize input
+// bytes can produce. It uses no member variables, so it is a static
+// free function that the maxDecodedSizeFor() members of the codecs
+// in this file forward to.
+static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF ) {
+  // Worst case nothing is encoded, so every input byte is one output
+  // byte; with CRLF conversion every one of them might be a \n that
+  // has to become \r\n.
+  const int bytesPerInputByte = withCRLF ? 2 : 1;
+
+  // there might be an accu plus escape
+  const int pendingEscapeBytes = 2;
+
+  return insize * bytesPerInputByte + pendingEscapeBytes;
+}
+
+// Returns a newly allocated QuotedPrintableEncoder; withCRLF is
+// handed on to its constructor.
+Encoder * QuotedPrintableCodec::makeEncoder( bool withCRLF ) const {
+  Encoder * const encoder = new QuotedPrintableEncoder( withCRLF );
+  return encoder;
+}
+
+// Returns a newly allocated QuotedPrintableDecoder using the default
+// Q-encoding flag and escape char; withCRLF is handed on.
+Decoder * QuotedPrintableCodec::makeDecoder( bool withCRLF ) const {
+  Decoder * const decoder = new QuotedPrintableDecoder( withCRLF );
+  return decoder;
+}
+
+// Forwards to the shared size estimate for the decoder in this file.
+int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int upperBound =
+    QuotedPrintableDecoder_maxDecodedSizeFor( insize, withCRLF );
+  return upperBound;
+}
+
+// Returns a newly allocated Rfc2047QEncodingEncoder with its default
+// escape char; withCRLF is handed on.
+Encoder * Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const {
+  Encoder * const encoder = new Rfc2047QEncodingEncoder( withCRLF );
+  return encoder;
+}
+
+// Returns a newly allocated QuotedPrintableDecoder with Q-encoding
+// handling switched on and the default escape char.
+Decoder * Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const {
+  const bool qEncoding = true;
+  return new QuotedPrintableDecoder( withCRLF, qEncoding );
+}
+
+// Forwards to the shared size estimate for the decoder in this file.
+int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int upperBound =
+    QuotedPrintableDecoder_maxDecodedSizeFor( insize, withCRLF );
+  return upperBound;
+}
+
+// Returns a newly allocated Rfc2047QEncodingEncoder that uses '%' as
+// its escape char; withCRLF is handed on.
+Encoder * Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const {
+  const char escapeChar = '%';
+  return new Rfc2047QEncodingEncoder( withCRLF, escapeChar );
+}
+
+// Returns a newly allocated QuotedPrintableDecoder with Q-encoding
+// handling switched on and '%' as its escape char.
+Decoder * Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const {
+  const bool qEncoding = true;
+  const char escapeChar = '%';
+  return new QuotedPrintableDecoder( withCRLF, qEncoding, escapeChar );
+}
+
+// Forwards to the shared size estimate for the decoder in this file.
+int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int upperBound =
+    QuotedPrintableDecoder_maxDecodedSizeFor( insize, withCRLF );
+  return upperBound;
+}
+
+  /********************************************************/
+  /********************************************************/
+  /********************************************************/
+
+// Decodes the bytes in [scursor, send) into [dcursor, dend), advancing
+// both cursors as input is consumed and output is produced.
+//
+// Hex escapes introduced by mEscapeChar are turned back into the byte
+// they denote; an escape char directly followed by LF or CRLF is a
+// soft line break and produces no output; with mQEncoding set, '_'
+// decodes to 0x20. Malformed escapes are replayed to the output by
+// the flushing state described inside the loop.
+//
+// All decoder state lives in members, so the input may be handed in
+// in pieces over several calls.
+//
+// Returns true iff scursor has reached send when the loop ends.
+bool QuotedPrintableDecoder::decode( const char* & scursor, const char * const send,
+                                     char* & dcursor, const char * const dend ) {
+  if ( mWithCRLF )
+    kdWarning() << "CRLF output for decoders isn't yet supported!" << endl;
+
+  while ( scursor != send && dcursor != dend ) {
+    if ( mFlushing ) {
+      // we have to flush chars in the aftermath of an decoding
+      // error. The way to request a flush is to
+      // - store the offending character in mBadChar and
+      // - set mFlushing to true.
+      // The supported cases are (H: hexchar, X: bad char):
+      // =X, =HX, CR
+      // mBadChar is only written out if it is not by itself illegal in
+      // quoted-printable (e.g. CTLs, 8Bits).
+      // A fast way to suppress mBadChar output is to set it to NUL.
+      //
+      // Each pass through here writes at most one char, so the loop
+      // condition re-checks the output space before the next one.
+      if ( mInsideHexChar ) {
+        // output '='
+        *dcursor++ = mEscapeChar;
+        mInsideHexChar = false;
+      } else if ( mHaveAccu ) {
+        // output the high nibble of the accumulator:
+        *dcursor++ = binToHex( highNibble( mAccu ) );
+        mHaveAccu = false;
+        mAccu = 0;
+      } else {
+        // output mBadChar
+        assert( mAccu == 0 );
+        if ( mBadChar ) {
+          // '!'..'~' without '=':
+          if ( mBadChar >= '>' && mBadChar <= '~' ||
+               mBadChar >= '!' && mBadChar <= '<' )
+            *dcursor++ = mBadChar;
+          mBadChar = 0;
+        }
+        mFlushing = false;
+      }
+      continue;
+    }
+    assert( mBadChar == 0 );
+
+    uchar ch = *scursor++;
+    uchar value = 255;
+
+    if ( mExpectLF && ch != '\n' ) {
+      kdWarning() << "QuotedPrintableDecoder: "
+        "illegally formed soft linebreak or lonely CR!" << endl;
+      // Abandon whatever escape sequence the CR interrupted. That
+      // includes a first nibble that may already have been read
+      // ("=H" CR X): it must be dropped as well, otherwise it would be
+      // merged into the next, unrelated escape sequence.
+      mInsideHexChar = false;
+      mExpectLF = false;
+      mHaveAccu = false;
+      mAccu = 0;
+    }
+
+    if ( mInsideHexChar ) {
+      // next char(s) represent nibble instead of itself:
+      if ( ch <= '9' ) {
+        if ( ch >= '0' ) {
+          value = ch - '0';
+        } else {
+          switch ( ch ) {
+          case '\r':
+            mExpectLF = true;
+            break;
+          case '\n':
+            // soft line break, but only if mAccu is NUL.
+            if ( !mHaveAccu ) {
+              mExpectLF = false;
+              mInsideHexChar = false;
+              break;
+            }
+            // else fall through
+          default:
+            kdWarning() << "QuotedPrintableDecoder: "
+              "illegally formed hex char! Outputting verbatim." << endl;
+            mBadChar = ch;
+            mFlushing = true;
+          }
+          continue;
+        }
+      } else { // ch > '9'
+        if ( ch <= 'F' ) {
+          if ( ch >= 'A' ) {
+            value = 10 + ch - 'A';
+          } else { // [:-@]
+            mBadChar = ch;
+            mFlushing = true;
+            continue;
+          }
+        } else { // ch > 'F'
+          if ( ch <= 'f' && ch >= 'a' ) {
+            value = 10 + ch - 'a';
+          } else {
+            mBadChar = ch;
+            mFlushing = true;
+            continue;
+          }
+        }
+      }
+
+      assert( value < 16 );
+      assert( mBadChar == 0 );
+      assert( !mExpectLF );
+
+      if ( mHaveAccu ) {
+        // second nibble: the decoded byte is complete
+        *dcursor++ = char( mAccu | value );
+        mAccu = 0;
+        mHaveAccu = false;
+        mInsideHexChar = false;
+      } else {
+        // first nibble: park it in the upper half of mAccu
+        mHaveAccu = true;
+        mAccu = value << 4;
+      }
+    } else { // not mInsideHexChar
+      if ( ch <= '~' && ch >= ' ' || ch == '\t' ) {
+        if ( ch == mEscapeChar ) {
+          mInsideHexChar = true;
+        } else if ( mQEncoding && ch == '_' ) {
+          *dcursor++ = char(0x20);
+        } else {
+          *dcursor++ = char(ch);
+        }
+      } else if ( ch == '\n' ) {
+        *dcursor++ = '\n';
+        mExpectLF = false;
+      } else if ( ch == '\r' ) {
+        // not written; only remembered until the next char is seen
+        mExpectLF = true;
+      } else {
+        kdWarning() << "QuotedPrintableDecoder: " << ch <<
+          " illegal character in input stream! Ignoring." << endl;
+      }
+    }
+  }
+
+  return (scursor == send);
+}
+
+// Moves chars from [scursor, send) into the input ring buffer until
+// the buffer is full, the input is used up, or an LF has been read.
+// The LF itself, and a CR directly in front of it, do not end up in
+// the buffer. Returns true (and sets mSawLineEnd) when a line ending
+// was found, or when an earlier one is still waiting to be handled;
+// otherwise clears mSawLineEnd and returns false.
+bool QuotedPrintableEncoder::fillInputBuffer( const char* & scursor,
+                                              const char * const send ) {
+  // The tail of the previous line has not been written out yet, so
+  // nothing more is read for now:
+  if ( mSawLineEnd )
+    return true;
+
+  for ( ;; ) {
+    const bool bufferFull =
+      ( mInputBufferWriteCursor + 1 ) % 16 == mInputBufferReadCursor;
+    if ( bufferFull || scursor == send )
+      break;
+
+    const char next = *scursor++;
+    if ( next == '\n' ) {
+      if ( mSawCR ) {
+        // the CR was stored on the previous pass; take it back out:
+        mSawCR = false;
+        assert( mInputBufferWriteCursor != mInputBufferReadCursor );
+        mInputBufferWriteCursor--;
+      }
+      mSawLineEnd = true;
+      return true; // saw CRLF or LF
+    }
+
+    // A CR is stored like any other char, but remembered in case an
+    // LF follows.
+    mSawCR = ( next == '\r' );
+    mInputBuffer[ mInputBufferWriteCursor ] = next;
+    mInputBufferWriteCursor++;
+  }
+
+  mSawLineEnd = false;
+  return false; // didn't see a line ending...
+}
+
+// Takes the next char out of the input ring buffer into mAccu and
+// records in mAccuNeedsEncoding how it has to be written. Returns
+// false, without touching the buffer, when there is no char to
+// process right now.
+bool QuotedPrintableEncoder::processNextChar() {
+
+  // While neither a line end has been seen nor finish() been called,
+  // the chars still to come could be a line break, which changes how
+  // the chars in front of it are encoded. So the buffer is only
+  // drained down to this many chars in that case:
+  const int minBufferFillWithoutLineEnd = 4;
+
+  assert( mOutputBufferCursor == 0 );
+
+  // number of chars waiting in the ring buffer:
+  const int pending =
+    ( int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) + 16 ) % 16;
+
+  assert( pending >=0 && pending <= 15 );
+
+  const bool endIsKnown = mFinishing || mSawLineEnd;
+  if ( !endIsKnown && pending < minBufferFillWithoutLineEnd )
+    return false;
+
+  // nothing buffered at all:
+  if ( mInputBufferReadCursor == mInputBufferWriteCursor )
+    return false;
+
+  mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
+
+  // The char is the last one of its line (or of the input) if the end
+  // is known and it was the only one left in the buffer.
+  if ( needsEncoding( mAccu ) ||
+       ( endIsKnown && pending == 1 && needsEncodingAtEOL( mAccu ) ) ) {
+    mAccuNeedsEncoding = Definitely;
+  } else if ( needsEncodingAtBOL( mAccu ) ) {
+    mAccuNeedsEncoding = AtBOL;
+  } else {
+    mAccuNeedsEncoding = Never;
+  }
+
+  return true;
+}
+
+// Writes mAccu either verbatim or as "=XY", following what
+// processNextChar() stored in mAccuNeedsEncoding, and puts a soft
+// line break in front of it when it would not fit on the current
+// output line any more.
+void QuotedPrintableEncoder::createOutputBuffer( char* & dcursor,
+                                                 const char * const dend )
+{
+  const int maxLineLength = 76; // rfc 2045
+
+  assert( mOutputBufferCursor == 0 );
+
+  const bool lastOneOnThisLine = mSawLineEnd
+    && mInputBufferReadCursor == mInputBufferWriteCursor;
+
+  int neededSpace = ( mAccuNeedsEncoding == Definitely ) ? 3 : 1;
+
+  // unless the input line ends here, keep one column free for the
+  // '=' of a soft line break:
+  if ( !lastOneOnThisLine )
+    ++neededSpace;
+
+  if ( mCurrentLineLength + neededSpace > maxLineLength ) {
+    // not enough room left on the current line, insert soft line break:
+    write( '=', dcursor, dend );
+    writeCRLF( dcursor, dend );
+    mCurrentLineLength = 0;
+  }
+
+  // Evaluated after a possible line break above, since a char marked
+  // AtBOL is only written verbatim when it is not first on its line.
+  const bool verbatim = ( mAccuNeedsEncoding == Never )
+    || ( mAccuNeedsEncoding == AtBOL && mCurrentLineLength != 0 );
+
+  if ( !verbatim ) {
+    write( '=', dcursor, dend );
+    write( binToHex( highNibble( mAccu ) ), dcursor, dend );
+    write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
+    mCurrentLineLength += 3;
+    return;
+  }
+
+  write( mAccu, dcursor, dend );
+  mCurrentLineLength++;
+}
+
+
+// Encodes input from [scursor, send) into [dcursor, dend), advancing
+// both cursors. Returns true iff scursor has reached send; once
+// finish() has been called it returns true without doing anything.
+bool QuotedPrintableEncoder::encode( const char* & scursor, const char * const send,
+                                     char* & dcursor, const char * const dend )
+{
+  // support probing by the caller:
+  if ( mFinishing ) return true;
+
+  while ( scursor != send && dcursor != dend ) {
+    // get rid of output left over from the previous round first:
+    if ( mOutputBufferCursor ) {
+      const bool drained = flushOutputBuffer( dcursor, dend );
+      if ( !drained )
+        return (scursor == send);
+    }
+
+    assert( mOutputBufferCursor == 0 );
+
+    // top up the input buffer until it is full or a line end has been
+    // read, whichever happens first:
+    fillInputBuffer( scursor, send );
+
+    if ( processNextChar() ) {
+      // there was one...
+      createOutputBuffer( dcursor, dend );
+      continue;
+    }
+
+    const bool lineCompleted = mSawLineEnd
+      && mInputBufferWriteCursor == mInputBufferReadCursor;
+    if ( !lineCompleted )
+      // we are supposedly finished with this input block:
+      break;
+
+    // emit the hard line break:
+    writeCRLF( dcursor, dend );
+    // tell fillInputBuffer() that the next line may be read:
+    mSawLineEnd = false;
+    mCurrentLineLength = 0;
+  }
+
+  // running out of _input_ is no reason to stop _writing_; push out
+  // as much as still fits:
+  if ( mOutputBufferCursor ) flushOutputBuffer( dcursor, dend );
+
+  return (scursor == send);
+
+} // encode
+
+// Marks the encoder as finishing and writes what is still held in
+// the input buffer into [dcursor, dend). Once nothing is left to
+// process, mFinished is set and the result of flushOutputBuffer()
+// is returned.
+bool QuotedPrintableEncoder::finish( char* & dcursor,
+                                     const char * const dend ) {
+  mFinishing = true;
+
+  // an earlier call already got through the input buffer; only the
+  // output buffer may be left:
+  if ( mFinished )
+    return flushOutputBuffer( dcursor, dend );
+
+  while ( dcursor != dend ) {
+    if ( mOutputBufferCursor ) {
+      const bool drained = flushOutputBuffer( dcursor, dend );
+      if ( !drained )
+        return false;
+    }
+
+    assert( mOutputBufferCursor == 0 );
+
+    if ( processNextChar() ) {
+      // there was one...
+      createOutputBuffer( dcursor, dend );
+      continue;
+    }
+
+    const bool lineCompleted = mSawLineEnd
+      && mInputBufferWriteCursor == mInputBufferReadCursor;
+    if ( !lineCompleted ) {
+      mFinished = true;
+      return flushOutputBuffer( dcursor, dend );
+    }
+
+    // emit the hard line break:
+    writeCRLF( dcursor, dend );
+    mSawLineEnd = false;
+    mCurrentLineLength = 0;
+  }
+
+  return mFinished && !mOutputBufferCursor;
+
+} // finish
+
+
+// Encodes input from [scursor, send) into [dcursor, dend), writing
+// one output char per loop pass and advancing both cursors. mStepNo
+// remembers how far the escape for mAccu has been written, so an
+// escape may be split across calls. Returns true iff scursor has
+// reached send; once finish() has been called it returns true
+// without doing anything.
+bool Rfc2047QEncodingEncoder::encode( const char* & scursor, const char * const send,
+                                      char* & dcursor, const char * const dend )
+{
+  if ( mInsideFinishing ) return true;
+
+  while ( scursor != send && dcursor != dend ) {
+    if ( mStepNo == 0 ) {
+      // read the next char and decide if and how to encode:
+      mAccu = *scursor++;
+      if ( !needsEncoding( mAccu ) ) {
+        *dcursor++ = char(mAccu);
+      } else if ( mEscapeChar == '=' && mAccu == 0x20 ) {
+        // shortcut encoding for 0x20 (latin-1/us-ascii SPACE)
+        // (not for rfc2231 encoding)
+        *dcursor++ = '_';
+      } else {
+        // needs =XY encoding - write escape char:
+        *dcursor++ = mEscapeChar;
+        mStepNo = 1;
+      }
+      continue;
+    }
+
+    uchar nibble;
+    if ( mStepNo == 1 ) {
+      // hi-nibble comes first:
+      nibble = highNibble(mAccu);
+      mStepNo = 2;
+    } else if ( mStepNo == 2 ) {
+      // then the lo-nibble:
+      nibble = lowNibble(mAccu);
+      mStepNo = 0;
+    } else {
+      assert( 0 );
+    }
+
+    // and write:
+    *dcursor++ = binToHex( nibble );
+  }
+
+  return (scursor == send);
+} // encode
+
+#include <qstring.h>
+
+// Marks the encoder as finishing and writes the hex digits of mAccu
+// that encode() has not written yet, as far as [dcursor, dend) has
+// room. Returns true once no digit is outstanding.
+bool Rfc2047QEncodingEncoder::finish( char* & dcursor, const char * const dend ) {
+  mInsideFinishing = true;
+
+  // write the last bits of mAccu, if any:
+  while ( mStepNo != 0 && dcursor != dend ) {
+    uchar nibble;
+    if ( mStepNo == 1 ) {
+      // hi-nibble comes first:
+      nibble = highNibble(mAccu);
+      mStepNo = 2;
+    } else if ( mStepNo == 2 ) {
+      // then the lo-nibble:
+      nibble = lowNibble(mAccu);
+      mStepNo = 0;
+    } else {
+      assert( 0 );
+    }
+
+    // and write:
+    *dcursor++ = binToHex( nibble );
+  }
+
+  return mStepNo == 0;
+}
+
+
+
+
+} // namespace KMime
author	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
committer	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
commit	460c52653ab0dcca6f19a4f492ed2c5e4e963ab0 (patch)
tree	67208f7c145782a7e90b123b982ca78d88cc2c87 /libkmime/kmime_codec_qp.cpp
download	tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.tar.gz tdepim-460c52653ab0dcca6f19a4f492ed2c5e4e963ab0.zip