/*
    This file was taken from the KDE 4.x libraries and backported to TQt 3.

    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
    Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.

*/
#ifndef ENCODINGDETECTOR_H
#define ENCODINGDETECTOR_H

#include <tqstring.h>

// Forward declarations: these types are only used through pointers in the
// declarations below, so their full definitions are not required here.
class TQTextCodec;
class TQTextDecoder;
class EncodingDetectorPrivate;

/**
 * @short Guesses the encoding of a raw character array.
 *
 * Searches the raw data for an encoding declaration (HTML meta tags and
 * XML headers). If no declaration is found, heuristics for the configured
 * language/script are used instead.
 *
 * If a Unicode BOM (byte order mark) is found, the encoding is changed
 * regardless of what the user has requested.
 *
 * Intended lifetime of the object: one instance per document.
 *
 * Typical use:
 * \code
 * TQByteArray data;
 * ...
 * EncodingDetector detector;
 * detector.setAutoDetectLanguage(EncodingDetector::Cyrillic);
 * detector.analyze(data);
 * const char* name = detector.encoding();
 * \endcode
 *
 * Note: earlier documentation warned "Do not mix decode() with
 * decodeWithBuffering()". Neither method is declared by this class;
 * feed data through analyze() and query the result with encoding().
 */
class EncodingDetector
{
public:
    /**
     * Describes where the currently selected encoding came from.
     *
     * NOTE(review): the implementation may compare these values numerically
     * to decide whether a new source overrides the current one -- confirm
     * before reordering or inserting enumerators.
     */
    enum EncodingChoiceSource
    {
        DefaultEncoding,
        AutoDetectedEncoding,
        BOM,
        EncodingFromXMLHeader,
        EncodingFromMetaTag,
        EncodingFromHTTPHeader,
        UserChosenEncoding
    };

    /**
     * Script (language group) hint used by the heuristic detection when the
     * data carries no explicit encoding declaration.
     */
    enum AutoDetectScript
    {
        None,
        SemiautomaticDetection,
        Arabic,
        Baltic,
        CentralEuropean,
        ChineseSimplified,
        ChineseTraditional,
        Cyrillic,
        Greek,
        Hebrew,
        Japanese,
        Korean,
        NorthernSaami,
        SouthEasternEurope,
        Thai,
        Turkish,
        Unicode,
        WesternEuropean
    };

    /**
     * Creates a detector with the defaults: codec latin1 (as the HTML spec
     * says), encoding choice source DefaultEncoding and auto-detect script
     * SemiautomaticDetection.
     */
    EncodingDetector();

    /**
     * Creates a detector with an explicit default codec, encoding choice
     * source and auto-detect script.
     *
     * @param codec  default codec to start with
     *               (NOTE(review): ownership is not visible in this header)
     * @param source where this initial encoding choice comes from
     * @param script script hint for the heuristics; defaults to None
     */
    EncodingDetector(TQTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script=None);
    ~EncodingDetector();

    // Disabled accessor kept from the original sources:
    //const TQTextCodec* codec() const;

    /**
    * Selects the encoding to use.
    *
    * @param encoding name of the encoding
    * @param type     the source this encoding choice is attributed to
    * @returns true if the specified encoding was recognized
    */
    bool setEncoding(const char *encoding, EncodingChoiceSource type);

    /**
    * Convenience method.
    * @returns mime name of the detected encoding
    */
    const char* encoding() const;

    /**
    * NOTE(review): undocumented in the original header. Presumably reports
    * whether the current encoding stores text in visual order (as some
    * right-to-left encodings do) -- confirm against the implementation.
    */
    bool visuallyOrdered() const;

    // Disabled string-based variants kept from the original sources:
//     void setAutoDetectLanguage( const TQString& );
//     const TQString& autoDetectLanguage() const;

    /**
    * Sets the script hint used by the heuristic detection.
    */
    void setAutoDetectLanguage( AutoDetectScript );

    /**
    * @returns the script hint currently used by the heuristic detection
    */
    AutoDetectScript autoDetectLanguage() const;

    /**
    * @returns the source of the current encoding choice
    */
    EncodingChoiceSource encodingChoiceSource() const;

    /**
    * Analyzes text data.
    *
    * @param data pointer to the raw bytes to inspect
    * @param len  length of @p data (presumably in bytes -- confirm)
    * @returns true if there was enough data for accurate detection
    */
    bool analyze( const char *data, int len );

    /**
    * Analyzes text data; overload taking the bytes as a TQByteArray.
    *
    * @returns true if there was enough data for accurate detection
    */
    bool analyze( const TQByteArray &data );

    /**
     * Maps a language name to a script.
     *
     * Takes the language name _after_ it has been translated with i18n().
     */
    static AutoDetectScript scriptForName(const TQString& lang);

    /**
     * NOTE(review): undocumented in the original header. Presumably the
     * inverse of scriptForName() -- confirm whether the returned name is
     * translated.
     */
    static TQString nameForScript(AutoDetectScript);

    /**
     * NOTE(review): undocumented in the original header. Presumably maps a
     * language code to the script whose heuristics fit it; the accepted
     * code format is not visible here -- confirm.
     */
    static AutoDetectScript scriptForLanguageCode(const TQString &lang);

    /**
     * NOTE(review): undocumented in the original header. Presumably tells
     * whether heuristic detection is implemented for the given script --
     * confirm against the implementation.
     */
    static bool hasAutoDetectionForScript(AutoDetectScript);

protected:
    /**
     * Checks whether the data really is utf8. Taken from kate.
     *
     * @returns true if the current encoding is utf8 and the text cannot be
     *          in this encoding
     *
     * Review request carried over from the original author: "Please somebody
     * read http://de.wikipedia.org/wiki/UTF-8 and check this code..."
     */
    bool errorsIfUtf8 (const char* data, int length);

    /**
    * @returns TQTextDecoder for the detected encoding
    *          (NOTE(review): ownership of the returned pointer is not
    *          visible in this header)
    */
    TQTextDecoder* decoder();

private:
    // Pointer to the private implementation data; the pointer itself is
    // const, so it cannot be re-seated after construction.
    // NOTE(review): no copy constructor is declared, so the compiler
    // generates one that copies this pointer. If the destructor deletes d
    // (not visible here), copying an EncodingDetector would double-free --
    // confirm and consider disabling copying.
    EncodingDetectorPrivate* const d;
};

#endif