diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-02-15 18:23:18 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-02-15 18:23:18 +0000 |
commit | 3133bbc63a2d32dac638db58fa13e966488e88b5 (patch) | |
tree | 326595b5fefc87fd7cf5ab3905bc00883fe3c919 /lib/libchmfile/libchmfileimpl.h | |
download | kchmviewer-3133bbc63a2d32dac638db58fa13e966488e88b5.tar.gz kchmviewer-3133bbc63a2d32dac638db58fa13e966488e88b5.zip |
Added abandoned KDE3 version of kchmviewer
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/kchmviewer@1090662 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'lib/libchmfile/libchmfileimpl.h')
-rw-r--r-- | lib/libchmfile/libchmfileimpl.h | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/lib/libchmfile/libchmfileimpl.h b/lib/libchmfile/libchmfileimpl.h new file mode 100644 index 0000000..b609b16 --- /dev/null +++ b/lib/libchmfile/libchmfileimpl.h @@ -0,0 +1,286 @@ +/*************************************************************************** + * Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com * + * Portions Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net> * + * Please do not use email address above for bug reports; see * + * the README file * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + ***************************************************************************/ + +#include "chm_lib.h" + +#include "libchmfile.h" +#include "libchmtocimage.h" + +#include <sys/types.h> /* for u_int{32,64}_t */ + +//! Keeps the intermediate search result +class LCHMSearchProgressResult +{ + public: + inline LCHMSearchProgressResult() {} + inline LCHMSearchProgressResult( u_int32_t t, u_int32_t u ) : titleoff(t),urloff(u) {} + + QValueVector<u_int64_t> offsets; + u_int32_t titleoff; + u_int32_t urloff; +}; + +//! An array to keeps the intermediate search results +typedef QT34VECTOR<LCHMSearchProgressResult> LCHMSearchProgressResults; + + +//! CHM files processor; the implementation +class LCHMFileImpl +{ + public: + LCHMFileImpl(); + ~LCHMFileImpl(); + + // Implementations for LCHMFile members + bool loadFile( const QString& archiveName ); + void closeAll(); + + QString title() const { return encodeWithCurrentCodec( m_title ); } + QString homeUrl() const { return encodeWithCurrentCodec( m_home ); } + + bool getFileContentAsString( QString * str, const QString& url, bool internal_encoding = false ); + bool getFileContentAsBinary( QByteArray * data, const QString& url ) const; + bool getFileSize( unsigned int * size, const QString& url ); + + bool enumerateFiles( QStringList * files ); + QString getTopicByUrl ( const QString& url ) const; + + const QPixmap * getBookIconPixmap( unsigned int imagenum ); + + bool setCurrentEncoding( const LCHMTextEncoding * encoding ); + + //! Parse the HHC or HHS file, and fill the context (asIndex is false) or index (asIndex is true) array. + bool parseFileAndFillArray (const QString& file, QT34VECTOR< LCHMParsedEntry > * data, bool asIndex ); + + /*! + * \brief Fast search using the $FIftiMain file in the .chm. + * \param text The text we're looking for. + * \param wholeWords Are we looking for whole words only? + * \param titlesOnly Are we looking for titles only? + * \param results A string-string hashmap that will hold + * the results in case of successful search. The keys are + * the URLs and the values are the page titles. + * \param phrase_search Indicates that word offset information should be kept. + * \return true if the search found something, false otherwise. + */ + bool searchWord( const QString& word, + bool wholeWords, + bool titlesOnly, + LCHMSearchProgressResults& results, + bool phrase_search ); + + /*! + * \brief Finalize the search, resolve the matches, the and generate the results array. + * \param tempres Temporary search results from SearchWord. + * \param results A string-string hashmap that will hold the results in case of successful search. + * The keys are the URLs and the values are the page titles. + */ + void getSearchResults( const LCHMSearchProgressResults& tempres, + QStringList * results, + unsigned int limit_results = 500 ); + + //! Looks up fileName in the archive. + bool ResolveObject( const QString& fileName, chmUnitInfo *ui ) const; + + //! Retrieves an uncompressed chunk of a file in the .chm. + size_t RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize) const; + + //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not. + inline QString encodeWithCurrentCodec (const QString& str) const + { + return (m_textCodec ? m_textCodec->toUnicode (str) : str); + } + + //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not. + inline QString encodeWithCurrentCodec (const char * str) const + { + return (m_textCodec ? m_textCodec->toUnicode (str) : (QString) str); + } + + //! Encode the string from internal files with the currently selected text codec, if possible. + //! Or return as-is, if not. + inline QString encodeInternalWithCurrentCodec (const QString& str) const + { + return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str) : str); + } + + //! Encode the string from internal files with the currently selected text codec, if possible. + //! Or return as-is, if not. + inline QString encodeInternalWithCurrentCodec (const char * str) const + { + return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str) : (QString) str); + } + + //! Helper. Translates from Win32 encodings to generic wxWidgets ones. + const char * GetFontEncFromCharSet (const QString& font) const; + + //! Helper. Returns the $FIftiMain offset of leaf node or 0. + u_int32_t GetLeafNodeOffset(const QString& text, + u_int32_t initalOffset, + u_int32_t buffSize, + u_int16_t treeDepth ); + + //! Helper. Processes the word location code entries while searching. + bool ProcessWLC(u_int64_t wlc_count, + u_int64_t wlc_size, + u_int32_t wlc_offset, + unsigned char ds, + unsigned char dr, + unsigned char cs, + unsigned char cr, + unsigned char ls, + unsigned char lr, + LCHMSearchProgressResults& results, + bool phrase_search ); + + //! Looks up as much information as possible from #WINDOWS/#STRINGS. + bool getInfoFromWindows(); + + //! Looks up as much information as possible from #SYSTEM. + bool getInfoFromSystem(); + + //! Fill the topic-url map + void fillTopicsUrlMap(); + + //! Sets up textCodec + void setupTextCodec (const char * name); + + //! Guess used text encoding, using m_detectedLCID and m_font. Set up m_textCodec + bool guessTextEncoding (); + + //! Change the current CHM encoding for internal files and texts. + //! Encoding could be either simple Qt codepage, or set like CP1251/KOI8, which allows to + //! set up encodings separately for text (first) and internal files (second) + bool changeFileEncoding( const char *qtencoding ); + + //! Convert the word, so it has an appropriate encoding + QCString convertSearchWord ( const QString &src ); + + /*! + * Helper procedure in TOC parsing, decodes the string between the quotes (first or last) with decoding HTML + * entities like í + */ + int findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities ); + + /*! + * Decodes Unicode HTML entities according to current encoding. + */ + QString decodeEntity (const QString& entity ); + + /*! + * \brief Returns the list of all available text encodings. + * \return A pointer to the beginning of the text encoding table. The table could be + * enumerated until language == 0, which means end of table. + * + * \ingroup encoding + */ + static const LCHMTextEncoding * getTextEncodingTable(); + + /*! + * \brief Looks up for encoding by LCID + * \param lcid LCID to look up + * \return A pointer to encoding structure. + * + * \ingroup encoding + */ + static const LCHMTextEncoding * lookupByLCID( short lcid ); + + /*! + * \brief Get the encoding index + * \param enc Encoding + * \return An index in encoding table. getTextEncodingTable() + i gets the encoding. + * + * \ingroup encoding + */ + static int getEncodingIndex( const LCHMTextEncoding * enc); + + /*! + * Normalizes path to search in internal arrays + */ + QString normalizeUrl (const QString& path ) const; + + + // Members + + //! Pointer to the chmlib structure + chmFile * m_chmFile; + + //! Opened file name + QString m_filename; + + //! Home url, got from CHM file + QString m_home; + + //! Context tree filename. Got from CHM file + QString m_topicsFile; + + //! Index filename. Got from CHM file + QString m_indexFile; + + //! Chm Title. Got from CHM file + QString m_title; + + // Localization stuff + //! LCID from CHM file, used in encoding detection + short m_detectedLCID; + + //! font charset from CHM file, used in encoding detection + QString m_font; + + //! Chosen text codec + QTextCodec * m_textCodec; + QTextCodec * m_textCodecForSpecialFiles; + + //! Current encoding + const LCHMTextEncoding * m_currentEncoding; + + //! Map to decode HTML entitles like ´ based on current encoding + QMap<QString, QString> m_entityDecodeMap; + + //! TRUE if /#TOPICS, /#STRINGS, /#URLTBL and /#URLSTR are resolved, and the members below are valid + bool m_lookupTablesValid; + + //! pointer to /#TOPICS + chmUnitInfo m_chmTOPICS; + + //! pointer to /#STRINGS + chmUnitInfo m_chmSTRINGS; + + //! pointer to /#URLTBL + chmUnitInfo m_chmURLTBL; + + //! pointer to /#URLSTR + chmUnitInfo m_chmURLSTR; + + //! Indicates whether the built-in search is available. This is true only when m_lookupTablesValid + //! is TRUE, and m_chmFIftiMain is resolved. + bool m_searchAvailable; + + //! pointer to /$FIftiMain + chmUnitInfo m_chmFIftiMain; + + //! Book TOC icon images storage + LCHMTocImageKeeper m_imagesKeeper; + + //! Map url->topic + QMap< QString, QString > m_url2topics; +}; |