/***************************************************************************
 *   Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com       *
 *   Portions Copyright (C) 2003 Razvan Cojocaru                           *
 *   Please do not use email address above for bug reports; see            *
 *   the README file                                                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 ***************************************************************************/

#include "chm_lib.h"

#include "libchmfile.h"
#include "libchmtocimage.h"

#include <sys/types.h> /* for u_int{32,64}_t */


//! Keeps the intermediate search result
class LCHMSearchProgressResult
{
    public:
        //! Creates an empty result. Both offsets are zeroed so that a
        //! default-constructed entry never carries indeterminate values.
        inline LCHMSearchProgressResult() : titleoff( 0 ), urloff( 0 ) {}

        //! Creates a result with the title offset \a t and the URL offset \a u.
        inline LCHMSearchProgressResult( u_int32_t t, u_int32_t u ) : titleoff( t ), urloff( u ) {}

        //! Offsets collected for this result.
        //! NOTE(review): presumably the word offsets kept for phrase search; confirm against ProcessWLC.
        TQValueVector<u_int64_t>    offsets;

        //! Title offset. NOTE(review): presumably an offset into a CHM internal table; confirm.
        u_int32_t                   titleoff;

        //! URL offset. NOTE(review): presumably an offset into a CHM internal table; confirm.
        u_int32_t                   urloff;
};

//! An array that keeps the intermediate search results
typedef QT34VECTOR<LCHMSearchProgressResult>    LCHMSearchProgressResults;


//! 
CHM files processor; the implementation class LCHMFileImpl { public: LCHMFileImpl(); ~LCHMFileImpl(); // Implementations for LCHMFile members bool loadFile( const TQString& archiveName ); void closeAll(); TQString title() const { return encodeWithCurrentCodec( m_title ); } TQString homeUrl() const { return encodeWithCurrentCodec( m_home ); } bool getFileContentAsString( TQString * str, const TQString& url, bool internal_encoding = false ); bool getFileContentAsBinary( TQByteArray * data, const TQString& url ) const; bool getFileSize( unsigned int * size, const TQString& url ); bool enumerateFiles( TQStringList * files ); TQString getTopicByUrl ( const TQString& url ) const; const TQPixmap * getBookIconPixmap( unsigned int imagenum ); bool setCurrentEncoding( const LCHMTextEncoding * encoding ); //! Parse the HHC or HHS file, and fill the context (asIndex is false) or index (asIndex is true) array. bool parseFileAndFillArray (const TQString& file, QT34VECTOR< LCHMParsedEntry > * data, bool asIndex ); /*! * \brief Fast search using the $FIftiMain file in the .chm. * \param text The text we're looking for. * \param wholeWords Are we looking for whole words only? * \param titlesOnly Are we looking for titles only? * \param results A string-string hashmap that will hold * the results in case of successful search. The keys are * the URLs and the values are the page titles. * \param phrase_search Indicates that word offset information should be kept. * \return true if the search found something, false otherwise. */ bool searchWord( const TQString& word, bool wholeWords, bool titlesOnly, LCHMSearchProgressResults& results, bool phrase_search ); /*! * \brief Finalize the search, resolve the matches, the and generate the results array. * \param tempres Temporary search results from SearchWord. * \param results A string-string hashmap that will hold the results in case of successful search. * The keys are the URLs and the values are the page titles. 
*/ void getSearchResults( const LCHMSearchProgressResults& tempres, TQStringList * results, unsigned int limit_results = 500 ); //! Looks up fileName in the archive. bool ResolveObject( const TQString& fileName, chmUnitInfo *ui ) const; //! Retrieves an uncompressed chunk of a file in the .chm. size_t RetrieveObject(const chmUnitInfo *ui, unsigned char *buffer, LONGUINT64 fileOffset, LONGINT64 bufferSize) const; //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not. inline TQString encodeWithCurrentCodec (const TQString& str) const { return (m_textCodec ? m_textCodec->toUnicode (str) : str); } //! Encode the string with the currently selected text codec, if possible. Or return as-is, if not. inline TQString encodeWithCurrentCodec (const char * str) const { return (m_textCodec ? m_textCodec->toUnicode (str) : (TQString) str); } //! Encode the string from internal files with the currently selected text codec, if possible. //! Or return as-is, if not. inline TQString encodeInternalWithCurrentCodec (const TQString& str) const { return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str) : str); } //! Encode the string from internal files with the currently selected text codec, if possible. //! Or return as-is, if not. inline TQString encodeInternalWithCurrentCodec (const char * str) const { return (m_textCodecForSpecialFiles ? m_textCodecForSpecialFiles->toUnicode (str) : (TQString) str); } //! Helper. Translates from Win32 encodings to generic wxWidgets ones. const char * GetFontEncFromCharSet (const TQString& font) const; //! Helper. Returns the $FIftiMain offset of leaf node or 0. u_int32_t GetLeafNodeOffset(const TQString& text, u_int32_t initalOffset, u_int32_t buffSize, u_int16_t treeDepth ); //! Helper. Processes the word location code entries while searching. 
bool ProcessWLC(u_int64_t wlc_count, u_int64_t wlc_size, u_int32_t wlc_offset, unsigned char ds, unsigned char dr, unsigned char cs, unsigned char cr, unsigned char ls, unsigned char lr, LCHMSearchProgressResults& results, bool phrase_search ); //! Looks up as much information as possible from #WINDOWS/#STRINGS. bool getInfoFromWindows(); //! Looks up as much information as possible from #SYSTEM. bool getInfoFromSystem(); //! Fill the topic-url map void fillTopicsUrlMap(); //! Sets up textCodec void setupTextCodec (const char * name); //! Guess used text encoding, using m_detectedLCID and m_font. Set up m_textCodec bool guessTextEncoding (); //! Change the current CHM encoding for internal files and texts. //! Encoding could be either simple TQt codepage, or set like CP1251/KOI8, which allows to //! set up encodings separately for text (first) and internal files (second) bool changeFileEncoding( const char *qtencoding ); //! Convert the word, so it has an appropriate encoding TQCString convertSearchWord ( const TQString &src ); /*! * Helper procedure in TOC parsing, decodes the string between the quotes (first or last) with decoding HTML * entities like í */ int findStringInQuotes (const TQString& tag, int offset, TQString& value, bool firstquote, bool decodeentities ); /*! * Decodes Unicode HTML entities according to current encoding. */ TQString decodeEntity (const TQString& entity ); /*! * \brief Returns the list of all available text encodings. * \return A pointer to the beginning of the text encoding table. The table could be * enumerated until language == 0, which means end of table. * * \ingroup encoding */ static const LCHMTextEncoding * getTextEncodingTable(); /*! * \brief Looks up for encoding by LCID * \param lcid LCID to look up * \return A pointer to encoding structure. * * \ingroup encoding */ static const LCHMTextEncoding * lookupByLCID( short lcid ); /*! * \brief Get the encoding index * \param enc Encoding * \return An index in encoding table. 
getTextEncodingTable() + i gets the encoding. * * \ingroup encoding */ static int getEncodingIndex( const LCHMTextEncoding * enc); /*! * Normalizes path to search in internal arrays */ TQString normalizeUrl (const TQString& path ) const; // Members //! Pointer to the chmlib structure chmFile * m_chmFile; //! Opened file name TQString m_filename; //! Home url, got from CHM file TQString m_home; //! Context tree filename. Got from CHM file TQString m_topicsFile; //! Index filename. Got from CHM file TQString m_indexFile; //! Chm Title. Got from CHM file TQString m_title; // Localization stuff //! LCID from CHM file, used in encoding detection short m_detectedLCID; //! font charset from CHM file, used in encoding detection TQString m_font; //! Chosen text codec TQTextCodec * m_textCodec; TQTextCodec * m_textCodecForSpecialFiles; //! Current encoding const LCHMTextEncoding * m_currentEncoding; //! Map to decode HTML entitles like ´ based on current encoding TQMap m_entityDecodeMap; //! TRUE if /#TOPICS, /#STRINGS, /#URLTBL and /#URLSTR are resolved, and the members below are valid bool m_lookupTablesValid; //! pointer to /#TOPICS chmUnitInfo m_chmTOPICS; //! pointer to /#STRINGS chmUnitInfo m_chmSTRINGS; //! pointer to /#URLTBL chmUnitInfo m_chmURLTBL; //! pointer to /#URLSTR chmUnitInfo m_chmURLSTR; //! Indicates whether the built-in search is available. This is true only when m_lookupTablesValid //! is TRUE, and m_chmFIftiMain is resolved. bool m_searchAvailable; //! pointer to /$FIftiMain chmUnitInfo m_chmFIftiMain; //! Book TOC icon images storage LCHMTocImageKeeper m_imagesKeeper; //! Map url->topic TQMap< TQString, TQString > m_url2topics; };