summary refs log tree commit diff stats
path: root/lib/chmlib/chm_lib.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/chmlib/chm_lib.c')
-rw-r--r-- lib/chmlib/chm_lib.c 1917
1 files changed, 1917 insertions, 0 deletions
diff --git a/lib/chmlib/chm_lib.c b/lib/chmlib/chm_lib.c
new file mode 100644
index 0000000..e7da0b0
--- /dev/null
+++ b/lib/chmlib/chm_lib.c
@@ -0,0 +1,1917 @@
+/* $Id: chm_lib.c 148 2007-03-07 09:22:36Z gyunaev $ */
+/***************************************************************************
+ * chm_lib.c - CHM archive manipulation routines *
+ * ------------------- *
+ * *
+ * author: Jed Wing <jedwin@ugcs.caltech.edu> *
+ * version: 0.3 *
+ * notes: These routines are meant for the manipulation of microsoft *
+ * .chm (compiled html help) files, but may likely be used *
+ * for the manipulation of any ITSS archive, if ever ITSS *
+ * archives are used for any other purpose. *
+ * *
+ * Note also that the section names are statically handled. *
+ * To be entirely correct, the section names should be read *
+ * from the section names meta-file, and then the various *
+ * content sections and the "transforms" to apply to the data *
+ * they contain should be inferred from the section name and *
+ * the meta-files referenced using that name; however, all of *
+ * the files I've been able to get my hands on appear to have *
+ * only two sections: Uncompressed and MSCompressed. *
+ * Additionally, the ITSS.DLL file included with Windows does *
+ * not appear to handle any different transforms than the *
+ * simple LZX-transform. Furthermore, the list of transforms *
+ * to apply is broken, in that only half the required space *
+ * is allocated for the list. (It appears as though the *
+ * space is allocated for ASCII strings, but the strings are *
+ * written as unicode. As a result, only the first half of *
+ * the string appears.) So this is probably not too big of *
+ * a deal, at least until CHM v4 (MS .lit files), which also *
+ * incorporate encryption, of some description. *
+ * *
+ * switches: CHM_MT: compile library with thread-safety *
+ * *
+ * switches (Linux only): *
+ * CHM_USE_PREAD: compile library to use pread instead of *
+ * lseek/read *
+ * CHM_USE_IO64: compile library to support full 64-bit I/O *
+ * as is needed to properly deal with the *
+ * 64-bit file offsets. *
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU Lesser General Public License as *
+ * published by the Free Software Foundation; either version 2.1 of the *
+ * License, or (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include "chm_lib.h"
+
+#ifdef CHM_MT
+#define _REENTRANT
+#endif
+
+#include "lzx.h"
+
+#include <stdlib.h>
+#include <string.h>
+#ifdef CHM_DEBUG
+#include <stdio.h>
+#endif
+
+#if __sun || __sgi
+#include <strings.h>
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#include <malloc.h>
+#ifdef _WIN32_WCE
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#else
+#define strcasecmp stricmp
+#define strncasecmp strnicmp
+#endif
+#else
+/* basic Linux system includes */
+#define _XOPEN_SOURCE 500
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+/* #include <dmalloc.h> */
+#endif
+
+/* includes/defines for threading, if using them */
+#ifdef CHM_MT
+#ifdef WIN32
+#define CHM_ACQUIRE_LOCK(a) do { \
+ EnterCriticalSection(&(a)); \
+ } while(0)
+/* Release a Win32 critical section taken by CHM_ACQUIRE_LOCK.  This has to
+ * be LeaveCriticalSection: entering a second time only raises the owning
+ * thread's recursion count, so no other thread could ever get the lock. */
+#define CHM_RELEASE_LOCK(a) do { \
+        LeaveCriticalSection(&(a)); \
+    } while(0)
+
+#else
+#include <pthread.h>
+
+#define CHM_ACQUIRE_LOCK(a) do { \
+ pthread_mutex_lock(&(a)); \
+ } while(0)
+#define CHM_RELEASE_LOCK(a) do { \
+ pthread_mutex_unlock(&(a)); \
+ } while(0)
+
+#endif
+#else
+#define CHM_ACQUIRE_LOCK(a) /* do nothing */
+#define CHM_RELEASE_LOCK(a) /* do nothing */
+#endif
+
+#ifdef WIN32
+#define CHM_NULL_FD (INVALID_HANDLE_VALUE)
+#define CHM_USE_WIN32IO 1
+#define CHM_CLOSE_FILE(fd) CloseHandle((fd))
+#else
+#define CHM_NULL_FD (-1)
+#define CHM_CLOSE_FILE(fd) close((fd))
+#endif
+
+/*
+ * defines related to tuning
+ */
+#ifndef CHM_MAX_BLOCKS_CACHED
+#define CHM_MAX_BLOCKS_CACHED 5
+#endif
+
+/*
+ * architecture specific defines
+ *
+ * Note: as soon as C99 is more widespread, the below defines should
+ * probably just use the C99 sized-int types.
+ *
+ * The following settings will probably work for many platforms. The sizes
+ * don't have to be exactly correct, but the types must accommodate at least as
+ * many bits as they specify.
+ */
+
+/* i386, 32-bit, Windows */
+#ifdef WIN32
+typedef unsigned char UChar;
+typedef __int16 Int16;
+typedef unsigned __int16 UInt16;
+typedef __int32 Int32;
+typedef unsigned __int32 UInt32;
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+
+/* I386, 32-bit, non-Windows */
+/* Sparc */
+/* MIPS */
+/* PPC */
+#elif __i386__ || __sun || __sgi || __ppc__
+typedef unsigned char UChar;
+typedef short Int16;
+typedef unsigned short UInt16;
+typedef long Int32;
+typedef unsigned long UInt32;
+typedef long long Int64;
+typedef unsigned long long UInt64;
+
+/* x86-64 */
+/* Note that these may be appropriate for other 64-bit machines. */
+#elif __x86_64__ || __ia64__
+typedef unsigned char UChar;
+typedef short Int16;
+typedef unsigned short UInt16;
+typedef int Int32;
+typedef unsigned int UInt32;
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+/* yielding an error is preferable to yielding incorrect behavior */
+#error "Please define the sized types for your platform in chm_lib.c"
+#endif
+
+/* GCC */
+#ifdef __GNUC__
+#define memcmp __builtin_memcmp
+#define memcpy __builtin_memcpy
+#define strlen __builtin_strlen
+
+#elif defined(WIN32)
+/* Replacement for ffs() on non-GCC Win32 builds: returns the 1-based
+ * position of the least significant set bit of val, or 0 when val is 0.
+ * The scan uses an unsigned mask so that reaching the top bit never shifts
+ * a signed value into its sign bit. */
+static int ffs(unsigned int val)
+{
+    unsigned int mask = 1u;
+    int pos = 1;
+
+    if (val == 0)
+        return 0;
+
+    /* val is non-zero, so a set bit is found before mask shifts out */
+    while ((val & mask) == 0)
+    {
+        mask <<= 1;
+        ++pos;
+    }
+    return pos;
+}
+
+#endif
+
+/* utilities for unmarshalling data */
+/* Copy `count` raw bytes from the cursor *pData into dest, then advance the
+ * cursor and shrink *pLenRemain by the same amount.
+ * Returns 1 on success.  Returns 0, leaving everything untouched, when
+ * count is not positive or is larger than the bytes remaining. */
+static int _unmarshal_char_array(unsigned char **pData,
+                                 unsigned int *pLenRemain,
+                                 char *dest,
+                                 int count)
+{
+    unsigned int nbytes;
+
+    if (count <= 0)
+        return 0;
+
+    nbytes = (unsigned int)count;
+    if (nbytes > *pLenRemain)
+        return 0;
+
+    memcpy(dest, *pData, nbytes);
+    *pLenRemain -= nbytes;
+    *pData += nbytes;
+    return 1;
+}
+
+/* Unsigned-byte twin of _unmarshal_char_array: copies `count` bytes from the
+ * cursor into dest and moves the cursor and remaining-length forward.
+ * Returns 1 on success, 0 when count <= 0 or count exceeds *pLenRemain. */
+static int _unmarshal_uchar_array(unsigned char **pData,
+                                  unsigned int *pLenRemain,
+                                  unsigned char *dest,
+                                  int count)
+{
+    if (count > 0 && (unsigned int)count <= *pLenRemain)
+    {
+        memcpy(dest, *pData, count);
+        *pData = *pData + count;
+        *pLenRemain = *pLenRemain - count;
+        return 1;
+    }
+    return 0;
+}
+
+#if 0
+/* Disabled 16-bit readers: nothing between here and the matching #endif is
+ * compiled.  Each one decodes a little-endian 16-bit value from the cursor,
+ * advances the cursor by 2 and returns 1, or returns 0 when fewer than
+ * 2 bytes remain. */
+static int _unmarshal_int16(unsigned char **pData,
+ unsigned int *pLenRemain,
+ Int16 *dest)
+{
+ if (2 > *pLenRemain)
+ return 0;
+ *dest = (*pData)[0] | (*pData)[1]<<8;
+ *pData += 2;
+ *pLenRemain -= 2;
+ return 1;
+}
+
+static int _unmarshal_uint16(unsigned char **pData,
+ unsigned int *pLenRemain,
+ UInt16 *dest)
+{
+ if (2 > *pLenRemain)
+ return 0;
+ *dest = (*pData)[0] | (*pData)[1]<<8;
+ *pData += 2;
+ *pLenRemain -= 2;
+ return 1;
+}
+#endif
+
+/* Decode a little-endian signed 32-bit value at the cursor.
+ * On success stores it in *dest, advances *pData by 4, reduces *pLenRemain
+ * by 4 and returns 1.  Returns 0 without touching anything when fewer than
+ * 4 bytes remain. */
+static int _unmarshal_int32(unsigned char **pData,
+                            unsigned int *pLenRemain,
+                            Int32 *dest)
+{
+    const unsigned char *src = *pData;
+    unsigned int raw;
+
+    if (4 > *pLenRemain)
+        return 0;
+
+    /* assemble in unsigned arithmetic: shifting a promoted (signed) byte
+     * left by 24 overflows int whenever the byte's top bit is set */
+    raw = (unsigned int)src[0]
+        | ((unsigned int)src[1] << 8)
+        | ((unsigned int)src[2] << 16)
+        | ((unsigned int)src[3] << 24);
+
+    /* going through int keeps the sign when Int32 is wider than 32 bits */
+    *dest = (int)raw;
+    *pData += 4;
+    *pLenRemain -= 4;
+    return 1;
+}
+
+/* Decode a little-endian unsigned 32-bit value at the cursor.
+ * On success stores it in *dest, advances *pData by 4, reduces *pLenRemain
+ * by 4 and returns 1.  Returns 0 without touching anything when fewer than
+ * 4 bytes remain. */
+static int _unmarshal_uint32(unsigned char **pData,
+                             unsigned int *pLenRemain,
+                             UInt32 *dest)
+{
+    const unsigned char *src = *pData;
+    unsigned int raw;
+
+    if (4 > *pLenRemain)
+        return 0;
+
+    /* assemble in unsigned arithmetic: the promoted-int form overflows on
+     * a set top bit and would sign-extend into a UInt32 wider than 32 bits */
+    raw = (unsigned int)src[0]
+        | ((unsigned int)src[1] << 8)
+        | ((unsigned int)src[2] << 16)
+        | ((unsigned int)src[3] << 24);
+
+    *dest = raw;
+    *pData += 4;
+    *pLenRemain -= 4;
+    return 1;
+}
+
+/* Decode a little-endian signed 64-bit value at the cursor.
+ * On success stores it in *dest, advances *pData by 8, reduces *pLenRemain
+ * by 8 and returns 1.  Returns 0 without touching anything when fewer than
+ * 8 bytes remain. */
+static int _unmarshal_int64(unsigned char **pData,
+                            unsigned int *pLenRemain,
+                            Int64 *dest)
+{
+    UInt64 bits;
+    int i;
+
+    if (8 > *pLenRemain)
+        return 0;
+
+    /* accumulate in the unsigned type: left-shifting a signed accumulator
+     * overflows as soon as its high bits are populated */
+    bits = 0;
+    for (i = 8; i > 0; i--)
+    {
+        bits <<= 8;
+        bits |= (*pData)[i-1];
+    }
+
+    *dest = (Int64)bits;
+    *pData += 8;
+    *pLenRemain -= 8;
+    return 1;
+}
+
+/* Decode a little-endian unsigned 64-bit value at the cursor.
+ * On success stores it in *dest, advances *pData by 8, reduces *pLenRemain
+ * by 8 and returns 1.  Returns 0 without touching anything when fewer than
+ * 8 bytes remain. */
+static int _unmarshal_uint64(unsigned char **pData,
+                             unsigned int *pLenRemain,
+                             UInt64 *dest)
+{
+    const unsigned char *src = *pData;
+    UInt64 value = 0;
+    int idx;
+
+    if (8 > *pLenRemain)
+        return 0;
+
+    /* byte idx carries bits [8*idx, 8*idx+7] of the result */
+    for (idx = 0; idx < 8; idx++)
+        value |= (UInt64)src[idx] << (8 * idx);
+
+    *dest = value;
+    *pLenRemain -= 8;
+    *pData += 8;
+    return 1;
+}
+
+/* Copy one 16-byte UUID from the cursor into dest, advancing the cursor and
+ * reducing *pDataLen by 16.  Returns 1 on success, 0 (nothing changed) when
+ * fewer than 16 bytes remain. */
+static int _unmarshal_uuid(unsigned char **pData,
+                           unsigned int *pDataLen,
+                           unsigned char *dest)
+{
+    if (16 > *pDataLen)
+        return 0;
+
+    memcpy(dest, *pData, 16);
+    *pData += 16;
+    *pDataLen -= 16;
+    return 1;
+}
+
+/* names of sections essential to decompression
+ *
+ * These are archive-internal object paths.  chm_open resolves the reset
+ * table, content and control-data paths to decide whether compression can
+ * be enabled; the span-info path is only referenced from code that is
+ * currently compiled out.
+ */
+static const char _CHMU_RESET_TABLE[] =
+ "::DataSpace/Storage/MSCompressed/Transform/"
+ "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/"
+ "InstanceData/ResetTable";
+static const char _CHMU_LZXC_CONTROLDATA[] =
+ "::DataSpace/Storage/MSCompressed/ControlData";
+static const char _CHMU_CONTENT[] =
+ "::DataSpace/Storage/MSCompressed/Content";
+static const char _CHMU_SPANINFO[] =
+ "::DataSpace/Storage/MSCompressed/SpanInfo";
+
+/*
+ * structures local to this module
+ */
+
+/* structure of ITSF headers
+ *
+ * In-memory copy of the header that _unmarshal_itsf_header fills in.  The
+ * trailing comment on each field is its byte offset (hex) in the serialized
+ * form.  The two length macros are the only serialized sizes the
+ * unmarshaller accepts: 0x58 bytes (version 2) and 0x60 bytes (version 3).
+ */
+#define _CHM_ITSF_V2_LEN (0x58)
+#define _CHM_ITSF_V3_LEN (0x60)
+struct chmItsfHeader
+{
+ char signature[4]; /* 0 (ITSF) */
+ Int32 version; /* 4 */
+ Int32 header_len; /* 8 */
+ Int32 unknown_000c; /* c */
+ UInt32 last_modified; /* 10 */
+ UInt32 lang_id; /* 14 */
+ UChar dir_uuid[16]; /* 18 */
+ UChar stream_uuid[16]; /* 28 */
+ UInt64 unknown_offset; /* 38 */
+ UInt64 unknown_len; /* 40 */
+ UInt64 dir_offset; /* 48 */
+ UInt64 dir_len; /* 50 */
+ UInt64 data_offset; /* 58 (Not present before V3) */
+}; /* __attribute__ ((aligned (1))); */
+
+/* Decode and validate an ITSF header from the cursor into *dest.
+ *
+ * *pDataLen must be exactly 0x58 or 0x60 on entry.  The signature must be
+ * "ITSF", the version 2 or 3, and header_len at least the size that goes
+ * with that version.  For version 3 the data offset is read from the last
+ * 8 bytes; for version 2 it is computed as dir_offset + dir_len.
+ * Returns 1 on success, 0 on any validation failure.
+ */
+static int _unmarshal_itsf_header(unsigned char **pData,
+                                  unsigned int *pDataLen,
+                                  struct chmItsfHeader *dest)
+{
+    Int32 min_header_len;
+
+    /* only the 0x58 and 0x60 byte layouts are understood */
+    if (*pDataLen != _CHM_ITSF_V2_LEN && *pDataLen != _CHM_ITSF_V3_LEN)
+        return 0;
+
+    /* fields common to both layouts, in serialized order */
+    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
+    _unmarshal_int32     (pData, pDataLen, &dest->version);
+    _unmarshal_int32     (pData, pDataLen, &dest->header_len);
+    _unmarshal_int32     (pData, pDataLen, &dest->unknown_000c);
+    _unmarshal_uint32    (pData, pDataLen, &dest->last_modified);
+    _unmarshal_uint32    (pData, pDataLen, &dest->lang_id);
+    _unmarshal_uuid      (pData, pDataLen, dest->dir_uuid);
+    _unmarshal_uuid      (pData, pDataLen, dest->stream_uuid);
+    _unmarshal_uint64    (pData, pDataLen, &dest->unknown_offset);
+    _unmarshal_uint64    (pData, pDataLen, &dest->unknown_len);
+    _unmarshal_uint64    (pData, pDataLen, &dest->dir_offset);
+    _unmarshal_uint64    (pData, pDataLen, &dest->dir_len);
+
+    /* XXX: should also check UUIDs, probably, though with a version 3 file,
+     *      current MS tools do not seem to use them.
+     */
+    if (memcmp(dest->signature, "ITSF", 4) != 0)
+        return 0;
+
+    /* each known version implies a minimum declared header length */
+    switch (dest->version)
+    {
+        case 2:
+            min_header_len = _CHM_ITSF_V2_LEN;
+            break;
+        case 3:
+            min_header_len = _CHM_ITSF_V3_LEN;
+            break;
+        default:
+            return 0;
+    }
+    if (dest->header_len < min_header_len)
+        return 0;
+
+    /* version 2 has no stored data offset: derive it */
+    if (dest->version != 3)
+    {
+        dest->data_offset = dest->dir_offset + dest->dir_len;
+        return 1;
+    }
+
+    /* version 3 stores it in the trailing bytes, which must be present */
+    if (*pDataLen == 0)
+        return 0;
+    _unmarshal_uint64(pData, pDataLen, &dest->data_offset);
+    return 1;
+}
+
+/* structure of ITSP headers
+ *
+ * In-memory copy of the directory header that _unmarshal_itsp_header fills
+ * in.  The trailing comment on each field is its byte offset (hex) in the
+ * serialized form, which is exactly _CHM_ITSP_V1_LEN (0x54) bytes long.
+ */
+#define _CHM_ITSP_V1_LEN (0x54)
+struct chmItspHeader
+{
+ char signature[4]; /* 0 (ITSP) */
+ Int32 version; /* 4 */
+ Int32 header_len; /* 8 */
+ Int32 unknown_000c; /* c */
+ UInt32 block_len; /* 10 */
+ Int32 blockidx_intvl; /* 14 */
+ Int32 index_depth; /* 18 */
+ Int32 index_root; /* 1c */
+ Int32 index_head; /* 20 */
+ Int32 unknown_0024; /* 24 */
+ UInt32 num_blocks; /* 28 */
+ Int32 unknown_002c; /* 2c */
+ UInt32 lang_id; /* 30 */
+ UChar system_uuid[16]; /* 34 */
+ UChar unknown_0044[16]; /* 44 */
+}; /* __attribute__ ((aligned (1))); */
+
+/* Decode and validate an ITSP (directory) header from the cursor into *dest.
+ *
+ * *pDataLen must be exactly 0x54 on entry.  The signature must be "ITSP",
+ * the version 1, header_len 0x54 and block_len non-zero.
+ * Returns 1 on success, 0 on any validation failure.
+ */
+static int _unmarshal_itsp_header(unsigned char **pData,
+                                  unsigned int *pDataLen,
+                                  struct chmItspHeader *dest)
+{
+    /* we only know how to deal with a 0x54 byte structure */
+    if (*pDataLen != _CHM_ITSP_V1_LEN)
+        return 0;
+
+    /* unmarshal fields */
+    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
+    _unmarshal_int32     (pData, pDataLen, &dest->version);
+    _unmarshal_int32     (pData, pDataLen, &dest->header_len);
+    _unmarshal_int32     (pData, pDataLen, &dest->unknown_000c);
+    _unmarshal_uint32    (pData, pDataLen, &dest->block_len);
+    _unmarshal_int32     (pData, pDataLen, &dest->blockidx_intvl);
+    _unmarshal_int32     (pData, pDataLen, &dest->index_depth);
+    _unmarshal_int32     (pData, pDataLen, &dest->index_root);
+    _unmarshal_int32     (pData, pDataLen, &dest->index_head);
+    _unmarshal_int32     (pData, pDataLen, &dest->unknown_0024);
+    _unmarshal_uint32    (pData, pDataLen, &dest->num_blocks);
+    _unmarshal_int32     (pData, pDataLen, &dest->unknown_002c);
+    _unmarshal_uint32    (pData, pDataLen, &dest->lang_id);
+    _unmarshal_uuid      (pData, pDataLen, dest->system_uuid);
+    _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16);
+
+    /* error check the data */
+    if (memcmp(dest->signature, "ITSP", 4) != 0)
+        return 0;
+    if (dest->version != 1)
+        return 0;
+    if (dest->header_len != _CHM_ITSP_V1_LEN)
+        return 0;
+
+    /* block_len sizes every directory page buffer; a zero value would make
+     * chm_resolve_object inspect a page that holds no bytes at all */
+    if (dest->block_len == 0)
+        return 0;
+
+    return 1;
+}
+
+/* structure of PMGL headers
+ *
+ * A directory page that starts with the "PMGL" marker is treated as a leaf
+ * (listing) page by chm_resolve_object.  The serialized header is
+ * _CHM_PMGL_LEN (0x14) bytes; trailing comments give field byte offsets.
+ * _chm_find_in_PMGL uses free_space as the number of bytes at the end of
+ * the page that hold no entries.
+ */
+static const char _chm_pmgl_marker[4] = "PMGL";
+#define _CHM_PMGL_LEN (0x14)
+struct chmPmglHeader
+{
+ char signature[4]; /* 0 (PMGL) */
+ UInt32 free_space; /* 4 */
+ UInt32 unknown_0008; /* 8 */
+ Int32 block_prev; /* c */
+ Int32 block_next; /* 10 */
+}; /* __attribute__ ((aligned (1))); */
+
+/* Decode a PMGL page header from the cursor into *dest.
+ * *pDataLen must be exactly 0x14 on entry.  Returns 1 when the decoded
+ * signature is "PMGL", 0 otherwise (including a wrong length). */
+static int _unmarshal_pmgl_header(unsigned char **pData,
+                                  unsigned int *pDataLen,
+                                  struct chmPmglHeader *dest)
+{
+    /* reject anything that is not the one layout we understand */
+    if (*pDataLen != _CHM_PMGL_LEN)
+        return 0;
+
+    /* fields, in serialized order */
+    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
+    _unmarshal_uint32    (pData, pDataLen, &dest->free_space);
+    _unmarshal_uint32    (pData, pDataLen, &dest->unknown_0008);
+    _unmarshal_int32     (pData, pDataLen, &dest->block_prev);
+    _unmarshal_int32     (pData, pDataLen, &dest->block_next);
+
+    /* the header is valid only if it carries the PMGL marker */
+    return memcmp(dest->signature, _chm_pmgl_marker, 4) == 0;
+}
+
+/* structure of PMGI headers
+ *
+ * A directory page that starts with the "PMGI" marker is treated as a
+ * branch (index) page by chm_resolve_object.  The serialized header is
+ * _CHM_PMGI_LEN (0x08) bytes; trailing comments give field byte offsets.
+ */
+static const char _chm_pmgi_marker[4] = "PMGI";
+#define _CHM_PMGI_LEN (0x08)
+struct chmPmgiHeader
+{
+ char signature[4]; /* 0 (PMGI) */
+ UInt32 free_space; /* 4 */
+}; /* __attribute__ ((aligned (1))); */
+
+/* Decode a PMGI page header from the cursor into *dest.
+ * *pDataLen must be exactly 0x8 on entry.  Returns 1 when the decoded
+ * signature is "PMGI", 0 otherwise (including a wrong length). */
+static int _unmarshal_pmgi_header(unsigned char **pData,
+                                  unsigned int *pDataLen,
+                                  struct chmPmgiHeader *dest)
+{
+    /* reject anything that is not the one layout we understand */
+    if (*pDataLen != _CHM_PMGI_LEN)
+        return 0;
+
+    /* fields, in serialized order */
+    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
+    _unmarshal_uint32    (pData, pDataLen, &dest->free_space);
+
+    /* the header is valid only if it carries the PMGI marker */
+    return memcmp(dest->signature, _chm_pmgi_marker, 4) == 0;
+}
+
+/* structure of LZXC reset table
+ *
+ * In-memory copy of the reset-table header that _unmarshal_lzxc_reset_table
+ * fills in from exactly _CHM_LZXC_RESETTABLE_V1_LEN (0x28) serialized bytes.
+ * The unmarshaller accepts only version == 2.
+ */
+#define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
+struct chmLzxcResetTable
+{
+ UInt32 version;
+ UInt32 block_count;
+ UInt32 unknown;
+ UInt32 table_offset;
+ UInt64 uncompressed_len;
+ UInt64 compressed_len;
+ UInt64 block_len;
+}; /* __attribute__ ((aligned (1))); */
+
+/* Decode an LZXC reset-table header from the cursor into *dest.
+ * *pDataLen must be exactly 0x28 on entry.  Returns 1 when the decoded
+ * version is 2, 0 otherwise (including a wrong length). */
+static int _unmarshal_lzxc_reset_table(unsigned char **pData,
+                                       unsigned int *pDataLen,
+                                       struct chmLzxcResetTable *dest)
+{
+    /* reject anything that is not the one layout we understand */
+    if (*pDataLen != _CHM_LZXC_RESETTABLE_V1_LEN)
+        return 0;
+
+    /* four 32-bit fields followed by three 64-bit fields */
+    _unmarshal_uint32(pData, pDataLen, &dest->version);
+    _unmarshal_uint32(pData, pDataLen, &dest->block_count);
+    _unmarshal_uint32(pData, pDataLen, &dest->unknown);
+    _unmarshal_uint32(pData, pDataLen, &dest->table_offset);
+    _unmarshal_uint64(pData, pDataLen, &dest->uncompressed_len);
+    _unmarshal_uint64(pData, pDataLen, &dest->compressed_len);
+    _unmarshal_uint64(pData, pDataLen, &dest->block_len);
+
+    /* only version 2 tables are supported */
+    return dest->version == 2;
+}
+
+/* structure of LZXC control data block
+ *
+ * In-memory copy of the control data that _unmarshal_lzxc_control_data
+ * fills in.  Trailing comments give field byte offsets (hex) in the
+ * serialized form.  At least _CHM_LZXC_MIN_LEN (0x18) bytes are required;
+ * unknown_18 lies beyond that minimum and is set to 0 when it is not read.
+ * For version 2 the unmarshaller multiplies resetInterval and windowSize
+ * by 0x8000 before storing them.
+ */
+#define _CHM_LZXC_MIN_LEN (0x18)
+#define _CHM_LZXC_V2_LEN (0x1c)
+struct chmLzxcControlData
+{
+ UInt32 size; /* 0 */
+ char signature[4]; /* 4 (LZXC) */
+ UInt32 version; /* 8 */
+ UInt32 resetInterval; /* c */
+ UInt32 windowSize; /* 10 */
+ UInt32 windowsPerReset; /* 14 */
+ UInt32 unknown_18; /* 18 */
+};
+
+/* Decode and validate an LZXC control-data block from the cursor into *dest.
+ *
+ * At least 0x18 bytes must be available.  The field at offset 0x18 is read
+ * when the block is at least 0x1c bytes long, otherwise it is set to 0.
+ * For version 2, resetInterval and windowSize are scaled by 0x8000.
+ * Returns 1 on success.  Returns 0 when the block is too short, when
+ * windowSize is 0 or 1, when resetInterval is 0 or not a multiple of
+ * windowSize/2, or when the signature is not "LZXC".
+ */
+static int _unmarshal_lzxc_control_data(unsigned char **pData,
+                                        unsigned int *pDataLen,
+                                        struct chmLzxcControlData *dest)
+{
+    /* we want at least 0x18 bytes */
+    if (*pDataLen < _CHM_LZXC_MIN_LEN)
+        return 0;
+
+    /* unmarshal fields */
+    _unmarshal_uint32    (pData, pDataLen, &dest->size);
+    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
+    _unmarshal_uint32    (pData, pDataLen, &dest->version);
+    _unmarshal_uint32    (pData, pDataLen, &dest->resetInterval);
+    _unmarshal_uint32    (pData, pDataLen, &dest->windowSize);
+    _unmarshal_uint32    (pData, pDataLen, &dest->windowsPerReset);
+
+    /* *pDataLen now counts what is left after the 0x18 mandatory bytes, so
+     * the optional field is present when the remainder covers the
+     * difference between the two layouts */
+    if (*pDataLen >= _CHM_LZXC_V2_LEN - _CHM_LZXC_MIN_LEN)
+        _unmarshal_uint32(pData, pDataLen, &dest->unknown_18);
+    else
+        dest->unknown_18 = 0;
+
+    if (dest->version == 2)
+    {
+        dest->resetInterval *= 0x8000;
+        dest->windowSize *= 0x8000;
+    }
+    if (dest->windowSize == 0 || dest->resetInterval == 0)
+        return 0;
+
+    /* for now, only support resetInterval a multiple of windowSize/2;
+     * windowSize == 1 is rejected first because windowSize/2 would be 0 */
+    if (dest->windowSize == 1)
+        return 0;
+    if ((dest->resetInterval % (dest->windowSize/2)) != 0)
+        return 0;
+
+    /* check structure */
+    if (memcmp(dest->signature, "LZXC", 4) != 0)
+        return 0;
+
+    return 1;
+}
+
+/* the structure used for chm file handles
+ *
+ * Allocated and filled in by chm_open, released by chm_close.
+ */
+struct chmFile
+{
+ /* underlying file; CHM_NULL_FD when no file is open */
+#ifdef WIN32
+ HANDLE fd;
+#else
+ int fd;
+#endif
+
+ /* locks, present only in CHM_MT builds: `mutex` is held around file reads
+ * in _chm_fetch_bytes and `cache_mutex` around cache resizing in
+ * chm_set_param.
+ * NOTE(review): lzx_mutex presumably guards the decompressor state; its
+ * users are not in this part of the file -- confirm. */
+#ifdef CHM_MT
+#ifdef WIN32
+ CRITICAL_SECTION mutex;
+ CRITICAL_SECTION lzx_mutex;
+ CRITICAL_SECTION cache_mutex;
+#else
+ pthread_mutex_t mutex;
+ pthread_mutex_t lzx_mutex;
+ pthread_mutex_t cache_mutex;
+#endif
+#endif
+
+ /* values copied from the ITSF/ITSP headers by chm_open; dir_offset and
+ * dir_len are adjusted there by the ITSP header length */
+ UInt64 dir_offset;
+ UInt64 dir_len;
+ UInt64 data_offset;
+ Int32 index_root;
+ Int32 index_head;
+ UInt32 block_len;
+
+ /* span is not assigned by chm_open (the code that fetched it is
+ * compiled out); rt_unit and cn_unit are the resolved reset-table and
+ * content objects */
+ UInt64 span;
+ struct chmUnitInfo rt_unit;
+ struct chmUnitInfo cn_unit;
+ struct chmLzxcResetTable reset_table;
+
+ /* LZX control data */
+ int compression_enabled;
+ UInt32 window_size;
+ UInt32 reset_interval;
+ UInt32 reset_blkcount;
+
+ /* decompressor state */
+ struct LZXstate *lzx_state;
+ int lzx_last_block;
+
+ /* cache for decompressed blocks: two parallel arrays of
+ * cache_num_blocks slots, sized by chm_set_param */
+ UChar **cache_blocks;
+ UInt64 *cache_block_indices;
+ Int32 cache_num_blocks;
+};
+
+/*
+ * utility functions local to this module
+ */
+
+/* utility function to handle differences between {pread,read}(64)?
+ *
+ * Reads up to `len` bytes at absolute file offset `os` into `buf`, holding
+ * h->mutex around the I/O.  Returns 0 immediately when the handle has no
+ * open file; otherwise returns what the platform read call reported (on
+ * Win32, 0 when ReadFile fails).  The lseek-based and Win32 variants put
+ * the file position back where it was before returning.
+ * NOTE(review): the POSIX variants pass the read()/pread() result through
+ * unchanged, so it can presumably be negative on error; callers in this
+ * file compare the result against the requested length.
+ */
+static Int64 _chm_fetch_bytes(struct chmFile *h,
+ UChar *buf,
+ UInt64 os,
+ Int64 len)
+{
+ Int64 readLen=0, oldOs=0;
+ if (h->fd == CHM_NULL_FD)
+ return readLen;
+
+ CHM_ACQUIRE_LOCK(h->mutex);
+#ifdef CHM_USE_WIN32IO
+ /* NOTE: this might be better done with CreateFileMapping, et cetera... */
+ {
+ DWORD origOffsetLo=0, origOffsetHi=0;
+ DWORD offsetLo, offsetHi;
+ DWORD actualLen=0;
+
+ /* awkward Win32 Seek/Tell */
+ offsetLo = (unsigned int)(os & 0xffffffffL);
+ offsetHi = (unsigned int)((os >> 32) & 0xffffffffL);
+ origOffsetLo = SetFilePointer(h->fd, 0, &origOffsetHi, FILE_CURRENT);
+ offsetLo = SetFilePointer(h->fd, offsetLo, &offsetHi, FILE_BEGIN);
+
+ /* read the data */
+ if (ReadFile(h->fd,
+ buf,
+ (DWORD)len,
+ &actualLen,
+ NULL) == TRUE)
+ readLen = actualLen;
+ else
+ readLen = 0;
+
+ /* restore original position */
+ SetFilePointer(h->fd, origOffsetLo, &origOffsetHi, FILE_BEGIN);
+ }
+#else
+#ifdef CHM_USE_PREAD
+#ifdef CHM_USE_IO64
+ readLen = pread64(h->fd, buf, (long)len, os);
+#else
+ /* note: the offset is narrowed to unsigned int in this variant */
+ readLen = pread(h->fd, buf, (long)len, (unsigned int)os);
+#endif
+#else
+#ifdef CHM_USE_IO64
+ /* remember position, seek, read, seek back */
+ oldOs = lseek64(h->fd, 0, SEEK_CUR);
+ lseek64(h->fd, os, SEEK_SET);
+ readLen = read(h->fd, buf, len);
+ lseek64(h->fd, oldOs, SEEK_SET);
+#else
+ /* remember position, seek, read, seek back (offsets narrowed to long) */
+ oldOs = lseek(h->fd, 0, SEEK_CUR);
+ lseek(h->fd, (long)os, SEEK_SET);
+ readLen = read(h->fd, buf, len);
+ lseek(h->fd, (long)oldOs, SEEK_SET);
+#endif
+#endif
+#endif
+ CHM_RELEASE_LOCK(h->mutex);
+ return readLen;
+}
+
+/* open an ITS archive
+ *
+ * Opens `filename` read-only, reads and validates the ITSF header and the
+ * ITSP directory header, then tries to resolve and parse the LZX reset
+ * table and control data.  When any of those compression objects is
+ * missing or malformed the handle is still returned, with
+ * compression_enabled set to 0.
+ *
+ * Returns a new handle that the caller releases with chm_close(), or NULL
+ * when allocation fails, the file cannot be opened, a header is invalid,
+ * or the control data is larger than the local 256-byte buffer.
+ */
+#ifdef PPC_BSTR
+/* RWE 6/12/2003 */
+struct chmFile *chm_open(BSTR filename)
+#else
+struct chmFile *chm_open(const char *filename)
+#endif
+{
+    unsigned char               sbuffer[256];
+    unsigned int                sremain;
+    unsigned char              *sbufpos;
+    struct chmFile             *newHandle=NULL;
+    struct chmItsfHeader        itsfHeader;
+    struct chmItspHeader        itspHeader;
+    struct chmUnitInfo          uiLzxc;
+    struct chmLzxcControlData   ctlData;
+
+    /* allocate handle */
+    newHandle = (struct chmFile *)malloc(sizeof(struct chmFile));
+    if (newHandle == NULL)
+        return NULL;
+    newHandle->fd = CHM_NULL_FD;
+    newHandle->lzx_state = NULL;
+    newHandle->cache_blocks = NULL;
+    newHandle->cache_block_indices = NULL;
+    newHandle->cache_num_blocks = 0;
+
+    /* LZX parameters stay zero unless valid control data is parsed below */
+    newHandle->window_size = 0;
+    newHandle->reset_interval = 0;
+    newHandle->reset_blkcount = 0;
+
+    /* open file */
+#ifdef WIN32
+#ifdef PPC_BSTR
+    if ((newHandle->fd=CreateFile(filename,
+                                  GENERIC_READ,
+                                  FILE_SHARE_READ,
+                                  NULL,
+                                  OPEN_EXISTING,
+                                  FILE_ATTRIBUTE_NORMAL,
+                                  NULL)) == CHM_NULL_FD)
+    {
+        free(newHandle);
+        return NULL;
+    }
+#else
+    if ((newHandle->fd=CreateFileA(filename,
+                                   GENERIC_READ,
+                                   0,
+                                   NULL,
+                                   OPEN_EXISTING,
+                                   FILE_ATTRIBUTE_NORMAL,
+                                   NULL)) == CHM_NULL_FD)
+    {
+        free(newHandle);
+        return NULL;
+    }
+#endif
+#else
+    if ((newHandle->fd=open(filename, O_RDONLY)) == CHM_NULL_FD)
+    {
+        free(newHandle);
+        return NULL;
+    }
+#endif
+
+    /* initialize mutexes, if needed.  From here on every failure path goes
+     * through chm_close, which destroys them again. */
+#ifdef CHM_MT
+#ifdef WIN32
+    InitializeCriticalSection(&newHandle->mutex);
+    InitializeCriticalSection(&newHandle->lzx_mutex);
+    InitializeCriticalSection(&newHandle->cache_mutex);
+#else
+    pthread_mutex_init(&newHandle->mutex, NULL);
+    pthread_mutex_init(&newHandle->lzx_mutex, NULL);
+    pthread_mutex_init(&newHandle->cache_mutex, NULL);
+#endif
+#endif
+
+    /* read and verify header */
+    sremain = _CHM_ITSF_V3_LEN;
+    sbufpos = sbuffer;
+    if (_chm_fetch_bytes(newHandle, sbuffer, (UInt64)0, sremain) != sremain    ||
+        !_unmarshal_itsf_header(&sbufpos, &sremain, &itsfHeader))
+    {
+        chm_close(newHandle);
+        return NULL;
+    }
+
+    /* stash important values from header */
+    newHandle->dir_offset  = itsfHeader.dir_offset;
+    newHandle->dir_len     = itsfHeader.dir_len;
+    newHandle->data_offset = itsfHeader.data_offset;
+
+    /* now, read and verify the directory header chunk */
+    sremain = _CHM_ITSP_V1_LEN;
+    sbufpos = sbuffer;
+    if (_chm_fetch_bytes(newHandle, sbuffer,
+                         (UInt64)itsfHeader.dir_offset, sremain) != sremain    ||
+        !_unmarshal_itsp_header(&sbufpos, &sremain, &itspHeader))
+    {
+        chm_close(newHandle);
+        return NULL;
+    }
+
+    /* grab essential information from ITSP header */
+    newHandle->dir_offset += itspHeader.header_len;
+    newHandle->dir_len    -= itspHeader.header_len;
+    newHandle->index_root  = itspHeader.index_root;
+    newHandle->index_head  = itspHeader.index_head;
+    newHandle->block_len   = itspHeader.block_len;
+
+    /* if the index root is -1, this means we don't have any PMGI blocks.
+     * as a result, we must use the sole PMGL block as the index root
+     */
+    if (newHandle->index_root <= -1)
+        newHandle->index_root = newHandle->index_head;
+
+    /* By default, compression is enabled. */
+    newHandle->compression_enabled = 1;
+
+    /* Jed, Sun Jun 27: 'span' doesn't seem to be used anywhere, so the
+     * SpanInfo object is not fetched and newHandle->span is left unset. */
+
+    /* prefetch most commonly needed unit infos; all three objects must
+     * exist and live outside the compressed section */
+    if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
+                                                  _CHMU_RESET_TABLE,
+                                                  &newHandle->rt_unit)    ||
+        newHandle->rt_unit.space == CHM_COMPRESSED                        ||
+        CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
+                                                  _CHMU_CONTENT,
+                                                  &newHandle->cn_unit)    ||
+        newHandle->cn_unit.space == CHM_COMPRESSED                        ||
+        CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle,
+                                                  _CHMU_LZXC_CONTROLDATA,
+                                                  &uiLzxc)                ||
+        uiLzxc.space == CHM_COMPRESSED)
+    {
+        newHandle->compression_enabled = 0;
+    }
+
+    /* read reset table info */
+    if (newHandle->compression_enabled)
+    {
+        sremain = _CHM_LZXC_RESETTABLE_V1_LEN;
+        sbufpos = sbuffer;
+        if (chm_retrieve_object(newHandle, &newHandle->rt_unit, sbuffer,
+                                0, sremain) != sremain                        ||
+            !_unmarshal_lzxc_reset_table(&sbufpos, &sremain,
+                                         &newHandle->reset_table))
+        {
+            newHandle->compression_enabled = 0;
+        }
+    }
+
+    /* read control data */
+    if (newHandle->compression_enabled)
+    {
+        sremain = (unsigned int)uiLzxc.length;
+        if (uiLzxc.length > sizeof(sbuffer))
+        {
+            chm_close(newHandle);
+            return NULL;
+        }
+
+        sbufpos = sbuffer;
+        if (chm_retrieve_object(newHandle, &uiLzxc, sbuffer,
+                                0, sremain) != sremain                        ||
+            !_unmarshal_lzxc_control_data(&sbufpos, &sremain,
+                                          &ctlData))
+        {
+            /* ctlData is not valid here: do not read it */
+            newHandle->compression_enabled = 0;
+        }
+        else
+        {
+            newHandle->window_size = ctlData.windowSize;
+            newHandle->reset_interval = ctlData.resetInterval;
+
+            /* Jed, Mon Jun 28: Experimentally, it appears that the reset
+             * block count must be multiplied by this formerly unknown ctrl
+             * data field in order to decompress some files.  The
+             * unmarshaller rejects window sizes below 2, so the divisor is
+             * never zero. */
+            newHandle->reset_blkcount = newHandle->reset_interval    /
+                                        (newHandle->window_size / 2) *
+                                        ctlData.windowsPerReset;
+        }
+    }
+
+    /* initialize cache */
+    chm_set_param(newHandle, CHM_PARAM_MAX_BLOCKS_CACHED,
+                  CHM_MAX_BLOCKS_CACHED);
+
+    return newHandle;
+}
+
+/* close an ITS archive
+ *
+ * Releases everything chm_open attached to the handle: the file, the
+ * mutexes (CHM_MT builds), the LZX state, every cached block, both cache
+ * arrays, and finally the handle itself.  A NULL handle is ignored.
+ */
+void chm_close(struct chmFile *h)
+{
+    int slot;
+
+    if (h == NULL)
+        return;
+
+    /* file */
+    if (h->fd != CHM_NULL_FD)
+        CHM_CLOSE_FILE(h->fd);
+    h->fd = CHM_NULL_FD;
+
+    /* locks */
+#ifdef CHM_MT
+#ifdef WIN32
+    DeleteCriticalSection(&h->mutex);
+    DeleteCriticalSection(&h->lzx_mutex);
+    DeleteCriticalSection(&h->cache_mutex);
+#else
+    pthread_mutex_destroy(&h->mutex);
+    pthread_mutex_destroy(&h->lzx_mutex);
+    pthread_mutex_destroy(&h->cache_mutex);
+#endif
+#endif
+
+    /* decompressor */
+    if (h->lzx_state != NULL)
+        LZXteardown(h->lzx_state);
+    h->lzx_state = NULL;
+
+    /* block cache: free(NULL) is a no-op, so empty slots need no test */
+    if (h->cache_blocks != NULL)
+    {
+        for (slot = 0; slot < h->cache_num_blocks; ++slot)
+            free(h->cache_blocks[slot]);
+        free(h->cache_blocks);
+        h->cache_blocks = NULL;
+    }
+
+    free(h->cache_block_indices);
+    h->cache_block_indices = NULL;
+
+    free(h);
+}
+
+/*
+ * set a parameter on the file handle.
+ * valid parameter types:
+ *      CHM_PARAM_MAX_BLOCKS_CACHED:
+ *                how many decompressed blocks should be cached?  A simple
+ *                caching scheme is used, wherein the index of the block is
+ *                used as a hash value, and hash collision results in the
+ *                invalidation of the previously cached block.
+ *
+ * A value that is not positive is ignored.  If the new cache arrays cannot
+ * be allocated the existing cache is left as it was.  Unknown parameter
+ * types are ignored.
+ */
+void chm_set_param(struct chmFile *h,
+                   int paramType,
+                   int paramVal)
+{
+    switch (paramType)
+    {
+        case CHM_PARAM_MAX_BLOCKS_CACHED:
+        {
+            UChar **newBlocks;
+            UInt64 *newIndices;
+            int     i;
+
+            /* slots are chosen as (block index % slot count), so a count of
+             * zero would divide by zero and a negative count cannot be
+             * allocated */
+            if (paramVal <= 0)
+                break;
+
+            CHM_ACQUIRE_LOCK(h->cache_mutex);
+            if (paramVal != h->cache_num_blocks)
+            {
+                /* allocate new cached blocks */
+                newBlocks = (UChar **)malloc(paramVal * sizeof (UChar *));
+                newIndices = (UInt64 *)malloc(paramVal * sizeof (UInt64));
+                if (newBlocks == NULL || newIndices == NULL)
+                {
+                    /* keep the old cache; fall through to the unlock */
+                    free(newBlocks);
+                    free(newIndices);
+                }
+                else
+                {
+                    for (i=0; i<paramVal; i++)
+                    {
+                        newBlocks[i] = NULL;
+                        newIndices[i] = 0;
+                    }
+
+                    /* re-distribute old cached blocks */
+                    if (h->cache_blocks)
+                    {
+                        for (i=0; i<h->cache_num_blocks; i++)
+                        {
+                            int newSlot;
+
+                            if (h->cache_blocks[i] == NULL)
+                                continue;
+
+                            newSlot = (int)(h->cache_block_indices[i] % paramVal);
+
+                            /* in case of collision, destroy newcomer */
+                            if (newBlocks[newSlot])
+                            {
+                                free(h->cache_blocks[i]);
+                                h->cache_blocks[i] = NULL;
+                            }
+                            else
+                            {
+                                newBlocks[newSlot] = h->cache_blocks[i];
+                                newIndices[newSlot] =
+                                            h->cache_block_indices[i];
+                            }
+                        }
+
+                        free(h->cache_blocks);
+                        free(h->cache_block_indices);
+                    }
+
+                    /* now, set new values */
+                    h->cache_blocks = newBlocks;
+                    h->cache_block_indices = newIndices;
+                    h->cache_num_blocks = paramVal;
+                }
+            }
+            CHM_RELEASE_LOCK(h->cache_mutex);
+            break;
+        }
+
+        default:
+            break;
+    }
+}
+
+/*
+ * helper methods for chm_resolve_object
+ */
+
+/* skip a compressed dword: step over every byte whose top bit is set, then
+ * over the one terminating byte that follows them */
+static void _chm_skip_cword(UChar **pEntry)
+{
+    UChar *p = *pEntry;
+
+    while (*p >= 0x80)
+        ++p;
+
+    /* p is on the terminating byte; leave the cursor just past it */
+    *pEntry = p + 1;
+}
+
+/* skip the data from a PMGL entry: the three compressed dwords that follow
+ * the entry name */
+static void _chm_skip_PMGL_entry_data(UChar **pEntry)
+{
+    int remaining;
+
+    for (remaining = 3; remaining > 0; --remaining)
+        _chm_skip_cword(pEntry);
+}
+
+/* parse a compressed dword: each byte contributes its low 7 bits, most
+ * significant group first; the first byte below 0x80 ends the value.
+ * Leaves the cursor just past that final byte. */
+static UInt64 _chm_parse_cword(UChar **pEntry)
+{
+    UChar  *p = *pEntry;
+    UInt64  value = 0;
+    UChar   byte;
+
+    do
+    {
+        byte = *p++;
+        value = (value << 7) + (byte & 0x7f);
+    } while (byte >= 0x80);
+
+    *pEntry = p;
+    return value;
+}
+
+/* parse a utf-8 string into an ASCII char buffer
+ *
+ * Copies `count` bytes from the cursor into path unchanged, appends a NUL
+ * and advances the cursor by `count`.  path must have room for count + 1
+ * bytes.  Always returns 1.
+ */
+static int _chm_parse_UTF8(UChar **pEntry, UInt64 count, char *path)
+{
+    /* XXX: implement UTF-8 support, including a real mapping onto
+     *      ISO-8859-1?  probably there is a library to do this?  As is
+     *      immediately apparent from the below code, I'm presently not doing
+     *      any special handling for files in which none of the strings contain
+     *      UTF-8 multi-byte characters.
+     */
+    UChar  *src = *pEntry;
+    UInt64  i;
+
+    for (i = 0; i < count; i++)
+        path[i] = (char)src[i];
+    path[count] = '\0';
+
+    *pEntry = src + count;
+    return 1;
+}
+
+/* parse a PMGL entry into a chmUnitInfo struct; return 1 on success.
+ *
+ * An entry is a compressed-dword name length, the name bytes, and three
+ * compressed dwords stored into ui->space, ui->start and ui->length in that
+ * order.  Returns 0 when the name length exceeds CHM_MAX_PATHLEN.
+ */
+static int _chm_parse_PMGL_entry(UChar **pEntry, struct chmUnitInfo *ui)
+{
+    UInt64 nameLen = _chm_parse_cword(pEntry);
+
+    /* refuse names that would not fit in ui->path */
+    if (nameLen > CHM_MAX_PATHLEN)
+        return 0;
+
+    if (_chm_parse_UTF8(pEntry, nameLen, ui->path) == 0)
+        return 0;
+
+    /* the three info fields follow the name */
+    ui->space  = (int)_chm_parse_cword(pEntry);
+    ui->start  = _chm_parse_cword(pEntry);
+    ui->length = _chm_parse_cword(pEntry);
+    return 1;
+}
+
+/* find an exact entry in PMGL; return NULL if we fail
+ *
+ * page_buf holds one directory page of block_len bytes.  Entries are
+ * scanned in order and names are compared case-insensitively against
+ * objPath.  On a match the returned pointer addresses the start of the
+ * entry (its name-length field) inside page_buf.  NULL is returned when the
+ * page header is invalid, when its free-space count does not fit the page,
+ * when a name is too long or runs past the used part of the page, or when
+ * no entry matches.
+ */
+static UChar *_chm_find_in_PMGL(UChar *page_buf,
+                                UInt32 block_len,
+                                const char *objPath)
+{
+    /* XXX: modify this to do a binary search using the nice index structure
+     *      that is provided for us.
+     */
+    struct chmPmglHeader header;
+    unsigned int hremain;
+    UChar *end;
+    UChar *cur;
+    UChar *temp;
+    UInt64 strLen;
+    char buffer[CHM_MAX_PATHLEN+1];
+
+    /* the page must at least hold the header we are about to read */
+    if (block_len < _CHM_PMGL_LEN)
+        return NULL;
+
+    /* figure out where to start and end */
+    cur = page_buf;
+    hremain = _CHM_PMGL_LEN;
+    if (! _unmarshal_pmgl_header(&cur, &hremain, &header))
+        return NULL;
+
+    /* free_space comes straight from the file: it must not push `end`
+     * in front of the first entry or outside the page */
+    if (header.free_space > block_len - _CHM_PMGL_LEN)
+        return NULL;
+    end = page_buf + block_len - (header.free_space);
+
+    /* now, scan progressively */
+    while (cur < end)
+    {
+        /* grab the name */
+        temp = cur;
+        strLen = _chm_parse_cword(&cur);
+        if (strLen > CHM_MAX_PATHLEN)
+            return NULL;
+
+        /* the name has to lie entirely inside the used part of the page */
+        if (cur > end || strLen > (UInt64)(end - cur))
+            return NULL;
+        if (! _chm_parse_UTF8(&cur, strLen, buffer))
+            return NULL;
+
+        /* check if it is the right name */
+        if (! strcasecmp(buffer, objPath))
+            return temp;
+
+        _chm_skip_PMGL_entry_data(&cur);
+    }
+
+    return NULL;
+}
+
+/* find which block should be searched next for the entry; -1 if no block
+ *
+ * page_buf holds one index page of block_len bytes.  Each entry is a name
+ * followed by a page number.  The scan stops at the first name that sorts
+ * after objPath (case-insensitive) and returns the page number of the entry
+ * before it; if no name sorts after objPath the last page number read is
+ * returned.  -1 is returned when the page header is invalid, when its
+ * free-space count does not fit the page, when a name is too long or runs
+ * past the used part of the page, or when the first name already sorts
+ * after objPath.
+ */
+static Int32 _chm_find_in_PMGI(UChar *page_buf,
+                               UInt32 block_len,
+                               const char *objPath)
+{
+    /* XXX: modify this to do a binary search using the nice index structure
+     *      that is provided for us
+     */
+    struct chmPmgiHeader header;
+    unsigned int hremain;
+    int page=-1;
+    UChar *end;
+    UChar *cur;
+    UInt64 strLen;
+    char buffer[CHM_MAX_PATHLEN+1];
+
+    /* the page must at least hold the header we are about to read */
+    if (block_len < _CHM_PMGI_LEN)
+        return -1;
+
+    /* figure out where to start and end */
+    cur = page_buf;
+    hremain = _CHM_PMGI_LEN;
+    if (! _unmarshal_pmgi_header(&cur, &hremain, &header))
+        return -1;
+
+    /* free_space comes straight from the file: it must not push `end`
+     * in front of the first entry or outside the page */
+    if (header.free_space > block_len - _CHM_PMGI_LEN)
+        return -1;
+    end = page_buf + block_len - (header.free_space);
+
+    /* now, scan progressively */
+    while (cur < end)
+    {
+        /* grab the name */
+        strLen = _chm_parse_cword(&cur);
+        if (strLen > CHM_MAX_PATHLEN)
+            return -1;
+
+        /* the name has to lie entirely inside the used part of the page */
+        if (cur > end || strLen > (UInt64)(end - cur))
+            return -1;
+        if (! _chm_parse_UTF8(&cur, strLen, buffer))
+            return -1;
+
+        /* check if it is the right name */
+        if (strcasecmp(buffer, objPath) > 0)
+            return page;
+
+        /* load next value for path */
+        page = (int)_chm_parse_cword(&cur);
+    }
+
+    return page;
+}
+
+ /* resolve a particular object from the archive
+  *
+  * h       - open archive handle
+  * objPath - name of the object to look up
+  * ui      - filled in from the matching directory entry on success
+  *
+  * Starting at h->index_root, index (PMGI) pages are followed until a leaf
+  * (PMGL) page is reached, which is then scanned for objPath.
+  *
+  * Returns CHM_RESOLVE_SUCCESS if the entry was found and parsed, and
+  * CHM_RESOLVE_FAILURE otherwise (bad arguments, allocation or read failure,
+  * unrecognised page type, malformed index, or no such entry).
+  */
+ int chm_resolve_object(struct chmFile *h,
+                        const char *objPath,
+                        struct chmUnitInfo *ui)
+ {
+     /*
+      * XXX: implement caching scheme for dir pages
+      */
+
+     /* Upper bound on the number of pages visited for one lookup.  It only
+      * exists to stop a corrupt index whose pages refer to each other in a
+      * cycle from looping forever; it is far more than a tree addressed by
+      * 32-bit page numbers needs.
+      */
+     enum { max_pages_visited = 64 };
+
+     Int32 curPage;
+     int visited = 0;
+     int result = CHM_RESOLVE_FAILURE;
+     UChar *page_buf;
+
+     if (h == NULL  ||  objPath == NULL  ||  ui == NULL)
+         return CHM_RESOLVE_FAILURE;
+
+     /* buffer to hold whatever page we're looking at */
+     /* RWE 6/12/2003 */
+     page_buf = malloc(h->block_len);
+     if (page_buf == NULL)
+         return CHM_RESOLVE_FAILURE;
+
+     /* starting page */
+     curPage = h->index_root;
+
+     /* until we have either returned or given up; any negative page number
+      * (not just -1) is treated as "no page" so that it is never turned into
+      * a wrapped-around file offset below
+      */
+     while (curPage >= 0)
+     {
+         if (++visited > max_pages_visited)
+             break;
+
+         /* try to fetch the index page */
+         if (_chm_fetch_bytes(h, page_buf,
+                              (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
+                              h->block_len) != h->block_len)
+             break;
+
+         /* now, if it is a leaf node: */
+         if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0)
+         {
+             /* scan block */
+             UChar *pEntry = _chm_find_in_PMGL(page_buf,
+                                               h->block_len,
+                                               objPath);
+
+             /* parse entry; a leaf always ends the search either way */
+             if (pEntry != NULL  &&  _chm_parse_PMGL_entry(&pEntry, ui))
+                 result = CHM_RESOLVE_SUCCESS;
+             break;
+         }
+
+         /* else, if it is a branch node: */
+         else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0)
+             curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath);
+
+         /* else, we are confused.  give up. */
+         else
+             break;
+     }
+
+     free(page_buf);
+     return result;
+ }
+
+/*
+ * utility methods for dealing with compressed data
+ */
+
+ /* get the bounds of a compressed block.  return 0 on failure
+  *
+  * h     - open archive handle
+  * block - index of the compressed block
+  * start - receives the absolute file offset of the block's compressed data
+  * len   - receives the compressed length of the block
+  *
+  * The block's starting offset is read from its reset table entry.  Its end
+  * is the next entry's offset, or, for the last block, the total compressed
+  * length recorded in the reset table.  The resulting length is not range
+  * checked here (it can come out negative for a corrupt table); that is left
+  * to the caller.
+  *
+  * Returns 1 on success, 0 if block is outside the reset table or a table
+  * entry cannot be read.
+  */
+ static int _chm_get_cmpblock_bounds(struct chmFile *h,
+                                     UInt64 block,
+                                     UInt64 *start,
+                                     Int64 *len)
+ {
+     UChar buffer[8], *dummy;
+     unsigned int remain;
+     UInt64 entryOffset;
+
+     /* the reset table has exactly block_count entries; this also keeps
+      * "block_count - 1" from wrapping around when block_count is 0
+      */
+     if (block >= h->reset_table.block_count)
+         return 0;
+
+     /* file offset of this block's 8-byte reset table entry */
+     entryOffset = (UInt64)h->data_offset
+                     + (UInt64)h->rt_unit.start
+                     + (UInt64)h->reset_table.table_offset
+                     + (UInt64)block*8;
+
+     /* unpack the start address */
+     dummy = buffer;
+     remain = 8;
+     if (_chm_fetch_bytes(h, buffer, entryOffset, remain) != remain  ||
+         !_unmarshal_uint64(&dummy, &remain, start))
+         return 0;
+
+     /* for all but the last block, use the reset table */
+     if (block + 1 < h->reset_table.block_count)
+     {
+         /* unpack the end address */
+         dummy = buffer;
+         remain = 8;
+         if (_chm_fetch_bytes(h, buffer, entryOffset + 8, remain) != remain  ||
+             !_unmarshal_int64(&dummy, &remain, len))
+             return 0;
+     }
+
+     /* for the last block, use the span in addition to the reset table */
+     else
+         *len = h->reset_table.compressed_len;
+
+     /* compute the length and absolute start address */
+     *len -= *start;
+     *start += h->data_offset + h->cn_unit.start;
+
+     return 1;
+ }
+
+ /* decompress the block.  must have lzx_mutex.
+  *
+  * h       - open archive handle with an initialised h->lzx_state
+  * block   - index of the block to decompress
+  * ubuffer - on success, receives a pointer to the decompressed data, which
+  *           lives in the handle's block cache (the caller must not free it)
+  *
+  * Blocks between the last reset point (or the most recently decompressed
+  * block, when that is closer) and the requested block are decompressed first
+  * and stored in the cache as a side effect.
+  *
+  * Returns h->reset_table.block_len on success, 0 if a block's bounds cannot
+  * be obtained or are out of range, or if reading or decompressing fails, and
+  * -1 if memory cannot be allocated.  *ubuffer is only meaningful on success.
+  */
+ static Int64 _chm_decompress_block(struct chmFile *h,
+                                    UInt64 block,
+                                    UChar **ubuffer)
+ {
+     /* size of the scratch buffer for compressed input; every compressed
+      * length is checked against this exact value before it is read
+      */
+     unsigned int cbufLen = (unsigned int)h->reset_table.block_len + 6144;
+     UChar *cbuffer = malloc(cbufLen);
+     UInt64 cmpStart;                                    /* compressed start  */
+     Int64 cmpLen;                                       /* compressed len    */
+     int indexSlot;                                      /* cache index slot  */
+     UChar *lbuffer;                                     /* local buffer ptr  */
+     UInt32 blockAlign = (UInt32)(block % h->reset_blkcount); /* reset intvl. aln. */
+     UInt32 i;                                           /* local loop index  */
+
+     if (cbuffer == NULL)
+         return -1;
+
+     /* let the caching system pull its weight! */
+     if (block - blockAlign <= h->lzx_last_block  &&
+         block              >= h->lzx_last_block)
+         blockAlign = (block - h->lzx_last_block);
+
+     /* check if we need previous blocks */
+     if (blockAlign != 0)
+     {
+         /* fetch all required previous blocks since last reset */
+         for (i = blockAlign; i > 0; i--)
+         {
+             UInt32 curBlockIdx = block - i;
+
+             /* check if we most recently decompressed the previous block */
+             if (h->lzx_last_block != curBlockIdx)
+             {
+                 if ((curBlockIdx % h->reset_blkcount) == 0)
+                 {
+ #ifdef CHM_DEBUG
+                     fprintf(stderr, "***RESET (1)***\n");
+ #endif
+                     LZXreset(h->lzx_state);
+                 }
+
+                 indexSlot = (int)((curBlockIdx) % h->cache_num_blocks);
+                 if (! h->cache_blocks[indexSlot])
+                     h->cache_blocks[indexSlot] = (UChar *)malloc((unsigned int)(h->reset_table.block_len));
+                 if (! h->cache_blocks[indexSlot])
+                 {
+                     free(cbuffer);
+                     return -1;
+                 }
+                 h->cache_block_indices[indexSlot] = curBlockIdx;
+                 lbuffer = h->cache_blocks[indexSlot];
+
+                 /* decompress the previous block */
+ #ifdef CHM_DEBUG
+                 fprintf(stderr, "Decompressing block #%4d (EXTRA)\n", (int)curBlockIdx);
+ #endif
+                 if (!_chm_get_cmpblock_bounds(h, curBlockIdx, &cmpStart, &cmpLen) ||
+                     cmpLen < 0                                                    ||
+                     (UInt64)cmpLen > cbufLen                                      ||
+                     _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen      ||
+                     LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
+                                   (int)h->reset_table.block_len) != DECR_OK)
+                 {
+ #ifdef CHM_DEBUG
+                     fprintf(stderr, "   (DECOMPRESS FAILED!)\n");
+ #endif
+                     free(cbuffer);
+                     return (Int64)0;
+                 }
+
+                 h->lzx_last_block = (int)curBlockIdx;
+             }
+         }
+     }
+     else
+     {
+         if ((block % h->reset_blkcount) == 0)
+         {
+ #ifdef CHM_DEBUG
+             fprintf(stderr, "***RESET (2)***\n");
+ #endif
+             LZXreset(h->lzx_state);
+         }
+     }
+
+     /* allocate slot in cache */
+     indexSlot = (int)(block % h->cache_num_blocks);
+     if (! h->cache_blocks[indexSlot])
+         h->cache_blocks[indexSlot] = (UChar *)malloc(((unsigned int)h->reset_table.block_len));
+     if (! h->cache_blocks[indexSlot])
+     {
+         free(cbuffer);
+         return -1;
+     }
+     h->cache_block_indices[indexSlot] = block;
+     lbuffer = h->cache_blocks[indexSlot];
+     *ubuffer = lbuffer;
+
+     /* decompress the block we actually want; the compressed length comes
+      * from the file, so it gets the same range check as the blocks above
+      * before anything is read into cbuffer
+      */
+ #ifdef CHM_DEBUG
+     fprintf(stderr, "Decompressing block #%4d (REAL )\n", (int)block);
+ #endif
+     if (! _chm_get_cmpblock_bounds(h, block, &cmpStart, &cmpLen)          ||
+         cmpLen < 0                                                        ||
+         (UInt64)cmpLen > cbufLen                                          ||
+         _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen          ||
+         LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen,
+                       (int)h->reset_table.block_len) != DECR_OK)
+     {
+ #ifdef CHM_DEBUG
+         fprintf(stderr, "   (DECOMPRESS FAILED!)\n");
+ #endif
+         free(cbuffer);
+         return (Int64)0;
+     }
+     h->lzx_last_block = (int)block;
+
+     /* XXX: modify LZX routines to return the length of the data they
+      * decompressed and return that instead, for an extra sanity check.
+      */
+     free(cbuffer);
+     return h->reset_table.block_len;
+ }
+
+ /* grab a region from a compressed block
+  *
+  * h     - open archive handle
+  * buf   - destination for the uncompressed bytes
+  * start - offset of the region within the uncompressed content
+  * len   - number of bytes wanted
+  *
+  * At most the remainder of the block containing 'start' is delivered per
+  * call, so callers loop until they have everything they need.  The data
+  * comes from the block cache when the block is already there; otherwise the
+  * block is decompressed (creating the LZX decoder on first use).
+  *
+  * Returns the number of bytes copied into buf, or 0 if len is not positive
+  * or the data could not be produced.
+  */
+ static Int64 _chm_decompress_region(struct chmFile *h,
+                                     UChar *buf,
+                                     UInt64 start,
+                                     Int64 len)
+ {
+     UInt64 nBlock, nOffset;
+     UInt64 nLen;
+     Int64 gotLen;          /* signed: the block decompressor may return -1 */
+     UChar *ubuffer = NULL;
+
+     if (len <= 0)
+         return (Int64)0;
+
+     /* block_len is used as a divisor below */
+     if (h->reset_table.block_len == 0)
+         return (Int64)0;
+
+     /* figure out what we need to read */
+     nBlock = start / h->reset_table.block_len;
+     nOffset = start % h->reset_table.block_len;
+     nLen = len;
+     if (nLen > (h->reset_table.block_len - nOffset))
+         nLen = h->reset_table.block_len - nOffset;
+
+     /* if block is cached, return data from it. */
+     CHM_ACQUIRE_LOCK(h->lzx_mutex);
+     CHM_ACQUIRE_LOCK(h->cache_mutex);
+     if (h->cache_block_indices[nBlock % h->cache_num_blocks] == nBlock    &&
+         h->cache_blocks[nBlock % h->cache_num_blocks] != NULL)
+     {
+         memcpy(buf,
+                h->cache_blocks[nBlock % h->cache_num_blocks] + nOffset,
+                (unsigned int)nLen);
+         CHM_RELEASE_LOCK(h->cache_mutex);
+         CHM_RELEASE_LOCK(h->lzx_mutex);
+         return nLen;
+     }
+     CHM_RELEASE_LOCK(h->cache_mutex);
+
+     /* data request not satisfied, so... start up the decompressor machine */
+     if (! h->lzx_state)
+     {
+         int window_size = ffs(h->window_size) - 1;
+         h->lzx_last_block = -1;
+         h->lzx_state = LZXinit(window_size);
+
+         /* without a decoder there is nothing to decompress with */
+         if (! h->lzx_state)
+         {
+             CHM_RELEASE_LOCK(h->lzx_mutex);
+             return (Int64)0;
+         }
+     }
+
+     /* decompress some data; on any failure (0 or -1) ubuffer does not point
+      * at usable data, so nothing may be copied from it
+      */
+     gotLen = _chm_decompress_block(h, nBlock, &ubuffer);
+     if (gotLen <= 0  ||  ubuffer == NULL)
+     {
+         CHM_RELEASE_LOCK(h->lzx_mutex);
+         return (Int64)0;
+     }
+     if ((UInt64)gotLen < nLen)
+         nLen = gotLen;
+     memcpy(buf, ubuffer+nOffset, (unsigned int)nLen);
+     CHM_RELEASE_LOCK(h->lzx_mutex);
+     return nLen;
+ }
+
+ /* retrieve (part of) an object
+  *
+  * h    - open archive handle
+  * ui   - unit info describing the object
+  * buf  - destination buffer; must have room for len bytes
+  * addr - offset within the object at which to start reading
+  * len  - number of bytes wanted
+  *
+  * The request is clipped to the object's length.  Returns the number of
+  * bytes placed in buf, which is 0 if an argument is NULL, if len is not
+  * positive, if addr lies at or beyond the end of the object, or if the
+  * object is compressed and compression is not enabled for this file.
+  */
+ LONGINT64 chm_retrieve_object(struct chmFile *h,
+                               const struct chmUnitInfo *ui,
+                               unsigned char *buf,
+                               LONGUINT64 addr,
+                               LONGINT64 len)
+ {
+     /* must be valid file handle */
+     if (h == NULL)
+         return (Int64)0;
+
+     /* must have something to describe the object and somewhere to put it */
+     if (ui == NULL  ||  buf == NULL)
+         return (Int64)0;
+
+     /* a negative length must never reach the read routines */
+     if (len <= 0)
+         return (Int64)0;
+
+     /* starting address must be in correct range */
+     if (addr >= ui->length)
+         return (Int64)0;
+
+     /* clip length; written as a subtraction so that addr + len cannot wrap */
+     if ((LONGUINT64)len > ui->length - addr)
+         len = ui->length - addr;
+
+     /* if the file is uncompressed, it's simple */
+     if (ui->space == CHM_UNCOMPRESSED)
+     {
+         /* read data */
+         return _chm_fetch_bytes(h,
+                                 buf,
+                                 (UInt64)h->data_offset + (UInt64)ui->start + (UInt64)addr,
+                                 len);
+     }
+
+     /* else if the file is compressed, it's a little trickier */
+     else /* ui->space == CHM_COMPRESSED */
+     {
+         Int64 swath=0, total=0;
+
+         /* if compression is not enabled for this file... */
+         if (! h->compression_enabled)
+             return total;
+
+         do {
+
+             /* swill another mouthful */
+             swath = _chm_decompress_region(h, buf, ui->start + addr, len);
+
+             /* if we didn't get any... */
+             if (swath == 0)
+                 return total;
+
+             /* update stats */
+             total += swath;
+             len -= swath;
+             addr += swath;
+             buf += swath;
+
+         } while (len != 0);
+
+         return total;
+     }
+ }
+
+ /* enumerate the objects in the .chm archive
+  *
+  * h       - open archive handle
+  * what    - selection mask: the low three bits (what & 0x7) are matched
+  *           against the NORMAL/META/SPECIAL classification of each entry,
+  *           and, if any of the bits in 0xF8 are set, the entry must also
+  *           match one of those (FILES/DIRS)
+  * e       - callback invoked for every selected entry
+  * context - passed through to e unchanged
+  *
+  * Listing pages are visited starting at h->index_head and following each
+  * page's block_next link.  The callback stops the enumeration by returning
+  * CHM_ENUMERATOR_FAILURE (this function then returns 0) or
+  * CHM_ENUMERATOR_SUCCESS (returns 1); any other value continues.
+  *
+  * Returns 1 when the enumeration ran to completion or was ended with
+  * CHM_ENUMERATOR_SUCCESS, and 0 on bad arguments, allocation failure, read
+  * failure or a malformed page.
+  */
+ int chm_enumerate(struct chmFile *h,
+                   int what,
+                   CHM_ENUMERATOR e,
+                   void *context)
+ {
+     Int32 curPage;
+
+     /* buffer to hold whatever page we're looking at */
+     /* RWE 6/12/2003 */
+     UChar *page_buf;
+     struct chmPmglHeader header;
+     UChar *end;
+     UChar *cur;
+     unsigned int lenRemain;
+     UInt64 ui_path_len;
+
+     /* the current ui */
+     struct chmUnitInfo ui;
+     int type_bits = (what & 0x7);
+     int filter_bits = (what & 0xF8);
+
+     if (h == NULL  ||  e == NULL)
+         return 0;
+
+     /* a block that cannot even hold a page header is not parseable */
+     if (h->block_len < _CHM_PMGL_LEN)
+         return 0;
+
+     page_buf = malloc((unsigned int)h->block_len);
+     if (page_buf == NULL)
+         return 0;
+
+     /* starting page */
+     curPage = h->index_head;
+
+     /* until we have either returned or given up; any negative page number
+      * ends the walk so that it is never turned into a wrapped file offset
+      */
+     while (curPage >= 0)
+     {
+
+         /* try to fetch the index page */
+         if (_chm_fetch_bytes(h,
+                              page_buf,
+                              (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
+                              h->block_len) != h->block_len)
+         {
+             free(page_buf);
+             return 0;
+         }
+
+         /* figure out start and end for this page */
+         cur = page_buf;
+         lenRemain = _CHM_PMGL_LEN;
+         if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
+         {
+             free(page_buf);
+             return 0;
+         }
+
+         /* free_space comes from the file; more than a whole block of it
+          * would put 'end' in front of the buffer
+          */
+         if (header.free_space > h->block_len)
+         {
+             free(page_buf);
+             return 0;
+         }
+         end = page_buf + h->block_len - (header.free_space);
+
+         /* loop over this page */
+         while (cur < end)
+         {
+             ui.flags = 0;
+
+             if (! _chm_parse_PMGL_entry(&cur, &ui))
+             {
+                 free(page_buf);
+                 return 0;
+             }
+
+             /* get the length of the path */
+             ui_path_len = strlen(ui.path);
+
+             /* DIRS end in a slash, everything else (including an empty
+              * path, which has no last character to look at) is FILES
+              */
+             if (ui_path_len > 0  &&  ui.path[ui_path_len-1] == '/')
+                 ui.flags |= CHM_ENUMERATE_DIRS;
+             else
+                 ui.flags |= CHM_ENUMERATE_FILES;
+
+             /* check for NORMAL vs. META */
+             if (ui.path[0] == '/')
+             {
+
+                 /* check for NORMAL vs. SPECIAL */
+                 if (ui.path[1] == '#'  ||  ui.path[1] == '$')
+                     ui.flags |= CHM_ENUMERATE_SPECIAL;
+                 else
+                     ui.flags |= CHM_ENUMERATE_NORMAL;
+             }
+             else
+                 ui.flags |= CHM_ENUMERATE_META;
+
+             if (! (type_bits & ui.flags))
+                 continue;
+
+             if (filter_bits && ! (filter_bits & ui.flags))
+                 continue;
+
+             /* call the enumerator */
+             {
+                 int status = (*e)(h, &ui, context);
+                 switch (status)
+                 {
+                     case CHM_ENUMERATOR_FAILURE:
+                         free(page_buf);
+                         return 0;
+                     case CHM_ENUMERATOR_CONTINUE:
+                         break;
+                     case CHM_ENUMERATOR_SUCCESS:
+                         free(page_buf);
+                         return 1;
+                     default:
+                         break;
+                 }
+             }
+         }
+
+         /* advance to next page */
+         curPage = header.block_next;
+     }
+
+     free(page_buf);
+     return 1;
+ }
+
+ /* enumerate the immediate contents of one directory of the archive
+  *
+  * h       - open archive handle
+  * prefix  - directory to list; a trailing '/' is added if it is missing
+  * what    - selection mask, interpreted as in chm_enumerate(): the bits in
+  *           0x7 select by NORMAL/META/SPECIAL, the bits in 0xF8 (if any are
+  *           set) additionally select by FILES/DIRS
+  * e       - callback invoked for every selected entry
+  * context - passed through to e unchanged
+  *
+  * The listing pages are scanned from h->index_head.  Reporting starts at the
+  * first zero-length entry whose path begins with the prefix (the entry for
+  * the directory itself, which is not reported when its path equals the
+  * prefix) and stops at the first later entry that does not begin with the
+  * prefix.  Entries lying below a subdirectory that has already been seen are
+  * skipped.  Prefix comparisons are case-insensitive.
+  *
+  * The callback stops the enumeration by returning CHM_ENUMERATOR_FAILURE
+  * (this function then returns 0) or CHM_ENUMERATOR_SUCCESS (returns 1).
+  *
+  * Returns 1 when the scan completed or was ended with
+  * CHM_ENUMERATOR_SUCCESS, and 0 on bad arguments, allocation failure, read
+  * failure or a malformed page.
+  */
+ int chm_enumerate_dir(struct chmFile *h,
+                       const char *prefix,
+                       int what,
+                       CHM_ENUMERATOR e,
+                       void *context)
+ {
+     /*
+      * XXX: do this efficiently (i.e. using the tree index)
+      */
+
+     Int32 curPage;
+
+     /* buffer to hold whatever page we're looking at */
+     /* RWE 6/12/2003 */
+     UChar *page_buf;
+     struct chmPmglHeader header;
+     UChar *end;
+     UChar *cur;
+     unsigned int lenRemain;
+
+     /* set to 1 once we've started */
+     int it_has_begun=0;
+
+     /* the current ui */
+     struct chmUnitInfo ui;
+     int type_bits = (what & 0x7);
+     int filter_bits = (what & 0xF8);
+     UInt64 ui_path_len;
+     int ui_is_dir;
+
+     /* the length of the prefix */
+     char prefixRectified[CHM_MAX_PATHLEN+1];
+     int prefixLen;
+     size_t rawPrefixLen;
+     int needSlash;
+     char lastPath[CHM_MAX_PATHLEN+1];
+     int lastPathLen;
+
+     if (h == NULL  ||  prefix == NULL  ||  e == NULL)
+         return 0;
+
+     /* a block that cannot even hold a page header is not parseable */
+     if (h->block_len < _CHM_PMGL_LEN)
+         return 0;
+
+     /* initialize pathname state: the prefix, with a trailing slash added if
+      * needed, has to fit in prefixRectified together with its terminator.
+      * A prefix longer than CHM_MAX_PATHLEN is reported as an empty
+      * directory; this assumes stored paths never exceed CHM_MAX_PATHLEN, as
+      * the PMGL lookup in this file enforces -- TODO confirm for the entry
+      * parser as well.
+      */
+     rawPrefixLen = strlen(prefix);
+     needSlash = (rawPrefixLen != 0  &&  prefix[rawPrefixLen-1] != '/');
+     if (rawPrefixLen + (size_t)needSlash > CHM_MAX_PATHLEN)
+         return 1;
+     memcpy(prefixRectified, prefix, rawPrefixLen);
+     if (needSlash)
+         prefixRectified[rawPrefixLen++] = '/';
+     prefixRectified[rawPrefixLen] = '\0';
+     prefixLen = (int)rawPrefixLen;
+     lastPath[0] = '\0';
+     lastPathLen = -1;
+
+     page_buf = malloc((unsigned int)h->block_len);
+     if (page_buf == NULL)
+         return 0;
+
+     /* starting page */
+     curPage = h->index_head;
+
+     /* until we have either returned or given up; any negative page number
+      * ends the walk so that it is never turned into a wrapped file offset
+      */
+     while (curPage >= 0)
+     {
+
+         /* try to fetch the index page */
+         if (_chm_fetch_bytes(h,
+                              page_buf,
+                              (UInt64)h->dir_offset + (UInt64)curPage*h->block_len,
+                              h->block_len) != h->block_len)
+         {
+             free(page_buf);
+             return 0;
+         }
+
+         /* figure out start and end for this page */
+         cur = page_buf;
+         lenRemain = _CHM_PMGL_LEN;
+         if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header))
+         {
+             free(page_buf);
+             return 0;
+         }
+
+         /* free_space comes from the file; more than a whole block of it
+          * would put 'end' in front of the buffer
+          */
+         if (header.free_space > h->block_len)
+         {
+             free(page_buf);
+             return 0;
+         }
+         end = page_buf + h->block_len - (header.free_space);
+
+         /* loop over this page */
+         while (cur < end)
+         {
+             ui.flags = 0;
+
+             if (! _chm_parse_PMGL_entry(&cur, &ui))
+             {
+                 free(page_buf);
+                 return 0;
+             }
+
+             /* check if we should start */
+             if (! it_has_begun)
+             {
+                 if (ui.length == 0  &&  strncasecmp(ui.path, prefixRectified, prefixLen) == 0)
+                     it_has_begun = 1;
+                 else
+                     continue;
+
+                 /* the directory's own entry is not part of its listing */
+                 if (ui.path[prefixLen] == '\0')
+                     continue;
+             }
+
+             /* check if we should stop */
+             else
+             {
+                 if (strncasecmp(ui.path, prefixRectified, prefixLen) != 0)
+                 {
+                     free(page_buf);
+                     return 1;
+                 }
+             }
+
+             /* check if we should include this path: anything below the
+              * subdirectory we saw last belongs to that subdirectory
+              */
+             if (lastPathLen != -1)
+             {
+                 if (strncasecmp(ui.path, lastPath, lastPathLen) == 0)
+                     continue;
+             }
+
+             /* get the length of the path */
+             ui_path_len = strlen(ui.path);
+             ui_is_dir = (ui_path_len > 0  &&  ui.path[ui_path_len-1] == '/');
+
+             /* only a directory can have entries below it, so only a
+              * directory is remembered; remembering a file would wrongly
+              * hide a sibling whose name merely starts with the file's name
+              * (e.g. "foo" followed by "foo.txt")
+              */
+             if (ui_is_dir)
+             {
+                 strncpy(lastPath, ui.path, CHM_MAX_PATHLEN);
+                 lastPath[CHM_MAX_PATHLEN] = '\0';
+                 lastPathLen = strlen(lastPath);
+             }
+             else
+                 lastPathLen = -1;
+
+             /* DIRS end in a slash, everything else (including an empty
+              * path, which has no last character to look at) is FILES
+              */
+             if (ui_is_dir)
+                 ui.flags |= CHM_ENUMERATE_DIRS;
+             else
+                 ui.flags |= CHM_ENUMERATE_FILES;
+
+             /* check for NORMAL vs. META */
+             if (ui.path[0] == '/')
+             {
+
+                 /* check for NORMAL vs. SPECIAL */
+                 if (ui.path[1] == '#'  ||  ui.path[1] == '$')
+                     ui.flags |= CHM_ENUMERATE_SPECIAL;
+                 else
+                     ui.flags |= CHM_ENUMERATE_NORMAL;
+             }
+             else
+                 ui.flags |= CHM_ENUMERATE_META;
+
+             if (! (type_bits & ui.flags))
+                 continue;
+
+             if (filter_bits && ! (filter_bits & ui.flags))
+                 continue;
+
+             /* call the enumerator */
+             {
+                 int status = (*e)(h, &ui, context);
+                 switch (status)
+                 {
+                     case CHM_ENUMERATOR_FAILURE:
+                         free(page_buf);
+                         return 0;
+                     case CHM_ENUMERATOR_CONTINUE:
+                         break;
+                     case CHM_ENUMERATOR_SUCCESS:
+                         free(page_buf);
+                         return 1;
+                     default:
+                         break;
+                 }
+             }
+         }
+
+         /* advance to next page */
+         curPage = header.block_next;
+     }
+
+     free(page_buf);
+     return 1;
+ }