From 8066e87c6ca6eb12af5c427b5fb73ccfe6c97d04 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Wed, 1 Oct 2014 20:48:00 -0500 Subject: Use libmagic for mime type determination This relates to Bug 656, Bug 661, and others --- tdeio/tdeio/CMakeLists.txt | 2 +- tdeio/tdeio/kmimemagic.cpp | 2258 ++++---------------------------------------- tdeio/tdeio/kmimemagic.h | 4 +- 3 files changed, 167 insertions(+), 2097 deletions(-) (limited to 'tdeio/tdeio') diff --git a/tdeio/tdeio/CMakeLists.txt b/tdeio/tdeio/CMakeLists.txt index 5f04568c5..2b90c0107 100644 --- a/tdeio/tdeio/CMakeLists.txt +++ b/tdeio/tdeio/CMakeLists.txt @@ -117,7 +117,7 @@ set( ${target}_SRCS tde_add_library( ${target} STATIC_PIC AUTOMOC SOURCES ${${target}_SRCS} - LINK ${GAMIN_LIBRARIES} + LINK magic ${GAMIN_LIBRARIES} ) diff --git a/tdeio/tdeio/kmimemagic.cpp b/tdeio/tdeio/kmimemagic.cpp index 6aae4e39d..ea0a30498 100644 --- a/tdeio/tdeio/kmimemagic.cpp +++ b/tdeio/tdeio/kmimemagic.cpp @@ -1,4 +1,7 @@ -/* This file is part of the KDE libraries +/* This file is part of the TDE libraries + Copyright (C) 2014 Timothy Pearson + + Small portions (the original KDE interface and utime code) are: Copyright (C) 2000 Fritz Elfert Copyright (C) 2004 Allan Sandfeld Jensen @@ -26,29 +29,23 @@ #include #include -static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb); -static void process(struct config_rec* conf, const TQString &); -static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes); -static int tagmagic(unsigned char *buf, int nbytes); -static int textmagic(struct config_rec* conf, unsigned char *, int); +#include -static void tryit(struct config_rec* conf, unsigned char *buf, int nb); -static int match(struct config_rec* conf, unsigned char *, int); +static void process(struct config_rec* conf, const TQString &); KMimeMagic* KMimeMagic::s_pSelf; static KStaticDeleter kmimemagicsd; -KMimeMagic* KMimeMagic::self() -{ - if( !s_pSelf ) - initStatic(); - return s_pSelf; +KMimeMagic* KMimeMagic::self() { + if( !s_pSelf ) { + initStatic(); + } + return s_pSelf; } -void KMimeMagic::initStatic() -{ - s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); - s_pSelf->setFollowLinks( true ); +void KMimeMagic::initStatic() { + s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() ); + s_pSelf->setFollowLinks( true ); } #include @@ -66,449 +63,16 @@ void KMimeMagic::initStatic() #include #include -//#define MIME_MAGIC_DEBUG_TABLE // untested - -// Uncomment to debug the config-file parsing phase -//#define DEBUG_APPRENTICE -// Uncomment to debug the matching phase -//#define DEBUG_MIMEMAGIC - -#if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE) -#define DEBUG_LINENUMBERS -#endif +#define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ -/* - * Buitltin Mime types - */ -#define MIME_BINARY_UNKNOWN "application/octet-stream" -#define MIME_BINARY_UNREADABLE "application/x-unreadable" -#define MIME_BINARY_ZEROSIZE "application/x-zerosize" -#define MIME_TEXT_UNKNOWN "text/plain" -#define MIME_TEXT_PLAIN "text/plain" #define MIME_INODE_DIR "inode/directory" #define MIME_INODE_CDEV "inode/chardevice" #define MIME_INODE_BDEV "inode/blockdevice" #define MIME_INODE_FIFO "inode/fifo" #define MIME_INODE_LINK "inode/link" #define MIME_INODE_SOCK "inode/socket" -// Following should go in magic-file - Fritz -#define MIME_APPL_TROFF "application/x-troff" -#define MIME_APPL_TAR "application/x-tar" -#define MIME_TEXT_FORTRAN "text/x-fortran" - -#define MAXMIMESTRING 256 - -#define HOWMANY 4000 /* big enough to recognize most WWW files, and skip GPL-headers */ -#define MAXDESC 50 /* max leng of text description */ -#define MAXstring 64 /* max leng of "string" types */ - -typedef union VALUETYPE { - unsigned char b; - unsigned short h; - unsigned long l; - char s[MAXstring]; - unsigned char hs[2]; /* 2 bytes of a fixed-endian "short" */ - unsigned char hl[4]; /* 2 bytes of a fixed-endian "long" */ -} VALUETYPE; - -struct magic { - struct magic *next; /* link to next entry */ -#ifdef DEBUG_LINENUMBERS - int lineno; /* line number from magic file - doesn't say from which one ;) */ -#endif - - short flag; -#define INDIR 1 /* if '>(...)' appears, */ -#define UNSIGNED 2 /* comparison is unsigned */ - short cont_level; /* level of ">" */ - struct { - char type; /* byte short long */ - long offset; /* offset from indirection */ - } in; - long offset; /* offset to magic number */ - unsigned char reln; /* relation (0=eq, '>'=gt, etc) */ - char type; /* int, short, long or string. */ - char vallen; /* length of string value, if any */ -#define BYTE 1 -#define SHORT 2 -#define LONG 4 -#define STRING 5 -#define DATE 6 -#define BESHORT 7 -#define BELONG 8 -#define BEDATE 9 -#define LESHORT 10 -#define LELONG 11 -#define LEDATE 12 - VALUETYPE value; /* either number or string */ - unsigned long mask; /* mask before comparison with value */ - char nospflag; /* suppress space character */ - - /* NOTE: this string is suspected of overrunning - find it! */ - char desc[MAXDESC]; /* description */ -}; - -/* - * data structures for tar file recognition - * -------------------------------------------------------------------------- - * Header file for public domain tar (tape archive) program. - * - * @(#)tar.h 1.20 86/10/29 Public Domain. Created 25 August 1985 by John - * Gilmore, ihnp4!hoptoad!gnu. - * - * Header block on tape. - * - * I'm going to use traditional DP naming conventions here. A "block" is a big - * chunk of stuff that we do I/O on. A "record" is a piece of info that we - * care about. Typically many "record"s fit into a "block". - */ -#define RECORDSIZE 512 -#define NAMSIZ 100 -#define TUNMLEN 32 -#define TGNMLEN 32 - -union record { - char charptr[RECORDSIZE]; - struct header { - char name[NAMSIZ]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char chksum[8]; - char linkflag; - char linkname[NAMSIZ]; - char magic[8]; - char uname[TUNMLEN]; - char gname[TGNMLEN]; - char devmajor[8]; - char devminor[8]; - } header; -}; - -/* The magic field is filled with this if uname and gname are valid. */ -#define TMAGIC "ustar " /* 7 chars and a null */ - -/* - * file-function prototypes - */ -static int is_tar(unsigned char *, int); -static unsigned long signextend(struct magic *, unsigned long); -static int getvalue(struct magic *, char **); -static int hextoint(int); -static char *getstr(char *, char *, int, int *); -static int mget(union VALUETYPE *, unsigned char *, struct magic *, int); -static int mcheck(union VALUETYPE *, struct magic *); -static int mconvert(union VALUETYPE *, struct magic *); -static long from_oct(int, char *); - -/* - * includes for ASCII substring recognition formerly "names.h" in file - * command - * - * Original notes: names and types used by ascmagic in file(1). - * These tokens are - * here because they can appear anywhere in the first HOWMANY bytes, while - * tokens in /etc/magic must appear at fixed offsets into the file. Don't - * make HOWMANY too high unless you have a very fast CPU. - */ - -/* these types are used calculate index to 'types': keep em in sync! */ -/* HTML inserted in first because this is a web server module now */ -/* ENG removed because stupid */ -#define L_HTML 0x001 /* HTML */ -#define L_C 0x002 /* first and foremost on UNIX */ -#define L_MAKE 0x004 /* Makefiles */ -#define L_PLI 0x008 /* PL/1 */ -#define L_MACH 0x010 /* some kinda assembler */ -#define L_PAS 0x020 /* Pascal */ -#define L_JAVA 0x040 /* Java source */ -#define L_CPP 0x080 /* C++ */ -#define L_MAIL 0x100 /* Electronic mail */ -#define L_NEWS 0x200 /* Usenet Netnews */ -#define L_DIFF 0x400 /* Output of diff */ -#define L_OBJC 0x800 /* Objective C */ - -// Note: this is not a type, it's just used to mark items that should count more -#define FLAG_STRONG 0x1000 - -#define P_HTML 0 /* HTML */ -#define P_C 1 /* first and foremost on UNIX */ -#define P_MAKE 2 /* Makefiles */ -#define P_PLI 3 /* PL/1 */ -#define P_MACH 4 /* some kinda assembler */ -#define P_PAS 5 /* Pascal */ -#define P_JAVA 6 /* Java source */ -#define P_CPP 7 /* C++ */ -#define P_MAIL 8 /* Electronic mail */ -#define P_NEWS 9 /* Usenet Netnews */ -#define P_DIFF 10 /* Output of diff */ -#define P_OBJC 11 /* Objective C */ - -typedef struct asc_type { - const char *type; - int kwords; - double weight; -} asc_type; - -static const asc_type types[] = { - { "text/html", 19, 2 }, // 10 items but 10 different words only - { "text/x-c", 13, 1 }, - { "text/x-makefile", 4, 1.9 }, - { "text/x-pli", 1, 3 }, - { "text/x-assembler", 6, 2.1 }, - { "text/x-pascal", 1, 1 }, - { "text/x-java", 12, 1 }, - { "text/x-c++", 19, 1 }, - { "message/rfc822", 4, 1.9 }, - { "message/news", 3, 2 }, - { "text/x-diff", 4, 2 }, - { "text/x-objc", 10, 1 } -}; - -#define NTYPES (sizeof(types)/sizeof(asc_type)) - -static struct names { - const char *name; - short type; -} const names[] = { - { - "' for tokens, so this one won't work { - ">From", L_MAIL - },*/ - { - "Return-Path:", L_MAIL - }, - { - "Cc:", L_MAIL - }, - { - "Newsgroups:", L_NEWS - }, - { - "Path:", L_NEWS - }, - { - "Organization:", L_NEWS - }, - { - "---", L_DIFF - }, - { - "+++", L_DIFF - }, - { - "***", L_DIFF - }, - { - "@@", L_DIFF - }, - { - NULL, 0 - } -}; +#define MIME_BINARY_UNREADABLE "application/x-unreadable" +#define MIME_BINARY_ZEROSIZE "application/x-zerosize" /** * Configuration for the utime() problem. @@ -520,830 +84,84 @@ static struct names { * anywhere else, because that breaks archiving programs, that check the ctime. * Hence this class, to configure the directories where the atime should be restored. */ -class KMimeMagicUtimeConf -{ -public: - KMimeMagicUtimeConf() - { - tmpDirs << TQString::fromLatin1("/tmp"); // default value - - // The trick is that we also don't want the user to override globally set - // directories. So we have to misuse TDEStandardDirs :} - TQStringList confDirs = TDEGlobal::dirs()->resourceDirs( "config" ); - if ( !confDirs.isEmpty() ) - { - TQString globalConf = confDirs.last() + "kmimemagicrc"; - if ( TQFile::exists( globalConf ) ) - { - KSimpleConfig cfg( globalConf ); - cfg.setGroup( "Settings" ); - tmpDirs = cfg.readListEntry( "atimeDirs" ); - } - if ( confDirs.count() > 1 ) - { - TQString localConf = confDirs.first() + "kmimemagicrc"; - if ( TQFile::exists( localConf ) ) - { - KSimpleConfig cfg( localConf ); - cfg.setGroup( "Settings" ); - tmpDirs += cfg.readListEntry( "atimeDirs" ); - } - } - for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) - { - TQString dir = *it; - if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) - (*it) += '/'; - } - } -#if 0 - // debug code - for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) - kdDebug(7018) << " atimeDir: " << *it << endl; -#endif - } - - bool restoreAccessTime( const TQString & file ) const - { - TQString dir = file.left( file.findRev( '/' ) ); - bool res = tmpDirs.contains( dir ); - //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; - return res; - } - TQStringList tmpDirs; -}; - -/* current config */ -struct config_rec { - bool followLinks; - TQString resultBuf; - int accuracy; - - struct magic *magic, /* head of magic config list */ - *last; - KMimeMagicUtimeConf * utimeConf; -}; - -#ifdef MIME_MAGIC_DEBUG_TABLE -static void -test_table() -{ - struct magic *m; - struct magic *prevm = NULL; - - kdDebug(7018) << "test_table : started" << endl; - for (m = conf->magic; m; m = m->next) { - if (isprint((((unsigned long) m) >> 24) & 255) && - isprint((((unsigned long) m) >> 16) & 255) && - isprint((((unsigned long) m) >> 8) & 255) && - isprint(((unsigned long) m) & 255)) { - //debug("test_table: POINTER CLOBBERED! " - //"m=\"%c%c%c%c\" line=%d", - (((unsigned long) m) >> 24) & 255, - (((unsigned long) m) >> 16) & 255, - (((unsigned long) m) >> 8) & 255, - ((unsigned long) m) & 255, - prevm ? prevm->lineno : -1); - break; - } - prevm = m; - } -} -#endif - -#define EATAB {while (isascii((unsigned char) *l) && \ - isspace((unsigned char) *l)) ++l;} - -int KMimeMagic::parse_line(char *line, int *rule, int lineno) -{ - int ws_offset; - - /* delete newline */ - if (line[0]) { - line[strlen(line) - 1] = '\0'; - } - /* skip leading whitespace */ - ws_offset = 0; - while (line[ws_offset] && isspace(line[ws_offset])) { - ws_offset++; - } - - /* skip blank lines */ - if (line[ws_offset] == 0) { - return 0; - } - /* comment, do not parse */ - if (line[ws_offset] == '#') - return 0; - - /* if we get here, we're going to use it so count it */ - (*rule)++; - - /* parse it */ - return (parse(line + ws_offset, lineno) != 0); -} - -/* - * apprentice - load configuration from the magic file. - */ -int KMimeMagic::apprentice( const TQString& magicfile ) -{ - FILE *f; - char line[BUFSIZ + 1]; - int errs = 0; - int lineno; - int rule = 0; - TQCString fname; - - if (magicfile.isEmpty()) - return -1; - fname = TQFile::encodeName(magicfile); - f = fopen(fname, "r"); - if (f == NULL) { - kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl; - return -1; - } - - /* parse it */ - for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++) - if (parse_line(line, &rule, lineno)) - errs++; - - fclose(f); - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; -#endif - -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - - return (errs ? -1 : 0); -} - -int KMimeMagic::buff_apprentice(char *buff) -{ - char line[BUFSIZ + 2]; - int errs = 0; - int lineno = 1; - char *start = buff; - char *end; - int count = 0; - int rule = 0; - int len = strlen(buff) + 1; - - /* parse it */ - do { - count = (len > BUFSIZ-1)?BUFSIZ-1:len; - strncpy(line, start, count); - line[count] = '\0'; - if ((end = strchr(line, '\n'))) { - *(++end) = '\0'; - count = strlen(line); - } else - strcat(line, "\n"); - start += count; - len -= count; - if (parse_line(line, &rule, lineno)) - errs++; - lineno++; - } while (len > 0); - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl; -#endif - -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - - return (errs ? -1 : 0); -} - -/* - * extend the sign bit if the comparison is to be signed - */ -static unsigned long -signextend(struct magic *m, unsigned long v) -{ - if (!(m->flag & UNSIGNED)) - switch (m->type) { - /* - * Do not remove the casts below. They are vital. - * When later compared with the data, the sign - * extension must have happened. - */ - case BYTE: - v = (char) v; - break; - case SHORT: - case BESHORT: - case LESHORT: - v = (short) v; - break; - case DATE: - case BEDATE: - case LEDATE: - case LONG: - case BELONG: - case LELONG: - v = (long) v; - break; - case STRING: - break; - default: - kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl; - return 998; //good value - } - return v; -} - -/* - * parse one line from magic file, put into magic[index++] if valid - */ -int KMimeMagic::parse(char *l, int -#ifdef DEBUG_LINENUMBERS - lineno -#endif - ) -{ - int i = 0; - struct magic *m; - char *t, - *s; - /* allocate magic structure entry */ - if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) { - kdError(7018) << "parse: Out of memory." << endl; - return -1; - } - /* append to linked list */ - m->next = NULL; - if (!conf->magic || !conf->last) { - conf->magic = conf->last = m; - } else { - conf->last->next = m; - conf->last = m; - } - - /* set values in magic structure */ - m->flag = 0; - m->cont_level = 0; -#ifdef DEBUG_LINENUMBERS - m->lineno = lineno; -#endif - - while (*l == '>') { - ++l; /* step over */ - m->cont_level++; - } - - if (m->cont_level != 0 && *l == '(') { - ++l; /* step over */ - m->flag |= INDIR; - } - /* get offset, then skip over it */ - m->offset = (int) strtol(l, &t, 0); - if (l == t) { - kdError(7018) << "parse: offset " << l << " invalid" << endl; - } - l = t; - - if (m->flag & INDIR) { - m->in.type = LONG; - m->in.offset = 0; - /* - * read [.lbs][+-]nnnnn) - */ - if (*l == '.') { - switch (*++l) { - case 'l': - m->in.type = LONG; - break; - case 's': - m->in.type = SHORT; - break; - case 'b': - m->in.type = BYTE; - break; - default: - kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl; - break; +class KMimeMagicUtimeConf { + public: + KMimeMagicUtimeConf() { + tmpDirs << TQString::fromLatin1("/tmp"); // default value + + // The trick is that we also don't want the user to override globally set + // directories. So we have to misuse TDEStandardDirs :} + TQStringList confDirs = TDEGlobal::dirs()->resourceDirs( "config" ); + if ( !confDirs.isEmpty() ) { + TQString globalConf = confDirs.last() + "kmimemagicrc"; + if ( TQFile::exists( globalConf ) ) { + KSimpleConfig cfg( globalConf ); + cfg.setGroup( "Settings" ); + tmpDirs = cfg.readListEntry( "atimeDirs" ); + } + if ( confDirs.count() > 1 ) { + TQString localConf = confDirs.first() + "kmimemagicrc"; + if ( TQFile::exists( localConf ) ) { + KSimpleConfig cfg( localConf ); + cfg.setGroup( "Settings" ); + tmpDirs += cfg.readListEntry( "atimeDirs" ); + } + } + for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) { + TQString dir = *it; + if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' ) { + (*it) += '/'; + } + } } - l++; + #if 0 + // debug code + for ( TQStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it ) { + kdDebug(7018) << " atimeDir: " << *it << endl; + } + #endif } - s = l; - if (*l == '+' || *l == '-') - l++; - if (isdigit((unsigned char) *l)) { - m->in.offset = strtol(l, &t, 0); - if (*s == '-') - m->in.offset = -m->in.offset; - } else - t = l; - if (*t++ != ')') { - kdError(7018) << "parse: missing ')' in indirect offset" << endl; + + bool restoreAccessTime( const TQString & file ) const { + TQString dir = file.left( file.findRev( '/' ) ); + bool res = tmpDirs.contains( dir ); + //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl; + return res; } - l = t; - } - while (isascii((unsigned char) *l) && isdigit((unsigned char) *l)) - ++l; - EATAB; - -#define NBYTE 4 -#define NSHORT 5 -#define NLONG 4 -#define NSTRING 6 -#define NDATE 4 -#define NBESHORT 7 -#define NBELONG 6 -#define NBEDATE 6 -#define NLESHORT 7 -#define NLELONG 6 -#define NLEDATE 6 + TQStringList tmpDirs; +}; - if (*l == 'u') { - ++l; - m->flag |= UNSIGNED; +TQString fixupMagicOutput(TQString &mime) { + if (mime == "inode/x-empty") { + return MIME_BINARY_ZEROSIZE; } - /* get type, skip it */ - if (strncmp(l, "byte", NBYTE) == 0) { - m->type = BYTE; - l += NBYTE; - } else if (strncmp(l, "short", NSHORT) == 0) { - m->type = SHORT; - l += NSHORT; - } else if (strncmp(l, "long", NLONG) == 0) { - m->type = LONG; - l += NLONG; - } else if (strncmp(l, "string", NSTRING) == 0) { - m->type = STRING; - l += NSTRING; - } else if (strncmp(l, "date", NDATE) == 0) { - m->type = DATE; - l += NDATE; - } else if (strncmp(l, "beshort", NBESHORT) == 0) { - m->type = BESHORT; - l += NBESHORT; - } else if (strncmp(l, "belong", NBELONG) == 0) { - m->type = BELONG; - l += NBELONG; - } else if (strncmp(l, "bedate", NBEDATE) == 0) { - m->type = BEDATE; - l += NBEDATE; - } else if (strncmp(l, "leshort", NLESHORT) == 0) { - m->type = LESHORT; - l += NLESHORT; - } else if (strncmp(l, "lelong", NLELONG) == 0) { - m->type = LELONG; - l += NLELONG; - } else if (strncmp(l, "ledate", NLEDATE) == 0) { - m->type = LEDATE; - l += NLEDATE; - } else { - kdError(7018) << "parse: type " << l << " invalid" << endl; - return -1; + else if (mime.contains("no read permission")) { + return MIME_BINARY_UNREADABLE; } - /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ - if (*l == '&') { - ++l; - m->mask = signextend(m, strtol(l, &l, 0)); - } else - m->mask = (unsigned long) ~0L; - EATAB; - - switch (*l) { - case '>': - case '<': - /* Old-style anding: "0 byte &0x80 dynamically linked" */ - case '&': - case '^': - case '=': - m->reln = *l; - ++l; - break; - case '!': - if (m->type != STRING) { - m->reln = *l; - ++l; - break; - } - /* FALL THROUGH */ - default: - if (*l == 'x' && isascii((unsigned char) l[1]) && - isspace((unsigned char) l[1])) { - m->reln = *l; - ++l; - goto GetDesc; /* Bill The Cat */ - } - m->reln = '='; - break; + else { + return mime; } - EATAB; - - if (getvalue(m, &l)) - return -1; - /* - * now get last part - the description - */ - GetDesc: - EATAB; - if (l[0] == '\b') { - ++l; - m->nospflag = 1; - } else if ((l[0] == '\\') && (l[1] == 'b')) { - ++l; - ++l; - m->nospflag = 1; - } else - m->nospflag = 0; - // Copy description - until EOL or '#' (for comments) - while (*l != '\0' && *l != '#' && i < MAXDESC-1) - m->desc[i++] = *l++; - m->desc[i] = '\0'; - // Remove trailing spaces - while (--i>0 && isspace( m->desc[i] )) - m->desc[i] = '\0'; - - // old code - //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ; - -#ifdef DEBUG_APPRENTICE - kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl; -#endif - return 0; -} - -/* - * Read a numeric value from a pointer, into the value union of a magic - * pointer, according to the magic type. Update the string pointer to point - * just after the number read. Return 0 for success, non-zero for failure. - */ -static int -getvalue(struct magic *m, char **p) -{ - int slen; - - if (m->type == STRING) { - *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); - m->vallen = slen; - } else if (m->reln != 'x') - m->value.l = signextend(m, strtol(*p, p, 0)); - return 0; } -/* - * Convert a string containing C character escapes. Stop at an unescaped - * space or tab. Copy the converted version to "p", returning its length in - * *slen. Return updated scan pointer as function result. - */ -static char * -getstr(register char *s, register char *p, int plen, int *slen) -{ - char *origs = s, - *origp = p; - char *pmax = p + plen - 1; - register int c; - register int val; - - while ((c = *s++) != '\0') { - if (isspace((unsigned char) c)) - break; - if (p >= pmax) { - kdError(7018) << "String too long: " << origs << endl; - break; - } - if (c == '\\') { - switch (c = *s++) { - - case '\0': - goto out; - - default: - *p++ = (char) c; - break; - - case 'n': - *p++ = '\n'; - break; - - case 'r': - *p++ = '\r'; - break; - - case 'b': - *p++ = '\b'; - break; - - case 't': - *p++ = '\t'; - break; - - case 'f': - *p++ = '\f'; - break; - - case 'v': - *p++ = '\v'; - break; - - /* \ and up to 3 octal digits */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - val = c - '0'; - c = *s++; /* try for 2 */ - if (c >= '0' && c <= '7') { - val = (val << 3) | (c - '0'); - c = *s++; /* try for 3 */ - if (c >= '0' && c <= '7') - val = (val << 3) | (c - '0'); - else - --s; - } else - --s; - *p++ = (char) val; - break; - - /* \x and up to 3 hex digits */ - case 'x': - val = 'x'; /* Default if no digits */ - c = hextoint(*s++); /* Get next char */ - if (c >= 0) { - val = c; - c = hextoint(*s++); - if (c >= 0) { - val = (val << 4) + c; - c = hextoint(*s++); - if (c >= 0) { - val = (val << 4) + c; - } else - --s; - } else - --s; - } else - --s; - *p++ = (char) val; - break; - } - } else - *p++ = (char) c; - } - out: - *p = '\0'; - *slen = p - origp; - //for ( char* foo = origp; foo < p ; ++foo ) - // kdDebug(7018) << " " << *foo << endl; - return s; -} +/* current config */ +struct config_rec { + bool followLinks; + TQString resultBuf; + int accuracy; + magic_t magic; + TQStringList databases; -/* Single hex char to int; -1 if not a hex char. */ -static int -hextoint(int c) -{ - if (!isascii((unsigned char) c)) - return -1; - if (isdigit((unsigned char) c)) - return c - '0'; - if ((c >= 'a') && (c <= 'f')) - return c + 10 - 'a'; - if ((c >= 'A') && (c <= 'F')) - return c + 10 - 'A'; - return -1; -} + KMimeMagicUtimeConf * utimeConf; +}; /* - * Convert the byte order of the data we are looking at + * apprentice - load configuration from the magic file. */ -static int -mconvert(union VALUETYPE *p, struct magic *m) -{ - switch (m->type) { - case BYTE: - return 1; - case STRING: - /* Null terminate */ - p->s[sizeof(p->s) - 1] = '\0'; - return 1; -#ifndef WORDS_BIGENDIAN - case SHORT: -#endif - case BESHORT: - p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); - return 1; -#ifndef WORDS_BIGENDIAN - case LONG: - case DATE: -#endif - case BELONG: - case BEDATE: - p->l = (long) - ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); - return 1; -#ifdef WORDS_BIGENDIAN - case SHORT: -#endif - case LESHORT: - p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); - return 1; -#ifdef WORDS_BIGENDIAN - case LONG: - case DATE: -#endif - case LELONG: - case LEDATE: - p->l = (long) - ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); - return 1; - default: - kdError(7018) << "mconvert: invalid type " << m->type << endl; - return 0; - } -} - - -static int -mget(union VALUETYPE *p, unsigned char *s, struct magic *m, - int nbytes) -{ - long offset = m->offset; - switch ( m->type ) - { - case BYTE: - if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1 - return 0; - break; - case SHORT: - case BESHORT: - case LESHORT: - if ( offset + 2 > nbytes-1 ) - return 0; - break; - case LONG: - case BELONG: - case LELONG: - case DATE: - case BEDATE: - case LEDATE: - if ( offset + 4 > nbytes-1 ) - return 0; - break; - case STRING: - break; - } - -// The file length might be < sizeof(union VALUETYPE) (David) -// -> pad with zeros (the 'file' command does it this way) -// Thanks to Stan Covington for detailed report - if (offset + (int)sizeof(union VALUETYPE) > nbytes) - { - int have = nbytes - offset; - memset(p, 0, sizeof(union VALUETYPE)); - if (have > 0) - memcpy(p, s + offset, have); - } else - memcpy(p, s + offset, sizeof(union VALUETYPE)); - - if (!mconvert(p, m)) - return 0; - - if (m->flag & INDIR) { - - switch (m->in.type) { - case BYTE: - offset = p->b + m->in.offset; - break; - case SHORT: - offset = p->h + m->in.offset; - break; - case LONG: - offset = p->l + m->in.offset; - break; - } - - if (offset + (int)sizeof(union VALUETYPE) > nbytes) - return 0; - - memcpy(p, s + offset, sizeof(union VALUETYPE)); - - if (!mconvert(p, m)) - return 0; - } - return 1; -} - -static int -mcheck(union VALUETYPE *p, struct magic *m) -{ - register unsigned long l = m->value.l; - register unsigned long v; - int matched; - - if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { - kdError(7018) << "BOINK" << endl; - return 1; - } - switch (m->type) { - case BYTE: - v = p->b; - break; - - case SHORT: - case BESHORT: - case LESHORT: - v = p->h; - break; - - case LONG: - case BELONG: - case LELONG: - case DATE: - case BEDATE: - case LEDATE: - v = p->l; - break; - - case STRING: - l = 0; - /* - * What we want here is: v = strncmp(m->value.s, p->s, - * m->vallen); but ignoring any nulls. bcmp doesn't give - * -/+/0 and isn't universally available anyway. - */ - v = 0; - { - register unsigned char *a = (unsigned char *) m->value.s; - register unsigned char *b = (unsigned char *) p->s; - register int len = m->vallen; - Q_ASSERT(len); - - while (--len >= 0) - if ((v = *b++ - *a++) != 0) - break; - } - break; - default: - kdError(7018) << "mcheck: invalid type " << m->type << endl; - return 0; /* NOTREACHED */ - } -#if 0 - tqDebug("Before signextend %08x", v); -#endif - v = signextend(m, v) & m->mask; -#if 0 - tqDebug("After signextend %08x", v); -#endif - - switch (m->reln) { - case 'x': - matched = 1; - break; - - case '!': - matched = v != l; - break; - - case '=': - matched = v == l; - break; - - case '>': - if (m->flag & UNSIGNED) - matched = v > l; - else - matched = (long) v > (long) l; - break; - - case '<': - if (m->flag & UNSIGNED) - matched = v < l; - else - matched = (long) v < (long) l; - break; - - case '&': - matched = (v & l) == l; - break; - - case '^': - matched = (v & l) != l; - break; - - default: - matched = 0; - kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl; - break; /* NOTREACHED */ - } - - return matched; +int KMimeMagic::apprentice( const TQString& magicfile ) { + conf->databases.clear(); + conf->databases.append(magicfile); + return magic_load(conf->magic, conf->databases[0].latin1()); } /* @@ -1351,867 +169,121 @@ mcheck(union VALUETYPE *p, struct magic *m) * fixed-size buffer to begin processing the contents. */ -void process(struct config_rec* conf, const TQString & fn) -{ - int fd = 0; - unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ +void process(struct config_rec* conf, const TQString & fn) { KDE_struct_stat sb; - int nbytes = 0; /* number of bytes read from a datafile */ - int tagbytes = 0; /* size of prefixed tag */ TQCString fileName = TQFile::encodeName( fn ); - /* - * first try judging the file based on its filesystem status - */ - if (fsmagic(conf, fileName, &sb) != 0) { - //resultBuf += "\n"; - return; - } - if ((fd = KDE_open(fileName, O_RDONLY)) < 0) { - /* We can't open it, but we were able to stat it. */ - /* - * if (sb.st_mode & 0002) addResult("writable, "); - * if (sb.st_mode & 0111) addResult("executable, "); - */ - //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl; - conf->resultBuf = MIME_BINARY_UNREADABLE; - return; - } - /* - * try looking at the first HOWMANY bytes - */ - if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) { - kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl; - conf->resultBuf = MIME_BINARY_UNREADABLE; - (void)close(fd); - return; - } - if ((tagbytes = tagmagic(buf, nbytes))) { - // Read buffer at new position - lseek(fd, tagbytes, SEEK_SET); - nbytes = read(fd, (char*)buf, HOWMANY); - if (nbytes < 0) { - conf->resultBuf = MIME_BINARY_UNREADABLE; - (void)close(fd); - return; - } - } - if (nbytes == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - } else { - buf[nbytes++] = '\0'; /* null-terminate it */ - tryit(conf, buf, nbytes); - } - - if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) - { - /* - * Try to restore access, modification times if read it. - * This changes the "change" time (ctime), but we can't do anything - * about that. - */ - struct utimbuf utbuf; - utbuf.actime = sb.st_atime; - utbuf.modtime = sb.st_mtime; - (void) utime(fileName, &utbuf); - } - (void) close(fd); -} - - -static void tryit(struct config_rec* conf, unsigned char *buf, int nb) -{ - /* try tests in /etc/magic (or surrogate magic file) */ - if (match(conf, buf, nb)) - return; - - /* try known keywords, check for ascii-ness too. */ - if (ascmagic(conf, buf, nb) == 1) - return; - - /* see if it's plain text */ - if (textmagic(conf, buf, nb)) - return; - - /* abandon hope, all ye who remain here */ - conf->resultBuf = MIME_BINARY_UNKNOWN; - conf->accuracy = 0; -} - -static int -fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb) -{ - int ret = 0; - - /* - * Fstat is cheaper but fails for files you don't have read perms on. - * On 4.2BSD and similar systems, use lstat() to identify symlinks. - */ - ret = KDE_lstat(fn, sb); /* don't merge into if; see "ret =" above */ - - if (ret) { - return 1; - - } - /* - * if (sb->st_mode & S_ISUID) resultBuf += "setuid "; - * if (sb->st_mode & S_ISGID) resultBuf += "setgid "; - * if (sb->st_mode & S_ISVTX) resultBuf += "sticky "; - */ - - switch (sb->st_mode & S_IFMT) { - case S_IFDIR: - conf->resultBuf = MIME_INODE_DIR; - return 1; - case S_IFCHR: - conf->resultBuf = MIME_INODE_CDEV; - return 1; - case S_IFBLK: - conf->resultBuf = MIME_INODE_BDEV; - return 1; - /* TODO add code to handle V7 MUX and Blit MUX files */ -#ifdef S_IFIFO - case S_IFIFO: - conf->resultBuf = MIME_INODE_FIFO; - return 1; -#endif -#ifdef S_IFLNK - case S_IFLNK: - { - char buf[BUFSIZ + BUFSIZ + 4]; - register int nch; - KDE_struct_stat tstatbuf; - - if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nunreadable"; - return 1; - } - buf[nch] = '\0'; /* readlink(2) forgets this */ - /* If broken symlink, say so and quit early. */ - if (*buf == '/') { - if (KDE_stat(buf, &tstatbuf) < 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nbroken"; - return 1; - } - } else { - char *tmp; - char buf2[BUFSIZ + BUFSIZ + 4]; - - strncpy(buf2, fn, BUFSIZ); - buf2[BUFSIZ] = 0; - - if ((tmp = strrchr(buf2, '/')) == NULL) { - tmp = buf; /* in current dir */ - } else { - /* dir part plus (rel.) link */ - *++tmp = '\0'; - strcat(buf2, buf); - tmp = buf2; - } - if (KDE_stat(tmp, &tstatbuf) < 0) { - conf->resultBuf = MIME_INODE_LINK; - //conf->resultBuf += "\nbroken"; - return 1; - } else - strcpy(buf, tmp); - } - if (conf->followLinks) - process( conf, TQFile::decodeName( buf ) ); - else - conf->resultBuf = MIME_INODE_LINK; - return 1; - } - return 1; -#endif -#ifdef S_IFSOCK -#ifndef __COHERENT__ - case S_IFSOCK: - conf->resultBuf = MIME_INODE_SOCK; - return 1; -#endif -#endif - case S_IFREG: - break; - default: - kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl; - /* NOTREACHED */ - } - - /* - * regular file, check next possibility - */ - if (sb->st_size == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - return 1; - } - return 0; -} - -/* - * Go through the whole list, stopping if you find a match. Process all the - * continuations of that match before returning. - * - * We support multi-level continuations: - * - * At any time when processing a successful top-level match, there is a current - * continuation level; it represents the level of the last successfully - * matched continuation. - * - * Continuations above that level are skipped as, if we see one, it means that - * the continuation that controls them - i.e, the lower-level continuation - * preceding them - failed to match. - * - * Continuations below that level are processed as, if we see one, it means - * we've finished processing or skipping higher-level continuations under the - * control of a successful or unsuccessful lower-level continuation, and are - * now seeing the next lower-level continuation and should process it. The - * current continuation level reverts to the level of the one we're seeing. - * - * Continuations at the current level are processed as, if we see one, there's - * no lower-level continuation that may have failed. - * - * If a continuation matches, we bump the current continuation level so that - * higher-level continuations are processed. - */ -static int -match(struct config_rec* conf, unsigned char *s, int nbytes) -{ - int cont_level = 0; - union VALUETYPE p; - struct magic *m; - -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl; - for (m = conf->magic; m; m = m->next) { - if (isprint((((unsigned long) m) >> 24) & 255) && - isprint((((unsigned long) m) >> 16) & 255) && - isprint((((unsigned long) m) >> 8) & 255) && - isprint(((unsigned long) m) & 255)) { - kdDebug(7018) << "match: POINTER CLOBBERED! " << endl; - break; - } + int magic_flags = MAGIC_CONTINUE|MAGIC_ERROR|MAGIC_MIME_TYPE/*|MAGIC_DEBUG*/; + if (conf->followLinks) { + magic_flags |= MAGIC_SYMLINK; } -#endif - - for (m = conf->magic; m; m = m->next) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl; -#endif - memset(&p, 0, sizeof(union VALUETYPE)); - - /* check if main entry matches */ - if (!mget(&p, s, m, nbytes) || - !mcheck(&p, m)) { - struct magic *m_cont; - - /* - * main entry didn't match, flush its continuations - */ - if (!m->next || (m->next->cont_level == 0)) { - continue; - } - m_cont = m->next; - while (m_cont && (m_cont->cont_level != 0)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl; -#endif - /* - * this trick allows us to keep *m in sync - * when the continue advances the pointer - */ - m = m_cont; - m_cont = m_cont->next; - } - continue; - } - /* if we get here, the main entry rule was a match */ - /* this will be the last run through the loop */ -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; -#endif + magic_setflags(conf->magic, magic_flags); + conf->resultBuf = TQString(magic_file(conf->magic, fileName)); + conf->resultBuf = fixupMagicOutput(conf->resultBuf); - /* remember the match */ - conf->resultBuf = m->desc; - - cont_level++; + if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) ) { /* - * while (m && m->next && m->next->cont_level != 0 && ( m = - * m->next )) - */ - m = m->next; - while (m && (m->cont_level != 0)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl; -#endif - if (cont_level >= m->cont_level) { - if (cont_level > m->cont_level) { - /* - * We're at the end of the level - * "cont_level" continuations. - */ - cont_level = m->cont_level; - } - if (mget(&p, s, m, nbytes) && - mcheck(&p, m)) { - /* - * This continuation matched. Print - * its message, with a blank before - * it if the previous item printed - * and this item isn't empty. - */ -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "continuation matched" << endl; -#endif - conf->resultBuf = m->desc; - cont_level++; - } - } - /* move to next continuation record */ - m = m->next; + * Try to restore access, modification times if read it. + * This changes the "change" time (ctime), but we can't do anything + * about that. + */ + struct utimbuf utbuf; + utbuf.actime = sb.st_atime; + utbuf.modtime = sb.st_mtime; + (void) utime(fileName, &utbuf); + } +} + +KMimeMagic::KMimeMagic() { + // Magic file detection init + TQString mimefile = locate( "mime", "magic" ); + init( mimefile ); + // Add snippets from share/config/magic/* + TQStringList snippets = TDEGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); + for ( TQStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) { + if ( !mergeConfig( *it ) ) { + kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; } - // KDE-specific: need an actual mimetype for a real match - // If we only matched a rule with continuations but no mimetype, it's not a match - if ( !conf->resultBuf.isEmpty() ) - { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: matched" << endl; -#endif - return 1; /* all through */ - } } -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "match: failed" << endl; -#endif - return 0; /* no match at all */ } -// Try to parse prefixed tags before matching on content -// Sofar only ID3v2 tags (<=.4) are handled -static int tagmagic(unsigned char *buf, int nbytes) -{ - if(nbytes<40) return 0; - if(buf[0] == 'I' && buf[1] == 'D' && buf[2] == '3') { - int size = 10; - // Sanity (known version, no unknown flags) - if(buf[3] > 4) return 0; - if(buf[5] & 0x0F) return 0; - // Tag has v4 footer - if(buf[5] & 0x10) size += 10; - // Calculated syncsafe size - size += buf[9]; - size += buf[8] << 7; - size += buf[7] << 14; - size += buf[6] << 21; - return size; - } - return 0; -} - -struct Token { - char *data; - int length; -}; - -struct Tokenizer -{ - Tokenizer(char* buf, int nbytes) { - data = buf; - length = nbytes; - pos = 0; - } - bool isNewLine() { - return newline; - } - Token* nextToken() { - if (pos == 0) - newline = true; - else - newline = false; - token.data = data+pos; - token.length = 0; - while(pos': - if (token.length == 0) token.data++; - else - return &token; - break; - default: - token.length++; - } - pos++; - } - return &token; - } - -private: - Token token; - char* data; - int length; - int pos; - bool newline; -}; - - -/* an optimization over plain strcmp() */ -//#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) -static inline bool STREQ(const Token *token, const char *b) { - const char *a = token->data; - int len = token->length; - if (a == b) return true; - while(*a && *b && len > 0) { - if (*a != *b) return false; - a++; b++; len--; - } - return (len == 0 && *b == 0); -} - -static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes) -{ - int i; - double pct, maxpct, pctsum; - double pcts[NTYPES]; - int mostaccurate, tokencount; - int typeset, jonly, conly, jconly, objconly, cpponly; - int has_escapes = 0; - //unsigned char *s; - //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ - - /* these are easy, do them first */ - conf->accuracy = 70; - - /* - * for troff, look for . + letter + letter or .\"; this must be done - * to disambiguate tar archives' ./file and other trash from real - * troff input. - */ - if (*buf == '.') { - unsigned char *tp = buf + 1; - - while (isascii(*tp) && isspace(*tp)) - ++tp; /* skip leading whitespace */ - if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') && - isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) { - conf->resultBuf = MIME_APPL_TROFF; - return 1; - } - } - if ((*buf == 'c' || *buf == 'C') && - isascii(*(buf + 1)) && isspace(*(buf + 1))) { - /* Fortran */ - conf->resultBuf = MIME_TEXT_FORTRAN; - return 1; - } - assert(nbytes-1 < HOWMANY + 1); - /* look for tokens - this is expensive! */ - has_escapes = (memchr(buf, '\033', nbytes) != NULL); - Tokenizer tokenizer((char*)buf, nbytes); - const Token* token; - bool linecomment = false, blockcomment = false; - const struct names *p; - int typecount[NTYPES]; -/* - * Fritz: - * Try a little harder on C/C++/Java. - */ - memset(&typecount, 0, sizeof(typecount)); - typeset = 0; - jonly = 0; - conly = 0; - jconly = 0; - objconly = 0; - cpponly = 0; - tokencount = 0; - bool foundClass = false; // mandatory for java - // first collect all possible types and count matches - // we stop at '>' too, because of "blah" on HTML pages - while ((token = tokenizer.nextToken())->length > 0) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl; -#endif - if (linecomment && tokenizer.isNewLine()) - linecomment = false; - if (blockcomment && STREQ(token, "*/")) { - blockcomment = false; - continue; - } - for (p = names; p->name ; p++) { - if (STREQ(token, p->name)) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl; -#endif - tokencount++; - typeset |= p->type; - if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) { - if (linecomment || blockcomment) { - continue; - } - else { - switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) - { - case L_JAVA: - jonly++; - break; - case L_OBJC: - objconly++; - break; - case L_CPP: - cpponly++; - break; - case (L_CPP|L_JAVA): - jconly++; - if ( !foundClass && STREQ(token, "class") ) - foundClass = true; - break; - case (L_C|L_CPP): - conly++; - break; - default: - if (STREQ(token, "//")) linecomment = true; - if (STREQ(token, "/*")) blockcomment = true; - } - } - } - for (i = 0; i < (int)NTYPES; i++) { - if ((1 << i) & p->type) typecount[i]+= p->type & FLAG_STRONG ? 2 : 1; - } - } - } - } - - if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) { - conf->accuracy = 60; - if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) { -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl; -#endif - if (jonly > 1 && foundClass) { - // At least two java-only tokens have matched, including "class" - conf->resultBuf = TQString(types[P_JAVA].type); - return 1; - } - if (jconly > 1) { - // At least two non-C (only C++ or Java) token have matched. - if (typecount[P_JAVA] < typecount[P_CPP]) - conf->resultBuf = TQString(types[P_CPP].type); - else - conf->resultBuf = TQString(types[P_JAVA].type); - return 1; - } - if (conly + cpponly > 1) { - // Either C or C++. - if (cpponly > 0) - conf->resultBuf = TQString(types[P_CPP].type); - else - conf->resultBuf = TQString(types[P_C].type); - return 1; - } - if (objconly > 0) { - conf->resultBuf = TQString(types[P_OBJC].type); - return 1; - } - } - } - - /* Neither C, C++ or Java (or all of them without able to distinguish): - * Simply take the token-class with the highest - * matchcount > 0 - */ - mostaccurate = -1; - maxpct = pctsum = 0.0; - for (i = 0; i < (int)NTYPES; i++) { - if (typecount[i] > 1) { // one word is not enough, we need at least two - pct = (double)typecount[i] / (double)types[i].kwords * - (double)types[i].weight; - pcts[i] = pct; - pctsum += pct; - if (pct > maxpct) { - maxpct = pct; - mostaccurate = i; - } -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl; -#endif - } - } - if (mostaccurate >= 0) { - if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java - { - conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60); -#ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl; -#endif - conf->resultBuf = TQString(types[mostaccurate].type); - return 1; - } - } - - switch (is_tar(buf, nbytes)) { - case 1: - /* V7 tar archive */ - conf->resultBuf = MIME_APPL_TAR; - conf->accuracy = 90; - return 1; - case 2: - /* POSIX tar archive */ - conf->resultBuf = MIME_APPL_TAR; - conf->accuracy = 90; - return 1; - } - - for (i = 0; i < nbytes; i++) { - if (!isascii(*(buf + i))) - return 0; /* not all ascii */ - } - - /* all else fails, but it is ascii... */ - conf->accuracy = 90; - if (has_escapes) { - /* text with escape sequences */ - /* we leave this open for further differentiation later */ - conf->resultBuf = MIME_TEXT_UNKNOWN; - } else { - /* plain text */ - conf->resultBuf = MIME_TEXT_PLAIN; - } - return 1; -} - - -/* This code is taken from the "file" command, where it is licensed - * in the "beer-ware license" :-) - * Original author: - * Simplified by David Faure to avoid the static array char[256]. - * Drastically simplified by Laurent Dard for the Trinity Desktop Environment - * Configuration files with big lines are still text files: - * line length checking is now avoided here. - */ -static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes) -{ - int i; - unsigned char *cp; - - nbytes--; - - /* Look whether there are "unreasonable" characters. */ - for (i = 0, cp = buf; i < nbytes; i++, cp++) - if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F)) - return 0; - - conf->resultBuf = MIME_TEXT_PLAIN; - return 1; +KMimeMagic::KMimeMagic(const TQString & _configfile) { + init( _configfile ); } - -/* - * is_tar() -- figure out whether file is a tar archive. - * - * Stolen (by author of file utility) from the public domain tar program: Public - * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). - * - * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 - * 1997/06/24 00:41:02 ikluft Exp ikluft $ - * - * Comments changed and some code/comments reformatted for file command by Ian - * Darwin. - */ - -#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) - -/* - * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for - * old UNIX tar file, 2 for Unix Std (POSIX) tar file. - */ - -static int -is_tar(unsigned char *buf, int nbytes) -{ - register union record *header = (union record *) buf; - register int i; - register long sum, - recsum; - register char *p; - - if (nbytes < (int)sizeof(union record)) - return 0; - - recsum = from_oct(8, header->header.chksum); - - sum = 0; - p = header->charptr; - for (i = sizeof(union record); --i >= 0;) { - /* - * We can't use unsigned char here because of old compilers, - * e.g. V7. - */ - sum += 0xFF & *p++; - } - - /* Adjust checksum to count the "chksum" field as blanks. */ - for (i = sizeof(header->header.chksum); --i >= 0;) - sum -= 0xFF & header->header.chksum[i]; - sum += ' ' * sizeof header->header.chksum; - - if (sum != recsum) - return 0; /* Not a tar archive */ - - if (0 == strcmp(header->header.magic, TMAGIC)) - return 2; /* Unix Standard tar archive */ - - return 1; /* Old fashioned tar archive */ -} - - -/* - * Quick and dirty octal conversion. - * - * Result is -1 if the field is invalid (all blank, or nonoctal). - */ -static long -from_oct(int digs, char *where) -{ - register long value; - - while (isspace(*where)) { /* Skip spaces */ - where++; - if (--digs <= 0) - return -1; /* All blank field */ - } - value = 0; - while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ - value = (value << 3) | (*where++ - '0'); - --digs; - } - - if (digs > 0 && *where && !isspace(*where)) - return -1; /* Ended on non-space/nul */ - - return value; -} - -KMimeMagic::KMimeMagic() -{ - // Magic file detection init - TQString mimefile = locate( "mime", "magic" ); - init( mimefile ); - // Add snippets from share/config/magic/* - TQStringList snippets = TDEGlobal::dirs()->findAllResources( "config", "magic/*.magic", true ); - for ( TQStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it ) - if ( !mergeConfig( *it ) ) - kdWarning() << k_funcinfo << "Failed to parse " << *it << endl; -} - -KMimeMagic::KMimeMagic(const TQString & _configfile) -{ - init( _configfile ); -} - -void KMimeMagic::init( const TQString& _configfile ) -{ +void KMimeMagic::init( const TQString& _configfile ) { int result; conf = new config_rec; - /* set up the magic list (empty) */ - conf->magic = conf->last = NULL; + /* initialize libmagic */ + conf->magic = magic_open(MAGIC_MIME_TYPE); magicResult = NULL; conf->followLinks = false; conf->utimeConf = 0L; // created on demand /* on the first time through we read the magic file */ result = apprentice(_configfile); - if (result == -1) + if (result == -1) { return; -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif + } } /* * The destructor. * Free the magic-table and other resources. */ -KMimeMagic::~KMimeMagic() -{ +KMimeMagic::~KMimeMagic() { if (conf) { - struct magic *p = conf->magic; - struct magic *q; - while (p) { - q = p; - p = p->next; - free(q); - } + magic_close(conf->magic); delete conf->utimeConf; delete conf; } delete magicResult; } -bool -KMimeMagic::mergeConfig(const TQString & _configfile) -{ - kdDebug(7018) << k_funcinfo << _configfile << endl; - int result; - - if (_configfile.isEmpty()) - return false; - result = apprentice(_configfile); - if (result == -1) { +bool KMimeMagic::mergeConfig(const TQString & _configfile) { + conf->databases.append(_configfile); + TQString merged_databases = conf->databases.join(":"); + if (magic_load(conf->magic, merged_databases.latin1()) == 0) { + return true; + } + else { return false; } -#ifdef MIME_MAGIC_DEBUG_TABLE - test_table(); -#endif - return true; } -void -KMimeMagic::setFollowLinks( bool _enable ) -{ +void KMimeMagic::setFollowLinks( bool _enable ) { conf->followLinks = _enable; } -KMimeMagicResult * -KMimeMagic::findBufferType(const TQByteArray &array) -{ - unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ - +KMimeMagicResult *KMimeMagic::findBufferType(const TQByteArray &array) { conf->resultBuf = TQString::null; - if ( !magicResult ) - magicResult = new KMimeMagicResult(); + if ( !magicResult ) { + magicResult = new KMimeMagicResult(); + } magicResult->setInvalid(); conf->accuracy = 100; int nbytes = array.size(); - - if (nbytes > HOWMANY) - nbytes = HOWMANY; - memcpy(buf, array.data(), nbytes); - if (nbytes == 0) { - conf->resultBuf = MIME_BINARY_ZEROSIZE; - } else { - buf[nbytes++] = '\0'; /* null-terminate it */ - tryit(conf, buf, nbytes); - } - /* if we have any results, put them in the request structure */ + if (nbytes == 0) { + conf->resultBuf = MIME_BINARY_ZEROSIZE; + } + else { + int magic_flags = MAGIC_CONTINUE|MAGIC_ERROR|MAGIC_MIME_TYPE/*|MAGIC_DEBUG*/; + if (conf->followLinks) { + magic_flags |= MAGIC_SYMLINK; + } + magic_setflags(conf->magic, magic_flags); + conf->resultBuf = TQString(magic_buffer(conf->magic, array.data(), nbytes)); + conf->resultBuf = fixupMagicOutput(conf->resultBuf); + } + /* if we have any results, put them in the request structure */ magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); magicResult->setAccuracy(conf->accuracy); - return magicResult; + return magicResult; } -static void -refineResult(KMimeMagicResult *r, const TQString & _filename) -{ +static void refineResult(KMimeMagicResult *r, const TQString & _filename) { TQString tmp = r->mimeType(); if (tmp.isEmpty()) return; @@ -2246,10 +318,7 @@ refineResult(KMimeMagicResult *r, const TQString & _filename) } } -KMimeMagicResult * -KMimeMagic::findBufferFileType( const TQByteArray &data, - const TQString &fn) -{ +KMimeMagicResult *KMimeMagic::findBufferFileType( const TQByteArray &data, const TQString &fn) { KMimeMagicResult * r = findBufferType( data ); refineResult(r, fn); return r; @@ -2258,28 +327,29 @@ KMimeMagic::findBufferFileType( const TQByteArray &data, /* * Find the content-type of the given file. */ -KMimeMagicResult* KMimeMagic::findFileType(const TQString & fn) -{ +KMimeMagicResult* KMimeMagic::findFileType(const TQString & fn) { #ifdef DEBUG_MIMEMAGIC - kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; + kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl; #endif - conf->resultBuf = TQString::null; - - if ( !magicResult ) - magicResult = new KMimeMagicResult(); + conf->resultBuf = TQString::null; + + if ( !magicResult ) { + magicResult = new KMimeMagicResult(); + } magicResult->setInvalid(); conf->accuracy = 100; - - if ( !conf->utimeConf ) - conf->utimeConf = new KMimeMagicUtimeConf(); - - /* process it based on the file contents */ - process(conf, fn ); - - /* if we have any results, put them in the request structure */ - //finishResult(); + + if ( !conf->utimeConf ) { + conf->utimeConf = new KMimeMagicUtimeConf(); + } + + /* process it based on the file contents */ + process(conf, fn ); + + /* if we have any results, put them in the request structure */ + //finishResult(); magicResult->setMimeType(conf->resultBuf.stripWhiteSpace()); magicResult->setAccuracy(conf->accuracy); refineResult(magicResult, fn); - return magicResult; + return magicResult; } diff --git a/tdeio/tdeio/kmimemagic.h b/tdeio/tdeio/kmimemagic.h index d812650f2..1dce2c7af 100644 --- a/tdeio/tdeio/kmimemagic.h +++ b/tdeio/tdeio/kmimemagic.h @@ -43,7 +43,7 @@ class KMimeMagic; // see below (read this one first) * It contains the mimetype and the encoding of * the file or buffer read. */ -class TDEIO_EXPORT_DEPRECATED KMimeMagicResult +class TDEIO_EXPORT KMimeMagicResult { public: KMimeMagicResult() { m_iAccuracy = 100; } @@ -98,7 +98,7 @@ protected: * * The result is contained in the class KMimeMagicResult. */ -class TDEIO_EXPORT_DEPRECATED KMimeMagic +class TDEIO_EXPORT KMimeMagic { public: /** -- cgit v1.2.3