00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 #include "kmimemagic.h"
00019 #include <kdebug.h>
00020 #include <kapplication.h>
00021 #include <qfile.h>
00022 #include <ksimpleconfig.h>
00023 #include <kstandarddirs.h>
00024 #include <kstaticdeleter.h>
00025 #include <klargefile.h>
00026 #include <assert.h>
00027 
/*
 * Forward declarations of file-local (static) detection helpers.
 * Their definitions are not part of this section of the file; most of them
 * receive the per-instance configuration record (struct config_rec).
 */
static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
static void process(struct config_rec* conf,  const QString &);
static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
static int tagmagic(unsigned char *buf, int nbytes);
static int textmagic(struct config_rec* conf, unsigned char *, int);

static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
static int match(struct config_rec* conf, unsigned char *, int);
00036 
// Shared KMimeMagic instance; self() creates it on demand through initStatic().
KMimeMagic* KMimeMagic::s_pSelf;
// Static deleter that initStatic() registers the shared instance with.
static KStaticDeleter<KMimeMagic> kmimemagicsd;
00039 
00040 KMimeMagic* KMimeMagic::self()
00041 {
00042   if( !s_pSelf )
00043     initStatic();
00044   return s_pSelf;
00045 }
00046 
00047 void KMimeMagic::initStatic()
00048 {
00049   s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00050   s_pSelf->setFollowLinks( true );
00051 }
00052 
00053 #include <stdio.h>
00054 #include <unistd.h>
00055 #include <stdlib.h>
00056 #include <sys/wait.h>
00057 #include <sys/types.h>
00058 #include <sys/stat.h>
00059 #include <fcntl.h>
00060 #include <errno.h>
00061 #include <ctype.h>
00062 #include <time.h>
00063 #include <utime.h>
00064 #include <stdarg.h>
00065 #include <qregexp.h>
00066 #include <qstring.h>
00067 
00068 
00069 
00070 
00071 
00072 
00073 
00074 
/* Either debug switch also enables DEBUG_LINENUMBERS, which adds the
 * `lineno` field to struct magic and the `lineno` parameter name in parse(). */
#if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
#define DEBUG_LINENUMBERS
#endif
00078 
00079 
00080 
00081 
/* Status codes used by the helpers in this file.  Within the visible code,
 * signextend() returns ERROR when it meets an unknown rule type. */
#define DECLINED 999
#define ERROR    998
#define OK         0
00085 
00086 
00087 
00088 
/* MIME type name constants.
 * Note that MIME_TEXT_UNKNOWN and MIME_TEXT_PLAIN expand to the same string. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"
00104 
/* Size limits.  MAXDESC and MAXstring size arrays declared below; the uses of
 * MAXMIMESTRING and HOWMANY are outside this part of the file. */
#define MAXMIMESTRING        256

#define HOWMANY 1024            /* presumably the number of leading file bytes examined -- TODO confirm */
#define MAXDESC   50            /* size of magic::desc, including the terminating NUL */
#define MAXstring 64            /* size of VALUETYPE::s, including the terminating NUL */
00110 
/*
 * One value, viewed either as a number of a given width or as raw bytes.
 * It holds both the value written in a rule (magic::value) and the bytes
 * copied out of the examined buffer by mget().
 */
typedef union VALUETYPE {
    unsigned char b;        /* BYTE value */
    unsigned short h;       /* SHORT-family value, written by mconvert() */
    unsigned long l;        /* LONG/DATE-family value, written by mconvert() */
    char s[MAXstring];      /* STRING value; mconvert() forces the last byte to NUL */
    unsigned char hs[2];    /* raw bytes mconvert() assembles into `h` */
    unsigned char hl[4];    /* raw bytes mconvert() assembles into `l` */
} VALUETYPE;
00119 
/*
 * One parsed rule of the magic file.  parse() allocates these with calloc()
 * and appends them to the singly linked list held in struct config_rec.
 */
struct magic {
    struct magic *next;     /* next rule in the list, NULL for the last one */
#ifdef DEBUG_LINENUMBERS
    int lineno;             /* line number passed to parse(); debug builds only */
#endif

    short flag;             /* bit set of INDIR / UNSIGNED */
#define INDIR    1              /* offset was written as "(...)": indirect offset */
#define UNSIGNED 2              /* type name carried a leading 'u' */
    short cont_level;       /* number of leading '>' characters on the rule line */
    struct {
        char type;      /* width used to read the indirect offset: BYTE, SHORT or LONG */
        long offset;    /* signed displacement added to the value read indirectly */
    } in;
    long offset;            /* offset into the examined buffer */
    unsigned char reln;     /* relation character: one of > < & ^ = ! or 'x' */
    char type;              /* one of the type codes defined just below */
    char vallen;            /* length of value.s, set by getvalue() for STRING rules */
#define BYTE       1
#define SHORT      2
#define LONG       4
#define STRING     5
#define DATE       6
#define BESHORT    7
#define BELONG     8
#define BEDATE     9
#define LESHORT   10
#define LELONG    11
#define LEDATE    12
    VALUETYPE value;        /* value the data is compared against */
    unsigned long mask;     /* mask applied to the data in mcheck(); all ones when the rule has no '&' */
    char nospflag;          /* 1 when the description starts with a backspace or the two characters "\b" */

    /* rest of the rule line up to '#' or end of line, truncated to MAXDESC-1 characters */
    char desc[MAXDESC];     
};
00156 
00157 
00158 
00159 
00160 
00161 
00162 
00163 
00164 
00165 
00166 
00167 
00168 
00169 
00170 
/*
 * Fixed-size archive record layout.
 * NOTE(review): the field names and the "ustar" magic below match a tar
 * header, and is_tar() is declared further down, but the code using this
 * union is outside this part of the file -- confirm before relying on it.
 */
#define RECORDSIZE    512
#define NAMSIZ    100
#define TUNMLEN    32
#define TGNMLEN    32

union record {
    char charptr[RECORDSIZE];   /* the whole record as raw bytes */
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};


/* Magic string: "ustar", two spaces and the implicit NUL -- 8 bytes, the size of header.magic. */
#define    TMAGIC        "ustar  "  
00198 
00199 
00200 
00201 
/* Prototypes of the file-local rule parsing and matching helpers.
 * is_tar() and from_oct() are defined outside this part of the file. */
static int is_tar(unsigned char *, int);
static unsigned long signextend(struct magic *, unsigned long);
static int getvalue(struct magic *, char **);
static int hextoint(int);
static char *getstr(char *, char *, int, int *);
static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
static int mcheck(union VALUETYPE *, struct magic *);
static int mconvert(union VALUETYPE *, struct magic *);
static long from_oct(int, char *);
00211 
00212 
00213 
00214 
00215 
00216 
00217 
00218 
00219 
00220 
00221 
00222 
00223 
00224 
00225 
/* Bit flags stored in names[].type; one keyword may carry several of them. */
#define L_HTML   0x001          
#define L_C      0x002          
#define L_MAKE   0x004          
#define L_PLI    0x008          
#define L_MACH   0x010          
#define L_PAS    0x020          
#define L_JAVA   0x040          
#define L_CPP    0x080          
#define L_MAIL   0x100          
#define L_NEWS   0x200          
#define L_DIFF   0x400          

/* Bit position of each L_* flag above (L_x == 1 << P_x).  The values follow
 * the row order of the types[] table below. */
#define P_HTML   0          
#define P_C      1          
#define P_MAKE   2          
#define P_PLI    3          
#define P_MACH   4          
#define P_PAS    5          
#define P_JAVA   6          
#define P_CPP    7          
#define P_MAIL   8          
#define P_NEWS   9          
#define P_DIFF  10          
00249 
/* One text type that keyword matching can report. */
typedef struct asc_type {
    const char *type;   /* MIME type name */
    int  kwords;        /* NOTE(review): presumably a keyword count used for scoring -- consumer not visible here */
    double  weight;     /* NOTE(review): presumably a scoring weight -- consumer not visible here */
} asc_type;

/* Table of text types; the row order matches the P_* constants above. */
static const asc_type types[] = {
    { "text/html",          19, 2 }, 
    { "text/x-c",           9, 1.3 },
    { "text/x-makefile",    4, 1.9 },
    { "text/x-pli",         1, 3 },
    { "text/x-assembler",   6, 2.1 },
    { "text/x-pascal",      1, 1 },
    { "text/x-java",       14, 1 },
    { "text/x-c++",        14, 1 },
    { "message/rfc822",     4, 1.9 },
    { "message/news",       3, 2 },
        { "text/x-diff",        4, 2 }
};

/* Number of rows in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271 
00272 static struct names {
00273     const char *name;
00274     short type;
00275 } const names[] = {
00276     {
00277         "<html", L_HTML
00278     },
00279     {
00280         "<HTML", L_HTML
00281     },
00282     {
00283         "<head", L_HTML
00284     },
00285     {
00286         "<HEAD", L_HTML
00287     },
00288     {
00289         "<body", L_HTML
00290     },
00291     {
00292         "<BODY", L_HTML
00293     },
00294     {
00295         "<title", L_HTML
00296     },
00297     {
00298         "<TITLE", L_HTML
00299     },
00300     {
00301         "<h1", L_HTML
00302     },
00303     {
00304         "<H1", L_HTML
00305     },
00306     {
00307         "<a", L_HTML
00308     },
00309     {
00310         "<A", L_HTML
00311     },
00312     {
00313         "<img", L_HTML
00314     },
00315     {
00316         "<IMG", L_HTML
00317     },
00318     {
00319         "<!--", L_HTML
00320     },
00321     {
00322         "<!doctype", L_HTML
00323     },
00324     {
00325         "<!DOCTYPE", L_HTML
00326     },
00327     {
00328         "<div", L_HTML
00329     },
00330     {
00331         "<DIV", L_HTML
00332     },
00333     {
00334         "<frame", L_HTML
00335     },
00336     {
00337         "<FRAME", L_HTML
00338     },
00339     {
00340         "<frameset", L_HTML
00341     },
00342     {
00343         "<FRAMESET", L_HTML
00344     },
00345         {
00346                 "<script", L_HTML
00347         },
00348         {
00349                 "<SCRIPT", L_HTML
00350         },
00351     {
00352         "/*", L_C|L_CPP|L_JAVA
00353     },                      
00354     {
00355         "//", L_CPP|L_JAVA
00356     },                      
00357     {
00358         "#include", L_C|L_CPP
00359     },
00360     {
00361         "char", L_C|L_CPP|L_JAVA
00362     },
00363     {
00364         "double", L_C|L_CPP|L_JAVA
00365     },
00366     {
00367         "extern", L_C|L_CPP
00368     },
00369     {
00370         "float", L_C|L_CPP|L_JAVA
00371     },
00372     {
00373         "real", L_C|L_CPP|L_JAVA
00374     },
00375     {
00376         "struct", L_C|L_CPP
00377     },
00378     {
00379         "union", L_C|L_CPP
00380     },
00381     {
00382         "implements", L_JAVA
00383     },
00384     {
00385         "super", L_JAVA
00386     },
00387     {
00388         "import", L_JAVA
00389     },
00390     {
00391         "class", L_CPP|L_JAVA
00392     },
00393     {
00394         "public", L_CPP|L_JAVA
00395     },
00396     {
00397         "private", L_CPP|L_JAVA
00398     },
00399     {
00400         "CFLAGS", L_MAKE
00401     },
00402     {
00403         "LDFLAGS", L_MAKE
00404     },
00405     {
00406         "all:", L_MAKE
00407     },
00408     {
00409         ".PHONY:", L_MAKE
00410     },
00411     {
00412         "srcdir", L_MAKE
00413     },
00414     {
00415         "exec_prefix", L_MAKE
00416     },
00417     
00418 
00419 
00420 
00421     {
00422         ".ascii", L_MACH
00423     },
00424     {
00425         ".asciiz", L_MACH
00426     },
00427     {
00428         ".byte", L_MACH
00429     },
00430     {
00431         ".even", L_MACH
00432     },
00433     {
00434         ".globl", L_MACH
00435     },
00436     {
00437         "clr", L_MACH
00438     },
00439     {
00440         "(input", L_PAS
00441     },
00442     {
00443         "dcl", L_PLI
00444     },
00445     {
00446         "Received:", L_MAIL
00447     },
00448     
00449 
00450 
00451     {
00452         "Return-Path:", L_MAIL
00453     },
00454     {
00455         "Cc:", L_MAIL
00456     },
00457     {
00458         "Newsgroups:", L_NEWS
00459     },
00460     {
00461         "Path:", L_NEWS
00462     },
00463     {
00464         "Organization:", L_NEWS
00465     },
00466     {
00467         "---", L_DIFF
00468     },
00469     {
00470         "+++", L_DIFF
00471     },
00472     {
00473         "***", L_DIFF
00474     },
00475     {
00476         "@@", L_DIFF
00477     },
00478     {
00479         NULL, 0
00480     }
00481 };
00482 
00493 class KMimeMagicUtimeConf
00494 {
00495 public:
00496     KMimeMagicUtimeConf()
00497     {
00498         tmpDirs << QString::fromLatin1("/tmp"); 
00499 
00500         
00501         
00502         QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00503         if ( !confDirs.isEmpty() )
00504         {
00505             QString globalConf = confDirs.last() + "kmimemagicrc";
00506             if ( QFile::exists( globalConf ) )
00507             {
00508                 KSimpleConfig cfg( globalConf );
00509                 cfg.setGroup( "Settings" );
00510                 tmpDirs = cfg.readListEntry( "atimeDirs" );
00511             }
00512             if ( confDirs.count() > 1 )
00513             {
00514                 QString localConf = confDirs.first() + "kmimemagicrc";
00515                 if ( QFile::exists( localConf ) )
00516                 {
00517                     KSimpleConfig cfg( localConf );
00518                     cfg.setGroup( "Settings" );
00519                     tmpDirs += cfg.readListEntry( "atimeDirs" );
00520                 }
00521             }
00522             for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00523             {
00524                 QString dir = *it;
00525                 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00526                     (*it) += '/';
00527             }
00528         }
00529 #if 0
00530         
00531         for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00532             kdDebug(7018) << " atimeDir: " << *it << endl;
00533 #endif
00534     }
00535 
00536     bool restoreAccessTime( const QString & file ) const
00537     {
00538         QString dir = file.left( file.findRev( '/' ) );
00539         bool res = tmpDirs.contains( dir );
00540         
00541         return res;
00542     }
00543     QStringList tmpDirs;
00544 };
00545 
00546 
00547 struct config_rec {
00548     bool followLinks;
00549     QString resultBuf;
00550     int accuracy;
00551 
00552     struct magic *magic,    
00553     *last;
00554     KMimeMagicUtimeConf * utimeConf;
00555 };
00556 
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug-only walk over the rule list.  It stops at the first node whose
 * pointer value has four printable low-order bytes, remembering the
 * previous node in `prevm`.
 *
 * NOTE(review): this block cannot compile as it stands when
 * MIME_MAGIC_DEBUG_TABLE is defined: `conf` is not in scope in a free
 * function, `lineno` exists only under DEBUG_LINENUMBERS, and the statement
 * inside the `if` has lost its beginning (only the argument list is left).
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        /* are the four low-order bytes of the pointer all printable characters? */
        if (isprint((((unsigned long) m) >> 24) & 255) &&
            isprint((((unsigned long) m) >> 16) & 255) &&
            isprint((((unsigned long) m) >> 8) & 255) &&
            isprint(((unsigned long) m) & 255)) {
            /* NOTE(review): the head of this statement is missing; what
             * follows is only the tail of an argument list. */
            
                  (((unsigned long) m) >> 24) & 255,
                  (((unsigned long) m) >> 16) & 255,
                  (((unsigned long) m) >> 8) & 255,
                  ((unsigned long) m) & 255,
                  prevm ? prevm->lineno : -1);
            break;
        }
        prevm = m;
    }
}
#endif
00583 
/* Advance the local pointer `l` past any ASCII whitespace.
 * Requires a variable named `l` in the scope where it is used. */
#define    EATAB {while (isascii((unsigned char) *l) && \
          isspace((unsigned char) *l))  ++l;}
00586 
00587 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00588 {
00589     int ws_offset;
00590 
00591     
00592     if (line[0]) {
00593         line[strlen(line) - 1] = '\0';
00594     }
00595     
00596     ws_offset = 0;
00597     while (line[ws_offset] && isspace(line[ws_offset])) {
00598         ws_offset++;
00599     }
00600 
00601     
00602     if (line[ws_offset] == 0) {
00603         return 0;
00604     }
00605     
00606     if (line[ws_offset] == '#')
00607         return 0;
00608 
00609     
00610     (*rule)++;
00611 
00612     
00613     return (parse(line + ws_offset, lineno) != 0);
00614 }
00615 
00616 
00617 
00618 
00619 int KMimeMagic::apprentice( const QString& magicfile )
00620 {
00621     FILE *f;
00622     char line[BUFSIZ + 1];
00623     int errs = 0;
00624     int lineno;
00625     int rule = 0;
00626     QCString fname;
00627 
00628     if (magicfile.isEmpty())
00629         return -1;
00630     fname = QFile::encodeName(magicfile);
00631     f = fopen(fname, "r");
00632     if (f == NULL) {
00633         kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00634         return -1;
00635     }
00636 
00637     
00638     for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00639         if (parse_line(line, &rule, lineno))
00640             errs++;
00641 
00642     fclose(f);
00643 
00644 #ifdef DEBUG_APPRENTICE
00645     kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00646     kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00647 #endif
00648 
00649 #ifdef MIME_MAGIC_DEBUG_TABLE
00650     test_table();
00651 #endif
00652 
00653     return (errs ? -1 : 0);
00654 }
00655 
00656 int KMimeMagic::buff_apprentice(char *buff)
00657 {
00658     char line[BUFSIZ + 2];
00659     int errs = 0;
00660     int lineno = 1;
00661     char *start = buff;
00662     char *end;
00663     int count = 0;
00664     int rule = 0;
00665     int len = strlen(buff) + 1;
00666 
00667     
00668     do {
00669         count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00670         strncpy(line, start, count);
00671         line[count] = '\0';
00672         if ((end = strchr(line, '\n'))) {
00673             *(++end) = '\0';
00674             count = strlen(line);
00675         } else
00676           strcat(line, "\n");
00677         start += count;
00678         len -= count;
00679         if (parse_line(line, &rule, lineno))
00680             errs++;
00681         lineno++;
00682     } while (len > 0);
00683 
00684 #ifdef DEBUG_APPRENTICE
00685     kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00686     kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00687 #endif
00688 
00689 #ifdef MIME_MAGIC_DEBUG_TABLE
00690     test_table();
00691 #endif
00692 
00693     return (errs ? -1 : 0);
00694 }
00695 
00696 
00697 
00698 
00699 static unsigned long
00700 signextend(struct magic *m, unsigned long v)
00701 {
00702     if (!(m->flag & UNSIGNED))
00703         switch (m->type) {
00704                 
00705 
00706 
00707 
00708 
00709             case BYTE:
00710                 v = (char) v;
00711                 break;
00712             case SHORT:
00713             case BESHORT:
00714             case LESHORT:
00715                 v = (short) v;
00716                 break;
00717             case DATE:
00718             case BEDATE:
00719             case LEDATE:
00720             case LONG:
00721             case BELONG:
00722             case LELONG:
00723                 v = (long) v;
00724                 break;
00725             case STRING:
00726                 break;
00727             default:
00728                 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00729                 return ERROR;
00730         }
00731     return v;
00732 }
00733 
00734 
00735 
00736 
00737 int KMimeMagic::parse(char *l, int
00738 #ifdef DEBUG_LINENUMBERS
00739     lineno
00740 #endif
00741         )
00742 {
00743     int i = 0;
00744     struct magic *m;
00745     char *t,
00746     *s;
00747     
00748     if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00749         kdError(7018) << "parse: Out of memory." << endl;
00750         return -1;
00751     }
00752     
00753     m->next = NULL;
00754     if (!conf->magic || !conf->last) {
00755         conf->magic = conf->last = m;
00756     } else {
00757         conf->last->next = m;
00758         conf->last = m;
00759     }
00760 
00761     
00762     m->flag = 0;
00763     m->cont_level = 0;
00764 #ifdef DEBUG_LINENUMBERS
00765     m->lineno = lineno;
00766 #endif
00767 
00768     while (*l == '>') {
00769         ++l;            
00770         m->cont_level++;
00771     }
00772 
00773     if (m->cont_level != 0 && *l == '(') {
00774         ++l;            
00775         m->flag |= INDIR;
00776     }
00777     
00778     m->offset = (int) strtol(l, &t, 0);
00779     if (l == t) {
00780             kdError(7018) << "parse: offset " << l << " invalid" << endl;
00781     }
00782     l = t;
00783 
00784     if (m->flag & INDIR) {
00785         m->in.type = LONG;
00786         m->in.offset = 0;
00787         
00788 
00789 
00790         if (*l == '.') {
00791             switch (*++l) {
00792                 case 'l':
00793                     m->in.type = LONG;
00794                     break;
00795                 case 's':
00796                     m->in.type = SHORT;
00797                     break;
00798                 case 'b':
00799                     m->in.type = BYTE;
00800                     break;
00801                 default:
00802                     kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00803                     break;
00804             }
00805             l++;
00806         }
00807         s = l;
00808         if (*l == '+' || *l == '-')
00809             l++;
00810         if (isdigit((unsigned char) *l)) {
00811             m->in.offset = strtol(l, &t, 0);
00812             if (*s == '-')
00813                 m->in.offset = -m->in.offset;
00814         } else
00815             t = l;
00816         if (*t++ != ')') {
00817             kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00818         }
00819         l = t;
00820     }
00821     while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00822         ++l;
00823     EATAB;
00824 
00825 #define NBYTE       4
00826 #define NSHORT      5
00827 #define NLONG       4
00828 #define NSTRING     6
00829 #define NDATE       4
00830 #define NBESHORT    7
00831 #define NBELONG     6
00832 #define NBEDATE     6
00833 #define NLESHORT    7
00834 #define NLELONG     6
00835 #define NLEDATE     6
00836 
00837     if (*l == 'u') {
00838         ++l;
00839         m->flag |= UNSIGNED;
00840     }
00841     
00842     if (strncmp(l, "byte", NBYTE) == 0) {
00843         m->type = BYTE;
00844         l += NBYTE;
00845     } else if (strncmp(l, "short", NSHORT) == 0) {
00846         m->type = SHORT;
00847         l += NSHORT;
00848     } else if (strncmp(l, "long", NLONG) == 0) {
00849         m->type = LONG;
00850         l += NLONG;
00851     } else if (strncmp(l, "string", NSTRING) == 0) {
00852         m->type = STRING;
00853         l += NSTRING;
00854     } else if (strncmp(l, "date", NDATE) == 0) {
00855         m->type = DATE;
00856         l += NDATE;
00857     } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00858         m->type = BESHORT;
00859         l += NBESHORT;
00860     } else if (strncmp(l, "belong", NBELONG) == 0) {
00861         m->type = BELONG;
00862         l += NBELONG;
00863     } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00864         m->type = BEDATE;
00865         l += NBEDATE;
00866     } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00867         m->type = LESHORT;
00868         l += NLESHORT;
00869     } else if (strncmp(l, "lelong", NLELONG) == 0) {
00870         m->type = LELONG;
00871         l += NLELONG;
00872     } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00873         m->type = LEDATE;
00874         l += NLEDATE;
00875     } else {
00876         kdError(7018) << "parse: type " << l << " invalid" << endl;
00877         return -1;
00878     }
00879     
00880     if (*l == '&') {
00881         ++l;
00882         m->mask = signextend(m, strtol(l, &l, 0));
00883     } else
00884         m->mask = (unsigned long) ~0L;
00885     EATAB;
00886 
00887     switch (*l) {
00888         case '>':
00889         case '<':
00890             
00891         case '&':
00892         case '^':
00893         case '=':
00894             m->reln = *l;
00895             ++l;
00896             break;
00897         case '!':
00898             if (m->type != STRING) {
00899                 m->reln = *l;
00900                 ++l;
00901                 break;
00902             }
00903             
00904         default:
00905             if (*l == 'x' && isascii((unsigned char) l[1]) &&
00906                 isspace((unsigned char) l[1])) {
00907                 m->reln = *l;
00908                 ++l;
00909                 goto GetDesc;   
00910             }
00911             m->reln = '=';
00912             break;
00913     }
00914     EATAB;
00915 
00916     if (getvalue(m, &l))
00917         return -1;
00918     
00919 
00920 
00921       GetDesc:
00922     EATAB;
00923     if (l[0] == '\b') {
00924         ++l;
00925         m->nospflag = 1;
00926     } else if ((l[0] == '\\') && (l[1] == 'b')) {
00927         ++l;
00928         ++l;
00929         m->nospflag = 1;
00930     } else
00931         m->nospflag = 0;
00932         
00933         while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00934             m->desc[i++] = *l++;
00935         m->desc[i] = '\0';
00936         
00937         while (--i>0 && isspace( m->desc[i] ))
00938             m->desc[i] = '\0';
00939 
00940         
00941     
00942 
00943 #ifdef DEBUG_APPRENTICE
00944     kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00945 #endif
00946     return 0;
00947 }
00948 
00949 
00950 
00951 
00952 
00953 
00954 static int
00955 getvalue(struct magic *m, char **p)
00956 {
00957     int slen;
00958 
00959     if (m->type == STRING) {
00960         *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00961         m->vallen = slen;
00962     } else if (m->reln != 'x')
00963         m->value.l = signextend(m, strtol(*p, p, 0));
00964     return 0;
00965 }
00966 
00967 
00968 
00969 
00970 
00971 
00972 static char *
00973 getstr(register char *s, register char *p, int plen, int *slen)
00974 {
00975     char *origs = s,
00976     *origp = p;
00977     char *pmax = p + plen - 1;
00978     register int c;
00979     register int val;
00980 
00981     while ((c = *s++) != '\0') {
00982         if (isspace((unsigned char) c))
00983             break;
00984         if (p >= pmax) {
00985             kdError(7018) << "String too long: " << origs << endl;
00986             break;
00987         }
00988         if (c == '\\') {
00989             switch (c = *s++) {
00990 
00991                 case '\0':
00992                     goto out;
00993 
00994                 default:
00995                     *p++ = (char) c;
00996                     break;
00997 
00998                 case 'n':
00999                     *p++ = '\n';
01000                     break;
01001 
01002                 case 'r':
01003                     *p++ = '\r';
01004                     break;
01005 
01006                 case 'b':
01007                     *p++ = '\b';
01008                     break;
01009 
01010                 case 't':
01011                     *p++ = '\t';
01012                     break;
01013 
01014                 case 'f':
01015                     *p++ = '\f';
01016                     break;
01017 
01018                 case 'v':
01019                     *p++ = '\v';
01020                     break;
01021 
01022                     
01023                 case '0':
01024                 case '1':
01025                 case '2':
01026                 case '3':
01027                 case '4':
01028                 case '5':
01029                 case '6':
01030                 case '7':
01031                     val = c - '0';
01032                     c = *s++;   
01033                     if (c >= '0' && c <= '7') {
01034                         val = (val << 3) | (c - '0');
01035                         c = *s++;   
01036                         if (c >= '0' && c <= '7')
01037                             val = (val << 3) | (c - '0');
01038                         else
01039                             --s;
01040                     } else
01041                         --s;
01042                     *p++ = (char) val;
01043                     break;
01044 
01045                     
01046                 case 'x':
01047                     val = 'x';  
01048                     c = hextoint(*s++); 
01049                     if (c >= 0) {
01050                         val = c;
01051                         c = hextoint(*s++);
01052                         if (c >= 0) {
01053                             val = (val << 4) + c;
01054                             c = hextoint(*s++);
01055                             if (c >= 0) {
01056                                 val = (val << 4) + c;
01057                             } else
01058                                 --s;
01059                         } else
01060                             --s;
01061                     } else
01062                         --s;
01063                     *p++ = (char) val;
01064                     break;
01065             }
01066         } else
01067             *p++ = (char) c;
01068     }
01069       out:
01070     *p = '\0';
01071     *slen = p - origp;
01072     
01073     
01074     return s;
01075 }
01076 
01077 
01078 
/*
 * Value of a single hexadecimal digit character.
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
 */
static int
hextoint(int c)
{
    const unsigned char ch = (unsigned char) c;

    if (isascii(ch)) {
        if (isdigit(ch))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return 10 + (c - 'a');
        if (c >= 'A' && c <= 'F')
            return 10 + (c - 'A');
    }
    return -1;
}
01092 
01093 
01094 
01095 
01096 static int
01097 mconvert(union VALUETYPE *p, struct magic *m)
01098 {
01099     switch (m->type) {
01100         case BYTE:
01101             return 1;
01102         case STRING:
01103             
01104             p->s[sizeof(p->s) - 1] = '\0';
01105             return 1;
01106 #ifndef WORDS_BIGENDIAN
01107         case SHORT:
01108 #endif
01109         case BESHORT:
01110             p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01111             return 1;
01112 #ifndef WORDS_BIGENDIAN
01113         case LONG:
01114         case DATE:
01115 #endif
01116         case BELONG:
01117         case BEDATE:
01118             p->l = (long)
01119                 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01120             return 1;
01121 #ifdef WORDS_BIGENDIAN
01122         case SHORT:
01123 #endif
01124         case LESHORT:
01125             p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01126             return 1;
01127 #ifdef WORDS_BIGENDIAN
01128         case LONG:
01129         case DATE:
01130 #endif
01131         case LELONG:
01132         case LEDATE:
01133             p->l = (long)
01134                 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01135             return 1;
01136         default:
01137             kdError(7018) << "mconvert: invalid type " << m->type << endl;
01138             return 0;
01139     }
01140 }
01141 
01142 
01143 static int
01144 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01145      int nbytes)
01146 {
01147     long offset = m->offset;
01148 
01149 
01150 
01151     if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01152     {
01153       int have = nbytes - offset;
01154       memset(p, 0, sizeof(union VALUETYPE));
01155       if (have > 0)
01156         memcpy(p, s + offset, have);
01157     } else
01158       memcpy(p, s + offset, sizeof(union VALUETYPE));
01159 
01160     if (!mconvert(p, m))
01161         return 0;
01162 
01163     if (m->flag & INDIR) {
01164 
01165         switch (m->in.type) {
01166             case BYTE:
01167                 offset = p->b + m->in.offset;
01168                 break;
01169             case SHORT:
01170                 offset = p->h + m->in.offset;
01171                 break;
01172             case LONG:
01173                 offset = p->l + m->in.offset;
01174                 break;
01175         }
01176 
01177         if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01178              return 0;
01179 
01180         memcpy(p, s + offset, sizeof(union VALUETYPE));
01181 
01182         if (!mconvert(p, m))
01183             return 0;
01184     }
01185     return 1;
01186 }
01187 
01188 static int
01189 mcheck(union VALUETYPE *p, struct magic *m)
01190 {
01191     register unsigned long l = m->value.l;
01192     register unsigned long v;
01193     int matched;
01194 
01195     if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01196         kdError(7018) << "BOINK" << endl;
01197         return 1;
01198     }
01199     switch (m->type) {
01200         case BYTE:
01201             v = p->b;
01202             break;
01203 
01204         case SHORT:
01205         case BESHORT:
01206         case LESHORT:
01207             v = p->h;
01208             break;
01209 
01210         case LONG:
01211         case BELONG:
01212         case LELONG:
01213         case DATE:
01214         case BEDATE:
01215         case LEDATE:
01216             v = p->l;
01217             break;
01218 
01219         case STRING:
01220             l = 0;
01221             
01222 
01223 
01224 
01225 
01226             v = 0;
01227             {
01228                 register unsigned char *a = (unsigned char *) m->value.s;
01229                 register unsigned char *b = (unsigned char *) p->s;
01230                 register int len = m->vallen;
01231                 Q_ASSERT(len);
01232 
01233                 while (--len >= 0)
01234                     if ((v = *b++ - *a++) != 0)
01235                         break;
01236             }
01237             break;
01238         default:
01239             kdError(7018) << "mcheck: invalid type " << m->type << endl;
01240             return 0;   
01241     }
01242 #if 0
01243     qDebug("Before signextend %08x", v);
01244 #endif
01245     v = signextend(m, v) & m->mask;
01246 #if 0
01247     qDebug("After signextend %08x", v);
01248 #endif
01249 
01250     switch (m->reln) {
01251         case 'x':
01252             matched = 1;
01253             break;
01254 
01255         case '!':
01256             matched = v != l;
01257             break;
01258 
01259         case '=':
01260             matched = v == l;
01261             break;
01262 
01263         case '>':
01264             if (m->flag & UNSIGNED)
01265                 matched = v > l;
01266             else
01267                 matched = (long) v > (long) l;
01268             break;
01269 
01270         case '<':
01271             if (m->flag & UNSIGNED)
01272                 matched = v < l;
01273             else
01274                 matched = (long) v < (long) l;
01275             break;
01276 
01277         case '&':
01278             matched = (v & l) == l;
01279             break;
01280 
01281         case '^':
01282             matched = (v & l) != l;
01283             break;
01284 
01285         default:
01286             matched = 0;
01287             kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01288             break;  
01289     }
01290 
01291     return matched;
01292 }
01293 
#if 0
/*
 * NOTE: everything up to the matching #endif is compiled out.
 */

/* Parser states used by finishResult(), in the order they are entered. */
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;

/*
 * finishResult: split the result text into a "type/subtype" part and an
 * optional trailing word, and store them through setMimeType() and
 * setEncoding() on magicResult.
 *
 * Returns OK on success, DECLINED when the text is not of that shape, and
 * -1 when a stored value reads back as null.
 *
 * NOTE(review): the loop scans `resultBuf` while the extraction below uses
 * `conf->resultBuf`; confirm which one is intended before re-enabling.
 */
int
KMimeMagic::finishResult()
{
    int cur_pos,            /* index of the character being examined */
     type_pos,              /* start of the type/subtype text */
     type_len,              /* length of the type/subtype text */
     encoding_pos,          /* start of the encoding text */
     encoding_len;          /* length of the encoding text */

    int state;

    /* Start before any text has been seen. */
    state = rsl_leading_space;
    type_pos = type_len = 0;
    encoding_pos = encoding_len = 0;

    /* Walk the result text one character at a time. */
    for (cur_pos = 0; cur_pos < (int)resultBuf.length(); cur_pos++) {
        if (resultBuf[cur_pos].isSpace()) {
            /* Whitespace: meaning depends on where we are. */
            if (state == rsl_leading_space) {
                /* still skipping leading whitespace */
                continue;
            } else if (state == rsl_type) {
                /* whitespace before the '/' was seen: not type/subtype */
                return DECLINED;
            } else if (state == rsl_subtype) {
                /* end of the subtype; move on to the separator */
                state++;
                continue;
            } else if (state == rsl_separator) {
                /* more separator whitespace */
                continue;
            } else if (state == rsl_encoding) {
                /* end of the encoding word: stop scanning */
                break;
            } else {
                /* state outside the enum range */
                kdError(7018) << "KMimeMagic::finishResult: bad state " << state << " (ws)" << endl;
                return DECLINED;
            }
        } else if (state == rsl_type &&
               resultBuf.at(cur_pos) == '/') {
            /* the '/' is counted as part of the type text; now in subtype */
            type_len++;
            state++;
        } else {
            /* Any other non-whitespace character. */
            if (state == rsl_leading_space) {
                /* first character of the type */
                state++;
                type_pos = cur_pos;
                type_len = 1;
                continue;
            } else if (state == rsl_type ||
                   state == rsl_subtype) {
                /* another character of type or subtype */
                type_len++;
                continue;
            } else if (state == rsl_separator) {
                /* first character of the encoding */
                state++;
                encoding_pos = cur_pos;
                encoding_len = 1;
                continue;
            } else if (state == rsl_encoding) {
                /* another character of the encoding */
                encoding_len++;
                continue;
            } else {
                /* state outside the enum range */
                kdError(7018) << " KMimeMagic::finishResult: bad state " << state << " (ns)" << endl;
                return DECLINED;
            }
        }
    }

    /* The scan must have got at least as far as the subtype. */
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {
        return DECLINED;
    }
    /* NOTE(review): rsl_encoding is tested twice in this condition. */
    if (state == rsl_subtype || state == rsl_encoding ||
        state == rsl_encoding || state == rsl_separator) {
        magicResult->setMimeType(conf->resultBuf.mid(type_pos, type_len).ascii());
    }
    if (state == rsl_encoding)
        magicResult->setEncoding(conf->resultBuf.mid(encoding_pos,
                               encoding_len).ascii());
    /* Fail if what was just stored reads back as null. */
    if (!magicResult->mimeType() ||
        (state == rsl_encoding && !magicResult->encoding())) {
        return -1;
    }
    return OK;
}
#endif
01407 
01408 
01409 
01410 
01411 
01412 
01413 static void process(struct config_rec* conf, const QString & fn)
01414 {
01415     int fd = 0;
01416     unsigned char buf[HOWMANY + 1]; 
01417     KDE_struct_stat sb;
01418     int nbytes = 0;         
01419         int tagbytes = 0;       
01420         QCString fileName = QFile::encodeName( fn );
01421 
01422     
01423 
01424 
01425     if (fsmagic(conf, fileName, &sb) != 0) {
01426         
01427         return;
01428     }
01429     if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01430         
01431         
01432 
01433 
01434 
01435         
01436         conf->resultBuf = MIME_BINARY_UNREADABLE;
01437         return;
01438     }
01439     
01440 
01441 
01442     if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01443         kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01444         conf->resultBuf = MIME_BINARY_UNREADABLE;
01445         return;
01446     }
01447         if ((tagbytes = tagmagic(buf, nbytes))) {
01448         
01449         lseek(fd, tagbytes, SEEK_SET);
01450         nbytes = read(fd, (char*)buf, HOWMANY);
01451         if (nbytes < 0) {
01452             conf->resultBuf = MIME_BINARY_UNREADABLE;
01453             return;
01454         }
01455         }
01456     if (nbytes == 0) {
01457         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01458     } else {
01459         buf[nbytes++] = '\0';   
01460         tryit(conf, buf, nbytes);
01461     }
01462 
01463         if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01464         {
01465             
01466 
01467 
01468 
01469 
01470             struct utimbuf utbuf;
01471             utbuf.actime = sb.st_atime;
01472             utbuf.modtime = sb.st_mtime;
01473             (void) utime(fileName, &utbuf);
01474         }
01475     (void) close(fd);
01476 }
01477 
01478 
01479 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01480 {
01481     
01482     if (match(conf, buf, nb))
01483         return;
01484 
01485     
01486     if (ascmagic(conf, buf, nb) == 1)
01487         return;
01488 
01489         
01490         if (textmagic(conf, buf, nb))
01491                 return;
01492 
01493     
01494     conf->resultBuf = MIME_BINARY_UNKNOWN;
01495     conf->accuracy = 0;
01496 }
01497 
01498 static int
01499 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01500 {
01501     int ret = 0;
01502 
01503     
01504 
01505 
01506 
01507     ret = KDE_lstat(fn, sb);  
01508 
01509     if (ret) {
01510         return 1;
01511 
01512     }
01513     
01514 
01515 
01516 
01517 
01518 
01519     switch (sb->st_mode & S_IFMT) {
01520     case S_IFDIR:
01521         conf->resultBuf = MIME_INODE_DIR;
01522         return 1;
01523     case S_IFCHR:
01524         conf->resultBuf = MIME_INODE_CDEV;
01525         return 1;
01526     case S_IFBLK:
01527         conf->resultBuf = MIME_INODE_BDEV;
01528         return 1;
01529         
01530 #ifdef    S_IFIFO
01531     case S_IFIFO:
01532         conf->resultBuf = MIME_INODE_FIFO;
01533         return 1;
01534 #endif
01535 #ifdef    S_IFLNK
01536     case S_IFLNK:
01537     {
01538         char buf[BUFSIZ + BUFSIZ + 4];
01539         register int nch;
01540         KDE_struct_stat tstatbuf;
01541 
01542         if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01543             conf->resultBuf = MIME_INODE_LINK;
01544             
01545             return 1;
01546         }
01547         buf[nch] = '\0'; 
01548         
01549         if (*buf == '/') {
01550             if (KDE_stat(buf, &tstatbuf) < 0) {
01551                 conf->resultBuf = MIME_INODE_LINK;
01552                 
01553                 return 1;
01554             }
01555         } else {
01556             char *tmp;
01557             char buf2[BUFSIZ + BUFSIZ + 4];
01558 
01559             strncpy(buf2, fn, BUFSIZ);
01560             buf2[BUFSIZ] = 0;
01561 
01562             if ((tmp = strrchr(buf2, '/')) == NULL) {
01563                 tmp = buf; 
01564             } else {
01565                 
01566                 *++tmp = '\0';
01567                 strcat(buf2, buf);
01568                 tmp = buf2;
01569             }
01570             if (KDE_stat(tmp, &tstatbuf) < 0) {
01571                 conf->resultBuf = MIME_INODE_LINK;
01572                 
01573                 return 1;
01574             } else
01575                 strcpy(buf, tmp);
01576         }
01577         if (conf->followLinks)
01578             process( conf, QFile::decodeName( buf ) );
01579         else
01580             conf->resultBuf = MIME_INODE_LINK;
01581         return 1;
01582     }
01583     return 1;
01584 #endif
01585 #ifdef    S_IFSOCK
01586 #ifndef __COHERENT__
01587     case S_IFSOCK:
01588         conf->resultBuf = MIME_INODE_SOCK;
01589         return 1;
01590 #endif
01591 #endif
01592     case S_IFREG:
01593         break;
01594     default:
01595         kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01596         
01597     }
01598 
01599     
01600 
01601 
01602     if (sb->st_size == 0) {
01603         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01604         return 1;
01605     }
01606     return 0;
01607 }
01608 
01609 
01610 
01611 
01612 
01613 
01614 
01615 
01616 
01617 
01618 
01619 
01620 
01621 
01622 
01623 
01624 
01625 
01626 
01627 
01628 
01629 
01630 
01631 
01632 
01633 
01634 
01635 static int
01636 match(struct config_rec* conf, unsigned char *s, int nbytes)
01637 {
01638     int cont_level = 0;
01639     union VALUETYPE p;
01640     struct magic *m;
01641 
01642 #ifdef DEBUG_MIMEMAGIC
01643     kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01644     for (m = conf->magic; m; m = m->next) {
01645         if (isprint((((unsigned long) m) >> 24) & 255) &&
01646             isprint((((unsigned long) m) >> 16) & 255) &&
01647             isprint((((unsigned long) m) >> 8) & 255) &&
01648             isprint(((unsigned long) m) & 255)) {
01649             kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01650             break;
01651         }
01652     }
01653 #endif
01654 
01655     for (m = conf->magic; m; m = m->next) {
01656 #ifdef DEBUG_MIMEMAGIC
01657         kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01658 #endif
01659         memset(&p, 0, sizeof(union VALUETYPE));
01660 
01661         
01662         if (!mget(&p, s, m, nbytes) ||
01663             !mcheck(&p, m)) {
01664             struct magic *m_cont;
01665 
01666             
01667 
01668 
01669             if (!m->next || (m->next->cont_level == 0)) {
01670                 continue;
01671             }
01672             m_cont = m->next;
01673             while (m_cont && (m_cont->cont_level != 0)) {
01674 #ifdef DEBUG_MIMEMAGIC
01675                 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01676 #endif
01677                 
01678 
01679 
01680 
01681                 m = m_cont;
01682                 m_cont = m_cont->next;
01683             }
01684             continue;
01685         }
01686         
01687         
01688 #ifdef DEBUG_MIMEMAGIC
01689         kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01690 #endif
01691 
01692         
01693         conf->resultBuf = m->desc;
01694 
01695         cont_level++;
01696         
01697 
01698 
01699 
01700         m = m->next;
01701         while (m && (m->cont_level != 0)) {
01702 #ifdef DEBUG_MIMEMAGIC
01703                     kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01704 #endif
01705                     if (cont_level >= m->cont_level) {
01706                 if (cont_level > m->cont_level) {
01707                     
01708 
01709 
01710 
01711                     cont_level = m->cont_level;
01712                 }
01713                 if (mget(&p, s, m, nbytes) &&
01714                     mcheck(&p, m)) {
01715                     
01716 
01717 
01718 
01719 
01720 
01721 #ifdef DEBUG_MIMEMAGIC
01722                                     kdDebug(7018) << "continuation matched" << endl;
01723 #endif
01724                                     conf->resultBuf = m->desc;
01725                     cont_level++;
01726                 }
01727             }
01728             
01729             m = m->next;
01730         }
01731                 
01732                 
01733                 if ( !conf->resultBuf.isEmpty() )
01734                 {
01735 #ifdef DEBUG_MIMEMAGIC
01736                     kdDebug(7018) << "match: matched" << endl;
01737 #endif
01738                     return 1;       
01739                 }
01740     }
01741 #ifdef DEBUG_MIMEMAGIC
01742     kdDebug(7018) << "match: failed" << endl;
01743 #endif
01744     return 0;               
01745 }
01746 
01747 
01748 
/*
 * tagmagic: detect an ID3v2 tag at the start of the sample.
 *
 *   buf     sample data
 *   nbytes  number of valid bytes in buf; fewer than 40 is never inspected
 *
 * Returns the total length of the tag in bytes (10-byte header, the
 * 28-bit size field, plus 10 more when flag bit 0x10 is set), or 0 when
 * the data does not start with a well-formed tag header.
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    int i;
    int size = 10;      /* the header itself */

    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    /* Major versions above 4 are not accepted. */
    if (buf[3] > 4)
        return 0;
    /* The low four flag bits must be clear. */
    if (buf[5] & 0x0F)
        return 0;

    /*
     * The size is stored as four 7-bit bytes.  A byte with the top bit set
     * cannot belong to a real tag header, and adding it in would produce a
     * meaningless length, so treat the data as untagged.
     */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    /* Flag 0x10 adds another 10 bytes to the tag. */
    if (buf[5] & 0x10)
        size += 10;

    size += buf[9];
    size += buf[8] << 7;
    size += buf[7] << 14;
    size += buf[6] << 21;
    return size;
}
01768 
01769 
01770 
01771 #define    STREQ(a, b)    (*(a) == *(b) && strcmp((a), (b)) == 0)
01772 
01773 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01774 {
01775     int i;
01776     double pct, maxpct, pctsum;
01777     double pcts[NTYPES];
01778     int mostaccurate, tokencount;
01779     int typeset, jonly, conly, jconly, cppcomm, ccomm;
01780     int has_escapes = 0;
01781     unsigned char *s;
01782     char nbuf[HOWMANY + 1]; 
01783     char *token;
01784     register const struct names *p;
01785     int typecount[NTYPES];
01786 
01787     
01788     conf->accuracy = 70;
01789 
01790     
01791 
01792 
01793 
01794 
01795     if (*buf == '.') {
01796         unsigned char *tp = buf + 1;
01797 
01798         while (isascii(*tp) && isspace(*tp))
01799             ++tp;   
01800         if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01801              isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01802             conf->resultBuf = MIME_APPL_TROFF;
01803             return 1;
01804         }
01805     }
01806     if ((*buf == 'c' || *buf == 'C') &&
01807         isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01808         
01809         conf->resultBuf = MIME_TEXT_FORTRAN;
01810         return 1;
01811     }
01812     assert(nbytes-1 < HOWMANY + 1);
01813     
01814     
01815     s = (unsigned char *) memcpy(nbuf, buf, nbytes);
01816     s[nbytes-1] = '\0';
01817     has_escapes = (memchr(s, '\033', nbytes) != NULL);
01818 
01819 
01820 
01821 
01822     memset(&typecount, 0, sizeof(typecount));
01823     typeset = 0;
01824     jonly = 0;
01825     conly = 0;
01826     jconly = 0;
01827     cppcomm = 0;
01828     ccomm = 0;
01829     tokencount = 0;
01830         bool foundClass = false; 
01831     
01832         
01833     while ((token = strtok((char *) s, " \t\n\r\f,;>")) != NULL) {
01834         s = NULL;       
01835 #ifdef DEBUG_MIMEMAGIC
01836                 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01837 #endif
01838         for (p = names; p->name ; p++) {
01839             if (STREQ(p->name, token)) {
01840 #ifdef DEBUG_MIMEMAGIC
01841                                 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01842 #endif
01843                     tokencount++;
01844                 typeset |= p->type;
01845                 if (p->type == L_JAVA)
01846                     jonly++;
01847                 if ((p->type & (L_C|L_CPP|L_JAVA))
01848                     == (L_CPP|L_JAVA)) {
01849                     jconly++;
01850                                         if ( !foundClass && STREQ("class", token) )
01851                                             foundClass = true;
01852                                 }
01853                 if ((p->type & (L_C|L_CPP|L_JAVA))
01854                     == (L_C|L_CPP))
01855                     conly++;
01856                 if (STREQ(token, "//"))
01857                     cppcomm++;
01858                 if (STREQ(token, "/*"))
01859                     ccomm++;
01860                 for (i = 0; i < (int)NTYPES; i++)
01861                     if ((1 << i) & p->type)
01862                         typecount[i]++;
01863             }
01864         }
01865     }
01866 
01867     if (typeset & (L_C|L_CPP|L_JAVA)) {
01868         conf->accuracy = 40;
01869             if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01870 #ifdef DEBUG_MIMEMAGIC
01871                         kdDebug(7018) << "C/C++/Java: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " ccomm=" << ccomm << endl;
01872 #endif
01873             if (jonly && conly)
01874                             
01875                             if ( jonly > conly )
01876                                 conly = 0;
01877                             else
01878                                 jonly = 0;
01879             if (jonly > 1 && foundClass) {
01880                 
01881                 conf->resultBuf = QString(types[P_JAVA].type);
01882                 return 1;
01883             }
01884             if (jconly > 1) {
01885                 
01886                 if (typecount[P_JAVA] > typecount[P_CPP])
01887                   conf->resultBuf = QString(types[P_JAVA].type);
01888                 else
01889                   conf->resultBuf = QString(types[P_CPP].type);
01890                 return 1;
01891             }
01892             if (conly) {
01893                 
01894                 if (cppcomm)
01895                   conf->resultBuf = QString(types[P_CPP].type);
01896                 else
01897                   conf->resultBuf = QString(types[P_C].type);
01898                 return 1;
01899             }
01900             if (ccomm) {
01901                 conf->resultBuf = QString(types[P_C].type);
01902                 return 1;
01903             }
01904           }
01905     }
01906 
01907     
01908 
01909 
01910 
01911     mostaccurate = -1;
01912     maxpct = pctsum = 0.0;
01913     for (i = 0; i < (int)NTYPES; i++) {
01914       if (typecount[i] > 1) { 
01915         pct = (double)typecount[i] / (double)types[i].kwords *
01916             (double)types[i].weight;
01917         pcts[i] = pct;
01918         pctsum += pct;
01919         if (pct > maxpct) {
01920             maxpct = pct;
01921             mostaccurate = i;
01922           }
01923 #ifdef DEBUG_MIMEMAGIC
01924           kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01925 #endif
01926       }
01927     }
01928     if (mostaccurate >= 0) {
01929             if ( mostaccurate != P_JAVA || foundClass ) 
01930             {
01931         conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01932 #ifdef DEBUG_MIMEMAGIC
01933                 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << accuracy << endl;
01934 #endif
01935         conf->resultBuf = QString(types[mostaccurate].type);
01936         return 1;
01937             }
01938     }
01939 
01940     switch (is_tar(buf, nbytes)) {
01941         case 1:
01942             
01943             conf->resultBuf = MIME_APPL_TAR;
01944             conf->accuracy = 90;
01945             return 1;
01946         case 2:
01947             
01948             conf->resultBuf = MIME_APPL_TAR;
01949             conf->accuracy = 90;
01950             return 1;
01951     }
01952 
01953     for (i = 0; i < nbytes; i++) {
01954         if (!isascii(*(buf + i)))
01955             return 0;   
01956     }
01957 
01958     
01959     conf->accuracy = 90;
01960     if (has_escapes) {
01961         
01962         
01963         conf->resultBuf = MIME_TEXT_UNKNOWN;
01964     } else {
01965         
01966         conf->resultBuf = MIME_TEXT_PLAIN;
01967     }
01968     return 1;
01969 }
01970 
01971 
01972 #define TEXT_MAXLINELEN 300
01973 
01974 
01975 
01976 
01977 
01978 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01979 {
01980     int i;
01981     unsigned char *cp;
01982 
01983     nbytes--;
01984 
01985     
01986     for (i = 0, cp = buf; i < nbytes; i++, cp++)
01987         if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01988             return 0;
01989 
01990     
01991 
01992 
01993     for (i = 0; i < nbytes;) {
01994         cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
01995         if (cp == NULL) {
01996             
01997             if (i + TEXT_MAXLINELEN >= nbytes)
01998                 break;
01999             else
02000                 return 0;
02001         }
02002         if (cp - buf > TEXT_MAXLINELEN)
02003             return 0;
02004         i += (cp - buf + 1);
02005         buf = cp + 1;
02006     }
02007     conf->resultBuf = MIME_TEXT_PLAIN;
02008     return 1;
02009 }
02010 
02011 
02012 
02013 
02014 
02015 
02016 
02017 
02018 
02019 
02020 
02021 
02022 
02023 
02024 
02025 #define    isodigit(c)    ( ((c) >= '0') && ((c) <= '7') )
02026 
02027 
02028 
02029 
02030 
02031 
02032 static int
02033 is_tar(unsigned char *buf, int nbytes)
02034 {
02035     register union record *header = (union record *) buf;
02036     register int i;
02037     register long sum,
02038      recsum;
02039     register char *p;
02040 
02041     if (nbytes < (int)sizeof(union record))
02042          return 0;
02043 
02044     recsum = from_oct(8, header->header.chksum);
02045 
02046     sum = 0;
02047     p = header->charptr;
02048     for (i = sizeof(union record); --i >= 0;) {
02049         
02050 
02051 
02052 
02053         sum += 0xFF & *p++;
02054     }
02055 
02056     
02057     for (i = sizeof(header->header.chksum); --i >= 0;)
02058         sum -= 0xFF & header->header.chksum[i];
02059     sum += ' ' * sizeof header->header.chksum;
02060 
02061     if (sum != recsum)
02062         return 0;       
02063 
02064     if (0 == strcmp(header->header.magic, TMAGIC))
02065         return 2;       
02066 
02067     return 1;               
02068 }
02069 
02070 
02071 
02072 
02073 
02074 
02075 
/*
 * from_oct: parse an octal number from a fixed-width text field.
 *
 *   digs   width of the field in characters
 *   where  start of the field; it need not be '\0'-terminated inside the
 *          field, and may hold arbitrary bytes
 *
 * Leading whitespace is skipped, then octal digits are accumulated until
 * the field is used up or a non-digit is met.  A field that is all
 * whitespace, or whose digits are followed by anything other than '\0' or
 * whitespace, is rejected.
 *
 * Returns the value, or -1 for a rejected field.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    /*
     * Every byte goes through unsigned char before it reaches isspace():
     * the field comes straight from file data, and passing a negative
     * char to a <ctype.h> function is undefined.
     */
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;  /* nothing but whitespace */
    }

    while (digs > 0) {
        const unsigned char c = (unsigned char) *where;
        if (c < '0' || c > '7')     /* not an octal digit */
            break;
        value = (value << 3) | (c - '0');
        ++where;
        --digs;
    }

    /* Field not exhausted: only '\0' or whitespace may follow the digits. */
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02097 
02098 KMimeMagic::KMimeMagic()
02099 {
02100     
02101     QString mimefile = locate( "mime", "magic" );
02102     init( mimefile );
02103     
02104     QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02105     for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02106         if ( !mergeConfig( *it ) )
02107             kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02108 }
02109 
02110 KMimeMagic::KMimeMagic(const QString & _configfile)
02111 {
02112     init( _configfile );
02113 }
02114 
02115 void KMimeMagic::init( const QString& _configfile )
02116 {
02117     int result;
02118     conf = new config_rec;
02119 
02120     
02121     conf->magic = conf->last = NULL;
02122     magicResult = NULL;
02123     conf->followLinks = false;
02124 
02125         conf->utimeConf = 0L; 
02126     
02127     result = apprentice(_configfile);
02128     if (result == -1)
02129         return;
02130 #ifdef MIME_MAGIC_DEBUG_TABLE
02131     test_table();
02132 #endif
02133 }
02134 
02135 
02136 
02137 
02138 
02139 KMimeMagic::~KMimeMagic()
02140 {
02141     if (conf) {
02142         struct magic *p = conf->magic;
02143         struct magic *q;
02144         while (p) {
02145             q = p;
02146             p = p->next;
02147             free(q);
02148         }
02149                 delete conf->utimeConf;
02150         delete conf;
02151     }
02152         delete magicResult;
02153 }
02154 
02155 bool
02156 KMimeMagic::mergeConfig(const QString & _configfile)
02157 {
02158     kdDebug(7018) << k_funcinfo << _configfile << endl;
02159     int result;
02160 
02161     if (_configfile.isEmpty())
02162         return false;
02163     result = apprentice(_configfile);
02164     if (result == -1) {
02165         return false;
02166     }
02167 #ifdef MIME_MAGIC_DEBUG_TABLE
02168     test_table();
02169 #endif
02170     return true;
02171 }
02172 
02173 bool
02174 KMimeMagic::mergeBufConfig(char * _configbuf)
02175 {
02176     int result;
02177 
02178     if (conf) {
02179         result = buff_apprentice(_configbuf);
02180         if (result == -1)
02181             return false;
02182 #ifdef MIME_MAGIC_DEBUG_TABLE
02183         test_table();
02184 #endif
02185         return true;
02186     }
02187     return false;
02188 }
02189 
/*
 * setFollowLinks: store _enable in conf->followLinks, the flag fsmagic()
 * reads to decide whether a symbolic link's target is processed or the
 * link is reported as MIME_INODE_LINK.
 */
void
KMimeMagic::setFollowLinks( bool _enable )
{
    conf->followLinks = _enable;
}
02195 
02196 KMimeMagicResult *
02197 KMimeMagic::findBufferType(const QByteArray &array)
02198 {
02199     unsigned char buf[HOWMANY + 1]; 
02200 
02201     conf->resultBuf = QString::null;
02202     if ( !magicResult )
02203       magicResult = new KMimeMagicResult();
02204     magicResult->setInvalid();
02205     conf->accuracy = 100;
02206 
02207     int nbytes = array.size();
02208 
02209         if (nbytes > HOWMANY)
02210                 nbytes = HOWMANY;
02211         memcpy(buf, array.data(), nbytes);
02212         if (nbytes == 0) {
02213                 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02214         } else {
02215                 buf[nbytes++] = '\0';   
02216                 tryit(conf, buf, nbytes);
02217         }
02218         
02219         
02220     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02221     magicResult->setAccuracy(conf->accuracy);
02222         return magicResult;
02223 }
02224 
02225 static void
02226 refineResult(KMimeMagicResult *r, const QString & _filename)
02227 {
02228     QString tmp = r->mimeType();
02229     if (tmp.isEmpty())
02230         return;
02231     if ( tmp == "text/x-c"  ||
02232          tmp == "text/x-c++" )
02233     {
02234         if ( _filename.right(2) == ".h" )
02235             tmp += "hdr";
02236         else
02237             tmp += "src";
02238         r->setMimeType(tmp);
02239     }
02240 }
02241 
02242 KMimeMagicResult *
02243 KMimeMagic::findBufferFileType( const QByteArray &data,
02244                 const QString &fn)
02245 {
02246         KMimeMagicResult * r = findBufferType( data );
02247     refineResult(r, fn);
02248         return r;
02249 }
02250 
02251 
02252 
02253 
02254 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02255 {
02256 #ifdef DEBUG_MIMEMAGIC
02257     kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02258 #endif
02259     conf->resultBuf = QString::null;
02260 
02261         if ( !magicResult )
02262       magicResult = new KMimeMagicResult();
02263     magicResult->setInvalid();
02264     conf->accuracy = 100;
02265 
02266         if ( !conf->utimeConf )
02267             conf->utimeConf = new KMimeMagicUtimeConf();
02268 
02269         
02270         process(conf, fn );
02271 
02272         
02273         
02274     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02275     magicResult->setAccuracy(conf->accuracy);
02276     refineResult(magicResult, fn);
02277         return magicResult;
02278 }