Module Gnumed.pycommon.gmMimeMagic

magic.py determines a file type by its magic number

(C)opyright 2000 Jason Petrone jp@demonseed.net All Rights Reserved

Command Line Usage: running as python3 magic.py file will print a description of what 'file' is.

Module Usage: magic.whatis(data): when passed a string 'data' containing binary or text data, a description of what the data is will be returned.

              magic.filedesc(filename): returns a description of what the file 'filename' contains.

Acknowledgements: This module has been pulled from the web. Thanks to Jason Petrone for providing it to the community. It is based on version 0.1 of his magic.py.

Functions

def filedesc(file)
Expand source code
def filedesc(file):
        """Return a description of what the file named *file* contains.

        A directory is reported as ``'directory'``. For anything else the
        first 8192 characters are read in text mode and passed to ``whatis()``.

        Args:
                file: path of the file to examine.

        Returns:
                ``'directory'`` for a directory, otherwise whatever ``whatis()``
                returns for the leading part of the file.

        Raises:
                OSError: if the file cannot be opened or read.
                UnicodeDecodeError: may be raised when the content cannot be
                        decoded, because the file is opened in text mode.
        """
        # imported locally so the function does not depend on a module-level import
        import os

        # Ask the OS instead of comparing the text of an exception: the
        # message of the error raised by open() carries the file name on
        # Python 3 and differs between platforms, so matching it is unreliable.
        if os.path.isdir(file):
                return 'directory'
        # the context manager closes the handle even if read() raises
        with open(file, 'r') as handle:
                head = handle.read(8192)
        return whatis(head)
def load(file)
Expand source code
def load(file):
        """Read magic rules from *file* and register them as ``magicTest`` objects.

        Every non-comment line is expected to consist of whitespace separated
        fields::

                [>...]offset  type  [op]value  message ...

        Top-level rules are appended to the module-level ``magicNumbers``
        list. A rule prefixed with one or more ``>`` is appended to the
        ``subTests`` list of the most recently created rule one level up.

        The following lines are skipped: comments, lines with fewer than
        three fields, rules with an indirect ``(...)`` or relative ``&...``
        offset (these are reported on stdout), sub-rules for which no parent
        rule exists, and rules whose offset, value or mask cannot be parsed.

        Args:
                file: path of a UTF-8 encoded magic rule file.

        Raises:
                OSError: if the file cannot be opened or read.
        """
        operators = ('=', '<', '>', '&')
        # nesting level -> most recently created test at that level
        last = {0: None}
        with open(file, mode = 'rt', encoding = 'utf8') as rules:
                for line in rules:
                        if line.lstrip().startswith('#'):
                                # comment
                                continue
                        # str.split() splits on runs of whitespace and ignores
                        # leading/trailing blanks. A regex split on r'\s*' also
                        # splits on empty matches (Python >= 3.7), which cuts
                        # the line into single characters.
                        fields = line.split()
                        if len(fields) < 3:
                                # bad line
                                continue
                        offset, kind, value = fields[:3]

                        # every leading '>' nests the rule one level deeper
                        level = 0
                        while offset.startswith('>'):
                                level += 1
                                offset = offset[1:]
                        if not offset:
                                # nothing left after the nesting markers
                                continue

                        if offset[0] in ('(', '&'):
                                # indirect and relative offsets are not handled yet
                                print('SKIPPING ' + ' '.join(fields[3:]))
                                continue

                        if level == 0:
                                siblings = magicNumbers
                        else:
                                parent = last.get(level - 1)
                                if parent is None:
                                        # sub-rule without a parent to attach to
                                        continue
                                siblings = parent.subTests

                        if value[0] in operators:
                                # a comparison operator is specified
                                op = value[0]
                                value = value[1:]
                        else:
                                if len(value) > 1 and value[0] == '\\' and value[1] in operators:
                                        # literal value that collides with an operator is escaped
                                        value = value[1:]
                                op = '='

                        mask = None
                        if kind == 'string':
                                value = unescape(value)
                                # A trailing escaped space means the next field
                                # still belongs to the value. Each piece is
                                # unescaped exactly once.
                                while value.endswith(' ') and len(fields) > 3:
                                        value += unescape(fields.pop(3))
                        else:
                                if '&' in value:
                                        # "value&mask": keep the '&' out of both parts
                                        value, mask = value.split('&', 1)
                                try:
                                        value = strToNum(value)
                                        if mask is not None:
                                                mask = strToNum(mask)
                                except (ValueError, IndexError):
                                        # not a number this parser understands
                                        continue

                        # whatever follows the value is the description; it
                        # must be computed for string and numeric rules alike
                        msg = ' '.join(fields[3:])
                        try:
                                new = magicTest(offset, kind, op, value, msg, mask)
                        except (ValueError, IndexError):
                                # offset or type mask could not be parsed
                                continue
                        last[level] = new
                        siblings.append(new)
def strToNum(n)
Expand source code
def strToNum(n):
        """Convert the textual number *n* from a magic file into an int.

        Three notations are understood:

        * ``0x...`` or ``x...``: hexadecimal, case-insensitive
        * ``\\...``: digits after a backslash are accumulated in base 8,
          starting from the right and stopping at the first character that
          is not one of ``0``-``9``
        * anything else is handed to ``int()``

        Args:
                n: the string to convert.

        Returns:
                The integer value.

        Raises:
                ValueError: for an invalid hexadecimal digit or when ``int()``
                        rejects the string.
                IndexError: for an empty string.
        """
        if n[:1] == 'x':
                n = '0' + n

        if n[:2] == '0x':
                # hex: sum digit * 16**position, least significant digit first
                total = 0
                for position, digit in enumerate(reversed(n[2:].lower())):
                        total += string.hexdigits.index(digit) * 16 ** position
                return total

        if n[0] == '\\':
                # octal: walk from the right until a non-digit shows up
                total = 0
                weight = 1
                for digit in reversed(n[1:]):
                        if not ('0' <= digit <= '9'):
                                break
                        total += int(digit) * weight
                        weight *= 8
                return total

        return int(n)
def unescape(s)
Expand source code
def unescape(s):
        """Replace the escape sequences in *s* by the characters they denote.

        Recognised sequences:

        * a backslash at the very end of the string -> a space (escaped space)
        * ``\\0xHH`` and ``\\xHH`` -> the character with that hexadecimal code
        * a backslash followed by a run of digits -> the character whose code
          ``strToNum()`` computes for that run
        * ``\\n`` -> newline

        Any other backslash sequence is copied to the result unchanged.

        The string is scanned once from left to right. An unknown sequence
        therefore no longer stops the processing of later sequences, and a
        backslash produced by a sequence is not interpreted a second time.

        Args:
                s: the string to process.

        Returns:
                The string with the escape sequences replaced.

        Raises:
                ValueError: propagated from ``strToNum()`` for invalid hex digits.
        """
        pieces = []
        size = len(s)
        pos = 0
        while pos < size:
                char = s[pos]
                if char != '\\':
                        pieces.append(char)
                        pos += 1
                        continue

                nxt = pos + 1
                if nxt == size:
                        # escaped space at end
                        pieces.append(' ')
                        break

                if s[nxt:nxt + 2] == '0x':
                        # hex ascii value written as \0xHH
                        pieces.append(chr(strToNum(s[nxt:nxt + 4])))
                        pos = nxt + 4
                elif s[nxt] == 'x':
                        # hex ascii value written as \xHH
                        pieces.append(chr(strToNum(s[nxt:nxt + 3])))
                        pos = nxt + 3
                elif '0' <= s[nxt] <= '9':
                        # octal ascii value: take the whole run of digits
                        end = nxt
                        while end < size and '0' <= s[end] <= '9':
                                end += 1
                        # the backslash is passed along, it selects octal parsing
                        pieces.append(chr(strToNum(s[pos:end])))
                        pos = end
                elif s[nxt] == 'n':
                        # newline
                        pieces.append('\n')
                        pos = nxt + 1
                else:
                        # unknown escape: keep both characters and move on
                        pieces.append(s[pos:nxt + 1])
                        pos = nxt + 1
        return ''.join(pieces)
def whatis(data)
Expand source code
def whatis(data):
        """Return a description of what *data* is.

        The tests in the module-level ``magicNumbers`` list are tried in
        order and the first non-empty result is returned. If none matches,
        a few crude fallbacks are applied: data containing anything outside
        the 7-bit ASCII range is ``'data'``, otherwise the text is searched
        for ``'The'`` and ``'def'`` within its first 8192 characters.

        Args:
                data: the content to classify, as ``str`` or ``bytes``.

        Returns:
                The message of the first matching magic test, or one of
                ``'data'``, ``'English text'``, ``'Python Source'`` and
                ``'ASCII text'``.
        """
        for rule in magicNumbers:
                description = rule.compare(data)
                if description:
                        return description

        # no matching magic number. is it binary or text?
        if isinstance(data, (bytes, bytearray)):
                # iterating bytes yields ints, which ord() rejects; decoding
                # as ASCII performs the same "any byte above 127" check
                try:
                        text = bytes(data).decode('ascii')
                except UnicodeDecodeError:
                        return 'data'
        else:
                # ASCII ends at 127, so 128 already counts as binary
                if any(ord(char) > 127 for char in data):
                        return 'data'
                text = data

        # it's ASCII, now do text tests
        if text.find('The', 0, 8192) > -1:
                return 'English text'
        if text.find('def', 0, 8192) > -1:
                return 'Python Source'
        return 'ASCII text'

Classes

class magicTest (offset, t, op, value, msg, mask=None)
Expand source code
class magicTest:
        """One rule of a magic file: compare a value found at an offset.

        Attributes are set by ``__init__``:
        ``offset`` (int position in the data), ``type`` (type name without a
        mask suffix), ``op`` (comparison operator), ``value`` (expected
        value), ``mask`` (bit mask or None), ``msg`` (description returned on
        a match) and ``subTests`` (list for nested rules; it is only stored,
        this class never evaluates it).
        """

        def __init__(self, offset, t, op, value, msg, mask = None):
                """Set up the rule.

                Args:
                        offset: position of the value in the data; a string is
                                converted with ``strToNum()``.
                        t: type name, optionally followed by ``&mask``; such a
                                mask is converted with ``strToNum()`` and
                                overrides *mask*.
                        op: comparison operator; only ``'='`` is evaluated.
                        value: the value to compare against.
                        msg: description returned when the rule matches.
                        mask: optional bit mask applied before comparing.
                """
                if '&' in t:
                        t, mask_text = t.split('&', 1)
                        mask = strToNum(mask_text)
                if isinstance(offset, str):
                        self.offset = strToNum(offset)
                else:
                        self.offset = offset
                self.type = t
                self.msg = msg
                self.subTests = []
                self.op = op
                self.mask = mask
                self.value = value

        def test(self, data):
                """Compare the extracted *data* with the expected value.

                Args:
                        data: the value taken from the examined content.

                Returns:
                        ``self.msg`` if the operator is ``'='`` and the
                        (masked) data equals ``self.value``, otherwise None.
                        The operators ``<``, ``>``, ``&`` and ``^`` are not
                        implemented and never match.
                """
                if self.mask:
                        try:
                                data = data & self.mask
                        except TypeError:
                                # a mask only makes sense for integers
                                return None
                if self.op == '=' and self.value == data:
                        return self.msg
                return None

        def compare(self, data):
                """Extract the value this rule looks at from *data* and test it.

                Args:
                        data: the content to examine, as ``bytes`` or ``str``.

                Returns:
                        The result of ``test()`` for the extracted value, or
                        None when the type is unknown or the value cannot be
                        extracted (for instance because *data* is too short).
                """
                # '=' selects standard sizes (2 and 4 bytes) in native byte
                # order. Plain 'l' is 8 bytes on LP64 platforms and can never
                # be unpacked from a 4 byte slice.
                # NOTE(review): signedness differs between the entries
                # ('>H' is unsigned, the others signed) - kept as found.
                formats = {
                        'short': '=h',
                        'leshort': '<h',
                        'beshort': '>H',
                        'long': '=l',
                        'lelong': '<l',
                        'belong': '>l',
                }
                try:
                        if self.type == 'string':
                                found = data[self.offset : self.offset + len(self.value)]
                                if isinstance(found, (bytes, bytearray)) and isinstance(self.value, str):
                                        # latin-1 maps every byte to the character
                                        # with the same code, like chr() does
                                        found = bytes(found).decode('latin-1')
                        elif self.type in formats:
                                fmt = formats[self.type]
                                raw = data[self.offset : self.offset + struct.calcsize(fmt)]
                                if isinstance(raw, str):
                                        # struct only accepts bytes-like objects
                                        raw = raw.encode('latin-1')
                                [found] = struct.unpack(fmt, raw)
                        else:
                                # unknown type: nothing sensible to compare
                                return None
                except (struct.error, TypeError, ValueError, IndexError):
                        return None

                return self.test(found)

Methods

def compare(self, data)
Expand source code
        def compare(self, data):
                #print str([self.type, self.value, self.msg])
                try: 
                        if self.type == 'string':
                                c = ''; s = ''
                                for i in range(0, len(self.value)+1):
                                        if (i + self.offset) > (len(data) - 1): break
                                        s = s + c
                                        [c] = struct.unpack('c', data[self.offset + i])
                                data = s
                        elif self.type == 'short':
                                [data] = struct.unpack('h', data[self.offset : self.offset + 2])
                        elif self.type == 'leshort':
                                [data] = struct.unpack('<h', data[self.offset : self.offset + 2])
                        elif self.type == 'beshort':
                                [data] = struct.unpack('>H', data[self.offset : self.offset + 2])
                        elif self.type == 'long':
                                [data] = struct.unpack('l', data[self.offset : self.offset + 4])
                        elif self.type == 'lelong':
                                [data] = struct.unpack('<l', data[self.offset : self.offset + 4])
                        elif self.type == 'belong':
                                [data] = struct.unpack('>l', data[self.offset : self.offset + 4])
                        else:
                                #print('UNKNOWN TYPE: ' + self.type)
                                pass
                except Exception:
                        return None

#    print str([self.msg, self.value, data])
                return self.test(data)
def test(self, data)
Expand source code
def test(self, data):
        if self.mask:
                data = data & self.mask
        if self.op == '=': 
                if self.value == data: return self.msg
        elif self.op ==  '<':
                pass
        elif self.op ==  '>':
                pass
        elif self.op ==  '&':
                pass
        elif self.op ==  '^':
                pass
        return None