Module Gnumed.pycommon.gmMimeMagic
magic.py determines a file type by its magic number
(C)opyright 2000 Jason Petrone jp@demonseed.net All Rights Reserved
Command Line Usage: running as python3 magic.py file will print
a description of what 'file' is.
Module Usage: magic.whatis(data): when passed a string 'data' containing binary or text data, a description of what the data is will be returned.
magic.filedesc(filename): returns a description of what the file
'filename' contains.
Acknowledgements: This module has been pulled from the web. Thanks to Jason Petrone for providing it to the community. It is based on his version 0.1.
Functions
def filedesc(file)-
Expand source code
def filedesc(file):
    """Return a description of what the file at path *file* contains.

    Up to the first 8192 bytes of the file are read and handed to
    ``whatis()``.

    Args:
        file: path of the file to inspect.

    Returns:
        Whatever ``whatis()`` returns for the leading bytes, or the string
        ``'directory'`` when *file* names a directory.

    Raises:
        OSError: if the file cannot be opened or read for any reason other
            than being a directory.
    """
    try:
        # Binary mode: the content may be arbitrary bytes that are not valid
        # text in the default encoding. The ``with`` block closes the handle.
        with open(file, 'rb') as handle:
            head = handle.read(8192)
    except IsADirectoryError:
        # Comparing str(e) against a fixed message never matched on Python 3,
        # where the message also carries the path; catch the type instead.
        return 'directory'
    # latin-1 maps every byte to the code point of the same value, so the
    # result is a str in which ord(ch) is the original byte value.
    return whatis(head.decode('latin-1'))
Expand source code
def load(file):
    """Parse the magic database *file* and append its tests to ``magicNumbers``.

    Each non-comment line is split on whitespace into
    ``offset type value [message ...]``.  Leading ``>`` characters on the
    offset give the nesting level: level 0 tests are appended to the global
    ``magicNumbers`` list, deeper tests to the ``subTests`` list of the most
    recent test one level up.

    Lines that are skipped:
      * comments and lines with fewer than three fields,
      * indirect ``(...)`` and relative ``&...`` offsets (a ``SKIPPING``
        notice is printed),
      * lines whose numeric value, mask or offset cannot be converted,
      * continuation lines whose parent test is missing or was skipped.

    Args:
        file: path of a UTF-8 encoded magic database.

    Raises:
        OSError: if *file* cannot be opened or read.
    """
    global magicNumbers
    operators = ('=', '<', '>', '&')
    # Most recent successfully created test per nesting level.
    last = {0: None}
    with open(file, mode='rt', encoding='utf8') as lines:
        for line in lines:
            if line.lstrip().startswith('#'):
                continue
            # str.split() splits on runs of whitespace and ignores leading and
            # trailing whitespace.  (re.split(r'\s*', ...) splits between every
            # character on Python >= 3.7.)
            fields = line.split()
            if len(fields) < 3:
                continue
            offset, kind, value = fields[:3]

            level = 0
            while offset.startswith('>'):
                level += 1
                offset = offset[1:]
            if not offset:
                continue

            target = magicNumbers
            if level > 0:
                parent = last.get(level - 1)
                if parent is None:
                    # No usable parent test to attach this continuation to.
                    continue
                target = parent.subTests
            # Forget the previous test at this level so that, should this line
            # be skipped, its continuation lines do not attach to a stale one.
            last[level] = None

            if offset[0] in '(&':
                # Indirect and relative offsets are not handled yet.
                print('SKIPPING ' + ' '.join(fields[3:]))
                continue

            if value[0] in operators:
                op = value[0]
                value = value[1:]
            else:
                if len(value) > 1 and value[0] == '\\' and value[1] in operators:
                    # A literal value that collides with an operator is escaped.
                    value = value[1:]
                op = '='

            mask = None
            if kind == 'string':
                # A field ending in a backslash carried an escaped space that
                # the whitespace split cut off: glue the next field back on.
                # Each field is unescaped on its own so that already decoded
                # characters are never interpreted a second time.
                pieces = []
                raw = value
                while True:
                    piece = unescape(raw)
                    pieces.append(piece)
                    if raw.endswith('\\') and piece.endswith(' ') and len(fields) > 3:
                        raw = fields.pop(3)
                    else:
                        break
                value = ''.join(pieces)
            else:
                if '&' in value:
                    # "value&mask": split at the first '&', keeping it in
                    # neither half.
                    value, mask = value.split('&', 1)
                    print('MASK: ' + mask)
                try:
                    value = strToNum(value)
                    if mask is not None:
                        mask = strToNum(mask)
                except (ValueError, IndexError):
                    continue

            msg = ' '.join(fields[3:])
            try:
                # The constructor converts a string offset with strToNum().
                new = magicTest(offset, kind, op, value, msg, mask)
            except (ValueError, IndexError):
                continue
            last[level] = new
            target.append(new)
Expand source code
def strToNum(n):
    r"""Convert a numeric literal string to an int.

    Recognised forms:
      * hexadecimal with a ``0x`` or bare ``x`` prefix, in either case
        (``0x1F``, ``x41``, ``0XFF``); an empty digit part yields 0,
      * backslash-introduced octal (``\101``),
      * anything else is passed to ``int()``.

    For the octal form the digits are consumed from the right and
    conversion stops at the first character that is not a decimal digit;
    the digits 8 and 9 are not rejected.

    Args:
        n: the literal to convert.

    Returns:
        The integer value.

    Raises:
        ValueError: if *n* is empty, contains a non-hexadecimal character
            after a hex prefix, or is not accepted by ``int()``.
    """
    if not n:
        raise ValueError('empty numeric literal')

    lowered = n.lower()
    if lowered.startswith('x'):
        lowered = '0' + lowered
    if lowered.startswith('0x'):
        val = 0
        for digit in lowered[2:]:
            # str.index() raises ValueError for a non-hex character.
            val = val * 16 + '0123456789abcdef'.index(digit)
        return val

    if n[0] == '\\':
        val = 0
        col = 1
        for digit in reversed(n[1:]):
            if not '0' <= digit <= '9':
                break
            val += int(digit) * col
            col *= 8
        return val

    return int(n)
Expand source code
def unescape(s):
    r"""Replace escape sequences in *s* with the characters they denote.

    The string is scanned once from left to right, so characters produced
    by an escape are never interpreted as the start of another escape.

    Handled sequences:
      * ``\xHH`` and ``\0xHH`` - one or two hexadecimal digits,
      * ``\OOO`` - one to three octal digits,
      * ``\n`` - newline,
      * a backslash as the very last character - a space.

    Any other backslash sequence is copied to the result unchanged
    (backslash and the following character), and scanning continues.

    Args:
        s: the string to process.

    Returns:
        The string with the sequences above replaced.
    """
    hex_digits = '0123456789abcdefABCDEF'
    octal_digits = '01234567'
    out = []
    pos = 0
    size = len(s)
    while pos < size:
        ch = s[pos]
        if ch != '\\':
            out.append(ch)
            pos += 1
            continue

        nxt = pos + 1
        if nxt == size:
            # Trailing backslash: escaped space.
            out.append(' ')
            break

        # Hexadecimal: \xHH or \0xHH.
        if s[nxt] == 'x':
            start = nxt + 1
        elif s[nxt:nxt + 2] == '0x':
            start = nxt + 2
        else:
            start = None
        if start is not None:
            end = start
            while end < size and end - start < 2 and s[end] in hex_digits:
                end += 1
            if end > start:
                out.append(chr(int(s[start:end], 16)))
                pos = end
                continue
            # No hex digit followed the prefix: fall through to the other forms.

        if s[nxt] in octal_digits:
            end = nxt
            while end < size and end - nxt < 3 and s[end] in octal_digits:
                end += 1
            out.append(chr(int(s[nxt:end], 8)))
            pos = end
            continue

        if s[nxt] == 'n':
            out.append('\n')
            pos = nxt + 1
            continue

        # Unknown escape: keep both characters verbatim and move past them, so
        # the second character is not mistaken for the start of an escape.
        out.append(s[pos:nxt + 1])
        pos = nxt + 1
    return ''.join(out)
Expand source code
def whatis(data):
    """Return a description of what *data* is.

    Every test in the global ``magicNumbers`` list is tried in order and the
    first non-empty result of its ``compare()`` is returned.  When no test
    matches, the content is classified with simple heuristics.

    Args:
        data: the content to classify, as ``str`` or ``bytes``.

    Returns:
        The message of the first matching magic test, otherwise ``'data'``
        if any character lies outside the 7-bit ASCII range, otherwise
        ``'English text'``, ``'Python Source'`` or ``'ASCII text'``.
    """
    for test in magicNumbers:
        description = test.compare(data)
        if description:
            return description

    # No magic number matched: is it binary or text?
    if isinstance(data, (bytes, bytearray)):
        # latin-1 keeps ord(ch) equal to the byte value.
        text = bytes(data).decode('latin-1')
    else:
        text = data
    # ASCII ends at 127, so 128 already counts as non-text.
    if any(ord(ch) > 127 for ch in text):
        return 'data'

    # It is ASCII, now do the text tests.
    if text.find('The', 0, 8192) > -1:
        return 'English text'
    if text.find('def', 0, 8192) > -1:
        return 'Python Source'
    return 'ASCII text'
Classes
class magicTest (offset, t, op, value, msg, mask=None)-
Expand source code
class magicTest:
    """A single magic-number test: read a field from the data and compare it.

    Attributes (all set by ``__init__``):
        offset: position in the data at which the field starts.
        type: field type, ``'string'`` or one of the keys of
            ``_NUMERIC_FORMATS``.
        op: comparison operator, one of ``= < > & ^``.
        value: the value the field is compared against.
        msg: description returned when the test matches.
        mask: optional bit mask applied to integer fields before comparing.
        subTests: list for nested tests; starts out empty.
    """

    # struct formats for the numeric types.  The '=' prefix selects native
    # byte order with standard sizes (2 and 4 bytes); a bare 'l' uses the
    # platform's native size, which can be 8 bytes and then never fits the
    # 4-byte slice.
    _NUMERIC_FORMATS = {
        'short': '=h',
        'leshort': '<h',
        'beshort': '>H',
        'long': '=l',
        'lelong': '<l',
        'belong': '>l',
    }

    def __init__(self, offset, t, op, value, msg, mask=None):
        """Create a test.

        Args:
            offset: field position, as an int or as a numeric literal string
                (converted with ``strToNum()``).
            t: type name; a ``&mask`` suffix sets the mask and is stripped
                from the stored type.
            op: comparison operator.
            value: value to compare the field against.
            msg: description returned on a match.
            mask: optional mask, as an int or a numeric literal string.
        """
        if '&' in t:
            t, _, type_mask = t.partition('&')
            mask = strToNum(type_mask)
        if isinstance(mask, str):
            mask = strToNum(mask)
        if isinstance(offset, str):
            offset = strToNum(offset)
        self.offset = offset
        self.type = t
        self.msg = msg
        self.subTests = []
        self.op = op
        self.mask = mask
        self.value = value

    def test(self, data):
        """Compare the extracted field *data* against ``self.value``.

        The mask, if set, is applied to integer fields first.  Operators:
        ``=`` equal, ``<`` less than, ``>`` greater than, ``&`` all bits of
        the value are set in the field, ``^`` at least one bit of the value
        is clear in the field.

        Returns:
            ``self.msg`` when the comparison holds, otherwise ``None``
            (also for an unknown operator or incomparable operand types).
        """
        try:
            if self.mask and isinstance(data, int):
                data = data & self.mask
            if self.op == '=':
                matched = self.value == data
            elif self.op == '<':
                matched = data < self.value
            elif self.op == '>':
                matched = data > self.value
            elif self.op == '&':
                matched = (data & self.value) == self.value
            elif self.op == '^':
                matched = (data & self.value) != self.value
            else:
                matched = False
        except TypeError:
            # e.g. ordering or bit operations between str and int
            matched = False
        return self.msg if matched else None

    def compare(self, data):
        """Extract this test's field from *data* and evaluate the test.

        Args:
            data: the content to inspect, as ``str`` or ``bytes``.  ``str``
                and ``bytes`` are converted into each other via latin-1 as
                needed, so each character stands for one byte.

        Returns:
            The result of ``test()`` on the extracted field, or ``None`` if
            the type is not supported or the field cannot be extracted
            (for example because the data is too short).
        """
        try:
            if self.type == 'string':
                field = data[self.offset:self.offset + len(self.value)]
                # Bring the field to the same text/bytes kind as the value.
                if isinstance(self.value, str) and isinstance(field, (bytes, bytearray)):
                    field = bytes(field).decode('latin-1')
                elif isinstance(self.value, (bytes, bytearray)) and isinstance(field, str):
                    field = field.encode('latin-1')
            else:
                fmt = self._NUMERIC_FORMATS.get(self.type)
                if fmt is None:
                    return None
                raw = data[self.offset:self.offset + struct.calcsize(fmt)]
                if isinstance(raw, str):
                    raw = raw.encode('latin-1')
                [field] = struct.unpack(fmt, raw)
        except (struct.error, TypeError, ValueError):
            # Short data, a non-sliceable argument or a character that does
            # not fit into one byte.
            return None
        return self.test(field)
def compare(self, data)-
Expand source code
def compare(self, data): #print str([self.type, self.value, self.msg]) try: if self.type == 'string': c = ''; s = '' for i in range(0, len(self.value)+1): if (i + self.offset) > (len(data) - 1): break s = s + c [c] = struct.unpack('c', data[self.offset + i]) data = s elif self.type == 'short': [data] = struct.unpack('h', data[self.offset : self.offset + 2]) elif self.type == 'leshort': [data] = struct.unpack('<h', data[self.offset : self.offset + 2]) elif self.type == 'beshort': [data] = struct.unpack('>H', data[self.offset : self.offset + 2]) elif self.type == 'long': [data] = struct.unpack('l', data[self.offset : self.offset + 4]) elif self.type == 'lelong': [data] = struct.unpack('<l', data[self.offset : self.offset + 4]) elif self.type == 'belong': [data] = struct.unpack('>l', data[self.offset : self.offset + 4]) else: #print('UNKNOWN TYPE: ' + self.type) pass except Exception: return None # print str([self.msg, self.value, data]) return self.test(data) def test(self, data)-
Expand source code
def test(self, data): if self.mask: data = data & self.mask if self.op == '=': if self.value == data: return self.msg elif self.op == '<': pass elif self.op == '>': pass elif self.op == '&': pass elif self.op == '^': pass return None