Module Gnumed.pycommon.gmMimeLib
This module encapsulates mime operations.
Functions
def adjust_extension_by_mimetype(filename: str) ‑> str
-
Expand source code
def adjust_extension_by_mimetype(filename:str) -> str: """Rename file to have proper extension as per its mimetype. Returns: Original filename if no suffix found or empty suffix found or existing suffix already correct (case insensitive). New filename if renamed. New filename will have any old suffix removed and the new suffix appende. """ mimetype = guess_mimetype(filename) mime_suffix = guess_ext_by_mimetype(mimetype) _log.debug('%s -> %s', mimetype, mime_suffix) if mime_suffix is None: return filename if mime_suffix.strip() == '': return filename mime_suffix = mime_suffix.lstrip('.') base_name_with_path, old_ext = os.path.splitext(filename) old_ext = old_ext.lstrip('.') if old_ext.casefold() == mime_suffix.casefold(): return filename new_filename = '%s.%s' % (base_name_with_path, mime_suffix) _log.debug('[%s] -> [%s]', filename, new_filename) renamed = gmTools.rename_file ( filename = filename, new_filename = new_filename, overwrite = True, allow_symlink = True ) if renamed: return new_filename return None
Rename file to have proper extension as per its mimetype.
Returns
Original filename if no suffix found or empty suffix found or existing suffix already correct (case insensitive).
New filename if renamed. New filename will have any old suffix removed and the new suffix appende.
def call_editor_on_file(filename=None, block=True)
-
Expand source code
def call_editor_on_file(filename=None, block=True): """Try to find an appropriate editor with all tricks and call it. block: try to detach from editor or not, None means to use mailcap default. """ if not os.path.isdir(filename): # is the file accessible at all ? try: open(filename).close() except Exception: _log.exception('cannot read [%s]', filename) msg = _('[%s] is not a readable file') % filename return False, msg mime_type = guess_mimetype(filename) editor_cmd = get_editor_cmd(mime_type, filename) if editor_cmd is not None: if gmShellAPI.run_command_in_shell(command = editor_cmd, blocking = block): return True, '' viewer_cmd = get_viewer_cmd(mime_type, filename) if viewer_cmd is not None: if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block): return True, '' _log.warning("no editor or viewer found via standard mailcap system") if os.name == "posix": _log.warning("you should add an editor and/or viewer for this mime type to your mailcap file") _log.info("let's see what the OS can do about that") # does the file already have a useful extension ? (path_name, f_ext) = os.path.splitext(filename) if f_ext in ['', '.tmp']: # try to guess one f_ext = guess_ext_by_mimetype(mime_type) if f_ext is None: _log.warning("no suitable file extension found, trying anyway") file_to_display = filename f_ext = '?unknown?' else: file_to_display = filename + f_ext shutil.copyfile(filename, file_to_display) else: file_to_display = filename file_to_display = os.path.normpath(file_to_display) _log.debug("file %s <type %s> (ext %s) -> file %s" % (filename, mime_type, f_ext, file_to_display)) # try to detect any of the UNIX openers (will only find viewers) found, startfile_cmd = _get_system_startfile_cmd(filename) if found: if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block): return True, '' # last resort: hand over to Python itself try: os.startfile(file_to_display) return True, '' except AttributeError: _log.exception('os.startfile() does not exist on this platform') except Exception: _log.exception('os.startfile(%s) failed', file_to_display) msg = _("Unable to edit/view the file:\n\n" " [%s]\n\n" "Your system does not seem to have a (working)\n" "editor or viewer registered for the file type\n" " [%s]" ) % (file_to_display, mime_type) return False, msg
Try to find an appropriate editor with all tricks and call it.
block: try to detach from editor or not, None means to use mailcap default.
def call_viewer_on_file(aFile=None, block=None)
-
Expand source code
def call_viewer_on_file(aFile = None, block=None): """Try to find an appropriate viewer with all tricks and call it. block: try to detach from viewer or not, None means to use mailcap default """ if not os.path.isdir(aFile): # is the file accessible at all ? try: open(aFile).close() except Exception: _log.exception('cannot read [%s]', aFile) msg = _('[%s] is not a readable file') % aFile return False, msg # try to detect any of the UNIX openers found, startfile_cmd = _get_system_startfile_cmd(aFile) if found: if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block): return True, '' mime_type = guess_mimetype(aFile) viewer_cmd = get_viewer_cmd(mime_type, aFile) if viewer_cmd is not None: if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block): return True, '' _log.warning("no viewer found via standard mailcap system") if os.name == "posix": _log.warning("you should add a viewer for this mime type to your mailcap file") _log.info("let's see what the OS can do about that") # does the file already have an extension ? (path_name, f_ext) = os.path.splitext(aFile) # no if f_ext in ['', '.tmp']: # try to guess one f_ext = guess_ext_by_mimetype(mime_type) if f_ext is None: _log.warning("no suitable file extension found, trying anyway") file_to_display = aFile f_ext = '?unknown?' else: file_to_display = aFile + f_ext shutil.copyfile(aFile, file_to_display) # yes else: file_to_display = aFile file_to_display = os.path.normpath(file_to_display) _log.debug("file %s <type %s> (ext %s) -> file %s" % (aFile, mime_type, f_ext, file_to_display)) try: os.startfile(file_to_display) return True, '' except AttributeError: _log.exception('os.startfile() does not exist on this platform') except Exception: _log.exception('os.startfile(%s) failed', file_to_display) msg = _("Unable to display the file:\n\n" " [%s]\n\n" "Your system does not seem to have a (working)\n" "viewer registered for the file type\n" " [%s]" ) % (file_to_display, mime_type) return False, msg
Try to find an appropriate viewer with all tricks and call it.
block: try to detach from viewer or not, None means to use mailcap default
def convert_file(filename=None,
target_mime=None,
target_filename=None,
target_extension=None,
verbose=False,
max_pages: int = 25)-
Expand source code
def convert_file(filename=None, target_mime=None, target_filename=None, target_extension=None, verbose=False, max_pages:int=25): """Convert file from one format into another. Args: target_mime: mime type to convert file into """ assert (target_mime is not None), '<target_mime> must not be None' assert (filename is not None), '<filename> must not be None' assert (filename != target_filename), '<target_filename> must be different from <filename>' source_mime = guess_mimetype(filename = filename) target_mime_parts = target_mime.rsplit('/', 1) if (len(target_mime_parts) == 1) or (target_mime_parts[1].strip().casefold() in ['', '*', 'any']): _log.debug('generic target mime type') target_mime = target_mime_parts[0] + '/' if source_mime.casefold().startswith(target_mime.casefold()): _log.debug('source file [%s] already target mime type [%s]', filename, target_mime) if target_filename is None: return filename shutil.copyfile(filename, target_filename) return target_filename converted_ext = guess_ext_by_mimetype(target_mime) if converted_ext is None: if target_filename is not None: tmp, converted_ext = os.path.splitext(target_filename) if converted_ext is None: converted_ext = target_extension # can still stay None converted_ext = gmTools.coalesce(converted_ext, '').strip().lstrip('.') converted_fname = gmTools.get_unique_filename(suffix = converted_ext) _log.debug('attempting conversion: [%s] -> [<%s>:%s]', filename, target_mime, gmTools.coalesce(target_filename, converted_fname)) # try user-local conversion script script_name = 'gm-convert_file' binary = os.path.join(gmTools.gmPaths().home_dir, 'bin', script_name) _log.debug('trying user-local script: %s', binary) _log.debug('<%s> API: SOURCEFILE TARGET_MIMETYPE TARGET_EXTENSION TARGET_FILENAME', script_name) found, binary = gmShellAPI.detect_external_binary(binary = binary) if found: cmd_line = [ binary, filename, target_mime, converted_ext, converted_fname ] success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True) if success: if target_filename is None: return converted_fname shutil.copyfile(converted_fname, target_filename) return target_filename # try built-in conversions _log.debug('trying built-in conversion functions') try: conversion_func = __CONVERSION_DELEGATES[source_mime][target_mime] except KeyError: conversion_func = None if conversion_func is not None: converted_fname = conversion_func(filename = filename, verbose = verbose, max_pages = max_pages) if converted_fname is not None: if target_filename is None: return converted_fname shutil.copyfile(converted_fname, target_filename) return target_filename # try system-wide conversion script paths = gmTools.gmPaths() local_script = os.path.join(paths.local_base_dir, '..', 'external-tools', script_name) candidates = [ script_name, local_script ] #, script_name + u'.bat' _log.debug('trying system-wide scripts: %s', candidates) found, binary = gmShellAPI.find_first_binary(binaries = candidates) if not found: # try anyway _log.debug('trying anyway as last-ditch resort') binary = script_name# + r'.bat' cmd_line = [ binary, filename, target_mime, converted_ext, converted_fname ] success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True) if success: if target_filename is None: return converted_fname shutil.copyfile(converted_fname, target_filename) return target_filename # seems to have failed but check for target file anyway _log.error('conversion script returned error exit code, checking target file anyway') if not os.path.exists(converted_fname): return None _log.info('conversion target file found') stats = os.stat(converted_fname) if stats.st_size == 0: return None _log.info('conversion target file size > 0') achieved_mime = guess_mimetype(filename = converted_fname) if not achieved_mime.casefold().startswith(target_mime.casefold()): _log.error('target: [%s], achieved: [%s]', target_mime, achieved_mime) return None # we may actually have something despite a non-0 exit code _log.info('conversion target file mime type [%s], as expected, might be usable', achieved_mime) if target_filename is None: return converted_fname shutil.copyfile(converted_fname, target_filename) return target_filename
Convert file from one format into another.
Args
target_mime
- mime type to convert file into
def convert_file_to_image(filename: str = None, verbose: bool = False, max_pages: int = 10) ‑> list[str]
-
Expand source code
def convert_file_to_image(filename:str=None, verbose:bool=False, max_pages:int=10) -> list[str]: return convert_file(filename = filename, target_mime = 'image/*', verbose = verbose, max_pages = max_pages)
def convert_file_to_text(filename: str = None, verbose: bool = False) ‑> str
-
Expand source code
def convert_file_to_text(filename:str=None, verbose:bool=False) -> str: return convert_file(filename = filename, target_mime = 'text/*', verbose = verbose)
def convert_latex_to_pdf(filename: str = None,
verbose: bool = False,
is_sandboxed: bool = False,
max_pages: int = 25) ‑> str-
Expand source code
def convert_latex_to_pdf(filename:str=None, verbose:bool=False, is_sandboxed:bool=False, max_pages:int=25) -> str: """Compile LaTeX code to PDF using pdflatex. Args: is_sandboxed: whether or not to create a sandbox for compiling Returns: Name of resulting PDF, or None on failure. """ global __LaTeX_version_checked global __pdflatex_executable if not __LaTeX_version_checked: __LaTeX_version_checked = True found, __pdflatex_executable = gmShellAPI.detect_external_binary(binary = 'pdflatex') if not found: _log.error('pdflatex not found') return None cmd_line = [__pdflatex_executable, '-version'] success, ret_code, stdout = gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True) if not success: _log.error('[%s] failed, LaTeX not usable', cmd_line) return None if is_sandboxed: sandbox_dir = os.path.split(filename)[0] else: sandbox_dir = gmTools.mk_sandbox_dir(prefix = gmTools.fname_stem(filename) + '_') shutil.copy(filename, sandbox_dir) filename = os.path.join(sandbox_dir, os.path.split(filename)[1]) _log.debug('LaTeX sandbox directory: [%s]', sandbox_dir) cmd_final = [ __pdflatex_executable, '-recorder', '-interaction=nonstopmode', "-output-directory=%s" % sandbox_dir ] cmd_draft = cmd_final + ['-draftmode'] # LaTeX can need up to three runs to get cross references et al right for cmd2run in [cmd_draft, cmd_draft, cmd_final]: success, ret_code, stdout = gmShellAPI.run_process ( cmd_line = cmd2run + [filename], acceptable_return_codes = [0], encoding = 'utf8', verbose = True #_cfg.get(option = 'debug') ) if not success: _log.error('problem running pdflatex, cannot generate form output, trying diagnostics') found, binary = gmShellAPI.find_first_binary(binaries = ['lacheck', 'miktex-lacheck.exe']) if not found: _log.debug('lacheck not found') else: cmd_line = [binary, filename] success, ret_code, stdout = gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True) found, binary = gmShellAPI.find_first_binary(binaries = ['chktex', 'ChkTeX.exe']) if not found: _log.debug('chcktex not found') else: cmd_line = [binary, '--verbosity=2', '--headererr', filename] success, ret_code, stdout = gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True) return None return '%s.pdf' % os.path.splitext(filename)[0]
Compile LaTeX code to PDF using pdflatex.
Args
is_sandboxed
- whether or not to create a sandbox for compiling
Returns
Name of resulting PDF, or None on failure.
def describe_file(filename, callback=None, cookie=None)
-
Expand source code
def describe_file(filename, callback=None, cookie=None): if callback is None: return __run_file_describer(filename) payload_kwargs = {'filename': filename, 'cookie': cookie} gmWorkerThread.execute_in_worker_thread ( payload_function = __run_file_describer, payload_kwargs = payload_kwargs, completion_callback = callback )
def get_editor_cmd(mimetype=None, filename=None)
-
Expand source code
def get_editor_cmd(mimetype=None, filename=None): if filename is None: _log.error("You should specify a file name for the replacement of %s.") # last resort: if no file name given replace %s in original with literal '%s' # and hope for the best - we certainly don't want the module default "/dev/null" filename = """%s""" mailcaps = _mailcap.getcaps() (editor, junk) = _mailcap.findmatch(mailcaps, mimetype, key = 'edit', filename = '%s' % filename) # FIXME: we should check for "x-token" flags _log.debug("<%s> editor: [%s]" % (mimetype, editor)) return editor
def get_viewer_cmd(aMimeType=None, aFileName=None, aToken=None)
-
Expand source code
def get_viewer_cmd(aMimeType = None, aFileName = None, aToken = None): """Return command for viewer for this mime type complete with this file""" if aFileName is None: _log.error("You should specify a file name for the replacement of %s.") # last resort: if no file name given replace %s in original with literal '%s' # and hope for the best - we certainly don't want the module default "/dev/null" aFileName = """%s""" mailcaps = _mailcap.getcaps() (viewer, junk) = _mailcap.findmatch(mailcaps, aMimeType, key = 'view', filename = '%s' % aFileName) # FIXME: we should check for "x-token" flags _log.debug("<%s> viewer: [%s]" % (aMimeType, viewer)) return viewer
Return command for viewer for this mime type complete with this file
def guess_ext_by_mimetype(mimetype='')
-
Expand source code
def guess_ext_by_mimetype(mimetype=''): """Return file extension based on what the OS thinks a file of this mimetype should end in.""" # ask system first ext = mimetypes.guess_extension(mimetype) if ext is not None: _log.debug('<%s>: %s', mimetype, ext) return ext _log.error("<%s>: no suitable file extension known to the OS" % mimetype) # try to help the OS a bit cfg = gmCfgINI.gmCfgData() ext = cfg.get ( group = 'extensions', option = mimetype, source_order = [('user-mime', 'return'), ('system-mime', 'return')] ) if ext and ext.startswith('.'): ext = ext[1:] if ext: _log.debug('<%s>: %s', mimetype, ext) return ext _log.error("<%s>: no suitable file extension found in config files", mimetype) return ext
Return file extension based on what the OS thinks a file of this mimetype should end in.
def guess_ext_for_file(aFile: str = None) ‑> str
-
Expand source code
def guess_ext_for_file(aFile:str=None) -> str: """Guesses an approprate file name extension based on mimetype. Args: aFile: the name of an existing file """ if aFile is None: return None (path_name, f_ext) = os.path.splitext(aFile) if f_ext: return f_ext mime_type = guess_mimetype(aFile) f_ext = guess_ext_by_mimetype(mime_type) if f_ext is None: _log.error('unable to guess file name extension for mime type [%s]' % mime_type) return None return f_ext
Guesses an approprate file name extension based on mimetype.
Args
aFile
- the name of an existing file
def guess_mimetype(filename: str = None) ‑> str
-
Expand source code
def guess_mimetype(filename:str=None) -> str: """Guess mime type of arbitrary file. Returns: Detected mimetype or 'application/octet-stream'. """ _log.debug('guessing mime type of [%s]', filename) mimetype, encoding = mimetypes.guess_type(filename) if mimetype not in [WORST_CASE_MIMETYPE, None]: _log.debug('"%s" -> <%s> (%s)', filename, mimetype, encoding) return mimetype mimetype = __guess_mimetype__pylibextractor(filename = filename) if mimetype: return mimetype mimetype = __guess_mimetype__file(filename = filename) if mimetype: return mimetype # 3) use "extract" shell level libextractor wrapper mimetype = __guess_mimetype__extract(filename = filename) if mimetype: return mimetype # If we and up here we either have an insufficient systemwide # magic number file or we suffer from a deficient operating system # alltogether. It can't get much worse if we try ourselves. _log.info("OS level mime detection failed, falling back to built-in magic") from Gnumed.pycommon import gmMimeMagic mimetype = gmTools.coalesce(gmMimeMagic.filedesc(filename), WORST_CASE_MIMETYPE) del gmMimeMagic _log.debug('"%s" -> <%s>' % (filename, mimetype)) return mimetype
Guess mime type of arbitrary file.
Returns
Detected mimetype or 'application/octet-stream'.
def is_probably_image(filename: str = None) ‑> bool
-
Expand source code
def is_probably_image(filename:str=None) -> bool: """Check whether a file might be an image file by mime type.""" if guess_mimetype(filename).startswith('image/'): return True return False
Check whether a file might be an image file by mime type.
def is_probably_pdf(filename: str = None) ‑> bool
-
Expand source code
def is_probably_pdf(filename:str=None) -> bool: """Check whether a file might be a PDF file by mime type.""" if guess_mimetype(filename) == 'application/pdf': return True return False
Check whether a file might be a PDF file by mime type.
def is_probably_textfile(filename: str = None) ‑> bool
-
Expand source code
def is_probably_textfile(filename:str=None) -> bool: """Check whether a file might be a text file by mime type.""" if guess_mimetype(filename).startswith('text/'): return True return False
Check whether a file might be a text file by mime type.
def join_files_as_pdf(files: list[str] = None, pdf_name: str = None) ‑> str
-
Expand source code
def join_files_as_pdf(files:list[str]=None, pdf_name:str=None) -> str: """Convert files to PDF and joins them into one final PDF. Returns: Name of final PDF or None """ assert (files is not None), '<files> must not be None' if len(files) == 0: return None sandbox = gmTools.mk_sandbox_dir() pdf_pages = [] page_idx = 1 for fname in files: pdf = convert_file ( filename = fname, target_mime = 'application/pdf', target_filename = gmTools.get_unique_filename(prefix = '%s-' % page_idx, suffix = '.pdf', tmp_dir = sandbox), target_extension = '.pdf', verbose = True ) if pdf is None: return None pdf_pages.append(pdf) page_idx += 1 if pdf_name is None: pdf_name = gmTools.get_unique_filename(suffix = '.pdf') cmd_line = ['pdfunite'] cmd_line.extend(pdf_pages) cmd_line.append(pdf_name) success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True) if not success: _log.debug('cannot join files into one PDF') return None return pdf_name
Convert files to PDF and joins them into one final PDF.
Returns
Name of final PDF or None
def split_multipage_image(filename: str = None) ‑> list[str]
-
Expand source code
def split_multipage_image(filename:str=None) -> list[str]: sandbox = gmTools.mk_sandbox_dir() cmd_line = [ 'convert', '-verbose', filename, os.path.join(sandbox, '%s.%%d' % gmTools.fname_from_path(filename)) ] success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True) if not success: return [] fname_stem = gmTools.fname_stem(filename) items = os.listdir(sandbox) image_pages = [] for item in items: if not item.startswith(fname_stem): continue image_pages.append(os.path.join(sandbox, item)) return sorted(image_pages)