Module Gnumed.pycommon.gmMimeLib

This module encapsulates mime operations.

http://www.dwheeler.com/essays/open-files-urls.html

Expand source code
# -*- coding: utf-8 -*-

"""This module encapsulates mime operations.

        http://www.dwheeler.com/essays/open-files-urls.html
"""
#=======================================================================================
__author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>"
__license__ = "GPL"

# stdlib
import sys
import os
import mimetypes
import subprocess
import shutil
import logging
from typing import Callable
try:
        import mailcap as _mailcap
except ImportError:             # Python 3.11 deprecated mailcap, in 3.13 it will be gone ...
        import _mailcap__copy as _mailcap                       # type: ignore


# GNUmed
if __name__ == '__main__':
        sys.path.insert(0, '../../')
        _ = lambda x:x
from Gnumed.pycommon import gmShellAPI
from Gnumed.pycommon import gmTools
from Gnumed.pycommon import gmCfgINI
from Gnumed.pycommon import gmWorkerThread


# module-level logger used by all mime handling code in this module
_log = logging.getLogger('gm.mime')

# generic fallback mime type, the guessers in this module treat it as "nothing useful detected"
WORST_CASE_MIMETYPE = 'application/octet-stream'

#=======================================================================================
# mime type handling
#---------------------------------------------------------------------------------------
def is_probably_textfile(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file belongs to the text/ family.

        Args:
                filename: name of the file to inspect, handed to guess_mimetype()
        """
        detected_type = guess_mimetype(filename)
        return detected_type.startswith('text/')

#---------------------------------------------------------------------------------------
def is_probably_image(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file belongs to the image/ family.

        Args:
                filename: name of the file to inspect, handed to guess_mimetype()
        """
        detected_type = guess_mimetype(filename)
        return detected_type.startswith('image/')

#---------------------------------------------------------------------------------------
def is_probably_pdf(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file is exactly 'application/pdf'.

        Args:
                filename: name of the file to inspect, handed to guess_mimetype()
        """
        detected_type = guess_mimetype(filename)
        return detected_type == 'application/pdf'

#---------------------------------------------------------------------------------------
def split_multipage_image(filename:str=None) -> list[str]:
        """Split a multi-page image into one file per page by way of 'convert'.

        The page files are written into a freshly created sandbox
        directory, they are named <name of input file>.<page index>.

        Args:
                filename: the image file to split

        Returns:
                List of page files (including path) in page order, or an empty list if 'convert' failed.
        """
        sandbox = gmTools.mk_sandbox_dir()
        cmd_line = [
                'convert',
                '-verbose',
                filename,
                os.path.join(sandbox, '%s.%%d' % gmTools.fname_from_path(filename))
        ]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if not success:
                return []

        def _page_order(page_fname):
                # the page index is the last dot-separated part of the name,
                # it must be compared as a number because a plain string sort
                # would put page 10 in front of page 2
                page_idx = page_fname.rsplit('.', 1)[-1]
                return (int(page_idx) if page_idx.isdigit() else -1, page_fname)

        fname_stem = gmTools.fname_stem(filename)
        page_fnames = [ item for item in os.listdir(sandbox) if item.startswith(fname_stem) ]
        return [ os.path.join(sandbox, page_fname) for page_fname in sorted(page_fnames, key = _page_order) ]

#---------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------
def __guess_mimetype__pylibextractor(filename:str=None) -> str:
        """Ask the Python wrapper for libextractor (module <extractor>) for the mime type.

        Args:
                filename: name of the file to inspect

        Returns:
                The first 'mimetype' property which differs from WORST_CASE_MIMETYPE,
                or None if the module cannot be imported or no such property is found.
        """
        try:
                import extractor
        except ImportError:
                _log.debug('module <extractor> (python wrapper for libextractor) not installed')
                return None

        except OSError as exc:
                # winerror 126, errno 22
                if exc.errno == 22:
                        _log.exception('module <extractor> (python wrapper for libextractor) not installed')
                        return None
                # any other OS level problem is not ours to hide
                raise

        properties = extractor.Extractor().extract(filename = filename)
        for key, value in properties:
                if key == 'mimetype':
                        _log.debug('[import extractor]: <%s>' % value)
                        if value != WORST_CASE_MIMETYPE:
                                return value

        return None

#---------------------------------------------------------------------------------------
def __guess_mimetype__file(filename:str=None) -> str:
        """Ask the external 'file' command for the mime type.

        This only works with 'file' installed (which is standard on
        POSIX, it might work on Cygwin installations).

        The command is run from an argument list rather than through
        a shell, such that quotes, backticks, or '$(...)' inside
        <filename> are never interpreted as shell syntax.

        Args:
                filename: name of the file to inspect

        Returns:
                The detected mime type, or None if 'file' could not be run, exited
                non-zero, or reported nothing more specific than WORST_CASE_MIMETYPE.
        """
        cmd_line = ['file', '--mime-type', '--brief', filename]
        try:
                proc_result = subprocess.run (
                        args = cmd_line,
                        stdout = subprocess.PIPE,
                        stderr = subprocess.PIPE,
                        text = True,
                        errors = 'backslashreplace'
                )
        except (OSError, TypeError, ValueError) as exc:
                # OSError: binary not installed or not executable
                # TypeError/ValueError: <filename> not usable as a command line argument
                _log.error('cannot run %s on %s (%s): %s', cmd_line, os.name, sys.platform, exc)
                return None

        if proc_result.returncode != 0:
                _log.error('%s on %s (%s): failed with exit(%s)', cmd_line, os.name, sys.platform, proc_result.returncode)
                _log.error('STDERR: %s', proc_result.stderr)
                return None

        # only the first line of output is of interest
        mimetype = proc_result.stdout.partition('\n')[0].strip()
        _log.debug('%s: <%s>', cmd_line, mimetype)
        if mimetype in ['', WORST_CASE_MIMETYPE]:
                return None

        return mimetype

#---------------------------------------------------------------------------------------
def __guess_mimetype__extract(filename:str=None) -> str:
        """Ask the external 'extract' command (shell level libextractor wrapper) for the mime type.

        The command is run from an argument list rather than through
        a shell, such that quotes, backticks, or '$(...)' inside
        <filename> are never interpreted as shell syntax.

        Args:
                filename: name of the file to inspect

        Returns:
                The detected mime type, or None if 'extract' could not be run, exited
                non-zero, or reported nothing more specific than WORST_CASE_MIMETYPE.
        """
        cmd_line = ['extract', '-p', 'mimetype', filename]
        try:
                proc_result = subprocess.run (
                        args = cmd_line,
                        stdout = subprocess.PIPE,
                        stderr = subprocess.PIPE,
                        text = True,
                        errors = 'backslashreplace'
                )
        except (OSError, TypeError, ValueError) as exc:
                # OSError: binary not installed or not executable
                # TypeError/ValueError: <filename> not usable as a command line argument
                _log.error('cannot run %s on %s (%s): %s', cmd_line, os.name, sys.platform, exc)
                return None

        if proc_result.returncode != 0:
                _log.error('%s on %s (%s): failed with exit(%s)', cmd_line, os.name, sys.platform, proc_result.returncode)
                _log.error('STDERR: %s', proc_result.stderr)
                return None

        # only the first line of output is of interest, its first 11 characters
        # are dropped - presumably the "mimetype - " label printed by extract (TODO: confirm)
        first_line = proc_result.stdout.partition('\n')[0]
        mimetype = first_line[11:].strip()
        _log.debug('%s: <%s>', cmd_line, mimetype)
        if mimetype in ['', WORST_CASE_MIMETYPE]:
                return None

        return mimetype

#---------------------------------------------------------------------------------------
def guess_mimetype(filename:str=None) -> str:
        """Guess the mime type of an arbitrary file.

        Detection methods are tried in this order, the first useful
        answer wins: the stdlib <mimetypes> module, the Python
        libextractor wrapper, the 'file' command, the 'extract'
        command, and finally the built-in gmMimeMagic module.

        Args:
                filename: name of the file to inspect

        Returns:
                Detected mimetype or 'application/octet-stream'.
        """
        _log.debug('guessing mime type of [%s]', filename)
        by_name, encoding = mimetypes.guess_type(filename)
        if (by_name is not None) and (by_name != WORST_CASE_MIMETYPE):
                _log.debug('"%s" -> <%s> (%s)', filename, by_name, encoding)
                return by_name

        # ask the helpers which look at the file by other means
        external_guessers = (
                __guess_mimetype__pylibextractor,
                __guess_mimetype__file,
                __guess_mimetype__extract
        )
        for guesser in external_guessers:
                detected = guesser(filename = filename)
                if detected:
                        return detected

        # If we end up here we either have an insufficient systemwide
        # magic number file or we suffer from a deficient operating system
        # altogether. It can't get much worse if we try ourselves.
        _log.info("OS level mime detection failed, falling back to built-in magic")
        from Gnumed.pycommon import gmMimeMagic
        magic_result = gmMimeMagic.filedesc(filename)
        del gmMimeMagic
        mimetype = gmTools.coalesce(magic_result, WORST_CASE_MIMETYPE)
        _log.debug('"%s" -> <%s>' % (filename, mimetype))
        return mimetype

#-----------------------------------------------------------------------------------
def get_viewer_cmd(aMimeType = None, aFileName = None, aToken = None):
        """Look up the mailcap 'view' command for a mime type, with the file name filled in.

        Args:
                aMimeType: the mime type to find a viewer for
                aFileName: the file name to put into the command, if None a literal '%s' is used
                aToken: not used by this function

        Returns:
                Whatever the mailcap lookup yields as the command.
        """
        if aFileName is None:
                _log.error("You should specify a file name for the replacement of %s.")
                # last resort: keep a literal '%s' in the command and hope
                # for the best - we certainly don't want the module default "/dev/null"
                aFileName = """%s"""

        viewer, mailcap_entry = _mailcap.findmatch (
                _mailcap.getcaps(),
                aMimeType,
                key = 'view',
                filename = '%s' % aFileName
        )
        # FIXME: we should check for "x-token" flags
        _log.debug("<%s> viewer: [%s]" % (aMimeType, viewer))
        return viewer

#-----------------------------------------------------------------------------------
def get_editor_cmd(mimetype=None, filename=None):
        """Look up the mailcap 'edit' command for a mime type, with the file name filled in.

        Args:
                mimetype: the mime type to find an editor for
                filename: the file name to put into the command, if None a literal '%s' is used

        Returns:
                Whatever the mailcap lookup yields as the command.
        """
        if filename is None:
                _log.error("You should specify a file name for the replacement of %s.")
                # last resort: keep a literal '%s' in the command and hope
                # for the best - we certainly don't want the module default "/dev/null"
                filename = """%s"""

        editor, mailcap_entry = _mailcap.findmatch (
                _mailcap.getcaps(),
                mimetype,
                key = 'edit',
                filename = '%s' % filename
        )
        # FIXME: we should check for "x-token" flags
        _log.debug("<%s> editor: [%s]" % (mimetype, editor))
        return editor

#-----------------------------------------------------------------------------------
def guess_ext_by_mimetype(mimetype=''):
        """Find the file name extension which files of a given mime type should carry.

        The stdlib <mimetypes> module is asked first. If it does not
        know an extension the option named after the mime type is
        looked up in group [extensions] of the 'user-mime' and
        'system-mime' config sources.

        Returns:
                The extension, or None if neither source knows one.
        """
        # ask system first
        os_ext = mimetypes.guess_extension(mimetype)
        if os_ext is not None:
                _log.debug('<%s>: %s', mimetype, os_ext)
                return os_ext

        _log.error("<%s>: no suitable file extension known to the OS" % mimetype)
        # try to help the OS a bit
        cfg_ext = gmCfgINI.gmCfgData().get (
                group = 'extensions',
                option = mimetype,
                source_order = [('user-mime', 'return'), ('system-mime', 'return')]
        )
        if cfg_ext is None:
                _log.error("<%s>: no suitable file extension found in config files", mimetype)
                return None

        _log.debug('<%s>: %s', mimetype, cfg_ext)
        return cfg_ext

#-----------------------------------------------------------------------------------
def guess_ext_for_file(aFile:str=None) -> str:
        """Guess an appropriate file name extension for a file.

        An extension already present in the file name is returned
        as is, otherwise the extension is derived from the guessed
        mime type of the file.

        Args:
                aFile: the name of an existing file

        Returns:
                The extension, or None if <aFile> is None or no extension could be guessed.
        """
        if aFile is None:
                return None

        existing_ext = os.path.splitext(aFile)[1]
        if existing_ext:
                return existing_ext

        mime_type = guess_mimetype(aFile)
        guessed_ext = guess_ext_by_mimetype(mime_type)
        if guessed_ext is None:
                _log.error('unable to guess file name extension for mime type [%s]' % mime_type)
        return guessed_ext

#-----------------------------------------------------------------------------------
def adjust_extension_by_mimetype(filename:str) -> str:
        """Rename a file such that its extension matches its mime type.

        Args:
                filename: the file to check and, if need be, rename

        Returns:
                Original filename if no extension or only an empty extension is known for the mime type, or if the existing extension already matches (case insensitive).

                New filename if renamed. The new filename has any old extension removed and the new extension appended.

                None if renaming failed.
        """
        mimetype = guess_mimetype(filename)
        wanted_ext = guess_ext_by_mimetype(mimetype)
        _log.debug('%s -> %s', mimetype, wanted_ext)
        if (wanted_ext is None) or (wanted_ext.strip() == ''):
                return filename

        # compare extensions without their leading dot
        wanted_ext = wanted_ext.lstrip('.')
        stem_with_path, current_ext = os.path.splitext(filename)
        if current_ext.lstrip('.').casefold() == wanted_ext.casefold():
                return filename

        target_name = '%s.%s' % (stem_with_path, wanted_ext)
        _log.debug('[%s] -> [%s]', filename, target_name)
        was_renamed = gmTools.rename_file (
                filename = filename,
                new_filename = target_name,
                overwrite = True,
                allow_symlink = True
        )
        return target_name if was_renamed else None

#-----------------------------------------------------------------------------------
# cache for the shell command template used to open files with the OS default application:
# None = detection has not run yet, '' = detection ran but found no opener
# (set by _get_system_startfile_cmd())
_system_startfile_cmd = None

# opener binaries mapped to their shell command templates, the "%s"
# placeholder is replaced by the file name, _get_system_startfile_cmd()
# probes the binaries in the order listed here and uses the first one found
open_cmds = {
        'xdg-open': 'xdg-open "%s"',                    # nascent standard on Linux
        'kfmclient': 'kfmclient exec "%s"',             # KDE
        'gnome-open': 'gnome-open "%s"',                # GNOME
        'exo-open': 'exo-open "%s"',
        'op': 'op "%s"',
        'open': 'open "%s"',                                    # MacOSX: "open -a AppName file" (-a allows to override the default app for the file type)
        'cmd.exe': 'cmd.exe /c "%s"'                    # Windows
        # not (yet) used:
        #'run-mailcap'
        #'explorer'
}

def _get_system_startfile_cmd(filename:str):
        """Build the shell command which opens a file with the OS default application.

        The opener is detected once by probing the binaries listed
        in <open_cmds>, the outcome (including "none found") is
        cached in the module global <_system_startfile_cmd>.

        Args:
                filename: the file name to put into the command template

        Returns:
                (True, command) if an opener is known, (False, None) otherwise.
        """
        global _system_startfile_cmd

        # an earlier detection run came up empty
        if _system_startfile_cmd == '':
                return False, None

        if _system_startfile_cmd is None:
                for opener, cmd_template in open_cmds.items():
                        found, binary = gmShellAPI.detect_external_binary(binary = opener)
                        if found:
                                _system_startfile_cmd = cmd_template
                                _log.info('detected local startfile cmd: [%s]', _system_startfile_cmd)
                                break
                else:
                        # remember that detection failed
                        _system_startfile_cmd = ''
                        return False, None

        return True, _system_startfile_cmd % filename

#-----------------------------------------------------------------------------------
def join_files_as_pdf(files:list[str]=None, pdf_name:str=None) -> str:
        """Convert each file to PDF and join the results into one PDF by way of 'pdfunite'.

        Args:
                files: the files to convert and join, in the order they are to appear
                pdf_name: name of the final PDF, a unique name is generated if None

        Returns:
                Name of final PDF, or None if <files> is empty or any conversion or the joining failed.
        """
        assert (files is not None), '<files> must not be None'

        if len(files) < 1:
                return None

        sandbox = gmTools.mk_sandbox_dir()
        pdf_pages = []
        for page_no, source_file in enumerate(files, start = 1):
                page_pdf = convert_file (
                        filename = source_file,
                        target_mime = 'application/pdf',
                        target_filename = gmTools.get_unique_filename(prefix = '%s-' % page_no, suffix = '.pdf', tmp_dir = sandbox),
                        target_extension = '.pdf',
                        verbose = True
                )
                # one failed conversion spoils the whole document
                if page_pdf is None:
                        return None

                pdf_pages.append(page_pdf)

        if pdf_name is None:
                pdf_name = gmTools.get_unique_filename(suffix = '.pdf')
        unite_cmd = ['pdfunite', *pdf_pages, pdf_name]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = unite_cmd, verbose = True)
        if success:
                return pdf_name

        _log.debug('cannot join files into one PDF')
        return None

#-----------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------
# mimetype conversion helpers
#-----------------------------------------------------------------------------------
# whether detection of pdflatex has been attempted already
__LaTeX_version_checked = False
# the pdflatex binary to run, stays None unless detection AND the version check succeeded
__pdflatex_executable = None

def convert_latex_to_pdf(filename:str=None, verbose:bool=False, is_sandboxed:bool=False, max_pages:int=25) -> str:
        """Compile LaTeX code to PDF using pdflatex.

        pdflatex is detected and checked once (by running it with
        '-version'). If that fails all later calls return None
        straight away rather than attempting to run an unusable
        executable.

        Args:
                filename: the LaTeX source file
                verbose: not used by this function, external commands are always run verbosely
                is_sandboxed: if True, compile inside the directory <filename> already lives in; if False, create a sandbox directory, copy <filename> there, and compile the copy
                max_pages: not used by this function

        Returns:
                Name of resulting PDF (located in the sandbox directory), or None on failure.
        """
        global __LaTeX_version_checked
        global __pdflatex_executable
        if not __LaTeX_version_checked:
                __LaTeX_version_checked = True
                # only remember the binary once it is known to work
                __pdflatex_executable = None
                found, binary = gmShellAPI.detect_external_binary(binary = 'pdflatex')
                if not found:
                        _log.error('pdflatex not found')
                        return None

                cmd_line = [binary, '-version']
                success, ret_code, stdout = gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True)
                if not success:
                        _log.error('[%s] failed, LaTeX not usable', cmd_line)
                        return None

                __pdflatex_executable = binary

        if __pdflatex_executable is None:
                _log.error('pdflatex was found missing or unusable earlier on, cannot compile [%s]', filename)
                return None

        if is_sandboxed:
                sandbox_dir = os.path.split(filename)[0]
        else:
                sandbox_dir = gmTools.mk_sandbox_dir(prefix = gmTools.fname_stem(filename) + '_')
                shutil.copy(filename, sandbox_dir)
                filename = os.path.join(sandbox_dir, os.path.split(filename)[1])
        _log.debug('LaTeX sandbox directory: [%s]', sandbox_dir)
        cmd_final = [
                __pdflatex_executable,
                '-recorder',
                '-interaction=nonstopmode',
                "-output-directory=%s" % sandbox_dir
        ]
        cmd_draft = cmd_final + ['-draftmode']
        # LaTeX can need up to three runs to get cross references et al right
        for cmd2run in [cmd_draft, cmd_draft, cmd_final]:
                success, ret_code, stdout = gmShellAPI.run_process (
                        cmd_line = cmd2run + [filename],
                        acceptable_return_codes = [0],
                        encoding = 'utf8',
                        verbose = True  #_cfg.get(option = 'debug')
                )
                if success:
                        continue

                # run whatever LaTeX checkers are available, for the benefit of
                # whoever reads the log, their outcome does not change the result
                _log.error('problem running pdflatex, cannot generate form output, trying diagnostics')
                found, binary = gmShellAPI.find_first_binary(binaries = ['lacheck', 'miktex-lacheck.exe'])
                if not found:
                        _log.debug('lacheck not found')
                else:
                        cmd_line = [binary, filename]
                        gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True)
                found, binary = gmShellAPI.find_first_binary(binaries = ['chktex', 'ChkTeX.exe'])
                if not found:
                        _log.debug('chcktex not found')
                else:
                        cmd_line = [binary, '--verbosity=2', '--headererr', filename]
                        gmShellAPI.run_process(cmd_line = cmd_line, encoding = 'utf8', verbose = True)
                return None

        return '%s.pdf' % os.path.splitext(filename)[0]

#-----------------------------------------------------------------------------------
def __convert_odt_to_pdf(filename:str=None, verbose:bool=False, max_pages:int=25):
        """Convert an ODT file to PDF by way of 'lowriter'.

        The PDF is written into the directory of the source file.

        Args:
                filename: the ODT file to convert
                verbose: not used by this function
                max_pages: not used by this function

        Returns:
                Name of the PDF (path and stem of <filename> plus '.pdf'), or None on failure.
        """
        out_dir = os.path.split(filename)[0]
        lowriter_cmd = ['lowriter', '--convert-to', 'pdf', '--outdir', out_dir, filename]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = lowriter_cmd, verbose = True)
        if success:
                return gmTools.fname_stem_with_path(filename) + '.pdf'

        return None

#-----------------------------------------------------------------------------------
def __convert_pdf_to_image(filename:str=None, verbose:bool=False, max_pages:int=25) -> str:
        """Convert the leading pages of a PDF into one multi-page TIFF by way of 'convert'.

        The pages are first rendered into one TIFF each (at 150x150
        density) inside a sandbox directory, then joined into the
        final TIFF, which is created inside that sandbox, too.

        Args:
                filename: the PDF to convert
                verbose: whether to pass '-verbose' to 'convert'
                max_pages: number of pages to convert, counted from the first page

        Returns:
                Name of the multi-page TIFF, or None on failure.
        """
        sandbox = gmTools.mk_sandbox_dir()
        pdf_name = gmTools.fname_from_path(filename)
        cmd_line = ['convert']
        if verbose:
                cmd_line.append('-verbose')
        cmd_line.extend ([
                '-density', '150x150',
                # page range to render, 0-based
                '%s[0-%s]' % (filename, max_pages-1),
                # one file per page: <pdf name>.<page index>.tiff
                os.path.join(sandbox, '%s.%%d.tiff' % pdf_name)
        ])
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if not success:
                return None

        def _page_order(page_fname):
                # the page index sits between the last two dots of the name,
                # it must be compared as a number because a plain string sort
                # would put page 10 in front of page 2
                page_idx = page_fname.rsplit('.', 2)[-2]
                return (int(page_idx) if page_idx.isdigit() else -1, page_fname)

        pdf_stem = gmTools.fname_stem(filename)
        page_fnames = [
                item for item in os.listdir(sandbox)
                if item.endswith('.tiff') and item.startswith(pdf_stem)
        ]
        if not page_fnames:
                _log.error('no page images found in [%s], cannot create TIFF', sandbox)
                return None

        cmd_line = ['convert']
        if verbose:
                cmd_line.append('-verbose')
        cmd_line.extend(os.path.join(sandbox, page_fname) for page_fname in sorted(page_fnames, key = _page_order))
        cmd_line.append('-adjoin')
        # use the bare file name: joining the sandbox with a <filename> which
        # includes a path would either discard the sandbox (absolute path)
        # or point into a non-existent subdirectory of it (relative path)
        tiff_name = os.path.join(sandbox, '%s.tiff' % pdf_name)
        cmd_line.append(tiff_name)
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if not success:
                return None

        return tiff_name

#-----------------------------------------------------------------------------------
def __convert_pdf_to_text(filename:str=None, verbose:bool=False, max_pages:int=25) -> str:
        """Extract text from a PDF by way of 'pdftotext'.

        Both the first ('-f') and the last ('-l') page to extract
        are set to 1, the layout is preserved ('-layout').

        Args:
                filename: the PDF to extract text from
                verbose: not used by this function
                max_pages: not used by this function

        Returns:
                Name of the text file (<filename> plus '.txt'), or None on failure.
        """
        text_fname = '%s.txt' % filename
        pdftotext_cmd = ['pdftotext', '-f', '1', '-l', '1', '-layout', filename, text_fname]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = pdftotext_cmd, verbose = True)
        return text_fname if success else None

#-----------------------------------------------------------------------------------
#-----------------------------------------------------------------------------------
# built-in conversions: source mime type -> target mime type -> conversion function,
# convert_file() looks functions up by [source_mime][target_mime] and calls them
# with the keyword arguments <filename>, <verbose>, and <max_pages>
__CONVERSION_DELEGATES:dict[str, dict[str, Callable]] = {
        'application/vnd.oasis.opendocument.text': {
                'application/pdf': __convert_odt_to_pdf
        },
        'text/latex': {
                'application/pdf': convert_latex_to_pdf
        },
        'application/pdf': {
                # several spellings of "any image type"
                'image/any': __convert_pdf_to_image,
                'image/*': __convert_pdf_to_image,
                'image/': __convert_pdf_to_image,
                'image': __convert_pdf_to_image,
                # plain text plus several spellings of "any text type"
                'text/plain': __convert_pdf_to_text,
                'text/any': __convert_pdf_to_text,
                'text/*': __convert_pdf_to_text,
                'text/': __convert_pdf_to_text,
                'text': __convert_pdf_to_text
        }
}

# other mime types handled exactly like text/latex (they share the very same dict)
__CONVERSION_DELEGATES['text/tex'] = __CONVERSION_DELEGATES['text/latex']
__CONVERSION_DELEGATES['text/x-tex'] = __CONVERSION_DELEGATES['text/latex']
__CONVERSION_DELEGATES['application/x-latex'] = __CONVERSION_DELEGATES['text/latex']
__CONVERSION_DELEGATES['application/x-tex'] = __CONVERSION_DELEGATES['text/latex']

#-----------------------------------------------------------------------------------
def convert_file_to_image(filename:str=None, verbose:bool=False, max_pages:int=10) -> list[str]:
        """Convert a file into an image, any image type will do.

        This calls convert_file() with the generic target mime
        type 'image/*' and passes on whatever that returns.
        """
        conversion_args = {
                'filename': filename,
                'target_mime': 'image/*',
                'verbose': verbose,
                'max_pages': max_pages
        }
        return convert_file(**conversion_args)

#-----------------------------------------------------------------------------------
def convert_file_to_text(filename:str=None, verbose:bool=False) -> str:
        """Convert a file into text, any text type will do.

        This calls convert_file() with the generic target mime
        type 'text/*' and passes on whatever that returns.
        """
        conversion_args = {
                'filename': filename,
                'target_mime': 'text/*',
                'verbose': verbose
        }
        return convert_file(**conversion_args)

#-----------------------------------------------------------------------------------
def _deliver_converted_file(converted_fname, target_filename):
        """Hand a conversion result to the caller, copying it to <target_filename> if one was asked for."""
        if target_filename is None:
                return converted_fname

        shutil.copyfile(converted_fname, target_filename)
        return target_filename

#-----------------------------------------------------------------------------------
def convert_file(filename=None, target_mime=None, target_filename=None, target_extension=None, verbose=False, max_pages:int=25):
        """Convert file from one format into another.

        Conversion methods are tried in this order, the first success wins:

                1) the user-local script ~/bin/gm-convert_file
                2) the built-in conversion functions (__CONVERSION_DELEGATES)
                3) the system-wide script gm-convert_file

        Scripts are called as: SOURCEFILE TARGET_MIMETYPE TARGET_EXTENSION TARGET_FILENAME

        If the system-wide script fails, its target file is still
        accepted if it exists, is not empty, and is of the target
        mime type.

        Args:
                filename: the file to convert
                target_mime: a mime type, a missing subtype or the subtypes '', '*', and 'any' stand for "any subtype"
                target_filename: where to put the result, must differ from <filename>, a unique name is generated if None
                target_extension: extension for the generated name, used if none can be derived from <target_mime> or <target_filename>
                verbose: passed on to the built-in conversion functions
                max_pages: passed on to the built-in conversion functions

        Returns:
                Name of the converted file, or None on failure. If the source
                file is of the target mime type already, <filename> itself (or
                a copy thereof named <target_filename>) is returned.
        """
        assert (target_mime is not None), '<target_mime> must not be None'
        assert (filename is not None), '<filename> must not be None'
        assert (filename != target_filename), '<target_filename> must be different from <filename>'

        source_mime = guess_mimetype(filename = filename)
        # normalize generic target types to "<type>/"
        target_mime_parts = target_mime.rsplit('/', 1)
        if (len(target_mime_parts) == 1) or (target_mime_parts[1].strip().casefold() in ['', '*', 'any']):
                _log.debug('generic target mime type')
                target_mime = target_mime_parts[0] + '/'

        if source_mime.casefold().startswith(target_mime.casefold()):
                _log.debug('source file [%s] already target mime type [%s]', filename, target_mime)
                return _deliver_converted_file(filename, target_filename)

        converted_ext = guess_ext_by_mimetype(target_mime)
        if converted_ext is None:
                if target_filename is not None:
                        tmp, converted_ext = os.path.splitext(target_filename)
        if converted_ext is None:
                converted_ext = target_extension                # can still stay None
        converted_ext = gmTools.coalesce(converted_ext, '').strip().lstrip('.')
        converted_fname = gmTools.get_unique_filename(suffix = converted_ext)
        _log.debug('attempting conversion: [%s] -> [<%s>:%s]', filename, target_mime, gmTools.coalesce(target_filename, converted_fname))

        # try user-local conversion script
        script_name = 'gm-convert_file'
        binary = os.path.join(gmTools.gmPaths().home_dir, 'bin', script_name)
        _log.debug('trying user-local script: %s', binary)
        _log.debug('<%s> API: SOURCEFILE TARGET_MIMETYPE TARGET_EXTENSION TARGET_FILENAME', script_name)
        found, binary = gmShellAPI.detect_external_binary(binary = binary)
        if found:
                cmd_line = [
                        binary,
                        filename,
                        target_mime,
                        converted_ext,
                        converted_fname
                ]
                success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
                if success:
                        return _deliver_converted_file(converted_fname, target_filename)

        # try built-in conversions
        _log.debug('trying built-in conversion functions')
        conversion_func = __CONVERSION_DELEGATES.get(source_mime, {}).get(target_mime)
        if conversion_func is not None:
                # keep this result apart from <converted_fname>: a failed built-in
                # conversion returns None, which must not replace the file name
                # still needed for the system-wide script below
                builtin_result = conversion_func(filename = filename, verbose = verbose, max_pages = max_pages)
                if builtin_result is not None:
                        return _deliver_converted_file(builtin_result, target_filename)

        # try system-wide conversion script
        paths = gmTools.gmPaths()
        local_script = os.path.join(paths.local_base_dir, '..', 'external-tools', script_name)
        candidates = [ script_name, local_script ]              #, script_name + u'.bat'
        _log.debug('trying system-wide scripts: %s', candidates)
        found, binary = gmShellAPI.find_first_binary(binaries = candidates)
        if not found:   # try anyway
                _log.debug('trying anyway as last-ditch resort')
                binary = script_name# + r'.bat'
        cmd_line = [
                binary,
                filename,
                target_mime,
                converted_ext,
                converted_fname
        ]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if success:
                return _deliver_converted_file(converted_fname, target_filename)

        # seems to have failed but check for target file anyway
        _log.error('conversion script returned error exit code, checking target file anyway')
        if not os.path.exists(converted_fname):
                return None

        _log.info('conversion target file found')
        stats = os.stat(converted_fname)
        if stats.st_size == 0:
                return None

        _log.info('conversion target file size > 0')
        achieved_mime = guess_mimetype(filename = converted_fname)
        if not achieved_mime.casefold().startswith(target_mime.casefold()):
                _log.error('target: [%s], achieved: [%s]', target_mime, achieved_mime)
                return None

        # we may actually have something despite a non-0 exit code
        _log.info('conversion target file mime type [%s], as expected, might be usable', achieved_mime)
        return _deliver_converted_file(converted_fname, target_filename)

#-----------------------------------------------------------------------------------
def __run_file_describer(filename=None, cookie=None):
        """Run the external script 'gm-describe_file' on a file.

        Args:
                filename: the file to describe
                cookie: opaque value handed back unchanged as the last item of the result

        Returns:
                (True, STDOUT of the script, cookie) on success,
                (False, error message, cookie) if the script cannot be found, cannot be run, or exits non-zero.
        """
        script_name = 'gm-describe_file'
        bundled_script = os.path.join(gmTools.gmPaths().local_base_dir, '..', 'external-tools', script_name)
        found, binary = gmShellAPI.find_first_binary(binaries = [script_name, bundled_script])           #, script_name + '.bat'
        if not found:
                _log.error('cannot find <%s(.bat)>', script_name)
                return (False, _('<%s(.bat)> not found') % script_name, cookie)

        describer_cmd = [binary, filename]
        _log.debug('describing: %s', describer_cmd)
        try:
                outcome = subprocess.run (
                        args = describer_cmd,
                        stdin = subprocess.PIPE,
                        stdout = subprocess.PIPE,
                        stderr = subprocess.PIPE,
                        #timeout = timeout,
                        encoding = 'utf8',
                        errors = 'backslashreplace'
                )
        except (subprocess.TimeoutExpired, FileNotFoundError):
                _log.exception('there was a problem running external process')
                return (False, _('problem with <%s>') % binary, cookie)

        _log.info('exit code [%s]', outcome.returncode)
        if outcome.returncode == 0:
                return (True, outcome.stdout, cookie)

        _log.error('[%s] failed', binary)
        _log.error('STDERR:\n%s', outcome.stderr)
        _log.error('STDOUT:\n%s', outcome.stdout)
        return (False, _('problem with <%s>') % binary, cookie)

#-----------------------------------------------------------------------------------
def describe_file(filename, callback=None, cookie=None):
        """Describe a file by way of the external file describer script.

        Args:
                filename: the file to describe
                callback: if None, the describer is run synchronously; otherwise the describer is run in a worker thread and <callback> is registered as the completion callback
                cookie: opaque value handed back as the last item of the describer result, in both modes

        Returns:
                The describer result (success, text, cookie) when run synchronously, None when handed to a worker thread.
        """
        if callback is None:
                # pass on the cookie, too, such that the result is
                # the same as what the worker thread variant produces
                return __run_file_describer(filename = filename, cookie = cookie)

        payload_kwargs = {'filename': filename, 'cookie': cookie}
        gmWorkerThread.execute_in_worker_thread (
                payload_function = __run_file_describer,
                payload_kwargs = payload_kwargs,
                completion_callback = callback
        )

#-----------------------------------------------------------------------------------
def call_viewer_on_file(aFile = None, block=None):
        """Try to find an appropriate viewer with all tricks and call it.

        Strategies are tried in this order until one succeeds:

        1) a system-level opener as detected by _get_system_startfile_cmd()
        2) the viewer registered with mailcap for the mime type
        3) os.startfile(), if the platform provides it, on the file
           itself or on a copy carrying a guessed extension

        Args:
                aFile: name of the file (or directory) to display
                block: try to detach from viewer or not, None means to use mailcap default

        Returns:
                A tuple (success, message). The message is '' on success
                and a user-presentable explanation on failure.
        """
        if aFile is None:
                # os.path.isdir(None) would raise TypeError
                _log.error('no file name given, nothing to display')
                return False, _('[%s] is not a readable file') % aFile

        if not os.path.isdir(aFile):
                # is the file accessible at all ?
                try:
                        open(aFile).close()
                except Exception:
                        _log.exception('cannot read [%s]', aFile)
                        msg = _('[%s] is not a readable file') % aFile
                        return False, msg

        # try to detect any of the UNIX openers
        found, startfile_cmd = _get_system_startfile_cmd(aFile)
        if found:
                if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block):
                        return True, ''

        mime_type = guess_mimetype(aFile)
        viewer_cmd = get_viewer_cmd(mime_type, aFile)
        if viewer_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block):
                        return True, ''

        _log.warning("no viewer found via standard mailcap system")
        if os.name == "posix":
                _log.warning("you should add a viewer for this mime type to your mailcap file")

        _log.info("let's see what the OS can do about that")

        # does the file already have an extension ?
        f_ext = os.path.splitext(aFile)[1]
        # no
        if f_ext in ['', '.tmp']:
                # try to guess one
                f_ext = guess_ext_by_mimetype(mime_type)
                if f_ext is None:
                        _log.warning("no suitable file extension found, trying anyway")
                        file_to_display = aFile
                        f_ext = '?unknown?'
                else:
                        file_to_display = aFile + f_ext
                        try:
                                shutil.copyfile(aFile, file_to_display)
                        except OSError:
                                # say, a read-only directory: rather try the
                                # original file than fail without trying at all
                                _log.exception('cannot copy [%s] to [%s], trying original file', aFile, file_to_display)
                                file_to_display = aFile
        # yes
        else:
                file_to_display = aFile

        file_to_display = os.path.normpath(file_to_display)
        _log.debug("file %s <type %s> (ext %s) -> file %s", aFile, mime_type, f_ext, file_to_display)

        # last resort: hand over to Python itself
        try:
                os.startfile(file_to_display)
                return True, ''
        except AttributeError:
                _log.exception('os.startfile() does not exist on this platform')
        except Exception:
                _log.exception('os.startfile(%s) failed', file_to_display)

        msg = _("Unable to display the file:\n\n"
                        " [%s]\n\n"
                        "Your system does not seem to have a (working)\n"
                        "viewer registered for the file type\n"
                        " [%s]"
        ) % (file_to_display, mime_type)
        return False, msg

#-----------------------------------------------------------------------------------
def call_editor_on_file(filename=None, block=True):
        """Try to find an appropriate editor with all tricks and call it.

        Strategies are tried in this order until one succeeds:

        1) the editor registered with mailcap for the mime type
        2) the viewer registered with mailcap for the mime type
        3) a system-level opener as detected by _get_system_startfile_cmd()
        4) os.startfile(), if the platform provides it, on the file
           itself or on a copy carrying a guessed extension

        Args:
                filename: name of the file (or directory) to edit
                block: try to detach from editor or not, None means to use mailcap default.

        Returns:
                A tuple (success, message). The message is '' on success
                and a user-presentable explanation on failure.
        """
        if filename is None:
                # os.path.isdir(None) would raise TypeError
                _log.error('no file name given, nothing to edit')
                return False, _('[%s] is not a readable file') % filename

        if not os.path.isdir(filename):
                # is the file accessible at all ?
                try:
                        open(filename).close()
                except Exception:
                        _log.exception('cannot read [%s]', filename)
                        msg = _('[%s] is not a readable file') % filename
                        return False, msg

        mime_type = guess_mimetype(filename)

        editor_cmd = get_editor_cmd(mime_type, filename)
        if editor_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = editor_cmd, blocking = block):
                        return True, ''
        viewer_cmd = get_viewer_cmd(mime_type, filename)
        if viewer_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block):
                        return True, ''
        _log.warning("no editor or viewer found via standard mailcap system")

        if os.name == "posix":
                _log.warning("you should add an editor and/or viewer for this mime type to your mailcap file")

        _log.info("let's see what the OS can do about that")
        # does the file already have a useful extension ?
        f_ext = os.path.splitext(filename)[1]
        if f_ext in ['', '.tmp']:
                # try to guess one
                f_ext = guess_ext_by_mimetype(mime_type)
                if f_ext is None:
                        _log.warning("no suitable file extension found, trying anyway")
                        file_to_display = filename
                        f_ext = '?unknown?'
                else:
                        file_to_display = filename + f_ext
                        try:
                                shutil.copyfile(filename, file_to_display)
                        except OSError:
                                # say, a read-only directory: rather try the
                                # original file than fail without trying at all
                                _log.exception('cannot copy [%s] to [%s], trying original file', filename, file_to_display)
                                file_to_display = filename
        else:
                file_to_display = filename

        file_to_display = os.path.normpath(file_to_display)
        _log.debug("file %s <type %s> (ext %s) -> file %s", filename, mime_type, f_ext, file_to_display)

        # try to detect any of the UNIX openers (will only find viewers)
        found, startfile_cmd = _get_system_startfile_cmd(filename)
        if found:
                if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block):
                        return True, ''

        # last resort: hand over to Python itself
        try:
                os.startfile(file_to_display)
                return True, ''
        except AttributeError:
                _log.exception('os.startfile() does not exist on this platform')
        except Exception:
                _log.exception('os.startfile(%s) failed', file_to_display)

        msg = _("Unable to edit/view the file:\n\n"
                        " [%s]\n\n"
                        "Your system does not seem to have a (working)\n"
                        "editor or viewer registered for the file type\n"
                        " [%s]"
        ) % (file_to_display, mime_type)
        return False, msg

#=======================================================================================
if __name__ == "__main__":

        # usage: <this module> test <filename> [further arguments as needed by the enabled test]
        if len(sys.argv) < 2:
                sys.exit()

        if sys.argv[1] != 'test':
                sys.exit()

        # all tests need at least a file name, do not die with IndexError
        if len(sys.argv) < 3:
                print('usage: %s test <filename> [...]' % sys.argv[0])
                sys.exit(1)

        # for testing:
        logging.basicConfig(level = logging.DEBUG)

        filename = sys.argv[2]
        _get_system_startfile_cmd(filename)

        #--------------------------------------------------------
        def test_edit():
                """Find an editor (or, failing that, a viewer) for LaTeX/text and run it on <filename>."""
                # do not call this "mimetypes", that would shadow the stdlib module
                mime_types = [
                        'application/x-latex',
                        'application/x-tex',
                        'text/latex',
                        'text/tex',
                        'text/plain'
                ]

                editor_cmd = None
                for mimetype in mime_types:
                        editor_cmd = get_editor_cmd(mimetype, filename)
                        if editor_cmd is not None:
                                break

                if editor_cmd is None:
                        # LaTeX code is text: also consider text *viewers*
                        # since pretty much any of them will be an editor as well
                        for mimetype in mime_types:
                                editor_cmd = get_viewer_cmd(mimetype, filename)
                                if editor_cmd is not None:
                                        break

                if editor_cmd is None:
                        return False

                result = gmShellAPI.run_command_in_shell(command = editor_cmd, blocking = True)

                return result

        #--------------------------------------------------------
        def test_describer():
                # without a callback describe_file() returns (status, description, cookie)
                status, desc, cookie = describe_file(filename)
                print(status)
                print(desc)

        #--------------------------------------------------------
        def test_convert_file():
                # needs the target mime type as an additional argument
                print(convert_file (
                        filename = sys.argv[2],
                        target_mime = sys.argv[3]
                ))

        #--------------------------------------------------------
        def test_join_files_as_pdf():
                print(join_files_as_pdf(files = gmTools.dir_list_files(sys.argv[2])))

        #--------------------------------------------------------
        def test_check_is_textfile():
                for fname in gmTools.dir_list_files(sys.argv[2]):
                        print(fname)
                        print(' =>', is_probably_textfile(filename = fname))

        #--------------------------------------------------------
        def test__convert_pdf_to_img():
                img_name = __convert_pdf_to_image(sys.argv[2], verbose = True, max_pages = 3)
                print(img_name)
                #print(split_multipage_image(img_name))

        #--------------------------------------------------------
        def test__split_multipage():
                print(split_multipage_image(sys.argv[2]))

        #--------------------------------------------------------
#       print(_system_startfile_cmd)
#       print(guess_mimetype(filename))
#       print(get_viewer_cmd(guess_mimetype(filename), filename))
#       print(get_editor_cmd(guess_mimetype(filename), filename))
#       print(get_editor_cmd('application/x-latex', filename))
#       print(get_editor_cmd('application/x-tex', filename))
#       print(get_editor_cmd('text/latex', filename))
#       print(get_editor_cmd('text/tex', filename))
#       print(get_editor_cmd('text/plain', filename))
        #print(get_editor_cmd('text/x-tex', filename))
        #print(guess_ext_by_mimetype(mimetype=filename))
        #call_viewer_on_file(aFile = filename, block = True)
        #call_editor_on_file(filename)
        #test_describer()
        #print(test_edit())
        #test_convert_file()
        test__convert_pdf_to_img()
        #test_join_files_as_pdf()
        #test_check_is_textfile()
        #test__split_multipage()

Functions

def adjust_extension_by_mimetype(filename: str) ‑> str

Rename file to have proper extension as per its mimetype.

Returns

Original filename if no suffix found or empty suffix found or existing suffix already correct (case insensitive).

New filename if renamed. New filename will have any old suffix removed and the new suffix appended.

Expand source code
def adjust_extension_by_mimetype(filename:str) -> str:
        """Rename file such that its extension matches its mimetype.

        Returns:
                Original filename if no suffix is known for the mimetype, if that suffix is blank, or if the existing suffix is already correct (case insensitive).

                New filename if renamed. The new filename has any old suffix removed and the new suffix appended.

                None if gmTools.rename_file() reports failure.
        """
        mimetype = guess_mimetype(filename)
        wanted_ext = guess_ext_by_mimetype(mimetype)
        _log.debug('%s -> %s', mimetype, wanted_ext)
        if (wanted_ext is None) or (wanted_ext.strip() == ''):
                return filename

        # compare and assemble without the leading dot
        wanted_ext = wanted_ext.lstrip('.')
        stem_with_path, current_ext = os.path.splitext(filename)
        if current_ext.lstrip('.').casefold() == wanted_ext.casefold():
                return filename

        target_name = '%s.%s' % (stem_with_path, wanted_ext)
        _log.debug('[%s] -> [%s]', filename, target_name)
        if gmTools.rename_file(filename = filename, new_filename = target_name, overwrite = True, allow_symlink = True):
                return target_name

        return None
def call_editor_on_file(filename=None, block=True)

Try to find an appropriate editor with all tricks and call it.

block: try to detach from editor or not, None means to use mailcap default.

Expand source code
def call_editor_on_file(filename=None, block=True):
        """Try to find an appropriate editor with all tricks and call it.

        Strategies are tried in this order until one succeeds:

        1) the editor registered with mailcap for the mime type
        2) the viewer registered with mailcap for the mime type
        3) a system-level opener as detected by _get_system_startfile_cmd()
        4) os.startfile(), if the platform provides it, on the file
           itself or on a copy carrying a guessed extension

        Args:
                filename: name of the file (or directory) to edit
                block: try to detach from editor or not, None means to use mailcap default.

        Returns:
                A tuple (success, message). The message is '' on success
                and a user-presentable explanation on failure.
        """
        if filename is None:
                # os.path.isdir(None) would raise TypeError
                _log.error('no file name given, nothing to edit')
                return False, _('[%s] is not a readable file') % filename

        if not os.path.isdir(filename):
                # is the file accessible at all ?
                try:
                        open(filename).close()
                except Exception:
                        _log.exception('cannot read [%s]', filename)
                        msg = _('[%s] is not a readable file') % filename
                        return False, msg

        mime_type = guess_mimetype(filename)

        editor_cmd = get_editor_cmd(mime_type, filename)
        if editor_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = editor_cmd, blocking = block):
                        return True, ''
        viewer_cmd = get_viewer_cmd(mime_type, filename)
        if viewer_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block):
                        return True, ''
        _log.warning("no editor or viewer found via standard mailcap system")

        if os.name == "posix":
                _log.warning("you should add an editor and/or viewer for this mime type to your mailcap file")

        _log.info("let's see what the OS can do about that")
        # does the file already have a useful extension ?
        f_ext = os.path.splitext(filename)[1]
        if f_ext in ['', '.tmp']:
                # try to guess one
                f_ext = guess_ext_by_mimetype(mime_type)
                if f_ext is None:
                        _log.warning("no suitable file extension found, trying anyway")
                        file_to_display = filename
                        f_ext = '?unknown?'
                else:
                        file_to_display = filename + f_ext
                        try:
                                shutil.copyfile(filename, file_to_display)
                        except OSError:
                                # say, a read-only directory: rather try the
                                # original file than fail without trying at all
                                _log.exception('cannot copy [%s] to [%s], trying original file', filename, file_to_display)
                                file_to_display = filename
        else:
                file_to_display = filename

        file_to_display = os.path.normpath(file_to_display)
        _log.debug("file %s <type %s> (ext %s) -> file %s", filename, mime_type, f_ext, file_to_display)

        # try to detect any of the UNIX openers (will only find viewers)
        found, startfile_cmd = _get_system_startfile_cmd(filename)
        if found:
                if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block):
                        return True, ''

        # last resort: hand over to Python itself
        try:
                os.startfile(file_to_display)
                return True, ''
        except AttributeError:
                _log.exception('os.startfile() does not exist on this platform')
        except Exception:
                _log.exception('os.startfile(%s) failed', file_to_display)

        msg = _("Unable to edit/view the file:\n\n"
                        " [%s]\n\n"
                        "Your system does not seem to have a (working)\n"
                        "editor or viewer registered for the file type\n"
                        " [%s]"
        ) % (file_to_display, mime_type)
        return False, msg
def call_viewer_on_file(aFile=None, block=None)

Try to find an appropriate viewer with all tricks and call it.

block: try to detach from viewer or not, None means to use mailcap default

Expand source code
def call_viewer_on_file(aFile = None, block=None):
        """Try to find an appropriate viewer with all tricks and call it.

        Strategies are tried in this order until one succeeds:

        1) a system-level opener as detected by _get_system_startfile_cmd()
        2) the viewer registered with mailcap for the mime type
        3) os.startfile(), if the platform provides it, on the file
           itself or on a copy carrying a guessed extension

        Args:
                aFile: name of the file (or directory) to display
                block: try to detach from viewer or not, None means to use mailcap default

        Returns:
                A tuple (success, message). The message is '' on success
                and a user-presentable explanation on failure.
        """
        if aFile is None:
                # os.path.isdir(None) would raise TypeError
                _log.error('no file name given, nothing to display')
                return False, _('[%s] is not a readable file') % aFile

        if not os.path.isdir(aFile):
                # is the file accessible at all ?
                try:
                        open(aFile).close()
                except Exception:
                        _log.exception('cannot read [%s]', aFile)
                        msg = _('[%s] is not a readable file') % aFile
                        return False, msg

        # try to detect any of the UNIX openers
        found, startfile_cmd = _get_system_startfile_cmd(aFile)
        if found:
                if gmShellAPI.run_command_in_shell(command = startfile_cmd, blocking = block):
                        return True, ''

        mime_type = guess_mimetype(aFile)
        viewer_cmd = get_viewer_cmd(mime_type, aFile)
        if viewer_cmd is not None:
                if gmShellAPI.run_command_in_shell(command = viewer_cmd, blocking = block):
                        return True, ''

        _log.warning("no viewer found via standard mailcap system")
        if os.name == "posix":
                _log.warning("you should add a viewer for this mime type to your mailcap file")

        _log.info("let's see what the OS can do about that")

        # does the file already have an extension ?
        f_ext = os.path.splitext(aFile)[1]
        # no
        if f_ext in ['', '.tmp']:
                # try to guess one
                f_ext = guess_ext_by_mimetype(mime_type)
                if f_ext is None:
                        _log.warning("no suitable file extension found, trying anyway")
                        file_to_display = aFile
                        f_ext = '?unknown?'
                else:
                        file_to_display = aFile + f_ext
                        try:
                                shutil.copyfile(aFile, file_to_display)
                        except OSError:
                                # say, a read-only directory: rather try the
                                # original file than fail without trying at all
                                _log.exception('cannot copy [%s] to [%s], trying original file', aFile, file_to_display)
                                file_to_display = aFile
        # yes
        else:
                file_to_display = aFile

        file_to_display = os.path.normpath(file_to_display)
        _log.debug("file %s <type %s> (ext %s) -> file %s", aFile, mime_type, f_ext, file_to_display)

        # last resort: hand over to Python itself
        try:
                os.startfile(file_to_display)
                return True, ''
        except AttributeError:
                _log.exception('os.startfile() does not exist on this platform')
        except Exception:
                _log.exception('os.startfile(%s) failed', file_to_display)

        msg = _("Unable to display the file:\n\n"
                        " [%s]\n\n"
                        "Your system does not seem to have a (working)\n"
                        "viewer registered for the file type\n"
                        " [%s]"
        ) % (file_to_display, mime_type)
        return False, msg
def convert_file(filename=None, target_mime=None, target_filename=None, target_extension=None, verbose=False, max_pages: int = 25)

Convert file from one format into another.

target_mime: a mime type

Expand source code
def convert_file(filename=None, target_mime=None, target_filename=None, target_extension=None, verbose=False, max_pages:int=25):
        """Convert file from one format into another.

        Conversion is attempted, in this order, by way of

        1) a user-local script <gm-convert_file> in the bin/ directory below the home directory
        2) a built-in conversion function registered for source and target mime type
        3) a system-wide <gm-convert_file> script

        If the system-wide script fails, a non-empty result file of
        the expected mime type is accepted nonetheless.

        Args:
                filename: the file to convert
                target_mime: a mime type, a missing or generic subtype ('', '*', 'any') matches any subtype
                target_filename: if given, the result is copied to this file, must differ from <filename>
                target_extension: extension for the result file, used only if none can be derived from <target_mime> or <target_filename>
                verbose: passed on to built-in conversion functions
                max_pages: passed on to built-in conversion functions

        Returns:
                <target_filename> if given, else the name of the file holding
                the conversion result (which is <filename> itself if that is
                of the target mime type already). None on failure.
        """
        assert (target_mime is not None), '<target_mime> must not be None'
        assert (filename is not None), '<filename> must not be None'
        assert (filename != target_filename), '<target_filename> must be different from <filename>'

        source_mime = guess_mimetype(filename = filename)
        target_mime_parts = target_mime.rsplit('/', 1)
        if (len(target_mime_parts) == 1) or (target_mime_parts[1].strip().casefold() in ['', '*', 'any']):
                _log.debug('generic target mime type')
                # reduce to "<major type>/" such that prefix matching works
                target_mime = target_mime_parts[0] + '/'

        if source_mime.casefold().startswith(target_mime.casefold()):
                _log.debug('source file [%s] already target mime type [%s]', filename, target_mime)
                return _deliver_converted_file(filename, target_filename)

        converted_ext = guess_ext_by_mimetype(target_mime)
        if (converted_ext is None) and (target_filename is not None):
                # splitext() returns '' (not None) if there is no extension,
                # map that to None such that <target_extension> gets its chance
                converted_ext = os.path.splitext(target_filename)[1] or None
        if converted_ext is None:
                converted_ext = target_extension                # can still stay None
        converted_ext = gmTools.coalesce(converted_ext, '').strip().lstrip('.')
        converted_fname = gmTools.get_unique_filename(suffix = converted_ext)
        _log.debug('attempting conversion: [%s] -> [<%s>:%s]', filename, target_mime, gmTools.coalesce(target_filename, converted_fname))

        # try user-local conversion script
        script_name = 'gm-convert_file'
        binary = os.path.join(gmTools.gmPaths().home_dir, 'bin', script_name)
        _log.debug('trying user-local script: %s', binary)
        _log.debug('<%s> API: SOURCEFILE TARGET_MIMETYPE TARGET_EXTENSION TARGET_FILENAME', script_name)
        found, binary = gmShellAPI.detect_external_binary(binary = binary)
        if found:
                cmd_line = [
                        binary,
                        filename,
                        target_mime,
                        converted_ext,
                        converted_fname
                ]
                success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
                if success:
                        return _deliver_converted_file(converted_fname, target_filename)

        # try built-in conversions
        _log.debug('trying built-in conversion functions')
        try:
                conversion_func = __CONVERSION_DELEGATES[source_mime][target_mime]
        except KeyError:
                conversion_func = None
        if conversion_func is not None:
                # do not store the result in <converted_fname>: on failure
                # (None) that name is still needed by the script fallback below
                delegate_result = conversion_func(filename = filename, verbose = verbose, max_pages = max_pages)
                if delegate_result is not None:
                        return _deliver_converted_file(delegate_result, target_filename)

        # try system-wide conversion script
        paths = gmTools.gmPaths()
        local_script = os.path.join(paths.local_base_dir, '..', 'external-tools', script_name)
        candidates = [ script_name, local_script ]              #, script_name + u'.bat'
        _log.debug('trying system-wide scripts: %s', candidates)
        found, binary = gmShellAPI.find_first_binary(binaries = candidates)
        if not found:   # try anyway
                _log.debug('trying anyway as last-ditch resort')
                binary = script_name# + r'.bat'
        cmd_line = [
                binary,
                filename,
                target_mime,
                converted_ext,
                converted_fname
        ]
        success, returncode, stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if success:
                return _deliver_converted_file(converted_fname, target_filename)

        # seems to have failed but check for target file anyway
        _log.error('conversion script returned error exit code, checking target file anyway')
        if not os.path.exists(converted_fname):
                return None

        _log.info('conversion target file found')
        stats = os.stat(converted_fname)
        if stats.st_size == 0:
                return None

        _log.info('conversion target file size > 0')
        achieved_mime = guess_mimetype(filename = converted_fname)
        if not achieved_mime.casefold().startswith(target_mime.casefold()):
                _log.error('target: [%s], achieved: [%s]', target_mime, achieved_mime)
                return None

        # we may actually have something despite a non-0 exit code
        _log.info('conversion target file mime type [%s], as expected, might be usable', achieved_mime)
        return _deliver_converted_file(converted_fname, target_filename)

#---------------------------------------------------------------------------------------
def _deliver_converted_file(converted_fname, target_filename):
        # hand back the conversion result, copied to <target_filename> if the caller asked for one
        if target_filename is None:
                return converted_fname

        shutil.copyfile(converted_fname, target_filename)
        return target_filename
def convert_file_to_image(filename: str = None, verbose: bool = False, max_pages: int = 10) ‑> list[str]
Expand source code
def convert_file_to_image(filename:str=None, verbose:bool=False, max_pages:int=10) -> list[str]:
        """Convert a file into an image of whatever type.

        Thin wrapper calling convert_file() with the generic
        target mime type 'image/*'. Returns whatever
        convert_file() returns.
        """
        conversion_args = {
                'filename': filename,
                'target_mime': 'image/*',
                'verbose': verbose,
                'max_pages': max_pages
        }
        return convert_file(**conversion_args)
def convert_file_to_text(filename: str = None, verbose: bool = False) ‑> str
Expand source code
def convert_file_to_text(filename:str=None, verbose:bool=False) -> str:
        """Convert a file into text of whatever type.

        Thin wrapper calling convert_file() with the generic
        target mime type 'text/*'. Returns whatever
        convert_file() returns.
        """
        conversion_args = {
                'filename': filename,
                'target_mime': 'text/*',
                'verbose': verbose
        }
        return convert_file(**conversion_args)
def convert_latex_to_pdf(filename: str = None, verbose: bool = False, is_sandboxed: bool = False, max_pages: int = 25) ‑> str

Compile LaTeX code to PDF using pdflatex.

Args

is_sandboxed
whether or not to create a sandbox for compiling

Returns

Name of resulting PDF, or None on failure.

Expand source code
def convert_latex_to_pdf(filename:str=None, verbose:bool=False, is_sandboxed:bool=False, max_pages:int=25) -> str:
        """Compile LaTeX code to PDF using pdflatex.

        On first use pdflatex is searched for and called with
        "-version", the outcome of which is remembered in module
        level globals. The source is then compiled with two draft
        mode runs followed by one final run. Should any run fail
        then lacheck and chktex are run, if found, for diagnostics.

        Args:
                filename: the LaTeX source file
                verbose: not used by this function
                is_sandboxed: whether <filename> already lives inside a sandbox directory, if not, one is created and the file copied into it
                max_pages: not used by this function

        Returns:
                Name of resulting PDF, or None on failure.
        """
        global __LaTeX_version_checked
        global __pdflatex_executable
        if not __LaTeX_version_checked:
                # flag is set before checking, so the check runs only once, whatever its outcome
                __LaTeX_version_checked = True
                found, __pdflatex_executable = gmShellAPI.detect_external_binary(binary = 'pdflatex')
                if not found:
                        _log.error('pdflatex not found')
                        return None

                version_cmd = [__pdflatex_executable, '-version']
                version_ok, ret_code, stdout = gmShellAPI.run_process(cmd_line = version_cmd, encoding = 'utf8', verbose = True)
                if not version_ok:
                        _log.error('[%s] failed, LaTeX not usable', version_cmd)
                        return None

        if not is_sandboxed:
                # compile a copy inside a fresh sandbox
                sandbox_dir = gmTools.mk_sandbox_dir(prefix = gmTools.fname_stem(filename) + '_')
                shutil.copy(filename, sandbox_dir)
                filename = os.path.join(sandbox_dir, os.path.basename(filename))
        else:
                sandbox_dir = os.path.dirname(filename)
        _log.debug('LaTeX sandbox directory: [%s]', sandbox_dir)

        final_run = [
                __pdflatex_executable,
                '-recorder',
                '-interaction=nonstopmode',
                "-output-directory=%s" % sandbox_dir
        ]
        draft_run = final_run + ['-draftmode']
        # LaTeX can need up to three runs to get cross references et al right
        for pdflatex_call in (draft_run, draft_run, final_run):
                compiled, ret_code, stdout = gmShellAPI.run_process (
                        cmd_line = pdflatex_call + [filename],
                        acceptable_return_codes = [0],
                        encoding = 'utf8',
                        verbose = True  #_cfg.get(option = 'debug')
                )
                if compiled:
                        continue

                _log.error('problem running pdflatex, cannot generate form output, trying diagnostics')
                # (candidate binaries, options, what to log if none is found)
                diagnostics = [
                        (['lacheck', 'miktex-lacheck.exe'], [], 'lacheck not found'),
                        (['chktex', 'ChkTeX.exe'], ['--verbosity=2', '--headererr'], 'chcktex not found')
                ]
                for candidates, options, complaint in diagnostics:
                        found, binary = gmShellAPI.find_first_binary(binaries = candidates)
                        if not found:
                                _log.debug(complaint)
                                continue

                        gmShellAPI.run_process(cmd_line = [binary] + options + [filename], encoding = 'utf8', verbose = True)
                return None

        return '%s.pdf' % os.path.splitext(filename)[0]
def describe_file(filename, callback=None, cookie=None)
Expand source code
def describe_file(filename, callback=None, cookie=None):
        """Describe a file by way of the file describer helper.

        Without <callback> the describer is run synchronously and
        whatever it returns is handed back to the caller. Note that
        <cookie> is not passed to the describer in that case.

        With <callback> the describer is scheduled on a worker
        thread, receiving <filename> and <cookie> as keyword
        arguments, with <callback> registered as the completion
        callback. Nothing (None) is returned in that case.
        """
        run_in_background = callback is not None
        if run_in_background:
                gmWorkerThread.execute_in_worker_thread (
                        payload_function = __run_file_describer,
                        payload_kwargs = dict(filename = filename, cookie = cookie),
                        completion_callback = callback
                )
                return None

        return __run_file_describer(filename)
def get_editor_cmd(mimetype=None, filename=None)
Expand source code
def get_editor_cmd(mimetype=None, filename=None):
        """Return command for editor for this mime type complete with this file.

        The command is looked up under the 'edit' key of the
        mailcap entries (presumably None if nothing matches, as
        with the stdlib mailcap.findmatch() - verify against the
        _mailcap module in use).
        """
        if filename is None:
                _log.error("You should specify a file name for the replacement of %s.")
                # last resort: without a file name keep a literal '%s'
                # placeholder in the command and hope for the best - we
                # certainly don't want the module default "/dev/null"
                filename = '%s'

        known_caps = _mailcap.getcaps()
        match = _mailcap.findmatch(known_caps, mimetype, key = 'edit', filename = '%s' % filename)
        editor = match[0]

        # FIXME: we should check for "x-token" flags

        _log.debug("<%s> editor: [%s]" % (mimetype, editor))

        return editor
def get_viewer_cmd(aMimeType=None, aFileName=None, aToken=None)

Return command for viewer for this mime type complete with this file

Expand source code
def get_viewer_cmd(aMimeType = None, aFileName = None, aToken = None):
        """Return command for viewer for this mime type complete with this file.

        The command is looked up under the 'view' key of the
        mailcap entries. <aToken> is not used by this function.
        """
        if aFileName is None:
                _log.error("You should specify a file name for the replacement of %s.")
                # last resort: without a file name keep a literal '%s'
                # placeholder in the command and hope for the best - we
                # certainly don't want the module default "/dev/null"
                aFileName = '%s'

        known_caps = _mailcap.getcaps()
        match = _mailcap.findmatch(known_caps, aMimeType, key = 'view', filename = '%s' % aFileName)
        viewer = match[0]
        # FIXME: we should check for "x-token" flags

        _log.debug("<%s> viewer: [%s]" % (aMimeType, viewer))

        return viewer
def guess_ext_by_mimetype(mimetype='')

Return file extension based on what the OS thinks a file of this mimetype should end in.

Expand source code
def guess_ext_by_mimetype(mimetype=''):
        """Return the file extension a file of this mimetype should end in.

        The stdlib mimetypes module is asked first. If that does
        not know an extension the group [extensions] of the
        'user-mime' and 'system-mime' config sources is consulted.

        Returns:
                The extension, or None if none was found.
        """
        # ask system first
        os_ext = mimetypes.guess_extension(mimetype)
        if os_ext is not None:
                _log.debug('<%s>: %s', mimetype, os_ext)
                return os_ext

        _log.error("<%s>: no suitable file extension known to the OS" % mimetype)
        # try to help the OS a bit
        cfg_ext = gmCfgINI.gmCfgData().get (
                group = 'extensions',
                option = mimetype,
                source_order = [('user-mime', 'return'), ('system-mime', 'return')]
        )
        if cfg_ext is None:
                _log.error("<%s>: no suitable file extension found in config files", mimetype)
                return None

        _log.debug('<%s>: %s', mimetype, cfg_ext)
        return cfg_ext
def guess_ext_for_file(aFile: str = None) ‑> str

Guesses an appropriate file name extension based on mimetype.

Args

aFile
the name of an existing file
Expand source code
def guess_ext_for_file(aFile:str=None) -> str:
        """Guesses an appropriate file name extension based on mimetype.

        An extension already present on the file name is
        returned as is, without looking at the mimetype.

        Args:
                aFile: the name of an existing file

        Returns:
                The extension, or None if <aFile> is None or no extension could be guessed.
        """
        if aFile is None:
                return None

        existing_ext = os.path.splitext(aFile)[1]
        if existing_ext:
                return existing_ext

        mime_type = guess_mimetype(aFile)
        guessed_ext = guess_ext_by_mimetype(mime_type)
        if guessed_ext is None:
                _log.error('unable to guess file name extension for mime type [%s]' % mime_type)

        return guessed_ext
def guess_mimetype(filename: str = None) ‑> str

Guess mime type of arbitrary file.

Returns

Detected mimetype or 'application/octet-stream'.

Expand source code
def guess_mimetype(filename:str=None) -> str:
        """Determine the mime type of a file, trying several detectors in turn.

        The stdlib mimetypes lookup is tried first. If it yields nothing
        better than the worst-case type, the module's pylibextractor-,
        file- and extract-based helpers are tried in that order. The
        built-in gmMimeMagic detection is the last resort.

        Args:
                filename: name of the file to inspect

        Returns:
                Detected mimetype or 'application/octet-stream'.
        """
        _log.debug('guessing mime type of [%s]', filename)
        stdlib_guess, encoding = mimetypes.guess_type(filename)
        if not (stdlib_guess is None or stdlib_guess == WORST_CASE_MIMETYPE):
                _log.debug('"%s" -> <%s> (%s)', filename, stdlib_guess, encoding)
                return stdlib_guess

        # OS level helpers, the first one to produce a result wins
        fallback_detectors = (
                __guess_mimetype__pylibextractor,
                __guess_mimetype__file,
                __guess_mimetype__extract
        )
        for detect in fallback_detectors:
                detected = detect(filename = filename)
                if detected:
                        return detected

        # Getting here means none of the OS level helpers produced
        # a result, so trying our own magic cannot make matters worse.
        _log.info("OS level mime detection failed, falling back to built-in magic")
        from Gnumed.pycommon import gmMimeMagic
        last_resort = gmTools.coalesce(gmMimeMagic.filedesc(filename), WORST_CASE_MIMETYPE)
        del gmMimeMagic
        _log.debug('"%s" -> <%s>' % (filename, last_resort))
        return last_resort
def is_probably_image(filename: str = None) ‑> bool

Check whether a file might be an image file by mime type.

Expand source code
def is_probably_image(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file is an image type.

        Args:
                filename: name of the file to check

        Returns:
                True if the guessed mime type starts with 'image/', else False.
        """
        detected_type = guess_mimetype(filename)
        return detected_type.startswith('image/')
def is_probably_pdf(filename: str = None) ‑> bool

Check whether a file might be a PDF file by mime type.

Expand source code
def is_probably_pdf(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file is that of a PDF.

        Args:
                filename: name of the file to check

        Returns:
                True if the guessed mime type is 'application/pdf', else False.
        """
        detected_type = guess_mimetype(filename)
        return detected_type == 'application/pdf'
def is_probably_textfile(filename: str = None) ‑> bool

Check whether a file might be a text file by mime type.

Expand source code
def is_probably_textfile(filename:str=None) -> bool:
        """Tell whether the guessed mime type of a file is a text type.

        Args:
                filename: name of the file to check

        Returns:
                True if the guessed mime type starts with 'text/', else False.
        """
        detected_type = guess_mimetype(filename)
        return detected_type.startswith('text/')
def join_files_as_pdf(files: list[str] = None, pdf_name: str = None) ‑> str

Converts files to PDF and joins them into one final PDF.

Returns

Name of final PDF or None

Expand source code
def join_files_as_pdf(files:list[str]=None, pdf_name:str=None) -> str:
        """Convert each file to PDF and join the results into one final PDF.

        Every input file is converted into its own PDF inside a sandbox
        directory. The per-file PDFs are then merged by running "pdfunite".

        Args:
                files: names of the files to convert and join, must not be None
                pdf_name: name of the final PDF, a unique name is generated if None

        Returns:
                Name of final PDF, or None if no files were given, a
                conversion failed, or joining failed.
        """
        assert (files is not None), '<files> must not be None'

        if len(files) == 0:
                return None

        workdir = gmTools.mk_sandbox_dir()
        converted = []
        # the 1-based position of each file becomes the prefix of its PDF name
        for page_no, source_file in enumerate(files, start = 1):
                page_pdf = convert_file (
                        filename = source_file,
                        target_mime = 'application/pdf',
                        target_filename = gmTools.get_unique_filename(prefix = '%s-' % page_no, suffix = '.pdf', tmp_dir = workdir),
                        target_extension = '.pdf',
                        verbose = True
                )
                if page_pdf is None:
                        # one failed conversion aborts the whole join
                        return None

                converted.append(page_pdf)

        if pdf_name is None:
                pdf_name = gmTools.get_unique_filename(suffix = '.pdf')
        # pdfunite takes the input PDFs first and the output name last
        cmd_line = ['pdfunite', *converted, pdf_name]
        joined, _returncode, _stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if joined:
                return pdf_name

        _log.debug('cannot join files into one PDF')
        return None
def split_multipage_image(filename: str = None) ‑> list[str]
Expand source code
def split_multipage_image(filename:str=None) -> list[str]:
        """Split an image file into numbered files by running "convert".

        The output is written into a directory obtained from
        gmTools.mk_sandbox_dir(), using the file name followed by
        ".%d" as the output pattern.

        Args:
                filename: name of the image file to split

        Returns:
                Sorted list of the full paths of those sandbox entries whose
                names start with the stem of <filename>, or an empty list
                if the command failed.
        """
        target_dir = gmTools.mk_sandbox_dir()
        page_pattern = os.path.join(target_dir, '%s.%%d' % gmTools.fname_from_path(filename))
        cmd_line = ['convert', '-verbose', filename, page_pattern]
        success, _returncode, _stdout = gmShellAPI.run_process(cmd_line = cmd_line, verbose = True)
        if not success:
                return []

        stem = gmTools.fname_stem(filename)
        # only pick up sandbox entries that belong to this file
        return sorted (
                os.path.join(target_dir, entry)
                for entry in os.listdir(target_dir)
                if entry.startswith(stem)
        )