"""
:synopsis: Define the basic :mod:`parser` classes that will parse the data from the report file.
.. moduleauthor:: Tao Sauvage
"""
import os
import re
import fnmatch
import json
from lxml import etree
[docs]class AbstractParser(object):
"""Abstract representation of a parser.
.. note::
This class will be extended for each pentesting tool. That way, each tool will add its own parsing
specificities.
"""
#: :class:`str` -- Name of the tool.
__tool__ = None
#: :class:`str` -- Format of reports it supports.
__format__ = None
#: :class:`list` -- Versions it can supports.
__version__ = ''
[docs] def __init__(self, pathname='./', filename='*', light=False, first=True):
"""Initialize :class:`AbstractParser`.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool light: `True` to only parse the ranking of the findings from the report.
:param bool first: Only process first file (``True``) or each file that matched (``False``).
"""
#: :class:`str` -- Path to the report directory.
self.pathname = pathname
#: :class:`type` -- i/o stream of the report.
self.stream = self.handle_file(pathname, filename, first=first)
#: :class:`list` -- List of dict of the results found in the report.
self.vulns = []
#: :class:`dict` -- Dict of the metadata found in the report.
self.metadata = {}
#: :class:`bool` -- Should fully parse the report or not.
self.light = light
[docs] @staticmethod
def _recursive_find(pathname='./', file_regex='*', first=True):
"""Retrieve the full path to the report file(s).
:param str pathname: The root directory where to start searching.
:param re file_regex: The regex that will be matched against all files from within `pathname`.
:param bool first: Stop the search as soon as a file has been matched.
:return: A list of path to the matched files that have been found.
:rtype: list
.. note::
The search occurs starting from `pathname` as the root directory.
"""
founds = []
for base, _, files in os.walk(pathname):
try:
is_str = isinstance(files, basestring) # Python 2.x.
except NameError:
is_str = isinstance(files, str) # Python 3.x.
if is_str and fnmatch.fnmatch(files, file_regex): # Only one file has been found (str).
founds.append(os.path.join(base, files))
else: # Several files have been found (list).
matched_files = fnmatch.filter(files, file_regex)
founds.extend(os.path.join(base, matched_file) for matched_file in matched_files)
if founds and first:
founds = [founds[0]]
break
return founds
[docs] @classmethod
def handle_file(cls, pathname='./', filename='*', first=True):
"""Process the report file.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool first: Stop the search as soon as a file has been matched.
:raises: :class:`NotImplementedError` because this is an abstract method.
"""
raise NotImplementedError('`handle_file` function MUST be defined for each parser.')
[docs] @classmethod
def is_mine(cls):
"""Check if it can handle the report file.
:raises: :class:`NotImplementedError` because this is an abstract method.
"""
raise NotImplementedError('`is_mine` function MUST be defined for each parser.')
[docs] @classmethod
def check_version(cls, metadata, key='version'):
"""Checks the version in the metadata against the supported one(s).
:param dict metadata: The metadata in which to find the version.
:param str key: The :attr:`metadata` key containing the version value.
:return: `True` if it can parse the report, `False` otherwise.
:rtype: bool
"""
try:
# Is there a non-empty result from re.findall?
if list(filter(None, re.findall(cls.__version__, metadata[key], re.IGNORECASE))):
return True
except KeyError:
pass
return False
[docs] def parse_report(self):
"""Parse the results of a report file.
:raises: NotImplementedError because this is an abstract method.
"""
raise NotImplementedError('`parse_report` function MUST be defined for each parser.')
[docs]class XMLParser(AbstractParser):
"""Specialized parser for XML files.
Define the special :func:`handle_file` function in order to process the XML report file.
"""
#: str -- XMLParser only supports XML files.
__format__ = 'xml'
[docs] def __init__(self, pathname='./', filename='*.xml', **kwargs):
"""Initialize :class:`XMLParser`."""
AbstractParser.__init__(self, pathname, filename, **kwargs)
[docs] @classmethod
def handle_file(cls, pathname='./', filename='*.xml', first=True):
"""Return the root node of the XML file.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool first: Stop the search as soon as a file has been matched.
:raises IOError: when the report file cannot be found.
:raises TypeError: when the report file has not the right extension.
:raises lxml.etree.XMLSyntaxError: when Lxml cannot parse the XML file.
:return: handle on the root node element from the XML file.
:rtype: :class:`lxml.etree._Element`
"""
fullpath = cls._recursive_find(pathname, filename, first=first)
if not len(fullpath):
raise IOError("Report matching '%s' cannot be found." % os.path.join(pathname, filename))
fullpath = fullpath[0]
if not fullpath.endswith(cls.__format__):
raise TypeError("This parser only supports '%s' files" % cls.__format__)
return etree.parse(fullpath).getroot()
[docs]class FileParser(AbstractParser):
"""Specialized parser for generic report file(s).
Define the special :func:`handle_file` function in order to process the generic report file.
"""
[docs] @classmethod
def handle_file(cls, pathname='./', filename='*', first=True):
"""Return a string of the content of the file.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool first: Stop the search as soon as a file has been matched.
:raises IOError: when the report file cannot be found or an error occurs when opening/reading.
:raises OSError: when an error occurs when opening/reading the report file.
:return: data from the file.
:rtype: str
"""
fullpath = cls._recursive_find(pathname, filename, first=first)
if not len(fullpath):
raise IOError("Report matching '%s' cannot be found." % filename)
fullpath = fullpath[0]
data = ''
with open(fullpath, 'r') as f:
data = f.read()
return data
[docs]class LineParser(AbstractParser):
"""Specialized parser for generic report files.
Define the special :func:`handle_file` function in order to process the generic report file.
.. note::
Contrary to :class:`FileParser`, this class reads the file line by line and return a :class:`list`, instead of
a :class:`str`.
"""
[docs] @classmethod
def handle_file(cls, pathname='./', filename='*', skip_empty=True, first=True):
"""Return a list of the lines of the file.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool first: Stop the search as soon as a file has been matched.
:param bool skip_empty: skip the empty lines that can occur in the file if `True`, otherwise keep them.
:raises IOError: when the report file cannot be found or an error occurs when opening/reading.
:raises OSError: when an error occurs when opening/reading the report file.
:return: all the data from the file, line by line.
:rtype: list
"""
fullpath = cls._recursive_find(pathname, filename, first=first)
if not len(fullpath):
raise IOError("Report matching '%s' cannot be found." % filename)
data = []
for current_file in fullpath:
with open(current_file, 'r') as f:
if skip_empty:
data.extend([line.rstrip('\n\r') for line in f.readlines() if line.rstrip('\n\r')])
else:
data.extend([line.rstrip('\n\r') for line in f.readlines()])
return data
class JSONParser(AbstractParser):
"""Specialized parser for JSON files.
Define the special :func:`handle_file` function in order to process the JSON report file.
"""
#: str -- JSONParser only supports json files.
__format__ = 'json'
@classmethod
def handle_file(cls, pathname='./', filename='*.json', first=True):
"""Return the dict of the JSON file.
:param str pathname: Path to the report directory.
:param str filename: Regex matching the report file.
:param bool first: Stop the search as soon as a file has been matched.
:raises IOError: when the report file cannot be found.
:raises TypeError: when the report file has not the right extension.
:raises ValueError: when json cannot parse the JSON file.
"""
fullpath = cls._recursive_find(pathname, filename, first=first)
if not len(fullpath):
raise IOError("Report matching '%s' cannot be found." % filename)
fullpath = fullpath[0]
if not fullpath.endswith(cls.__format__):
raise TypeError("This parser only supports '%s' files" % cls.__format__)
with open(fullpath, 'r') as data:
return json.load(data)