# Source code for ptp.tools.arachni.parser

"""

:synopsis: Specialized :class:`ptp.libptp.parser.AbstractParser` classes for the tool Arachni.

.. moduleauthor:: Tao Sauvage

"""

import re

from lxml.etree import XMLSyntaxError

from ptp.libptp import constants
from ptp.libptp.exceptions import NotSupportedVersionError
from ptp.libptp.parser import XMLParser, JSONParser


class ArachniXMLParser(XMLParser):
    """Arachni XML specialized parser.

    Accepts reports whose ``version`` element matches :attr:`__version__`
    (``0.4.x`` and ``1.x``/``1.x.y``).

    """

    __tool__ = 'arachni'
    __format__ = 'xml'
    __version__ = (
        r'(^0\.4\.[0-9]+$)|'
        r'(^1\.[0-9]+(\.[0-9]+)?$)')

    # Severity labels as they appear (lower-cased) in the report.
    HIGH = 'high'
    MEDIUM = 'medium'
    LOW = 'low'
    INFO = 'informational'

    # Maps the report's severity labels to PTP's ranking constants.
    RANKING_SCALE = {
        HIGH: constants.HIGH,
        MEDIUM: constants.MEDIUM,
        LOW: constants.LOW,
        INFO: constants.INFO}

    @classmethod
    def is_mine(cls, pathname, filename='*.xml', light=False, first=True):
        """Check if it can handle the report file.

        :param str pathname: Path to the report directory.
        :param str filename: Regex matching the report file.
        :param bool light: `True` to only parse the ranking of the findings from the report.
        :param bool first: Only process first file (``True``) or each file that matched (``False``).

        :raises IOError: when the report file cannot be found.
        :raises OSError: when the report file cannot be found.

        :return: `True` if it supports the report, `False` otherwise.
        :rtype: :class:`bool`

        """
        try:
            stream = cls.handle_file(pathname, filename, first=first)
        except (TypeError, XMLSyntaxError):
            return False
        version = stream.find('.//version')
        if version is None:
            return False
        # An empty ``<version/>`` element has ``text`` set to ``None``; fall back to an empty string so the
        # regex simply does not match instead of raising ``TypeError``.
        return re.search(cls.__version__, version.text or '', re.IGNORECASE) is not None

    def parse_metadata(self):
        """Parse the metadata of the report.

        :raises: :class:`NotSupportedVersionError` -- if it does not support the version of this report, or if
            the report has no ``version`` element at all.

        :return: The metadata of the report.
        :rtype: dict

        """
        # Find the version of Arachni.
        version = self.stream.find('.//version')
        if version is None:
            # Without a version element there is nothing to validate the report against.
            raise NotSupportedVersionError('PTP does NOT support this version of Arachni.')
        # Reconstruct the metadata
        # TODO: Retrieve the other metadata like the date, etc.
        self.metadata = {version.tag: version.text}
        if not self.check_version(self.metadata):
            raise NotSupportedVersionError('PTP does NOT support this version of Arachni.')
        return self.metadata

    @staticmethod
    def _find_text(element, path, default=''):
        """Return the text of the first node matching `path` below `element`.

        :param element: Element to search from.
        :param str path: Path expression handed to ``element.find``.
        :param default: Value returned when the node is missing or has no text.

        :return: The node's text, or `default`.

        """
        node = element.find(path)
        if node is None or node.text is None:
            return default
        return node.text

    def _parse_report_full(self, tree):
        """Parse Arachni XML reports to extract additional information.

        Arachni HTTP traffic is divided into following fields:
            * request
            * response status code
            * response headers
            * response body

        :param list tree: One entry per ``referring_page`` element, each entry being the list of that element's
            children (as built by :meth:`parse_report`).

        :return: List of dicts where each entry is the HTTP traffic generated for the issue.
        :rtype: :class:`list`

        """
        data = []
        for children in tree:
            # The request and the response are read from positions 1 and 2 of the children.
            t_req, t_res = children[1], children[2]
            # Somehow follow naming conventions from http://docs.python-requests.org/en/master/
            # Missing or empty nodes are treated as empty strings so that, e.g., a response without a body
            # does not abort the whole parsing.
            data.append({
                'request': self._find_text(t_req, './/raw') + self._find_text(t_req, './/body') + '\n',
                'status_code': self._find_text(t_res, './/code', default=None),
                'headers': self._find_text(t_res, './/raw_headers').strip(),
                'body': self._find_text(t_res, './/body').strip()
            })
        return data

    def parse_report(self):
        """Parse the results of the report.

        Every child of the ``issues`` element yields one ``{'ranking': ...}`` entry. Unless the parser runs in
        light mode, one extra entry ranked ``constants.UNKNOWN`` is appended that carries the HTTP
        transactions found in the report.

        :raises KeyError: if an issue has a severity that is not in :attr:`RANKING_SCALE`.

        :return: List of dicts where each one represents a discovery.
        :rtype: :class:`list`

        """
        issues = self.stream.find('.//issues')
        if issues is None:
            # ``find`` returns ``None`` when the report has no ``issues`` element: nothing was discovered.
            issues = []
        self.vulns = [
            {'ranking': self.RANKING_SCALE[vuln.find('.//severity').text.lower()]}
            for vuln in issues]
        if not self.light:
            # ``list(element)`` is the supported replacement for the deprecated ``element.getchildren()``.
            referring_pages = [
                list(record)
                for record in self.stream.xpath('//variations//variation//referring_page')]
            self.vulns.append({
                'ranking': constants.UNKNOWN,
                'transactions': self._parse_report_full(referring_pages)})
        return self.vulns


class ArachniJSONParser(JSONParser):
    """Arachni JSON specialized parser.

    Accepts reports whose ``version`` entry matches :attr:`__version__` (``1.x``/``1.x.y``).

    """

    __tool__ = 'arachni'
    # This parser handles JSON reports (see the ``*.json`` default of :meth:`is_mine`).
    # NOTE(review): this used to read 'xml', which looks like a copy-paste from the XML parser -- confirm no
    # caller relies on the old value.
    __format__ = 'json'
    __version__ = r'(^1\.[0-9]+(\.[0-9]+)?$)'

    # Severity labels as they appear (lower-cased) in the report.
    HIGH = 'high'
    MEDIUM = 'medium'
    LOW = 'low'
    INFO = 'informational'

    # Maps the report's severity labels to PTP's ranking constants.
    RANKING_SCALE = {
        HIGH: constants.HIGH,
        MEDIUM: constants.MEDIUM,
        LOW: constants.LOW,
        INFO: constants.INFO}

    @classmethod
    def is_mine(cls, pathname, filename='*.json', light=False, first=True):
        """Check if it can handle the report file.

        :param str pathname: Path to the report directory.
        :param str filename: Regex matching the report file.
        :param bool light: `True` to only parse the ranking of the findings from the report.
        :param bool first: Only process first file (``True``) or each file that matched (``False``).

        :return: `True` if it supports the report, `False` otherwise.
        :rtype: :class:`bool`

        """
        try:
            stream = cls.handle_file(pathname, filename, first=first)
        except (TypeError, ValueError):
            return False
        try:
            version = stream['version']
        except (KeyError, IndexError, TypeError):
            # No ``version`` entry, or the top-level JSON value is not an object (list, string, number...).
            return False
        try:
            return re.search(cls.__version__, version, re.IGNORECASE) is not None
        except TypeError:
            # The version is not a string (e.g. a number or ``null``): not a report this parser understands.
            return False

    def parse_metadata(self):
        """Parse the metadata of the report.

        :raises: :class:`NotSupportedVersionError` -- if it does not support the version of this report.

        :return: The metadata of the report.
        :rtype: dict

        """
        # Find the version of Arachni.
        version = self.stream['version']
        # Reconstruct the metadata
        # TODO: Retrieve the other metadata like the date, etc.
        self.metadata = {'version': version}
        if not self.check_version(self.metadata):
            raise NotSupportedVersionError('PTP does NOT support this version of Arachni.')
        return self.metadata

    def _parse_report_full(self, issues):
        """Parse Arachni JSON reports to extract additional information.

        Arachni HTTP traffic is divided into following fields:
            * request
            * response status code
            * response headers
            * response body

        :param list issues: The ``issues`` entries of the report; each one is read through its ``variations``
            list, whose items provide a ``request`` and a ``response`` mapping.

        :return: List of dicts where each entry is the HTTP traffic generated for the issue.
        :rtype: :class:`list`

        """
        data = []
        for issue in issues:
            for variation in issue['variations']:
                request = variation['request']
                response = variation['response']
                # A request without body is reported as ``None``.
                request_body = '' if request['body'] is None else request['body']
                response_headers = response['headers_string']
                # Somehow follow naming conventions from http://docs.python-requests.org/en/master/
                data.append({
                    'request': request['headers_string'] + request_body + '\n',
                    'status_code': response['code'],
                    # Same key as the one produced by the XML parser.
                    'headers': response_headers,
                    # Historical (singular) key, kept so that existing consumers keep working.
                    'header': response_headers,
                    'body': response['body']
                })
        return data

    def parse_report(self):
        """Parse the results of the report.

        Every entry of ``issues`` yields one ``{'ranking': ...}`` dict. Unless the parser runs in light mode,
        one extra entry ranked ``constants.UNKNOWN`` is appended that carries the HTTP transactions found in
        the report.

        :raises KeyError: if an issue has a severity that is not in :attr:`RANKING_SCALE`.

        :return: List of dicts where each one represents a discovery.
        :rtype: :class:`list`

        """
        issues = self.stream['issues']
        self.vulns = [{'ranking': self.RANKING_SCALE[vuln['severity'].lower()]} for vuln in issues]
        if not self.light:
            self.vulns.append({
                'ranking': constants.UNKNOWN,
                'transactions': self._parse_report_full(issues)})
        return self.vulns