#!/usr/local/bin/python3.11
# vim: ts=4 sw=4 expandtab :

# Copyright 2010 Javier Ruere
# Copyright 2011 Christian Theune <ct@gocept.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import copy
import logging
import os
import subprocess
import tempfile
from optparse import OptionParser

try:
    from itertools import izip
except ImportError:
    # Python 3 has no itertools.izip; the builtin zip is already lazy there.
    izip = zip


# Version string reported by the command line parser's --version option.
__version__ = '0.1.2'


# Logger shared by the module-level functions of this script.
LOGGER = logging.getLogger('merge-rrds')

# The best ElementTree implementation available.
# Stays None until main() stores the result of import_element_tree() here.
ET = None


def import_element_tree():
    """Returns the best implementation of ElementTree it can find.

    The candidates are tried in this order:

    1. ``xml.etree.cElementTree`` (C implementation),
    2. ``xml.etree.ElementTree`` (standard library),
    3. the third-party ``elementtree`` package.

    Returns the imported module, or None (after logging an error) when no
    candidate can be imported.
    """
    try:
        from xml.etree import cElementTree
        LOGGER.info('Using C ET implementation.')
        return cElementTree
    except ImportError:
        pass

    try:
        # No C code.
        from xml.etree import ElementTree
        LOGGER.info('Using Python ET implementation.')
        return ElementTree
    except ImportError:
        pass

    try:
        # Python < 2.5 has no xml.etree; fall back to the external package.
        from elementtree import ElementTree

        try:
            # Just check if the lib is available; the name is not used.
            import xml.parsers.expat
            LOGGER.info('Using Python 3rd party ET implementation.')
        except ImportError:
            # No expat, now things will get really slow.
            from elementtree import SimpleXMLTreeBuilder
            ElementTree.XMLTreeBuilder = SimpleXMLTreeBuilder.TreeBuilder
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            LOGGER.warning('Using very slow XML parser! You might want to '
                    'check your setup.')

        return ElementTree
    except ImportError:
        LOGGER.error('Failed to find an ElementTree implementation. '
                'Please install one.')
    return None


def compare_element_texts(el1, el2, expressions, errmsg=''):
    """Tell whether two elements agree on the text of every given path.

    Each entry of ``expressions`` is looked up with ``findtext`` on both
    elements.  The first path whose texts differ is logged as an error,
    prefixed with ``errmsg``, and False is returned.  True is returned when
    no path differs.
    """
    for path in expressions:
        if el1.findtext(path) == el2.findtext(path):
            continue
        LOGGER.error('%s "%s" differs.', errmsg, path)
        return False
    return True

# Paths whose texts must be equal in two RRAs for them to be merged.
RRA_COMPATIBILITY_PROPERTIES = ('./cf', './pdp_per_row')


def are_rra_compatible(n_rra, o_rra):
    """Checks for compatibility at the RRA level.

    Returns True when both RRA elements have the same text for every path
    in RRA_COMPATIBILITY_PROPERTIES; otherwise the difference is logged by
    compare_element_texts and False is returned.
    """
    compatible = compare_element_texts(
        n_rra, o_rra, RRA_COMPATIBILITY_PROPERTIES,
        errmsg='RRAs are not compatible.')
    return compatible

# Paths whose texts must be equal in two RRD dumps for them to be merged.
RRD_COMPATIBILITY_PROPERTIES = ('./version', './step')


def check_compatibility(et1, et2):
    """Checks for compatibility at the RRD level.

    Raises AssertionError when the two trees differ in the text of any
    path in RRD_COMPATIBILITY_PROPERTIES; returns None otherwise.
    """
    failure = 'RRDs are not compatible.'
    if compare_element_texts(et1, et2, RRD_COMPATIBILITY_PROPERTIES,
                             failure):
        return
    raise AssertionError(failure)

def generate_rrd(et, rrd_or_xml_fn):
    """Write the tree ``et`` to ``rrd_or_xml_fn`` as XML or as an RRD.

    When the file name ends in ``.xml`` (case-insensitive) the XML is
    written to that file directly.  Otherwise the XML goes to a temporary
    file and ``rrdtool restore -f -r <temporary xml> <rrd_or_xml_fn>`` is
    run to build the RRD from it.

    The outcome is only logged: a critical message when rrdtool could not
    be run or exited with a non-zero status, an info message on success.
    Nothing is returned.
    """
    write_xml = rrd_or_xml_fn.lower().endswith('.xml')

    # Both targets are binary streams: the tree is serialised as UTF-8
    # encoded bytes and NamedTemporaryFile is opened in binary mode by
    # default, so writing text to either fails on Python 3.
    if write_xml:
        xml = open(rrd_or_xml_fn, 'wb')
    else:
        xml = tempfile.NamedTemporaryFile()
    try:
        # Emit the XML declaration explicitly, ahead of the serialised tree.
        xml.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
        et.write(xml, encoding="utf-8")

        if write_xml:
            ret = 0 # Already finished!
        else:
            # rrdtool reads the file by name, so the data must be on disk.
            xml.flush()
            try:
                # An argument list bypasses the shell, so file names with
                # quotes or other shell metacharacters are passed verbatim.
                ret = subprocess.call(['rrdtool', 'restore', '-f', '-r',
                                       xml.name, rrd_or_xml_fn])
            except OSError as exc:
                LOGGER.critical("Could not run rrdtool: %s", exc)
                # Same status a shell reports for a command it cannot find.
                ret = 127
    finally:
        # Closing the temporary file also removes it.
        xml.close()
    if ret:
        LOGGER.critical("Failed to create merged rrd '%s'. (%d)", rrd_or_xml_fn,
                ret)
    else:
        LOGGER.info("Created merged rrd '%s'.", rrd_or_xml_fn)

def create_empty_row(rra_et):
    """Build a new ``row`` element filled with ``NaN`` values.

    The new row gets as many ``v`` children as the first
    ``./database/row`` of ``rra_et`` has; each one has the text ``NaN``.
    An AssertionError is raised when that row has no ``v`` children.
    """
    template_row = rra_et.find('./database/row')
    width = len(template_row.findall('v'))
    assert width > 0, "Rows with no elements don't make sense."

    empty_row = ET.Element('row')
    cells = [ET.SubElement(empty_row, 'v') for _ in range(width)]
    for cell in cells:
        cell.text = 'NaN'
    return empty_row

def database2dict(row_elems, total, lastupdate, step, sparse=False):
    """Create a dict with the non NaN data from database.

    Rows are paired, in order, with the timestamps
    ``lastupdate - total * step``, ``lastupdate - (total - 1) * step``, ...,
    ``lastupdate``; pairing stops as soon as either sequence is exhausted.
    A row is stored under its timestamp when at least one of its child
    elements has a text that does not contain ``NaN``.  Children without
    any text are treated like NaN instead of raising a TypeError.

    ``sparse`` is accepted for backward compatibility and is not used.

    Returns a dict mapping timestamp to row element.
    """
    # range/zip replace xrange/izip, which do not exist on Python 3.
    times = range(lastupdate - total * step, lastupdate + step, step)
    data = {}
    for timestamp, row_elem in zip(times, row_elems):
        # This is useful for sparse files (which is my case).
        # XXX Could be avoided for mostly full files for a speedup.
        if any(value_elem.text is not None and 'NaN' not in value_elem.text
               for value_elem in row_elem):
            data[timestamp] = row_elem
    return data

def _select_merged_rows(o_rows, n_rows, keep_new_rows):
    """Pick the rows of one merged RRA.

    The newest ``keep_new_rows`` rows are taken from ``n_rows``; the older
    slots before them are filled with the most recent rows of ``o_rows``.
    Both lists are expected to have the same length, which is also the
    length of the result.
    """
    old_rows = max(len(n_rows) - keep_new_rows, 0)
    if not old_rows:
        # The time between the two updates covers the whole archive.
        return list(n_rows)
    return o_rows[-old_rows:] + n_rows[old_rows:]


def merge_rrd_xml(old_et, new_et):
    """Merge the XML dumps of an old and a new RRD into a new tree.

    The step, lastupdate and DS definitions of the result are copied from
    ``new_et``.  For every pair of RRAs the configuration is copied from
    the new RRA, and the database is made of the newest rows of the new
    RRA (as many as fit in the time between the two ``lastupdate`` values)
    preceded by the most recent rows of the old RRA.

    Raises AssertionError when the steps differ, when the old RRD was not
    updated before the new one, when the number of DSes differs, or when a
    pair of RRAs is incompatible or differs in its number of rows.
    Differing DS names or RRA counts are only logged as warnings.

    Returns an ElementTree whose root is the merged ``rrd`` element.
    """
    log = logging.getLogger('merge-rrds.merge_rrd_xml')

    # Get some parameters to calculate the time of the rows.
    step_elem = new_et.find('./step')
    step = int(step_elem.text)
    n_lastupdate_elem = new_et.find('./lastupdate')
    n_lastupdate = int(n_lastupdate_elem.text)
    o_lastupdate = int(old_et.find('./lastupdate').text)
    # Raised explicitly (not via assert) so the checks survive python -O.
    if step != int(old_et.find('./step').text):
        raise AssertionError('RRDs have different steps.')
    if not o_lastupdate < n_lastupdate:
        raise AssertionError(
                'The old RRD must have been updated before the new one.')
    new_timeframe = n_lastupdate - o_lastupdate

    # Create the merged tree and add the configuration.
    root = ET.Element('rrd')
    version_elem = ET.SubElement(root, 'version')
    version_elem.text = '0003' # This version is generated.
    root.append(copy.deepcopy(step_elem))
    root.append(copy.deepcopy(n_lastupdate_elem))
    n_dses = new_et.findall('./ds')
    if len(n_dses) != len(old_et.findall('./ds')):
        raise AssertionError("RRDs have different number of DSes.")
    if [ds.findtext('./name') for ds in n_dses] \
            != [n.text for n in old_et.findall('./ds/name')]:
        log.warning("RRDs have DSes with different names")
    for n_ds in n_dses:
        root.append(copy.deepcopy(n_ds))

    n_rras = new_et.findall('./rra')
    o_rras = old_et.findall('./rra')
    if len(n_rras) != len(o_rras):
        # zip() stops at the shorter list, so say that RRAs get dropped.
        log.warning("RRDs have a different number of RRAs; "
                "the unpaired ones are left out.")

    for n_rra, o_rra in zip(n_rras, o_rras):
        if not are_rra_compatible(n_rra, o_rra):
            raise AssertionError('Incompatible RRAs.')
        n_rows = n_rra.findall('./database/row')
        o_rows = o_rra.findall('./database/row')
        if len(n_rows) != len(o_rows):
            raise AssertionError('RRAs have different number of rows.')

        # Compute the cut-off-point between the old and the new RRA.
        # Floor division keeps the count an int on Python 3, where "/"
        # would yield a float that cannot be used as a slice index.
        pdp_per_row = int(n_rra.find('./pdp_per_row').text)
        keep_new_rows = new_timeframe // (pdp_per_row * step)

        # Create the merged RRA and add the configuration.
        m_rra = ET.SubElement(root, 'rra')
        for path in ('./cf', './pdp_per_row', './params', './cdp_prep'):
            m_rra.append(copy.deepcopy(n_rra.find(path)))

        merged_rows = _select_merged_rows(o_rows, n_rows, keep_new_rows)
        assert len(merged_rows) == len(n_rows)
        db_elem = ET.SubElement(m_rra, 'database')
        for row in merged_rows:
            db_elem.append(copy.deepcopy(row))

    return ET.ElementTree(root)

def get_xml(*rrd_or_xml_fns):
    """Returns an ElementTree of the dump of each given filename.

    Using the extension, the file type is inferred. If the file is an XML,
    it is read directly. If it's an RRD, it's dumped with ``rrdtool dump``
    and the output is parsed. Files with any other extension are treated
    as RRDs after logging a warning.

    The result is a list with one parsed tree per file name, in the order
    the names were given. Errors from the XML parser are not caught here.
    """
    trees = []
    for rrd_or_xml_fn in rrd_or_xml_fns:
        if rrd_or_xml_fn.lower().endswith('.xml'):
            # Given a file name, parse() opens and closes the file itself.
            trees.append(ET.parse(rrd_or_xml_fn))
            continue

        if not rrd_or_xml_fn.lower().endswith('.rrd'):
            LOGGER.warning('Unknown file type. Guessing it is an RRD.')

        # An argument list bypasses the shell, so file names with quotes
        # or other shell metacharacters are passed verbatim.
        dump = subprocess.Popen(['rrdtool', 'dump', rrd_or_xml_fn],
                                stdout=subprocess.PIPE)
        try:
            trees.append(ET.parse(dump.stdout))
        finally:
            # Always release the pipe and reap the child process.
            dump.stdout.close()
            if dump.wait():
                LOGGER.error("rrdtool dump of '%s' failed. (%d)",
                        rrd_or_xml_fn, dump.returncode)

    return trees

def get_configuration():
    """Parse the command line.

    Exactly three positional arguments are required (old rrd, new rrd,
    merged rrd); with any other count the parser reports an error.

    Returns a list holding the parsed options followed by the three
    positional arguments.
    """
    parser = OptionParser(usage="%prog <old rrd> <new rrd> <merged rrd>",
                          version="%prog v" + __version__)
    parser.add_option("-v", "--verbose", default=0, action="count",
                      help="produce more output")

    parsed = parser.parse_args()
    options = parsed[0]
    positional = parsed[1]
    if len(positional) != 3:
        parser.error("Wrong number of arguments.")

    configuration = [options]
    configuration.extend(positional)
    return configuration

def main():
    """Run the merge described by the command line arguments.

    Returns 1 when no ElementTree implementation can be imported and 0
    once the merged file has been handed to generate_rrd.
    """
    global ET

    configuration = get_configuration()
    options, old_fn, new_fn, merged_fn = configuration

    logging.basicConfig()
    # Every -v lowers the threshold by one level (10), but never below DEBUG.
    level = logging.WARNING - options.verbose * 10
    if level < logging.DEBUG:
        level = logging.DEBUG
    LOGGER.setLevel(level)

    ET = import_element_tree()
    if ET is None:
        return 1

    LOGGER.info("Merging '%s' and '%s', where the latter has precedence, into "
            "'%s'.", old_fn, new_fn, merged_fn)

    trees = get_xml(old_fn, new_fn)
    old_et, new_et = trees
    check_compatibility(old_et, new_et)
    generate_rrd(merge_rrd_xml(old_et, new_et), merged_fn)

    return 0


# When executed as a script, run main() and use its return value as the
# exit status of the process.
if __name__ == '__main__':
    import sys
    sys.exit(main())

