# demo of stats analysis | Python Fiddle

#! /usr/bin/env python
# -*- coding: utf-8 -*-
import re
import collections

def collect_ICStatusCheck_stats(path='/home/tigeral/Desktop/ICStatusCheck'):
    """Collect stat descriptors from an ICStatusCheck dump.

    Every line containing a ``name="..."`` attribute is classified by its
    slash-separated stat path:

    * a single segment yields ``(name, 'simple')``;
    * a path whose last segment is one of the known suffixes (``count``,
      ``max``, ``online``, ...) yields ``(parent_path, suffix)``;
    * anything else is treated as a bucket stat laid out as
      ``<name>/<duration>/<bucket>[/avedur]`` and yields
      ``(name, 'bucket', duration, bucket, is_avedur)``.

    Args:
        path: File to read. Defaults to the location the script was
            originally hard-wired to.

    Returns:
        A list of the tuples described above, in file order.
    """
    known_suffixes = ('count', 'last', 'max', 'online', 'offline', 'newoffline', 'recentoffline',
                      'percentUsed', 'usagepercent', 'remaining', 'uptime', 'mediaTypeCount',
                      'mediastat_typeCount', 'queuesize', 'suspended', 'terminated', 'new',
                      'active', 'install')
    stats = []
    with open(path) as f:
        for line in f:
            search_result = re.search('name="([^"]+)"', line)
            if not search_result:
                continue
            stat_path = search_result.group(1)
            splitted_path = stat_path.split('/')

            if len(splitted_path) == 1:
                stats.append((stat_path, 'simple'))
            elif splitted_path[-1] in known_suffixes:
                name = stat_path[:stat_path.rfind('/')]
                stats.append((name, splitted_path[-1]))
            else:
                is_avedur = 'avedur' == splitted_path[-1]
                if is_avedur and len(splitted_path) < 3:
                    # "<x>/avedur" has no duration/bucket segments to extract.
                    print('parsing error. {}'.format(stat_path))
                    continue
                # Drop the trailing 'avedur' marker so both layouts end with
                # <duration>/<bucket>.
                parts = splitted_path[:-1] if is_avedur else splitted_path
                name = '/'.join(parts[:-2])
                stats.append((name, 'bucket', parts[-2], parts[-1], is_avedur))
    return stats

def collect_stats_jsp_stats(path='/home/tigeral/Desktop/stats'):
    """Collect bucket stat descriptors from a saved stats.jsp text dump.

    The dump is read as blocks separated by blank lines. The first line of
    each block is a header of the form
    ``<name>: per <frame> now offset (secs) <TAB>count <TAB>dur (millis)
    <TAB>ave dur (millis)``; the remaining lines are data rows whose first
    ``' \\t'``-separated field is an integer time offset.

    For every data row two tuples are emitted,
    ``(name, 'bucket', str(duration), str(bucket), False)`` and the same
    with ``True`` as the last element, where ``bucket`` is the offset
    divided by the frame duration in seconds.

    Args:
        path: File to read. Defaults to the location the script was
            originally hard-wired to.

    Returns:
        A list of the tuples described above, in file order.
    """
    # Parentheses are literal text in the header, so they must be escaped.
    header_pattern = re.compile(
        r'^([^:]+): per (.+) now offset \(secs\) \tcount \tdur \(millis\) \tave dur \(millis\)$')
    frame_durations = {'min': 60, 'ten min': 600, 'hour': 3600, '4 hour': 14400}

    stats = []
    name = None
    stat_type = None
    duration = -1
    with open(path) as f:
        prev_line = '\n'
        for line in f:
            if prev_line == '\n':
                # this line is a stat block header
                header = header_pattern.search(line)
                if header:
                    name = header.group(1)
                    stat_type = 'bucket'
                    frame_name = header.group(2)
                    if frame_name in frame_durations:
                        duration = frame_durations[frame_name]
                    else:
                        # Previously ignored silently, leaving a stale duration.
                        print('Unknown time frame name "{}"'.format(frame_name))
                else:
                    print('Wrong stat header line')
            elif line != '\n':
                time_offset = int(line.split(' \t')[0])
                bucket = int(time_offset / duration)
                stats.append((name, stat_type, str(duration), str(bucket), False))
                stats.append((name, stat_type, str(duration), str(bucket), True))
            prev_line = line
    return stats

def collect_xmppstats_jsp_stats(path='/home/tigeral/Desktop/xmppstats'):
    """Collect bucket stat descriptors from a saved xmppstats.jsp text dump.

    The dump is read as blocks separated by blank lines. The first line of
    each block is a header of the form
    ``<name>: per <frame> now offset (secs) <TAB>count <TAB>dur (millis)
    <TAB>ave dur (millis)``; the remaining lines are data rows whose first
    ``' \\t'``-separated field is an integer time offset.

    For every data row two tuples are emitted,
    ``(name, 'bucket', str(duration), str(bucket), False)`` and the same
    with ``True`` as the last element, where ``bucket`` is the offset
    divided by the frame duration in seconds.

    Args:
        path: File to read. Defaults to the location the script was
            originally hard-wired to.

    Returns:
        A list of the tuples described above, in file order.
    """
    # Parentheses are literal text in the header, so they must be escaped.
    header_pattern = re.compile(
        r'^([^:]+): per (.+) now offset \(secs\) \tcount \tdur \(millis\) \tave dur \(millis\)$')
    frame_durations = {'sec': 1, 'min': 60, 'ten min': 600, 'hour': 3600, '4 hour': 14400}

    stats = []
    name = None
    stat_type = None
    duration = -1
    with open(path) as f:
        prev_line = '\n'
        for line in f:
            if prev_line == '\n':
                # this line is a stat block header
                header = header_pattern.search(line)
                if header:
                    name = header.group(1)
                    stat_type = 'bucket'
                    time_frame_name = header.group(2)
                    if time_frame_name in frame_durations:
                        duration = frame_durations[time_frame_name]
                    else:
                        # Duration keeps its previous value, as before.
                        print('Unknown time frame name "{}"'.format(time_frame_name))
                else:
                    print('Wrong stat header line')
            elif line != '\n':
                time_offset = int(line.split(' \t')[0])
                bucket = int(time_offset / duration)
                stats.append((name, stat_type, str(duration), str(bucket), False))
                stats.append((name, stat_type, str(duration), str(bucket), True))
            prev_line = line
    return stats

def _compose_stat_comparision_key(stat):
    """Build a string sort key for a stat tuple.

    String elements made up only of the digits 0-9 are left-padded with
    zeros to six characters so that they order numerically when compared as
    text; every other element is rendered with ``str()``. The pieces are
    concatenated without a separator.
    """
    def _render(part):
        if isinstance(part, str) and re.match('^[0-9]+$', part):
            return part.zfill(6)
        return str(part)

    return ''.join(_render(part) for part in stat)

def is_stats_equal(stat1, stat2):
    """Return True when both stats have equal length and no position differs."""
    if len(stat1) != len(stat2):
        return False
    return not any(left != right for left, right in zip(stat1, stat2))

def list_stats(stats):
    """Print every stat on its own line, ordered by its comparison key."""
    ordered = list(stats)
    ordered.sort(key=_compose_stat_comparision_key)
    for entry in ordered:
        print(entry)

def list_stat_names(stats):
    """Print the distinct stat names (first tuple element) in sorted order."""
    unique_names = {entry[0] for entry in stats}
    for name in sorted(unique_names):
        print(name)

def list_common_stat_names(stats_1, stats_2):
    """Print, in sorted order, the stat names present in both collections."""
    names_in_first = {entry[0] for entry in stats_1}
    names_in_second = {entry[0] for entry in stats_2}
    for shared_name in sorted(names_in_first & names_in_second):
        print(shared_name)

def calculate_buckets_count(stats):
    """Print which stats share each (bucket duration, bucket count) combination.

    Only 'bucket' stats whose avedur flag compares equal to False are counted.
    Each printed line is a ``((duration, count), [stat names])`` tuple, ordered
    by zero-padded duration and then zero-padded count.
    """
    # How many buckets each (stat name, duration) pair has.
    per_stat_and_duration = collections.Counter()
    for stat in stats:
        if stat[1] == 'bucket' and stat[4] == False:
            per_stat_and_duration[(stat[0], stat[2])] += 1

    # Regroup: (duration, number of buckets) -> names of stats using it.
    grouped = collections.defaultdict(list)
    for (stat_name, duration), how_many in per_stat_and_duration.items():
        grouped[(duration, how_many)].append(stat_name)

    def _order(item):
        (duration, how_many), _names = item
        return str.zfill(duration, 6) + str.zfill(str(how_many), 6)

    for entry in sorted(grouped.items(), key=_order):
        print(entry)

if __name__ == '__main__':
    # Other available reports; uncomment the ones you need:
    # list_stats(collect_ICStatusCheck_stats())
    # list_stats(collect_stats_jsp_stats())
    # list_stats(collect_xmppstats_jsp_stats())
    # list_stat_names(collect_ICStatusCheck_stats())
    # list_stat_names(collect_stats_jsp_stats())
    # list_stat_names(collect_xmppstats_jsp_stats())
    # list_common_stat_names(collect_ICStatusCheck_stats(), collect_stats_jsp_stats())
    # list_common_stat_names(collect_ICStatusCheck_stats(), collect_xmppstats_jsp_stats())

    # Print the bucket layout summary for each source, one after another.
    for collect in (collect_ICStatusCheck_stats,
                    collect_stats_jsp_stats,
                    collect_xmppstats_jsp_stats):
        calculate_buckets_count(collect())

# Python Fiddle

# Python Cloud IDE