# process_middleware_timers_experiment_gets.py

"""
     ASL project - fall 2017

        author: Jovan Nikolic

        Processes logs generated by middleware
"""
import numpy as np
import csv
import math


class TimerStruct:
    """One record of a middleware timer log.

    Every field starts out as a sentinel (-1, or "none" for the command) and
    is expected to be overwritten by whoever parses a log line.
    """

    def __init__(self):
        self.worker_id = -1
        self.command = "none"
        # All remaining fields share the same "not set yet" sentinel.
        for field_name in (
            "number_of_keys",
            "queue_size",
            "request_received_time",
            "put_in_queue_time",
            "taken_out_of_queue_time",
            "sent_to_server_time",
            "received_complete_response_time",
            "response_sent_to_client_time",
            "dump_to_disk_flag",
        ):
            setattr(self, field_name, -1)

# Input location: read_experiments() appends the middleware number directly
# to this prefix (".../middleware1/...", ".../middleware2/...").
path_base_gets = "data/experiment_gets/middleware"
# Output directory for the per-repetition histogram CSVs.
agg_path_base = "aggregated_data/experiment_gets/"
# Output directory for the mean/std histogram CSVs.
plot_base_path = "plots/experiment_gets/"
# Name fragments of experiment directories and log files.
# counters_basename is not used by the code visible in this file.
client_threads_basename = "clientThreads_"
worker_threads_basename = "_workerThreads_"
counters_basename = "counter_"
timers_basename = "timers_"

# Middlewares are numbered 1..number_of_middlewares in the input paths.
number_of_middlewares = 2
# Repetitions are indexed 0..repetitions-1 in memory and "_rep1".."_repN" on disk.
repetitions = 3
# Not used by the code visible in this file.
# NOTE(review): presumably nanoseconds per second — confirm.
step = 1e9

# Fragments of the experiment directory name:
#   clientThreads_<cpt>_workerThreads_<wt><command_type>_rep<r>_<suffix>_keys<k>
command_type = "_S0-G10"
cpt = 2
# wt is also the number of timers_<i>.log files read per middleware.
wt = 64
num_keys = [1, 3, 6, 9]
suffixes = ["sharded", "nonsharded"]
# Keys of the per-bucket statistics computed in process_for_histograms().
metrics = ["mean", "std"]

# all_data[suffix][keys][rep] -> list of TimerStruct, filled by read_experiments().
all_data = {}
# Histogram bucket edges, filled by create_buckets().
buckets = []


def create_buckets():
    """Fill the module-level ``buckets`` list with the histogram bucket edges.

    The edges are 0.1, 0.2, ..., 14.9 (the upper bound 15 itself is excluded),
    each rounded to two decimals so they compare equal to the values returned
    by ``get_bucket``.

    The list is refilled in place, so calling this function more than once
    does not duplicate edges and existing references to ``buckets`` stay valid.
    """
    bucket_step = 0.1
    upper_bound = 15
    # Derive every edge from an integer index instead of accumulating floats,
    # so rounding error cannot build up across iterations.
    n_steps = int(round(upper_bound / bucket_step))
    buckets[:] = [round(i * bucket_step, 2) for i in range(1, n_steps)]


def get_bucket(original_time):
    """Return the bucket edge for ``original_time``.

    The time is rounded to one decimal; if that rounded the value down, the
    next edge (one tenth higher) is returned instead, so the result is never
    below ``original_time``.
    """
    nearest_edge = round(original_time, 1)
    overshoot = nearest_edge - original_time
    return nearest_edge if overshoot >= 0 else round(nearest_edge + 0.1, 1)


def _parse_timer_line(line):
    """Build a TimerStruct from one comma-separated timer log record."""
    # Strip every field so the command string carries no stray whitespace;
    # later code compares it with plain literals such as 'SET'.
    fields = [field.strip() for field in line.split(',')]
    ts = TimerStruct()
    ts.worker_id = int(fields[0])
    ts.command = fields[1]
    ts.number_of_keys = int(fields[2])
    ts.queue_size = int(fields[3])
    ts.request_received_time = int(fields[4])
    ts.put_in_queue_time = int(fields[5])
    ts.taken_out_of_queue_time = int(fields[6])
    ts.sent_to_server_time = int(fields[7])
    ts.received_complete_response_time = int(fields[8])
    ts.response_sent_to_client_time = int(fields[9])
    ts.dump_to_disk_flag = int(fields[10])
    return ts


def read_experiments():
    """Load every worker timer log into ``all_data``.

    For each suffix, key count and repetition, the ``timers_<worker>.log``
    files of all middlewares and all ``wt`` workers are read and their records
    are stored as TimerStruct objects in ``all_data[suffix][keys][rep]``
    (``rep`` is zero-based; directories on disk are named ``_rep1`` onwards).

    The first line of each log is skipped and blank lines are ignored.

    Raises:
        IOError/OSError: if an expected log file cannot be opened.
        ValueError, IndexError: if a non-blank record is malformed.
    """
    for suffix in suffixes:
        all_data[suffix] = {}
        for keys in num_keys:
            all_data[suffix][keys] = {}
            for rep in range(repetitions):
                print("Read " + suffix + " keys = " + str(keys) + " rep = " + str(rep))
                records = all_data[suffix][keys][rep] = []
                current_rep = rep + 1

                for mw in range(1, number_of_middlewares + 1):
                    # The experiment directory is the same for all workers of
                    # one middleware, so build it once per middleware.
                    experiment_dir = path_base_gets + str(mw) + "/" + \
                        client_threads_basename + str(cpt) + \
                        worker_threads_basename + str(wt) + command_type + \
                        "_rep" + str(current_rep) + "_" + suffix + "_keys" + str(keys)

                    for worker in range(wt):
                        path = experiment_dir + "/logs/" + timers_basename + str(worker) + ".log"

                        with open(path, 'r') as timer_file:
                            # The first line is never parsed as a record.
                            next(timer_file, None)
                            for line in timer_file:
                                line = line.strip()
                                if not line:
                                    continue
                                records.append(_parse_timer_line(line))


def process_for_histograms():
    """Bucket the loaded timer records and write the histogram CSV files.

    For every suffix, key count and repetition three histograms over
    ``buckets`` are built from ``all_data``:

    * response time: response sent to client minus request received,
    * server service time: complete response received minus sent to server,
    * worker post-processing time: response sent to client minus complete
      response received.

    Each raw difference is taken as an absolute value and divided by 1e6
    (NOTE(review): presumably nanoseconds to milliseconds — confirm).
    Records with command 'SET' or with an unexpected key count are skipped.
    Values that fall outside the bucket range are dropped; for response times
    they are also counted and reported.

    The per-repetition histograms are written via ``print_full_mw_hists`` and
    the per-bucket mean/std of the response-time counts over repetitions via
    ``print_final_hists``.
    """
    respt_hists = {}
    sst_hists = {}
    wpt_hists = {}
    for suffix in suffixes:
        respt_hists[suffix] = {}
        sst_hists[suffix] = {}
        wpt_hists[suffix] = {}
        for keys in num_keys:
            respt_by_rep = respt_hists[suffix][keys] = {}
            sst_by_rep = sst_hists[suffix][keys] = {}
            wpt_by_rep = wpt_hists[suffix][keys] = {}
            counted = 0
            outside_range = 0
            for rep in range(repetitions):
                # Every bucket starts with a zero count.
                respt_counts = respt_by_rep[rep] = dict.fromkeys(buckets, 0)
                sst_counts = sst_by_rep[rep] = dict.fromkeys(buckets, 0)
                wpt_counts = wpt_by_rep[rep] = dict.fromkeys(buckets, 0)

                for ts in all_data[suffix][keys][rep]:
                    if ts.command == 'SET':
                        continue
                    if ts.number_of_keys != keys:
                        print("~~~")
                        continue
                    counted += 1
                    res_time = abs(ts.response_sent_to_client_time - ts.request_received_time) / 1e6
                    ss_time = abs(ts.received_complete_response_time - ts.sent_to_server_time) / 1e6
                    wp_time = abs(ts.response_sent_to_client_time - ts.received_complete_response_time) / 1e6
                    if res_time > 7:
                        print("Flag = " + str(ts.dump_to_disk_flag) + " for response time = " + str(res_time))

                    res_bucket = get_bucket(res_time)
                    if res_bucket in respt_counts:
                        respt_counts[res_bucket] += 1
                    else:
                        outside_range += 1

                    # The other two histograms drop out-of-range values silently.
                    for counts, elapsed in ((sst_counts, ss_time), (wpt_counts, wp_time)):
                        edge = get_bucket(elapsed)
                        if edge in counts:
                            counts[edge] += 1

            print("Seen requests: " + str(counted) + ", out of bucket was: " + str(outside_range))

    print_full_mw_hists(respt_hists, "response_time")
    print_full_mw_hists(sst_hists, "server_service_time")
    print_full_mw_hists(wpt_hists, "worker_postprocessing_time")

    # Aggregate the response-time counts of each bucket over the repetitions.
    final_data = {}
    for suffix in suffixes:
        final_data[suffix] = {}
        for keys in num_keys:
            per_bucket = final_data[suffix][keys] = {}
            for edge in buckets:
                jobs = np.asarray(
                    [respt_hists[suffix][keys][rep][edge] for rep in range(repetitions)]
                )
                stats = per_bucket[edge] = {}
                stats[metrics[0]] = np.mean(jobs)
                stats[metrics[1]] = np.std(jobs)

    print_final_hists(final_data)


def print_final_hists(final_data):
    """Write the aggregated histograms to one CSV file per suffix.

    Args:
        final_data: nested mapping ``final_data[suffix][keys][bucket][metric]``
            holding one value per entry of ``metrics`` for every bucket.

    Each file is ``<plot_base_path>histograms_mw_<suffix>.csv``. It has one
    row per bucket: the bucket edge followed, for every key count in
    ``num_keys``, by one column per metric. The header is derived from
    ``num_keys`` and ``metrics`` so it always matches the row layout.

    Raises:
        IOError/OSError: if an output file cannot be opened for writing.
        KeyError: if ``final_data`` lacks an expected entry.
    """
    header = ["#Buckets"] + [
        metric.capitalize() + " jobs - Keys " + str(keys)
        for keys in num_keys
        for metric in metrics
    ]

    for suffix in suffixes:
        path = plot_base_path + "histograms_mw_" + suffix + ".csv"
        # newline='' lets the csv module control line endings itself;
        # without it, platforms that translate newlines get extra blank rows.
        with open(path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(header)
            for bucket in buckets:
                row = [bucket]
                for keys in num_keys:
                    stats = final_data[suffix][keys][bucket]
                    row.extend(stats[metric] for metric in metrics)
                writer.writerow(row)


def print_full_mw_hists(hist_data, tag):
    """Write per-repetition histogram counts to one CSV file per suffix.

    Args:
        hist_data: nested mapping ``hist_data[suffix][keys][rep][bucket]``
            with the count for each bucket (``rep`` is zero-based).
        tag: label appended to the output file name.

    Each file is ``<agg_path_base>histograms_mw_<suffix>_<tag>.csv``. It has
    one row per bucket: the bucket edge followed, for every key count in
    ``num_keys``, by one column per repetition. The header is derived from
    ``num_keys`` and ``repetitions`` so it always matches the row layout.

    Raises:
        IOError/OSError: if an output file cannot be opened for writing.
        KeyError: if ``hist_data`` lacks an expected entry.
    """
    header = ["#Buckets"] + [
        "Keys " + str(keys) + " - rep " + str(rep + 1)
        for keys in num_keys
        for rep in range(repetitions)
    ]

    for suffix in suffixes:
        path = agg_path_base + "histograms_mw_" + suffix + "_" + tag + ".csv"
        # newline='' lets the csv module control line endings itself;
        # without it, platforms that translate newlines get extra blank rows.
        with open(path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(header)
            for bucket in buckets:
                row = [bucket]
                for keys in num_keys:
                    per_rep = hist_data[suffix][keys]
                    row.extend(per_rep[rep][bucket] for rep in range(repetitions))
                writer.writerow(row)


def main():
    """Run the pipeline: build the buckets, load the logs, write histograms."""
    # Order matters: the histograms need both the buckets and the loaded data.
    for stage in (create_buckets, read_experiments, process_for_histograms):
        stage()


if __name__ == "__main__":
    main()