
SCRIPTS: Changes to analyze and plot scripts

Refactored some initializations in the analyze script; we now
extract the stderr instead of the variance (see the sketch below)

Heavily refactored the plot script; the main changes are:
- we now also plot error bars corresponding to the stderr of the
  measurements
- we now place the charts in a dedicated charts folder
- the plotting phase is now refactored out into two helper functions
- we now divide the benchmarks into two categories (CPU run time
  under or over 30 sec.) and, on the basis of this division, group
  each category into its own chart, to better highlight differences
  in run time and power usage
- we now add a label above each bar showing its height
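
A note on the stderr terminology: np.std computes the standard
deviation, while the standard error of the mean further divides by
sqrt(n). A minimal sketch of the three quantities in NumPy, with
made-up sample values for illustration:

    import numpy as np

    times = np.array([12.1, 11.8, 12.4, 12.0, 11.9])  # hypothetical run times
    variance = np.var(times)                 # what analyze.py stored before
    std_dev = np.std(times)                  # what analyze.py stores now
    std_err = std_dev / np.sqrt(len(times))  # standard error of the mean
    print(variance, std_dev, std_err)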
Andrea Gussoni, 8 years ago
parent commit b6f4eddf28
2 changed files with 147 additions and 68 deletions
  1. utils/analyze.py (+8, -10)
  2. utils/plot.py (+139, -58)

utils/analyze.py (+8, -10)

@@ -23,23 +23,21 @@ def computeAverages( platform ):
         nameset.add(name)
 
     # Helper dicts to store the results computed in the next loop
-    timesAverages = {}
-    powerAverages = {}
-    timesVariances = {}
-    powerVariances = {}
+    timesAverages, powerAverages, timesVariances, powerVariances = {}, {}, {}, {}
 
     # For each benchmark we iterate over the results, take into account the values and save the average and the variance in a corresponding record in the dicts
     for benchmarkName in nameset:
 
         # Temporary variables to store the results of the currently analyzed benchmark
-        times = []
-        power = []
+        times, power = [], []
 
         # For each record we copy the values if the name is equal to the currently analyzed
         for record in data:
             if benchmarkName == record['name']:
                 times.append(record['time'])
-                power.append(record['power'])
+
+                # Before writing the value we convert it from watt-hours (Wh) to milliwatt-hours (mWh)
+                power.append(record['power']*1000)
 
         # Since the power measurement utility sometimes is not able to get the results from the measurement device, we cleanse the list containing the power measurement values from the invalid (nan) values
         cleanedPower = [x for x in power if math.isnan(x) == False]
@@ -47,8 +45,8 @@ def computeAverages( platform ):
         # We compute the average of time and consumption and store it in the corresponding entries in the dict
         timesAverages[benchmarkName] = np.average(times)
         powerAverages[benchmarkName] = np.average(cleanedPower)
-        timesVariances[benchmarkName] = np.var(times)
-        powerVariances[benchmarkName] = np.var(cleanedPower)
+        timesVariances[benchmarkName] = np.std(times)
+        powerVariances[benchmarkName] = np.std(cleanedPower)
 
     # Write on file the averages
     with open('results/' + platform + '/average.csv', 'wb') as csvfile:
@@ -58,7 +56,7 @@ def computeAverages( platform ):
             filewriter.writerow([benchmarkName, timesAverages[benchmarkName], powerAverages[benchmarkName]])
 
     # Write on file the variance
-    with open('results/' + platform + '/variance.csv', 'wb') as csvfile:
+    with open('results/' + platform + '/stderr.csv', 'wb') as csvfile:
         filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
 
         for benchmarkName in nameset:
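
Both CSV files written by analyze.py hold one row per benchmark in the
form name,time,power, which is what plot.py reads back below. A minimal
sketch of the round trip for the 'cpu' platform, assuming the scripts
have already produced results/cpu/average.csv:

    import numpy as np

    # Read the averages back the same way plot.py does
    data = np.genfromtxt('results/cpu/average.csv', dtype=None, delimiter=',',
                         names=['name', 'time', 'power'])
    for record in data:
        print(record['name'], record['time'], record['power'])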

utils/plot.py (+139, -58)

@@ -3,66 +3,147 @@ import numpy as np
 import matplotlib.mlab as mlab
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
+import seaborn
+seaborn.set()
+import os
+
+# Helper function to attach labels to the bars
+def autolabel(subplot, bars):
+    for bar in bars:
+        height = bar.get_height()
+        subplot.text(bar.get_x() + bar.get_width()/2., 1.05*height, '%d' % int(height), ha='center', va='bottom')
+
+# Helper function to strip the "opencl/" prefix and the "/ocl" and "/OpenCL" suffixes from the benchmark names (the category parameter is currently unused)
+def stripNames (category, names):
+    stripped_names = []
+    for elem in names:
+        elem = elem.replace("opencl/", "")
+        elem = elem.replace("/ocl", "")
+        elem = elem.replace("/OpenCL", "")
+        stripped_names.append(elem)
+    return stripped_names
+
+# List containing all the platform names
+platforms = ['cpu', 'gpu-primary', 'gpu-secondary']
+
+# List containing the categories of the benchmarks
+categories = ['short', 'long']
+
+averageTotal = {}
+stderrTotal = {}
 
 # Import data from file
-data_cpu = np.genfromtxt('results/cpu/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
-data_gpu_primary = np.genfromtxt('results/gpu-primary/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
-data_gpu_secondary = np.genfromtxt('results/gpu-secondary/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+averageTotal['cpu'] = np.genfromtxt('results/cpu/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+averageTotal['gpu-primary'] = np.genfromtxt('results/gpu-primary/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+averageTotal['gpu-secondary'] = np.genfromtxt('results/gpu-secondary/average.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+stderrTotal['cpu'] = np.genfromtxt('results/cpu/stderr.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+stderrTotal['gpu-primary'] = np.genfromtxt('results/gpu-primary/stderr.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+stderrTotal['gpu-secondary'] = np.genfromtxt('results/gpu-secondary/stderr.csv', dtype=None, delimiter=',', names=['name', 'time', 'power'])
+
+
+# Create the folder where the charts will be stored, if it doesn't already exist
+if not os.path.exists('charts'):
+    os.makedirs('charts')
+
+# Dict that contains the indexes (entries in the data files) of the benchmarks, grouped by category
+indexes = {'short': [], 'long': []}
 
-# We construct a set in which we insert each benchmark name once
-nameset = set()
-names = data_cpu['name']
-for name in names:
-    nameset.add(name)
+# We iterate over the cpu benchmark results (since they usually have the longest run times) and split the benchmarks into two groups, storing the corresponding indexes in two lists
+for i in range(0,17):
+    record = averageTotal['cpu'].take(i)
+    if record['time'] < 30:
+        indexes['short'].append(i)
+    else:
+        indexes['long'].append(i)
+
+
+# We instantiate new dicts to contain the average and the std err of the measurements on the various platforms
+average = {'cpu': {}, 'gpu-primary': {}, 'gpu-secondary': {}}
+stderr = {'cpu': {}, 'gpu-primary': {}, 'gpu-secondary': {}}
+
+for platform in platforms:
+    for category in categories:
+        average[platform][category] = averageTotal[platform].take(indexes[category])
+        stderr[platform][category] = stderrTotal[platform].take(indexes[category])
 
 # Generate an array as placeholder for the x axis (we need to pass from a list to an array to take advantage of range)
-x = range(0, 17)
-# Strip away the "opencl/" prefix from all the name
-stripped_names = []
-for elem in data_cpu['name']:
-    elem = elem.replace("opencl/", "")
-    elem = elem.replace("/ocl", "")
-    elem = elem.replace("/OpenCL", "")
-    stripped_names.append(elem)
-
-
-# Create the bar plot for the time values
-plt.bar(x, data_cpu['time'], width=0.3, color='b', align='edge')
-plt.bar(x, data_gpu_primary['time'], width=-0.3, color='r', align='center')
-plt.bar(x, data_gpu_secondary['time'], width=-0.3, color='g', align='edge')
-plt.xticks(x, stripped_names)
-plt.title('Execution time of the various benchmarks expressed in seconds')
-plt.xlabel('Benchmark')
-plt.ylabel('seconds')
-
-# Add some patches as legend of the colors used for the various benchmarks
-red_patch = mpatches.Patch(color='blue', label='Execution time for cpu')
-blue_patch = mpatches.Patch(color='red', label='Execution time for gpu(4 core)')
-green_patch = mpatches.Patch(color='green', label='Execution time for gpu(2 core)')
-
-plt.legend(handles=[red_patch, blue_patch, green_patch])
-
-# Save the obtained plot on file
-plt.savefig('times.pdf')
-plt.show()
-
-
-# Create the bar plot for the power values
-plt.bar(x, data_cpu['power'], width=0.3, color='b', align='edge')
-plt.bar(x, data_gpu_primary['power'], width=-0.3, color='r', align='center')
-plt.bar(x, data_gpu_secondary['power'], width=-0.3, color='g', align='edge')
-plt.xticks(x, stripped_names)
-plt.title('Power consumption of the various benchmarks expressed in Watt/hour')
-plt.xlabel('Benchmark')
-plt.ylabel('Watt/hour')
-
-# Add some patches as legend of the colors used for the various benchmarks
-red_patch = mpatches.Patch(color='blue', label='Power consumption for cpu')
-blue_patch = mpatches.Patch(color='red', label='Power consumption for gpu(4 core)')
-green_patch = mpatches.Patch(color='green', label='Execution time for gpu(2 core)')
-
-plt.legend(handles=[red_patch, blue_patch, green_patch])
-
-# Save the obtained plot on file
-plt.savefig('power.pdf')
-plt.show()
+x = {}
+x['short'] = np.arange(len(indexes['short']))
+x['long'] = np.arange(len(indexes['long']))
+
+# Strip the unwanted parts from the benchmark names, using the helper function defined above
+x_names_stripped = {'short': [], 'long': []}
+for category in categories:
+    x_names = averageTotal['cpu'].take(indexes[category])['name'].tolist()
+    x_names_stripped[category] = stripNames(category, x_names)
+
+# Helper function that creates the execution time plots; takes as a parameter the category of benchmarks we want to plot
+def plotTimes (category):
+
+    # Initialize the figure
+    fig, ax = plt.subplots(figsize=(20, 10))
+
+    # Create the bar plot for the time values
+    time_cpu_bars = ax.bar(x[category]-0.3, average['cpu'][category]['time'], width=0.2, color='b', align='edge', yerr=stderr['cpu'][category]['time'])
+    time_gpu_primary_bars = ax.bar(x[category], average['gpu-primary'][category]['time'], width=0.2, color='r', align='center', yerr=stderr['gpu-primary'][category]['time'])
+    time_gpu_secondary_bars = ax.bar(x[category]+0.3, average['gpu-secondary'][category]['time'], width=-0.2, color='g', align='edge', yerr=stderr['gpu-secondary'][category]['time'])
+
+    # Change the labels of the x axis to contain the names of the benchmarks
+    ax.set_xticks(x[category])
+    ax.set_xticklabels(x_names_stripped[category])
+    ax.set_title('Execution time of the various benchmarks expressed in seconds')
+    ax.set_xlabel('Benchmark')
+    ax.set_ylabel('seconds')
+
+    # Add some patches as a legend for the colors used for the various platforms
+    red_patch = mpatches.Patch(color='blue', label='Execution time for cpu')
+    blue_patch = mpatches.Patch(color='red', label='Execution time for gpu(4 core)')
+    green_patch = mpatches.Patch(color='green', label='Execution time for gpu(2 core)')
+
+    ax.legend(handles=[red_patch, blue_patch, green_patch])
+
+    # Invoke the helper function to attach a label to each bar
+    autolabel(ax, time_cpu_bars)
+    autolabel(ax, time_gpu_primary_bars)
+    autolabel(ax, time_gpu_secondary_bars)
+
+    # Save the obtained plot on file
+    plt.savefig('charts/times-' + category + '.pdf')
+
+# Helper function that creates the power consumption plots; takes as a parameter the category of benchmarks we want to plot
+def plotPower (category):
+
+    # Create a new figure
+    fig, bx = plt.subplots(figsize=(20, 10))
+
+    # Create the bar plot for the power values
+    power_cpu_bars = bx.bar(x[category]-0.3, average['cpu'][category]['power'], width=0.2, color='b', align='edge', yerr=stderr['cpu'][category]['power'])
+    power_gpu_primary_bars = bx.bar(x[category], average['gpu-primary'][category]['power'], width=0.2, color='r', align='center', yerr=stderr['gpu-primary'][category]['power'])
+    power_gpu_secondary_bars = bx.bar(x[category]+0.3, average['gpu-secondary'][category]['power'], width=-0.2, color='g', align='edge', yerr=stderr['gpu-secondary'][category]['power'])
+
+    # Change the labels of the x axis to contain the names of the benchmarks
+    bx.set_xticks(x[category])
+    bx.set_xticklabels(x_names_stripped[category])
+    bx.set_title('Power consumption of the various benchmarks expressed in mWatt/hour')
+    bx.set_xlabel('Benchmark')
+    bx.set_ylabel('mWatt/hour')
+
+    # Add some patches as a legend for the colors used for the various platforms
+    red_patch = mpatches.Patch(color='blue', label='Power consumption for cpu')
+    blue_patch = mpatches.Patch(color='red', label='Power consumption for gpu(4 core)')
+    green_patch = mpatches.Patch(color='green', label='Power consumption for gpu(2 core)')
+
+    bx.legend(handles=[red_patch, blue_patch, green_patch])
+
+    # Invoke the helper function to attach a label to each bar
+    autolabel(bx, power_cpu_bars)
+    autolabel(bx, power_gpu_primary_bars)
+    autolabel(bx, power_gpu_secondary_bars)
+
+    # Save the obtained plot on file
+    plt.savefig('charts/power-' + category + '.pdf')
+
+# Invoke the helper functions to plot the data
+for category in categories:
+    plotTimes(category)
+    plotPower(category)
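
The bar placement above relies on a matplotlib detail worth spelling
out: with align='edge', a positive width extends the bar to the right
of its x position, while a negative width extends it to the left, so
the three series form one contiguous group around each tick. A
self-contained sketch of the technique and of the bar-height labels,
with made-up benchmark names and values (not real measurements):

    import numpy as np
    import matplotlib.pyplot as plt

    x = np.arange(3)          # three hypothetical benchmarks
    cpu = [10, 25, 14]        # illustrative timings
    gpu4 = [6, 12, 9]
    gpu2 = [8, 16, 11]

    fig, ax = plt.subplots()
    # spans [x-0.3, x-0.1]: positive width, left edge anchored at x-0.3
    cpu_bars = ax.bar(x - 0.3, cpu, width=0.2, align='edge', label='cpu')
    # spans [x-0.1, x+0.1]: centered on the tick
    ax.bar(x, gpu4, width=0.2, align='center', label='gpu(4 core)')
    # spans [x+0.1, x+0.3]: negative width, right edge anchored at x+0.3
    ax.bar(x + 0.3, gpu2, width=-0.2, align='edge', label='gpu(2 core)')

    # Same labeling idea as the autolabel helper in plot.py
    for bar in cpu_bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., 1.05*height,
                '%d' % int(height), ha='center', va='bottom')

    ax.set_xticks(x)
    ax.set_xticklabels(['bench-a', 'bench-b', 'bench-c'])
    ax.legend()
    plt.show()

Passing label= to bar() and calling ax.legend() is a lighter
alternative to building mpatches handles by hand, as the script does.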