#!/usr/bin/env python3
"""Blank out erroneously duplicated power readings in the raw result files.

For every platform listed in ``platforms`` the file
``results/<platform>/total.dat`` is rewritten in place: whenever a row
carries the same power value (column 2) as the row before it while the
value in column 1 differs by more than a threshold, the power field of the
later row is replaced by an empty string.

Requires Python 3 (the csv module needs text-mode files opened with
``newline=''``).
"""
import csv
import os
import shutil
from tempfile import NamedTemporaryFile

# List of all the platforms
platforms = ['cpu', 'gpu-primary', 'gpu-secondary']


def _is_erroneous_duplicate(previous_row, row, min_gap):
    """Return True when *row* repeats the power value of *previous_row*.

    Rows that are too short, or whose column 1 is not numeric, cannot be
    judged and are reported as "not a duplicate" so they are written
    through unchanged instead of aborting the whole run.
    """
    if len(previous_row) < 3 or len(row) < 3:
        return False
    if previous_row[2] != row[2]:
        return False
    try:
        gap = float(previous_row[1]) - float(row[1])
    except ValueError:
        return False
    # NOTE(review): the difference is signed (previous minus current), so a
    # duplicate is only detected when column 1 *decreases* by more than
    # min_gap. Kept exactly as the original wrote it -- confirm whether
    # abs() was intended.
    return gap > min_gap


def purgeDouble(platform, min_gap=2.5):
    """Rewrite ``results/<platform>/total.dat`` with duplicated readings blanked.

    Args:
        platform: Name of the sub-directory of ``results/`` to process.
        min_gap: Minimum difference in column 1 (previous row minus current
            row) for an equal power value to be treated as an erroneous
            duplicate. Defaults to 2.5, the value previously hard-coded.

    Raises:
        OSError: If the data file cannot be read or replaced.

    Every detected duplicate is reported on standard output. The comparison
    for the following row uses the row as written, i.e. with its power field
    already blanked.
    """
    # File containing the unprocessed raw data
    filename = 'results/' + platform + '/total.dat'

    # Temporary file for storing the changes. It lives next to the target so
    # the final move stays on one filesystem, and it is opened in text mode
    # with newline='' as the csv module requires.
    scratch = NamedTemporaryFile(
        mode='w',
        newline='',
        delete=False,
        dir=os.path.dirname(filename),
    )
    try:
        with open(filename, newline='') as csvFile, scratch:
            reader = csv.reader(csvFile, delimiter=',', quotechar='"')
            writer = csv.writer(scratch, delimiter=',', quotechar='"')

            # An empty list marks "no previous row yet" (first iteration).
            previousRow = []
            for row in reader:
                if _is_erroneous_duplicate(previousRow, row, min_gap):
                    print("duplicate found and removed in:")
                    print(previousRow)
                    print(row)
                    row[2] = ''
                writer.writerow(row)
                previousRow = row

        # Move the temporary file to overwrite the original file
        shutil.move(scratch.name, filename)
    except BaseException:
        # Do not leave the scratch file behind when anything goes wrong.
        scratch.close()
        if os.path.exists(scratch.name):
            os.remove(scratch.name)
        raise


def main():
    """Invoke purgeDouble for each known platform."""
    for platform in platforms:
        purgeDouble(platform)


if __name__ == '__main__':
    main()