preprocess.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. #!/usr/bin/env python
import csv
import os
import shutil
from tempfile import NamedTemporaryFile
  5. # Helper function that processes one platform at a time
  6. def purgeDouble(platform):
  7. # File containing the unprocessed raw data
  8. filename = 'results/' + platform + '/total.dat'
  9. #Temporary file for storing the changes
  10. tempfile = NamedTemporaryFile(delete=False)
  11. with open(filename, 'rb') as csvFile, tempfile:
  12. reader = csv.reader(csvFile, delimiter=',', quotechar='"')
  13. writer = csv.writer(tempfile, delimiter=',', quotechar='"')
  14. # At the first iteration we manually initialize the previousRow value
  15. previousRow = []
  16. for row in reader:
  17. # If we are not at the first iteration, the values of two power measurement are equal but the times are really different, it means that the data collection duplicated erroneously a value, so we set to null that field
  18. if previousRow != [] and previousRow[2] == row[2] and (float(previousRow[1]) - float(row[1])) > 2.5:
  19. print("duplicate found and removed in:")
  20. print(previousRow)
  21. print(row)
  22. row[2] = ''
  23. else:
  24. row = row
  25. writer.writerow(row)
  26. previousRow = row
  27. # Move the temporary file to overwrite the original file
  28. shutil.move(tempfile.name, filename)
  29. # List of all the platforms
  30. platforms = ['cpu', 'gpu-primary', 'gpu-secondary']
  31. # Invoke the purgeDouble function for each platform
  32. for platform in platforms:
  33. purgeDouble(platform)