# tune_cauchy.py (2.5 KB)
  1. import math
  2. import json
  3. import argparse
  4. import itertools
  5. from pathlib import Path
  6. from tuner import KernelTuner
  7. def forward_params_list(N):
  8. blocksize_params = ('MAX_BLOCK_SIZE_VALUE', [64, 128, 256, 512, 1024])
  9. thread_value_default = [2, 4, 8, 16, 32, 32, 32, 32, 32, 32]
  10. thread_values_supported = [2, 4, 8, 16, 32, 64, 128]
  11. log_N_half = int(math.log2(N)) - 1
  12. thread_values = []
  13. for val in thread_values_supported:
  14. if val <= N // 2:
  15. array = list(thread_value_default)
  16. array[log_N_half - 1] = val
  17. thread_values.append('{' + ', '.join(str(v) for v in array) + '}')
  18. thread_params = ('ITEMS_PER_THREAD_SYM_FWD_VALUES', thread_values)
  19. value_prod = itertools.product(thread_params[1], blocksize_params[1])
  20. params_list = [{thread_params[0]: value[0], blocksize_params[0]: value[1]}
  21. for value in value_prod]
  22. return params_list
  23. def backward_params_list(L):
  24. thread_value_supported = [8, 16, 32, 64, 128]
  25. thread_params = ('ITEMS_PER_THREAD_SYM_BWD_VALUE', [v for v in thread_value_supported
  26. if (L + v - 1) // v <= 1024])
  27. params_list = [{thread_params[0]: value} for value in thread_params[1]]
  28. return params_list
  29. parser = argparse.ArgumentParser(description='Tuning Cauchy multiply')
  30. parser.add_argument('--mode', default='forward', choices=['forward', 'backward'])
  31. parser.add_argument('-N', default=64, type=int)
  32. parser.add_argument('-L', default=2 ** 14, type=int)
  33. parser.add_argument('--filename', default='tuning_result.json')
  34. if __name__ == '__main__':
  35. args = parser.parse_args()
  36. extension_dir = Path(__file__).absolute().parent
  37. source_files = ['cauchy_cuda.cu']
  38. if args.mode == 'forward':
  39. params_list = forward_params_list(args.N)
  40. tuner = KernelTuner(extension_dir, source_files, params_list,
  41. benchmark_script='benchmark_cauchy_tune.py',
  42. benchmark_args=['--mode', 'forward', '-N', str(args.N), '-L', '16384'],
  43. npool=16)
  44. else:
  45. params_list = backward_params_list(args.L)
  46. tuner = KernelTuner(extension_dir, source_files, params_list,
  47. benchmark_script='benchmark_cauchy_tune.py',
  48. benchmark_args=['--mode', 'backward', '-N', '64', '-L', str(args.L)],
  49. npool=16)
  50. result = tuner.tune()
  51. with open(args.filename, 'w') as f:
  52. json.dump(result, f)