gendiff.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. #!/usr/bin/env python3
  2. """
  3. Clean up control characters and trailing whitespace in the listed source files
  4. and create a unified diff between them.
  5. If more or fewer than two files are listed on the command line, the cleanup is
  6. performed on all files, but no diff is created.
  7. The source files are assumed to be terminal output captured by the `script`
  8. command.
  9. The original source files are backed up with the .orig extension.
  10. """
  11. import sys,re
  12. from difflib import unified_diff
  13. fns = sys.argv[1:]
  14. translate = {
  15. '\r': '[CR]\n',
  16. '\b': '[BS]',
  17. }
  18. def cleanup_file(fn):
  19. data = open(fn).read()
  20. def gen_text():
  21. for line in data.splitlines():
  22. line = re.sub('\r\n','\n',line) # DOS CRLF to Unix LF
  23. line = line.translate({ord(a):b for a,b in translate.items()})
  24. line = re.sub(r'\s+$','',line) # trailing whitespace
  25. yield line
  26. ret = list(gen_text())
  27. open(fn+'.orig','w').write(data)
  28. open(fn,'w').write('\n'.join(ret))
  29. return ret
  30. cleaned_texts = [cleanup_file(fn) for fn in fns]
  31. if len(fns) == 2:
  32. """
  33. chunk headers have trailing newlines, hence the rstrip()
  34. """
  35. print(
  36. f'diff a/{fns[0]} b/{fns[1]}\n' +
  37. '\n'.join(a.rstrip() for a in unified_diff(*cleaned_texts,fromfile=f'a/{fns[0]}',tofile=f'b/{fns[1]}'))
  38. )
  39. else:
  40. print(f'{len(fns)} input files. Not generating diff.')