2021-10-06 13:22:34 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
"""
|
2022-11-14 09:54:07 +00:00
|
|
|
scripts/gendiff.py:
|
|
|
|
|
|
2021-10-06 13:22:34 +00:00
|
|
|
Clean up control characters and trailing whitespace in the listed source files
|
|
|
|
|
and create a unified diff between them.
|
|
|
|
|
|
|
|
|
|
If more or fewer than two files are listed on the command line, the cleanup is
|
|
|
|
|
performed on all files, but no diff is created.
|
|
|
|
|
|
|
|
|
|
The source files are assumed to be terminal output captured by the `script`
|
|
|
|
|
command.
|
|
|
|
|
|
2022-01-15 14:00:10 +00:00
|
|
|
The cleaned source files are saved with the .clean extension.
|
2021-10-06 13:22:34 +00:00
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import sys,re
|
|
|
|
|
from difflib import unified_diff
|
|
|
|
|
|
2024-10-08 12:55:58 +00:00
|
|
|
def parse_cmdline(args):
    """
    Split the command-line arguments into input filenames and diff options.

    Everything before the first ``--`` is treated as an input filename and
    everything after it as options for the external ``diff`` command.

    Returns a ``(filenames, diff_opts)`` tuple.  ``diff_opts`` is ``None``
    when no ``--`` separator is present, and a (possibly empty) list when it
    is.
    """
    if '--' in args:
        sep = args.index('--')
        return args[:sep], args[sep + 1:]
    return list(args), None


# Keep every listed file: the module docstring promises that all files are
# cleaned when more or fewer than two are given (only the diff is skipped).
fns, diff_opts = parse_cmdline(sys.argv[1:])
|
2021-10-06 13:22:34 +00:00
|
|
|
|
|
|
|
|
# Per-character substitutions applied to every line before trailing
# whitespace is stripped.  A value of None deletes the character.
translate = {
    '\r': None,
    '\b': '[BS]',
    # chr(4): '', # Ctrl-D, EOT
}

# Trailing-whitespace pattern, compiled once instead of once per line.
_trailing_ws = re.compile(r'\s+$')


def cleanup_file(fn):
    """
    Clean the text in file `fn` and save the result to ``{fn}.clean``.

    The file is read as bytes and decoded with the default codec (UTF-8).
    Each line has the characters listed in `translate` substituted and its
    trailing whitespace removed.  A progress message is written to stderr.

    Returns the cleaned lines as a list of strings without line terminators.
    """
    # must use binary mode to prevent conversion of DOS CR into newline
    with open(fn, 'rb') as fp:
        data = fp.read().decode()

    # Build the ordinal-keyed translation table once per file, not per line.
    table = str.maketrans(translate)

    # Split on '\n' only; do not use splitlines(), which also breaks lines
    # on CR and other separator characters.
    ret = [_trailing_ws.sub('', line.translate(table)) for line in data.split('\n')]

    sys.stderr.write(f'Saving cleaned file to {fn}.clean\n')

    # Write with the same codec the input was decoded with, and keep '\n'
    # verbatim so that no CRs are reintroduced by newline translation.
    with open(f'{fn}.clean', 'w', encoding='utf-8', newline='\n') as fp:
        fp.write('\n'.join(ret))

    return ret
|
|
|
|
|
|
2022-01-15 14:00:10 +00:00
|
|
|
# Script driver: clean every listed file, then diff them if exactly two
# files were given.
if len(fns) != 2:
    sys.stderr.write(f'{len(fns)} input files. Not generating diff.\n')

cleaned_texts = [cleanup_file(fn) for fn in fns]

if len(fns) == 2:
    sys.stderr.write('Generating diff\n')
    if diff_opts:
        # Options were supplied after `--`: run the external diff command on
        # the saved .clean files and pass those options through to it.
        from subprocess import run
        run(['diff', '-u', *diff_opts, *(f'{fn}.clean' for fn in fns)])
    else:
        from_label = f'a/{fns[0]}'
        to_label = f'b/{fns[1]}'
        diff_lines = unified_diff(*cleaned_texts, fromfile=from_label, tofile=to_label)
        # chunk headers have trailing newlines, hence the rstrip()
        print(
            f'diff {from_label} {to_label}\n' +
            '\n'.join(a.rstrip() for a in diff_lines)
        )
|