Do not rewrite file unless necessary

This commit is contained in:
Thierry Deo 2021-09-28 23:33:51 +02:00
parent cf059f0d24
commit 5c0618dc82
No known key found for this signature in database
GPG key ID: 01D05452427CEF66
2 changed files with 36 additions and 50 deletions

View file

@ -1,51 +1,43 @@
import argparse
import os
from typing import IO
from typing import Optional
from typing import Sequence
def fix_file(file_obj: IO[bytes]) -> int:
    """Normalise the end of an open binary file in place.

    The stream must be seekable and writable.  After the call the file
    ends with exactly one line break (``\\n``, ``\\r\\n`` or ``\\r``), or
    is empty if it contained nothing but line breaks.

    Returns 1 if the file was modified, 0 otherwise.
    """
    # Test for newline at end of file.
    # Seeking before the start of an empty file raises OSError.
    try:
        file_obj.seek(-1, os.SEEK_END)
    except OSError:
        return 0
    last_character = file_obj.read(1)
    # last_character will be b'' for an empty file
    if last_character not in {b'\n', b'\r'} and last_character != b'':
        # Needs this seek for windows, otherwise IOError
        file_obj.seek(0, os.SEEK_END)
        file_obj.write(b'\n')
        return 1

    while last_character in {b'\n', b'\r'}:
        # Deal with the beginning of the file
        if file_obj.tell() == 1:
            # If we've reached the beginning of the file and it is all
            # linebreaks then we can make this file empty
            file_obj.seek(0)
            file_obj.truncate()
            return 1

        # Go back two bytes and read a character
        file_obj.seek(-2, os.SEEK_CUR)
        last_character = file_obj.read(1)

    # Our current position is at the end of the file just before any amount of
    # newlines.  If we find extraneous newlines, then backtrack and trim them.
    position = file_obj.tell()
    remaining = file_obj.read()
    for sequence in (b'\n', b'\r\n', b'\r'):
        if remaining == sequence:
            return 0
        elif remaining.startswith(sequence):
            file_obj.seek(position + len(sequence))
            file_obj.truncate()
            return 1

    return 0


def _process_file(file_obj: bytes) -> bytes:
    """Return ``file_obj`` with its trailing line breaks normalised.

    ``file_obj`` is the complete file content as bytes.  The result ends
    with exactly one line break:

    * content without a trailing line break gets ``\\n`` appended;
    * a run of trailing line breaks is cut down to the first one of the
      run (``\\r\\n`` counts as a single line break);
    * content that is empty or made only of line breaks becomes ``b''``.
    """
    # Everything up to (not including) the trailing run of CR/LF bytes.
    body = file_obj.rstrip(b'\r\n')
    if not body:
        return b''

    trailing = file_obj[len(body):]
    if not trailing:
        return file_obj + b'\n'

    # ``trailing`` only holds CR/LF bytes, so its first line break is
    # either the CRLF pair or its first single byte.
    first_break = b'\r\n' if trailing.startswith(b'\r\n') else trailing[:1]
    return body + first_break
def _fix_file(filename: str) -> bool:
    """Fix the end of the file at ``filename``, writing only on change.

    The file is read in binary mode and passed through ``_process_file``.
    It is reopened for writing only when the processed content differs
    from what was read, so an already-correct file is never rewritten.

    Returns True if the file was rewritten, False if it was left alone.
    """
    with open(filename, mode='rb') as source:
        original = source.read()

    fixed = _process_file(original)
    if fixed == original:
        return False

    with open(filename, mode='wb') as target:
        target.write(fixed)
    return True
def main(argv: Optional[Sequence[str]] = None) -> int:
@ -57,11 +49,9 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
for filename in args.filenames:
# Read as binary so we can read byte-by-byte
with open(filename, 'rb+') as file_obj:
ret_for_file = fix_file(file_obj)
if ret_for_file:
print(f'Fixing {filename}')
retv |= ret_for_file
if _fix_file(filename):
print(f'Fixing {filename}')
retv = 1
return retv