Replies: 2 comments
-
I found this and adjusted it to my needs. But perhaps there is an easier way?
#!/usr/bin/env python3
import argparse
import os
import sys
import subprocess
import tempfile
import uuid
from chardet import detect
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# Scratch directory (as a bytes path) for temporary files; assigned further
# down, before the filter is run.
tmpdir = None
# Maps the id of an already-converted blob to the id of its replacement blob,
# so that a blob seen again is reused instead of being inserted twice.
blobs_handled = {}
# The `git cat-file --batch` subprocess used to read blob contents; started
# further down, before the filter is run.
cat_file_process = None
# (marker in the `file` output, tail of the log message) for every encoding
# guess that must be left untouched.  Checked in this order.
_SKIPPED_ENCODINGS = (
    ("utf-8", "] skipping."),
    ("us-ascii", "] no conversion needed. skipping."),
    ("unknown-8bit", "] skipping."),
    ("binary", "] skipping."),
)


def _read_blob(blob_id):
    """Return the raw contents of blob ``blob_id`` using the cat-file helper."""
    cat_file_process.stdin.write(blob_id + b'\n')
    cat_file_process.stdin.flush()
    _objhash, _objtype, objsize = cat_file_process.stdout.readline().split()
    # In batch mode the object contents are followed by one extra newline
    # that is not part of the blob; read it and drop it right away so it can
    # never take part in decoding.
    return cat_file_process.stdout.read(int(objsize) + 1)[:-1]


def _detect_encoding(contents):
    """Return the encoding that file(1) guesses for ``contents``."""
    # chardet didn't detect the files correctly, but the linux `file` tool
    # did, so `file` is used for encoding detection.
    probe = os.path.join(tmpdir, os.fsencode(str(uuid.uuid4()) + ".java"))
    with open(probe, "wb") as f:
        f.write(contents)
    try:
        result = subprocess.run(
            ['file', '--brief', '--mime-encoding', '--print0',
             os.fsdecode(probe)],
            stdout=subprocess.PIPE)
    finally:
        # Remove the probe file even if running `file` failed
        os.remove(probe)
    return result.stdout.decode('utf-8').strip()


def lint_with_real_filenames(commit, metadata):
    """Commit callback: re-encode the ``.java`` files of ``commit`` as UTF-8.

    Every non-deleted file change whose name ends in ``.java`` (case
    insensitive) has its blob read, its encoding guessed with file(1) and,
    unless the guess is utf-8, us-ascii, unknown-8bit or binary, its contents
    re-encoded as UTF-8.  The re-encoded contents are inserted into the filter
    stream as a new blob and the change is pointed at that blob.

    A blob that was converted before (recorded in ``blobs_handled``) is
    replaced directly, whatever the file name, without being read again.
    Blobs whose guessed encoding is unknown to Python, or whose contents do
    not decode with it, are reported and left unchanged.

    Args:
        commit: the commit being filtered; its ``file_changes`` are updated
            in place.
        metadata: unused here; part of the callback signature.
    """
    for change in commit.file_changes:
        # Reuse an earlier conversion of the very same blob
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D':
            continue
        if not change.filename.lower().endswith(b".java"):
            continue

        # Only used for log output, so undecodable names must not abort
        shown_name = change.filename.decode("utf-8", "replace")
        contents = _read_blob(change.blob_id)
        encoding = _detect_encoding(contents)

        skip_note = next(
            (note for marker, note in _SKIPPED_ENCODINGS if marker in encoding),
            None)
        if skip_note is not None:
            print("encoding guess of java file: [", shown_name, "] was [", encoding, skip_note)
            continue
        print("encoding guess of java file: [", shown_name, "] was [", encoding, "]")

        # Decode exactly the blob bytes.  A stray trailing byte would make
        # multi-byte encodings such as utf-16 fail with "truncated data".
        try:
            converted = contents.decode(encoding).encode("utf-8")
        except (LookupError, UnicodeDecodeError) as err:
            print("could not convert java file: [", shown_name, "] from [", encoding, "]:", err, "skipping.")
            continue

        # Insert the new contents into the filter's stream
        blob = fr.Blob(converted)
        filter.insert(blob)
        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
args = fr.FilteringOptions.default_options()
args.force = True
# actually start formatting procedure
# The scratch directory is removed again when the block is left, also on error
with tempfile.TemporaryDirectory() as scratch_dir:
    # The commit callback joins this with bytes file names
    tmpdir = os.fsencode(scratch_dir)
    cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
    try:
        filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
        filter.run()
    finally:
        # Always shut the helper down: closing its stdin makes it exit
        cat_file_process.stdin.close()
        cat_file_process.wait()
Beta Was this translation helpful? Give feedback.
-
The example you found was essentially a copy of contrib/filter-repo-demos/lint-history in this repository, with a few small tweaks. I suspect it pre-dated the --relevant option of lint-history. If you made a simple script that took a single filename as a parameter and converted that file to UTF-8, you could run that script through lint-history, using --relevant to restrict it to the "*.java" files. But copying the contrib/filter-repo-demos/lint-history file and tweaking it to suit your needs (as you indirectly did via copying that other example you found) works too. |
Beta Was this translation helpful? Give feedback.
-
Is there any way to filter specific files without touching the other files/directories? My goal is to convert all "*.java" files in a project to UTF-8. If I do:
the Java files are converted perfectly, but I lose all other files. Or did I miss something?
Beta Was this translation helpful? Give feedback.
All reactions