| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- # -*- coding: utf-8 -*-
- """Convert unicode escape sequences to Chinese characters in source files."""
- import re
- import os
- import codecs
- BASE_DIR = r'f:\Workspace2016\BidiRAG'
def decode_unicode_in_file(filepath):
    """Decode literal ``\\uXXXX`` escape sequences in a UTF-8 text file, in place.

    Every backslash-u escape followed by four hex digits is replaced with the
    character it names. A high surrogate escape immediately followed by a low
    surrogate escape (the UTF-16 spelling of a character outside the Basic
    Multilingual Plane) is combined into that single character. A lone
    surrogate escape is left untouched, because a lone surrogate cannot be
    encoded as UTF-8 and would make the write fail after the file had already
    been truncated.

    Line endings are preserved exactly as found (no newline translation).

    Args:
        filepath: Path of the file to rewrite. It is read and written as UTF-8.

    Returns:
        True if the file content changed and was rewritten, False if the file
        was left untouched.

    Raises:
        OSError: If the file cannot be opened, read, or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # First alternative: a high+low surrogate pair; second: any single escape.
    escape_re = re.compile(
        r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
        r'|\\u([0-9a-fA-F]{4})'
    )

    def replace_unicode(match):
        high, low, single = match.groups()
        if high is not None:
            # Standard UTF-16 surrogate-pair arithmetic.
            code = (
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
            return chr(code)
        code = int(single, 16)
        if 0xD800 <= code <= 0xDFFF:
            # Lone surrogate: keep the escape text, it is not encodable.
            return match.group(0)
        # NOTE(review): escapes for control characters or quotes (for example
        # \u000a or \u0022) are decoded too, which can change the meaning of a
        # string literal in a source file -- confirm that is acceptable.
        return chr(code)

    # newline='' disables newline translation so CRLF/LF survive the round trip.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    new_content = escape_re.sub(replace_unicode, content)
    if new_content == content:
        # Nothing decodable: do not touch the file at all.
        return False

    with open(filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(new_content)

    return True
# Project files, given relative to BASE_DIR, that are passed to
# decode_unicode_in_file one by one.
files_to_convert = [
    os.path.join(BASE_DIR, *relative_parts)
    for relative_parts in (
        ('bdirag', 'rag_methods.py'),
        ('bdirag', 'config.py'),
        ('examples', 'test_bm25.py'),
    )
]

for filepath in files_to_convert:
    # Report missing files and move on instead of failing the whole run.
    if not os.path.exists(filepath):
        print("File not found: {}".format(filepath))
        continue
    changed = decode_unicode_in_file(filepath)
    status = "Converted: {}" if changed else "No changes needed: {}"
    print(status.format(filepath))

print("Done!")
|