# Repository: luojiehua/BidiRag


			
				
					
						
						
# -*- coding: utf-8 -*-
"""Convert unicode escape sequences to Chinese characters in source files."""
import re
import os
import codecs

# Root directory that the target file paths below are joined onto.
# NOTE(review): hard-coded absolute Windows path — adjust it when running
# this script from a different checkout location.
BASE_DIR = r'f:\Workspace2016\BidiRAG'

def decode_unicode_in_file(filepath):
    r"""Replace literal \uXXXX escape sequences in a file with real characters.

    The file is read as UTF-8, every six-character ``\uXXXX`` escape is
    replaced by the character it denotes, and the result is written back to
    the same path as UTF-8.

    Two adjacent escapes that form a UTF-16 surrogate pair (for example
    ``\ud83d\ude00``) are combined into the single character they encode.
    An unpaired surrogate escape is left untouched, because a lone surrogate
    cannot be encoded as UTF-8 and would make the write fail after the file
    had already been truncated.

    Line endings are preserved exactly (no newline translation), and the
    file is only rewritten when its content actually changes.

    Args:
        filepath: Path of the UTF-8 text file to convert in place.

    Returns:
        True if the file was rewritten, False if it contained nothing to
        convert.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # First alternative: a high surrogate (D800-DBFF) immediately followed by
    # a low surrogate (DC00-DFFF). Second alternative: any single escape.
    escape_re = re.compile(
        r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
        r'|\\u([0-9a-fA-F]{4})'
    )

    def replace_unicode(match):
        high, low, single = match.groups()
        if single is None:
            # Combine the surrogate pair into one supplementary-plane char.
            code_point = (
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
            return chr(code_point)
        code_point = int(single, 16)
        if 0xD800 <= code_point <= 0xDFFF:
            # Unpaired surrogate: keep the escape text as it is.
            return match.group(0)
        return chr(code_point)

    # newline='' disables newline translation so CRLF/LF survive round-trip.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    new_content = escape_re.sub(replace_unicode, content)
    if new_content == content:
        return False

    with open(filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(new_content)

    return True

# Target files, each given as path components relative to BASE_DIR.
files_to_convert = [
    os.path.join(BASE_DIR, *parts)
    for parts in (
        ('bdirag', 'rag_methods.py'),
        ('bdirag', 'config.py'),
        ('examples', 'test_bm25.py'),
    )
]

# Convert each target in turn and report one status line per file.
for filepath in files_to_convert:
    if not os.path.exists(filepath):
        print("File not found: {}".format(filepath))
        continue

    changed = decode_unicode_in_file(filepath)
    print(
        ("Converted: {}" if changed else "No changes needed: {}").format(filepath)
    )

print("Done!")