Comments
Это делается в два этапа: сначала проходим по переводу и с помощью LLM выбираем все имена собственные вот таким промптом:
sys_prompt_names = f"""
identify all personal names in text fragment and return them separated by comma. Don't add conmment and don't translate - """
сохраняя все имена в текстовый файл, потом делаем список имен уникальным:
import codecs
from collections import Counter
import argparse
def process_unique_names(input_filename, output_filename='unique-names.txt'):
    """Count how often each name occurs in a file and write the tally as CSV.

    Every input line is treated as a comma-separated list of names. Besides
    the ASCII comma, the full-width comma (U+FF0C) and the ideographic comma
    (U+3001) are accepted as separators. Names are stripped of surrounding
    whitespace, empty entries are dropped, and any entry containing an
    ideographic full stop (U+3002) is discarded as noise.

    The result is written to ``output_filename`` as UTF-8, one
    ``name,count`` row per unique name, in order of first appearance.

    Args:
        input_filename: Path to the UTF-8 text file to read.
        output_filename: Path of the file to write.

    Returns:
        None. Progress is reported on stdout; I/O and decoding errors are
        printed rather than raised.
    """
    # Spelled as escapes on purpose: a tool that normalises punctuation could
    # otherwise turn the full-width comma into a plain ASCII one, which makes
    # the replacement below a silent no-op.
    fullwidth_comma = "\uff0c"
    ideographic_comma = "\u3001"
    ideographic_full_stop = "\u3002"

    name_counts = Counter()
    print(f"Reading {input_filename}...")
    try:
        # Context managers guarantee both file handles are closed.
        with open(input_filename, 'r', encoding='utf-8') as file:
            for line in file:
                # Fold every supported separator into the ASCII comma.
                line = line.replace(fullwidth_comma, ",").replace(ideographic_comma, ",")
                names = (n.strip() for n in line.split(','))
                # An entry with a full stop is most likely a sentence the
                # extraction step emitted by mistake, not a name.
                name_counts.update(
                    n for n in names if n and ideographic_full_stop not in n
                )
        print(f"Found {len(name_counts)} unique names. Writing to {output_filename}...")
        with open(output_filename, 'w', encoding='utf-8') as file_o:
            file_o.writelines(
                f"{name},{count}\n" for name, count in name_counts.items()
            )
        print("Success.")
    except FileNotFoundError as e:
        # Report the path that actually failed: it may be the output path
        # (missing directory), not only the input file.
        print(f"Error: File '{e.filename}' not found.")
    except Exception as e:
        # Script-level boundary: report the problem instead of a traceback.
        print(f"An error occurred: {e}")
if __name__ == "__main__":
    # Command-line entry point: the only argument is the positional input
    # path; the output file falls back to the function's default name.
    cli = argparse.ArgumentParser(
        description="Generate a unique list of names with counts from a file.",
    )
    cli.add_argument("filename", help="Path to the input text file.")
    process_unique_names(cli.parse_args().filename)
Ну а в получившийся csv добавляем колонку с переводом и полом и подсовываем его скрипту перевода.

Это делается в 2 этапа -…