from html.entities import html5
from pathlib import Path

from django import setup
from os import environ
from re import compile

from django.db.models import QuerySet

# Standalone-script bootstrap: point Django at the project settings (unless
# DJANGO_SETTINGS_MODULE is already set in the environment) and initialise it.
environ.setdefault('DJANGO_SETTINGS_MODULE', 'eloquent.settings')
setup()
# NOTE: this import is deliberately placed after setup(); Django model modules
# can only be imported once the app registry has been populated.
from elo.models import Word


class ConvertChinese:
    """Decode decimal numeric character references in translated texts.

    Every ``&#NNNN;`` sequence found in ``translated_text`` is replaced by
    the character with that decimal code point. Text surrounding the
    references is kept as it is.
    """

    def __init__(self):
        # One or more digits are required, so a bare '&#;' is not a match.
        self.pat = compile(r'&#(\d+);')

    def run(self, qs: 'QuerySet'):
        """Decode the references of every word in *qs* and save the changes.

        :param qs: iterable of objects exposing ``translated_text``,
            ``source_text`` and ``save()``.
        """
        for word in qs:
            tx = word.translated_text
            if not tx:
                # Nothing to decode in an empty or missing translation.
                continue

            decoded = self.pat.sub(self._decode, tx)
            if decoded != tx:
                word.translated_text = decoded
                word.save()
                print(word.source_text, word.translated_text)

    @staticmethod
    def _decode(m):
        """Return the character for a matched reference.

        The matched text is returned unchanged when the number is not a
        valid code point, so one bad reference cannot abort the whole run.
        """
        try:
            return chr(int(m.group(1)))
        except (ValueError, OverflowError):
            return m.group(0)


class ConvertUmlaut(ConvertChinese):
    """Decode named HTML entities (``&auml;``, ``&szlig;`` ...) in texts.

    In addition to the named entities, the numeric apostrophe reference
    ``&#039;`` is replaced by ``'``.
    """

    def __init__(self):
        super().__init__()
        # Overrides the parent's pattern: match named entities.
        self.pat = compile(r'&(\w+);')

    def run(self, qs):
        """Decode the entities of every word in *qs* and save the changes.

        :param qs: iterable of objects exposing ``translated_text`` and
            ``save()``.
        """
        for word in qs:
            tx = word.translated_text
            if not tx:
                # Nothing to decode in an empty or missing translation.
                continue

            replaced = self.pat.sub(self.h5, tx)
            # '#' is not a word character, so the pattern above never
            # matches this reference; handle it separately.
            replaced = replaced.replace('&#039;', "'")

            if replaced != tx:
                print(tx, '->', replaced)
                word.translated_text = replaced
                word.save()

    def h5(self, m):
        """Return the character(s) for a matched named entity.

        The keys of ``html5`` include the trailing semicolon; only a small
        legacy subset is also present without it. Looking the name up with
        the semicolon therefore covers every known entity. Unknown names
        are returned unchanged instead of raising ``KeyError``.
        """
        return html5.get(m.group(1) + ';', m.group(0))


if __name__ == '__main__':
    # Script entry point: run the named-entity conversion over every Word.
    all_words = Word.objects.all()
    ConvertUmlaut().run(all_words)
