from django import setup
from os import environ

# Standalone-script bootstrap: point Django at the project settings (unless
# DJANGO_SETTINGS_MODULE is already set in the environment) and initialise
# Django before anything touches the ORM.
environ.setdefault("DJANGO_SETTINGS_MODULE", "eloquent.settings")
setup()

# NOTE: the model imports below must stay *after* setup(); Django raises
# AppRegistryNotReady when models are imported before the app registry is
# populated. Do not let an import sorter hoist them to the top of the file.
from elo.models import Word, WordDialect, Translation
from pathlib import Path
from elo import logger


class RemoveDuplicateWords:
    """Maintenance routines that purge duplicated dictionary rows.

    Each ``remove_*`` method collects the ids of redundant rows and deletes
    them with a single bulk ``delete()``. ``mark_unused_mp3`` only reports
    how many mp3 files on disk are referenced by a ``Word``.
    """

    def __init__(self):
        # Lazy queryset over every Word; nothing is fetched until iteration.
        self.words = Word.objects.all()

    @staticmethod
    def _split_first_seen(items, key):
        """Partition *items* into first occurrences and later repeats.

        Args:
            items: iterable of arbitrary objects, visited in order.
            key: callable returning a hashable grouping key for an item.

        Returns:
            A ``(kept, repeats)`` tuple of lists. ``kept`` holds the first
            item seen for every key, ``repeats`` every later item whose key
            had already been seen. Relative order is preserved in both.
        """
        seen = set()
        kept = []
        repeats = []
        for item in items:
            item_key = key(item)
            if item_key in seen:
                repeats.append(item)
            else:
                seen.add(item_key)
                kept.append(item)
        return kept, repeats

    @staticmethod
    def _in_first_order(queryset):
        """Return *queryset* in the order ``QuerySet.first()`` would use.

        ``first()`` honours an existing ordering (``order_by`` or the model's
        ``Meta.ordering``) and otherwise falls back to the primary key, so
        the row kept per group is the same one ``first()`` would return.
        """
        return queryset if queryset.ordered else queryset.order_by('pk')

    def remove_duplicates(self):
        """Delete unused ``Word`` rows that duplicate another ``Word``.

        Words are grouped by ``(source_language, source_text)``. Only groups
        with at least two rows are considered, so a unique word is never
        deleted just because nothing references it yet. Inside a group:

        * rows referenced by a ``Translation`` or ``WordDialect`` are kept;
        * unreferenced rows are deleted, except that when *no* row of the
          group is referenced the one with the lowest primary key survives.

        Grouping is done in Python, so text comparison is exact regardless
        of the database collation.
        """
        groups = {}
        for word in self.words.order_by('pk'):
            group_key = (word.source_language, word.source_text)
            groups.setdefault(group_key, []).append(word)

        id_set = set()
        for members in groups.values():
            if len(members) < 2:
                continue  # unique word: there is nothing to deduplicate

            # The usage check hits the database, so it is only done for rows
            # that really have a sibling.
            unused = [
                word for word in members
                if not (Translation.objects.filter(word=word).exists()
                        or WordDialect.objects.filter(word=word).exists())
            ]
            if len(unused) == len(members):
                # Nobody references any copy: keep the oldest one.
                unused = unused[1:]
            id_set.update(word.id for word in unused)

        logger.info(f'length of words to delete: {len(id_set)}')
        logger.info(id_set)
        Word.objects.filter(id__in=id_set).delete()

    def remove_duplicate_translations(self):
        """Delete ``Translation`` rows repeating the same word, lang and text.

        The table is read once; for every ``(word, lang, text)`` combination
        the first row (in ``QuerySet.first()`` order) is kept and all later
        ones are deleted. Grouping is done in Python, so text comparison is
        exact regardless of the database collation.
        """
        # select_related avoids one extra query per row for ``row.word``.
        rows = self._in_first_order(Translation.objects.select_related('word'))
        kept, repeats = self._split_first_seen(
            rows, key=lambda row: (row.word, row.lang, row.text))

        for row in kept:
            logger.debug(f'{row.word}-{row.id} {row.text} skipped')
        for row in repeats:
            logger.debug(f'{row.word}-{row.id} erased')

        id_set = {row.id for row in repeats}
        Translation.objects.filter(id__in=id_set).delete()

    def remove_duplicate_worddialect(self):
        """Delete ``WordDialect`` rows repeating the same word and dialect.

        The table is read once; for every ``(word, dialect)`` pair the first
        row (in ``QuerySet.first()`` order) is kept and all later ones are
        deleted.
        """
        # select_related avoids one extra query per row for ``row.word``.
        rows = self._in_first_order(WordDialect.objects.select_related('word'))
        kept, repeats = self._split_first_seen(
            rows, key=lambda row: (row.word, row.dialect))

        for row in kept:
            logger.debug(f'skipped {row.word} {row.dialect}')
        for row in repeats:
            logger.debug(f'erased {row.word} {row.dialect}')

        id_set = {row.id for row in repeats}
        WordDialect.objects.filter(id__in=id_set).delete()

    def mark_unused_mp3(self):
        """Count mp3 files under ``media/mp3`` that no ``Word`` refers to.

        Despite the name, nothing is marked, moved or deleted: the method
        only logs how many files are referenced by ``Word.mp3`` and how many
        are not. ``media/mp3`` is resolved against the current working
        directory.
        """
        p = Path('media/mp3')
        found_counter = 0
        not_found_counter = 0
        for mp3 in p.glob('*.mp3'):
            # Compare against the path relative to ``media``, written with
            # forward slashes so the lookup does not depend on the OS path
            # separator.
            name = mp3.relative_to('media').as_posix()
            if Word.objects.filter(mp3=name).exists():
                found_counter += 1
            else:
                not_found_counter += 1

        logger.info(f"not used {not_found_counter}, used {found_counter}")


if __name__ == "__main__":
    # Log the WordDialect row count before and after the clean-up so the
    # number of removed rows can be read off the log.
    logger.info(f'number of worddialect = {WordDialect.objects.count()}')
    rd = RemoveDuplicateWords()
    # Word de-duplication is intentionally not run here; enable when needed.
    # rd.remove_duplicates()
    rd.remove_duplicate_worddialect()
    # Same label as the "before" line: the value is the WordDialect count,
    # not the Word count.
    logger.info(f'number of worddialect = {WordDialect.objects.count()}')
