import re
from pathlib import Path

from django import setup
from os import environ
from django.db.models.functions import Lower

from elo import logger

# Standalone-script bootstrap: point Django at the project settings (unless
# DJANGO_SETTINGS_MODULE is already set in the environment) and initialise it.
# This has to run before the ``elo.models`` import further down, which is why
# that import is not grouped with the others at the top of the file.
environ.setdefault("DJANGO_SETTINGS_MODULE", "eloquent.settings")
setup()

from elo.models import Word


class FixBrokenMp3Names:
    r"""
    Repair ``Word.mp3`` references whose file no longer exists on disk.

    when django uploads a file into a FileField, the following rules apply:
    1. if the file already exists, a new file is created.
    2. if a new file is created, it is given a suffix in the form _\w{7}
    3. if the file contains blanks or parentheses the following rule is applied:
       word (extra).mp3 ==> word_extra.mp3

    Attributes:
        media_root: directory the stored mp3 names are resolved against.
        words: queryset of the words that get checked by :meth:`go`.
        errors: words for which :meth:`go` could not find a usable file.
    """

    # Default media directory; relative, so resolved against the current
    # working directory. Can be overridden per instance via ``__init__``.
    media_root = Path("media")

    # "<stem>_<suffix>.mp3": the stem is greedy, so only the LAST "_xxx" chunk
    # is treated as the suffix ("word_extra_Ab3dE9x.mp3" -> stem "word_extra").
    pat = re.compile(r"([\w.]*)(_\w+)(\.mp3)$")
    # "<word> (<extra>).mp3", i.e. a name that still has the blank/parentheses.
    bad_pat = re.compile(r"(.*) \((.*)\)\.mp3$")

    def __init__(self, media_root=None):
        """
        :param media_root: optional media directory (str or Path); when omitted
            the class default ``media`` is used.
        """
        if media_root is not None:
            self.media_root = Path(media_root)
        # words with source_language=1, ordered case-insensitively by text
        # (original author's note: problem manifests only in English words)
        self.words = Word.objects.filter(source_language=1).order_by(Lower("source_text"))
        self.errors = []

    def change_bad_django_names(self):
        """
        change the names of files appertaining to rule (3)
        so

        fancyword (adj).mp3

        would become

        fancyword_adj.mp3

        Files are renamed in place inside ``<media_root>/mp3``. A file is left
        untouched (and a warning is logged) when the target name is already
        taken, so that no existing file gets overwritten.

        :return: None
        """
        mp3_dir = self.media_root / "mp3"
        # Materialise the listing first: the directory is modified while we loop.
        for f in sorted(mp3_dir.glob("*.mp3")):
            m = self.bad_pat.search(f.name)
            if not m:
                continue
            a, b = m.groups()
            # with_name keeps the file in its directory; a bare relative name
            # passed to rename() would be resolved against the working
            # directory and move the file out of the media folder.
            target = f.with_name(f"{a}_{b}.mp3")
            if target.exists():
                logger.warning(f"not renaming {f}: {target} already exists")
                continue
            f.rename(target)

    def go(self):
        """
        Check every word's mp3 file and repoint broken references.

        For a word whose stored file is missing, the trailing ``_suffix`` is
        stripped from the file name; if a file with that shorter name exists
        in the same directory, the word is updated and saved. Every word that
        cannot be repaired is collected in ``self.errors`` and reported via
        the logger at the end.

        :return: None
        """
        media = self.media_root
        self.errors = []  # start clean so repeated runs do not accumulate

        for word in self.words:
            stored = word.mp3.name
            if not stored:
                # no mp3 attached to this word, nothing to verify
                continue

            path = media / stored
            if path.exists():
                # file ok
                continue

            # try truncating the superfluous suffix django appended
            m = self.pat.match(path.name)
            if m is None:
                # missing file whose name has no strippable suffix: report it
                # instead of dropping it silently
                logger.error(f"not found file {path.name}")
                self.errors.append(word)
                continue

            a, _suffix, c = m.groups()
            # same directory as the stored name, suffix removed
            new_name = Path(stored).with_name(a + c).as_posix()
            filename = media / new_name
            logger.info(f"{filename=}")
            if filename.exists():
                word.mp3.name = new_name
                word.save()
                logger.info(f"saved file {word.mp3}")
            else:
                logger.error(f"not found file {a + c}")
                self.errors.append(word)

        if not self.errors:
            logger.info("no unresolved mp3 references")
            return

        logger.error(f"no equivalent found for {len(self.errors)} words")
        logger.error("words for whom no equivalent has been found:")
        for w in self.errors:
            logger.error(f"            {w.id}, {w.source_text}, {w.mp3}")



if __name__ == '__main__':
    # Script entry point: build the fixer and repair the broken mp3 references.
    FixBrokenMp3Names().go()
