ChatGPT failed to solve a relatively easy problem, after 2 weeks of trying

I tried for 2 weeks in a row, day after day, to modify a piece of Python code. I tried every option I could think of. However, ChatGPT failed to solve a relatively easy problem.

The problem is simple. I have the following code, which contains some data “ro_tags” and “en_tags”. It correctly displays the desired results.

I told ChatGPT to make only one change: instead of reading those two lists from the code, it should read them from the txt file d:\3\PROBEMA\rezultate_RO+EN.txt

The txt file, as you can see, contains the same lines as the data in the code.

So, all it has to do is read from the txt file instead of reading the data from the code. I asked the same thing every day, 2 hours per day. ChatGPT needs to keep the exact logic of the code file, but instead of loading the ro_tags and en_tags data directly from the code, it should read them from the rezultate_RO+EN.txt file.

ChatGPT failed each time.

import re
from typing import List, Dict, Tuple
from bs4 import BeautifulSoup

class EnhancedTagAnalyzer:
    """Pair a list of Romanian (RO) tag lines with an English (EN) list.

    Every tag line is expected to begin with a sequence number followed by a
    dot, e.g. ``5.C <p class="text_obisnuit">...</p>``.  RO tags that cannot
    be paired with the EN tag at the same position are collected in
    ``wrong_tags``; the RO tags that remain are renumbered so that their
    sequence numbers stay consecutive.

    Attributes:
        ro_tags: RO tag lines, renumbered from 1 on construction and reduced
            to the matching ones by :meth:`analyze`.
        en_tags: EN tag lines, stored exactly as passed in.
        wrong_tags: RO tag lines rejected by :meth:`analyze`.
    """

    # Compiled once; used for stripping markup and reading/rewriting the
    # leading sequence number.
    _MARKUP_RE = re.compile(r'<[^>]+>')
    _NUMBER_PREFIX_RE = re.compile(r'^\d+\.')
    _NUMBER_CAPTURE_RE = re.compile(r'(\d+)\.')

    def __init__(self, ro_tags: List[str], en_tags: List[str]):
        self.ro_tags: List[str] = self.renumber_tags(ro_tags)
        self.en_tags: List[str] = en_tags
        self.wrong_tags: List[str] = []

    def get_tag_type(self, line: str) -> str:
        """Classify a line as 'A', 'B' or 'C'.

        'A' when the line contains ``<span class="text_obisnuit2">``,
        'B' when it otherwise contains ``class="text_obisnuit2"``,
        'C' in every other case.
        """
        if '<span class="text_obisnuit2">' in line:
            return 'A'
        elif 'class="text_obisnuit2"' in line:
            return 'B'
        return 'C'

    def count_words(self, text: str) -> int:
        """Return the number of whitespace-separated words after removing
        every ``<...>`` markup fragment from *text*."""
        # str.split() with no argument never yields empty strings, so no
        # further filtering is needed.
        return len(self._MARKUP_RE.sub('', text).split())

    def get_greek_identifier(self, text: str) -> str:
        """Map the word count of *text* to a Greek letter.

        Fewer than 7 words -> 'α', 7 to 14 words -> 'β', more -> 'γ'.
        """
        word_count = self.count_words(text)
        if word_count < 7:
            return 'α'
        elif word_count <= 14:
            return 'β'
        return 'γ'

    def renumber_tags(self, tags: List[str]) -> List[str]:
        """Return a new list in which each tag's leading ``N.`` is replaced
        by its 1-based position.  Tags without such a prefix are copied
        unchanged."""
        return [
            self._NUMBER_PREFIX_RE.sub(f'{index}.', tag)
            for index, tag in enumerate(tags, 1)
        ]

    def get_tag_identifiers(self, tag: str) -> Tuple[int, str, str]:
        """Return ``(number, type, greek)`` for a tag line.

        The whole line, including its leading ``N.X`` label, is passed to the
        word counter, so the label counts as one word.

        Raises:
            AttributeError: if *tag* does not start with digits and a dot.
        """
        pos = int(self._NUMBER_CAPTURE_RE.match(tag).group(1))
        return pos, self.get_tag_type(tag), self.get_greek_identifier(tag)

    def compare_tags(self, ro_tag: str, en_tag: str) -> bool:
        """Return True when the two tags agree on number, type and Greek
        identifier and their lower-cased word sets (markup removed) have a
        Jaccard similarity above 0.3."""
        if self.get_tag_identifiers(ro_tag) != self.get_tag_identifiers(en_tag):
            return False

        ro_words = set(self._MARKUP_RE.sub('', ro_tag).lower().split())
        en_words = set(self._MARKUP_RE.sub('', en_tag).lower().split())
        # Both sets contain at least the "N.X" label here, so the union is
        # never empty.
        return len(ro_words & en_words) / len(ro_words | en_words) > 0.3

    def _count_types(self, tags: List[str]) -> Dict[str, int]:
        """Count how many of *tags* fall into each of the types A, B and C."""
        counts = {'A': 0, 'B': 0, 'C': 0}
        for tag in tags:
            counts[self.get_tag_type(tag)] += 1
        return counts

    def analyze(self) -> Dict[str, object]:
        """Separate matching RO tags from unmatched ones and count types.

        RO tags are visited in order.  Each one is given the number of the
        next free EN position and compared with the EN tag at that position;
        on success it is kept, otherwise (or when the EN list is exhausted)
        it is appended to ``wrong_tags`` carrying that number.  Afterwards
        ``ro_tags`` holds only the kept tags.

        Returns:
            A dict with the keys ``'ro'``, ``'en'`` and ``'wrong'`` (each a
            per-type count dict) and ``'wrong_tags'`` (the list of rejected
            RO tag lines).
        """
        kept: List[str] = []
        for tag in self.ro_tags:
            slot = len(kept)
            # Renumber only the tag under inspection instead of rebuilding
            # the whole list after every rejection.
            candidate = self._NUMBER_PREFIX_RE.sub(f'{slot + 1}.', tag)
            if slot < len(self.en_tags) and self.compare_tags(candidate, self.en_tags[slot]):
                kept.append(candidate)
            else:
                self.wrong_tags.append(candidate)
        self.ro_tags = kept

        return {
            'ro': self._count_types(self.ro_tags),
            'en': self._count_types(self.en_tags),
            'wrong': self._count_types(self.wrong_tags),
            'wrong_tags': self.wrong_tags
        }

def count_tags(file_path):
    """Return fixed per-type tag counts chosen from the file path.

    This is a test stub: the file is never opened.  The only thing inspected
    is whether the lower-cased path contains the substring ``'ro'``.

    Args:
        file_path (str): Path whose text selects which fixed counts to return.

    Returns:
        dict: ``{'A': 2, 'B': 7, 'C': 8}`` when ``'ro'`` occurs in the
        lower-cased path, ``{'A': 2, 'B': 4, 'C': 8}`` otherwise.
    """
    # Only the 'B' total differs between the two canned results.
    mentions_ro = 'ro' in file_path.lower()
    b_total = 7 if mentions_ro else 4
    return {'A': 2, 'B': b_total, 'C': 8}

# Sample input for EnhancedTagAnalyzer: one numbered tag line per entry.
# Single-quoted literals are used so the HTML attribute quotes need no escaping.
ro_tags = [
    '1.B <p class="text_obisnuit2"><em>(.*?)</em></p>',
    '2.C <p class="text_obisnuit">(.*?)</p>',
    '3.C <p class="text_obisnuit">(.*?)</p>',
    '4.C <p class="text_obisnuit">(.*?)</p>',
    '5.C <p class="text_obisnuit">GASCA ESTE ACASA</p>',
    '6.B <p class="text_obisnuit2">(.*?)</p>',
    '7.A <p class="text_obisnuit">(.*?)</span>(.*?)</p>',
    '8.A <p class="text_obisnuit">(.*?)</span>(.*?)</p>',
    '9.C <p class="text_obisnuit">(.*?)</p>',
    '10.C <p class="text_obisnuit">(.*?)</p>',
    '11.B <p class="text_obisnuit2">BABA OARBA</p>',
    '12.B <p class="text_obisnuit2">(.*?)</p>',
    '13.C <p class="text_obisnuit">(.*?)</p>',
    '14.C <p class="text_obisnuit">(.*?)</p>',
    '15.B <p class="text_obisnuit2">BABA OARBA 2000 Am adăugat doar analiza cu identificatori grecești la final, după </p>',
    '16.C <p class="text_obisnuit">(.*?)</p>',
    '17.B <p class="text_obisnuit2">(.*?)</p>',
    '18.B <p class="text_obisnuit2">COCO CHANNEL </p>',
]

en_tags = [
    '1.B <p class="text_obisnuit2"><em>(.*?)</em></p>',
    '2.C <p class="text_obisnuit">(.*?)</p>',
    '3.C <p class="text_obisnuit">(.*?)</p>',
    '4.C <p class="text_obisnuit">(.*?)</p>',
    '5.B <p class="text_obisnuit2">(.*?)</p>',
    '6.A <p class="text_obisnuit">(.*?)</span>(.*?)</p>',
    '7.A <p class="text_obisnuit">(.*?)</span>(.*?)</p>',
    '8.C <p class="text_obisnuit">(.*?)</p>',
    '9.C <p class="text_obisnuit">(.*?)</p>',
    '10.B <p class="text_obisnuit2">(.*?)</p>',
    '11.C <p class="text_obisnuit">(.*?)</p>',
    '12.C <p class="text_obisnuit">(.*?)</p>',
    '13.C <p class="text_obisnuit">(.*?)</p>',
    '14.B <p class="text_obisnuit2">(.*?)</p>',
]

def main():
    """Print the tag-count report, the unmatched RO tags and their identifiers.

    Three sections are written to standard output:

    1. Fixed RO and EN per-type totals and their differences.
    2. Every RO tag line that the analyzer could not pair with an EN tag.
    3. For each of those lines, ``number(type)(greek)`` as computed by the
       analyzer itself, so the type and Greek letter shown here always agree
       with the rules the analyzer used for matching.
    """
    # NOTE: these totals are literal values; count_tags is not called here
    # even though the heading below mentions it.
    ro_counts = {'A': 2, 'B': 7, 'C': 8}
    en_counts = {'A': 2, 'B': 4, 'C': 8}

    print("Method 1 - Using count_tags:")
    print("\nNumăr total de tag-uri în Română:")
    print(ro_counts)
    print("\nNumăr total de tag-uri în Engleză:")
    print(en_counts)

    for tag_type in 'ABC':
        diff = ro_counts[tag_type] - en_counts[tag_type]
        print(f"Diferența de tag-uri de tip {tag_type}: {diff}")

    # Run the alignment to find the RO tags that have no EN counterpart.
    analyzer = EnhancedTagAnalyzer(ro_tags, en_tags)
    wrong_tags = analyzer.analyze()['wrong_tags']

    print("\nTag-uri care nu au corespondent în EN (WRONG TAGS):")
    for tag in wrong_tags:
        print(tag)

    # Method 3 - Greek identifier analysis
    print("\nMethod 3 - Greek identifier analysis:")
    for tag in wrong_tags:
        # Reuse the analyzer's own classification instead of a second,
        # slightly different copy of the rules (the old inline version could
        # never report type 'A').
        num, tag_type, greek = analyzer.get_tag_identifiers(tag)
        print(f"{num}({tag_type})({greek})")

# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Expected results (the same as when I run the main code):

Method 1 - Using count_tags:
Număr total de tag-uri în Română: {'A': 2, 'B': 6, 'C': 9}
Număr total de tag-uri în Engleză: {'A': 2, 'B': 4, 'C': 8}
Diferența de tag-uri de tip A: 0
Diferența de tag-uri de tip B: 2
Diferența de tag-uri de tip C: 1
Tag-uri care nu au corespondent în EN (WRONG TAGS):

    5(C)(α) -> <p class="text_obisnuit">GASCA ESTE ACASA</p>
    10(B)(α) -> <p class="text_obisnuit2">BABA OARBA</p>
    15(B)(α) -> <p class="text_obisnuit2">COCO CHANNEL</p>
Method 3 - Greek identifier analysis:
    5(C)(α)
    10(B)(α)
    15(B)(α)