analitics

Pages

Wednesday, April 16, 2025

Python Qt6 : Simple tool to convert HTML.

This is based on old tutorial from this post.
I add a class AgentPY class with all features for processing input and HtmlEditor class for .
Maybe I will use the agentpy module , but now works well without agents.
This is the source code:
from PyQt6.QtWidgets import QApplication, QMainWindow, QTextEdit, QMenu
from bs4 import BeautifulSoup

class AgentPy:
    """Clasă pentru procesarea și curățarea HTML-ului."""
    @staticmethod
    def clean_all_styles(html_content):
        soup = BeautifulSoup(html_content, 'html.parser')
        for tag in soup.find_all(True):
            if tag.name == "a":
                attrs_to_keep = {"href": tag.attrs.get("href")} if "href" in tag.attrs else {}
                tag.attrs = attrs_to_keep
            else:
                tag.attrs = {}
        return str(soup)

    @staticmethod
    def clean_empty_tags(html_content):
        soup = BeautifulSoup(html_content, 'html.parser')
        for tag in soup.find_all(True):
            if not tag.contents or all(str(content).strip() == "" for content in tag.contents):
                tag.decompose()
        return str(soup)

    @staticmethod
    def clean_duplicate_tags(html_content):
        soup = BeautifulSoup(html_content, 'html.parser')
        unique_tags = {}
        for tag in soup.find_all(True):
            tag_key = (tag.name, str(tag.attrs))
            if tag_key in unique_tags:
                tag.decompose()
            else:
                unique_tags[tag_key] = tag
        return str(soup)

    @staticmethod
    def convert_to_html(source_code):
        """Convertim caractere speciale din cod sursă în entități HTML."""
        # Creăm un dicționar pentru conversia caracterelor
        html_entities = {
            '<': '<',
            '>': '>',
            '&': '&',
            '"': '"',
            "'": ''',
            '[': '[',
            ']': ']',
        }
        # Înlocuim caracterele în codul sursă
        for char, entity in html_entities.items():
            source_code = source_code.replace(char, entity)
        return source_code

class HtmlEditor(QMainWindow):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("catafest-cleaner-HTML")
        self.setGeometry(100, 100, 800, 600)

        # Editor de text
        self.editor = QTextEdit(self)
        self.setCentralWidget(self.editor)

        # Meniu contextual
        self.editor.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.editor.customContextMenuRequested.connect(self.show_context_menu)

    def show_context_menu(self, position):
        menu = QMenu(self)
        clean_styles_action = menu.addAction("Clean basic HTML")
        clean_styles_action.triggered.connect(self.clean_all_styles)
        clean_empty_tags_action = menu.addAction("Clean Empty Tags")
        clean_empty_tags_action.triggered.connect(self.clean_empty_tags)
        clean_agentpy_action = menu.addAction("Clean AgentPy")
        clean_agentpy_action.triggered.connect(self.clean_duplicate_tags)
        convert_to_html_action = menu.addAction("Convert to HTML")  # Noua opțiune
        convert_to_html_action.triggered.connect(self.convert_to_html)
        menu.exec(self.editor.mapToGlobal(position))

    def clean_all_styles(self):
        html_content = self.editor.toPlainText()
        clean_html = AgentPy.clean_all_styles(html_content)
        self.editor.setPlainText(clean_html)

    def clean_empty_tags(self):
        html_content = self.editor.toPlainText()
        clean_html = AgentPy.clean_empty_tags(html_content)
        self.editor.setPlainText(clean_html)

    def clean_duplicate_tags(self):
        html_content = self.editor.toPlainText()
        clean_html = AgentPy.clean_duplicate_tags(html_content)
        self.editor.setPlainText(clean_html)

    def convert_to_html(self):
        source_code = self.editor.toPlainText()
        html_content = AgentPy.convert_to_html(source_code)
        self.editor.setPlainText(html_content)

if __name__ == "__main__":
    import sys
    from PyQt6.QtCore import Qt
    app = QApplication(sys.argv)
    window = HtmlEditor()
    window.show()
    sys.exit(app.exec())