Today I tested some Python source code that uses PyQt6.
This source code lets you clean a text of HTML tags and apply a regular expression to it in real time.
If you want to parse in real time, check the Realtime checkbox and enter the regular expression in the edit box.
This is the result:

This is the source code I used to apply a regular expression from the edit box in real time:
from PyQt6.QtWidgets import QApplication, QMainWindow, QTextEdit, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, QCheckBox, QLineEdit, QLabel
from PyQt6.QtGui import QTextDocument
from PyQt6.QtCore import Qt
import re
class MainWindow(QMainWindow):
    """Main window of the "HTML Cleaner" tool.

    The window holds a text editor plus a toolbar row with:

    * a "Clean HTML" button that strips CSS, JavaScript, HTML comments and
      tags from the editor content;
    * a checkbox that enables a dedicated pass over opening ``<div>`` tags;
    * a regex line edit and a "Realtime" checkbox: while the checkbox is
      ticked, every match of the entered pattern is removed from the editor
      content whenever the pattern or the text changes.
    """

    # Patterns are compiled once at class creation instead of on every call.
    # <style>/<script> elements are removed together with their content.
    _STYLE_RE = re.compile(
        r'<style\b[^>]*>.*?</style\s*>', re.DOTALL | re.IGNORECASE
    )
    _SCRIPT_RE = re.compile(
        r'<script\b[^>]*>.*?</script\s*>', re.DOTALL | re.IGNORECASE
    )
    _COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
    _DIV_OPEN_RE = re.compile(r'<div\b[^>]*>', re.IGNORECASE)
    _TAG_RE = re.compile('<.*?>')
    _BLANK_LINES_RE = re.compile(r'\n\s*\n')

    def __init__(self):
        """Build the widgets, lay them out and wire up the signals."""
        super().__init__()
        self.setWindowTitle("HTML Cleaner")
        self.text_edit = QTextEdit()
        self.clean_button = QPushButton("Clean HTML")
        self.transform_div_checkbox = QCheckBox("Transform <div> tags")
        self.realtime_checkbox = QCheckBox("Realtime")
        self.regex_edit = QLineEdit()
        self.regex_edit.setPlaceholderText("Enter regex pattern")
        # Disabled initially; enabled only while "Realtime" is checked.
        self.regex_edit.setEnabled(False)

        top_layout = QHBoxLayout()
        top_layout.addWidget(self.clean_button)
        top_layout.addWidget(self.transform_div_checkbox)
        top_layout.addWidget(QLabel("Regex:"))
        top_layout.addWidget(self.regex_edit)
        top_layout.addWidget(self.realtime_checkbox)

        main_layout = QVBoxLayout()
        main_layout.addLayout(top_layout)
        main_layout.addWidget(self.text_edit)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

        self.clean_button.clicked.connect(self.clean_html)
        self.realtime_checkbox.stateChanged.connect(self.toggle_realtime)
        self.regex_edit.textChanged.connect(self.realtime_update)
        # Connected once for the lifetime of the window: realtime_update()
        # checks the "Realtime" checkbox itself, so there is no need to
        # connect/disconnect on every toggle (disconnecting a slot that is
        # not connected raises an exception in PyQt).
        self.text_edit.textChanged.connect(self.realtime_update)

    def clean_html(self):
        """Replace the editor content with its HTML-stripped version."""
        html_text = self.text_edit.toPlainText()
        clean_text = self.remove_html_tags(html_text)
        self.text_edit.setPlainText(clean_text)

    def remove_html_tags(self, text):
        """Return *text* with CSS, JavaScript, comments and HTML tags removed.

        Args:
            text: The raw HTML source as a string.

        Returns:
            The text with ``<style>``/``<script>`` elements (including their
            content) and HTML comments removed, all remaining tags stripped
            while keeping their content, and runs of blank lines collapsed.
        """
        # Remove CSS
        text = self._STYLE_RE.sub('', text)
        # Remove JavaScript
        text = self._SCRIPT_RE.sub('', text)
        # Remove HTML comments
        text = self._COMMENT_RE.sub('', text)
        # Transform <div> tags if checkbox is checked
        if self.transform_div_checkbox.isChecked():
            # NOTE(review): the replacement is the empty string, so this pass
            # currently yields the same result as the generic tag removal
            # below. The intended replacement text (e.g. a line break) could
            # not be determined -- confirm and adjust.
            text = self._DIV_OPEN_RE.sub('', text)
        # Remove HTML tags but keep content
        text = self._TAG_RE.sub('', text)
        # Remove empty lines
        text = self._BLANK_LINES_RE.sub('\n', text)
        return text

    def toggle_realtime(self):
        """Enable the regex edit box only while "Realtime" is checked."""
        self.regex_edit.setEnabled(self.realtime_checkbox.isChecked())

    def realtime_update(self):
        """Remove every match of the entered regex from the editor content.

        Does nothing when "Realtime" is unchecked, when the pattern is empty
        or not (yet) a valid regular expression, or when the substitution
        leaves the text unchanged. The editor is only rewritten when the text
        really changes, and the cursor position is restored afterwards so the
        caret does not jump to the start of the document while typing.
        """
        if not self.realtime_checkbox.isChecked():
            return
        regex_pattern = self.regex_edit.text()
        if not regex_pattern:
            return
        current_text = self.text_edit.toPlainText()
        try:
            new_text = re.sub(regex_pattern, '', current_text)
        except re.error:
            # The pattern is typically incomplete while the user is still
            # typing it; keep the text untouched until it compiles.
            return
        if new_text == current_text:
            return

        cursor_position = self.text_edit.textCursor().position()
        # Block signals so that setPlainText() does not re-enter this slot.
        self.text_edit.blockSignals(True)
        try:
            self.text_edit.setPlainText(new_text)
        finally:
            self.text_edit.blockSignals(False)

        # characterCount() includes the trailing paragraph separator, so the
        # last valid cursor position is characterCount() - 1.
        last_position = max(self.text_edit.document().characterCount() - 1, 0)
        cursor = self.text_edit.textCursor()
        cursor.setPosition(min(cursor_position, last_position))
        self.text_edit.setTextCursor(cursor)
# Script entry point: create the application object first, then build and
# show the main window, and finally start the Qt event loop.
app = QApplication([])  # empty list: no command-line arguments are passed to Qt
window = MainWindow()
window.show()
app.exec()  # start the Qt event loop
  
          

