Today I tested a Python script that uses PyQt6.
This script lets you clean a text by removing its HTML tags, and it can also remove everything that matches a regular expression in real time.
If you want the text to be filtered in real time, check the Realtime checkbox and type the regular expression into the edit box.
This is the result:

This is the source code I used to apply the regular expression from the edit box in real time:
"""Small PyQt6 tool that strips HTML markup from pasted text.

The window holds a text area, a "Clean HTML" button, and an optional
real-time mode that deletes every match of a user-supplied regular
expression from the text while the user types.
"""

import re

from PyQt6.QtWidgets import (
    QApplication,
    QMainWindow,
    QTextEdit,
    QVBoxLayout,
    QHBoxLayout,
    QWidget,
    QPushButton,
    QCheckBox,
    QLineEdit,
    QLabel,
)
from PyQt6.QtGui import QTextDocument
from PyQt6.QtCore import Qt

# NOTE(review): the published listing lost every "<...>" fragment of these
# patterns during HTML extraction.  They are reconstructed from the comments
# that accompanied them ("Remove CSS", "Remove JavaScript", "Remove HTML
# comments", "Transform tags") -- confirm against the author's original file.
_STYLE_BLOCK_RE = re.compile(
    r'<style\b[^>]*>.*?</style\s*>', re.DOTALL | re.IGNORECASE
)
_SCRIPT_BLOCK_RE = re.compile(
    r'<script\b[^>]*>.*?</script\s*>', re.DOTALL | re.IGNORECASE
)
_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
_DIV_OPEN_RE = re.compile(r'<div\b[^>]*>', re.IGNORECASE)
_ANY_TAG_RE = re.compile('<.*?>')
_BLANK_LINES_RE = re.compile(r'\n\s*\n')


class MainWindow(QMainWindow):
    """Main window: a text area plus the controls that clean its content."""

    def __init__(self):
        """Build the widgets, lay them out and wire up the signals."""
        super().__init__()
        self.setWindowTitle("HTML Cleaner")

        self.text_edit = QTextEdit()
        self.clean_button = QPushButton("Clean HTML")
        self.transform_div_checkbox = QCheckBox("Transform <div> tags")
        self.realtime_checkbox = QCheckBox("Realtime")
        self.regex_edit = QLineEdit()
        self.regex_edit.setPlaceholderText("Enter regex pattern")
        # Disabled until the "Realtime" checkbox is ticked.
        self.regex_edit.setEnabled(False)

        top_layout = QHBoxLayout()
        top_layout.addWidget(self.clean_button)
        top_layout.addWidget(self.transform_div_checkbox)
        top_layout.addWidget(QLabel("Regex:"))
        top_layout.addWidget(self.regex_edit)
        top_layout.addWidget(self.realtime_checkbox)

        main_layout = QVBoxLayout()
        main_layout.addLayout(top_layout)
        main_layout.addWidget(self.text_edit)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

        self.clean_button.clicked.connect(self.clean_html)
        self.realtime_checkbox.stateChanged.connect(self.toggle_realtime)
        self.regex_edit.textChanged.connect(self.realtime_update)
        # Connected once for the lifetime of the window: realtime_update()
        # returns immediately while the checkbox is unticked, so there is no
        # need to connect/disconnect on every toggle (a disconnect of a slot
        # that is not connected raises TypeError).
        self.text_edit.textChanged.connect(self.realtime_update)

    def clean_html(self):
        """Replace the text area content with its HTML-free version."""
        html_text = self.text_edit.toPlainText()
        clean_text = self.remove_html_tags(html_text)
        self.text_edit.setPlainText(clean_text)

    def remove_html_tags(self, text):
        """Return *text* with CSS, scripts, comments and tags removed.

        Style and script blocks are dropped together with their content,
        HTML comments are dropped, every remaining tag is removed while the
        text between tags is kept, and runs of blank lines are collapsed
        into a single newline.
        """
        # Remove CSS
        text = _STYLE_BLOCK_RE.sub('', text)
        # Remove JavaScript
        text = _SCRIPT_BLOCK_RE.sub('', text)
        # Remove HTML comments
        text = _COMMENT_RE.sub('', text)
        # Transform <div> tags if the checkbox is checked.
        # NOTE(review): the replacement in the published listing is the empty
        # string, which the generic tag removal below would achieve anyway;
        # the intended replacement may have been lost -- confirm.
        if self.transform_div_checkbox.isChecked():
            text = _DIV_OPEN_RE.sub('', text)
        # Remove HTML tags but keep content
        text = _ANY_TAG_RE.sub('', text)
        # Remove empty lines
        text = _BLANK_LINES_RE.sub('\n', text)
        return text

    def toggle_realtime(self):
        """Enable the regex edit box only while real-time mode is ticked."""
        self.regex_edit.setEnabled(self.realtime_checkbox.isChecked())

    def realtime_update(self):
        """Delete every match of the user's regex from the text area.

        Does nothing when real-time mode is off, when the pattern is empty,
        when the pattern is not (yet) a valid regular expression, or when
        applying it leaves the text unchanged.
        """
        if not self.realtime_checkbox.isChecked():
            return
        regex_pattern = self.regex_edit.text()
        if not regex_pattern:
            return

        current_text = self.text_edit.toPlainText()
        try:
            filtered_text = re.sub(regex_pattern, '', current_text)
        except re.error:
            # The pattern is usually just incomplete while being typed.
            return

        # Rewriting identical text would still reset the caret and the undo
        # history on every keystroke, so only touch the widget on a change.
        if filtered_text == current_text:
            return

        caret = self.text_edit.textCursor().position()
        # Block signals so our own setPlainText() does not re-enter this slot.
        self.text_edit.blockSignals(True)
        try:
            self.text_edit.setPlainText(filtered_text)
        finally:
            self.text_edit.blockSignals(False)

        # setPlainText() moves the caret to the start; put it back, clamped
        # to the new (shorter) document.
        last_position = max(0, self.text_edit.document().characterCount() - 1)
        cursor = self.text_edit.textCursor()
        cursor.setPosition(min(caret, last_position))
        self.text_edit.setTextCursor(cursor)


def main():
    """Create the application, show the main window and run the event loop."""
    app = QApplication([])
    window = MainWindow()
    window.show()
    app.exec()


if __name__ == "__main__":
    main()