Today I tested a Python script that uses PyQt6.
This source code lets you clean text by removing HTML tags and by applying a regular expression in real time.
If you want the text to be processed in real time, check the Realtime checkbox and type the regular expression into the edit box.
This is the result:
This is the source code I used to apply a regular expression in real time to the text in the edit box:
from PyQt6.QtWidgets import QApplication, QMainWindow, QTextEdit, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, QCheckBox, QLineEdit, QLabel
from PyQt6.QtGui import QTextDocument
from PyQt6.QtCore import Qt
import re
class MainWindow(QMainWindow):
    """Main window of the "HTML Cleaner" tool.

    The window holds a text editor and a toolbar row with:

    * a "Clean HTML" button that strips markup from the editor content,
    * a "Transform tags" checkbox that enables an extra tag-removal step,
    * a regex line edit (enabled only in realtime mode),
    * a "Realtime" checkbox; while it is checked, every match of the regex
      is deleted from the editor whenever the text or the regex changes.
    """

    # NOTE(review): in the published listing the four patterns below had
    # lost everything that looked like an HTML tag (they read '.*?', '.*?',
    # '' and ']*>').  With re.DOTALL, re.sub('.*?', '', text) deletes the
    # whole text on Python 3.7+, so the patterns are reconstructed here from
    # the adjacent comments -- confirm against the author's original code.
    _STYLE_RE = re.compile(
        r'<style\b[^>]*>.*?</style\s*>', re.DOTALL | re.IGNORECASE
    )
    _SCRIPT_RE = re.compile(
        r'<script\b[^>]*>.*?</script\s*>', re.DOTALL | re.IGNORECASE
    )
    _COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
    # Presumably the opening <div ...> tag, judging by the name of the
    # checkbox attribute (transform_div_checkbox) -- TODO confirm.
    _DIV_OPEN_RE = re.compile(r'<div[^>]*>', re.IGNORECASE)
    # Any remaining tag; '.' does not match newlines here, so a tag that
    # spans several lines is left in place (same as the original pattern).
    _TAG_RE = re.compile('<.*?>')
    # A run of whitespace-only lines between two newlines.
    _BLANK_LINES_RE = re.compile(r'\n\s*\n')

    def __init__(self):
        """Build the widgets, lay them out and connect the signals."""
        super().__init__()
        self.setWindowTitle("HTML Cleaner")

        self.text_edit = QTextEdit()
        self.clean_button = QPushButton("Clean HTML")
        self.transform_div_checkbox = QCheckBox("Transform tags")
        self.realtime_checkbox = QCheckBox("Realtime")
        self.regex_edit = QLineEdit()
        self.regex_edit.setPlaceholderText("Enter regex pattern")
        # Disabled initially; enabled only while "Realtime" is checked.
        self.regex_edit.setEnabled(False)

        top_layout = QHBoxLayout()
        top_layout.addWidget(self.clean_button)
        top_layout.addWidget(self.transform_div_checkbox)
        top_layout.addWidget(QLabel("Regex:"))
        top_layout.addWidget(self.regex_edit)
        top_layout.addWidget(self.realtime_checkbox)

        main_layout = QVBoxLayout()
        main_layout.addLayout(top_layout)
        main_layout.addWidget(self.text_edit)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

        self.clean_button.clicked.connect(self.clean_html)
        self.realtime_checkbox.stateChanged.connect(self.toggle_realtime)
        self.regex_edit.textChanged.connect(self.realtime_update)

    def clean_html(self):
        """Replace the editor content with its markup-free version."""
        html_text = self.text_edit.toPlainText()
        clean_text = self.remove_html_tags(html_text)
        self.text_edit.setPlainText(clean_text)

    def remove_html_tags(self, text):
        """Return *text* with CSS, JavaScript, comments and tags removed.

        Steps, in order: drop ``<style>`` blocks, drop ``<script>`` blocks,
        drop HTML comments, optionally drop opening ``<div>`` tags (when the
        "Transform tags" checkbox is checked), drop every remaining tag while
        keeping its content, and finally collapse blank lines.
        """
        # Remove CSS
        text = self._STYLE_RE.sub('', text)
        # Remove JavaScript
        text = self._SCRIPT_RE.sub('', text)
        # Remove HTML comments
        text = self._COMMENT_RE.sub('', text)
        # Transform tags if checkbox is checked
        if self.transform_div_checkbox.isChecked():
            text = self._DIV_OPEN_RE.sub('', text)
        # Remove HTML tags but keep content
        text = self._TAG_RE.sub('', text)
        # Remove empty lines
        text = self._BLANK_LINES_RE.sub('\n', text)
        return text

    def toggle_realtime(self):
        """Enable or disable realtime mode to follow the "Realtime" checkbox."""
        if self.realtime_checkbox.isChecked():
            self.regex_edit.setEnabled(True)  # enable the regex edit box
            self.text_edit.textChanged.connect(self.realtime_update)
        else:
            self.regex_edit.setEnabled(False)  # disable the regex edit box
            self.text_edit.textChanged.disconnect(self.realtime_update)

    def realtime_update(self):
        """Delete every match of the user's regex from the editor text.

        Does nothing when realtime mode is off, when the pattern is empty or
        invalid, or when the substitution leaves the text unchanged.
        """
        if not self.realtime_checkbox.isChecked():
            return
        regex_pattern = self.regex_edit.text()
        if not regex_pattern:
            return

        current_text = self.text_edit.toPlainText()
        try:
            updated_text = re.sub(regex_pattern, '', current_text)
        except re.error:
            # The pattern is usually incomplete while it is being typed;
            # leave the text alone until it compiles.
            return
        if updated_text == current_text:
            # Rewriting identical text would only reset the caret.
            return

        caret_position = self.text_edit.textCursor().position()
        # Block signals so that our own setPlainText() does not re-enter
        # this slot through textChanged.
        self.text_edit.blockSignals(True)
        try:
            self.text_edit.setPlainText(updated_text)
        finally:
            self.text_edit.blockSignals(False)

        # setPlainText() moves the caret to the start; put it back, clamped
        # to the (possibly shorter) new document.
        cursor = self.text_edit.textCursor()
        last_position = self.text_edit.document().characterCount() - 1
        cursor.setPosition(max(0, min(caret_position, last_position)))
        self.text_edit.setTextCursor(cursor)
# Launch the GUI only when this file is run as a script, so that importing
# the module (e.g. to reuse MainWindow) does not start an event loop.
if __name__ == "__main__":
    app = QApplication([])
    window = MainWindow()
    window.show()
    # Blocks until the application quits.
    app.exec()