analitics

Pages

Showing posts with label re. Show all posts
Showing posts with label re. Show all posts

Monday, September 9, 2024

Python Qt6 : Use regular expression with PyQt6.

Today I tested some Python source code with PyQt6.
This source code lets you clean text by stripping HTML tags and by applying a regular expression in real time.
If you want to process the text in real time, check the Realtime checkbox and enter the regular expression in the edit box; every match is removed from the text as you type.
This is the result:
This is the source code I used to apply a regular expression in real time to the text in the edit box:
from PyQt6.QtWidgets import QApplication, QMainWindow, QTextEdit, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, QCheckBox, QLineEdit, QLabel
from PyQt6.QtGui import QTextDocument
from PyQt6.QtCore import Qt
import re

class MainWindow(QMainWindow):
    """Main window of the "HTML Cleaner" tool.

    The window holds a text area plus a toolbar row with:

    * a "Clean HTML" button that strips CSS, JavaScript, HTML comments and
      tags from the text area,
    * a checkbox that turns opening ``<div>`` tags into line breaks before
      the tags are stripped,
    * a regex edit box and a "Realtime" checkbox; while "Realtime" is
      checked, every match of the regex is removed from the text as the
      text or the pattern changes.
    """

    def __init__(self) -> None:
        """Build the widgets, lay them out and wire up the signals."""
        super().__init__()

        self.setWindowTitle("HTML Cleaner")

        self.text_edit = QTextEdit()
        self.clean_button = QPushButton("Clean HTML")
        self.transform_div_checkbox = QCheckBox("Transform <div> tags")
        self.realtime_checkbox = QCheckBox("Realtime")
        self.regex_edit = QLineEdit()
        self.regex_edit.setPlaceholderText("Enter regex pattern")
        # Disabled initially; enabled only while "Realtime" is checked.
        self.regex_edit.setEnabled(False)

        top_layout = QHBoxLayout()
        top_layout.addWidget(self.clean_button)
        top_layout.addWidget(self.transform_div_checkbox)
        top_layout.addWidget(QLabel("Regex:"))
        top_layout.addWidget(self.regex_edit)
        top_layout.addWidget(self.realtime_checkbox)

        main_layout = QVBoxLayout()
        main_layout.addLayout(top_layout)
        main_layout.addWidget(self.text_edit)

        container = QWidget()
        container.setLayout(main_layout)
        self.setCentralWidget(container)

        self.clean_button.clicked.connect(self.clean_html)
        self.realtime_checkbox.stateChanged.connect(self.toggle_realtime)
        # Pattern edits always reach realtime_update; it checks the
        # "Realtime" checkbox itself before doing anything.
        self.regex_edit.textChanged.connect(self.realtime_update)

    def clean_html(self) -> None:
        """Replace the text area's content with its HTML-stripped version."""
        html_text = self.text_edit.toPlainText()
        clean_text = self.remove_html_tags(html_text)
        self.text_edit.setPlainText(clean_text)

    def remove_html_tags(self, text: str) -> str:
        """Return *text* with CSS, JavaScript, comments and HTML tags removed.

        When the "Transform <div> tags" checkbox is checked, each opening
        ``<div ...>`` tag is replaced by a line break before the remaining
        tags are stripped. Runs of blank lines are collapsed at the end.
        """
        # Remove CSS (the whole <style> element, including its content).
        text = re.sub(r'<style.*?>.*?</style>', '', text, flags=re.DOTALL)
        # Remove JavaScript (the whole <script> element, including its content).
        text = re.sub(r'<script.*?>.*?</script>', '', text, flags=re.DOTALL)
        # Remove HTML comments.
        text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
        # Transform <div> tags if the checkbox is checked. A plain newline is
        # used (not a tag) so the break survives the tag stripping below.
        if self.transform_div_checkbox.isChecked():
            text = re.sub(r'<div[^>]*>', '\n', text)
        # Remove HTML tags but keep their content.
        clean = re.compile('<.*?>')
        text = re.sub(clean, '', text)
        # Remove empty lines.
        text = re.sub(r'\n\s*\n', '\n', text)
        return text

    def toggle_realtime(self) -> None:
        """Enable or disable realtime regex filtering with the checkbox."""
        if self.realtime_checkbox.isChecked():
            self.regex_edit.setEnabled(True)  # Enable the edit box
            self.text_edit.textChanged.connect(self.realtime_update)
        else:
            self.regex_edit.setEnabled(False)  # Disable the edit box
            self.text_edit.textChanged.disconnect(self.realtime_update)

    def realtime_update(self) -> None:
        """Remove every match of the current regex from the text area.

        Does nothing unless "Realtime" is checked and a pattern is entered.
        Invalid (for example half-typed) patterns are ignored. The document
        is only rewritten when the substitution actually changed it, and the
        caret position is restored afterwards, so typing is not interrupted.
        """
        if not self.realtime_checkbox.isChecked():
            return

        regex_pattern = self.regex_edit.text()
        if not regex_pattern:
            return

        current_text = self.text_edit.toPlainText()
        try:
            new_text = re.sub(regex_pattern, '', current_text)
        except re.error:
            # The user is probably still typing the pattern; ignore it.
            return

        if new_text == current_text:
            # Nothing to remove: leave the document and caret alone.
            return

        caret_position = self.text_edit.textCursor().position()

        # Block signals so our own setPlainText does not re-enter this slot.
        self.text_edit.blockSignals(True)
        try:
            self.text_edit.setPlainText(new_text)
        finally:
            self.text_edit.blockSignals(False)

        # setPlainText moves the caret to the start; put it back, clamped to
        # the new (shorter) document. characterCount() includes the final
        # paragraph separator, hence the "- 1".
        last_position = self.text_edit.document().characterCount() - 1
        cursor = self.text_edit.textCursor()
        cursor.setPosition(max(0, min(caret_position, last_position)))
        self.text_edit.setTextCursor(cursor)


if __name__ == "__main__":
    app = QApplication([])
    window = MainWindow()
    window.show()
    app.exec()