analitics

Pages

Showing posts with label threading. Show all posts
Showing posts with label threading. Show all posts

Monday, September 8, 2025

Python 3.13.0 : Script for python modules then installs them - updated with fix.

This script scans a folder full of .py files Python scripts, identifies all the external modules they import, filters out built-in ones, writes the installable ones to a requirements.txt file, and then installs them using pip—in parallel threads for speed.
I use the copilot and some comments are into my language, but I tested and works well:
NOTE: I updated with detection python modules based "from" and another issue: check if python module is instaled and step over that python module ...
This script will try to install many python modules, I can update to be better with these issues:
...some modules are default , some scripts are from another area, see Blender 3D with bpy python modules, some packages comes with same modules, this can be soleved with defined lists with unique items.
import subprocess
import sys
import os
import shutil
import importlib.util
import re
import concurrent.futures
from typing import List, Tuple, Set

class ModuleManager:
    def __init__(self):
        self.modules: Set[str] = set()
        self.pip_path = self._get_pip_path()

    def _get_pip_path(self) -> str:
        possible_path = os.path.join(sys.exec_prefix, "Scripts", "pip.exe")
        return shutil.which("pip") or (possible_path if os.path.exists(possible_path) else None)

    def extract_imports_from_file(self, file_path: str) -> List[Tuple[str, str]]:
        imports = []
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                for line in file:
                    # Detect 'import module'
                    import_match = re.match(r'^\s*import\s+([a-zA-Z0-9_]+)(\s+as\s+.*)?$', line)
                    if import_match:
                        module = import_match.group(1)
                        imports.append((module, line.strip()))
                        continue
                    
                    # Detect 'from module import ...'
                    from_match = re.match(r'^\s*from\s+([a-zA-Z0-9_]+)\s+import\s+.*$', line)
                    if from_match:
                        module = from_match.group(1)
                        imports.append((module, line.strip()))
        except FileNotFoundError:
            print(f"❌ Fișierul {file_path} nu a fost găsit.")
        except Exception as e:
            print(f"❌ Eroare la citirea fișierului {file_path}: {e}")
        return imports

    def scan_directory_for_py_files(self, directory: str = '.') -> List[str]:
        py_files = []
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith('.py'):
                    py_files.append(os.path.join(root, file))
        return py_files

    def collect_unique_modules(self, directory: str = '.') -> None:
        py_files = self.scan_directory_for_py_files(directory)
        all_imports = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_to_file = {executor.submit(self.extract_imports_from_file, file_path): file_path for file_path in py_files}
            for future in concurrent.futures.as_completed(future_to_file):
                imports = future.result()
                all_imports.extend(imports)
        
        for module, _ in all_imports:
            self.modules.add(module)

    def is_module_installed(self, module: str) -> bool:
        return importlib.util.find_spec(module) is not None

    def run_pip_install(self, module: str) -> bool:
        if not self.pip_path:
            print(f"❌ Nu am găsit pip pentru {module}.")
            return False
        try:
            subprocess.check_call([self.pip_path, "install", module])
            print(f"✅ Pachetul {module} a fost instalat cu succes.")
            return True
        except subprocess.CalledProcessError as e:
            print(f"❌ Eroare la instalarea pachetului {module}: {e}")
            return False

    def check_and_install_modules(self) -> None:
        def process_module(module):
            print(f"\n🔎 Verific dacă {module} este instalat...")
            if self.is_module_installed(module):
                print(f"✅ {module} este deja instalat.")
            else:
                print(f"📦 Instalez {module}...")
                self.run_pip_install(module)
                # Re-verifică după instalare
                if self.is_module_installed(module):
                    print(f"✅ {module} funcționează acum.")
                else:
                    print(f"❌ {module} nu funcționează după instalare.")

        with concurrent.futures.ThreadPoolExecutor() as executor:
            executor.map(process_module, self.modules)

def main():
    print("🔍 Verific pip...")
    manager = ModuleManager()
    if manager.pip_path:
        print(f"✅ Pip este disponibil la: {manager.pip_path}")
    else:
        print("⚠️ Pip nu este disponibil.")
        return

    directory = sys.argv[1] if len(sys.argv) > 1 else '.'
    print(f"\n📜 Scanez directorul {directory} pentru fișiere .py...")
    manager.collect_unique_modules(directory)
    
    if not manager.modules:
        print("⚠️ Nu s-au găsit module în importuri.")
        return
    
    print(f"\nModule unice detectate: {', '.join(manager.modules)}")
    manager.check_and_install_modules()

if __name__ == "__main__":
    main()

Tuesday, February 25, 2020

Python 3.7.6 : The new concepts of execution in python 3 - part 001.

The main goal of these tutorials series is learning to deal with python source code using the new concepts of execution in python 3.
When two or more events are concurrent it means that they are happening at the same time.
Concurrent programming is not equivalent to parallel execution.
In computing, concurrency is the execution of pieces of work or tasks by a computer at the same time.
Concurrency is a property which more than one operation can be run simultaneously.
When multiple computations or operations are carried out at the same time or in parallel with the goal of speeding up the computation process then this process is named parallelism.
Parallelism is a property which operations are actually being run simultaneously using the multiprocessing.
Multiprocessing, on the other hand, involves utilizing two or more processor units on a computer to achieve parallelism.
Multithreading is a property that refers to the ability of a CPU to execute multiple threads concurrently.
Python’s concurrency methods including threading, multiprocessing, and asyncio.
The difference between the threading and multiprocessing is this: the threading module uses threads, the multiprocessing module uses processes.
The threading is the package that provides API to create and manage threads.
With multiprocessing, Python creates new processes using an API similar to the threading module.
The asyncio is a library to write concurrent code using the async/await syntax.
The keyword async indicates that our function is a coroutine meaning they choose when to pause and let others execute and run coroutines multitask cooperatively.
The three fundamental advantages of async and await over threads are:
  • cooperative multitasking - you can reasonably have millions of concurrent tasks;
  • using await makes visible where the schedule points;
  • if a task doesn’t yield then it can accidentally block all other tasks from running;
  • tasks can support cancellation.
The next source of code show us how can deal with the execution code in Python 3 using the threading and multiprocessing python packages.
The timeit python package is used to benchmark the code write in the code_to_test variable:
Let's test the multi-threading execution with python:
[mythcat@desk ~]$ python3 
Python 3.7.6 (default, Jan 30 2020, 09:44:41) 
[GCC 9.2.1 20190827 (Red Hat 9.2.1-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import timeit
>>> code_to_test = """
... import threading
... 
... text = "Hello World"
... 
... def print_text(text):
...     for char in text:
...             print (char)
... 
... # multi-threading execution
... def multi_threads():
...     thread_1 = threading.Thread(target=print_text(text))
...     thread_2 = threading.Thread(target=print_text(text))
...     thread_1.start()
...     thread_2.start()
...     thread_1.join()
...     thread_2.join()
... multi_threads()
... """
>>> 
>>> elapsed_time = timeit.timeit(code_to_test, number=1)
H
e
l
l
o
 
W
o
r
l
d
H
e
l
l
o
 
W
o
r
l
d
>>> print(elapsed_time)
0.010613240000566293
Let's test the serially execution with python:
[mythcat@desk ~]$ python3 
Python 3.7.6 (default, Jan 30 2020, 09:44:41) 
[GCC 9.2.1 20190827 (Red Hat 9.2.1-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import timeit
>>> code_to_test = """
... import threading
... 
... text = "Hello World"
... 
... def print_text(text):
...     for char in text:
...             print (char)
... # serially execution
... def serially():
...     print_text(text)
...     print_text(text)
... serially()
... """
>>> elapsed_time = timeit.timeit(code_to_test, number=1)
H
e
l
l
o
 
W
o
r
l
d
H
e
l
l
o
 
W
o
r
l
d
>>> print(elapsed_time)
0.011771811000471644
Let's test the multiprocessing execution with python:
[mythcat@desk ~]$ python3 
Python 3.7.6 (default, Jan 30 2020, 09:44:41) 
[GCC 9.2.1 20190827 (Red Hat 9.2.1-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import timeit
>>> code_to_test = """
... import multiprocessing
... 
... text = "Hello World"
... 
... def print_text(text):
...     for char in text:
...             print (char)
... 
... # multiprocessing execution
... def multiprocessing_test():
...      process_1 = multiprocessing.Process(target=print_text(text))
...      process_2 = multiprocessing.Process(target=print_text(text))
...      process_1.start()
...      process_2.start()
...      process_1.join()
...      process_2.join()
... multiprocessing_test()
... """
>>> elapsed_time = timeit.timeit(code_to_test, number=1)
H
e
l
l
o
 
W
o
r
l
d
H
e
l
l
o
 
W
o
r
l
d
>>> print(elapsed_time)
0.3649730779998208
Since asyncio is a little complex, I will write about this in the next tutorial.