exo icon indicating copy to clipboard operation
exo copied to clipboard

add script to calculate pipsize

Open AlexCheema opened this issue 1 year ago • 1 comment

AlexCheema avatar Oct 02 '24 20:10 AlexCheema

Here's a proposed change that removes the deprecated pkg_resources dependency and also adds a total line at the bottom:

import os
import importlib.metadata
import importlib.util
from tabulate import tabulate


def calc_container(path):
    """Return the size in bytes of a file, or of every file beneath a directory.

    Entries whose size cannot be read contribute 0 to the result, so a
    missing or unreadable path yields 0 rather than raising.
    """

    def _size_or_zero(file_path):
        # FileNotFoundError is a subclass of OSError, so one clause covers both.
        try:
            return os.path.getsize(file_path)
        except OSError:
            return 0

    if os.path.isfile(path):
        return _size_or_zero(path)

    # Walk the tree and add up each regular entry listed in a directory.
    return sum(
        _size_or_zero(os.path.join(root, filename))
        for root, _subdirs, filenames in os.walk(path)
        for filename in filenames
    )


def get_package_location(package_name):
    """Get the actual location of a package's files.

    Args:
        package_name: Importable module or package name (e.g. ``"json"``).

    Returns:
        The first search directory for a package, the file path for a
        single-file module, or ``None`` when the module cannot be found or
        has no file-system location (built-in and frozen modules).
    """
    try:
        spec = importlib.util.find_spec(package_name)
        if spec is None:
            return None

        if spec.submodule_search_locations:
            # Packages (regular or namespace) list their directories here;
            # the first entry is used as the package location.
            return spec.submodule_search_locations[0]
        if spec.origin and spec.has_location:
            # For single-file modules, return the file path itself.
            # has_location filters out placeholder origins such as
            # "built-in" and "frozen", which are not paths.
            return spec.origin
    except (ImportError, ValueError):
        # ImportError: importing a parent package of a dotted name failed.
        # ValueError: find_spec raises it when the module is already in
        # sys.modules but its __spec__ is None.
        return None
    return None


def get_package_sizes(min_size_mb=0.1):
    """Get sizes of installed packages above minimum size threshold.

    Args:
        min_size_mb: Only distributions strictly larger than this many
            megabytes (1 MB = 1024 * 1024 bytes) are reported.

    Returns:
        A list of ``(distribution_name, size_in_bytes)`` tuples, in the order
        the distributions were discovered.
    """
    package_sizes = []

    # Get all installed distributions
    for dist in importlib.metadata.distributions():
        try:
            package_name = dist.metadata["Name"]

            # A distribution's import name often differs from its project
            # name (e.g. PyYAML -> yaml). When the distribution ships a
            # top_level.txt, use the import names it lists; otherwise fall
            # back to guessing from the project name.
            top_level = dist.read_text("top_level.txt") or ""
            import_names = [
                line.strip() for line in top_level.splitlines() if line.strip()
            ]
            if not import_names:
                import_names = [package_name.replace("-", "_")]

            size = 0
            seen_locations = set()
            for import_name in import_names:
                location = get_package_location(import_name)
                # Skip unresolved names and avoid counting a path twice.
                if not location or location in seen_locations:
                    continue
                if os.path.exists(location):
                    seen_locations.add(location)
                    size += calc_container(location)

            size_mb = size / (1024 * 1024)
            if size_mb > min_size_mb:
                package_sizes.append((package_name, size))
        except Exception as e:
            # Best effort: report the failure and carry on with the next
            # distribution instead of aborting the whole scan.
            print(
                f"Error processing {dist.metadata.get('Name', 'Unknown package')}: {e}"
            )

    return package_sizes


# Collect the package sizes, largest first
package_sizes = sorted(
    get_package_sizes(),
    key=lambda entry: entry[1],
    reverse=True,
)

# Column titles for the report
headers = ["Package", "Size (MB)"]

# One row per package, with the byte count rendered as MB to two decimals
table_data = [
    (pkg_name, f"{pkg_bytes/(1024*1024):.2f}")
    for pkg_name, pkg_bytes in package_sizes
]

# Print the grid table followed by the grand total
print(tabulate(table_data, headers=headers, tablefmt="grid"))
print(
    "Total size:",
    f"{sum(pkg_bytes for _, pkg_bytes in package_sizes)/(1024*1024):.2f} MB\n",
)

dtnewman avatar Nov 07 '24 21:11 dtnewman