exo
exo copied to clipboard
add script to calculate pipsize
Here's a proposed change that removes pkg_resources (deprecated), and also adds a total line at the bottom:
import os
import importlib.metadata
import importlib.util
from tabulate import tabulate
def calc_container(path):
    """Return the total size in bytes of a file or of a directory tree.

    Args:
        path: Path of a regular file or of a directory to measure.

    Returns:
        The file's size when ``path`` is a file; otherwise the summed size of
        every file found by walking ``path``. Entries whose size cannot be
        read contribute 0, and a path that does not exist yields 0.
    """

    def _size_or_zero(target):
        # Best effort: unreadable or vanished entries count as zero bytes.
        # OSError already covers FileNotFoundError.
        try:
            return os.path.getsize(target)
        except OSError:
            return 0

    if os.path.isfile(path):
        return _size_or_zero(path)

    return sum(
        _size_or_zero(os.path.join(root, entry))
        for root, _subdirs, entries in os.walk(path)
        for entry in entries
    )
def get_package_location(package_name):
    """Return the filesystem location of an importable package or module.

    Args:
        package_name: Import name to resolve (not a distribution name).

    Returns:
        For packages, the first entry of the spec's submodule search
        locations; for single-file modules, the spec's origin file.
        ``None`` when the name cannot be resolved or when the module has no
        location on disk (for example built-in modules, whose origin is the
        marker string ``"built-in"`` rather than a path).
    """
    try:
        spec = importlib.util.find_spec(package_name)
        if spec is None:
            return None
        if spec.submodule_search_locations:
            # Namespace packages may span several directories; use the first.
            return spec.submodule_search_locations[0]
        if spec.origin and spec.has_location:
            # has_location is False when origin is not a loadable path.
            return spec.origin
    except (ImportError, ValueError):
        # find_spec raises ValueError when an already-imported module has
        # __spec__ set to None, and ImportError (ModuleNotFoundError) when
        # the parent of a dotted name is missing or is not a package.
        return None
    return None
def _installed_size(dist, package_name):
    """Return the on-disk size in bytes of one installed distribution."""
    recorded = dist.files
    if recorded:
        # Sum the files the distribution itself records as installed. This
        # does not rely on the distribution name matching an import name,
        # which it often does not (e.g. Pillow installs ``PIL``).
        # calc_container yields 0 for recorded paths that are absent on disk.
        size = sum(calc_container(entry.locate()) for entry in recorded)
        if size:
            return size
    # No usable file record: fall back to guessing the import name from the
    # distribution name and measuring that module/package location.
    location = get_package_location(package_name.replace("-", "_"))
    if location and os.path.exists(location):
        return calc_container(location)
    return 0


def get_package_sizes(min_size_mb=0.1):
    """Get sizes of installed packages above a minimum size threshold.

    Each distribution's size is the sum of the files listed in its install
    record (``dist.files``). When no record is available, or none of the
    recorded files can be measured, the size of the module/package whose
    import name is derived from the distribution name is used instead.

    Args:
        min_size_mb: Only distributions strictly larger than this many MiB
            are reported.

    Returns:
        A list of ``(package_name, size_in_bytes)`` tuples in discovery order.
        Distributions that fail to process are reported on stdout and
        skipped; they never abort the scan.
    """
    bytes_per_mb = 1024 * 1024
    package_sizes = []
    # Get all installed distributions
    for dist in importlib.metadata.distributions():
        # Resolved before any risky work so the error handler below never
        # has to touch dist.metadata again (which could itself raise).
        package_name = "Unknown package"
        try:
            package_name = dist.metadata.get("Name") or package_name
            size = _installed_size(dist, package_name)
            if size / bytes_per_mb > min_size_mb:
                package_sizes.append((package_name, size))
        except Exception as e:
            # Deliberate best effort: one broken distribution must not stop
            # the report for all the others.
            print(f"Error processing {package_name}: {e}")
    return package_sizes
# Measure every installed package and order the results largest-first.
package_sizes = sorted(get_package_sizes(), key=lambda entry: entry[1], reverse=True)

# Build the table: one (name, size in MB with two decimals) row per package.
headers = ["Package", "Size (MB)"]
table_data = [(name, f"{size/(1024*1024):.2f}") for name, size in package_sizes]

# Emit the grid table, followed by the grand total of all listed packages.
print(tabulate(table_data, headers=headers, tablefmt="grid"))
print("Total size:", f"{sum(size for _, size in package_sizes)/(1024*1024):.2f} MB\n")