# Convert img_info into a pandas DataFrame and store it in `out` under the
# 'image_stats' key.
# NOTE(review): img_info is populated outside this view; presumably one record
# per image -- confirm against the code that builds it.
out['image_stats'] = pd.DataFrame(img_info)

# Quick printable tables
# Print one "<extension>: <count>" row per entry of ext_counts.
# NOTE(review): ext_counts is defined outside this view; the .most_common()
# call suggests a collections.Counter (rows ordered by descending count) --
# confirm.
print("=== File extensions ===")
for ext, cnt in ext_counts.most_common():
    # A falsy (e.g. empty) extension is shown as the '[no ext]' placeholder.
    print(f"{ext or '[no ext]'}: {cnt}")

# Keep only the hash entries that map to more than one path: those are the
# groups of files sharing the same digest.
# NOTE(review): `hashes` is built outside this view; presumably it maps a
# content digest to the list of paths with that digest -- confirm.
duplicates = {h: paths for h, paths in hashes.items() if len(paths) > 1}
# Store the duplicate groups in `out` under the 'duplicates' key.
out['duplicates'] = duplicates

# Report each duplicate group: the shared hash on its own line, followed by
# one indented " - <path>" line per file in the group.
print("\n=== Duplicate files (SHA‑256) ===")
for h, paths in duplicates.items():
    print(f"{h}:")
    for p in paths:
        print(f" - {p}")

# “An Exploratory Analysis of the smile.zip Dataset (3.16 MB): Structure, Content, and Potential Applications”

# Store csv_summaries (built outside this view) in `out` under the
# 'csv_summaries' key.
out['csv_summaries'] = csv_summaries