Package Insights
((week_daily_avg - month_daily_avg) / month_daily_avg) * 100
Weekly Downloads
GitHub Stars
Downloads by OS
Python Versions
Top Countries
Dependencies
- data-prep-toolkit >=1.1.8.dev0
342 optional dependencies
- boto3[code]
- boto3[all]
- boto3[fdedup]
- boto3[language]
- bs4[code]
- bs4[code-quality]
- bs4[all]
- clamd[code]
- clamd[all]
- click[code]
- click[code-profiler]
- click[all]
- cmudict[gneissweb]
- cmudict[language]
- cmudict[readability]
- cmudict[all]
- colorlog[code]
- colorlog[ray]
- colorlog[repo-level-order]
- colorlog[all]
- data-prep-connector[all]
- data-prep-connector[code]
- data-prep-connector[language]
- data-prep-connector[web2parquet]
- data-prep-toolkit[ray]
- datatrove[all]
- datatrove[enrichment]
- datatrove[language]
- disjoint-set[all]
- disjoint-set[code]
- disjoint-set[fdedup]
- disjoint-set[language]
- docling[all]
- docling[docling2parquet]
- docling[language]
- docling-core[all]
- docling-core[doc-chunk]
- docling-core[docling2parquet]
- docling-core[language]
- docling-ibm-models[docling2parquet]
- docling-ibm-models[all]
- docling-ibm-models[language]
- docling-parse[all]
- docling-parse[docling2parquet]
- docling-parse[language]
- duckdb[all]
- duckdb[code]
- duckdb[filter]
- duckdb[gneissweb]
- duckdb[language]
- easyocr[all]
- easyocr[docling2parquet]
- easyocr[language]
- emerge-viz[all]
- emerge-viz[code]
- emerge-viz[ray]
- emerge-viz[repo-level-order]
- fasttext-wheel[all]
- fasttext-wheel[enrichment]
- fasttext-wheel[gneissweb]
- fasttext-wheel[gneissweb-classification]
- fasttext-wheel[lang-id]
- fasttext-wheel[language]
- filetype[all]
- filetype[docling2parquet]
- filetype[language]
- flair[all]
- flair[language]
- flair[pii-redactor]
- ftfy[all]
- ftfy[enrichment]
- ftfy[language]
- func-timeout[all]
- func-timeout[code]
- func-timeout[ray]
- func-timeout[repo-level-order]
- gputil[all]
- gputil[code]
- gputil[gneissweb]
- gputil[language]
- gputil[rep-removal]
- hf-xet[text-encoder]
- hf-xet[all]
- hf-xet[language]
- httpx-sse[all]
- httpx-sse[code]
- httpx-sse[code-profiler]
- huggingface-hub[all]
- huggingface-hub[bloom]
- huggingface-hub[code]
- huggingface-hub[gneissweb]
- huggingface-hub[gneissweb-classification]
- huggingface-hub[lang-id]
- huggingface-hub[language]
- ibm-generative-ai[all]
- ibm-generative-ai[code]
- ibm-generative-ai[code-profiler]
- kubernetes[all]
- kubernetes[code]
- kubernetes[fdedup]
- kubernetes[language]
- lancedb[all]
- lancedb[language]
- lancedb[text-encoder]
- langcodes[all]
- langcodes[gneissweb]
- langcodes[gneissweb-classification]
- langcodes[lang-id]
- langcodes[language]
- llama-index-core[all]
- llama-index-core[doc-chunk]
- llama-index-core[language]
- markupsafe[dev]
- matplotlib[all]
- matplotlib[code]
- matplotlib[code-profiler]
- matplotlib[language]
- matplotlib[pii-redactor]
- matplotlib-inline[all]
- matplotlib-inline[code]
- matplotlib-inline[code-profiler]
- mlx-vlm[all]
- mlx-vlm[docling2parquet]
- mlx-vlm[language]
- mmh3[all]
- mmh3[code]
- mmh3[ededup]
- mmh3[fdedup]
- mmh3[language]
- mmh3[profiler]
- moto[dev]
- networkx[all]
- networkx[code]
- networkx[code-profiler]
- networkx[ray]
- networkx[repo-level-order]
- nltk[all]
- nltk[code]
- nltk[enrichment]
- nltk[fineweb-quality-annotator]
- nltk[gneissweb]
- nltk[gopher-repetition-annotator]
- nltk[hap]
- nltk[language]
- nltk[rep-removal]
- nltk[similarity]
- normality[all]
- normality[code]
- normality[header-cleanser]
- numpy[all]
- numpy[code]
- numpy[code-profiler]
- numpy[docling2parquet]
- numpy[faces]
- numpy[fdedup]
- numpy[gneissweb]
- numpy[gneissweb-classification]
- numpy[images]
- numpy[lang-id]
- numpy[language]
- numpy[nsfw]
- numpy[people]
- numpy[pii-redactor]
- omegaconf[faces]
- omegaconf[images]
- omegaconf[nsfw]
- omegaconf[people]
- opencv-python[people]
- opencv-python[faces]
- opencv-python[images]
- opencv-python[nsfw]
- opensearch-py[all]
- opensearch-py[code]
- opensearch-py[language]
- opensearch-py[opensearch]
- pandas[all]
- pandas[bloom]
- pandas[code]
- pandas[code-profiler]
- pandas[faces]
- pandas[filter]
- pandas[gneissweb]
- pandas[hap]
- pandas[images]
- pandas[language]
- pandas[nsfw]
- pandas[people]
- pandas[pii-redactor]
- pandas[readability]
- pandas[repo-level-order]
- pandas[rep-removal]
- pandas[similarity]
- pillow[faces]
- pillow[images]
- pillow[nsfw]
- pillow[people]
- plotly[all]
- plotly[code]
- plotly[code-profiler]
- polars[all]
- polars[code]
- polars[extreme-tokenized]
- polars[faces]
- polars[fdedup]
- polars[gneissweb]
- polars[images]
- polars[language]
- polars[nsfw]
- polars[people]
- polars[readability]
- pre-commit[dev]
- presidio-analyzer[all]
- presidio-analyzer[language]
- presidio-analyzer[pii-redactor]
- presidio-anonymizer[all]
- presidio-anonymizer[language]
- presidio-anonymizer[pii-redactor]
- psutil[all]
- psutil[code]
- psutil[gneissweb]
- psutil[language]
- psutil[rep-removal]
- pyarrow[all]
- pyarrow[bloom]
- pyarrow[code]
- pyarrow[code-profiler]
- pyarrow[gneissweb]
- pyarrow[language]
- pydantic[all]
- pydantic[doc-chunk]
- pydantic[language]
- pydantic[pii-redactor]
- pygtrie[all]
- pygtrie[blocklist]
- pygtrie[code]
- pygtrie[language]
- pylance[all]
- pylance[language]
- pylance[text-encoder]
- pytest[dev]
- pytest-cov[dev]
- pytest-dotenv[dev]
- pytest-env[dev]
- pytest-mock[dev]
- python-dotenv[all]
- python-dotenv[code]
- python-dotenv[gneissweb]
- python-dotenv[language]
- python-dotenv[tokenization]
- python-dotenv[tokenization2arrow]
- pyyaml[all]
- pyyaml[code]
- pyyaml[fdedup]
- pyyaml[language]
- pyyaml[ml-filter]
- rbloom[all]
- rbloom[bloom]
- rbloom[code]
- rbloom[gneissweb]
- rbloom[language]
- requests[all]
- requests[code]
- requests[gneissweb]
- requests[language]
- requests[rep-removal]
- requests[similarity]
- scancode-toolkit[all]
- scancode-toolkit[code]
- scancode-toolkit[header-cleanser]
- scipy[all]
- scipy[code]
- scipy[fdedup]
- scipy[language]
- sentencepiece[all]
- sentencepiece[code]
- sentencepiece[fdedup]
- sentencepiece[language]
- sentence-transformers[all]
- sentence-transformers[code]
- sentence-transformers[code-profiler]
- sentence-transformers[language]
- sentence-transformers[text-encoder]
- setuptools[all]
- setuptools[gneissweb]
- setuptools[language]
- setuptools[readability]
- spacy[all]
- spacy[enrichment]
- spacy[language]
- streamlit[all]
- streamlit[code]
- streamlit[code-profiler]
- textstat[all]
- textstat[gneissweb]
- textstat[language]
- textstat[readability]
- timeout-timer[all]
- timeout-timer[code]
- timeout-timer[header-cleanser]
- torch[all]
- torch[code]
- torch[hap]
- torch[language]
- trafilatura[html2parquet]
- transformers[all]
- transformers[code]
- transformers[code-quality]
- transformers[faces]
- transformers[gneissweb]
- transformers[hap]
- transformers[images]
- transformers[lang-id]
- transformers[language]
- transformers[nsfw]
- transformers[people]
- transformers[rep-removal]
- transformers[similarity]
- transformers[text-encoder]
- transformers[tokenization]
- transformers[tokenization2arrow]
- tree-sitter[all]
- tree-sitter[code]
- tree-sitter[code-profiler]
- tree-sitter-languages[all]
- tree-sitter-languages[code]
- tree-sitter-languages[code-profiler]
- twine[dev]
- ultralytics[faces]
- ultralytics[images]
- ultralytics[nsfw]
- ultralytics[people]
- unicategories[all]
- unicategories[enrichment]
- unicategories[language]
- unicodedataplus[all]
- unicodedataplus[enrichment]
- unicodedataplus[language]
- xxhash[all]
- xxhash[code]
- xxhash[ededup]
- xxhash[language]
- xxhash[profiler]