allenai/objaverse | https://huggingface.co/datasets/allenai/objaverse | 4347932 | | odc-by | | | 2023-03-31 11:05:57+00:00 | | dataset_readmes/allenai__objaverse_README.md | 159 | rich | | | 8900556689527 | | todo |
nebius/SWE-rebench | https://huggingface.co/datasets/nebius/SWE-rebench | 3840462 | | cc-by-4.0 | | other | 2025-08-08 09:59:09+00:00 | | dataset_readmes/nebius__SWE-rebench_README.md | 843 | rich | | | 481218187 | | todo |
huggingface/documentation-images | https://huggingface.co/datasets/huggingface/documentation-images | 2649771 | | cc-by-nc-sa-4.0 | | | 2025-09-18 14:22:40+00:00 | | dataset_readmes/huggingface__documentation-images_README.md | 31 | rich | | | 4011112634 | | todo |
SWE-Gym/SWE-Gym | https://huggingface.co/datasets/SWE-Gym/SWE-Gym | 2183365 | | mit | | | 2025-05-10 03:27:03+00:00 | | dataset_readmes/SWE-Gym__SWE-Gym_README.md | 22 | rich | | | 43644473 | | todo |
hf-doc-build/doc-build-dev | https://huggingface.co/datasets/hf-doc-build/doc-build-dev | 1377022 | | mit | documentation | | 2025-09-19 06:29:58+00:00 | | dataset_readmes/hf-doc-build__doc-build-dev_README.md | 33 | rich | | | 420955236306 | | todo |
princeton-nlp/SWE-bench_Verified | https://huggingface.co/datasets/princeton-nlp/SWE-bench_Verified | 1354884 | | | | | 2025-02-18 23:48:55+00:00 | | dataset_readmes/princeton-nlp__SWE-bench_Verified_README.md | 378 | rich | | | 8393818 | | todo |
lavita/medical-qa-shared-task-v1-toy | https://huggingface.co/datasets/lavita/medical-qa-shared-task-v1-toy | 888253 | | | | | 2023-07-20 00:29:06+00:00 | short description | dataset_readmes/lavita__medical-qa-shared-task-v1-toy_README.md | 8 | minimal | | | 13880912 | | todo |
IPEC-COMMUNITY/language_table_lerobot | https://huggingface.co/datasets/IPEC-COMMUNITY/language_table_lerobot | 874760 | | apache-2.0 | LeRobot, LeRobot, language_table, rlds, openx, xarm | robotics | 2025-03-20 11:33:45+00:00 | | dataset_readmes/IPEC-COMMUNITY__language_table_lerobot_README.md | 204 | rich | | | 76515470594 | | todo |
nvidia/PhysicalAI-Robotics-GR00T-X-Embodiment-Sim | https://huggingface.co/datasets/nvidia/PhysicalAI-Robotics-GR00T-X-Embodiment-Sim | 826578 | | cc-by-4.0 | robotics | robotics | 2025-09-17 21:58:25+00:00 | | dataset_readmes/nvidia__PhysicalAI-Robotics-GR00T-X-Embodiment-Sim_README.md | 607 | rich | | | 816649538369 | | todo |
Salesforce/wikitext | https://huggingface.co/datasets/Salesforce/wikitext | 802116 | | ['cc-by-sa-3.0', 'gfdl'] | | text-generation, fill-mask | 2024-01-04 16:49:18+00:00 | | dataset_readmes/Salesforce__wikitext_README.md | 802 | rich | | | 11667545537 | | todo |
jat-project/jat-dataset | https://huggingface.co/datasets/jat-project/jat-dataset | 797910 | | apache-2.0 | imitation-learning, reinforcement-learning, text-generation, question-answering, generalist-agent | reinforcement-learning, text-generation, question-answering | 2024-02-16 13:52:52+00:00 | | dataset_readmes/jat-project__jat-dataset_README.md | 2559 | rich | | | 3552987541012 | | todo |
permutans/fineweb-bbc-news | https://huggingface.co/datasets/permutans/fineweb-bbc-news | 795107 | | odc-by | news, fineweb | | 2025-01-27 17:07:18+00:00 | | dataset_readmes/permutans__fineweb-bbc-news_README.md | 438 | rich | | | 29064033591 | | todo |
IPEC-COMMUNITY/droid_lerobot | https://huggingface.co/datasets/IPEC-COMMUNITY/droid_lerobot | 656068 | | apache-2.0 | LeRobot, LeRobot, droid, rlds, openx, franka | robotics | 2025-04-28 02:59:32+00:00 | | dataset_readmes/IPEC-COMMUNITY__droid_lerobot_README.md | 276 | rich | | | 392063513246 | | todo |
cadene/droid_1.0.1 | https://huggingface.co/datasets/cadene/droid_1.0.1 | 629540 | | apache-2.0 | LeRobot | robotics | 2025-03-20 13:14:51+00:00 | | dataset_readmes/cadene__droid_1.0.1_README.md | 681 | rich | | | 399287567377 | | todo |
openclimatefix/met-office-uk-deterministic-solar | https://huggingface.co/datasets/openclimatefix/met-office-uk-deterministic-solar | 591090 | | cc-by-4.0 | weather, nwp, met-office, deterministic, zarr, climate, solar | time-series-forecasting | 2025-03-06 12:07:15+00:00 | | dataset_readmes/openclimatefix__met-office-uk-deterministic-solar_README.md | 390 | rich | | | 2515376945832 | | todo |
banned-historical-archives/banned-historical-archives | https://huggingface.co/datasets/banned-historical-archives/banned-historical-archives | 588927 | | | | | 2025-04-13 15:18:38+00:00 | | dataset_readmes/banned-historical-archives__banned-historical-archives_README.md | 102 | rich | | | 869429045513 | | todo |
nyu-mll/glue | https://huggingface.co/datasets/nyu-mll/glue | 509704 | | ['other'] | qa-nli, coreference-nli, paraphrase-identification | text-classification | 2024-01-30 07:41:18+00:00 | | dataset_readmes/nyu-mll__glue_README.md | 2947 | rich | | | 4006562287 | | todo |
yyyzzzzyyy/envss | https://huggingface.co/datasets/yyyzzzzyyy/envss | 488856 | | | | | | Failed to load card | | 0 | minimal | | | 70048032774 | | todo |
allenai/c4 | https://huggingface.co/datasets/allenai/c4 | 482671 | | ['odc-by'] | | text-generation, fill-mask | 2024-01-09 19:14:03+00:00 | | dataset_readmes/allenai__c4_README.md | 1440 | rich | | | 33055538287189 | | todo |
hf-doc-build/doc-build | https://huggingface.co/datasets/hf-doc-build/doc-build | 481380 | | mit | | | 2025-09-19 04:01:54+00:00 | short description | dataset_readmes/hf-doc-build__doc-build_README.md | 21 | minimal | | | 296439431810 | | todo |
IPEC-COMMUNITY/bridge_orig_lerobot | https://huggingface.co/datasets/IPEC-COMMUNITY/bridge_orig_lerobot | 477043 | | apache-2.0 | LeRobot, LeRobot, bridge_orig, rlds, openx, widowx | robotics | 2025-02-23 06:25:52+00:00 | | dataset_readmes/IPEC-COMMUNITY__bridge_orig_lerobot_README.md | 312 | rich | | | 21462027262 | | todo |
applied-ai-018/pretraining_v1-omega_books | https://huggingface.co/datasets/applied-ai-018/pretraining_v1-omega_books | 469259 | | | | | 2024-08-05 19:01:31+00:00 | short description | dataset_readmes/applied-ai-018__pretraining_v1-omega_books_README.md | 0 | minimal | | | 12392869991819 | | todo |
huggingface/badges | https://huggingface.co/datasets/huggingface/badges | 467984 | | mit | | | 2025-07-17 17:25:53+00:00 | | dataset_readmes/huggingface__badges_README.md | 561 | rich | | | 696206 | | todo |
IPEC-COMMUNITY/kuka_lerobot | https://huggingface.co/datasets/IPEC-COMMUNITY/kuka_lerobot | 455652 | | apache-2.0 | LeRobot, LeRobot, kuka, rlds, openx, kuka_iiwa | robotics | 2025-02-24 15:19:23+00:00 | | dataset_readmes/IPEC-COMMUNITY__kuka_lerobot_README.md | 204 | rich | | | 35373768103 | | todo |
jat-project/jat-dataset-tokenized | https://huggingface.co/datasets/jat-project/jat-dataset-tokenized | 438763 | | | | | 2023-12-22 22:17:42+00:00 | short description | dataset_readmes/jat-project__jat-dataset-tokenized_README.md | 8 | minimal | | | 889947883646 | | todo |
hf-internal-testing/transformers_circleci_workflow_runs | https://huggingface.co/datasets/hf-internal-testing/transformers_circleci_workflow_runs | 417351 | | | | | | Failed to load card | | 0 | minimal | | | 507015606 | | todo |
openai/gsm8k | https://huggingface.co/datasets/openai/gsm8k | 404130 | | ['mit'] | math-word-problems | text2text-generation | 2024-01-04 12:05:15+00:00 | | dataset_readmes/openai__gsm8k_README.md | 875 | rich | | | 95470379 | | todo |
cadene/droid | https://huggingface.co/datasets/cadene/droid | 353894 | | apache-2.0 | LeRobot, openx | robotics | 2025-02-27 14:00:10+00:00 | | dataset_readmes/cadene__droid_README.md | 358 | rich | | | 400707154490 | | todo |
huggingface-course/documentation-images | https://huggingface.co/datasets/huggingface-course/documentation-images | 353758 | | apache-2.0 | | | 2025-06-13 08:13:44+00:00 | short description | dataset_readmes/huggingface-course__documentation-images_README.md | 0 | minimal | | | 41436010 | | todo |
cais/mmlu | https://huggingface.co/datasets/cais/mmlu | 346811 | | ['mit'] | | question-answering | 2024-03-08 20:36:26+00:00 | | dataset_readmes/cais__mmlu_README.md | 766 | rich | | | 95421341342 | | todo |
IPEC-COMMUNITY/fractal20220817_data_lerobot | https://huggingface.co/datasets/IPEC-COMMUNITY/fractal20220817_data_lerobot | 344333 | | apache-2.0 | LeRobot, LeRobot, fractal20220817_data, rlds, openx, google_robot | robotics | 2025-02-23 06:35:48+00:00 | | dataset_readmes/IPEC-COMMUNITY__fractal20220817_data_lerobot_README.md | 204 | rich | | | 21925215109 | | todo |
HuggingFaceFW/fineweb | https://huggingface.co/datasets/HuggingFaceFW/fineweb | 343982 | | odc-by | | text-generation | 2025-07-11 20:16:53+00:00 | | dataset_readmes/HuggingFaceFW__fineweb_README.md | 3707 | rich | | | 117390103814494 | | todo |
rtrm/debug | https://huggingface.co/datasets/rtrm/debug | 343070 | | | | | 2023-08-14 12:26:52+00:00 | No metadata and no description | dataset_readmes/rtrm__debug_README.md | 1 | minimal | | | 357762063556 | | todo |
HuggingFaceFW/fineweb-edu | https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu | 326982 | | odc-by | | text-generation | 2025-07-11 20:16:53+00:00 | | dataset_readmes/HuggingFaceFW__fineweb-edu_README.md | 1621 | rich | | | 6190715174200 | | todo |
adams-story/datacomp200m | https://huggingface.co/datasets/adams-story/datacomp200m | 319647 | | | | | 2023-07-19 02:44:42+00:00 | No metadata but has description | dataset_readmes/adams-story__datacomp200m_README.md | 98 | minimal | | | 1449628746669 | | todo |
TAUR-Lab/Taur_CoT_Analysis_Project___gpt-4o-2024-08-06 | https://huggingface.co/datasets/TAUR-Lab/Taur_CoT_Analysis_Project___gpt-4o-2024-08-06 | 302823 | | | | | 2024-10-19 21:20:37+00:00 | short description | dataset_readmes/TAUR-Lab__Taur_CoT_Analysis_Project___gpt-4o-2024-08-06_README.md | 0 | minimal | | | 334242288 | | todo |
m-a-p/FineFineWeb | https://huggingface.co/datasets/m-a-p/FineFineWeb | 274595 | | apache-2.0 | | text-classification, text2text-generation, text-generation | 2024-12-19 11:34:03+00:00 | | dataset_readmes/m-a-p__FineFineWeb_README.md | 2351 | rich | | | 23159296534795 | | todo |
mlfoundations/MINT-1T-PDF-CC-2024-10 | https://huggingface.co/datasets/mlfoundations/MINT-1T-PDF-CC-2024-10 | 273764 | | cc-by-4.0 | multimodal | image-to-text, text-generation | 2024-09-19 21:03:25+00:00 | | dataset_readmes/mlfoundations__MINT-1T-PDF-CC-2024-10_README.md | 1169 | rich | | | 16838627484807 | | todo |
agents-course/course-images | https://huggingface.co/datasets/agents-course/course-images | 250253 | | | | | | Failed to load card | | 0 | minimal | | | 192961596 | | todo |
espnet/yodas2 | https://huggingface.co/datasets/espnet/yodas2 | 243556 | | cc-by-3.0 | | | 2025-05-15 22:28:55+00:00 | | dataset_readmes/espnet__yodas2_README.md | 380 | rich | | | 60272635921266 | | todo |
allenai/ai2_arc | https://huggingface.co/datasets/allenai/ai2_arc | 235842 | | ['cc-by-sa-4.0'] | | question-answering | 2023-12-21 15:09:48+00:00 | | dataset_readmes/allenai__ai2_arc_README.md | 695 | rich | | | 94480182 | | todo |
HuggingFaceM4/FineVision | https://huggingface.co/datasets/HuggingFaceM4/FineVision | 230577 | | | | | 2025-09-04 19:54:22+00:00 | | dataset_readmes/HuggingFaceM4__FineVision_README.md | 440 | rich | | | 9961228706139 | | todo |
stanfordnlp/imdb | https://huggingface.co/datasets/stanfordnlp/imdb | 222834 | | ['other'] | | text-classification | 2024-01-04 12:09:45+00:00 | | dataset_readmes/stanfordnlp__imdb_README.md | 469 | rich | | | 3057178368 | | todo |
HuggingFaceM4/the_cauldron | https://huggingface.co/datasets/HuggingFaceM4/the_cauldron | 222395 | | | | | 2024-05-06 13:37:52+00:00 | | dataset_readmes/HuggingFaceM4__the_cauldron_README.md | 3346 | rich | | | 302544269006 | | todo |
Gourieff/ReActor | https://huggingface.co/datasets/Gourieff/ReActor | 221929 | | mit | | | 2025-03-23 18:44:36+00:00 | | dataset_readmes/Gourieff__ReActor_README.md | 79 | rich | | | 6980074496 | | todo |
apple/DataCompDR-1B | https://huggingface.co/datasets/apple/DataCompDR-1B | 220734 | | apple-amlr | | text-to-image, image-to-text | 2025-02-28 18:39:32+00:00 | | dataset_readmes/apple__DataCompDR-1B_README.md | 535 | rich | | | 133686881329069 | | todo |
chcorbi/helvipad | https://huggingface.co/datasets/chcorbi/helvipad | 207550 | | cc0-1.0 | omnidirectional, stereo-matching, depth-estimation, image | depth-estimation | 2025-08-26 13:37:38+00:00 | | dataset_readmes/chcorbi__helvipad_README.md | 737 | rich | | | 105018887235 | | todo |
fancyzhx/ag_news | https://huggingface.co/datasets/fancyzhx/ag_news | 205764 | | ['unknown'] | | text-classification | 2024-03-07 12:02:37+00:00 | | dataset_readmes/fancyzhx__ag_news_README.md | 531 | rich | | | 447004035 | | todo |
atokforps/latent_v1_alpha_07 | https://huggingface.co/datasets/atokforps/latent_v1_alpha_07 | 200118 | | | | | | Failed to load card | | 0 | minimal | | | 708191616 | | todo |
AquaV/genshin-voices-separated | https://huggingface.co/datasets/AquaV/genshin-voices-separated | 194912 | | | | | | Failed to load card | | 0 | minimal | | | 362199833185 | | todo |