When I try to download a dataset from Hugging Face, it raises a KeyError followed by "NotImplementedError: Cannot". My code and the errors are as follows:
Code:
from datasets import load_dataset
dataset = load_dataset("archanatikayatray/aeroBERT-NER")
Error:
Downloading and preparing dataset text/archanatikayatray--aeroBERT-NER to /home/ubuntu/.cache/huggingface/datasets/archanatikayatray___text/archanatikayatray--aeroBERT-NER-35784c68a0685bbb/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2...
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:653, in TraitType.get(self, obj, cls)
652 try:
--> 653 value = obj._trait_values[self.name]
654 except KeyError:
655 # Check for a dynamic initializer.
KeyError: 'layout'
During handling of the above exception, another exception occurred:
NotImplementedError Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 dataset = load_dataset("archanatikayatray/aeroBERT-NER")
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/load.py:1797, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
1794 try_from_hf_gcs = path not in _PACKAGED_DATASETS_MODULES
1796 # Download and prepare data
-> 1797 builder_instance.download_and_prepare(
1798 download_config=download_config,
1799 download_mode=download_mode,
1800 verification_mode=verification_mode,
1801 try_from_hf_gcs=try_from_hf_gcs,
1802 num_proc=num_proc,
1803 storage_options=storage_options,
1804 )
1806 # Build dataset for splits
1807 keep_in_memory = (
1808 keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
1809 )
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/builder.py:890, in DatasetBuilder.download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
888 if num_proc is not None:
889 prepare_split_kwargs["num_proc"] = num_proc
--> 890 self._download_and_prepare(
891 dl_manager=dl_manager,
892 verification_mode=verification_mode,
893 **prepare_split_kwargs,
894 **download_and_prepare_kwargs,
895 )
896 # Sync info
897 self.info.dataset_size = sum(split.num_bytes for split in self.info.splits.values())
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/builder.py:963, in DatasetBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
961 split_dict = SplitDict(dataset_name=self.name)
962 split_generators_kwargs = self._make_split_generators_kwargs(prepare_split_kwargs)
--> 963 split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
965 # Checksums verification
966 if verification_mode == VerificationMode.ALL_CHECKS and dl_manager.record_checksums:
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/packaged_modules/text/text.py:42, in Text._split_generators(self, dl_manager)
40 if not self.config.data_files:
41 raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
---> 42 data_files = dl_manager.download_and_extract(self.config.data_files)
43 if isinstance(data_files, (str, list, tuple)):
44 files = data_files
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/download/download_manager.py:564, in DownloadManager.download_and_extract(self, url_or_urls)
548 def download_and_extract(self, url_or_urls):
549 """Download and extract given `url_or_urls`.
550
551 Is roughly equivalent to:
(...)
562 extracted_path(s): `str`, extracted paths of given URL(s).
563 """
--> 564 return self.extract(self.download(url_or_urls))
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/download/download_manager.py:427, in DownloadManager.download(self, url_or_urls)
424 download_func = partial(self._download, download_config=download_config)
426 start_time = datetime.now()
--> 427 downloaded_path_or_paths = map_nested(
428 download_func,
429 url_or_urls,
430 map_tuple=True,
431 num_proc=download_config.num_proc,
432 disable_tqdm=not is_progress_bar_enabled(),
433 desc="Downloading data files",
434 )
435 duration = datetime.now() - start_time
436 logger.info(f"Downloading took {duration.total_seconds() // 60} min")
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/utils/py_utils.py:445, in map_nested(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, types, disable_tqdm, desc)
441 num_proc = 1
442 if num_proc <= 1 or len(iterable) < parallel_min_length:
443 mapped = [
444 _single_map_nested((function, obj, types, None, True, None))
--> 445 for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
446 ]
447 else:
448 num_proc = num_proc if num_proc <= len(iterable) else len(iterable)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/utils/logging.py:206, in _tqdm_cls.__call__(self, *args, **kwargs)
204 def __call__(self, *args, **kwargs):
205 if _tqdm_active:
--> 206 return tqdm_lib.tqdm(*args, **kwargs)
207 else:
208 return EmptyTqdm(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/tqdm/notebook.py:242, in tqdm_notebook.__init__(self, *args, **kwargs)
240 unit_scale = 1 if self.unit_scale is True else self.unit_scale or 1
241 total = self.total * unit_scale if self.total else self.total
--> 242 self.container = self.status_printer(self.fp, total, self.desc, self.ncols)
243 self.container.pbar = proxy(self)
244 self.displayed = False
File ~/.local/lib/python3.8/site-packages/tqdm/notebook.py:120, in tqdm_notebook.status_printer(_, total, desc, ncols)
118 raise ImportError(WARN_NOIPYW)
119 if total:
--> 120 pbar = IProgress(min=0, max=total)
121 else: # No total? Show info style bar with no progress tqdm status
122 pbar = IProgress(min=0, max=1)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_float.py:26, in _Float.__init__(self, value, **kwargs)
24 if value is not None:
25 kwargs['value'] = value
---> 26 super().__init__(**kwargs)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_description.py:35, in DescriptionWidget.__init__(self, *args, **kwargs)
33 kwargs.setdefault('tooltip', kwargs['description_tooltip'])
34 del kwargs['description_tooltip']
---> 35 super().__init__(*args, **kwargs)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:504, in Widget.__init__(self, **kwargs)
501 super().__init__(**kwargs)
503 Widget._call_widget_constructed(self)
--> 504 self.open()
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:517, in Widget.open(self)
515 """Open a comm to the frontend if one isn't already open."""
516 if self.comm is None:
--> 517 state, buffer_paths, buffers = _remove_buffers(self.get_state())
519 args = dict(target_name='jupyter.widget',
520 data={'state': state, 'buffer_paths': buffer_paths},
521 buffers=buffers,
522 metadata={'version': __protocol_version__}
523 )
524 if self._model_id is not None:
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:615, in Widget.get_state(self, key, drop_defaults)
613 for k in keys:
614 to_json = self.trait_metadata(k, 'to_json', self._trait_to_json)
--> 615 value = to_json(getattr(self, k), self)
616 if not drop_defaults or not self._compare(value, traits[k].default_value):
617 state[k] = value
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:700, in TraitType.__get__(self, obj, cls)
698 return self
699 else:
--> 700 return self.get(obj, cls)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:656, in TraitType.get(self, obj, cls)
653 value = obj._trait_values[self.name]
654 except KeyError:
655 # Check for a dynamic initializer.
--> 656 default = obj.trait_defaults(self.name)
657 if default is Undefined:
658 warn(
659 "Explicit using of Undefined as the default value "
660 "is deprecated in traitlets 5.0, and may cause "
(...)
663 stacklevel=2,
664 )
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:1868, in HasTraits.trait_defaults(self, *names, **metadata)
1865 raise TraitError(f"'{n}' is not a trait of '{type(self).__name__}' instances")
1867 if len(names) == 1 and len(metadata) == 0:
-> 1868 return self._get_trait_default_generator(names[0])(self)
1870 trait_names = self.trait_names(**metadata)
1871 trait_names.extend(names)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:624, in TraitType.default(self, obj)
622 return self.default_value
623 elif hasattr(self, "make_dynamic_default"):
--> 624 return self.make_dynamic_default()
625 else:
626 # Undefined will raise in TraitType.get
627 return self.default_value
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/trait_types.py:365, in InstanceDict.make_dynamic_default(self)
364 def make_dynamic_default(self):
--> 365 return self.klass(*(self.default_args or ()),
366 **(self.default_kwargs or {}))
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_layout.py:86, in Layout.__init__(self, **kwargs)
83 for side in ['top', 'right', 'bottom', 'left']:
84 kwargs.setdefault(f'border_{side}', border)
---> 86 super().__init__(**kwargs)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:504, in Widget.__init__(self, **kwargs)
501 super().__init__(**kwargs)
503 Widget._call_widget_constructed(self)
--> 504 self.open()
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:535, in Widget.open(self)
531 from ipykernel.comm import Comm
533 return Comm(**kwargs)
--> 535 self.comm = create_comm(**args)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/comm/__init__.py:27, in _create_comm(*args, **kwargs)
22 def _create_comm(*args, **kwargs):
23 """Create a Comm.
24
25 This method is intended to be replaced, so that it returns your Comm instance.
26 """
---> 27 raise NotImplementedError("Cannot ")
NotImplementedError: Cannot
I researched this issue but found nothing. Can you help me with this one?
import os
os.environ["CURL_CA_BUNDLE"] = ""
Adding these two lines solved my problem.