Search code examples
python, python-3.x, huggingface-datasets

HuggingFace - load_dataset raises "NotImplementedError: Cannot"


When I try to download a dataset from HuggingFace, it raises a KeyError followed by "NotImplementedError: Cannot". My code and the full error output are as follows:

Code:

from datasets import load_dataset

dataset = load_dataset("archanatikayatray/aeroBERT-NER")

Error:

Downloading and preparing dataset text/archanatikayatray--aeroBERT-NER to /home/ubuntu/.cache/huggingface/datasets/archanatikayatray___text/archanatikayatray--aeroBERT-NER-35784c68a0685bbb/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2...
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:653, in TraitType.get(self, obj, cls)
    652 try:
--> 653     value = obj._trait_values[self.name]
    654 except KeyError:
    655     # Check for a dynamic initializer.

KeyError: 'layout'

During handling of the above exception, another exception occurred:

NotImplementedError                       Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 dataset = load_dataset("archanatikayatray/aeroBERT-NER")

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/load.py:1797, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
   1794 try_from_hf_gcs = path not in _PACKAGED_DATASETS_MODULES
   1796 # Download and prepare data
-> 1797 builder_instance.download_and_prepare(
   1798     download_config=download_config,
   1799     download_mode=download_mode,
   1800     verification_mode=verification_mode,
   1801     try_from_hf_gcs=try_from_hf_gcs,
   1802     num_proc=num_proc,
   1803     storage_options=storage_options,
   1804 )
   1806 # Build dataset for splits
   1807 keep_in_memory = (
   1808     keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
   1809 )

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/builder.py:890, in DatasetBuilder.download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
    888     if num_proc is not None:
    889         prepare_split_kwargs["num_proc"] = num_proc
--> 890     self._download_and_prepare(
    891         dl_manager=dl_manager,
    892         verification_mode=verification_mode,
    893         **prepare_split_kwargs,
    894         **download_and_prepare_kwargs,
    895     )
    896 # Sync info
    897 self.info.dataset_size = sum(split.num_bytes for split in self.info.splits.values())

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/builder.py:963, in DatasetBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
    961 split_dict = SplitDict(dataset_name=self.name)
    962 split_generators_kwargs = self._make_split_generators_kwargs(prepare_split_kwargs)
--> 963 split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
    965 # Checksums verification
    966 if verification_mode == VerificationMode.ALL_CHECKS and dl_manager.record_checksums:

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/packaged_modules/text/text.py:42, in Text._split_generators(self, dl_manager)
     40 if not self.config.data_files:
     41     raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
---> 42 data_files = dl_manager.download_and_extract(self.config.data_files)
     43 if isinstance(data_files, (str, list, tuple)):
     44     files = data_files

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/download/download_manager.py:564, in DownloadManager.download_and_extract(self, url_or_urls)
    548 def download_and_extract(self, url_or_urls):
    549     """Download and extract given `url_or_urls`.
    550 
    551     Is roughly equivalent to:
   (...)
    562         extracted_path(s): `str`, extracted paths of given URL(s).
    563     """
--> 564     return self.extract(self.download(url_or_urls))

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/download/download_manager.py:427, in DownloadManager.download(self, url_or_urls)
    424 download_func = partial(self._download, download_config=download_config)
    426 start_time = datetime.now()
--> 427 downloaded_path_or_paths = map_nested(
    428     download_func,
    429     url_or_urls,
    430     map_tuple=True,
    431     num_proc=download_config.num_proc,
    432     disable_tqdm=not is_progress_bar_enabled(),
    433     desc="Downloading data files",
    434 )
    435 duration = datetime.now() - start_time
    436 logger.info(f"Downloading took {duration.total_seconds() // 60} min")

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/utils/py_utils.py:445, in map_nested(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, types, disable_tqdm, desc)
    441     num_proc = 1
    442 if num_proc <= 1 or len(iterable) < parallel_min_length:
    443     mapped = [
    444         _single_map_nested((function, obj, types, None, True, None))
--> 445         for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
    446     ]
    447 else:
    448     num_proc = num_proc if num_proc <= len(iterable) else len(iterable)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/datasets/utils/logging.py:206, in _tqdm_cls.__call__(self, *args, **kwargs)
    204 def __call__(self, *args, **kwargs):
    205     if _tqdm_active:
--> 206         return tqdm_lib.tqdm(*args, **kwargs)
    207     else:
    208         return EmptyTqdm(*args, **kwargs)

File ~/.local/lib/python3.8/site-packages/tqdm/notebook.py:242, in tqdm_notebook.__init__(self, *args, **kwargs)
    240 unit_scale = 1 if self.unit_scale is True else self.unit_scale or 1
    241 total = self.total * unit_scale if self.total else self.total
--> 242 self.container = self.status_printer(self.fp, total, self.desc, self.ncols)
    243 self.container.pbar = proxy(self)
    244 self.displayed = False

File ~/.local/lib/python3.8/site-packages/tqdm/notebook.py:120, in tqdm_notebook.status_printer(_, total, desc, ncols)
    118     raise ImportError(WARN_NOIPYW)
    119 if total:
--> 120     pbar = IProgress(min=0, max=total)
    121 else:  # No total? Show info style bar with no progress tqdm status
    122     pbar = IProgress(min=0, max=1)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_float.py:26, in _Float.__init__(self, value, **kwargs)
     24 if value is not None:
     25     kwargs['value'] = value
---> 26 super().__init__(**kwargs)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_description.py:35, in DescriptionWidget.__init__(self, *args, **kwargs)
     33     kwargs.setdefault('tooltip', kwargs['description_tooltip'])
     34     del kwargs['description_tooltip']
---> 35 super().__init__(*args, **kwargs)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:504, in Widget.__init__(self, **kwargs)
    501 super().__init__(**kwargs)
    503 Widget._call_widget_constructed(self)
--> 504 self.open()

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:517, in Widget.open(self)
    515 """Open a comm to the frontend if one isn't already open."""
    516 if self.comm is None:
--> 517     state, buffer_paths, buffers = _remove_buffers(self.get_state())
    519     args = dict(target_name='jupyter.widget',
    520                 data={'state': state, 'buffer_paths': buffer_paths},
    521                 buffers=buffers,
    522                 metadata={'version': __protocol_version__}
    523                 )
    524     if self._model_id is not None:

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:615, in Widget.get_state(self, key, drop_defaults)
    613 for k in keys:
    614     to_json = self.trait_metadata(k, 'to_json', self._trait_to_json)
--> 615     value = to_json(getattr(self, k), self)
    616     if not drop_defaults or not self._compare(value, traits[k].default_value):
    617         state[k] = value

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:700, in TraitType.__get__(self, obj, cls)
    698     return self
    699 else:
--> 700     return self.get(obj, cls)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:656, in TraitType.get(self, obj, cls)
    653     value = obj._trait_values[self.name]
    654 except KeyError:
    655     # Check for a dynamic initializer.
--> 656     default = obj.trait_defaults(self.name)
    657     if default is Undefined:
    658         warn(
    659             "Explicit using of Undefined as the default value "
    660             "is deprecated in traitlets 5.0, and may cause "
   (...)
    663             stacklevel=2,
    664         )

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:1868, in HasTraits.trait_defaults(self, *names, **metadata)
   1865         raise TraitError(f"'{n}' is not a trait of '{type(self).__name__}' instances")
   1867 if len(names) == 1 and len(metadata) == 0:
-> 1868     return self._get_trait_default_generator(names[0])(self)
   1870 trait_names = self.trait_names(**metadata)
   1871 trait_names.extend(names)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/traitlets/traitlets.py:624, in TraitType.default(self, obj)
    622     return self.default_value
    623 elif hasattr(self, "make_dynamic_default"):
--> 624     return self.make_dynamic_default()
    625 else:
    626     # Undefined will raise in TraitType.get
    627     return self.default_value

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/trait_types.py:365, in InstanceDict.make_dynamic_default(self)
    364 def make_dynamic_default(self):
--> 365     return self.klass(*(self.default_args or ()),
    366                       **(self.default_kwargs or {}))

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget_layout.py:86, in Layout.__init__(self, **kwargs)
     83     for side in ['top', 'right', 'bottom', 'left']:
     84         kwargs.setdefault(f'border_{side}', border)
---> 86 super().__init__(**kwargs)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:504, in Widget.__init__(self, **kwargs)
    501 super().__init__(**kwargs)
    503 Widget._call_widget_constructed(self)
--> 504 self.open()

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/ipywidgets/widgets/widget.py:535, in Widget.open(self)
    531         from ipykernel.comm import Comm
    533         return Comm(**kwargs)
--> 535 self.comm = create_comm(**args)

File ~/anaconda3/envs/cihat/lib/python3.8/site-packages/comm/__init__.py:27, in _create_comm(*args, **kwargs)
     22 def _create_comm(*args, **kwargs):
     23     """Create a Comm.
     24 
     25     This method is intended to be replaced, so that it returns your Comm instance.
     26     """
---> 27     raise NotImplementedError("Cannot ")

NotImplementedError: Cannot  

I have researched this issue but found nothing. Can you help me with this one?


Solution

  • import os
    os.environ["CURL_CA_BUNDLE"] = ""
    

    Adding these two lines solved my problem.