I have installed spaCy in a Jupyter notebook in JupyterLab, which I access through the Anaconda Navigator, all on a remote desktop.
I was able to install spaCy using
!pip install spacy
But when I tried to run
import spacy
nlp = spacy.load('en_core_web_sm')
I got the common OSError [E050] saying that 'en_core_web_sm' doesn't seem to be a Python package or a valid path to a data directory. So I went in circles for a while, finally downloaded en_core_web_sm 3.7.0 to my Downloads folder from here, and then used
!pip install Downloads/en_core_web_sm-3.7.0.tar.gz
to install the English language model. I ran !pip list
and confirmed that I have en-core-web-sm 3.7.0
and spacy 3.7.2 installed.
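The same versions can also be confirmed from inside the notebook; this is just a sketch of an equivalent check, not one of my original steps:
import spacy
from importlib.metadata import version
print("spacy:", spacy.__version__)                    # expect 3.7.2
print("en_core_web_sm:", version("en_core_web_sm"))   # expect 3.7.0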
But when I tried to load the model, I got this AttributeError:
nlp = spacy.load('en_core_web_sm')
AttributeError Traceback (most recent call last)
Cell In[2], line 1
----> 1 nlp = spacy.load('en_core_web_sm')
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\__init__.py:51, in load(name, vocab, disable, enable, exclude, config)
27 def load(
28 name: Union[str, Path],
29 *,
(...)
34 config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
35 ) -> Language:
36 """Load a spaCy model from an installed package or a local path.
37
38 name (str): Package name or model path.
(...)
49 RETURNS (Language): The loaded nlp object.
50 """
---> 51 return util.load_model(
52 name,
53 vocab=vocab,
54 disable=disable,
55 enable=enable,
56 exclude=exclude,
57 config=config,
58 )
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\util.py:465, in load_model(name, vocab, disable, enable, exclude, config)
463 return get_lang_class(name.replace("blank:", ""))()
464 if is_package(name): # installed as package
--> 465 return load_model_from_package(name, **kwargs) # type: ignore[arg-type]
466 if Path(name).exists(): # path to model data directory
467 return load_model_from_path(Path(name), **kwargs) # type: ignore[arg-type]
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\util.py:501, in load_model_from_package(name, vocab, disable, enable, exclude, config)
484 """Load a model from an installed package.
485
486 name (str): The package name.
(...)
498 RETURNS (Language): The loaded nlp object.
499 """
500 cls = importlib.import_module(name)
--> 501 return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config)
File ~\AppData\Roaming\Python\Python311\site-packages\en_core_web_sm\__init__.py:10, in load(**overrides)
9 def load(**overrides):
---> 10 return load_model_from_init_py(__file__, **overrides)
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\util.py:682, in load_model_from_init_py(init_file, vocab, disable, enable, exclude, config)
680 if not model_path.exists():
681 raise IOError(Errors.E052.format(path=data_path))
--> 682 return load_model_from_path(
683 data_path,
684 vocab=vocab,
685 meta=meta,
686 disable=disable,
687 enable=enable,
688 exclude=exclude,
689 config=config,
690 )
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\util.py:539, in load_model_from_path(model_path, meta, vocab, disable, enable, exclude, config)
537 overrides = dict_to_dot(config, for_overrides=True)
538 config = load_config(config_path, overrides=overrides)
--> 539 nlp = load_model_from_config(
540 config,
541 vocab=vocab,
542 disable=disable,
543 enable=enable,
544 exclude=exclude,
545 meta=meta,
546 )
547 return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\util.py:587, in load_model_from_config(config, meta, vocab, disable, enable, exclude, auto_fill, validate)
584 # This will automatically handle all codes registered via the languages
585 # registry, including custom subclasses provided via entry points
586 lang_cls = get_lang_class(nlp_config["lang"])
--> 587 nlp = lang_cls.from_config(
588 config,
589 vocab=vocab,
590 disable=disable,
591 enable=enable,
592 exclude=exclude,
593 auto_fill=auto_fill,
594 validate=validate,
595 meta=meta,
596 )
597 return nlp
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\language.py:1830, in Language.from_config(cls, config, vocab, disable, enable, exclude, meta, auto_fill, validate)
1824 warn_if_jupyter_cupy()
1826 # Note that we don't load vectors here, instead they get loaded explicitly
1827 # inside stuff like the spacy train function. If we loaded them here,
1828 # then we would load them twice at runtime: once when we make from config,
1829 # and then again when we load from disk.
-> 1830 nlp = lang_cls(
1831 vocab=vocab,
1832 create_tokenizer=create_tokenizer,
1833 create_vectors=create_vectors,
1834 meta=meta,
1835 )
1836 if after_creation is not None:
1837 nlp = after_creation(nlp)
File ~\AppData\Roaming\Python\Python311\site-packages\spacy\language.py:188, in Language.__init__(self, vocab, max_length, meta, create_tokenizer, create_vectors, batch_size, **kwargs)
166 """Initialise a Language object.
167
168 vocab (Vocab): A `Vocab` object. If `True`, a vocab is created.
(...)
183 DOCS: https://spacy.io/api/language#init
184 """
185 # We're only calling this to import all factories provided via entry
186 # points. The factory decorator applied to these functions takes care
187 # of the rest.
--> 188 util.registry._entry_point_factories.get_all()
190 self._config = DEFAULT_CONFIG.merge(self.default_config)
191 self._meta = dict(meta)
File ~\AppData\Roaming\Python\Python311\site-packages\catalogue\__init__.py:110, in Registry.get_all(self)
108 result = {}
109 if self.entry_points:
--> 110 result.update(self.get_entry_points())
111 for keys, value in REGISTRY.copy().items():
112 if len(self.namespace) == len(keys) - 1 and all(
113 self.namespace[i] == keys[i] for i in range(len(self.namespace))
114 ):
File ~\AppData\Roaming\Python\Python311\site-packages\catalogue\__init__.py:125, in Registry.get_entry_points(self)
123 result = {}
124 for entry_point in self._get_entry_points():
--> 125 result[entry_point.name] = entry_point.load()
126 return result
File C:\ProgramData\anaconda3\Lib\importlib\metadata\__init__.py:202, in EntryPoint.load(self)
197 """Load the entry point from its definition. If only a module
198 is indicated by the value, return that module. Otherwise,
199 return the named object.
200 """
201 match = self.pattern.match(self.value)
--> 202 module = import_module(match.group('module'))
203 attrs = filter(None, (match.group('attr') or '').split('.'))
204 return functools.reduce(getattr, attrs, module)
File C:\ProgramData\anaconda3\Lib\importlib\__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1204, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1176, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1126, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File <frozen importlib._bootstrap>:1204, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1176, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1147, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:690, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:940, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\pipeline\__init__.py:1
----> 1 from .transformer import CuratedTransformer
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\pipeline\transformer.py:26
23 from thinc.types import Ragged
25 from ..errors import Errors
---> 26 from ..models.listeners import ListenerStateUtils
27 from ..models.output import DocTransformerOutput, TransformerModelOutput
28 from ..models.types import TransformerListenerModelT
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\models\__init__.py:1
----> 1 from .architectures import (
2 build_albert_transformer_model_v1,
3 build_bert_transformer_model_v1,
4 build_camembert_transformer_model_v1,
5 build_roberta_transformer_model_v1,
6 build_xlmr_transformer_model_v1,
7 build_pytorch_checkpoint_loader_v1,
8 )
9 from .hf_loader import build_hf_transformer_encoder_loader_v1
10 from .scalar_weight import build_scalar_weight_v1
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\models\architectures.py:32
29 from thinc.types import ArgsKwargs, Floats2d, Ints1d
31 from ..errors import Errors
---> 32 from ..tokenization.types import Tok2PiecesModelT
33 from .listeners import (
34 WrappedTransformerAndListener,
35 replace_listener_callback,
36 replace_listener_cfg_callback,
37 )
38 from .output import TransformerModelOutput
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\tokenization\__init__.py:3
1 from .bbpe_encoder import build_byte_bpe_encoder_loader_v1, build_byte_bpe_encoder_v1
2 from .char_encoder import build_char_encoder_loader_v1, build_char_encoder_v1
----> 3 from .hf_loader import build_hf_piece_encoder_loader_v1
4 from .sentencepiece_encoder import (
5 build_camembert_sentencepiece_encoder_v1,
6 build_sentencepiece_encoder_loader_v1,
7 build_sentencepiece_encoder_v1,
8 build_xlmr_sentencepiece_encoder_v1,
9 )
10 from .wordpiece_encoder import (
11 build_bert_wordpiece_encoder_v1,
12 build_wordpiece_encoder_loader_v1,
13 build_wordpiece_encoder_v1,
14 )
File ~\AppData\Roaming\Python\Python311\site-packages\spacy_curated_transformers\tokenization\hf_loader.py:17
13 from ..errors import Errors
15 if has_hf_transformers:
16 SUPPORTED_TOKENIZERS = (
---> 17 transformers.BertTokenizerFast,
18 transformers.RobertaTokenizerFast,
19 transformers.XLMRobertaTokenizerFast,
20 transformers.CamembertTokenizerFast,
21 transformers.BertJapaneseTokenizer,
22 )
23 else:
24 SUPPORTED_TOKENIZERS = () # type: ignore
AttributeError: module 'transformers' has no attribute 'BertTokenizerFast'
The AttributeError is not coming from the model itself: the traceback shows spaCy failing while it imports the spacy-curated-transformers plugin through its entry points, and that plugin trips over the installed transformers package (module 'transformers' has no attribute 'BertTokenizerFast'). Try downgrading spaCy to 3.6.1 with
pip install spacy==3.6.1
and then try
nlp = spacy.load('en_core_web_sm')
again.
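If you are working from the notebook, a minimal sketch of the whole sequence is below. One assumption to flag: model releases are pinned to spaCy's minor version, so after downgrading you will likely also want the matching 3.6.x model rather than the 3.7.0 tarball you installed by hand.
# In a notebook cell; restart the kernel afterwards so the new versions are picked up
!pip install "spacy==3.6.1"
# spacy download fetches the model release that matches the installed spaCy version
!python -m spacy download en_core_web_sm

# In a fresh kernel:
import spacy
print(spacy.__version__)             # expect 3.6.1
nlp = spacy.load('en_core_web_sm')
doc = nlp("Quick sanity check after downgrading.")
print([(token.text, token.pos_) for token in doc])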