I tried to replicate this code using the fastai library, but I'm running into two major issues.
This part of the code:
data_lm = TextLMDataBunch.from_df('data', train_df, valid_df, text_cols='idea')
Gives this kind of error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-39-74ec5bcc1e2a> in <module>
----> 1 data_lm = TextLMDataBunch.from_df('data', train_df, valid_df, text_cols='idea')
~\Anaconda3\lib\site-packages\fastai\text\data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, **kwargs)
325 k_names = ['max_vocab', 'min_freq', 'n_labels', 'txt_cols', 'label_cols', 'clear_cache']
326 txt_kwargs, kwargs = extract_kwargs(k_names, kwargs)
--> 327 train_ds = TextDataset.from_df(path, train_df, tokenizer, 'train', vocab=vocab, **txt_kwargs)
328 datasets = [train_ds, TextDataset.from_df(path, valid_df, tokenizer, 'valid', vocab=train_ds.vocab, **txt_kwargs)]
329 if test_df is not None: datasets.append(TextDataset.from_df(path, test_df, tokenizer, 'test', vocab=train_ds.vocab, **txt_kwargs))
~\Anaconda3\lib\site-packages\fastai\text\data.py in from_df(cls, folder, df, tokenizer, name, **kwargs)
150 tokenizer = ifnone(tokenizer, Tokenizer())
151 chunksize = 1 if (type(df) == DataFrame) else df.chunksize
--> 152 return cls(folder, tokenizer, df=df, create_mtd=TextMtd.DF, name=name, chunksize=chunksize, **kwargs)
153
154 @classmethod
~\Anaconda3\lib\site-packages\fastai\text\data.py in __init__(self, path, tokenizer, vocab, max_vocab, chunksize, name, df, min_freq, n_labels, txt_cols, label_cols, create_mtd, classes, clear_cache)
35 os.makedirs(self.path, exist_ok=True)
36 if clear_cache: self.clear()
---> 37 if not self.check_toks(): self.tokenize()
38 if not self.check_ids(): self.numericalize()
39
~\Anaconda3\lib\site-packages\fastai\text\data.py in tokenize(self)
86 df = next(dfs) if (type(dfs) == pd.io.parsers.TextFileReader) else self.df
87 lbl_type = np.float32 if len(self.label_cols) > 1 else np.int64
---> 88 lbls = df[self.label_cols].values.astype(lbl_type) if (len(self.label_cols) > 0) else []
89 self.txt_cols = ifnone(self.txt_cols, list(range(len(self.label_cols),len(df.columns))))
90 texts = f'{FLD} {1} ' + df[self.txt_cols[0]].astype(str)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2680 if isinstance(key, (Series, np.ndarray, Index, list)):
2681 # either boolean or fancy integer index
-> 2682 return self._getitem_array(key)
2683 elif isinstance(key, DataFrame):
2684 return self._getitem_frame(key)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
2724 return self._take(indexer, axis=0)
2725 else:
-> 2726 indexer = self.loc._convert_to_indexer(key, axis=1)
2727 return self._take(indexer, axis=1)
2728
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1325 if mask.any():
1326 raise KeyError('{mask} not in index'
-> 1327 .format(mask=objarr[mask]))
1328
1329 return com._values_from_object(indexer)
KeyError: '[0] not in index'
and this part of code:
learn = language_model_learner(data_lm, pretrained_model=URLs.WT103, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2)
Runs into this error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-37-7b61575a202a> in <module>
----> 1 learn = language_model_learner(data_lm, pretrained_model=URLs.WT103, drop_mult=0.5)
2 learn.fit_one_cycle(1, 1e-2)
NameError: name 'language_model_learner' is not defined
I had issues installing fastai, but I have no clue what the problem is.
Well, your question was asked 2 months ago, and the library has gone through a lot of changes since then. It seems to me that your first error occurs because you don't specify a column with labels: self.label_cols is set to [0], so the lookup df[self.label_cols] raises the KeyError because 0 is not in the column index of your DataFrame. I believe this behaviour has changed since your post and that today, not specifying label_cols will work as intended.
Concerning your second issue, language_model_learner used to be called differently before a refactor. Were you on the latest version of fastai when trying to use it? Feel free to test that again with the latest version of fastai and see if you get the same errors.