Search code examples
python · machine-learning · pytorch · huggingface-transformers · langchain

using llama_index with mac m1


Question #1:

Is there a way of using Mac with M1 CPU and llama_index together?

I cannot get past the assertion error below:

AssertionError                            Traceback (most recent call last)
<ipython-input-1-f2d62b66882b> in <module>
      6 from transformers import pipeline
      7 
----> 8 class customLLM(LLM):
      9     model_name = "google/flan-t5-large"
     10     pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})

<ipython-input-1-f2d62b66882b> in customLLM()
      8 class customLLM(LLM):
      9     model_name = "google/flan-t5-large"
---> 10     pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})
     11 
     12     def _call(self, prompt, stop=None):

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/__init__.py in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
    868         kwargs["device"] = device
    869 
--> 870     return pipeline_class(model=model, framework=framework, task=task, **kwargs)

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/text2text_generation.py in __init__(self, *args, **kwargs)
     63 
     64     def __init__(self, *args, **kwargs):
---> 65         super().__init__(*args, **kwargs)
     66 
     67         self.check_model_type(

~/Library/Python/3.9/lib/python/site-packages/transformers/pipelines/base.py in __init__(self, model, tokenizer, feature_extractor, modelcard, framework, task, args_parser, device, binary_output, **kwargs)
    776         # Special handling
    777         if self.framework == "pt" and self.device.type != "cpu":
--> 778             self.model = self.model.to(self.device)
    779 
    780         # Update config with task specific parameters

~/Library/Python/3.9/lib/python/site-packages/transformers/modeling_utils.py in to(self, *args, **kwargs)
   1680             )
   1681         else:
-> 1682             return super().to(*args, **kwargs)
   1683 
   1684     def half(self, *args):

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in to(self, *args, **kwargs)
   1143             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
   1144 
-> 1145         return self._apply(convert)
   1146 
   1147     def register_full_backward_pre_hook(

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    795     def _apply(self, fn):
    796         for module in self.children():
--> 797             module._apply(fn)
    798 
    799         def compute_should_use_set_data(tensor, tensor_applied):

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in _apply(self, fn)
    818             # `with torch.no_grad():`
    819             with torch.no_grad():
--> 820                 param_applied = fn(param)
    821             should_use_set_data = compute_should_use_set_data(param, param_applied)
    822             if should_use_set_data:

~/Library/Python/3.9/lib/python/site-packages/torch/nn/modules/module.py in convert(t)
   1141                 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None,
   1142                             non_blocking, memory_format=convert_to_format)
-> 1143             return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
   1144 
   1145         return self._apply(convert)

~/Library/Python/3.9/lib/python/site-packages/torch/cuda/__init__.py in _lazy_init()
    237                 "multiprocessing, you must use the 'spawn' start method")
    238         if not hasattr(torch._C, '_cuda_getDeviceCount'):
--> 239             raise AssertionError("Torch not compiled with CUDA enabled")
    240         if _cudart is None:
    241             raise AssertionError(

AssertionError: Torch not compiled with CUDA enabled

Obviously I don't have an Nvidia card, but I've read that PyTorch now supports the Mac M1 as well.

I'm trying to run the below example:

from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex,GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor, ServiceContext
import torch
from langchain.llms.base import LLM
from transformers import pipeline

class customLLM(LLM):
    """LangChain ``LLM`` subclass that answers prompts with a local Hugging Face pipeline.

    The pipeline is created in the class body, so the model is loaded (and moved
    to the requested device) as soon as the class statement executes, before any
    instance is created.
    """

    model_name = "google/flan-t5-large"
    # NOTE: an integer ``device`` is treated as a CUDA device index. On a machine
    # whose PyTorch build has no CUDA support (e.g. a Mac M1) this line raises
    # "AssertionError: Torch not compiled with CUDA enabled".
    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={"torch_dtype":torch.bfloat16})

    def _call(self, prompt, stop=None):
        """Run the pipeline on ``prompt`` and return the first result's generated text.

        ``stop`` is accepted for interface compatibility but is not used here.
        """
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
 
    # NOTE(review): LangChain's base class presumably expects ``_identifying_params``
    # and ``_llm_type`` to be properties rather than plain methods; confirm against
    # the installed langchain version.
    def _identifying_params(self):
        """Return a dict identifying this LLM by its model name."""
        return {"name_of_model": self.model_name}

    def _llm_type(self):
        """Return the type label for this LLM ("custom")."""
        return "custom"


# Instantiating customLLM here reuses the class-level pipeline built above.
llm_predictor = LLMPredictor(llm=customLLM())

Question #2:

Assuming the answer to the above is no: I don't mind using Google Colab with a GPU, but once the index has been built, will it be possible to download it and use it on my Mac?

i.e. something like:

on Google Colab:

# Bundle the custom LLM predictor and the embedding model into one service context.
# (`embed_model` and `documents` are assumed to be defined earlier; not shown here.)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
# Build the vector index from the loaded documents using that context.
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
# Persist the index to a JSON file so it can be copied to another machine.
index.save_to_disk('index.json')

... and later, on my Mac, load it back with load_from_disk (the counterpart of save_to_disk)?


Solution

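  • Why are you passing device=0? If isinstance(device, int), PyTorch will assume device is the index of a CUDA device, hence the error. Try device="cpu" (or simply remove the device kwarg), and this issue should disappear. On Apple Silicon you may also be able to pass device="mps" to use the Metal backend, provided your PyTorch build supports it and the chosen dtype (bfloat16 here) is available on that backend.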