Search code examples
Tags: python, google-cloud-dataflow, sentry

No module named 'sentry_sdk' in Python GCP dataflow uploaded by terraform


I am deploying a GCP Dataflow pipeline with Terraform. My Terraform configuration builds the pipeline, uploads it as a template, and runs the Dataflow job:

# Creates a Python virtual environment in ./venv and installs wheel,
# apache-beam[gcp] and sentry-sdk into it.
# local-exec runs on the machine that invokes Terraform, so these packages
# end up only in that local venv.
resource "null_resource" "create_env" {
  provisioner "local-exec" {
    command = "python3 -m venv venv && venv/bin/pip install wheel 'apache-beam[gcp]' sentry-sdk"
  }
}


# Runs the pipeline's `main` module with the local venv's interpreter, using
# the DataflowRunner. With --template_location set, this step is expected to
# stage a template at that GCS path rather than launch a job directly
# (confirm against the Dataflow classic-template docs).
# NOTE(review): no --requirements_file or --setup_file is passed, so packages
# that exist only in the local venv (e.g. sentry-sdk) are presumably not
# shipped to the Dataflow workers. That matches the worker-side
# "ModuleNotFoundError: No module named 'sentry_sdk'" reported for this setup.
resource "null_resource" "compile_and_upload" {
  # The venv must exist before venv/bin/python can be invoked.
  depends_on = [
    null_resource.create_env
  ]

  provisioner "local-exec" {
    # --sentry-dsn looks like a custom pipeline option parsed by `main`
    # (TODO confirm against the pipeline code); the remaining flags are
    # Beam/Dataflow pipeline options.
    command = <<EOT
      venv/bin/python -m main \
      --runner DataflowRunner \
      --project ${var.project} \
      --staging_location gs://... \
      --temp_location gs://... \
      --template_location gs://... \
      --region ${var.region} \
      --sentry-dsn ${var.sentry_dsn}
    EOT
  }
}


# Launches a Dataflow job from the template at template_gcs_path.
# NOTE(review): the resource label is "me_pipeline" while the job name is
# "my_pipeline" -- possibly a typo in the label; verify before renaming.
resource "google_dataflow_job" "me_pipeline" {
  # Wait until the template has been built and uploaded.
  depends_on = [
    null_resource.compile_and_upload
  ]

  name = "my_pipeline"
  temp_gcs_location = "gs://..."
  template_gcs_path = "gs://..."

  # Drain the running job (instead of cancelling it) when this resource is
  # destroyed.
  on_delete = "drain"
}

The Dataflow job was created, but in the GCP logs I see an error:

Error message from worker: generic::unknown: Traceback (most recent call last)...
...
ModuleNotFoundError: No module named 'sentry_sdk'

Whole error log

Error message from worker: generic::unknown: Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/dill_pickler.py",
   line 285, in loads return dill.loads(s) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 275, in loads return load(file, ignore, **kwds) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 270, in load return Unpickler(file, ignore=ignore, **kwds).load() File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 472, in load obj = StockUnpickler.load(self) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 827, in _import_module return getattr(__import__(module, None, None, [obj]), obj) ModuleNotFoundError: No module named 'sentry_sdk' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 284, in _execute response = task() File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 357, in <lambda> lambda: self.create_worker().do_instruction(request), request) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 597, in do_instruction return getattr(self, request_type)( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 628, in process_bundle bundle_processor = self.bundle_processor_cache.get( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
   line 458, in get processor = bundle_processor.BundleProcessor( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 871, in __init__ self.ops = self.create_execution_tree(self.process_bundle_descriptor) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 924, in create_execution_tree return collections.OrderedDict([( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 927, in <listcomp> get_operation(transform_id))) for transform_id in sorted( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 911, in get_operation return transform_factory.create_operation( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 1206, in create_operation return creator(self, transform_id, transform_proto, payload, consumers) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 1553, in create_par_do return _create_pardo_operation( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
   line 1596, in _create_pardo_operation dofn_data = pickler.loads(serialized_fn) File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/pickler.py",
   line 51, in loads return desired_pickle_lib.loads( File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/dill_pickler.py",
   line 289, in loads return dill.loads(s) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 275, in loads return load(file, ignore, **kwds) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 270, in load return Unpickler(file, ignore=ignore, **kwds).load() File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 472, in load obj = StockUnpickler.load(self) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
   line 827, in _import_module return getattr(__import__(module, None, None, [obj]), obj) ModuleNotFoundError: No module named 'sentry_sdk'

Do I need to do anything extra to use Sentry with Dataflow?


Solution

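  • The packages you install into the local venv are not automatically available on the Dataflow workers, which is why the worker fails to import sentry_sdk when it unpickles your DoFn. You will need to pass --requirements_file myreqs.txt to the command that runs the pipeline (the venv/bin/python -m main ... step), where myreqs.txt lists the sentry-sdk package (imported as sentry_sdk) as well as any other dependencies that you have.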

    Check out the Apache Beam guide "Managing Python Pipeline Dependencies" for other tips on managing the Beam dependencies in Python.