I am deploying a GCP Dataflow pipeline with Terraform. My Terraform configuration builds, deploys and runs the Dataflow job:
# Creates a local Python virtualenv ("venv") and installs the packages used to
# launch the pipeline from the machine that runs Terraform.
resource "null_resource" "create_env" {
  provisioner "local-exec" {
    # local-exec runs on the machine executing Terraform, so these packages
    # end up in the local venv only.
    command = "python3 -m venv venv && venv/bin/pip install wheel 'apache-beam[gcp]' sentry-sdk"
  }
}
# Runs the pipeline's `main` module with the DataflowRunner from the local
# venv, staging the job and writing a template to --template_location.
resource "null_resource" "compile_and_upload" {
  # The venv must exist before the pipeline module can be executed.
  depends_on = [
    null_resource.create_env
  ]

  provisioner "local-exec" {
    # Packages installed into the local venv are not available on the Dataflow
    # workers. --requirements_file tells Beam which PyPI packages to stage for
    # the workers; without it the workers fail while unpickling the DoFns with
    # "ModuleNotFoundError: No module named 'sentry_sdk'".
    # requirements.txt must exist in the directory Terraform is run from (the
    # same directory that holds venv/) and list sentry-sdk plus any other
    # non-Beam dependencies of the pipeline.
    command = <<EOT
venv/bin/python -m main \
--runner DataflowRunner \
--project ${var.project} \
--staging_location gs://... \
--temp_location gs://... \
--template_location gs://... \
--region ${var.region} \
--requirements_file requirements.txt \
--sentry-dsn ${var.sentry_dsn}
EOT
  }
}
# Launches a Dataflow job from the template at template_gcs_path.
resource "google_dataflow_job" "me_pipeline" {
  name              = "my_pipeline"
  template_gcs_path = "gs://..."
  temp_gcs_location = "gs://..."

  # Drain the running job when this resource is destroyed.
  on_delete = "drain"

  # The job must not be created until the template step has run.
  depends_on = [null_resource.compile_and_upload]
}
The Dataflow job was created, but in the GCP logs I see an error:
Error message from worker: generic::unknown: Traceback (most recent call last)...
...
ModuleNotFoundError: No module named 'sentry_sdk'
The whole error log:
Error message from worker: generic::unknown: Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/dill_pickler.py",
line 285, in loads return dill.loads(s) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 275, in loads return load(file, ignore, **kwds) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 270, in load return Unpickler(file, ignore=ignore, **kwds).load() File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 472, in load obj = StockUnpickler.load(self) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 827, in _import_module return getattr(__import__(module, None, None, [obj]), obj) ModuleNotFoundError: No module named 'sentry_sdk' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 284, in _execute response = task() File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 357, in <lambda> lambda: self.create_worker().do_instruction(request), request) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 597, in do_instruction return getattr(self, request_type)( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 628, in process_bundle bundle_processor = self.bundle_processor_cache.get( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py",
line 458, in get processor = bundle_processor.BundleProcessor( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 871, in __init__ self.ops = self.create_execution_tree(self.process_bundle_descriptor) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 924, in create_execution_tree return collections.OrderedDict([( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 927, in <listcomp> get_operation(transform_id))) for transform_id in sorted( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 906, in get_operation transform_consumers = { File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <dictcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 907, in <listcomp> tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 814, in wrapper result = cache[args] = func(*args) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 911, in get_operation return transform_factory.create_operation( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1206, in create_operation return creator(self, transform_id, transform_proto, payload, consumers) File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1553, in create_par_do return _create_pardo_operation( File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py",
line 1596, in _create_pardo_operation dofn_data = pickler.loads(serialized_fn) File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/pickler.py",
line 51, in loads return desired_pickle_lib.loads( File "/usr/local/lib/python3.8/site-packages/apache_beam/internal/dill_pickler.py",
line 289, in loads return dill.loads(s) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 275, in loads return load(file, ignore, **kwds) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 270, in load return Unpickler(file, ignore=ignore, **kwds).load() File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 472, in load obj = StockUnpickler.load(self) File "/usr/local/lib/python3.8/site-packages/dill/_dill.py",
line 827, in _import_module return getattr(__import__(module, None, None, [obj]), obj) ModuleNotFoundError: No module named 'sentry_sdk'
Do I need to do anything extra to run Sentry with Dataflow?
You will need to pass --requirements_file myreqs.txt when launching the pipeline, where myreqs.txt contains the sentry_sdk package (published on PyPI as sentry-sdk) as well as any other dependencies that you have.
Check out this resource for other tips on managing the Beam dependencies in Python.