Search code examples
clinuxlinux-kernellinux-device-driverkernel-module

Linux Kernel Block Device Driver: NULL Pointer Dereference When Calling add_disk in module_init


I wrote a very basic block device driver follow the steps here https://linux-kernel-labs.github.io/refs/heads/master/labs/block_device_drivers.html. The module_init function my_block_init is provided below:

static int create_block_device(struct my_block_dev *dev) {
  int err;

  dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE;
  dev->data = vmalloc(dev->size);
  if (dev->data == NULL) {
    printk(KERN_ERR "vmalloc: out of memory\n");
    err = -ENOMEM;
    goto out_vmalloc;
  }

  /* Initialize tag set. */
  dev->tag_set.ops = &my_queue_ops;
  dev->tag_set.nr_hw_queues = 1;
  dev->tag_set.queue_depth = 128;
  dev->tag_set.numa_node = NUMA_NO_NODE;
  dev->tag_set.cmd_size = 0;
  dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
  err = blk_mq_alloc_tag_set(&dev->tag_set);
  if (err) {
    printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
    goto out_alloc_tag_set;
  }

  /* Allocate queue. */
  dev->queue = blk_mq_init_queue(&dev->tag_set);
  if (IS_ERR(dev->queue)) {
    printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
    err = -ENOMEM;
    goto out_blk_init;
  }
  blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
  dev->queue->queuedata = dev;

  /* initialize the gendisk structure */
  dev->gd = blk_alloc_disk(NUMA_NO_NODE);
  if (!dev->gd) {
    printk(KERN_ERR "alloc_disk: failure\n");
    err = -ENOMEM;
    goto out_alloc_disk;
  }
  
  dev->gd->major = MY_BLOCK_MAJOR;
  dev->gd->minors = 1;
  dev->gd->first_minor = 0;
  dev->gd->fops = &my_block_ops;
  dev->gd->queue = dev->queue;
  dev->gd->private_data = dev;
  snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
  set_capacity(dev->gd, NR_SECTORS);

  if (add_disk(dev->gd)) {
    err = -ENOMEM;
    goto out_alloc_disk;
  }

  return 0;

out_alloc_disk:
  blk_put_queue(dev->queue);
out_blk_init:
  blk_mq_free_tag_set(&dev->tag_set);
out_alloc_tag_set:
  vfree(dev->data);
out_vmalloc:
  return err;
}

static int my_block_init(void) {
  int status;

  status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
  if (status < 0) {
    printk(KERN_ERR "unable to register mybdev block device\n");
    return -EBUSY;
  }
  printk(KERN_INFO
         "Block device with major(%d) and name(%s) successfully created\n",
         MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
  status = create_block_device(&dev);
  if (status < 0) {
    printk(KERN_ERR "unable to create block device\n");
    return -EBUSY;
  }

  return 0;
}

After I compiled and executed insmod on the ko module file, the insmod process got killed and exited.

Then I use the dmesg | tail -100 checked the logs and it looks like there is a NULL pointer dereference when calling the add_disk function:

[  108.621987] Block device with major(240) and name(mybdev) successfully created
[  108.624629] BUG: kernel NULL pointer dereference, address: 0000000000000264
[  108.624637] #PF: supervisor read access in kernel mode
[  108.624639] #PF: error_code(0x0000) - not-present page
[  108.624641] PGD 0 P4D 0 
[  108.624643] Oops: 0000 [#1] PREEMPT SMP PTI
[  108.624646] CPU: 0 PID: 2767 Comm: insmod Tainted: G           OE      6.2.0-33-generic #33~22.04.1-Ubuntu
[  108.624648] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
[  108.624651] RIP: 0010:kobject_get+0xe/0x90
[  108.624658] Code: c2 a8 68 05 b9 eb d2 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 54 49 89 fc 48 85 ff 74 22 <f6> 47 3c 01 74 2f 49 8d 7c 24 38 b8 01 00 00 00 f0 41 0f c1 44 24
[  108.624659] RSP: 0018:ffffbef08649fa68 EFLAGS: 00010206
[  108.624661] RAX: ffffffffb8f920c4 RBX: 0000000000000228 RCX: 0000000000000000
[  108.624662] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000228
[  108.624663] RBP: ffffbef08649fa70 R08: 0000000000000000 R09: 0000000000000000
[  108.624663] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000228
[  108.624664] R13: 0000000000000000 R14: ffff967146f53400 R15: ffff967146f53410
[  108.624665] FS:  00007f80b1aaa000(0000) GS:ffff967179e00000(0000) knlGS:0000000000000000
[  108.624666] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  108.624667] CR2: 0000000000000264 CR3: 0000000056e9c003 CR4: 0000000000370ef0
[  108.624670] Call Trace:
[  108.624672]  <TASK>
[  108.624676]  ? show_regs+0x72/0x90
[  108.624681]  ? __die+0x25/0x80
[  108.624682]  ? page_fault_oops+0x79/0x190
[  108.624685]  ? mod_memcg_lruvec_state+0x2b/0x60
[  108.624689]  ? mod_objcg_state+0x1ad/0x2e0
[  108.624692]  ? do_user_addr_fault+0x30c/0x640
[  108.624693]  ? exc_page_fault+0x81/0x1b0
[  108.624698]  ? asm_exc_page_fault+0x27/0x30
[  108.624702]  ? kobject_get+0xe/0x90
[  108.624704]  kobject_add_internal+0x35/0x310
[  108.624706]  kobject_add+0x7a/0xf0
[  108.624709]  elv_register_queue+0x3a/0xa0
[  108.624712]  blk_register_queue+0xf2/0x220
[  108.624715]  device_add_disk+0x249/0x400
[  108.624722]  ? __pfx_init_module+0x10/0x10 [mybdev]
[  108.624726]  my_block_init+0x193/0xec0 [mybdev]
[  108.624729]  do_one_initcall+0x46/0x240
[  108.624733]  ? kmalloc_trace+0x2a/0xb0
[  108.624736]  do_init_module+0x52/0x240
[  108.624739]  load_module+0xb96/0xd60
[  108.624741]  ? kernel_read_file+0x25c/0x2b0
[  108.624746]  __do_sys_finit_module+0xcc/0x150
[  108.624748]  ? __do_sys_finit_module+0xcc/0x150
[  108.624750]  __x64_sys_finit_module+0x18/0x30
[  108.624752]  do_syscall_64+0x59/0x90
[  108.624755]  ? ksys_mmap_pgoff+0x123/0x270
[  108.624759]  ? exit_to_user_mode_prepare+0x3b/0xd0
[  108.624761]  ? syscall_exit_to_user_mode+0x38/0x60
[  108.624762]  ? do_syscall_64+0x69/0x90
[  108.624764]  ? syscall_exit_to_user_mode+0x38/0x60
[  108.624766]  ? do_syscall_64+0x69/0x90
[  108.624767]  ? do_syscall_64+0x69/0x90
[  108.624769]  ? do_syscall_64+0x69/0x90
[  108.624771]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[  108.624772] RIP: 0033:0x7f80b131ea3d

Add the exception happens inside elv_register_queue with the source code below:

int elv_register_queue(struct request_queue *q, bool uevent)
{
    struct elevator_queue *e = q->elevator;
    int error;

    lockdep_assert_held(&q->sysfs_lock);

    error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
    if (!error) {
        struct elv_fs_entry *attr = e->type->elevator_attrs;
        if (attr) {
            while (attr->attr.name) {
                if (sysfs_create_file(&e->kobj, &attr->attr))
                    break;
                attr++;
            }
        }
        if (uevent)
            kobject_uevent(&e->kobj, KOBJ_ADD);

        set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
    }
    return error;
}

After looked up several sources I couldn't find out what was uninitialized and generates the exception. Is there anyone familiar with this part of kernel or is there a better way to approach this problem?

Kernel Version: v6.2.0


Solution

  • I got around this issue by checking the source code of block/genhd.c and discovered that the call to blk_alloc_disk allocates a request_queue for the provided gendisk, so I removed my own initialization of a request_queue and the code finally worked through.

      dev->gd = blk_alloc_disk(NUMA_NO_NODE);
    
      dev->queue = dev->gd->queue; // use the queue allocated during blk_alloc_disk