I'm currently developing a memory scanning program for a lab assignment, where the goal is to read and analyze the memory map of the process itself, specifically by reading from /proc/self/maps.
The program is designed to parse this file, identify readable memory regions, and then count occurrences of the character 'A' within these regions. This functionality has been implemented and appears to work correctly during normal execution.
However, when I run my program under Valgrind with the options --leak-check=full --track-origins=yes --error-exitcode=1 --show-leak-kinds=all --read-var-info=yes --malloc-fill=0xAA --free-fill=0xFF, it reports multiple errors related to uninitialized values and invalid reads of memory. Specifically, Valgrind outputs warnings about "Conditional jump or move depends on uninitialised value(s)" and "Invalid read of size 1", indicating that my program might be attempting to read memory beyond allocated regions or using uninitialized memory for decision-making.
Interestingly, Valgrind also reports that there are no memory leaks, which suggests that the issue is not with memory not being freed but rather with how memory is accessed and used during the program's execution. This discrepancy between the program running without apparent issues normally and Valgrind reporting serious errors is puzzling.
The relevant portion of the Valgrind output includes warnings about unhandled DW_OP_ codes, which might hint at a deeper issue related to debugging information or the way memory addresses are interpreted. Additionally, the error message "Address 0x4037000 is 0 bytes after the brk data segment limit 0x4037000" suggests an attempt to read memory right at the boundary of what is allowed, which is consistent with the invalid read errors.
Given these details, I'm seeking guidance on how to diagnose and resolve these Valgrind-reported errors in my memory scanning program. I'm particularly interested in understanding why these uninitialized values and invalid reads are occurring and how to modify my program to eliminate these issues while maintaining its functionality.
Here is my code
/// @file main.c
#include "parser.h"
#include "scanner.h"
#include <stdio.h>
#include <stdlib.h>
/// @brief main - parses this process's memory map and scans every stored region
/// @return - 0 on success, 1 if /proc/self/maps could not be parsed
int main(void) {
    MemoryRegion *regions = NULL;
    int count = 0;

    // parse_maps allocates the array itself, so it needs the address of the
    // pointer (and of the counter) to hand the results back.
    if (parse_maps(&regions, &count) != 0) {
        fprintf(stderr, "Error parsing /proc/self/maps\n");
        return 1;
    }

    scan_memory(regions, count);

    free(regions); // the array returned by parse_maps is owned by the caller
    return 0;
}
/// @file parser.c
#include "parser.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/// @brief parse_maps - reads /proc/self/maps and collects the readable regions
///
/// Each line is expected to start with "<start>-<end> <perms>". Lines that do
/// not match that shape, lines containing "[vvar]", and regions whose
/// permission string does not start with 'r' are skipped.
///
/// @param regions - out: receives a heap-allocated array of MemoryRegion; the
///                  caller releases it with free(). Set to NULL on failure.
/// @param count - out: receives the number of entries stored in *regions.
///                Set to 0 on failure.
/// @return - 0 on success, -1 on failure (a message is printed via perror)
int parse_maps(MemoryRegion **regions, int *count) {
    assert(regions != NULL);
    assert(count != NULL);

    // Give the out-parameters a defined value on every exit path.
    *regions = NULL;
    *count = 0;

    FILE *file = fopen("/proc/self/maps", "r");
    if (!file) {
        perror("Failed to open /proc/self/maps");
        return -1;
    }

    int capacity = 12; // initial capacity
    int used = 0;
    MemoryRegion *list = calloc((size_t)capacity, sizeof *list);
    if (!list) {
        perror("Failed to allocate memory for regions");
        fclose(file);
        return -1;
    }

    char line[496]; // buffer for reading lines
    while (fgets(line, sizeof line, file)) {
        MemoryRegion region = {0}; // fully initialised before it is copied
        char *endptr;

        // Start address, which must be followed by '-'. Checking the
        // separator also rejects the tail of a line that was longer than
        // the buffer and is being returned by fgets as a second chunk.
        region.start_addr = strtoul(line, &endptr, 16);
        if (endptr == line || *endptr != '-') {
            continue;
        }

        // End address, which must be followed by a space.
        char *ptr = endptr + 1;
        region.end_addr = strtoul(ptr, &endptr, 16);
        if (endptr == ptr || *endptr != ' ') {
            continue;
        }

        // Permission field: at most 4 characters plus the terminating NUL.
        if (sscanf(endptr + 1, "%4s", region.permissions) != 1) {
            continue;
        }

        if (strstr(line, "[vvar]") || region.permissions[0] != 'r') {
            continue;
        }

        // Grow through a temporary so the existing array is not lost (and
        // can still be freed) when realloc fails.
        if (used == capacity) {
            int new_capacity = capacity * 2;
            MemoryRegion *grown =
                realloc(list, (size_t)new_capacity * sizeof *grown);
            if (!grown) {
                perror("Failed to reallocate memory for regions");
                free(list);
                fclose(file);
                return -1;
            }
            list = grown;
            capacity = new_capacity;
        }

        list[used++] = region; // store the region
    }

    fclose(file);
    *regions = list;
    *count = used;
    return 0;
}
/// @file parser.h
#ifndef MEMSCAN_PARSER_H
#define MEMSCAN_PARSER_H
#include <stdio.h>
/// @brief MemoryRegion - struct to store memory region information
///
/// One entry corresponds to one line of /proc/self/maps as parsed by
/// parse_maps.
typedef struct {
    unsigned long start_addr; ///< hexadecimal value before the '-' on the line
    unsigned long end_addr;   ///< hexadecimal value after the '-' on the line
    char permissions[5];      ///< permission field (up to 4 characters) plus NUL
} MemoryRegion;
/// @brief parse_maps - parses /proc/self/maps and stores the memory regions
///
/// Only regions whose permission string starts with 'r' are stored, and lines
/// containing "[vvar]" are skipped.
///
/// @param regions - pointer to an array of MemoryRegion structs; the array is
///                  heap-allocated by parse_maps and released by the caller
///                  with free()
/// @param count - pointer to an integer to store the number of memory regions
/// @return - 0 on success, -1 on failure
int parse_maps(MemoryRegion **regions, int *count);
#endif // MEMSCAN_PARSER_H
/// @file scanner.c
#include "scanner.h"
#include "parser.h"
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/// @brief count_a_in_region - counts the 'A' bytes in the range [start, end)
///
/// The end address is exclusive: the last byte examined is the one at
/// end - 1, and the byte at end itself is never read. This matches the way
/// /proc/self/maps reports a mapping, so every byte of the region is scanned.
///
/// @param start - address of the first byte to examine
/// @param end - address one past the last byte to examine
/// @return - the number of 'A' bytes found; 0 for an empty or inverted range
int count_a_in_region(unsigned long start, unsigned long end) {
    // Nothing to scan. This also keeps a zero or inverted end address from
    // producing a wrapped-around upper bound.
    if (end <= start) {
        return 0;
    }

    int count = 0;
    // Read raw bytes as unsigned char so the comparison does not depend on
    // whether plain char is signed on this platform.
    const unsigned char *const limit = (const unsigned char *)(uintptr_t)end;
    for (const unsigned char *ptr = (const unsigned char *)(uintptr_t)start;
         ptr < limit; ptr++) {
        if (*ptr == 'A') {
            count++;
        }
    }
    return count;
}
/// @brief scan_memory - reports the number of 'A' bytes found in each region
///
/// For every entry the region's bounds, permission string, size in bytes and
/// 'A' count are printed on one line. Entries whose permission string does not
/// start with 'r' are reported with a count of 0 without being read.
///
/// @param regions - array of regions to scan; must not be NULL
/// @param count - number of entries in the array
void scan_memory(MemoryRegion *regions, int count) {
    assert(regions != NULL);

    for (int idx = 0; idx < count; ++idx) {
        const MemoryRegion *cur = &regions[idx];

        // ensure end is greater than start
        assert(cur->end_addr > cur->start_addr);
        const size_t span = cur->end_addr - cur->start_addr;

        // Only readable regions are actually walked.
        int hits = 0;
        if (cur->permissions[0] == 'r') {
            hits = count_a_in_region(cur->start_addr, cur->end_addr);
        }

        // -1 is the error value documented for count_a_in_region.
        if (hits == -1) {
            fprintf(stderr, "Error counting 'A' in region %d\n", idx);
            continue;
        }

        printf("%d: 0x%lx - 0x%lx %s Number of bytes read [%zu] Number of 'A' is [%d]\n",
               idx, cur->start_addr, cur->end_addr, cur->permissions, span,
               hits);
    }
}
/// @file scanner.h
#ifndef SCANNER_H
#define SCANNER_H
#include "parser.h"
/// @brief count_a_in_region - counts the number of 'A' characters in the specified memory region
/// @param start - the start address of the memory region; scanning begins at this byte
/// @param end - the end address of the memory region; the byte at this address is never read
/// @return - the number of 'A' characters in the region, or -1 on error
///           NOTE(review): the definition in scanner.c has no path that returns -1 -
///           confirm whether the error value is still intended
int count_a_in_region(unsigned long start, unsigned long end);
/// @brief scan_memory - counts 'A' in each memory region and prints one report line per region
/// @param regions - pointer to an array of MemoryRegion structs; must not be NULL (asserted by the definition)
/// @param count - the number of memory regions
void scan_memory(MemoryRegion *regions, int count);
#endif //SCANNER_H
I've tried using -1 in the count function, and when I get rid of counting a letter, there are no Valgrind errors.
Coincidentally, I'm working on the code in Valgrind that does the mmap parsing at the moment. This is used for two things - getting the memory mapping of the Valgrind tool itself (Valgrind can't intercept mmap for its own mapping since it's not running) and also for mmap checking if you run with --sanity-level=3 or higher.
"warnings about unhandled DW_OP_ codes"
That probably means you have an old version of Valgrind. Try to get something more recent.
"--leak-check=full --track-origins=yes --error-exitcode=1 --show-leak-kinds=all --read-var-info=yes"
You don't need any of those options for what you are doing. They will just make your runs slower. I recommend that you don't use them, or at least only use them after seeing that there is an error.
Are you really looking for 'A' (0x41) or are you looking for 0xAA? I'm not sure, as you are using --malloc-fill=0xAA.
So, why are you seeing problems? Memcheck knows about all of the memory that has been allocated (both in your code and in the startup code before main()). It also knows about the stack. malloc() doesn't just allocate one bit of memory each time you call it. Rather it uses pools of various power-of-two sizes and sub-allocates from those pools. For large allocations it will use anonymous mmap to get the memory - see the malloc man page. That means two things.
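1. Some of the memory you scan has been allocated but never written to, so Memcheck treats its contents as uninitialised. Reading it gives "Conditional jump or move depends on uninitialised value(s)" from the "if (*ptr == 'A')".
2. Some of the memory you scan is mapped but has not been handed out by malloc, so Memcheck does not consider it addressable. Reading it gives "Invalid read of size 1".
Try something like this. This is rather brute force.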
#include <memcheck.h>
/// @brief count_a_in_region - counts the 'A' bytes in the range [start, end)
///        with Memcheck addressability errors suppressed for that range
///
/// The end address is exclusive: the last byte examined is the one at
/// end - 1, and the byte at end itself is never read.
///
/// @param start - address of the first byte to examine
/// @param end - address one past the last byte to examine
/// @return - the number of 'A' bytes found; 0 for an empty or inverted range
int count_a_in_region(unsigned long start, unsigned long end) {
    // Nothing to scan. Returning early also keeps end - start from
    // underflowing in the client requests below.
    if (end <= start) {
        return 0;
    }

    int count = 0;
    // Stop Memcheck from reporting addressing errors while the range is read.
    VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(start, end - start);
    const unsigned char *const limit = (const unsigned char *)(uintptr_t)end;
    for (const unsigned char *ptr = (const unsigned char *)(uintptr_t)start;
         ptr < limit; ptr++) {
        if (*ptr == 'A') {
            count++;
        }
    }
    // Restore normal reporting for the same range.
    VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(start, end - start);
    return count;
}
If the above doesn't work, or you want to have more control over what is happening, look at using VALGRIND_CHECK_MEM_IS_ADDRESSABLE. This will return 0 if the range is addressable. If it is not all addressable it will return the first non-addressable address. You will probably need to use it more than once to find all addressable ranges in an mmap'd region.
If you need to turn off "Conditional jump" errors, try VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE or VALGRIND_CHECK_MEM_IS_DEFINED.