From c6463efd510c526ae2f1e2c76ded709337a4ed1f Mon Sep 17 00:00:00 2001 From: Victor Olin Date: Thu, 23 Feb 2023 13:00:15 +0100 Subject: [PATCH] Documented the library --- src/GC/include/heap.hpp | 22 ++-- src/GC/lib/heap.cpp | 229 +++++++++++++++++++++++------------- src/GC/tests/extern_lib.cpp | 107 +++++++++++------ src/GC/tests/h_test.cpp | 20 +++- src/GC/todo.md | 34 +----- 5 files changed, 257 insertions(+), 155 deletions(-) diff --git a/src/GC/include/heap.hpp b/src/GC/include/heap.hpp index 575d6f7..30f1a7a 100644 --- a/src/GC/include/heap.hpp +++ b/src/GC/include/heap.hpp @@ -23,6 +23,7 @@ namespace GC { private: + //Private constructor according to the singleton pattern Heap() { m_heap = reinterpret_cast(malloc(HEAP_SIZE)); m_size = 0; @@ -50,8 +51,8 @@ namespace GC { public: - static Heap *the() { - if (m_instance) + static inline Heap *the() { // TODO: make private + if (m_instance) // if m_instance is not a nullptr return m_instance; m_instance = new Heap(); return m_instance; @@ -62,12 +63,19 @@ namespace GC { std::free((char *)m_heap); } - void *alloc(size_t size); - void init(); + /** + * These are the only two functions which are exposed + * as the API for LLVM. At the absolute start of the + * program the developer has to call init() to ensure + * that the address of the topmost stack frame is + * saved as the limit for scanning the stack in collect. 
+ */ + void *alloc(size_t size); // TODO: make static + void init(); // TODO: make static // DEBUG ONLY - void check_init(); - void collect(uint flags); - void print_contents(); + void collect(uint flags); // conditional collection + void check_init(); // print dummy things + void print_contents(); // print dummy things }; } \ No newline at end of file diff --git a/src/GC/lib/heap.cpp b/src/GC/lib/heap.cpp index 58c4966..ea47bc4 100644 --- a/src/GC/lib/heap.cpp +++ b/src/GC/lib/heap.cpp @@ -16,16 +16,8 @@ namespace GC { * Initialises the heap singleton and saves the address * of the calling stack frame as the stack_end. Presumeably * this address points to the stack frame of the compiled - * LLVM executable after linking. (NOT CONFIRMED) + * LLVM executable after linking. */ - void Heap::check_init() { - auto heap = Heap::the(); - cout << "Heap addr:\t" << heap << endl; - cout << "GC m_stack_end:\t" << heap->m_stack_end << endl; - auto stack_start = reinterpret_cast(__builtin_frame_address(0)); - cout << "GC stack_start:\t" << stack_start << endl; - } - void Heap::init() { Heap *heap = Heap::the(); heap->m_stack_end = reinterpret_cast(__builtin_frame_address(1)); @@ -76,6 +68,24 @@ namespace GC { return new_chunk->start; } + /** + * Tries to recycle used and freed chunks that are + * already allocated objects by the OS but freed + * from our Heap. This reduces the amount of GC + * objects slightly which saves time from malloc'ing + * memory from the OS. + * + * @param heap Pointer to the singleton Heap instance + * + * @param size Amount of bytes needed for the object + * which is about to be allocated. + * + * @returns If a chunk is found and recycled, a + * pointer to the allocated memory for + * the object is returned. If not, a + * nullptr is returned to signify no + * chunks were found. 
+ */ uintptr_t *Heap::try_recycle_chunks(Heap *heap, size_t size) { // Check if there are any freed chunks large enough for current request for (size_t i = 0; i < heap->m_freed_chunks.size(); i++) { @@ -107,12 +117,23 @@ namespace GC { return nullptr; } + /** + * Collection phase of the garbage collector. When + * an allocation is requested and there is no space + * left on the heap, a collection is triggered. This + * function is private so that the user cannot trigger + * a collection unnecessarily. + * + * @param heap Heap singleton instance, only for avoiding + * redundant calls to the singleton get + */ void Heap::collect(Heap *heap) { - // Get the adress of the current stack frame - - uintptr_t *stack_end; + // get current stack auto stack_start = reinterpret_cast(__builtin_frame_address(0)); + + // fix this block, it's nasty + uintptr_t *stack_end; if (heap->m_stack_end != nullptr) stack_end = heap->m_stack_end; else @@ -126,6 +147,87 @@ namespace GC { free(heap); } + /** + * Iterates through the stack; if an element on the stack points to a chunk, + * that chunk is marked (i.e. reachable). It only marks elements which are directly + * reachable from the stack, so no chain of pointers from the stack is detected. + * If a chunk is marked it is removed from the worklist, since it's no longer of + * concern for this method. + * + * @param start Pointer to the start of the stack frame. + * @param end Pointer to the end of the stack frame. + * @param worklist The currently allocated chunks. 
+ */ + void Heap::mark(uintptr_t *start, const uintptr_t *end, vector worklist) { + int counter = 0; + // To find addresses that are in the worklist + for (; start < end; start++) { + counter++; + // all pointers must be aligned as double words + + for (auto it = worklist.begin(); it != worklist.end();) { + Chunk *chunk = *it; + + auto c_start = reinterpret_cast(chunk->start); + auto c_size = reinterpret_cast(chunk->size); + auto c_end = reinterpret_cast(c_start + c_size); + + cout << "Start points to:\t" << hex << *start << endl; + cout << "Chunk start:\t\t" << hex << c_start << endl; + cout << "Chunk end:\t\t" << hex << c_end << "\n" << endl; + + // Check if the stack pointer aligns with the chunk + if (c_start <= *start && *start < c_end) { + + if (!chunk->marked) { + chunk->marked = true; + it = worklist.erase(it); + } + else { + ++it; + } + } + else { + ++it; + } + } + } + cout << "Counter: " << counter << endl; + } + + /** + * Sweeps the heap, unmarks the marked chunks for the next cycle, + * adds the unmarked nodes to the vector of freed chunks; to be freed. + * + * @param heap Pointer to the heap to operate on. + */ + void Heap::sweep(Heap *heap) { + for (auto it = heap->m_allocated_chunks.begin(); it != heap->m_allocated_chunks.end();) { + Chunk *chunk = *it; + + // Unmark the marked chunks for the next iteration. + if (chunk->marked) { + chunk->marked = false; + ++it; + } + else { + // Add the unmarked chunks to freed chunks and remove from + // the list of allocated chunks + heap->m_freed_chunks.push_back(chunk); + it = heap->m_allocated_chunks.erase(it); + } + } + } + + /** + * Frees chunks that were moved to the list m_freed_chunks + * by the sweep phase. If there are more than a certain + * amount of free chunks, delete the free chunks to + * avoid cluttering. 
+ * + * @param heap Heap singleton instance, only for avoiding + * redundant calls to the singleton get + */ void Heap::free(Heap *heap) { if (heap->m_freed_chunks.size() > FREE_THRESH) { while (heap->m_freed_chunks.size()) { @@ -134,15 +236,25 @@ namespace GC { delete chunk; } } + // if there are chunks but not more than FREE_THRESH else if (heap->m_freed_chunks.size()) { + // essentially, always check for overlap between + // chunks before finishing the allocation free_overlap(heap); } - // No freed chunks, nothing to free - else { - return; - } } + /** + * Checks for overlaps between freed chunks of memory + * and removes overlapping chunks while prioritizing + * the chunks at lower addresses. + * + * @param heap Heap singleton instance, only for avoiding + * redundant calls to the singleton get + * + * @note Maybe this should be changed to prioritizing + * larger chunks. + */ void Heap::free_overlap(Heap *heap) { std::vector filtered; size_t i = 0; @@ -161,6 +273,25 @@ namespace GC { heap->m_freed_chunks.swap(filtered); } + // ----- ONLY DEBUGGING ----------------------------------------------------------------------- + + /** + * Prints the result of Heap::init() and a dummy value + * for the current stack frame for reference. + */ + void Heap::check_init() { + auto heap = Heap::the(); + cout << "Heap addr:\t" << heap << endl; + cout << "GC m_stack_end:\t" << heap->m_stack_end << endl; + auto stack_start = reinterpret_cast(__builtin_frame_address(0)); + cout << "GC stack_start:\t" << stack_start << endl; + } + + /** + * Conditional collection, only to be used in debugging + * + * @param flags Bitmap of flags + */ void Heap::collect(uint flags) { cout << "DEBUG COLLECT\nFLAGS: "; @@ -200,68 +331,6 @@ namespace GC { } } - /** - * Sweeps the heap, unmarks the marked chunks for the next cycle, - * adds the unmarked nodes to the vector of freed chunks; to be freed. - * - * @param *heap The heap to oporate on. 
- */ - void Heap::sweep(Heap *heap) { - for (auto it = heap->m_allocated_chunks.begin(); it != heap->m_allocated_chunks.end();) { - Chunk *chunk = *it; - - // Unmark the marked chunks for the next iteration. - if (chunk->marked) { - chunk->marked = false; - ++it; - } - else { - // Add the unmarked chunks to freed chunks and remove from - // the list of allocated chunks - heap->m_freed_chunks.push_back(chunk); - it = heap->m_allocated_chunks.erase(it); - } - } - } - - // This assumes that there are no chains of pointers, will be fixed later on - void Heap::mark(uintptr_t *start, const uintptr_t *end, vector worklist) { - int counter = 0; - // To find adresses thats in the worklist - for (; start < end; start++) { - counter++; - // all pointers must be aligned as double words - - for (auto it = worklist.begin(); it != worklist.end();) { - Chunk *chunk = *it; - if (chunk == nullptr) { - assert(false && "EPIC FAIL"); - } - uintptr_t c_start = reinterpret_cast(chunk->start); - uintptr_t c_end = reinterpret_cast(chunk->start + chunk->size); - // Check if the stack pointer aligns with the chunk - if (c_start <= *start && *start < c_end) { - //if (c_start == *start) { - cout << "Start points to:\t" << hex << *start << endl; - cout << "Chunk start:\t\t" << hex << c_start << endl; - cout << "Chunk end:\t\t" << hex << c_end << "\n" << endl; - - if (!chunk->marked) { - chunk->marked = true; - it = worklist.erase(it); - } - else { - ++it; - } - } - else { - ++it; - } - } - } - cout << "Counter: " << counter << endl; - } - // Mark child references from the root references void mark_test(vector worklist) { while (worklist.size() > 0) { @@ -305,7 +374,7 @@ namespace GC { void Heap::print_contents() { auto heap = Heap::the(); if (heap->m_allocated_chunks.size()) { - cout << "\nALLOCATED CHUNKS #" << heap->m_allocated_chunks.size() << endl; + cout << "\nALLOCATED CHUNKS #" << dec << heap->m_allocated_chunks.size() << endl; for (auto chunk : heap->m_allocated_chunks) { 
print_line(chunk); } @@ -313,7 +382,7 @@ namespace GC { cout << "NO ALLOCATIONS\n" << endl; } if (heap->m_freed_chunks.size()) { - cout << "\nFREED CHUNKS #" << heap->m_freed_chunks.size() << endl; + cout << "\nFREED CHUNKS #" << dec << heap->m_freed_chunks.size() << endl; for (auto fchunk : heap->m_freed_chunks) { print_line(fchunk); } diff --git a/src/GC/tests/extern_lib.cpp b/src/GC/tests/extern_lib.cpp index 2d47576..9ee3a5b 100644 --- a/src/GC/tests/extern_lib.cpp +++ b/src/GC/tests/extern_lib.cpp @@ -7,40 +7,6 @@ GC::Heap *singleton_test(); void init_gc(GC::Heap *heap); void frame_test(GC::Heap *heap); -GC::Heap *singleton_test() { - std::cout << "TESTING SINGLETON INSTANCES" << std::endl; - std::cout << "===========================" << std::endl; - std::cout << "Call 1:\t" << GC::Heap::the() << std::endl; - GC::Heap *heap = GC::Heap::the(); - std::cout << "Call 2:\t" << heap << std::endl; - std::cout << "===========================" << std::endl; - return heap; -} - -void init_gc(GC::Heap *heap){ - std::cout << "\n\n INITIALIZING THE HEAP" << std::endl; - std::cout << "===========================" << std::endl; - heap->init(); - std::cout << "===========================" << std::endl; -} - -void frame_test(GC::Heap *heap) { - std::cout << "\n\n TESTING FRAME ADDRESSES" << std::endl; - std::cout << "===========================" << std::endl; - -#pragma clang diagnostic ignored "-Wframe-address" - auto curr_frame = reinterpret_cast(__builtin_frame_address(0)); - std::cout << "Current stack frame:\t" << curr_frame << std::endl; -#pragma clang diagnostic ignored "-Wframe-address" - auto prev_frame = reinterpret_cast(__builtin_frame_address(1)); - std::cout << "Previous stack frame:\t" << prev_frame << std::endl; - - heap->check_init(); - // auto alloced = heap->alloc(sizeof(unsigned long)); - - std::cout << "===========================" << std::endl; -} - int main() { std::cout << "in main" << std::endl; auto heap = singleton_test(); @@ -49,4 +15,77 @@ int 
main() { frame_test(heap); return 0; +} + +/** + * This test is supposed to determine if the singleton pattern + * implementation is working correctly. This test passes if the + * first and second call prints the same memory address. + * + * Result: pass + * + * @return Pointer to the Heap singleton instance +*/ +GC::Heap *singleton_test() { + std::cout << "TESTING SINGLETON INSTANCES" << std::endl; + std::cout << "===========================" << std::endl; + std::cout << "Call 1:\t" << GC::Heap::the() << std::endl; // First call which initializes the singleton instance + GC::Heap *heap = GC::Heap::the(); // Second call which should return the initialized instance + std::cout << "Call 2:\t" << heap << std::endl; + std::cout << "===========================" << std::endl; + return heap; +} + + +/** + * This test calls Heap::init() which saves the stack-frame + * address from the calling function (this function). + * Heap::init() is supposed to be called at the absolute + * start of the program to save the address of the + * topmost stack frame. This test doesn't do anything + * but prepares for the next test(s). + * + * @param heap The Heap pointer to the singleton instance. + * +*/ +void init_gc(GC::Heap *heap){ + std::cout << "\n\n INITIALIZING THE HEAP" << std::endl; + std::cout << "===========================" << std::endl; + heap->init(); + std::cout << "===========================" << std::endl; +} + +/** + * This function tests the functionality of the intrinsic + * function `__builtin_frame_address` which returns the + * address of the corresponding level of stack frame. + * When given a param of 0, it returns the current stack frame. + * When given a param of 1, it returns the previous stack + * frame, and so on. + * + * This test passes on two conditions: + * 1) if the address of the current frame is smaller than + * the address of the previous frame (assumed). 
+ * 2) if the previous frame has the same address as the one + * saved in the Heap instance after running Heap::init(). + * + * Result: pass + * + * @param heap The Heap instance +*/ +void frame_test(GC::Heap *heap) { + std::cout << "\n\n TESTING FRAME ADDRESSES" << std::endl; + std::cout << "===========================" << std::endl; + +#pragma clang diagnostic ignored "-Wframe-address" // clang++ directive to ignore warnings about __b_f_a + auto curr_frame = reinterpret_cast(__builtin_frame_address(0)); // addr of curr stack frame + std::cout << "Current stack frame:\t" << curr_frame << std::endl; +#pragma clang diagnostic ignored "-Wframe-address" + auto prev_frame = reinterpret_cast(__builtin_frame_address(1)); // addr of prev stack frame + std::cout << "Previous stack frame:\t" << prev_frame << std::endl; + + heap->check_init(); // prints the saved absolute top of the stack + // auto alloced = heap->alloc(sizeof(unsigned long)); + + std::cout << "===========================" << std::endl; } \ No newline at end of file diff --git a/src/GC/tests/h_test.cpp b/src/GC/tests/h_test.cpp index 5c263f1..a71c62f 100644 --- a/src/GC/tests/h_test.cpp +++ b/src/GC/tests/h_test.cpp @@ -35,7 +35,7 @@ void detach_pointer(long **ptr) { *ptr = dummy_ptr; } -void test_chain(int depth, bool detach) { +Node *test_chain(int depth, bool detach) { auto stack_start = reinterpret_cast(__builtin_frame_address(0)); std::cout << "Stack start from test_chain:\t" << stack_start << std::endl; @@ -43,6 +43,7 @@ void test_chain(int depth, bool detach) { // This generates a segmentation fault (should be investigated further) if (detach) node_chain->child = nullptr; + return node_chain; } @@ -63,8 +64,8 @@ void test_some_types() { } int main() { - //gc->init(); - //gc->check_init(); + gc->init(); + gc->check_init(); auto stack_start = reinterpret_cast(__builtin_frame_address(0)); std::cout << "Stack start from main:\t" << stack_start << std::endl; @@ -74,13 +75,20 @@ int main() { // long *l = 
static_cast(gc->alloc(sizeof(long))); // 0x6-0xd | 0x // This is allocated outside of the scope of the GC (if gc->init() isn't called), thus garbage - long *longs[21]; + /* long *longs[21]; std::cout << "Pointer to ints:\t" << longs << std::endl; for (int i = 0; i < 21; i++) { longs[i] = static_cast(gc->alloc(sizeof(long))); - } + } */ + //Node *root; + Node *root = test_chain(3, false); + std::cout << "Adress of root:\t" << &root << std::endl; + std::cout << "Root points to:\t" << root << std::endl; + // 0x7ffdd7556bd8 + int *i = static_cast(gc->alloc(sizeof(int))); + std::cout << "Adress of i:\t" << &i << std::endl; - gc->collect(MARK | SWEEP | FREE); // free misses some chunks + gc->collect(MARK); // free misses some chunks gc->print_contents(); return 0; } \ No newline at end of file diff --git a/src/GC/todo.md b/src/GC/todo.md index 7011610..dba3eee 100644 --- a/src/GC/todo.md +++ b/src/GC/todo.md @@ -1,5 +1,7 @@ # Garbage collection + + ## Project Goal for next week (24/2): @@ -7,34 +9,10 @@ Goal for next week (24/2): ## GC TODO: - Merge to main branch -- Fix singleton references -- Get a good grasp of how the adressing of stack frames actually works. -- Debug "free()", sometimes it skips chunks that should be freed. -- Check alignment of chunks. -- Think about how we want to determine if some object is a pointer or not, probably will have to discuss that with Samuel. Since it is not ideal to determine in the GC if an object is a pointer or not. It should preferably be done in a previous stage. 
+- Switch std::vector to std::list +- Make alloc and init static, move the() to private +- stack_end, stack_start -> stack_top, stack_bottom +- Double check m_heap_size functionality and when a collection is triggered ## Tests TODO -### Library linking -**Victor fixes this** -Compile the GC lib and a test separately, link them together -and evalutate the following: - __builtin_frame_address(0) - __builtin_frame_address(1) - __builtin_return_address(0) - __builtin_return_address(1) - -### GC Init and __b_f_a -1. Save the first stack fram globally as the stack start -2. For each call to collect, save the prev stack frame as the stack end -3. Scan through the span - - gc_init() - global stack_end = __builtin_frame_address(1) - - collect() - local stack_start = __builtin_frame_address(1) - - sweep() - for all addr in range(stack_end, stack_start) - mark if chunk