diff --git a/.gitignore b/.gitignore index 8d1bad3..1daddd6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,12 @@ dist-newstyle *.x *.bak src/Grammar + language llvm.ll +/language +.vscode/ + +src/GC/lib/*.o +src/GC/lib/*.so +src/GC/tests/*.out diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..8057192 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,77 @@ +{ + "files.associations": { + "array": "cpp", + "bitset": "cpp", + "string_view": "cpp", + "initializer_list": "cpp", + "ranges": "cpp", + "span": "cpp", + "utility": "cpp", + "__hash_table": "cpp", + "__split_buffer": "cpp", + "deque": "cpp", + "queue": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "atomic": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "cctype": "cpp", + "charconv": "cpp", + "chrono": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "ostream": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "stop_token": "cpp", + "streambuf": "cpp", + "thread": "cpp", + "typeinfo": "cpp", + "variant": "cpp", + "__bit_reference": "cpp", + "__config": "cpp", + "__debug": "cpp", + "__errc": "cpp", + "__locale": "cpp", + "__mutex_base": "cpp", + "__node_handle": "cpp", + "__threading_support": "cpp", + "__verbose_abort": "cpp", + "ios": "cpp", + "locale": "cpp", + "semaphore": "cpp" + } +} \ No newline at end of file diff --git a/src/Collector.cpp b/src/Collector.cpp new file mode 100644 index 0000000..ac1229c --- /dev/null +++ b/src/Collector.cpp @@ -0,0 +1,6 @@ +#include + +int main() { + std::cout << "i am garbage"; + return 0; +} \ No newline at end of file diff --git a/src/GC/Makefile b/src/GC/Makefile new file mode 100644 index 0000000..92b02e8 --- /dev/null +++ b/src/GC/Makefile @@ -0,0 +1,44 @@ +CC = clang++ +CWD = $(shell pwd) +LIB_INCL = -I$(CWD)/include +LIB_SO = -L$(CWD)/lib +LIB_LINK = $(CWD)/lib +CFLAGS = -Wall -Wextra -v -g -std=gnu++20 -stdlib=libc++ -I +VGFLAGS = --leak-check=full --show-leak-kinds=all +STDFLAGS = -std=gnu++20 -stdlib=libc++ +WFLAGS = -Wall -Wextra +DBGFLAGS = -g + +advance: + $(CC) $(WFLAGS) $(STDFLAGS) tests/advance.cpp -o tests/advance.out + +heap: + $(CC) $(WFLAGS) $(STDFLAGS) $(LIB_INCL) lib/heap.cpp + +h_test: + rm -f tests/h_test.out + $(CC) $(WFLAGS) $(STDFLAGS) $(LIB_INCL) tests/h_test.cpp lib/heap.cpp -o tests/h_test.out + +h_test_vg: + make h_test + valgrind $(VGFLAGS) tests/h_test.out + +h_test_dbg: + make h_test + lldb tests/h_test.out launch + +linker: + rm -f tests/linker.out + $(CC) $(WFLAGS) $(STDFLAGS) $(LIB_INCL) tests/linker.cpp lib/heap.cpp -o tests/linker.out + +linker_vg: + make linker + valgrind $(VGFLAGS) tests/linker.out + +extern_lib: + rm -f lib/heap.o lib/libheap.so tests/extern_lib.out + $(CC) $(STDFLAGS) -c -fPIC -o lib/heap.o lib/heap.cpp + $(CC) $(STDFLAGS) -shared -o lib/libheap.so lib/heap.o + $(CC) $(STDFLAGS) $(WFLAGS) $(LIB_INCL) -v tests/extern_lib.cpp lib/heap.cpp -o tests/extern_lib.out + $(CC) $(STDFLAGS) $(LIB_INCL) $(LIB_SO) -v -Wall -o tests/extern_lib.out tests/extern_lib.cpp -lheap + LD_LIBRARY_PATH=$(LIB_LINK) tests/extern_lib.out \ No newline at end of file diff --git a/src/GC/docs/heap.md b/src/GC/docs/heap.md new file mode 100644 index 0000000..5db98e6 --- /dev/null +++ b/src/GC/docs/heap.md @@ -0,0 +1,47 @@ +## Heap Documentation + +### Algorithm notes + + void mark_test(vector worklist) { + while (worklist.size() > 0) { + Chunk *ref = worklist.pop_back(); + Chunk *child = (Chunk*) *ref; + if (child != NULL && !child->marked) { + child->marked = true; + worklist.push_back(child); + mark_test(worklist); + } + } + } + + void mark_from_roots(uintptr_t *start, const uintptr_t *end) { + vector worklist; + for (;start > end; start--) { + Chunk *ref = *start; + if (ref != NULL && !ref->marked) { + ref->marked = true; + worklist.push_back(ref); + mark_test(worklist); + } + } + } + +Alternative marking, pseudocode + + mark_from_roots(): + worklist <- empty + for fld in Roots + ref <- *fld + if ref ≠ null && !marked(ref) + set_marked(ref) + worklist.add(ref) + mark() + + mark(): + while size(worklist) > 0 + ref <- remove_first(worklist) + for fld in Pointers(ref) + child <- *fld + if child ≠ null && !marked(child) + set_marked(child) + worklist.add(child) \ No newline at end of file diff --git a/src/GC/include/chunk.hpp b/src/GC/include/chunk.hpp new file mode 100644 index 0000000..7aa4fb7 --- /dev/null +++ b/src/GC/include/chunk.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include + +#define CHUNK_LIST_CAP 1024 + +namespace GC { + + struct Chunk { + bool marked; + uintptr_t *start; + size_t size; + }; + +} \ No newline at end of file diff --git a/src/GC/include/heap.hpp b/src/GC/include/heap.hpp new file mode 100644 index 0000000..c70ee54 --- /dev/null +++ b/src/GC/include/heap.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "chunk.hpp" + +#define HEAP_SIZE 65536 + +#define MARK (uint) 0x1 +#define SWEEP (uint) 0x2 +#define FREE (uint) 0x4 +#define COLLECT_ALL (uint) 0x7 + +#define FREE_THRESH (uint) 20 + +namespace GC { + + class Heap { + + private: + + //Private constructor according to the singleton pattern + Heap() { + m_heap = reinterpret_cast(malloc(HEAP_SIZE)); + m_size = 0; + m_allocated_size = 0; + } + + // BEWARE only for testing, this should be adressed + ~Heap() { + std::free((char *)m_heap); + } + + static inline Heap *the() { // TODO: make private + if (m_instance) // if m_instance is not a nullptr + return m_instance; + m_instance = new Heap(); + return m_instance; + } + + static inline Chunk *getAt(std::list list, size_t n) { + auto iter = list.begin(); + if (!n) + return *iter; + std::advance(iter, n); + return *iter; + } + + void collect(); + void sweep(Heap *heap); + uintptr_t *try_recycle_chunks(size_t size); + void free(Heap* heap); + void free_overlap(Heap *heap); + void mark(uintptr_t *start, const uintptr_t *end, std::list worklist); + void print_line(Chunk *chunk); + void print_worklist(std::list list); + + inline static Heap *m_instance = nullptr; + const char *m_heap; + size_t m_size; + size_t m_allocated_size; + uintptr_t *m_stack_top = nullptr; + + // maybe change to std::list + std::list m_allocated_chunks; + std::list m_freed_chunks; + + public: + + /** + * These are the only two functions which are exposed + * as the API for LLVM. At the absolute start of the + * program the developer has to call init() to ensure + * that the address of the topmost stack frame is + * saved as the limit for scanning the stack in collect. + */ + static void init(); // TODO: make static + static void dispose(); // -||- + static void *alloc(size_t size); // -||- + + // DEBUG ONLY + void collect(uint flags); // conditional collection + void check_init(); // print dummy things + void print_contents(); // print dummy things + }; +} \ No newline at end of file diff --git a/src/GC/lib/heap.cpp b/src/GC/lib/heap.cpp new file mode 100644 index 0000000..c17f680 --- /dev/null +++ b/src/GC/lib/heap.cpp @@ -0,0 +1,412 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../include/heap.hpp" +using namespace std; + +namespace GC { + + /** + * Initialises the heap singleton and saves the address + * of the calling stack frame as the stack_top. Presumeably + * this address points to the stack frame of the compiled + * LLVM executable after linking. + */ + void Heap::init() { + Heap *heap = Heap::the(); + heap->m_stack_top = reinterpret_cast(__builtin_frame_address(1)); + } + + /** + * Disposes the heap at program exit. + */ + void Heap::dispose() { + Heap *heap = Heap::the(); + delete heap; + } + + /** + * Allocates a given amount of bytes on the heap. + * + * @param size The amount of bytes to be allocated. + * + * @return A pointer to the address where the memory + * has been allocated. This pointer is supposed + * to be casted to and object pointer. + */ + void *Heap::alloc(size_t size) { + + // Singleton + Heap *heap = Heap::the(); + + if (size < 0) { + cout << "Heap: Cannot alloc less than 0B. No bytes allocated." << endl; + return nullptr; + } + + if (heap->m_size + size > HEAP_SIZE) { + heap->collect(); + // If collect failed, crash with OOM error + assert(heap->m_size + size <= HEAP_SIZE && "Heap: Out Of Memory"); + } + + // If a chunk was recycled, return the old chunk address + uintptr_t *reused_chunk = heap->try_recycle_chunks(size); + if (reused_chunk != nullptr) { + return (void *)reused_chunk; + } + + // If no free chunks was found (reused_chunk is a nullptr), + // then create a new chunk + auto new_chunk = new Chunk; + new_chunk->size = size; + new_chunk->start = (uintptr_t *)(heap->m_heap + heap->m_size); + + heap->m_size += size; + + heap->m_allocated_chunks.push_back(new_chunk); + + // new_chunk should probably be a unique pointer, if that isn't implicit already + return new_chunk->start; + } + + /** + * Tries to recycle used and freed chunks that are + * already allocated objects by the OS but freed + * from our Heap. This reduces the amount of GC + * objects slightly which saves time from malloc'ing + * memory from the OS. + * + * @param size Amount of bytes needed for the object + * which is about to be allocated. + * + * @returns If a chunk is found and recycled, a + * pointer to the allocated memory for + * the object is returned. If not, a + * nullptr is returned to signify no + * chunks were found. + */ + uintptr_t *Heap::try_recycle_chunks(size_t size) { + auto heap = Heap::the(); + // Check if there are any freed chunks large enough for current request + for (size_t i = 0; i < heap->m_freed_chunks.size(); i++) { + // auto cp = heap->m_freed_chunks.at(i); + auto cp = getAt(heap->m_freed_chunks, i); + auto iter = heap->m_freed_chunks.begin(); + advance(iter, i); + if (cp->size > size) + { + // Split the chunk, use one part and add the remaining part to + // the list of freed chunks + size_t diff = cp->size - size; + + auto chunk_complement = new Chunk; + chunk_complement->size = diff; + chunk_complement->start = cp->start + cp->size; + + heap->m_freed_chunks.erase(iter); + heap->m_freed_chunks.push_back(chunk_complement); + heap->m_allocated_chunks.push_back(cp); + + return cp->start; + } + else if (cp->size == size) + { + // Reuse the whole chunk + heap->m_freed_chunks.erase(iter); + heap->m_allocated_chunks.push_back(cp); + return cp->start; + } + } + return nullptr; + } + + /** + * Collection phase of the garbage collector. When + * an allocation is requested and there is no space + * left on the heap, a collection is triggered. This + * function is private so that the user cannot trigger + * a collection unneccessarily. + */ + void Heap::collect() { + // Get instance + auto heap = Heap::the(); + + // get current stack + auto stack_bottom = reinterpret_cast(__builtin_frame_address(0)); + + // fix this block, it's nästy + uintptr_t *stack_top; + if (heap->m_stack_top != nullptr) + stack_top = heap->m_stack_top; + else + stack_top = (uintptr_t *)0; // temporary + + auto work_list = heap->m_allocated_chunks; + mark(stack_bottom, stack_top, work_list); + + sweep(heap); + + free(heap); + } + + /** + * Iterates through the stack, if an element on the stack points to a chunk, + * called a root chunk, that chunk is marked (i.e. reachable). + * Then it recursively follows all chunks which are possibly reachable from + * the root chunk and mark those chunks. + * If a chunk is marked it is removed from the worklist, since it's no longer of + * concern for this method. + * + * @param start Pointer to the start of the stack frame. + * @param end Pointer to the end of the stack frame. + * @param worklist The currently allocated chunks, which haven't been marked. + */ + void Heap::mark(uintptr_t *start, const uintptr_t *end, list worklist) { + int counter = 0; + // To find adresses thats in the worklist + for (; start < end; start++) { + counter++; + auto it = worklist.begin(); + auto stop = worklist.end(); + // for (auto it = worklist.begin(); it != worklist.end();) { + while (it != stop) { + Chunk *chunk = *it; + + auto c_start = reinterpret_cast(chunk->start); + auto c_size = reinterpret_cast(chunk->size); + auto c_end = reinterpret_cast(c_start + c_size); + + cout << "Start points to:\t" << hex << *start << endl; + cout << "Chunk start:\t\t" << hex << c_start << endl; + cout << "Chunk end:\t\t" << hex << c_end << "\n" << endl; + + // Check if the stack pointer aligns with the chunk + if (c_start <= *start && *start < c_end) { + + if (!chunk->marked) { + chunk->marked = true; + // Remove the marked chunk from the worklist + it = worklist.erase(it); + // Recursively call mark, to see if the reachable chunk further points to another chunk + mark((uintptr_t*) c_start, (uintptr_t*) c_end, worklist); + } + else { + ++it; + } + } + else { + ++it; + } + } + } + cout << "Counter: " << counter << endl; + } + + /** + * Sweeps the heap, unmarks the marked chunks for the next cycle, + * adds the unmarked nodes to the list of freed chunks; to be freed. + * + * @param heap Pointer to the heap singleton instance. + */ + void Heap::sweep(Heap *heap) { + auto iter = heap->m_allocated_chunks.begin(); + auto stop = heap->m_allocated_chunks.end(); + // for (auto it = heap->m_allocated_chunks.begin(); it != heap->m_allocated_chunks.end();) { + while (iter != stop) { + Chunk *chunk = *iter; + + // Unmark the marked chunks for the next iteration. + if (chunk->marked) { + chunk->marked = false; + ++iter; + } + else { + // Add the unmarked chunks to freed chunks and remove from + // the list of allocated chunks + heap->m_freed_chunks.push_back(chunk); + iter = heap->m_allocated_chunks.erase(iter); + } + } + } + + /** + * Frees chunks that was moved to the list m_freed_chunks + * by the sweep phase. If there are more than a certain + * amount of free chunks, delete the free chunks to + * avoid cluttering. + * + * @param heap Heap singleton instance, only for avoiding + * redundant calls to the singleton get + */ + void Heap::free(Heap *heap) { + if (heap->m_freed_chunks.size() > FREE_THRESH) { + while (heap->m_freed_chunks.size()) { + auto chunk = heap->m_freed_chunks.back(); + heap->m_freed_chunks.pop_back(); + delete chunk; + } + } + // if there are chunks but not more than FREE_THRESH + else if (heap->m_freed_chunks.size()) { + // essentially, always check for overlap between + // chunks before finishing the allocation + free_overlap(heap); + } + } + + /** + * Checks for overlaps between freed chunks of memory + * and removes overlapping chunks while prioritizing + * the chunks at lower addresses. + * + * @param heap Heap singleton instance, only for avoiding + * redundant calls to the singleton get + * + * @note Maybe this should be changed to prioritizing + * larger chunks. + */ + void Heap::free_overlap(Heap *heap) { + std::list filtered; + size_t i = 0; + // filtered.push_back(heap->m_freed_chunks.at(i++)); + filtered.push_back(getAt(heap->m_freed_chunks, i++)); + cout << filtered.back()->start << endl; + for (; i < heap->m_freed_chunks.size(); i++) { + auto prev = filtered.back(); + // auto next = heap->m_freed_chunks.at(i); + auto next = getAt(heap->m_freed_chunks, i); + auto p_start = (uintptr_t)(prev->start); + auto p_size = (uintptr_t)(prev->size); + auto n_start = (uintptr_t)(next->start); + if (n_start >= (p_start + p_size)) { + filtered.push_back(next); + } + } + heap->m_freed_chunks.swap(filtered); + } + + // ----- ONLY DEBUGGING ----------------------------------------------------------------------- + + /** + * Prints the result of Heap::init() and a dummy value + * for the current stack frame for reference. + */ + void Heap::check_init() { + auto heap = Heap::the(); + cout << "Heap addr:\t" << heap << endl; + cout << "GC m_stack_top:\t" << heap->m_stack_top << endl; + auto stack_bottom = reinterpret_cast(__builtin_frame_address(0)); + cout << "GC stack_bottom:\t" << stack_bottom << endl; + } + + /** + * Conditional collection, only to be used in debugging + * + * @param flags Bitmap of flags + */ + void Heap::collect(uint flags) { + + cout << "DEBUG COLLECT\nFLAGS: "; + if (flags & MARK) + cout << "\n - MARK"; + if (flags & SWEEP) + cout << "\n - SWEEP"; + if (flags & FREE) + cout << "\n - FREE"; + cout << endl; + + auto heap = Heap::the(); + + // get the frame adress, whwere local variables and saved registers are located + auto stack_bottom = reinterpret_cast(__builtin_frame_address(0)); + cout << "Stack bottom in collect:\t" << stack_bottom << endl; + uintptr_t *stack_top; + + if (heap->m_stack_top != nullptr) + stack_top = heap->m_stack_top; + else + stack_top = (uintptr_t *) stack_bottom + 80; // dummy value + + cout << "Stack end in collect:\t " << stack_top << endl; + auto work_list = heap->m_allocated_chunks; + + if (flags & MARK) { + mark(stack_bottom, stack_top, work_list); + } + + if (flags & SWEEP) { + sweep(heap); + } + + if (flags & FREE) { + free(heap); + } + } + + // Mark child references from the root references + void mark_test(vector worklist) { + while (worklist.size() > 0) { + Chunk *ref = worklist.back(); + worklist.pop_back(); + Chunk *child = (Chunk*) ref; // this is probably not correct + if (child != nullptr && !child->marked) { + child->marked = true; + worklist.push_back(child); + mark_test(worklist); + } + } + } + + // Mark the root references and look for child references to them + void mark_from_roots(uintptr_t *start, const uintptr_t *end) { + vector worklist; + for (;start > end; start --) { + if (*start % 8 == 0) { // all pointers must be aligned as double words + Chunk *ref = (Chunk*) *start; + if (ref != nullptr && !ref->marked) { + ref->marked = true; + worklist.push_back(ref); + mark_test(worklist); + } + } + } + } + + // For testing purposes + void Heap::print_line(Chunk *chunk) { + cout << "Marked: " << chunk->marked << "\nStart adr: " << chunk->start << "\nSize: " << chunk->size << " B\n" << endl; + } + + void Heap::print_worklist(std::list list) { + for (auto cp : list) { + cout << "Chunk at:\t" << cp->start << "\nSize:\t\t" << cp->size << endl; + } + } + + void Heap::print_contents() { + auto heap = Heap::the(); + if (heap->m_allocated_chunks.size()) { + cout << "\nALLOCATED CHUNKS #" << dec << heap->m_allocated_chunks.size() << endl; + for (auto chunk : heap->m_allocated_chunks) { + print_line(chunk); + } + } else { + cout << "NO ALLOCATIONS\n" << endl; + } + if (heap->m_freed_chunks.size()) { + cout << "\nFREED CHUNKS #" << dec << heap->m_freed_chunks.size() << endl; + for (auto fchunk : heap->m_freed_chunks) { + print_line(fchunk); + } + } else { + cout << "NO FREED CHUNKS" << endl; + } + } +} \ No newline at end of file diff --git a/src/GC/tests/advance.cpp b/src/GC/tests/advance.cpp new file mode 100644 index 0000000..0a8a177 --- /dev/null +++ b/src/GC/tests/advance.cpp @@ -0,0 +1,34 @@ +#include +#include +#include + +using namespace std; + +int main() { + list l; + char c = 'a'; + for (int i = 1; i <= 5; i++) { + l.push_back(c++); + } + + auto iter = l.begin(); + auto stop = l.end(); + + while (iter != stop) { + cout << *iter << " "; + + iter++; + } + cout << endl; + iter = l.begin(); + while (*iter != *stop) { + cout << *iter << " "; + iter++; + } + cout << endl; + + cout << "rebased" << endl; + // cout << "iter: " << *iter << "\nstop: " << *stop << endl; + + return 0; +} \ No newline at end of file diff --git a/src/GC/tests/alloc_free.cpp b/src/GC/tests/alloc_free.cpp new file mode 100644 index 0000000..0e277dc --- /dev/null +++ b/src/GC/tests/alloc_free.cpp @@ -0,0 +1,29 @@ +#include + +#include "heap.hpp" + +struct Obj { + int a; + int b; + int c; +}; + +int main() { + GC::Heap *heap = GC::Heap::the2(); + Obj *obj; + + for (int i = 0; i < 4; i++) { + obj = static_cast(heap->alloc(sizeof(Obj))); + obj->a = i * i + 1; + obj->b = i * i + 2; + obj->c = i * i + 3; + } + + // heap->force_collect(); + + std::cout << obj->a << ", " << obj->b << ", " << obj->c << std::endl; + + //delete heap; + + return 0; +} \ No newline at end of file diff --git a/src/GC/tests/extern_lib.cpp b/src/GC/tests/extern_lib.cpp new file mode 100644 index 0000000..9ee3a5b --- /dev/null +++ b/src/GC/tests/extern_lib.cpp @@ -0,0 +1,91 @@ +#include +#include + +#include "heap.hpp" + +GC::Heap *singleton_test(); +void init_gc(GC::Heap *heap); +void frame_test(GC::Heap *heap); + +int main() { + std::cout << "in main" << std::endl; + auto heap = singleton_test(); + + init_gc(heap); + frame_test(heap); + + return 0; +} + +/** + * This test is supposed to determine if the singleton pattern + * implementation is working correctly. This test passes if the + * first and second call prints the same memory address. + * + * Result: pass + * + * @return Pointer to the Heap singleton instance +*/ +GC::Heap *singleton_test() { + std::cout << "TESTING SINGLETON INSTANCES" << std::endl; + std::cout << "===========================" << std::endl; + std::cout << "Call 1:\t" << GC::Heap::the() << std::endl; // First call which initializes the singleton instance + GC::Heap *heap = GC::Heap::the(); // Second call which should return the initialized instance + std::cout << "Call 2:\t" << heap << std::endl; + std::cout << "===========================" << std::endl; + return heap; +} + + +/** + * This test calls Heap::init() which saves the stack-frame + * address from the calling function (this function). + * Heap::init() is supposed to be called at the absolute + * start of the program to save the address of the + * topmost stack frame. This test doesn't do anything + * but prepares for the next test(s). + * + * @param heap The Heap pointer to the singleton instance. + * +*/ +void init_gc(GC::Heap *heap){ + std::cout << "\n\n INITIALIZING THE HEAP" << std::endl; + std::cout << "===========================" << std::endl; + heap->init(); + std::cout << "===========================" << std::endl; +} + +/** + * This function tests the functionality of the intrinsic + * function `__builtin_frame_address` which returns the + * address of the corresponding level of stack frame. + * When given a param of 0, it returns the current stack frame. + * When given a param of 1, it returns the previous stack + * frame, and so on. + * + * This test passes on two conditions: + * 1) if the address of the current frame is smaller than + * the address of the previous frame (assumed). + * 2) if the previous frame has the same address as the one + * saved in the Heap instance after running Heap::init(). + * + * Result: pass + * + * @param heap The Heap instance +*/ +void frame_test(GC::Heap *heap) { + std::cout << "\n\n TESTING FRAME ADDRESSES" << std::endl; + std::cout << "===========================" << std::endl; + +#pragma clang diagnostic ignored "-Wframe-address" // clang++ directive to ignore warnings about __b_f_a + auto curr_frame = reinterpret_cast(__builtin_frame_address(0)); // addr of curr stack frame + std::cout << "Current stack frame:\t" << curr_frame << std::endl; +#pragma clang diagnostic ignored "-Wframe-address" + auto prev_frame = reinterpret_cast(__builtin_frame_address(1)); // addr of prev stack frame + std::cout << "Previous stack frame:\t" << prev_frame << std::endl; + + heap->check_init(); // prints the saved absolute top of the stack + // auto alloced = heap->alloc(sizeof(unsigned long)); + + std::cout << "===========================" << std::endl; +} \ No newline at end of file diff --git a/src/GC/tests/h_test.cpp b/src/GC/tests/h_test.cpp new file mode 100644 index 0000000..ac55bf7 --- /dev/null +++ b/src/GC/tests/h_test.cpp @@ -0,0 +1,95 @@ +#include "../include/heap.hpp" + +GC::Heap *gc = GC::Heap::the(); + +struct Node { + int id; + Node *child; +}; + +Node *create_chain(int depth) { + std::vector nodes; + if (depth > 0) { + Node *last_node = static_cast(gc->alloc(sizeof(Node))); + last_node->id = depth; + last_node->child = nullptr; + nodes.push_back(last_node); + for (int i = 0; i < depth; i++) { + Node *node = static_cast(gc->alloc(sizeof(Node))); + node->id = depth-i; + node->child = nodes[i]; + nodes.push_back(node); + } + for (size_t i = 0; i < nodes.size(); i++) { + std::cout << "Element at " << i << ":\t" << nodes.at(i) << std::endl; + } + return nodes[depth]; + } + else + return 0; +} + +void create_array(size_t size) { + int *arr = static_cast(gc->alloc(sizeof(int) * size)); +} + +void detach_pointer(long **ptr) { + long *dummy_ptr = nullptr; + *ptr = dummy_ptr; +} + +Node *test_chain(int depth, bool detach) { + auto stack_start = reinterpret_cast(__builtin_frame_address(0)); + std::cout << "Stack start from test_chain:\t" << stack_start << std::endl; + + Node *node_chain = create_chain(depth); + // This generates a segmentation fault (should be investigated further) + if (detach) + node_chain->child = nullptr; + return node_chain; + +} + +void test_some_types() { + auto stack_start = reinterpret_cast(__builtin_frame_address(0)); + std::cout << "Stack start from test_some_types:\t" << stack_start << std::endl; + + long *l = static_cast(gc->alloc(sizeof(long))); + std::cout << "l points to:\t\t" << l << std::endl; + detach_pointer(&l); + std::cout << "l points to:\t\t" << l << std::endl; + + // Some more dummy values of different sizes, to test stack pointer alignment + int *i = static_cast(gc->alloc(sizeof(int))); + char *c = static_cast(gc->alloc(sizeof(int))); + short *s = static_cast(gc->alloc(sizeof(short))); +} + +int main() { + gc->init(); + gc->check_init(); + auto stack_start = reinterpret_cast(__builtin_frame_address(0)); + std::cout << "Stack start from main:\t" << stack_start << std::endl; + + // char *c = static_cast(gc->alloc(sizeof(char))); // 0x0 | 0x0 + // int *i = static_cast(gc->alloc(sizeof(int))); // 0x1-0x4 | 0x4-0x8 + // char *c2 = static_cast(gc->alloc(sizeof(char)));// 0x5 | 0x9-0x + // long *l = static_cast(gc->alloc(sizeof(long))); // 0x6-0xd | 0x + + // This is allocated outside of the scope of the GC (if gc->init() isn't called), thus garbage + /* long *longs[21]; + std::cout << "Pointer to ints:\t" << longs << std::endl; + for (int i = 0; i < 21; i++) { + longs[i] = static_cast(gc->alloc(sizeof(long))); + } */ + + //Node *root = static_cast(gc->alloc(sizeof(Node))); + Node *root = test_chain(100, true); + std::cout << "Adress of root:\t" << &root << std::endl; + std::cout << "Root points to:\t" << root << std::endl; + std::cout << "Root child:\t" << root->child << std::endl; + + gc->collect(MARK); + gc->print_contents(); + return 0; +} \ No newline at end of file diff --git a/src/GC/tests/linker.cpp b/src/GC/tests/linker.cpp new file mode 100644 index 0000000..f3b12f0 --- /dev/null +++ b/src/GC/tests/linker.cpp @@ -0,0 +1,30 @@ +#include + +#include "heap.hpp" + +struct Obj { + int a; + int b; + int c; +}; + +int main() { + auto heap = GC::Heap::the2(); + + std::cout << "heap:\t" << heap << std::endl; + + auto obj = static_cast(heap->alloc(sizeof(Obj))); + + std::cout << "obj: \t" << obj << std::endl; + + obj->a = 3; + obj->b = 4; + obj->c = 5; + + std::cout << obj->a << ", " << obj->b << ", " << obj->c << std::endl; + + heap->print_contents(); + //delete heap; + + return 0; +} \ No newline at end of file diff --git a/src/GC/tests/stack.cpp b/src/GC/tests/stack.cpp new file mode 100644 index 0000000..8f8382e --- /dev/null +++ b/src/GC/tests/stack.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include + +/* + * Stack.cpp + * - Tests stack scanning and stack pointers + * + * Goal: Find the values of the following variables + * and their position on the stack + * - unsigned long a + * - unsigned long b + * - unsigned long global_1 + * - unsigned long global_2 + * + * Result: Passed +*/ + + + + +std::vector iv; + +void collect() { + std::cout << "in collect" << std::endl; + + uintptr_t *stack_start = reinterpret_cast(__builtin_frame_address(0)); + + // denna orsakar segfault om man ger __b_f_a ett värde större än 2 + // uintptr_t *stack_end = reinterpret_cast(__builtin_frame_address(100)); + + std::cout << "SP1:\t" << stack_start << "\nSP2:\t" << (stack_start - 1*sizeof(int)) << std::endl; + std::cout << "SP-:\t" << --stack_start << std::endl; + + const uintptr_t *stack_end = (stack_start + 30*sizeof(int)); + int vars_found = 0; + + while (stack_start < stack_end) { + + if (std::find(iv.begin(), iv.end(), stack_start) != iv.end()) { + vars_found++; + std::cout << "Found " << *(reinterpret_cast(stack_start)) << " at " << stack_start << std::endl; + } + + // std::cout << "SP address:\t\t" << stack_start << "\nSP value:\t\t" << *(reinterpret_cast(stack_start)) << std::endl; + + stack_start++; + } + + if (vars_found == 0) { + std::cout << "Found nothing" << std::endl; + } +} + +int add(unsigned long a, unsigned long b) { + iv.push_back(reinterpret_cast(&a)); + iv.push_back(reinterpret_cast(&b)); + std::cout << "'a':\t" << &a << "\n'b':\t" << &b << std::endl; + collect(); + return a + b; +} + +int main() { + + unsigned long global_1 = 16; + unsigned long global_2 = 32; + + iv.push_back(&global_1); + iv.push_back(&global_2); + + std::cout << "'g1':\t" << &global_1 << "\n'g2':\t" << &global_2 << std::endl; + + add(3,2); + return 0; +} \ No newline at end of file diff --git a/src/GC/tests/stack2.cpp b/src/GC/tests/stack2.cpp new file mode 100644 index 0000000..f1a78bc --- /dev/null +++ b/src/GC/tests/stack2.cpp @@ -0,0 +1,51 @@ +#include +#include + +void dummy1(); +void dummy2(); + +int main() { + + uintptr_t *prev1 = reinterpret_cast(__builtin_frame_address(0)); + uintptr_t *prev2 = static_cast(__builtin_frame_address(0)); + + std::cout << "reinterpret:\t" << prev1 << "\nstatic:\t\t" << prev2 << std::endl; + + std::cout << "Start:\t\t" << prev1 << std::endl; +#pragma clang diagnostic ignored "-Wframe-address" + uintptr_t *tmp = reinterpret_cast(__builtin_frame_address(1)); + std::cout << "Frame 1:\t" << tmp << "\t\tDiff:\t" << std::hex << "0x"<< tmp - prev1 << std::endl; + prev1 = tmp; + +#pragma clang diagnostic ignored "-Wframe-address" + tmp = reinterpret_cast(__builtin_frame_address(2)); + std::cout << "Frame 2:\t" << tmp << "\tDiff:\t" << std::hex << "0x" << tmp - prev1 << std::endl; + prev1 = tmp; + +// arg > 2 for __builtin_frame_address() results in segfault +// #pragma clang diagnostic ignored "-Wframe-address" +// tmp = reinterpret_cast(__builtin_frame_address(3)); +// std::cout << "Frame 3:\t" << tmp << "\tDiff:\t" << std::hex << "0x" << prev1 - tmp << std::endl; + + dummy1(); + + return 0; +} + +void dummy1() { + std::cout << "D1 SFrame:\t" << __builtin_frame_address(0); +#pragma clang diagnostic ignored "-Wframe-address" + std::cout << "\t\tPrev:\t" << __builtin_frame_address(1) << std::endl; + std::cout << "D1 RA:\t\t" << std::hex << __builtin_return_address(0) << std::endl; + dummy2(); +} + +void dummy2() { + std::cout << "Frame:\t\t" << __builtin_frame_address(0); +#pragma clang diagnostic ignored "-Wframe-address" + std::cout << "\t\tPrev:\t" << __builtin_frame_address(1) << std::endl; + void *ra = __builtin_return_address(0); + std::cout << "D2 RA:\t\t" << std::hex << ra << std::endl; + // gives same value as pure 'ra' + // std::cout << "D2 ERA:\t\t" << std::hex << __builtin_extract_return_addr(ra) << std::endl; +} \ No newline at end of file diff --git a/src/GC/todo.md b/src/GC/todo.md new file mode 100644 index 0000000..f9492da --- /dev/null +++ b/src/GC/todo.md @@ -0,0 +1,15 @@ +# Garbage collection + +## Project + +Goal for next week (24/2): +- Write more complex tests + +## GC TODO: +- Merge to main branch +- Double check m_heap_size functionality and when a collection is triggered +- Kolla vektor vs list complexity + +## Tests TODO +- Write complex datastructures for tests with larger programs + diff --git a/src/MarkSweep.cpp b/src/MarkSweep.cpp new file mode 100644 index 0000000..ab219d2 --- /dev/null +++ b/src/MarkSweep.cpp @@ -0,0 +1,87 @@ +#include +#include +#define HEAP_SIZE 65536 // Arbitrary for now, 2^16 +using namespace std; + +/* A simple mark and sweep algorithm */ + +// Shouldn't be exposed. For now, it is +struct ObjectHeader { + size_t size = sizeof(this); + bool marked = false; + +}; + +struct Object : ObjectHeader { + char name; // should be something like id, but for testing sake its char + Object* child; + // Object(char name_) {} + Object(char name_, Object* child_) { + name = name_; + child = child_; + } +}; + +// Representing the heap as a simple struct for now +struct Heap { + Object heap_space[HEAP_SIZE]; +}; + +// For now it assumes that it is given root objects from the start, no root finding included +class MarkSweep { + public: + void mark(Object* obj) { + if (!markedBit(obj)) { + markBit(obj); + Object* ref = obj->child; + if (ref != nullptr) { + mark(ref); + } + } + } + + void sweep(vector worklist) { + for (Object* obj: worklist) { + if (!markedBit(obj) && obj != nullptr) { + delete obj; + } + } + } + + private: + bool markedBit(Object* obj) { + return obj->marked; + } + + void markBit(Object* obj) { + obj->marked = true; + } + +}; + +int main() { + Object* b = new Object('B', nullptr); + // b->name = 'B'; + // b->child = nullptr; + Object* c = new Object('C', b); + // c->name = 'C'; + // c->child = b; // c -> d + Object* d = new Object('D', nullptr); + // d->name = 'D'; + // d->child = nullptr; + + //Heap* heap = new Heap{*c, *b, *d}; + vector worklist = {c, b, d}; + MarkSweep* gc = new MarkSweep(); + + gc->mark(c); + cout << "Expected 1, got: " << b->marked << '\n'; + cout << "Expected 1, got: " << c->marked << '\n'; + cout << "Expected 0, got: " << d->marked << '\n'; + + gc->sweep(worklist); + cout << b->name << '\n'; + cout << c->name << '\n'; + cout << d->name << '\n'; // The object at d is now deleted (freed) + return 0; +} \ No newline at end of file