Documented the library

This commit is contained in:
Victor Olin 2023-02-23 13:00:15 +01:00
parent bdca6ffc85
commit 7b068d6e88
5 changed files with 257 additions and 155 deletions

View file

@ -23,6 +23,7 @@ namespace GC {
private: private:
//Private constructor according to the singleton pattern
Heap() { Heap() {
m_heap = reinterpret_cast<char *>(malloc(HEAP_SIZE)); m_heap = reinterpret_cast<char *>(malloc(HEAP_SIZE));
m_size = 0; m_size = 0;
@ -50,8 +51,8 @@ namespace GC {
public: public:
static Heap *the() { static inline Heap *the() { // TODO: make private
if (m_instance) if (m_instance) // if m_instance is not a nullptr
return m_instance; return m_instance;
m_instance = new Heap(); m_instance = new Heap();
return m_instance; return m_instance;
@ -62,12 +63,19 @@ namespace GC {
std::free((char *)m_heap); std::free((char *)m_heap);
} }
void *alloc(size_t size); /**
void init(); * These are the only two functions which are exposed
* as the API for LLVM. At the absolute start of the
* program the developer has to call init() to ensure
* that the address of the topmost stack frame is
* saved as the limit for scanning the stack in collect.
*/
void *alloc(size_t size); // TODO: make static
void init(); // TODO: make static
// DEBUG ONLY // DEBUG ONLY
void check_init(); void collect(uint flags); // conditional collection
void collect(uint flags); void check_init(); // print dummy things
void print_contents(); void print_contents(); // print dummy things
}; };
} }

View file

@ -16,16 +16,8 @@ namespace GC {
* Initialises the heap singleton and saves the address * Initialises the heap singleton and saves the address
* of the calling stack frame as the stack_end. Presumeably * of the calling stack frame as the stack_end. Presumeably
* this address points to the stack frame of the compiled * this address points to the stack frame of the compiled
* LLVM executable after linking. (NOT CONFIRMED) * LLVM executable after linking.
*/ */
void Heap::check_init() {
auto heap = Heap::the();
cout << "Heap addr:\t" << heap << endl;
cout << "GC m_stack_end:\t" << heap->m_stack_end << endl;
auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
cout << "GC stack_start:\t" << stack_start << endl;
}
void Heap::init() { void Heap::init() {
Heap *heap = Heap::the(); Heap *heap = Heap::the();
heap->m_stack_end = reinterpret_cast<uintptr_t *>(__builtin_frame_address(1)); heap->m_stack_end = reinterpret_cast<uintptr_t *>(__builtin_frame_address(1));
@ -76,6 +68,24 @@ namespace GC {
return new_chunk->start; return new_chunk->start;
} }
/**
* Tries to recycle chunks that have already been
* obtained from the OS but have since been freed
* from our Heap. This slightly reduces the number
* of GC objects, which saves the time otherwise
* spent malloc'ing memory from the OS.
*
* @param heap Pointer to the singleton Heap instance
*
* @param size Amount of bytes needed for the object
* which is about to be allocated.
*
* @returns If a chunk is found and recycled, a
* pointer to the allocated memory for
* the object is returned. If not, a
* nullptr is returned to signify no
* chunks were found.
*/
uintptr_t *Heap::try_recycle_chunks(Heap *heap, size_t size) { uintptr_t *Heap::try_recycle_chunks(Heap *heap, size_t size) {
// Check if there are any freed chunks large enough for current request // Check if there are any freed chunks large enough for current request
for (size_t i = 0; i < heap->m_freed_chunks.size(); i++) { for (size_t i = 0; i < heap->m_freed_chunks.size(); i++) {
@ -107,12 +117,23 @@ namespace GC {
return nullptr; return nullptr;
} }
/**
* Collection phase of the garbage collector. When
* an allocation is requested and there is no space
* left on the heap, a collection is triggered. This
* function is private so that the user cannot trigger
* a collection unnecessarily.
*
* @param heap Heap singleton instance, only for avoiding
* redundant calls to the singleton get
*/
void Heap::collect(Heap *heap) { void Heap::collect(Heap *heap) {
// Get the adress of the current stack frame
uintptr_t *stack_end;
// get current stack
auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)); auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
// fix this block, it's nästy
uintptr_t *stack_end;
if (heap->m_stack_end != nullptr) if (heap->m_stack_end != nullptr)
stack_end = heap->m_stack_end; stack_end = heap->m_stack_end;
else else
@ -126,6 +147,87 @@ namespace GC {
free(heap); free(heap);
} }
/**
* Iterates through the stack, if an element on the stack points to a chunk
* that chunk is marked (i.e. reachable). It only marks element which are directly
* reachable from the chunk, so no chain of pointers from the stack are detected.
* If a chunk is marked it is removed from the worklist, since it's no longer of
* concern for this method.
*
* @param start Pointer to the start of the stack frame.
* @param end Pointer to the end of the stack frame.
* @param worklist The currently allocated chunks.
*/
void Heap::mark(uintptr_t *start, const uintptr_t *end, vector<Chunk*> worklist) {
int counter = 0;
// To find adresses thats in the worklist
for (; start < end; start++) {
counter++;
// all pointers must be aligned as double words
for (auto it = worklist.begin(); it != worklist.end();) {
Chunk *chunk = *it;
auto c_start = reinterpret_cast<uintptr_t>(chunk->start);
auto c_size = reinterpret_cast<uintptr_t>(chunk->size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
cout << "Start points to:\t" << hex << *start << endl;
cout << "Chunk start:\t\t" << hex << c_start << endl;
cout << "Chunk end:\t\t" << hex << c_end << "\n" << endl;
// Check if the stack pointer aligns with the chunk
if (c_start <= *start && *start < c_end) {
if (!chunk->marked) {
chunk->marked = true;
it = worklist.erase(it);
}
else {
++it;
}
}
else {
++it;
}
}
}
cout << "Counter: " << counter << endl;
}
/**
 * Sweep phase: walks heap->m_allocated_chunks once. A chunk whose
 * `marked` flag is set stays allocated and has the flag cleared for the
 * next cycle. Every other chunk is appended to heap->m_freed_chunks and
 * erased from heap->m_allocated_chunks. No memory is released here.
 *
 * @param heap Pointer to the heap to operate on.
 */
void Heap::sweep(Heap *heap) {
    auto &live = heap->m_allocated_chunks;
    auto &dead = heap->m_freed_chunks;

    auto cursor = live.begin();
    while (cursor != live.end()) {
        Chunk *current = *cursor;
        if (!current->marked) {
            // Unreached chunk: hand it over to the freed list.
            dead.push_back(current);
            cursor = live.erase(cursor);
            continue;
        }
        // Reached chunk: reset the flag so the next cycle starts clean.
        current->marked = false;
        ++cursor;
    }
}
/**
* Frees chunks that were moved to the list m_freed_chunks
* by the sweep phase. If there are more than FREE_THRESH
* freed chunks, all of them are deleted to avoid
* cluttering.
*
* @param heap Heap singleton instance, only for avoiding
* redundant calls to the singleton get
*/
void Heap::free(Heap *heap) { void Heap::free(Heap *heap) {
if (heap->m_freed_chunks.size() > FREE_THRESH) { if (heap->m_freed_chunks.size() > FREE_THRESH) {
while (heap->m_freed_chunks.size()) { while (heap->m_freed_chunks.size()) {
@ -134,15 +236,25 @@ namespace GC {
delete chunk; delete chunk;
} }
} }
// if there are chunks but not more than FREE_THRESH
else if (heap->m_freed_chunks.size()) { else if (heap->m_freed_chunks.size()) {
// essentially, always check for overlap between
// chunks before finishing the allocation
free_overlap(heap); free_overlap(heap);
} }
// No freed chunks, nothing to free
else {
return;
}
} }
/**
* Checks for overlaps between freed chunks of memory
* and removes overlapping chunks while prioritizing
* the chunks at lower addresses.
*
* @param heap Heap singleton instance, only for avoiding
* redundant calls to the singleton get
*
* @note Maybe this should be changed to prioritizing
* larger chunks.
*/
void Heap::free_overlap(Heap *heap) { void Heap::free_overlap(Heap *heap) {
std::vector<Chunk *> filtered; std::vector<Chunk *> filtered;
size_t i = 0; size_t i = 0;
@ -161,6 +273,25 @@ namespace GC {
heap->m_freed_chunks.swap(filtered); heap->m_freed_chunks.swap(filtered);
} }
// ----- ONLY DEBUGGING -----------------------------------------------------------------------
/**
* Prints the result of Heap::init() and a dummy value
* for the current stack frame for reference.
*/
void Heap::check_init() {
auto heap = Heap::the();
cout << "Heap addr:\t" << heap << endl;
cout << "GC m_stack_end:\t" << heap->m_stack_end << endl;
auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
cout << "GC stack_start:\t" << stack_start << endl;
}
/**
* Conditional collection, only to be used in debugging
*
* @param flags Bitmap of flags
*/
void Heap::collect(uint flags) { void Heap::collect(uint flags) {
cout << "DEBUG COLLECT\nFLAGS: "; cout << "DEBUG COLLECT\nFLAGS: ";
@ -200,68 +331,6 @@ namespace GC {
} }
} }
/**
 * Sweeps the list of allocated chunks. Marked chunks are kept and
 * unmarked for the next cycle; unmarked chunks are moved to the
 * vector of freed chunks.
 *
 * @param heap The heap to operate on.
 */
void Heap::sweep(Heap *heap) {
    for (auto pos = heap->m_allocated_chunks.begin(); pos != heap->m_allocated_chunks.end();) {
        Chunk *candidate = *pos;
        if (candidate->marked) {
            // Survivor: clear the flag and move on.
            candidate->marked = false;
            ++pos;
            continue;
        }
        // Not marked: record it as freed and drop it from the allocated list.
        heap->m_freed_chunks.push_back(candidate);
        pos = heap->m_allocated_chunks.erase(pos);
    }
}
// Marks the chunks in the worklist that are referenced directly from the
// words in [start, end). Chains of pointers are not followed; the original
// author noted this as something to be fixed later.
void Heap::mark(uintptr_t *start, const uintptr_t *end, vector<Chunk*> worklist) {
    int counter = 0;
    while (start < end) {
        counter++;
        auto candidate = worklist.begin();
        while (candidate != worklist.end()) {
            Chunk *chunk = *candidate;
            if (chunk == nullptr) {
                assert(false && "EPIC FAIL");
            }
            uintptr_t lower = reinterpret_cast<uintptr_t>(chunk->start);
            uintptr_t upper = reinterpret_cast<uintptr_t>(chunk->start + chunk->size);

            // Skip chunks that the current word does not point into.
            const bool inside = lower <= *start && *start < upper;
            if (!inside) {
                ++candidate;
                continue;
            }

            cout << "Start points to:\t" << hex << *start << endl;
            cout << "Chunk start:\t\t" << hex << lower << endl;
            cout << "Chunk end:\t\t" << hex << upper << "\n" << endl;

            // Already marked chunks stay in the worklist untouched.
            if (chunk->marked) {
                ++candidate;
                continue;
            }
            chunk->marked = true;
            candidate = worklist.erase(candidate);
        }
        start++;
    }
    cout << "Counter: " << counter << endl;
}
// Mark child references from the root references // Mark child references from the root references
void mark_test(vector<Chunk *> worklist) { void mark_test(vector<Chunk *> worklist) {
while (worklist.size() > 0) { while (worklist.size() > 0) {
@ -305,7 +374,7 @@ namespace GC {
void Heap::print_contents() { void Heap::print_contents() {
auto heap = Heap::the(); auto heap = Heap::the();
if (heap->m_allocated_chunks.size()) { if (heap->m_allocated_chunks.size()) {
cout << "\nALLOCATED CHUNKS #" << heap->m_allocated_chunks.size() << endl; cout << "\nALLOCATED CHUNKS #" << dec << heap->m_allocated_chunks.size() << endl;
for (auto chunk : heap->m_allocated_chunks) { for (auto chunk : heap->m_allocated_chunks) {
print_line(chunk); print_line(chunk);
} }
@ -313,7 +382,7 @@ namespace GC {
cout << "NO ALLOCATIONS\n" << endl; cout << "NO ALLOCATIONS\n" << endl;
} }
if (heap->m_freed_chunks.size()) { if (heap->m_freed_chunks.size()) {
cout << "\nFREED CHUNKS #" << heap->m_freed_chunks.size() << endl; cout << "\nFREED CHUNKS #" << dec << heap->m_freed_chunks.size() << endl;
for (auto fchunk : heap->m_freed_chunks) { for (auto fchunk : heap->m_freed_chunks) {
print_line(fchunk); print_line(fchunk);
} }

View file

@ -7,40 +7,6 @@ GC::Heap *singleton_test();
void init_gc(GC::Heap *heap); void init_gc(GC::Heap *heap);
void frame_test(GC::Heap *heap); void frame_test(GC::Heap *heap);
// Prints the address returned by two consecutive calls to GC::Heap::the()
// and returns the pointer obtained from the second call.
GC::Heap *singleton_test() {
    const char *const rule = "===========================";

    std::cout << "TESTING SINGLETON INSTANCES" << std::endl;
    std::cout << rule << std::endl;
    std::cout << "Call 1:\t" << GC::Heap::the() << std::endl;

    GC::Heap *instance = GC::Heap::the();
    std::cout << "Call 2:\t" << instance << std::endl;
    std::cout << rule << std::endl;

    return instance;
}
// Calls heap->init() between a banner and a closing separator line.
void init_gc(GC::Heap *heap){
    const char *const rule = "===========================";

    std::cout << "\n\n INITIALIZING THE HEAP" << std::endl;
    std::cout << rule << std::endl;
    heap->init();
    std::cout << rule << std::endl;
}
// Prints the frame address of this function and of its caller, then lets
// the heap print its own saved values via check_init().
void frame_test(GC::Heap *heap) {
    const char *const rule = "===========================";

    std::cout << "\n\n TESTING FRAME ADDRESSES" << std::endl;
    std::cout << rule << std::endl;

#pragma clang diagnostic ignored "-Wframe-address"
    uintptr_t *const this_frame = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
    std::cout << "Current stack frame:\t" << this_frame << std::endl;

#pragma clang diagnostic ignored "-Wframe-address"
    uintptr_t *const caller_frame = reinterpret_cast<uintptr_t *>(__builtin_frame_address(1));
    std::cout << "Previous stack frame:\t" << caller_frame << std::endl;

    heap->check_init();
    // auto alloced = heap->alloc(sizeof(unsigned long));

    std::cout << rule << std::endl;
}
int main() { int main() {
std::cout << "in main" << std::endl; std::cout << "in main" << std::endl;
auto heap = singleton_test(); auto heap = singleton_test();
@ -50,3 +16,76 @@ int main() {
return 0; return 0;
} }
/**
 * Manual check of the singleton implementation: prints the address
 * returned by two consecutive calls to GC::Heap::the(). Nothing is
 * asserted; the check passes when both printed addresses are equal.
 *
 * Result recorded by the original author: pass
 *
 * @return Pointer to the Heap singleton instance (from the second call)
 */
GC::Heap *singleton_test() {
    const char *const rule = "===========================";

    std::cout << "TESTING SINGLETON INSTANCES" << std::endl;
    std::cout << rule << std::endl;

    // First call: expected to create the singleton instance.
    std::cout << "Call 1:\t" << GC::Heap::the() << std::endl;

    // Second call: expected to hand back the instance created above.
    GC::Heap *instance = GC::Heap::the();
    std::cout << "Call 2:\t" << instance << std::endl;

    std::cout << rule << std::endl;
    return instance;
}
/**
 * Calls Heap::init() on the given instance, framed by a banner and a
 * closing separator line. Heap::init() stores the frame address of its
 * caller, which here is presumably this function's frame (confirm; it
 * depends on inlining). In a real program init() is meant to be called
 * at the very start so the topmost stack frame is saved. This function
 * checks nothing itself; it only prepares state for the following tests.
 *
 * @param heap Pointer to the Heap singleton instance.
 */
void init_gc(GC::Heap *heap){
    const char *const rule = "===========================";

    std::cout << "\n\n INITIALIZING THE HEAP" << std::endl;
    std::cout << rule << std::endl;
    heap->init();
    std::cout << rule << std::endl;
}
/**
 * Exercises the intrinsic `__builtin_frame_address`: level 0 yields the
 * frame address of the current function, level 1 the frame address of
 * its caller, and so on.
 *
 * Nothing is asserted; the output has to be compared by eye. The check
 * is considered passed when
 *   1) the printed current frame address is smaller than the printed
 *      previous frame address (assumed stack layout), and
 *   2) the printed previous frame address equals the m_stack_end value
 *      printed by Heap::check_init() after Heap::init() has run.
 *
 * Result recorded by the original author: pass
 *
 * @param heap The Heap instance
 */
void frame_test(GC::Heap *heap) {
    const char *const rule = "===========================";

    std::cout << "\n\n TESTING FRAME ADDRESSES" << std::endl;
    std::cout << rule << std::endl;

    // clang++ directive silencing the warning about __builtin_frame_address
#pragma clang diagnostic ignored "-Wframe-address"
    uintptr_t *const this_frame = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
    std::cout << "Current stack frame:\t" << this_frame << std::endl;

#pragma clang diagnostic ignored "-Wframe-address"
    uintptr_t *const caller_frame = reinterpret_cast<uintptr_t *>(__builtin_frame_address(1));
    std::cout << "Previous stack frame:\t" << caller_frame << std::endl;

    // Prints the stack limit saved in the heap for comparison.
    heap->check_init();
    // auto alloced = heap->alloc(sizeof(unsigned long));

    std::cout << rule << std::endl;
}

View file

@ -35,7 +35,7 @@ void detach_pointer(long **ptr) {
*ptr = dummy_ptr; *ptr = dummy_ptr;
} }
void test_chain(int depth, bool detach) { Node *test_chain(int depth, bool detach) {
auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)); auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
std::cout << "Stack start from test_chain:\t" << stack_start << std::endl; std::cout << "Stack start from test_chain:\t" << stack_start << std::endl;
@ -43,6 +43,7 @@ void test_chain(int depth, bool detach) {
// This generates a segmentation fault (should be investigated further) // This generates a segmentation fault (should be investigated further)
if (detach) if (detach)
node_chain->child = nullptr; node_chain->child = nullptr;
return node_chain;
} }
@ -63,8 +64,8 @@ void test_some_types() {
} }
int main() { int main() {
//gc->init(); gc->init();
//gc->check_init(); gc->check_init();
auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)); auto stack_start = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
std::cout << "Stack start from main:\t" << stack_start << std::endl; std::cout << "Stack start from main:\t" << stack_start << std::endl;
@ -74,13 +75,20 @@ int main() {
// long *l = static_cast<long *>(gc->alloc(sizeof(long))); // 0x6-0xd | 0x // long *l = static_cast<long *>(gc->alloc(sizeof(long))); // 0x6-0xd | 0x
// This is allocated outside of the scope of the GC (if gc->init() isn't called), thus garbage // This is allocated outside of the scope of the GC (if gc->init() isn't called), thus garbage
long *longs[21]; /* long *longs[21];
std::cout << "Pointer to ints:\t" << longs << std::endl; std::cout << "Pointer to ints:\t" << longs << std::endl;
for (int i = 0; i < 21; i++) { for (int i = 0; i < 21; i++) {
longs[i] = static_cast<long *>(gc->alloc(sizeof(long))); longs[i] = static_cast<long *>(gc->alloc(sizeof(long)));
} } */
//Node *root;
Node *root = test_chain(3, false);
std::cout << "Adress of root:\t" << &root << std::endl;
std::cout << "Root points to:\t" << root << std::endl;
// 0x7ffdd7556bd8
int *i = static_cast<int *>(gc->alloc(sizeof(int)));
std::cout << "Adress of i:\t" << &i << std::endl;
gc->collect(MARK | SWEEP | FREE); // free misses some chunks gc->collect(MARK); // free misses some chunks
gc->print_contents(); gc->print_contents();
return 0; return 0;
} }

View file

@ -1,5 +1,7 @@
# Garbage collection # Garbage collection
## Project ## Project
Goal for next week (24/2): Goal for next week (24/2):
@ -7,34 +9,10 @@ Goal for next week (24/2):
## GC TODO: ## GC TODO:
- Merge to main branch - Merge to main branch
- Fix singleton references - Switch std::vector to std::list
- Get a good grasp of how the adressing of stack frames actually works. - Make alloc and init static, move the() to private
- Debug "free()", sometimes it skips chunks that should be freed. - stack_end, stack_start -> stack_top, stack_bottom
- Check alignment of chunks. - Double check m_heap_size functionality and when a collection is triggered
- Think about how we want to determine whether some object is a pointer or not; we will probably have to discuss that with Samuel, since it is not ideal to determine in the GC whether an object is a pointer. It should preferably be done in a previous stage.
## Tests TODO ## Tests TODO
### Library linking
**Victor fixes this**
Compile the GC lib and a test separately, link them together
and evaluate the following:
__builtin_frame_address(0)
__builtin_frame_address(1)
__builtin_return_address(0)
__builtin_return_address(1)
### GC Init and __b_f_a
1. Save the first stack frame globally as the stack start
2. For each call to collect, save the prev stack frame as the stack end
3. Scan through the span
gc_init()
global stack_end = __builtin_frame_address(1)
collect()
local stack_start = __builtin_frame_address(1)
sweep()
for all addr in range(stack_end, stack_start)
mark if chunk