Yoinked a newer GC.

This commit is contained in:
Samuel Hammersberg 2023-05-06 12:47:40 +02:00
parent a388f480e5
commit ab87f7fb15
6 changed files with 1015 additions and 196 deletions

View file

@ -4,11 +4,13 @@
#include <vector>
#include <unordered_map>
#include <chrono>
#include <queue>
#include <set>
#include "heap.hpp"
#define time_now std::chrono::high_resolution_clock::now()
#define to_us std::chrono::duration_cast<std::chrono::microseconds>
#define time_now std::chrono::high_resolution_clock::now()
#define to_us std::chrono::duration_cast<std::chrono::microseconds>
using std::cout, std::endl, std::vector, std::hex, std::dec, std::unordered_map;
@ -18,10 +20,10 @@ namespace GC
* This implementation of the() guarantees laziness
* on the instance and a correct destruction with
* the destructor.
*
*
* @returns The singleton object.
*/
Heap &Heap::the()
*/
Heap& Heap::the()
{
static Heap instance;
return instance;
@ -42,7 +44,7 @@ namespace GC
#pragma clang diagnostic ignored "-Wframe-address"
heap.m_stack_top = static_cast<uintptr_t *>(__builtin_frame_address(1));
// TODO: handle this below
// heap.m_heap_top = heap.m_heap;
//heap.m_heap_top = heap.m_heap;
}
void Heap::set_profiler_log_options(RecordOption flags)
@ -77,7 +79,7 @@ namespace GC
// Singleton
Heap &heap = Heap::the();
bool profiler_enabled = heap.profiler_enabled();
if (profiler_enabled)
Profiler::record(AllocStart, size);
@ -91,13 +93,14 @@ namespace GC
{
// auto a_ms = to_us(c_start - a_start);
// Profiler::record(AllocStart, a_ms);
heap.collect();
auto stack_bottom = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
heap.collect(stack_bottom);
// If memory is not enough after collect, crash with OOM error
if (heap.m_size > HEAP_SIZE)
{
throw std::runtime_error(std::string("Error: Heap out of memory"));
}
// throw std::runtime_error(std::string("Error: Heap out of memory"));
//throw std::runtime_error(std::string("Error: Heap out of memory"));
}
if (heap.m_size + size > HEAP_SIZE)
{
@ -124,7 +127,7 @@ namespace GC
heap.m_size += size;
// TODO: handle this below
// heap.m_total_size += size;
//heap.m_total_size += size;
heap.m_allocated_chunks.push_back(new_chunk);
if (profiler_enabled)
@ -158,11 +161,11 @@ namespace GC
// Check if there are any freed chunks large enough for current request
for (size_t i = 0; i < heap.m_freed_chunks.size(); i++)
{
// auto chunk = Heap::get_at(heap.m_freed_chunks, i);
//auto chunk = Heap::get_at(heap.m_freed_chunks, i);
auto chunk = heap.m_freed_chunks[i];
auto iter = heap.m_freed_chunks.begin();
i++;
// advance(iter, i);
//advance(iter, i);
if (chunk->m_size > size)
{
// Split the chunk, use one part and add the remaining part to
@ -191,12 +194,11 @@ namespace GC
/**
* Returns a bool whether the profiler is enabled
* or not.
*
*
* @returns True or false if the profiler is enabled
* or disabled respectively.
*/
bool Heap::profiler_enabled()
{
*/
/**
 * Reports whether profiling is currently enabled on the heap singleton.
 *
 * @returns True if the profiler is enabled, false otherwise.
 */
bool Heap::profiler_enabled()
{
    return Heap::the().m_profiler_enable;
}
@ -208,7 +210,7 @@ namespace GC
* function is private so that the user cannot trigger
* a collection unnecessarily.
*/
void Heap::collect()
void Heap::collect(uintptr_t *stack_bottom)
{
auto c_start = time_now;
@ -218,29 +220,53 @@ namespace GC
Profiler::record(CollectStart);
// get current stack frame
auto stack_bottom = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
stack_bottom = reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
if (heap.m_stack_top == nullptr)
throw std::runtime_error(std::string("Error: Heap is not initialized, read the docs!"));
uintptr_t *stack_top = heap.m_stack_top;
// uintptr_t *stack_top = heap.m_stack_top;
// auto work_list = heap.m_allocated_chunks;
// mark(stack_bottom, stack_top, work_list);
//auto work_list = heap.m_allocated_chunks;
//mark(stack_bottom, stack_top, work_list);
// Testing mark_hash, previous working implementation above
create_table();
mark_hash(stack_bottom, stack_top);
// create_table();
// mark_hash(stack_bottom, stack_top);
vector<uintptr_t *> roots;
// cout << "\nb4 find_roots\n";
find_roots(stack_bottom, roots);
// cout << "b4 mark\n";''
mark(roots);
// cout << "b4 sweep\n";
sweep(heap);
// cout << "b4 free\n";
free(heap);
auto c_end = time_now;
Profiler::record(CollectStart, to_us(c_end - c_start));
}
/**
 * Conservatively scans the stack for roots: stack slots whose stored
 * value lies inside the heap's address range are assumed to be
 * pointers into the heap and are collected for the mark phase.
 *
 * @param stack_bottom Pointer to the bottom of the current stack frame;
 *                     scanning proceeds upward to m_stack_top.
 * @param roots        Output vector; the address of every candidate
 *                     root slot found is appended.
 */
void Heap::find_roots(uintptr_t *stack_bottom, vector<uintptr_t *> &roots)
{
    auto heap_bottom = reinterpret_cast<const uintptr_t>(m_heap);
    auto heap_top = reinterpret_cast<const uintptr_t>(m_heap + HEAP_SIZE);
    while (stack_bottom < m_stack_top)
    {
        // Inclusive lower bound: a root may hold the address of the very
        // first byte of the heap. The marking paths match chunk starts
        // with `<=` / exact equality, so `<` here would drop such roots.
        if (heap_bottom <= *stack_bottom && *stack_bottom < heap_top)
        {
            roots.push_back(stack_bottom);
        }
        stack_bottom++;
    }
}
/**
* Iterates through the stack, if an element on the stack points to a chunk,
* called a root chunk, that chunk is marked (i.e. reachable).
@ -248,8 +274,8 @@ namespace GC
* the root chunk and mark those chunks.
* If a chunk is marked it is removed from the worklist, since it's no longer of
* concern for this method.
*
* Time complexity: O(N^2 * log(N)) as upper bound.
*
* Time complexity: O(N^2 * log(N)) as upper bound.
* Where N is either the size of the worklist or the size of
* the stack frame, depending on which is the largest.
*
@ -257,133 +283,71 @@ namespace GC
* @param end Pointer to the end of the stack frame.
* @param worklist The currently allocated chunks, which haven't been marked.
*/
void Heap::mark(uintptr_t *start, const uintptr_t *const end, vector<Chunk *> &worklist)
void Heap::mark(vector<uintptr_t *> &roots)
{
// cout << "\nWorklist size: " << worklist.size() << "\n";
Heap &heap = Heap::the();
bool profiler_enabled = heap.m_profiler_enable;
if (profiler_enabled)
bool prof_enabled = m_profiler_enable;
if (prof_enabled)
Profiler::record(MarkStart);
vector<AddrRange *> rangeWL;
auto iter = roots.begin(), end = roots.end();
std::queue<std::pair<uintptr_t, uintptr_t>> chunk_spaces;
// To find addresses that are in the worklist
for (; start <= end; start++)
while (iter != end)
{
auto it = worklist.begin();
auto stop = worklist.end();
while (it != stop)
{
Chunk *chunk = *it;
auto c_start = reinterpret_cast<uintptr_t>(chunk->m_start);
auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
// Check if the stack pointer points to something within the chunk
if (c_start <= *start && *start < c_end)
{
if (!chunk->m_marked)
{
if (profiler_enabled)
Profiler::record(ChunkMarked, chunk);
chunk->m_marked = true;
it = worklist.erase(it);
/* Chunk *next = find_pointer((uintptr_t *) c_start, (uintptr_t *) c_end, worklist);
while (next != NULL) {
if (!next->m_marked)
{
next->m_marked = true;
auto c_start = reinterpret_cast<uintptr_t>(next->m_start);
auto c_size = reinterpret_cast<uintptr_t>(next->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
next = find_pointer((uintptr_t *) c_start, (uintptr_t *) c_end, worklist);
}
} */
// Recursively call mark, to see if the reachable chunk further points to another chunk
// mark((uintptr_t *)c_start, (uintptr_t *)c_end, worklist);
// AddrRange *range = new AddrRange((uintptr_t *)c_start, (uintptr_t *)c_end);
rangeWL.push_back(new AddrRange((uintptr_t *)c_start, (uintptr_t *)c_end));
}
else
{
++it;
}
}
else
{
++it;
}
}
find_chunks(*iter++, chunk_spaces);
}
mark_range(rangeWL, worklist);
rangeWL.clear();
}
void Heap::mark_range(vector<AddrRange *> &ranges, vector<Chunk *> &worklist)
{
Heap &heap = Heap::the();
bool profiler_enabled = heap.m_profiler_enable;
if (profiler_enabled)
Profiler::record(MarkStart);
auto iter = ranges.begin();
auto stop = ranges.end();
while (iter != stop)
while (!chunk_spaces.empty())
{
auto range = *iter++;
uintptr_t *start = (uintptr_t *)range->start;
const uintptr_t *end = range->end;
if (start == nullptr)
cout << "\nstart is null\n";
for (; start <= end; start++)
{
auto wliter = worklist.begin();
auto wlstop = worklist.end();
while (wliter != wlstop)
{
Chunk *chunk = *wliter;
auto c_start = reinterpret_cast<uintptr_t>(chunk->m_start);
auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
auto range = chunk_spaces.front();
chunk_spaces.pop();
if (c_start <= *start && *start < c_end)
{
if (!chunk->m_marked)
{
chunk->m_marked = true;
wliter = worklist.erase(wliter);
ranges.push_back(new AddrRange((uintptr_t *)c_start, (uintptr_t *)c_end));
stop = ranges.end();
}
else
{
wliter++;
}
}
else
{
wliter++;
}
}
auto addr_bottom = reinterpret_cast<uintptr_t *>(range.first);
auto addr_top = reinterpret_cast<uintptr_t *>(range.second);
while (addr_bottom < addr_top)
{
find_chunks(addr_bottom, chunk_spaces);
addr_bottom++;
}
}
}
void Heap::create_table()
/**
 * Tests whether the value stored at the given address points into any
 * still-unmarked allocated chunk. Each chunk hit is marked (reachable)
 * and its [start, end) range is queued so the mark phase can scan the
 * chunk's own contents for further pointers (transitive marking).
 *
 * @param stack_addr   Address (on the stack or inside a chunk) whose
 *                     stored value is tested as a potential pointer.
 * @param chunk_spaces Queue of address ranges of newly marked chunks,
 *                     consumed by Heap::mark.
 */
void Heap::find_chunks(uintptr_t *stack_addr, std::queue<std::pair<uintptr_t, uintptr_t>> &chunk_spaces)
{
    auto iter = m_allocated_chunks.begin();
    auto end = m_allocated_chunks.end();
    while (iter != end)
    {
        auto chunk = *iter++;
        // Already-marked chunks have been queued once; skip them so a
        // chunk's range is never scanned twice.
        if (chunk->m_marked)
            continue;
        auto c_start = reinterpret_cast<uintptr_t>(chunk->m_start);
        auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
        auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
        // Inclusive lower bound: a root normally holds the exact start
        // address of a chunk (mark_hash looks chunk starts up by
        // equality, and the previous mark used `c_start <= *start`), so
        // a strict `<` would miss every direct chunk pointer.
        if (c_start <= *stack_addr && *stack_addr < c_end)
        {
            chunk->m_marked = true;
            chunk_spaces.push(std::make_pair(c_start, c_end));
        }
    }
}
void Heap::create_table()
{
Heap &heap = Heap::the();
unordered_map<uintptr_t, Chunk *> chunk_table;
for (auto chunk : heap.m_allocated_chunks)
{
unordered_map<uintptr_t, Chunk*> chunk_table;
for (auto chunk : heap.m_allocated_chunks) {
auto pair = std::make_pair(reinterpret_cast<uintptr_t>(chunk->m_start), chunk);
heap.m_chunk_table.insert(pair);
heap.m_chunk_table.insert(pair);
}
}
void Heap::mark_hash(uintptr_t *start, const uintptr_t *const end)
void Heap::mark_hash(uintptr_t *start, const uintptr_t* const end)
{
Heap &heap = Heap::the();
@ -391,7 +355,7 @@ namespace GC
if (profiler_enabled)
Profiler::record(MarkStart);
for (; start <= end; start++)
for (; start <= end; start++)
{
auto search = heap.m_chunk_table.find(*start);
if (search != heap.m_chunk_table.end())
@ -399,19 +363,19 @@ namespace GC
Chunk *chunk = search->second;
auto c_start = reinterpret_cast<uintptr_t>(chunk->m_start);
auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
auto c_end = reinterpret_cast<uintptr_t *>(c_start + c_size);
if (!chunk->m_marked)
auto c_end = reinterpret_cast<uintptr_t*>(c_start + c_size);
if (!chunk->m_marked)
{
chunk->m_marked = true;
if (profiler_enabled)
Profiler::record(ChunkMarked, chunk);
// mark_hash(chunk->m_start, c_end);
Chunk *next = find_pointer_hash((uintptr_t *)c_start, (uintptr_t *)c_end);
while (next != NULL)
//mark_hash(chunk->m_start, c_end);
Chunk *next = find_pointer_hash((uintptr_t *) c_start, (uintptr_t *) c_end);
while (next != NULL)
{
if (!next->m_marked)
if (!next->m_marked)
{
next->m_marked = true;
@ -419,9 +383,9 @@ namespace GC
Profiler::record(ChunkMarked, chunk);
auto c_start = reinterpret_cast<uintptr_t>(next->m_start);
auto c_size = reinterpret_cast<uintptr_t>(next->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
next = find_pointer_hash((uintptr_t *)c_start, (uintptr_t *)c_end);
auto c_size = reinterpret_cast<uintptr_t>(next->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
next = find_pointer_hash((uintptr_t *) c_start, (uintptr_t *) c_end);
}
}
}
@ -433,9 +397,9 @@ namespace GC
* Sweeps the heap, unmarks the marked chunks for the next cycle,
* adds the unmarked nodes to the list of freed chunks; to be freed.
*
* Time complexity: O(N^2), where N is the number of allocated chunks.
* It is quadratic, in the worst case,
* since each call to erase() is linear.
* Time complexity: O(N^2), where N is the number of allocated chunks.
* It is quadratic, in the worst case,
* since each call to erase() is linear.
*
* @param heap Pointer to the heap singleton instance.
*/
@ -446,7 +410,7 @@ namespace GC
Profiler::record(SweepStart);
auto iter = heap.m_allocated_chunks.begin();
// std::cout << "Chunks alloced: " << heap.m_allocated_chunks.size() << std::endl;
// This cannot "iter != stop", results in seg fault, since the end gets updated, I think.
// This cannot "iter != stop", results in seg fault, since the end gets updated, I think.
while (iter != heap.m_allocated_chunks.end())
{
Chunk *chunk = *iter;
@ -465,7 +429,7 @@ namespace GC
Profiler::record(ChunkSwept, chunk);
heap.m_freed_chunks.push_back(chunk);
iter = heap.m_allocated_chunks.erase(iter);
// heap.m_size -= chunk->m_size;
heap.m_size -= chunk->m_size;
// cout << "Decremented total heap size with: " << chunk->m_size << endl;
// cout << "Total size is: " << heap.m_size << endl;
}
@ -478,7 +442,7 @@ namespace GC
* by the sweep phase. If there are more than a certain
* amount of free chunks, delete the free chunks to
* avoid cluttering.
*
*
* Time complexity: O(N^2), where N is the freed chunks.
* If free_overlap() is called, it runs in O(N^2),
* otherwise O(N).
@ -500,7 +464,7 @@ namespace GC
heap.m_freed_chunks.pop_back();
if (profiler_enabled)
Profiler::record(ChunkFreed, chunk);
heap.m_size -= chunk->m_size;
// heap.m_size -= chunk->m_size;
// cout << "Decremented total heap size with: " << chunk->m_size << endl;
// cout << "Total size is: " << heap.m_size << endl;
delete chunk;
@ -519,7 +483,7 @@ namespace GC
* Checks for overlaps between freed chunks of memory
* and removes overlapping chunks while prioritizing
* the chunks at lower addresses.
*
*
* Time complexity: O(N^2), where N is the number of freed chunks.
* At each iteration get_at() is called, which is linear.
*
@ -534,7 +498,7 @@ namespace GC
{
std::vector<Chunk *> filtered;
size_t i = 0;
// auto prev = Heap::get_at(heap.m_freed_chunks, i++);
//auto prev = Heap::get_at(heap.m_freed_chunks, i++);
auto prev = heap.m_freed_chunks[i++];
prev->m_marked = true;
filtered.push_back(prev);
@ -542,7 +506,7 @@ namespace GC
for (; i < heap.m_freed_chunks.size(); i++)
{
prev = filtered.back();
// auto next = Heap::get_at(heap.m_freed_chunks, i);
//auto next = Heap::get_at(heap.m_freed_chunks, i);
auto next = heap.m_freed_chunks[i];
auto p_start = (uintptr_t)(prev->m_start);
auto p_size = (uintptr_t)(prev->m_size);
@ -554,7 +518,7 @@ namespace GC
}
}
heap.m_freed_chunks.swap(filtered);
bool profiler_enabled = heap.m_profiler_enable;
// After swap m_freed_chunks contains still available chunks
// and filtered contains all the chunks, so delete unused chunks
@ -566,8 +530,8 @@ namespace GC
if (profiler_enabled)
Profiler::record(ChunkFreed, chunk);
heap.m_size -= chunk->m_size;
// cout << "Decremented total heap size with: " << chunk->m_size << endl;
// cout << "Total size is: " << heap.m_size << endl;
cout << "Decremented total heap size with: " << chunk->m_size << endl;
cout << "Total size is: " << heap.m_size << endl;
delete chunk;
}
else
@ -583,23 +547,21 @@ namespace GC
heap.m_profiler_enable = mode;
}
Chunk *find_pointer(uintptr_t *start, const uintptr_t *const end, vector<Chunk *> &worklist)
{
for (; start <= end; start++)
{
Chunk* find_pointer(uintptr_t *start, const uintptr_t* const end, vector<Chunk *> &worklist) {
for (; start <= end; start++) {
auto it = worklist.begin();
auto stop = worklist.end();
while (it != stop)
{
Chunk *chunk = *it;
auto c_start = reinterpret_cast<uintptr_t>(chunk->m_start);
auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
auto c_size = reinterpret_cast<uintptr_t>(chunk->m_size);
auto c_end = reinterpret_cast<uintptr_t>(c_start + c_size);
// Check if the stack pointer points to something within the chunk
if (c_start <= *start && *start < c_end)
{
return chunk;
return chunk;
}
return NULL;
}
@ -607,14 +569,11 @@ namespace GC
}
// Checks if a given chunk points to another chunk and returns it
Chunk *Heap::find_pointer_hash(uintptr_t *start, const uintptr_t *const end)
{
Chunk* Heap::find_pointer_hash(uintptr_t *start, const uintptr_t* const end) {
Heap &heap = Heap::the();
for (; start <= end; start++)
{
for (; start <= end; start++) {
auto search = heap.m_chunk_table.find(*start);
if (search != heap.m_chunk_table.end())
{
if (search != heap.m_chunk_table.end()) {
return search->second;
}
return NULL;
@ -737,8 +696,7 @@ namespace GC
}
else
{
cout << "NO ALLOCATIONS\n"
<< endl;
cout << "NO ALLOCATIONS\n" << endl;
}
if (heap.m_freed_chunks.size())
{
@ -761,8 +719,7 @@ namespace GC
}
else
{
cout << "NO ALLOCATIONS\n"
<< endl;
cout << "NO ALLOCATIONS\n" << endl;
}
if (heap.m_freed_chunks.size())
{
@ -774,12 +731,9 @@ namespace GC
}
}
void Heap::print_allocated_chunks(Heap *heap)
{
cout << "--- Allocated Chunks ---\n"
<< endl;
for (auto chunk : heap->m_allocated_chunks)
{
void Heap::print_allocated_chunks(Heap *heap) {
cout << "--- Allocated Chunks ---\n" << endl;
for (auto chunk : heap->m_allocated_chunks) {
print_line(chunk);
}
}
@ -790,9 +744,9 @@ namespace GC
// Check if there are any freed chunks large enough for current request
for (size_t i = 0; i < heap.m_freed_chunks.size(); i++)
{
auto chunk = heap.m_freed_chunks[i]; // Heap::get_at(heap.m_freed_chunks, i);
auto chunk = heap.m_freed_chunks[i]; //Heap::get_at(heap.m_freed_chunks, i);
auto iter = heap.m_freed_chunks.begin();
// advance(iter, i);
//advance(iter, i);
i++;
if (chunk->m_size > size)
{
@ -823,14 +777,14 @@ namespace GC
{
std::vector<Chunk *> filtered;
size_t i = 0;
auto prev = heap.m_freed_chunks[i++]; // Heap::get_at(heap.m_freed_chunks, i++);
auto prev = heap.m_freed_chunks[i++]; //Heap::get_at(heap.m_freed_chunks, i++);
prev->m_marked = true;
filtered.push_back(prev);
cout << filtered.back()->m_start << endl;
for (; i < heap.m_freed_chunks.size(); i++)
{
prev = filtered.back();
auto next = heap.m_freed_chunks[i]; // Heap::get_at(heap.m_freed_chunks, i);
auto next = heap.m_freed_chunks[i]; //Heap::get_at(heap.m_freed_chunks, i);
auto p_start = (uintptr_t)(prev->m_start);
auto p_size = (uintptr_t)(prev->m_size);
auto n_start = (uintptr_t)(next->m_start);
@ -841,7 +795,7 @@ namespace GC
}
}
heap.m_freed_chunks.swap(filtered);
bool profiler_enabled = heap.m_profiler_enable;
// After swap m_freed_chunks contains still available chunks
// and filtered contains all the chunks, so delete unused chunks