Skip to content

Reduce some codegen allocations #857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions src/asm_writing/icinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,14 +183,14 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
}

ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder)
: next_slot_to_try(0),
stack_info(stack_info),
num_slots(num_slots),
slot_size(slot_size),
calling_conv(calling_conv),
live_outs(live_outs.begin(), live_outs.end()),
live_outs(std::move(_live_outs)),
return_register(return_register),
type_recorder(type_recorder),
retry_in(0),
Expand All @@ -200,15 +200,14 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr) {
for (int i = 0; i < num_slots; i++) {
slots.push_back(ICSlotInfo(this, i));
slots.emplace_back(this, i);
}
}

static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo* ic, StackInfo stack_info,
std::unordered_set<int> live_outs) {
const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs) {
assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
assert(slowpath_rtn_addr > slowpath_start_addr);
assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
Expand All @@ -221,9 +220,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);

// TODO we only need to do this if 0 was in live_outs, since if it wasn't, that indicates
// the return value won't be used and we can optimize based on that.
Expand All @@ -247,7 +244,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
}

ICInfo* icinfo = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
ic->getCallingConvention(), live_outs, return_register, ic->type_recorder);
ic->getCallingConvention(), std::move(live_outs), return_register, ic->type_recorder);

ics_by_return_addr[slowpath_rtn_addr] = icinfo;

Expand Down
12 changes: 7 additions & 5 deletions src/asm_writing/icinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "asm_writing/assembler.h"
#include "asm_writing/types.h"
#include "core/util.h"

namespace pyston {

Expand Down Expand Up @@ -91,6 +92,8 @@ class ICSlotRewrite {
friend class ICInfo;
};

typedef BitSet<16> LiveOutSet;

class ICInfo {
private:
std::vector<ICSlotInfo> slots;
Expand All @@ -105,7 +108,7 @@ class ICInfo {
const int num_slots;
const int slot_size;
const llvm::CallingConv::ID calling_conv;
const std::vector<int> live_outs;
LiveOutSet live_outs;
const assembler::GenericRegister return_register;
TypeRecorder* const type_recorder;
int retry_in, retry_backoff;
Expand All @@ -116,14 +119,14 @@ class ICInfo {

public:
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder);
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;

int getSlotSize() { return slot_size; }
int getNumSlots() { return num_slots; }
llvm::CallingConv::ID getCallingConvention() { return calling_conv; }
const std::vector<int>& getLiveOuts() { return live_outs; }
const LiveOutSet& getLiveOuts() { return live_outs; }

std::unique_ptr<ICSlotRewrite> startRewrite(const char* debug_name);
void clear(ICSlotInfo* entry);
Expand All @@ -138,8 +141,7 @@ class ICSetupInfo;
struct CompiledFunction;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo*, StackInfo stack_info,
std::unordered_set<int> live_outs);
const ICSetupInfo*, StackInfo stack_info, LiveOutSet live_outs);
void deregisterCompiledPatchpoint(ICInfo* ic);

ICInfo* getICInfo(void* rtn_addr);
Expand Down
20 changes: 11 additions & 9 deletions src/asm_writing/rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
return rewrite->getTypeRecorder();
}

Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs)
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs)
: rewrite(std::move(rewrite)),
assembler(this->rewrite->getAssembler()),
picked_slot(NULL),
Expand Down Expand Up @@ -1837,6 +1837,11 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const s
var->locations.push_back(l);
}

// Make sure there weren't duplicates in the live_outs list.
// Probably not a big deal if there were, but we shouldn't be generating those.
assert(std::find(this->live_out_regs.begin(), this->live_out_regs.end(), dwarf_regnum)
== this->live_out_regs.end());

this->live_outs.push_back(var);
this->live_out_regs.push_back(dwarf_regnum);
}
Expand Down Expand Up @@ -2034,25 +2039,23 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) {
}

PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped) {
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped) {
assert(start_addr < end_addr);

int est_slowpath_size = INITIAL_CALL_SIZE;

std::vector<assembler::GenericRegister> regs_to_spill;
std::vector<assembler::Register> regs_to_reload;

std::unordered_set<int> live_outs_for_slot;

for (int dwarf_regnum : live_outs) {
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);

assert(!(ru.type == assembler::GenericRegister::GP && ru.gp == assembler::R11) && "We assume R11 is free!");

if (ru.type == assembler::GenericRegister::GP) {
if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) {
live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);
continue;
}
}
Expand All @@ -2068,7 +2071,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
continue;
}

live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);

regs_to_spill.push_back(ru);

Expand Down Expand Up @@ -2125,7 +2128,6 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
assem.fillWithNops();
assert(!assem.hasFailed());

return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr,
std::move(live_outs_for_slot));
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr, std::move(live_outs));
}
}
10 changes: 5 additions & 5 deletions src/asm_writing/rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ class Rewriter : public ICSlotRewrite::CommitHook {
llvm::SmallVector<RewriterVar*, 8> args;
llvm::SmallVector<RewriterVar*, 8> live_outs;

Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs);
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs);

llvm::SmallVector<RewriterAction, 32> actions;
void addAction(std::function<void()> action, llvm::ArrayRef<RewriterVar*> vars, ActionType type) {
Expand Down Expand Up @@ -555,19 +555,19 @@ struct PatchpointInitializationInfo {
uint8_t* slowpath_start;
uint8_t* slowpath_rtn_addr;
uint8_t* continue_addr;
std::unordered_set<int> live_outs;
LiveOutSet live_outs;

PatchpointInitializationInfo(uint8_t* slowpath_start, uint8_t* slowpath_rtn_addr, uint8_t* continue_addr,
std::unordered_set<int>&& live_outs)
LiveOutSet live_outs)
: slowpath_start(slowpath_start),
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr),
live_outs(std::move(live_outs)) {}
};

PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped);
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped);

template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) {
return getAttr(offset, loc, assembler::MovType::ZBL);
Expand Down
10 changes: 5 additions & 5 deletions src/codegen/baseline_jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in

int scratch_offset = num_stack_args * 8;
StackInfo stack_info(scratch_size, scratch_offset);
std::unordered_set<int> live_outs;
LiveOutSet live_outs;

void* fragment_start = a.curInstPointer() - patch_jump_offset;
long fragment_offset = a.bytesWritten() - patch_jump_offset;
Expand Down Expand Up @@ -612,13 +612,13 @@ int JitFragmentWriter::finishCompilation() {
uint8_t* end_addr = pp_info.end_addr;
PatchpointInitializationInfo initialization_info
= initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
0 /* scratch_size */, LiveOutSet(), _spill_map);
uint8_t* slowpath_start = initialization_info.slowpath_start;
uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;

std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
pp_info.stack_info, std::unordered_set<int>());
std::unique_ptr<ICInfo> pp
= registerCompiledPatchpoint(start_addr, slowpath_start, initialization_info.continue_addr,
slowpath_rtn_addr, pp_info.ic.get(), pp_info.stack_info, LiveOutSet());
pp.release();
}

Expand Down
9 changes: 5 additions & 4 deletions src/codegen/irgen/irgenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ ScopeInfo* IRGenState::getScopeInfoForNode(AST* node) {
class IREmitterImpl : public IREmitter {
private:
IRGenState* irstate;
IRBuilder* builder;
std::unique_ptr<IRBuilder> builder;
llvm::BasicBlock*& curblock;
IRGenerator* irgenerator;

Expand Down Expand Up @@ -348,6 +348,9 @@ class IREmitterImpl : public IREmitter {
public:
explicit IREmitterImpl(IRGenState* irstate, llvm::BasicBlock*& curblock, IRGenerator* irgenerator)
: irstate(irstate), builder(new IRBuilder(g.context)), curblock(curblock), irgenerator(irgenerator) {
// Perf note: it seems to be more efficient to separately allocate the "builder" member,
// even though we could allocate it in-line; maybe it's infrequently used enough that it's better
// to not have it take up cache space.

RELEASE_ASSERT(irstate->getSourceInfo()->scoping->areGlobalsFromModule(),
"jit doesn't support custom globals yet");
Expand All @@ -356,7 +359,7 @@ class IREmitterImpl : public IREmitter {
builder->SetInsertPoint(curblock);
}

IRBuilder* getBuilder() override { return builder; }
IRBuilder* getBuilder() override { return &*builder; }

GCBuilder* getGC() override { return irstate->getGC(); }

Expand Down Expand Up @@ -557,8 +560,6 @@ class IRGeneratorImpl : public IRGenerator {
types(types),
state(RUNNING) {}

~IRGeneratorImpl() { delete emitter.getBuilder(); }

private:
OpInfo getOpInfoForNode(AST* ast, const UnwindInfo& unw_info) {
assert(ast);
Expand Down
34 changes: 16 additions & 18 deletions src/codegen/patchpoints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

namespace pyston {

void PatchpointInfo::addFrameVar(const std::string& name, CompilerType* type) {
void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
frame_vars.push_back(FrameVarInfo({.name = name, .type = type }));
}

Expand Down Expand Up @@ -91,7 +91,7 @@ void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) {
int num_args = frame_var.type->numFrameArgs();

llvm::SmallVector<StackMap::Record::Location, 1> locations;
locations.append(&r->locations[cur_arg], &r->locations[cur_arg + num_args]);
locations.append(r->locations.data() + cur_arg, r->locations.data() + cur_arg + num_args);

// printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum);

Expand All @@ -117,14 +117,14 @@ static int extractScratchOffset(PatchpointInfo* pp, StackMap::Record* r) {
return l.offset;
}

static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
std::unordered_set<int> live_outs;
static LiveOutSet extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
LiveOutSet live_outs;

// Using the C calling convention, there shouldn't be any non-callee-save registers in here,
// but LLVM is conservative and will add some. So with the C cc, ignored the specified live outs
if (cc != llvm::CallingConv::C) {
for (const auto& live_out : r->live_outs) {
live_outs.insert(live_out.regnum);
live_outs.set(live_out.regnum);
}
}

Expand All @@ -133,11 +133,11 @@ static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::Callin
// sense to track them as live_outs.
// Unfortunately this means we need to be conservative about it unless
// we can change llvm's behavior.
live_outs.insert(3);
live_outs.insert(12);
live_outs.insert(13);
live_outs.insert(14);
live_outs.insert(15);
live_outs.set(3);
live_outs.set(12);
live_outs.set(13);
live_outs.set(14);
live_outs.set(15);

return live_outs;
}
Expand All @@ -151,7 +151,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
cf->location_map->constants = stackmap->constants;

for (int i = 0; i < nrecords; i++) {
StackMap::Record* r = stackmap->records[i];
StackMap::Record* r = &stackmap->records[i];

assert(stackmap->stack_size_records.size() == 1);
const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];
Expand Down Expand Up @@ -212,12 +212,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (ic == NULL) {
// We have to be using the C calling convention here, so we don't need to check the live outs
// or save them across the call.
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
std::unordered_set<int>(), frame_remapped);
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, LiveOutSet(),
frame_remapped);
continue;
}

std::unordered_set<int> live_outs(extractLiveOuts(r, ic->getCallingConvention()));
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));

if (ic->hasReturnValue()) {
assert(ic->getCallingConvention() == llvm::CallingConv::C
Expand All @@ -226,14 +226,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);
}


auto initialization_info = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset,
scratch_size, live_outs, frame_remapped);
scratch_size, std::move(live_outs), frame_remapped);

ASSERT(initialization_info.slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
"Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
Expand Down
4 changes: 2 additions & 2 deletions src/codegen/patchpoints.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap);
struct PatchpointInfo {
public:
struct FrameVarInfo {
std::string name;
llvm::StringRef name;
CompilerType* type;
};

Expand Down Expand Up @@ -74,7 +74,7 @@ struct PatchpointInfo {
int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }

void addFrameVar(const std::string& name, CompilerType* type);
void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
assert(num_frame_stackmap_args == -1);
num_frame_stackmap_args = num_frame_args;
Expand Down
Loading