Skip to content

Commit 896bc69

Browse files
committed
Reduce allocations in stackmap parsing
Maybe a better overall approach is to not convert the stackmap out of its original flat format; we keep that original memory around anyway, and I don't think it's that much faster to scan than our parsed version. But for now, optimize the current approach:
- convert vector -> SmallVector
- call reserve() since we usually know how many elements the vectors will have
- use a StringMap instead of an unordered_map<std::string>
1 parent 08fe381 commit 896bc69

File tree

5 files changed

+36
-30
lines changed

5 files changed

+36
-30
lines changed

src/codegen/patchpoints.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
namespace pyston {
3131

32-
void PatchpointInfo::addFrameVar(const std::string& name, CompilerType* type) {
32+
void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
3333
frame_vars.push_back(FrameVarInfo({.name = name, .type = type }));
3434
}
3535

@@ -91,7 +91,7 @@ void PatchpointInfo::parseLocationMap(StackMap::Record* r, LocationMap* map) {
9191
int num_args = frame_var.type->numFrameArgs();
9292

9393
llvm::SmallVector<StackMap::Record::Location, 1> locations;
94-
locations.append(&r->locations[cur_arg], &r->locations[cur_arg + num_args]);
94+
locations.append(r->locations.data() + cur_arg, r->locations.data() + cur_arg + num_args);
9595

9696
// printf("%s %d %d\n", frame_var.name.c_str(), r->locations[cur_arg].type, r->locations[cur_arg].regnum);
9797

@@ -151,7 +151,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
151151
cf->location_map->constants = stackmap->constants;
152152

153153
for (int i = 0; i < nrecords; i++) {
154-
StackMap::Record* r = stackmap->records[i];
154+
StackMap::Record* r = &stackmap->records[i];
155155

156156
assert(stackmap->stack_size_records.size() == 1);
157157
const StackMap::StackSizeRecord& stack_size_record = stackmap->stack_size_records[0];

src/codegen/patchpoints.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap);
4242
struct PatchpointInfo {
4343
public:
4444
struct FrameVarInfo {
45-
std::string name;
45+
llvm::StringRef name;
4646
CompilerType* type;
4747
};
4848

@@ -74,7 +74,7 @@ struct PatchpointInfo {
7474
int scratchStackmapArg() { return 0; }
7575
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
7676

77-
void addFrameVar(const std::string& name, CompilerType* type);
77+
void addFrameVar(llvm::StringRef name, CompilerType* type);
7878
void setNumFrameArgs(int num_frame_args) {
7979
assert(num_frame_stackmap_args == -1);
8080
num_frame_stackmap_args = num_frame_args;

src/codegen/stackmaps.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ StackMap* parseStackMap() {
7070

7171
if (VERBOSITY() >= 3)
7272
printf("%d functions\n", nfunctions);
73+
cur_map->stack_size_records.reserve(nfunctions);
7374
for (int i = 0; i < nfunctions; i++) {
7475
const StackMap::StackSizeRecord& size_record = *ptr.size_record++;
7576
cur_map->stack_size_records.push_back(size_record);
@@ -79,6 +80,7 @@ StackMap* parseStackMap() {
7980

8081
if (VERBOSITY() >= 3)
8182
printf("%d constants\n", nconstants);
83+
cur_map->constants.reserve(nconstants);
8284

8385
for (int i = 0; i < nconstants; i++) {
8486
uint64_t constant = *ptr.u64++;
@@ -89,16 +91,18 @@ StackMap* parseStackMap() {
8991

9092
if (VERBOSITY() >= 3)
9193
printf("%d records\n", nrecords);
94+
cur_map->records.reserve(nrecords);
9295

9396
for (int i = 0; i < nrecords; i++) {
94-
StackMap::Record* record = new StackMap::Record();
95-
cur_map->records.push_back(record);
97+
cur_map->records.emplace_back();
98+
StackMap::Record* record = &cur_map->records.back();
9699

97100
record->id = *ptr.u64++;
98101
record->offset = *ptr.u32++;
99102
record->flags = *ptr.u16++; // reserved (record flags)
100103

101104
int numlocations = *ptr.u16++;
105+
record->locations.reserve(numlocations);
102106

103107
if (VERBOSITY() >= 3)
104108
printf("Stackmap record %ld at 0x%x has %d locations:\n", record->id, record->offset, numlocations);
@@ -125,6 +129,7 @@ StackMap* parseStackMap() {
125129

126130
ptr.u16++; // padding
127131
int num_live_outs = *ptr.u16++;
132+
record->live_outs.reserve(num_live_outs);
128133
for (int i = 0; i < num_live_outs; i++) {
129134
const StackMap::Record::LiveOut& r = *ptr.record_liveout++;
130135
record->live_outs.push_back(r);

src/codegen/stackmaps.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <vector>
2121

2222
#include "llvm/ADT/SmallVector.h"
23+
#include "llvm/ADT/StringMap.h"
2324

2425
namespace llvm {
2526
class JITEventListener;
@@ -61,20 +62,20 @@ struct StackMap {
6162
uint64_t id;
6263
uint32_t offset;
6364
uint16_t flags;
64-
std::vector<Location> locations;
65-
std::vector<LiveOut> live_outs;
65+
llvm::SmallVector<Location, 8> locations;
66+
llvm::SmallVector<LiveOut, 8> live_outs;
6667
};
6768

68-
std::vector<StackSizeRecord> stack_size_records;
69+
llvm::SmallVector<StackSizeRecord, 1> stack_size_records;
6970
uint32_t header;
70-
std::vector<uint64_t> constants;
71-
std::vector<Record*> records;
71+
llvm::SmallVector<uint64_t, 8> constants;
72+
std::vector<Record> records;
7273
};
7374

7475
// TODO this belongs somewhere else?
7576
class LocationMap {
7677
public:
77-
std::vector<uint64_t> constants;
78+
llvm::SmallVector<uint64_t, 8> constants;
7879

7980
StackMap::Record::Location frame_info_location;
8081
bool frameInfoFound() { return frame_info_location.type != 0; }
@@ -88,10 +89,10 @@ class LocationMap {
8889
CompilerType* type;
8990
llvm::SmallVector<StackMap::Record::Location, 1> locations;
9091
};
91-
std::vector<LocationEntry> locations;
92+
llvm::SmallVector<LocationEntry, 2> locations;
9293
};
9394

94-
std::unordered_map<std::string, LocationTable> names;
95+
llvm::StringMap<LocationTable> names;
9596
};
9697

9798
StackMap* parseStackMap();

src/codegen/unwinding.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,7 @@ DeoptState getDeoptState() {
866866
std::unordered_set<std::string> is_undefined;
867867

868868
for (const auto& p : cf->location_map->names) {
869-
if (!startswith(p.first, "!is_defined_"))
869+
if (!startswith(p.first(), "!is_defined_"))
870870
continue;
871871

872872
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
@@ -876,35 +876,35 @@ DeoptState getDeoptState() {
876876
assert(locs.size() == 1);
877877
uint64_t v = frame_iter->readLocation(locs[0]);
878878
if ((v & 1) == 0)
879-
is_undefined.insert(p.first.substr(12));
879+
is_undefined.insert(p.first().substr(12));
880880

881881
break;
882882
}
883883
}
884884
}
885885

886886
for (const auto& p : cf->location_map->names) {
887-
if (p.first[0] == '!')
887+
if (p.first()[0] == '!')
888888
continue;
889889

890-
if (is_undefined.count(p.first))
890+
if (is_undefined.count(p.first()))
891891
continue;
892892

893893
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
894894
if (e.offset < offset && offset <= e.offset + e.length) {
895895
const auto& locs = e.locations;
896896

897897
llvm::SmallVector<uint64_t, 1> vals;
898-
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str());
898+
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
899899

900900
for (auto& loc : locs) {
901901
vals.push_back(frame_iter->readLocation(loc));
902902
}
903903

904904
Box* v = e.type->deserializeFromFrame(vals);
905-
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v);
905+
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
906906
ASSERT(gc::isValidGCObject(v), "%p", v);
907-
d->d[boxString(p.first)] = v;
907+
d->d[boxString(p.first())] = v;
908908
}
909909
}
910910
}
@@ -961,7 +961,7 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
961961
std::unordered_set<std::string> is_undefined;
962962

963963
for (const auto& p : cf->location_map->names) {
964-
if (!startswith(p.first, "!is_defined_"))
964+
if (!startswith(p.first(), "!is_defined_"))
965965
continue;
966966

967967
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
@@ -971,29 +971,29 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
971971
assert(locs.size() == 1);
972972
uint64_t v = impl->readLocation(locs[0]);
973973
if ((v & 1) == 0)
974-
is_undefined.insert(p.first.substr(12));
974+
is_undefined.insert(p.first().substr(12));
975975

976976
break;
977977
}
978978
}
979979
}
980980

981981
for (const auto& p : cf->location_map->names) {
982-
if (p.first[0] == '!')
982+
if (p.first()[0] == '!')
983983
continue;
984984

985-
if (p.first[0] == '#')
985+
if (p.first()[0] == '#')
986986
continue;
987987

988-
if (is_undefined.count(p.first))
988+
if (is_undefined.count(p.first()))
989989
continue;
990990

991991
for (const LocationMap::LocationTable::LocationEntry& e : p.second.locations) {
992992
if (e.offset < offset && offset <= e.offset + e.length) {
993993
const auto& locs = e.locations;
994994

995995
llvm::SmallVector<uint64_t, 1> vals;
996-
// printf("%s: %s\n", p.first.c_str(), e.type->debugName().c_str());
996+
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
997997
// printf("%ld locs\n", locs.size());
998998

999999
for (auto& loc : locs) {
@@ -1004,9 +1004,9 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
10041004
}
10051005

10061006
Box* v = e.type->deserializeFromFrame(vals);
1007-
// printf("%s: (pp id %ld) %p\n", p.first.c_str(), e._debug_pp_id, v);
1007+
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
10081008
assert(gc::isValidGCObject(v));
1009-
d->d[boxString(p.first)] = v;
1009+
d->d[boxString(p.first())] = v;
10101010
}
10111011
}
10121012
}

0 commit comments

Comments
 (0)