Skip to content

Commit

Permalink
Merge pull request #1335 from undingen/minor_mem
Browse files Browse the repository at this point in the history
Misc memory size reductions
  • Loading branch information
undingen authored Aug 22, 2016
2 parents 7c1ee4c + 8481459 commit 0324606
Show file tree
Hide file tree
Showing 18 changed files with 211 additions and 207 deletions.
13 changes: 3 additions & 10 deletions src/analysis/function_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,22 +549,15 @@ std::unique_ptr<PhiAnalysis> computeRequiredPhis(const ParamNames& args, CFG* cf
initial_map[vreg] = DefinednessAnalysis::Undefined;
}

auto maybe_add = [&](AST_Name* n) {
for (AST_Name* n : args.allArgsAsName()) {
ScopeInfo::VarScopeType vst = n->lookup_type;
assert(vst != ScopeInfo::VarScopeType::UNKNOWN);
assert(vst != ScopeInfo::VarScopeType::GLOBAL); // global-and-local error
if (vst == ScopeInfo::VarScopeType::NAME)
return;
continue;
assert(n->vreg >= 0);
initial_map[n->vreg] = DefinednessAnalysis::Defined;
};

for (auto e : args.arg_names)
maybe_add(e);
if (args.vararg_name)
maybe_add(args.vararg_name);
if (args.kwarg_name)
maybe_add(args.kwarg_name);
}

assert(initial_map.numVregs() == vreg_info.getTotalNumOfVRegs());

Expand Down
23 changes: 4 additions & 19 deletions src/analysis/type_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,30 +859,15 @@ TypeAnalysis* doTypeAnalysis(CFG* cfg, const ParamNames& arg_names, const std::v

TypeMap initial_types(cfg->getVRegInfo().getTotalNumOfVRegs());
int i = 0;

auto maybe_add = [&](AST_Name* n) {
for (AST_Name* n : arg_names.allArgsAsName()) {
ScopeInfo::VarScopeType vst = n->lookup_type;
assert(vst != ScopeInfo::VarScopeType::UNKNOWN);
assert(vst != ScopeInfo::VarScopeType::GLOBAL); // global-and-local error
if (vst == ScopeInfo::VarScopeType::NAME)
return;
initial_types[n->vreg] = unboxedType(arg_types[i]);
if (vst != ScopeInfo::VarScopeType::NAME)
initial_types[n->vreg] = unboxedType(arg_types[i]);
++i;
};

for (; i < arg_names.args.size(); i++) {
maybe_add(arg_names.arg_names[i]);
}

if (arg_names.vararg.size()) {
maybe_add(arg_names.vararg_name);
i++;
}

if (arg_names.kwarg.size()) {
maybe_add(arg_names.kwarg_name);
i++;
}

assert(i == arg_types.size());

return PropagatingTypeAnalysis::doAnalysis(speculation, scope_info, std::move(initial_types),
Expand Down
36 changes: 22 additions & 14 deletions src/asm_writing/icinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,19 @@ void ICSlotRewrite::addDependenceOn(ICInvalidator& invalidator) {
int ICInfo::calculateSuggestedSize() {
// if we never rewrote this IC just return the whole IC size for now
if (!times_rewritten)
return slots[0].size;
return slots.begin()->size;

int additional_space_per_slot = 50;
// if there are less rewrites than slots we can give a very accurate estimate
if (times_rewritten < slots.size()) {
// add up the sizes of all used slots
int size = 0;
for (int i = 0; i < times_rewritten; ++i) {
size += slots[i].size + additional_space_per_slot;
int i = 0;
for (auto&& slot : slots) {
if (i >= times_rewritten)
break;
size += slot.size + additional_space_per_slot;
++i;
}
return size;
}
Expand All @@ -286,18 +290,23 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
int num_slots = slots.size();
int fallback_to_in_use_slot = -1;

llvm::SmallVector<ICSlotInfo*, 8> slots_vec;
for (auto&& slot : slots) {
slots_vec.push_back(&slot);
}

// we prefer to use a unused slot and if non is available we will fallback to a slot which is in use (but no one is
// inside)
for (int _i = 0; _i < num_slots; _i++) {
int i = (_i + next_slot_to_try) % num_slots;

ICSlotInfo& sinfo = slots[i];
assert(sinfo.num_inside >= 0);
ICSlotInfo* sinfo = slots_vec[i];
assert(sinfo->num_inside >= 0);

if (sinfo.num_inside || sinfo.size == 0)
if (sinfo->num_inside || sinfo->size == 0)
continue;

if (sinfo.used) {
if (sinfo->used) {
if (fallback_to_in_use_slot == -1)
fallback_to_in_use_slot = i;
continue;
Expand All @@ -308,7 +317,7 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
}

next_slot_to_try = i;
return &sinfo;
return sinfo;
}

if (fallback_to_in_use_slot != -1) {
Expand All @@ -317,7 +326,7 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
}

next_slot_to_try = fallback_to_in_use_slot;
return &slots[fallback_to_in_use_slot];
return slots_vec[fallback_to_in_use_slot];
}

if (VERBOSITY() >= 4)
Expand Down Expand Up @@ -357,7 +366,6 @@ ICInfo::~ICInfo() {
ics_by_return_addr.erase(slowpath_rtn_addr);
if (node)
ics_by_ast_node.erase(node);
deregisterGCTrackedICInfo(this);

for (auto& slot : slots) {
// Calling a full clear() might be overkill here, but probably better safe than sorry:
Expand Down Expand Up @@ -413,8 +421,6 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
assert(!ics_by_return_addr.count(slowpath_rtn_addr));
ics_by_return_addr[slowpath_rtn_addr] = icinfo;

registerGCTrackedICInfo(icinfo);

return std::unique_ptr<ICInfo>(icinfo);
}

Expand Down Expand Up @@ -447,11 +453,13 @@ void ICInfo::invalidate(ICSlotInfo* icentry) {

llvm::sys::Memory::InvalidateInstructionCache(start, icentry->size);

for (int i = 0; i < slots.size(); ++i) {
if (&slots[i] == icentry) {
int i = 0;
for (auto&& slot : slots) {
if (&slot == icentry) {
next_slot_to_try = i;
break;
}
++i;
}

icentry->used = false;
Expand Down
19 changes: 8 additions & 11 deletions src/asm_writing/icinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
#ifndef PYSTON_ASMWRITING_ICINFO_H
#define PYSTON_ASMWRITING_ICINFO_H

#include <deque>
#include <list>
#include <memory>
#include <unordered_set>
#include <vector>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/CallingConv.h"

#include "asm_writing/assembler.h"
Expand Down Expand Up @@ -62,15 +63,16 @@ struct ICSlotInfo {

ICInfo* ic;
uint8_t* start_addr;

std::vector<void*> gc_references;
std::vector<DecrefInfo> decref_infos;
llvm::TinyPtrVector<ICInvalidator*> invalidators; // ICInvalidators that reference this slotinfo

int num_inside; // the number of stack frames that are currently inside this slot will also get increased during a
// rewrite
int size;
bool used; // if this slot is empty or got invalidated

std::vector<void*> gc_references;
std::vector<DecrefInfo> decref_infos;
std::vector<ICInvalidator*> invalidators; // ICInvalidators that reference this slotinfo

void clear(bool should_invalidate = true);
};

Expand All @@ -79,7 +81,7 @@ typedef BitSet<16> LiveOutSet;
class ICSlotRewrite;
class ICInfo {
private:
std::deque<ICSlotInfo> slots;
std::list<ICSlotInfo> slots;
// For now, just use a round-robin eviction policy.
// This is probably a bunch worse than LRU, but it's also
// probably a bunch better than the "always evict slot #0" policy
Expand Down Expand Up @@ -196,11 +198,6 @@ class ICSlotRewrite {
friend class ICInfo;
};

inline void registerGCTrackedICInfo(ICInfo* ic) {
}
inline void deregisterGCTrackedICInfo(ICInfo* ic) {
}

class ICSetupInfo;
struct CompiledFunction;
std::unique_ptr<ICInfo>
Expand Down
35 changes: 13 additions & 22 deletions src/codegen/ast_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class ASTInterpreter {
static Box* executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at);

private:
Value createFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body);
Value createFunction(AST* node, AST_arguments* args);
Value doBinOp(AST_expr* node, Value left, Value right, int op, BinExpType exp_type);
void doStore(AST_expr* node, STOLEN(Value) value);
void doStore(AST_Name* name, STOLEN(Value) value);
Expand Down Expand Up @@ -272,26 +272,21 @@ void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _gene

const ParamNames& param_names = getMD()->param_names;

// make sure the AST_Name nodes are set
assert(param_names.args.size() == param_names.arg_names.size());
assert(param_names.vararg.empty() == (param_names.vararg_name == NULL));
assert(param_names.kwarg.empty() == (param_names.kwarg_name == NULL));

int i = 0;
for (auto& name : param_names.arg_names) {
for (auto& name : param_names.argsAsName()) {
doStore(name, Value(incref(getArg(i++, arg1, arg2, arg3, args)), 0));
}

if (param_names.vararg_name)
doStore(param_names.vararg_name, Value(incref(getArg(i++, arg1, arg2, arg3, args)), 0));
if (param_names.has_vararg_name)
doStore(param_names.varArgAsName(), Value(incref(getArg(i++, arg1, arg2, arg3, args)), 0));

if (param_names.kwarg_name) {
if (param_names.has_kwarg_name) {
Box* val = getArg(i++, arg1, arg2, arg3, args);
if (!val)
val = createDict();
else
Py_INCREF(val);
doStore(param_names.kwarg_name, Value(val, 0));
doStore(param_names.kwArgAsName(), Value(val, 0));
}
assert(i == param_names.totalParameters());
}
Expand All @@ -314,13 +309,9 @@ void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset, llvm::DenseSe
// small optimization: we know that the passed arguments in the entry block are non zero
if (block == block->cfg->getStartingBlock() && block->predecessors.empty()) {
auto param_names = getMD()->param_names;
for (auto&& arg : param_names.arg_names) {
for (auto&& arg : param_names.allArgsAsName()) {
known_non_null_vregs.insert(arg->vreg);
}
if (param_names.vararg_name)
known_non_null_vregs.insert(param_names.vararg_name->vreg);
if (param_names.kwarg_name)
known_non_null_vregs.insert(param_names.kwarg_name->vreg);
}
jit = code_block->newFragment(block, exit_offset, std::move(known_non_null_vregs));
}
Expand Down Expand Up @@ -1131,8 +1122,8 @@ Value ASTInterpreter::visit_return(AST_Return* node) {
return s;
}

Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::vector<AST_stmt*>& body) {
FunctionMetadata* md = wrapFunction(node, args, body, source_info);
Value ASTInterpreter::createFunction(AST* node, AST_arguments* args) {
FunctionMetadata* md = wrapFunction(node, args, source_info);

std::vector<Box*> defaults;
llvm::SmallVector<RewriterVar*, 4> defaults_vars;
Expand Down Expand Up @@ -1232,7 +1223,7 @@ Value ASTInterpreter::visit_makeFunction(AST_MakeFunction* mkfn) {
for (AST_expr* d : node->decorator_list)
decorators.push_back(visit_expr(d));

Value func = createFunction(node, args, node->body);
Value func = createFunction(node, args);

for (int i = decorators.size() - 1; i >= 0; i--) {
func.o = runtimeCall(autoDecref(decorators[i].o), ArgPassSpec(1), autoDecref(func.o), 0, 0, 0, 0);
Expand Down Expand Up @@ -1271,7 +1262,7 @@ Value ASTInterpreter::visit_makeClass(AST_MakeClass* mkclass) {
closure = created_closure;
assert(closure);
}
FunctionMetadata* md = wrapFunction(node, nullptr, node->body, source_info);
FunctionMetadata* md = wrapFunction(node, nullptr, source_info);

Box* passed_globals = NULL;
if (!getMD()->source->scoping->areGlobalsFromModule())
Expand Down Expand Up @@ -1553,7 +1544,7 @@ Value ASTInterpreter::visit_call(AST_Call* node) {

AUTO_DECREF(func.o);

std::vector<Box*> args;
llvm::SmallVector<Box*, 8> args;
llvm::SmallVector<RewriterVar*, 8> args_vars;
args.reserve(node->args.size());
args_vars.reserve(node->args.size());
Expand Down Expand Up @@ -1586,7 +1577,7 @@ Value ASTInterpreter::visit_call(AST_Call* node) {
args_vars.push_back(v);
}

AUTO_DECREF_ARRAY(&args[0], args.size());
AUTO_DECREF_ARRAY(args.data(), args.size());

ArgPassSpec argspec(node->args.size(), node->keywords.size(), node->starargs, node->kwargs);

Expand Down
8 changes: 0 additions & 8 deletions src/codegen/baseline_jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,14 +882,6 @@ std::pair<int, llvm::DenseSet<int>> JitFragmentWriter::finishCompilation() {
code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start,
std::move(ic_infos), *ic_info);

#if MOVING_GC
// If JitFragmentWriter is destroyed, we don't necessarily want the ICInfo to be destroyed also,
// because it may contain a list of references to pointers in generated code that still exists
// and we need to keep those around.
// TODO: When should these ICInfo be freed?
registerGCTrackedICInfo(ic_info.release());
#endif

return std::make_pair(exit_info.num_bytes, std::move(known_non_null_vregs));
}

Expand Down
2 changes: 1 addition & 1 deletion src/codegen/baseline_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class JitFragmentWriter;
class JitCodeBlock {
public:
static constexpr int scratch_size = 256;
static constexpr int memory_size = 32768; // must fit the EH frame + generated code
static constexpr int memory_size = 6 * 4096; // must fit the EH frame + generated code
static constexpr int num_stack_args = 2;

// scratch size + space for passing additional args on the stack without having to adjust the SP when calling
Expand Down
17 changes: 8 additions & 9 deletions src/codegen/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,22 @@ namespace pyston {
FunctionMetadata::FunctionMetadata(int num_args, bool takes_varargs, bool takes_kwargs,
std::unique_ptr<SourceInfo> source)
: code_obj(NULL),
num_args(num_args),
takes_varargs(takes_varargs),
takes_kwargs(takes_kwargs),
source(std::move(source)),
param_names(this->source->ast, this->source->getInternedStrings()),
takes_varargs(takes_varargs),
takes_kwargs(takes_kwargs),
num_args(num_args),
times_interpreted(0),
internal_callable(NULL, NULL) {
}

FunctionMetadata::FunctionMetadata(int num_args, bool takes_varargs, bool takes_kwargs, const ParamNames& param_names)
: code_obj(NULL),
num_args(num_args),
takes_varargs(takes_varargs),
takes_kwargs(takes_kwargs),
source(nullptr),
param_names(param_names),
takes_varargs(takes_varargs),
takes_kwargs(takes_kwargs),
num_args(num_args),
times_interpreted(0),
internal_callable(NULL, NULL) {
}
Expand Down Expand Up @@ -87,12 +87,12 @@ void FunctionMetadata::addVersion(CompiledFunction* compiled) {
assert(compiled->spec->arg_types.size() == numReceivedArgs());
versions.push_back(compiled);
} else {
osr_versions[compiled->entry_descriptor] = compiled;
osr_versions.emplace_front(compiled->entry_descriptor, compiled);
}
}

SourceInfo::SourceInfo(BoxedModule* m, ScopingAnalysis* scoping, FutureFlags future_flags, AST* ast, BoxedString* fn)
: parent_module(m), scoping(scoping), scope_info(NULL), future_flags(future_flags), ast(ast), cfg(NULL) {
: parent_module(m), scoping(scoping), scope_info(NULL), ast(ast), cfg(NULL), future_flags(future_flags) {
assert(fn);

// TODO: this is a very bad way of handling this:
Expand Down Expand Up @@ -272,7 +272,6 @@ llvm::JITEventListener* makeRegistryListener() {
return new RegistryEventListener();
}


FunctionSpecialization::FunctionSpecialization(ConcreteCompilerType* rtn_type) : rtn_type(rtn_type) {
accepts_all_inputs = true;
boxed_return_value = (rtn_type->llvmType() == UNKNOWN->llvmType());
Expand Down
Loading

0 comments on commit 0324606

Please sign in to comment.