diff --git a/regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class b/regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class new file mode 100644 index 00000000000..3ada3700fc1 Binary files /dev/null and b/regression/cbmc-java/jsr1/edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class differ diff --git a/regression/cbmc-java/jsr1/test.desc b/regression/cbmc-java/jsr1/test.desc new file mode 100644 index 00000000000..abcbe1427a1 --- /dev/null +++ b/regression/cbmc-java/jsr1/test.desc @@ -0,0 +1,8 @@ +CORE +edu/emory/mathcs/backport/java/util/concurrent/ConcurrentHashMap.class +--show-goto-functions +^EXIT=0$ +^SIGNAL=0$ +-- +\\"statement\\" (\\"jsr\\") +\\"statement\\" (\\"ret\\") diff --git a/regression/cbmc-java/jsr2/test.class b/regression/cbmc-java/jsr2/test.class new file mode 100644 index 00000000000..326874579e6 Binary files /dev/null and b/regression/cbmc-java/jsr2/test.class differ diff --git a/regression/cbmc-java/jsr2/test.desc b/regression/cbmc-java/jsr2/test.desc new file mode 100644 index 00000000000..0fd8981318e --- /dev/null +++ b/regression/cbmc-java/jsr2/test.desc @@ -0,0 +1,13 @@ +CORE +test.class +--show-goto-functions +^EXIT=0$ +^SIGNAL=0$ +Labels: pc3 +Labels: pc6 +Labels: pc9 +Labels: pc12 +Labels: pc13 +-- +\\"statement\\" (\\"jsr\\") +\\"statement\\" (\\"ret\\") diff --git a/regression/cbmc-java/jsr2/test.j b/regression/cbmc-java/jsr2/test.j new file mode 100644 index 00000000000..cedcc40c97b --- /dev/null +++ b/regression/cbmc-java/jsr2/test.j @@ -0,0 +1,28 @@ +; This is Jasmin assembler syntax-- see http://jasmin.sourceforge.net/guide.html or apt-get install jasmin-sable + +.class public test +.super java/lang/Object + +; standard initializer +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static main()V + .limit locals 4 + .limit stack 1 + goto runsrs +sr1: + astore_0 + ret 0 +runsrs: + jsr sr1 + jsr sr2 + return +sr2: + astore_3 + ret 3 +.end method + diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 49b621aed89..dbe219d2b41 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -543,6 +543,9 @@ codet java_bytecode_convert_methodt::convert_instructions( address_mapt address_map; std::set targets; + std::vector jsr_ret_targets; + std::vector ret_instructions; + for(instructionst::const_iterator i_it=instructions.begin(); i_it!=instructions.end(); @@ -585,6 +588,15 @@ codet java_bytecode_convert_methodt::convert_instructions( targets.insert(target); a_entry.first->second.successors.push_back(target); + + if(i_it->statement=="jsr" || + i_it->statement=="jsr_w") + { + instructionst::const_iterator next=i_it; + assert(++next!=instructions.end() && "jsr without valid return address?"); + targets.insert(next->address); + jsr_ret_targets.push_back(next->address); + } } else if(i_it->statement=="tableswitch" || i_it->statement=="lookupswitch") @@ -604,6 +616,23 @@ codet java_bytecode_convert_methodt::convert_instructions( } } } + else if(i_it->statement=="ret") + { + // Finish these later, once we've seen all jsr instructions. + ret_instructions.push_back(i_it); + } + } + + // Draw edges from every `ret` to every `jsr` successor. + // Could do better with flow analysis to distinguish multiple subroutines within + // the same function. + for(const auto retinst : ret_instructions) + { + auto& a_entry=address_map.at(retinst->address); + a_entry.successors.insert( + a_entry.successors.end(), + jsr_ret_targets.begin(), + jsr_ret_targets.end()); } for(address_mapt::iterator @@ -925,6 +954,48 @@ codet java_bytecode_convert_methodt::convert_instructions( code_gotot code_goto(label(number)); c=code_goto; } + else if(statement=="jsr" || statement=="jsr_w") + { + // As 'goto', except we must also push the subroutine return address: + assert(op.empty() && results.size()==1); + irep_idt number=to_constant_expr(arg0).get_value(); + code_gotot code_goto(label(number)); + c=code_goto; + results[0]=as_number( + std::next(i_it)->address, + pointer_typet(void_typet(), 64)); + } + else if(statement=="ret") + { + // Since we have a bounded target set, make life easier on our analyses + // and write something like: + // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ... + assert(op.empty() && results.empty()); + code_blockt branches; + auto retvar=variable(arg0, 'a', i_it->address, INST_INDEX, NO_CAST); + assert(!jsr_ret_targets.empty()); + for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx) + { + irep_idt number=std::to_string(jsr_ret_targets[idx]); + code_gotot g(label(number)); + g.add_source_location()=i_it->source_location; + if(idx==idxlim-1) + branches.move_to_operands(g); + else + { + code_ifthenelset branch; + auto address_ptr=as_number( + jsr_ret_targets[idx], + pointer_typet(void_typet(), 64)); + branch.cond()=equal_exprt(retvar, address_ptr); + branch.cond().add_source_location()=i_it->source_location; + branch.then_case()=g; + branch.add_source_location()=i_it->source_location; + branches.move_to_operands(branch); + } + } + c=std::move(branches); + } else if(statement=="iconst_m1") { assert(results.size()==1); @@ -1530,8 +1601,19 @@ codet java_bytecode_convert_methodt::convert_instructions( else { c.make_block(); - forall_operands(o_it, more_code) - c.copy_to_operands(*o_it); + auto& last_statement=to_code_block(c).find_last_statement(); + if(last_statement.get_statement()==ID_goto) + { + // Insert stack twiddling before branch: + last_statement.make_block(); + last_statement.operands().insert( + last_statement.operands().begin(), + more_code.operands().begin(), + more_code.operands().end()); + } + else + forall_operands(o_it, more_code) + c.copy_to_operands(*o_it); } }