0.目的
接上篇,环境及版本等仍相同
- invokestatic的汇编实现
- 参数传递
- 所谓方法解析(resolve)的内涵
1.invokestatic的汇编实现
先大概过一遍相关核心代码,这样便于与最终生成的汇编对照,以助于理解其逻辑:
hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp
1
2
3
4
5
6
7
8
9
10
// Template generator for the invokestatic bytecode.
// Loads the target Method* into rbx through prepare_invoke, emits the call
// profiling hooks, and finally emits the jump into the callee.
// byte_no must be f1_byte (checked by the assert below).
void TemplateTable::invokestatic(int byte_no) {
transition(vtos, vtos);
assert(byte_no == f1_byte, "use this argument");
// rbx receives the Method* that invokestatic refers to
prepare_invoke(byte_no, rbx); // get f1 Method*
// do the call
__ profile_call(rax);
__ profile_arguments_type(rax, rbx, r13, false);
__ jump_from_interpreted(rbx, rax);
}
接着看prepare_invoke,逻辑实现于load_invoke_cp_cache_entry:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// Emits code that makes sure the constant pool cache entry for the current
// invoke bytecode is resolved (via resolve_cache_and_index) and then loads
// from that entry:
//   method       <- f1 or f2, selected by byte_no
//   itable_index <- f2, only when itable_index != noreg
//   flags        <- the 32-bit flags word
// rcx is used for the cache pointer and rdx for the entry index.
void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
Register method,
Register itable_index,
Register flags,
bool is_invokevirtual,
bool is_invokevfinal, /*unused*/
bool is_invokedynamic) {
const Register cache = rcx;
const Register index = rdx;
......
// The offset computation here is slightly roundabout; it helps to look at the
// definitions of ConstantPool, ConstantPoolCache and ConstantPoolCacheEntry and
// how they relate. ConstantPoolCache::allocate uses C++ placement-new semantics:
// memory for the ConstantPoolCache object plus length*sizeof(ConstantPoolCacheEntry)
// is allocated up front and a ConstantPoolCache* is returned. From then on the
// ConstantPoolCacheEntry elements can be reached by index through that pointer.
// Also, Address(cache, index, Address::times_ptr, method_offset) => mov 0x18(%rcx,%rdx,8),%rbx
// i.e. the scaled index is applied first and the fixed offset is added afterwards,
// whereas the intuitive order would be to add the offset to reach the head of the
// ConstantPoolCacheEntry array first and then index into it.
const int method_offset = in_bytes(
ConstantPoolCache::base_offset() +
((byte_no == f2_byte)
? ConstantPoolCacheEntry::f2_offset()
: ConstantPoolCacheEntry::f1_offset()));
const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
ConstantPoolCacheEntry::flags_offset());
// access constant pool cache fields
const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
ConstantPoolCacheEntry::f2_offset());
// invokedynamic carries a 4-byte index operand, the other invokes a 2-byte one
size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
resolve_cache_and_index(byte_no, cache, index, index_size);
// emitted as mov 0x18(%rcx,%rdx,8),%rbx, because the caller passed rbx to receive the method
__ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
if (itable_index != noreg) {
// pick up itable or appendix index from f2 also:
__ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
}
__ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
}
再看resolve_cache_and_index
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Emits code that leaves the constant pool cache pointer in Rcache and the
// entry index in index for the bytecode currently being generated.
// Fast path: the bytecode recorded in the cache entry already equals
// bytecode(), so the entry is resolved and the runtime call is skipped.
// Slow path: call the matching InterpreterRuntime resolver, then reload
// Rcache/index. rbx is used as a scratch register.
void TemplateTable::resolve_cache_and_index(int byte_no,
Register Rcache,
Register index,
size_t index_size) {
const Register temp = rbx;
Label resolved;
// Path 1: answer straight from the ConstantPoolCache
__ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
// 0xb8 is Bytecodes::_invokestatic
__ cmpl(temp, (int) bytecode()); // emitted as: cmp $0xb8,%ebx
// if resolution has already happened, jump straight to the end of this sequence
__ jcc(Assembler::equal, resolved);
// Path 2: method resolution
// First invocation: the method must be resolved and its Method* stored in the
// corresponding ConstantPoolCache slot
address entry;
switch (bytecode()) {
case Bytecodes::_getstatic:
case Bytecodes::_putstatic:
case Bytecodes::_getfield:
case Bytecodes::_putfield:
entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
break;
case Bytecodes::_invokevirtual:
case Bytecodes::_invokespecial:
case Bytecodes::_invokestatic:
case Bytecodes::_invokeinterface:
entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
break;
......
default:
fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
break;
}
// pass the bytecode value to the runtime resolver as its argument
__ movl(temp, (int) bytecode());
__ call_VM(noreg, entry, temp);
// Update registers with resolved info
__ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
__ bind(resolved);
}
再看路径1: __ get_cache_and_index_and_bytecode_at_bcp
hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Emits code that loads the cache pointer and entry index (see
// get_cache_and_index_at_bcp) and additionally extracts, into `bytecode`,
// the bytecode byte recorded in the entry's _indices word.
// The byte is selected by shifting right (1 + byte_no) bytes and masking
// with bytecode_1_mask.
void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
Register index,
Register bytecode,
int byte_no,
int bcp_offset,
size_t index_size) {
// fetch the ConstantPoolCache pointer and the index operand of the invoke bytecode
get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
// read the _indices word of the selected ConstantPoolCacheEntry; once the entry has been
// resolved the extracted byte should be 0xb8 (invokestatic)
movl(bytecode, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
// with byte_no == 1 this is a 16-bit shift, which skips the low 16-bit index field
const int shift_count = (1 + byte_no) * BitsPerByte;
shrl(bytecode, shift_count);
andl(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
}
先看get_cache_and_index_at_bcp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// Emits code that loads the index operand found at bcp_offset into `index`
// and the cache pointer saved in the current interpreter frame into `cache`.
void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
Register index,
int bcp_offset,
size_t index_size) {
get_cache_index_at_bcp(index, bcp_offset, index_size);
// the cache pointer lives in the frame slot at interpreter_frame_cache_offset (relative to rbp)
movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
// multiply the index by 4
// NOTE(review): presumably scales the entry index to words per ConstantPoolCacheEntry,
// since callers address entries with Address::times_ptr — confirm against the entry size
shll(index, 2);
}
// Emits code that loads the index operand located bcp_offset bytes past the
// address in r13 into `index`; the load width is chosen by index_size
// (u2, u4 or u1).
void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
int bcp_offset,
size_t index_size) {
if (index_size == sizeof(u2)) {
// read a 16-bit value from the bytecode stream: invokestatic is immediately followed
// by a two-byte constant pool index for the method
load_unsigned_short(index, Address(r13, bcp_offset));
} else if (index_size == sizeof(u4)) {
...
} else if (index_size == sizeof(u1)) {
load_unsigned_byte(index, Address(r13, bcp_offset));
}
}
再看一下ConstantPoolCacheEntry定义:b1字段在方法完成解析后,保存了字节码定义值(此处为0xb8);若尚未解析则应该为0,所以可以通过简单的__ cmpl(temp, (int) bytecode())
来判断是否完成解析。我们还需要再看一下路径2才能完全明白方法解析内涵。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// A ConstantPoolCacheEntry describes an individual entry of the constant
// pool cache. There's 2 principal kinds of entries: field entries for in-
// stance & static field access, and method entries for invokes. Some of
// the entry layout is shared and looks as follows:
//
// bit number |31 0|
// bit length |-8--|-8--|---16----|
// --------------------------------
// _indices [ b2 | b1 | index ] index = constant_pool_index
// _f1 [ entry specific ] metadata ptr (method or klass)
// _f2 [ entry specific ] vtable or res_ref index, or vfinal method ptr
// _flags [tos|0|F=1|0|0|0|f|v|0 |0000|field_index] (for field entries)
// bit length [ 4 |1| 1 |1|1|1|1|1|1 |-4--|----16-----]
// _flags [tos|0|F=0|M|A|I|f|0|vf|0000|00000|psize] (for method entries)
// bit length [ 4 |1| 1 |1|1|1|1|1|1 |-4--|--8--|--8--]
回看路径2: 方法解析
1
entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
hotspot/src/share/vm/interpreter/interpreterRuntime.cpp
其实方法解析到底是什么,就是通过常量池信息,回溯其定义类及基类,最终找到该方法精确定义位置(比如要实现多态调用等)并返回其在jvm内部的Method*。这一部分内容就和类的加载和解析关联起来,不详述。不过在resolve_invoke返回前,做了一件重要的事情
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Record the resolution result in the constant pool cache entry; which setter
// is used depends on the kind of call that resolution produced.
switch (info.call_kind()) {
case CallInfo::direct_call:
// direct call: store the bytecode together with the resolved method
cache_entry(thread)->set_direct_call(
bytecode,
info.resolved_method());
break;
case CallInfo::vtable_call:
// vtable call: additionally store the vtable index
cache_entry(thread)->set_vtable_call(
bytecode,
info.resolved_method(),
info.vtable_index());
break;
......
default: ShouldNotReachHere();
}
hotspot/src/share/vm/oops/cpCache.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Fills in this cache entry for a resolved invoke. Only the _invokestatic
// case is shown in this excerpt: it sets the method flags (result tos state,
// vfinal/final bits, parameter size), stores the Method* in f1 and then
// records the bytecode in the entry via set_bytecode_1.
void ConstantPoolCacheEntry::set_direct_or_vtable_call(Bytecodes::Code invoke_code,
methodHandle method,
int vtable_index) {
switch (invoke_code) {
......
case Bytecodes::_invokestatic:
set_method_flags(as_TosState(method->result_type()),
((is_vfinal() ? 1 : 0) << is_vfinal_shift) |
((method->is_final_method() ? 1 : 0) << is_final_shift),
method()->size_of_parameters());
set_f1(method());
// select bytecode slot 1 for the update below
byte_no = 1;
break;
default:
ShouldNotReachHere();
break;
}
if (byte_no == 1) {
// record the invoke bytecode in the entry (see set_bytecode_1)
set_bytecode_1(invoke_code);
}
......
}
// Stores f1 into the entry's _f1 field. In assert-enabled builds it checks
// that _f1 was either NULL or already equal to the new value, i.e. an
// already-set f1 is never changed to a different pointer.
void set_f1(Metadata* f1) {
Metadata* existing_f1 = (Metadata*)_f1; // read once
assert(existing_f1 == NULL || existing_f1 == f1, "illegal field change");
_f1 = f1;
}
// ORs the bytecode value, shifted left by bytecode_1_shift, into _indices and
// writes the result back with a release store.
void ConstantPoolCacheEntry::set_bytecode_1(Bytecodes::Code code) {
OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << bytecode_1_shift));
}
再去看一下ConstantPoolCacheEntry注释,即明白set_f1完成了静态类型方法Method*设置,set_bytecode_1在b1位置保存了字节码定义值。此后即不需要再次解析,从ConstantPoolCache返回即可。这也就是方法解析及调用机制的内涵。
2.关键源码与汇编比照
下图是源码汇编实现与最终编译结果关联比照:
3.测试代码
1
2
3
4
5
6
7
8
9
10
/**
 * Minimal test program: main calls a static method so that the
 * invokestatic bytecode can be traced in the interpreter.
 */
public class InterpretStatic {
public static void main(String[] args) {
// the int result is discarded (it appears as a pop after invokestatic in the bytecode)
InterpretStatic.int_static_test_method();
}
/** Returns the constant 1 through a local variable. */
public static int int_static_test_method() {
int i = 1;
return i;
}
}
关键信息:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
public static void main(java.lang.String[]);
descriptor: ([Ljava/lang/String;)V
flags: ACC_PUBLIC, ACC_STATIC
Code:
stack=1, locals=1, args_size=1
0: invokestatic #2 // Method int_static_test_method:()I
3: pop
4: return
Constant pool:
#1 = Methodref #4.#15 // java/lang/Object."<init>":()V
#2 = Methodref #3.#16 // InterpretStatic.int_static_test_method:()I
#3 = Class #17 // InterpretStatic
#4 = Class #18 // java/lang/Object
#5 = Utf8 <init>
#6 = Utf8 ()V
#7 = Utf8 Code
#8 = Utf8 LineNumberTable
#9 = Utf8 main
#10 = Utf8 ([Ljava/lang/String;)V
#11 = Utf8 int_static_test_method
#12 = Utf8 ()I
#13 = Utf8 SourceFile
#14 = Utf8 InterpretStatic.java
#15 = NameAndType #5:#6 // "<init>":()V
#16 = NameAndType #11:#12 // int_static_test_method:()I
#17 = Utf8 InterpretStatic
#18 = Utf8 java/lang/Object
4.debug过程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java
// 第一轮:保留入口点内存地址0x00007fffe1043690
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 20 'invokestatic|zerolocals'
// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1
run InterpretStatic
// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(vtos)[184]
p 'AbstractInterpreter::_entry_table'[0] // zerolocals
// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口
// 查验信息
p (unsigned char)*($r13)
p (unsigned char)*($r13+1)
p (unsigned char)*($r13+2)
p (unsigned char)*($r13+3)
p (unsigned char)*($r13+4)
// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe1043950 // jmpq *0x58(%rbx)
p ((Method*)($rbx))->name() // int_static_test_method
图示1:
图示2:
5.栈帧
在“1.invokestatic的汇编实现”一节的实现过程中,有两处涉及帧偏移量的引用:
1
2
3
4
5
// 代码具体位置可自行搜索
movptr(Address(rbp, frame::interpreter_frame_bcx_offset * wordSize), r13);
// 在invokestatic实现中,该行代码意味着即将调用一个静态方法时,需要从当前方法帧获取常量池缓存(ConstantPoolCache)指针,以进一步拿到静态方法信息
// rbp是当前栈帧基址
movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
那么上面的frame::interpreter_frame_***_offset是什么呢?hotspot/src/cpu/x86/vm/frame_x86.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Frame slot offsets, counted in words. The excerpts in this article use the
// interpreter_frame_* values as Address(rbp, offset * wordSize). Each
// interpreter slot is defined as "previous slot - 1", so they form a
// contiguous run below rbp; the resulting values are noted on each line.
enum {
pc_return_offset = 0,
// All frames
link_offset = 0,
return_addr_offset = 1,
// non-interpreter frames
sender_sp_offset = 2,
// Interpreter frames
interpreter_frame_result_handler_offset = 3, // for native calls only
interpreter_frame_oop_temp_offset = 2, // for native calls only
interpreter_frame_sender_sp_offset = -1,
// outgoing sp before a call to an invoked method
interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, // -2
interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, // -3
interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, // -4
interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, // -5
interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, // -6
interpreter_frame_bcx_offset = interpreter_frame_locals_offset - 1, // -7
interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, // -8
// both monitor block bounds alias the initial sp slot
interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, // -8
interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, // -8
......
};
调用参数由当前方法传递,而被调用java静态方法栈帧由zerolocals类型方法解释器入口generate_normal_entry准备,更准确地说其中固定部分由generate_fixed_frame准备。我们可以分析上面frame::interpreter_frame_***_offset定义与generate_fixed_frame对应关系,如下图:
根据上面的分析,我们可以看出,不论是从C++进入java main方法(上一篇博文),还是invokestatic之类的字节码形式方法调用,最终殊途同归,都进入到相应类型入口generate_normal_entry,帧也必然相同。
6.栈帧的简单验证
简单查验传参,方法内局部变量在栈内情况
6.1 验证程序
1
2
3
4
5
6
7
8
9
10
11
12
/**
 * Test program for inspecting argument passing and local variables in the
 * interpreter frame: main passes two ints to a static method.
 */
public class InterpretStatic {
public static void main(String[] args) {
int i = 1;
int j = 2;
// the int result is discarded (it appears as a pop after invokestatic in the bytecode)
InterpretStatic.int_static_test_method(i, j);
}
/** Returns i + j, going through the local variable a. */
public static int int_static_test_method(int i, int j) {
int a = i+j;
return a;
}
}
编译结果:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
public static void main(java.lang.String[]);
descriptor: ([Ljava/lang/String;)V
flags: ACC_PUBLIC, ACC_STATIC
Code:
stack=2, locals=3, args_size=1
0: iconst_1
1: istore_1
2: iconst_2
3: istore_2
4: iload_1
5: iload_2
6: invokestatic #2 // Method int_static_test_method:(II)I
9: pop
10: return
public static int int_static_test_method(int, int);
descriptor: (II)I
flags: ACC_PUBLIC, ACC_STATIC
Code:
stack=2, locals=3, args_size=2
0: iload_0
1: iload_1
2: iadd
3: istore_2
4: iload_2
5: ireturn
6.2 栈帧分析
6.3 debug过程
思路:查验栈帧中入参和局部变量在执行过程的变化
- 在main方法即将执行int_static_test_method时,方法入参情况
- 进入int_static_test_method后,执行完istore_2,局部变量a完成赋值,查验该处内存
- 断点打在zerolocals和iload_2入口处即可
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java
// 第一轮:保留入口点内存地址0x00007fffe1043690
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 2000 'invokestatic|zerolocals' > static.log
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 20 'invokestatic|zerolocals'
// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1
run InterpretStatic
// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(vtos)[184]
p 'AbstractInterpreter::_entry_table'[0] // zerolocals 0x00007fffe101e2e0
// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口
// 查验信息
p (unsigned char)*($r13) // invokestatic
p (unsigned char)*($r13+1)
p (unsigned char)*($r13+2)
p (unsigned char)*($r13+3) // pop
p (unsigned char)*($r13+4) // return
// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe101e2e0 // zerolocals
p ((Method*)($rbx))->name() // int_static_test_method
p $37->as_C_string()
$38 = 0x7ffff00097e8 "int_static_test_method"
// 单步执行到0x7fffe101e443处:test %edx,%edx 即将分配方法内局部变量
// 此时验证入参
p /x $rsp
$39 = 0x7ffff7fe9728
p *0x7ffff7fe9728
$40 = 2
p *(0x7ffff7fe9728+8)
$41 = 1
p *(0x7ffff7fe9728-8)
$42 = 0
p 'TemplateInterpreter::_active_table'.table_for(vtos)[28] // iload_2 0x7fffe102d8f0
b *0x7fffe102d8f0 // iload_2 vtos入口
p *(0x7ffff7fe9720) // int_static_test_method局部变量a地址
$44 = 3
6.4 方法退出时栈帧变化
6.4.1 代码分析
当方法int_static_test_method通过invokestatic在main函数内执行完毕时,栈是什么样子的呢?我们可以分析进入解释执行int_static_test_method时以及方法最后一个字节码ireturn对栈帧做了哪些操作。其中有两处关键:
-
generate_fixed_frame
1 2 3 4 5 6 7 8 9 10
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// initialize fixed part of activation frame
__ push(rax); // save return address
__ enter(); // save old & set new rbp
__ push(r13); // set sender sp
__ push((int)NULL_WORD); // leave last_sp as null
__ movptr(r13, Address(rbx, Method::const_offset())); // get ConstMethod*
__ lea(r13, Address(r13, ConstMethod::codes_offset())); // get codebase
__ push(rbx); // save Method*
......
-
ireturn字节码实现TemplateTable::_return
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
__ remove_activation(state, r13);
__ jmp(r13);
// ----------------------------------------------
// __ remove_activation
// remove activation
// get sender sp
movptr(rbx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize));
leave(); // remove frame anchor
pop(ret_addr); // get return address
mov(rsp, rbx); // set sp to sender sp
// 上述代码编译后
0x00007fffe103dda4: mov (%rsp),%eax
0x00007fffe103dda7: add $0x8,%rsp
0x00007fffe103ddab: mov -0x8(%rbp),%rbx
0x00007fffe103ddaf: leaveq
0x00007fffe103ddb0: pop %r13
0x00007fffe103ddb2: mov %rbx,%rsp
0x00007fffe103ddb5: jmpq *%r13
上面__ push(r13);
与mov(rsp, rbx);
是对应的,一个保存,一个恢复,图示:
入参由调用方准备,因此通过ireturn返回后,回到栈帧最后一个入参顶部是符合直觉逻辑的,中间部分全部销毁。
6.4.2 debug验证
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java
// 第一轮:保留入口点内存地址0x00007fffe1043690
// 生成并保留汇编代码,以便于debug及断点
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 3000 'invokestatic|zerolocals|ireturn' > frame.log
// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1
run InterpretStatic
// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(itos)[172] // ireturn 0x7fffe103d487
// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口
// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe101e2e0 // zerolocals
// 单步执行到0x7fffe101e443处:test %edx,%edx 即将分配方法内局部变量
// 此时验证保存的sender sp
p /x $r13
$25 = 0x7ffff7fe9728
p *0x7ffff7fe9728
$26 = 2
p *(0x7ffff7fe9728+8)
$27 = 1
// ireturn返回时恢复栈帧
b *0x00007fffe103ddb5
(gdb) p /x $r13
$28 = 0x7fffe10070f0
(gdb) p /x $rsp
$29 = 0x7ffff7fe9728
(gdb) p /x *0x7ffff7fe9728
$31 = 0x2
方法栈帧准备:
ireturn退出恢复:
所以,所谓sender sp,可以理解为来者(调用方) stack pointer,即为入参栈顶