单步调试java invokestatic在hotspot中的解释执行

Posted by My Blog on January 17, 2024

0.目的

接上篇,环境及版本等仍相同

  • invokestatic的汇编实现
  • 参数传递
  • 所谓方法解析(resolve)的内涵

1.invokestatic的汇编实现

先大概过一遍相关核心代码,这样便于与最终生成的汇编对照,以助于理解其逻辑:

hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp

1
2
3
4
5
6
7
8
9
10
void TemplateTable::invokestatic(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f1_byte, "use this argument");
  // rbx寄存器用来接收invokestatic指向的Method*
  prepare_invoke(byte_no, rbx);  // get f1 Method*
  // do the call
  __ profile_call(rax);
  __ profile_arguments_type(rax, rbx, r13, false);
  __ jump_from_interpreted(rbx, rax);
}

接着看prepare_invoke,逻辑实现于load_invoke_cp_cache_entry

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
                                               Register method,
                                               Register itable_index,
                                               Register flags,
                                               bool is_invokevirtual,
                                               bool is_invokevfinal, /*unused*/
                                               bool is_invokedynamic) {
  const Register cache = rcx;
  const Register index = rdx;
  ......
  // 这个地方offset的计算稍微有点绕,需要关注一下ConstantPool,ConstantPoolCache和
  // ConstantPoolCacheEntry类的定义及相互关系。其中ConstantPoolCache的对象分配allocate
  // 方法使用了C++ placement new语义,也就是说提前分配了ConstantPoolCache对象+length*sizeof(ConstantPoolCacheEntry)
  // 长度的内存,然后返回了ConstantPoolCache*。此后通过ConstantPoolCache*就可以以下标方式访问ConstantPoolCacheEntry
  // 元素了.
  // 此外Address(cache, index, Address::times_ptr, method_offset) => mov 0x18(%rcx,%rdx,8),%rbx
  // 相当于说是先通过下标到达大概位置,然后再加上偏移量。而直觉计算是先加上偏移量到达ConstantPoolCacheEntry
  // 数组头部,然后再通过下标索引
  const int method_offset = in_bytes(
    ConstantPoolCache::base_offset() +
      ((byte_no == f2_byte)
       ? ConstantPoolCacheEntry::f2_offset()
       : ConstantPoolCacheEntry::f1_offset()));
  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
                                    ConstantPoolCacheEntry::flags_offset());
  // access constant pool cache fields
  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
                                    ConstantPoolCacheEntry::f2_offset());

  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
  resolve_cache_and_index(byte_no, cache, index, index_size);
  // 编译为mov 0x18(%rcx,%rdx,8),%rbx,因为rbx作为入参来接收method
    __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));

  if (itable_index != noreg) {
    // pick up itable or appendix index from f2 also:
    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
  }
  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
}

再看resolve_cache_and_index

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
void TemplateTable::resolve_cache_and_index(int byte_no,
                                            Register Rcache,
                                            Register index,
                                            size_t index_size) {
  const Register temp = rbx;
  Label resolved;
  	// 路径1: 通过ConstantPoolCache返回
    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
  	// 0xb8即为Bytecodes::_invokestatic
    __ cmpl(temp, (int) bytecode());  // 汇编结果:cmp    $0xb8,%ebx
    // 如果已经完成解析(resolve),那么直接跳到方法末尾
    __ jcc(Assembler::equal, resolved);

  // 路径2: 方法解析
  // 首次调用,需要完成方法解析,并将Method* 放入ConstantPoolCache相应下标位置保存
  address entry;
  switch (bytecode()) {
  case Bytecodes::_getstatic:
  case Bytecodes::_putstatic:
  case Bytecodes::_getfield:
  case Bytecodes::_putfield:
    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
    break;
  case Bytecodes::_invokevirtual:
  case Bytecodes::_invokespecial:
  case Bytecodes::_invokestatic:
  case Bytecodes::_invokeinterface:
    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
    break;
  ......
  default:
    fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
    break;
  }
  __ movl(temp, (int) bytecode());
  __ call_VM(noreg, entry, temp);

  // Update registers with resolved info
  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
  __ bind(resolved);
}

再看路径1: __ get_cache_and_index_and_bytecode_at_bcp

hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
                                                                        Register index,
                                                                        Register bytecode,
                                                                        int byte_no,
                                                                        int bcp_offset,
                                                                        size_t index_size) {
  // 取得ConstantPoolCache指针与invokestatic对应方法常量池索引
  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
  // 从ConstantPoolCache后续ConstantPoolCacheEntry数组下标处取得bytecode, 如果此前完成解析则最终应为0xb8(invokestatic)
  movl(bytecode, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
  const int shift_count = (1 + byte_no) * BitsPerByte;
  shrl(bytecode, shift_count);
  andl(bytecode, ConstantPoolCacheEntry::bytecode_1_mask);
}

先看get_cache_and_index_at_bcp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
                                                           Register index,
                                                           int bcp_offset,
                                                           size_t index_size) {
  get_cache_index_at_bcp(index, bcp_offset, index_size);
  movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
  shll(index, 2);
}

void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
                                                       int bcp_offset,
                                                       size_t index_size) {
  if (index_size == sizeof(u2)) {
    // 从字节码流中取得short型数值,因为invokestatic后面紧跟着两个字节的方法常量池索引
    load_unsigned_short(index, Address(r13, bcp_offset));
  } else if (index_size == sizeof(u4)) {
    ...
  } else if (index_size == sizeof(u1)) {
    load_unsigned_byte(index, Address(r13, bcp_offset));
  } 
}

再看一下ConstantPoolCacheEntry定义,b1字段在方法完成解析后,即保存了字节码定义值(此处为0xb8),若未解析则应该为0,所以可以通过简单的__ cmpl(temp, (int) bytecode())来判断是否完成解析。我们还需要再看一下路径2才能完全明白方法解析内涵。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// A ConstantPoolCacheEntry describes an individual entry of the constant
// pool cache. There's 2 principal kinds of entries: field entries for in-
// stance & static field access, and method entries for invokes. Some of
// the entry layout is shared and looks as follows:
//
// bit number |31                0|
// bit length |-8--|-8--|---16----|
// --------------------------------
// _indices   [ b2 | b1 |  index  ]  index = constant_pool_index
// _f1        [  entry specific   ]  metadata ptr (method or klass)
// _f2        [  entry specific   ]  vtable or res_ref index, or vfinal method ptr
// _flags     [tos|0|F=1|0|0|0|f|v|0 |0000|field_index] (for field entries)
// bit length [ 4 |1| 1 |1|1|1|1|1|1 |-4--|----16-----]
// _flags     [tos|0|F=0|M|A|I|f|0|vf|0000|00000|psize] (for method entries)
// bit length [ 4 |1| 1 |1|1|1|1|1|1 |-4--|--8--|--8--]

回看路径2: 方法解析

1
entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);

hotspot/src/share/vm/interpreter/interpreterRuntime.cpp

其实方法解析到底是什么,就是通过常量池信息,回溯其定义类及基类,最终找到该方法精确定义位置(比如要实现多态调用等)并返回其在jvm内部的Method*。这一部分内容就和类的加载和解析关联起来,不详述。不过在resolve_invoke返回前,做了一件重要的事情

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
  switch (info.call_kind()) {
  case CallInfo::direct_call:
    cache_entry(thread)->set_direct_call(
      bytecode,
      info.resolved_method());
    break;
  case CallInfo::vtable_call:
    cache_entry(thread)->set_vtable_call(
      bytecode,
      info.resolved_method(),
      info.vtable_index());
    break;
    ......
  default:  ShouldNotReachHere();
  }

hotspot/src/share/vm/oops/cpCache.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
void ConstantPoolCacheEntry::set_direct_or_vtable_call(Bytecodes::Code invoke_code,
                                                       methodHandle method,
                                                       int vtable_index) {
  switch (invoke_code) {
      ......
    case Bytecodes::_invokestatic:
      set_method_flags(as_TosState(method->result_type()),
                       ((is_vfinal()               ? 1 : 0) << is_vfinal_shift) |
                       ((method->is_final_method() ? 1 : 0) << is_final_shift),
                       method()->size_of_parameters());
      set_f1(method());
      byte_no = 1;
      break;
    default:
      ShouldNotReachHere();
      break;
  }

  if (byte_no == 1) {
    set_bytecode_1(invoke_code);
  } 
  ......
}

void set_f1(Metadata* f1)                            {
    Metadata* existing_f1 = (Metadata*)_f1; // read once
    assert(existing_f1 == NULL || existing_f1 == f1, "illegal field change");
    _f1 = f1;
  }

void ConstantPoolCacheEntry::set_bytecode_1(Bytecodes::Code code) {
  OrderAccess::release_store_ptr(&_indices, _indices | ((u_char)code << bytecode_1_shift));
}

再去看一下ConstantPoolCacheEntry注释,即明白set_f1完成了静态类型方法Method*的设置,set_bytecode_1则在b1位置保存了字节码定义值。此后即不需要再次解析,从ConstantPoolCache返回即可。这也就是方法解析及调用机制的内涵。

2.关键源码与汇编比照

下图是源码汇编实现与最终编译结果关联比照:

image

3.测试代码

1
2
3
4
5
6
7
8
9
10
public class InterpretStatic {
    public static void main(String[] args) {
        InterpretStatic.int_static_test_method();
    }

    public static int int_static_test_method() {
        int i = 1;
        return i;
    }
}

关键信息:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
  public static void main(java.lang.String[]);
    descriptor: ([Ljava/lang/String;)V
    flags: ACC_PUBLIC, ACC_STATIC
    Code:
      stack=1, locals=1, args_size=1
         0: invokestatic  #2                  // Method int_static_test_method:()I
         3: pop
         4: return
  
  Constant pool:
   #1 = Methodref          #4.#15         //  java/lang/Object."<init>":()V
   #2 = Methodref          #3.#16         //  InterpretStatic.int_static_test_method:()I
   #3 = Class              #17            //  InterpretStatic
   #4 = Class              #18            //  java/lang/Object
   #5 = Utf8               <init>
   #6 = Utf8               ()V
   #7 = Utf8               Code
   #8 = Utf8               LineNumberTable
   #9 = Utf8               main
  #10 = Utf8               ([Ljava/lang/String;)V
  #11 = Utf8               int_static_test_method
  #12 = Utf8               ()I
  #13 = Utf8               SourceFile
  #14 = Utf8               InterpretStatic.java
  #15 = NameAndType        #5:#6          //  "<init>":()V
  #16 = NameAndType        #11:#12        //  int_static_test_method:()I
  #17 = Utf8               InterpretStatic
  #18 = Utf8               java/lang/Object

4.debug过程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java

// 第一轮:保留入口点内存地址0x00007fffe1043690
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 20 'invokestatic|zerolocals'

// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1

run InterpretStatic

// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(vtos)[184]
p 'AbstractInterpreter::_entry_table'[0]  // zerolocals

// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口
// 查验信息
p (unsigned char)*($r13)
p (unsigned char)*($r13+1)
p (unsigned char)*($r13+2)
p (unsigned char)*($r13+3)
p (unsigned char)*($r13+4)

// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe1043950 // jmpq   *0x58(%rbx)
p ((Method*)($rbx))->name() // int_static_test_method

图示1:

image

图示2:

image

5.栈帧

在"1.invokestatic的汇编实现"一节的实现过程中,有两处涉及帧偏移量的引用:

1
2
3
4
5
// 代码具体位置可自行搜索
movptr(Address(rbp, frame::interpreter_frame_bcx_offset * wordSize), r13);
// 在invokestatic实现中,该行代码意味着即将调用一个静态方法时,需要从当前方法帧获取常量池指针,以进一步拿到静态方法信息
// rbp是当前栈帧基址
movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));

那么上面的frame::interpreter_frame_***_offset是什么呢?hotspot/src/cpu/x86/vm/frame_x86.hpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
enum {
    pc_return_offset                                 =  0,
    // All frames
    link_offset                                      =  0,
    return_addr_offset                               =  1,
    // non-interpreter frames
    sender_sp_offset                                 =  2,
    
    // Interpreter frames
    interpreter_frame_result_handler_offset          =  3, // for native calls only
    interpreter_frame_oop_temp_offset                =  2, // for native calls only

    interpreter_frame_sender_sp_offset               = -1,
    // outgoing sp before a call to an invoked method
    interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
    interpreter_frame_method_offset                  = interpreter_frame_last_sp_offset - 1,
    interpreter_frame_mdx_offset                     = interpreter_frame_method_offset - 1,
    interpreter_frame_cache_offset                   = interpreter_frame_mdx_offset - 1,
    interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
    interpreter_frame_bcx_offset                     = interpreter_frame_locals_offset - 1,
    interpreter_frame_initial_sp_offset              = interpreter_frame_bcx_offset - 1,
    interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset,
    interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
		......
  };

调用参数由当前方法传递,而被调用java静态方法栈帧由zerolocals类型方法解释器入口generate_normal_entry准备,更准确的说其中固定部分由generate_fixed_frame准备。我们可以分析上面frame::interpreter_frame_***_offset定义与generate_fixed_frame对应关系,如下图:

image

根据上面的分析,我们可以看出,不论是从C++进入java main方法(上一篇博文),还是invokestatic之类的字节码形式方法调用,最终殊途同归,都进入到相应类型入口generate_normal_entry,帧也必然相同。

6.栈帧的简单验证

简单查验传参,方法内局部变量在栈内情况

6.1 验证程序

1
2
3
4
5
6
7
8
9
10
11
12
public class InterpretStatic {
    public static void main(String[] args) {
        int i = 1;
        int j = 2;
        InterpretStatic.int_static_test_method(i, j);
    }

    public static int int_static_test_method(int i, int j) {
        int a = i+j;
        return a;
    }
}

编译结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
public static void main(java.lang.String[]);
    descriptor: ([Ljava/lang/String;)V
    flags: ACC_PUBLIC, ACC_STATIC
    Code:
      stack=2, locals=3, args_size=1
         0: iconst_1
         1: istore_1
         2: iconst_2
         3: istore_2
         4: iload_1
         5: iload_2
         6: invokestatic  #2                  // Method int_static_test_method:(II)I
         9: pop
        10: return

  public static int int_static_test_method(int, int);
    descriptor: (II)I
    flags: ACC_PUBLIC, ACC_STATIC
    Code:
      stack=2, locals=3, args_size=2
         0: iload_0
         1: iload_1
         2: iadd
         3: istore_2
         4: iload_2
         5: ireturn

6.2 栈帧分析

image

6.3 debug过程

思路:查验栈帧中入参和局部变量在执行过程的变化

  • 在main方法即将执行int_static_test_method时,方法入参情况
  • 进入int_static_test_method后,执行完istore_2,局部变量a完成赋值,查验该处内存
  • 断点打在zerolocals与iload_2入口处即可
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java

// 第一轮:保留入口点内存地址0x00007fffe1043690
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 2000 'invokestatic|zerolocals' > static.log
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 20 'invokestatic|zerolocals'

// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1

run InterpretStatic

// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(vtos)[184]
p 'AbstractInterpreter::_entry_table'[0]  // zerolocals 0x00007fffe101e2e0

// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口
// 查验信息
p (unsigned char)*($r13) // invokestatic
p (unsigned char)*($r13+1)
p (unsigned char)*($r13+2)
p (unsigned char)*($r13+3) // pop
p (unsigned char)*($r13+4) // return

// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe101e2e0 // zerolocals
p ((Method*)($rbx))->name() // int_static_test_method
p $37->as_C_string()
$38 = 0x7ffff00097e8 "int_static_test_method"
// 单步执行到0x7fffe101e443处:test   %edx,%edx 即将分配方法内局部变量
// 此时验证入参
p /x $rsp
$39 = 0x7ffff7fe9728
p *0x7ffff7fe9728
$40 = 2
p *(0x7ffff7fe9728+8)
$41 = 1
p *(0x7ffff7fe9728-8)
$42 = 0

p 'TemplateInterpreter::_active_table'.table_for(vtos)[28] // iload_2 0x7fffe102d8f0
b *0x7fffe102d8f0 // iload_2 vtos入口

p *(0x7ffff7fe9720) // int_static_test_method局部变量a地址
$44 = 3

image

image

image

6.4 方法退出时栈帧变化

6.4.1 代码分析

当方法int_static_test_method通过invokestatic在main函数内执行完毕时,栈是什么样子的呢?我们可以分析进入解释执行int_static_test_method时以及方法最后一个字节码ireturn对栈帧做了哪些操作。其中有两处关键:

  • generate_fixed_frame

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    
    void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
      // initialize fixed part of activation frame
      __ push(rax);        // save return address
      __ enter();          // save old & set new rbp
      __ push(r13);        // set sender sp
      __ push((int)NULL_WORD); // leave last_sp as null
      __ movptr(r13, Address(rbx, Method::const_offset()));      // get ConstMethod*
      __ lea(r13, Address(r13, ConstMethod::codes_offset())); // get codebase
      __ push(rbx);        // save Method*
      ......
    
  • ireturn字节码实现TemplateTable::_return

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    
    __ remove_activation(state, r13);
    __ jmp(r13);
      
    // ----------------------------------------------
    // __ remove_activation
    // remove activation
    // get sender sp
    movptr(rbx, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize));
    leave();                           // remove frame anchor
    pop(ret_addr);                     // get return address
    mov(rsp, rbx);                     // set sp to sender sp
      
    // 上述代码编译后
      0x00007fffe103dda4: mov    (%rsp),%eax
      0x00007fffe103dda7: add    $0x8,%rsp
      0x00007fffe103ddab: mov    -0x8(%rbp),%rbx
      0x00007fffe103ddaf: leaveq 
      0x00007fffe103ddb0: pop    %r13
      0x00007fffe103ddb2: mov    %rbx,%rsp
      0x00007fffe103ddb5: jmpq   *%r13
    

上面的 __ push(r13); 与 mov(rsp, rbx); 是对应的,一个保存,一个恢复,图示:

image

入参由调用方准备,因此通过ireturn返回后,回到栈帧最后一个入参顶部是符合直觉逻辑的,中间部分全部销毁。

6.4.2 debug验证

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
gdb /var/shared/openjdk/build/linux-x86_64-normal-server-slowdebug/jdk/bin/java

// 第一轮:保留入口点内存地址0x00007fffe1043690
// 生成并保留汇编代码,以便于debug及断点
run -XX:+PrintInterpreter InterpretStatic | grep -E -A 3000 'invokestatic|zerolocals|ireturn' > frame.log

// jdk/src/share/bin/java.c
// 首先打且仅打该断点
b java.c:472 // breakpoint 1

run InterpretStatic

// 查验信息
p 'TemplateInterpreter::_active_table'.table_for(itos)[172] // ireturn 0x7fffe103d487

// 再次断点
b *0x00007fffe1043690 // invokestatic vtos入口

// 再次断点,即将进入解释器入口zerolocals
b *0x00007fffe101e2e0 // zerolocals

// 单步执行到0x7fffe101e443处:test   %edx,%edx 即将分配方法内局部变量
// 此时验证保存的send sp
p /x $r13 
$25 = 0x7ffff7fe9728
p *0x7ffff7fe9728
$26 = 2
p *(0x7ffff7fe9728+8)
$27 = 1

// ireturn返回时恢复栈帧
b *0x00007fffe103ddb5
(gdb) p /x $r13
$28 = 0x7fffe10070f0
(gdb) p /x $rsp
$29 = 0x7ffff7fe9728
(gdb) p /x *0x7ffff7fe9728
$31 = 0x2

方法栈帧准备:

image

ireturn退出恢复:

image

所以,所谓sender sp,可以理解为来者(调用方) stack pointer,即为入参栈顶