synchronized方法和synchronized块的区别

Difference between synchronized method and synchronized block

我试图通过示例来理解同步块和同步方法之间的区别。考虑以下简单的 class:

public class Main {
    // Monitor object for the synchronized block in action().
    private static final Object lock = new Object();
    // Shared counter; only touched inside the synchronized block in action().
    private static long l;
    public static void main(String[] args) { 

    }

    // Updates and prints the shared counter. The synchronized block below is
    // what compiles to the explicit monitorenter/monitorexit bytecodes shown
    // in the javap listing that follows this class in the article.
    public static void action(){
        synchronized(lock){
            l = (l + 1) * 2;
            System.out.println(l);
        }
    }
}

编译后的 Main::action() 将如下所示:

public static void action();
  Code:
     0: getstatic     #2                  // Field lock:Ljava/lang/Object;
     3: dup
     4: astore_0
     5: monitorenter                      // <---- ENTERING
     6: getstatic     #3                  // Field l:J
     9: lconst_1
     10: ladd
     11: ldc2_w        #4                  // long 2l
     14: lmul
     15: putstatic     #3                  // Field l:J
     18: getstatic     #6                  // Field java/lang/System.out:Ljava/io/PrintStream;
     21: getstatic     #3                  // Field l:J
     24: invokevirtual #7                  // Method java/io/PrintStream.println:(J)V
     27: aload_0
     28: monitorexit                       // <---- EXITING
     29: goto          37
     32: astore_1
     33: aload_0
     34: monitorexit                       // <---- EXITING TWICE????
     35: aload_1
     36: athrow
     37: return

我认为我们最好使用同步块而不是同步方法,因为它提供了更多的封装,防止客户端影响同步策略(使用同步方法任何客户端都可以获得影响同步策略的锁)。但从性能的角度来看,在我看来几乎是一样的。现在考虑同步方法版本:

// Same behavior as the block version, but synchronization is expressed through
// the method's ACC_SYNCHRONIZED flag instead of explicit monitorenter/monitorexit
// instructions — which is why no locking bytecodes appear in the javap listing below.
public static synchronized void action(){
    l = (l + 1) * 2;
    System.out.println(l);
}

public static synchronized void action();
Code:
   0: getstatic     #2                  // Field l:J
   3: lconst_1
   4: ladd
   5: ldc2_w        #3                  // long 2l
   8: lmul
   9: putstatic     #2                  // Field l:J
  12: getstatic     #5                  // Field java/lang/System.out:Ljava/io/PrintStream;
  15: getstatic     #2                  // Field l:J
  18: invokevirtual #6                  // Method java/io/PrintStream.println:(J)V
  21: return

因此,在同步方法版本中,要执行的指令要少得多,所以我会说它更快。

问题:同步方法比同步块快吗?

考虑到你的 synchronized 块之后紧跟一条 goto,正常路径上会跳过那大约 6 条异常处理指令,两个版本实际执行的指令数量并没有什么不同。

它实际上归结为如何最好地跨多个访问线程公开一个对象。

恰恰相反,同步方法实际上可能比同步块更慢,因为同步方法往往会使更多的代码被串行化。

但是,如果两者包含相同数量的代码,那么下面的测试所支持的性能应该不会有太大差异。

支持类

/**
 * A named benchmark workload.
 *
 * <p>Note: the {@code public} modifiers on the original members were redundant —
 * interface methods are implicitly {@code public abstract}.
 */
public interface TestMethod {
    /**
     * Runs the workload against the given input array.
     *
     * @param array the input data to process
     */
    void test(double[] array);

    /**
     * @return a human-readable name identifying this implementation
     */
    String getName();
}

/**
 * Benchmark workload guarded by a synchronized <em>block</em> only.
 *
 * <p>Fix: the original declared {@code test} as a {@code synchronized} method
 * <em>and</em> used a synchronized block inside it, so the "block" measurement
 * also paid the method-level lock on {@code this} — skewing the comparison.
 * The method-level {@code synchronized} keyword has been removed so this class
 * measures block synchronization alone.
 */
public class TestSynchronizedBlock implements TestMethod {
    /** Dedicated monitor so the lock is not exposed via {@code this}. */
    private static final Object lock = new Object();

    /** Sums all pairwise products of the array elements under {@code lock}. */
    @Override
    public void test(double[] arr) {
        synchronized (lock) {
            double sum = 0;
            for (double d : arr) {
                for (double d1 : arr) {
                    sum += d * d1;
                }
            }
            //System.out.print(sum + " ");
        }
    }

    @Override
    public String getName() {
        return getClass().getName();
    }
}

/**
 * Benchmark workload guarded solely by a synchronized method: the whole body
 * of {@link #test(double[])} runs while holding the monitor of {@code this}.
 */
public class TestSynchronizedMethod implements TestMethod {

    /** Sums all pairwise products of the array elements under the method lock. */
    public synchronized void test(double[] arr) {
        double total = 0;
        for (int i = 0; i < arr.length; i++) {
            for (int j = 0; j < arr.length; j++) {
                total += arr[i] * arr[j];
            }
        }
        //System.out.print(total + " ");
    }

    @Override
    public String getName() {
        return this.getClass().getName();
    }
}

主要Class

import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Driver that fires 100 concurrent invocations of a {@link TestMethod}
 * implementation at a shared input array and reports the summed wall time.
 */
public class TestSynchronizedMain {
    public static void main(String[] args) {
        TestSynchronizedMain main = new TestSynchronizedMain();
        TestMethod testMethod = null;

        // Same fixed-size random input for whichever implementation runs.
        Random rand = new Random();
        double[] arr = new double[10000];
        for (int j = 0; j < arr.length; j++) {
            arr[j] = rand.nextDouble() * 10000;
        }

        /*testMethod = new TestSynchronizedBlock();
        main.testSynchronized(testMethod, arr);*/

        testMethod = new TestSynchronizedMethod();
        main.testSynchronized(testMethod, arr);

    }

    /**
     * Submits 100 tasks, each timing one {@code testMethod.test(arr)} call,
     * then waits for completion and prints the accumulated time in seconds.
     *
     * @param testMethod workload under test
     * @param arr        shared, read-only input for every task
     */
    public void testSynchronized(final TestMethod testMethod, double[] arr) {
        System.out.println("Testing " + testMethod.getName());

        ExecutorService executor = Executors.newCachedThreadPool();
        AtomicLong time = new AtomicLong();
        AtomicLong startCounter = new AtomicLong();
        AtomicLong endCounter = new AtomicLong();

        for (int i = 0; i < 100; i++) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    // System.out.println("Started");
                    startCounter.incrementAndGet();
                    long startTime = System.currentTimeMillis();

                    testMethod.test(arr);

                    long endTime = System.currentTimeMillis();
                    long delta = endTime - startTime;
                    //System.out.print(delta + " ");
                    time.addAndGet(delta);
                    endCounter.incrementAndGet();
                }
            });
        }

        executor.shutdown();
        try {
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
            // Fix: read endCounter via get() like startCounter (same printed
            // value — AtomicLong.toString() prints the value — but consistent).
            System.out.println("time taken = " + (time.get() / 1000.0) + " : starts = " + startCounter.get() + " : ends = " + endCounter.get());
        } catch (InterruptedException e) {
            // Fix: restore the interrupt status instead of silently clearing it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}

多次运行的主要输出

1.  Testing TestSynchronizedBlock
    time taken = 537.974 : starts = 100 : ends = 100

    Testing TestSynchronizedMethod
    time taken = 537.052 : starts = 100 : ends = 100

2.  Testing TestSynchronizedBlock
    time taken = 535.983 : starts = 100 : ends = 100

    Testing TestSynchronizedMethod
    time taken = 537.534 : starts = 100 : ends = 100

3.  Testing TestSynchronizedBlock
    time taken = 553.964 : starts = 100 : ends = 100

    Testing TestSynchronizedMethod
    time taken = 552.352 : starts = 100 : ends = 100

注意: 测试是在 windows 8、64 位、i7 机器上完成的。 实际时间并不重要,但相对值很重要。

使用本答案底部给出的 Java 代码进行的快速测试表明 synchronized method 更快。在一台 i7 机器的 Windows JVM 上运行该代码得到以下平均值:

synchronized block: 0.004254 s

synchronized method: 0.001056 s

也就是说,synchronized method 实际上更快,这与你基于字节码的评估是一致的。

然而,让我感到困惑的是两者时间上的明显差异。我本以为 JVM 对同步方法仍会锁定底层对象,时间差异应该可以忽略不计,但结果并非如此。由于 Oracle JVM 是闭源的,我查看了 OpenJDK 的 HotSpot JVM 源码,并深入研究了字节码解释器中处理同步方法/同步块的部分。重申一下,以下 JVM 代码来自 OpenJDK,但我认为官方 JVM 在处理这种情况的方式上具有类似的性质。

构建 .class 文件时,如果方法是同步的,编译器会在字节码中记录这一点,提醒 JVM 该方法是同步的(类似于方法为 static 等情况时设置的访问标志),底层 JVM 代码随后会在方法结构上设置相应的标志位。

当 byte-code 解释器点击 byte-code 进行方法调用时,在调用方法之前调用以下代码以检查是否需要锁定:

// NOTE(review): quoted excerpt from OpenJDK's C++ bytecode interpreter
// (bytecodeInterpreter.cpp). This is the method-call entry path: before a
// synchronized method's body runs, the interpreter acquires the monitor of
// the Class mirror (static methods) or of the receiver (instance methods).
case method_entry: {
  /* CODE_EDIT: irrelevant code removed for brevities sake */

  // lock method if synchronized
  if (METHOD->is_synchronized()) {
      // oop rcvr = locals[0].j.r;
      oop rcvr;
      if (METHOD->is_static()) {
        // Static method: lock the java.lang.Class object for the declaring class.
        rcvr = METHOD->constants()->pool_holder()->java_mirror();
      } else {
        // Instance method: lock the receiver found in local slot 0 (`this`).
        rcvr = LOCALS_OBJECT(0);
        VERIFY_OOP(rcvr);
      }
      // The initial monitor is ours for the taking
      BasicObjectLock* mon = &istate->monitor_base()[-1];
      oop monobj = mon->obj();
      assert(mon->obj() == rcvr, "method monitor mis-initialized");

      bool success = UseBiasedLocking;
      if (UseBiasedLocking) {
        /* CODE_EDIT: this code is only run if you have biased locking enabled as a JVM option */
      }
      if (!success) {
        // Fast path: try to CAS our lock record into the object's mark word.
        markOop displaced = rcvr->mark()->set_unlocked();
        mon->lock()->set_displaced_header(displaced);
        if (Atomic::cmpxchg_ptr(mon, rcvr->mark_addr(), displaced) != displaced) {
          // Is it simple recursive case?
          if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
            mon->lock()->set_displaced_header(NULL);
          } else {
            // Contended (or otherwise non-trivial) case: fall back to the runtime.
            CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception);
          }
        }
      }
  }
  
  /* CODE_EDIT: irrelevant code removed for brevities sake */

  goto run;
}

然后,当方法执行完成并返回到 JVM 的函数处理程序时,将调用以下代码来解锁方法(注意:布尔值 method_unlock_needed 是在调用方法之前通过 bool method_unlock_needed = METHOD->is_synchronized() 设置的):

// NOTE(review): quoted excerpt from OpenJDK's bytecode interpreter — the
// method-return path that releases a synchronized method's monitor.
// method_unlock_needed was set from METHOD->is_synchronized() before the call.
if (method_unlock_needed) {
    if (base->obj() == NULL) {
      /* CODE_EDIT: irrelevant code removed for brevities sake */
    } else {
      oop rcvr = base->obj();
      if (rcvr == NULL) {
        if (!suppress_error) {
          VM_JAVA_ERROR_NO_JUMP(vmSymbols::java_lang_NullPointerException(), "");
          illegal_state_oop = THREAD->pending_exception();
          THREAD->clear_pending_exception();
        }
      } else {
        BasicLock* lock = base->lock();
        markOop header = lock->displaced_header();
        base->set_obj(NULL);
        // If it isn't recursive we either must swap old header or call the runtime
        if (header != NULL) {
          // Fast path: CAS the displaced header back into the mark word.
          if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
            // restore object for the slow case
            base->set_obj(rcvr);
            {
              // Prevent any HandleMarkCleaner from freeing our live handles
              HandleMark __hm(THREAD);
              CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));
            }
            if (THREAD->has_pending_exception()) {
              if (!suppress_error) illegal_state_oop = THREAD->pending_exception();
              THREAD->clear_pending_exception();
            }
          }
        }
      }
    }
}

语句 CALL_VM(InterpreterRuntime::monitorenter(THREAD, mon), handle_exception); 和 CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(THREAD, base));——更具体地说,是函数 InterpreterRuntime::monitorenter 和 InterpreterRuntime::monitorexit——就是 JVM 中同步方法和同步块用来锁定/解锁底层对象的代码。代码中的 run 标签指向庞大的字节码解释器 switch 语句,用于处理正在解析的各种字节码。

从这里开始,如果遇到同步块操作码(monitorentermonitorexitbyte-codes),下面的case语句是运行 (分别对应 monitorentermonitorexit):

// NOTE(review): quoted excerpt from OpenJDK's bytecode interpreter — the
// handler for the `monitorenter` bytecode emitted by synchronized blocks.
// It must first scan the frame's monitor area for a free slot (or detect a
// recursive enter) before it can attempt the same CAS fast path the
// synchronized-method entry uses — extra work the method path skips.
CASE(_monitorenter): {
    oop lockee = STACK_OBJECT(-1);
    // derefing's lockee ought to provoke implicit null check
    CHECK_NULL(lockee);
    // find a free monitor or one already allocated for this object
    // if we find a matching object then we need a new monitor
    // since this is recursive enter
    BasicObjectLock* limit = istate->monitor_base();
    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
    BasicObjectLock* entry = NULL;
    while (most_recent != limit ) {
      if (most_recent->obj() == NULL) entry = most_recent;
      else if (most_recent->obj() == lockee) break;
      most_recent++;
    }
    if (entry != NULL) {
      entry->set_obj(lockee);
      // Fast path: CAS our lock record into the object's mark word.
      markOop displaced = lockee->mark()->set_unlocked();
      entry->lock()->set_displaced_header(displaced);
      if (Atomic::cmpxchg_ptr(entry, lockee->mark_addr(), displaced) != displaced) {
        // Is it simple recursive case?
        if (THREAD->is_lock_owned((address) displaced->clear_lock_bits())) {
          entry->lock()->set_displaced_header(NULL);
        } else {
          // Contended case: fall back to the runtime, same as method entry.
          CALL_VM(InterpreterRuntime::monitorenter(THREAD, entry), handle_exception);
        }
      }
      UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
    } else {
      // No free slot: ask the VM to grow the monitor area and re-execute.
      istate->set_msg(more_monitors);
      UPDATE_PC_AND_RETURN(0); // Re-execute
    }
}

// NOTE(review): quoted excerpt from OpenJDK's bytecode interpreter — the
// handler for the `monitorexit` bytecode. It linearly searches the frame's
// monitor area for the slot owning this object before unlocking; an object
// not found there means an IllegalMonitorStateException.
CASE(_monitorexit): {
    oop lockee = STACK_OBJECT(-1);
    CHECK_NULL(lockee);
    // derefing's lockee ought to provoke implicit null check
    // find our monitor slot
    BasicObjectLock* limit = istate->monitor_base();
    BasicObjectLock* most_recent = (BasicObjectLock*) istate->stack_base();
    while (most_recent != limit ) {
      if ((most_recent)->obj() == lockee) {
        BasicLock* lock = most_recent->lock();
        markOop header = lock->displaced_header();
        most_recent->set_obj(NULL);
        // If it isn't recursive we either must swap old header or call the runtime
        if (header != NULL) {
          // Fast path: CAS the displaced header back; on failure go to the runtime.
          if (Atomic::cmpxchg_ptr(header, lockee->mark_addr(), lock) != lock) {
            // restore object for the slow case
            most_recent->set_obj(lockee);
            CALL_VM(InterpreterRuntime::monitorexit(THREAD, most_recent), handle_exception);
          }
        }
        UPDATE_PC_AND_TOS_AND_CONTINUE(1, -1);
      }
      most_recent++;
    }
    // Need to throw illegal monitor state exception
    CALL_VM(InterpreterRuntime::throw_illegal_monitor_state_exception(THREAD), handle_exception);
    ShouldNotReachHere();
}

同样,这里调用了相同的 InterpreterRuntime::monitorenter 和 InterpreterRuntime::monitorexit 函数来锁定底层对象,但在此过程中会产生更多的开销,这解释了为什么同步方法和同步块的耗时会有所不同。

显然,synchronized 方法和 synchronized 块在使用时都有各自需要权衡的利弊,但这里的问题是哪个更快:根据初步测试和 OpenJDK 的源码来看,同步方法(单独使用时)似乎确实比同步块(单独使用时)更快。不过你的结果可能会有所不同(尤其是代码越复杂时),因此如果性能是一个关键问题,最好自己进行测试,并据此衡量哪种方式对你的情况更有意义。

这里是相关的 Java 测试代码:

Java 测试码

/**
 * Micro-benchmark comparing a synchronized block against a synchronized
 * static method: 1000 rounds, each running 8 threads that perform 10000
 * locked increments, and prints the average seconds per round for each style.
 */
public class Main
{
    /** Monitor for the synchronized-block variant. */
    public static final Object lock = new Object();
    /** Shared counter; every mutation happens under a lock. */
    private static long l = 0;

    /** Increments the counter inside a synchronized block on {@link #lock}. */
    public static void SyncLock()
    {
        synchronized (lock) {
            ++l;
        }
    }

    /** Increments the counter from a synchronized static method (locks Main.class). */
    public static synchronized void SyncFunction()
    {
        ++l;
    }

    /** Task hammering the synchronized-block variant 10000 times. */
    public static class ThreadSyncLock implements Runnable
    {
        @Override
        public void run()
        {
            for (int i = 0; i < 10000; ++i) {
                SyncLock();
            }
        }
    }

    /** Task hammering the synchronized-method variant 10000 times. */
    public static class ThreadSyncFn implements Runnable
    {
        @Override
        public void run()
        {
            for (int i = 0; i < 10000; ++i) {
                SyncFunction();
            }
        }
    }

    /**
     * Runs one batch of 8 threads of the chosen variant and returns the
     * elapsed wall time in seconds. (Extracted from main(), which previously
     * duplicated this loop for each variant.)
     *
     * @param useBlock true for the synchronized-block tasks, false for the
     *                 synchronized-method tasks
     * @return elapsed seconds for the batch
     * @throws InterruptedException if interrupted while joining
     */
    private static double timeBatch(boolean useBlock) throws InterruptedException
    {
        java.util.ArrayList<Thread> threads = new java.util.ArrayList<Thread>();
        for (int i = 0; i < 8; ++i) {
            threads.add(new Thread(useBlock ? new ThreadSyncLock() : new ThreadSyncFn()));
        }
        long start = System.currentTimeMillis();
        for (int i = 0; i < 8; ++i) { threads.get(i).start(); }
        for (int i = 0; i < 8; ++i) { threads.get(i).join(); }
        long end = System.currentTimeMillis();
        return (end - start) / 1000f;
    }

    public static void main(String[] args)
    {
        l = 0;
        try {
            double avg1 = 0, avg2 = 0;
            for (int x = 0; x < 1000; ++x) {
                // Block variant first, then method variant — same order as before.
                avg1 += timeBatch(true);
                l = 0;
                avg2 += timeBatch(false);
                l = 0;
            }
            System.out.format("avg1: %f s\navg2: %f s\n", (avg1/1000), (avg2/1000));
            l = 0;
        } catch (Throwable t) {
            // Best-effort benchmark harness: report and fall through, as before.
            System.out.println(t.toString());
        }
    }
}

希望这可以帮助增加一些清晰度。