kvm: Flush coalesced MMIO buffer periodly

The default action of coalesced MMIO is, cache the writing in buffer, until:
1. The buffer is full.
2. Or the exit to QEmu due to other reasons.

But this would result in a very late writing in some condition.
1. The each time write to MMIO content is small.
2. The writing interval is big.
3. No need for input or accessing other devices frequently.

This issue was observed in a experimental embbed system. The test image
simply print "test" every 1 seconds. The output in QEmu meets expectation,
but the output in KVM is delayed for seconds.

Per Avi's suggestion, I hooked flushing coalesced MMIO buffer in VGA update
handler. By this way, We don't need vcpu explicit exit to QEmu to
handle this issue.

Signed-off-by: Sheng Yang <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>
diff --git a/cpu-all.h b/cpu-all.h
index 57b69f8..1ccc9a8 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -915,6 +915,8 @@
 
 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
 
+void qemu_flush_coalesced_mmio_buffer(void);
+
 /*******************************************/
 /* host CPU ticks (if available) */
 
diff --git a/exec.c b/exec.c
index 76831a1..67fabae 100644
--- a/exec.c
+++ b/exec.c
@@ -2406,6 +2406,12 @@
         kvm_uncoalesce_mmio_region(addr, size);
 }
 
+void qemu_flush_coalesced_mmio_buffer(void)
+{
+    if (kvm_enabled())
+        kvm_flush_coalesced_mmio_buffer();
+}
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
     RAMBlock *new_block;
diff --git a/kvm-all.c b/kvm-all.c
index 15ec38e..f8350c9 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -59,6 +59,9 @@
     int vmfd;
     int regs_modified;
     int coalesced_mmio;
+#ifdef KVM_CAP_COALESCED_MMIO
+    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+#endif
     int broken_set_mem_region;
     int migration_log;
     int vcpu_events;
@@ -200,6 +203,12 @@
         goto err;
     }
 
+#ifdef KVM_CAP_COALESCED_MMIO
+    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
+        s->coalesced_mmio_ring = (void *) env->kvm_run +
+		s->coalesced_mmio * PAGE_SIZE;
+#endif
+
     ret = kvm_arch_init_vcpu(env);
     if (ret == 0) {
         qemu_register_reset(kvm_reset_vcpu, env);
@@ -466,10 +475,10 @@
         goto err;
     }
 
+    s->coalesced_mmio = 0;
 #ifdef KVM_CAP_COALESCED_MMIO
     s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
-#else
-    s->coalesced_mmio = 0;
+    s->coalesced_mmio_ring = NULL;
 #endif
 
     s->broken_set_mem_region = 1;
@@ -544,14 +553,12 @@
     return 1;
 }
 
-static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
+void kvm_flush_coalesced_mmio_buffer(void)
 {
 #ifdef KVM_CAP_COALESCED_MMIO
     KVMState *s = kvm_state;
-    if (s->coalesced_mmio) {
-        struct kvm_coalesced_mmio_ring *ring;
-
-        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
+    if (s->coalesced_mmio_ring) {
+        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
         while (ring->first != ring->last) {
             struct kvm_coalesced_mmio *ent;
 
@@ -609,7 +616,7 @@
             abort();
         }
 
-        kvm_run_coalesced_mmio(env, run);
+        kvm_flush_coalesced_mmio_buffer();
 
         ret = 0; /* exit loop */
         switch (run->exit_reason) {
diff --git a/kvm.h b/kvm.h
index 1c93ac5..59cba18 100644
--- a/kvm.h
+++ b/kvm.h
@@ -53,6 +53,7 @@
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
 int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
+void kvm_flush_coalesced_mmio_buffer(void);
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                           target_ulong len, int type);
diff --git a/vl.c b/vl.c
index 39833fc..50f133d 100644
--- a/vl.c
+++ b/vl.c
@@ -2996,6 +2996,7 @@
     DisplayState *ds = opaque;
     DisplayChangeListener *dcl = ds->listeners;
 
+    qemu_flush_coalesced_mmio_buffer();
     dpy_refresh(ds);
 
     while (dcl != NULL) {
@@ -3011,6 +3012,7 @@
 {
     uint64_t interval = GUI_REFRESH_INTERVAL;
 
+    qemu_flush_coalesced_mmio_buffer();
     qemu_mod_timer(nographic_timer, interval + qemu_get_clock(rt_clock));
 }