[PATCH 0/5] mmc: add double buffering for mmc block requests
From: Russell King - ARM Linux <hidden>
Date: 2011-02-05 17:03:22
Also in:
linux-mmc, lkml
Subsystem:
arm port, arm/faraday fa526 port, the rest · Maintainers:
Russell King, Hans Ulli Kroll, Linus Torvalds
On Wed, Jan 12, 2011 at 07:13:58PM +0100, Per Forlin wrote:
Add support to prepare one MMC request while another is active on the host. This is done by making issue_rw_rq() asynchronous. The increase in throughput is proportional to the time it takes to prepare a request and how fast the memory is. The faster the MMC/SD is, the more significant the prepare request time becomes. Measurements on U5500 and U8500 on eMMC show a significant performance gain for DMA on MMC for large reads. In the PIO case there is some gain in performance for large reads too. There seems to be little or no performance gain for writes; I don't have a good explanation for this yet.
It might be worth seeing what effect the following patch has. This moves the dsb out of the cache operations into a separate function, so we only do one dsb per DMA mapping/unmapping operation. That's particularly significant for the scatter-gather code. I don't remember the reason why this was dropped as a candidate for merging - could that be because the dsb needs to be before the outer cache maintenance? Adding Catalin for comment on that. arch/arm/include/asm/cacheflush.h | 4 ++++ arch/arm/include/asm/dma-mapping.h | 8 ++++++++ arch/arm/mm/cache-fa.S | 13 +++++++------ arch/arm/mm/cache-v3.S | 3 +++ arch/arm/mm/cache-v4.S | 3 +++ arch/arm/mm/cache-v4wb.S | 9 +++++++-- arch/arm/mm/cache-v4wt.S | 3 +++ arch/arm/mm/cache-v6.S | 13 +++++++------ arch/arm/mm/cache-v7.S | 9 ++++++--- arch/arm/mm/dma-mapping.c | 12 ++++++++++++ arch/arm/mm/proc-arm1020e.S | 10 +++++++--- arch/arm/mm/proc-arm1022.S | 10 +++++++--- arch/arm/mm/proc-arm1026.S | 10 +++++++--- arch/arm/mm/proc-arm920.S | 10 +++++++--- arch/arm/mm/proc-arm922.S | 10 +++++++--- arch/arm/mm/proc-arm925.S | 10 +++++++--- arch/arm/mm/proc-arm926.S | 10 +++++++--- arch/arm/mm/proc-arm940.S | 10 +++++++--- arch/arm/mm/proc-arm946.S | 10 +++++++--- arch/arm/mm/proc-feroceon.S | 13 ++++++++----- arch/arm/mm/proc-mohawk.S | 10 +++++++--- arch/arm/mm/proc-xsc3.S | 10 +++++++--- arch/arm/mm/proc-xscale.S | 10 +++++++--- 23 files changed, 152 insertions(+), 58 deletions(-)
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e290885..5928e78 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h@@ -223,6 +223,7 @@ struct cpu_cache_fns { void (*dma_map_area)(const void *, size_t, int); void (*dma_unmap_area)(const void *, size_t, int); + void (*dma_barrier)(void); void (*dma_flush_range)(const void *, const void *); };
@@ -250,6 +251,7 @@ extern struct cpu_cache_fns cpu_cache; */ #define dmac_map_area cpu_cache.dma_map_area #define dmac_unmap_area cpu_cache.dma_unmap_area +#define dmac_barrier cpu_cache.dma_barrier #define dmac_flush_range cpu_cache.dma_flush_range #else
@@ -278,10 +280,12 @@ extern void __cpuc_flush_dcache_area(void *, size_t); */ #define dmac_map_area __glue(_CACHE,_dma_map_area) #define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) +#define dmac_barrier __glue(_CACHE,_dma_barrier) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) extern void dmac_map_area(const void *, size_t, int); extern void dmac_unmap_area(const void *, size_t, int); +extern void dmac_barrier(void); extern void dmac_flush_range(const void *, const void *); #endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 256ee1c..1371db7 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h@@ -115,6 +115,8 @@ static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, ___dma_page_dev_to_cpu(page, off, size, dir); } +extern void __dma_barrier(enum dma_data_direction); + /* * Return whether the given device DMA address mask can be supported * properly. For example, if your device can only drive the low 24-bits
@@ -378,6 +380,7 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, BUG_ON(!valid_dma_direction(dir)); addr = __dma_map_single(dev, cpu_addr, size, dir); + __dma_barrier(dir); debug_dma_map_page(dev, virt_to_page(cpu_addr), (unsigned long)cpu_addr & ~PAGE_MASK, size, dir, addr, true);
@@ -407,6 +410,7 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, BUG_ON(!valid_dma_direction(dir)); addr = __dma_map_page(dev, page, offset, size, dir); + __dma_barrier(dir); debug_dma_map_page(dev, page, offset, size, dir, addr, false); return addr;
@@ -431,6 +435,7 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t handle, { debug_dma_unmap_page(dev, handle, size, dir, true); __dma_unmap_single(dev, handle, size, dir); + __dma_barrier(dir); } /**
@@ -452,6 +457,7 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t handle, { debug_dma_unmap_page(dev, handle, size, dir, false); __dma_unmap_page(dev, handle, size, dir); + __dma_barrier(dir); } /**
@@ -484,6 +490,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, return; __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir); + __dma_barrier(dir); } static inline void dma_sync_single_range_for_device(struct device *dev,
@@ -498,6 +505,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev, return; __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir); + __dma_barrier(dir); } static inline void dma_sync_single_for_cpu(struct device *dev,
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
index 7148e53..cdcfae2 100644
--- a/arch/arm/mm/cache-fa.S
+++ b/arch/arm/mm/cache-fa.S@@ -179,8 +179,6 @@ fa_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -197,8 +195,6 @@ fa_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -212,8 +208,6 @@ ENTRY(fa_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -240,6 +234,12 @@ ENTRY(fa_dma_unmap_area) mov pc, lr ENDPROC(fa_dma_unmap_area) +ENTRY(fa_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +ENDPROC(fa_dma_barrier) + __INITDATA .type fa_cache_fns, #object
@@ -253,5 +253,6 @@ ENTRY(fa_cache_fns) .long fa_flush_kern_dcache_area .long fa_dma_map_area .long fa_dma_unmap_area + .long fa_dma_barrier .long fa_dma_flush_range .size fa_cache_fns, . - fa_cache_fns
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
index c2ff3c5..df34458 100644
--- a/arch/arm/mm/cache-v3.S
+++ b/arch/arm/mm/cache-v3.S@@ -123,9 +123,11 @@ ENTRY(v3_dma_unmap_area) * - dir - DMA direction */ ENTRY(v3_dma_map_area) +ENTRY(v3_dma_barrier) mov pc, lr ENDPROC(v3_dma_unmap_area) ENDPROC(v3_dma_map_area) +ENDPROC(v3_dma_barrier) __INITDATA
@@ -140,5 +142,6 @@ ENTRY(v3_cache_fns) .long v3_flush_kern_dcache_area .long v3_dma_map_area .long v3_dma_unmap_area + .long v3_dma_barrier .long v3_dma_flush_range .size v3_cache_fns, . - v3_cache_fns
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index 4810f7e..20260b1 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S@@ -135,9 +135,11 @@ ENTRY(v4_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4_dma_map_area) +ENTRY(v4_dma_barrier) mov pc, lr ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) +ENDPROC(v4_dma_barrier) __INITDATA
@@ -152,5 +154,6 @@ ENTRY(v4_cache_fns) .long v4_flush_kern_dcache_area .long v4_dma_map_area .long v4_dma_unmap_area + .long v4_dma_barrier .long v4_dma_flush_range .size v4_cache_fns, . - v4_cache_fns
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index df8368a..9c9c875 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S@@ -194,7 +194,6 @@ v4wb_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -211,7 +210,6 @@ v4wb_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -251,6 +249,12 @@ ENTRY(v4wb_dma_unmap_area) mov pc, lr ENDPROC(v4wb_dma_unmap_area) +ENTRY(v4wb_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +ENDPROC(v4wb_dma_barrier) + __INITDATA .type v4wb_cache_fns, #object
@@ -264,5 +268,6 @@ ENTRY(v4wb_cache_fns) .long v4wb_flush_kern_dcache_area .long v4wb_dma_map_area .long v4wb_dma_unmap_area + .long v4wb_dma_barrier .long v4wb_dma_flush_range .size v4wb_cache_fns, . - v4wb_cache_fns
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 45c7031..223eea4 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S@@ -191,9 +191,11 @@ ENTRY(v4wt_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4wt_dma_map_area) +ENTRY(v4wt_dma_barrier) mov pc, lr ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) +ENDPROC(v4wt_dma_barrier) __INITDATA
@@ -208,5 +210,6 @@ ENTRY(v4wt_cache_fns) .long v4wt_flush_kern_dcache_area .long v4wt_dma_map_area .long v4wt_dma_unmap_area + .long v4wt_dma_barrier .long v4wt_dma_flush_range .size v4wt_cache_fns, . - v4wt_cache_fns
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 9d89c67..b294854 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S@@ -238,8 +238,6 @@ v6_dma_inv_range: strlo r2, [r0] @ write for ownership #endif blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -261,8 +259,6 @@ v6_dma_clean_range: add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -289,8 +285,6 @@ ENTRY(v6_dma_flush_range) strlob r2, [r0] @ write for ownership #endif blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr /*
@@ -327,6 +321,12 @@ ENTRY(v6_dma_unmap_area) mov pc, lr ENDPROC(v6_dma_unmap_area) +ENTRY(v6_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +ENDPROC(v6_dma_barrier) + __INITDATA .type v6_cache_fns, #object
@@ -340,5 +340,6 @@ ENTRY(v6_cache_fns) .long v6_flush_kern_dcache_area .long v6_dma_map_area .long v6_dma_unmap_area + .long v6_dma_barrier .long v6_dma_flush_range .size v6_cache_fns, . - v6_cache_fns
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index bcd64f2..d89d55a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S@@ -255,7 +255,6 @@ v7_dma_inv_range: add r0, r0, r2 cmp r0, r1 blo 1b - dsb mov pc, lr ENDPROC(v7_dma_inv_range)
@@ -273,7 +272,6 @@ v7_dma_clean_range: add r0, r0, r2 cmp r0, r1 blo 1b - dsb mov pc, lr ENDPROC(v7_dma_clean_range)
@@ -291,7 +289,6 @@ ENTRY(v7_dma_flush_range) add r0, r0, r2 cmp r0, r1 blo 1b - dsb mov pc, lr ENDPROC(v7_dma_flush_range)
@@ -321,6 +318,11 @@ ENTRY(v7_dma_unmap_area) mov pc, lr ENDPROC(v7_dma_unmap_area) +ENTRY(v7_dma_barrier) + dsb + mov pc, lr +ENDPROC(v7_dma_barrier) + __INITDATA .type v7_cache_fns, #object
@@ -334,5 +336,6 @@ ENTRY(v7_cache_fns) .long v7_flush_kern_dcache_area .long v7_dma_map_area .long v7_dma_unmap_area + .long v7_dma_barrier .long v7_dma_flush_range .size v7_cache_fns, . - v7_cache_fns
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 64daef2..d807f38 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c@@ -97,6 +97,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf memset(ptr, 0, size); dmac_flush_range(ptr, ptr + size); outer_flush_range(__pa(ptr), __pa(ptr) + size); + dmac_barrier(); return page; }
@@ -542,6 +543,12 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, } EXPORT_SYMBOL(___dma_page_dev_to_cpu); +void __dma_barrier(enum dma_data_direction dir) +{ + dmac_barrier(); +} +EXPORT_SYMBOL(__dma_barrier); + /** * dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -572,6 +579,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (dma_mapping_error(dev, s->dma_address)) goto bad_mapping; } + __dma_barrier(dir); debug_dma_map_sg(dev, sg, nents, nents, dir); return nents;
@@ -602,6 +610,8 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, for_each_sg(sg, s, nents, i) __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + + __dma_barrier(dir); } EXPORT_SYMBOL(dma_unmap_sg);
@@ -627,6 +637,7 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, s->length, dir); } + __dma_barrier(dir); debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); } EXPORT_SYMBOL(dma_sync_sg_for_cpu);
@@ -653,6 +664,7 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, s->length, dir); } + __dma_barrier(dir);
debug_dma_sync_sg_for_device(dev, sg, nents, dir); } EXPORT_SYMBOL(dma_sync_sg_for_device);
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index d278298..fea33c9 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S@@ -281,7 +281,6 @@ arm1020e_dma_inv_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -303,7 +302,6 @@ arm1020e_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -323,7 +321,6 @@ ENTRY(arm1020e_dma_flush_range) cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -350,6 +347,12 @@ ENTRY(arm1020e_dma_unmap_area) mov pc, lr ENDPROC(arm1020e_dma_unmap_area) +ENTRY(arm1020e_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm1020e_dma_barrier) + ENTRY(arm1020e_cache_fns) .long arm1020e_flush_icache_all .long arm1020e_flush_kern_cache_all
@@ -360,6 +363,7 @@ ENTRY(arm1020e_cache_fns) .long arm1020e_flush_kern_dcache_area .long arm1020e_dma_map_area .long arm1020e_dma_unmap_area + .long arm1020e_dma_barrier .long arm1020e_dma_flush_range .align 5
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index ce13e4a..ba1a7df 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S@@ -270,7 +270,6 @@ arm1022_dma_inv_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -292,7 +291,6 @@ arm1022_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -312,7 +310,6 @@ ENTRY(arm1022_dma_flush_range) cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -339,6 +336,12 @@ ENTRY(arm1022_dma_unmap_area) mov pc, lr ENDPROC(arm1022_dma_unmap_area) +ENTRY(arm1022_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm1022_dma_barrier) + ENTRY(arm1022_cache_fns) .long arm1022_flush_icache_all .long arm1022_flush_kern_cache_all
@@ -349,6 +352,7 @@ ENTRY(arm1022_cache_fns) .long arm1022_flush_kern_dcache_area .long arm1022_dma_map_area .long arm1022_dma_unmap_area + .long arm1022_dma_barrier .long arm1022_dma_flush_range .align 5
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 636672a..de648f1 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S@@ -264,7 +264,6 @@ arm1026_dma_inv_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -286,7 +285,6 @@ arm1026_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -306,7 +304,6 @@ ENTRY(arm1026_dma_flush_range) cmp r0, r1 blo 1b #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -333,6 +330,12 @@ ENTRY(arm1026_dma_unmap_area) mov pc, lr ENDPROC(arm1026_dma_unmap_area) +ENTRY(arm1026_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm1026_dma_barrier) + ENTRY(arm1026_cache_fns) .long arm1026_flush_icache_all .long arm1026_flush_kern_cache_all
@@ -343,6 +346,7 @@ ENTRY(arm1026_cache_fns) .long arm1026_flush_kern_dcache_area .long arm1026_dma_map_area .long arm1026_dma_unmap_area + .long arm1026_dma_barrier .long arm1026_dma_flush_range .align 5
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 8be8199..ec74093 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S@@ -252,7 +252,6 @@ arm920_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -271,7 +270,6 @@ arm920_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -288,7 +286,6 @@ ENTRY(arm920_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -315,6 +312,12 @@ ENTRY(arm920_dma_unmap_area) mov pc, lr ENDPROC(arm920_dma_unmap_area) +ENTRY(arm920_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm920_dma_barrier) + ENTRY(arm920_cache_fns) .long arm920_flush_icache_all .long arm920_flush_kern_cache_all
@@ -325,6 +328,7 @@ ENTRY(arm920_cache_fns) .long arm920_flush_kern_dcache_area .long arm920_dma_map_area .long arm920_dma_unmap_area + .long arm920_dma_barrier .long arm920_dma_flush_range #endif
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index c0ff8e4..474d4c6 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S@@ -254,7 +254,6 @@ arm922_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -273,7 +272,6 @@ arm922_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -290,7 +288,6 @@ ENTRY(arm922_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -317,6 +314,12 @@ ENTRY(arm922_dma_unmap_area) mov pc, lr ENDPROC(arm922_dma_unmap_area) +ENTRY(arm922_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm922_dma_barrier) + ENTRY(arm922_cache_fns) .long arm922_flush_icache_all .long arm922_flush_kern_cache_all
@@ -327,6 +330,7 @@ ENTRY(arm922_cache_fns) .long arm922_flush_kern_dcache_area .long arm922_dma_map_area .long arm922_dma_unmap_area + .long arm922_dma_barrier .long arm922_dma_flush_range #endif
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 3c6cffe..0336ae3 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S@@ -302,7 +302,6 @@ arm925_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -323,7 +322,6 @@ arm925_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -345,7 +343,6 @@ ENTRY(arm925_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -372,6 +369,12 @@ ENTRY(arm925_dma_unmap_area) mov pc, lr ENDPROC(arm925_dma_unmap_area) +ENTRY(arm925_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm925_dma_barrier) + ENTRY(arm925_cache_fns) .long arm925_flush_icache_all .long arm925_flush_kern_cache_all
@@ -382,6 +385,7 @@ ENTRY(arm925_cache_fns) .long arm925_flush_kern_dcache_area .long arm925_dma_map_area .long arm925_dma_unmap_area + .long arm925_dma_barrier .long arm925_dma_flush_range ENTRY(cpu_arm925_dcache_clean_area)
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 75b707c..473bbe6 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S@@ -265,7 +265,6 @@ arm926_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -286,7 +285,6 @@ arm926_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -308,7 +306,6 @@ ENTRY(arm926_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -335,6 +332,12 @@ ENTRY(arm926_dma_unmap_area) mov pc, lr ENDPROC(arm926_dma_unmap_area) +ENTRY(arm926_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm926_dma_barrier) + ENTRY(arm926_cache_fns) .long arm926_flush_icache_all .long arm926_flush_kern_cache_all
@@ -345,6 +348,7 @@ ENTRY(arm926_cache_fns) .long arm926_flush_kern_dcache_area .long arm926_dma_map_area .long arm926_dma_unmap_area + .long arm926_dma_barrier .long arm926_dma_flush_range ENTRY(cpu_arm926_dcache_clean_area)
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 1af1657..c44c963 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S@@ -187,7 +187,6 @@ arm940_dma_inv_range: bcs 2b @ entries 63 to 0 subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -211,7 +210,6 @@ ENTRY(cpu_arm940_dcache_clean_area) subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 #endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -237,7 +235,6 @@ ENTRY(arm940_dma_flush_range) bcs 2b @ entries 63 to 0 subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -264,6 +261,12 @@ ENTRY(arm940_dma_unmap_area) mov pc, lr ENDPROC(arm940_dma_unmap_area) +ENTRY(arm940_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm940_dma_barrier) + ENTRY(arm940_cache_fns) .long arm940_flush_icache_all .long arm940_flush_kern_cache_all
@@ -274,6 +277,7 @@ ENTRY(arm940_cache_fns) .long arm940_flush_kern_dcache_area .long arm940_dma_map_area .long arm940_dma_unmap_area + .long arm940_dma_barrier .long arm940_dma_flush_range __CPUINIT
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 1664b6a..11e9ad7 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S@@ -234,7 +234,6 @@ arm946_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -255,7 +254,6 @@ arm946_dma_clean_range: cmp r0, r1 blo 1b #endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -279,7 +277,6 @@ ENTRY(arm946_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -306,6 +303,12 @@ ENTRY(arm946_dma_unmap_area) mov pc, lr ENDPROC(arm946_dma_unmap_area) +ENTRY(arm946_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(arm946_dma_barrier) + ENTRY(arm946_cache_fns) .long arm946_flush_icache_all .long arm946_flush_kern_cache_all
@@ -316,6 +319,7 @@ ENTRY(arm946_cache_fns) .long arm946_flush_kern_dcache_area .long arm946_dma_map_area .long arm946_dma_unmap_area + .long arm946_dma_barrier .long arm946_dma_flush_range
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 53e6323..50a309e 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S@@ -290,7 +290,6 @@ feroceon_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr .align 5
@@ -326,7 +325,6 @@ feroceon_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr .align 5
@@ -339,7 +337,6 @@ feroceon_range_dma_clean_range: mcr p15, 5, r0, c15, c13, 0 @ D clean range start mcr p15, 5, r1, c15, c13, 1 @ D clean range top msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -357,7 +354,6 @@ ENTRY(feroceon_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr .align 5
@@ -370,7 +366,6 @@ ENTRY(feroceon_range_dma_flush_range) mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -411,6 +406,12 @@ ENTRY(feroceon_dma_unmap_area) mov pc, lr ENDPROC(feroceon_dma_unmap_area) +ENTRY(feroceon_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(feroceon_dma_barrier) + ENTRY(feroceon_cache_fns) .long feroceon_flush_icache_all .long feroceon_flush_kern_cache_all
@@ -421,6 +422,7 @@ ENTRY(feroceon_cache_fns) .long feroceon_flush_kern_dcache_area .long feroceon_dma_map_area .long feroceon_dma_unmap_area + .long feroceon_dma_barrier .long feroceon_dma_flush_range ENTRY(feroceon_range_cache_fns)
@@ -433,6 +435,7 @@ ENTRY(feroceon_range_cache_fns) .long feroceon_range_flush_kern_dcache_area .long feroceon_range_dma_map_area .long feroceon_dma_unmap_area + .long feroceon_dma_barrier .long feroceon_range_dma_flush_range .align 5
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index caa3115..09e8883 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S@@ -224,7 +224,6 @@ mohawk_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -243,7 +242,6 @@ mohawk_dma_clean_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -261,7 +259,6 @@ ENTRY(mohawk_dma_flush_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /*
@@ -288,6 +285,12 @@ ENTRY(mohawk_dma_unmap_area) mov pc, lr ENDPROC(mohawk_dma_unmap_area) +ENTRY(mohawk_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr +ENDPROC(mohawk_dma_barrier) + ENTRY(mohawk_cache_fns) .long mohawk_flush_kern_cache_all .long mohawk_flush_user_cache_all
@@ -297,6 +300,7 @@ ENTRY(mohawk_cache_fns) .long mohawk_flush_kern_dcache_area .long mohawk_dma_map_area .long mohawk_dma_unmap_area + .long mohawk_dma_barrier .long mohawk_dma_flush_range ENTRY(cpu_mohawk_dcache_clean_area)
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 046b3d8..d033ed4 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S@@ -274,7 +274,6 @@ xsc3_dma_inv_range: add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr /*
@@ -291,7 +290,6 @@ xsc3_dma_clean_range: add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr /*
@@ -308,7 +306,6 @@ ENTRY(xsc3_dma_flush_range) add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier mov pc, lr /*
@@ -335,6 +332,12 @@ ENTRY(xsc3_dma_unmap_area) mov pc, lr ENDPROC(xsc3_dma_unmap_area) +ENTRY(xsc3_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr +ENDPROC(xsc3_dma_barrier) + ENTRY(xsc3_cache_fns) .long xsc3_flush_icache_all .long xsc3_flush_kern_cache_all
@@ -345,6 +348,7 @@ ENTRY(xsc3_cache_fns) .long xsc3_flush_kern_dcache_area .long xsc3_dma_map_area .long xsc3_dma_unmap_area + .long xsc3_dma_barrier .long xsc3_dma_flush_range ENTRY(cpu_xsc3_dcache_clean_area)
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 63037e2..e390ae6 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S@@ -332,7 +332,6 @@ xscale_dma_inv_range: add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr /*
@@ -349,7 +348,6 @@ xscale_dma_clean_range: add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr /*
@@ -367,7 +365,6 @@ ENTRY(xscale_dma_flush_range) add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr /*
@@ -407,6 +404,12 @@ ENTRY(xscale_dma_unmap_area) mov pc, lr ENDPROC(xscale_dma_unmap_area) +ENTRY(xscale_dma_barrier) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr +ENDPROC(xscale_dma_barrier) + ENTRY(xscale_cache_fns) .long xscale_flush_icache_all .long xscale_flush_kern_cache_all
@@ -417,6 +420,7 @@ ENTRY(xscale_cache_fns) .long xscale_flush_kern_dcache_area .long xscale_dma_map_area .long xscale_dma_unmap_area + .long xscale_dma_barrier .long xscale_dma_flush_range /*