optimization - Is gcc reordering my volatile variables? -


I am developing on an STM32F746 (ARM Cortex-M7) with the GNU ARM toolchain (GCC 5.4.1), using FreeRTOS. I am using the following set of CFLAGS:

-g -Werror -Wextra -O2  -fno-common -ffunction-sections -fmessage-length=0 -g -mcpu=cortex-m7 -mthumb -mfloat-abi=hard -mfpu=fpv5-sp-d16 -fno-common -ffunction-sections -fmessage-length=0 -g -mcpu=cortex-m7 -mthumb -mfloat-abi=hard -mfpu=fpv5-sp-d16 

The following code shows '0' in the console, as if the DMA controller were not powered on. Normally, writing `rcc_ahb1enr_dma1en` to the `ahb1enr` register powers on the DMA controller.

/* *** other peripheral configurations *** */ rcc->ahb1enr |= rcc_ahb1enr_dma1en;                                      dma1_stream3->cr = dma_sxcr_pl | dma_sxcr_minc |                                 dma_sxcr_tcie | dma_sxcr_teie | dma_sxcr_dmeie; debug_printf("%x\r\n", dma1_stream3->cr); 

But the following code prints the expected value of `dma1_stream3->cr`.

/* *** other peripheral configurations *** */ rcc->ahb1enr |= rcc_ahb1enr_dma1en; __asm__ __volatile__ ("" ::: "memory");                                      dma1_stream3->cr = dma_sxcr_pl | dma_sxcr_minc |                                 dma_sxcr_tcie | dma_sxcr_teie | dma_sxcr_dmeie; debug_printf("%x\r\n", dma1_stream3->cr); 

I never had to add memory barriers for other controllers, so why do I have to add one for this one? I am thinking it is either the load/store instructions being reordered by GCC or a timing issue. I checked the errata sheet of the CPU (STM32F746) and the datasheet, but I didn't find anything.

Here is the definition of the `rcc` structure:

typedef struct {     /* ... */     __io uint32_t ahb1enr;     /* ... */ } rcc_typedef; 

And here is the definition of the structure type of `dma1_stream3`:

typedef struct                                                                   {                                                                                  __io uint32_t cr;     /*!< dma stream x configuration register      */           __io uint32_t ndtr;   /*!< dma stream x number of data register     */           __io uint32_t par;    /*!< dma stream x peripheral address register */           __io uint32_t m0ar;   /*!< dma stream x memory 0 address register   */           __io uint32_t m1ar;   /*!< dma stream x memory 1 address register   */           __io uint32_t fcr;    /*!< dma stream x fifo control register       */         } dma_stream_typedef; 

`__io` is defined as `volatile`.

Here is the assembly code generated for the function:

08003f4c <bsp_initspiadis>:  8003f4c:       4956            ldr     r1, [pc, #344]  ; (80040a8 <bsp_initspiadis+0x15c>)  8003f4e:       4a57            ldr     r2, [pc, #348]  ; (80040ac <bsp_initspiadis+0x160>)  8003f50:       6b08            ldr     r0, [r1, #48]   ; 0x30  8003f52:       f440 7080       orr.w   r0, r0, #256    ; 0x100  8003f56:       b5f8            push    {r3, r4, r5, r6, r7, lr}  8003f58:       6308            str     r0, [r1, #48]   ; 0x30  8003f5a:       2500            movs    r5, #0  8003f5c:       6810            ldr     r0, [r2, #0]  8003f5e:       f240 3607       movw    r6, #775        ; 0x307  8003f62:       4b53            ldr     r3, [pc, #332]  ; (80040b0 <bsp_initspiadis+0x164>)  8003f64:       f44f 5eb8       mov.w   lr, #5888       ; 0x1700  8003f68:       f020 000c       bic.w   r0, r0, #12  8003f6c:       4c51            ldr     r4, [pc, #324]  ; (80040b4 <bsp_initspiadis+0x168>)  8003f6e:       4f52            ldr     r7, [pc, #328]  ; (80040b8 <bsp_initspiadis+0x16c>)  8003f70:       6010            str     r0, [r2, #0]  8003f72:       6810            ldr     r0, [r2, #0]  8003f74:       f040 0008       orr.w   r0, r0, #8  8003f78:       6010            str     r0, [r2, #0]  8003f7a:       6890            ldr     r0, [r2, #8]  8003f7c:       f020 000c       bic.w   r0, r0, #12  8003f80:       6090            str     r0, [r2, #8]  8003f82:       6890            ldr     r0, [r2, #8]  8003f84:       f040 0004       orr.w   r0, r0, #4  8003f88:       6090            str     r0, [r2, #8]  8003f8a:       6890            ldr     r0, [r2, #8]  8003f8c:       f040 0008       orr.w   r0, r0, #8  8003f90:       6090            str     r0, [r2, #8]  8003f92:       6a10            ldr     r0, [r2, #32]  8003f94:       f020 00f0       bic.w   r0, r0, #240    ; 0xf0  8003f98:       6210            str     r0, [r2, #32]  8003f9a:       6a10            ldr     r0, [r2, #32]  8003f9c:       f040 0010       orr.w   r0, r0, #16  8003fa0:       
6210            str     r0, [r2, #32]  8003fa2:       6a10            ldr     r0, [r2, #32]  8003fa4:       f040 0040       orr.w   r0, r0, #64     ; 0x40  8003fa8:       6210            str     r0, [r2, #32]  8003faa:       6b08            ldr     r0, [r1, #48]   ; 0x30  8003fac:       4a43            ldr     r2, [pc, #268]  ; (80040bc <bsp_initspiadis+0x170>)  8003fae:       f040 0002       orr.w   r0, r0, #2  8003fb2:       6308            str     r0, [r1, #48]   ; 0x30  8003fb4:       6818            ldr     r0, [r3, #0]  8003fb6:       f020 5040       bic.w   r0, r0, #805306368      ; 0x30000000  8003fba:       6018            str     r0, [r3, #0]  8003fbc:       6818            ldr     r0, [r3, #0]  8003fbe:       f040 5000       orr.w   r0, r0, #536870912      ; 0x20000000  8003fc2:       6018            str     r0, [r3, #0]  8003fc4:       6898            ldr     r0, [r3, #8]  8003fc6:       f020 5040       bic.w   r0, r0, #805306368      ; 0x30000000  8003fca:       6098            str     r0, [r3, #8]  8003fcc:       6898            ldr     r0, [r3, #8]  8003fce:       f040 5080       orr.w   r0, r0, #268435456      ; 0x10000000  8003fd2:       6098            str     r0, [r3, #8]  8003fd4:       6898            ldr     r0, [r3, #8]  8003fd6:       f040 5000       orr.w   r0, r0, #536870912      ; 0x20000000  8003fda:       6098            str     r0, [r3, #8]  8003fdc:       6a58            ldr     r0, [r3, #36]   ; 0x24  8003fde:       f020 6070       bic.w   r0, r0, #251658240      ; 0xf000000  8003fe2:       6258            str     r0, [r3, #36]   ; 0x24  8003fe4:       6a58            ldr     r0, [r3, #36]   ; 0x24  8003fe6:       f040 7080       orr.w   r0, r0, #16777216       ; 0x1000000  8003fea:       6258            str     r0, [r3, #36]   ; 0x24  8003fec:       6a58            ldr     r0, [r3, #36]   ; 0x24  8003fee:       f040 6080       orr.w   r0, r0, #67108864       ; 0x4000000  8003ff2:       6258            str     r0, [r3, #36]   ; 0x24  
8003ff4:       6b08            ldr     r0, [r1, #48]   ; 0x30  8003ff6:       f040 0002       orr.w   r0, r0, #2  8003ffa:       6308            str     r0, [r1, #48]   ; 0x30  8003ffc:       6818            ldr     r0, [r3, #0]  8003ffe:       f020 4040       bic.w   r0, r0, #3221225472     ; 0xc0000000  8004002:       6018            str     r0, [r3, #0]  8004004:       6818            ldr     r0, [r3, #0]  8004006:       f040 4000       orr.w   r0, r0, #2147483648     ; 0x80000000  800400a:       6018            str     r0, [r3, #0]  800400c:       6898            ldr     r0, [r3, #8]  800400e:       f020 4040       bic.w   r0, r0, #3221225472     ; 0xc0000000  8004012:       6098            str     r0, [r3, #8]  8004014:       6898            ldr     r0, [r3, #8]  8004016:       f040 4080       orr.w   r0, r0, #1073741824     ; 0x40000000  800401a:       6098            str     r0, [r3, #8]  800401c:       6898            ldr     r0, [r3, #8]  800401e:       f040 4000       orr.w   r0, r0, #2147483648     ; 0x80000000  8004022:       6098            str     r0, [r3, #8]  8004024:       6a58            ldr     r0, [r3, #36]   ; 0x24  8004026:       f020 4070       bic.w   r0, r0, #4026531840     ; 0xf0000000  800402a:       6258            str     r0, [r3, #36]   ; 0x24  800402c:       6a58            ldr     r0, [r3, #36]   ; 0x24  800402e:       f040 5080       orr.w   r0, r0, #268435456      ; 0x10000000  8004032:       6258            str     r0, [r3, #36]   ; 0x24  8004034:       6a58            ldr     r0, [r3, #36]   ; 0x24  8004036:       f040 4080       orr.w   r0, r0, #1073741824     ; 0x40000000  800403a:       6258            str     r0, [r3, #36]   ; 0x24  800403c:       6c0b            ldr     r3, [r1, #64]   ; 0x40  800403e:       4820            ldr     r0, [pc, #128]  ; (80040c0 <bsp_initspiadis+0x174>)  8004040:       f443 4380       orr.w   r3, r3, #16384  ; 0x4000  8004044:       640b            str     r3, [r1, #64]   ; 0x40  8004046:       
61e5            str     r5, [r4, #28]  8004048:       6026            str     r6, [r4, #0]  800404a:       6823            ldr     r3, [r4, #0]  800404c:       4e1d            ldr     r6, [pc, #116]  ; (80040c4 <bsp_initspiadis+0x178>)  800404e:       f023 0338       bic.w   r3, r3, #56     ; 0x38  8004052:       6023            str     r3, [r4, #0]  8004054:       6823            ldr     r3, [r4, #0]  8004056:       f043 0338       orr.w   r3, r3, #56     ; 0x38  800405a:       6023            str     r3, [r4, #0]  800405c:       f8c4 e004       str.w   lr, [r4, #4]  8004060:       6b0b            ldr     r3, [r1, #48]   ; 0x30  8004062:       4c19            ldr     r4, [pc, #100]  ; (80040c8 <bsp_initspiadis+0x17c>)  8004064:       f443 1300       orr.w   r3, r3, #2097152        ; 0x200000  8004068:       630b            str     r3, [r1, #48]   ; 0x30  800406a:       603a            str     r2, [r7, #0]  800406c:       6839            ldr     r1, [r7, #0]  800406e:       f000 faab       bl      80045c8 <printf>  8004072:       4b16            ldr     r3, [pc, #88]   ; (80040cc <bsp_initspiadis+0x180>)  8004074:       4629            mov     r1, r5  8004076:       2203            movs    r2, #3  8004078:       601c            str     r4, [r3, #0]  800407a:       2001            movs    r0, #1  800407c:       f7fe f90e       bl      800229c <xqueuegenericcreate>  8004080:       4629            mov     r1, r5  8004082:       6170            str     r0, [r6, #20]  8004084:       2203            movs    r2, #3  8004086:       2001            movs    r0, #1  8004088:       f44f 4480       mov.w   r4, #16384      ; 0x4000  800408c:       f7fe f906       bl      800229c <xqueuegenericcreate>  8004090:       4b0f            ldr     r3, [pc, #60]   ; (80040d0 <bsp_initspiadis+0x184>)  8004092:       2240            movs    r2, #64 ; 0x40  8004094:       f44f 4100       mov.w   r1, #32768      ; 0x8000  8004098:       6130            str     r0, [r6, #16]  800409a:       
601c            str     r4, [r3, #0]  800409c:       6019            str     r1, [r3, #0]  800409e:       f883 230e       strb.w  r2, [r3, #782]  ; 0x30e  80040a2:       f883 230f       strb.w  r2, [r3, #783]  ; 0x30f  80040a6:       bdf8            pop     {r3, r4, r5, r6, r7, pc}  80040a8:       40023800        andmi   r3, r2, r0, lsl #16  80040ac:       40022000        andmi   r2, r2, r0  80040b0:       40020400        andmi   r0, r2, r0, lsl #8  80040b4:       40003800        andmi   r3, r0, r0, lsl #16  80040b8:       40026058        andmi   r6, r2, r8, asr r0  80040bc:       00030416        andeq   r0, r3, r6, lsl r4  80040c0:       08008ec8        stmdaeq r0, {r3, r6, r7, r9, sl, fp, pc}  80040c4:       2000c2d0        ldrdcs  ip, [r0], -r0  80040c8:       00030456        andeq   r0, r3, r6, asr r4  80040cc:       40026070        andmi   r6, r2, r0, ror r0  80040d0:       e000e100        ,     lr, r0, r0, lsl #2 

Yes, there is a bug in both the STM32F4 and F7 micros: after the operation that enables a peripheral clock, a small delay is needed before the peripheral's registers can be accessed.

I use `__DSB();` for it. You can provide the delay in another way (it is described in the errata for the affected microcontrollers; actually, I have not found an unaffected F4 or F7). I use DSB because it is advised in the F4 errata, and it reminds me about this bug.

2.1.5 Delay after an RCC peripheral clock enabling. Description: A delay between an RCC peripheral clock enable and the effective peripheral enabling should be taken into account in order to manage the peripheral read/write to registers. This delay depends on the peripheral mapping: • If the peripheral is mapped on AHB: the delay should be equal to 2 AHB cycles. • If the peripheral is mapped on APB: the delay should be equal to 1 + (AHB/APB prescaler) cycles. Workarounds: 1. Use the DSB instruction to stall the Cortex®-M4 CPU pipeline until the instruction is completed. 2. Insert “n” NOPs between the RCC enable bit write and the peripheral register writes (n = 2 for AHB peripherals, n = 1 + AHB/APB prescaler in case of APB peripherals). 3. Or insert a dummy read operation from the corresponding register after enabling the peripheral clock.


Comments

Popular posts from this blog

android - InAppBilling registering BroadcastReceiver in AndroidManifest -

python Tkinter Capturing keyboard events save as one single string -

sql server - Why does Linq-to-SQL add unnecessary COUNT()? -