papoon_usb icon indicating copy to clipboard operation
papoon_usb copied to clipboard

a failed attempt for USB bulk double-buffered Tx IN transfer on STM32F103 based on STM32_USB-FS-Device_Lib_V4.1.0

Open wuwbobo2021 opened this issue 1 year ago • 0 comments

Here's a failed attempt for USB bulk double-buffered Tx IN transfer (from device to host) on STM32F103, based on VirtualComport_Loopback example in STM32_USB-FS-Device_Lib_V4.1.0.

Unfortunately, there is no sample application on the web that does this, only a few clues: Audio_Speaker in STM32_USB-FS-Device_Lib_V4.1.0 (isochronous Rx OUT transfer), https://github.com/catphish/stm32-doublebuffer (a bit strange), https://community.st.com/t5/mems-sensors/inprove-the-speed-of-the-mass-storage-as-u-disk/td-p/523996. USB PMA double buffering is unimplemented in papoon_usb according to its TODO list, and it also seems to be unimplemented in these two libraries: https://github.com/stm32-rs/stm32-usbd, https://github.com/embassy-rs/embassy. It is probably handled in ST's HAL library, though.

While doubting my own intellectual ability, I am beginning to suspect that the STM32 USB double-buffering mechanism itself may be unreliable for correct bulk transfers (?). I will probably rewrite the test program based on stm32f1xx_ll_usb.c or stm32f1xx_hal_pcd.c, which hopefully implement some double-buffer bulk management.

I don't have a good understanding of how double buffering should be handled in the USB registers of the STM32, even after reading the related sections of the reference manual. The small test below doesn't work as expected. Changing the transfer unit from 64B to 60B makes no difference. Expected behavior: for every two consecutive 64-byte frames, frame2_seq_num - frame1_seq_num = 1, except when the counter wraps (frame1_seq_num = 0xff and frame2_seq_num = 0); this can be ensured without double buffering.

The key to the failure: the program fails to determine whether (and when) the half buffer holding the previous packet has been transferred successfully, either via EP_CTR_TX or by comparing DTOG with SW_BUF, before overwriting it or retrying.

in usb_conf.h:

/* Endpoint count; packet buffer addresses for ENDP0..ENDP3 are defined below. */
#define EP_NUM              (4)
/*-------------------------------------------------------------*/
/* --------------   Buffer Description Table  -----------------*/
/*-------------------------------------------------------------*/
/* buffer table base address */
#define BTABLE_ADDRESS      (0x00)

/* EP0 Control */
/* rx/tx buffer base address */
#define ENDP0_RXADDR        (0x40)
#define ENDP0_TXADDR        (0x80)

/* buffers for other endpoints, */
// TX: IN    RX: OUT
// The regions below are laid out back to back: each one starts where the
// previous one ends (0xC0 + 0x40 = 0x100, 0x100 + 0x40 = 0x140, 0x140 + 0x10 = 0x150).
// Endpoint 1 owns two buffers because it is used as a double-buffered IN endpoint.
#define ENDP1_TX_BUF0_ADDR  (0xC0)  // bulk in (dev to host), size 0x40
#define ENDP1_TX_BUF1_ADDR  (0x100) // bulk in (dev to host), size 0x40, original ENDP1_RX buffer
#define ENDP2_TXADDR        (0x140) // interrupt, size 0x10
#define ENDP3_RXADDR        (0x150) // bulk out (host to dev), size 0x40, unused

in usb_prop.c:

  /* Initialize Endpoint 1 as a double-buffered bulk endpoint. Both packet
     buffers are the ENDP1_TX_BUF* areas, the initial IN byte count is 0,
     Tx starts in NAK and the Rx direction is disabled. */
  SetEPType(ENDP1, EP_BULK);
  SetEPDoubleBuff(ENDP1);
  SetEPDblBuffAddr(ENDP1, ENDP1_TX_BUF0_ADDR, ENDP1_TX_BUF1_ADDR);
  SetEPDblBuffCount(ENDP1, EP_DBUF_IN, 0);
  /* Both toggle bits are cleared and DTOG_TX is then toggled once, so the two
     bits presumably start out different (DTOG_TX = 1, DTOG_RX = 0).
     usb_send_data() in main.c treats DTOG_TX as DTOG and DTOG_RX as SW_BUF.
     NOTE(review): confirm against the reference manual whether DTOG and
     SW_BUF should start equal or different for a bulk IN endpoint. */
  ClearDTOG_RX(ENDP1);
  ClearDTOG_TX(ENDP1);
  ToggleDTOG_TX(ENDP1);
  SetEPTxStatus(ENDP1, EP_TX_NAK);
  SetEPRxStatus(ENDP1, EP_RX_DIS);

main.c:

#include <stm32f10x.h>
#include <system_stm32f10x.h>

#include "usb_lib.h"
#include "usb_pwr.h"
#include "usb_desc.h"

// Tick countdown shared by delay_us() (which loads it and spins on it) and
// SysTick_Handler() (which decrements it while it is non-zero).
static volatile uint32_t delay_remain = 0;

// Starts SysTick as the time base for delay_us(): one interrupt per
// microsecond, running at the highest exception priority.
static void delay_init(void)
{
	NVIC_PriorityGroupConfig(NVIC_PriorityGroup_2);

	SystemCoreClockUpdate(); //get the current system clock frequency (Hz)

	// Reload value = core clock cycles per microsecond, i.e. one tick per us.
	SysTick_Config(SystemCoreClock / 1000 / 1000);

	// SysTick is a core exception with a negative IRQn, so it cannot be set up
	// through NVIC_Init(): that API stores the channel in an 8-bit field and
	// indexes the NVIC external-interrupt registers with it. The priority is
	// also set only after SysTick_Config(), because SysTick_Config() itself
	// resets the SysTick priority to the lowest level.
	NVIC_SetPriority(SysTick_IRQn, 0); //set to highest priority
}

// SysTick interrupt: counts delay_remain down by one per tick, stopping at zero.
void SysTick_Handler()
{
	if (delay_remain == 0)
		return;

	delay_remain--;
}

// Busy-waits until SysTick_Handler() has counted `us` ticks down to zero
// (delay_init() configures one tick per microsecond).
// CANNOT be used in interrupt handler
static inline void delay_us(uint32_t us)
{
	delay_remain = us;

	for (;;) {
		if (delay_remain == 0)
			break;
	}
}

// Copies one packet into a packet buffer of the double-buffered bulk IN
// endpoint 1, hands it to the USB peripheral, and then spins until DTOG and
// SW_BUF differ again (or the device leaves the CONFIGURED state).
//
// ptr: source bytes; read as 16-bit halfwords.
// sz:  byte count, clamped to VIRTUAL_COM_PORT_DATA_SIZE.
// `sz` should be even number (the copy loop moves (sz + 1) / 2 halfwords, so an
// odd size reads one byte past the end of the source).
//
// NOTE(review): only the flag comparison and the Tx status test were corrected;
// the register sequencing is otherwise the original one and has not been
// validated on hardware.
static inline void usb_send_data(const volatile uint8_t* ptr, uint8_t sz)
{
	// defined for the IN (Tx) endpoint
	#define DTOG   EP_DTOG_TX
	#define SW_BUF EP_DTOG_RX
	// DTOG and SW_BUF are two different bits of the endpoint register. Each flag
	// is reduced to 0/1 so that `ENDP1_DTOG() == ENDP1_SW_BUF()` also holds when
	// both bits are set; comparing the raw masked values matches only when both
	// bits are clear.
	#define ENDP1_DTOG()   ((_GetENDPOINT(ENDP1) & DTOG) != 0)
	#define ENDP1_SW_BUF() ((_GetENDPOINT(ENDP1) & SW_BUF) != 0)
	#define ENDP1_SW_BUF_TOGGLE() { _ToggleDTOG_RX(ENDP1); }

	if (sz > VIRTUAL_COM_PORT_DATA_SIZE)
		sz = VIRTUAL_COM_PORT_DATA_SIZE;

	// Flags equal on entry: take the endpoint out of service while SW_BUF is
	// flipped so that the two flags differ again, then put it back in NAK.
	if (ENDP1_DTOG() == ENDP1_SW_BUF()) {
		_SetEPTxStatus(ENDP1, EP_TX_DIS);
		ENDP1_SW_BUF_TOGGLE();
		_SetEPTxStatus(ENDP1, EP_TX_NAK);
	}

	// Select the buffer to fill from DTOG and program its byte count.
	uint32_t* p_dst;
	if (ENDP1_DTOG()) {
		_SetEPDblBuf0Count(ENDP1, EP_DBUF_IN, sz); //app should use Buf0 (original tx)
		p_dst = (uint32_t*)(PMAAddr + 2*ENDP1_TX_BUF0_ADDR);
	} else {
		_SetEPDblBuf1Count(ENDP1, EP_DBUF_IN, sz); //app should use Buf1 (original rx)
		p_dst = (uint32_t*)(PMAAddr + 2*ENDP1_TX_BUF1_ADDR);
	}

	// Packet memory is written as one 16-bit halfword per 32-bit slot, hence the
	// uint32_t destination stride with a uint16_t store.
	// Note: not for STM32F303xE or STM32F302x8
	const volatile uint16_t* p_src = (const volatile uint16_t*) ptr;
	for (uint8_t i = (sz + 1) >> 1; i > 0; i--, p_src++, p_dst++)
		*(uint16_t*)p_dst = *p_src;

	// Release the filled buffer, then re-arm the endpoint only if it is really
	// in NAK. A bit-mask test against EP_TX_NAK would also match EP_TX_VALID and
	// rewrite the status bits of an endpoint that is already armed.
	ENDP1_SW_BUF_TOGGLE();
	if (_GetEPTxStatus(ENDP1) == EP_TX_NAK)
		_SetEPTxStatus(ENDP1, EP_TX_VALID);

	// Wait until the flags differ again; give up if the device is no longer
	// configured.
	while (ENDP1_DTOG() == ENDP1_SW_BUF()) {
		if (bDeviceState != CONFIGURED) return;
	}
}

// 64-byte test packet. main() increments byte 0 after every usb_send_data()
// call, so byte 0 acts as a wrapping sequence number for the host to check;
// the remaining bytes are a fixed pattern.
static volatile uint8_t Seq_Loop[64] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77
};

// Firmware entry point: starts the delay timer and USB, waits one second, then
// streams Seq_Loop forever, bumping its first byte after each packet.
int main()
{
	delay_init();
	USB_Config();
	delay_us(1000*1000);

	for (;;) {
		// Nothing is sent until the host has configured the device.
		if (bDeviceState == CONFIGURED) {
			usb_send_data(Seq_Loop, sizeof(Seq_Loop));
			Seq_Loop[0] += 1;
		}
	}
}

data received by the host (ended soon):

00000000 00 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	
00000016 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000032 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000048 76 77 ce 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000064 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000080 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000096 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000112 76 77 cf 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000128 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000144 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000160 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000176 76 77 d4 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000192 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000208 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000224 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000240 76 77 d5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000256 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000272 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000288 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000304 76 77 db 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000320 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000336 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000352 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000368 76 77 d5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000384 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000400 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000416 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000432 76 77 e1 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000448 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000464 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000480 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000496 76 77 d5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000512 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000528 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000544 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000560 76 77 e7 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000576 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000592 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000608 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000624 76 77 d5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000640 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000656 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000672 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000688 76 77 ed 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000704 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000720 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000736 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000752 76 77 d5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000768 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000784 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000800 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000816 76 77 f4 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000832 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000848 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000864 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000880 76 77 f5 01 02 03 04 05   06 07 10 11 12 13 14 15 	vw.␁␂␃␄␅  ␆␇␐␑␒␓␔␕
00000896 16 17 20 21 22 23 24 25   26 27 30 31 32 33 34 35 	␖␗ !"#$%  &'012345
00000912 36 37 40 41 42 43 44 45   46 47 50 51 52 53 54 55 	67@ABCDE  FGPQRSTU
00000928 56 57 60 61 62 63 64 65   66 67 70 71 72 73 74 75 	VW`abcde  fgpqrstu
00000944 76 77                                             	vw

usb_send_data() can be modified (badly?) to make it send data continuously. The raw data rate is then 1088KB/s on Linux (USB 2.0 host controller, USB CDC tty; it should be slower with libusb/Windows. It would be 950KB/s with a single endpoint PMA buffer, and again slower with libusb/Windows), but the stream contains lots of repeated frames. Uncommenting the two _SetEPTxStatus(ENDP1, EP_TX_NAK) lines changes the rate to about 10KB/s, but the frames are still repeated (with the same frame number).

// Continuous-send variant: fills one packet buffer of the double-buffered bulk
// IN endpoint 1, hands it over, and returns only once EP_CTR_TX is observed
// shortly after DTOG and SW_BUF differ. Otherwise it rewrites the packet and
// tries again, so it never returns while no transfer completes.
//
// ptr: source bytes; read as 16-bit halfwords.
// sz:  byte count, clamped to VIRTUAL_COM_PORT_DATA_SIZE.
//
// NOTE(review): only the flag comparison and the Tx status test were corrected;
// the retry strategy and register sequencing are otherwise the original ones
// and have not been validated on hardware.
static inline void usb_send_data(const uint8_t* ptr, uint16_t sz)
{
	// defined for the IN (TX) endpoint
	#define DTOG EP_DTOG_TX
	#define SW_BUF EP_DTOG_RX
	// DTOG and SW_BUF are two different bits of the endpoint register. Each flag
	// is reduced to 0/1 so that comparing them also works when both bits are
	// set; comparing the raw masked values matches only when both are clear.
	#define ENDP1_DTOG() ((_GetENDPOINT(ENDP1) & DTOG) != 0)
	#define ENDP1_SW_BUF() ((_GetENDPOINT(ENDP1) & SW_BUF) != 0)
	#define ENDP1_SW_BUF_TOGGLE() { _ToggleDTOG_RX(ENDP1); }

	if (sz > VIRTUAL_COM_PORT_DATA_SIZE)
		sz = VIRTUAL_COM_PORT_DATA_SIZE;
	const uint16_t* p_src;

retry:
	if (ENDP1_DTOG() == ENDP1_SW_BUF()) { //shouldn't happen here?
		_SetEPTxStatus(ENDP1, EP_TX_NAK);
		ENDP1_SW_BUF_TOGGLE();
	}

	// Select the buffer to fill from DTOG and program its byte count.
	uint32_t* p_dst;
	if (ENDP1_DTOG()) {
		_SetEPDblBuf0Count(ENDP1, EP_DBUF_IN, sz); //app should use Buf0
		p_dst = (uint32_t*)(PMAAddr + 2*ENDP1_TX_BUF0_ADDR);
	} else {
		_SetEPDblBuf1Count(ENDP1, EP_DBUF_IN, sz); //app should use Buf1
		p_dst = (uint32_t*)(PMAAddr + 2*ENDP1_TX_BUF1_ADDR);
	}

	// Packet memory is written as one 16-bit halfword per 32-bit slot.
	p_src = (const uint16_t*) ptr;
	for (uint8_t i = (sz + 1) >> 1; i > 0; i--, p_src++, p_dst++)
		*(uint16_t*)p_dst = *p_src;

	// Release the filled buffer, then re-arm the endpoint only if it is really
	// in NAK. A bit-mask test against EP_TX_NAK would also match EP_TX_VALID.
	ENDP1_SW_BUF_TOGGLE();
	if (_GetEPTxStatus(ENDP1) == EP_TX_NAK)
		_SetEPTxStatus(ENDP1, EP_TX_VALID);

	// Poll for up to 7000 delay_us(1) steps for the flags to differ, then up to
	// 10 more steps for EP_CTR_TX. Any timeout rewrites the packet.
	for (uint32_t i = 0; i < 7*1000; i++) {
		if (ENDP1_DTOG() != ENDP1_SW_BUF()) {
			for (uint32_t j = 0; j < 10; j++) {
				if (GetENDPOINT(ENDP1) & EP_CTR_TX) {
					_ClearEP_CTR_TX(ENDP1); return;
				}
				delay_us(1);
			}
			//_SetEPTxStatus(ENDP1, EP_TX_NAK);
			goto retry;
		}
		delay_us(1);
	}
	//_SetEPTxStatus(ENDP1, EP_TX_NAK);
	goto retry;
}

host speed test program:

use std::{io::Read, time::Duration};

/// Host-side throughput test: reads a serial port name from stdin, opens the
/// port, then repeatedly times how long it takes to receive 1,920,000 bytes
/// and prints the resulting data rate.
fn main() {
	let mut port_name = String::new();
	// Only `std::io::Read` is imported at file level, so the path to stdin is
	// spelled out in full.
	std::io::stdin().read_line(&mut port_name)
		.expect("failed to get port name");
	// read_line keeps the trailing line break; it is not part of the port name.
	let port_name = port_name.trim();

	let mut port = serialport::new(port_name, 9600)
		.timeout(Duration::from_millis(1000))
		.open_native().unwrap();

	// One receive buffer reused for every measurement; each pass overwrites it.
	let mut serial_buf: Vec<u8> = vec![0; 1_920_000];

	loop {
		let elapsed_ms = 1000. * measure_exec_seconds(|| {
			let mut bytes_read = 0;
			while bytes_read < serial_buf.len() {
				// Read errors (e.g. the 1 s timeout) are ignored and the read is retried.
				if let Ok(n) = port.read(&mut serial_buf[bytes_read..]) {
					bytes_read += n;
				}
			}
		});

		// bytes per millisecond == kilobytes (1000 B) per second
		println!("data rate (receiving): {} KB/s",
			(serial_buf.len() as f64) / elapsed_ms);
	}
}

/// Runs `fn_exec` once and returns the time it took, in seconds.
///
/// Uses the monotonic `Instant` clock: the measurement is unaffected by
/// wall-clock adjustments and cannot fail, whereas a `SystemTime` difference
/// returns an error if the system clock is set backwards during the call.
fn measure_exec_seconds<F: FnOnce()>(fn_exec: F) -> f64 {
	use std::time::Instant;
	let t_start = Instant::now();
	fn_exec();
	t_start.elapsed().as_secs_f64()
}

wuwbobo2021 avatar Aug 01 '24 17:08 wuwbobo2021