Quantcast
Channel: Raspberry Pi Forums
Viewing all articles
Browse latest Browse all 8082

General • Re: RP2350 8 Slot Backplane with LVDS

$
0
0
I figured I would give ChatGPT a try to see if it knows PIO programming. It didn't hesitate and gave me something that it thinks will work. I'll have to try it out and see what it does.

Let’s lock this down with a pin plan starting at GPIO0, and give you working PIO + DMA C++ for:

Master: generates CLK @ 20 MHz, streams 256B forward (DMA), and simultaneously captures 4096 bits from DATA_REV (DMA)

Slave (per node index 0..7):

forwards DATA_FWD with 1-bit delay and captures its 32B output slot (DMA)

forwards DATA_REV with 1-bit delay except during its slot where it overwrites 32B input slot from DMA-fed TX FIFO

Pin plan (lower numbers, simple, contiguous-ish)
Master board

GPIO0 = SYNC_OUT (to all nodes)

GPIO1 = CLK_OUT (daisy chained through nodes)

GPIO2 = DATA_FWD_OUT (to node0)

GPIO3 = DATA_REV_IN (from node0)

Each slave board (all 8 identical wiring)

GPIO0 = SYNC_IN

GPIO1 = CLK_IN

GPIO2 = DATA_FWD_IN

GPIO3 = DATA_REV_IN (from downstream node, or from “end terminator” on last node)

GPIO4 = DATA_FWD_OUT (to downstream node)

GPIO5 = DATA_REV_OUT (to upstream node)

(So every hop carries CLK + FWD downstream, REV upstream, and SYNC common.)

PIO programs
master_clk_fwd.pio


One bit per 3 instructions → set SM instruction clock to 60 MHz for 20 Mbit/s.

Code:

.program master_clk_fwd
.side_set 1

; OUT pin base : DATA_FWD_OUT
; Side-set pin : CLK_OUT
;
; Exactly three instructions per bit, so a 60 MHz state-machine clock yields
; 20 Mbit/s. `.wrap` returns to `.wrap_target` without costing a cycle; an
; explicit `jmp` here would add a fourth cycle per bit (15 Mbit/s at 60 MHz)
; and, because side-set is mandatory in this program, would also need its own
; `side` value to assemble.
;
; With autopull enabled, `out` stalls while the TX FIFO is empty and its
; side-set (CLK low) is still applied, so the clock idles low between frames.

.wrap_target
    nop             side 0      ; CLK low
    out pins, 1     side 0      ; present the next data bit while CLK is low
    nop             side 1      ; CLK high: receivers sample on this edge
.wrap
master_rev_in.pio

Samples DATA_REV_IN on CLK rising edges.

Code:

.program master_rev_in

; IN pin base: DATA_REV_IN
;
; CLK is referenced by absolute GPIO number (`wait ... gpio`) rather than
; through the IN mapping. pioasm constants are declared with `.define`;
; `%define` is not a pioasm directive and does not assemble.
.define CLK_GPIO 1

.wrap_target
    wait 0 gpio CLK_GPIO        ; make sure CLK has gone low first ...
    wait 1 gpio CLK_GPIO        ; ... so this catches a genuine rising edge
    in pins, 1                  ; sample one reverse-channel bit
.wrap                           ; free jump back to .wrap_target
slave_fwd.pio

Wait SYNC high

Pull slot_start_bits and slot_len_bits (256)

Code:

; slave_fwd: forward DATA_FWD with a one-bit delay and capture this node's slot.
;
; Repeated for every bit:
;   - wait for a CLK rising edge
;   - drive the previously sampled bit on DATA_FWD_OUT (1-bit delay)
;   - sample DATA_FWD_IN for the next bit time
; During the slot the sampled bit is also shifted into the ISR, so exactly
; slot_len_bits bits reach the RX FIFO (autopush threshold 32 is assumed to be
; set by the host configuration).

.program slave_fwd
.define SYNC_GPIO 0
.define CLK_GPIO  1

; IN pin base  = DATA_FWD_IN
; OUT pin base = DATA_FWD_OUT (one pin)
;
; The delayed bit is parked in the OSR and only ever moved with `mov`:
;   - routing it through the ISR loses a forwarded bit every time autopush
;     clears the ISR, and
;   - shifting skipped/tail bits into the ISR pushes words that are not part
;     of this node's slot.
; `mov pins, osr` writes OSR bit 0 to the single OUT-mapped pin and
; `mov osr, pins` places the IN base pin in OSR bit 0, so the forwarding path
; does not depend on either shift direction.
;
; Assumes autopull is disabled for this state machine so that `pull block`
; always fetches from the TX FIFO.
; Both counters are tested before the loop body runs, so the loops execute
; exactly slot_start_bits / slot_len_bits times (zero is allowed).

frame_wait:
    wait 1 gpio SYNC_GPIO
    ; Load slot parameters each frame, before any clock edges arrive
    pull block
    mov x, osr                  ; slot_start_bits
    pull block
    mov y, osr                  ; slot_len_bits (256)
    mov osr, null               ; prime delay bit = 0 (SET cannot target ISR/OSR)
    jmp skip_test

skip_loop:                      ; pass-through until this node's slot begins
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, osr               ; drive the bit sampled one clock ago
    mov osr, pins               ; sample the current bit
skip_test:
    jmp x-- skip_loop
    jmp cap_test

cap_loop:                       ; pass-through and capture the slot
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, osr
    mov osr, pins
    in  osr, 1                  ; capture the same sample; autopush every 32 bits
cap_test:
    jmp y-- cap_loop

; Forward the remainder of the frame. The host re-arms the state machine at
; the program start for the next frame, so this loop simply wraps forever.
.wrap_target
tail:
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, osr
    mov osr, pins
.wrap
slave_rev.pio

Wait SYNC high

Pull burn_bits = 2048 (forward phase length) and burn those clocks in pass-through mode

Pull slot_start_bits, slot_len_bits=256

Pass-through until slot, then overwrite 256 bits from TX FIFO (autopull 32), then pass-through forever

Code:

.program slave_rev
.define SYNC_GPIO 0
.define CLK_GPIO  1

; IN pin base  = DATA_REV_IN
; OUT pin base = DATA_REV_OUT (one pin)
;
; TX FIFO words consumed per frame, in order:
;   burn_bits, slot_start_bits, slot_len_bits, then the payload words.
;
; Assumes autopull (threshold 32) is enabled by the host configuration, so
; `out x, 32` takes the next FIFO word. With autopull on, `pull` is a no-op
; while the OSR is full and any `mov osr, ...` would overwrite a word that
; autopull already fetched, so the OSR carries FIFO data only and the delayed
; bit lives in the ISR instead (moved with `mov`, which never autopushes).
;
; burn_bits and slot_start_bits are both loaded before the first clock edge;
; slot_len_bits stays queued and is taken with a single `out` at the slot
; boundary, keeping every phase change within a few cycles of the clock edge.
; All counters are tested before their loop body, so each phase lasts exactly
; the requested number of bits. The program is 28 instructions long and
; therefore fits the 32-instruction PIO memory on its own.

frame_wait:
    wait 1 gpio SYNC_GPIO
    mov isr, null               ; prime delay bit = 0 (SET cannot target ISR)
    out x, 32                   ; burn_bits (forward phase length, 2048)
    out y, 32                   ; slot_start_bits
    jmp burn_test

burn_loop:                      ; pass-through during the forward phase
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, isr               ; drive the bit sampled one clock ago
    mov isr, pins               ; sample the current bit
burn_test:
    jmp x-- burn_loop
    jmp pass_test

pass_loop:                      ; pass-through until this node's reverse slot
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, isr
    mov isr, pins
pass_test:
    jmp y-- pass_loop
    out y, 32                   ; slot_len_bits (256)

overwrite_test:
    jmp y-- overwrite_loop
    jmp tail
overwrite_loop:                 ; replace the stream with our payload
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    out pins, 1                 ; payload bit; autopull refills the OSR
    mov isr, pins               ; keep the delay bit current for the tail
    jmp overwrite_test

; Pass-through for the rest of the frame. The host re-arms the state machine
; at the program start for the next frame, so this loop simply wraps forever.
.wrap_target
tail:
    wait 0 gpio CLK_GPIO
    wait 1 gpio CLK_GPIO
    mov pins, isr
    mov isr, pins
.wrap
C++: Master (DMA streaming)

This sends 256B forward and records the reverse stream.

Code:

#include <cstdint>

#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "hardware/clocks.h"
#include "master_clk_fwd.pio.h"
#include "master_rev_in.pio.h"

static constexpr uint32_t NODES = 8, NODE_BYTES = 32;
static constexpr uint32_t FWD_BYTES = NODES * NODE_BYTES;    // 256
static constexpr uint32_t REV_BYTES = NODES * NODE_BYTES;    // 256
static constexpr uint32_t FWD_BITS = FWD_BYTES * 8;          // 2048
static constexpr uint32_t REV_BITS = REV_BYTES * 8;          // 2048
static constexpr uint32_t FRAME_BITS = FWD_BITS + REV_BITS;  // 4096
static constexpr uint32_t FRAME_WORDS = FRAME_BITS / 32;     // 128
static constexpr uint32_t FWD_WORDS = FWD_BITS / 32;         // 64

static_assert(FWD_BITS % 32 == 0 && REV_BITS % 32 == 0,
              "both phases must be a whole number of 32-bit FIFO words");

/// One bus frame: bytes sent to the nodes and bytes received back from them.
struct Frame {
    uint8_t fwd[FWD_BYTES];
    uint8_t rev[REV_BYTES];
};

/// Builds the TX word stream for one frame.
///
/// The forward bytes are packed LSB-first (byte i occupies bits 8*(i%4).. of
/// word i/4), matching a right-shifting OSR. The reverse phase is padded with
/// zero words so the state machine keeps clocking while the nodes answer.
///
/// @param fr   frame whose `fwd` bytes are transmitted
/// @param txw  output buffer of FRAME_WORDS words
static void pack_tx(const Frame& fr, uint32_t* txw) {
    for (uint32_t w = 0; w < FRAME_WORDS; ++w) {
        uint32_t word = 0;
        if (w < FWD_WORDS) {
            for (uint32_t k = 0; k < 4; ++k) {
                word |= static_cast<uint32_t>(fr.fwd[4 * w + k]) << (8 * k);
            }
        }
        txw[w] = word;
    }
}

/// Extracts the reverse-phase bytes from a captured frame.
///
/// Expects the first received bit of each word in bit 0 (right-shifting ISR),
/// i.e. the same LSB-first layout pack_tx() produces.
///
/// @param rxw  FRAME_WORDS captured words; the reverse phase starts at FWD_WORDS
/// @param fr   frame whose `rev` bytes are filled in
static void unpack_rev(const uint32_t* rxw, Frame& fr) {
    for (uint32_t i = 0; i < REV_BYTES; ++i) {
        const uint32_t word = rxw[FWD_WORDS + i / 4];
        fr.rev[i] = static_cast<uint8_t>(word >> (8 * (i % 4)));
    }
}

/// Stops a state machine and returns it to a known state at `program_offset`.
///
/// pio_sm_restart() clears shift counters and the ISR but leaves the program
/// counter untouched, so the jump is executed explicitly.
static void rearm_sm(PIO pio, uint sm, uint program_offset) {
    pio_sm_set_enabled(pio, sm, false);
    pio_sm_clear_fifos(pio, sm);
    pio_sm_restart(pio, sm);
    pio_sm_exec(pio, sm, pio_encode_jmp(program_offset));
}

int main() {
    stdio_init_all();

    constexpr uint PIN_SYNC = 0, PIN_CLK = 1, PIN_FWD = 2, PIN_REV = 3;

    // SYNC is driven by software.
    gpio_init(PIN_SYNC);
    gpio_set_dir(PIN_SYNC, GPIO_OUT);
    gpio_put(PIN_SYNC, 0);

    PIO pio = pio0;
    const uint sm_tx = 0, sm_rx = 1;

    // Route the PIO-owned pins to this PIO block; without this the state
    // machines cannot drive CLK/DATA_FWD and the REV pad is never set up.
    pio_gpio_init(pio, PIN_CLK);
    pio_gpio_init(pio, PIN_FWD);
    pio_gpio_init(pio, PIN_REV);

    const uint off_tx = pio_add_program(pio, &master_clk_fwd_program);
    const uint off_rx = pio_add_program(pio, &master_rev_in_program);

    // TX state machine: clock generator + forward data.
    {
        pio_sm_config c = master_clk_fwd_program_get_default_config(off_tx);
        sm_config_set_out_pins(&c, PIN_FWD, 1);
        sm_config_set_sideset_pins(&c, PIN_CLK);
        sm_config_set_out_shift(&c, true, true, 32);  // shift right (LSB first), autopull at 32

        // 20 Mbit/s at 3 instructions per bit => 60 MHz state-machine clock.
        // A non-integer divider is realised with a fractional divider, which
        // adds clock jitter.
        const float div = static_cast<float>(clock_get_hz(clk_sys)) / 60'000'000.0f;
        sm_config_set_clkdiv(&c, div);

        pio_sm_set_consecutive_pindirs(pio, sm_tx, PIN_FWD, 1, true);
        pio_sm_set_consecutive_pindirs(pio, sm_tx, PIN_CLK, 1, true);
        pio_sm_init(pio, sm_tx, off_tx, &c);
    }

    // RX state machine: samples DATA_REV_IN on every CLK rising edge.
    {
        pio_sm_config c = master_rev_in_program_get_default_config(off_rx);
        sm_config_set_in_pins(&c, PIN_REV);
        // Shift right so the first received bit ends up in bit 0 of each word,
        // which is the layout unpack_rev() decodes and the nodes transmit.
        sm_config_set_in_shift(&c, true, true, 32);   // shift right, autopush at 32
        sm_config_set_clkdiv(&c, 1.0f);
        pio_sm_set_consecutive_pindirs(pio, sm_rx, PIN_REV, 1, false);
        pio_sm_init(pio, sm_rx, off_rx, &c);
    }

    const int dma_tx = dma_claim_unused_channel(true);
    const int dma_rx = dma_claim_unused_channel(true);

    // Memory -> TX FIFO, paced by the TX state machine.
    auto cfg_tx = dma_channel_get_default_config(dma_tx);
    channel_config_set_transfer_data_size(&cfg_tx, DMA_SIZE_32);
    channel_config_set_read_increment(&cfg_tx, true);
    channel_config_set_write_increment(&cfg_tx, false);
    channel_config_set_dreq(&cfg_tx, pio_get_dreq(pio, sm_tx, true));

    // RX FIFO -> memory, paced by the RX state machine.
    auto cfg_rx = dma_channel_get_default_config(dma_rx);
    channel_config_set_transfer_data_size(&cfg_rx, DMA_SIZE_32);
    channel_config_set_read_increment(&cfg_rx, false);
    channel_config_set_write_increment(&cfg_rx, true);
    channel_config_set_dreq(&cfg_rx, pio_get_dreq(pio, sm_rx, false));

    Frame fr{};
    for (uint32_t i = 0; i < FWD_BYTES; ++i) fr.fwd[i] = static_cast<uint8_t>(i);

    uint32_t txw[FRAME_WORDS];
    uint32_t rxw[FRAME_WORDS];

    while (true) {
        pack_tx(fr, txw);
        for (uint32_t& w : rxw) w = 0;

        // Both state machines start every frame from their first instruction.
        rearm_sm(pio, sm_tx, off_tx);
        rearm_sm(pio, sm_rx, off_rx);

        dma_channel_configure(dma_rx, &cfg_rx, rxw, &pio->rxf[sm_rx], FRAME_WORDS, true);
        dma_channel_configure(dma_tx, &cfg_tx, &pio->txf[sm_tx], txw, FRAME_WORDS, true);

        // Frame start
        gpio_put(PIN_SYNC, 1);
        sleep_us(2);
        gpio_put(PIN_SYNC, 0);

        // Enable the receiver first so it is already waiting for the first edge.
        pio_sm_set_enabled(pio, sm_rx, true);
        pio_sm_set_enabled(pio, sm_tx, true);

        // RX completion implies all FRAME_BITS clocks have been generated.
        dma_channel_wait_for_finish_blocking(dma_tx);
        dma_channel_wait_for_finish_blocking(dma_rx);

        pio_sm_set_enabled(pio, sm_tx, false);
        pio_sm_set_enabled(pio, sm_rx, false);

        unpack_rev(rxw, fr);
        // fr.rev now holds 8*32 bytes from slaves

        tight_loop_contents();
    }
}
C++: Slave (per node index)

This captures its 32B from forward and injects its 32B into reverse.

Code:

#include <cstdint>

#include "pico/stdlib.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "slave_fwd.pio.h"
#include "slave_rev.pio.h"

static constexpr uint32_t NODES = 8, NODE_BYTES = 32;
static constexpr uint32_t SLOT_BITS = NODE_BYTES * 8;        // 256
static constexpr uint32_t SLOT_WORDS = SLOT_BITS / 32;       // 8
static constexpr uint32_t FWD_BYTES = NODES * NODE_BYTES;    // 256
static constexpr uint32_t FWD_BITS = FWD_BYTES * 8;          // 2048
static constexpr uint32_t NODE_INDEX = 0;  // <-- set 0..7 per board

// Reverse TX stream: [burn_bits][slot_start][slot_len][payload words]
static constexpr uint32_t REV_PARAM_WORDS = 3;
static constexpr uint32_t REV_TX_WORDS = REV_PARAM_WORDS + SLOT_WORDS;  // 11

static_assert(NODE_INDEX < NODES, "NODE_INDEX must be 0..NODES-1");

static uint32_t out_from_master[SLOT_WORDS];  // 32 bytes (8 words) captured
static uint32_t in_to_master[SLOT_WORDS];     // 32 bytes (8 words) to inject

/// Stops a state machine and returns it to a known state at `program_offset`.
///
/// pio_sm_restart() does not reset the program counter. Both node programs end
/// in an endless forwarding loop, so without the explicit jump they would never
/// wait for SYNC or reload their slot parameters again after the first frame.
static void rearm_sm(PIO pio, uint sm, uint program_offset) {
    pio_sm_set_enabled(pio, sm, false);
    pio_sm_clear_fifos(pio, sm);
    pio_sm_restart(pio, sm);
    pio_sm_exec(pio, sm, pio_encode_jmp(program_offset));
}

int main() {
    stdio_init_all();

    constexpr uint PIN_SYNC = 0, PIN_CLK = 1, PIN_FWD_IN = 2, PIN_REV_IN = 3,
                   PIN_FWD_OUT = 4, PIN_REV_OUT = 5;

    // The two programs together need more than the 32 instruction slots of a
    // single PIO block, so each one gets its own block.
    PIO pio_fwd = pio0;
    PIO pio_rev = pio1;
    const uint sm_fwd = 0, sm_rev = 1;

    // Inputs only need their pads enabled; any PIO block can read them.
    for (const uint pin : {PIN_SYNC, PIN_CLK, PIN_FWD_IN, PIN_REV_IN}) {
        gpio_init(pin);
        gpio_set_dir(pin, GPIO_IN);
    }

    // Outputs must be routed to the PIO block that drives them. gpio_init()
    // would hand them to SIO, and the state machines could not drive the pins.
    pio_gpio_init(pio_fwd, PIN_FWD_OUT);
    pio_gpio_init(pio_rev, PIN_REV_OUT);

    const uint off_fwd = pio_add_program(pio_fwd, &slave_fwd_program);
    const uint off_rev = pio_add_program(pio_rev, &slave_rev_program);

    // FWD SM config
    {
        pio_sm_config c = slave_fwd_program_get_default_config(off_fwd);
        sm_config_set_in_pins(&c, PIN_FWD_IN);
        sm_config_set_out_pins(&c, PIN_FWD_OUT, 1);
        // The master transmits LSB-first, so shift right to get the first
        // received bit into bit 0 of every captured word.
        sm_config_set_in_shift(&c, true, true, 32);    // shift right, autopush at 32
        sm_config_set_out_shift(&c, true, false, 32);  // shift right, no autopull
        sm_config_set_clkdiv(&c, 1.0f);
        pio_sm_set_consecutive_pindirs(pio_fwd, sm_fwd, PIN_FWD_OUT, 1, true);
        pio_sm_set_consecutive_pindirs(pio_fwd, sm_fwd, PIN_FWD_IN, 1, false);
        pio_sm_init(pio_fwd, sm_fwd, off_fwd, &c);
    }

    // REV SM config
    {
        pio_sm_config c = slave_rev_program_get_default_config(off_rev);
        sm_config_set_in_pins(&c, PIN_REV_IN);
        sm_config_set_out_pins(&c, PIN_REV_OUT, 1);
        sm_config_set_in_shift(&c, false, false, 32);  // no autopush: nothing is captured here
        sm_config_set_out_shift(&c, true, true, 32);   // shift right (LSB first), autopull for payload
        sm_config_set_clkdiv(&c, 1.0f);
        pio_sm_set_consecutive_pindirs(pio_rev, sm_rev, PIN_REV_OUT, 1, true);
        pio_sm_set_consecutive_pindirs(pio_rev, sm_rev, PIN_REV_IN, 1, false);
        pio_sm_init(pio_rev, sm_rev, off_rev, &c);
    }

    // DMA: capture 8 words (32 bytes) from FWD RX FIFO
    const int dma_rx = dma_claim_unused_channel(true);
    auto cfg_rx = dma_channel_get_default_config(dma_rx);
    channel_config_set_transfer_data_size(&cfg_rx, DMA_SIZE_32);
    channel_config_set_read_increment(&cfg_rx, false);
    channel_config_set_write_increment(&cfg_rx, true);
    channel_config_set_dreq(&cfg_rx, pio_get_dreq(pio_fwd, sm_fwd, false));

    // DMA: feed REV TX FIFO with [burn_bits][slot_start][slot_len][8 payload words] = 11 words
    const int dma_tx = dma_claim_unused_channel(true);
    auto cfg_tx = dma_channel_get_default_config(dma_tx);
    channel_config_set_transfer_data_size(&cfg_tx, DMA_SIZE_32);
    channel_config_set_read_increment(&cfg_tx, true);
    channel_config_set_write_increment(&cfg_tx, false);
    channel_config_set_dreq(&cfg_tx, pio_get_dreq(pio_rev, sm_rev, true));

    // NOTE(review): every upstream hop forwards with a one-bit delay; confirm
    // whether the slot start needs a per-node offset on top of this value.
    const uint32_t slot_start_bits = NODE_INDEX * SLOT_BITS;

    uint32_t tx_staging[REV_TX_WORDS];

    while (true) {
        // Your app updates input image here
        for (uint32_t i = 0; i < SLOT_WORDS; ++i) {
            in_to_master[i] = 0x12340000u | (NODE_INDEX << 8) | i;
        }

        // Build TX staging per frame
        tx_staging[0] = FWD_BITS;
        tx_staging[1] = slot_start_bits;
        tx_staging[2] = SLOT_BITS;
        for (uint32_t i = 0; i < SLOT_WORDS; ++i) {
            tx_staging[REV_PARAM_WORDS + i] = in_to_master[i];
        }

        // Restart/arm each frame from the first instruction; SMs wait on SYNC.
        // NOTE(review): disabling the state machines also stops pass-through
        // forwarding for downstream nodes, so this must not happen before the
        // current frame has fully ended — confirm the frame-end timing.
        rearm_sm(pio_fwd, sm_fwd, off_fwd);
        rearm_sm(pio_rev, sm_rev, off_rev);

        // Push FWD slot params (consumed after SYNC)
        pio_sm_put_blocking(pio_fwd, sm_fwd, slot_start_bits);
        pio_sm_put_blocking(pio_fwd, sm_fwd, SLOT_BITS);

        // Arm DMA
        dma_channel_configure(dma_rx, &cfg_rx, out_from_master,
                              &pio_fwd->rxf[sm_fwd], SLOT_WORDS, true);
        dma_channel_configure(dma_tx, &cfg_tx, &pio_rev->txf[sm_rev],
                              tx_staging, REV_TX_WORDS, true);

        // Enable SMs (they block waiting for SYNC)
        pio_sm_set_enabled(pio_fwd, sm_fwd, true);
        pio_sm_set_enabled(pio_rev, sm_rev, true);

        // Wait until our 32B output slot captured
        dma_channel_wait_for_finish_blocking(dma_rx);
        // out_from_master now contains 32 bytes from the master for this node
        // (You can process immediately; reverse injection will still be occurring.)

        dma_channel_wait_for_finish_blocking(dma_tx);
        // DMA completion only means the last word reached the FIFO. Let the
        // state machine drain it so the next re-arm does not discard payload.
        while (!pio_sm_is_tx_fifo_empty(pio_rev, sm_rev)) {
            tight_loop_contents();
        }

        tight_loop_contents();
    }
}
CMake (both master and slave projects)
Add your .pio files and generate headers:

Code:

# Assemble each .pio source into the <name>.pio.h header included by the C++ code.
pico_generate_pio_header(${PROJECT_NAME} ${CMAKE_CURRENT_LIST_DIR}/master_clk_fwd.pio)
pico_generate_pio_header(${PROJECT_NAME} ${CMAKE_CURRENT_LIST_DIR}/master_rev_in.pio)
pico_generate_pio_header(${PROJECT_NAME} ${CMAKE_CURRENT_LIST_DIR}/slave_fwd.pio)
pico_generate_pio_header(${PROJECT_NAME} ${CMAKE_CURRENT_LIST_DIR}/slave_rev.pio)
One practical detail

slave_fwd/slave_rev forward forever after they finish their slot; since you restart them each frame in C++, that’s fine. If you later want “no restart”, I can rewrite them to preserve X/Y by reloading from scratch registers instead of decrementing.

If you want, next I can give you a master + node0 loopback test (single slave wired directly back) so you can validate:

1-bit forwarding delay behavior

correct slot extraction/insertion

and confirm the 32-byte blocks land where expected.

Statistics: Posted by theoutfield — Fri Jan 09, 2026 1:26 am



Viewing all articles
Browse latest Browse all 8082

Trending Articles