Skip to content

Commit

Permalink
floppy: apply NCO-based 2nd-order PLL to WDATA line
Browse files Browse the repository at this point in the history
Centurion Finch Floppy Controller
(FFC, https://github.com/Nakazoto/CenturionComputer/wiki/FFC-Board)
is an 8" floppy controller that applies up to 350ns write
precompensation. FlashFloppy's existing approach of re-aligning
the bit clock to the last pulse edge causes enough additional error
for bitcells to be missed or injected in this case.

This commit applies a 2nd-order (proportional and integral) PLL to
the incoming WDATA to mitigate both frequency offset and extreme
write precompensation. The implementation is based on a
virtual numerically-controlled oscillator nominally running at the
same frequency at the WDATA timer but with a 16.16 fixed-point
representation allowing for considerable precision in frequency
adjustment.  Proportional and integral constants were calcuated to
nominally provide a 1440kHz natural loop frequency and a damping
factor of 0.25 to allow for 2% frequency offset and strong jitter
rejection against write precompenstaion.
  • Loading branch information
mx-shift committed Jan 22, 2024
1 parent c6310b8 commit bdcbd3c
Showing 1 changed file with 115 additions and 16 deletions.
131 changes: 115 additions & 16 deletions src/floppy_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
* See the file COPYING for more details, or visit <http://unlicense.org>.
*/

/*
* PLL nominally runs at the same frequency as the timer's clock but with a
* 16.16 fixed-point representation. This allows for lots of precision in phase
* error calcuations. NOMINAL_PLL_PHASE_STEP is the PLL phase increment which
* causes the PLL to exactly equal the timer's frequency.
*/
#define NOMINAL_PLL_PHASE_STEP (1 << 16)

/* A DMA buffer for running a timer associated with a floppy-data I/O pin. */
struct dma_ring {
/* Current state of DMA (RDATA):
Expand All @@ -33,7 +41,12 @@ struct dma_ring {
uint16_t cons;
union {
uint16_t prod; /* dma_rd: our producer index for flux samples */
uint16_t prev_sample; /* dma_wr: previous CCRx sample value */
struct {
uint32_t phase_step; /* PLL period */
int32_t phase_integral; /* PLL's running sum of phase errors */
uint32_t prev_bc_left; /* PLL timestamp of left edge of previous bitcell */
uint32_t curr_bc_left; /* PLL timestamp of left edge of current bitcell */
} wr;
};
/* DMA ring buffer of timer values (ARR or CCRx). */
uint16_t buf[1024];
Expand Down Expand Up @@ -401,6 +414,10 @@ static void wdata_start(void)
}

dma_wr->state = DMA_starting;
dma_wr->wr.phase_step = NOMINAL_PLL_PHASE_STEP;
dma_wr->wr.phase_integral = 0;
dma_wr->wr.prev_bc_left = 0;
dma_wr->wr.curr_bc_left = 0;

/* Start timer. */
tim_wdata->egr = TIM_EGR_UG;
Expand Down Expand Up @@ -643,13 +660,15 @@ static void IRQ_rdata_dma(void)
static void IRQ_wdata_dma(void)
{
const uint16_t buf_mask = ARRAY_SIZE(dma_rd->buf) - 1;
uint16_t cons, prod, prev, next;
uint16_t cons, prod;
uint32_t bc_dat = 0, bc_prod;
uint32_t *bc_buf = image->bufs.write_bc.p;
unsigned int sync = image->sync;
unsigned int bc_bufmask = (image->bufs.write_bc.len / 4) - 1;
int curr, cell = image->write_bc_ticks;
struct write *write = NULL;
uint32_t next_edge, distance_from_prev_bc_left, distance_from_curr_bc_left;
uint32_t bc_step;
int32_t phase_error;

/* Clear DMA peripheral interrupts. */
dma1->ifcr = DMA_IFCR_CGIF(dma_wdata_ch);
Expand All @@ -669,27 +688,56 @@ static void IRQ_wdata_dma(void)
}

/* Process the flux timings into the raw bitcell buffer. */
prev = dma_wr->prev_sample;
bc_prod = image->bufs.write_bc.prod;
bc_dat = image->write_bc_window;
for (cons = dma_wr->cons; cons != prod; cons = (cons+1) & buf_mask) {
next = dma_wr->buf[cons];
curr = (int16_t)(next - prev) - (cell >> 1);
if (unlikely(curr < 0)) {
/* Runt flux, much shorter than bitcell clock. Merge it forward. */
/* Calculate duration of a bitcell using the PLL's current period */
bc_step = dma_wr->wr.phase_step * (uint32_t)image->write_bc_ticks;

/*
* Incoming sample ticks are shifted up 16-bits to match PLL's internal
* precision
*/
next_edge = (uint32_t)dma_wr->buf[cons] << 16;

/*
* If this is the first pulse after WGATE was asserted, treat it as
* perfectly in phase.
*/
if (dma_wr->wr.prev_bc_left == 0 && dma_wr->wr.curr_bc_left == 0) {
dma_wr->wr.curr_bc_left = next_edge - (bc_step / 2);
dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left - bc_step;
}

/* By computing distance, wraparound is accounted for naturally. */
distance_from_prev_bc_left = next_edge - dma_wr->wr.prev_bc_left;

/* If the next edge would fall within the previous bitcell, ignore it. */
if (distance_from_prev_bc_left < (dma_wr->wr.curr_bc_left - dma_wr->wr.prev_bc_left))
{
continue;
}
prev = next;
while ((curr -= cell) > 0) {

/* Advance to the current bitcell */
distance_from_curr_bc_left = next_edge - dma_wr->wr.curr_bc_left;

/* Record zeros for each bitcell that passed before this pulse */
while (distance_from_curr_bc_left > bc_step)
{
bc_dat <<= 1;
bc_prod++;
if (!(bc_prod&31))
bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);

if (!(bc_prod & 31))
bc_buf[((bc_prod - 1) / 32) & bc_bufmask] = htobe32(bc_dat);

distance_from_curr_bc_left -= bc_step;
dma_wr->wr.curr_bc_left += bc_step;
}
curr += cell >> 1; /* remove the 1/2-cell bias */
prev -= curr >> 2; /* de-jitter/precomp: carry 1/4 of phase error */

/* Record a one for this bitcell */
bc_dat = (bc_dat << 1) | 1;
bc_prod++;

switch (sync) {
case SYNC_fm:
/* FM clock sync clock byte is 0xc7. Check for:
Expand All @@ -704,6 +752,59 @@ static void IRQ_wdata_dma(void)
}
if (!(bc_prod&31))
bc_buf[((bc_prod-1) / 32) & bc_bufmask] = htobe32(bc_dat);

/*
* Figure out the phase error of the current sample before we start
* mucking with state
*/
phase_error = ((int32_t)distance_from_curr_bc_left - ((int32_t)bc_step / 2)) / (int32_t)image->write_bc_ticks;

/* Adjust bitcell history for next iteration */
dma_wr->wr.prev_bc_left = dma_wr->wr.curr_bc_left;
dma_wr->wr.curr_bc_left += bc_step;

/* Accumulate error into integral, saturating as necessary */
if (dma_wr->wr.phase_integral > 0 && phase_error > INT32_MAX - dma_wr->wr.phase_integral) {
dma_wr->wr.phase_integral = INT32_MAX;
} else if (dma_wr->wr.phase_integral < 0 && phase_error < INT32_MIN - dma_wr->wr.phase_integral) {
dma_wr->wr.phase_integral = INT32_MIN;
} else {
dma_wr->wr.phase_integral += phase_error;
}

/*
* Calculate next phase_step by applying a PI loop using the constants
* from the following analysis (based on
* https://www.dsprelated.com/showarticle/967.php):
*
* f_s = NCO base frequency
* k_p = phase detector gain
* k_nco = NCO feedback scaler
* zeta = loop dampening coefficient
* f_n = loop natural frequency
* k_l = proportional scaler
* k_i = integral scaler
*
* w_n = f_n * 2 * pi
* Ts = 1 / f_s
* k_l = 2 * zeta * w_n * Ts / (k_p * k_nco)
* k_i = w_n^2 * Ts^2 / (k_p * k_nco)
*
* Assumptions:
* f_s = 72,000,000 Hz
* k_p = 1.0
* k_nco = 1.0
* f_n = 1,440,000 Hz (2% of f_s to allow for moderate freq offset)
* zeta = 0.25 (strong jitter rejection for write precomp)
*
* Results:
* k_l = 0.06283185307 ~= 1/16
* k_i = 0.01579136704 ~= 1/64
*/
dma_wr->wr.phase_step = (uint32_t)(
(int32_t)NOMINAL_PLL_PHASE_STEP
+ phase_error / 16
+ dma_wr->wr.phase_integral / 64);
}

if (bc_prod & 31)
Expand All @@ -718,14 +819,12 @@ static void IRQ_wdata_dma(void)
/* Initialise decoder state for the start of the next write. */
bc_prod = (bc_prod + 31) & ~31;
bc_dat = ~0;
prev = 0;
}

/* Save our progress for next time. */
image->write_bc_window = bc_dat;
image->bufs.write_bc.prod = bc_prod;
dma_wr->cons = cons;
dma_wr->prev_sample = prev;
}

/*
Expand Down

0 comments on commit bdcbd3c

Please sign in to comment.