Skip to content

Commit

Permalink
Fix #15, optional loop unroll optimization (#16)
Browse files Browse the repository at this point in the history
- fix #15, loop unroll option, improving performance, kudos to nt314p
- fixed bug in test program (see #15)
- added flag to select LOOP UNROLL (optional, as unrolling gives larger code size)
- optimized the non-unrolled loop with ideas from the unrolled version.
- corrected type lastValue to uint8_t
- add FastShiftOut_scope_test.ino
- update readme.md
- minor edits
  • Loading branch information
RobTillaart authored Sep 19, 2024
1 parent 9ac57a7 commit d926090
Show file tree
Hide file tree
Showing 11 changed files with 523 additions and 71 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).


## [0.4.0] - 2024-09-03
- fix #15, loop unroll option, improving performance, kudos to nt314p
- fixed bug in test program (see #15)
- added flag to select LOOP UNROLL (optional, as unrolling gives larger code size)
- optimized the non-unrolled loop with ideas from the unrolled version.
- corrected type lastValue to uint8_t
- add FastShiftOut_scope_test.ino
- update readme.md
- minor edits

----

## [0.3.3] - 2024-07-23
- Fix #13, add wrapper functions
- write16/24/32, write(array, size)
Expand Down
205 changes: 180 additions & 25 deletions FastShiftOut.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// FILE: FastShiftOut.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.3.2
// VERSION: 0.4.0
// PURPOSE: ShiftOut that implements the Print interface
// DATE: 2013-08-22
// URL: https://github.com/RobTillaart/FastShiftOut
Expand Down Expand Up @@ -47,7 +47,6 @@ size_t FastShiftOut::write(uint8_t data)
}


// EXPERIMENTAL 0.3.3
size_t FastShiftOut::write16(uint16_t data)
{
if (_bitOrder == LSBFIRST)
Expand All @@ -64,7 +63,6 @@ size_t FastShiftOut::write16(uint16_t data)
}


// EXPERIMENTAL 0.3.3
size_t FastShiftOut::write24(uint32_t data)
{
if (_bitOrder == LSBFIRST)
Expand All @@ -85,7 +83,6 @@ size_t FastShiftOut::write24(uint32_t data)
}


// EXPERIMENTAL 0.3.3
size_t FastShiftOut::write32(uint32_t data)
{
if (_bitOrder == LSBFIRST)
Expand All @@ -109,10 +106,8 @@ size_t FastShiftOut::write32(uint32_t data)
}


// EXPERIMENTAL 0.3.3
size_t FastShiftOut::write(uint8_t * array, size_t size)
{
size_t n = 0;
if (_bitOrder == LSBFIRST)
{
for (size_t i = size; i > 0; ) // from end to begin ????
Expand Down Expand Up @@ -155,31 +150,110 @@ uint8_t FastShiftOut::getBitOrder(void)
}



size_t FastShiftOut::writeLSBFIRST(uint8_t data)
{
uint8_t value = data;
_lastValue = value;

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

#if defined(FASTSHIFTOUT_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED

uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t outmask1 = _dataOutBit;
uint8_t outmask2 = ~_dataOutBit;

volatile uint8_t* localDataOutRegister = _dataOutRegister;
volatile uint8_t* localClockRegister = _clockRegister;

// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// *_clockRegister |= cbmask1;
// *_clockRegister &= cbmask2;
// the following read-once / write-twice sequence is safe because interrupts
// are disabled, so the register cannot change between the read and the writes.
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset bit

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

// restore interrupt state
SREG = oldSREG;

#else // AVR SIZE OPTIMIZED

uint8_t cbmask1 = _clockBit;
uint8_t outmask1 = _dataOutBit;
uint8_t outmask2 = ~_dataOutBit;

volatile uint8_t* localDataOutRegister = _dataOutRegister;
volatile uint8_t* localClockRegister = _clockRegister;

// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();

for (uint8_t m = 1; m > 0; m <<= 1)
{
uint8_t oldSREG = SREG;
noInterrupts();
if ((value & m) == 0) *_dataOutRegister &= outmask2;
else *_dataOutRegister |= outmask1;
*_clockRegister |= cbmask1;
*_clockRegister &= cbmask2;
SREG = oldSREG;
// process one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
}

#else
// restore interrupt state
SREG = oldSREG;

#endif // if (AVR)

#else // other platforms reference shiftOut()

shiftOut(_dataPinOut, _clockPin, LSBFIRST, value);

Expand All @@ -196,23 +270,104 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)

#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)

#if defined(FASTSHIFTOUT_AVR_LOOP_UNROLLED) // AVR SPEED OPTIMIZED

uint8_t cbmask1 = _clockBit;
// uint8_t cbmask2 = ~_clockBit;
uint8_t outmask1 = _dataOutBit;
uint8_t outmask2 = ~_dataOutBit;

volatile uint8_t* localDataOutRegister = _dataOutRegister;
volatile uint8_t* localClockRegister = _clockRegister;

// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// *localClockRegister |= cbmask1;
// *localClockRegister &= cbmask2;
// the following read-once / write-twice sequence is safe because interrupts
// are disabled, so the register cannot change between the read and the writes.
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

// restore interrupt state
SREG = oldSREG;

#else // AVR SIZE OPTIMIZED

uint8_t cbmask1 = _clockBit;
uint8_t cbmask2 = ~_clockBit;
uint8_t outmask1 = _dataOutBit;
uint8_t outmask2 = ~_dataOutBit;

volatile uint8_t* localDataOutRegister = _dataOutRegister;
volatile uint8_t* localClockRegister = _clockRegister;

// disable interrupts (for all bits)
uint8_t oldSREG = SREG;
noInterrupts();

for (uint8_t m = 0x80; m > 0; m >>= 1)
{
uint8_t oldSREG = SREG;
noInterrupts();
if ((value & m) == 0) *_dataOutRegister &= outmask2;
else *_dataOutRegister |= outmask1;
*_clockRegister |= cbmask1;
*_clockRegister &= cbmask2;
SREG = oldSREG;
// process one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
}

#else
// restore interrupt state
SREG = oldSREG;

#endif // if (AVR)

#else // other platforms reference shiftOut()

shiftOut(_dataPinOut, _clockPin, MSBFIRST, value);

Expand Down
11 changes: 7 additions & 4 deletions FastShiftOut.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// FILE: FastShiftOut.h
// AUTHOR: Rob Tillaart
// VERSION: 0.3.3
// VERSION: 0.4.0
// PURPOSE: shiftOut class that implements the Print interface
// DATE: 2013-08-22
// URL: https://github.com/RobTillaart/FastShiftOut
Expand All @@ -11,7 +11,10 @@
#include "Arduino.h"
#include "Print.h"

#define FASTSHIFTOUT_LIB_VERSION (F("0.3.3"))
#define FASTSHIFTOUT_LIB_VERSION (F("0.4.0"))

// the next line selects the SPEED OPTIMIZED (loop unrolled) AVR code;
// comment it out to get the SIZE OPTIMIZED code instead.
#define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1


class FastShiftOut : public Print
Expand Down Expand Up @@ -39,7 +42,7 @@ class FastShiftOut : public Print

private:
uint8_t _bitOrder;
int _lastValue;
uint8_t _lastValue;


#if defined(ARDUINO_ARCH_AVR) || defined(ARDUINO_ARCH_MEGAAVR)
Expand All @@ -59,4 +62,4 @@ class FastShiftOut : public Print
};


// -- END OF FILE --
// -- END OF FILE --
Loading

0 comments on commit d926090

Please sign in to comment.