diff --git a/challenge_12/go/erocs/README.md b/challenge_12/go/erocs/README.md new file mode 100644 index 000000000..b82cc24a4 --- /dev/null +++ b/challenge_12/go/erocs/README.md @@ -0,0 +1,16 @@ +# Challenge 12: Compression and Decompression + +## 1. Solution Description + +This challenge is all about run length encoding (RLE). + +Encode() counts consecutive runs of a single character. If multiple are encountered it adds the character, a pound sign, and the number of occurrences to the output buffer. Single characters are directly transferred to the output buffer. + +Decode() has to reverse this but in doing so has to handle more error conditions due to malformed input. Otherwise it detects if a pound sign immediately follows the current character and if so expands that character the appropriate number of times into the output buffer. + +## 2. Running Tests + +In bash in this directory: + + export GOPATH=`pwd` + go test c12 diff --git a/challenge_12/go/erocs/src/c12/c12.go b/challenge_12/go/erocs/src/c12/c12.go new file mode 100644 index 000000000..61f90bde8 --- /dev/null +++ b/challenge_12/go/erocs/src/c12/c12.go @@ -0,0 +1,129 @@ +package c12 + +import ( + "fmt" + "strconv" + "strings" +) + +const MaxRunLength = 0xFFFF + +func isAToZ(ch rune) bool { + return ('a' <= ch && 'z' >= ch) || ('A' <= ch && 'Z' >= ch) +} + +// Run Length Encodes the given string, compressing runs of single characters +// into the form c#1, where c is the character and 1 is the appropriate count. +// This only supports encoding strings which contain the character set +// [a-zA-Z]. No other characters are allowed. Character runs exceeding a 65535 +// length will be split into 65535 length encodings. 
+func Encode(s string) (string, error) { + ss := []string{} + c := 0 + cch := '\x00' + for _, ch := range s { + if !isAToZ(ch) { + return "", fmt.Errorf("Non-alphabetic character encountered: %c", ch) + } + large_count := false + if cch == ch { + c++ + if c < MaxRunLength { + continue + } + large_count = true + } + if c > 1 { + ss = append(ss, fmt.Sprintf("%c#%d", cch, c)) + } else if c == 1 { + ss = append(ss, string(cch)) + } + cch = ch + c = 1 + if large_count { + c = 0 + } + } + if c > 1 { + ss = append(ss, fmt.Sprintf("%c#%d", cch, c)) + } else if c == 1 { + ss = append(ss, string(cch)) + } + return strings.Join(ss, ""), nil +} + +func is0To9(ch rune) bool { + return '0' <= ch && '9' >= ch +} + +// Parses the decimal repeat count that starts at rs[i]. Returns the index just past the digits, the parsed count, and an error value (in that order). +func parseRepeatCount(rs []rune, i int) (idx, count int, err error) { + idx = i + mark := idx + for idx < len(rs) && is0To9(rs[idx]) { + idx++ + } + if mark == idx { + if idx >= len(rs) { + err = fmt.Errorf("No repeat count specified") + } else { + err = fmt.Errorf("Non-digit character encountered for repeat count: %c", rs[idx]) + } + return + } + c64, err := strconv.ParseInt(string(rs[mark:idx]), 10, 64) + if err != nil { + return + } + if c64 > MaxRunLength { + err = fmt.Errorf("Excessive repeat count: %v", c64) + return + } + count = int(c64) + return +} + +// Appends ch to *rs n times, growing the backing array first when capacity is short. rs is in/out +func appendRuneNTimes(rs *[]rune, ch rune, n int) { + rout := *rs + if cap(rout) < len(rout)+n { + // Make the length identical to the current length to leverage copy(). + tmp := make([]rune, len(rout), cap(rout)*2+n) + copy(tmp, rout) + rout = tmp + } + for n > 0 { + rout = append(rout, ch) + n-- + } + *rs = rout +} + +// Expands the given Run Length Encoded string to its original value. +// Compressed runs of a single character are represented in the form c#1, where +// c is the character and 1 is the appropriate count. This only supports +// decoding strings which contain the character set [a-zA-Z]. No other +// characters are allowed. 
A maximum count of 65535 for a given character is +// allowed. +func Decode(s string) (string, error) { + rs := []rune(s) + rout := make([]rune, 0, len(rs)*2) + for i := 0; i < len(rs); i++ { + ch := rs[i] + if !isAToZ(ch) { + return "", fmt.Errorf("Non-alphabetic character encountered: %c", ch) + } + if i+1 >= len(rs) || rs[i+1] != '#' { + rout = append(rout, ch) + continue + } + iTmp, c, err := parseRepeatCount(rs, i+2) + if err != nil { + return "", err + } + // iTmp is the index just past the digits; rewind one character so the loop's i++ lands on it. + i = iTmp - 1 + appendRuneNTimes(&rout, ch, c) + } + return string(rout), nil +} diff --git a/challenge_12/go/erocs/src/c12/c12_test.go b/challenge_12/go/erocs/src/c12/c12_test.go new file mode 100644 index 000000000..3edf119b6 --- /dev/null +++ b/challenge_12/go/erocs/src/c12/c12_test.go @@ -0,0 +1,117 @@ +package c12 + +import "testing" + +func CheckGood(t *testing.T, raw, expect string) { + s, err := Encode(raw) + if err != nil { + t.Error("Error encoding:", err) + } else if s != expect { + t.Error("Bad encoding:", s, "Expected:", expect) + } + s, err = Decode(s) + if err != nil { + t.Error("Error decoding:", err) + } else if s != raw { + t.Error("Bad decoding:", s, "Expected:", raw) + } +} + +func TestEmpty(t *testing.T) { + CheckGood(t, "", "") +} + +func TestSingleChar(t *testing.T) { + CheckGood(t, "a", "a") +} + +func TestMultiSameChar(t *testing.T) { + CheckGood(t, "aaaaaaaaaaaa", "a#12") +} + +func TestMultipleSingles(t *testing.T) { + CheckGood(t, "abcdefgh", "abcdefgh") +} + +func TestMultipleMultiples(t *testing.T) { + CheckGood(t, "aaabbbaaacccaaaaaaaaaaaadddddddddddddddddd", "a#3b#3a#3c#3a#12d#18") +} + +func TestMixed1(t *testing.T) { + CheckGood(t, "aaabaaacaaadaaaeaaa", "a#3ba#3ca#3da#3ea#3") +} + +func TestMixed2(t *testing.T) { + CheckGood(t, "abbbacccaddda", "ab#3ac#3ad#3a") +} + +func TestMixed3(t *testing.T) { + CheckGood(t, "aaab", "a#3b") +} + +func TestMixed4(t *testing.T) { + CheckGood(t, "abbb", "ab#3") +} + +func 
TestUppercase(t *testing.T) { + CheckGood(t, "AaBBbbcCddDD", "AaB#2b#2cCd#2D#2") +} + +func TestReallyLongRune(t *testing.T) { + ll := 0x10011 + rs := make([]rune, 0, ll) + for i := 0; i < ll; i++ { + rs = append(rs, 'a') + } + s := string(rs) + CheckGood(t, s, "a#65535a#18") +} + +func TestBadChar1(t *testing.T) { + _, err := Encode("@") + if err == nil { + t.Fail() + } +} + +func TestBadChar2(t *testing.T) { + _, err := Encode("[") + if err == nil { + t.Fail() + } +} + +func TestBadChar3(t *testing.T) { + _, err := Encode("{") + if err == nil { + t.Fail() + } +} + +func TestBadDecoding1(t *testing.T) { + _, err := Decode("a#") + if err == nil { + t.Fail() + } +} + +func TestBadDecoding2(t *testing.T) { + _, err := Decode("a#a") + if err == nil { + t.Fail() + } +} + +func TestBadDecoding3(t *testing.T) { + _, err := Decode("a#99999") + if err == nil { + t.Fail() + } +} + +func TestBadDecoding4(t *testing.T) { + _, err := Decode("a#9999999999999999999999999999999999999999999") + if err == nil { + t.Fail() + } +}