Run-length encoding is a simple form of lessless compression that takes an input string and returns a string comprising of a sequence of numbers and characters, with each number representing the number of occurances of the character following it.
For example, an input string of:
abc123xyzaaabbbcccd
Will be compressed to:
1a1b1c1112131x1y1z3a3b3c1d
The 3 methods in the RunLengthEncoder
return the following sequences:
encode_a
- Returns a sequence of occurances followed by characters, however will not return the occurances of a character if said characted only appears once. It will instead just return the character itself.
For example:
abc123xyzaaabbbcccd
Will return:
abc123xyz3a3b3cd
encode_b
- Returns a sequence of occurances and characters, regardless of how many times they appear.encode_c
- Returns the same result asencode_b
but uses a list comprehension to build the return string.
Code
run_length_encoder.py
class RunLengthEncoder:
"""
class containing 3 methods that can be used to return a run-length
encoded string.
"""
def encode_a(self, text):
"""
Returns a run-length encoded string from an input string.
Note: This function will not return the character count in the return
string if only a single instance of the character is found.
Args:
text (str): A string to encode
Returns:
str: A run length encoded string
Example:
input: "aaabbcdddd"
returns: "3a2bc4d"
"""
count = 1
previous = ""
mapping = list()
for character in text:
if character != previous:
if previous:
mapping.append((previous, count))
count = 1
previous = character
else:
count += 1
else:
mapping.append((character, count))
result = ""
for character, count in mapping:
if count == 1:
result += character
else:
result += str(count)
result += character
return result
def encode_b(self, text):
"""
Returns a run-length encoded string from an input string.
Args:
text (str): A string to encode
Returns:
str: A run length encoded string
Example:
input: "aaabbcdddd"
returns: "3a2b1c4d"
"""
count = 1
previous = ""
mapping = list()
for character in text:
if character != previous:
if previous:
mapping.append((previous, count))
count = 1
previous = character
else:
count += 1
else:
mapping.append((character, count))
result = ""
for character, count in mapping:
result += str(count)
result += character
return result
def encode_c(self, text):
"""
Returns a run-length encoded string from an input string.
This method uses a list comprehension to build the return
string.
Args:
text (str): A string to encode
Returns:
str: A run length encoded string
Example:
input: "aaabbcdddd"
returns: "3a2b1c4d"
"""
count = 1
previous = ""
mapping = list()
for character in text:
if character != previous:
if previous:
mapping.append((previous, count))
count = 1
previous = character
else:
count += 1
else:
mapping.append((character, count))
result = "".join(f"{str(count)}{character}" for character, count in mapping)
return result
Tests
tests.py
import unittest
from run_length_encoder import RunLengthEncoder
class TestRunLengthEncoders(unittest.TestCase):
# Tests for encoder_a
def test_encoder_a(self):
encoder = RunLengthEncoder()
text = "aaabbcdddd"
expected_result = "3a2bc4d"
self.assertEqual(encoder.encode_a(text), expected_result)
def test_encoder_a2(self):
encoder = RunLengthEncoder()
text = "aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnoooppp"
expected_result = "3a3b3c3d3e3f3g3h3i3j3k3l3m3n3o3p"
self.assertEqual(encoder.encode_a(text), expected_result)
def test_encoder_a3(self):
encoder = RunLengthEncoder()
text = "abcabcabcabcabcabc"
expected_result = "abcabcabcabcabcabc"
self.assertEqual(encoder.encode_a(text), expected_result)
def test_encoder_a4(self):
encoder = RunLengthEncoder()
text = "1223334444"
expected_result = "1223344"
self.assertEqual(encoder.encode_a(text), expected_result)
def test_encoder_a5(self):
encoder = RunLengthEncoder()
text = "abc123xyz"
expected_result = "abc123xyz"
self.assertEqual(encoder.encode_a(text), expected_result)
# Tests for encoder_b
def test_encoder_b(self):
encoder = RunLengthEncoder()
text = "aaabbcdddd"
expected_result = "3a2b1c4d"
self.assertEqual(encoder.encode_b(text), expected_result)
def test_encoder_b2(self):
encoder = RunLengthEncoder()
text = "aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnoooppp"
expected_result = "3a3b3c3d3e3f3g3h3i3j3k3l3m3n3o3p"
self.assertEqual(encoder.encode_b(text), expected_result)
def test_encoder_b3(self):
encoder = RunLengthEncoder()
text = "abcabcabcabcabcabc"
expected_result = "1a1b1c1a1b1c1a1b1c1a1b1c1a1b1c1a1b1c"
self.assertEqual(encoder.encode_b(text), expected_result)
def test_encoder_b4(self):
encoder = RunLengthEncoder()
text = "1223334444"
expected_result = "11223344"
self.assertEqual(encoder.encode_b(text), expected_result)
def test_encoder_b5(self):
encoder = RunLengthEncoder()
text = "abc123xyz"
expected_result = "1a1b1c1112131x1y1z"
self.assertEqual(encoder.encode_b(text), expected_result)
# Tests for encoder_c
def test_encoder_c(self):
encoder = RunLengthEncoder()
text = "aaabbcdddd"
expected_result = "3a2b1c4d"
self.assertEqual(encoder.encode_c(text), expected_result)
def test_encoder_c2(self):
encoder = RunLengthEncoder()
text = "aaabbbcccdddeeefffggghhhiiijjjkkklllmmmnnnoooppp"
expected_result = "3a3b3c3d3e3f3g3h3i3j3k3l3m3n3o3p"
self.assertEqual(encoder.encode_c(text), expected_result)
def test_encoder_c3(self):
encoder = RunLengthEncoder()
text = "abcabcabcabcabcabc"
expected_result = "1a1b1c1a1b1c1a1b1c1a1b1c1a1b1c1a1b1c"
self.assertEqual(encoder.encode_c(text), expected_result)
def test_encoder_c4(self):
encoder = RunLengthEncoder()
text = "1223334444"
expected_result = "11223344"
self.assertEqual(encoder.encode_c(text), expected_result)
def test_encoder_c5(self):
encoder = RunLengthEncoder()
text = "abc123xyz"
expected_result = "1a1b1c1112131x1y1z"
self.assertEqual(encoder.encode_c(text), expected_result)
if __name__ == "__main__":
unittest.main()
Example
example.py
from run_length_encoder import RunLengthEncoder
encoder = RunLengthEncoder()
string_to_compress = "abc123xyzaaabbbcccd"
print(f"Compressing string: {string_to_compress}")
result = encoder.encode_a(string_to_compress)
print(result)
result = encoder.encode_b(string_to_compress)
print(result)
result = encoder.encode_c(string_to_compress)
print(result)
Usage
To play around with the encoder, clone this repo and run the following:
python example.py
Output:
Compressing string: abc123xyzaaabbbcccd
Output a: abc123xyz3a3b3cd
Output b: 1a1b1c1112131x1y1z3a3b3c1d
Output c: 1a1b1c1112131x1y1z3a3b3c1d
Testing
To run the small suite of unit tests:
python tests.py