worm-blossom

worm-blossom

Truncated Exponential Golomb Codes

The truncated exponential golomb codes are a bit-level prefix code for natural numbers that are known to be less than some upper bound. Like the exponential golomb codes, it assigns shorter codes to smaller numbers. Numbers close to the upper bound are encoded more efficiently than with regular exponential golomb codes, however.

Here is how to encode a natural number n that is strictly less than the natural number max:

Some example values:

nmax = 1max = 2max = 3max = 4max = 5max = 9max = 12max = 16
0
0
1
1
1
1
1
1
1undefined
1
0
0
0
0
0
10
0
10
0
10
0
10
2undefinedundefined
0
1
0
10
0
11
0
11
0
11
0
11
3undefinedundefinedundefined
0
11
00
0
00
100
00
100
00
100
4undefinedundefinedundefinedundefined
00
1
00
101
00
101
00
101
5undefinedundefinedundefinedundefinedundefined
00
110
00
110
00
110
6undefinedundefinedundefinedundefinedundefined
00
111
00
111
00
111
7undefinedundefinedundefinedundefinedundefined
000
0
000
00
000
000
8undefinedundefinedundefinedundefinedundefined
000
1
000
01
000
001
9undefinedundefinedundefinedundefinedundefinedundefined
000
10
000
010
10undefinedundefinedundefinedundefinedundefinedundefined
000
110
000
011
11undefinedundefinedundefinedundefinedundefinedundefined
000
111
000
100
12undefinedundefinedundefinedundefinedundefinedundefinedundefined
000
101
13undefinedundefinedundefinedundefinedundefinedundefinedundefined
000
110
14undefinedundefinedundefinedundefinedundefinedundefinedundefined
000
1110
15undefinedundefinedundefinedundefinedundefinedundefinedundefined
000
1111

And some Rust code to demonstrate how it works (not ready for production in any way; this code can exhibit overflows when working with numbers close to 2^32):

/// Writes the
/// [truncated binary code](https://en.wikipedia.org/wiki/Truncated_binary_encoding)
/// of the number `n` into the least significant bits of the
/// returned `u32`, for `0 <= n <= max`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_truncated_binary(n: u32, max: u32) -> (u32, u8) {
    let k = max.ilog2();

    // Set u to the number of unused codewords, i.e.,
    // 2^(k+1) - max.
    let u = (1 << k + 1) - max;

    if n < u {
        return (n, k as u8);
    } else {
        return (n + u, k as u8 + 1);
    }
}

/// Writes the
/// [exponential golomb code](https://en.wikipedia.org/wiki/Exponential-Golomb_coding)
/// of the number `n` into the least significant bits of the
/// returned `u32`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_exp_golomb(n: u32) -> (u32, u8) {
    let size_of_binary_part = 32 - (n + 1).leading_zeros();
    return (n + 1, ((size_of_binary_part * 2) - 1) as u8);
}

/// Writes the truncated exponential golomb code of the
/// number `n` into the least significant bits of the
/// returned `u32`, for `0 <= n <= max`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_truncated_exp_golomb(n: u32, max: u32) -> (u32, u8) {
    if max == 1 {
        return (0, 0);
    }

    let threshold = (max.next_power_of_two() / 2) - 1;

    if n < threshold {
        return enc_exp_golomb(n);
    } else {
        let (truncated_binary_code, truncated_binary_width) =
            enc_truncated_binary(n - threshold, max - threshold);
        return (
            truncated_binary_code,
            truncated_binary_width
              + (32 - threshold.leading_zeros()) as u8,
        );
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_truncated_binary() {
        assert_eq!(enc_truncated_binary(0, 5), (0b00, 2));
        assert_eq!(enc_truncated_binary(1, 5), (0b01, 2));
        assert_eq!(enc_truncated_binary(2, 5), (0b10, 2));
        assert_eq!(enc_truncated_binary(3, 5), (0b110, 3));
        assert_eq!(enc_truncated_binary(4, 5), (0b111, 3));

        assert_eq!(enc_truncated_binary(0, 4), (0b00, 2));
        assert_eq!(enc_truncated_binary(1, 4), (0b01, 2));
        assert_eq!(enc_truncated_binary(2, 4), (0b10, 2));
        assert_eq!(enc_truncated_binary(3, 4), (0b11, 2));
    }

    #[test]
    fn test_exp_golomb() {
        assert_eq!(enc_exp_golomb(0), (1, 1));
        assert_eq!(enc_exp_golomb(1), (0b010, 3));
        assert_eq!(enc_exp_golomb(2), (0b011, 3));
        assert_eq!(enc_exp_golomb(3), (0b00100, 5));
        assert_eq!(enc_exp_golomb(4), (0b00101, 5));
        assert_eq!(enc_exp_golomb(5), (0b00110, 5));
        assert_eq!(enc_exp_golomb(6), (0b00111, 5));
        assert_eq!(enc_exp_golomb(7), (0b0001000, 7));
        assert_eq!(enc_exp_golomb(8), (0b0001001, 7));
    }

    #[test]
    fn test_truncated_exp_golomb() {
        assert_eq!(enc_truncated_exp_golomb(0, 16), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 16), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 16), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 16), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 16), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 16), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 16), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 16), (0b000_000, 6));
        assert_eq!(enc_truncated_exp_golomb(8, 16), (0b000_001, 6));
        assert_eq!(enc_truncated_exp_golomb(9, 16), (0b000_010, 6));
        assert_eq!(enc_truncated_exp_golomb(10, 16), (0b000_011, 6));
        assert_eq!(enc_truncated_exp_golomb(11, 16), (0b000_100, 6));
        assert_eq!(enc_truncated_exp_golomb(12, 16), (0b000_101, 6));
        assert_eq!(enc_truncated_exp_golomb(13, 16), (0b000_110, 6));
        assert_eq!(enc_truncated_exp_golomb(14, 16), (0b000_1110, 7));
        assert_eq!(enc_truncated_exp_golomb(15, 16), (0b000_1111, 7));

        assert_eq!(enc_truncated_exp_golomb(0, 12), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 12), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 12), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 12), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 12), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 12), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 12), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 12), (0b000_00, 5));
        assert_eq!(enc_truncated_exp_golomb(8, 12), (0b000_01, 5));
        assert_eq!(enc_truncated_exp_golomb(9, 12), (0b000_10, 5));
        assert_eq!(enc_truncated_exp_golomb(10, 12), (0b000_110, 6));
        assert_eq!(enc_truncated_exp_golomb(11, 12), (0b000_111, 6));

        assert_eq!(enc_truncated_exp_golomb(0, 9), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 9), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 9), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 9), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 9), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 9), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 9), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 9), (0b000_0, 4));
        assert_eq!(enc_truncated_exp_golomb(8, 9), (0b000_1, 4));

        assert_eq!(enc_truncated_exp_golomb(0, 5), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 5), (0b010, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 5), (0b011, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 5), (0b000, 3));
        assert_eq!(enc_truncated_exp_golomb(4, 5), (0b001, 3));

        assert_eq!(enc_truncated_exp_golomb(0, 4), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 4), (0b00, 2));
        assert_eq!(enc_truncated_exp_golomb(2, 4), (0b010, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 4), (0b011, 3));

        assert_eq!(enc_truncated_exp_golomb(0, 3), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 3), (0b00, 2));
        assert_eq!(enc_truncated_exp_golomb(2, 3), (0b01, 2));

        assert_eq!(enc_truncated_exp_golomb(0, 2), (0b0, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 2), (0b1, 1));
    }
}