Truncated Exponential Golomb Codes

The truncated exponential golomb codes are a bit-level prefix code for natural numbers that are known to be less than some upper bound. Like the exponential golomb codes, it assigns shorter codes to smaller numbers. Numbers close to the upper bound are encoded more efficiently than with regular exponential golomb codes, however.

Here is how to encode a natural number n that is strictly less than the natural number max:

If max is 1, the only code is the empty string.
Otherwise, let next_power be the least power of two that is greater than or equal to max. Let threshold be (next_power / 2) - 1.
If n < threshold, the code is simply the normal exponential golomb code. Specifically:
- the minimal-width (i.e., no leading zeros) binary encoding of n + 1, prepended by as many zero bits as the bit-width of that binary encoding.
Otherwise, the code is the concatenation of:
- as many zero bits as the bit-width of the minimal-width binary encoding of threshold, and
- the truncated binary encoding of n - threshold for an alphabet size of max - threshold.
  - Let k be floor(log2(max - threshold)), and let u be 2^(k + 1) - (max - threshold).
  - If n - threshold < u, the truncated binary encoding is the minimal-width binary encoding of n - threshold.
  - Otherwise, it is the minimal-width binary encoding of u + n - threshold.

Some example values:

`n`	`max = 1`	`max = 2`	`max = 3`	`max = 4`	`max = 5`	`max = 9`	`max = 12`	`max = 16`
0		0	1	1	1	1	1	1
1	undefined	1	0 0	0 0	0 10	0 10	0 10	0 10
2	undefined	undefined	0 1	0 10	0 11	0 11	0 11	0 11
3	undefined	undefined	undefined	0 11	00 0	00 100	00 100	00 100
4	undefined	undefined	undefined	undefined	00 1	00 101	00 101	00 101
5	undefined	undefined	undefined	undefined	undefined	00 110	00 110	00 110
6	undefined	undefined	undefined	undefined	undefined	00 111	00 111	00 111
7	undefined	undefined	undefined	undefined	undefined	000 0	000 00	000 000
8	undefined	undefined	undefined	undefined	undefined	000 1	000 01	000 001
9	undefined	undefined	undefined	undefined	undefined	undefined	000 10	000 010
10	undefined	undefined	undefined	undefined	undefined	undefined	000 110	000 011
11	undefined	undefined	undefined	undefined	undefined	undefined	000 111	000 100
12	undefined	undefined	undefined	undefined	undefined	undefined	undefined	000 101
13	undefined	undefined	undefined	undefined	undefined	undefined	undefined	000 110
14	undefined	undefined	undefined	undefined	undefined	undefined	undefined	000 1110
15	undefined	undefined	undefined	undefined	undefined	undefined	undefined	000 1111

And some Rust code to demonstrate how it works (not ready for production in any way; this code can exhibit overflows when working with numbers close to 2^32):

/// Writes the
/// [truncated binary code](https://en.wikipedia.org/wiki/Truncated_binary_encoding)
/// of the number `n` into the least significant bits of the
/// returned `u32`, for `0 <= n <= max`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_truncated_binary(n: u32, max: u32) -> (u32, u8) {
    let k = max.ilog2();

    // Set u to the number of unused codewords, i.e.,
    // 2^(k+1) - max.
    let u = (1 << k + 1) - max;

    if n < u {
        return (n, k as u8);
    } else {
        return (n + u, k as u8 + 1);
    }
}

/// Writes the
/// [exponential golomb code](https://en.wikipedia.org/wiki/Exponential-Golomb_coding)
/// of the number `n` into the least significant bits of the
/// returned `u32`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_exp_golomb(n: u32) -> (u32, u8) {
    let size_of_binary_part = 32 - (n + 1).leading_zeros();
    return (n + 1, ((size_of_binary_part * 2) - 1) as u8);
}

/// Writes the truncated exponential golomb code of the
/// number `n` into the least significant bits of the
/// returned `u32`, for `0 <= n <= max`.
/// The returned `u8` is the number of bits of the code.
pub fn enc_truncated_exp_golomb(n: u32, max: u32) -> (u32, u8) {
    if max == 1 {
        return (0, 0);
    }

    let threshold = (max.next_power_of_two() / 2) - 1;

    if n < threshold {
        return enc_exp_golomb(n);
    } else {
        let (truncated_binary_code, truncated_binary_width) =
            enc_truncated_binary(n - threshold, max - threshold);
        return (
            truncated_binary_code,
            truncated_binary_width
              + (32 - threshold.leading_zeros()) as u8,
        );
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_truncated_binary() {
        assert_eq!(enc_truncated_binary(0, 5), (0b00, 2));
        assert_eq!(enc_truncated_binary(1, 5), (0b01, 2));
        assert_eq!(enc_truncated_binary(2, 5), (0b10, 2));
        assert_eq!(enc_truncated_binary(3, 5), (0b110, 3));
        assert_eq!(enc_truncated_binary(4, 5), (0b111, 3));

        assert_eq!(enc_truncated_binary(0, 4), (0b00, 2));
        assert_eq!(enc_truncated_binary(1, 4), (0b01, 2));
        assert_eq!(enc_truncated_binary(2, 4), (0b10, 2));
        assert_eq!(enc_truncated_binary(3, 4), (0b11, 2));
    }

    #[test]
    fn test_exp_golomb() {
        assert_eq!(enc_exp_golomb(0), (1, 1));
        assert_eq!(enc_exp_golomb(1), (0b010, 3));
        assert_eq!(enc_exp_golomb(2), (0b011, 3));
        assert_eq!(enc_exp_golomb(3), (0b00100, 5));
        assert_eq!(enc_exp_golomb(4), (0b00101, 5));
        assert_eq!(enc_exp_golomb(5), (0b00110, 5));
        assert_eq!(enc_exp_golomb(6), (0b00111, 5));
        assert_eq!(enc_exp_golomb(7), (0b0001000, 7));
        assert_eq!(enc_exp_golomb(8), (0b0001001, 7));
    }

    #[test]
    fn test_truncated_exp_golomb() {
        assert_eq!(enc_truncated_exp_golomb(0, 16), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 16), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 16), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 16), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 16), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 16), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 16), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 16), (0b000_000, 6));
        assert_eq!(enc_truncated_exp_golomb(8, 16), (0b000_001, 6));
        assert_eq!(enc_truncated_exp_golomb(9, 16), (0b000_010, 6));
        assert_eq!(enc_truncated_exp_golomb(10, 16), (0b000_011, 6));
        assert_eq!(enc_truncated_exp_golomb(11, 16), (0b000_100, 6));
        assert_eq!(enc_truncated_exp_golomb(12, 16), (0b000_101, 6));
        assert_eq!(enc_truncated_exp_golomb(13, 16), (0b000_110, 6));
        assert_eq!(enc_truncated_exp_golomb(14, 16), (0b000_1110, 7));
        assert_eq!(enc_truncated_exp_golomb(15, 16), (0b000_1111, 7));

        assert_eq!(enc_truncated_exp_golomb(0, 12), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 12), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 12), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 12), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 12), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 12), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 12), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 12), (0b000_00, 5));
        assert_eq!(enc_truncated_exp_golomb(8, 12), (0b000_01, 5));
        assert_eq!(enc_truncated_exp_golomb(9, 12), (0b000_10, 5));
        assert_eq!(enc_truncated_exp_golomb(10, 12), (0b000_110, 6));
        assert_eq!(enc_truncated_exp_golomb(11, 12), (0b000_111, 6));

        assert_eq!(enc_truncated_exp_golomb(0, 9), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 9), (0b01_0, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 9), (0b01_1, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 9), (0b001_00, 5));
        assert_eq!(enc_truncated_exp_golomb(4, 9), (0b001_01, 5));
        assert_eq!(enc_truncated_exp_golomb(5, 9), (0b001_10, 5));
        assert_eq!(enc_truncated_exp_golomb(6, 9), (0b001_11, 5));
        assert_eq!(enc_truncated_exp_golomb(7, 9), (0b000_0, 4));
        assert_eq!(enc_truncated_exp_golomb(8, 9), (0b000_1, 4));

        assert_eq!(enc_truncated_exp_golomb(0, 5), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 5), (0b010, 3));
        assert_eq!(enc_truncated_exp_golomb(2, 5), (0b011, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 5), (0b000, 3));
        assert_eq!(enc_truncated_exp_golomb(4, 5), (0b001, 3));

        assert_eq!(enc_truncated_exp_golomb(0, 4), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 4), (0b00, 2));
        assert_eq!(enc_truncated_exp_golomb(2, 4), (0b010, 3));
        assert_eq!(enc_truncated_exp_golomb(3, 4), (0b011, 3));

        assert_eq!(enc_truncated_exp_golomb(0, 3), (0b1, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 3), (0b00, 2));
        assert_eq!(enc_truncated_exp_golomb(2, 3), (0b01, 2));

        assert_eq!(enc_truncated_exp_golomb(0, 2), (0b0, 1));
        assert_eq!(enc_truncated_exp_golomb(1, 2), (0b1, 1));
    }
}