如何快速将一个float打包成4个字节?
How to quickly pack a float to 4 bytes?
我一直在寻找一种在 WebGL 纹理中存储浮点数的方法。我在网上找到了一些解决方案(some solutions),但它们只能处理 [0..1) 范围内的浮点数。我希望能够存储任意浮点数,为此需要扩展这类函数,使其同时存储指数(比如放在第一个字节中)。不过,我不太了解它们的工作原理,所以不清楚该如何下手。简而言之:
将一个浮点数打包成4个字节的高效算法是什么?
一个简单的方法是先约定好要支持的浮点数取值范围,并在打包前将其重新映射到 [0..1) 范围。
// Bounds of the value range the remap helpers agree on.
const MIN = -100;
const MAX = 100;

/**
 * Linearly maps v from [MIN, MAX] onto [0, 1] (MIN -> 0, MAX -> 1).
 * Values outside the range are not clamped.
 */
function packRemap(v) {
    const offset = v - MIN;
    const span = MAX - MIN;
    return offset / span;
}

/**
 * Inverse of packRemap: maps p from [0, 1] back onto [MIN, MAX].
 */
function unpackRemap(p) {
    const span = MAX - MIN;
    return MIN + p * span;
}
float 是一个 32 位的数(23 位尾数外加 1 位隐含位、8 位指数和 1 位符号),而 RGBA8 纹理的一个纹素同样是 32 位。因此我们只需要一种编码方案:它可以用 JS(或任何其他语言)打包,并能在 GLSL 中解包(要考虑到 GLSL ES 1.0 的限制,例如缺少按位运算)。这是我的建议(用 C++ 编写):
#include <cstdint>
#include <iostream>
#include <cmath>
// for storing result of encoding
// Four 8-bit channels holding one encoded float:
//   r, g, b : 24-bit value (mantissa << 1) | sign, most significant byte in r
//   a       : the 8-bit biased IEEE 754 exponent
struct rgba {
    uint8_t r, g, b, a;
};

// Splits the IEEE 754 single-precision bit pattern of x into an rgba value.
// The sign bit ends up in the least significant bit of the blue channel,
// the 23 mantissa bits above it, and the biased exponent in alpha.
rgba float2rgba(float x) {
    // Reinterpret the float as its 32-bit pattern. Reading the inactive union
    // member is supported by GCC, Clang and MSVC but is not guaranteed by the
    // C++ standard; memcpy / std::bit_cast are the portable alternatives.
    union {
        float xc;
        uint32_t xi;
    };
    xc = x;

    const uint32_t sign = xi >> 31;
    const uint32_t exponent = (xi >> 23) & 0xffu;
    const uint32_t mantissa = xi & 0x7fffffu;

    // 23 mantissa bits + 1 sign bit = exactly 24 bits for the RGB channels.
    const uint32_t v = (mantissa << 1) | sign;

    rgba r;
    r.r = static_cast<uint8_t>(v >> 16);
    r.g = static_cast<uint8_t>((v >> 8) & 0xffu);
    r.b = static_cast<uint8_t>(v & 0xffu);
    r.a = static_cast<uint8_t>(exponent);
    return r;
}

// Inverse of float2rgba, written with plain arithmetic on the channel values.
// Handles every exponent class: zero/denormals (a == 0), normal numbers and
// infinity/NaN (a == 255). The exponent is handled as a signed int: the
// previous unsigned subtraction wrapped around for a < 127 and turned every
// |x| < 1 into infinity.
float rgba2float(rgba r) {
    // Drop the sign bit (LSB of b) to rebuild the 23-bit mantissa.
    const uint32_t mantissa =
        (uint32_t{r.r} << 15) | (uint32_t{r.g} << 7) | (uint32_t{r.b} >> 1);
    const float sign = (r.b & 1) ? -1.f : 1.f;
    const int exponent = r.a;

    if (exponent == 0xff) {
        // All-ones exponent: infinity when the mantissa is zero, NaN otherwise.
        return mantissa == 0 ? sign * INFINITY : NAN;
    }
    if (exponent == 0) {
        // Zero and denormals have no implicit leading 1: value = mantissa * 2^-149.
        return sign * std::ldexp(static_cast<float>(mantissa), -149);
    }
    // Normal number: (2^23 + mantissa) * 2^(exponent - 127 - 23), computed exactly.
    const float significand = static_cast<float>(mantissa | 0x800000u);
    return sign * std::ldexp(significand, exponent - 150);
}
int main() {
const float a = -1.34320e32f;
rgba r = float2rgba(a);
std::cout <<
a << '\n' <<
static_cast<unsigned>(r.r) << ',' <<
static_cast<unsigned>(r.g) << ',' <<
static_cast<unsigned>(r.b) << ',' <<
static_cast<unsigned>(r.a) << '\n' <<
rgba2float(r) << std::endl;
}
输出:
-1.3432e+32
167,214,213,233
-1.3432e+32
由于我找不到任何可以解决我的问题的东西,所以我组装了这个解决方案:
/**
 * Fractional part of x, i.e. x minus the largest integer not above x.
 * The result is in [0, 1) for finite x, including negative x.
 */
function fract(x) {
    var whole = Math.floor(x);
    return x - whole;
}
/**
 * Packs a number into 4 byte values [m0, m1, m2, se].
 *
 * The value is written as s * m * 2^e with m in [1, 2):
 *   m0, m1, m2 : the top 24 fractional bits of m, least significant byte first
 *   se         : (e + 63), plus 128 when x is positive
 *
 * Representable exponents are -63..64. Inputs outside that range saturate so
 * that every element stays a valid byte: zero, NaN and magnitudes below 2^-63
 * become the smallest magnitude; magnitudes of 2^65 and above (including
 * Infinity) become the largest.
 */
function packFloat(x) {
    var MIN_EXP = -63;
    var MAX_EXP = 64;
    var signBit = x > 0 ? 128 : 0;
    var magnitude = Math.abs(x);

    // Written as a negated >= so that NaN takes this branch too.
    if (!(magnitude >= Math.pow(2, MIN_EXP))) {
        return [0, 0, 0, signBit];
    }
    if (magnitude >= Math.pow(2, MAX_EXP + 1)) {
        return [255, 255, 255, (MAX_EXP + 63) + signBit];
    }

    var e = Math.floor(Math.log2(magnitude));
    var m = magnitude / Math.pow(2, e);
    // Math.log2 may round across an integer just below a power of two
    // (e.g. 8 - 2^-50), leaving m outside [1, 2). Scaling by 2 is exact.
    while (m < 1) { e -= 1; m *= 2; }
    while (m >= 2) { e += 1; m /= 2; }

    // Top 24 fractional bits of the mantissa as one integer, then split.
    var frac = Math.floor((m - 1) * 16777216);
    return [
        frac % 256,
        Math.floor(frac / 256) % 256,
        Math.floor(frac / 65536),
        (e + 63) + signBit];
}
/**
 * Rebuilds a number from the 4 byte values produced by packFloat.
 * v[3] carries the sign flag (128 = positive) plus the exponent biased by 63;
 * v[0..2] carry the fractional mantissa bytes, least significant first.
 */
function unpackFloat(v) {
    var tag = v[3];
    var isPositive = tag >= 128;
    var sign = isPositive ? 1 : -1;
    var exponent = tag - (isPositive ? 128 : 0) - 63;

    var low = v[0] / 256 / 256 / 256;
    var mid = v[1] / 256 / 256;
    var high = v[2] / 256;
    var mantissa = 1 + low + mid + high;

    return sign * Math.pow(2, exponent) * mantissa;
}
// Demo: round-trips ten random values in (-1000, 1000) and logs the input,
// its packed bytes and the value decoded from them.
for (var i = 0; i < 10; ++i) {
    var num = (Math.random() * 2.0 - 1.0) * 1000;
    var packed = packFloat(num);
    var restored = unpackFloat(packFloat(num));
    console.log(num, packed, restored);
}
它可以将一个浮点数与 4 个字节相互转换。与其他解决方案不同,它不局限于某个很小的或预先定义的范围,能够表示任何形如 s * m * 2^e 的数,其中 s = -1 或 1,m 在 1 到 2 之间(24 位精度),e 的取值范围为 -63 到 64。将它移植到 GLSL 很简单,因为它只使用常见的浮点运算。
我不确定自己是否理解了这个问题,不过:
为什么不直接使用浮点纹理?
// Ask the WebGL context for the OES_texture_float extension; the branch below
// is taken when the returned value is falsy, i.e. the extension is unavailable.
var ext = gl.getExtension("OES_texture_float");
if (!ext) {
// sorry, no floating point texture support
}
至于将数据放入纹理中,您只需使用 Float32Array。
// Upload float data as an RGBA / FLOAT texture (requires OES_texture_float).
// NOTE(review): for an RGBA texture the array is expected to hold
// width * height * 4 values — fill in the remaining values accordingly.
var data = new Float32Array([0.123456, Math.sqrt(2) /* , ...remaining values */]);
// The texture target constant is gl.TEXTURE_2D (gl.TARGET_2D does not exist).
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, width, height, 0,
              gl.RGBA, gl.FLOAT, data);
大多数硬件都支持从浮点纹理读取。较少支持渲染到浮点纹理。参见
我还要指出,在 JavaScript 中您可以直接获取浮点数的各个字节:
// Ten 32-bit floats backed by a freshly allocated buffer.
var arrayOf10Floats = new Float32Array(10);
// Byte-level view created over the very same buffer (.buffer), so it exposes
// the bytes of the ten floats above.
var arrayOf40bytes = new Uint8Array(arrayOf10Floats.buffer);
这两个数组共享同一块内存。它们都是同一个底层 ArrayBuffer 的 ArrayBufferView(视图)。
速度不快,但可行。(请注意,某些 GLSL 1.00 编译器在转换浮点字面量时存在错误。)
// Bit-plane view of four bytes at once, one byte per vec4 component.
// Component c of bitN holds bit N of byte c as 1.0 (set) or 0.0 (clear);
// bit0 is the least significant bit (weight 1), bit7 the most significant
// (weight 128), matching the weights used by pack_4_bytes.
struct Bitset8Bits {
mediump vec4 bit0;
mediump vec4 bit1;
mediump vec4 bit2;
mediump vec4 bit3;
mediump vec4 bit4;
mediump vec4 bit5;
mediump vec4 bit6;
mediump vec4 bit7;
};
// Component-wise "greater than" without branching:
// 1.0 where l > r, 0.0 where l <= r.
vec4 when_gt (vec4 l, vec4 r) {
    vec4 diff = l - r;
    // sign() yields -1, 0 or 1; clamping the low end at 0 leaves only 0 or 1.
    return max(sign(diff), vec4(0.0));
}
// Splits four byte values (one per component, each expected in 0..255) into
// their eight bit planes.
//
// The parameter is mediump rather than lowp: GLSL ES 1.00 only guarantees the
// range (-2, 2) for lowp floats, so values up to 255 could be clamped on
// hardware that really implements lowp.
Bitset8Bits unpack_4_bytes (mediump vec4 byte) {
    Bitset8Bits result;
    // Peel bits from most to least significant: compare the remainder against
    // (weight - 0.5), then subtract the weight wherever the bit was set.
    vec4 rest = byte;
    result.bit7 = when_gt(rest, vec4(127.5));
    rest = rest - 128.0 * result.bit7;
    result.bit6 = when_gt(rest, vec4(63.5));
    rest = rest - 64.0 * result.bit6;
    result.bit5 = when_gt(rest, vec4(31.5));
    rest = rest - 32.0 * result.bit5;
    result.bit4 = when_gt(rest, vec4(15.5));
    rest = rest - 16.0 * result.bit4;
    result.bit3 = when_gt(rest, vec4(7.5));
    rest = rest - 8.0 * result.bit3;
    result.bit2 = when_gt(rest, vec4(3.5));
    rest = rest - 4.0 * result.bit2;
    result.bit1 = when_gt(rest, vec4(1.5));
    rest = rest - 2.0 * result.bit1;
    result.bit0 = when_gt(rest, vec4(0.5));
    return result;
}
// Scalar "greater than" without branching: 1.0 when l > r, otherwise 0.0.
float when_gt (float l, float r) {
    float diff = l - r;
    // sign() yields -1, 0 or 1; clamping the low end at 0 leaves only 0 or 1.
    return max(sign(diff), 0.0);
}
// Recombines eight bit planes into four byte values (one per component) by
// weighting bit N with 2^N and summing.
vec4 pack_4_bytes (Bitset8Bits state) {
    return state.bit0
        + 2.0 * state.bit1
        + 4.0 * state.bit2
        + 8.0 * state.bit3
        + 16.0 * state.bit4
        + 32.0 * state.bit5
        + 64.0 * state.bit6
        + 128.0 * state.bit7;
}
// Helper for brians_float_pack: extracts one binary digit from `value`.
// Returns 1.0 when value > weight - tolerance (and then removes `weight`
// from `value`), otherwise 0.0. `weight` is halved afterwards so the next
// call tests the next lower digit.
float brians_take_bit (inout float value, inout float weight, float tolerance) {
    float bit = when_gt(value, weight - tolerance);
    value = value - weight * bit;
    weight = 0.5 * weight;
    return bit;
}

// Encodes a float as the four bytes of its IEEE 754 single-precision layout,
// using only arithmetic (no bitwise operators). Each component of the result
// is a byte value in 0..255:
//   a = sign bit + exponent bits 7..1
//   b = exponent bit 0 + mantissa bits 22..16
//   g = mantissa bits 15..8
//   r = mantissa bits 7..0
// Zero is encoded as all-zero bytes. Infinities, NaN and denormals
// (|x| < 2^-126) are not handled specially.
// NOTE(review): presumably the caller divides the result by 255.0 before
// writing it to an RGBA8 target — confirm at the call site.
vec4 brians_float_pack (
        float original_value) {
    // Remove the sign
    float pos_value = abs(original_value);
    // log2(0.0) is undefined in GLSL, so handle zero before using it.
    if (pos_value == 0.0) {
        return vec4(0.0);
    }
    float exp_real = floor(log2(pos_value));
    float multiplier = pow(2.0, exp_real);
    // Fractional part of the significand, expected in [0, 1).
    float normalized = pos_value / multiplier - 1.0;
    // Biased exponent
    float exp_v = exp_real + 127.0;

    Bitset8Bits packed_v;
    // Sign bit: 1.0 for negative input, 0.0 for zero or positive input.
    packed_v.bit7.a =
        step(sign(original_value) - 1.0, -1.5);

    // Exponent 8 bits, most significant first (weights 128 .. 1, compared
    // against weight - 0.5). The last one spills into the top bit of b.
    float exp_weight = 128.0;
    packed_v.bit6.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit5.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit4.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit3.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit2.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit1.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit0.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit7.b = brians_take_bit(exp_v, exp_weight, 0.5);

    // Significand 23 bits, most significant first (weights 2^-1 .. 2^-23).
    // The small tolerance makes a remainder equal to the weight count as set.
    float factor = 0.5;
    // Significand MSB bit 22 .. bit 16
    packed_v.bit6.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.b = brians_take_bit(normalized, factor, 0.00000005);
    // Significand bits 15 .. 8
    packed_v.bit7.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit6.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.g = brians_take_bit(normalized, factor, 0.00000005);
    // Significand bits 7 .. 0 (LSB last)
    packed_v.bit7.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit6.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.r = brians_take_bit(normalized, factor, 0.00000005);

    vec4 result = pack_4_bytes(packed_v);
    return result;
}
我一直在寻找一种在 WebGL 纹理中存储浮点数的方法。我在网上找到了一些解决方案(some solutions),但它们只能处理 [0..1) 范围内的浮点数。我希望能够存储任意浮点数,为此需要扩展这类函数,使其同时存储指数(比如放在第一个字节中)。不过,我不太了解它们的工作原理,所以不清楚该如何下手。简而言之:
将一个浮点数打包成4个字节的高效算法是什么?
一个简单的方法是先约定好要支持的浮点数取值范围,并在打包前将其重新映射到 [0..1) 范围。
// Bounds of the value range the remap helpers agree on.
const MIN = -100;
const MAX = 100;

/**
 * Linearly maps v from [MIN, MAX] onto [0, 1] (MIN -> 0, MAX -> 1).
 * Values outside the range are not clamped.
 */
function packRemap(v) {
    const offset = v - MIN;
    const span = MAX - MIN;
    return offset / span;
}

/**
 * Inverse of packRemap: maps p from [0, 1] back onto [MIN, MAX].
 */
function unpackRemap(p) {
    const span = MAX - MIN;
    return MIN + p * span;
}
float 是一个 32 位的数(23 位尾数外加 1 位隐含位、8 位指数和 1 位符号),而 RGBA8 纹理的一个纹素同样是 32 位。因此我们只需要一种编码方案:它可以用 JS(或任何其他语言)打包,并能在 GLSL 中解包(要考虑到 GLSL ES 1.0 的限制,例如缺少按位运算)。这是我的建议(用 C++ 编写):
#include <cstdint>
#include <iostream>
#include <cmath>
// for storing result of encoding
// Four 8-bit channels holding one encoded float:
//   r, g, b : 24-bit value (mantissa << 1) | sign, most significant byte in r
//   a       : the 8-bit biased IEEE 754 exponent
struct rgba {
    uint8_t r, g, b, a;
};

// Splits the IEEE 754 single-precision bit pattern of x into an rgba value.
// The sign bit ends up in the least significant bit of the blue channel,
// the 23 mantissa bits above it, and the biased exponent in alpha.
rgba float2rgba(float x) {
    // Reinterpret the float as its 32-bit pattern. Reading the inactive union
    // member is supported by GCC, Clang and MSVC but is not guaranteed by the
    // C++ standard; memcpy / std::bit_cast are the portable alternatives.
    union {
        float xc;
        uint32_t xi;
    };
    xc = x;

    const uint32_t sign = xi >> 31;
    const uint32_t exponent = (xi >> 23) & 0xffu;
    const uint32_t mantissa = xi & 0x7fffffu;

    // 23 mantissa bits + 1 sign bit = exactly 24 bits for the RGB channels.
    const uint32_t v = (mantissa << 1) | sign;

    rgba r;
    r.r = static_cast<uint8_t>(v >> 16);
    r.g = static_cast<uint8_t>((v >> 8) & 0xffu);
    r.b = static_cast<uint8_t>(v & 0xffu);
    r.a = static_cast<uint8_t>(exponent);
    return r;
}

// Inverse of float2rgba, written with plain arithmetic on the channel values.
// Handles every exponent class: zero/denormals (a == 0), normal numbers and
// infinity/NaN (a == 255). The exponent is handled as a signed int: the
// previous unsigned subtraction wrapped around for a < 127 and turned every
// |x| < 1 into infinity.
float rgba2float(rgba r) {
    // Drop the sign bit (LSB of b) to rebuild the 23-bit mantissa.
    const uint32_t mantissa =
        (uint32_t{r.r} << 15) | (uint32_t{r.g} << 7) | (uint32_t{r.b} >> 1);
    const float sign = (r.b & 1) ? -1.f : 1.f;
    const int exponent = r.a;

    if (exponent == 0xff) {
        // All-ones exponent: infinity when the mantissa is zero, NaN otherwise.
        return mantissa == 0 ? sign * INFINITY : NAN;
    }
    if (exponent == 0) {
        // Zero and denormals have no implicit leading 1: value = mantissa * 2^-149.
        return sign * std::ldexp(static_cast<float>(mantissa), -149);
    }
    // Normal number: (2^23 + mantissa) * 2^(exponent - 127 - 23), computed exactly.
    const float significand = static_cast<float>(mantissa | 0x800000u);
    return sign * std::ldexp(significand, exponent - 150);
}
int main() {
const float a = -1.34320e32f;
rgba r = float2rgba(a);
std::cout <<
a << '\n' <<
static_cast<unsigned>(r.r) << ',' <<
static_cast<unsigned>(r.g) << ',' <<
static_cast<unsigned>(r.b) << ',' <<
static_cast<unsigned>(r.a) << '\n' <<
rgba2float(r) << std::endl;
}
输出:
-1.3432e+32
167,214,213,233
-1.3432e+32
由于我找不到任何可以解决我的问题的东西,所以我组装了这个解决方案:
/**
 * Fractional part of x, i.e. x minus the largest integer not above x.
 * The result is in [0, 1) for finite x, including negative x.
 */
function fract(x) {
    var whole = Math.floor(x);
    return x - whole;
}
/**
 * Packs a number into 4 byte values [m0, m1, m2, se].
 *
 * The value is written as s * m * 2^e with m in [1, 2):
 *   m0, m1, m2 : the top 24 fractional bits of m, least significant byte first
 *   se         : (e + 63), plus 128 when x is positive
 *
 * Representable exponents are -63..64. Inputs outside that range saturate so
 * that every element stays a valid byte: zero, NaN and magnitudes below 2^-63
 * become the smallest magnitude; magnitudes of 2^65 and above (including
 * Infinity) become the largest.
 */
function packFloat(x) {
    var MIN_EXP = -63;
    var MAX_EXP = 64;
    var signBit = x > 0 ? 128 : 0;
    var magnitude = Math.abs(x);

    // Written as a negated >= so that NaN takes this branch too.
    if (!(magnitude >= Math.pow(2, MIN_EXP))) {
        return [0, 0, 0, signBit];
    }
    if (magnitude >= Math.pow(2, MAX_EXP + 1)) {
        return [255, 255, 255, (MAX_EXP + 63) + signBit];
    }

    var e = Math.floor(Math.log2(magnitude));
    var m = magnitude / Math.pow(2, e);
    // Math.log2 may round across an integer just below a power of two
    // (e.g. 8 - 2^-50), leaving m outside [1, 2). Scaling by 2 is exact.
    while (m < 1) { e -= 1; m *= 2; }
    while (m >= 2) { e += 1; m /= 2; }

    // Top 24 fractional bits of the mantissa as one integer, then split.
    var frac = Math.floor((m - 1) * 16777216);
    return [
        frac % 256,
        Math.floor(frac / 256) % 256,
        Math.floor(frac / 65536),
        (e + 63) + signBit];
}
/**
 * Rebuilds a number from the 4 byte values produced by packFloat.
 * v[3] carries the sign flag (128 = positive) plus the exponent biased by 63;
 * v[0..2] carry the fractional mantissa bytes, least significant first.
 */
function unpackFloat(v) {
    var tag = v[3];
    var isPositive = tag >= 128;
    var sign = isPositive ? 1 : -1;
    var exponent = tag - (isPositive ? 128 : 0) - 63;

    var low = v[0] / 256 / 256 / 256;
    var mid = v[1] / 256 / 256;
    var high = v[2] / 256;
    var mantissa = 1 + low + mid + high;

    return sign * Math.pow(2, exponent) * mantissa;
}
// Demo: round-trips ten random values in (-1000, 1000) and logs the input,
// its packed bytes and the value decoded from them.
for (var i = 0; i < 10; ++i) {
    var num = (Math.random() * 2.0 - 1.0) * 1000;
    var packed = packFloat(num);
    var restored = unpackFloat(packFloat(num));
    console.log(num, packed, restored);
}
它可以将一个浮点数与 4 个字节相互转换。与其他解决方案不同,它不局限于某个很小的或预先定义的范围,能够表示任何形如 s * m * 2^e 的数,其中 s = -1 或 1,m 在 1 到 2 之间(24 位精度),e 的取值范围为 -63 到 64。将它移植到 GLSL 很简单,因为它只使用常见的浮点运算。
我不确定自己是否理解了这个问题,不过:
为什么不直接使用浮点纹理?
// Ask the WebGL context for the OES_texture_float extension; the branch below
// is taken when the returned value is falsy, i.e. the extension is unavailable.
var ext = gl.getExtension("OES_texture_float");
if (!ext) {
// sorry, no floating point texture support
}
至于将数据放入纹理中,您只需使用 Float32Array。
// Upload float data as an RGBA / FLOAT texture (requires OES_texture_float).
// NOTE(review): for an RGBA texture the array is expected to hold
// width * height * 4 values — fill in the remaining values accordingly.
var data = new Float32Array([0.123456, Math.sqrt(2) /* , ...remaining values */]);
// The texture target constant is gl.TEXTURE_2D (gl.TARGET_2D does not exist).
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, width, height, 0,
              gl.RGBA, gl.FLOAT, data);
大多数硬件都支持从浮点纹理读取。较少支持渲染到浮点纹理。参见
我还要指出,在 JavaScript 中您可以直接获取浮点数的各个字节:
var arrayOf10Floats = new Float32Array(10);
// Byte-level view created over the buffer of arrayOf10Floats (.buffer), so it
// exposes the bytes of those floats.
var arrayOf40bytes = new Uint8Array(arrayOf10Floats.buffer);
这两个数组共享同一块内存。它们都是同一个底层 ArrayBuffer 的 ArrayBufferView(视图)。
速度不快,但可行。(请注意,某些 GLSL 1.00 编译器在转换浮点字面量时存在错误。)
// Bit-plane view of four bytes at once, one byte per vec4 component.
// Component c of bitN holds bit N of byte c as 1.0 (set) or 0.0 (clear);
// bit0 is the least significant bit (weight 1), bit7 the most significant
// (weight 128), matching the weights used by pack_4_bytes.
struct Bitset8Bits {
mediump vec4 bit0;
mediump vec4 bit1;
mediump vec4 bit2;
mediump vec4 bit3;
mediump vec4 bit4;
mediump vec4 bit5;
mediump vec4 bit6;
mediump vec4 bit7;
};
// Component-wise "greater than" without branching:
// 1.0 where l > r, 0.0 where l <= r.
vec4 when_gt (vec4 l, vec4 r) {
    vec4 diff = l - r;
    // sign() yields -1, 0 or 1; clamping the low end at 0 leaves only 0 or 1.
    return max(sign(diff), vec4(0.0));
}
// Splits four byte values (one per component, each expected in 0..255) into
// their eight bit planes.
//
// The parameter is mediump rather than lowp: GLSL ES 1.00 only guarantees the
// range (-2, 2) for lowp floats, so values up to 255 could be clamped on
// hardware that really implements lowp.
Bitset8Bits unpack_4_bytes (mediump vec4 byte) {
    Bitset8Bits result;
    // Peel bits from most to least significant: compare the remainder against
    // (weight - 0.5), then subtract the weight wherever the bit was set.
    vec4 rest = byte;
    result.bit7 = when_gt(rest, vec4(127.5));
    rest = rest - 128.0 * result.bit7;
    result.bit6 = when_gt(rest, vec4(63.5));
    rest = rest - 64.0 * result.bit6;
    result.bit5 = when_gt(rest, vec4(31.5));
    rest = rest - 32.0 * result.bit5;
    result.bit4 = when_gt(rest, vec4(15.5));
    rest = rest - 16.0 * result.bit4;
    result.bit3 = when_gt(rest, vec4(7.5));
    rest = rest - 8.0 * result.bit3;
    result.bit2 = when_gt(rest, vec4(3.5));
    rest = rest - 4.0 * result.bit2;
    result.bit1 = when_gt(rest, vec4(1.5));
    rest = rest - 2.0 * result.bit1;
    result.bit0 = when_gt(rest, vec4(0.5));
    return result;
}
// Scalar "greater than" without branching: 1.0 when l > r, otherwise 0.0.
float when_gt (float l, float r) {
    float diff = l - r;
    // sign() yields -1, 0 or 1; clamping the low end at 0 leaves only 0 or 1.
    return max(sign(diff), 0.0);
}
// Recombines eight bit planes into four byte values (one per component) by
// weighting bit N with 2^N and summing.
vec4 pack_4_bytes (Bitset8Bits state) {
    return state.bit0
        + 2.0 * state.bit1
        + 4.0 * state.bit2
        + 8.0 * state.bit3
        + 16.0 * state.bit4
        + 32.0 * state.bit5
        + 64.0 * state.bit6
        + 128.0 * state.bit7;
}
// Helper for brians_float_pack: extracts one binary digit from `value`.
// Returns 1.0 when value > weight - tolerance (and then removes `weight`
// from `value`), otherwise 0.0. `weight` is halved afterwards so the next
// call tests the next lower digit.
float brians_take_bit (inout float value, inout float weight, float tolerance) {
    float bit = when_gt(value, weight - tolerance);
    value = value - weight * bit;
    weight = 0.5 * weight;
    return bit;
}

// Encodes a float as the four bytes of its IEEE 754 single-precision layout,
// using only arithmetic (no bitwise operators). Each component of the result
// is a byte value in 0..255:
//   a = sign bit + exponent bits 7..1
//   b = exponent bit 0 + mantissa bits 22..16
//   g = mantissa bits 15..8
//   r = mantissa bits 7..0
// Zero is encoded as all-zero bytes. Infinities, NaN and denormals
// (|x| < 2^-126) are not handled specially.
// NOTE(review): presumably the caller divides the result by 255.0 before
// writing it to an RGBA8 target — confirm at the call site.
vec4 brians_float_pack (
        float original_value) {
    // Remove the sign
    float pos_value = abs(original_value);
    // log2(0.0) is undefined in GLSL, so handle zero before using it.
    if (pos_value == 0.0) {
        return vec4(0.0);
    }
    float exp_real = floor(log2(pos_value));
    float multiplier = pow(2.0, exp_real);
    // Fractional part of the significand, expected in [0, 1).
    float normalized = pos_value / multiplier - 1.0;
    // Biased exponent
    float exp_v = exp_real + 127.0;

    Bitset8Bits packed_v;
    // Sign bit: 1.0 for negative input, 0.0 for zero or positive input.
    packed_v.bit7.a =
        step(sign(original_value) - 1.0, -1.5);

    // Exponent 8 bits, most significant first (weights 128 .. 1, compared
    // against weight - 0.5). The last one spills into the top bit of b.
    float exp_weight = 128.0;
    packed_v.bit6.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit5.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit4.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit3.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit2.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit1.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit0.a = brians_take_bit(exp_v, exp_weight, 0.5);
    packed_v.bit7.b = brians_take_bit(exp_v, exp_weight, 0.5);

    // Significand 23 bits, most significant first (weights 2^-1 .. 2^-23).
    // The small tolerance makes a remainder equal to the weight count as set.
    float factor = 0.5;
    // Significand MSB bit 22 .. bit 16
    packed_v.bit6.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.b = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.b = brians_take_bit(normalized, factor, 0.00000005);
    // Significand bits 15 .. 8
    packed_v.bit7.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit6.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.g = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.g = brians_take_bit(normalized, factor, 0.00000005);
    // Significand bits 7 .. 0 (LSB last)
    packed_v.bit7.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit6.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit5.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit4.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit3.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit2.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit1.r = brians_take_bit(normalized, factor, 0.00000005);
    packed_v.bit0.r = brians_take_bit(normalized, factor, 0.00000005);

    vec4 result = pack_4_bytes(packed_v);
    return result;
}