learning-notes icon indicating copy to clipboard operation
learning-notes copied to clipboard

计算机中浮点数的阶码为什么用“移码”表示?

Open daa233 opened this issue 4 years ago • 2 comments

浮点数的表示为: |+/-|----exp----|------------frac--------------| 分为符号位、阶码(exp)、尾数(frac)三部分,其中阶码部分采用了移码(即符号位取反的补码;注意 IEEE 754 实际使用的偏置值为 2^(k-1)-1,单精度为 127,比标准移码的偏置值 2^(k-1) 少 1)

float_num = +/- 1.frac * 2^(exp - bias)    (exp 为阶码字段存储的无符号值,bias 为偏置值,单精度为 127;此式适用于规格化数)

例如,7.0 = 1.75 * 2^2,阶码字段存储 2 + 127 = 129,对应的二进制码为:01000000111000000000000000000000(即 符号位 0 | 阶码 10000001 | 尾数 11000000000000000000000)

daa233 avatar Mar 21 '20 08:03 daa233

阶码(exp)部分使用移码主要是为了保证“所有位为0时表示0”。

参考:

  1. 浮点数的阶码为什么用移码表示
  2. Floating point number representation

daa233 avatar Mar 21 '20 08:03 daa233

下面的代码可以辅助理解:

// C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// modified from https://jameshfisher.com/2017/02/23/printing-bits/
/*
 * Render a byte buffer as a NUL-terminated string of '0'/'1' characters.
 *
 * The bytes are emitted in reverse memory order (last byte first), so on a
 * little-endian host the most significant byte of a scalar is printed first.
 * Each byte is written MSB-first as 8 characters, and consecutive bytes are
 * separated by a single space.
 *
 * bytes:     pointer to the first byte of the buffer.
 * num_bytes: number of bytes to render.
 *
 * Returns a malloc'd string the caller must free(). For num_bytes == 0 an
 * empty string is returned. Returns NULL if bytes is NULL (with a non-zero
 * num_bytes), if the required size would overflow size_t, or if allocation
 * fails.
 */
char* get_bits(unsigned char * bytes, size_t num_bytes) {
    // Nothing to render: hand back an empty string rather than writing the
    // terminator at index -1 of the buffer.
    if (num_bytes == 0) {
        char *empty = (char *) malloc(1);
        if (empty) empty[0] = '\0';
        return empty;
    }
    if (!bytes) return NULL;
    // 8 digits + 1 separator/terminator per byte; refuse sizes that overflow.
    if (num_bytes > ((size_t) -1) / 9) return NULL;

    const size_t len = num_bytes * 9;
    char *bits_str = (char *) malloc(len);
    if (!bits_str) return NULL;

    char *out = bits_str;
    for (size_t i = 0; i < num_bytes; i++) {
        // walk the buffer back to front
        const unsigned char byte = bytes[num_bytes-i-1];
        for (int j = 7; j >= 0; j--) {
            *out++ = ((byte >> j) & 1u) ? '1' : '0';
        }
        *out++ = ' '; // separator between bytes
    }
    // the separator after the final byte becomes the terminator
    bits_str[len-1] = '\0';
    return bits_str;
}

#define GET_BITS(T, V) ({T x = V; get_bits((unsigned char*) &x, sizeof(x));})


// Demo: reinterpret the object representation of an int as a float, and of a
// float as a short, printing the bit patterns with GET_BITS.
// Returns 0 on success, 1 if a bit string could not be allocated.
int main()
{
    int a = 37;
    // NOTE: floating-point numbers are laid out as
    // |+/-|----exp----|------------frac--------------|
    // value = +/- 1.frac * 2^(exp - bias)   (bias is 127 for a 32-bit float)
    //
    // Reinterpret the memory of the int as a float. memcpy is used instead of
    // *(float *)&a because dereferencing a pointer cast to an unrelated type
    // violates the strict-aliasing rule (undefined behaviour).
    // Assumes sizeof(int) == sizeof(float).
    float f;
    memcpy(&f, &a, sizeof f);
    char *bs = GET_BITS(int, a);
    if (!bs) return 1; // allocation failed; do not hand NULL to %s
    printf("int %d (%s) -> float %e\n", a, bs, f);   // int 37 -> float 5.184804e-44
    free(bs);

    f = 7.34f;
    // Reinterpret the first sizeof(short) bytes of the float as a short.
    short s;
    memcpy(&s, &f, sizeof s);
    char *bs_f = GET_BITS(float, f);
    char *bs_s = GET_BITS(short, s);
    if (!bs_f || !bs_s) {
        free(bs_f); // free(NULL) is a no-op
        free(bs_s);
        return 1;
    }
    printf("float %e (%s) -> short %d (%s)\n", f, bs_f, s, bs_s);
    free(bs_f);
    free(bs_s);

    return 0;
}

// Output:
// int 37 (00000000 00000000 00000000 00100101) -> float 5.184804e-44
// float 7.340000e+00 (01000000 11101010 11100001 01001000) -> short -7864 (11100001 01001000)
// C++
#include <bitset>
#include <cstring>
#include <iostream>

using std::cout;
using std::endl;
using std::bitset;

// Demo: reinterpret the object representation of an int as a float, and of a
// float as an int / a short, printing the bit patterns with std::bitset.
int main()
{
    static_assert(sizeof(int) == sizeof(float),
                  "this demo copies an int's bytes into a float and back");

    int a = 37;
    bitset<32> x(a);
    cout << "int " << a << " (" << x << ")" << endl;
    // NOTE: floating-point numbers are laid out as
    // |+/-|----exp----|------------frac--------------|
    // value = +/- 1.frac * 2^(exp - bias)   (bias is 127 for a 32-bit float)
    //
    // Reinterpret the memory of the int as a float. std::memcpy is used
    // instead of *(float *)&a because dereferencing a pointer cast to an
    // unrelated type violates the strict-aliasing rule (undefined behaviour).
    float f;
    std::memcpy(&f, &a, sizeof f);
    cout << "int " << a << " -> float " << f << endl;      // 5.1848e-44

    f = 7.34f;
    // Copy the float's bytes into an unsigned integer to display them.
    unsigned int f_bits;
    std::memcpy(&f_bits, &f, sizeof f_bits);
    x = bitset<32>(f_bits);
    cout << "float " << f << " (" << x << ")" << endl;

    // Reinterpret the first sizeof(short) bytes of the float as a short.
    short s;
    std::memcpy(&s, &f, sizeof s);
    bitset<16> y(s);
    cout << "float " << f << " -> short " << s << " (" << y << ")" << endl;

    return 0;
}

// Output:
// int 37 (00000000000000000000000000100101)
// int 37 -> float 5.1848e-44
// float 7.34 (01000000111010101110000101001000)
// float 7.34 -> short -7864 (1110000101001000)

daa233 avatar Mar 21 '20 17:03 daa233