-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdivide.h
130 lines (113 loc) · 4.23 KB
/
divide.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
//Copyright 2014 The FSC Authors. All Rights Reserved.
//
//Licensed under the Apache License, Version 2.0 (the "License");
//you may not use this file except in compliance with the License.
//You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//------------------------------------------------------------------------------
//
// Everything needed to implement divide-by-multiply
//
// You need to define RECIPROCAL_BITS before including this header, with value
// being either 32, 16 or 0 (=use float)
// This header will define the following:
// * inv_t type (to store the data needed for the reciprocal calculation)
// * void FSCInitDivide(ANSProba p, inv_t* div):
// to be called to initialize the data to compute 1/p
// * ANSStateW FSCDivide(ANSStateW x, inv_t div)
// to perform x/p (corner case: returns 'x' for p==0).
//
// A bit of theory from this paper (mentioned by Ryg):
// Alverson 1991: "Integer Division Using Reciprocals"
// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.1710
//
// If x and p are unsigned words represented by FIX bits (say: FIX=32 e.g.),
// then you can *exactly* compute x/p (floor-rounded) using:
// k = ceil(log2(p)) <- number of bits needed to represent 'p'
// s = FIX + k
// a = ceil((1 << s) / y) = ((1 << s) - 1) / p + 1
// x/p = (a * x) >> s = ((a * x) >> FIX) >> k
// So, one need to store 'a' using FIX + 1 bits, and to be able to perform
// the multiply 'a * x' with (FIX + 1) x FIX bits operands.
// Fortunately we can decompose the multiply in two steps:
// b = (a * x) >> FIX (usually easier with specialized instruction),
// followed by 'b >> k'.
// Also, 'a' always have a leading '1' at bit-position FIX. We don't need to
// store it and just store a - (1 << FIX), which fits in 'FIX' bits exactly.
// Then the final multiply can be rewritten as:
// b = (a * x) >> FIX // <- FIX*FIX multiply, keeping the upper FIX bits
// x/p = (b + x) >> k
// By using this trick, one can use the full word range for probabilities 'p'.
// Note that the last 'b+x' addition must take care of potential overflow by
// one bit. Beware!
#include "./fsc_utils.h"
#ifndef HAVE_CEIL_LOG2
#define HAVE_CEIL_LOG2
static inline int CeilLog2(uint64_t p) {
int s = 0;
while (p > (1ull << s)) ++s;
return s;
}
#endif // HAVE_CEIL_LOG2
#if (RECIPROCAL_BITS == 32)
typedef struct {
uint64_t mult_;
int shift_;
} inv_t;
#define DIV_FIX 64
static inline void FSCInitDivide(ANSProba p, inv_t* div) {
if (p > 0) {
const int s = CeilLog2(p);
assert(s <= PROBA_BITS);
const uint64_t base = 1ull << (s + DIV_FIX - 32);
const uint64_t hi = (base / p) << 32; // remove leading '1' bit
const uint64_t lo = (((base % p) << 32) + p - 1) / p;
div->mult_ = hi | lo;
div->shift_ = s;
} else {
div->mult_ = 0;
div->shift_ = 0;
}
}
static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) {
const ANSStateW tmp = ((unsigned __int128)x * div.mult_) >> DIV_FIX;
return ((unsigned __int128)tmp + x) >> div.shift_;
}
#undef DIV_FIX
#elif (RECIPROCAL_BITS == 16)
typedef struct {
uint32_t mult_;
int shift_;
} inv_t;
#define DIV_FIX 32
static inline void FSCInitDivide(ANSProba p, inv_t* div) {
if (p > 0) {
const int s = CeilLog2(p);
assert(s <= PROBA_BITS);
const uint64_t base = (1ull << (DIV_FIX + s)) - 1;
div->mult_ = (base / p + 1) & 0xfffffffful; // remove leading '1' bit
div->shift_ = s;
} else {
div->mult_ = 0;
div->shift_ = 0;
}
}
static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) {
const uint32_t tmp = ((uint64_t)x * div.mult_) >> DIV_FIX;
return ((uint64_t)tmp + x) >> div.shift_; // (should be add-shift-with-carry-overflow)
}
#undef DIV_FIX
#elif (RECIPROCAL_BITS == 0)
typedef ANSProba inv_t;
static inline void FSCInitDivide(ANSProba p, inv_t* div) { *div = p; }
static inline ANSStateW FSCDivide(ANSStateW x, inv_t div) {
return x / div;
}
#endif