summaryrefslogtreecommitdiffstats
path: root/mpeglib/lib/util/render/dither/colorTableHighBit.cpp
blob: 76038a5ce0756fc5465eb77d1c5b5236fda8324f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/*
  colorTables for 16,32 Bit depth
  Copyright (C) 2000  Martin Vogt

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Library General Public License as published by
  the Free Software Foundation.

  For more information look at the file COPYRIGHT in this package

 */



#include "colorTableHighBit.h"

//#define INTERPOLATE


/*
 * Erik Corry's multi-byte dither routines.
 *
 * The basic idea is that the Init generates all the necessary tables.
 * The tables incorporate the information about the layout of pixels
 * in the XImage, so that it should be able to cope with 15-bit, 16-bit
 * 24-bit (non-packed) and 32-bit (10-11 bits per color!) screens.
 * At present it cannot cope with 24-bit packed mode, since this involves
 * getting down to byte level again. It is assumed that the bits for each
 * color are contiguous in the longword.
 * 
 * Writing to memory is done in shorts or ints. (Unfortunately, short is not
 * very fast on Alpha, so there is room for improvement here). There is no
 * dither time check for overflow - instead the tables have slack at
 * each end. This is likely to be faster than an 'if' test as many modern
 * architectures are really bad at ifs. Potentially, each '&&' causes a 
 * pipeline flush!
 *
 * There is no shifting and fixed point arithmetic, as I really doubt you
 * can see the difference, and it costs. This may be just my bias, since I
 * heard that Intel is really bad at shifting.
 */


/*
 * Count the 1 bits in a PIXVAL word.
 * Examines the word one bit at a time, so it is slow: do not call often.
 */
static int number_of_bits_set(unsigned PIXVAL a) {
    int ones = 0;
    /* Consume the word from the low end until nothing is left. */
    for (; a; a >>= 1) {
        if (a & 1) {
            ++ones;
        }
    }
    return ones;
}



/*
 * How many 0 bits are there at most significant end of PIXVALword.
 * Low performance, do not call often.
 */
static int free_bits_at_top(unsigned PIXVAL a) {
      /* assume char is 8 bits */
    if(!a) return sizeof(unsigned PIXVAL) * 8;
        /* assume twos complement */
    if(((PIXVAL)a) < 0l) return 0;
    return 1 + free_bits_at_top ( a << 1);
}

/*
 * How many 0 bits are there at least significant end of PIXVALword.
 * Low performance, do not call often.
 */
static int free_bits_at_bottom(unsigned PIXVAL a) {
      /* assume char is 8 bits */
    if(!a) return sizeof(unsigned PIXVAL) * 8;
    if(((PIXVAL)a) & 1l) return 0;
    return 1 + free_bits_at_bottom ( a >> 1);
}



/*
 * Build the lookup tables for a 16/32 bit visual.
 *
 *   bpp        bits per pixel of the target; values >= 24 select the
 *              32-bit table layout in initHighColor
 *   redMask    bit mask of the red field inside a pixel
 *   greenMask  bit mask of the green field inside a pixel
 *   blueMask   bit mask of the blue field inside a pixel
 */
ColorTableHighBit::ColorTableHighBit(int bpp,unsigned int redMask,
                                     unsigned int greenMask,
                                     unsigned int blueMask) {
  const int tabEntries = 256;   /* entries in each colour conversion table */
  const int pixEntries = 768;   /* entries in each rgb-to-pixel table      */

  /* Remember the description of the visual. */
  this->bpp       = bpp;
  this->redMask   = redMask;
  this->greenMask = greenMask;
  this->blueMask  = blueMask;

  /* One allocation carries the five conversion tables back to back. */
  colortab = new TABTYPE[5*tabEntries];

  Cr_r_tab = colortab + 0*tabEntries;
  Cr_g_tab = colortab + 1*tabEntries;
  Cb_g_tab = colortab + 2*tabEntries;
  Cb_b_tab = colortab + 3*tabEntries;
  L_tab    = colortab + 4*tabEntries;

  /* Likewise one allocation for the red, green and blue pixel tables. */
  rgb_2_pix = new PIXVAL [3*pixEntries];

  r_2_pix_alloc = rgb_2_pix + 0*pixEntries;
  g_2_pix_alloc = rgb_2_pix + 1*pixEntries;
  b_2_pix_alloc = rgb_2_pix + 2*pixEntries;

  /* Fill everything in. */
  initHighColor(bpp>=24,redMask,greenMask,blueMask);
}


/*
 * Release the two table buffers owned by this object.
 */
ColorTableHighBit::~ColorTableHighBit() {
  /*
   * Both buffers are allocated with new[] in the constructor, so they
   * must be released with delete[]; scalar delete on an array is
   * undefined behaviour.
   */
  delete [] colortab;
  delete [] rgb_2_pix;
}

/*
 *--------------------------------------------------------------
 *
 * InitColor16Dither --
 *
 *	To get rid of the multiply and other conversions in color
 *	dither, we use a lookup table.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	The lookup tables are initialized.
 *
 *--------------------------------------------------------------
 */

void ColorTableHighBit::initHighColor(int thirty2,unsigned int redMask,
				      unsigned int greenMask,
				      unsigned int blueMask) {
  
  unsigned PIXVAL red_mask = redMask;
  unsigned PIXVAL green_mask =greenMask;
  unsigned PIXVAL blue_mask = blueMask;

  int CR, CB, i;
    

  for (i=0; i<256; i++) {
    L_tab[i] = i;
    if (gammaCorrectFlag) {
      L_tab[i] = (TABTYPE)GAMMA_CORRECTION(i);
    }
    
    CB = CR = i;
    
    if (chromaCorrectFlag) {
      CB -= 128; 
      CB = CHROMA_CORRECTION128(CB);
      CR -= 128;
      CR = CHROMA_CORRECTION128(CR);
    } else {
      CB -= 128; CR -= 128;
    }
/* was
      Cr_r_tab[i] =  1.596 * CR;
      Cr_g_tab[i] = -0.813 * CR;
      Cb_g_tab[i] = -0.391 * CB;   
      Cb_b_tab[i] =  2.018 * CB;
  but they were just messed up.
  Then was (_Video Deymstified_):
      Cr_r_tab[i] =  1.366 * CR;
      Cr_g_tab[i] = -0.700 * CR;
      Cb_g_tab[i] = -0.334 * CB;   
      Cb_b_tab[i] =  1.732 * CB;
  but really should be:
   (from ITU-R BT.470-2 System B, G and SMPTE 170M )
*/
      Cr_r_tab[i] = (TABTYPE) ( (0.419/0.299) * CR  );
      Cr_g_tab[i] = (TABTYPE) ( -(0.299/0.419) * CR );
      Cb_g_tab[i] = (TABTYPE) ( -(0.114/0.331) * CB ); 
      Cb_b_tab[i] = (TABTYPE) (  (0.587/0.331) * CB );

/*
  though you could argue for:
    SMPTE 240M
      Cr_r_tab[i] =  (0.445/0.212) * CR;
      Cr_g_tab[i] = -(0.212/0.445) * CR;
      Cb_g_tab[i] = -(0.087/0.384) * CB; 
      Cb_b_tab[i] =  (0.701/0.384) * CB;
    FCC 
      Cr_r_tab[i] =  (0.421/0.30) * CR;
      Cr_g_tab[i] = -(0.30/0.421) * CR;
      Cb_g_tab[i] = -(0.11/0.331) * CB; 
      Cb_b_tab[i] =  (0.59/0.331) * CB;
    ITU-R BT.709 
      Cr_r_tab[i] =  (0.454/0.2125) * CR;
      Cr_g_tab[i] = -(0.2125/0.454) * CR;
      Cb_g_tab[i] = -(0.0721/0.386) * CB; 
      Cb_b_tab[i] =  (0.7154/0.386) * CB;
*/
    }

    /* 
     * Set up entries 0-255 in rgb-to-pixel value tables.
     */
    for (i = 0; i < 256; i++) {
      r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(red_mask));
      r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(red_mask);
      g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(green_mask));
      g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(green_mask);
      b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(blue_mask));
      b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(blue_mask);
      /*
       * If we have 16-bit output depth, then we double the value
       * in the top word. This means that we can write out both
       * pixels in the pixel doubling mode with one op. It is 
       * harmless in the normal case as storing a 32-bit value
       * through a short pointer will lose the top bits anyway.
       * A similar optimisation for Alpha for 64 bit has been
       * prepared for, but is not yet implemented.
       */
      if(!thirty2) {
	r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
	g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
	b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;

      }
#ifdef SIXTYFOUR_BIT
      if(thirty2) {

	r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 32;
	g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 32;
	b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 32;

      }
#endif
    }

    /*
     * Spread out the values we have to the rest of the array so that
     * we do not need to check for overflow.
     */
    for (i = 0; i < 256; i++) {
      r_2_pix_alloc[i] = r_2_pix_alloc[256];
      r_2_pix_alloc[i+ 512] = r_2_pix_alloc[511];
      g_2_pix_alloc[i] = g_2_pix_alloc[256];
      g_2_pix_alloc[i+ 512] = g_2_pix_alloc[511];
      b_2_pix_alloc[i] = b_2_pix_alloc[256];
      b_2_pix_alloc[i+ 512] = b_2_pix_alloc[511];
    }

    r_2_pix = r_2_pix_alloc + 256;
    g_2_pix = g_2_pix_alloc + 256;
    b_2_pix = b_2_pix_alloc + 256;
}