/* drm: add initial r300 3D support.
 * [safe/jmp/linux-2.6] / drivers/char/drm/radeon_state.c
 */
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
41                                                      drm_file_t *filp_priv,
42                                                      u32 *offset ) {
43         u32 off = *offset;
44         struct drm_radeon_driver_file_fields *radeon_priv;
45
46         if ( off >= dev_priv->fb_location &&
47              off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
48                 return 0;
49
50         radeon_priv = filp_priv->driver_priv;
51         off += radeon_priv->radeon_fb_delta;
52
53         DRM_DEBUG( "offset fixed up to 0x%x\n", off );
54
55         if ( off < dev_priv->fb_location ||
56              off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
57                 return DRM_ERR( EINVAL );
58
59         *offset = off;
60
61         return 0;
62 }
63
/* Validate (and where necessary rewrite) the GPU memory offsets embedded
 * in a single client-supplied state packet before it is emitted.
 *
 * id is the RADEON_EMIT_* / R200_EMIT_* packet identifier chosen by the
 * client; data points at the packet's register payload.  Offsets are
 * checked via radeon_check_and_fixup_offset(), which may modify them in
 * place.
 *
 * Returns 0 on success, or DRM_ERR(EINVAL) for an unknown packet ID or
 * an offset that cannot be brought into the client-addressable range.
 */
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      int id,
                                                      u32 __user *data ) {
        switch ( id ) {

        case RADEON_EMIT_PP_MISC:
                /* The depth buffer offset sits partway into this packet;
                 * index it by register distance from the packet start. */
                if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
                                                    &data[( RADEON_RB3D_DEPTHOFFSET
                                                            - RADEON_PP_MISC ) / 4] ) ) {
                        DRM_ERROR( "Invalid depth buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_CNTL:
                if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
                                                    &data[( RADEON_RB3D_COLOROFFSET
                                                            - RADEON_PP_CNTL ) / 4] ) ) {
                        DRM_ERROR( "Invalid colour buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                /* Texture offset is the first dword of these packets. */
                if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
                                                    &data[0] ) ) {
                        DRM_ERROR( "Invalid R200 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
                                                    &data[( RADEON_PP_TXOFFSET_0
                                                            - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
                        DRM_ERROR( "Invalid R100 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5: {
                int i;
                /* data[0..4] each hold a cubic map offset; check all five. */
                for ( i = 0; i < 5; i++ ) {
                        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
                                                            &data[i] ) ) {
                                DRM_ERROR( "Invalid R200 cubic texture offset\n" );
                                return DRM_ERR( EINVAL );
                        }
                }
                break;
        }

        case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
                        int i;
                        /* Same as above for the R100 cubic offsets. */
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                                                  filp_priv,
                                                                  &data[i])) {
                                        DRM_ERROR
                                            ("Invalid R100 cubic texture offset\n");
                                        return DRM_ERR(EINVAL);
                                }
                        }
                }
                break;

        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_VAP_CTL:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
        case R200_EMIT_TCL_POINT_SPRITE_CNTL:
        case RADEON_EMIT_PP_CUBIC_FACES_0:
        case RADEON_EMIT_PP_CUBIC_FACES_1:
        case RADEON_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_TRI_PERF_CNTL:
                /* These packets don't contain memory offsets */
                break;

        default:
                DRM_ERROR( "Unknown state packet ID %d\n", id );
                return DRM_ERR( EINVAL );
        }

        return 0;
}
220
221 static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
222                                                       drm_file_t *filp_priv,
223                                                       drm_radeon_cmd_buffer_t *cmdbuf,
224                                                       unsigned int *cmdsz ) {
225         u32 *cmd = (u32 *) cmdbuf->buf;
226
227         *cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
228
229         if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
230                 DRM_ERROR( "Not a type 3 packet\n" );
231                 return DRM_ERR( EINVAL );
232         }
233
234         if ( 4 * *cmdsz > cmdbuf->bufsz ) {
235                 DRM_ERROR( "Packet size larger than size of data provided\n" );
236                 return DRM_ERR( EINVAL );
237         }
238
239         /* Check client state and fix it up if necessary */
240         if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
241                 u32 offset;
242
243                 if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
244                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
245                         offset = cmd[2] << 10;
246                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
247                                 DRM_ERROR( "Invalid first packet offset\n" );
248                                 return DRM_ERR( EINVAL );
249                         }
250                         cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
251                 }
252
253                 if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
254                      ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
255                         offset = cmd[3] << 10;
256                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
257                                 DRM_ERROR( "Invalid second packet offset\n" );
258                                 return DRM_ERR( EINVAL );
259                         }
260                         cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
261                 }
262         }
263
264         return 0;
265 }
266
267
268 /* ================================================================
269  * CP hardware state programming functions
270  */
271
272 static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
273                                           drm_clip_rect_t *box )
274 {
275         RING_LOCALS;
276
277         DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
278                    box->x1, box->y1, box->x2, box->y2 );
279
280         BEGIN_RING( 4 );
281         OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
282         OUT_RING( (box->y1 << 16) | box->x1 );
283         OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
284         OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
285         ADVANCE_RING();
286 }
287
288 /* Emit 1.1 state
289  */
290 static int radeon_emit_state( drm_radeon_private_t *dev_priv,
291                               drm_file_t *filp_priv,
292                               drm_radeon_context_regs_t *ctx,
293                               drm_radeon_texture_regs_t *tex,
294                               unsigned int dirty )
295 {
296         RING_LOCALS;
297         DRM_DEBUG( "dirty=0x%08x\n", dirty );
298
299         if ( dirty & RADEON_UPLOAD_CONTEXT ) {
300                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
301                                                     &ctx->rb3d_depthoffset ) ) {
302                         DRM_ERROR( "Invalid depth buffer offset\n" );
303                         return DRM_ERR( EINVAL );
304                 }
305
306                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
307                                                     &ctx->rb3d_coloroffset ) ) {
308                         DRM_ERROR( "Invalid depth buffer offset\n" );
309                         return DRM_ERR( EINVAL );
310                 }
311
312                 BEGIN_RING( 14 );
313                 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
314                 OUT_RING( ctx->pp_misc );
315                 OUT_RING( ctx->pp_fog_color );
316                 OUT_RING( ctx->re_solid_color );
317                 OUT_RING( ctx->rb3d_blendcntl );
318                 OUT_RING( ctx->rb3d_depthoffset );
319                 OUT_RING( ctx->rb3d_depthpitch );
320                 OUT_RING( ctx->rb3d_zstencilcntl );
321                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
322                 OUT_RING( ctx->pp_cntl );
323                 OUT_RING( ctx->rb3d_cntl );
324                 OUT_RING( ctx->rb3d_coloroffset );
325                 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
326                 OUT_RING( ctx->rb3d_colorpitch );
327                 ADVANCE_RING();
328         }
329
330         if ( dirty & RADEON_UPLOAD_VERTFMT ) {
331                 BEGIN_RING( 2 );
332                 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
333                 OUT_RING( ctx->se_coord_fmt );
334                 ADVANCE_RING();
335         }
336
337         if ( dirty & RADEON_UPLOAD_LINE ) {
338                 BEGIN_RING( 5 );
339                 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
340                 OUT_RING( ctx->re_line_pattern );
341                 OUT_RING( ctx->re_line_state );
342                 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
343                 OUT_RING( ctx->se_line_width );
344                 ADVANCE_RING();
345         }
346
347         if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
348                 BEGIN_RING( 5 );
349                 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
350                 OUT_RING( ctx->pp_lum_matrix );
351                 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
352                 OUT_RING( ctx->pp_rot_matrix_0 );
353                 OUT_RING( ctx->pp_rot_matrix_1 );
354                 ADVANCE_RING();
355         }
356
357         if ( dirty & RADEON_UPLOAD_MASKS ) {
358                 BEGIN_RING( 4 );
359                 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
360                 OUT_RING( ctx->rb3d_stencilrefmask );
361                 OUT_RING( ctx->rb3d_ropcntl );
362                 OUT_RING( ctx->rb3d_planemask );
363                 ADVANCE_RING();
364         }
365
366         if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
367                 BEGIN_RING( 7 );
368                 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
369                 OUT_RING( ctx->se_vport_xscale );
370                 OUT_RING( ctx->se_vport_xoffset );
371                 OUT_RING( ctx->se_vport_yscale );
372                 OUT_RING( ctx->se_vport_yoffset );
373                 OUT_RING( ctx->se_vport_zscale );
374                 OUT_RING( ctx->se_vport_zoffset );
375                 ADVANCE_RING();
376         }
377
378         if ( dirty & RADEON_UPLOAD_SETUP ) {
379                 BEGIN_RING( 4 );
380                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
381                 OUT_RING( ctx->se_cntl );
382                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
383                 OUT_RING( ctx->se_cntl_status );
384                 ADVANCE_RING();
385         }
386
387         if ( dirty & RADEON_UPLOAD_MISC ) {
388                 BEGIN_RING( 2 );
389                 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
390                 OUT_RING( ctx->re_misc );
391                 ADVANCE_RING();
392         }
393
394         if ( dirty & RADEON_UPLOAD_TEX0 ) {
395                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
396                                                     &tex[0].pp_txoffset ) ) {
397                         DRM_ERROR( "Invalid texture offset for unit 0\n" );
398                         return DRM_ERR( EINVAL );
399                 }
400
401                 BEGIN_RING( 9 );
402                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
403                 OUT_RING( tex[0].pp_txfilter );
404                 OUT_RING( tex[0].pp_txformat );
405                 OUT_RING( tex[0].pp_txoffset );
406                 OUT_RING( tex[0].pp_txcblend );
407                 OUT_RING( tex[0].pp_txablend );
408                 OUT_RING( tex[0].pp_tfactor );
409                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
410                 OUT_RING( tex[0].pp_border_color );
411                 ADVANCE_RING();
412         }
413
414         if ( dirty & RADEON_UPLOAD_TEX1 ) {
415                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
416                                                     &tex[1].pp_txoffset ) ) {
417                         DRM_ERROR( "Invalid texture offset for unit 1\n" );
418                         return DRM_ERR( EINVAL );
419                 }
420
421                 BEGIN_RING( 9 );
422                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
423                 OUT_RING( tex[1].pp_txfilter );
424                 OUT_RING( tex[1].pp_txformat );
425                 OUT_RING( tex[1].pp_txoffset );
426                 OUT_RING( tex[1].pp_txcblend );
427                 OUT_RING( tex[1].pp_txablend );
428                 OUT_RING( tex[1].pp_tfactor );
429                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
430                 OUT_RING( tex[1].pp_border_color );
431                 ADVANCE_RING();
432         }
433
434         if ( dirty & RADEON_UPLOAD_TEX2 ) {
435                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
436                                                     &tex[2].pp_txoffset ) ) {
437                         DRM_ERROR( "Invalid texture offset for unit 2\n" );
438                         return DRM_ERR( EINVAL );
439                 }
440
441                 BEGIN_RING( 9 );
442                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
443                 OUT_RING( tex[2].pp_txfilter );
444                 OUT_RING( tex[2].pp_txformat );
445                 OUT_RING( tex[2].pp_txoffset );
446                 OUT_RING( tex[2].pp_txcblend );
447                 OUT_RING( tex[2].pp_txablend );
448                 OUT_RING( tex[2].pp_tfactor );
449                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
450                 OUT_RING( tex[2].pp_border_color );
451                 ADVANCE_RING();
452         }
453
454         return 0;
455 }
456
457 /* Emit 1.2 state
458  */
459 static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
460                                drm_file_t *filp_priv,
461                                drm_radeon_state_t *state )
462 {
463         RING_LOCALS;
464
465         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
466                 BEGIN_RING( 3 );
467                 OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
468                 OUT_RING( state->context2.se_zbias_factor ); 
469                 OUT_RING( state->context2.se_zbias_constant ); 
470                 ADVANCE_RING();
471         }
472
473         return radeon_emit_state( dev_priv, filp_priv, &state->context,
474                            state->tex, state->dirty );
475 }
476
477 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
478  * 1.3 cmdbuffers allow all previous state to be updated as well as
479  * the tcl scalar and vector areas.  
480  */
481 static struct { 
482         int start; 
483         int len; 
484         const char *name;
485 } packet[RADEON_MAX_STATE_PACKETS] = {
486         { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
487         { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
488         { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
489         { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
490         { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
491         { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
492         { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
493         { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
494         { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
495         { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
496         { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
497         { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
498         { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
499         { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
500         { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
501         { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
502         { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
503         { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
504         { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
505         { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
506         { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
507         { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
508         { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
509         { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
510         { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
511         { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
512         { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
513         { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
514         { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
515         { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
516         { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
517         { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
518         { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
519         { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
520         { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
521         { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
522         { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
523         { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
524         { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
525         { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
526         { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
527         { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
528         { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
529         { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
530         { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
531         { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
532         { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
533         { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
534         { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
535         { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
536         { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
537         { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
538         { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
539         { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
540         { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
541         { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
542         { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
543         { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
544         { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
545         { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
546         { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
547         { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
548         { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
549         { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
550         { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
551         { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
552         { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
553         { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
554         { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
555         { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
556         { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
557         { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
558         { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
559         { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
560         { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
561         { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
562         { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
563         { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
564         { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
565         { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
566         { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
567         { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
568         { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
569         { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
570         { R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
571 };
572
573
574
575 /* ================================================================
576  * Performance monitoring functions
577  */
578
579 static void radeon_clear_box( drm_radeon_private_t *dev_priv,
580                               int x, int y, int w, int h,
581                               int r, int g, int b )
582 {
583         u32 color;
584         RING_LOCALS;
585
586         x += dev_priv->sarea_priv->boxes[0].x1;
587         y += dev_priv->sarea_priv->boxes[0].y1;
588
589         switch ( dev_priv->color_fmt ) {
590         case RADEON_COLOR_FORMAT_RGB565:
591                 color = (((r & 0xf8) << 8) |
592                          ((g & 0xfc) << 3) |
593                          ((b & 0xf8) >> 3));
594                 break;
595         case RADEON_COLOR_FORMAT_ARGB8888:
596         default:
597                 color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
598                 break;
599         }
600
601         BEGIN_RING( 4 );
602         RADEON_WAIT_UNTIL_3D_IDLE();            
603         OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
604         OUT_RING( 0xffffffff );
605         ADVANCE_RING();
606
607         BEGIN_RING( 6 );
608
609         OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
610         OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
611                   RADEON_GMC_BRUSH_SOLID_COLOR |
612                   (dev_priv->color_fmt << 8) |
613                   RADEON_GMC_SRC_DATATYPE_COLOR |
614                   RADEON_ROP3_P |
615                   RADEON_GMC_CLR_CMP_CNTL_DIS );
616
617         if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
618                 OUT_RING( dev_priv->front_pitch_offset );
619         } else {         
620                 OUT_RING( dev_priv->back_pitch_offset );
621         } 
622
623         OUT_RING( color );
624
625         OUT_RING( (x << 16) | y );
626         OUT_RING( (w << 16) | h );
627
628         ADVANCE_RING();
629 }
630
631 static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
632 {
633         /* Collapse various things into a wait flag -- trying to
634          * guess if userspase slept -- better just to have them tell us.
635          */
636         if (dev_priv->stats.last_frame_reads > 1 ||
637             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
638                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
639         }
640
641         if (dev_priv->stats.freelist_loops) {
642                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
643         }
644
645         /* Purple box for page flipping
646          */
647         if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
648                 radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
649
650         /* Red box if we have to wait for idle at any point
651          */
652         if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
653                 radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
654
655         /* Blue box: lost context?
656          */
657
658         /* Yellow box for texture swaps
659          */
660         if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
661                 radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
662
663         /* Green box if hardware never idles (as far as we can tell)
664          */
665         if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
666                 radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
667
668
669         /* Draw bars indicating number of buffers allocated 
670          * (not a great measure, easily confused)
671          */
672         if (dev_priv->stats.requested_bufs) {
673                 if (dev_priv->stats.requested_bufs > 100)
674                         dev_priv->stats.requested_bufs = 100;
675
676                 radeon_clear_box( dev_priv, 4, 16,  
677                                   dev_priv->stats.requested_bufs, 4,
678                                   196, 128, 128 );
679         }
680
681         memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
682
683 }
684 /* ================================================================
685  * CP command dispatch functions
686  */
687
/* Clear the front/back color buffers and/or the depth/stencil buffer
 * for every cliprect currently in the SAREA.  Color buffers are
 * cleared with 2D solid fills; depth/stencil is cleared by rendering
 * a quad through the 3D engine, optionally preceded by a hyper-z
 * fast-clear of the z-mask / hierarchical-z buffers.
 */
static void radeon_cp_dispatch_clear( drm_device_t *dev,
                                      drm_radeon_clear_t *clear,
                                      drm_radeon_clear_rect_t *depth_boxes )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG( "flags = 0x%x\n", flags );

        dev_priv->stats.clears++;

        /* When page-flipped onto page 1 the client's notion of
         * front/back is inverted relative to the physical buffers,
         * so swap the FRONT/BACK request bits to compensate.
         */
        if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
                if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
        }

        if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

                BEGIN_RING( 4 );

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();
                
                /* Apply the client-supplied color write mask. */
                OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
                OUT_RING( clear->color_mask );

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One solid-color fill per cliprect, per requested buffer. */
                for ( i = 0 ; i < nbox ; i++ ) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                   x, y, w, h, flags );

                        if ( flags & RADEON_FRONT ) {
                                BEGIN_RING( 6 );
                                
                                OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
                                OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                          RADEON_GMC_BRUSH_SOLID_COLOR |
                                          (dev_priv->color_fmt << 8) |
                                          RADEON_GMC_SRC_DATATYPE_COLOR |
                                          RADEON_ROP3_P |
                                          RADEON_GMC_CLR_CMP_CNTL_DIS );

                                OUT_RING( dev_priv->front_pitch_offset );
                                OUT_RING( clear->clear_color );
                                
                                /* Destination rectangle: top-left then size. */
                                OUT_RING( (x << 16) | y );
                                OUT_RING( (w << 16) | h );
                                
                                ADVANCE_RING();
                        }
                        
                        if ( flags & RADEON_BACK ) {
                                BEGIN_RING( 6 );
                                
                                OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
                                OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                          RADEON_GMC_BRUSH_SOLID_COLOR |
                                          (dev_priv->color_fmt << 8) |
                                          RADEON_GMC_SRC_DATATYPE_COLOR |
                                          RADEON_ROP3_P |
                                          RADEON_GMC_CLR_CMP_CNTL_DIS );
                                
                                OUT_RING( dev_priv->back_pitch_offset );
                                OUT_RING( clear->clear_color );

                                OUT_RING( (x << 16) | y );
                                OUT_RING( (w << 16) | h );

                                ADVANCE_RING();
                        }
                }
        }
        
        /* hyper z clear */
        /* no docs available, based on reverse engeneering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

                int i; /* NOTE(review): shadows the function-scope `i`; harmless, but -Wshadow would warn */
                /* z-mask entries per scanline: depth_pitch/2 for 16-bit z, depth_pitch/4 otherwise */
                int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? 
                        (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
                
                u32 clearmask;

                /* Clear value: depth in the low bits, stencil mask byte on top. */
                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                        ((clear->depth_mask & 0xff) << 24);
        
                
                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
                /* FIXME : reverse engineer that for Rx00 cards */
                /* FIXME : the mask supposedly contains low-res z values. So can't set
                   just to the max (0xff? or actually 0x3fff?), need to take z clear
                   value into account? */
                /* pattern seems to work for r100, though get slight
                   rendering errors with glxgears. If hierz is not enabled for r100,
                   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                   other ones are ignored, and the same clear mask can be used. That's
                   very different behaviour than R200 which needs different clear mask
                   and different number of tiles to clear if hierz is enabled or not !?!
                */
                        clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
                }
                else {
                /* clear mask : chooses the clearing pattern.
                   rv250: could be used to clear only parts of macrotiles
                   (but that would get really complicated...)?
                   bit 0 and 1 (either or both of them ?!?!) are used to
                   not clear tile (or maybe one of the bits indicates if the tile is
                   compressed or not), bit 2 and 3 to not clear tile 1,...,.
                   Pattern is as follows:
                        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                   bits -------------------------------------------------
                        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                   covers 256 pixels ?!?
                */
                        clearmask = 0x0;
                }

                /* Program the z clear value / z-mask offset and flush the
                 * z cache before touching the z-mask ram.
                 */
                BEGIN_RING( 8 );
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
                        tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
                ADVANCE_RING();

                /* Emit one CLEAR_ZMASK packet per tile row of each cliprect;
                 * tile geometry differs per chip generation (three cases below).
                 */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
                                nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        /* first tile */
                                        OUT_RING( tileoffset * 8 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 4 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                        else if (dev_priv->microcode_version==UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
                                nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truely 4x4 granularity is desired ? */
                                        OUT_RING( tileoffset * 16 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 1 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        }
                        else { /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
                                nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING( 4 );
                                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
                                        OUT_RING( tileoffset * 128 );
                                        /* the number of tiles to clear */
                                        OUT_RING( nrtilesx + 4 );
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING( clearmask );
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
                        && (flags & RADEON_USE_HIERZ))
                /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                /* FIXME : the mask supposedly contains low-res z values. So can't set
                   just to the max (0xff? or actually 0x3fff?), need to take z clear
                   value into account? */
                {
                        BEGIN_RING( 4 );
                        OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
                        OUT_RING( 0x0 ); /* First tile */
                        OUT_RING( 0x3cc0 );
                        OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        if ((dev_priv->microcode_version == UCODE_R200) &&
            (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* R200 path: set up minimal 3D state (TCL disabled, color
                 * writes masked off) and draw one rect-list quad per box.
                 */
                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;



                /* Disable TCL */

                tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Mask off all color writes -- only depth/stencil change. */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                        SE_VTE_CNTL__VTX_XY_FMT_MASK |
                        SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W)*/
                tempSE_VTX_FMT_0 =
                        SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                        SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;


                /* 
                 * Depth buffer specific enables 
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /* 
                 * Stencil buffer specific enables
                 */
                if ( flags & RADEON_STENCIL ) {
                        tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask; 
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                                RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING( 26 );
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
                OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
                OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
                OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
                              tempRB3D_ZSTENCILCNTL );
                OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
                              tempRB3D_STENCILREFMASK );
                OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
                OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
                OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
                OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
                OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
                OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
                OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
                              tempRE_AUX_SCISSOR_CNTL );
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for ( i = 0 ; i < nbox ; i++ ) {
                        
                        /* Funny that this should be required -- 
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect( dev_priv,
                                               &sarea_priv->boxes[i] );

                        /* Immediate-mode quad: three vertices (rect list),
                         * each X, Y, Z, W with W fixed at 1.0 (0x3f800000).
                         */
                        BEGIN_RING( 14 );
                        OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
                        OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
                                   RADEON_PRIM_WALK_RING |
                                   (3 << RADEON_NUM_VERTICES_SHIFT)) );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        ADVANCE_RING();
                }
        } 
        else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

                /* Pre-R200 path: same idea with the legacy immediate-mode
                 * draw packet and pre-computed depth_clear state.
                 */
                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if ( flags & RADEON_DEPTH ) {
                        rb3d_cntl |=  RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if ( flags & RADEON_STENCIL ) {
                        rb3d_cntl |=  RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                                RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING( 13 );
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
                OUT_RING( 0x00000000 );
                OUT_RING( rb3d_cntl );
                
                OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
                OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
                              rb3d_stencilrefmask );
                OUT_RING_REG( RADEON_RB3D_PLANEMASK,
                              0x00000000 );
                OUT_RING_REG( RADEON_SE_CNTL,
                              depth_clear->se_cntl );
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for ( i = 0 ; i < nbox ; i++ ) {
                        
                        /* Funny that this should be required -- 
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect( dev_priv,
                                               &sarea_priv->boxes[i] );

                        BEGIN_RING( 15 );

                        OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
                        OUT_RING( RADEON_VTX_Z_PRESENT |
                                  RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
                                   RADEON_PRIM_WALK_RING |
                                   RADEON_MAOS_ENABLE |
                                   RADEON_VTX_FMT_RADEON_MODE |
                                   (3 << RADEON_NUM_VERTICES_SHIFT)) );

                        /* Three vertices: X, Y, Z plus a zero packed color. */
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING( 4 );

        RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1163
/* Copy-swap: blit the back buffer to the front buffer for every
 * cliprect in the SAREA, then bump the frame-age counter that the
 * client throttles on.  Used when true page flipping is not active
 * (or when the blit direction must follow current_page).
 */
static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        int i;
        RING_LOCALS;
        DRM_DEBUG( "\n" );

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes)
                radeon_cp_performance_boxes( dev_priv );


        /* Wait for the 3D stream to idle before dispatching the bitblt.
         * This will prevent data corruption between the two streams.
         */
        BEGIN_RING( 2 );

        RADEON_WAIT_UNTIL_3D_IDLE();

        ADVANCE_RING();

        /* One screen-to-screen blit per cliprect. */
        for ( i = 0 ; i < nbox ; i++ ) {
                int x = pbox[i].x1;
                int y = pbox[i].y1;
                int w = pbox[i].x2 - x;
                int h = pbox[i].y2 - y;

                DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
                           x, y, w, h );

                BEGIN_RING( 7 );

                OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
                OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                          RADEON_GMC_BRUSH_NONE |
                          (dev_priv->color_fmt << 8) |
                          RADEON_GMC_SRC_DATATYPE_COLOR |
                          RADEON_ROP3_S |
                          RADEON_DP_SRC_SOURCE_MEMORY |
                          RADEON_GMC_CLR_CMP_CNTL_DIS |
                          RADEON_GMC_WR_MSK_DIS );
                
                /* Make this work even if front & back are flipped:
                 * emit source pitch/offset first, then destination.
                 */
                if (dev_priv->current_page == 0) {
                        OUT_RING( dev_priv->back_pitch_offset );
                        OUT_RING( dev_priv->front_pitch_offset );
                } 
                else {
                        OUT_RING( dev_priv->front_pitch_offset );
                        OUT_RING( dev_priv->back_pitch_offset );
                }

                /* Source x,y; destination x,y (identical); then width,height. */
                OUT_RING( (x << 16) | y );
                OUT_RING( (x << 16) | y );
                OUT_RING( (w << 16) | h );

                ADVANCE_RING();
        }

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;

        BEGIN_RING( 4 );

        RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
        RADEON_WAIT_UNTIL_2D_IDLE();

        ADVANCE_RING();
}
1242
/* True page flip: reprogram both CRTC scan-out offsets to the other
 * color buffer, toggle current_page/pfCurrentPage, and bump the
 * frame-age counter the client throttles on.
 */
static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
        /* Base offset of the buffer being flipped to: front when
         * currently on page 1, back otherwise.
         */
        int offset = (dev_priv->current_page == 1)
                   ? dev_priv->front_offset : dev_priv->back_offset;
        RING_LOCALS;
        DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
                __FUNCTION__, 
                dev_priv->current_page,
                dev_priv->sarea_priv->pfCurrentPage);

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes) {
                dev_priv->stats.boxes |= RADEON_BOX_FLIP;
                radeon_cp_performance_boxes( dev_priv );
        }

        /* Update the frame offsets for both CRTCs
         */
        BEGIN_RING( 6 );

        RADEON_WAIT_UNTIL_3D_IDLE();
        /* CRTC1: offset of the SAREA frame origin within the buffer,
         * 8-byte aligned (color_fmt - 2 presumably is bytes/pixel --
         * TODO confirm against radeon_drv.h color format codes).
         */
        OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
                                              + sarea->frame.x 
                                              * ( dev_priv->color_fmt - 2 ) ) & ~7 )
                                          + offset );
        OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
                                           + offset );

        ADVANCE_RING();

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;
        /* Toggle 0 <-> 1 and mirror the new page into the SAREA. */
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
                                              1 - dev_priv->current_page;

        BEGIN_RING( 2 );

        RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

        ADVANCE_RING();
}
1290
1291 static int bad_prim_vertex_nr( int primitive, int nr )
1292 {
1293         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1294         case RADEON_PRIM_TYPE_NONE:
1295         case RADEON_PRIM_TYPE_POINT:
1296                 return nr < 1;
1297         case RADEON_PRIM_TYPE_LINE:
1298                 return (nr & 1) || nr == 0;
1299         case RADEON_PRIM_TYPE_LINE_STRIP:
1300                 return nr < 2;
1301         case RADEON_PRIM_TYPE_TRI_LIST:
1302         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1303         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1304         case RADEON_PRIM_TYPE_RECT_LIST:
1305                 return nr % 3 || nr == 0;
1306         case RADEON_PRIM_TYPE_TRI_FAN:
1307         case RADEON_PRIM_TYPE_TRI_STRIP:
1308                 return nr < 3;
1309         default:
1310                 return 1;
1311         }       
1312 }
1313
1314
1315
/* One TCL primitive to be dispatched from a client vertex buffer.
 * Consumed by radeon_cp_dispatch_vertex() below.
 */
typedef struct {
        unsigned int start;     /* byte offset of the primitive's data within the buffer */
        unsigned int finish;    /* end offset (logged only in the code visible here) */
        unsigned int prim;      /* hardware primitive type/flags (RADEON_PRIM_*) */
        unsigned int numverts;  /* number of vertices to render */
        unsigned int offset;    /* presumably a buffer offset -- unused in the visible code, TODO confirm */
        unsigned int vc_format; /* vertex format word emitted to the CP */
} drm_radeon_tcl_prim_t;
1324
/* Dispatch one TCL primitive from a client vertex buffer, re-emitting
 * the draw packet once per SAREA cliprect.  Rejects primitives whose
 * vertex count is invalid for their type (see bad_prim_vertex_nr).
 */
static void radeon_cp_dispatch_vertex( drm_device_t *dev,
                                       drm_buf_t *buf,
                                       drm_radeon_tcl_prim_t *prim )

{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        /* Absolute GART address of the primitive's vertex data. */
        int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
        int numverts = (int)prim->numverts;
        int nbox = sarea_priv->nbox;
        int i = 0;
        RING_LOCALS;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
                  prim->prim,
                  prim->vc_format,
                  prim->start,
                  prim->finish,
                  prim->numverts);

        if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
                DRM_ERROR( "bad prim %x numverts %d\n", 
                           prim->prim, prim->numverts );
                return;
        }

        /* do/while: the draw packet is emitted at least once even when
         * there are no cliprects (nbox == 0).
         */
        do {
                /* Emit the next cliprect */
                if ( i < nbox ) {
                        radeon_emit_clip_rect( dev_priv, 
                                               &sarea_priv->boxes[i] );
                }

                /* Emit the vertex buffer rendering commands */
                BEGIN_RING( 5 );

                OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
                OUT_RING( offset );
                OUT_RING( numverts );
                OUT_RING( prim->vc_format );
                OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
                          RADEON_COLOR_ORDER_RGBA |
                          RADEON_VTX_FMT_RADEON_MODE |
                          (numverts << RADEON_NUM_VERTICES_SHIFT) );

                ADVANCE_RING();

                i++;
        } while ( i < nbox );
}
1375
1376
1377
/* Retire a DMA buffer: stamp it with a new dispatch age and emit that
 * age into the ring, so the freelist can reclaim the buffer once the
 * CP has processed everything queued before the age write.
 */
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* Monotonically increasing age shared through the sarea */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1394
/* Point the CP at a range [start, end) of a DMA buffer and execute it
 * as an indirect buffer.  A zero-length range is silently ignored.
 */
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		/* GPU address of the indirect data within the buffer */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Byte count rounded up to whole dwords */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev->agp_buffer_map->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}
1430
1431
/* Emit an indexed (PRIM_WALK_IND) primitive.  The packet header is
 * written in place into the element buffer (in front of the indices,
 * which start RADEON_INDEX_PRIM_OFFSET bytes into the range) and the
 * whole range is then dispatched as an indirect buffer, once per
 * cliprect.
 */
static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU address of the vertex array the indices refer to */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* Indices begin after the in-buffer packet header */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	/* Reject index counts invalid for this primitive type */
	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n", 
			   prim->prim, count );
		return;
	}


	/* Range must leave room for the header and be 8-byte aligned */
	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* Patch the packet header directly into the element buffer */
	data = (u32 *)((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	/* One indirect dispatch per cliprect; at least one pass */
	do {
		if ( i < nbox ) 
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );

		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );

}
1495
1496 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1497
/* Upload a texture image into card memory using HOSTDATA_BLT packets
 * built in DMA buffers.  Images larger than one buffer are uploaded in
 * multiple passes; if no DMA buffer is available the remaining image
 * state is copied back to user space and EAGAIN is returned so the
 * client can resubmit.
 *
 * Returns 0 on success or a negative DRM error (EINVAL on bad
 * offset/format, EFAULT on user-copy failure, EAGAIN as above).
 */
static int radeon_cp_dispatch_texture( DRMFILE filp,
				       drm_device_t *dev,
				       drm_radeon_texture_t *tex,
				       drm_radeon_tex_image_t *image )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width;
	u32 height;
	int i;
	u32 texpitch, microtile;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	/* Validate (and possibly relocate) the destination offset */
	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
		DRM_ERROR( "Invalid destination offset\n" );
		return DRM_ERR( EINVAL );
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

#ifdef __BIG_ENDIAN
	/* The Mesa texture functions provide the data in little endian as the
	 * chip wants it, but we need to compensate for the fact that the CP
	 * ring gets byte-swapped
	 */
	BEGIN_RING( 2 );
	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
	ADVANCE_RING();
#endif


	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch ( tex->format ) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;	/* 4 bytes per texel */
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;	/* 2 bytes per texel */
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;	/* 1 byte per texel */
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR( "invalid texture format %d\n", tex->format );
		return DRM_ERR(EINVAL);
	}
	texpitch = tex->pitch;
	/* The micro-tile bit lives shifted down by 22 in the pitch word */
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			/* Blitter can't handle this width; tile by hand below */
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	}
	else microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

	do {
		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			   tex->offset >> 10, tex->pitch, tex->format,
			   image->x, image->y, image->width, image->height );

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;
		
		size = height * blit_width;

		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
			/* Too big for one buffer: upload as many full
			 * scanlines as fit; the loop handles the rest.
			 */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if ( size < 4 && size > 0 ) {
			size = 4;
		} else if ( size == 0 ) {
			return 0;
		}

		buf = radeon_freelist_get( dev );
		/* NOTE(review): the idle-and-retry path is disabled by the
		 * "0 &&"; when no buffer is free we fall straight through
		 * to the EAGAIN path below.
		 */
		if ( 0 && !buf ) {
			radeon_do_cp_idle( dev_priv );
			buf = radeon_freelist_get( dev );
		}
		if ( !buf ) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand the remaining image back so user space
			 * can retry the upload from where we stopped.
			 */
			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}


		/* Dispatch the indirect buffer.
		 */
		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;
		/* 8-dword HOSTDATA_BLT header, then the pixel data */
		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			     RADEON_GMC_BRUSH_NONE |
			     (format << 8) |
			     RADEON_GMC_SRC_DATATYPE_COLOR |
			     RADEON_ROP3_S |
			     RADEON_DP_SRC_SOURCE_HOST_DATA |
			     RADEON_GMC_CLR_CMP_CNTL_DIS |
			     RADEON_GMC_WR_MSK_DIS);
		
		buffer[2] = (texpitch << 22) | (tex->offset >> 10);
		buffer[3] = 0xffffffff;
		buffer[4] = 0xffffffff;
		buffer[5] = (image->y << 16) | image->x;
		buffer[6] = (height << 16) | image->width;
		buffer[7] = dwords;
		buffer += 8;

		

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					if (DRM_COPY_FROM_USER(buffer, data,
							       tex_width * sizeof(u32))) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					/* Split the single 32-byte row across two
					 * 16-byte tile halves.
					 */
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				/* Layout already matches; bulk copy */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				/* One row per 16-byte tile stride */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					/* Interleave two source rows into the
					 * tiled destination layout, 16 bytes
					 * at a time.
					 */
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		}
		else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0 ; i < tex->height ; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width )) {
						DRM_ERROR("EFAULT on pad, %d bytes\n", tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}

		buf->filp = filp;
		/* Header (8 dwords) + data, in bytes */
		buf->used = (dwords + 8) * sizeof(u32);
		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
		radeon_cp_discard_buffer( dev, buf );

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1773
1774
/* Upload a 32x32 stipple pattern (32 dwords) to the chip: reset the
 * stipple address register to 0, then stream all 32 data words through
 * the auto-incrementing RE_STIPPLE_DATA register.
 */
static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* 2 + 1 + 32 = 35 dwords total */
	BEGIN_RING( 35 );

	OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
	OUT_RING( 0x00000000 );

	OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
	for ( i = 0 ; i < 32 ; i++ ) {
		OUT_RING( stipple[i] );
	}

	ADVANCE_RING();
}
1794
/* Program one physical surface's parameters (flags and address bounds)
 * into its SURFACE0-relative register bank; each surface occupies a
 * 16-byte register stride.  The CP is idled first since the registers
 * are written directly via MMIO rather than through the ring.
 */
static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
{
	/* No MMIO mapping yet (e.g. before CP init) -- nothing to program */
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
		dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
		dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
		dev_priv->surfaces[surf_index].upper);
}
1809
1810
1811 /* Allocates a virtual surface
1812  * doesn't always allocate a real surface, will stretch an existing 
1813  * surface when possible.
1814  *
1815  * Note that refcount can be at most 2, since during a free refcount=3
1816  * might mean we have to allocate a new surface which might not always
1817  * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is 
1819  * freed, we suddenly need two surfaces to store A and C, which might
1820  * not always be available.
1821  */
1822 static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
1823 {
1824         struct radeon_virt_surface *s;
1825         int i;
1826         int virt_surface_index;
1827         uint32_t new_upper, new_lower;
1828
1829         new_lower = new->address;
1830         new_upper = new_lower + new->size - 1;
1831
1832         /* sanity check */
1833         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1834                 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
1835                 ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1836                 return -1;
1837
1838         /* make sure there is no overlap with existing surfaces */
1839         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1840                 if ((dev_priv->surfaces[i].refcount != 0) &&
1841                 (( (new_lower >= dev_priv->surfaces[i].lower) &&
1842                         (new_lower < dev_priv->surfaces[i].upper) ) ||
1843                  ( (new_lower < dev_priv->surfaces[i].lower) &&
1844                         (new_upper > dev_priv->surfaces[i].lower) )) ){
1845                 return -1;}
1846         }
1847
1848         /* find a virtual surface */
1849         for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1850                 if (dev_priv->virt_surfaces[i].filp == 0)
1851                         break;
1852         if (i == 2*RADEON_MAX_SURFACES) {
1853                 return -1;}
1854         virt_surface_index = i;
1855
1856         /* try to reuse an existing surface */
1857         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1858                 /* extend before */
1859                 if ((dev_priv->surfaces[i].refcount == 1) &&
1860                   (new->flags == dev_priv->surfaces[i].flags) &&
1861                   (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1862                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1863                         s->surface_index = i;
1864                         s->lower = new_lower;
1865                         s->upper = new_upper;
1866                         s->flags = new->flags;
1867                         s->filp = filp;
1868                         dev_priv->surfaces[i].refcount++;
1869                         dev_priv->surfaces[i].lower = s->lower;
1870                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1871                         return virt_surface_index;
1872                 }
1873
1874                 /* extend after */
1875                 if ((dev_priv->surfaces[i].refcount == 1) &&
1876                   (new->flags == dev_priv->surfaces[i].flags) &&
1877                   (new_lower == dev_priv->surfaces[i].upper + 1)) {
1878                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1879                         s->surface_index = i;
1880                         s->lower = new_lower;
1881                         s->upper = new_upper;
1882                         s->flags = new->flags;
1883                         s->filp = filp;
1884                         dev_priv->surfaces[i].refcount++;
1885                         dev_priv->surfaces[i].upper = s->upper;
1886                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1887                         return virt_surface_index;
1888                 }
1889         }
1890
1891         /* okay, we need a new one */
1892         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1893                 if (dev_priv->surfaces[i].refcount == 0) {
1894                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1895                         s->surface_index = i;
1896                         s->lower = new_lower;
1897                         s->upper = new_upper;
1898                         s->flags = new->flags;
1899                         s->filp = filp;
1900                         dev_priv->surfaces[i].refcount = 1;
1901                         dev_priv->surfaces[i].lower = s->lower;
1902                         dev_priv->surfaces[i].upper = s->upper;
1903                         dev_priv->surfaces[i].flags = s->flags;
1904                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1905                         return virt_surface_index;
1906                 }
1907         }
1908
1909         /* we didn't find anything */
1910         return -1;
1911 }
1912
1913 static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1914 {
1915         struct radeon_virt_surface *s;
1916         int i;
1917         /* find the virtual surface */
1918         for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1919                 s = &(dev_priv->virt_surfaces[i]);
1920                 if (s->filp) {
1921                         if ((lower == s->lower) && (filp == s->filp)) {
1922                                 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1923                                         dev_priv->surfaces[s->surface_index].lower = s->upper;
1924
1925                                 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1926                                         dev_priv->surfaces[s->surface_index].upper = s->lower;
1927
1928                                 dev_priv->surfaces[s->surface_index].refcount--;
1929                                 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1930                                         dev_priv->surfaces[s->surface_index].flags = 0;
1931                                 s->filp = NULL;
1932                                 radeon_apply_surface_regs(s->surface_index, dev_priv);
1933                                 return 0;
1934                         }
1935                 }
1936         }
1937         return 1;
1938 }
1939
1940 static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1941 {
1942         int i;
1943         for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1944         {
1945                 if (dev_priv->virt_surfaces[i].filp == filp)
1946                         free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1947         }
1948 }
1949
1950 /* ================================================================
1951  * IOCTL functions
1952  */
1953 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1954 {
1955         DRM_DEVICE;
1956         drm_radeon_private_t *dev_priv = dev->dev_private;
1957         drm_radeon_surface_alloc_t alloc;
1958
1959         if (!dev_priv) {
1960                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1961                 return DRM_ERR(EINVAL);
1962         }
1963
1964         DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1965                                   sizeof(alloc));
1966
1967         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1968                 return DRM_ERR(EINVAL);
1969         else
1970                 return 0;
1971 }
1972
1973 static int radeon_surface_free(DRM_IOCTL_ARGS)
1974 {
1975         DRM_DEVICE;
1976         drm_radeon_private_t *dev_priv = dev->dev_private;
1977         drm_radeon_surface_free_t memfree;
1978
1979         if (!dev_priv) {
1980                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1981                 return DRM_ERR(EINVAL);
1982         }
1983
1984         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *)data,
1985                                   sizeof(memfree) );
1986
1987         if (free_surface(filp, dev_priv, memfree.address))
1988                 return DRM_ERR(EINVAL);
1989         else
1990                 return 0;
1991 }
1992
/* ioctl: clear the color/depth buffers within the current cliprects.
 * Copies the per-cliprect depth clear rectangles from user space and
 * hands everything to radeon_cp_dispatch_clear().
 */
static int radeon_cp_clear( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
			     sizeof(clear) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp the client-supplied cliprect count to the sarea capacity */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	/* One user-space depth clear rect per cliprect */
	if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
			     sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );

	COMMIT_RING();
	return 0;
}
2021
2022
2023 /* Not sure why this isn't set all the time:
2024  */ 
/* Enable page flipping: set the OFFSET_FLIP_CNTL bit on both CRTC
 * offset-control registers (via the ring, after the 3D engine idles)
 * and reset the software page-flip state to page 0.
 */
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	/* Read-modify-write both CRTCs so any other bits are preserved */
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	/* Publish the displayed page through the shared area */
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2046
2047 /* Called whenever a client dies, from drm_release.
2048  * NOTE:  Lock isn't necessarily held when this is called!
2049  */
static int radeon_do_cleanup_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	/* If we are not displaying page 0, flip back to it before
	 * disabling page flipping.
	 */
	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip( dev );

	dev_priv->page_flipping = 0;
	return 0;
}
2061
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Page flipping is enabled lazily, the first time a client asks
	 * for a flip.
	 */
	if (!dev_priv->page_flipping) 
		radeon_do_init_pageflip( dev );
		
	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}
2083
/* Ioctl: perform a back-to-front buffer swap for the client's pending
 * cliprects.
 */
static int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp the client-supplied cliprect count to the SAREA limit. */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	/* NOTE(review): clearing ctx_owner forces a full context re-emit
	 * by the next client; presumably the swap blit invalidates the
	 * current 3D state — confirm in radeon_cp_dispatch_swap().
	 */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2104
2105 static int radeon_cp_vertex( DRM_IOCTL_ARGS )
2106 {
2107         DRM_DEVICE;
2108         drm_radeon_private_t *dev_priv = dev->dev_private;
2109         drm_file_t *filp_priv;
2110         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2111         drm_device_dma_t *dma = dev->dma;
2112         drm_buf_t *buf;
2113         drm_radeon_vertex_t vertex;
2114         drm_radeon_tcl_prim_t prim;
2115
2116         LOCK_TEST_WITH_RETURN( dev, filp );
2117
2118         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2119
2120         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
2121                              sizeof(vertex) );
2122
2123         DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
2124                    DRM_CURRENTPID,
2125                    vertex.idx, vertex.count, vertex.discard );
2126
2127         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2128                 DRM_ERROR( "buffer index %d (of %d max)\n",
2129                            vertex.idx, dma->buf_count - 1 );
2130                 return DRM_ERR(EINVAL);
2131         }
2132         if ( vertex.prim < 0 ||
2133              vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2134                 DRM_ERROR( "buffer prim %d\n", vertex.prim );
2135                 return DRM_ERR(EINVAL);
2136         }
2137
2138         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2139         VB_AGE_TEST_WITH_RETURN( dev_priv );
2140
2141         buf = dma->buflist[vertex.idx];
2142
2143         if ( buf->filp != filp ) {
2144                 DRM_ERROR( "process %d using buffer owned by %p\n",
2145                            DRM_CURRENTPID, buf->filp );
2146                 return DRM_ERR(EINVAL);
2147         }
2148         if ( buf->pending ) {
2149                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2150                 return DRM_ERR(EINVAL);
2151         }
2152
2153         /* Build up a prim_t record:
2154          */
2155         if (vertex.count) {
2156                 buf->used = vertex.count; /* not used? */
2157
2158                 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2159                         if ( radeon_emit_state( dev_priv, filp_priv,
2160                                                 &sarea_priv->context_state,
2161                                                 sarea_priv->tex_state,
2162                                                 sarea_priv->dirty ) ) {
2163                                 DRM_ERROR( "radeon_emit_state failed\n" );
2164                                 return DRM_ERR( EINVAL );
2165                         }
2166
2167                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2168                                                RADEON_UPLOAD_TEX1IMAGES |
2169                                                RADEON_UPLOAD_TEX2IMAGES |
2170                                                RADEON_REQUIRE_QUIESCENCE);
2171                 }
2172
2173                 prim.start = 0;
2174                 prim.finish = vertex.count; /* unused */
2175                 prim.prim = vertex.prim;
2176                 prim.numverts = vertex.count;
2177                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2178                 
2179                 radeon_cp_dispatch_vertex( dev, buf, &prim );
2180         }
2181
2182         if (vertex.discard) {
2183                 radeon_cp_discard_buffer( dev, buf );
2184         }
2185
2186         COMMIT_RING();
2187         return 0;
2188 }
2189
2190 static int radeon_cp_indices( DRM_IOCTL_ARGS )
2191 {
2192         DRM_DEVICE;
2193         drm_radeon_private_t *dev_priv = dev->dev_private;
2194         drm_file_t *filp_priv;
2195         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2196         drm_device_dma_t *dma = dev->dma;
2197         drm_buf_t *buf;
2198         drm_radeon_indices_t elts;
2199         drm_radeon_tcl_prim_t prim;
2200         int count;
2201
2202         LOCK_TEST_WITH_RETURN( dev, filp );
2203
2204         if ( !dev_priv ) {
2205                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2206                 return DRM_ERR(EINVAL);
2207         }
2208
2209         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2210
2211         DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
2212                              sizeof(elts) );
2213
2214         DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
2215                    DRM_CURRENTPID,
2216                    elts.idx, elts.start, elts.end, elts.discard );
2217
2218         if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
2219                 DRM_ERROR( "buffer index %d (of %d max)\n",
2220                            elts.idx, dma->buf_count - 1 );
2221                 return DRM_ERR(EINVAL);
2222         }
2223         if ( elts.prim < 0 ||
2224              elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2225                 DRM_ERROR( "buffer prim %d\n", elts.prim );
2226                 return DRM_ERR(EINVAL);
2227         }
2228
2229         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2230         VB_AGE_TEST_WITH_RETURN( dev_priv );
2231
2232         buf = dma->buflist[elts.idx];
2233
2234         if ( buf->filp != filp ) {
2235                 DRM_ERROR( "process %d using buffer owned by %p\n",
2236                            DRM_CURRENTPID, buf->filp );
2237                 return DRM_ERR(EINVAL);
2238         }
2239         if ( buf->pending ) {
2240                 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
2241                 return DRM_ERR(EINVAL);
2242         }
2243
2244         count = (elts.end - elts.start) / sizeof(u16);
2245         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2246
2247         if ( elts.start & 0x7 ) {
2248                 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
2249                 return DRM_ERR(EINVAL);
2250         }
2251         if ( elts.start < buf->used ) {
2252                 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
2253                 return DRM_ERR(EINVAL);
2254         }
2255
2256         buf->used = elts.end;
2257
2258         if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2259                 if ( radeon_emit_state( dev_priv, filp_priv,
2260                                         &sarea_priv->context_state,
2261                                         sarea_priv->tex_state,
2262                                         sarea_priv->dirty ) ) {
2263                         DRM_ERROR( "radeon_emit_state failed\n" );
2264                         return DRM_ERR( EINVAL );
2265                 }
2266
2267                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2268                                        RADEON_UPLOAD_TEX1IMAGES |
2269                                        RADEON_UPLOAD_TEX2IMAGES |
2270                                        RADEON_REQUIRE_QUIESCENCE);
2271         }
2272
2273
2274         /* Build up a prim_t record:
2275          */
2276         prim.start = elts.start;
2277         prim.finish = elts.end; 
2278         prim.prim = elts.prim;
2279         prim.offset = 0;        /* offset from start of dma buffers */
2280         prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2281         prim.vc_format = dev_priv->sarea_priv->vc_format;
2282         
2283         radeon_cp_dispatch_indices( dev, buf, &prim );
2284         if (elts.discard) {
2285                 radeon_cp_discard_buffer( dev, buf );
2286         }
2287
2288         COMMIT_RING();
2289         return 0;
2290 }
2291
2292 static int radeon_cp_texture( DRM_IOCTL_ARGS )
2293 {
2294         DRM_DEVICE;
2295         drm_radeon_private_t *dev_priv = dev->dev_private;
2296         drm_radeon_texture_t tex;
2297         drm_radeon_tex_image_t image;
2298         int ret;
2299
2300         LOCK_TEST_WITH_RETURN( dev, filp );
2301
2302         DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );
2303
2304         if ( tex.image == NULL ) {
2305                 DRM_ERROR( "null texture image!\n" );
2306                 return DRM_ERR(EINVAL);
2307         }
2308
2309         if ( DRM_COPY_FROM_USER( &image,
2310                              (drm_radeon_tex_image_t __user *)tex.image,
2311                              sizeof(image) ) )
2312                 return DRM_ERR(EFAULT);
2313
2314         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2315         VB_AGE_TEST_WITH_RETURN( dev_priv );
2316
2317         ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
2318
2319         COMMIT_RING();
2320         return ret;
2321 }
2322
/* Ioctl: upload a 32-dword polygon stipple pattern to the hardware. */
static int radeon_cp_stipple( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
			     sizeof(stipple) );

	/* Copy the whole pattern in up front so the dispatch routine
	 * never has to touch user memory.
	 */
	if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	radeon_cp_dispatch_stipple( dev, mask );

	COMMIT_RING();
	return 0;
}
2345
/* Ioctl: submit a raw (unverified) command buffer from a DMA buffer.
 *
 * The buffer contents are dispatched to the CP as-is, which is why this
 * path is restricted to privileged clients (see comment below).
 */
static int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* Only the owning client may submit the buffer, and never while
	 * it is already queued on the hardware.
	 */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	/* New data must start at or after the already-consumed region. */
	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}


	COMMIT_RING();
	return 0;
}
2420
/* Ioctl: newer multi-primitive vertex submission.
 *
 * A single DMA buffer carries an array of primitives, each optionally
 * switching to a different saved state block before it is dispatched.
 */
static int radeon_cp_vertex2( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	/* NOTE(review): dev_priv is dereferenced here, before the NULL
	 * check below — confirm init ordering guarantees dev_priv != NULL
	 * or move this assignment after the check.
	 */
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.discard );

	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}

	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}
	
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* laststate starts at the 0xff sentinel so the first primitive
	 * always emits its state block.
	 */
	for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;
		
		if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
			return DRM_ERR(EFAULT);
		
		/* Only re-emit state when the primitive selects a
		 * different state block than the previous one.
		 * NOTE(review): prim.stateidx is not range-checked
		 * against vertex.nr_states; the copy_from_user will
		 * fault on bad addresses, but verify the intended bound.
		 */
		if ( prim.stateidx != laststate ) {
			drm_radeon_state_t state;
				
			if ( DRM_COPY_FROM_USER( &state, 
					     &vertex.state[prim.stateidx], 
					     sizeof(state) ) )
				return DRM_ERR(EFAULT);

			if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
				DRM_ERROR( "radeon_emit_state2 failed\n" );
				return DRM_ERR( EINVAL );
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		/* Indexed primitives locate their index data after the
		 * vertex data; vertex-walk primitives use the count.
		 */
		if ( prim.prim & RADEON_PRIM_WALK_IND ) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */

			radeon_cp_dispatch_indices( dev, buf, &tclprim );
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0; /* not used */

			radeon_cp_dispatch_vertex( dev, buf, &tclprim );
		}
		
		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if ( vertex.discard ) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}
2525
2526
/* Emit a register-write (PACKET0) command described by an entry in the
 * global packet[] table, after verifying/fixing up the client-supplied
 * register payload.  Consumes sz dwords from cmdbuf on success.
 */
static int radeon_emit_packets( 
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;
   
	/* packet_id indexes the packet[] table; reject out-of-range ids. */
	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Never read past the end of the in-kernel command buffer copy. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	BEGIN_RING(sz+1);
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_TABLE( data, sz );
	ADVANCE_RING();

	/* Advance past the consumed payload. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2563
2564 static __inline__ int radeon_emit_scalars( 
2565         drm_radeon_private_t *dev_priv,
2566         drm_radeon_cmd_header_t header,
2567         drm_radeon_cmd_buffer_t *cmdbuf )
2568 {
2569         int sz = header.scalars.count;
2570         int start = header.scalars.offset;
2571         int stride = header.scalars.stride;
2572         RING_LOCALS;
2573
2574         BEGIN_RING( 3+sz );
2575         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2576         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2577         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2578         OUT_RING_TABLE( cmdbuf->buf, sz );
2579         ADVANCE_RING();
2580         cmdbuf->buf += sz * sizeof(int);
2581         cmdbuf->bufsz -= sz * sizeof(int);
2582         return 0;
2583 }
2584
2585 /* God this is ugly
2586  */
2587 static __inline__ int radeon_emit_scalars2( 
2588         drm_radeon_private_t *dev_priv,
2589         drm_radeon_cmd_header_t header,
2590         drm_radeon_cmd_buffer_t *cmdbuf )
2591 {
2592         int sz = header.scalars.count;
2593         int start = ((unsigned int)header.scalars.offset) + 0x100;
2594         int stride = header.scalars.stride;
2595         RING_LOCALS;
2596
2597         BEGIN_RING( 3+sz );
2598         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2599         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2600         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2601         OUT_RING_TABLE( cmdbuf->buf, sz );
2602         ADVANCE_RING();
2603         cmdbuf->buf += sz * sizeof(int);
2604         cmdbuf->bufsz -= sz * sizeof(int);
2605         return 0;
2606 }
2607
2608 static __inline__ int radeon_emit_vectors( 
2609         drm_radeon_private_t *dev_priv,
2610         drm_radeon_cmd_header_t header,
2611         drm_radeon_cmd_buffer_t *cmdbuf )
2612 {
2613         int sz = header.vectors.count;
2614         int start = header.vectors.offset;
2615         int stride = header.vectors.stride;
2616         RING_LOCALS;
2617
2618         BEGIN_RING( 3+sz );
2619         OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2620         OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2621         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2622         OUT_RING_TABLE( cmdbuf->buf, sz );
2623         ADVANCE_RING();
2624
2625         cmdbuf->buf += sz * sizeof(int);
2626         cmdbuf->bufsz -= sz * sizeof(int);
2627         return 0;
2628 }
2629
2630
/* Verify a client-supplied PACKET3 command and copy it to the ring.
 *
 * radeon_check_and_fixup_packet3() validates the opcode, fixes up any
 * embedded offsets, and reports the packet size in dwords via cmdsz.
 */
static int radeon_emit_packet3( drm_device_t *dev,
				drm_file_t *filp_priv,
				drm_radeon_cmd_buffer_t *cmdbuf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	BEGIN_RING( cmdsz );
	OUT_RING_TABLE( cmdbuf->buf, cmdsz );
	ADVANCE_RING();

	/* Advance past the consumed packet (cmdsz dwords). */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2656
2657
/* Verify a client-supplied PACKET3 command and replay it once per
 * cliprect, emitting each cliprect's scissor state before the packet.
 * With orig_nbox == 0 the packet is skipped but still consumed.
 */
static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}
		
		BEGIN_RING( cmdsz );
		OUT_RING_TABLE( cmdbuf->buf, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	/* A single cliprect is consumed entirely by this packet. */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

 out:
	/* Advance past the consumed packet (cmdsz dwords). */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2719
2720
2721 static int radeon_emit_wait( drm_device_t *dev, int flags )
2722 {
2723         drm_radeon_private_t *dev_priv = dev->dev_private;
2724         RING_LOCALS;
2725
2726         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2727         switch (flags) {
2728         case RADEON_WAIT_2D:
2729                 BEGIN_RING( 2 );
2730                 RADEON_WAIT_UNTIL_2D_IDLE(); 
2731                 ADVANCE_RING();
2732                 break;
2733         case RADEON_WAIT_3D:
2734                 BEGIN_RING( 2 );
2735                 RADEON_WAIT_UNTIL_3D_IDLE(); 
2736                 ADVANCE_RING();
2737                 break;
2738         case RADEON_WAIT_2D|RADEON_WAIT_3D:
2739                 BEGIN_RING( 2 );
2740                 RADEON_WAIT_UNTIL_IDLE(); 
2741                 ADVANCE_RING();
2742                 break;
2743         default:
2744                 return DRM_ERR(EINVAL);
2745         }
2746
2747         return 0;
2748 }
2749
/* Ioctl: parse and dispatch a verified command buffer.
 *
 * The buffer is copied into the kernel once, then walked header by
 * header, dispatching each sub-command to its emit helper.  R300-class
 * chips use a completely separate parser (r300_do_cp_cmdbuf).
 */
static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_cmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf=NULL;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
			     sizeof(cmdbuf) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	/* Cap the buffer at 64KB and reject negative sizes. */
	if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* R300 command streams have their own format and parser. */
	if(dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp=r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
	
		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	
		return temp;
	}

	/* microcode_version != r300 */
	while ( cmdbuf.bufsz >= sizeof(header) ) {

		/* Each sub-command starts with a one-dword header; the
		 * emit helpers advance buf/bufsz past their payloads.
		 */
		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET: 
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if ( idx < 0 || idx >= dma->buf_count ) {
				DRM_ERROR( "buffer index %d (of %d max)\n",
					   idx, dma->buf_count - 1 );
				goto err;
			}

			/* Only the owning client may discard, and only
			 * buffers not already queued on the hardware.
			 */
			buf = dma->buflist[idx];
			if ( buf->filp != filp || buf->pending ) {
				DRM_ERROR( "bad buffer %p %p %d\n",
					   buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer( dev, buf );
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait( dev, header.wait.flags )) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n", 
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

err:
	/* Free the kernel copy on any parse/emit failure. */
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2912
2913
2914
2915 static int radeon_cp_getparam( DRM_IOCTL_ARGS )
2916 {
2917         DRM_DEVICE;
2918         drm_radeon_private_t *dev_priv = dev->dev_private;
2919         drm_radeon_getparam_t param;
2920         int value;
2921
2922         if ( !dev_priv ) {
2923                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2924                 return DRM_ERR(EINVAL);
2925         }
2926
2927         DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
2928                              sizeof(param) );
2929
2930         DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
2931
2932         switch( param.param ) {
2933         case RADEON_PARAM_GART_BUFFER_OFFSET:
2934                 value = dev_priv->gart_buffers_offset;
2935                 break;
2936         case RADEON_PARAM_LAST_FRAME:
2937                 dev_priv->stats.last_frame_reads++;
2938                 value = GET_SCRATCH( 0 );
2939                 break;
2940         case RADEON_PARAM_LAST_DISPATCH:
2941                 value = GET_SCRATCH( 1 );
2942                 break;
2943         case RADEON_PARAM_LAST_CLEAR:
2944                 dev_priv->stats.last_clear_reads++;
2945                 value = GET_SCRATCH( 2 );
2946                 break;
2947         case RADEON_PARAM_IRQ_NR:
2948                 value = dev->irq;
2949                 break;
2950         case RADEON_PARAM_GART_BASE:
2951                 value = dev_priv->gart_vm_start;
2952                 break;
2953         case RADEON_PARAM_REGISTER_HANDLE:
2954                 value = dev_priv->mmio_offset;
2955                 break;
2956         case RADEON_PARAM_STATUS_HANDLE:
2957                 value = dev_priv->ring_rptr_offset;
2958                 break;
2959 #if BITS_PER_LONG == 32
2960         /*
2961          * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2962          * pointer which can't fit into an int-sized variable.  According to
2963          * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2964          * not supporting it shouldn't be a problem.  If the same functionality
2965          * is needed on 64-bit platforms, a new ioctl() would have to be added,
2966          * so backwards-compatibility for the embedded platforms can be
2967          * maintained.  --davidm 4-Feb-2004.
2968          */
2969         case RADEON_PARAM_SAREA_HANDLE:
2970                 /* The lock is the first dword in the sarea. */
2971                 value = (long)dev->lock.hw_lock;
2972                 break;
2973 #endif
2974         case RADEON_PARAM_GART_TEX_HANDLE:
2975                 value = dev_priv->gart_textures_offset;
2976                 break;
2977         default:
2978                 return DRM_ERR(EINVAL);
2979         }
2980
2981         if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
2982                 DRM_ERROR( "copy_to_user\n" );
2983                 return DRM_ERR(EFAULT);
2984         }
2985         
2986         return 0;
2987 }
2988
2989 static int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
2990         DRM_DEVICE;
2991         drm_radeon_private_t *dev_priv = dev->dev_private;
2992         drm_file_t *filp_priv;
2993         drm_radeon_setparam_t sp;
2994         struct drm_radeon_driver_file_fields *radeon_priv;
2995
2996         if ( !dev_priv ) {
2997                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2998                 return DRM_ERR( EINVAL );
2999         }
3000
3001         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
3002
3003         DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
3004                                   sizeof( sp ) );
3005
3006         switch( sp.param ) {
3007         case RADEON_SETPARAM_FB_LOCATION:
3008                 radeon_priv = filp_priv->driver_priv;
3009                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3010                 break;
3011         case RADEON_SETPARAM_SWITCH_TILING:
3012                 if (sp.value == 0) {
3013                         DRM_DEBUG( "color tiling disabled\n" );
3014                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3015                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3016                         dev_priv->sarea_priv->tiling_enabled = 0;
3017                 }
3018                 else if (sp.value == 1) {
3019                         DRM_DEBUG( "color tiling enabled\n" );
3020                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3021                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3022                         dev_priv->sarea_priv->tiling_enabled = 1;
3023                 }
3024                 break;  
3025         default:
3026                 DRM_DEBUG( "Invalid parameter %d\n", sp.param );
3027                 return DRM_ERR( EINVAL );
3028         }
3029
3030         return 0;
3031 }
3032
3033 /* When a client dies:
3034  *    - Check for and clean up flipped page state
3035  *    - Free any alloced GART memory.
3036  *
3037  * DRM infrastructure takes care of reclaiming dma buffers.
3038  */
3039 void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
3040 {
3041         if ( dev->dev_private ) {                               
3042                 drm_radeon_private_t *dev_priv = dev->dev_private; 
3043                 if ( dev_priv->page_flipping ) {                
3044                         radeon_do_cleanup_pageflip( dev );      
3045                 }                                               
3046                 radeon_mem_release( filp, dev_priv->gart_heap ); 
3047                 radeon_mem_release( filp, dev_priv->fb_heap );  
3048                 radeon_surfaces_release(filp, dev_priv);
3049         }                               
3050 }
3051
/* Called before the DRM core tears the device down; delegates all
 * hardware/driver cleanup to radeon_do_release(). */
void radeon_driver_pretakedown(drm_device_t *dev)
{
        radeon_do_release(dev);
}
3056
3057 int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
3058 {
3059         drm_radeon_private_t *dev_priv = dev->dev_private;
3060         struct drm_radeon_driver_file_fields *radeon_priv;
3061         
3062         radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3063         
3064         if (!radeon_priv)
3065                 return -ENOMEM;
3066
3067         filp_priv->driver_priv = radeon_priv;
3068         if ( dev_priv )
3069                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3070         else
3071                 radeon_priv->radeon_fb_delta = 0;
3072         return 0;
3073 }
3074
3075
3076 void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
3077 {
3078          struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;
3079          
3080          drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3081 }
3082
/* Radeon ioctl dispatch table, indexed by ioctl number.
 * NOTE(review): the two trailing integers look like the drm_ioctl_desc_t
 * "auth needed" / "root only" flags — confirm against the drm_ioctl_desc_t
 * definition in drmP.h. */
drm_ioctl_desc_t radeon_ioctls[] = {
        [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,      1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,     1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,      1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,     1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,      1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,    1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset, 1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,   1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,      1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,     1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,    1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,   1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,   1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,   1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,  1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,   1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,    1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,  1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,      1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,    1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,     1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap,1, 1 },
        [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,     1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,     1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,  1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
        [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free, 1, 0 }
};
3112
3113 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);