drm: add initial r300 3D support.
[safe/jmp/linux-2.6] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
40
/* Number of cliprects that are emitted to the hardware in one go. */
#define R300_SIMULTANEOUS_CLIPRECTS             4

/*
 * Value to program into R300_RE_CLIPRECT_CNTL for a given number of active
 * cliprects; the table is indexed by (number of cliprects - 1).
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
        0xAAAA,         /* 1 cliprect  */
        0xEEEE,         /* 2 cliprects */
        0xFEFE,         /* 3 cliprects */
        0xFFFE          /* 4 cliprects */
};
51
52
53 /**
54  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
55  * buffer, starting with index n.
56  */
57 static int r300_emit_cliprects(drm_radeon_private_t* dev_priv,
58                                drm_radeon_cmd_buffer_t* cmdbuf,
59                                int n)
60 {
61         drm_clip_rect_t box;
62         int nr;
63         int i;
64         RING_LOCALS;
65
66         nr = cmdbuf->nbox - n;
67         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
68                 nr = R300_SIMULTANEOUS_CLIPRECTS;
69
70         DRM_DEBUG("%i cliprects\n", nr);
71
72         if (nr) {
73                 BEGIN_RING(6 + nr*2);
74                 OUT_RING( CP_PACKET0( R300_RE_CLIPRECT_TL_0, nr*2 - 1 ) );
75
76                 for(i = 0; i < nr; ++i) {
77                         if (DRM_COPY_FROM_USER_UNCHECKED(&box, &cmdbuf->boxes[n+i], sizeof(box))) {
78                                 DRM_ERROR("copy cliprect faulted\n");
79                                 return DRM_ERR(EFAULT);
80                         }
81
82                         box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
83                         box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
84                         box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
85                         box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86
87                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
88                                         (box.y1 << R300_CLIPRECT_Y_SHIFT));
89                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
90                                         (box.y2 << R300_CLIPRECT_Y_SHIFT));
91                 }
92
93                 OUT_RING_REG( R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr-1] );
94
95                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
96                 * client might be able to trample over memory.
97                 * The impact should be very limited, but I'd rather be safe than
98                 * sorry.
99                 */
100                 OUT_RING( CP_PACKET0( R300_RE_SCISSORS_TL, 1 ) );
101                 OUT_RING( 0 );
102                 OUT_RING( R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK );
103                 ADVANCE_RING();
104                 } else {
105                 /* Why we allow zero cliprect rendering:
106                  * There are some commands in a command buffer that must be submitted
107                  * even when there are no cliprects, e.g. DMA buffer discard
108                  * or state setting (though state setting could be avoided by
109                  * simulating a loss of context).
110                  *
111                  * Now since the cmdbuf interface is so chaotic right now (and is
112                  * bound to remain that way for a bit until things settle down),
113                  * it is basically impossible to filter out the commands that are
114                  * necessary and those that aren't.
115                  *
116                  * So I choose the safe way and don't do any filtering at all;
117                  * instead, I simply set up the engine so that all rendering
118                  * can't produce any fragments.
119                  */
120                 BEGIN_RING(2);
121                 OUT_RING_REG( R300_RE_CLIPRECT_CNTL, 0 );
122                 ADVANCE_RING();
123                 }
124
125         return 0;
126 }
127
128 u8  r300_reg_flags[0x10000>>2];
129
130
131 void r300_init_reg_flags(void)
132 {
133         int i;
134         memset(r300_reg_flags, 0, 0x10000>>2);
135         #define ADD_RANGE_MARK(reg, count,mark) \
136                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
137                         r300_reg_flags[i]|=(mark);
138         
139         #define MARK_SAFE               1
140         #define MARK_CHECK_OFFSET       2
141         
142         #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
143
144         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
145         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
146         ADD_RANGE(0x2080, 1);
147         ADD_RANGE(R300_SE_VTE_CNTL, 2);
148         ADD_RANGE(0x2134, 2);
149         ADD_RANGE(0x2140, 1);
150         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
151         ADD_RANGE(0x21DC, 1);
152         ADD_RANGE(0x221C, 1);
153         ADD_RANGE(0x2220, 4);
154         ADD_RANGE(0x2288, 1);
155         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
156         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
157         ADD_RANGE(R300_GB_ENABLE, 1);
158         ADD_RANGE(R300_GB_MSPOS0, 5);
159         ADD_RANGE(R300_TX_ENABLE, 1);
160         ADD_RANGE(0x4200, 4);
161         ADD_RANGE(0x4214, 1);
162         ADD_RANGE(R300_RE_POINTSIZE, 1);
163         ADD_RANGE(0x4230, 3);
164         ADD_RANGE(R300_RE_LINE_CNT, 1);
165         ADD_RANGE(0x4238, 1);
166         ADD_RANGE(0x4260, 3);
167         ADD_RANGE(0x4274, 4);
168         ADD_RANGE(0x4288, 5);
169         ADD_RANGE(0x42A0, 1);
170         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
171         ADD_RANGE(0x42B4, 1);
172         ADD_RANGE(R300_RE_CULL_CNTL, 1);
173         ADD_RANGE(0x42C0, 2);
174         ADD_RANGE(R300_RS_CNTL_0, 2);
175         ADD_RANGE(R300_RS_INTERP_0, 8);
176         ADD_RANGE(R300_RS_ROUTE_0, 8);
177         ADD_RANGE(0x43A4, 2);
178         ADD_RANGE(0x43E8, 1);
179         ADD_RANGE(R300_PFS_CNTL_0, 3);
180         ADD_RANGE(R300_PFS_NODE_0, 4);
181         ADD_RANGE(R300_PFS_TEXI_0, 64);
182         ADD_RANGE(0x46A4, 5);
183         ADD_RANGE(R300_PFS_INSTR0_0, 64);
184         ADD_RANGE(R300_PFS_INSTR1_0, 64);
185         ADD_RANGE(R300_PFS_INSTR2_0, 64);
186         ADD_RANGE(R300_PFS_INSTR3_0, 64);
187         ADD_RANGE(0x4BC0, 1);
188         ADD_RANGE(0x4BC8, 3);
189         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
190         ADD_RANGE(0x4BD8, 1);
191         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
192         ADD_RANGE(0x4E00, 1);
193         ADD_RANGE(R300_RB3D_CBLEND, 2);
194         ADD_RANGE(R300_RB3D_COLORMASK, 1);
195         ADD_RANGE(0x4E10, 3);
196         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
197         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
198         ADD_RANGE(0x4E50, 9);
199         ADD_RANGE(0x4E88, 1);
200         ADD_RANGE(0x4EA0, 2);
201         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
202         ADD_RANGE(0x4F10, 4);
203         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
204         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1); 
205         ADD_RANGE(0x4F28, 1);
206         ADD_RANGE(0x4F30, 2);
207         ADD_RANGE(0x4F44, 1);
208         ADD_RANGE(0x4F54, 1);
209
210         ADD_RANGE(R300_TX_FILTER_0, 16);
211         ADD_RANGE(R300_TX_UNK1_0, 16);
212         ADD_RANGE(R300_TX_SIZE_0, 16);
213         ADD_RANGE(R300_TX_FORMAT_0, 16);
214                 /* Texture offset is dangerous and needs more checking */
215         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
216         ADD_RANGE(R300_TX_UNK4_0, 16);
217         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
218
219         /* Sporadic registers used as primitives are emitted */
220         ADD_RANGE(0x4f18, 1);
221         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
222         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
223         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
224
225 }
226
227 static __inline__ int r300_check_range(unsigned  reg, int count)
228 {
229         int i;
230         if(reg & ~0xffff)return -1;
231         for(i=(reg>>2);i<(reg>>2)+count;i++)
232                 if(r300_reg_flags[i]!=MARK_SAFE)return 1;
233         return 0;
234 }
235
236   /* we expect offsets passed to the framebuffer to be either within video memory or
237       within AGP space */
238 static __inline__ int r300_check_offset(drm_radeon_private_t* dev_priv, u32 offset)
239 {
240         /* we realy want to check against end of video aperture
241                 but this value is not being kept. 
242                 This code is correct for now (does the same thing as the
243                 code that sets MC_FB_LOCATION) in radeon_cp.c */
244         if((offset>=dev_priv->fb_location) && 
245                 (offset<dev_priv->gart_vm_start))return 0;
246         if((offset>=dev_priv->gart_vm_start) &&
247                  (offset<dev_priv->gart_vm_start+dev_priv->gart_size))return 0;
248         return 1;
249 }
250
251 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t* dev_priv,
252                                                 drm_radeon_cmd_buffer_t* cmdbuf,
253                                                 drm_r300_cmd_header_t header)
254 {
255         int reg;
256         int sz;
257         int i;
258         int values[64];
259         RING_LOCALS;
260
261         sz = header.packet0.count;
262         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
263         
264         if((sz>64)||(sz<0)){
265                 DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz);
266                 return DRM_ERR(EINVAL);
267                 }
268         for(i=0;i<sz;i++){
269                 values[i]=((int __user*)cmdbuf->buf)[i];
270                 switch(r300_reg_flags[(reg>>2)+i]){
271                 case MARK_SAFE:
272                         break;
273                 case MARK_CHECK_OFFSET:
274                         if(r300_check_offset(dev_priv, (u32)values[i])){
275                                 DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", reg, sz);
276                                 return DRM_ERR(EINVAL);
277                                 }
278                         break;
279                 default:
280                         DRM_ERROR("Register %04x failed check as flag=%02x\n", reg+i*4, r300_reg_flags[(reg>>2)+i]);
281                         return DRM_ERR(EINVAL);
282                         }
283                 }
284                 
285         BEGIN_RING(1+sz);
286         OUT_RING( CP_PACKET0( reg, sz-1 ) );
287         OUT_RING_TABLE( values, sz );
288         ADVANCE_RING();
289
290         cmdbuf->buf += sz*4;
291         cmdbuf->bufsz -= sz*4;
292
293         return 0;
294 }
295
296 /**
297  * Emits a packet0 setting arbitrary registers.
298  * Called by r300_do_cp_cmdbuf.
299  *
300  * Note that checks are performed on contents and addresses of the registers
301  */
302 static __inline__ int r300_emit_packet0(drm_radeon_private_t* dev_priv,
303                                                 drm_radeon_cmd_buffer_t* cmdbuf,
304                                                 drm_r300_cmd_header_t header)
305 {
306         int reg;
307         int sz;
308         RING_LOCALS;
309
310         sz = header.packet0.count;
311         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
312
313         if (!sz)
314                 return 0;
315
316         if (sz*4 > cmdbuf->bufsz)
317                 return DRM_ERR(EINVAL);
318                 
319         if (reg+sz*4 >= 0x10000){
320                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz);
321                 return DRM_ERR(EINVAL);
322                 }
323
324         if(r300_check_range(reg, sz)){
325                 /* go and check everything */
326                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header);
327                 }
328         /* the rest of the data is safe to emit, whatever the values the user passed */
329
330         BEGIN_RING(1+sz);
331         OUT_RING( CP_PACKET0( reg, sz-1 ) );
332         OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz );
333         ADVANCE_RING();
334
335         cmdbuf->buf += sz*4;
336         cmdbuf->bufsz -= sz*4;
337
338         return 0;
339 }
340
341
342 /**
343  * Uploads user-supplied vertex program instructions or parameters onto
344  * the graphics card.
345  * Called by r300_do_cp_cmdbuf.
346  */
347 static __inline__ int r300_emit_vpu(drm_radeon_private_t* dev_priv,
348                                     drm_radeon_cmd_buffer_t* cmdbuf,
349                                     drm_r300_cmd_header_t header)
350 {
351         int sz;
352         int addr;
353         RING_LOCALS;
354
355         sz = header.vpu.count;
356         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
357
358         if (!sz)
359                 return 0;
360         if (sz*16 > cmdbuf->bufsz)
361                 return DRM_ERR(EINVAL);
362
363         BEGIN_RING(5+sz*4);
364         /* Wait for VAP to come to senses.. */
365         /* there is no need to emit it multiple times, (only once before VAP is programmed,
366            but this optimization is for later */
367         OUT_RING_REG( R300_VAP_PVS_WAITIDLE, 0 );
368         OUT_RING_REG( R300_VAP_PVS_UPLOAD_ADDRESS, addr );
369         OUT_RING( CP_PACKET0_TABLE( R300_VAP_PVS_UPLOAD_DATA, sz*4 - 1 ) );
370         OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz*4 );
371
372         ADVANCE_RING();
373
374         cmdbuf->buf += sz*16;
375         cmdbuf->bufsz -= sz*16;
376
377         return 0;
378 }
379
380
381 /**
382  * Emit a clear packet from userspace.
383  * Called by r300_emit_packet3.
384  */
385 static __inline__ int r300_emit_clear(drm_radeon_private_t* dev_priv,
386                                       drm_radeon_cmd_buffer_t* cmdbuf)
387 {
388         RING_LOCALS;
389
390         if (8*4 > cmdbuf->bufsz)
391                 return DRM_ERR(EINVAL);
392
393         BEGIN_RING(10);
394         OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 8 ) );
395         OUT_RING( R300_PRIM_TYPE_POINT|R300_PRIM_WALK_RING|
396                   (1<<R300_PRIM_NUM_VERTICES_SHIFT) );
397         OUT_RING_TABLE( (int __user*)cmdbuf->buf, 8 );
398         ADVANCE_RING();
399
400         cmdbuf->buf += 8*4;
401         cmdbuf->bufsz -= 8*4;
402
403         return 0;
404 }
405
406 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t* dev_priv,
407                                       drm_radeon_cmd_buffer_t* cmdbuf,
408                                       u32 header)
409 {
410         int count, i,k;
411         #define MAX_ARRAY_PACKET  64
412         u32 payload[MAX_ARRAY_PACKET];
413         u32 narrays;
414         RING_LOCALS;
415
416         count=(header>>16) & 0x3fff;
417         
418         if((count+1)>MAX_ARRAY_PACKET){
419                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count);
420                 return DRM_ERR(EINVAL);
421                 }
422         memset(payload, 0, MAX_ARRAY_PACKET*4);
423         memcpy(payload, cmdbuf->buf+4, (count+1)*4);    
424         
425         /* carefully check packet contents */
426         
427         narrays=payload[0];
428         k=0;
429         i=1;
430         while((k<narrays) && (i<(count+1))){
431                 i++; /* skip attribute field */
432                 if(r300_check_offset(dev_priv, payload[i])){
433                         DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
434                         return DRM_ERR(EINVAL);
435                         }
436                 k++;
437                 i++;
438                 if(k==narrays)break;
439                 /* have one more to process, they come in pairs */
440                 if(r300_check_offset(dev_priv, payload[i])){
441                         DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
442                         return DRM_ERR(EINVAL);
443                         }
444                 k++;
445                 i++;                    
446                 }
447         /* do the counts match what we expect ? */
448         if((k!=narrays) || (i!=(count+1))){
449                 DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", k, i, narrays, count+1);
450                 return DRM_ERR(EINVAL);
451                 }
452
453         /* all clear, output packet */
454
455         BEGIN_RING(count+2);
456         OUT_RING(header);
457         OUT_RING_TABLE(payload, count+1);
458         ADVANCE_RING();
459
460         cmdbuf->buf += (count+2)*4;
461         cmdbuf->bufsz -= (count+2)*4;
462
463         return 0;
464 }
465
466 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t* dev_priv,
467                                       drm_radeon_cmd_buffer_t* cmdbuf)
468 {
469         u32 header;
470         int count;
471         RING_LOCALS;
472
473         if (4 > cmdbuf->bufsz)
474                 return DRM_ERR(EINVAL);
475
476         /* Fixme !! This simply emits a packet without much checking.
477            We need to be smarter. */
478
479         /* obtain first word - actual packet3 header */
480         header = *(u32 __user*)cmdbuf->buf;
481
482         /* Is it packet 3 ? */
483         if( (header>>30)!=0x3 ) {
484                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
485                 return DRM_ERR(EINVAL);
486                 }
487
488         count=(header>>16) & 0x3fff;
489
490         /* Check again now that we know how much data to expect */
491         if ((count+2)*4 > cmdbuf->bufsz){
492                 DRM_ERROR("Expected packet3 of length %d but have only %d bytes left\n",
493                         (count+2)*4, cmdbuf->bufsz);
494                 return DRM_ERR(EINVAL);
495                 }
496
497         /* Is it a packet type we know about ? */
498         switch(header & 0xff00){
499         case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
500                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
501
502         case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
503         case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
504         case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
505         case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
506         case RADEON_WAIT_FOR_IDLE:
507         case RADEON_CP_NOP:
508                 /* these packets are safe */
509                 break;
510         default:
511                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
512                 return DRM_ERR(EINVAL);
513                 }
514
515
516         BEGIN_RING(count+2);
517         OUT_RING(header);
518         OUT_RING_TABLE( (int __user*)(cmdbuf->buf+4), count+1);
519         ADVANCE_RING();
520
521         cmdbuf->buf += (count+2)*4;
522         cmdbuf->bufsz -= (count+2)*4;
523
524         return 0;
525 }
526
527
528 /**
529  * Emit a rendering packet3 from userspace.
530  * Called by r300_do_cp_cmdbuf.
531  */
532 static __inline__ int r300_emit_packet3(drm_radeon_private_t* dev_priv,
533                                         drm_radeon_cmd_buffer_t* cmdbuf,
534                                         drm_r300_cmd_header_t header)
535 {
536         int n;
537         int ret;
538         char __user* orig_buf = cmdbuf->buf;
539         int orig_bufsz = cmdbuf->bufsz;
540
541         /* This is a do-while-loop so that we run the interior at least once,
542          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
543          */
544         n = 0;
545         do {
546                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
547                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
548                         if (ret)
549                                 return ret;
550
551                         cmdbuf->buf = orig_buf;
552                         cmdbuf->bufsz = orig_bufsz;
553                         }
554
555                 switch(header.packet3.packet) {
556                 case R300_CMD_PACKET3_CLEAR:
557                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
558                         ret = r300_emit_clear(dev_priv, cmdbuf);
559                         if (ret) {
560                                 DRM_ERROR("r300_emit_clear failed\n");
561                                 return ret;
562                                 }
563                         break;
564
565                 case R300_CMD_PACKET3_RAW:
566                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
567                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
568                         if (ret) {
569                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
570                                 return ret;
571                                 }
572                         break;
573
574                 default:
575                         DRM_ERROR("bad packet3 type %i at %p\n",
576                                 header.packet3.packet,
577                                 cmdbuf->buf - sizeof(header));
578                         return DRM_ERR(EINVAL);
579                         }
580
581                 n += R300_SIMULTANEOUS_CLIPRECTS;
582         } while(n < cmdbuf->nbox);
583
584         return 0;
585 }
586
587 /* Some of the R300 chips seem to be extremely touchy about the two registers
588  * that are configured in r300_pacify.
589  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
590  * sends a command buffer that contains only state setting commands and a
591  * vertex program/parameter upload sequence, this will eventually lead to a
592  * lockup, unless the sequence is bracketed by calls to r300_pacify.
593  * So we should take great care to *always* call r300_pacify before
594  * *anything* 3D related, and again afterwards. This is what the
595  * call bracket in r300_do_cp_cmdbuf is for.
596  */
597
598 /**
599  * Emit the sequence to pacify R300.
600  */
601 static __inline__ void r300_pacify(drm_radeon_private_t* dev_priv)
602 {
603         RING_LOCALS;
604
605         BEGIN_RING(6);
606         OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) );
607         OUT_RING( 0xa );
608         OUT_RING( CP_PACKET0( 0x4f18, 0 ) );
609         OUT_RING( 0x3 );
610         OUT_RING( CP_PACKET3( RADEON_CP_NOP, 0 ) );
611         OUT_RING( 0x0 );
612         ADVANCE_RING();
613 }
614
615
616 /**
617  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
618  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
619  * be careful about how this function is called.
620  */
621 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
622 {
623         drm_radeon_private_t *dev_priv = dev->dev_private;
624         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
625
626         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
627         buf->pending = 1;
628         buf->used = 0;
629 }
630
631
632 /**
633  * Parses and validates a user-supplied command buffer and emits appropriate
634  * commands on the DMA ring buffer.
635  * Called by the ioctl handler function radeon_cp_cmdbuf.
636  */
637 int r300_do_cp_cmdbuf(drm_device_t* dev,
638                           DRMFILE filp,
639                       drm_file_t* filp_priv,
640                       drm_radeon_cmd_buffer_t* cmdbuf)
641 {
642         drm_radeon_private_t *dev_priv = dev->dev_private;
643         drm_device_dma_t *dma = dev->dma;
644         drm_buf_t *buf = NULL;
645         int emit_dispatch_age = 0;
646         int ret = 0;
647
648         DRM_DEBUG("\n");
649
650         /* See the comment above r300_emit_begin3d for why this call must be here,
651          * and what the cleanup gotos are for. */
652         r300_pacify(dev_priv);
653
654         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
655                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
656                 if (ret)
657                         goto cleanup;
658                 }
659
660         while(cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
661                 int idx;
662                 drm_r300_cmd_header_t header;
663
664                 header.u = *(unsigned int *)cmdbuf->buf;
665
666                 cmdbuf->buf += sizeof(header);
667                 cmdbuf->bufsz -= sizeof(header);
668
669                 switch(header.header.cmd_type) {
670                 case R300_CMD_PACKET0: 
671                         DRM_DEBUG("R300_CMD_PACKET0\n");
672                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
673                         if (ret) {
674                                 DRM_ERROR("r300_emit_packet0 failed\n");
675                                 goto cleanup;
676                                 }
677                         break;
678
679                 case R300_CMD_VPU:
680                         DRM_DEBUG("R300_CMD_VPU\n");
681                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
682                         if (ret) {
683                                 DRM_ERROR("r300_emit_vpu failed\n");
684                                 goto cleanup;
685                                 }
686                         break;
687
688                 case R300_CMD_PACKET3:
689                         DRM_DEBUG("R300_CMD_PACKET3\n");
690                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
691                         if (ret) {
692                                 DRM_ERROR("r300_emit_packet3 failed\n");
693                                 goto cleanup;
694                                 }
695                         break;
696
697                 case R300_CMD_END3D:
698                         DRM_DEBUG("R300_CMD_END3D\n");
699                         /* TODO: 
700                                 Ideally userspace driver should not need to issue this call, 
701                                 i.e. the drm driver should issue it automatically and prevent
702                                 lockups.
703                                 
704                                 In practice, we do not understand why this call is needed and what
705                                 it does (except for some vague guesses that it has to do with cache
706                                 coherence) and so the user space driver does it. 
707                                 
708                                 Once we are sure which uses prevent lockups the code could be moved
709                                 into the kernel and the userspace driver will not
710                                 need to use this command.
711
712                                 Note that issuing this command does not hurt anything
713                                 except, possibly, performance */
714                         r300_pacify(dev_priv);
715                         break;
716
717                 case R300_CMD_CP_DELAY:
718                         /* simple enough, we can do it here */
719                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
720                         {
721                                 int i;
722                                 RING_LOCALS;
723
724                                 BEGIN_RING(header.delay.count);
725                                 for(i=0;i<header.delay.count;i++)
726                                         OUT_RING(RADEON_CP_PACKET2);
727                                 ADVANCE_RING();
728                         }
729                         break;
730
731                 case R300_CMD_DMA_DISCARD:
732                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
733                         idx = header.dma.buf_idx;
734                         if (idx < 0 || idx >= dma->buf_count) {
735                                 DRM_ERROR("buffer index %d (of %d max)\n",
736                                         idx, dma->buf_count - 1);
737                                 ret = DRM_ERR(EINVAL);
738                                 goto cleanup;
739                                 }
740
741                         buf = dma->buflist[idx];
742                         if (buf->filp != filp || buf->pending) {
743                                 DRM_ERROR("bad buffer %p %p %d\n",
744                                 buf->filp, filp, buf->pending);
745                                 ret = DRM_ERR(EINVAL);
746                                 goto cleanup;
747                                 }
748
749                         emit_dispatch_age = 1;
750                         r300_discard_buffer(dev, buf);
751                         break;
752
753                 case R300_CMD_WAIT:
754                         /* simple enough, we can do it here */
755                         DRM_DEBUG("R300_CMD_WAIT\n");
756                         if(header.wait.flags==0)break; /* nothing to do */
757
758                         {
759                                 RING_LOCALS;
760
761                                 BEGIN_RING(2);
762                                 OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) );
763                                 OUT_RING( (header.wait.flags & 0xf)<<14 );
764                                 ADVANCE_RING();
765                         }
766                         break;
767
768                 default:
769                         DRM_ERROR("bad cmd_type %i at %p\n",
770                                   header.header.cmd_type,
771                                   cmdbuf->buf - sizeof(header));
772                         ret = DRM_ERR(EINVAL);
773                         goto cleanup;
774                         }
775         }
776
777         DRM_DEBUG("END\n");
778
779 cleanup:
780         r300_pacify(dev_priv);
781
782         /* We emit the vertex buffer age here, outside the pacifier "brackets"
783          * for two reasons:
784          *  (1) This may coalesce multiple age emissions into a single one and
785          *  (2) more importantly, some chips lock up hard when scratch registers
786          *      are written inside the pacifier bracket.
787          */
788         if (emit_dispatch_age) {
789                 RING_LOCALS;
790
791                 /* Emit the vertex buffer age */
792                 BEGIN_RING(2);
793                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
794                 ADVANCE_RING();
795                 }
796
797         COMMIT_RING();
798
799         return ret;
800 }
801